1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                       Written by Philip Hazel
9           Copyright (c) 1997-2012 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15    * Redistributions of source code must retain the above copyright notice,
16      this list of conditions and the following disclaimer.
17
18    * Redistributions in binary form must reproduce the above copyright
19      notice, this list of conditions and the following disclaimer in the
20      documentation and/or other materials provided with the distribution.
21
22    * Neither the name of the University of Cambridge nor the names of its
23      contributors may be used to endorse or promote products derived from
24      this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40/* This module contains pcre_exec(), the externally visible function that does
41pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42possible. There are also some static supporting functions. */
43
44#ifdef HAVE_CONFIG_H
45#include "config.h"
46#endif
47
48#define NLBLOCK md             /* Block containing newline information */
49#define PSSTART start_subject  /* Field containing processed string start */
50#define PSEND   end_subject    /* Field containing processed string end */
51
52#include "pcre_internal.h"
53
54/* Undefine some potentially clashing cpp symbols */
55
56#undef min
57#undef max
58
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. */

#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. These values are passed back up
through nested match() calls and are acted on by the opcode handlers for the
backtracking verbs. */

#define MATCH_ACCEPT       (-999)
#define MATCH_COMMIT       (-998)
#define MATCH_KETRPOS      (-997)
#define MATCH_ONCE         (-996)
#define MATCH_PRUNE        (-995)
#define MATCH_SKIP         (-994)
#define MATCH_SKIP_ARG     (-993)
#define MATCH_THEN         (-992)

/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
89
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The entries come in three pairs: {0, infinity}, {1, infinity} and {0, 1}.
NOTE(review): presumably each pair is the greedy and the minimizing form of
one quantifier, indexed by opcode offset - confirm against the users of these
tables. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
94
95
96
#ifdef PCRE_DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. Printable characters are written as themselves; anything
else is written as a \x escape with at least two hex digits.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject
  md          pointer to matching data block, if is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
{
unsigned int c;

/* Never read beyond the end of the subject when printing from within it. The
md block is consulted only in this case. */

if (is_subject && length > md->end_subject - p)
  length = (int)(md->end_subject - p);

/* isprint() is defined only for arguments representable as unsigned char (or
EOF). A code unit may be wider than that if pcre_uchar is not an 8-bit type, so
range-check first and print anything larger in hex. */

while (length-- > 0)
  {
  c = *(p++);
  if (c <= 0xff && isprint((int)c)) printf("%c", (int)c);
    else printf("\\x%02x", c);
  }
}
#endif
123
124
125
126/*************************************************
127*          Match a back-reference                *
128*************************************************/
129
130/* Normally, if a back reference hasn't been set, the length that is passed is
131negative, so the match always fails. However, in JavaScript compatibility mode,
132the length passed is zero. Note that in caseless UTF-8 mode, the number of
133subject bytes matched may be different to the number of reference bytes.
134
135Arguments:
136  offset      index into the offset vector
137  eptr        pointer into the subject
138  length      length of reference to be matched (number of bytes)
139  md          points to match data block
140  caseless    TRUE if caseless
141
142Returns:      >= 0 the number of subject bytes matched
143              -1 no match
144              -2 partial match; always given if at end subject
145*/
146
147static int
148match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
149  BOOL caseless)
150{
151PCRE_PUCHAR eptr_start = eptr;
152register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
153
154#ifdef PCRE_DEBUG
155if (eptr >= md->end_subject)
156  printf("matching subject <null>");
157else
158  {
159  printf("matching subject ");
160  pchars(eptr, length, TRUE, md);
161  }
162printf(" against backref ");
163pchars(p, length, FALSE, md);
164printf("\n");
165#endif
166
167/* Always fail if reference not set (and not JavaScript compatible - in that
168case the length is passed as zero). */
169
170if (length < 0) return -1;
171
172/* Separate the caseless case for speed. In UTF-8 mode we can only do this
173properly if Unicode properties are supported. Otherwise, we can check only
174ASCII characters. */
175
176if (caseless)
177  {
178#ifdef SUPPORT_UTF
179#ifdef SUPPORT_UCP
180  if (md->utf)
181    {
182    /* Match characters up to the end of the reference. NOTE: the number of
183    bytes matched may differ, because there are some characters whose upper and
184    lower case versions code as different numbers of bytes. For example, U+023A
185    (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
186    a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
187    the latter. It is important, therefore, to check the length along the
188    reference, not along the subject (earlier code did this wrong). */
189
190    PCRE_PUCHAR endptr = p + length;
191    while (p < endptr)
192      {
193      int c, d;
194      if (eptr >= md->end_subject) return -2;   /* Partial match */
195      GETCHARINC(c, eptr);
196      GETCHARINC(d, p);
197      if (c != d && c != UCD_OTHERCASE(d)) return -1;
198      }
199    }
200  else
201#endif
202#endif
203
204  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
205  is no UCP support. */
206    {
207    while (length-- > 0)
208      {
209      if (eptr >= md->end_subject) return -2;   /* Partial match */
210      if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
211      p++;
212      eptr++;
213      }
214    }
215  }
216
217/* In the caseful case, we can just compare the bytes, whether or not we
218are in UTF-8 mode. */
219
220else
221  {
222  while (length-- > 0)
223    {
224    if (eptr >= md->end_subject) return -2;   /* Partial match */
225    if (*p++ != *eptr++) return -1;
226    }
227  }
228
229return (int)(eptr - eptr_start);
230}
231
232
233
234/***************************************************************************
235****************************************************************************
236                   RECURSION IN THE match() FUNCTION
237
238The match() function is highly recursive, though not every recursive call
239increases the recursive depth. Nevertheless, some regular expressions can cause
240it to recurse to a great depth. I was writing for Unix, so I just let it call
241itself recursively. This uses the stack for saving everything that has to be
242saved for a recursive call. On Unix, the stack can be large, and this works
243fine.
244
245It turns out that on some non-Unix-like systems there are problems with
246programs that use a lot of stack. (This despite the fact that every last chip
247has oodles of memory these days, and techniques for extending the stack have
248been known for decades.) So....
249
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to achieve
this so that the actual code doesn't look very different to what it always
used to.
255
256The original heap-recursive code used longjmp(). However, it seems that this
257can be very slow on some operating systems. Following a suggestion from Stan
258Switzer, the use of longjmp() has been abolished, at the cost of having to
259provide a unique number for each call to RMATCH. There is no way of generating
260a sequence of numbers at compile time in C. I have given them names, to make
261them stand out more clearly.
262
263Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
264FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
265tests. Furthermore, not using longjmp() means that local dynamic variables
266don't have indeterminate values; this has meant that the frame size can be
267reduced because the result can be "passed back" by straight setting of the
268variable instead of being passed in the frame.
269****************************************************************************
270***************************************************************************/
271
/* Numbers for RMATCH calls. Each call site of RMATCH passes a distinct one of
these as its last argument. When this list is changed, the code at HEAP_RETURN
below must be updated in sync.  */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
       RM61,  RM62, RM63, RM64, RM65, RM66 };
282
/* RMATCH and RRETURN are the "call" and "return" primitives used throughout
match(). The arguments of RMATCH are:

  ra   subject pointer for the inner call (eptr)
  rb   code pointer for the inner call (ecode)
  rc   offset_top for the inner call
  rd   the match data block (md)
  re   eptrb for the inner call
  rw   one of the RMn numbers above, unique to the call site

The current value of mstart is always passed on implicitly, and the result of
the inner call is left in the variable rrc.

These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
actually used in this definition. */

#ifndef NO_RECURSE
#define REGISTER register

#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra
#endif

#else


/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes.

RMATCH reuses the frame already chained after the current one if there is one;
otherwise it obtains a new frame with the stack_malloc function and chains it
on. It records rw in the calling frame, fills in the argument fields of the new
frame, makes it current, and jumps to HEAP_RECURSE. The label L_<rw> marks the
point at which execution resumes when the inner "call" returns.

RRETURN steps back to the previous frame. If there is one, the result is put
in rrc and control goes to HEAP_RETURN; otherwise this is the outermost level
and the function really returns. Frames are not released here; they stay on
the chain for reuse. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }


/* Structure for remembering the local variables in a private frame. Each
field Xname stands in for the variable "name" of match(); the function maps
the plain names onto the current frame with #defines. */

typedef struct heapframe {
  struct heapframe *Xprevframe;   /* The frame that "called" this one */
  struct heapframe *Xnextframe;   /* Next frame on the chain, or NULL */

  /* Function arguments that may change */

  PCRE_PUCHAR Xeptr;
  const pcre_uchar *Xecode;
  PCRE_PUCHAR Xmstart;
  int Xoffset_top;
  eptrblock *Xeptrb;
  unsigned int Xrdepth;

  /* Function local variables */

  PCRE_PUCHAR Xcallpat;
#ifdef SUPPORT_UTF
  PCRE_PUCHAR Xcharptr;
#endif
  PCRE_PUCHAR Xdata;
  PCRE_PUCHAR Xnext;
  PCRE_PUCHAR Xpp;
  PCRE_PUCHAR Xprev;
  PCRE_PUCHAR Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

#ifdef SUPPORT_UCP
  int Xprop_type;
  int Xprop_value;
  int Xprop_fail_result;
  int Xoclength;
  pcre_uchar Xocchars[6];
#endif

  int Xcodelink;
  int Xctype;
  unsigned int Xfc;
  int Xfi;
  int Xlength;
  int Xmax;
  int Xmin;
  int Xnumber;
  int Xoffset;
  int Xop;
  int Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  eptrblock Xnewptrb;

  /* Where to jump back to: the RMn value stored by RMATCH */

  int Xwhere;

} heapframe;

#endif
419
420
421/***************************************************************************
422***************************************************************************/
423
424
425
426/*************************************************
427*         Match from current position            *
428*************************************************/
429
430/* This function is called recursively in many circumstances. Whenever it
431returns a negative (error) response, the outer incarnation must also return the
432same response. */
433
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the subject (i.e.
something has been matched). For hard partial matching, we then return
immediately. The second one is used when we already know we are past the end of
the subject.

Both macros refer to the variables eptr and md of the enclosing function and
use RRETURN to leave it. Each expands to a bare "if" statement with no "else",
so it must not be used as the unbraced body of an outer "if" that has an "else"
of its own. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
455
456
457/* Performance note: It might be tempting to extract commonly used fields from
458the md structure (e.g. utf, end_subject) into individual variables to improve
459performance. Tests using gcc on a SPARC disproved this; in the first case, it
460made performance worse.
461
462Arguments:
463   eptr        pointer to current character in subject
464   ecode       pointer to current position in compiled code
465   mstart      pointer to the current match start position (can be modified
466                 by encountering \K)
467   offset_top  current top pointer
468   md          pointer to "static" info for the match
469   eptrb       pointer to chain of blocks containing eptr at start of
470                 brackets - for testing for empty matches
471   rdepth      the recursion depth
472
473Returns:       MATCH_MATCH if matched            )  these values are >= 0
474               MATCH_NOMATCH if failed to match  )
475               a negative MATCH_xxx value for PRUNE, SKIP, etc
476               a negative PCRE_ERROR_xxx value if aborted by an error condition
477                 (e.g. stopped by repeated call or recursion limit)
478*/
479
480static int
481match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
482  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
483  unsigned int rdepth)
484{
485/* These variables do not need to be preserved over recursion in this function,
486so they can be ordinary variables in all cases. Mark some of them with
487"register" because they are used a lot in loops. */
488
489register int  rrc;         /* Returns from recursive calls */
490register int  i;           /* Used for loops not involving calls to RMATCH() */
491register unsigned int c;   /* Character values not kept over RMATCH() calls */
492register BOOL utf;         /* Local copy of UTF flag for speed */
493
494BOOL minimize, possessive; /* Quantifier options */
495BOOL caseless;
496int condcode;
497
498/* When recursion is not being used, all "local" variables that have to be
499preserved over calls to RMATCH() are part of a "frame". We set up the top-level
500frame on the stack here; subsequent instantiations are obtained from the heap
501whenever RMATCH() does a "recursion". See the macro definitions above. Putting
502the top-level on the stack rather than malloc-ing them all gives a performance
503boost in many cases where there is not much "recursion". */
504
505#ifdef NO_RECURSE
506heapframe *frame = (heapframe *)md->match_frames_base;
507
508/* Copy in the original argument variables */
509
510frame->Xeptr = eptr;
511frame->Xecode = ecode;
512frame->Xmstart = mstart;
513frame->Xoffset_top = offset_top;
514frame->Xeptrb = eptrb;
515frame->Xrdepth = rdepth;
516
/* This is where control jumps back to effect "recursion" */
518
519HEAP_RECURSE:
520
521/* Macros make the argument variables come from the current frame */
522
523#define eptr               frame->Xeptr
524#define ecode              frame->Xecode
525#define mstart             frame->Xmstart
526#define offset_top         frame->Xoffset_top
527#define eptrb              frame->Xeptrb
528#define rdepth             frame->Xrdepth
529
530/* Ditto for the local variables */
531
532#ifdef SUPPORT_UTF
533#define charptr            frame->Xcharptr
534#endif
535#define callpat            frame->Xcallpat
536#define codelink           frame->Xcodelink
537#define data               frame->Xdata
538#define next               frame->Xnext
539#define pp                 frame->Xpp
540#define prev               frame->Xprev
541#define saved_eptr         frame->Xsaved_eptr
542
543#define new_recursive      frame->Xnew_recursive
544
545#define cur_is_word        frame->Xcur_is_word
546#define condition          frame->Xcondition
547#define prev_is_word       frame->Xprev_is_word
548
549#ifdef SUPPORT_UCP
550#define prop_type          frame->Xprop_type
551#define prop_value         frame->Xprop_value
552#define prop_fail_result   frame->Xprop_fail_result
553#define oclength           frame->Xoclength
554#define occhars            frame->Xocchars
555#endif
556
557#define ctype              frame->Xctype
558#define fc                 frame->Xfc
559#define fi                 frame->Xfi
560#define length             frame->Xlength
561#define max                frame->Xmax
562#define min                frame->Xmin
563#define number             frame->Xnumber
564#define offset             frame->Xoffset
565#define op                 frame->Xop
566#define save_capture_last  frame->Xsave_capture_last
567#define save_offset1       frame->Xsave_offset1
568#define save_offset2       frame->Xsave_offset2
569#define save_offset3       frame->Xsave_offset3
570#define stacksave          frame->Xstacksave
571
572#define newptrb            frame->Xnewptrb
573
574/* When recursion is being used, local variables are allocated on the stack and
575get preserved during recursion in the normal way. In this environment, fi and
576i, and fc and c, can be the same variables. */
577
578#else         /* NO_RECURSE not defined */
579#define fi i
580#define fc c
581
582/* Many of the following variables are used only in small blocks of the code.
583My normal style of coding would have declared them within each of those blocks.
584However, in order to accommodate the version of this code that uses an external
585"stack" implemented on the heap, it is easier to declare them all here, so the
586declarations can be cut out in a block. The only declarations within blocks
587below are for variables that do not have to be preserved over a recursive call
588to RMATCH(). */
589
590#ifdef SUPPORT_UTF
591const pcre_uchar *charptr;
592#endif
593const pcre_uchar *callpat;
594const pcre_uchar *data;
595const pcre_uchar *next;
596PCRE_PUCHAR       pp;
597const pcre_uchar *prev;
598PCRE_PUCHAR       saved_eptr;
599
600recursion_info new_recursive;
601
602BOOL cur_is_word;
603BOOL condition;
604BOOL prev_is_word;
605
606#ifdef SUPPORT_UCP
607int prop_type;
608int prop_value;
609int prop_fail_result;
610int oclength;
611pcre_uchar occhars[6];
612#endif
613
614int codelink;
615int ctype;
616int length;
617int max;
618int min;
619int number;
620int offset;
621int op;
622int save_capture_last;
623int save_offset1, save_offset2, save_offset3;
624int stacksave[REC_STACK_SAVE_MAX];
625
626eptrblock newptrb;
627
628/* There is a special fudge for calling match() in a way that causes it to
629measure the size of its basic stack frame when the stack is being used for
630recursion. The second argument (ecode) being NULL triggers this behaviour. It
631cannot normally ever be NULL. The return is the negated value of the frame
632size. */
633
634if (ecode == NULL)
635  {
636  if (rdepth == 0)
637    return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
638  else
639    {
640    int len = (char *)&rdepth - (char *)eptr;
641    return (len > 0)? -len : len;
642    }
643  }
644#endif     /* NO_RECURSE */
645
646/* To save space on the stack and in the heap frame, I have doubled up on some
647of the local variables that are used only in localised parts of the code, but
648still need to be preserved over recursive calls of match(). These macros define
649the alternative names that are used. */
650
651#define allow_zero    cur_is_word
652#define cbegroup      condition
653#define code_offset   codelink
654#define condassert    condition
655#define matched_once  prev_is_word
656#define foc           number
657#define save_mark     data
658
/* These statements are here to stop the compiler complaining about
uninitialized variables. */
661
662#ifdef SUPPORT_UCP
663prop_value = 0;
664prop_fail_result = 0;
665#endif
666
667
668/* This label is used for tail recursion, which is used in a few cases even
669when NO_RECURSE is not defined, in order to reduce the amount of stack that is
670used. Thanks to Ian Taylor for noticing this possibility and sending the
671original patch. */
672
673TAIL_RECURSE:
674
675/* OK, now we can get on with the real code of the function. Recursive calls
676are specified by the macro RMATCH and RRETURN is used to return. When
677NO_RECURSE is *not* defined, these just turn into a recursive call to match()
678and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
679defined). However, RMATCH isn't like a function call because it's quite a
680complicated macro. It has to be used in one particular way. This shouldn't,
681however, impact performance when true recursion is being used. */
682
683#ifdef SUPPORT_UTF
684utf = md->utf;       /* Local copy of the flag */
685#else
686utf = FALSE;
687#endif
688
689/* First check that we haven't called match() too many times, or that we
690haven't exceeded the recursive call limit. */
691
692if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
693if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
694
695/* At the start of a group with an unlimited repeat that may match an empty
696string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
697done this way to save having to use another function argument, which would take
698up space on the stack. See also MATCH_CONDASSERT below.
699
700When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
701such remembered pointers, to be checked when we hit the closing ket, in order
702to break infinite loops that match no characters. When match() is called in
703other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
704NOT be used with tail recursion, because the memory block that is used is on
705the stack, so a new one may be required for each match(). */
706
707if (md->match_function_type == MATCH_CBEGROUP)
708  {
709  newptrb.epb_saved_eptr = eptr;
710  newptrb.epb_prev = eptrb;
711  eptrb = &newptrb;
712  md->match_function_type = 0;
713  }
714
715/* Now start processing the opcodes. */
716
717for (;;)
718  {
719  minimize = possessive = FALSE;
720  op = *ecode;
721
722  switch(op)
723    {
724    case OP_MARK:
725    md->nomatch_mark = ecode + 2;
726    md->mark = NULL;    /* In case previously set by assertion */
727    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
728      eptrb, RM55);
729    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
730         md->mark == NULL) md->mark = ecode + 2;
731
732    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
733    argument, and we must check whether that argument matches this MARK's
734    argument. It is passed back in md->start_match_ptr (an overloading of that
735    variable). If it does match, we reset that variable to the current subject
736    position and return MATCH_SKIP. Otherwise, pass back the return code
737    unaltered. */
738
739    else if (rrc == MATCH_SKIP_ARG &&
740        STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
741      {
742      md->start_match_ptr = eptr;
743      RRETURN(MATCH_SKIP);
744      }
745    RRETURN(rrc);
746
747    case OP_FAIL:
748    RRETURN(MATCH_NOMATCH);
749
750    /* COMMIT overrides PRUNE, SKIP, and THEN */
751
752    case OP_COMMIT:
753    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
754      eptrb, RM52);
755    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
756        rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
757        rrc != MATCH_THEN)
758      RRETURN(rrc);
759    RRETURN(MATCH_COMMIT);
760
761    /* PRUNE overrides THEN */
762
763    case OP_PRUNE:
764    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
765      eptrb, RM51);
766    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
767    RRETURN(MATCH_PRUNE);
768
769    case OP_PRUNE_ARG:
770    md->nomatch_mark = ecode + 2;
771    md->mark = NULL;    /* In case previously set by assertion */
772    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
773      eptrb, RM56);
774    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
775         md->mark == NULL) md->mark = ecode + 2;
776    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
777    RRETURN(MATCH_PRUNE);
778
779    /* SKIP overrides PRUNE and THEN */
780
781    case OP_SKIP:
782    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
783      eptrb, RM53);
784    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
785      RRETURN(rrc);
786    md->start_match_ptr = eptr;   /* Pass back current position */
787    RRETURN(MATCH_SKIP);
788
789    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
790    nomatch_mark. There is a flag that disables this opcode when re-matching a
791    pattern that ended with a SKIP for which there was not a matching MARK. */
792
793    case OP_SKIP_ARG:
794    if (md->ignore_skip_arg)
795      {
796      ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
797      break;
798      }
799    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
800      eptrb, RM57);
801    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
802      RRETURN(rrc);
803
804    /* Pass back the current skip name by overloading md->start_match_ptr and
805    returning the special MATCH_SKIP_ARG return code. This will either be
806    caught by a matching MARK, or get to the top, where it causes a rematch
807    with the md->ignore_skip_arg flag set. */
808
809    md->start_match_ptr = ecode + 2;
810    RRETURN(MATCH_SKIP_ARG);
811
812    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
813    the branch in which it occurs can be determined. Overload the start of
814    match pointer to do this. */
815
816    case OP_THEN:
817    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
818      eptrb, RM54);
819    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
820    md->start_match_ptr = ecode;
821    RRETURN(MATCH_THEN);
822
823    case OP_THEN_ARG:
824    md->nomatch_mark = ecode + 2;
825    md->mark = NULL;    /* In case previously set by assertion */
826    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
827      md, eptrb, RM58);
828    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
829         md->mark == NULL) md->mark = ecode + 2;
830    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
831    md->start_match_ptr = ecode;
832    RRETURN(MATCH_THEN);
833
834    /* Handle an atomic group that does not contain any capturing parentheses.
835    This can be handled like an assertion. Prior to 8.13, all atomic groups
836    were handled this way. In 8.13, the code was changed as below for ONCE, so
837    that backups pass through the group and thereby reset captured values.
838    However, this uses a lot more stack, so in 8.20, atomic groups that do not
839    contain any captures generate OP_ONCE_NC, which can be handled in the old,
840    less stack intensive way.
841
842    Check the alternative branches in turn - the matching won't pass the KET
843    for this kind of subpattern. If any one branch matches, we carry on as at
844    the end of a normal bracket, leaving the subject pointer, but resetting
845    the start-of-match value in case it was changed by \K. */
846
847    case OP_ONCE_NC:
848    prev = ecode;
849    saved_eptr = eptr;
850    save_mark = md->mark;
851    do
852      {
853      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
854      if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
855        {
856        mstart = md->start_match_ptr;
857        break;
858        }
859      if (rrc == MATCH_THEN)
860        {
861        next = ecode + GET(ecode,1);
862        if (md->start_match_ptr < next &&
863            (*ecode == OP_ALT || *next == OP_ALT))
864          rrc = MATCH_NOMATCH;
865        }
866
867      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
868      ecode += GET(ecode,1);
869      md->mark = save_mark;
870      }
871    while (*ecode == OP_ALT);
872
873    /* If we hit the end of the group (which could be repeated), fail */
874
875    if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
876
877    /* Continue as from after the group, updating the offsets high water
878    mark, since extracts may have been taken. */
879
880    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
881
882    offset_top = md->end_offset_top;
883    eptr = md->end_match_ptr;
884
885    /* For a non-repeating ket, just continue at this level. This also
886    happens for a repeating ket if no characters were matched in the group.
887    This is the forcible breaking of infinite loops as implemented in Perl
888    5.005. */
889
890    if (*ecode == OP_KET || eptr == saved_eptr)
891      {
892      ecode += 1+LINK_SIZE;
893      break;
894      }
895
896    /* The repeating kets try the rest of the pattern or restart from the
897    preceding bracket, in the appropriate order. The second "call" of match()
898    uses tail recursion, to avoid using another stack frame. */
899
900    if (*ecode == OP_KETRMIN)
901      {
902      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
903      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
904      ecode = prev;
905      goto TAIL_RECURSE;
906      }
907    else  /* OP_KETRMAX */
908      {
909      RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
910      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
911      ecode += 1 + LINK_SIZE;
912      goto TAIL_RECURSE;
913      }
914    /* Control never gets here */
915
916    /* Handle a capturing bracket, other than those that are possessive with an
917    unlimited repeat. If there is space in the offset vector, save the current
918    subject position in the working slot at the top of the vector. We mustn't
919    change the current values of the data slot, because they may be set from a
920    previous iteration of this group, and be referred to by a reference inside
921    the group. A failure to match might occur after the group has succeeded,
922    if something later on doesn't match. For this reason, we need to restore
923    the working value and also the values of the final offsets, in case they
924    were set by a previous iteration of the same bracket.
925
926    If there isn't enough space in the offset vector, treat this as if it were
927    a non-capturing bracket. Don't worry about setting the flag for the error
928    case here; that is handled in the code for KET. */
929
930    case OP_CBRA:
931    case OP_SCBRA:
932    number = GET2(ecode, 1+LINK_SIZE);
933    offset = number << 1;
934
935#ifdef PCRE_DEBUG
936    printf("start bracket %d\n", number);
937    printf("subject=");
938    pchars(eptr, 16, TRUE, md);
939    printf("\n");
940#endif
941
942    if (offset < md->offset_max)
943      {
944      save_offset1 = md->offset_vector[offset];
945      save_offset2 = md->offset_vector[offset+1];
946      save_offset3 = md->offset_vector[md->offset_end - number];
947      save_capture_last = md->capture_last;
948      save_mark = md->mark;
949
950      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
951      md->offset_vector[md->offset_end - number] =
952        (int)(eptr - md->start_subject);
953
954      for (;;)
955        {
956        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
957        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
958          eptrb, RM1);
959        if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
960
961        /* If we backed up to a THEN, check whether it is within the current
962        branch by comparing the address of the THEN that is passed back with
963        the end of the branch. If it is within the current branch, and the
964        branch is one of two or more alternatives (it either starts or ends
965        with OP_ALT), we have reached the limit of THEN's action, so convert
966        the return code to NOMATCH, which will cause normal backtracking to
967        happen from now on. Otherwise, THEN is passed back to an outer
968        alternative. This implements Perl's treatment of parenthesized groups,
969        where a group not containing | does not affect the current alternative,
970        that is, (X) is NOT the same as (X|(*F)). */
971
972        if (rrc == MATCH_THEN)
973          {
974          next = ecode + GET(ecode,1);
975          if (md->start_match_ptr < next &&
976              (*ecode == OP_ALT || *next == OP_ALT))
977            rrc = MATCH_NOMATCH;
978          }
979
980        /* Anything other than NOMATCH is passed back. */
981
982        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
983        md->capture_last = save_capture_last;
984        ecode += GET(ecode, 1);
985        md->mark = save_mark;
986        if (*ecode != OP_ALT) break;
987        }
988
989      DPRINTF(("bracket %d failed\n", number));
990      md->offset_vector[offset] = save_offset1;
991      md->offset_vector[offset+1] = save_offset2;
992      md->offset_vector[md->offset_end - number] = save_offset3;
993
994      /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
995
996      RRETURN(rrc);
997      }
998
999    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1000    as a non-capturing bracket. */
1001
1002    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1003    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1004
1005    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1006
1007    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1008    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1009
1010    /* Non-capturing or atomic group, except for possessive with unlimited
1011    repeat and ONCE group with no captures. Loop for all the alternatives.
1012
1013    When we get to the final alternative within the brackets, we used to return
1014    the result of a recursive call to match() whatever happened so it was
1015    possible to reduce stack usage by turning this into a tail recursion,
1016    except in the case of a possibly empty group. However, now that there is
1017    the possiblity of (*THEN) occurring in the final alternative, this
1018    optimization is no longer always possible.
1019
1020    We can optimize if we know there are no (*THEN)s in the pattern; at present
1021    this is the best that can be done.
1022
1023    MATCH_ONCE is returned when the end of an atomic group is successfully
1024    reached, but subsequent matching fails. It passes back up the tree (causing
1025    captured values to be reset) until the original atomic group level is
1026    reached. This is tested by comparing md->once_target with the start of the
1027    group. At this point, the return is converted into MATCH_NOMATCH so that
1028    previous backup points can be taken. */
1029
1030    case OP_ONCE:
1031    case OP_BRA:
1032    case OP_SBRA:
1033    DPRINTF(("start non-capturing bracket\n"));
1034
1035    for (;;)
1036      {
1037      if (op >= OP_SBRA || op == OP_ONCE)
1038        md->match_function_type = MATCH_CBEGROUP;
1039
1040      /* If this is not a possibly empty group, and there are no (*THEN)s in
1041      the pattern, and this is the final alternative, optimize as described
1042      above. */
1043
1044      else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1045        {
1046        ecode += PRIV(OP_lengths)[*ecode];
1047        goto TAIL_RECURSE;
1048        }
1049
1050      /* In all other cases, we have to make another call to match(). */
1051
1052      save_mark = md->mark;
1053      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1054        RM2);
1055
1056      /* See comment in the code for capturing groups above about handling
1057      THEN. */
1058
1059      if (rrc == MATCH_THEN)
1060        {
1061        next = ecode + GET(ecode,1);
1062        if (md->start_match_ptr < next &&
1063            (*ecode == OP_ALT || *next == OP_ALT))
1064          rrc = MATCH_NOMATCH;
1065        }
1066
1067      if (rrc != MATCH_NOMATCH)
1068        {
1069        if (rrc == MATCH_ONCE)
1070          {
1071          const pcre_uchar *scode = ecode;
1072          if (*scode != OP_ONCE)           /* If not at start, find it */
1073            {
1074            while (*scode == OP_ALT) scode += GET(scode, 1);
1075            scode -= GET(scode, 1);
1076            }
1077          if (md->once_target == scode) rrc = MATCH_NOMATCH;
1078          }
1079        RRETURN(rrc);
1080        }
1081      ecode += GET(ecode, 1);
1082      md->mark = save_mark;
1083      if (*ecode != OP_ALT) break;
1084      }
1085
1086    RRETURN(MATCH_NOMATCH);
1087
1088    /* Handle possessive capturing brackets with an unlimited repeat. We come
1089    here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1090    handled similarly to the normal case above. However, the matching is
1091    different. The end of these brackets will always be OP_KETRPOS, which
1092    returns MATCH_KETRPOS without going further in the pattern. By this means
1093    we can handle the group by iteration rather than recursion, thereby
1094    reducing the amount of stack needed. */
1095
1096    case OP_CBRAPOS:
1097    case OP_SCBRAPOS:
1098    allow_zero = FALSE;
1099
1100    POSSESSIVE_CAPTURE:
1101    number = GET2(ecode, 1+LINK_SIZE);
1102    offset = number << 1;
1103
1104#ifdef PCRE_DEBUG
1105    printf("start possessive bracket %d\n", number);
1106    printf("subject=");
1107    pchars(eptr, 16, TRUE, md);
1108    printf("\n");
1109#endif
1110
1111    if (offset < md->offset_max)
1112      {
1113      matched_once = FALSE;
1114      code_offset = (int)(ecode - md->start_code);
1115
1116      save_offset1 = md->offset_vector[offset];
1117      save_offset2 = md->offset_vector[offset+1];
1118      save_offset3 = md->offset_vector[md->offset_end - number];
1119      save_capture_last = md->capture_last;
1120
1121      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1122
1123      /* Each time round the loop, save the current subject position for use
1124      when the group matches. For MATCH_MATCH, the group has matched, so we
1125      restart it with a new subject starting position, remembering that we had
1126      at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1127      usual. If we haven't matched any alternatives in any iteration, check to
1128      see if a previous iteration matched. If so, the group has matched;
1129      continue from afterwards. Otherwise it has failed; restore the previous
1130      capture values before returning NOMATCH. */
1131
1132      for (;;)
1133        {
1134        md->offset_vector[md->offset_end - number] =
1135          (int)(eptr - md->start_subject);
1136        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1137        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1138          eptrb, RM63);
1139        if (rrc == MATCH_KETRPOS)
1140          {
1141          offset_top = md->end_offset_top;
1142          eptr = md->end_match_ptr;
1143          ecode = md->start_code + code_offset;
1144          save_capture_last = md->capture_last;
1145          matched_once = TRUE;
1146          continue;
1147          }
1148
1149        /* See comment in the code for capturing groups above about handling
1150        THEN. */
1151
1152        if (rrc == MATCH_THEN)
1153          {
1154          next = ecode + GET(ecode,1);
1155          if (md->start_match_ptr < next &&
1156              (*ecode == OP_ALT || *next == OP_ALT))
1157            rrc = MATCH_NOMATCH;
1158          }
1159
1160        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1161        md->capture_last = save_capture_last;
1162        ecode += GET(ecode, 1);
1163        if (*ecode != OP_ALT) break;
1164        }
1165
1166      if (!matched_once)
1167        {
1168        md->offset_vector[offset] = save_offset1;
1169        md->offset_vector[offset+1] = save_offset2;
1170        md->offset_vector[md->offset_end - number] = save_offset3;
1171        }
1172
1173      if (allow_zero || matched_once)
1174        {
1175        ecode += 1 + LINK_SIZE;
1176        break;
1177        }
1178
1179      RRETURN(MATCH_NOMATCH);
1180      }
1181
1182    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1183    as a non-capturing bracket. */
1184
1185    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1186    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1187
1188    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1189
1190    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1191    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1192
1193    /* Non-capturing possessive bracket with unlimited repeat. We come here
1194    from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1195    without the capturing complication. It is written out separately for speed
1196    and cleanliness. */
1197
1198    case OP_BRAPOS:
1199    case OP_SBRAPOS:
1200    allow_zero = FALSE;
1201
1202    POSSESSIVE_NON_CAPTURE:
1203    matched_once = FALSE;
1204    code_offset = (int)(ecode - md->start_code);
1205
1206    for (;;)
1207      {
1208      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1209      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1210        eptrb, RM48);
1211      if (rrc == MATCH_KETRPOS)
1212        {
1213        offset_top = md->end_offset_top;
1214        eptr = md->end_match_ptr;
1215        ecode = md->start_code + code_offset;
1216        matched_once = TRUE;
1217        continue;
1218        }
1219
1220      /* See comment in the code for capturing groups above about handling
1221      THEN. */
1222
1223      if (rrc == MATCH_THEN)
1224        {
1225        next = ecode + GET(ecode,1);
1226        if (md->start_match_ptr < next &&
1227            (*ecode == OP_ALT || *next == OP_ALT))
1228          rrc = MATCH_NOMATCH;
1229        }
1230
1231      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1232      ecode += GET(ecode, 1);
1233      if (*ecode != OP_ALT) break;
1234      }
1235
1236    if (matched_once || allow_zero)
1237      {
1238      ecode += 1 + LINK_SIZE;
1239      break;
1240      }
1241    RRETURN(MATCH_NOMATCH);
1242
1243    /* Control never reaches here. */
1244
1245    /* Conditional group: compilation checked that there are no more than
1246    two branches. If the condition is false, skipping the first branch takes us
1247    past the end if there is only one branch, but that's OK because that is
1248    exactly what going to the ket would do. */
1249
1250    case OP_COND:
1251    case OP_SCOND:
1252    codelink = GET(ecode, 1);
1253
1254    /* Because of the way auto-callout works during compile, a callout item is
1255    inserted between OP_COND and an assertion condition. */
1256
1257    if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1258      {
1259      if (PUBL(callout) != NULL)
1260        {
1261        PUBL(callout_block) cb;
1262        cb.version          = 2;   /* Version 2 of the callout block */
1263        cb.callout_number   = ecode[LINK_SIZE+2];
1264        cb.offset_vector    = md->offset_vector;
1265#ifdef COMPILE_PCRE8
1266        cb.subject          = (PCRE_SPTR)md->start_subject;
1267#else
1268        cb.subject          = (PCRE_SPTR16)md->start_subject;
1269#endif
1270        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1271        cb.start_match      = (int)(mstart - md->start_subject);
1272        cb.current_position = (int)(eptr - md->start_subject);
1273        cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1274        cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1275        cb.capture_top      = offset_top/2;
1276        cb.capture_last     = md->capture_last;
1277        cb.callout_data     = md->callout_data;
1278        cb.mark             = md->nomatch_mark;
1279        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1280        if (rrc < 0) RRETURN(rrc);
1281        }
1282      ecode += PRIV(OP_lengths)[OP_CALLOUT];
1283      }
1284
1285    condcode = ecode[LINK_SIZE+1];
1286
1287    /* Now see what the actual condition is */
1288
1289    if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1290      {
1291      if (md->recursive == NULL)                /* Not recursing => FALSE */
1292        {
1293        condition = FALSE;
1294        ecode += GET(ecode, 1);
1295        }
1296      else
1297        {
1298        int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1299        condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1300
1301        /* If the test is for recursion into a specific subpattern, and it is
1302        false, but the test was set up by name, scan the table to see if the
1303        name refers to any other numbers, and test them. The condition is true
1304        if any one is set. */
1305
1306        if (!condition && condcode == OP_NRREF)
1307          {
1308          pcre_uchar *slotA = md->name_table;
1309          for (i = 0; i < md->name_count; i++)
1310            {
1311            if (GET2(slotA, 0) == recno) break;
1312            slotA += md->name_entry_size;
1313            }
1314
1315          /* Found a name for the number - there can be only one; duplicate
1316          names for different numbers are allowed, but not vice versa. First
1317          scan down for duplicates. */
1318
1319          if (i < md->name_count)
1320            {
1321            pcre_uchar *slotB = slotA;
1322            while (slotB > md->name_table)
1323              {
1324              slotB -= md->name_entry_size;
1325              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1326                {
1327                condition = GET2(slotB, 0) == md->recursive->group_num;
1328                if (condition) break;
1329                }
1330              else break;
1331              }
1332
1333            /* Scan up for duplicates */
1334
1335            if (!condition)
1336              {
1337              slotB = slotA;
1338              for (i++; i < md->name_count; i++)
1339                {
1340                slotB += md->name_entry_size;
1341                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1342                  {
1343                  condition = GET2(slotB, 0) == md->recursive->group_num;
1344                  if (condition) break;
1345                  }
1346                else break;
1347                }
1348              }
1349            }
1350          }
1351
1352        /* Choose branch according to the condition */
1353
1354        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1355        }
1356      }
1357
1358    else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1359      {
1360      offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1361      condition = offset < offset_top && md->offset_vector[offset] >= 0;
1362
1363      /* If the numbered capture is unset, but the reference was by name,
1364      scan the table to see if the name refers to any other numbers, and test
1365      them. The condition is true if any one is set. This is tediously similar
1366      to the code above, but not close enough to try to amalgamate. */
1367
1368      if (!condition && condcode == OP_NCREF)
1369        {
1370        int refno = offset >> 1;
1371        pcre_uchar *slotA = md->name_table;
1372
1373        for (i = 0; i < md->name_count; i++)
1374          {
1375          if (GET2(slotA, 0) == refno) break;
1376          slotA += md->name_entry_size;
1377          }
1378
1379        /* Found a name for the number - there can be only one; duplicate names
1380        for different numbers are allowed, but not vice versa. First scan down
1381        for duplicates. */
1382
1383        if (i < md->name_count)
1384          {
1385          pcre_uchar *slotB = slotA;
1386          while (slotB > md->name_table)
1387            {
1388            slotB -= md->name_entry_size;
1389            if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1390              {
1391              offset = GET2(slotB, 0) << 1;
1392              condition = offset < offset_top &&
1393                md->offset_vector[offset] >= 0;
1394              if (condition) break;
1395              }
1396            else break;
1397            }
1398
1399          /* Scan up for duplicates */
1400
1401          if (!condition)
1402            {
1403            slotB = slotA;
1404            for (i++; i < md->name_count; i++)
1405              {
1406              slotB += md->name_entry_size;
1407              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1408                {
1409                offset = GET2(slotB, 0) << 1;
1410                condition = offset < offset_top &&
1411                  md->offset_vector[offset] >= 0;
1412                if (condition) break;
1413                }
1414              else break;
1415              }
1416            }
1417          }
1418        }
1419
1420      /* Choose branch according to the condition */
1421
1422      ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1423      }
1424
1425    else if (condcode == OP_DEF)     /* DEFINE - always false */
1426      {
1427      condition = FALSE;
1428      ecode += GET(ecode, 1);
1429      }
1430
1431    /* The condition is an assertion. Call match() to evaluate it - setting
1432    md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1433    an assertion. */
1434
1435    else
1436      {
1437      md->match_function_type = MATCH_CONDASSERT;
1438      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1439      if (rrc == MATCH_MATCH)
1440        {
1441        if (md->end_offset_top > offset_top)
1442          offset_top = md->end_offset_top;  /* Captures may have happened */
1443        condition = TRUE;
1444        ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1445        while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1446        }
1447
1448      /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1449      assertion; it is therefore treated as NOMATCH. */
1450
1451      else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1452        {
1453        RRETURN(rrc);         /* Need braces because of following else */
1454        }
1455      else
1456        {
1457        condition = FALSE;
1458        ecode += codelink;
1459        }
1460      }
1461
1462    /* We are now at the branch that is to be obeyed. As there is only one, can
1463    use tail recursion to avoid using another stack frame, except when there is
1464    unlimited repeat of a possibly empty group. In the latter case, a recursive
1465    call to match() is always required, unless the second alternative doesn't
1466    exist, in which case we can just plough on. Note that, for compatibility
1467    with Perl, the | in a conditional group is NOT treated as creating two
1468    alternatives. If a THEN is encountered in the branch, it propagates out to
1469    the enclosing alternative (unless nested in a deeper set of alternatives,
1470    of course). */
1471
1472    if (condition || *ecode == OP_ALT)
1473      {
1474      if (op != OP_SCOND)
1475        {
1476        ecode += 1 + LINK_SIZE;
1477        goto TAIL_RECURSE;
1478        }
1479
1480      md->match_function_type = MATCH_CBEGROUP;
1481      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1482      RRETURN(rrc);
1483      }
1484
1485     /* Condition false & no alternative; continue after the group. */
1486
1487    else
1488      {
1489      ecode += 1 + LINK_SIZE;
1490      }
1491    break;
1492
1493
1494    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1495    to close any currently open capturing brackets. */
1496
1497    case OP_CLOSE:
1498    number = GET2(ecode, 1);
1499    offset = number << 1;
1500
1501#ifdef PCRE_DEBUG
1502      printf("end bracket %d at *ACCEPT", number);
1503      printf("\n");
1504#endif
1505
1506    md->capture_last = number;
1507    if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1508      {
1509      md->offset_vector[offset] =
1510        md->offset_vector[md->offset_end - number];
1511      md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1512      if (offset_top <= offset) offset_top = offset + 2;
1513      }
1514    ecode += 1 + IMM2_SIZE;
1515    break;
1516
1517
1518    /* End of the pattern, either real or forced. */
1519
1520    case OP_END:
1521    case OP_ACCEPT:
1522    case OP_ASSERT_ACCEPT:
1523
1524    /* If we have matched an empty string, fail if not in an assertion and not
1525    in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1526    is set and we have matched at the start of the subject. In both cases,
1527    backtracking will then try other alternatives, if any. */
1528
1529    if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1530         md->recursive == NULL &&
1531         (md->notempty ||
1532           (md->notempty_atstart &&
1533             mstart == md->start_subject + md->start_offset)))
1534      RRETURN(MATCH_NOMATCH);
1535
1536    /* Otherwise, we have a match. */
1537
1538    md->end_match_ptr = eptr;           /* Record where we ended */
1539    md->end_offset_top = offset_top;    /* and how many extracts were taken */
1540    md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1541
1542    /* For some reason, the macros don't work properly if an expression is
1543    given as the argument to RRETURN when the heap is in use. */
1544
1545    rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1546    RRETURN(rrc);
1547
1548    /* Assertion brackets. Check the alternative branches in turn - the
1549    matching won't pass the KET for an assertion. If any one branch matches,
1550    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1551    start of each branch to move the current point backwards, so the code at
1552    this level is identical to the lookahead case. When the assertion is part
1553    of a condition, we want to return immediately afterwards. The caller of
1554    this incarnation of the match() function will have set MATCH_CONDASSERT in
1555    md->match_function_type, and one of these opcodes will be the first opcode
1556    that is processed. We use a local variable that is preserved over calls to
1557    match() to remember this case. */
1558
1559    case OP_ASSERT:
1560    case OP_ASSERTBACK:
1561    save_mark = md->mark;
1562    if (md->match_function_type == MATCH_CONDASSERT)
1563      {
1564      condassert = TRUE;
1565      md->match_function_type = 0;
1566      }
1567    else condassert = FALSE;
1568
1569    do
1570      {
1571      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1572      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1573        {
1574        mstart = md->start_match_ptr;   /* In case \K reset it */
1575        break;
1576        }
1577      md->mark = save_mark;
1578
1579      /* A COMMIT failure must fail the entire assertion, without trying any
1580      subsequent branches. */
1581
1582      if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1583
1584      /* PCRE does not allow THEN to escape beyond an assertion; it
1585      is treated as NOMATCH. */
1586
1587      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1588      ecode += GET(ecode, 1);
1589      }
1590    while (*ecode == OP_ALT);
1591
1592    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1593
1594    /* If checking an assertion for a condition, return MATCH_MATCH. */
1595
1596    if (condassert) RRETURN(MATCH_MATCH);
1597
1598    /* Continue from after the assertion, updating the offsets high water
1599    mark, since extracts may have been taken during the assertion. */
1600
1601    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1602    ecode += 1 + LINK_SIZE;
1603    offset_top = md->end_offset_top;
1604    continue;
1605
1606    /* Negative assertion: all branches must fail to match. Encountering SKIP,
1607    PRUNE, or COMMIT means we must assume failure without checking subsequent
1608    branches. */
1609
1610    case OP_ASSERT_NOT:
1611    case OP_ASSERTBACK_NOT:
1612    save_mark = md->mark;
1613    if (md->match_function_type == MATCH_CONDASSERT)
1614      {
1615      condassert = TRUE;
1616      md->match_function_type = 0;
1617      }
1618    else condassert = FALSE;
1619
1620    do
1621      {
1622      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1623      md->mark = save_mark;
1624      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1625      if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1626        {
1627        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1628        break;
1629        }
1630
1631      /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1632      as NOMATCH. */
1633
1634      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1635      ecode += GET(ecode,1);
1636      }
1637    while (*ecode == OP_ALT);
1638
1639    if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1640
1641    ecode += 1 + LINK_SIZE;
1642    continue;
1643
1644    /* Move the subject pointer back. This occurs only at the start of
1645    each branch of a lookbehind assertion. If we are too close to the start to
1646    move back, this match function fails. When working with UTF-8 we move
1647    back a number of characters, not bytes. */
1648
1649    case OP_REVERSE:
1650#ifdef SUPPORT_UTF
1651    if (utf)
1652      {
1653      i = GET(ecode, 1);
1654      while (i-- > 0)
1655        {
1656        eptr--;
1657        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1658        BACKCHAR(eptr);
1659        }
1660      }
1661    else
1662#endif
1663
1664    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1665
1666      {
1667      eptr -= GET(ecode, 1);
1668      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1669      }
1670
1671    /* Save the earliest consulted character, then skip to next op code */
1672
1673    if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1674    ecode += 1 + LINK_SIZE;
1675    break;
1676
1677    /* The callout item calls an external function, if one is provided, passing
1678    details of the match so far. This is mainly for debugging, though the
1679    function is able to force a failure. */
1680
1681    case OP_CALLOUT:
1682    if (PUBL(callout) != NULL)
1683      {
1684      PUBL(callout_block) cb;
1685      cb.version          = 2;   /* Version 2 of the callout block */
1686      cb.callout_number   = ecode[1];
1687      cb.offset_vector    = md->offset_vector;
1688#ifdef COMPILE_PCRE8
1689      cb.subject          = (PCRE_SPTR)md->start_subject;
1690#else
1691      cb.subject          = (PCRE_SPTR16)md->start_subject;
1692#endif
1693      cb.subject_length   = (int)(md->end_subject - md->start_subject);
1694      cb.start_match      = (int)(mstart - md->start_subject);
1695      cb.current_position = (int)(eptr - md->start_subject);
1696      cb.pattern_position = GET(ecode, 2);
1697      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1698      cb.capture_top      = offset_top/2;
1699      cb.capture_last     = md->capture_last;
1700      cb.callout_data     = md->callout_data;
1701      cb.mark             = md->nomatch_mark;
1702      if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1703      if (rrc < 0) RRETURN(rrc);
1704      }
1705    ecode += 2 + 2*LINK_SIZE;
1706    break;
1707
1708    /* Recursion either matches the current regex, or some subexpression. The
1709    offset data is the offset to the starting bracket from the start of the
1710    whole pattern. (This is so that it works from duplicated subpatterns.)
1711
1712    The state of the capturing groups is preserved over recursion, and
1713    re-instated afterwards. We don't know how many are started and not yet
1714    finished (offset_top records the completed total) so we just have to save
1715    all the potential data. There may be up to 65535 such values, which is too
1716    large to put on the stack, but using malloc for small numbers seems
1717    expensive. As a compromise, the stack is used when there are no more than
1718    REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1719
1720    There are also other values that have to be saved. We use a chained
1721    sequence of blocks that actually live on the stack. Thanks to Robin Houston
1722    for the original version of this logic. It has, however, been hacked around
1723    a lot, so he is not to blame for the current way it works. */
1724
1725    case OP_RECURSE:
1726      {
1727      recursion_info *ri;
1728      int recno;
1729
1730      callpat = md->start_code + GET(ecode, 1);
1731      recno = (callpat == md->start_code)? 0 :
1732        GET2(callpat, 1 + LINK_SIZE);
1733
1734      /* Check for repeating a recursion without advancing the subject pointer.
1735      This should catch convoluted mutual recursions. (Some simple cases are
1736      caught at compile time.) */
1737
1738      for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1739        if (recno == ri->group_num && eptr == ri->subject_position)
1740          RRETURN(PCRE_ERROR_RECURSELOOP);
1741
1742      /* Add to "recursing stack" */
1743
1744      new_recursive.group_num = recno;
1745      new_recursive.subject_position = eptr;
1746      new_recursive.prevrec = md->recursive;
1747      md->recursive = &new_recursive;
1748
1749      /* Where to continue from afterwards */
1750
1751      ecode += 1 + LINK_SIZE;
1752
1753      /* Now save the offset data */
1754
1755      new_recursive.saved_max = md->offset_end;
1756      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1757        new_recursive.offset_save = stacksave;
1758      else
1759        {
1760        new_recursive.offset_save =
1761          (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1762        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1763        }
1764      memcpy(new_recursive.offset_save, md->offset_vector,
1765            new_recursive.saved_max * sizeof(int));
1766
1767      /* OK, now we can do the recursion. After processing each alternative,
1768      restore the offset data. If there were nested recursions, md->recursive
1769      might be changed, so reset it before looping. */
1770
1771      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1772      cbegroup = (*callpat >= OP_SBRA);
1773      do
1774        {
1775        if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1776        RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1777          md, eptrb, RM6);
1778        memcpy(md->offset_vector, new_recursive.offset_save,
1779            new_recursive.saved_max * sizeof(int));
1780        md->recursive = new_recursive.prevrec;
1781        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1782          {
1783          DPRINTF(("Recursion matched\n"));
1784          if (new_recursive.offset_save != stacksave)
1785            (PUBL(free))(new_recursive.offset_save);
1786
1787          /* Set where we got to in the subject, and reset the start in case
1788          it was changed by \K. This *is* propagated back out of a recursion,
1789          for Perl compatibility. */
1790
1791          eptr = md->end_match_ptr;
1792          mstart = md->start_match_ptr;
1793          goto RECURSION_MATCHED;        /* Exit loop; end processing */
1794          }
1795
1796        /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; each
1797        is treated as NOMATCH. */
1798
1799        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1800                 rrc != MATCH_COMMIT)
1801          {
1802          DPRINTF(("Recursion gave error %d\n", rrc));
1803          if (new_recursive.offset_save != stacksave)
1804            (PUBL(free))(new_recursive.offset_save);
1805          RRETURN(rrc);
1806          }
1807
1808        md->recursive = &new_recursive;
1809        callpat += GET(callpat, 1);
1810        }
1811      while (*callpat == OP_ALT);
1812
1813      DPRINTF(("Recursion didn't match\n"));
1814      md->recursive = new_recursive.prevrec;
1815      if (new_recursive.offset_save != stacksave)
1816        (PUBL(free))(new_recursive.offset_save);
1817      RRETURN(MATCH_NOMATCH);
1818      }
1819
1820    RECURSION_MATCHED:
1821    break;
1822
1823    /* An alternation is the end of a branch; scan along to find the end of the
1824    bracketed group and go to there. */
1825
1826    case OP_ALT:
1827    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1828    break;
1829
1830    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1831    indicating that it may occur zero times. It may repeat infinitely, or not
1832    at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1833    with fixed upper repeat limits are compiled as a number of copies, with the
1834    optional ones preceded by BRAZERO or BRAMINZERO. */
1835
1836    case OP_BRAZERO:
1837    next = ecode + 1;
1838    RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1839    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1840    do next += GET(next, 1); while (*next == OP_ALT);
1841    ecode = next + 1 + LINK_SIZE;
1842    break;
1843
1844    case OP_BRAMINZERO:
1845    next = ecode + 1;
1846    do next += GET(next, 1); while (*next == OP_ALT);
1847    RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1848    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1849    ecode++;
1850    break;
1851
1852    case OP_SKIPZERO:
1853    next = ecode+1;
1854    do next += GET(next,1); while (*next == OP_ALT);
1855    ecode = next + 1 + LINK_SIZE;
1856    break;
1857
1858    /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1859    here; just jump to the group, with allow_zero set TRUE. */
1860
1861    case OP_BRAPOSZERO:
1862    op = *(++ecode);
1863    allow_zero = TRUE;
1864    if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1865      goto POSSESSIVE_NON_CAPTURE;
1866
1867    /* End of a group, repeated or non-repeating. */
1868
1869    case OP_KET:
1870    case OP_KETRMIN:
1871    case OP_KETRMAX:
1872    case OP_KETRPOS:
1873    prev = ecode - GET(ecode, 1);
1874
1875    /* If this was a group that remembered the subject start, in order to break
1876    infinite repeats of empty string matches, retrieve the subject start from
1877    the chain. Otherwise, set it NULL. */
1878
1879    if (*prev >= OP_SBRA || *prev == OP_ONCE)
1880      {
1881      saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1882      eptrb = eptrb->epb_prev;              /* Backup to previous group */
1883      }
1884    else saved_eptr = NULL;
1885
1886    /* If we are at the end of an assertion group or a non-capturing atomic
1887    group, stop matching and return MATCH_MATCH, but record the current high
1888    water mark for use by positive assertions. We also need to record the match
1889    start in case it was changed by \K. */
1890
1891    if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1892         *prev == OP_ONCE_NC)
1893      {
1894      md->end_match_ptr = eptr;      /* For ONCE_NC */
1895      md->end_offset_top = offset_top;
1896      md->start_match_ptr = mstart;
1897      RRETURN(MATCH_MATCH);         /* Sets md->mark */
1898      }
1899
1900    /* For capturing groups we have to check the group number back at the start
1901    and if necessary complete handling an extraction by setting the offsets and
1902    bumping the high water mark. Whole-pattern recursion is coded as a recurse
1903    into group 0, so it won't be picked up here. Instead, we catch it when the
1904    OP_END is reached. Other recursion is handled here. We just have to record
1905    the current subject position and start match pointer and give a MATCH
1906    return. */
1907
1908    if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1909        *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1910      {
1911      number = GET2(prev, 1+LINK_SIZE);
1912      offset = number << 1;
1913
1914#ifdef PCRE_DEBUG
1915      printf("end bracket %d", number);
1916      printf("\n");
1917#endif
1918
1919      /* Handle a recursively called group. */
1920
1921      if (md->recursive != NULL && md->recursive->group_num == number)
1922        {
1923        md->end_match_ptr = eptr;
1924        md->start_match_ptr = mstart;
1925        RRETURN(MATCH_MATCH);
1926        }
1927
1928      /* Deal with capturing */
1929
1930      md->capture_last = number;
1931      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1932        {
1933        /* If offset is greater than offset_top, it means that we are
1934        "skipping" a capturing group, and that group's offsets must be marked
1935        unset. In earlier versions of PCRE, all the offsets were unset at the
1936        start of matching, but this doesn't work because atomic groups and
1937        assertions can cause a value to be set that should later be unset.
1938        Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1939        part of the atomic group, but this is not on the final matching path,
1940        so must be unset when 2 is set. (If there is no group 2, there is no
1941        problem, because offset_top will then be 2, indicating no capture.) */
1942
1943        if (offset > offset_top)
1944          {
1945          register int *iptr = md->offset_vector + offset_top;
1946          register int *iend = md->offset_vector + offset;
1947          while (iptr < iend) *iptr++ = -1;
1948          }
1949
1950        /* Now make the extraction */
1951
1952        md->offset_vector[offset] =
1953          md->offset_vector[md->offset_end - number];
1954        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1955        if (offset_top <= offset) offset_top = offset + 2;
1956        }
1957      }
1958
1959    /* For an ordinary non-repeating ket, just continue at this level. This
1960    also happens for a repeating ket if no characters were matched in the
1961    group. This is the forcible breaking of infinite loops as implemented in
1962    Perl 5.005. For a non-repeating atomic group that includes captures,
1963    establish a backup point by processing the rest of the pattern at a lower
1964    level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1965    original OP_ONCE level, thereby bypassing intermediate backup points, but
1966    resetting any captures that happened along the way. */
1967
1968    if (*ecode == OP_KET || eptr == saved_eptr)
1969      {
1970      if (*prev == OP_ONCE)
1971        {
1972        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1973        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1974        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1975        RRETURN(MATCH_ONCE);
1976        }
1977      ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1978      break;
1979      }
1980
1981    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1982    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1983    at a time from the outer level, thus saving stack. */
1984
1985    if (*ecode == OP_KETRPOS)
1986      {
1987      md->end_match_ptr = eptr;
1988      md->end_offset_top = offset_top;
1989      RRETURN(MATCH_KETRPOS);
1990      }
1991
1992    /* The normal repeating kets try the rest of the pattern or restart from
1993    the preceding bracket, in the appropriate order. In the second case, we can
1994    use tail recursion to avoid using another stack frame, unless we have
1995    an atomic group or an unlimited repeat of a group that can match an empty
1996    string. */
1997
1998    if (*ecode == OP_KETRMIN)
1999      {
2000      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2001      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2002      if (*prev == OP_ONCE)
2003        {
2004        RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2005        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2006        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2007        RRETURN(MATCH_ONCE);
2008        }
2009      if (*prev >= OP_SBRA)    /* Could match an empty string */
2010        {
2011        RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2012        RRETURN(rrc);
2013        }
2014      ecode = prev;
2015      goto TAIL_RECURSE;
2016      }
2017    else  /* OP_KETRMAX */
2018      {
2019      RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2020      if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2021      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2022      if (*prev == OP_ONCE)
2023        {
2024        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2025        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2026        md->once_target = prev;
2027        RRETURN(MATCH_ONCE);
2028        }
2029      ecode += 1 + LINK_SIZE;
2030      goto TAIL_RECURSE;
2031      }
2032    /* Control never gets here */
2033
2034    /* Not multiline mode: start of subject assertion, unless notbol. */
2035
2036    case OP_CIRC:
2037    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2038
2039    /* Start of subject assertion */
2040
2041    case OP_SOD:
2042    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2043    ecode++;
2044    break;
2045
2046    /* Multiline mode: start of subject unless notbol, or after any newline. */
2047
2048    case OP_CIRCM:
2049    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2050    if (eptr != md->start_subject &&
2051        (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2052      RRETURN(MATCH_NOMATCH);
2053    ecode++;
2054    break;
2055
2056    /* Start of match assertion */
2057
2058    case OP_SOM:
2059    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2060    ecode++;
2061    break;
2062
2063    /* Reset the start of match point */
2064
2065    case OP_SET_SOM:
2066    mstart = eptr;
2067    ecode++;
2068    break;
2069
2070    /* Multiline mode: assert before any newline, or before end of subject
2071    unless noteol is set. */
2072
2073    case OP_DOLLM:
2074    if (eptr < md->end_subject)
2075      {
2076      if (!IS_NEWLINE(eptr))
2077        {
2078        if (md->partial != 0 &&
2079            eptr + 1 >= md->end_subject &&
2080            NLBLOCK->nltype == NLTYPE_FIXED &&
2081            NLBLOCK->nllen == 2 &&
2082            *eptr == NLBLOCK->nl[0])
2083          {
2084          md->hitend = TRUE;
2085          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2086          }
2087        RRETURN(MATCH_NOMATCH);
2088        }
2089      }
2090    else
2091      {
2092      if (md->noteol) RRETURN(MATCH_NOMATCH);
2093      SCHECK_PARTIAL();
2094      }
2095    ecode++;
2096    break;
2097
2098    /* Not multiline mode: assert before a terminating newline or before end of
2099    subject unless noteol is set. */
2100
2101    case OP_DOLL:
2102    if (md->noteol) RRETURN(MATCH_NOMATCH);
2103    if (!md->endonly) goto ASSERT_NL_OR_EOS;
2104
2105    /* ... else fall through for endonly */
2106
2107    /* End of subject assertion (\z) */
2108
2109    case OP_EOD:
2110    if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2111    SCHECK_PARTIAL();
2112    ecode++;
2113    break;
2114
2115    /* End of subject or ending \n assertion (\Z) */
2116
2117    case OP_EODN:
2118    ASSERT_NL_OR_EOS:
2119    if (eptr < md->end_subject &&
2120        (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2121      {
2122      if (md->partial != 0 &&
2123          eptr + 1 >= md->end_subject &&
2124          NLBLOCK->nltype == NLTYPE_FIXED &&
2125          NLBLOCK->nllen == 2 &&
2126          *eptr == NLBLOCK->nl[0])
2127        {
2128        md->hitend = TRUE;
2129        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2130        }
2131      RRETURN(MATCH_NOMATCH);
2132      }
2133
2134    /* Either at end of string or \n before end. */
2135
2136    SCHECK_PARTIAL();
2137    ecode++;
2138    break;
2139
2140    /* Word boundary assertions */
2141
2142    case OP_NOT_WORD_BOUNDARY:
2143    case OP_WORD_BOUNDARY:
2144      {
2145
2146      /* Find out if the previous and current characters are "word" characters.
2147      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2148      be "non-word" characters. Remember the earliest consulted character for
2149      partial matching. */
2150
2151#ifdef SUPPORT_UTF
2152      if (utf)
2153        {
2154        /* Get status of previous character */
2155
2156        if (eptr == md->start_subject) prev_is_word = FALSE; else
2157          {
2158          PCRE_PUCHAR lastptr = eptr - 1;
2159          BACKCHAR(lastptr);
2160          if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2161          GETCHAR(c, lastptr);
2162#ifdef SUPPORT_UCP
2163          if (md->use_ucp)
2164            {
2165            if (c == '_') prev_is_word = TRUE; else
2166              {
2167              int cat = UCD_CATEGORY(c);
2168              prev_is_word = (cat == ucp_L || cat == ucp_N);
2169              }
2170            }
2171          else
2172#endif
2173          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2174          }
2175
2176        /* Get status of next character */
2177
2178        if (eptr >= md->end_subject)
2179          {
2180          SCHECK_PARTIAL();
2181          cur_is_word = FALSE;
2182          }
2183        else
2184          {
2185          GETCHAR(c, eptr);
2186#ifdef SUPPORT_UCP
2187          if (md->use_ucp)
2188            {
2189            if (c == '_') cur_is_word = TRUE; else
2190              {
2191              int cat = UCD_CATEGORY(c);
2192              cur_is_word = (cat == ucp_L || cat == ucp_N);
2193              }
2194            }
2195          else
2196#endif
2197          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2198          }
2199        }
2200      else
2201#endif
2202
2203      /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2204      consistency with the behaviour of \w we do use it in this case. */
2205
2206        {
2207        /* Get status of previous character */
2208
2209        if (eptr == md->start_subject) prev_is_word = FALSE; else
2210          {
2211          if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2212#ifdef SUPPORT_UCP
2213          if (md->use_ucp)
2214            {
2215            c = eptr[-1];
2216            if (c == '_') prev_is_word = TRUE; else
2217              {
2218              int cat = UCD_CATEGORY(c);
2219              prev_is_word = (cat == ucp_L || cat == ucp_N);
2220              }
2221            }
2222          else
2223#endif
2224          prev_is_word = MAX_255(eptr[-1])
2225            && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2226          }
2227
2228        /* Get status of next character */
2229
2230        if (eptr >= md->end_subject)
2231          {
2232          SCHECK_PARTIAL();
2233          cur_is_word = FALSE;
2234          }
2235        else
2236#ifdef SUPPORT_UCP
2237        if (md->use_ucp)
2238          {
2239          c = *eptr;
2240          if (c == '_') cur_is_word = TRUE; else
2241            {
2242            int cat = UCD_CATEGORY(c);
2243            cur_is_word = (cat == ucp_L || cat == ucp_N);
2244            }
2245          }
2246        else
2247#endif
2248        cur_is_word = MAX_255(*eptr)
2249          && ((md->ctypes[*eptr] & ctype_word) != 0);
2250        }
2251
2252      /* Now see if the situation is what we want */
2253
2254      if ((*ecode++ == OP_WORD_BOUNDARY)?
2255           cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2256        RRETURN(MATCH_NOMATCH);
2257      }
2258    break;
2259
2260    /* Match any single character type except newline; have to take care with
2261    CRLF newlines and partial matching. */
2262
2263    case OP_ANY:
2264    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2265    if (md->partial != 0 &&
2266        eptr + 1 >= md->end_subject &&
2267        NLBLOCK->nltype == NLTYPE_FIXED &&
2268        NLBLOCK->nllen == 2 &&
2269        *eptr == NLBLOCK->nl[0])
2270      {
2271      md->hitend = TRUE;
2272      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2273      }
2274
2275    /* Fall through */
2276
2277    /* Match any single character whatsoever. */
2278
2279    case OP_ALLANY:
2280    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2281      {                            /* not be updated before SCHECK_PARTIAL. */
2282      SCHECK_PARTIAL();
2283      RRETURN(MATCH_NOMATCH);
2284      }
2285    eptr++;
2286#ifdef SUPPORT_UTF
2287    if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2288#endif
2289    ecode++;
2290    break;
2291
2292    /* Match a single byte, even in UTF-8 mode. This opcode really does match
2293    any byte, even newline, independent of the setting of PCRE_DOTALL. */
2294
2295    case OP_ANYBYTE:
2296    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2297      {                            /* not be updated before SCHECK_PARTIAL. */
2298      SCHECK_PARTIAL();
2299      RRETURN(MATCH_NOMATCH);
2300      }
2301    eptr++;
2302    ecode++;
2303    break;
2304
2305    case OP_NOT_DIGIT:
2306    if (eptr >= md->end_subject)
2307      {
2308      SCHECK_PARTIAL();
2309      RRETURN(MATCH_NOMATCH);
2310      }
2311    GETCHARINCTEST(c, eptr);
2312    if (
2313#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2314       c < 256 &&
2315#endif
2316       (md->ctypes[c] & ctype_digit) != 0
2317       )
2318      RRETURN(MATCH_NOMATCH);
2319    ecode++;
2320    break;
2321
2322    case OP_DIGIT:
2323    if (eptr >= md->end_subject)
2324      {
2325      SCHECK_PARTIAL();
2326      RRETURN(MATCH_NOMATCH);
2327      }
2328    GETCHARINCTEST(c, eptr);
2329    if (
2330#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2331       c > 255 ||
2332#endif
2333       (md->ctypes[c] & ctype_digit) == 0
2334       )
2335      RRETURN(MATCH_NOMATCH);
2336    ecode++;
2337    break;
2338
2339    case OP_NOT_WHITESPACE:
2340    if (eptr >= md->end_subject)
2341      {
2342      SCHECK_PARTIAL();
2343      RRETURN(MATCH_NOMATCH);
2344      }
2345    GETCHARINCTEST(c, eptr);
2346    if (
2347#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2348       c < 256 &&
2349#endif
2350       (md->ctypes[c] & ctype_space) != 0
2351       )
2352      RRETURN(MATCH_NOMATCH);
2353    ecode++;
2354    break;
2355
2356    case OP_WHITESPACE:
2357    if (eptr >= md->end_subject)
2358      {
2359      SCHECK_PARTIAL();
2360      RRETURN(MATCH_NOMATCH);
2361      }
2362    GETCHARINCTEST(c, eptr);
2363    if (
2364#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2365       c > 255 ||
2366#endif
2367       (md->ctypes[c] & ctype_space) == 0
2368       )
2369      RRETURN(MATCH_NOMATCH);
2370    ecode++;
2371    break;
2372
2373    case OP_NOT_WORDCHAR:
2374    if (eptr >= md->end_subject)
2375      {
2376      SCHECK_PARTIAL();
2377      RRETURN(MATCH_NOMATCH);
2378      }
2379    GETCHARINCTEST(c, eptr);
2380    if (
2381#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2382       c < 256 &&
2383#endif
2384       (md->ctypes[c] & ctype_word) != 0
2385       )
2386      RRETURN(MATCH_NOMATCH);
2387    ecode++;
2388    break;
2389
2390    case OP_WORDCHAR:
2391    if (eptr >= md->end_subject)
2392      {
2393      SCHECK_PARTIAL();
2394      RRETURN(MATCH_NOMATCH);
2395      }
2396    GETCHARINCTEST(c, eptr);
2397    if (
2398#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2399       c > 255 ||
2400#endif
2401       (md->ctypes[c] & ctype_word) == 0
2402       )
2403      RRETURN(MATCH_NOMATCH);
2404    ecode++;
2405    break;
2406
2407    case OP_ANYNL:
2408    if (eptr >= md->end_subject)
2409      {
2410      SCHECK_PARTIAL();
2411      RRETURN(MATCH_NOMATCH);
2412      }
2413    GETCHARINCTEST(c, eptr);
2414    switch(c)
2415      {
2416      default: RRETURN(MATCH_NOMATCH);
2417
2418      case 0x000d:
2419      if (eptr >= md->end_subject)
2420        {
2421        SCHECK_PARTIAL();
2422        }
2423      else if (*eptr == 0x0a) eptr++;
2424      break;
2425
2426      case 0x000a:
2427      break;
2428
2429      case 0x000b:
2430      case 0x000c:
2431      case 0x0085:
2432      case 0x2028:
2433      case 0x2029:
2434      if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2435      break;
2436      }
2437    ecode++;
2438    break;
2439
2440    case OP_NOT_HSPACE:
2441    if (eptr >= md->end_subject)
2442      {
2443      SCHECK_PARTIAL();
2444      RRETURN(MATCH_NOMATCH);
2445      }
2446    GETCHARINCTEST(c, eptr);
2447    switch(c)
2448      {
2449      default: break;
2450      case 0x09:      /* HT */
2451      case 0x20:      /* SPACE */
2452      case 0xa0:      /* NBSP */
2453      case 0x1680:    /* OGHAM SPACE MARK */
2454      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2455      case 0x2000:    /* EN QUAD */
2456      case 0x2001:    /* EM QUAD */
2457      case 0x2002:    /* EN SPACE */
2458      case 0x2003:    /* EM SPACE */
2459      case 0x2004:    /* THREE-PER-EM SPACE */
2460      case 0x2005:    /* FOUR-PER-EM SPACE */
2461      case 0x2006:    /* SIX-PER-EM SPACE */
2462      case 0x2007:    /* FIGURE SPACE */
2463      case 0x2008:    /* PUNCTUATION SPACE */
2464      case 0x2009:    /* THIN SPACE */
2465      case 0x200A:    /* HAIR SPACE */
2466      case 0x202f:    /* NARROW NO-BREAK SPACE */
2467      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2468      case 0x3000:    /* IDEOGRAPHIC SPACE */
2469      RRETURN(MATCH_NOMATCH);
2470      }
2471    ecode++;
2472    break;
2473
2474    case OP_HSPACE:
2475    if (eptr >= md->end_subject)
2476      {
2477      SCHECK_PARTIAL();
2478      RRETURN(MATCH_NOMATCH);
2479      }
2480    GETCHARINCTEST(c, eptr);
2481    switch(c)
2482      {
2483      default: RRETURN(MATCH_NOMATCH);
2484      case 0x09:      /* HT */
2485      case 0x20:      /* SPACE */
2486      case 0xa0:      /* NBSP */
2487      case 0x1680:    /* OGHAM SPACE MARK */
2488      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2489      case 0x2000:    /* EN QUAD */
2490      case 0x2001:    /* EM QUAD */
2491      case 0x2002:    /* EN SPACE */
2492      case 0x2003:    /* EM SPACE */
2493      case 0x2004:    /* THREE-PER-EM SPACE */
2494      case 0x2005:    /* FOUR-PER-EM SPACE */
2495      case 0x2006:    /* SIX-PER-EM SPACE */
2496      case 0x2007:    /* FIGURE SPACE */
2497      case 0x2008:    /* PUNCTUATION SPACE */
2498      case 0x2009:    /* THIN SPACE */
2499      case 0x200A:    /* HAIR SPACE */
2500      case 0x202f:    /* NARROW NO-BREAK SPACE */
2501      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2502      case 0x3000:    /* IDEOGRAPHIC SPACE */
2503      break;
2504      }
2505    ecode++;
2506    break;
2507
2508    case OP_NOT_VSPACE:
2509    if (eptr >= md->end_subject)
2510      {
2511      SCHECK_PARTIAL();
2512      RRETURN(MATCH_NOMATCH);
2513      }
2514    GETCHARINCTEST(c, eptr);
2515    switch(c)
2516      {
2517      default: break;
2518      case 0x0a:      /* LF */
2519      case 0x0b:      /* VT */
2520      case 0x0c:      /* FF */
2521      case 0x0d:      /* CR */
2522      case 0x85:      /* NEL */
2523      case 0x2028:    /* LINE SEPARATOR */
2524      case 0x2029:    /* PARAGRAPH SEPARATOR */
2525      RRETURN(MATCH_NOMATCH);
2526      }
2527    ecode++;
2528    break;
2529
2530    case OP_VSPACE:
2531    if (eptr >= md->end_subject)
2532      {
2533      SCHECK_PARTIAL();
2534      RRETURN(MATCH_NOMATCH);
2535      }
2536    GETCHARINCTEST(c, eptr);
2537    switch(c)
2538      {
2539      default: RRETURN(MATCH_NOMATCH);
2540      case 0x0a:      /* LF */
2541      case 0x0b:      /* VT */
2542      case 0x0c:      /* FF */
2543      case 0x0d:      /* CR */
2544      case 0x85:      /* NEL */
2545      case 0x2028:    /* LINE SEPARATOR */
2546      case 0x2029:    /* PARAGRAPH SEPARATOR */
2547      break;
2548      }
2549    ecode++;
2550    break;
2551
2552#ifdef SUPPORT_UCP
2553    /* Check the next character by Unicode property. We will get here only
2554    if the support is in the binary; otherwise a compile-time error occurs. */
2555
2556    case OP_PROP:
2557    case OP_NOTPROP:
2558    if (eptr >= md->end_subject)
2559      {
2560      SCHECK_PARTIAL();
2561      RRETURN(MATCH_NOMATCH);
2562      }
2563    GETCHARINCTEST(c, eptr);
2564      {
2565      const ucd_record *prop = GET_UCD(c);
2566
2567      switch(ecode[1])
2568        {
2569        case PT_ANY:
2570        if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2571        break;
2572
2573        case PT_LAMP:
2574        if ((prop->chartype == ucp_Lu ||
2575             prop->chartype == ucp_Ll ||
2576             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2577          RRETURN(MATCH_NOMATCH);
2578        break;
2579
2580        case PT_GC:
2581        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2582          RRETURN(MATCH_NOMATCH);
2583        break;
2584
2585        case PT_PC:
2586        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2587          RRETURN(MATCH_NOMATCH);
2588        break;
2589
2590        case PT_SC:
2591        if ((ecode[2] != prop->script) == (op == OP_PROP))
2592          RRETURN(MATCH_NOMATCH);
2593        break;
2594
2595        /* These are specials */
2596
2597        case PT_ALNUM:
2598        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2599             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2600          RRETURN(MATCH_NOMATCH);
2601        break;
2602
2603        case PT_SPACE:    /* Perl space */
2604        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2605             c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2606               == (op == OP_NOTPROP))
2607          RRETURN(MATCH_NOMATCH);
2608        break;
2609
2610        case PT_PXSPACE:  /* POSIX space */
2611        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
2612             c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2613             c == CHAR_FF || c == CHAR_CR)
2614               == (op == OP_NOTPROP))
2615          RRETURN(MATCH_NOMATCH);
2616        break;
2617
2618        case PT_WORD:
2619        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2620             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2621             c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2622          RRETURN(MATCH_NOMATCH);
2623        break;
2624
2625        /* This should never occur */
2626
2627        default:
2628        RRETURN(PCRE_ERROR_INTERNAL);
2629        }
2630
2631      ecode += 3;
2632      }
2633    break;
2634
2635    /* Match an extended Unicode sequence. We will get here only if the support
2636    is in the binary; otherwise a compile-time error occurs. */
2637
2638    case OP_EXTUNI:
2639    if (eptr >= md->end_subject)
2640      {
2641      SCHECK_PARTIAL();
2642      RRETURN(MATCH_NOMATCH);
2643      }
2644    GETCHARINCTEST(c, eptr);
2645    if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2646    while (eptr < md->end_subject)
2647      {
2648      int len = 1;
2649      if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2650      if (UCD_CATEGORY(c) != ucp_M) break;
2651      eptr += len;
2652      }
2653    CHECK_PARTIAL();
2654    ecode++;
2655    break;
2656#endif
2657
2658
2659    /* Match a back reference, possibly repeatedly. Look past the end of the
2660    item to see if there is repeat information following. The code is similar
2661    to that for character classes, but repeated for efficiency. Then obey
2662    similar code to character type repeats - written out again for speed.
2663    However, if the referenced string is the empty string, always treat
2664    it as matched, any number of times (otherwise there could be infinite
2665    loops). */
2666
2667    case OP_REF:
2668    case OP_REFI:
2669    caseless = op == OP_REFI;
2670    offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2671    ecode += 1 + IMM2_SIZE;
2672
2673    /* If the reference is unset, there are two possibilities:
2674
2675    (a) In the default, Perl-compatible state, set the length negative;
2676    this ensures that every attempt at a match fails. We can't just fail
2677    here, because of the possibility of quantifiers with zero minima.
2678
2679    (b) If the JavaScript compatibility flag is set, set the length to zero
2680    so that the back reference matches an empty string.
2681
2682    Otherwise, set the length to the length of what was matched by the
2683    referenced subpattern. */
2684
2685    if (offset >= offset_top || md->offset_vector[offset] < 0)
2686      length = (md->jscript_compat)? 0 : -1;
2687    else
2688      length = md->offset_vector[offset+1] - md->offset_vector[offset];
2689
2690    /* Set up for repetition, or handle the non-repeated case */
2691
2692    switch (*ecode)
2693      {
2694      case OP_CRSTAR:
2695      case OP_CRMINSTAR:
2696      case OP_CRPLUS:
2697      case OP_CRMINPLUS:
2698      case OP_CRQUERY:
2699      case OP_CRMINQUERY:
2700      c = *ecode++ - OP_CRSTAR;
2701      minimize = (c & 1) != 0;
2702      min = rep_min[c];                 /* Pick up values from tables; */
2703      max = rep_max[c];                 /* zero for max => infinity */
2704      if (max == 0) max = INT_MAX;
2705      break;
2706
2707      case OP_CRRANGE:
2708      case OP_CRMINRANGE:
2709      minimize = (*ecode == OP_CRMINRANGE);
2710      min = GET2(ecode, 1);
2711      max = GET2(ecode, 1 + IMM2_SIZE);
2712      if (max == 0) max = INT_MAX;
2713      ecode += 1 + 2 * IMM2_SIZE;
2714      break;
2715
2716      default:               /* No repeat follows */
2717      if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2718        {
2719        if (length == -2) eptr = md->end_subject;   /* Partial match */
2720        CHECK_PARTIAL();
2721        RRETURN(MATCH_NOMATCH);
2722        }
2723      eptr += length;
2724      continue;              /* With the main loop */
2725      }
2726
2727    /* Handle repeated back references. If the length of the reference is
2728    zero, just continue with the main loop. If the length is negative, it
2729    means the reference is unset in non-JavaScript-compatible mode. If the
2730    minimum is zero, we can continue at the same level without recursion. For
2731    any other minimum, carrying on will result in NOMATCH. */
2732
2733    if (length == 0) continue;
2734    if (length < 0 && min == 0) continue;
2735
2736    /* First, ensure the minimum number of matches are present. We get back
2737    the length of the reference string explicitly rather than passing the
2738    address of eptr, so that eptr can be a register variable. */
2739
2740    for (i = 1; i <= min; i++)
2741      {
2742      int slength;
2743      if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2744        {
2745        if (slength == -2) eptr = md->end_subject;   /* Partial match */
2746        CHECK_PARTIAL();
2747        RRETURN(MATCH_NOMATCH);
2748        }
2749      eptr += slength;
2750      }
2751
2752    /* If min = max, continue at the same level without recursion.
2753    They are not both allowed to be zero. */
2754
2755    if (min == max) continue;
2756
2757    /* If minimizing, keep trying and advancing the pointer */
2758
2759    if (minimize)
2760      {
2761      for (fi = min;; fi++)
2762        {
2763        int slength;
2764        RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2765        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766        if (fi >= max) RRETURN(MATCH_NOMATCH);
2767        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2768          {
2769          if (slength == -2) eptr = md->end_subject;   /* Partial match */
2770          CHECK_PARTIAL();
2771          RRETURN(MATCH_NOMATCH);
2772          }
2773        eptr += slength;
2774        }
2775      /* Control never gets here */
2776      }
2777
2778    /* If maximizing, find the longest string and work backwards */
2779
2780    else
2781      {
2782      pp = eptr;
2783      for (i = min; i < max; i++)
2784        {
2785        int slength;
2786        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2787          {
2788          /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2789          the soft partial matching case. */
2790
2791          if (slength == -2 && md->partial != 0 &&
2792              md->end_subject > md->start_used_ptr)
2793            {
2794            md->hitend = TRUE;
2795            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2796            }
2797          break;
2798          }
2799        eptr += slength;
2800        }
2801
2802      while (eptr >= pp)
2803        {
2804        RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2805        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2806        eptr -= length;
2807        }
2808      RRETURN(MATCH_NOMATCH);
2809      }
2810    /* Control never gets here */
2811
2812    /* Match a bit-mapped character class, possibly repeatedly. This op code is
2813    used when all the characters in the class have values in the range 0-255,
2814    and either the matching is caseful, or the characters are in the range
2815    0-127 when UTF-8 processing is enabled. The only difference between
2816    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2817    encountered.
2818
2819    First, look past the end of the item to see if there is repeat information
2820    following. Then obey similar code to character type repeats - written out
2821    again for speed. */
2822
2823    case OP_NCLASS:
2824    case OP_CLASS:
2825      {
2826      /* The data variable is saved across frames, so the byte map needs to
2827      be stored there. */
2828#define BYTE_MAP ((pcre_uint8 *)data)
2829      data = ecode + 1;                /* Save for matching */
2830      ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2831
2832      switch (*ecode)
2833        {
2834        case OP_CRSTAR:
2835        case OP_CRMINSTAR:
2836        case OP_CRPLUS:
2837        case OP_CRMINPLUS:
2838        case OP_CRQUERY:
2839        case OP_CRMINQUERY:
2840        c = *ecode++ - OP_CRSTAR;
2841        minimize = (c & 1) != 0;
2842        min = rep_min[c];                 /* Pick up values from tables; */
2843        max = rep_max[c];                 /* zero for max => infinity */
2844        if (max == 0) max = INT_MAX;
2845        break;
2846
2847        case OP_CRRANGE:
2848        case OP_CRMINRANGE:
2849        minimize = (*ecode == OP_CRMINRANGE);
2850        min = GET2(ecode, 1);
2851        max = GET2(ecode, 1 + IMM2_SIZE);
2852        if (max == 0) max = INT_MAX;
2853        ecode += 1 + 2 * IMM2_SIZE;
2854        break;
2855
2856        default:               /* No repeat follows */
2857        min = max = 1;
2858        break;
2859        }
2860
2861      /* First, ensure the minimum number of matches are present. */
2862
2863#ifdef SUPPORT_UTF
2864      if (utf)
2865        {
2866        for (i = 1; i <= min; i++)
2867          {
2868          if (eptr >= md->end_subject)
2869            {
2870            SCHECK_PARTIAL();
2871            RRETURN(MATCH_NOMATCH);
2872            }
2873          GETCHARINC(c, eptr);
2874          if (c > 255)
2875            {
2876            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2877            }
2878          else
2879            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2880          }
2881        }
2882      else
2883#endif
2884      /* Not UTF mode */
2885        {
2886        for (i = 1; i <= min; i++)
2887          {
2888          if (eptr >= md->end_subject)
2889            {
2890            SCHECK_PARTIAL();
2891            RRETURN(MATCH_NOMATCH);
2892            }
2893          c = *eptr++;
2894#ifndef COMPILE_PCRE8
2895          if (c > 255)
2896            {
2897            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2898            }
2899          else
2900#endif
2901            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2902          }
2903        }
2904
2905      /* If max == min we can continue with the main loop without the
2906      need to recurse. */
2907
2908      if (min == max) continue;
2909
2910      /* If minimizing, keep testing the rest of the expression and advancing
2911      the pointer while it matches the class. */
2912
2913      if (minimize)
2914        {
2915#ifdef SUPPORT_UTF
2916        if (utf)
2917          {
2918          for (fi = min;; fi++)
2919            {
2920            RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2921            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2922            if (fi >= max) RRETURN(MATCH_NOMATCH);
2923            if (eptr >= md->end_subject)
2924              {
2925              SCHECK_PARTIAL();
2926              RRETURN(MATCH_NOMATCH);
2927              }
2928            GETCHARINC(c, eptr);
2929            if (c > 255)
2930              {
2931              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2932              }
2933            else
2934              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2935            }
2936          }
2937        else
2938#endif
2939        /* Not UTF mode */
2940          {
2941          for (fi = min;; fi++)
2942            {
2943            RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2944            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2945            if (fi >= max) RRETURN(MATCH_NOMATCH);
2946            if (eptr >= md->end_subject)
2947              {
2948              SCHECK_PARTIAL();
2949              RRETURN(MATCH_NOMATCH);
2950              }
2951            c = *eptr++;
2952#ifndef COMPILE_PCRE8
2953            if (c > 255)
2954              {
2955              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2956              }
2957            else
2958#endif
2959              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2960            }
2961          }
2962        /* Control never gets here */
2963        }
2964
2965      /* If maximizing, find the longest possible run, then work backwards. */
2966
2967      else
2968        {
2969        pp = eptr;
2970
2971#ifdef SUPPORT_UTF
2972        if (utf)
2973          {
2974          for (i = min; i < max; i++)
2975            {
2976            int len = 1;
2977            if (eptr >= md->end_subject)
2978              {
2979              SCHECK_PARTIAL();
2980              break;
2981              }
2982            GETCHARLEN(c, eptr, len);
2983            if (c > 255)
2984              {
2985              if (op == OP_CLASS) break;
2986              }
2987            else
2988              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2989            eptr += len;
2990            }
2991          for (;;)
2992            {
2993            RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2994            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2995            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2996            BACKCHAR(eptr);
2997            }
2998          }
2999        else
3000#endif
3001          /* Not UTF mode */
3002          {
3003          for (i = min; i < max; i++)
3004            {
3005            if (eptr >= md->end_subject)
3006              {
3007              SCHECK_PARTIAL();
3008              break;
3009              }
3010            c = *eptr;
3011#ifndef COMPILE_PCRE8
3012            if (c > 255)
3013              {
3014              if (op == OP_CLASS) break;
3015              }
3016            else
3017#endif
3018              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3019            eptr++;
3020            }
3021          while (eptr >= pp)
3022            {
3023            RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3024            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3025            eptr--;
3026            }
3027          }
3028
3029        RRETURN(MATCH_NOMATCH);
3030        }
3031#undef BYTE_MAP
3032      }
3033    /* Control never gets here */
3034
3035
3036    /* Match an extended character class. This opcode is encountered only
3037    when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3038    mode, because Unicode properties are supported in non-UTF-8 mode. */
3039
3040#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3041    case OP_XCLASS:
3042      {
3043      data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3044      ecode += GET(ecode, 1);                      /* Advance past the item */
3045
3046      switch (*ecode)
3047        {
3048        case OP_CRSTAR:
3049        case OP_CRMINSTAR:
3050        case OP_CRPLUS:
3051        case OP_CRMINPLUS:
3052        case OP_CRQUERY:
3053        case OP_CRMINQUERY:
3054        c = *ecode++ - OP_CRSTAR;
3055        minimize = (c & 1) != 0;
3056        min = rep_min[c];                 /* Pick up values from tables; */
3057        max = rep_max[c];                 /* zero for max => infinity */
3058        if (max == 0) max = INT_MAX;
3059        break;
3060
3061        case OP_CRRANGE:
3062        case OP_CRMINRANGE:
3063        minimize = (*ecode == OP_CRMINRANGE);
3064        min = GET2(ecode, 1);
3065        max = GET2(ecode, 1 + IMM2_SIZE);
3066        if (max == 0) max = INT_MAX;
3067        ecode += 1 + 2 * IMM2_SIZE;
3068        break;
3069
3070        default:               /* No repeat follows */
3071        min = max = 1;
3072        break;
3073        }
3074
3075      /* First, ensure the minimum number of matches are present. */
3076
3077      for (i = 1; i <= min; i++)
3078        {
3079        if (eptr >= md->end_subject)
3080          {
3081          SCHECK_PARTIAL();
3082          RRETURN(MATCH_NOMATCH);
3083          }
3084        GETCHARINCTEST(c, eptr);
3085        if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3086        }
3087
3088      /* If max == min we can continue with the main loop without the
3089      need to recurse. */
3090
3091      if (min == max) continue;
3092
3093      /* If minimizing, keep testing the rest of the expression and advancing
3094      the pointer while it matches the class. */
3095
3096      if (minimize)
3097        {
3098        for (fi = min;; fi++)
3099          {
3100          RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3101          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3102          if (fi >= max) RRETURN(MATCH_NOMATCH);
3103          if (eptr >= md->end_subject)
3104            {
3105            SCHECK_PARTIAL();
3106            RRETURN(MATCH_NOMATCH);
3107            }
3108          GETCHARINCTEST(c, eptr);
3109          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3110          }
3111        /* Control never gets here */
3112        }
3113
3114      /* If maximizing, find the longest possible run, then work backwards. */
3115
3116      else
3117        {
3118        pp = eptr;
3119        for (i = min; i < max; i++)
3120          {
3121          int len = 1;
3122          if (eptr >= md->end_subject)
3123            {
3124            SCHECK_PARTIAL();
3125            break;
3126            }
3127#ifdef SUPPORT_UTF
3128          GETCHARLENTEST(c, eptr, len);
3129#else
3130          c = *eptr;
3131#endif
3132          if (!PRIV(xclass)(c, data, utf)) break;
3133          eptr += len;
3134          }
3135        for(;;)
3136          {
3137          RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3138          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3139          if (eptr-- == pp) break;        /* Stop if tried at original pos */
3140#ifdef SUPPORT_UTF
3141          if (utf) BACKCHAR(eptr);
3142#endif
3143          }
3144        RRETURN(MATCH_NOMATCH);
3145        }
3146
3147      /* Control never gets here */
3148      }
3149#endif    /* End of XCLASS */
3150
3151    /* Match a single character, casefully */
3152
3153    case OP_CHAR:
3154#ifdef SUPPORT_UTF
3155    if (utf)
3156      {
3157      length = 1;
3158      ecode++;
3159      GETCHARLEN(fc, ecode, length);
3160      if (length > md->end_subject - eptr)
3161        {
3162        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3163        RRETURN(MATCH_NOMATCH);
3164        }
3165      while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3166      }
3167    else
3168#endif
3169    /* Not UTF mode */
3170      {
3171      if (md->end_subject - eptr < 1)
3172        {
3173        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3174        RRETURN(MATCH_NOMATCH);
3175        }
3176      if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3177      ecode += 2;
3178      }
3179    break;
3180
3181    /* Match a single character, caselessly. If we are at the end of the
3182    subject, give up immediately. */
3183
3184    case OP_CHARI:
3185    if (eptr >= md->end_subject)
3186      {
3187      SCHECK_PARTIAL();
3188      RRETURN(MATCH_NOMATCH);
3189      }
3190
3191#ifdef SUPPORT_UTF
3192    if (utf)
3193      {
3194      length = 1;
3195      ecode++;
3196      GETCHARLEN(fc, ecode, length);
3197
3198      /* If the pattern character's value is < 128, we have only one byte, and
3199      we know that its other case must also be one byte long, so we can use the
3200      fast lookup table. We know that there is at least one byte left in the
3201      subject. */
3202
3203      if (fc < 128)
3204        {
3205        if (md->lcc[fc]
3206            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3207        ecode++;
3208        eptr++;
3209        }
3210
3211      /* Otherwise we must pick up the subject character. Note that we cannot
3212      use the value of "length" to check for sufficient bytes left, because the
3213      other case of the character may have more or fewer bytes.  */
3214
3215      else
3216        {
3217        unsigned int dc;
3218        GETCHARINC(dc, eptr);
3219        ecode += length;
3220
3221        /* If we have Unicode property support, we can use it to test the other
3222        case of the character, if there is one. */
3223
3224        if (fc != dc)
3225          {
3226#ifdef SUPPORT_UCP
3227          if (dc != UCD_OTHERCASE(fc))
3228#endif
3229            RRETURN(MATCH_NOMATCH);
3230          }
3231        }
3232      }
3233    else
3234#endif   /* SUPPORT_UTF */
3235
3236    /* Not UTF mode */
3237      {
3238      if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3239          != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3240      eptr++;
3241      ecode += 2;
3242      }
3243    break;
3244
3245    /* Match a single character repeatedly. */
3246
3247    case OP_EXACT:
3248    case OP_EXACTI:
3249    min = max = GET2(ecode, 1);
3250    ecode += 1 + IMM2_SIZE;
3251    goto REPEATCHAR;
3252
3253    case OP_POSUPTO:
3254    case OP_POSUPTOI:
3255    possessive = TRUE;
3256    /* Fall through */
3257
3258    case OP_UPTO:
3259    case OP_UPTOI:
3260    case OP_MINUPTO:
3261    case OP_MINUPTOI:
3262    min = 0;
3263    max = GET2(ecode, 1);
3264    minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3265    ecode += 1 + IMM2_SIZE;
3266    goto REPEATCHAR;
3267
3268    case OP_POSSTAR:
3269    case OP_POSSTARI:
3270    possessive = TRUE;
3271    min = 0;
3272    max = INT_MAX;
3273    ecode++;
3274    goto REPEATCHAR;
3275
3276    case OP_POSPLUS:
3277    case OP_POSPLUSI:
3278    possessive = TRUE;
3279    min = 1;
3280    max = INT_MAX;
3281    ecode++;
3282    goto REPEATCHAR;
3283
3284    case OP_POSQUERY:
3285    case OP_POSQUERYI:
3286    possessive = TRUE;
3287    min = 0;
3288    max = 1;
3289    ecode++;
3290    goto REPEATCHAR;
3291
3292    case OP_STAR:
3293    case OP_STARI:
3294    case OP_MINSTAR:
3295    case OP_MINSTARI:
3296    case OP_PLUS:
3297    case OP_PLUSI:
3298    case OP_MINPLUS:
3299    case OP_MINPLUSI:
3300    case OP_QUERY:
3301    case OP_QUERYI:
3302    case OP_MINQUERY:
3303    case OP_MINQUERYI:
3304    c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3305    minimize = (c & 1) != 0;
3306    min = rep_min[c];                 /* Pick up values from tables; */
3307    max = rep_max[c];                 /* zero for max => infinity */
3308    if (max == 0) max = INT_MAX;
3309
3310    /* Common code for all repeated single-character matches. */
3311
3312    REPEATCHAR:
3313#ifdef SUPPORT_UTF
3314    if (utf)
3315      {
3316      length = 1;
3317      charptr = ecode;
3318      GETCHARLEN(fc, ecode, length);
3319      ecode += length;
3320
3321      /* Handle multibyte character matching specially here. There is
3322      support for caseless matching if UCP support is present. */
3323
3324      if (length > 1)
3325        {
3326#ifdef SUPPORT_UCP
3327        unsigned int othercase;
3328        if (op >= OP_STARI &&     /* Caseless */
3329            (othercase = UCD_OTHERCASE(fc)) != fc)
3330          oclength = PRIV(ord2utf)(othercase, occhars);
3331        else oclength = 0;
3332#endif  /* SUPPORT_UCP */
3333
3334        for (i = 1; i <= min; i++)
3335          {
3336          if (eptr <= md->end_subject - length &&
3337            memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3338#ifdef SUPPORT_UCP
3339          else if (oclength > 0 &&
3340                   eptr <= md->end_subject - oclength &&
3341                   memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3342#endif  /* SUPPORT_UCP */
3343          else
3344            {
3345            CHECK_PARTIAL();
3346            RRETURN(MATCH_NOMATCH);
3347            }
3348          }
3349
3350        if (min == max) continue;
3351
3352        if (minimize)
3353          {
3354          for (fi = min;; fi++)
3355            {
3356            RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3357            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3358            if (fi >= max) RRETURN(MATCH_NOMATCH);
3359            if (eptr <= md->end_subject - length &&
3360              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3361#ifdef SUPPORT_UCP
3362            else if (oclength > 0 &&
3363                     eptr <= md->end_subject - oclength &&
3364                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3365#endif  /* SUPPORT_UCP */
3366            else
3367              {
3368              CHECK_PARTIAL();
3369              RRETURN(MATCH_NOMATCH);
3370              }
3371            }
3372          /* Control never gets here */
3373          }
3374
3375        else  /* Maximize */
3376          {
3377          pp = eptr;
3378          for (i = min; i < max; i++)
3379            {
3380            if (eptr <= md->end_subject - length &&
3381                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3382#ifdef SUPPORT_UCP
3383            else if (oclength > 0 &&
3384                     eptr <= md->end_subject - oclength &&
3385                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3386#endif  /* SUPPORT_UCP */
3387            else
3388              {
3389              CHECK_PARTIAL();
3390              break;
3391              }
3392            }
3393
3394          if (possessive) continue;
3395
3396          for(;;)
3397            {
3398            RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3399            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3400            if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3401#ifdef SUPPORT_UCP
3402            eptr--;
3403            BACKCHAR(eptr);
3404#else   /* without SUPPORT_UCP */
3405            eptr -= length;
3406#endif  /* SUPPORT_UCP */
3407            }
3408          }
3409        /* Control never gets here */
3410        }
3411
3412      /* If the length of a UTF-8 character is 1, we fall through here, and
3413      obey the code as for non-UTF-8 characters below, though in this case the
3414      value of fc will always be < 128. */
3415      }
3416    else
3417#endif  /* SUPPORT_UTF */
3418      /* When not in UTF-8 mode, load a single-byte character. */
3419      fc = *ecode++;
3420
3421    /* The value of fc at this point is always one character, though we may
3422    or may not be in UTF mode. The code is duplicated for the caseless and
3423    caseful cases, for speed, since matching characters is likely to be quite
3424    common. First, ensure the minimum number of matches are present. If min =
3425    max, continue at the same level without recursing. Otherwise, if
3426    minimizing, keep trying the rest of the expression and advancing one
3427    matching character if failing, up to the maximum. Alternatively, if
3428    maximizing, find the maximum number of characters and work backwards. */
3429
3430    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3431      max, (char *)eptr));
3432
3433    if (op >= OP_STARI)  /* Caseless */
3434      {
3435#ifdef COMPILE_PCRE8
3436      /* fc must be < 128 if UTF is enabled. */
3437      foc = md->fcc[fc];
3438#else
3439#ifdef SUPPORT_UTF
3440#ifdef SUPPORT_UCP
3441      if (utf && fc > 127)
3442        foc = UCD_OTHERCASE(fc);
3443#else
3444      if (utf && fc > 127)
3445        foc = fc;
3446#endif /* SUPPORT_UCP */
3447      else
3448#endif /* SUPPORT_UTF */
3449        foc = TABLE_GET(fc, md->fcc, fc);
3450#endif /* COMPILE_PCRE8 */
3451
3452      for (i = 1; i <= min; i++)
3453        {
3454        if (eptr >= md->end_subject)
3455          {
3456          SCHECK_PARTIAL();
3457          RRETURN(MATCH_NOMATCH);
3458          }
3459        if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3460        eptr++;
3461        }
3462      if (min == max) continue;
3463      if (minimize)
3464        {
3465        for (fi = min;; fi++)
3466          {
3467          RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3468          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3469          if (fi >= max) RRETURN(MATCH_NOMATCH);
3470          if (eptr >= md->end_subject)
3471            {
3472            SCHECK_PARTIAL();
3473            RRETURN(MATCH_NOMATCH);
3474            }
3475          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3476          eptr++;
3477          }
3478        /* Control never gets here */
3479        }
3480      else  /* Maximize */
3481        {
3482        pp = eptr;
3483        for (i = min; i < max; i++)
3484          {
3485          if (eptr >= md->end_subject)
3486            {
3487            SCHECK_PARTIAL();
3488            break;
3489            }
3490          if (fc != *eptr && foc != *eptr) break;
3491          eptr++;
3492          }
3493
3494        if (possessive) continue;
3495
3496        while (eptr >= pp)
3497          {
3498          RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3499          eptr--;
3500          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3501          }
3502        RRETURN(MATCH_NOMATCH);
3503        }
3504      /* Control never gets here */
3505      }
3506
3507    /* Caseful comparisons (includes all multi-byte characters) */
3508
3509    else
3510      {
3511      for (i = 1; i <= min; i++)
3512        {
3513        if (eptr >= md->end_subject)
3514          {
3515          SCHECK_PARTIAL();
3516          RRETURN(MATCH_NOMATCH);
3517          }
3518        if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3519        }
3520
3521      if (min == max) continue;
3522
3523      if (minimize)
3524        {
3525        for (fi = min;; fi++)
3526          {
3527          RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3528          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3529          if (fi >= max) RRETURN(MATCH_NOMATCH);
3530          if (eptr >= md->end_subject)
3531            {
3532            SCHECK_PARTIAL();
3533            RRETURN(MATCH_NOMATCH);
3534            }
3535          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3536          }
3537        /* Control never gets here */
3538        }
3539      else  /* Maximize */
3540        {
3541        pp = eptr;
3542        for (i = min; i < max; i++)
3543          {
3544          if (eptr >= md->end_subject)
3545            {
3546            SCHECK_PARTIAL();
3547            break;
3548            }
3549          if (fc != *eptr) break;
3550          eptr++;
3551          }
3552        if (possessive) continue;
3553
3554        while (eptr >= pp)
3555          {
3556          RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3557          eptr--;
3558          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3559          }
3560        RRETURN(MATCH_NOMATCH);
3561        }
3562      }
3563    /* Control never gets here */
3564
3565    /* Match a negated single character. In UTF mode both the pattern
3566    character and the subject character being checked can be multibyte. */
3567
3568    case OP_NOT:
3569    case OP_NOTI:
3570    if (eptr >= md->end_subject)
3571      {
3572      SCHECK_PARTIAL();
3573      RRETURN(MATCH_NOMATCH);
3574      }
3575#ifdef SUPPORT_UTF
3576    if (utf)
3577      {
3578      register unsigned int ch, och;
3579
3580      ecode++;
3581      GETCHARINC(ch, ecode);
3582      GETCHARINC(c, eptr);
3583
3584      if (op == OP_NOT)
3585        {
3586        if (ch == c) RRETURN(MATCH_NOMATCH);
3587        }
3588      else
3589        {
3590#ifdef SUPPORT_UCP
3591        if (ch > 127)
3592          och = UCD_OTHERCASE(ch);
3593#else
3594        if (ch > 127)
3595          och = ch;
3596#endif /* SUPPORT_UCP */
3597        else
3598          och = TABLE_GET(ch, md->fcc, ch);
3599        if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3600        }
3601      }
3602    else
3603#endif
3604      {
3605      register unsigned int ch = ecode[1];
3606      c = *eptr++;
3607      if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3608        RRETURN(MATCH_NOMATCH);
3609      ecode += 2;
3610      }
3611    break;
3612
3613    /* Match a negated single one-byte character repeatedly. This is almost a
3614    repeat of the code for a repeated single character, but I haven't found a
3615    nice way of commoning these up that doesn't require a test of the
3616    positive/negative option for each character match. Maybe that wouldn't add
3617    very much to the time taken, but character matching *is* what this is all
3618    about... */
3619
3620    case OP_NOTEXACT:
3621    case OP_NOTEXACTI:
3622    min = max = GET2(ecode, 1);
3623    ecode += 1 + IMM2_SIZE;
3624    goto REPEATNOTCHAR;
3625
3626    case OP_NOTUPTO:
3627    case OP_NOTUPTOI:
3628    case OP_NOTMINUPTO:
3629    case OP_NOTMINUPTOI:
3630    min = 0;
3631    max = GET2(ecode, 1);
3632    minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3633    ecode += 1 + IMM2_SIZE;
3634    goto REPEATNOTCHAR;
3635
3636    case OP_NOTPOSSTAR:
3637    case OP_NOTPOSSTARI:
3638    possessive = TRUE;
3639    min = 0;
3640    max = INT_MAX;
3641    ecode++;
3642    goto REPEATNOTCHAR;
3643
3644    case OP_NOTPOSPLUS:
3645    case OP_NOTPOSPLUSI:
3646    possessive = TRUE;
3647    min = 1;
3648    max = INT_MAX;
3649    ecode++;
3650    goto REPEATNOTCHAR;
3651
3652    case OP_NOTPOSQUERY:
3653    case OP_NOTPOSQUERYI:
3654    possessive = TRUE;
3655    min = 0;
3656    max = 1;
3657    ecode++;
3658    goto REPEATNOTCHAR;
3659
3660    case OP_NOTPOSUPTO:
3661    case OP_NOTPOSUPTOI:
3662    possessive = TRUE;
3663    min = 0;
3664    max = GET2(ecode, 1);
3665    ecode += 1 + IMM2_SIZE;
3666    goto REPEATNOTCHAR;
3667
3668    case OP_NOTSTAR:
3669    case OP_NOTSTARI:
3670    case OP_NOTMINSTAR:
3671    case OP_NOTMINSTARI:
3672    case OP_NOTPLUS:
3673    case OP_NOTPLUSI:
3674    case OP_NOTMINPLUS:
3675    case OP_NOTMINPLUSI:
3676    case OP_NOTQUERY:
3677    case OP_NOTQUERYI:
3678    case OP_NOTMINQUERY:
3679    case OP_NOTMINQUERYI:
3680    c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3681    minimize = (c & 1) != 0;
3682    min = rep_min[c];                 /* Pick up values from tables; */
3683    max = rep_max[c];                 /* zero for max => infinity */
3684    if (max == 0) max = INT_MAX;
3685
3686    /* Common code for all repeated single-byte matches. */
3687
3688    REPEATNOTCHAR:
3689    GETCHARINCTEST(fc, ecode);
3690
3691    /* The code is duplicated for the caseless and caseful cases, for speed,
3692    since matching characters is likely to be quite common. First, ensure the
3693    minimum number of matches are present. If min = max, continue at the same
3694    level without recursing. Otherwise, if minimizing, keep trying the rest of
3695    the expression and advancing one matching character if failing, up to the
3696    maximum. Alternatively, if maximizing, find the maximum number of
3697    characters and work backwards. */
3698
3699    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3700      max, (char *)eptr));
3701
3702    if (op >= OP_NOTSTARI)     /* Caseless */
3703      {
3704#ifdef SUPPORT_UTF
3705#ifdef SUPPORT_UCP
3706      if (utf && fc > 127)
3707        foc = UCD_OTHERCASE(fc);
3708#else
3709      if (utf && fc > 127)
3710        foc = fc;
3711#endif /* SUPPORT_UCP */
3712      else
3713#endif /* SUPPORT_UTF */
3714        foc = TABLE_GET(fc, md->fcc, fc);
3715
3716#ifdef SUPPORT_UTF
3717      if (utf)
3718        {
3719        register unsigned int d;
3720        for (i = 1; i <= min; i++)
3721          {
3722          if (eptr >= md->end_subject)
3723            {
3724            SCHECK_PARTIAL();
3725            RRETURN(MATCH_NOMATCH);
3726            }
3727          GETCHARINC(d, eptr);
3728          if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3729          }
3730        }
3731      else
3732#endif
3733      /* Not UTF mode */
3734        {
3735        for (i = 1; i <= min; i++)
3736          {
3737          if (eptr >= md->end_subject)
3738            {
3739            SCHECK_PARTIAL();
3740            RRETURN(MATCH_NOMATCH);
3741            }
3742          if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3743          eptr++;
3744          }
3745        }
3746
3747      if (min == max) continue;
3748
3749      if (minimize)
3750        {
3751#ifdef SUPPORT_UTF
3752        if (utf)
3753          {
3754          register unsigned int d;
3755          for (fi = min;; fi++)
3756            {
3757            RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3758            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3759            if (fi >= max) RRETURN(MATCH_NOMATCH);
3760            if (eptr >= md->end_subject)
3761              {
3762              SCHECK_PARTIAL();
3763              RRETURN(MATCH_NOMATCH);
3764              }
3765            GETCHARINC(d, eptr);
3766            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3767            }
3768          }
3769        else
3770#endif
3771        /* Not UTF mode */
3772          {
3773          for (fi = min;; fi++)
3774            {
3775            RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3776            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3777            if (fi >= max) RRETURN(MATCH_NOMATCH);
3778            if (eptr >= md->end_subject)
3779              {
3780              SCHECK_PARTIAL();
3781              RRETURN(MATCH_NOMATCH);
3782              }
3783            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3784            eptr++;
3785            }
3786          }
3787        /* Control never gets here */
3788        }
3789
3790      /* Maximize case */
3791
3792      else
3793        {
3794        pp = eptr;
3795
3796#ifdef SUPPORT_UTF
3797        if (utf)
3798          {
3799          register unsigned int d;
3800          for (i = min; i < max; i++)
3801            {
3802            int len = 1;
3803            if (eptr >= md->end_subject)
3804              {
3805              SCHECK_PARTIAL();
3806              break;
3807              }
3808            GETCHARLEN(d, eptr, len);
3809            if (fc == d || (unsigned int)foc == d) break;
3810            eptr += len;
3811            }
3812          if (possessive) continue;
3813          for(;;)
3814            {
3815            RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3816            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3817            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3818            BACKCHAR(eptr);
3819            }
3820          }
3821        else
3822#endif
3823        /* Not UTF mode */
3824          {
3825          for (i = min; i < max; i++)
3826            {
3827            if (eptr >= md->end_subject)
3828              {
3829              SCHECK_PARTIAL();
3830              break;
3831              }
3832            if (fc == *eptr || foc == *eptr) break;
3833            eptr++;
3834            }
3835          if (possessive) continue;
3836          while (eptr >= pp)
3837            {
3838            RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3839            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3840            eptr--;
3841            }
3842          }
3843
3844        RRETURN(MATCH_NOMATCH);
3845        }
3846      /* Control never gets here */
3847      }
3848
3849    /* Caseful comparisons */
3850
3851    else
3852      {
3853#ifdef SUPPORT_UTF
3854      if (utf)
3855        {
3856        register unsigned int d;
3857        for (i = 1; i <= min; i++)
3858          {
3859          if (eptr >= md->end_subject)
3860            {
3861            SCHECK_PARTIAL();
3862            RRETURN(MATCH_NOMATCH);
3863            }
3864          GETCHARINC(d, eptr);
3865          if (fc == d) RRETURN(MATCH_NOMATCH);
3866          }
3867        }
3868      else
3869#endif
3870      /* Not UTF mode */
3871        {
3872        for (i = 1; i <= min; i++)
3873          {
3874          if (eptr >= md->end_subject)
3875            {
3876            SCHECK_PARTIAL();
3877            RRETURN(MATCH_NOMATCH);
3878            }
3879          if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3880          }
3881        }
3882
3883      if (min == max) continue;
3884
3885      if (minimize)
3886        {
3887#ifdef SUPPORT_UTF
3888        if (utf)
3889          {
3890          register unsigned int d;
3891          for (fi = min;; fi++)
3892            {
3893            RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3894            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3895            if (fi >= max) RRETURN(MATCH_NOMATCH);
3896            if (eptr >= md->end_subject)
3897              {
3898              SCHECK_PARTIAL();
3899              RRETURN(MATCH_NOMATCH);
3900              }
3901            GETCHARINC(d, eptr);
3902            if (fc == d) RRETURN(MATCH_NOMATCH);
3903            }
3904          }
3905        else
3906#endif
3907        /* Not UTF mode */
3908          {
3909          for (fi = min;; fi++)
3910            {
3911            RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3912            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3913            if (fi >= max) RRETURN(MATCH_NOMATCH);
3914            if (eptr >= md->end_subject)
3915              {
3916              SCHECK_PARTIAL();
3917              RRETURN(MATCH_NOMATCH);
3918              }
3919            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3920            }
3921          }
3922        /* Control never gets here */
3923        }
3924
3925      /* Maximize case */
3926
3927      else
3928        {
3929        pp = eptr;
3930
3931#ifdef SUPPORT_UTF
3932        if (utf)
3933          {
3934          register unsigned int d;
3935          for (i = min; i < max; i++)
3936            {
3937            int len = 1;
3938            if (eptr >= md->end_subject)
3939              {
3940              SCHECK_PARTIAL();
3941              break;
3942              }
3943            GETCHARLEN(d, eptr, len);
3944            if (fc == d) break;
3945            eptr += len;
3946            }
3947          if (possessive) continue;
3948          for(;;)
3949            {
3950            RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3951            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3952            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3953            BACKCHAR(eptr);
3954            }
3955          }
3956        else
3957#endif
3958        /* Not UTF mode */
3959          {
3960          for (i = min; i < max; i++)
3961            {
3962            if (eptr >= md->end_subject)
3963              {
3964              SCHECK_PARTIAL();
3965              break;
3966              }
3967            if (fc == *eptr) break;
3968            eptr++;
3969            }
3970          if (possessive) continue;
3971          while (eptr >= pp)
3972            {
3973            RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3974            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3975            eptr--;
3976            }
3977          }
3978
3979        RRETURN(MATCH_NOMATCH);
3980        }
3981      }
3982    /* Control never gets here */
3983
3984    /* Match a single character type repeatedly; several different opcodes
3985    share code. This is very similar to the code for single characters, but we
3986    repeat it in the interests of efficiency. */
3987
3988    case OP_TYPEEXACT:
3989    min = max = GET2(ecode, 1);
3990    minimize = TRUE;
3991    ecode += 1 + IMM2_SIZE;
3992    goto REPEATTYPE;
3993
3994    case OP_TYPEUPTO:
3995    case OP_TYPEMINUPTO:
3996    min = 0;
3997    max = GET2(ecode, 1);
3998    minimize = *ecode == OP_TYPEMINUPTO;
3999    ecode += 1 + IMM2_SIZE;
4000    goto REPEATTYPE;
4001
4002    case OP_TYPEPOSSTAR:
4003    possessive = TRUE;
4004    min = 0;
4005    max = INT_MAX;
4006    ecode++;
4007    goto REPEATTYPE;
4008
4009    case OP_TYPEPOSPLUS:
4010    possessive = TRUE;
4011    min = 1;
4012    max = INT_MAX;
4013    ecode++;
4014    goto REPEATTYPE;
4015
4016    case OP_TYPEPOSQUERY:
4017    possessive = TRUE;
4018    min = 0;
4019    max = 1;
4020    ecode++;
4021    goto REPEATTYPE;
4022
4023    case OP_TYPEPOSUPTO:
4024    possessive = TRUE;
4025    min = 0;
4026    max = GET2(ecode, 1);
4027    ecode += 1 + IMM2_SIZE;
4028    goto REPEATTYPE;
4029
4030    case OP_TYPESTAR:
4031    case OP_TYPEMINSTAR:
4032    case OP_TYPEPLUS:
4033    case OP_TYPEMINPLUS:
4034    case OP_TYPEQUERY:
4035    case OP_TYPEMINQUERY:
4036    c = *ecode++ - OP_TYPESTAR;
4037    minimize = (c & 1) != 0;
4038    min = rep_min[c];                 /* Pick up values from tables; */
4039    max = rep_max[c];                 /* zero for max => infinity */
4040    if (max == 0) max = INT_MAX;
4041
4042    /* Common code for all repeated single character type matches. Note that
4043    in UTF-8 mode, '.' matches a character of any length, but for the other
4044    character types, the valid characters are all one-byte long. */
4045
4046    REPEATTYPE:
4047    ctype = *ecode++;      /* Code for the character type */
4048
4049#ifdef SUPPORT_UCP
4050    if (ctype == OP_PROP || ctype == OP_NOTPROP)
4051      {
4052      prop_fail_result = ctype == OP_NOTPROP;
4053      prop_type = *ecode++;
4054      prop_value = *ecode++;
4055      }
4056    else prop_type = -1;
4057#endif
4058
4059    /* First, ensure the minimum number of matches are present. Use inline
4060    code for maximizing the speed, and do the type test once at the start
4061    (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4062    is tidier. Also separate the UCP code, which can be the same for both UTF-8
4063    and single-bytes. */
4064
4065    if (min > 0)
4066      {
4067#ifdef SUPPORT_UCP
4068      if (prop_type >= 0)
4069        {
4070        switch(prop_type)
4071          {
4072          case PT_ANY:
4073          if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4074          for (i = 1; i <= min; i++)
4075            {
4076            if (eptr >= md->end_subject)
4077              {
4078              SCHECK_PARTIAL();
4079              RRETURN(MATCH_NOMATCH);
4080              }
4081            GETCHARINCTEST(c, eptr);
4082            }
4083          break;
4084
4085          case PT_LAMP:
4086          for (i = 1; i <= min; i++)
4087            {
4088            int chartype;
4089            if (eptr >= md->end_subject)
4090              {
4091              SCHECK_PARTIAL();
4092              RRETURN(MATCH_NOMATCH);
4093              }
4094            GETCHARINCTEST(c, eptr);
4095            chartype = UCD_CHARTYPE(c);
4096            if ((chartype == ucp_Lu ||
4097                 chartype == ucp_Ll ||
4098                 chartype == ucp_Lt) == prop_fail_result)
4099              RRETURN(MATCH_NOMATCH);
4100            }
4101          break;
4102
4103          case PT_GC:
4104          for (i = 1; i <= min; i++)
4105            {
4106            if (eptr >= md->end_subject)
4107              {
4108              SCHECK_PARTIAL();
4109              RRETURN(MATCH_NOMATCH);
4110              }
4111            GETCHARINCTEST(c, eptr);
4112            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4113              RRETURN(MATCH_NOMATCH);
4114            }
4115          break;
4116
4117          case PT_PC:
4118          for (i = 1; i <= min; i++)
4119            {
4120            if (eptr >= md->end_subject)
4121              {
4122              SCHECK_PARTIAL();
4123              RRETURN(MATCH_NOMATCH);
4124              }
4125            GETCHARINCTEST(c, eptr);
4126            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4127              RRETURN(MATCH_NOMATCH);
4128            }
4129          break;
4130
4131          case PT_SC:
4132          for (i = 1; i <= min; i++)
4133            {
4134            if (eptr >= md->end_subject)
4135              {
4136              SCHECK_PARTIAL();
4137              RRETURN(MATCH_NOMATCH);
4138              }
4139            GETCHARINCTEST(c, eptr);
4140            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4141              RRETURN(MATCH_NOMATCH);
4142            }
4143          break;
4144
4145          case PT_ALNUM:
4146          for (i = 1; i <= min; i++)
4147            {
4148            int category;
4149            if (eptr >= md->end_subject)
4150              {
4151              SCHECK_PARTIAL();
4152              RRETURN(MATCH_NOMATCH);
4153              }
4154            GETCHARINCTEST(c, eptr);
4155            category = UCD_CATEGORY(c);
4156            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4157              RRETURN(MATCH_NOMATCH);
4158            }
4159          break;
4160
4161          case PT_SPACE:    /* Perl space */
4162          for (i = 1; i <= min; i++)
4163            {
4164            if (eptr >= md->end_subject)
4165              {
4166              SCHECK_PARTIAL();
4167              RRETURN(MATCH_NOMATCH);
4168              }
4169            GETCHARINCTEST(c, eptr);
4170            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4171                 c == CHAR_FF || c == CHAR_CR)
4172                   == prop_fail_result)
4173              RRETURN(MATCH_NOMATCH);
4174            }
4175          break;
4176
4177          case PT_PXSPACE:  /* POSIX space */
4178          for (i = 1; i <= min; i++)
4179            {
4180            if (eptr >= md->end_subject)
4181              {
4182              SCHECK_PARTIAL();
4183              RRETURN(MATCH_NOMATCH);
4184              }
4185            GETCHARINCTEST(c, eptr);
4186            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4187                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4188                   == prop_fail_result)
4189              RRETURN(MATCH_NOMATCH);
4190            }
4191          break;
4192
4193          case PT_WORD:
4194          for (i = 1; i <= min; i++)
4195            {
4196            int category;
4197            if (eptr >= md->end_subject)
4198              {
4199              SCHECK_PARTIAL();
4200              RRETURN(MATCH_NOMATCH);
4201              }
4202            GETCHARINCTEST(c, eptr);
4203            category = UCD_CATEGORY(c);
4204            if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4205                   == prop_fail_result)
4206              RRETURN(MATCH_NOMATCH);
4207            }
4208          break;
4209
4210          /* This should not occur */
4211
4212          default:
4213          RRETURN(PCRE_ERROR_INTERNAL);
4214          }
4215        }
4216
4217      /* Match extended Unicode sequences. We will get here only if the
4218      support is in the binary; otherwise a compile-time error occurs. */
4219
4220      else if (ctype == OP_EXTUNI)
4221        {
4222        for (i = 1; i <= min; i++)
4223          {
4224          if (eptr >= md->end_subject)
4225            {
4226            SCHECK_PARTIAL();
4227            RRETURN(MATCH_NOMATCH);
4228            }
4229          GETCHARINCTEST(c, eptr);
4230          if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4231          while (eptr < md->end_subject)
4232            {
4233            int len = 1;
4234            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4235            if (UCD_CATEGORY(c) != ucp_M) break;
4236            eptr += len;
4237            }
4238          CHECK_PARTIAL();
4239          }
4240        }
4241
4242      else
4243#endif     /* SUPPORT_UCP */
4244
4245/* Handle all other cases when the coding is UTF-8 */
4246
4247#ifdef SUPPORT_UTF
4248      if (utf) switch(ctype)
4249        {
4250        case OP_ANY:
4251        for (i = 1; i <= min; i++)
4252          {
4253          if (eptr >= md->end_subject)
4254            {
4255            SCHECK_PARTIAL();
4256            RRETURN(MATCH_NOMATCH);
4257            }
4258          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4259          if (md->partial != 0 &&
4260              eptr + 1 >= md->end_subject &&
4261              NLBLOCK->nltype == NLTYPE_FIXED &&
4262              NLBLOCK->nllen == 2 &&
4263              *eptr == NLBLOCK->nl[0])
4264            {
4265            md->hitend = TRUE;
4266            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4267            }
4268          eptr++;
4269          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4270          }
4271        break;
4272
4273        case OP_ALLANY:
4274        for (i = 1; i <= min; i++)
4275          {
4276          if (eptr >= md->end_subject)
4277            {
4278            SCHECK_PARTIAL();
4279            RRETURN(MATCH_NOMATCH);
4280            }
4281          eptr++;
4282          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4283          }
4284        break;
4285
4286        case OP_ANYBYTE:
4287        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4288        eptr += min;
4289        break;
4290
4291        case OP_ANYNL:
4292        for (i = 1; i <= min; i++)
4293          {
4294          if (eptr >= md->end_subject)
4295            {
4296            SCHECK_PARTIAL();
4297            RRETURN(MATCH_NOMATCH);
4298            }
4299          GETCHARINC(c, eptr);
4300          switch(c)
4301            {
4302            default: RRETURN(MATCH_NOMATCH);
4303
4304            case 0x000d:
4305            if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4306            break;
4307
4308            case 0x000a:
4309            break;
4310
4311            case 0x000b:
4312            case 0x000c:
4313            case 0x0085:
4314            case 0x2028:
4315            case 0x2029:
4316            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4317            break;
4318            }
4319          }
4320        break;
4321
4322        case OP_NOT_HSPACE:
4323        for (i = 1; i <= min; i++)
4324          {
4325          if (eptr >= md->end_subject)
4326            {
4327            SCHECK_PARTIAL();
4328            RRETURN(MATCH_NOMATCH);
4329            }
4330          GETCHARINC(c, eptr);
4331          switch(c)
4332            {
4333            default: break;
4334            case 0x09:      /* HT */
4335            case 0x20:      /* SPACE */
4336            case 0xa0:      /* NBSP */
4337            case 0x1680:    /* OGHAM SPACE MARK */
4338            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4339            case 0x2000:    /* EN QUAD */
4340            case 0x2001:    /* EM QUAD */
4341            case 0x2002:    /* EN SPACE */
4342            case 0x2003:    /* EM SPACE */
4343            case 0x2004:    /* THREE-PER-EM SPACE */
4344            case 0x2005:    /* FOUR-PER-EM SPACE */
4345            case 0x2006:    /* SIX-PER-EM SPACE */
4346            case 0x2007:    /* FIGURE SPACE */
4347            case 0x2008:    /* PUNCTUATION SPACE */
4348            case 0x2009:    /* THIN SPACE */
4349            case 0x200A:    /* HAIR SPACE */
4350            case 0x202f:    /* NARROW NO-BREAK SPACE */
4351            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4352            case 0x3000:    /* IDEOGRAPHIC SPACE */
4353            RRETURN(MATCH_NOMATCH);
4354            }
4355          }
4356        break;
4357
4358        case OP_HSPACE:
4359        for (i = 1; i <= min; i++)
4360          {
4361          if (eptr >= md->end_subject)
4362            {
4363            SCHECK_PARTIAL();
4364            RRETURN(MATCH_NOMATCH);
4365            }
4366          GETCHARINC(c, eptr);
4367          switch(c)
4368            {
4369            default: RRETURN(MATCH_NOMATCH);
4370            case 0x09:      /* HT */
4371            case 0x20:      /* SPACE */
4372            case 0xa0:      /* NBSP */
4373            case 0x1680:    /* OGHAM SPACE MARK */
4374            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4375            case 0x2000:    /* EN QUAD */
4376            case 0x2001:    /* EM QUAD */
4377            case 0x2002:    /* EN SPACE */
4378            case 0x2003:    /* EM SPACE */
4379            case 0x2004:    /* THREE-PER-EM SPACE */
4380            case 0x2005:    /* FOUR-PER-EM SPACE */
4381            case 0x2006:    /* SIX-PER-EM SPACE */
4382            case 0x2007:    /* FIGURE SPACE */
4383            case 0x2008:    /* PUNCTUATION SPACE */
4384            case 0x2009:    /* THIN SPACE */
4385            case 0x200A:    /* HAIR SPACE */
4386            case 0x202f:    /* NARROW NO-BREAK SPACE */
4387            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4388            case 0x3000:    /* IDEOGRAPHIC SPACE */
4389            break;
4390            }
4391          }
4392        break;
4393
4394        case OP_NOT_VSPACE:
4395        for (i = 1; i <= min; i++)
4396          {
4397          if (eptr >= md->end_subject)
4398            {
4399            SCHECK_PARTIAL();
4400            RRETURN(MATCH_NOMATCH);
4401            }
4402          GETCHARINC(c, eptr);
4403          switch(c)
4404            {
4405            default: break;
4406            case 0x0a:      /* LF */
4407            case 0x0b:      /* VT */
4408            case 0x0c:      /* FF */
4409            case 0x0d:      /* CR */
4410            case 0x85:      /* NEL */
4411            case 0x2028:    /* LINE SEPARATOR */
4412            case 0x2029:    /* PARAGRAPH SEPARATOR */
4413            RRETURN(MATCH_NOMATCH);
4414            }
4415          }
4416        break;
4417
4418        case OP_VSPACE:
4419        for (i = 1; i <= min; i++)
4420          {
4421          if (eptr >= md->end_subject)
4422            {
4423            SCHECK_PARTIAL();
4424            RRETURN(MATCH_NOMATCH);
4425            }
4426          GETCHARINC(c, eptr);
4427          switch(c)
4428            {
4429            default: RRETURN(MATCH_NOMATCH);
4430            case 0x0a:      /* LF */
4431            case 0x0b:      /* VT */
4432            case 0x0c:      /* FF */
4433            case 0x0d:      /* CR */
4434            case 0x85:      /* NEL */
4435            case 0x2028:    /* LINE SEPARATOR */
4436            case 0x2029:    /* PARAGRAPH SEPARATOR */
4437            break;
4438            }
4439          }
4440        break;
4441
4442        case OP_NOT_DIGIT:
4443        for (i = 1; i <= min; i++)
4444          {
4445          if (eptr >= md->end_subject)
4446            {
4447            SCHECK_PARTIAL();
4448            RRETURN(MATCH_NOMATCH);
4449            }
4450          GETCHARINC(c, eptr);
4451          if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4452            RRETURN(MATCH_NOMATCH);
4453          }
4454        break;
4455
4456        case OP_DIGIT:
4457        for (i = 1; i <= min; i++)
4458          {
4459          if (eptr >= md->end_subject)
4460            {
4461            SCHECK_PARTIAL();
4462            RRETURN(MATCH_NOMATCH);
4463            }
4464          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4465            RRETURN(MATCH_NOMATCH);
4466          eptr++;
4467          /* No need to skip more bytes - we know it's a 1-byte character */
4468          }
4469        break;
4470
4471        case OP_NOT_WHITESPACE:
4472        for (i = 1; i <= min; i++)
4473          {
4474          if (eptr >= md->end_subject)
4475            {
4476            SCHECK_PARTIAL();
4477            RRETURN(MATCH_NOMATCH);
4478            }
4479          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4480            RRETURN(MATCH_NOMATCH);
4481          eptr++;
4482          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4483          }
4484        break;
4485
4486        case OP_WHITESPACE:
4487        for (i = 1; i <= min; i++)
4488          {
4489          if (eptr >= md->end_subject)
4490            {
4491            SCHECK_PARTIAL();
4492            RRETURN(MATCH_NOMATCH);
4493            }
4494          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4495            RRETURN(MATCH_NOMATCH);
4496          eptr++;
4497          /* No need to skip more bytes - we know it's a 1-byte character */
4498          }
4499        break;
4500
4501        case OP_NOT_WORDCHAR:
4502        for (i = 1; i <= min; i++)
4503          {
4504          if (eptr >= md->end_subject)
4505            {
4506            SCHECK_PARTIAL();
4507            RRETURN(MATCH_NOMATCH);
4508            }
4509          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4510            RRETURN(MATCH_NOMATCH);
4511          eptr++;
4512          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4513          }
4514        break;
4515
4516        case OP_WORDCHAR:
4517        for (i = 1; i <= min; i++)
4518          {
4519          if (eptr >= md->end_subject)
4520            {
4521            SCHECK_PARTIAL();
4522            RRETURN(MATCH_NOMATCH);
4523            }
4524          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4525            RRETURN(MATCH_NOMATCH);
4526          eptr++;
4527          /* No need to skip more bytes - we know it's a 1-byte character */
4528          }
4529        break;
4530
4531        default:
4532        RRETURN(PCRE_ERROR_INTERNAL);
4533        }  /* End switch(ctype) */
4534
4535      else
4536#endif     /* SUPPORT_UTF */
4537
4538      /* Code for the non-UTF-8 case for minimum matching of operators other
4539      than OP_PROP and OP_NOTPROP. */
4540
4541      switch(ctype)
4542        {
4543        case OP_ANY:
4544        for (i = 1; i <= min; i++)
4545          {
4546          if (eptr >= md->end_subject)
4547            {
4548            SCHECK_PARTIAL();
4549            RRETURN(MATCH_NOMATCH);
4550            }
4551          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4552          if (md->partial != 0 &&
4553              eptr + 1 >= md->end_subject &&
4554              NLBLOCK->nltype == NLTYPE_FIXED &&
4555              NLBLOCK->nllen == 2 &&
4556              *eptr == NLBLOCK->nl[0])
4557            {
4558            md->hitend = TRUE;
4559            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4560            }
4561          eptr++;
4562          }
4563        break;
4564
4565        case OP_ALLANY:
4566        if (eptr > md->end_subject - min)
4567          {
4568          SCHECK_PARTIAL();
4569          RRETURN(MATCH_NOMATCH);
4570          }
4571        eptr += min;
4572        break;
4573
4574        case OP_ANYBYTE:
4575        if (eptr > md->end_subject - min)
4576          {
4577          SCHECK_PARTIAL();
4578          RRETURN(MATCH_NOMATCH);
4579          }
4580        eptr += min;
4581        break;
4582
4583        case OP_ANYNL:
4584        for (i = 1; i <= min; i++)
4585          {
4586          if (eptr >= md->end_subject)
4587            {
4588            SCHECK_PARTIAL();
4589            RRETURN(MATCH_NOMATCH);
4590            }
4591          switch(*eptr++)
4592            {
4593            default: RRETURN(MATCH_NOMATCH);
4594
4595            case 0x000d:
4596            if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4597            break;
4598
4599            case 0x000a:
4600            break;
4601
4602            case 0x000b:
4603            case 0x000c:
4604            case 0x0085:
4605#ifdef COMPILE_PCRE16
4606            case 0x2028:
4607            case 0x2029:
4608#endif
4609            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4610            break;
4611            }
4612          }
4613        break;
4614
4615        case OP_NOT_HSPACE:
4616        for (i = 1; i <= min; i++)
4617          {
4618          if (eptr >= md->end_subject)
4619            {
4620            SCHECK_PARTIAL();
4621            RRETURN(MATCH_NOMATCH);
4622            }
4623          switch(*eptr++)
4624            {
4625            default: break;
4626            case 0x09:      /* HT */
4627            case 0x20:      /* SPACE */
4628            case 0xa0:      /* NBSP */
4629#ifdef COMPILE_PCRE16
4630            case 0x1680:    /* OGHAM SPACE MARK */
4631            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4632            case 0x2000:    /* EN QUAD */
4633            case 0x2001:    /* EM QUAD */
4634            case 0x2002:    /* EN SPACE */
4635            case 0x2003:    /* EM SPACE */
4636            case 0x2004:    /* THREE-PER-EM SPACE */
4637            case 0x2005:    /* FOUR-PER-EM SPACE */
4638            case 0x2006:    /* SIX-PER-EM SPACE */
4639            case 0x2007:    /* FIGURE SPACE */
4640            case 0x2008:    /* PUNCTUATION SPACE */
4641            case 0x2009:    /* THIN SPACE */
4642            case 0x200A:    /* HAIR SPACE */
4643            case 0x202f:    /* NARROW NO-BREAK SPACE */
4644            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4645            case 0x3000:    /* IDEOGRAPHIC SPACE */
4646#endif
4647            RRETURN(MATCH_NOMATCH);
4648            }
4649          }
4650        break;
4651
4652        case OP_HSPACE:
4653        for (i = 1; i <= min; i++)
4654          {
4655          if (eptr >= md->end_subject)
4656            {
4657            SCHECK_PARTIAL();
4658            RRETURN(MATCH_NOMATCH);
4659            }
4660          switch(*eptr++)
4661            {
4662            default: RRETURN(MATCH_NOMATCH);
4663            case 0x09:      /* HT */
4664            case 0x20:      /* SPACE */
4665            case 0xa0:      /* NBSP */
4666#ifdef COMPILE_PCRE16
4667            case 0x1680:    /* OGHAM SPACE MARK */
4668            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4669            case 0x2000:    /* EN QUAD */
4670            case 0x2001:    /* EM QUAD */
4671            case 0x2002:    /* EN SPACE */
4672            case 0x2003:    /* EM SPACE */
4673            case 0x2004:    /* THREE-PER-EM SPACE */
4674            case 0x2005:    /* FOUR-PER-EM SPACE */
4675            case 0x2006:    /* SIX-PER-EM SPACE */
4676            case 0x2007:    /* FIGURE SPACE */
4677            case 0x2008:    /* PUNCTUATION SPACE */
4678            case 0x2009:    /* THIN SPACE */
4679            case 0x200A:    /* HAIR SPACE */
4680            case 0x202f:    /* NARROW NO-BREAK SPACE */
4681            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4682            case 0x3000:    /* IDEOGRAPHIC SPACE */
4683#endif
4684            break;
4685            }
4686          }
4687        break;
4688
4689        case OP_NOT_VSPACE:
4690        for (i = 1; i <= min; i++)
4691          {
4692          if (eptr >= md->end_subject)
4693            {
4694            SCHECK_PARTIAL();
4695            RRETURN(MATCH_NOMATCH);
4696            }
4697          switch(*eptr++)
4698            {
4699            default: break;
4700            case 0x0a:      /* LF */
4701            case 0x0b:      /* VT */
4702            case 0x0c:      /* FF */
4703            case 0x0d:      /* CR */
4704            case 0x85:      /* NEL */
4705#ifdef COMPILE_PCRE16
4706            case 0x2028:    /* LINE SEPARATOR */
4707            case 0x2029:    /* PARAGRAPH SEPARATOR */
4708#endif
4709            RRETURN(MATCH_NOMATCH);
4710            }
4711          }
4712        break;
4713
4714        case OP_VSPACE:
4715        for (i = 1; i <= min; i++)
4716          {
4717          if (eptr >= md->end_subject)
4718            {
4719            SCHECK_PARTIAL();
4720            RRETURN(MATCH_NOMATCH);
4721            }
4722          switch(*eptr++)
4723            {
4724            default: RRETURN(MATCH_NOMATCH);
4725            case 0x0a:      /* LF */
4726            case 0x0b:      /* VT */
4727            case 0x0c:      /* FF */
4728            case 0x0d:      /* CR */
4729            case 0x85:      /* NEL */
4730#ifdef COMPILE_PCRE16
4731            case 0x2028:    /* LINE SEPARATOR */
4732            case 0x2029:    /* PARAGRAPH SEPARATOR */
4733#endif
4734            break;
4735            }
4736          }
4737        break;
4738
4739        case OP_NOT_DIGIT:
4740        for (i = 1; i <= min; i++)
4741          {
4742          if (eptr >= md->end_subject)
4743            {
4744            SCHECK_PARTIAL();
4745            RRETURN(MATCH_NOMATCH);
4746            }
4747          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4748            RRETURN(MATCH_NOMATCH);
4749          eptr++;
4750          }
4751        break;
4752
4753        case OP_DIGIT:
4754        for (i = 1; i <= min; i++)
4755          {
4756          if (eptr >= md->end_subject)
4757            {
4758            SCHECK_PARTIAL();
4759            RRETURN(MATCH_NOMATCH);
4760            }
4761          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4762            RRETURN(MATCH_NOMATCH);
4763          eptr++;
4764          }
4765        break;
4766
4767        case OP_NOT_WHITESPACE:
4768        for (i = 1; i <= min; i++)
4769          {
4770          if (eptr >= md->end_subject)
4771            {
4772            SCHECK_PARTIAL();
4773            RRETURN(MATCH_NOMATCH);
4774            }
4775          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4776            RRETURN(MATCH_NOMATCH);
4777          eptr++;
4778          }
4779        break;
4780
4781        case OP_WHITESPACE:
4782        for (i = 1; i <= min; i++)
4783          {
4784          if (eptr >= md->end_subject)
4785            {
4786            SCHECK_PARTIAL();
4787            RRETURN(MATCH_NOMATCH);
4788            }
4789          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4790            RRETURN(MATCH_NOMATCH);
4791          eptr++;
4792          }
4793        break;
4794
4795        case OP_NOT_WORDCHAR:
4796        for (i = 1; i <= min; i++)
4797          {
4798          if (eptr >= md->end_subject)
4799            {
4800            SCHECK_PARTIAL();
4801            RRETURN(MATCH_NOMATCH);
4802            }
4803          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4804            RRETURN(MATCH_NOMATCH);
4805          eptr++;
4806          }
4807        break;
4808
4809        case OP_WORDCHAR:
4810        for (i = 1; i <= min; i++)
4811          {
4812          if (eptr >= md->end_subject)
4813            {
4814            SCHECK_PARTIAL();
4815            RRETURN(MATCH_NOMATCH);
4816            }
4817          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4818            RRETURN(MATCH_NOMATCH);
4819          eptr++;
4820          }
4821        break;
4822
4823        default:
4824        RRETURN(PCRE_ERROR_INTERNAL);
4825        }
4826      }
4827
4828    /* If min = max, continue at the same level without recursing */
4829
4830    if (min == max) continue;
4831
4832    /* If minimizing, we have to test the rest of the pattern before each
4833    subsequent match. Again, separate the UTF case for speed, and also
4834    separate the UCP cases. */
4835
4836    if (minimize)
4837      {
4838#ifdef SUPPORT_UCP
4839      if (prop_type >= 0)
4840        {
4841        switch(prop_type)
4842          {
4843          case PT_ANY:
4844          for (fi = min;; fi++)
4845            {
4846            RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4847            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4848            if (fi >= max) RRETURN(MATCH_NOMATCH);
4849            if (eptr >= md->end_subject)
4850              {
4851              SCHECK_PARTIAL();
4852              RRETURN(MATCH_NOMATCH);
4853              }
4854            GETCHARINCTEST(c, eptr);
4855            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4856            }
4857          /* Control never gets here */
4858
4859          case PT_LAMP:
4860          for (fi = min;; fi++)
4861            {
4862            int chartype;
4863            RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4864            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4865            if (fi >= max) RRETURN(MATCH_NOMATCH);
4866            if (eptr >= md->end_subject)
4867              {
4868              SCHECK_PARTIAL();
4869              RRETURN(MATCH_NOMATCH);
4870              }
4871            GETCHARINCTEST(c, eptr);
4872            chartype = UCD_CHARTYPE(c);
4873            if ((chartype == ucp_Lu ||
4874                 chartype == ucp_Ll ||
4875                 chartype == ucp_Lt) == prop_fail_result)
4876              RRETURN(MATCH_NOMATCH);
4877            }
4878          /* Control never gets here */
4879
4880          case PT_GC:
4881          for (fi = min;; fi++)
4882            {
4883            RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4884            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4885            if (fi >= max) RRETURN(MATCH_NOMATCH);
4886            if (eptr >= md->end_subject)
4887              {
4888              SCHECK_PARTIAL();
4889              RRETURN(MATCH_NOMATCH);
4890              }
4891            GETCHARINCTEST(c, eptr);
4892            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4893              RRETURN(MATCH_NOMATCH);
4894            }
4895          /* Control never gets here */
4896
4897          case PT_PC:
4898          for (fi = min;; fi++)
4899            {
4900            RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4901            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4902            if (fi >= max) RRETURN(MATCH_NOMATCH);
4903            if (eptr >= md->end_subject)
4904              {
4905              SCHECK_PARTIAL();
4906              RRETURN(MATCH_NOMATCH);
4907              }
4908            GETCHARINCTEST(c, eptr);
4909            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4910              RRETURN(MATCH_NOMATCH);
4911            }
4912          /* Control never gets here */
4913
4914          case PT_SC:
4915          for (fi = min;; fi++)
4916            {
4917            RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4918            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4919            if (fi >= max) RRETURN(MATCH_NOMATCH);
4920            if (eptr >= md->end_subject)
4921              {
4922              SCHECK_PARTIAL();
4923              RRETURN(MATCH_NOMATCH);
4924              }
4925            GETCHARINCTEST(c, eptr);
4926            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4927              RRETURN(MATCH_NOMATCH);
4928            }
4929          /* Control never gets here */
4930
4931          case PT_ALNUM:
4932          for (fi = min;; fi++)
4933            {
4934            int category;
4935            RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4936            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4937            if (fi >= max) RRETURN(MATCH_NOMATCH);
4938            if (eptr >= md->end_subject)
4939              {
4940              SCHECK_PARTIAL();
4941              RRETURN(MATCH_NOMATCH);
4942              }
4943            GETCHARINCTEST(c, eptr);
4944            category = UCD_CATEGORY(c);
4945            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4946              RRETURN(MATCH_NOMATCH);
4947            }
4948          /* Control never gets here */
4949
4950          case PT_SPACE:    /* Perl space */
4951          for (fi = min;; fi++)
4952            {
4953            RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4954            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4955            if (fi >= max) RRETURN(MATCH_NOMATCH);
4956            if (eptr >= md->end_subject)
4957              {
4958              SCHECK_PARTIAL();
4959              RRETURN(MATCH_NOMATCH);
4960              }
4961            GETCHARINCTEST(c, eptr);
4962            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4963                 c == CHAR_FF || c == CHAR_CR)
4964                   == prop_fail_result)
4965              RRETURN(MATCH_NOMATCH);
4966            }
4967          /* Control never gets here */
4968
4969          case PT_PXSPACE:  /* POSIX space */
4970          for (fi = min;; fi++)
4971            {
4972            RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4973            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4974            if (fi >= max) RRETURN(MATCH_NOMATCH);
4975            if (eptr >= md->end_subject)
4976              {
4977              SCHECK_PARTIAL();
4978              RRETURN(MATCH_NOMATCH);
4979              }
4980            GETCHARINCTEST(c, eptr);
4981            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4982                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4983                   == prop_fail_result)
4984              RRETURN(MATCH_NOMATCH);
4985            }
4986          /* Control never gets here */
4987
4988          case PT_WORD:
4989          for (fi = min;; fi++)
4990            {
4991            int category;
4992            RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4993            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4994            if (fi >= max) RRETURN(MATCH_NOMATCH);
4995            if (eptr >= md->end_subject)
4996              {
4997              SCHECK_PARTIAL();
4998              RRETURN(MATCH_NOMATCH);
4999              }
5000            GETCHARINCTEST(c, eptr);
5001            category = UCD_CATEGORY(c);
5002            if ((category == ucp_L ||
5003                 category == ucp_N ||
5004                 c == CHAR_UNDERSCORE)
5005                   == prop_fail_result)
5006              RRETURN(MATCH_NOMATCH);
5007            }
5008          /* Control never gets here */
5009
5010          /* This should never occur */
5011
5012          default:
5013          RRETURN(PCRE_ERROR_INTERNAL);
5014          }
5015        }
5016
5017      /* Match extended Unicode sequences. We will get here only if the
5018      support is in the binary; otherwise a compile-time error occurs. */
5019
5020      else if (ctype == OP_EXTUNI)
5021        {
5022        for (fi = min;; fi++)
5023          {
5024          RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5025          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5026          if (fi >= max) RRETURN(MATCH_NOMATCH);
5027          if (eptr >= md->end_subject)
5028            {
5029            SCHECK_PARTIAL();
5030            RRETURN(MATCH_NOMATCH);
5031            }
5032          GETCHARINCTEST(c, eptr);
5033          if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
5034          while (eptr < md->end_subject)
5035            {
5036            int len = 1;
5037            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5038            if (UCD_CATEGORY(c) != ucp_M) break;
5039            eptr += len;
5040            }
5041          CHECK_PARTIAL();
5042          }
5043        }
5044      else
5045#endif     /* SUPPORT_UCP */
5046
5047#ifdef SUPPORT_UTF
5048      if (utf)
5049        {
5050        for (fi = min;; fi++)
5051          {
5052          RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5053          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5054          if (fi >= max) RRETURN(MATCH_NOMATCH);
5055          if (eptr >= md->end_subject)
5056            {
5057            SCHECK_PARTIAL();
5058            RRETURN(MATCH_NOMATCH);
5059            }
5060          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5061            RRETURN(MATCH_NOMATCH);
5062          GETCHARINC(c, eptr);
5063          switch(ctype)
5064            {
5065            case OP_ANY:               /* This is the non-NL case */
5066            if (md->partial != 0 &&    /* Take care with CRLF partial */
5067                eptr >= md->end_subject &&
5068                NLBLOCK->nltype == NLTYPE_FIXED &&
5069                NLBLOCK->nllen == 2 &&
5070                c == NLBLOCK->nl[0])
5071              {
5072              md->hitend = TRUE;
5073              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5074              }
5075            break;
5076
5077            case OP_ALLANY:
5078            case OP_ANYBYTE:
5079            break;
5080
5081            case OP_ANYNL:
5082            switch(c)
5083              {
5084              default: RRETURN(MATCH_NOMATCH);
5085              case 0x000d:
5086              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5087              break;
5088              case 0x000a:
5089              break;
5090
5091              case 0x000b:
5092              case 0x000c:
5093              case 0x0085:
5094              case 0x2028:
5095              case 0x2029:
5096              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5097              break;
5098              }
5099            break;
5100
5101            case OP_NOT_HSPACE:
5102            switch(c)
5103              {
5104              default: break;
5105              case 0x09:      /* HT */
5106              case 0x20:      /* SPACE */
5107              case 0xa0:      /* NBSP */
5108              case 0x1680:    /* OGHAM SPACE MARK */
5109              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5110              case 0x2000:    /* EN QUAD */
5111              case 0x2001:    /* EM QUAD */
5112              case 0x2002:    /* EN SPACE */
5113              case 0x2003:    /* EM SPACE */
5114              case 0x2004:    /* THREE-PER-EM SPACE */
5115              case 0x2005:    /* FOUR-PER-EM SPACE */
5116              case 0x2006:    /* SIX-PER-EM SPACE */
5117              case 0x2007:    /* FIGURE SPACE */
5118              case 0x2008:    /* PUNCTUATION SPACE */
5119              case 0x2009:    /* THIN SPACE */
5120              case 0x200A:    /* HAIR SPACE */
5121              case 0x202f:    /* NARROW NO-BREAK SPACE */
5122              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5123              case 0x3000:    /* IDEOGRAPHIC SPACE */
5124              RRETURN(MATCH_NOMATCH);
5125              }
5126            break;
5127
5128            case OP_HSPACE:
5129            switch(c)
5130              {
5131              default: RRETURN(MATCH_NOMATCH);
5132              case 0x09:      /* HT */
5133              case 0x20:      /* SPACE */
5134              case 0xa0:      /* NBSP */
5135              case 0x1680:    /* OGHAM SPACE MARK */
5136              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5137              case 0x2000:    /* EN QUAD */
5138              case 0x2001:    /* EM QUAD */
5139              case 0x2002:    /* EN SPACE */
5140              case 0x2003:    /* EM SPACE */
5141              case 0x2004:    /* THREE-PER-EM SPACE */
5142              case 0x2005:    /* FOUR-PER-EM SPACE */
5143              case 0x2006:    /* SIX-PER-EM SPACE */
5144              case 0x2007:    /* FIGURE SPACE */
5145              case 0x2008:    /* PUNCTUATION SPACE */
5146              case 0x2009:    /* THIN SPACE */
5147              case 0x200A:    /* HAIR SPACE */
5148              case 0x202f:    /* NARROW NO-BREAK SPACE */
5149              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5150              case 0x3000:    /* IDEOGRAPHIC SPACE */
5151              break;
5152              }
5153            break;
5154
5155            case OP_NOT_VSPACE:
5156            switch(c)
5157              {
5158              default: break;
5159              case 0x0a:      /* LF */
5160              case 0x0b:      /* VT */
5161              case 0x0c:      /* FF */
5162              case 0x0d:      /* CR */
5163              case 0x85:      /* NEL */
5164              case 0x2028:    /* LINE SEPARATOR */
5165              case 0x2029:    /* PARAGRAPH SEPARATOR */
5166              RRETURN(MATCH_NOMATCH);
5167              }
5168            break;
5169
5170            case OP_VSPACE:
5171            switch(c)
5172              {
5173              default: RRETURN(MATCH_NOMATCH);
5174              case 0x0a:      /* LF */
5175              case 0x0b:      /* VT */
5176              case 0x0c:      /* FF */
5177              case 0x0d:      /* CR */
5178              case 0x85:      /* NEL */
5179              case 0x2028:    /* LINE SEPARATOR */
5180              case 0x2029:    /* PARAGRAPH SEPARATOR */
5181              break;
5182              }
5183            break;
5184
5185            case OP_NOT_DIGIT:
5186            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5187              RRETURN(MATCH_NOMATCH);
5188            break;
5189
5190            case OP_DIGIT:
5191            if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5192              RRETURN(MATCH_NOMATCH);
5193            break;
5194
5195            case OP_NOT_WHITESPACE:
5196            if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5197              RRETURN(MATCH_NOMATCH);
5198            break;
5199
5200            case OP_WHITESPACE:
5201            if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5202              RRETURN(MATCH_NOMATCH);
5203            break;
5204
5205            case OP_NOT_WORDCHAR:
5206            if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5207              RRETURN(MATCH_NOMATCH);
5208            break;
5209
5210            case OP_WORDCHAR:
5211            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5212              RRETURN(MATCH_NOMATCH);
5213            break;
5214
5215            default:
5216            RRETURN(PCRE_ERROR_INTERNAL);
5217            }
5218          }
5219        }
5220      else
5221#endif
5222      /* Not UTF mode */
5223        {
5224        for (fi = min;; fi++)
5225          {
5226          RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5227          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5228          if (fi >= max) RRETURN(MATCH_NOMATCH);
5229          if (eptr >= md->end_subject)
5230            {
5231            SCHECK_PARTIAL();
5232            RRETURN(MATCH_NOMATCH);
5233            }
5234          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5235            RRETURN(MATCH_NOMATCH);
5236          c = *eptr++;
5237          switch(ctype)
5238            {
5239            case OP_ANY:               /* This is the non-NL case */
5240            if (md->partial != 0 &&    /* Take care with CRLF partial */
5241                eptr >= md->end_subject &&
5242                NLBLOCK->nltype == NLTYPE_FIXED &&
5243                NLBLOCK->nllen == 2 &&
5244                c == NLBLOCK->nl[0])
5245              {
5246              md->hitend = TRUE;
5247              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5248              }
5249            break;
5250
5251            case OP_ALLANY:
5252            case OP_ANYBYTE:
5253            break;
5254
5255            case OP_ANYNL:
5256            switch(c)
5257              {
5258              default: RRETURN(MATCH_NOMATCH);
5259              case 0x000d:
5260              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5261              break;
5262
5263              case 0x000a:
5264              break;
5265
5266              case 0x000b:
5267              case 0x000c:
5268              case 0x0085:
5269#ifdef COMPILE_PCRE16
5270              case 0x2028:
5271              case 0x2029:
5272#endif
5273              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5274              break;
5275              }
5276            break;
5277
5278            case OP_NOT_HSPACE:
5279            switch(c)
5280              {
5281              default: break;
5282              case 0x09:      /* HT */
5283              case 0x20:      /* SPACE */
5284              case 0xa0:      /* NBSP */
5285#ifdef COMPILE_PCRE16
5286              case 0x1680:    /* OGHAM SPACE MARK */
5287              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5288              case 0x2000:    /* EN QUAD */
5289              case 0x2001:    /* EM QUAD */
5290              case 0x2002:    /* EN SPACE */
5291              case 0x2003:    /* EM SPACE */
5292              case 0x2004:    /* THREE-PER-EM SPACE */
5293              case 0x2005:    /* FOUR-PER-EM SPACE */
5294              case 0x2006:    /* SIX-PER-EM SPACE */
5295              case 0x2007:    /* FIGURE SPACE */
5296              case 0x2008:    /* PUNCTUATION SPACE */
5297              case 0x2009:    /* THIN SPACE */
5298              case 0x200A:    /* HAIR SPACE */
5299              case 0x202f:    /* NARROW NO-BREAK SPACE */
5300              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5301              case 0x3000:    /* IDEOGRAPHIC SPACE */
5302#endif
5303              RRETURN(MATCH_NOMATCH);
5304              }
5305            break;
5306
5307            case OP_HSPACE:
5308            switch(c)
5309              {
5310              default: RRETURN(MATCH_NOMATCH);
5311              case 0x09:      /* HT */
5312              case 0x20:      /* SPACE */
5313              case 0xa0:      /* NBSP */
5314#ifdef COMPILE_PCRE16
5315              case 0x1680:    /* OGHAM SPACE MARK */
5316              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5317              case 0x2000:    /* EN QUAD */
5318              case 0x2001:    /* EM QUAD */
5319              case 0x2002:    /* EN SPACE */
5320              case 0x2003:    /* EM SPACE */
5321              case 0x2004:    /* THREE-PER-EM SPACE */
5322              case 0x2005:    /* FOUR-PER-EM SPACE */
5323              case 0x2006:    /* SIX-PER-EM SPACE */
5324              case 0x2007:    /* FIGURE SPACE */
5325              case 0x2008:    /* PUNCTUATION SPACE */
5326              case 0x2009:    /* THIN SPACE */
5327              case 0x200A:    /* HAIR SPACE */
5328              case 0x202f:    /* NARROW NO-BREAK SPACE */
5329              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5330              case 0x3000:    /* IDEOGRAPHIC SPACE */
5331#endif
5332              break;
5333              }
5334            break;
5335
5336            case OP_NOT_VSPACE:
5337            switch(c)
5338              {
5339              default: break;
5340              case 0x0a:      /* LF */
5341              case 0x0b:      /* VT */
5342              case 0x0c:      /* FF */
5343              case 0x0d:      /* CR */
5344              case 0x85:      /* NEL */
5345#ifdef COMPILE_PCRE16
5346              case 0x2028:    /* LINE SEPARATOR */
5347              case 0x2029:    /* PARAGRAPH SEPARATOR */
5348#endif
5349              RRETURN(MATCH_NOMATCH);
5350              }
5351            break;
5352
5353            case OP_VSPACE:
5354            switch(c)
5355              {
5356              default: RRETURN(MATCH_NOMATCH);
5357              case 0x0a:      /* LF */
5358              case 0x0b:      /* VT */
5359              case 0x0c:      /* FF */
5360              case 0x0d:      /* CR */
5361              case 0x85:      /* NEL */
5362#ifdef COMPILE_PCRE16
5363              case 0x2028:    /* LINE SEPARATOR */
5364              case 0x2029:    /* PARAGRAPH SEPARATOR */
5365#endif
5366              break;
5367              }
5368            break;
5369
5370            case OP_NOT_DIGIT:
5371            if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5372            break;
5373
5374            case OP_DIGIT:
5375            if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5376            break;
5377
5378            case OP_NOT_WHITESPACE:
5379            if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5380            break;
5381
5382            case OP_WHITESPACE:
5383            if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5384            break;
5385
5386            case OP_NOT_WORDCHAR:
5387            if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5388            break;
5389
5390            case OP_WORDCHAR:
5391            if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5392            break;
5393
5394            default:
5395            RRETURN(PCRE_ERROR_INTERNAL);
5396            }
5397          }
5398        }
5399      /* Control never gets here */
5400      }
5401
5402    /* If maximizing, it is worth using inline code for speed, doing the type
5403    test once at the start (i.e. keep it out of the loop). Again, keep the
5404    UTF and UCP handling separate. */
5405
5406    else
5407      {
5408      pp = eptr;  /* Remember where we started */
5409
5410#ifdef SUPPORT_UCP
5411      if (prop_type >= 0)
5412        {
5413        switch(prop_type)
5414          {
5415          case PT_ANY:
5416          for (i = min; i < max; i++)
5417            {
5418            int len = 1;
5419            if (eptr >= md->end_subject)
5420              {
5421              SCHECK_PARTIAL();
5422              break;
5423              }
5424            GETCHARLENTEST(c, eptr, len);
5425            if (prop_fail_result) break;
5426            eptr+= len;
5427            }
5428          break;
5429
5430          case PT_LAMP:
5431          for (i = min; i < max; i++)
5432            {
5433            int chartype;
5434            int len = 1;
5435            if (eptr >= md->end_subject)
5436              {
5437              SCHECK_PARTIAL();
5438              break;
5439              }
5440            GETCHARLENTEST(c, eptr, len);
5441            chartype = UCD_CHARTYPE(c);
5442            if ((chartype == ucp_Lu ||
5443                 chartype == ucp_Ll ||
5444                 chartype == ucp_Lt) == prop_fail_result)
5445              break;
5446            eptr+= len;
5447            }
5448          break;
5449
5450          case PT_GC:
5451          for (i = min; i < max; i++)
5452            {
5453            int len = 1;
5454            if (eptr >= md->end_subject)
5455              {
5456              SCHECK_PARTIAL();
5457              break;
5458              }
5459            GETCHARLENTEST(c, eptr, len);
5460            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5461            eptr+= len;
5462            }
5463          break;
5464
5465          case PT_PC:
5466          for (i = min; i < max; i++)
5467            {
5468            int len = 1;
5469            if (eptr >= md->end_subject)
5470              {
5471              SCHECK_PARTIAL();
5472              break;
5473              }
5474            GETCHARLENTEST(c, eptr, len);
5475            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5476            eptr+= len;
5477            }
5478          break;
5479
5480          case PT_SC:
5481          for (i = min; i < max; i++)
5482            {
5483            int len = 1;
5484            if (eptr >= md->end_subject)
5485              {
5486              SCHECK_PARTIAL();
5487              break;
5488              }
5489            GETCHARLENTEST(c, eptr, len);
5490            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5491            eptr+= len;
5492            }
5493          break;
5494
5495          case PT_ALNUM:
5496          for (i = min; i < max; i++)
5497            {
5498            int category;
5499            int len = 1;
5500            if (eptr >= md->end_subject)
5501              {
5502              SCHECK_PARTIAL();
5503              break;
5504              }
5505            GETCHARLENTEST(c, eptr, len);
5506            category = UCD_CATEGORY(c);
5507            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5508              break;
5509            eptr+= len;
5510            }
5511          break;
5512
5513          case PT_SPACE:    /* Perl space */
5514          for (i = min; i < max; i++)
5515            {
5516            int len = 1;
5517            if (eptr >= md->end_subject)
5518              {
5519              SCHECK_PARTIAL();
5520              break;
5521              }
5522            GETCHARLENTEST(c, eptr, len);
5523            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5524                 c == CHAR_FF || c == CHAR_CR)
5525                 == prop_fail_result)
5526              break;
5527            eptr+= len;
5528            }
5529          break;
5530
5531          case PT_PXSPACE:  /* POSIX space */
5532          for (i = min; i < max; i++)
5533            {
5534            int len = 1;
5535            if (eptr >= md->end_subject)
5536              {
5537              SCHECK_PARTIAL();
5538              break;
5539              }
5540            GETCHARLENTEST(c, eptr, len);
5541            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5542                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5543                 == prop_fail_result)
5544              break;
5545            eptr+= len;
5546            }
5547          break;
5548
5549          case PT_WORD:
5550          for (i = min; i < max; i++)
5551            {
5552            int category;
5553            int len = 1;
5554            if (eptr >= md->end_subject)
5555              {
5556              SCHECK_PARTIAL();
5557              break;
5558              }
5559            GETCHARLENTEST(c, eptr, len);
5560            category = UCD_CATEGORY(c);
5561            if ((category == ucp_L || category == ucp_N ||
5562                 c == CHAR_UNDERSCORE) == prop_fail_result)
5563              break;
5564            eptr+= len;
5565            }
5566          break;
5567
5568          default:
5569          RRETURN(PCRE_ERROR_INTERNAL);
5570          }
5571
5572        /* eptr is now past the end of the maximum run */
5573
5574        if (possessive) continue;
5575        for(;;)
5576          {
5577          RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5578          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5579          if (eptr-- == pp) break;        /* Stop if tried at original pos */
5580          if (utf) BACKCHAR(eptr);
5581          }
5582        }
5583
5584      /* Match extended Unicode sequences. We will get here only if the
5585      support is in the binary; otherwise a compile-time error occurs. */
5586
5587      else if (ctype == OP_EXTUNI)
5588        {
5589        for (i = min; i < max; i++)
5590          {
5591          int len = 1;
5592          if (eptr >= md->end_subject)
5593            {
5594            SCHECK_PARTIAL();
5595            break;
5596            }
5597          if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5598          if (UCD_CATEGORY(c) == ucp_M) break;
5599          eptr += len;
5600          while (eptr < md->end_subject)
5601            {
5602            len = 1;
5603            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5604            if (UCD_CATEGORY(c) != ucp_M) break;
5605            eptr += len;
5606            }
5607          CHECK_PARTIAL();
5608          }
5609
5610        /* eptr is now past the end of the maximum run */
5611
5612        if (possessive) continue;
5613
5614        for(;;)
5615          {
5616          RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5617          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5618          if (eptr-- == pp) break;        /* Stop if tried at original pos */
5619          for (;;)                        /* Move back over one extended */
5620            {
5621            if (!utf) c = *eptr; else
5622              {
5623              BACKCHAR(eptr);
5624              GETCHAR(c, eptr);
5625              }
5626            if (UCD_CATEGORY(c) != ucp_M) break;
5627            eptr--;
5628            }
5629          }
5630        }
5631
5632      else
5633#endif   /* SUPPORT_UCP */
5634
5635#ifdef SUPPORT_UTF
5636      if (utf)
5637        {
5638        switch(ctype)
5639          {
5640          case OP_ANY:
5641          if (max < INT_MAX)
5642            {
5643            for (i = min; i < max; i++)
5644              {
5645              if (eptr >= md->end_subject)
5646                {
5647                SCHECK_PARTIAL();
5648                break;
5649                }
5650              if (IS_NEWLINE(eptr)) break;
5651              if (md->partial != 0 &&    /* Take care with CRLF partial */
5652                  eptr + 1 >= md->end_subject &&
5653                  NLBLOCK->nltype == NLTYPE_FIXED &&
5654                  NLBLOCK->nllen == 2 &&
5655                  *eptr == NLBLOCK->nl[0])
5656                {
5657                md->hitend = TRUE;
5658                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5659                }
5660              eptr++;
5661              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5662              }
5663            }
5664
5665          /* Handle unlimited UTF-8 repeat */
5666
5667          else
5668            {
5669            for (i = min; i < max; i++)
5670              {
5671              if (eptr >= md->end_subject)
5672                {
5673                SCHECK_PARTIAL();
5674                break;
5675                }
5676              if (IS_NEWLINE(eptr)) break;
5677              if (md->partial != 0 &&    /* Take care with CRLF partial */
5678                  eptr + 1 >= md->end_subject &&
5679                  NLBLOCK->nltype == NLTYPE_FIXED &&
5680                  NLBLOCK->nllen == 2 &&
5681                  *eptr == NLBLOCK->nl[0])
5682                {
5683                md->hitend = TRUE;
5684                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5685                }
5686              eptr++;
5687              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5688              }
5689            }
5690          break;
5691
5692          case OP_ALLANY:
5693          if (max < INT_MAX)
5694            {
5695            for (i = min; i < max; i++)
5696              {
5697              if (eptr >= md->end_subject)
5698                {
5699                SCHECK_PARTIAL();
5700                break;
5701                }
5702              eptr++;
5703              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5704              }
5705            }
5706          else
5707            {
5708            eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5709            SCHECK_PARTIAL();
5710            }
5711          break;
5712
5713          /* The byte case is the same as non-UTF8 */
5714
5715          case OP_ANYBYTE:
5716          c = max - min;
5717          if (c > (unsigned int)(md->end_subject - eptr))
5718            {
5719            eptr = md->end_subject;
5720            SCHECK_PARTIAL();
5721            }
5722          else eptr += c;
5723          break;
5724
5725          case OP_ANYNL:
5726          for (i = min; i < max; i++)
5727            {
5728            int len = 1;
5729            if (eptr >= md->end_subject)
5730              {
5731              SCHECK_PARTIAL();
5732              break;
5733              }
5734            GETCHARLEN(c, eptr, len);
5735            if (c == 0x000d)
5736              {
5737              if (++eptr >= md->end_subject) break;
5738              if (*eptr == 0x000a) eptr++;
5739              }
5740            else
5741              {
5742              if (c != 0x000a &&
5743                  (md->bsr_anycrlf ||
5744                   (c != 0x000b && c != 0x000c &&
5745                    c != 0x0085 && c != 0x2028 && c != 0x2029)))
5746                break;
5747              eptr += len;
5748              }
5749            }
5750          break;
5751
5752          case OP_NOT_HSPACE:
5753          case OP_HSPACE:
5754          for (i = min; i < max; i++)
5755            {
5756            BOOL gotspace;
5757            int len = 1;
5758            if (eptr >= md->end_subject)
5759              {
5760              SCHECK_PARTIAL();
5761              break;
5762              }
5763            GETCHARLEN(c, eptr, len);
5764            switch(c)
5765              {
5766              default: gotspace = FALSE; break;
5767              case 0x09:      /* HT */
5768              case 0x20:      /* SPACE */
5769              case 0xa0:      /* NBSP */
5770              case 0x1680:    /* OGHAM SPACE MARK */
5771              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5772              case 0x2000:    /* EN QUAD */
5773              case 0x2001:    /* EM QUAD */
5774              case 0x2002:    /* EN SPACE */
5775              case 0x2003:    /* EM SPACE */
5776              case 0x2004:    /* THREE-PER-EM SPACE */
5777              case 0x2005:    /* FOUR-PER-EM SPACE */
5778              case 0x2006:    /* SIX-PER-EM SPACE */
5779              case 0x2007:    /* FIGURE SPACE */
5780              case 0x2008:    /* PUNCTUATION SPACE */
5781              case 0x2009:    /* THIN SPACE */
5782              case 0x200A:    /* HAIR SPACE */
5783              case 0x202f:    /* NARROW NO-BREAK SPACE */
5784              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5785              case 0x3000:    /* IDEOGRAPHIC SPACE */
5786              gotspace = TRUE;
5787              break;
5788              }
5789            if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5790            eptr += len;
5791            }
5792          break;
5793
5794          case OP_NOT_VSPACE:
5795          case OP_VSPACE:
5796          for (i = min; i < max; i++)
5797            {
5798            BOOL gotspace;
5799            int len = 1;
5800            if (eptr >= md->end_subject)
5801              {
5802              SCHECK_PARTIAL();
5803              break;
5804              }
5805            GETCHARLEN(c, eptr, len);
5806            switch(c)
5807              {
5808              default: gotspace = FALSE; break;
5809              case 0x0a:      /* LF */
5810              case 0x0b:      /* VT */
5811              case 0x0c:      /* FF */
5812              case 0x0d:      /* CR */
5813              case 0x85:      /* NEL */
5814              case 0x2028:    /* LINE SEPARATOR */
5815              case 0x2029:    /* PARAGRAPH SEPARATOR */
5816              gotspace = TRUE;
5817              break;
5818              }
5819            if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5820            eptr += len;
5821            }
5822          break;
5823
5824          case OP_NOT_DIGIT:
5825          for (i = min; i < max; i++)
5826            {
5827            int len = 1;
5828            if (eptr >= md->end_subject)
5829              {
5830              SCHECK_PARTIAL();
5831              break;
5832              }
5833            GETCHARLEN(c, eptr, len);
5834            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5835            eptr+= len;
5836            }
5837          break;
5838
5839          case OP_DIGIT:
5840          for (i = min; i < max; i++)
5841            {
5842            int len = 1;
5843            if (eptr >= md->end_subject)
5844              {
5845              SCHECK_PARTIAL();
5846              break;
5847              }
5848            GETCHARLEN(c, eptr, len);
5849            if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5850            eptr+= len;
5851            }
5852          break;
5853
5854          case OP_NOT_WHITESPACE:
5855          for (i = min; i < max; i++)
5856            {
5857            int len = 1;
5858            if (eptr >= md->end_subject)
5859              {
5860              SCHECK_PARTIAL();
5861              break;
5862              }
5863            GETCHARLEN(c, eptr, len);
5864            if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5865            eptr+= len;
5866            }
5867          break;
5868
5869          case OP_WHITESPACE:
5870          for (i = min; i < max; i++)
5871            {
5872            int len = 1;
5873            if (eptr >= md->end_subject)
5874              {
5875              SCHECK_PARTIAL();
5876              break;
5877              }
5878            GETCHARLEN(c, eptr, len);
5879            if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5880            eptr+= len;
5881            }
5882          break;
5883
5884          case OP_NOT_WORDCHAR:
5885          for (i = min; i < max; i++)
5886            {
5887            int len = 1;
5888            if (eptr >= md->end_subject)
5889              {
5890              SCHECK_PARTIAL();
5891              break;
5892              }
5893            GETCHARLEN(c, eptr, len);
5894            if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5895            eptr+= len;
5896            }
5897          break;
5898
5899          case OP_WORDCHAR:
5900          for (i = min; i < max; i++)
5901            {
5902            int len = 1;
5903            if (eptr >= md->end_subject)
5904              {
5905              SCHECK_PARTIAL();
5906              break;
5907              }
5908            GETCHARLEN(c, eptr, len);
5909            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5910            eptr+= len;
5911            }
5912          break;
5913
5914          default:
5915          RRETURN(PCRE_ERROR_INTERNAL);
5916          }
5917
5918        /* eptr is now past the end of the maximum run. If possessive, we are
5919        done (no backing up). Otherwise, match at this position; anything other
5920        than no match is immediately returned. For nomatch, back up one
5921        character, unless we are matching \R and the last thing matched was
5922        \r\n, in which case, back up two bytes. */
5923
5924        if (possessive) continue;
5925        for(;;)
5926          {
5927          RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5928          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5929          if (eptr-- == pp) break;        /* Stop if tried at original pos */
5930          BACKCHAR(eptr);
5931          if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5932              eptr[-1] == '\r') eptr--;
5933          }
5934        }
5935      else
5936#endif  /* SUPPORT_UTF */
5937      /* Not UTF mode */
5938        {
5939        switch(ctype)
5940          {
5941          case OP_ANY:
5942          for (i = min; i < max; i++)
5943            {
5944            if (eptr >= md->end_subject)
5945              {
5946              SCHECK_PARTIAL();
5947              break;
5948              }
5949            if (IS_NEWLINE(eptr)) break;
5950            if (md->partial != 0 &&    /* Take care with CRLF partial */
5951                eptr + 1 >= md->end_subject &&
5952                NLBLOCK->nltype == NLTYPE_FIXED &&
5953                NLBLOCK->nllen == 2 &&
5954                *eptr == NLBLOCK->nl[0])
5955              {
5956              md->hitend = TRUE;
5957              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5958              }
5959            eptr++;
5960            }
5961          break;
5962
5963          case OP_ALLANY:
5964          case OP_ANYBYTE:
5965          c = max - min;
5966          if (c > (unsigned int)(md->end_subject - eptr))
5967            {
5968            eptr = md->end_subject;
5969            SCHECK_PARTIAL();
5970            }
5971          else eptr += c;
5972          break;
5973
5974          case OP_ANYNL:
5975          for (i = min; i < max; i++)
5976            {
5977            if (eptr >= md->end_subject)
5978              {
5979              SCHECK_PARTIAL();
5980              break;
5981              }
5982            c = *eptr;
5983            if (c == 0x000d)
5984              {
5985              if (++eptr >= md->end_subject) break;
5986              if (*eptr == 0x000a) eptr++;
5987              }
5988            else
5989              {
5990              if (c != 0x000a && (md->bsr_anycrlf ||
5991                (c != 0x000b && c != 0x000c && c != 0x0085
5992#ifdef COMPILE_PCRE16
5993                && c != 0x2028 && c != 0x2029
5994#endif
5995                ))) break;
5996              eptr++;
5997              }
5998            }
5999          break;
6000
6001          case OP_NOT_HSPACE:
6002          for (i = min; i < max; i++)
6003            {
6004            if (eptr >= md->end_subject)
6005              {
6006              SCHECK_PARTIAL();
6007              break;
6008              }
6009            c = *eptr;
6010            if (c == 0x09 || c == 0x20 || c == 0xa0
6011#ifdef COMPILE_PCRE16
6012              || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6013              || c == 0x202f || c == 0x205f || c == 0x3000
6014#endif
6015              ) break;
6016            eptr++;
6017            }
6018          break;
6019
6020          case OP_HSPACE:
6021          for (i = min; i < max; i++)
6022            {
6023            if (eptr >= md->end_subject)
6024              {
6025              SCHECK_PARTIAL();
6026              break;
6027              }
6028            c = *eptr;
6029            if (c != 0x09 && c != 0x20 && c != 0xa0
6030#ifdef COMPILE_PCRE16
6031              && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6032              && c != 0x202f && c != 0x205f && c != 0x3000
6033#endif
6034              ) break;
6035            eptr++;
6036            }
6037          break;
6038
6039          case OP_NOT_VSPACE:
6040          for (i = min; i < max; i++)
6041            {
6042            if (eptr >= md->end_subject)
6043              {
6044              SCHECK_PARTIAL();
6045              break;
6046              }
6047            c = *eptr;
6048            if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6049#ifdef COMPILE_PCRE16
6050              || c == 0x2028 || c == 0x2029
6051#endif
6052              ) break;
6053            eptr++;
6054            }
6055          break;
6056
6057          case OP_VSPACE:
6058          for (i = min; i < max; i++)
6059            {
6060            if (eptr >= md->end_subject)
6061              {
6062              SCHECK_PARTIAL();
6063              break;
6064              }
6065            c = *eptr;
6066            if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6067#ifdef COMPILE_PCRE16
6068              && c != 0x2028 && c != 0x2029
6069#endif
6070              ) break;
6071            eptr++;
6072            }
6073          break;
6074
6075          case OP_NOT_DIGIT:
6076          for (i = min; i < max; i++)
6077            {
6078            if (eptr >= md->end_subject)
6079              {
6080              SCHECK_PARTIAL();
6081              break;
6082              }
6083            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6084            eptr++;
6085            }
6086          break;
6087
6088          case OP_DIGIT:
6089          for (i = min; i < max; i++)
6090            {
6091            if (eptr >= md->end_subject)
6092              {
6093              SCHECK_PARTIAL();
6094              break;
6095              }
6096            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6097            eptr++;
6098            }
6099          break;
6100
6101          case OP_NOT_WHITESPACE:
6102          for (i = min; i < max; i++)
6103            {
6104            if (eptr >= md->end_subject)
6105              {
6106              SCHECK_PARTIAL();
6107              break;
6108              }
6109            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6110            eptr++;
6111            }
6112          break;
6113
6114          case OP_WHITESPACE:
6115          for (i = min; i < max; i++)
6116            {
6117            if (eptr >= md->end_subject)
6118              {
6119              SCHECK_PARTIAL();
6120              break;
6121              }
6122            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6123            eptr++;
6124            }
6125          break;
6126
6127          case OP_NOT_WORDCHAR:
6128          for (i = min; i < max; i++)
6129            {
6130            if (eptr >= md->end_subject)
6131              {
6132              SCHECK_PARTIAL();
6133              break;
6134              }
6135            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6136            eptr++;
6137            }
6138          break;
6139
6140          case OP_WORDCHAR:
6141          for (i = min; i < max; i++)
6142            {
6143            if (eptr >= md->end_subject)
6144              {
6145              SCHECK_PARTIAL();
6146              break;
6147              }
6148            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6149            eptr++;
6150            }
6151          break;
6152
6153          default:
6154          RRETURN(PCRE_ERROR_INTERNAL);
6155          }
6156
6157        /* eptr is now past the end of the maximum run. If possessive, we are
6158        done (no backing up). Otherwise, match at this position; anything other
6159        than no match is immediately returned. For nomatch, back up one
6160        character (byte), unless we are matching \R and the last thing matched
6161        was \r\n, in which case, back up two bytes. */
6162
6163        if (possessive) continue;
6164        while (eptr >= pp)
6165          {
6166          RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6167          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6168          eptr--;
6169          if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
6170              eptr[-1] == '\r') eptr--;
6171          }
6172        }
6173
6174      /* Get here if we can't make it match with any permitted repetitions */
6175
6176      RRETURN(MATCH_NOMATCH);
6177      }
6178    /* Control never gets here */
6179
6180    /* There's been some horrible disaster. Arrival here can only mean there is
6181    something seriously wrong in the code above or the OP_xxx definitions. */
6182
6183    default:
6184    DPRINTF(("Unknown opcode %d\n", *ecode));
6185    RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6186    }
6187
6188  /* Do not stick any code in here without much thought; it is assumed
6189  that "continue" in the code above comes out to here to repeat the main
6190  loop. */
6191
6192  }             /* End of main loop */
6193/* Control never reaches here */
6194
6195
6196/* When compiling to use the heap rather than the stack for recursive calls to
6197match(), the RRETURN() macro jumps here. The number that is saved in
6198frame->Xwhere indicates which label we actually want to return to. */
6199
6200#ifdef NO_RECURSE
6201#define LBL(val) case val: goto L_RM##val;
6202HEAP_RETURN:
6203switch (frame->Xwhere)
6204  {
6205  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6206  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6207  LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6208  LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6209  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6210  LBL(65) LBL(66)
6211#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6212  LBL(21)
6213#endif
6214#ifdef SUPPORT_UTF
6215  LBL(16) LBL(18) LBL(20)
6216  LBL(22) LBL(23) LBL(28) LBL(30)
6217  LBL(32) LBL(34) LBL(42) LBL(46)
6218#ifdef SUPPORT_UCP
6219  LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6220  LBL(59) LBL(60) LBL(61) LBL(62)
6221#endif  /* SUPPORT_UCP */
6222#endif  /* SUPPORT_UTF */
6223  default:
6224  DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6225
6226printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6227
6228  return PCRE_ERROR_INTERNAL;
6229  }
6230#undef LBL
6231#endif  /* NO_RECURSE */
6232}
6233
6234
6235/***************************************************************************
6236****************************************************************************
6237                   RECURSION IN THE match() FUNCTION
6238
6239Undefine all the macros that were defined above to handle this. */
6240
#ifdef NO_RECURSE
/* The names in this list are macros only in the NO_RECURSE build; the two
that are macros in both builds are undefined separately after the #endif.
Undefining them here keeps them from affecting the code that follows.
NOTE(review): the matching #define lines are earlier in the file and are not
checked here - confirm this list stays in step with them. */

#undef eptr
#undef ecode
#undef mstart
#undef offset_top
#undef eptrb
#undef flags

#undef callpat
#undef charptr
#undef data
#undef next
#undef pp
#undef prev
#undef saved_eptr

#undef new_recursive

#undef cur_is_word
#undef condition
#undef prev_is_word

#undef ctype
#undef length
#undef max
#undef min
#undef number
#undef offset
#undef op
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef stacksave

#undef newptrb

#endif  /* NO_RECURSE */
6279
6280/* These two are defined as macros in both cases */
6281
6282#undef fc
6283#undef fi
6284
6285/***************************************************************************
6286***************************************************************************/
6287
6288
6289#ifdef NO_RECURSE
6290/*************************************************
6291*          Release allocated heap frames         *
6292*************************************************/
6293
6294/* This function releases all the allocated frames. The base frame is on the
6295machine stack, and so must not be freed.
6296
6297Argument: the address of the base frame
6298Returns:  nothing
6299*/
6300
6301static void
6302release_match_heapframes (heapframe *frame_base)
6303{
6304heapframe *nextframe = frame_base->Xnextframe;
6305while (nextframe != NULL)
6306  {
6307  heapframe *oldframe = nextframe;
6308  nextframe = nextframe->Xnextframe;
6309  (PUBL(stack_free))(oldframe);
6310  }
6311}
6312#endif
6313
6314
6315/*************************************************
6316*         Execute a Regular Expression           *
6317*************************************************/
6318
6319/* This function applies a compiled re to a subject string and picks out
6320portions of the string if it matches. Two elements in the vector are set for
6321each substring: the offsets to the start and end of the substring.
6322
6323Arguments:
6324  argument_re     points to the compiled expression
6325  extra_data      points to extra data or is NULL
6326  subject         points to the subject string
6327  length          length of subject string (may contain binary zeros)
6328  start_offset    where to start in the subject string
6329  options         option bits
6330  offsets         points to a vector of ints to be filled in with offsets
6331  offsetcount     the number of elements in the vector
6332
6333Returns:          > 0 => success; value is the number of elements filled in
6334                  = 0 => success, but offsets is not big enough
6335                   -1 => failed to match
6336                 < -1 => some kind of unexpected problem
6337*/
6338
6339#ifdef COMPILE_PCRE8
6340PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6341pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6342  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6343  int offsetcount)
6344#else
6345PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6346pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6347  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6348  int offsetcount)
6349#endif
6350{
6351int rc, ocount, arg_offset_max;
6352int newline;
6353BOOL using_temporary_offsets = FALSE;
6354BOOL anchored;
6355BOOL startline;
6356BOOL firstline;
6357BOOL utf;
6358BOOL has_first_char = FALSE;
6359BOOL has_req_char = FALSE;
6360pcre_uchar first_char = 0;
6361pcre_uchar first_char2 = 0;
6362pcre_uchar req_char = 0;
6363pcre_uchar req_char2 = 0;
6364match_data match_block;
6365match_data *md = &match_block;
6366const pcre_uint8 *tables;
6367const pcre_uint8 *start_bits = NULL;
6368PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6369PCRE_PUCHAR end_subject;
6370PCRE_PUCHAR start_partial = NULL;
6371PCRE_PUCHAR req_char_ptr = start_match - 1;
6372
6373const pcre_study_data *study;
6374const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6375
6376#ifdef NO_RECURSE
6377heapframe frame_zero;
6378frame_zero.Xprevframe = NULL;            /* Marks the top level */
6379frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6380md->match_frames_base = &frame_zero;
6381#endif
6382
6383/* Check for the special magic call that measures the size of the stack used
6384per recursive call of match(). Without the funny casting for sizeof, a Windows
6385compiler gave this error: "unary minus operator applied to unsigned type,
6386result still unsigned". Hopefully the cast fixes that. */
6387
6388if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6389    start_offset == -999)
6390#ifdef NO_RECURSE
6391  return -((int)sizeof(heapframe));
6392#else
6393  return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6394#endif
6395
6396/* Plausibility checks */
6397
6398if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6399if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6400  return PCRE_ERROR_NULL;
6401if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6402if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6403
6404/* Check that the first field in the block is the magic number. If it is not,
6405return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6406REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6407means that the pattern is likely compiled with different endianness. */
6408
6409if (re->magic_number != MAGIC_NUMBER)
6410  return re->magic_number == REVERSED_MAGIC_NUMBER?
6411    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6412if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6413
6414/* These two settings are used in the code for checking a UTF-8 string that
6415follows immediately afterwards. Other values in the md block are used only
6416during "normal" pcre_exec() processing, not when the JIT support is in use,
6417so they are set up later. */
6418
6419/* PCRE_UTF16 has the same value as PCRE_UTF8. */
6420utf = md->utf = (re->options & PCRE_UTF8) != 0;
6421md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6422              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6423
6424/* Check a UTF-8 string if required. Pass back the character offset and error
6425code for an invalid string if a results vector is available. */
6426
6427#ifdef SUPPORT_UTF
6428if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6429  {
6430  int erroroffset;
6431  int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6432  if (errorcode != 0)
6433    {
6434    if (offsetcount >= 2)
6435      {
6436      offsets[0] = erroroffset;
6437      offsets[1] = errorcode;
6438      }
6439#ifdef COMPILE_PCRE16
6440    return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6441      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6442#else
6443    return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6444      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6445#endif
6446    }
6447
6448  /* Check that a start_offset points to the start of a UTF character. */
6449  if (start_offset > 0 && start_offset < length &&
6450      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6451    return PCRE_ERROR_BADUTF8_OFFSET;
6452  }
6453#endif
6454
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fall back to the normal code path
if an unsupported flag is set. */
6459
6460#ifdef SUPPORT_JIT
6461if (extra_data != NULL
6462    && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6463                             PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6464    && extra_data->executable_jit != NULL
6465    && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6466                    PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6467                    PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6468  {
6469  rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
6470       start_offset, options, offsets, offsetcount);
6471
  /* PCRE_ERROR_NULL means that the selected normal or partial matching
  mode is not compiled. In this case we simply fall back to the interpreter. */
6474
6475  if (rc != PCRE_ERROR_NULL) return rc;
6476  }
6477#endif
6478
6479/* Carry on with non-JIT matching. This information is for finding all the
6480numbers associated with a given name, for condition testing. */
6481
6482md->name_table = (pcre_uchar *)re + re->name_table_offset;
6483md->name_count = re->name_count;
6484md->name_entry_size = re->name_entry_size;
6485
6486/* Fish out the optional data from the extra_data structure, first setting
6487the default values. */
6488
6489study = NULL;
6490md->match_limit = MATCH_LIMIT;
6491md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6492md->callout_data = NULL;
6493
6494/* The table pointer is always in native byte order. */
6495
6496tables = re->tables;
6497
6498if (extra_data != NULL)
6499  {
6500  register unsigned int flags = extra_data->flags;
6501  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6502    study = (const pcre_study_data *)extra_data->study_data;
6503  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6504    md->match_limit = extra_data->match_limit;
6505  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6506    md->match_limit_recursion = extra_data->match_limit_recursion;
6507  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6508    md->callout_data = extra_data->callout_data;
6509  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6510  }
6511
6512/* If the exec call supplied NULL for tables, use the inbuilt ones. This
6513is a feature that makes it possible to save compiled regex and re-use them
6514in other programs later. */
6515
6516if (tables == NULL) tables = PRIV(default_tables);
6517
6518/* Set up other data */
6519
6520anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6521startline = (re->flags & PCRE_STARTLINE) != 0;
6522firstline = (re->options & PCRE_FIRSTLINE) != 0;
6523
6524/* The code starts after the real_pcre block and the capture name table. */
6525
6526md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6527  re->name_count * re->name_entry_size;
6528
6529md->start_subject = (PCRE_PUCHAR)subject;
6530md->start_offset = start_offset;
6531md->end_subject = md->start_subject + length;
6532end_subject = md->end_subject;
6533
6534md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6535md->use_ucp = (re->options & PCRE_UCP) != 0;
6536md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6537md->ignore_skip_arg = FALSE;
6538
6539/* Some options are unpacked into BOOL variables in the hope that testing
6540them will be faster than individual option bits. */
6541
6542md->notbol = (options & PCRE_NOTBOL) != 0;
6543md->noteol = (options & PCRE_NOTEOL) != 0;
6544md->notempty = (options & PCRE_NOTEMPTY) != 0;
6545md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6546
6547md->hitend = FALSE;
6548md->mark = md->nomatch_mark = NULL;     /* In case never set */
6549
6550md->recursive = NULL;                   /* No recursion at top level */
6551md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6552
6553md->lcc = tables + lcc_offset;
6554md->fcc = tables + fcc_offset;
6555md->ctypes = tables + ctypes_offset;
6556
6557/* Handle different \R options. */
6558
6559switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6560  {
6561  case 0:
6562  if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6563    md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6564  else
6565#ifdef BSR_ANYCRLF
6566  md->bsr_anycrlf = TRUE;
6567#else
6568  md->bsr_anycrlf = FALSE;
6569#endif
6570  break;
6571
6572  case PCRE_BSR_ANYCRLF:
6573  md->bsr_anycrlf = TRUE;
6574  break;
6575
6576  case PCRE_BSR_UNICODE:
6577  md->bsr_anycrlf = FALSE;
6578  break;
6579
6580  default: return PCRE_ERROR_BADNEWLINE;
6581  }
6582
6583/* Handle different types of newline. The three bits give eight cases. If
6584nothing is set at run time, whatever was used at compile time applies. */
6585
6586switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6587        (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6588  {
6589  case 0: newline = NEWLINE; break;   /* Compile-time default */
6590  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6591  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6592  case PCRE_NEWLINE_CR+
6593       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6594  case PCRE_NEWLINE_ANY: newline = -1; break;
6595  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6596  default: return PCRE_ERROR_BADNEWLINE;
6597  }
6598
6599if (newline == -2)
6600  {
6601  md->nltype = NLTYPE_ANYCRLF;
6602  }
6603else if (newline < 0)
6604  {
6605  md->nltype = NLTYPE_ANY;
6606  }
6607else
6608  {
6609  md->nltype = NLTYPE_FIXED;
6610  if (newline > 255)
6611    {
6612    md->nllen = 2;
6613    md->nl[0] = (newline >> 8) & 255;
6614    md->nl[1] = newline & 255;
6615    }
6616  else
6617    {
6618    md->nllen = 1;
6619    md->nl[0] = newline;
6620    }
6621  }
6622
6623/* Partial matching was originally supported only for a restricted set of
6624regexes; from release 8.00 there are no restrictions, but the bits are still
6625defined (though never set). So there's no harm in leaving this code. */
6626
6627if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6628  return PCRE_ERROR_BADPARTIAL;
6629
6630/* If the expression has got more back references than the offsets supplied can
6631hold, we get a temporary chunk of working store to use during the matching.
6632Otherwise, we can use the vector supplied, rounding down its size to a multiple
6633of 3. */
6634
6635ocount = offsetcount - (offsetcount % 3);
6636arg_offset_max = (2*ocount)/3;
6637
6638if (re->top_backref > 0 && re->top_backref >= ocount/3)
6639  {
6640  ocount = re->top_backref * 3 + 3;
6641  md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6642  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6643  using_temporary_offsets = TRUE;
6644  DPRINTF(("Got memory to hold back references\n"));
6645  }
6646else md->offset_vector = offsets;
6647
6648md->offset_end = ocount;
6649md->offset_max = (2*ocount)/3;
6650md->offset_overflow = FALSE;
6651md->capture_last = -1;
6652
6653/* Reset the working variable associated with each extraction. These should
6654never be used unless previously set, but they get saved and restored, and so we
6655initialize them to avoid reading uninitialized locations. Also, unset the
6656offsets for the matched string. This is really just for tidiness with callouts,
6657in case they inspect these fields. */
6658
6659if (md->offset_vector != NULL)
6660  {
6661  register int *iptr = md->offset_vector + ocount;
6662  register int *iend = iptr - re->top_bracket;
6663  if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6664  while (--iptr >= iend) *iptr = -1;
6665  md->offset_vector[0] = md->offset_vector[1] = -1;
6666  }
6667
6668/* Set up the first character to match, if available. The first_char value is
6669never set for an anchored regular expression, but the anchoring may be forced
6670at run time, so we have to test for anchoring. The first char may be unset for
6671an unanchored pattern, of course. If there's no first char and the pattern was
6672studied, there may be a bitmap of possible first characters. */
6673
6674if (!anchored)
6675  {
6676  if ((re->flags & PCRE_FIRSTSET) != 0)
6677    {
6678    has_first_char = TRUE;
6679    first_char = first_char2 = (pcre_uchar)(re->first_char);
6680    if ((re->flags & PCRE_FCH_CASELESS) != 0)
6681      {
6682      first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6683#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6684      if (utf && first_char > 127)
6685        first_char2 = UCD_OTHERCASE(first_char);
6686#endif
6687      }
6688    }
6689  else
6690    if (!startline && study != NULL &&
6691      (study->flags & PCRE_STUDY_MAPPED) != 0)
6692        start_bits = study->start_bits;
6693  }
6694
6695/* For anchored or unanchored matches, there may be a "last known required
6696character" set. */
6697
6698if ((re->flags & PCRE_REQCHSET) != 0)
6699  {
6700  has_req_char = TRUE;
6701  req_char = req_char2 = (pcre_uchar)(re->req_char);
6702  if ((re->flags & PCRE_RCH_CASELESS) != 0)
6703    {
6704    req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6705#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6706    if (utf && req_char > 127)
6707      req_char2 = UCD_OTHERCASE(req_char);
6708#endif
6709    }
6710  }
6711
6712
6713/* ==========================================================================*/
6714
6715/* Loop for handling unanchored repeated matching attempts; for anchored regexes
6716the loop runs just once. */
6717
6718for(;;)
6719  {
6720  PCRE_PUCHAR save_end_subject = end_subject;
6721  PCRE_PUCHAR new_start_match;
6722
6723  /* If firstline is TRUE, the start of the match is constrained to the first
6724  line of a multiline string. That is, the match must be before or at the first
6725  newline. Implement this by temporarily adjusting end_subject so that we stop
6726  scanning at a newline. If the match fails at the newline, later code breaks
6727  this loop. */
6728
6729  if (firstline)
6730    {
6731    PCRE_PUCHAR t = start_match;
6732#ifdef SUPPORT_UTF
6733    if (utf)
6734      {
6735      while (t < md->end_subject && !IS_NEWLINE(t))
6736        {
6737        t++;
6738        ACROSSCHAR(t < end_subject, *t, t++);
6739        }
6740      }
6741    else
6742#endif
6743    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6744    end_subject = t;
6745    }
6746
6747  /* There are some optimizations that avoid running the match if a known
6748  starting point is not found, or if a known later character is not present.
6749  However, there is an option that disables these, for testing and for ensuring
6750  that all callouts do actually occur. The option can be set in the regex by
6751  (*NO_START_OPT) or passed in match-time options. */
6752
6753  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6754    {
6755    /* Advance to a unique first char if there is one. */
6756
6757    if (has_first_char)
6758      {
6759      if (first_char != first_char2)
6760        while (start_match < end_subject &&
6761            *start_match != first_char && *start_match != first_char2)
6762          start_match++;
6763      else
6764        while (start_match < end_subject && *start_match != first_char)
6765          start_match++;
6766      }
6767
6768    /* Or to just after a linebreak for a multiline match */
6769
6770    else if (startline)
6771      {
6772      if (start_match > md->start_subject + start_offset)
6773        {
6774#ifdef SUPPORT_UTF
6775        if (utf)
6776          {
6777          while (start_match < end_subject && !WAS_NEWLINE(start_match))
6778            {
6779            start_match++;
6780            ACROSSCHAR(start_match < end_subject, *start_match,
6781              start_match++);
6782            }
6783          }
6784        else
6785#endif
6786        while (start_match < end_subject && !WAS_NEWLINE(start_match))
6787          start_match++;
6788
6789        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6790        and we are now at a LF, advance the match position by one more character.
6791        */
6792
6793        if (start_match[-1] == CHAR_CR &&
6794             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6795             start_match < end_subject &&
6796             *start_match == CHAR_NL)
6797          start_match++;
6798        }
6799      }
6800
6801    /* Or to a non-unique first byte after study */
6802
6803    else if (start_bits != NULL)
6804      {
6805      while (start_match < end_subject)
6806        {
6807        register unsigned int c = *start_match;
6808#ifndef COMPILE_PCRE8
6809        if (c > 255) c = 255;
6810#endif
6811        if ((start_bits[c/8] & (1 << (c&7))) == 0)
6812          {
6813          start_match++;
6814#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6815          /* In non 8-bit mode, the iteration will stop for
6816          characters > 255 at the beginning or not stop at all. */
6817          if (utf)
6818            ACROSSCHAR(start_match < end_subject, *start_match,
6819              start_match++);
6820#endif
6821          }
6822        else break;
6823        }
6824      }
6825    }   /* Starting optimizations */
6826
6827  /* Restore fudged end_subject */
6828
6829  end_subject = save_end_subject;
6830
6831  /* The following two optimizations are disabled for partial matching or if
6832  disabling is explicitly requested. */
6833
6834  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6835    {
6836    /* If the pattern was studied, a minimum subject length may be set. This is
6837    a lower bound; no actual string of that length may actually match the
6838    pattern. Although the value is, strictly, in characters, we treat it as
6839    bytes to avoid spending too much time in this optimization. */
6840
6841    if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6842        (pcre_uint32)(end_subject - start_match) < study->minlength)
6843      {
6844      rc = MATCH_NOMATCH;
6845      break;
6846      }
6847
6848    /* If req_char is set, we know that that character must appear in the
6849    subject for the match to succeed. If the first character is set, req_char
6850    must be later in the subject; otherwise the test starts at the match point.
6851    This optimization can save a huge amount of backtracking in patterns with
6852    nested unlimited repeats that aren't going to match. Writing separate code
6853    for cased/caseless versions makes it go faster, as does using an
6854    autoincrement and backing off on a match.
6855
6856    HOWEVER: when the subject string is very, very long, searching to its end
6857    can take a long time, and give bad performance on quite ordinary patterns.
6858    This showed up when somebody was matching something like /^\d+C/ on a
6859    32-megabyte string... so we don't do this when the string is sufficiently
6860    long. */
6861
6862    if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6863      {
6864      register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6865
6866      /* We don't need to repeat the search if we haven't yet reached the
6867      place we found it at last time. */
6868
6869      if (p > req_char_ptr)
6870        {
6871        if (req_char != req_char2)
6872          {
6873          while (p < end_subject)
6874            {
6875            register int pp = *p++;
6876            if (pp == req_char || pp == req_char2) { p--; break; }
6877            }
6878          }
6879        else
6880          {
6881          while (p < end_subject)
6882            {
6883            if (*p++ == req_char) { p--; break; }
6884            }
6885          }
6886
6887        /* If we can't find the required character, break the matching loop,
6888        forcing a match failure. */
6889
6890        if (p >= end_subject)
6891          {
6892          rc = MATCH_NOMATCH;
6893          break;
6894          }
6895
6896        /* If we have found the required character, save the point where we
6897        found it, so that we don't search again next time round the loop if
6898        the start hasn't passed this character yet. */
6899
6900        req_char_ptr = p;
6901        }
6902      }
6903    }
6904
6905#ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6906  printf(">>>> Match against: ");
6907  pchars(start_match, end_subject - start_match, TRUE, md);
6908  printf("\n");
6909#endif
6910
6911  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6912  first starting point for which a partial match was found. */
6913
6914  md->start_match_ptr = start_match;
6915  md->start_used_ptr = start_match;
6916  md->match_call_count = 0;
6917  md->match_function_type = 0;
6918  md->end_offset_top = 0;
6919  rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6920  if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6921
6922  switch(rc)
6923    {
6924    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6925    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6926    entirely. The only way we can do that is to re-do the match at the same
6927    point, with a flag to force SKIP with an argument to be ignored. Just
6928    treating this case as NOMATCH does not work because it does not check other
6929    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6930
6931    case MATCH_SKIP_ARG:
6932    new_start_match = start_match;
6933    md->ignore_skip_arg = TRUE;
6934    break;
6935
6936    /* SKIP passes back the next starting point explicitly, but if it is the
6937    same as the match we have just done, treat it as NOMATCH. */
6938
6939    case MATCH_SKIP:
6940    if (md->start_match_ptr != start_match)
6941      {
6942      new_start_match = md->start_match_ptr;
6943      break;
6944      }
6945    /* Fall through */
6946
6947    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6948    exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6949
6950    case MATCH_NOMATCH:
6951    case MATCH_PRUNE:
6952    case MATCH_THEN:
6953    md->ignore_skip_arg = FALSE;
6954    new_start_match = start_match + 1;
6955#ifdef SUPPORT_UTF
6956    if (utf)
6957      ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6958        new_start_match++);
6959#endif
6960    break;
6961
6962    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6963
6964    case MATCH_COMMIT:
6965    rc = MATCH_NOMATCH;
6966    goto ENDLOOP;
6967
6968    /* Any other return is either a match, or some kind of error. */
6969
6970    default:
6971    goto ENDLOOP;
6972    }
6973
6974  /* Control reaches here for the various types of "no match at this point"
6975  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6976
6977  rc = MATCH_NOMATCH;
6978
6979  /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6980  newline in the subject (though it may continue over the newline). Therefore,
6981  if we have just failed to match, starting at a newline, do not continue. */
6982
6983  if (firstline && IS_NEWLINE(start_match)) break;
6984
6985  /* Advance to new matching position */
6986
6987  start_match = new_start_match;
6988
6989  /* Break the loop if the pattern is anchored or if we have passed the end of
6990  the subject. */
6991
6992  if (anchored || start_match > end_subject) break;
6993
6994  /* If we have just passed a CR and we are now at a LF, and the pattern does
6995  not contain any explicit matches for \r or \n, and the newline option is CRLF
6996  or ANY or ANYCRLF, advance the match position by one more character. In
6997  normal matching start_match will always be greater than the first position at
6998  this stage, but a failed *SKIP can cause a return at the same point, which is
6999  why the first test exists. */
7000
7001  if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7002      start_match[-1] == CHAR_CR &&
7003      start_match < end_subject &&
7004      *start_match == CHAR_NL &&
7005      (re->flags & PCRE_HASCRORLF) == 0 &&
7006        (md->nltype == NLTYPE_ANY ||
7007         md->nltype == NLTYPE_ANYCRLF ||
7008         md->nllen == 2))
7009    start_match++;
7010
7011  md->mark = NULL;   /* Reset for start of next match attempt */
7012  }                  /* End of for(;;) "bumpalong" loop */
7013
7014/* ==========================================================================*/
7015
7016/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7017conditions is true:
7018
7019(1) The pattern is anchored or the match was failed by (*COMMIT);
7020
7021(2) We are past the end of the subject;
7022
7023(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7024    this option requests that a match occur at or before the first newline in
7025    the subject.
7026
7027When we have a match and the offset vector is big enough to deal with any
7028backreferences, captured substring offsets will already be set up. In the case
7029where we had to get some local store to hold offsets for backreference
7030processing, copy those that we can. In this case there need not be overflow if
7031certain parts of the pattern were not used, even though there are more
7032capturing parentheses than vector slots. */
7033
7034ENDLOOP:
7035
7036if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7037  {
7038  if (using_temporary_offsets)
7039    {
7040    if (arg_offset_max >= 4)
7041      {
7042      memcpy(offsets + 2, md->offset_vector + 2,
7043        (arg_offset_max - 2) * sizeof(int));
7044      DPRINTF(("Copied offsets from temporary memory\n"));
7045      }
7046    if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
7047    DPRINTF(("Freeing temporary memory\n"));
7048    (PUBL(free))(md->offset_vector);
7049    }
7050
7051  /* Set the return code to the number of captured strings, or 0 if there were
7052  too many to fit into the vector. */
7053
7054  rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
7055    0 : md->end_offset_top/2;
7056
7057  /* If there is space in the offset vector, set any unused pairs at the end of
7058  the pattern to -1 for backwards compatibility. It is documented that this
7059  happens. In earlier versions, the whole set of potential capturing offsets
7060  was set to -1 each time round the loop, but this is handled differently now.
7061  "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7062  those at the end that need unsetting here. We can't just unset them all at
7063  the start of the whole thing because they may get set in one branch that is
7064  not the final matching branch. */
7065
7066  if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7067    {
7068    register int *iptr, *iend;
7069    int resetcount = 2 + re->top_bracket * 2;
7070    if (resetcount > offsetcount) resetcount = offsetcount;
7071    iptr = offsets + md->end_offset_top;
7072    iend = offsets + resetcount;
7073    while (iptr < iend) *iptr++ = -1;
7074    }
7075
7076  /* If there is space, set up the whole thing as substring 0. The value of
7077  md->start_match_ptr might be modified if \K was encountered on the successful
7078  matching path. */
7079
7080  if (offsetcount < 2) rc = 0; else
7081    {
7082    offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7083    offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7084    }
7085
7086  /* Return MARK data if requested */
7087
7088  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7089    *(extra_data->mark) = (pcre_uchar *)md->mark;
7090  DPRINTF((">>>> returning %d\n", rc));
7091#ifdef NO_RECURSE
7092  release_match_heapframes(&frame_zero);
7093#endif
7094  return rc;
7095  }
7096
7097/* Control gets here if there has been an error, or if the overall match
7098attempt has failed at all permitted starting positions. */
7099
7100if (using_temporary_offsets)
7101  {
7102  DPRINTF(("Freeing temporary memory\n"));
7103  (PUBL(free))(md->offset_vector);
7104  }
7105
7106/* For anything other than nomatch or partial match, just return the code. */
7107
7108if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7109  {
7110  DPRINTF((">>>> error: returning %d\n", rc));
7111#ifdef NO_RECURSE
7112  release_match_heapframes(&frame_zero);
7113#endif
7114  return rc;
7115  }
7116
7117/* Handle partial matches - disable any mark data */
7118
7119if (start_partial != NULL)
7120  {
7121  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7122  md->mark = NULL;
7123  if (offsetcount > 1)
7124    {
7125    offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7126    offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7127    }
7128  rc = PCRE_ERROR_PARTIAL;
7129  }
7130
7131/* This is the classic nomatch case */
7132
7133else
7134  {
7135  DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7136  rc = PCRE_ERROR_NOMATCH;
7137  }
7138
7139/* Return the MARK data if it has been requested. */
7140
7141if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7142  *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7143#ifdef NO_RECURSE
7144  release_match_heapframes(&frame_zero);
7145#endif
7146return rc;
7147}
7148
7149/* End of pcre_exec.c */
7150