1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                       Written by Philip Hazel
9           Copyright (c) 1997-2010 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15    * Redistributions of source code must retain the above copyright notice,
16      this list of conditions and the following disclaimer.
17
18    * Redistributions in binary form must reproduce the above copyright
19      notice, this list of conditions and the following disclaimer in the
20      documentation and/or other materials provided with the distribution.
21
22    * Neither the name of the University of Cambridge nor the names of its
23      contributors may be used to endorse or promote products derived from
24      this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
41/* This module contains pcre_exec(), the externally visible function that does
42pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43possible. There are also some static supporting functions. */
44
45#ifdef HAVE_CONFIG_H
46#include "config.h"
47#endif
48
/* Name-substitution macros, defined ahead of the inclusion of pcre_internal.h
below. NOTE(review): presumably macros in that header expand in terms of these
names, which is why they precede the #include - confirm against the header.
Within this file, md is the match_data pointer, and start_subject/end_subject
are fields that are read through it. */

#define NLBLOCK md             /* Block containing newline information */
#define PSSTART start_subject  /* Field containing processed string start */
#define PSEND   end_subject    /* Field containing processed string end */
52
53#include "pcre_internal.h"
54
55/* Undefine some potentially clashing cpp symbols */
56
57#undef min
58#undef max
59
60/* Flag bits for the match() function */
61
#define match_condassert   0x01    /* Checking an assertion used as a condition */
#define match_cbegroup     0x02    /* Unlimited repeat group that may be empty */

/* Ordinary (non-error) results of match(). Genuine errors are reported with
the externally defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_NOMATCH      0
#define MATCH_MATCH        1

/* Internal-only results of match(), handed back by the OP_THEN, OP_SKIP,
OP_PRUNE and OP_COMMIT cases respectively. The values are kept far enough below
zero that they stay clear of the external error codes. */

#define MATCH_THEN         (-996)
#define MATCH_SKIP         (-997)
#define MATCH_PRUNE        (-998)
#define MATCH_COMMIT       (-999)

/* Upper bound on the number of offset-vector ints that are saved on the stack
across a recursive call; a bigger offset vector is saved in malloc'd memory
instead. Keep this a multiple of 3, because the length of the offset vector
always is. */

#define REC_STACK_SAVE_MAX 30

/* Minimum and maximum repeat counts for the common repeats. In the table of
maxima, 0 stands for "no upper limit". */

static const char rep_min[] =
  { 0, 0, 1, 1, 0, 0 };

static const char rep_max[] =
  { 0, 0, 0, 0, 1, 1 };
89
90
91
92#ifdef PCRE_DEBUG
93/*************************************************
94*        Debugging function to print chars       *
95*************************************************/
96
/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested.
99
100Arguments:
101  p           points to characters
102  length      number to print
103  is_subject  TRUE if printing from within md->start_subject
104  md          pointer to matching data block, if is_subject is TRUE
105
106Returns:     nothing
107*/
108
109static void
110pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111{
112unsigned int c;
113if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114while (length-- > 0)
115  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116}
117#endif
118
119
120
121/*************************************************
122*          Match a back-reference                *
123*************************************************/
124
125/* If a back reference hasn't been set, the length that is passed is greater
126than the number of characters left in the string, so the match fails.
127
128Arguments:
129  offset      index into the offset vector
130  eptr        points into the subject
131  length      length to be matched
132  md          points to match data block
133  ims         the ims flags
134
135Returns:      TRUE if matched
136*/
137
138static BOOL
139match_ref(int offset, register USPTR eptr, int length, match_data *md,
140  unsigned long int ims)
141{
142USPTR p = md->start_subject + md->offset_vector[offset];
143
144#ifdef PCRE_DEBUG
145if (eptr >= md->end_subject)
146  printf("matching subject <null>");
147else
148  {
149  printf("matching subject ");
150  pchars(eptr, length, TRUE, md);
151  }
152printf(" against backref ");
153pchars(p, length, FALSE, md);
154printf("\n");
155#endif
156
157/* Always fail if not enough characters left */
158
159if (length > md->end_subject - eptr) return FALSE;
160
161/* Separate the caseless case for speed. In UTF-8 mode we can only do this
162properly if Unicode properties are supported. Otherwise, we can check only
163ASCII characters. */
164
165if ((ims & PCRE_CASELESS) != 0)
166  {
167#ifdef SUPPORT_UTF8
168#ifdef SUPPORT_UCP
169  if (md->utf8)
170    {
171    USPTR endptr = eptr + length;
172    while (eptr < endptr)
173      {
174      int c, d;
175      GETCHARINC(c, eptr);
176      GETCHARINC(d, p);
177      if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178      }
179    }
180  else
181#endif
182#endif
183
184  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185  is no UCP support. */
186
187  while (length-- > 0)
188    { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189  }
190
191/* In the caseful case, we can just compare the bytes, whether or not we
192are in UTF-8 mode. */
193
194else
195  { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196
197return TRUE;
198}
199
200
201
202/***************************************************************************
203****************************************************************************
204                   RECURSION IN THE match() FUNCTION
205
206The match() function is highly recursive, though not every recursive call
207increases the recursive depth. Nevertheless, some regular expressions can cause
208it to recurse to a great depth. I was writing for Unix, so I just let it call
209itself recursively. This uses the stack for saving everything that has to be
210saved for a recursive call. On Unix, the stack can be large, and this works
211fine.
212
213It turns out that on some non-Unix-like systems there are problems with
214programs that use a lot of stack. (This despite the fact that every last chip
215has oodles of memory these days, and techniques for extending the stack have
216been known for decades.) So....
217
218There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to
221achieve this so that the actual code doesn't look very different to what it
222always used to.
223
224The original heap-recursive code used longjmp(). However, it seems that this
225can be very slow on some operating systems. Following a suggestion from Stan
226Switzer, the use of longjmp() has been abolished, at the cost of having to
227provide a unique number for each call to RMATCH. There is no way of generating
228a sequence of numbers at compile time in C. I have given them names, to make
229them stand out more clearly.
230
231Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233tests. Furthermore, not using longjmp() means that local dynamic variables
234don't have indeterminate values; this has meant that the frame size can be
235reduced because the result can be "passed back" by straight setting of the
236variable instead of being passed in the frame.
237****************************************************************************
238***************************************************************************/
239
240/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241below must be updated in sync.  */
242
/* One identifier per RMATCH call site, numbered consecutively from 1. The
first entry of each row is anchored explicitly so that an accidental insertion
or deletion within a row is easy to spot. */

enum {
  RM1  =  1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54
};
249
250/* These versions of the macros use the stack, as normal. There are debugging
251versions and production versions. Note that the "rw" argument of RMATCH isn't
252actually used in this definition. */
253
254#ifndef NO_RECURSE
/* In the recursive build, REGISTER expands to the "register" keyword; it is
used on the eptr and ecode parameters of match(). */

#define REGISTER register

#ifdef PCRE_DEBUG

/* Debugging form of RMATCH: make a genuine recursive call to match(), leaving
its result in rrc, and trace the call and the return with the source line
number. mstart, markptr and rdepth are not macro arguments; they are picked up
from the scope in which the macro is expanded. The rw argument is ignored. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }

/* Debugging form of RRETURN: trace the value being returned, then return it.
Note that ra is expanded twice (once for printf, once for return). */

#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
#else

/* Production form: RMATCH is just a recursive call to match() whose result is
assigned to rrc, and RRETURN is a plain return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
#endif
274
275#else
276
277
278/* These versions of the macros manage a private stack on the heap. Note that
279the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280argument of match(), which never changes. */
281
/* In the heap build REGISTER expands to nothing. */

#define REGISTER

/* "Recurse" without using the C stack. A new frame is obtained from
pcre_stack_malloc(), the resumption point (rw) is recorded in the current
frame, the arguments for the inner invocation are copied into the new frame,
and control jumps to HEAP_RECURSE. The label L_<rw> that this macro plants is
where control comes back to when the inner invocation finishes.

If the frame cannot be allocated, the current invocation gives up at once with
PCRE_ERROR_NOMEMORY. This goes through RRETURN so that the current frame is
released and the error is handed back to the outer invocation in the usual
way, instead of writing through a NULL pointer.

Arguments:
  ra          eptr for the inner invocation
  rb          ecode for the inner invocation
  rc          offset_top for the inner invocation
  rd          md (not used here; it never changes)
  re          ims for the inner invocation
  rf          eptrb for the inner invocation
  rg          flags for the inner invocation
  rw          one of the RMn values, unique to this call site
*/

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
304
/* Return from a heap-based "recursion". The current frame is unlinked and
handed to pcre_stack_free(). If there is an outer frame, the result is placed
in rrc and control jumps to HEAP_RETURN; otherwise this was the top-level frame
and the function really returns.

Note that ra is expanded only after frame has been switched to the previous
frame and the old frame has been freed, so the argument should not depend on
anything held in the frame that is being released. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
317
318
319/* Structure for remembering the local variables in a private frame */
320
321typedef struct heapframe {
322  struct heapframe *Xprevframe;
323
324  /* Function arguments that may change */
325
326  USPTR Xeptr;
327  const uschar *Xecode;
328  USPTR Xmstart;
329  USPTR Xmarkptr;
330  int Xoffset_top;
331  long int Xims;
332  eptrblock *Xeptrb;
333  int Xflags;
334  unsigned int Xrdepth;
335
336  /* Function local variables */
337
338  USPTR Xcallpat;
339#ifdef SUPPORT_UTF8
340  USPTR Xcharptr;
341#endif
342  USPTR Xdata;
343  USPTR Xnext;
344  USPTR Xpp;
345  USPTR Xprev;
346  USPTR Xsaved_eptr;
347
348  recursion_info Xnew_recursive;
349
350  BOOL Xcur_is_word;
351  BOOL Xcondition;
352  BOOL Xprev_is_word;
353
354  unsigned long int Xoriginal_ims;
355
356#ifdef SUPPORT_UCP
357  int Xprop_type;
358  int Xprop_value;
359  int Xprop_fail_result;
360  int Xprop_category;
361  int Xprop_chartype;
362  int Xprop_script;
363  int Xoclength;
364  uschar Xocchars[8];
365#endif
366
367  int Xcodelink;
368  int Xctype;
369  unsigned int Xfc;
370  int Xfi;
371  int Xlength;
372  int Xmax;
373  int Xmin;
374  int Xnumber;
375  int Xoffset;
376  int Xop;
377  int Xsave_capture_last;
378  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
379  int Xstacksave[REC_STACK_SAVE_MAX];
380
381  eptrblock Xnewptrb;
382
383  /* Where to jump back to */
384
385  int Xwhere;
386
387} heapframe;
388
389#endif
390
391
392/***************************************************************************
393***************************************************************************/
394
395
396
397/*************************************************
398*         Match from current position            *
399*************************************************/
400
401/* This function is called recursively in many circumstances. Whenever it
402returns a negative (error) response, the outer incarnation must also return the
403same response. */
404
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
407at the end of the subject and also past the start of the subject (i.e.
408something has been matched). For hard partial matching, we then return
409immediately. The second one is used when we already know we are past the end of
410the subject. */
411
/* Partial-match test for use where the end of the subject may or may not have
been reached. When partial matching is enabled (md->partial != 0), eptr is at
or beyond md->end_subject, and eptr is past mstart, md->hitend is set; if
md->partial is greater than 1, match() then returns PCRE_ERROR_PARTIAL at once.
The macro expands to a bare "if" statement and refers to md, eptr and mstart
from the surrounding scope. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
    }

/* As CHECK_PARTIAL, but without the comparison against md->end_subject, for
use at points where the end of the subject is already known to have been
reached. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
    }
425
426
427/* Performance note: It might be tempting to extract commonly used fields from
428the md structure (e.g. utf8, end_subject) into individual variables to improve
429performance. Tests using gcc on a SPARC disproved this; in the first case, it
430made performance worse.
431
432Arguments:
433   eptr        pointer to current character in subject
434   ecode       pointer to current position in compiled code
435   mstart      pointer to the current match start position (can be modified
436                 by encountering \K)
437   markptr     pointer to the most recent MARK name, or NULL
438   offset_top  current top pointer
439   md          pointer to "static" info for the match
440   ims         current /i, /m, and /s options
441   eptrb       pointer to chain of blocks containing eptr at start of
442                 brackets - for testing for empty matches
443   flags       can contain
444                 match_condassert - this is an assertion condition
445                 match_cbegroup - this is the start of an unlimited repeat
446                   group that can match an empty string
447   rdepth      the recursion depth
448
449Returns:       MATCH_MATCH if matched            )  these values are >= 0
450               MATCH_NOMATCH if failed to match  )
451               a negative PCRE_ERROR_xxx value if aborted by an error condition
452                 (e.g. stopped by repeated call or recursion limit)
453*/
454
455static int
456match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
457  markptr, int offset_top, match_data *md, unsigned long int ims,
458  eptrblock *eptrb, int flags, unsigned int rdepth)
459{
460/* These variables do not need to be preserved over recursion in this function,
461so they can be ordinary variables in all cases. Mark some of them with
462"register" because they are used a lot in loops. */
463
464register int  rrc;         /* Returns from recursive calls */
465register int  i;           /* Used for loops not involving calls to RMATCH() */
466register unsigned int c;   /* Character values not kept over RMATCH() calls */
467register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
468
469BOOL minimize, possessive; /* Quantifier options */
470int condcode;
471
472/* When recursion is not being used, all "local" variables that have to be
473preserved over calls to RMATCH() are part of a "frame" which is obtained from
474heap storage. Set up the top-level frame here; others are obtained from the
475heap whenever RMATCH() does a "recursion". See the macro definitions above. */
476
477#ifdef NO_RECURSE
478heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
479frame->Xprevframe = NULL;            /* Marks the top level */
480
481/* Copy in the original argument variables */
482
483frame->Xeptr = eptr;
484frame->Xecode = ecode;
485frame->Xmstart = mstart;
486frame->Xmarkptr = markptr;
487frame->Xoffset_top = offset_top;
488frame->Xims = ims;
489frame->Xeptrb = eptrb;
490frame->Xflags = flags;
491frame->Xrdepth = rdepth;
492
493/* This is where control jumps back to to effect "recursion" */
494
495HEAP_RECURSE:
496
497/* Macros make the argument variables come from the current frame */
498
499#define eptr               frame->Xeptr
500#define ecode              frame->Xecode
501#define mstart             frame->Xmstart
502#define markptr            frame->Xmarkptr
503#define offset_top         frame->Xoffset_top
504#define ims                frame->Xims
505#define eptrb              frame->Xeptrb
506#define flags              frame->Xflags
507#define rdepth             frame->Xrdepth
508
509/* Ditto for the local variables */
510
511#ifdef SUPPORT_UTF8
512#define charptr            frame->Xcharptr
513#endif
514#define callpat            frame->Xcallpat
515#define codelink           frame->Xcodelink
516#define data               frame->Xdata
517#define next               frame->Xnext
518#define pp                 frame->Xpp
519#define prev               frame->Xprev
520#define saved_eptr         frame->Xsaved_eptr
521
522#define new_recursive      frame->Xnew_recursive
523
524#define cur_is_word        frame->Xcur_is_word
525#define condition          frame->Xcondition
526#define prev_is_word       frame->Xprev_is_word
527
528#define original_ims       frame->Xoriginal_ims
529
530#ifdef SUPPORT_UCP
531#define prop_type          frame->Xprop_type
532#define prop_value         frame->Xprop_value
533#define prop_fail_result   frame->Xprop_fail_result
534#define prop_category      frame->Xprop_category
535#define prop_chartype      frame->Xprop_chartype
536#define prop_script        frame->Xprop_script
537#define oclength           frame->Xoclength
538#define occhars            frame->Xocchars
539#endif
540
541#define ctype              frame->Xctype
542#define fc                 frame->Xfc
543#define fi                 frame->Xfi
544#define length             frame->Xlength
545#define max                frame->Xmax
546#define min                frame->Xmin
547#define number             frame->Xnumber
548#define offset             frame->Xoffset
549#define op                 frame->Xop
550#define save_capture_last  frame->Xsave_capture_last
551#define save_offset1       frame->Xsave_offset1
552#define save_offset2       frame->Xsave_offset2
553#define save_offset3       frame->Xsave_offset3
554#define stacksave          frame->Xstacksave
555
556#define newptrb            frame->Xnewptrb
557
558/* When recursion is being used, local variables are allocated on the stack and
559get preserved during recursion in the normal way. In this environment, fi and
560i, and fc and c, can be the same variables. */
561
562#else         /* NO_RECURSE not defined */
563#define fi i
564#define fc c
565
566
567#ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
568const uschar *charptr;             /* in small blocks of the code. My normal */
569#endif                             /* style of coding would have declared    */
570const uschar *callpat;             /* them within each of those blocks.      */
571const uschar *data;                /* However, in order to accommodate the   */
572const uschar *next;                /* version of this code that uses an      */
573USPTR         pp;                  /* external "stack" implemented on the    */
574const uschar *prev;                /* heap, it is easier to declare them all */
575USPTR         saved_eptr;          /* here, so the declarations can be cut   */
576                                   /* out in a block. The only declarations  */
577recursion_info new_recursive;      /* within blocks below are for variables  */
578                                   /* that do not have to be preserved over  */
579BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
580BOOL condition;
581BOOL prev_is_word;
582
583unsigned long int original_ims;
584
585#ifdef SUPPORT_UCP
586int prop_type;
587int prop_value;
588int prop_fail_result;
589int prop_category;
590int prop_chartype;
591int prop_script;
592int oclength;
593uschar occhars[8];
594#endif
595
596int codelink;
597int ctype;
598int length;
599int max;
600int min;
601int number;
602int offset;
603int op;
604int save_capture_last;
605int save_offset1, save_offset2, save_offset3;
606int stacksave[REC_STACK_SAVE_MAX];
607
608eptrblock newptrb;
609#endif     /* NO_RECURSE */
610
611/* These statements are here to stop the compiler complaining about unitialized
612variables. */
613
614#ifdef SUPPORT_UCP
615prop_value = 0;
616prop_fail_result = 0;
617#endif
618
619
620/* This label is used for tail recursion, which is used in a few cases even
621when NO_RECURSE is not defined, in order to reduce the amount of stack that is
622used. Thanks to Ian Taylor for noticing this possibility and sending the
623original patch. */
624
625TAIL_RECURSE:
626
627/* OK, now we can get on with the real code of the function. Recursive calls
628are specified by the macro RMATCH and RRETURN is used to return. When
629NO_RECURSE is *not* defined, these just turn into a recursive call to match()
630and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
631defined). However, RMATCH isn't like a function call because it's quite a
632complicated macro. It has to be used in one particular way. This shouldn't,
633however, impact performance when true recursion is being used. */
634
635#ifdef SUPPORT_UTF8
636utf8 = md->utf8;       /* Local copy of the flag */
637#else
638utf8 = FALSE;
639#endif
640
641/* First check that we haven't called match() too many times, or that we
642haven't exceeded the recursive call limit. */
643
644if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
645if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
646
647original_ims = ims;    /* Save for resetting on ')' */
648
649/* At the start of a group with an unlimited repeat that may match an empty
650string, the match_cbegroup flag is set. When this is the case, add the current
651subject pointer to the chain of such remembered pointers, to be checked when we
652hit the closing ket, in order to break infinite loops that match no characters.
653When match() is called in other circumstances, don't add to the chain. The
654match_cbegroup flag must NOT be used with tail recursion, because the memory
655block that is used is on the stack, so a new one may be required for each
656match(). */
657
658if ((flags & match_cbegroup) != 0)
659  {
660  newptrb.epb_saved_eptr = eptr;
661  newptrb.epb_prev = eptrb;
662  eptrb = &newptrb;
663  }
664
665/* Now start processing the opcodes. */
666
667for (;;)
668  {
669  minimize = possessive = FALSE;
670  op = *ecode;
671
672  switch(op)
673    {
674    case OP_FAIL:
675    RRETURN(MATCH_NOMATCH);
676
677    case OP_PRUNE:
678    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
679      ims, eptrb, flags, RM51);
680    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
681    RRETURN(MATCH_PRUNE);
682
683    case OP_COMMIT:
684    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
685      ims, eptrb, flags, RM52);
686    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
687    RRETURN(MATCH_COMMIT);
688
689    case OP_SKIP:
690    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691      ims, eptrb, flags, RM53);
692    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
693    md->start_match_ptr = eptr;   /* Pass back current position */
694    RRETURN(MATCH_SKIP);
695
696    case OP_THEN:
697    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
698      ims, eptrb, flags, RM54);
699    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
700    RRETURN(MATCH_THEN);
701
702    /* Handle a capturing bracket. If there is space in the offset vector, save
703    the current subject position in the working slot at the top of the vector.
704    We mustn't change the current values of the data slot, because they may be
705    set from a previous iteration of this group, and be referred to by a
706    reference inside the group.
707
708    If the bracket fails to match, we need to restore this value and also the
709    values of the final offsets, in case they were set by a previous iteration
710    of the same bracket.
711
712    If there isn't enough space in the offset vector, treat this as if it were
713    a non-capturing bracket. Don't worry about setting the flag for the error
714    case here; that is handled in the code for KET. */
715
716    case OP_CBRA:
717    case OP_SCBRA:
718    number = GET2(ecode, 1+LINK_SIZE);
719    offset = number << 1;
720
721#ifdef PCRE_DEBUG
722    printf("start bracket %d\n", number);
723    printf("subject=");
724    pchars(eptr, 16, TRUE, md);
725    printf("\n");
726#endif
727
728    if (offset < md->offset_max)
729      {
730      save_offset1 = md->offset_vector[offset];
731      save_offset2 = md->offset_vector[offset+1];
732      save_offset3 = md->offset_vector[md->offset_end - number];
733      save_capture_last = md->capture_last;
734
735      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
736      md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
737
738      flags = (op == OP_SCBRA)? match_cbegroup : 0;
739      do
740        {
741        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
742          ims, eptrb, flags, RM1);
743        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
744        md->capture_last = save_capture_last;
745        ecode += GET(ecode, 1);
746        }
747      while (*ecode == OP_ALT);
748
749      DPRINTF(("bracket %d failed\n", number));
750
751      md->offset_vector[offset] = save_offset1;
752      md->offset_vector[offset+1] = save_offset2;
753      md->offset_vector[md->offset_end - number] = save_offset3;
754
755      RRETURN(MATCH_NOMATCH);
756      }
757
758    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
759    as a non-capturing bracket. */
760
761    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763
764    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
765
766    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
767    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
768
769    /* Non-capturing bracket. Loop for all the alternatives. When we get to the
770    final alternative within the brackets, we would return the result of a
771    recursive call to match() whatever happened. We can reduce stack usage by
772    turning this into a tail recursion, except in the case when match_cbegroup
773    is set.*/
774
775    case OP_BRA:
776    case OP_SBRA:
777    DPRINTF(("start non-capturing bracket\n"));
778    flags = (op >= OP_SBRA)? match_cbegroup : 0;
779    for (;;)
780      {
781      if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
782        {
783        if (flags == 0)    /* Not a possibly empty group */
784          {
785          ecode += _pcre_OP_lengths[*ecode];
786          DPRINTF(("bracket 0 tail recursion\n"));
787          goto TAIL_RECURSE;
788          }
789
790        /* Possibly empty group; can't use tail recursion. */
791
792        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
793          eptrb, flags, RM48);
794        RRETURN(rrc);
795        }
796
797      /* For non-final alternatives, continue the loop for a NOMATCH result;
798      otherwise return. */
799
800      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
801        eptrb, flags, RM2);
802      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803      ecode += GET(ecode, 1);
804      }
805    /* Control never reaches here. */
806
807    /* Conditional group: compilation checked that there are no more than
808    two branches. If the condition is false, skipping the first branch takes us
809    past the end if there is only one branch, but that's OK because that is
810    exactly what going to the ket would do. As there is only one branch to be
811    obeyed, we can use tail recursion to avoid using another stack frame. */
812
813    case OP_COND:
814    case OP_SCOND:
815    codelink= GET(ecode, 1);
816
817    /* Because of the way auto-callout works during compile, a callout item is
818    inserted between OP_COND and an assertion condition. */
819
820    if (ecode[LINK_SIZE+1] == OP_CALLOUT)
821      {
822      if (pcre_callout != NULL)
823        {
824        pcre_callout_block cb;
825        cb.version          = 1;   /* Version 1 of the callout block */
826        cb.callout_number   = ecode[LINK_SIZE+2];
827        cb.offset_vector    = md->offset_vector;
828        cb.subject          = (PCRE_SPTR)md->start_subject;
829        cb.subject_length   = md->end_subject - md->start_subject;
830        cb.start_match      = mstart - md->start_subject;
831        cb.current_position = eptr - md->start_subject;
832        cb.pattern_position = GET(ecode, LINK_SIZE + 3);
833        cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
834        cb.capture_top      = offset_top/2;
835        cb.capture_last     = md->capture_last;
836        cb.callout_data     = md->callout_data;
837        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
838        if (rrc < 0) RRETURN(rrc);
839        }
840      ecode += _pcre_OP_lengths[OP_CALLOUT];
841      }
842
843    condcode = ecode[LINK_SIZE+1];
844
845    /* Now see what the actual condition is */
846
847    if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
848      {
849      if (md->recursive == NULL)                /* Not recursing => FALSE */
850        {
851        condition = FALSE;
852        ecode += GET(ecode, 1);
853        }
854      else
855        {
856        int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
857        condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
858
859        /* If the test is for recursion into a specific subpattern, and it is
860        false, but the test was set up by name, scan the table to see if the
861        name refers to any other numbers, and test them. The condition is true
862        if any one is set. */
863
864        if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
865          {
866          uschar *slotA = md->name_table;
867          for (i = 0; i < md->name_count; i++)
868            {
869            if (GET2(slotA, 0) == recno) break;
870            slotA += md->name_entry_size;
871            }
872
873          /* Found a name for the number - there can be only one; duplicate
874          names for different numbers are allowed, but not vice versa. First
875          scan down for duplicates. */
876
877          if (i < md->name_count)
878            {
879            uschar *slotB = slotA;
880            while (slotB > md->name_table)
881              {
882              slotB -= md->name_entry_size;
883              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
884                {
885                condition = GET2(slotB, 0) == md->recursive->group_num;
886                if (condition) break;
887                }
888              else break;
889              }
890
891            /* Scan up for duplicates */
892
893            if (!condition)
894              {
895              slotB = slotA;
896              for (i++; i < md->name_count; i++)
897                {
898                slotB += md->name_entry_size;
899                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
900                  {
901                  condition = GET2(slotB, 0) == md->recursive->group_num;
902                  if (condition) break;
903                  }
904                else break;
905                }
906              }
907            }
908          }
909
910        /* Choose branch according to the condition */
911
912        ecode += condition? 3 : GET(ecode, 1);
913        }
914      }
915
916    else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
917      {
918      offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
919      condition = offset < offset_top && md->offset_vector[offset] >= 0;
920
921      /* If the numbered capture is unset, but the reference was by name,
922      scan the table to see if the name refers to any other numbers, and test
923      them. The condition is true if any one is set. This is tediously similar
924      to the code above, but not close enough to try to amalgamate. */
925
926      if (!condition && condcode == OP_NCREF)
927        {
928        int refno = offset >> 1;
929        uschar *slotA = md->name_table;
930
931        for (i = 0; i < md->name_count; i++)
932          {
933          if (GET2(slotA, 0) == refno) break;
934          slotA += md->name_entry_size;
935          }
936
937        /* Found a name for the number - there can be only one; duplicate names
938        for different numbers are allowed, but not vice versa. First scan down
939        for duplicates. */
940
941        if (i < md->name_count)
942          {
943          uschar *slotB = slotA;
944          while (slotB > md->name_table)
945            {
946            slotB -= md->name_entry_size;
947            if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
948              {
949              offset = GET2(slotB, 0) << 1;
950              condition = offset < offset_top &&
951                md->offset_vector[offset] >= 0;
952              if (condition) break;
953              }
954            else break;
955            }
956
957          /* Scan up for duplicates */
958
959          if (!condition)
960            {
961            slotB = slotA;
962            for (i++; i < md->name_count; i++)
963              {
964              slotB += md->name_entry_size;
965              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966                {
967                offset = GET2(slotB, 0) << 1;
968                condition = offset < offset_top &&
969                  md->offset_vector[offset] >= 0;
970                if (condition) break;
971                }
972              else break;
973              }
974            }
975          }
976        }
977
978      /* Choose branch according to the condition */
979
980      ecode += condition? 3 : GET(ecode, 1);
981      }
982
983    else if (condcode == OP_DEF)     /* DEFINE - always false */
984      {
985      condition = FALSE;
986      ecode += GET(ecode, 1);
987      }
988
989    /* The condition is an assertion. Call match() to evaluate it - setting
990    the final argument match_condassert causes it to stop at the end of an
991    assertion. */
992
993    else
994      {
995      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
996          match_condassert, RM3);
997      if (rrc == MATCH_MATCH)
998        {
999        condition = TRUE;
1000        ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1001        while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1002        }
1003      else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1004        {
1005        RRETURN(rrc);         /* Need braces because of following else */
1006        }
1007      else
1008        {
1009        condition = FALSE;
1010        ecode += codelink;
1011        }
1012      }
1013
1014    /* We are now at the branch that is to be obeyed. As there is only one,
1015    we can use tail recursion to avoid using another stack frame, except when
1016    match_cbegroup is required for an unlimited repeat of a possibly empty
1017    group. If the second alternative doesn't exist, we can just plough on. */
1018
1019    if (condition || *ecode == OP_ALT)
1020      {
1021      ecode += 1 + LINK_SIZE;
1022      if (op == OP_SCOND)        /* Possibly empty group */
1023        {
1024        RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1025        RRETURN(rrc);
1026        }
1027      else                       /* Group must match something */
1028        {
1029        flags = 0;
1030        goto TAIL_RECURSE;
1031        }
1032      }
1033    else                         /* Condition false & no alternative */
1034      {
1035      ecode += 1 + LINK_SIZE;
1036      }
1037    break;
1038
1039
1040    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1041    to close any currently open capturing brackets. */
1042
1043    case OP_CLOSE:
1044    number = GET2(ecode, 1);
1045    offset = number << 1;
1046
1047#ifdef PCRE_DEBUG
1048      printf("end bracket %d at *ACCEPT", number);
1049      printf("\n");
1050#endif
1051
1052    md->capture_last = number;
1053    if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1054      {
1055      md->offset_vector[offset] =
1056        md->offset_vector[md->offset_end - number];
1057      md->offset_vector[offset+1] = eptr - md->start_subject;
1058      if (offset_top <= offset) offset_top = offset + 2;
1059      }
1060    ecode += 3;
1061    break;
1062
1063
1064    /* End of the pattern, either real or forced. If we are in a top-level
1065    recursion, we should restore the offsets appropriately and continue from
1066    after the call. */
1067
1068    case OP_ACCEPT:
1069    case OP_END:
1070    if (md->recursive != NULL && md->recursive->group_num == 0)
1071      {
1072      recursion_info *rec = md->recursive;
1073      DPRINTF(("End of pattern in a (?0) recursion\n"));
1074      md->recursive = rec->prevrec;
1075      memmove(md->offset_vector, rec->offset_save,
1076        rec->saved_max * sizeof(int));
1077      offset_top = rec->save_offset_top;
1078      ims = original_ims;
1079      ecode = rec->after_call;
1080      break;
1081      }
1082
1083    /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1084    set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1085    the subject. In both cases, backtracking will then try other alternatives,
1086    if any. */
1087
1088    if (eptr == mstart &&
1089        (md->notempty ||
1090          (md->notempty_atstart &&
1091            mstart == md->start_subject + md->start_offset)))
1092      RRETURN(MATCH_NOMATCH);
1093
1094    /* Otherwise, we have a match. */
1095
1096    md->end_match_ptr = eptr;           /* Record where we ended */
1097    md->end_offset_top = offset_top;    /* and how many extracts were taken */
1098    md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1099    RRETURN(MATCH_MATCH);
1100
1101    /* Change option settings */
1102
1103    case OP_OPT:
1104    ims = ecode[1];
1105    ecode += 2;
1106    DPRINTF(("ims set to %02lx\n", ims));
1107    break;
1108
1109    /* Assertion brackets. Check the alternative branches in turn - the
1110    matching won't pass the KET for an assertion. If any one branch matches,
1111    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1112    start of each branch to move the current point backwards, so the code at
1113    this level is identical to the lookahead case. */
1114
1115    case OP_ASSERT:
1116    case OP_ASSERTBACK:
1117    do
1118      {
1119      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1120        RM4);
1121      if (rrc == MATCH_MATCH)
1122        {
1123        mstart = md->start_match_ptr;   /* In case \K reset it */
1124        break;
1125        }
1126      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1127      ecode += GET(ecode, 1);
1128      }
1129    while (*ecode == OP_ALT);
1130    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1131
1132    /* If checking an assertion for a condition, return MATCH_MATCH. */
1133
1134    if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1135
1136    /* Continue from after the assertion, updating the offsets high water
1137    mark, since extracts may have been taken during the assertion. */
1138
1139    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1140    ecode += 1 + LINK_SIZE;
1141    offset_top = md->end_offset_top;
1142    continue;
1143
1144    /* Negative assertion: all branches must fail to match. Encountering SKIP,
1145    PRUNE, or COMMIT means we must assume failure without checking subsequent
1146    branches. */
1147
1148    case OP_ASSERT_NOT:
1149    case OP_ASSERTBACK_NOT:
1150    do
1151      {
1152      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1153        RM5);
1154      if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1155      if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1156        {
1157        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1158        break;
1159        }
1160      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1161      ecode += GET(ecode,1);
1162      }
1163    while (*ecode == OP_ALT);
1164
1165    if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1166
1167    ecode += 1 + LINK_SIZE;
1168    continue;
1169
1170    /* Move the subject pointer back. This occurs only at the start of
1171    each branch of a lookbehind assertion. If we are too close to the start to
1172    move back, this match function fails. When working with UTF-8 we move
1173    back a number of characters, not bytes. */
1174
1175    case OP_REVERSE:
1176#ifdef SUPPORT_UTF8
1177    if (utf8)
1178      {
1179      i = GET(ecode, 1);
1180      while (i-- > 0)
1181        {
1182        eptr--;
1183        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1184        BACKCHAR(eptr);
1185        }
1186      }
1187    else
1188#endif
1189
1190    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1191
1192      {
1193      eptr -= GET(ecode, 1);
1194      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1195      }
1196
1197    /* Save the earliest consulted character, then skip to next op code */
1198
1199    if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1200    ecode += 1 + LINK_SIZE;
1201    break;
1202
1203    /* The callout item calls an external function, if one is provided, passing
1204    details of the match so far. This is mainly for debugging, though the
1205    function is able to force a failure. */
1206
1207    case OP_CALLOUT:
1208    if (pcre_callout != NULL)
1209      {
1210      pcre_callout_block cb;
1211      cb.version          = 1;   /* Version 1 of the callout block */
1212      cb.callout_number   = ecode[1];
1213      cb.offset_vector    = md->offset_vector;
1214      cb.subject          = (PCRE_SPTR)md->start_subject;
1215      cb.subject_length   = md->end_subject - md->start_subject;
1216      cb.start_match      = mstart - md->start_subject;
1217      cb.current_position = eptr - md->start_subject;
1218      cb.pattern_position = GET(ecode, 2);
1219      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1220      cb.capture_top      = offset_top/2;
1221      cb.capture_last     = md->capture_last;
1222      cb.callout_data     = md->callout_data;
1223      if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1224      if (rrc < 0) RRETURN(rrc);
1225      }
1226    ecode += 2 + 2*LINK_SIZE;
1227    break;
1228
1229    /* Recursion either matches the current regex, or some subexpression. The
1230    offset data is the offset to the starting bracket from the start of the
1231    whole pattern. (This is so that it works from duplicated subpatterns.)
1232
1233    If there are any capturing brackets started but not finished, we have to
1234    save their starting points and reinstate them after the recursion. However,
1235    we don't know how many such there are (offset_top records the completed
1236    total) so we just have to save all the potential data. There may be up to
1237    65535 such values, which is too large to put on the stack, but using malloc
1238    for small numbers seems expensive. As a compromise, the stack is used when
1239    there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1240    is used. If the malloc fails, the offset data cannot be saved, so the
1241    recursion is not attempted and the match is abandoned at this point by
1242    returning PCRE_ERROR_NOMEMORY.
1243
1244    There are also other values that have to be saved. We use a chained
1245    sequence of blocks that actually live on the stack. Thanks to Robin Houston
1246    for the original version of this logic. */
1247
1248    case OP_RECURSE:
1249      {
1250      callpat = md->start_code + GET(ecode, 1);
1251      new_recursive.group_num = (callpat == md->start_code)? 0 :
1252        GET2(callpat, 1 + LINK_SIZE);
1253
1254      /* Add to "recursing stack" */
1255
1256      new_recursive.prevrec = md->recursive;
1257      md->recursive = &new_recursive;
1258
1259      /* Find where to continue from afterwards */
1260
1261      ecode += 1 + LINK_SIZE;
1262      new_recursive.after_call = ecode;
1263
1264      /* Now save the offset data. */
1265
1266      new_recursive.saved_max = md->offset_end;
1267      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1268        new_recursive.offset_save = stacksave;
1269      else
1270        {
1271        new_recursive.offset_save =
1272          (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1273        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1274        }
1275
1276      memcpy(new_recursive.offset_save, md->offset_vector,
1277            new_recursive.saved_max * sizeof(int));
1278      new_recursive.save_offset_top = offset_top;
1279
1280      /* OK, now we can do the recursion. For each top-level alternative we
1281      restore the offset and recursion data. */
1282
1283      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1284      flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1285      do
1286        {
1287        RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1288          md, ims, eptrb, flags, RM6);
1289        if (rrc == MATCH_MATCH)
1290          {
1291          DPRINTF(("Recursion matched\n"));
1292          md->recursive = new_recursive.prevrec;
1293          if (new_recursive.offset_save != stacksave)
1294            (pcre_free)(new_recursive.offset_save);
1295          RRETURN(MATCH_MATCH);
1296          }
1297        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1298          {
1299          DPRINTF(("Recursion gave error %d\n", rrc));
1300          if (new_recursive.offset_save != stacksave)
1301            (pcre_free)(new_recursive.offset_save);
1302          RRETURN(rrc);
1303          }
1304
1305        md->recursive = &new_recursive;
1306        memcpy(md->offset_vector, new_recursive.offset_save,
1307            new_recursive.saved_max * sizeof(int));
1308        callpat += GET(callpat, 1);
1309        }
1310      while (*callpat == OP_ALT);
1311
1312      DPRINTF(("Recursion didn't match\n"));
1313      md->recursive = new_recursive.prevrec;
1314      if (new_recursive.offset_save != stacksave)
1315        (pcre_free)(new_recursive.offset_save);
1316      RRETURN(MATCH_NOMATCH);
1317      }
1318    /* Control never reaches here */
1319
1320    /* "Once" brackets are like assertion brackets except that after a match,
1321    the point in the subject string is not moved back. Thus there can never be
1322    a move back into the brackets. Friedl calls these "atomic" subpatterns.
1323    Check the alternative branches in turn - the matching won't pass the KET
1324    for this kind of subpattern. If any one branch matches, we carry on as at
1325    the end of a normal bracket, leaving the subject pointer, but resetting
1326    the start-of-match value in case it was changed by \K. */
1327
1328    case OP_ONCE:
1329    prev = ecode;
1330    saved_eptr = eptr;
1331
1332    do
1333      {
1334      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1335      if (rrc == MATCH_MATCH)
1336        {
1337        mstart = md->start_match_ptr;
1338        break;
1339        }
1340      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1341      ecode += GET(ecode,1);
1342      }
1343    while (*ecode == OP_ALT);
1344
1345    /* If we hit the end of the group (which could be repeated), fail */
1346
1347    if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1348
1349    /* Continue as from after the assertion, updating the offsets high water
1350    mark, since extracts may have been taken. */
1351
1352    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1353
1354    offset_top = md->end_offset_top;
1355    eptr = md->end_match_ptr;
1356
1357    /* For a non-repeating ket, just continue at this level. This also
1358    happens for a repeating ket if no characters were matched in the group.
1359    This is the forcible breaking of infinite loops as implemented in Perl
1360    5.005. If there is an options reset, it will get obeyed in the normal
1361    course of events. */
1362
1363    if (*ecode == OP_KET || eptr == saved_eptr)
1364      {
1365      ecode += 1+LINK_SIZE;
1366      break;
1367      }
1368
1369    /* The repeating kets try the rest of the pattern or restart from the
1370    preceding bracket, in the appropriate order. The second "call" of match()
1371    uses tail recursion, to avoid using another stack frame. We need to reset
1372    any options that changed within the bracket before re-running it, so check
1373    the opcode after the ket; OP_OPT's data byte is at offset 2 + LINK_SIZE. */
1374
1375    if (ecode[1+LINK_SIZE] == OP_OPT)
1376      {
1377      ims = (ims & ~PCRE_IMS) | ecode[2+LINK_SIZE];
1378      DPRINTF(("ims set to %02lx at group repeat\n", ims));
1379      }
1380
1381    if (*ecode == OP_KETRMIN)
1382      {
1383      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1384      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1385      ecode = prev;
1386      flags = 0;
1387      goto TAIL_RECURSE;
1388      }
1389    else  /* OP_KETRMAX */
1390      {
1391      RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1392      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1393      ecode += 1 + LINK_SIZE;
1394      flags = 0;
1395      goto TAIL_RECURSE;
1396      }
1397    /* Control never gets here */
1398
1399    /* An alternation is the end of a branch; scan along to find the end of the
1400    bracketed group and go to there. */
1401
1402    case OP_ALT:
1403    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1404    break;
1405
1406    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1407    indicating that it may occur zero times. It may repeat infinitely, or not
1408    at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1409    with fixed upper repeat limits are compiled as a number of copies, with the
1410    optional ones preceded by BRAZERO or BRAMINZERO. */
1411
1412    case OP_BRAZERO:
1413      {
1414      next = ecode+1;
1415      RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1416      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1417      do next += GET(next,1); while (*next == OP_ALT);
1418      ecode = next + 1 + LINK_SIZE;
1419      }
1420    break;
1421
1422    case OP_BRAMINZERO:
1423      {
1424      next = ecode+1;
1425      do next += GET(next, 1); while (*next == OP_ALT);
1426      RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1427      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1428      ecode++;
1429      }
1430    break;
1431
1432    case OP_SKIPZERO:
1433      {
1434      next = ecode+1;
1435      do next += GET(next,1); while (*next == OP_ALT);
1436      ecode = next + 1 + LINK_SIZE;
1437      }
1438    break;
1439
1440    /* End of a group, repeated or non-repeating. */
1441
1442    case OP_KET:
1443    case OP_KETRMIN:
1444    case OP_KETRMAX:
1445    prev = ecode - GET(ecode, 1);
1446
1447    /* If this was a group that remembered the subject start, in order to break
1448    infinite repeats of empty string matches, retrieve the subject start from
1449    the chain. Otherwise, set it NULL. */
1450
1451    if (*prev >= OP_SBRA)
1452      {
1453      saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1454      eptrb = eptrb->epb_prev;              /* Backup to previous group */
1455      }
1456    else saved_eptr = NULL;
1457
1458    /* If we are at the end of an assertion group or an atomic group, stop
1459    matching and return MATCH_MATCH, but record the current high water mark for
1460    use by positive assertions. We also need to record the match start in case
1461    it was changed by \K. */
1462
1463    if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1464        *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1465        *prev == OP_ONCE)
1466      {
1467      md->end_match_ptr = eptr;      /* For ONCE */
1468      md->end_offset_top = offset_top;
1469      md->start_match_ptr = mstart;
1470      RRETURN(MATCH_MATCH);
1471      }
1472
1473    /* For capturing groups we have to check the group number back at the start
1474    and if necessary complete handling an extraction by setting the offsets and
1475    bumping the high water mark. Note that whole-pattern recursion is coded as
1476    a recurse into group 0, so it won't be picked up here. Instead, we catch it
1477    when the OP_END is reached. Other recursion is handled here. */
1478
1479    if (*prev == OP_CBRA || *prev == OP_SCBRA)
1480      {
1481      number = GET2(prev, 1+LINK_SIZE);
1482      offset = number << 1;
1483
1484#ifdef PCRE_DEBUG
1485      printf("end bracket %d", number);
1486      printf("\n");
1487#endif
1488
1489      md->capture_last = number;
1490      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1491        {
1492        md->offset_vector[offset] =
1493          md->offset_vector[md->offset_end - number];
1494        md->offset_vector[offset+1] = eptr - md->start_subject;
1495        if (offset_top <= offset) offset_top = offset + 2;
1496        }
1497
1498      /* Handle a recursively called group. Restore the offsets
1499      appropriately and continue from after the call. */
1500
1501      if (md->recursive != NULL && md->recursive->group_num == number)
1502        {
1503        recursion_info *rec = md->recursive;
1504        DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1505        md->recursive = rec->prevrec;
1506        memcpy(md->offset_vector, rec->offset_save,
1507          rec->saved_max * sizeof(int));
1508        offset_top = rec->save_offset_top;
1509        ecode = rec->after_call;
1510        ims = original_ims;
1511        break;
1512        }
1513      }
1514
1515    /* For both capturing and non-capturing groups, reset the value of the ims
1516    flags, in case they got changed during the group. */
1517
1518    ims = original_ims;
1519    DPRINTF(("ims reset to %02lx\n", ims));
1520
1521    /* For a non-repeating ket, just continue at this level. This also
1522    happens for a repeating ket if no characters were matched in the group.
1523    This is the forcible breaking of infinite loops as implemented in Perl
1524    5.005. If there is an options reset, it will get obeyed in the normal
1525    course of events. */
1526
1527    if (*ecode == OP_KET || eptr == saved_eptr)
1528      {
1529      ecode += 1 + LINK_SIZE;
1530      break;
1531      }
1532
1533    /* The repeating kets try the rest of the pattern or restart from the
1534    preceding bracket, in the appropriate order. In the second case, we can use
1535    tail recursion to avoid using another stack frame, unless we have an
1536    unlimited repeat of a group that can match an empty string. */
1537
1538    flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1539
1540    if (*ecode == OP_KETRMIN)
1541      {
1542      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1543      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1544      if (flags != 0)    /* Could match an empty string */
1545        {
1546        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1547        RRETURN(rrc);
1548        }
1549      ecode = prev;
1550      goto TAIL_RECURSE;
1551      }
1552    else  /* OP_KETRMAX */
1553      {
1554      RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1555      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1556      ecode += 1 + LINK_SIZE;
1557      flags = 0;
1558      goto TAIL_RECURSE;
1559      }
1560    /* Control never gets here */
1561
1562    /* Start of subject unless notbol, or after internal newline if multiline */
1563
1564    case OP_CIRC:
1565    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1566    if ((ims & PCRE_MULTILINE) != 0)
1567      {
1568      if (eptr != md->start_subject &&
1569          (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1570        RRETURN(MATCH_NOMATCH);
1571      ecode++;
1572      break;
1573      }
1574    /* ... else fall through */
1575
1576    /* Start of subject assertion */
1577
1578    case OP_SOD:
1579    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1580    ecode++;
1581    break;
1582
1583    /* Start of match assertion */
1584
1585    case OP_SOM:
1586    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1587    ecode++;
1588    break;
1589
1590    /* Reset the start of match point */
1591
1592    case OP_SET_SOM:
1593    mstart = eptr;
1594    ecode++;
1595    break;
1596
1597    /* Assert before internal newline if multiline, or before a terminating
1598    newline unless endonly is set, else end of subject unless noteol is set. */
1599
1600    case OP_DOLL:
1601    if ((ims & PCRE_MULTILINE) != 0)
1602      {
1603      if (eptr < md->end_subject)
1604        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1605      else
1606        { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1607      ecode++;
1608      break;
1609      }
1610    else
1611      {
1612      if (md->noteol) RRETURN(MATCH_NOMATCH);
1613      if (!md->endonly)
1614        {
1615        if (eptr != md->end_subject &&
1616            (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1617          RRETURN(MATCH_NOMATCH);
1618        ecode++;
1619        break;
1620        }
1621      }
1622    /* ... else fall through for endonly */
1623
1624    /* End of subject assertion (\z) */
1625
1626    case OP_EOD:
1627    if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1628    ecode++;
1629    break;
1630
1631    /* End of subject or ending \n assertion (\Z) */
1632
1633    case OP_EODN:
1634    if (eptr != md->end_subject &&
1635        (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1636      RRETURN(MATCH_NOMATCH);
1637    ecode++;
1638    break;
1639
1640    /* Word boundary assertions */
1641
1642    case OP_NOT_WORD_BOUNDARY:
1643    case OP_WORD_BOUNDARY:
1644      {
1645
1646      /* Find out if the previous and current characters are "word" characters.
1647      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1648      be "non-word" characters. Remember the earliest consulted character for
1649      partial matching. */
1650
1651#ifdef SUPPORT_UTF8
1652      if (utf8)
1653        {
1654        if (eptr == md->start_subject) prev_is_word = FALSE; else
1655          {
1656          USPTR lastptr = eptr - 1;
1657          while((*lastptr & 0xc0) == 0x80) lastptr--;
1658          if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1659          GETCHAR(c, lastptr);
1660          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1661          }
1662        if (eptr >= md->end_subject)
1663          {
1664          SCHECK_PARTIAL();
1665          cur_is_word = FALSE;
1666          }
1667        else
1668          {
1669          GETCHAR(c, eptr);
1670          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1671          }
1672        }
1673      else
1674#endif
1675
1676      /* Not in UTF-8 mode */
1677
1678        {
1679        if (eptr == md->start_subject) prev_is_word = FALSE; else
1680          {
1681          if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1682          prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1683          }
1684        if (eptr >= md->end_subject)
1685          {
1686          SCHECK_PARTIAL();
1687          cur_is_word = FALSE;
1688          }
1689        else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1690        }
1691
1692      /* Now see if the situation is what we want */
1693
1694      if ((*ecode++ == OP_WORD_BOUNDARY)?
1695           cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1696        RRETURN(MATCH_NOMATCH);
1697      }
1698    break;
1699
1700    /* Match a single character type; inline for speed */
1701
1702    case OP_ANY:
1703    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1704    /* Fall through */
1705
1706    case OP_ALLANY:
1707    if (eptr++ >= md->end_subject)
1708      {
1709      SCHECK_PARTIAL();
1710      RRETURN(MATCH_NOMATCH);
1711      }
1712    if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1713    ecode++;
1714    break;
1715
1716    /* Match a single byte, even in UTF-8 mode. This opcode really does match
1717    any byte, even newline, independent of the setting of PCRE_DOTALL. */
1718
1719    case OP_ANYBYTE:
1720    if (eptr++ >= md->end_subject)
1721      {
1722      SCHECK_PARTIAL();
1723      RRETURN(MATCH_NOMATCH);
1724      }
1725    ecode++;
1726    break;
1727
1728    case OP_NOT_DIGIT:
1729    if (eptr >= md->end_subject)
1730      {
1731      SCHECK_PARTIAL();
1732      RRETURN(MATCH_NOMATCH);
1733      }
1734    GETCHARINCTEST(c, eptr);
1735    if (
1736#ifdef SUPPORT_UTF8
1737       c < 256 &&
1738#endif
1739       (md->ctypes[c] & ctype_digit) != 0
1740       )
1741      RRETURN(MATCH_NOMATCH);
1742    ecode++;
1743    break;
1744
1745    case OP_DIGIT:
1746    if (eptr >= md->end_subject)
1747      {
1748      SCHECK_PARTIAL();
1749      RRETURN(MATCH_NOMATCH);
1750      }
1751    GETCHARINCTEST(c, eptr);
1752    if (
1753#ifdef SUPPORT_UTF8
1754       c >= 256 ||
1755#endif
1756       (md->ctypes[c] & ctype_digit) == 0
1757       )
1758      RRETURN(MATCH_NOMATCH);
1759    ecode++;
1760    break;
1761
1762    case OP_NOT_WHITESPACE:
1763    if (eptr >= md->end_subject)
1764      {
1765      SCHECK_PARTIAL();
1766      RRETURN(MATCH_NOMATCH);
1767      }
1768    GETCHARINCTEST(c, eptr);
1769    if (
1770#ifdef SUPPORT_UTF8
1771       c < 256 &&
1772#endif
1773       (md->ctypes[c] & ctype_space) != 0
1774       )
1775      RRETURN(MATCH_NOMATCH);
1776    ecode++;
1777    break;
1778
1779    case OP_WHITESPACE:
1780    if (eptr >= md->end_subject)
1781      {
1782      SCHECK_PARTIAL();
1783      RRETURN(MATCH_NOMATCH);
1784      }
1785    GETCHARINCTEST(c, eptr);
1786    if (
1787#ifdef SUPPORT_UTF8
1788       c >= 256 ||
1789#endif
1790       (md->ctypes[c] & ctype_space) == 0
1791       )
1792      RRETURN(MATCH_NOMATCH);
1793    ecode++;
1794    break;
1795
1796    case OP_NOT_WORDCHAR:
1797    if (eptr >= md->end_subject)
1798      {
1799      SCHECK_PARTIAL();
1800      RRETURN(MATCH_NOMATCH);
1801      }
1802    GETCHARINCTEST(c, eptr);
1803    if (
1804#ifdef SUPPORT_UTF8
1805       c < 256 &&
1806#endif
1807       (md->ctypes[c] & ctype_word) != 0
1808       )
1809      RRETURN(MATCH_NOMATCH);
1810    ecode++;
1811    break;
1812
1813    case OP_WORDCHAR:
1814    if (eptr >= md->end_subject)
1815      {
1816      SCHECK_PARTIAL();
1817      RRETURN(MATCH_NOMATCH);
1818      }
1819    GETCHARINCTEST(c, eptr);
1820    if (
1821#ifdef SUPPORT_UTF8
1822       c >= 256 ||
1823#endif
1824       (md->ctypes[c] & ctype_word) == 0
1825       )
1826      RRETURN(MATCH_NOMATCH);
1827    ecode++;
1828    break;
1829
1830    case OP_ANYNL:
1831    if (eptr >= md->end_subject)
1832      {
1833      SCHECK_PARTIAL();
1834      RRETURN(MATCH_NOMATCH);
1835      }
1836    GETCHARINCTEST(c, eptr);
1837    switch(c)
1838      {
1839      default: RRETURN(MATCH_NOMATCH);
1840      case 0x000d:
1841      if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1842      break;
1843
1844      case 0x000a:
1845      break;
1846
1847      case 0x000b:
1848      case 0x000c:
1849      case 0x0085:
1850      case 0x2028:
1851      case 0x2029:
1852      if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1853      break;
1854      }
1855    ecode++;
1856    break;
1857
1858    case OP_NOT_HSPACE:
1859    if (eptr >= md->end_subject)
1860      {
1861      SCHECK_PARTIAL();
1862      RRETURN(MATCH_NOMATCH);
1863      }
1864    GETCHARINCTEST(c, eptr);
1865    switch(c)
1866      {
1867      default: break;
1868      case 0x09:      /* HT */
1869      case 0x20:      /* SPACE */
1870      case 0xa0:      /* NBSP */
1871      case 0x1680:    /* OGHAM SPACE MARK */
1872      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1873      case 0x2000:    /* EN QUAD */
1874      case 0x2001:    /* EM QUAD */
1875      case 0x2002:    /* EN SPACE */
1876      case 0x2003:    /* EM SPACE */
1877      case 0x2004:    /* THREE-PER-EM SPACE */
1878      case 0x2005:    /* FOUR-PER-EM SPACE */
1879      case 0x2006:    /* SIX-PER-EM SPACE */
1880      case 0x2007:    /* FIGURE SPACE */
1881      case 0x2008:    /* PUNCTUATION SPACE */
1882      case 0x2009:    /* THIN SPACE */
1883      case 0x200A:    /* HAIR SPACE */
1884      case 0x202f:    /* NARROW NO-BREAK SPACE */
1885      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1886      case 0x3000:    /* IDEOGRAPHIC SPACE */
1887      RRETURN(MATCH_NOMATCH);
1888      }
1889    ecode++;
1890    break;
1891
1892    case OP_HSPACE:
1893    if (eptr >= md->end_subject)
1894      {
1895      SCHECK_PARTIAL();
1896      RRETURN(MATCH_NOMATCH);
1897      }
1898    GETCHARINCTEST(c, eptr);
1899    switch(c)
1900      {
1901      default: RRETURN(MATCH_NOMATCH);
1902      case 0x09:      /* HT */
1903      case 0x20:      /* SPACE */
1904      case 0xa0:      /* NBSP */
1905      case 0x1680:    /* OGHAM SPACE MARK */
1906      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1907      case 0x2000:    /* EN QUAD */
1908      case 0x2001:    /* EM QUAD */
1909      case 0x2002:    /* EN SPACE */
1910      case 0x2003:    /* EM SPACE */
1911      case 0x2004:    /* THREE-PER-EM SPACE */
1912      case 0x2005:    /* FOUR-PER-EM SPACE */
1913      case 0x2006:    /* SIX-PER-EM SPACE */
1914      case 0x2007:    /* FIGURE SPACE */
1915      case 0x2008:    /* PUNCTUATION SPACE */
1916      case 0x2009:    /* THIN SPACE */
1917      case 0x200A:    /* HAIR SPACE */
1918      case 0x202f:    /* NARROW NO-BREAK SPACE */
1919      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1920      case 0x3000:    /* IDEOGRAPHIC SPACE */
1921      break;
1922      }
1923    ecode++;
1924    break;
1925
1926    case OP_NOT_VSPACE:
1927    if (eptr >= md->end_subject)
1928      {
1929      SCHECK_PARTIAL();
1930      RRETURN(MATCH_NOMATCH);
1931      }
1932    GETCHARINCTEST(c, eptr);
1933    switch(c)
1934      {
1935      default: break;
1936      case 0x0a:      /* LF */
1937      case 0x0b:      /* VT */
1938      case 0x0c:      /* FF */
1939      case 0x0d:      /* CR */
1940      case 0x85:      /* NEL */
1941      case 0x2028:    /* LINE SEPARATOR */
1942      case 0x2029:    /* PARAGRAPH SEPARATOR */
1943      RRETURN(MATCH_NOMATCH);
1944      }
1945    ecode++;
1946    break;
1947
1948    case OP_VSPACE:
1949    if (eptr >= md->end_subject)
1950      {
1951      SCHECK_PARTIAL();
1952      RRETURN(MATCH_NOMATCH);
1953      }
1954    GETCHARINCTEST(c, eptr);
1955    switch(c)
1956      {
1957      default: RRETURN(MATCH_NOMATCH);
1958      case 0x0a:      /* LF */
1959      case 0x0b:      /* VT */
1960      case 0x0c:      /* FF */
1961      case 0x0d:      /* CR */
1962      case 0x85:      /* NEL */
1963      case 0x2028:    /* LINE SEPARATOR */
1964      case 0x2029:    /* PARAGRAPH SEPARATOR */
1965      break;
1966      }
1967    ecode++;
1968    break;
1969
1970#ifdef SUPPORT_UCP
1971    /* Check the next character by Unicode property. We will get here only
1972    if the support is in the binary; otherwise a compile-time error occurs. */
1973
1974    case OP_PROP:
1975    case OP_NOTPROP:
1976    if (eptr >= md->end_subject)
1977      {
1978      SCHECK_PARTIAL();
1979      RRETURN(MATCH_NOMATCH);
1980      }
1981    GETCHARINCTEST(c, eptr);
1982      {
1983      const ucd_record *prop = GET_UCD(c);
1984
1985      switch(ecode[1])
1986        {
1987        case PT_ANY:
1988        if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1989        break;
1990
1991        case PT_LAMP:
1992        if ((prop->chartype == ucp_Lu ||
1993             prop->chartype == ucp_Ll ||
1994             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1995          RRETURN(MATCH_NOMATCH);
1996         break;
1997
1998        case PT_GC:
1999        if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2000          RRETURN(MATCH_NOMATCH);
2001        break;
2002
2003        case PT_PC:
2004        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2005          RRETURN(MATCH_NOMATCH);
2006        break;
2007
2008        case PT_SC:
2009        if ((ecode[2] != prop->script) == (op == OP_PROP))
2010          RRETURN(MATCH_NOMATCH);
2011        break;
2012
2013        default:
2014        RRETURN(PCRE_ERROR_INTERNAL);
2015        }
2016
2017      ecode += 3;
2018      }
2019    break;
2020
2021    /* Match an extended Unicode sequence. We will get here only if the support
2022    is in the binary; otherwise a compile-time error occurs. */
2023
2024    case OP_EXTUNI:
2025    if (eptr >= md->end_subject)
2026      {
2027      SCHECK_PARTIAL();
2028      RRETURN(MATCH_NOMATCH);
2029      }
2030    GETCHARINCTEST(c, eptr);
2031      {
2032      int category = UCD_CATEGORY(c);
2033      if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2034      while (eptr < md->end_subject)
2035        {
2036        int len = 1;
2037        if (!utf8) c = *eptr; else
2038          {
2039          GETCHARLEN(c, eptr, len);
2040          }
2041        category = UCD_CATEGORY(c);
2042        if (category != ucp_M) break;
2043        eptr += len;
2044        }
2045      }
2046    ecode++;
2047    break;
2048#endif
2049
2050
2051    /* Match a back reference, possibly repeatedly. Look past the end of the
2052    item to see if there is repeat information following. The code is similar
2053    to that for character classes, but repeated for efficiency. Then obey
2054    similar code to character type repeats - written out again for speed.
2055    However, if the referenced string is the empty string, always treat
2056    it as matched, any number of times (otherwise there could be infinite
2057    loops). */
2058
2059    case OP_REF:
2060      {
2061      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2062      ecode += 3;
2063
2064      /* If the reference is unset, there are two possibilities:
2065
2066      (a) In the default, Perl-compatible state, set the length to be longer
2067      than the amount of subject left; this ensures that every attempt at a
2068      match fails. We can't just fail here, because of the possibility of
2069      quantifiers with zero minima.
2070
2071      (b) If the JavaScript compatibility flag is set, set the length to zero
2072      so that the back reference matches an empty string.
2073
2074      Otherwise, set the length to the length of what was matched by the
2075      referenced subpattern. */
2076
2077      if (offset >= offset_top || md->offset_vector[offset] < 0)
2078        length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2079      else
2080        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2081
2082      /* Set up for repetition, or handle the non-repeated case */
2083
2084      switch (*ecode)
2085        {
2086        case OP_CRSTAR:
2087        case OP_CRMINSTAR:
2088        case OP_CRPLUS:
2089        case OP_CRMINPLUS:
2090        case OP_CRQUERY:
2091        case OP_CRMINQUERY:
2092        c = *ecode++ - OP_CRSTAR;
2093        minimize = (c & 1) != 0;
2094        min = rep_min[c];                 /* Pick up values from tables; */
2095        max = rep_max[c];                 /* zero for max => infinity */
2096        if (max == 0) max = INT_MAX;
2097        break;
2098
2099        case OP_CRRANGE:
2100        case OP_CRMINRANGE:
2101        minimize = (*ecode == OP_CRMINRANGE);
2102        min = GET2(ecode, 1);
2103        max = GET2(ecode, 3);
2104        if (max == 0) max = INT_MAX;
2105        ecode += 5;
2106        break;
2107
2108        default:               /* No repeat follows */
2109        if (!match_ref(offset, eptr, length, md, ims))
2110          {
2111          CHECK_PARTIAL();
2112          RRETURN(MATCH_NOMATCH);
2113          }
2114        eptr += length;
2115        continue;              /* With the main loop */
2116        }
2117
2118      /* If the length of the reference is zero, just continue with the
2119      main loop. */
2120
2121      if (length == 0) continue;
2122
2123      /* First, ensure the minimum number of matches are present. We get back
2124      the length of the reference string explicitly rather than passing the
2125      address of eptr, so that eptr can be a register variable. */
2126
2127      for (i = 1; i <= min; i++)
2128        {
2129        if (!match_ref(offset, eptr, length, md, ims))
2130          {
2131          CHECK_PARTIAL();
2132          RRETURN(MATCH_NOMATCH);
2133          }
2134        eptr += length;
2135        }
2136
2137      /* If min = max, continue at the same level without recursion.
2138      They are not both allowed to be zero. */
2139
2140      if (min == max) continue;
2141
2142      /* If minimizing, keep trying and advancing the pointer */
2143
2144      if (minimize)
2145        {
2146        for (fi = min;; fi++)
2147          {
2148          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2149          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2150          if (fi >= max) RRETURN(MATCH_NOMATCH);
2151          if (!match_ref(offset, eptr, length, md, ims))
2152            {
2153            CHECK_PARTIAL();
2154            RRETURN(MATCH_NOMATCH);
2155            }
2156          eptr += length;
2157          }
2158        /* Control never gets here */
2159        }
2160
2161      /* If maximizing, find the longest string and work backwards */
2162
2163      else
2164        {
2165        pp = eptr;
2166        for (i = min; i < max; i++)
2167          {
2168          if (!match_ref(offset, eptr, length, md, ims))
2169            {
2170            CHECK_PARTIAL();
2171            break;
2172            }
2173          eptr += length;
2174          }
2175        while (eptr >= pp)
2176          {
2177          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2178          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2179          eptr -= length;
2180          }
2181        RRETURN(MATCH_NOMATCH);
2182        }
2183      }
2184    /* Control never gets here */
2185
2186    /* Match a bit-mapped character class, possibly repeatedly. This op code is
2187    used when all the characters in the class have values in the range 0-255,
2188    and either the matching is caseful, or the characters are in the range
2189    0-127 when UTF-8 processing is enabled. The only difference between
2190    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2191    encountered.
2192
2193    First, look past the end of the item to see if there is repeat information
2194    following. Then obey similar code to character type repeats - written out
2195    again for speed. */
2196
2197    case OP_NCLASS:
2198    case OP_CLASS:
2199      {
2200      data = ecode + 1;                /* Save for matching */
2201      ecode += 33;                     /* Advance past the item */
2202
2203      switch (*ecode)
2204        {
2205        case OP_CRSTAR:
2206        case OP_CRMINSTAR:
2207        case OP_CRPLUS:
2208        case OP_CRMINPLUS:
2209        case OP_CRQUERY:
2210        case OP_CRMINQUERY:
2211        c = *ecode++ - OP_CRSTAR;
2212        minimize = (c & 1) != 0;
2213        min = rep_min[c];                 /* Pick up values from tables; */
2214        max = rep_max[c];                 /* zero for max => infinity */
2215        if (max == 0) max = INT_MAX;
2216        break;
2217
2218        case OP_CRRANGE:
2219        case OP_CRMINRANGE:
2220        minimize = (*ecode == OP_CRMINRANGE);
2221        min = GET2(ecode, 1);
2222        max = GET2(ecode, 3);
2223        if (max == 0) max = INT_MAX;
2224        ecode += 5;
2225        break;
2226
2227        default:               /* No repeat follows */
2228        min = max = 1;
2229        break;
2230        }
2231
2232      /* First, ensure the minimum number of matches are present. */
2233
2234#ifdef SUPPORT_UTF8
2235      /* UTF-8 mode */
2236      if (utf8)
2237        {
2238        for (i = 1; i <= min; i++)
2239          {
2240          if (eptr >= md->end_subject)
2241            {
2242            SCHECK_PARTIAL();
2243            RRETURN(MATCH_NOMATCH);
2244            }
2245          GETCHARINC(c, eptr);
2246          if (c > 255)
2247            {
2248            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2249            }
2250          else
2251            {
2252            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2253            }
2254          }
2255        }
2256      else
2257#endif
2258      /* Not UTF-8 mode */
2259        {
2260        for (i = 1; i <= min; i++)
2261          {
2262          if (eptr >= md->end_subject)
2263            {
2264            SCHECK_PARTIAL();
2265            RRETURN(MATCH_NOMATCH);
2266            }
2267          c = *eptr++;
2268          if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2269          }
2270        }
2271
2272      /* If max == min we can continue with the main loop without the
2273      need to recurse. */
2274
2275      if (min == max) continue;
2276
2277      /* If minimizing, keep testing the rest of the expression and advancing
2278      the pointer while it matches the class. */
2279
2280      if (minimize)
2281        {
2282#ifdef SUPPORT_UTF8
2283        /* UTF-8 mode */
2284        if (utf8)
2285          {
2286          for (fi = min;; fi++)
2287            {
2288            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2289            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2290            if (fi >= max) RRETURN(MATCH_NOMATCH);
2291            if (eptr >= md->end_subject)
2292              {
2293              SCHECK_PARTIAL();
2294              RRETURN(MATCH_NOMATCH);
2295              }
2296            GETCHARINC(c, eptr);
2297            if (c > 255)
2298              {
2299              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2300              }
2301            else
2302              {
2303              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2304              }
2305            }
2306          }
2307        else
2308#endif
2309        /* Not UTF-8 mode */
2310          {
2311          for (fi = min;; fi++)
2312            {
2313            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2314            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315            if (fi >= max) RRETURN(MATCH_NOMATCH);
2316            if (eptr >= md->end_subject)
2317              {
2318              SCHECK_PARTIAL();
2319              RRETURN(MATCH_NOMATCH);
2320              }
2321            c = *eptr++;
2322            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2323            }
2324          }
2325        /* Control never gets here */
2326        }
2327
2328      /* If maximizing, find the longest possible run, then work backwards. */
2329
2330      else
2331        {
2332        pp = eptr;
2333
2334#ifdef SUPPORT_UTF8
2335        /* UTF-8 mode */
2336        if (utf8)
2337          {
2338          for (i = min; i < max; i++)
2339            {
2340            int len = 1;
2341            if (eptr >= md->end_subject)
2342              {
2343              SCHECK_PARTIAL();
2344              break;
2345              }
2346            GETCHARLEN(c, eptr, len);
2347            if (c > 255)
2348              {
2349              if (op == OP_CLASS) break;
2350              }
2351            else
2352              {
2353              if ((data[c/8] & (1 << (c&7))) == 0) break;
2354              }
2355            eptr += len;
2356            }
2357          for (;;)
2358            {
2359            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2360            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2361            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2362            BACKCHAR(eptr);
2363            }
2364          }
2365        else
2366#endif
2367          /* Not UTF-8 mode */
2368          {
2369          for (i = min; i < max; i++)
2370            {
2371            if (eptr >= md->end_subject)
2372              {
2373              SCHECK_PARTIAL();
2374              break;
2375              }
2376            c = *eptr;
2377            if ((data[c/8] & (1 << (c&7))) == 0) break;
2378            eptr++;
2379            }
2380          while (eptr >= pp)
2381            {
2382            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2383            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2384            eptr--;
2385            }
2386          }
2387
2388        RRETURN(MATCH_NOMATCH);
2389        }
2390      }
2391    /* Control never gets here */
2392
2393
2394    /* Match an extended character class. This opcode is encountered only
2395    when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2396    mode, because Unicode properties are supported in non-UTF-8 mode. */
2397
2398#ifdef SUPPORT_UTF8
2399    case OP_XCLASS:
2400      {
2401      data = ecode + 1 + LINK_SIZE;                /* Save for matching */
2402      ecode += GET(ecode, 1);                      /* Advance past the item */
2403
2404      switch (*ecode)
2405        {
2406        case OP_CRSTAR:
2407        case OP_CRMINSTAR:
2408        case OP_CRPLUS:
2409        case OP_CRMINPLUS:
2410        case OP_CRQUERY:
2411        case OP_CRMINQUERY:
2412        c = *ecode++ - OP_CRSTAR;
2413        minimize = (c & 1) != 0;
2414        min = rep_min[c];                 /* Pick up values from tables; */
2415        max = rep_max[c];                 /* zero for max => infinity */
2416        if (max == 0) max = INT_MAX;
2417        break;
2418
2419        case OP_CRRANGE:
2420        case OP_CRMINRANGE:
2421        minimize = (*ecode == OP_CRMINRANGE);
2422        min = GET2(ecode, 1);
2423        max = GET2(ecode, 3);
2424        if (max == 0) max = INT_MAX;
2425        ecode += 5;
2426        break;
2427
2428        default:               /* No repeat follows */
2429        min = max = 1;
2430        break;
2431        }
2432
2433      /* First, ensure the minimum number of matches are present. */
2434
2435      for (i = 1; i <= min; i++)
2436        {
2437        if (eptr >= md->end_subject)
2438          {
2439          SCHECK_PARTIAL();
2440          RRETURN(MATCH_NOMATCH);
2441          }
2442        GETCHARINCTEST(c, eptr);
2443        if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2444        }
2445
2446      /* If max == min we can continue with the main loop without the
2447      need to recurse. */
2448
2449      if (min == max) continue;
2450
2451      /* If minimizing, keep testing the rest of the expression and advancing
2452      the pointer while it matches the class. */
2453
2454      if (minimize)
2455        {
2456        for (fi = min;; fi++)
2457          {
2458          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2459          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2460          if (fi >= max) RRETURN(MATCH_NOMATCH);
2461          if (eptr >= md->end_subject)
2462            {
2463            SCHECK_PARTIAL();
2464            RRETURN(MATCH_NOMATCH);
2465            }
2466          GETCHARINCTEST(c, eptr);
2467          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2468          }
2469        /* Control never gets here */
2470        }
2471
2472      /* If maximizing, find the longest possible run, then work backwards. */
2473
2474      else
2475        {
2476        pp = eptr;
2477        for (i = min; i < max; i++)
2478          {
2479          int len = 1;
2480          if (eptr >= md->end_subject)
2481            {
2482            SCHECK_PARTIAL();
2483            break;
2484            }
2485          GETCHARLENTEST(c, eptr, len);
2486          if (!_pcre_xclass(c, data)) break;
2487          eptr += len;
2488          }
2489        for(;;)
2490          {
2491          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2492          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2493          if (eptr-- == pp) break;        /* Stop if tried at original pos */
2494          if (utf8) BACKCHAR(eptr);
2495          }
2496        RRETURN(MATCH_NOMATCH);
2497        }
2498
2499      /* Control never gets here */
2500      }
2501#endif    /* End of XCLASS */
2502
2503    /* Match a single character, casefully */
2504
2505    case OP_CHAR:
2506#ifdef SUPPORT_UTF8
2507    if (utf8)
2508      {
2509      length = 1;
2510      ecode++;
2511      GETCHARLEN(fc, ecode, length);
2512      if (length > md->end_subject - eptr)
2513        {
2514        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2515        RRETURN(MATCH_NOMATCH);
2516        }
2517      while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2518      }
2519    else
2520#endif
2521
2522    /* Non-UTF-8 mode */
2523      {
2524      if (md->end_subject - eptr < 1)
2525        {
2526        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2527        RRETURN(MATCH_NOMATCH);
2528        }
2529      if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2530      ecode += 2;
2531      }
2532    break;
2533
2534    /* Match a single character, caselessly */
2535
2536    case OP_CHARNC:
2537#ifdef SUPPORT_UTF8
2538    if (utf8)
2539      {
2540      length = 1;
2541      ecode++;
2542      GETCHARLEN(fc, ecode, length);
2543
2544      if (length > md->end_subject - eptr)
2545        {
2546        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2547        RRETURN(MATCH_NOMATCH);
2548        }
2549
2550      /* If the pattern character's value is < 128, we have only one byte, and
2551      can use the fast lookup table. */
2552
2553      if (fc < 128)
2554        {
2555        if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2556        }
2557
2558      /* Otherwise we must pick up the subject character */
2559
2560      else
2561        {
2562        unsigned int dc;
2563        GETCHARINC(dc, eptr);
2564        ecode += length;
2565
2566        /* If we have Unicode property support, we can use it to test the other
2567        case of the character, if there is one. */
2568
2569        if (fc != dc)
2570          {
2571#ifdef SUPPORT_UCP
2572          if (dc != UCD_OTHERCASE(fc))
2573#endif
2574            RRETURN(MATCH_NOMATCH);
2575          }
2576        }
2577      }
2578    else
2579#endif   /* SUPPORT_UTF8 */
2580
2581    /* Non-UTF-8 mode */
2582      {
2583      if (md->end_subject - eptr < 1)
2584        {
2585        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2586        RRETURN(MATCH_NOMATCH);
2587        }
2588      if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2589      ecode += 2;
2590      }
2591    break;
2592
2593    /* Match a single character repeatedly. */
2594
2595    case OP_EXACT:
2596    min = max = GET2(ecode, 1);
2597    ecode += 3;
2598    goto REPEATCHAR;
2599
2600    case OP_POSUPTO:
2601    possessive = TRUE;
2602    /* Fall through */
2603
2604    case OP_UPTO:
2605    case OP_MINUPTO:
2606    min = 0;
2607    max = GET2(ecode, 1);
2608    minimize = *ecode == OP_MINUPTO;
2609    ecode += 3;
2610    goto REPEATCHAR;
2611
2612    case OP_POSSTAR:
2613    possessive = TRUE;
2614    min = 0;
2615    max = INT_MAX;
2616    ecode++;
2617    goto REPEATCHAR;
2618
2619    case OP_POSPLUS:
2620    possessive = TRUE;
2621    min = 1;
2622    max = INT_MAX;
2623    ecode++;
2624    goto REPEATCHAR;
2625
2626    case OP_POSQUERY:
2627    possessive = TRUE;
2628    min = 0;
2629    max = 1;
2630    ecode++;
2631    goto REPEATCHAR;
2632
2633    case OP_STAR:
2634    case OP_MINSTAR:
2635    case OP_PLUS:
2636    case OP_MINPLUS:
2637    case OP_QUERY:
2638    case OP_MINQUERY:
2639    c = *ecode++ - OP_STAR;
2640    minimize = (c & 1) != 0;
2641
2642    min = rep_min[c];                 /* Pick up values from tables; */
2643    max = rep_max[c];                 /* zero for max => infinity */
2644    if (max == 0) max = INT_MAX;
2645
2646    /* Common code for all repeated single-character matches. */
2647
2648    REPEATCHAR:
2649#ifdef SUPPORT_UTF8
2650    if (utf8)
2651      {
2652      length = 1;
2653      charptr = ecode;
2654      GETCHARLEN(fc, ecode, length);
2655      ecode += length;
2656
2657      /* Handle multibyte character matching specially here. There is
2658      support for caseless matching if UCP support is present. */
2659
2660      if (length > 1)
2661        {
2662#ifdef SUPPORT_UCP
2663        unsigned int othercase;
2664        if ((ims & PCRE_CASELESS) != 0 &&
2665            (othercase = UCD_OTHERCASE(fc)) != fc)
2666          oclength = _pcre_ord2utf8(othercase, occhars);
2667        else oclength = 0;
2668#endif  /* SUPPORT_UCP */
2669
2670        for (i = 1; i <= min; i++)
2671          {
2672          if (eptr <= md->end_subject - length &&
2673            memcmp(eptr, charptr, length) == 0) eptr += length;
2674#ifdef SUPPORT_UCP
2675          else if (oclength > 0 &&
2676                   eptr <= md->end_subject - oclength &&
2677                   memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2678#endif  /* SUPPORT_UCP */
2679          else
2680            {
2681            CHECK_PARTIAL();
2682            RRETURN(MATCH_NOMATCH);
2683            }
2684          }
2685
2686        if (min == max) continue;
2687
2688        if (minimize)
2689          {
2690          for (fi = min;; fi++)
2691            {
2692            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2693            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2694            if (fi >= max) RRETURN(MATCH_NOMATCH);
2695            if (eptr <= md->end_subject - length &&
2696              memcmp(eptr, charptr, length) == 0) eptr += length;
2697#ifdef SUPPORT_UCP
2698            else if (oclength > 0 &&
2699                     eptr <= md->end_subject - oclength &&
2700                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2701#endif  /* SUPPORT_UCP */
2702            else
2703              {
2704              CHECK_PARTIAL();
2705              RRETURN(MATCH_NOMATCH);
2706              }
2707            }
2708          /* Control never gets here */
2709          }
2710
2711        else  /* Maximize */
2712          {
2713          pp = eptr;
2714          for (i = min; i < max; i++)
2715            {
2716            if (eptr <= md->end_subject - length &&
2717                memcmp(eptr, charptr, length) == 0) eptr += length;
2718#ifdef SUPPORT_UCP
2719            else if (oclength > 0 &&
2720                     eptr <= md->end_subject - oclength &&
2721                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2722#endif  /* SUPPORT_UCP */
2723            else
2724              {
2725              CHECK_PARTIAL();
2726              break;
2727              }
2728            }
2729
2730          if (possessive) continue;
2731
2732          for(;;)
2733            {
2734            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2735            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2736            if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2737#ifdef SUPPORT_UCP
2738            eptr--;
2739            BACKCHAR(eptr);
2740#else   /* without SUPPORT_UCP */
2741            eptr -= length;
2742#endif  /* SUPPORT_UCP */
2743            }
2744          }
2745        /* Control never gets here */
2746        }
2747
2748      /* If the length of a UTF-8 character is 1, we fall through here, and
2749      obey the code as for non-UTF-8 characters below, though in this case the
2750      value of fc will always be < 128. */
2751      }
2752    else
2753#endif  /* SUPPORT_UTF8 */
2754
2755    /* When not in UTF-8 mode, load a single-byte character. */
2756
2757    fc = *ecode++;
2758
2759    /* The value of fc at this point is always less than 256, though we may or
2760    may not be in UTF-8 mode. The code is duplicated for the caseless and
2761    caseful cases, for speed, since matching characters is likely to be quite
2762    common. First, ensure the minimum number of matches are present. If min =
2763    max, continue at the same level without recursing. Otherwise, if
2764    minimizing, keep trying the rest of the expression and advancing one
2765    matching character if failing, up to the maximum. Alternatively, if
2766    maximizing, find the maximum number of characters and work backwards. */
2767
2768    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2769      max, eptr));
2770
2771    if ((ims & PCRE_CASELESS) != 0)
2772      {
          /* Caseless: fold the pattern byte through md->lcc once here; every
          subject byte is folded through the same table before it is compared. */
2773      fc = md->lcc[fc];
          /* Mandatory part: the first min subject bytes must all match. */
2774      for (i = 1; i <= min; i++)
2775        {
2776        if (eptr >= md->end_subject)
2777          {
              /* Subject exhausted. SCHECK_PARTIAL() is a macro defined outside
              this chunk (presumably partial-match handling - confirm); if it
              falls through, this path reports no match. */
2778          SCHECK_PARTIAL();
2779          RRETURN(MATCH_NOMATCH);
2780          }
2781        if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2782        }
          /* Exact repeat count: nothing optional remains for this item. */
2783      if (min == max) continue;
2784      if (minimize)
2785        {
            /* Lazy repeat: try the rest of the pattern first (RMATCH sets rrc).
            Only when that gives MATCH_NOMATCH is one more matching byte
            consumed, and never more than max in total (fi counts them). */
2786        for (fi = min;; fi++)
2787          {
2788          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2789          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2790          if (fi >= max) RRETURN(MATCH_NOMATCH);
2791          if (eptr >= md->end_subject)
2792            {
2793            SCHECK_PARTIAL();
2794            RRETURN(MATCH_NOMATCH);
2795            }
2796          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2797          }
2798        /* Control never gets here */
2799        }
2800      else  /* Maximize */
2801        {
            /* pp marks where the optional part starts; the back-off loop below
            never retries at a position before it. */
2802        pp = eptr;
            /* Greedy scan: advance over matching bytes until max is reached, the
            subject ends, or a byte differs. The byte is tested before eptr is
            moved, so eptr is left on the first non-matching byte. */
2803        for (i = min; i < max; i++)
2804          {
2805          if (eptr >= md->end_subject)
2806            {
2807            SCHECK_PARTIAL();
2808            break;
2809            }
2810          if (fc != md->lcc[*eptr]) break;
2811          eptr++;
2812          }
2813
            /* Possessive repeat: keep everything consumed and skip the back-off
            loop entirely. */
2814        if (possessive) continue;
2815
            /* Back off one byte at a time, retrying the rest of the pattern at
            each position from the furthest one reached down to pp. eptr is
            decremented before rrc is tested here.
            NOTE(review): the negated-character loops test rrc first; presumably
            equivalent because RRETURN leaves this frame - confirm. */
2816        while (eptr >= pp)
2817          {
2818          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2819          eptr--;
2820          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2821          }
2822        RRETURN(MATCH_NOMATCH);
2823        }
2824      /* Control never gets here */
2825      }
2826
2827    /* Caseful comparisons (includes all multi-byte characters) */
        /* Same three phases as the caseless branch above (mandatory part, lazy
        loop, greedy scan with back-off), but subject bytes are compared with fc
        directly instead of through md->lcc. */
2828
2829    else
2830      {
2831      for (i = 1; i <= min; i++)
2832        {
2833        if (eptr >= md->end_subject)
2834          {
2835          SCHECK_PARTIAL();
2836          RRETURN(MATCH_NOMATCH);
2837          }
2838        if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2839        }
2840
2841      if (min == max) continue;
2842
2843      if (minimize)
2844        {
            /* Lazy: rest of pattern first, then one more byte per iteration. */
2845        for (fi = min;; fi++)
2846          {
2847          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2848          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2849          if (fi >= max) RRETURN(MATCH_NOMATCH);
2850          if (eptr >= md->end_subject)
2851            {
2852            SCHECK_PARTIAL();
2853            RRETURN(MATCH_NOMATCH);
2854            }
2855          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2856          }
2857        /* Control never gets here */
2858        }
2859      else  /* Maximize */
2860        {
            /* Greedy: remember the start in pp, consume up to max matching
            bytes, then (unless possessive) back off towards pp. */
2861        pp = eptr;
2862        for (i = min; i < max; i++)
2863          {
2864          if (eptr >= md->end_subject)
2865            {
2866            SCHECK_PARTIAL();
2867            break;
2868            }
2869          if (fc != *eptr) break;
2870          eptr++;
2871          }
2872        if (possessive) continue;
2873
2874        while (eptr >= pp)
2875          {
2876          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2877          eptr--;
2878          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2879          }
2880        RRETURN(MATCH_NOMATCH);
2881        }
2882      }
2883    /* Control never gets here */
2884
2885    /* Match a negated single one-byte character. The character we are
2886    checking can be multibyte. */
        /* As read below: ecode++ steps over the opcode byte, and the byte after
        it is the character that must NOT appear at the current subject
        position. That byte is consumed from the pattern by the *ecode++ in the
        comparison itself. */
2887
2888    case OP_NOT:
2889    if (eptr >= md->end_subject)
2890      {
          /* Nothing left in the subject. SCHECK_PARTIAL() is defined outside
          this chunk (presumably partial-match handling - confirm); if it falls
          through, the match fails. */
2891      SCHECK_PARTIAL();
2892      RRETURN(MATCH_NOMATCH);
2893      }
2894    ecode++;
        /* GETCHARINCTEST is defined outside this chunk; presumably it loads the
        next subject character into c and advances eptr, decoding UTF-8 only
        when in UTF-8 mode - confirm against its definition. */
2895    GETCHARINCTEST(c, eptr);
2896    if ((ims & PCRE_CASELESS) != 0)
2897      {
          /* With UTF-8 support compiled in, only values below 256 are folded
          through md->lcc: the conditional `if` guards the assignment that
          follows the #endif. Without it the assignment is unconditional. */
2898#ifdef SUPPORT_UTF8
2899      if (c < 256)
2900#endif
2901      c = md->lcc[c];
2902      if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2903      }
2904    else
2905      {
2906      if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2907      }
2908    break;
2909
2910    /* Match a negated single one-byte character repeatedly. This is almost a
2911    repeat of the code for a repeated single character, but I haven't found a
2912    nice way of commoning these up that doesn't require a test of the
2913    positive/negative option for each character match. Maybe that wouldn't add
2914    very much to the time taken, but character matching *is* what this is all
2915    about... */
        /* Each entry below fills in min and max (plus minimize or possessive
        where applicable), steps ecode past the opcode and any count bytes, and
        then reaches the REPEATNOTCHAR label that follows these cases.
        GET2 is defined outside this chunk; presumably it reads the two count
        bytes that follow the opcode, which is consistent with the ecode += 3
        skips below - confirm. */
2916
2917    case OP_NOTEXACT:
2918    min = max = GET2(ecode, 1);
2919    ecode += 3;
2920    goto REPEATNOTCHAR;
2921
2922    case OP_NOTUPTO:
2923    case OP_NOTMINUPTO:
2924    min = 0;
2925    max = GET2(ecode, 1);
        /* Must be read before ecode is advanced: *ecode is still the opcode. */
2926    minimize = *ecode == OP_NOTMINUPTO;
2927    ecode += 3;
2928    goto REPEATNOTCHAR;
2929
        /* Possessive variants: same limits as the star, plus, query and upto
        forms, with possessive set so the maximizing code skips its back-off
        loop. */
2930    case OP_NOTPOSSTAR:
2931    possessive = TRUE;
2932    min = 0;
2933    max = INT_MAX;
2934    ecode++;
2935    goto REPEATNOTCHAR;
2936
2937    case OP_NOTPOSPLUS:
2938    possessive = TRUE;
2939    min = 1;
2940    max = INT_MAX;
2941    ecode++;
2942    goto REPEATNOTCHAR;
2943
2944    case OP_NOTPOSQUERY:
2945    possessive = TRUE;
2946    min = 0;
2947    max = 1;
2948    ecode++;
2949    goto REPEATNOTCHAR;
2950
2951    case OP_NOTPOSUPTO:
2952    possessive = TRUE;
2953    min = 0;
2954    max = GET2(ecode, 1);
2955    ecode += 3;
2956    goto REPEATNOTCHAR;
2957
        /* c becomes the opcode's offset from OP_NOTSTAR. Odd offsets select the
        minimizing variants; rep_min[] and rep_max[] (defined outside this
        chunk) supply the limits, with a zero max standing for "no limit".
        Control then falls through to the REPEATNOTCHAR label. */
2958    case OP_NOTSTAR:
2959    case OP_NOTMINSTAR:
2960    case OP_NOTPLUS:
2961    case OP_NOTMINPLUS:
2962    case OP_NOTQUERY:
2963    case OP_NOTMINQUERY:
2964    c = *ecode++ - OP_NOTSTAR;
2965    minimize = (c & 1) != 0;
2966    min = rep_min[c];                 /* Pick up values from tables; */
2967    max = rep_max[c];                 /* zero for max => infinity */
2968    if (max == 0) max = INT_MAX;
2969
2970    /* Common code for all repeated single-byte matches. */
        /* Entered with min, max, minimize and possessive already set and ecode
        pointing at the excluded character, which is loaded into fc. A subject
        character "matches" here when it is different from fc. */
2971
2972    REPEATNOTCHAR:
2973    fc = *ecode++;
2974
2975    /* The code is duplicated for the caseless and caseful cases, for speed,
2976    since matching characters is likely to be quite common. First, ensure the
2977    minimum number of matches are present. If min = max, continue at the same
2978    level without recursing. Otherwise, if minimizing, keep trying the rest of
2979    the expression and advancing one matching character if failing, up to the
2980    maximum. Alternatively, if maximizing, find the maximum number of
2981    characters and work backwards. */
2982
2983    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2984      max, eptr));
2985
2986    if ((ims & PCRE_CASELESS) != 0)
2987      {
          /* Caseless: fold the excluded byte once; subject characters are
          folded through md->lcc before comparison (in UTF-8 mode only those
          below 256 are folded). */
2988      fc = md->lcc[fc];
2989
2990#ifdef SUPPORT_UTF8
2991      /* UTF-8 mode */
2992      if (utf8)
2993        {
2994        register unsigned int d;
            /* Mandatory part: each of the first min characters must differ from
            fc. GETCHARINC is defined outside this chunk; presumably it decodes
            one UTF-8 character into d and advances eptr past it - confirm. */
2995        for (i = 1; i <= min; i++)
2996          {
2997          if (eptr >= md->end_subject)
2998            {
                /* Subject exhausted. SCHECK_PARTIAL() is defined outside this
                chunk (presumably partial-match handling - confirm); if it
                falls through, this path reports no match. */
2999            SCHECK_PARTIAL();
3000            RRETURN(MATCH_NOMATCH);
3001            }
3002          GETCHARINC(d, eptr);
3003          if (d < 256) d = md->lcc[d];
3004          if (fc == d) RRETURN(MATCH_NOMATCH);
3005          }
3006        }
3007      else
3008#endif
3009
3010      /* Not UTF-8 mode */
3011        {
3012        for (i = 1; i <= min; i++)
3013          {
3014          if (eptr >= md->end_subject)
3015            {
3016            SCHECK_PARTIAL();
3017            RRETURN(MATCH_NOMATCH);
3018            }
3019          if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3020          }
3021        }
3022
          /* Exact repeat count: nothing optional remains for this item. */
3023      if (min == max) continue;
3024
3025      if (minimize)
3026        {
3027#ifdef SUPPORT_UTF8
3028        /* UTF-8 mode */
3029        if (utf8)
3030          {
3031          register unsigned int d;
              /* Lazy repeat: try the rest of the pattern first (RMATCH sets
              rrc); only on MATCH_NOMATCH consume one more character that
              differs from fc, never exceeding max in total. */
3032          for (fi = min;; fi++)
3033            {
3034            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3035            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3036            if (fi >= max) RRETURN(MATCH_NOMATCH);
3037            if (eptr >= md->end_subject)
3038              {
3039              SCHECK_PARTIAL();
3040              RRETURN(MATCH_NOMATCH);
3041              }
3042            GETCHARINC(d, eptr);
3043            if (d < 256) d = md->lcc[d];
3044            if (fc == d) RRETURN(MATCH_NOMATCH);
3045            }
3046          }
3047        else
3048#endif
3049        /* Not UTF-8 mode */
3050          {
3051          for (fi = min;; fi++)
3052            {
3053            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3054            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055            if (fi >= max) RRETURN(MATCH_NOMATCH);
3056            if (eptr >= md->end_subject)
3057              {
3058              SCHECK_PARTIAL();
3059              RRETURN(MATCH_NOMATCH);
3060              }
3061            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3062            }
3063          }
3064        /* Control never gets here */
3065        }
3066
3067      /* Maximize case */
3068
3069      else
3070        {
            /* pp marks where the optional part starts; back-off stops there. */
3071        pp = eptr;
3072
3073#ifdef SUPPORT_UTF8
3074        /* UTF-8 mode */
3075        if (utf8)
3076          {
3077          register unsigned int d;
              /* Greedy scan. The character is examined before eptr moves:
              GETCHARLEN (defined outside this chunk) presumably decodes the
              character at eptr into d and adjusts len so that eptr += len
              steps over the whole character - confirm. */
3078          for (i = min; i < max; i++)
3079            {
3080            int len = 1;
3081            if (eptr >= md->end_subject)
3082              {
3083              SCHECK_PARTIAL();
3084              break;
3085              }
3086            GETCHARLEN(d, eptr, len);
3087            if (d < 256) d = md->lcc[d];
3088            if (fc == d) break;
3089            eptr += len;
3090            }
            /* Possessive repeat: keep everything consumed, no back-off. */
3091        if (possessive) continue;
            /* Back off one character at a time. The comparison with pp happens
            before the decrement, so the original position is tried once and
            then the loop ends. BACKCHAR is defined outside this chunk;
            presumably it moves eptr back to the first byte of the character it
            now points into - confirm. */
3092        for(;;)
3093            {
3094            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3095            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3096            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3097            BACKCHAR(eptr);
3098            }
3099          }
3100        else
3101#endif
3102        /* Not UTF-8 mode */
3103          {
3104          for (i = min; i < max; i++)
3105            {
3106            if (eptr >= md->end_subject)
3107              {
3108              SCHECK_PARTIAL();
3109              break;
3110              }
3111            if (fc == md->lcc[*eptr]) break;
3112            eptr++;
3113            }
3114          if (possessive) continue;
              /* Single-byte back-off: retry at every position from the furthest
              one reached down to pp. */
3115          while (eptr >= pp)
3116            {
3117            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3118            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3119            eptr--;
3120            }
3121          }
3122
3123        RRETURN(MATCH_NOMATCH);
3124        }
3125      /* Control never gets here */
3126      }
3127
3128    /* Caseful comparisons */
        /* Same structure as the caseless branch above, but characters are
        compared with fc directly, without the md->lcc folding step. */
3129
3130    else
3131      {
3132#ifdef SUPPORT_UTF8
3133      /* UTF-8 mode */
3134      if (utf8)
3135        {
3136        register unsigned int d;
            /* Mandatory part: the first min characters must all differ from fc. */
3137        for (i = 1; i <= min; i++)
3138          {
3139          if (eptr >= md->end_subject)
3140            {
3141            SCHECK_PARTIAL();
3142            RRETURN(MATCH_NOMATCH);
3143            }
3144          GETCHARINC(d, eptr);
3145          if (fc == d) RRETURN(MATCH_NOMATCH);
3146          }
3147        }
3148      else
3149#endif
3150      /* Not UTF-8 mode */
3151        {
3152        for (i = 1; i <= min; i++)
3153          {
3154          if (eptr >= md->end_subject)
3155            {
3156            SCHECK_PARTIAL();
3157            RRETURN(MATCH_NOMATCH);
3158            }
3159          if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3160          }
3161        }
3162
3163      if (min == max) continue;
3164
3165      if (minimize)
3166        {
3167#ifdef SUPPORT_UTF8
3168        /* UTF-8 mode */
3169        if (utf8)
3170          {
3171          register unsigned int d;
              /* Lazy: rest of pattern first, then one more character each time
              round, up to max. */
3172          for (fi = min;; fi++)
3173            {
3174            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3175            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3176            if (fi >= max) RRETURN(MATCH_NOMATCH);
3177            if (eptr >= md->end_subject)
3178              {
3179              SCHECK_PARTIAL();
3180              RRETURN(MATCH_NOMATCH);
3181              }
3182            GETCHARINC(d, eptr);
3183            if (fc == d) RRETURN(MATCH_NOMATCH);
3184            }
3185          }
3186        else
3187#endif
3188        /* Not UTF-8 mode */
3189          {
3190          for (fi = min;; fi++)
3191            {
3192            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3193            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3194            if (fi >= max) RRETURN(MATCH_NOMATCH);
3195            if (eptr >= md->end_subject)
3196              {
3197              SCHECK_PARTIAL();
3198              RRETURN(MATCH_NOMATCH);
3199              }
3200            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3201            }
3202          }
3203        /* Control never gets here */
3204        }
3205
3206      /* Maximize case */
3207
3208      else
3209        {
3210        pp = eptr;
3211
3212#ifdef SUPPORT_UTF8
3213        /* UTF-8 mode */
3214        if (utf8)
3215          {
3216          register unsigned int d;
              /* Greedy scan over whole characters; eptr is left on the first
              character equal to fc, or at the end of the subject, or wherever
              max was reached. */
3217          for (i = min; i < max; i++)
3218            {
3219            int len = 1;
3220            if (eptr >= md->end_subject)
3221              {
3222              SCHECK_PARTIAL();
3223              break;
3224              }
3225            GETCHARLEN(d, eptr, len);
3226            if (fc == d) break;
3227            eptr += len;
3228            }
3229          if (possessive) continue;
              /* Back off by whole characters; pp is compared before the
              decrement so the original position is still tried once. */
3230          for(;;)
3231            {
3232            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3233            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3234            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3235            BACKCHAR(eptr);
3236            }
3237          }
3238        else
3239#endif
3240        /* Not UTF-8 mode */
3241          {
3242          for (i = min; i < max; i++)
3243            {
3244            if (eptr >= md->end_subject)
3245              {
3246              SCHECK_PARTIAL();
3247              break;
3248              }
3249            if (fc == *eptr) break;
3250            eptr++;
3251            }
3252          if (possessive) continue;
3253          while (eptr >= pp)
3254            {
3255            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3256            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3257            eptr--;
3258            }
3259          }
3260
3261        RRETURN(MATCH_NOMATCH);
3262        }
3263      }
3264    /* Control never gets here */
3265
3266    /* Match a single character type repeatedly; several different opcodes
3267    share code. This is very similar to the code for single characters, but we
3268    repeat it in the interests of efficiency. */
        /* Each entry below fills in min and max (plus minimize or possessive
        where applicable), steps ecode past the opcode and any count bytes, and
        then reaches the REPEATTYPE label. GET2 is defined outside this chunk;
        presumably it reads the two count bytes that follow the opcode, which
        is consistent with the ecode += 3 skips - confirm. */
3269
3270    case OP_TYPEEXACT:
3271    min = max = GET2(ecode, 1);
        /* NOTE(review): minimize is forced on here although min == max; the code
        that consumes it lies beyond this chunk, so the reason is not visible. */
3272    minimize = TRUE;
3273    ecode += 3;
3274    goto REPEATTYPE;
3275
3276    case OP_TYPEUPTO:
3277    case OP_TYPEMINUPTO:
3278    min = 0;
3279    max = GET2(ecode, 1);
        /* Must be read before ecode is advanced: *ecode is still the opcode. */
3280    minimize = *ecode == OP_TYPEMINUPTO;
3281    ecode += 3;
3282    goto REPEATTYPE;
3283
        /* Possessive variants: same limits as the star, plus, query and upto
        forms, with the possessive flag set. */
3284    case OP_TYPEPOSSTAR:
3285    possessive = TRUE;
3286    min = 0;
3287    max = INT_MAX;
3288    ecode++;
3289    goto REPEATTYPE;
3290
3291    case OP_TYPEPOSPLUS:
3292    possessive = TRUE;
3293    min = 1;
3294    max = INT_MAX;
3295    ecode++;
3296    goto REPEATTYPE;
3297
3298    case OP_TYPEPOSQUERY:
3299    possessive = TRUE;
3300    min = 0;
3301    max = 1;
3302    ecode++;
3303    goto REPEATTYPE;
3304
3305    case OP_TYPEPOSUPTO:
3306    possessive = TRUE;
3307    min = 0;
3308    max = GET2(ecode, 1);
3309    ecode += 3;
3310    goto REPEATTYPE;
3311
        /* c becomes the opcode's offset from OP_TYPESTAR. Odd offsets select the
        minimizing variants; rep_min[] and rep_max[] (defined outside this
        chunk) supply the limits, with a zero max standing for "no limit".
        Control then falls through to REPEATTYPE. */
3312    case OP_TYPESTAR:
3313    case OP_TYPEMINSTAR:
3314    case OP_TYPEPLUS:
3315    case OP_TYPEMINPLUS:
3316    case OP_TYPEQUERY:
3317    case OP_TYPEMINQUERY:
3318    c = *ecode++ - OP_TYPESTAR;
3319    minimize = (c & 1) != 0;
3320    min = rep_min[c];                 /* Pick up values from tables; */
3321    max = rep_max[c];                 /* zero for max => infinity */
3322    if (max == 0) max = INT_MAX;
3323
3324    /* Common code for all repeated single character type matches. Note that
3325    in UTF-8 mode, '.' matches a character of any length, but for the other
3326    character types, the valid characters are all one-byte long. */
3327
3328    REPEATTYPE:
3329    ctype = *ecode++;      /* Code for the character type */
3330
        /* A property test carries two further bytes after the type code: the
        property type and the property value. prop_fail_result is TRUE for the
        negated form (OP_NOTPROP). For every other type prop_type is set to -1,
        which the code after this point uses to tell "not a property test"
        apart (it checks prop_type >= 0). */
3331#ifdef SUPPORT_UCP
3332    if (ctype == OP_PROP || ctype == OP_NOTPROP)
3333      {
3334      prop_fail_result = ctype == OP_NOTPROP;
3335      prop_type = *ecode++;
3336      prop_value = *ecode++;
3337      }
3338    else prop_type = -1;
3339#endif
3340
3341    /* First, ensure the minimum number of matches are present. Use inline
3342    code for maximizing the speed, and do the type test once at the start
3343    (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3344    is tidier. Also separate the UCP code, which can be the same for both UTF-8
3345    and single-bytes. */
3346
3347    if (min > 0)
3348      {
3349#ifdef SUPPORT_UCP
3350      if (prop_type >= 0)
3351        {
3352        switch(prop_type)
3353          {
3354          case PT_ANY:
3355          if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3356          for (i = 1; i <= min; i++)
3357            {
3358            if (eptr >= md->end_subject)
3359              {
3360              SCHECK_PARTIAL();
3361              RRETURN(MATCH_NOMATCH);
3362              }
3363            GETCHARINCTEST(c, eptr);
3364            }
3365          break;
3366
3367          case PT_LAMP:
3368          for (i = 1; i <= min; i++)
3369            {
3370            if (eptr >= md->end_subject)
3371              {
3372              SCHECK_PARTIAL();
3373              RRETURN(MATCH_NOMATCH);
3374              }
3375            GETCHARINCTEST(c, eptr);
3376            prop_chartype = UCD_CHARTYPE(c);
3377            if ((prop_chartype == ucp_Lu ||
3378                 prop_chartype == ucp_Ll ||
3379                 prop_chartype == ucp_Lt) == prop_fail_result)
3380              RRETURN(MATCH_NOMATCH);
3381            }
3382          break;
3383
3384          case PT_GC:
3385          for (i = 1; i <= min; i++)
3386            {
3387            if (eptr >= md->end_subject)
3388              {
3389              SCHECK_PARTIAL();
3390              RRETURN(MATCH_NOMATCH);
3391              }
3392            GETCHARINCTEST(c, eptr);
3393            prop_category = UCD_CATEGORY(c);
3394            if ((prop_category == prop_value) == prop_fail_result)
3395              RRETURN(MATCH_NOMATCH);
3396            }
3397          break;
3398
3399          case PT_PC:
3400          for (i = 1; i <= min; i++)
3401            {
3402            if (eptr >= md->end_subject)
3403              {
3404              SCHECK_PARTIAL();
3405              RRETURN(MATCH_NOMATCH);
3406              }
3407            GETCHARINCTEST(c, eptr);
3408            prop_chartype = UCD_CHARTYPE(c);
3409            if ((prop_chartype == prop_value) == prop_fail_result)
3410              RRETURN(MATCH_NOMATCH);
3411            }
3412          break;
3413
3414          case PT_SC:
3415          for (i = 1; i <= min; i++)
3416            {
3417            if (eptr >= md->end_subject)
3418              {
3419              SCHECK_PARTIAL();
3420              RRETURN(MATCH_NOMATCH);
3421              }
3422            GETCHARINCTEST(c, eptr);
3423            prop_script = UCD_SCRIPT(c);
3424            if ((prop_script == prop_value) == prop_fail_result)
3425              RRETURN(MATCH_NOMATCH);
3426            }
3427          break;
3428
3429          default:
3430          RRETURN(PCRE_ERROR_INTERNAL);
3431          }
3432        }
3433
3434      /* Match extended Unicode sequences. We will get here only if the
3435      support is in the binary; otherwise a compile-time error occurs. */
3436
3437      else if (ctype == OP_EXTUNI)
3438        {
3439        for (i = 1; i <= min; i++)
3440          {
3441          if (eptr >= md->end_subject)
3442            {
3443            SCHECK_PARTIAL();
3444            RRETURN(MATCH_NOMATCH);
3445            }
3446          GETCHARINCTEST(c, eptr);
3447          prop_category = UCD_CATEGORY(c);
3448          if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3449          while (eptr < md->end_subject)
3450            {
3451            int len = 1;
3452            if (!utf8) c = *eptr;
3453              else { GETCHARLEN(c, eptr, len); }
3454            prop_category = UCD_CATEGORY(c);
3455            if (prop_category != ucp_M) break;
3456            eptr += len;
3457            }
3458          }
3459        }
3460
3461      else
3462#endif     /* SUPPORT_UCP */
3463
3464/* Handle all other cases when the coding is UTF-8 */
3465
3466#ifdef SUPPORT_UTF8
3467      if (utf8) switch(ctype)
3468        {
3469        case OP_ANY:
3470        for (i = 1; i <= min; i++)
3471          {
3472          if (eptr >= md->end_subject)
3473            {
3474            SCHECK_PARTIAL();
3475            RRETURN(MATCH_NOMATCH);
3476            }
3477          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3478          eptr++;
3479          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3480          }
3481        break;
3482
3483        case OP_ALLANY:
3484        for (i = 1; i <= min; i++)
3485          {
3486          if (eptr >= md->end_subject)
3487            {
3488            SCHECK_PARTIAL();
3489            RRETURN(MATCH_NOMATCH);
3490            }
3491          eptr++;
3492          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3493          }
3494        break;
3495
3496        case OP_ANYBYTE:
3497        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3498        eptr += min;
3499        break;
3500
3501        case OP_ANYNL:
3502        for (i = 1; i <= min; i++)
3503          {
3504          if (eptr >= md->end_subject)
3505            {
3506            SCHECK_PARTIAL();
3507            RRETURN(MATCH_NOMATCH);
3508            }
3509          GETCHARINC(c, eptr);
3510          switch(c)
3511            {
3512            default: RRETURN(MATCH_NOMATCH);
3513            case 0x000d:
3514            if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3515            break;
3516
3517            case 0x000a:
3518            break;
3519
3520            case 0x000b:
3521            case 0x000c:
3522            case 0x0085:
3523            case 0x2028:
3524            case 0x2029:
3525            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3526            break;
3527            }
3528          }
3529        break;
3530
3531        case OP_NOT_HSPACE:
3532        for (i = 1; i <= min; i++)
3533          {
3534          if (eptr >= md->end_subject)
3535            {
3536            SCHECK_PARTIAL();
3537            RRETURN(MATCH_NOMATCH);
3538            }
3539          GETCHARINC(c, eptr);
3540          switch(c)
3541            {
3542            default: break;
3543            case 0x09:      /* HT */
3544            case 0x20:      /* SPACE */
3545            case 0xa0:      /* NBSP */
3546            case 0x1680:    /* OGHAM SPACE MARK */
3547            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3548            case 0x2000:    /* EN QUAD */
3549            case 0x2001:    /* EM QUAD */
3550            case 0x2002:    /* EN SPACE */
3551            case 0x2003:    /* EM SPACE */
3552            case 0x2004:    /* THREE-PER-EM SPACE */
3553            case 0x2005:    /* FOUR-PER-EM SPACE */
3554            case 0x2006:    /* SIX-PER-EM SPACE */
3555            case 0x2007:    /* FIGURE SPACE */
3556            case 0x2008:    /* PUNCTUATION SPACE */
3557            case 0x2009:    /* THIN SPACE */
3558            case 0x200A:    /* HAIR SPACE */
3559            case 0x202f:    /* NARROW NO-BREAK SPACE */
3560            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3561            case 0x3000:    /* IDEOGRAPHIC SPACE */
3562            RRETURN(MATCH_NOMATCH);
3563            }
3564          }
3565        break;
3566
3567        case OP_HSPACE:
3568        for (i = 1; i <= min; i++)
3569          {
3570          if (eptr >= md->end_subject)
3571            {
3572            SCHECK_PARTIAL();
3573            RRETURN(MATCH_NOMATCH);
3574            }
3575          GETCHARINC(c, eptr);
3576          switch(c)
3577            {
3578            default: RRETURN(MATCH_NOMATCH);
3579            case 0x09:      /* HT */
3580            case 0x20:      /* SPACE */
3581            case 0xa0:      /* NBSP */
3582            case 0x1680:    /* OGHAM SPACE MARK */
3583            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3584            case 0x2000:    /* EN QUAD */
3585            case 0x2001:    /* EM QUAD */
3586            case 0x2002:    /* EN SPACE */
3587            case 0x2003:    /* EM SPACE */
3588            case 0x2004:    /* THREE-PER-EM SPACE */
3589            case 0x2005:    /* FOUR-PER-EM SPACE */
3590            case 0x2006:    /* SIX-PER-EM SPACE */
3591            case 0x2007:    /* FIGURE SPACE */
3592            case 0x2008:    /* PUNCTUATION SPACE */
3593            case 0x2009:    /* THIN SPACE */
3594            case 0x200A:    /* HAIR SPACE */
3595            case 0x202f:    /* NARROW NO-BREAK SPACE */
3596            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3597            case 0x3000:    /* IDEOGRAPHIC SPACE */
3598            break;
3599            }
3600          }
3601        break;
3602
3603        case OP_NOT_VSPACE:
3604        for (i = 1; i <= min; i++)
3605          {
3606          if (eptr >= md->end_subject)
3607            {
3608            SCHECK_PARTIAL();
3609            RRETURN(MATCH_NOMATCH);
3610            }
3611          GETCHARINC(c, eptr);
3612          switch(c)
3613            {
3614            default: break;
3615            case 0x0a:      /* LF */
3616            case 0x0b:      /* VT */
3617            case 0x0c:      /* FF */
3618            case 0x0d:      /* CR */
3619            case 0x85:      /* NEL */
3620            case 0x2028:    /* LINE SEPARATOR */
3621            case 0x2029:    /* PARAGRAPH SEPARATOR */
3622            RRETURN(MATCH_NOMATCH);
3623            }
3624          }
3625        break;
3626
3627        case OP_VSPACE:
3628        for (i = 1; i <= min; i++)
3629          {
3630          if (eptr >= md->end_subject)
3631            {
3632            SCHECK_PARTIAL();
3633            RRETURN(MATCH_NOMATCH);
3634            }
3635          GETCHARINC(c, eptr);
3636          switch(c)
3637            {
3638            default: RRETURN(MATCH_NOMATCH);
3639            case 0x0a:      /* LF */
3640            case 0x0b:      /* VT */
3641            case 0x0c:      /* FF */
3642            case 0x0d:      /* CR */
3643            case 0x85:      /* NEL */
3644            case 0x2028:    /* LINE SEPARATOR */
3645            case 0x2029:    /* PARAGRAPH SEPARATOR */
3646            break;
3647            }
3648          }
3649        break;
3650
3651        case OP_NOT_DIGIT:
3652        for (i = 1; i <= min; i++)
3653          {
3654          if (eptr >= md->end_subject)
3655            {
3656            SCHECK_PARTIAL();
3657            RRETURN(MATCH_NOMATCH);
3658            }
3659          GETCHARINC(c, eptr);
3660          if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3661            RRETURN(MATCH_NOMATCH);
3662          }
3663        break;
3664
3665        case OP_DIGIT:
3666        for (i = 1; i <= min; i++)
3667          {
3668          if (eptr >= md->end_subject)
3669            {
3670            SCHECK_PARTIAL();
3671            RRETURN(MATCH_NOMATCH);
3672            }
3673          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3674            RRETURN(MATCH_NOMATCH);
3675          /* No need to skip more bytes - we know it's a 1-byte character */
3676          }
3677        break;
3678
3679        case OP_NOT_WHITESPACE:
3680        for (i = 1; i <= min; i++)
3681          {
3682          if (eptr >= md->end_subject)
3683            {
3684            SCHECK_PARTIAL();
3685            RRETURN(MATCH_NOMATCH);
3686            }
3687          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3688            RRETURN(MATCH_NOMATCH);
3689          while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3690          }
3691        break;
3692
3693        case OP_WHITESPACE:
3694        for (i = 1; i <= min; i++)
3695          {
3696          if (eptr >= md->end_subject)
3697            {
3698            SCHECK_PARTIAL();
3699            RRETURN(MATCH_NOMATCH);
3700            }
3701          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3702            RRETURN(MATCH_NOMATCH);
3703          /* No need to skip more bytes - we know it's a 1-byte character */
3704          }
3705        break;
3706
3707        case OP_NOT_WORDCHAR:
3708        for (i = 1; i <= min; i++)
3709          {
3710          if (eptr >= md->end_subject)
3711            {
3712            SCHECK_PARTIAL();
3713            RRETURN(MATCH_NOMATCH);
3714            }
3715          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3716            RRETURN(MATCH_NOMATCH);
3717          while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3718          }
3719        break;
3720
3721        case OP_WORDCHAR:
3722        for (i = 1; i <= min; i++)
3723          {
3724          if (eptr >= md->end_subject)
3725            {
3726            SCHECK_PARTIAL();
3727            RRETURN(MATCH_NOMATCH);
3728            }
3729          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3730            RRETURN(MATCH_NOMATCH);
3731          /* No need to skip more bytes - we know it's a 1-byte character */
3732          }
3733        break;
3734
3735        default:
3736        RRETURN(PCRE_ERROR_INTERNAL);
3737        }  /* End switch(ctype) */
3738
3739      else
3740#endif     /* SUPPORT_UTF8 */
3741
3742      /* Code for the non-UTF-8 case for minimum matching of operators other
3743      than OP_PROP and OP_NOTPROP. */
3744
3745      switch(ctype)
3746        {
3747        case OP_ANY:
3748        for (i = 1; i <= min; i++)
3749          {
3750          if (eptr >= md->end_subject)
3751            {
3752            SCHECK_PARTIAL();
3753            RRETURN(MATCH_NOMATCH);
3754            }
3755          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3756          eptr++;
3757          }
3758        break;
3759
3760        case OP_ALLANY:
3761        if (eptr > md->end_subject - min)
3762          {
3763          SCHECK_PARTIAL();
3764          RRETURN(MATCH_NOMATCH);
3765          }
3766        eptr += min;
3767        break;
3768
3769        case OP_ANYBYTE:
3770        if (eptr > md->end_subject - min)
3771          {
3772          SCHECK_PARTIAL();
3773          RRETURN(MATCH_NOMATCH);
3774          }
3775        eptr += min;
3776        break;
3777
3778        case OP_ANYNL:
3779        for (i = 1; i <= min; i++)
3780          {
3781          if (eptr >= md->end_subject)
3782            {
3783            SCHECK_PARTIAL();
3784            RRETURN(MATCH_NOMATCH);
3785            }
3786          switch(*eptr++)
3787            {
3788            default: RRETURN(MATCH_NOMATCH);
3789            case 0x000d:
3790            if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3791            break;
3792            case 0x000a:
3793            break;
3794
3795            case 0x000b:
3796            case 0x000c:
3797            case 0x0085:
3798            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3799            break;
3800            }
3801          }
3802        break;
3803
3804        case OP_NOT_HSPACE:
3805        for (i = 1; i <= min; i++)
3806          {
3807          if (eptr >= md->end_subject)
3808            {
3809            SCHECK_PARTIAL();
3810            RRETURN(MATCH_NOMATCH);
3811            }
3812          switch(*eptr++)
3813            {
3814            default: break;
3815            case 0x09:      /* HT */
3816            case 0x20:      /* SPACE */
3817            case 0xa0:      /* NBSP */
3818            RRETURN(MATCH_NOMATCH);
3819            }
3820          }
3821        break;
3822
3823        case OP_HSPACE:
3824        for (i = 1; i <= min; i++)
3825          {
3826          if (eptr >= md->end_subject)
3827            {
3828            SCHECK_PARTIAL();
3829            RRETURN(MATCH_NOMATCH);
3830            }
3831          switch(*eptr++)
3832            {
3833            default: RRETURN(MATCH_NOMATCH);
3834            case 0x09:      /* HT */
3835            case 0x20:      /* SPACE */
3836            case 0xa0:      /* NBSP */
3837            break;
3838            }
3839          }
3840        break;
3841
3842        case OP_NOT_VSPACE:
3843        for (i = 1; i <= min; i++)
3844          {
3845          if (eptr >= md->end_subject)
3846            {
3847            SCHECK_PARTIAL();
3848            RRETURN(MATCH_NOMATCH);
3849            }
3850          switch(*eptr++)
3851            {
3852            default: break;
3853            case 0x0a:      /* LF */
3854            case 0x0b:      /* VT */
3855            case 0x0c:      /* FF */
3856            case 0x0d:      /* CR */
3857            case 0x85:      /* NEL */
3858            RRETURN(MATCH_NOMATCH);
3859            }
3860          }
3861        break;
3862
3863        case OP_VSPACE:
3864        for (i = 1; i <= min; i++)
3865          {
3866          if (eptr >= md->end_subject)
3867            {
3868            SCHECK_PARTIAL();
3869            RRETURN(MATCH_NOMATCH);
3870            }
3871          switch(*eptr++)
3872            {
3873            default: RRETURN(MATCH_NOMATCH);
3874            case 0x0a:      /* LF */
3875            case 0x0b:      /* VT */
3876            case 0x0c:      /* FF */
3877            case 0x0d:      /* CR */
3878            case 0x85:      /* NEL */
3879            break;
3880            }
3881          }
3882        break;
3883
3884        case OP_NOT_DIGIT:
3885        for (i = 1; i <= min; i++)
3886          {
3887          if (eptr >= md->end_subject)
3888            {
3889            SCHECK_PARTIAL();
3890            RRETURN(MATCH_NOMATCH);
3891            }
3892          if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3893          }
3894        break;
3895
3896        case OP_DIGIT:
3897        for (i = 1; i <= min; i++)
3898          {
3899          if (eptr >= md->end_subject)
3900            {
3901            SCHECK_PARTIAL();
3902            RRETURN(MATCH_NOMATCH);
3903            }
3904          if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3905          }
3906        break;
3907
3908        case OP_NOT_WHITESPACE:
3909        for (i = 1; i <= min; i++)
3910          {
3911          if (eptr >= md->end_subject)
3912            {
3913            SCHECK_PARTIAL();
3914            RRETURN(MATCH_NOMATCH);
3915            }
3916          if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3917          }
3918        break;
3919
3920        case OP_WHITESPACE:
3921        for (i = 1; i <= min; i++)
3922          {
3923          if (eptr >= md->end_subject)
3924            {
3925            SCHECK_PARTIAL();
3926            RRETURN(MATCH_NOMATCH);
3927            }
3928          if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3929          }
3930        break;
3931
3932        case OP_NOT_WORDCHAR:
3933        for (i = 1; i <= min; i++)
3934          {
3935          if (eptr >= md->end_subject)
3936            {
3937            SCHECK_PARTIAL();
3938            RRETURN(MATCH_NOMATCH);
3939            }
3940          if ((md->ctypes[*eptr++] & ctype_word) != 0)
3941            RRETURN(MATCH_NOMATCH);
3942          }
3943        break;
3944
3945        case OP_WORDCHAR:
3946        for (i = 1; i <= min; i++)
3947          {
3948          if (eptr >= md->end_subject)
3949            {
3950            SCHECK_PARTIAL();
3951            RRETURN(MATCH_NOMATCH);
3952            }
3953          if ((md->ctypes[*eptr++] & ctype_word) == 0)
3954            RRETURN(MATCH_NOMATCH);
3955          }
3956        break;
3957
3958        default:
3959        RRETURN(PCRE_ERROR_INTERNAL);
3960        }
3961      }
3962
3963    /* If min = max, continue at the same level without recursing */
3964
3965    if (min == max) continue;
3966
3967    /* If minimizing, we have to test the rest of the pattern before each
3968    subsequent match. Again, separate the UTF-8 case for speed, and also
3969    separate the UCP cases. */
3970
3971    if (minimize)
3972      {
3973#ifdef SUPPORT_UCP
3974      if (prop_type >= 0)
3975        {
3976        switch(prop_type)
3977          {
3978          case PT_ANY:
3979          for (fi = min;; fi++)
3980            {
3981            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3982            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3983            if (fi >= max) RRETURN(MATCH_NOMATCH);
3984            if (eptr >= md->end_subject)
3985              {
3986              SCHECK_PARTIAL();
3987              RRETURN(MATCH_NOMATCH);
3988              }
3989            GETCHARINC(c, eptr);
3990            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3991            }
3992          /* Control never gets here */
3993
3994          case PT_LAMP:
3995          for (fi = min;; fi++)
3996            {
3997            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3998            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3999            if (fi >= max) RRETURN(MATCH_NOMATCH);
4000            if (eptr >= md->end_subject)
4001              {
4002              SCHECK_PARTIAL();
4003              RRETURN(MATCH_NOMATCH);
4004              }
4005            GETCHARINC(c, eptr);
4006            prop_chartype = UCD_CHARTYPE(c);
4007            if ((prop_chartype == ucp_Lu ||
4008                 prop_chartype == ucp_Ll ||
4009                 prop_chartype == ucp_Lt) == prop_fail_result)
4010              RRETURN(MATCH_NOMATCH);
4011            }
4012          /* Control never gets here */
4013
4014          case PT_GC:
4015          for (fi = min;; fi++)
4016            {
4017            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4018            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4019            if (fi >= max) RRETURN(MATCH_NOMATCH);
4020            if (eptr >= md->end_subject)
4021              {
4022              SCHECK_PARTIAL();
4023              RRETURN(MATCH_NOMATCH);
4024              }
4025            GETCHARINC(c, eptr);
4026            prop_category = UCD_CATEGORY(c);
4027            if ((prop_category == prop_value) == prop_fail_result)
4028              RRETURN(MATCH_NOMATCH);
4029            }
4030          /* Control never gets here */
4031
4032          case PT_PC:
4033          for (fi = min;; fi++)
4034            {
4035            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4036            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4037            if (fi >= max) RRETURN(MATCH_NOMATCH);
4038            if (eptr >= md->end_subject)
4039              {
4040              SCHECK_PARTIAL();
4041              RRETURN(MATCH_NOMATCH);
4042              }
4043            GETCHARINC(c, eptr);
4044            prop_chartype = UCD_CHARTYPE(c);
4045            if ((prop_chartype == prop_value) == prop_fail_result)
4046              RRETURN(MATCH_NOMATCH);
4047            }
4048          /* Control never gets here */
4049
4050          case PT_SC:
4051          for (fi = min;; fi++)
4052            {
4053            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4054            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4055            if (fi >= max) RRETURN(MATCH_NOMATCH);
4056            if (eptr >= md->end_subject)
4057              {
4058              SCHECK_PARTIAL();
4059              RRETURN(MATCH_NOMATCH);
4060              }
4061            GETCHARINC(c, eptr);
4062            prop_script = UCD_SCRIPT(c);
4063            if ((prop_script == prop_value) == prop_fail_result)
4064              RRETURN(MATCH_NOMATCH);
4065            }
4066          /* Control never gets here */
4067
4068          default:
4069          RRETURN(PCRE_ERROR_INTERNAL);
4070          }
4071        }
4072
4073      /* Match extended Unicode sequences. We will get here only if the
4074      support is in the binary; otherwise a compile-time error occurs. */
4075
4076      else if (ctype == OP_EXTUNI)
4077        {
4078        for (fi = min;; fi++)
4079          {
4080          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4081          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4082          if (fi >= max) RRETURN(MATCH_NOMATCH);
4083          if (eptr >= md->end_subject)
4084            {
4085            SCHECK_PARTIAL();
4086            RRETURN(MATCH_NOMATCH);
4087            }
4088          GETCHARINCTEST(c, eptr);
4089          prop_category = UCD_CATEGORY(c);
4090          if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
4091          while (eptr < md->end_subject)
4092            {
4093            int len = 1;
4094            if (!utf8) c = *eptr;
4095              else { GETCHARLEN(c, eptr, len); }
4096            prop_category = UCD_CATEGORY(c);
4097            if (prop_category != ucp_M) break;
4098            eptr += len;
4099            }
4100          }
4101        }
4102
4103      else
4104#endif     /* SUPPORT_UCP */
4105
4106#ifdef SUPPORT_UTF8
4107      /* UTF-8 mode */
4108      if (utf8)
4109        {
4110        for (fi = min;; fi++)
4111          {
4112          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4113          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4114          if (fi >= max) RRETURN(MATCH_NOMATCH);
4115          if (eptr >= md->end_subject)
4116            {
4117            SCHECK_PARTIAL();
4118            RRETURN(MATCH_NOMATCH);
4119            }
4120          if (ctype == OP_ANY && IS_NEWLINE(eptr))
4121            RRETURN(MATCH_NOMATCH);
4122          GETCHARINC(c, eptr);
4123          switch(ctype)
4124            {
4125            case OP_ANY:        /* This is the non-NL case */
4126            case OP_ALLANY:
4127            case OP_ANYBYTE:
4128            break;
4129
4130            case OP_ANYNL:
4131            switch(c)
4132              {
4133              default: RRETURN(MATCH_NOMATCH);
4134              case 0x000d:
4135              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4136              break;
4137              case 0x000a:
4138              break;
4139
4140              case 0x000b:
4141              case 0x000c:
4142              case 0x0085:
4143              case 0x2028:
4144              case 0x2029:
4145              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4146              break;
4147              }
4148            break;
4149
4150            case OP_NOT_HSPACE:
4151            switch(c)
4152              {
4153              default: break;
4154              case 0x09:      /* HT */
4155              case 0x20:      /* SPACE */
4156              case 0xa0:      /* NBSP */
4157              case 0x1680:    /* OGHAM SPACE MARK */
4158              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4159              case 0x2000:    /* EN QUAD */
4160              case 0x2001:    /* EM QUAD */
4161              case 0x2002:    /* EN SPACE */
4162              case 0x2003:    /* EM SPACE */
4163              case 0x2004:    /* THREE-PER-EM SPACE */
4164              case 0x2005:    /* FOUR-PER-EM SPACE */
4165              case 0x2006:    /* SIX-PER-EM SPACE */
4166              case 0x2007:    /* FIGURE SPACE */
4167              case 0x2008:    /* PUNCTUATION SPACE */
4168              case 0x2009:    /* THIN SPACE */
4169              case 0x200A:    /* HAIR SPACE */
4170              case 0x202f:    /* NARROW NO-BREAK SPACE */
4171              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4172              case 0x3000:    /* IDEOGRAPHIC SPACE */
4173              RRETURN(MATCH_NOMATCH);
4174              }
4175            break;
4176
4177            case OP_HSPACE:
4178            switch(c)
4179              {
4180              default: RRETURN(MATCH_NOMATCH);
4181              case 0x09:      /* HT */
4182              case 0x20:      /* SPACE */
4183              case 0xa0:      /* NBSP */
4184              case 0x1680:    /* OGHAM SPACE MARK */
4185              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4186              case 0x2000:    /* EN QUAD */
4187              case 0x2001:    /* EM QUAD */
4188              case 0x2002:    /* EN SPACE */
4189              case 0x2003:    /* EM SPACE */
4190              case 0x2004:    /* THREE-PER-EM SPACE */
4191              case 0x2005:    /* FOUR-PER-EM SPACE */
4192              case 0x2006:    /* SIX-PER-EM SPACE */
4193              case 0x2007:    /* FIGURE SPACE */
4194              case 0x2008:    /* PUNCTUATION SPACE */
4195              case 0x2009:    /* THIN SPACE */
4196              case 0x200A:    /* HAIR SPACE */
4197              case 0x202f:    /* NARROW NO-BREAK SPACE */
4198              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4199              case 0x3000:    /* IDEOGRAPHIC SPACE */
4200              break;
4201              }
4202            break;
4203
4204            case OP_NOT_VSPACE:
4205            switch(c)
4206              {
4207              default: break;
4208              case 0x0a:      /* LF */
4209              case 0x0b:      /* VT */
4210              case 0x0c:      /* FF */
4211              case 0x0d:      /* CR */
4212              case 0x85:      /* NEL */
4213              case 0x2028:    /* LINE SEPARATOR */
4214              case 0x2029:    /* PARAGRAPH SEPARATOR */
4215              RRETURN(MATCH_NOMATCH);
4216              }
4217            break;
4218
4219            case OP_VSPACE:
4220            switch(c)
4221              {
4222              default: RRETURN(MATCH_NOMATCH);
4223              case 0x0a:      /* LF */
4224              case 0x0b:      /* VT */
4225              case 0x0c:      /* FF */
4226              case 0x0d:      /* CR */
4227              case 0x85:      /* NEL */
4228              case 0x2028:    /* LINE SEPARATOR */
4229              case 0x2029:    /* PARAGRAPH SEPARATOR */
4230              break;
4231              }
4232            break;
4233
4234            case OP_NOT_DIGIT:
4235            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4236              RRETURN(MATCH_NOMATCH);
4237            break;
4238
4239            case OP_DIGIT:
4240            if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4241              RRETURN(MATCH_NOMATCH);
4242            break;
4243
4244            case OP_NOT_WHITESPACE:
4245            if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4246              RRETURN(MATCH_NOMATCH);
4247            break;
4248
4249            case OP_WHITESPACE:
4250            if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4251              RRETURN(MATCH_NOMATCH);
4252            break;
4253
4254            case OP_NOT_WORDCHAR:
4255            if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4256              RRETURN(MATCH_NOMATCH);
4257            break;
4258
4259            case OP_WORDCHAR:
4260            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4261              RRETURN(MATCH_NOMATCH);
4262            break;
4263
4264            default:
4265            RRETURN(PCRE_ERROR_INTERNAL);
4266            }
4267          }
4268        }
4269      else
4270#endif
4271      /* Not UTF-8 mode */
4272        {
4273        for (fi = min;; fi++)
4274          {
4275          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4276          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4277          if (fi >= max) RRETURN(MATCH_NOMATCH);
4278          if (eptr >= md->end_subject)
4279            {
4280            SCHECK_PARTIAL();
4281            RRETURN(MATCH_NOMATCH);
4282            }
4283          if (ctype == OP_ANY && IS_NEWLINE(eptr))
4284            RRETURN(MATCH_NOMATCH);
4285          c = *eptr++;
4286          switch(ctype)
4287            {
4288            case OP_ANY:     /* This is the non-NL case */
4289            case OP_ALLANY:
4290            case OP_ANYBYTE:
4291            break;
4292
4293            case OP_ANYNL:
4294            switch(c)
4295              {
4296              default: RRETURN(MATCH_NOMATCH);
4297              case 0x000d:
4298              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4299              break;
4300
4301              case 0x000a:
4302              break;
4303
4304              case 0x000b:
4305              case 0x000c:
4306              case 0x0085:
4307              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4308              break;
4309              }
4310            break;
4311
4312            case OP_NOT_HSPACE:
4313            switch(c)
4314              {
4315              default: break;
4316              case 0x09:      /* HT */
4317              case 0x20:      /* SPACE */
4318              case 0xa0:      /* NBSP */
4319              RRETURN(MATCH_NOMATCH);
4320              }
4321            break;
4322
4323            case OP_HSPACE:
4324            switch(c)
4325              {
4326              default: RRETURN(MATCH_NOMATCH);
4327              case 0x09:      /* HT */
4328              case 0x20:      /* SPACE */
4329              case 0xa0:      /* NBSP */
4330              break;
4331              }
4332            break;
4333
4334            case OP_NOT_VSPACE:
4335            switch(c)
4336              {
4337              default: break;
4338              case 0x0a:      /* LF */
4339              case 0x0b:      /* VT */
4340              case 0x0c:      /* FF */
4341              case 0x0d:      /* CR */
4342              case 0x85:      /* NEL */
4343              RRETURN(MATCH_NOMATCH);
4344              }
4345            break;
4346
4347            case OP_VSPACE:
4348            switch(c)
4349              {
4350              default: RRETURN(MATCH_NOMATCH);
4351              case 0x0a:      /* LF */
4352              case 0x0b:      /* VT */
4353              case 0x0c:      /* FF */
4354              case 0x0d:      /* CR */
4355              case 0x85:      /* NEL */
4356              break;
4357              }
4358            break;
4359
4360            case OP_NOT_DIGIT:
4361            if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
4362            break;
4363
4364            case OP_DIGIT:
4365            if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
4366            break;
4367
4368            case OP_NOT_WHITESPACE:
4369            if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
4370            break;
4371
4372            case OP_WHITESPACE:
4373            if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
4374            break;
4375
4376            case OP_NOT_WORDCHAR:
4377            if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
4378            break;
4379
4380            case OP_WORDCHAR:
4381            if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
4382            break;
4383
4384            default:
4385            RRETURN(PCRE_ERROR_INTERNAL);
4386            }
4387          }
4388        }
4389      /* Control never gets here */
4390      }
4391
4392    /* If maximizing, it is worth using inline code for speed, doing the type
4393    test once at the start (i.e. keep it out of the loop). Again, keep the
4394    UTF-8 and UCP stuff separate. */
4395
4396    else
4397      {
4398      pp = eptr;  /* Remember where we started */
4399
4400#ifdef SUPPORT_UCP
4401      if (prop_type >= 0)
4402        {
4403        switch(prop_type)
4404          {
4405          case PT_ANY:
4406          for (i = min; i < max; i++)
4407            {
4408            int len = 1;
4409            if (eptr >= md->end_subject)
4410              {
4411              SCHECK_PARTIAL();
4412              break;
4413              }
4414            GETCHARLEN(c, eptr, len);
4415            if (prop_fail_result) break;
4416            eptr+= len;
4417            }
4418          break;
4419
4420          case PT_LAMP:
4421          for (i = min; i < max; i++)
4422            {
4423            int len = 1;
4424            if (eptr >= md->end_subject)
4425              {
4426              SCHECK_PARTIAL();
4427              break;
4428              }
4429            GETCHARLEN(c, eptr, len);
4430            prop_chartype = UCD_CHARTYPE(c);
4431            if ((prop_chartype == ucp_Lu ||
4432                 prop_chartype == ucp_Ll ||
4433                 prop_chartype == ucp_Lt) == prop_fail_result)
4434              break;
4435            eptr+= len;
4436            }
4437          break;
4438
4439          case PT_GC:
4440          for (i = min; i < max; i++)
4441            {
4442            int len = 1;
4443            if (eptr >= md->end_subject)
4444              {
4445              SCHECK_PARTIAL();
4446              break;
4447              }
4448            GETCHARLEN(c, eptr, len);
4449            prop_category = UCD_CATEGORY(c);
4450            if ((prop_category == prop_value) == prop_fail_result)
4451              break;
4452            eptr+= len;
4453            }
4454          break;
4455
4456          case PT_PC:
4457          for (i = min; i < max; i++)
4458            {
4459            int len = 1;
4460            if (eptr >= md->end_subject)
4461              {
4462              SCHECK_PARTIAL();
4463              break;
4464              }
4465            GETCHARLEN(c, eptr, len);
4466            prop_chartype = UCD_CHARTYPE(c);
4467            if ((prop_chartype == prop_value) == prop_fail_result)
4468              break;
4469            eptr+= len;
4470            }
4471          break;
4472
4473          case PT_SC:
4474          for (i = min; i < max; i++)
4475            {
4476            int len = 1;
4477            if (eptr >= md->end_subject)
4478              {
4479              SCHECK_PARTIAL();
4480              break;
4481              }
4482            GETCHARLEN(c, eptr, len);
4483            prop_script = UCD_SCRIPT(c);
4484            if ((prop_script == prop_value) == prop_fail_result)
4485              break;
4486            eptr+= len;
4487            }
4488          break;
4489          }
4490
4491        /* eptr is now past the end of the maximum run */
4492
4493        if (possessive) continue;
4494        for(;;)
4495          {
4496          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4497          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4498          if (eptr-- == pp) break;        /* Stop if tried at original pos */
4499          if (utf8) BACKCHAR(eptr);
4500          }
4501        }
4502
4503      /* Match extended Unicode sequences. We will get here only if the
4504      support is in the binary; otherwise a compile-time error occurs. */
4505
4506      else if (ctype == OP_EXTUNI)
4507        {
4508        for (i = min; i < max; i++)
4509          {
4510          if (eptr >= md->end_subject)
4511            {
4512            SCHECK_PARTIAL();
4513            break;
4514            }
4515          GETCHARINCTEST(c, eptr);
4516          prop_category = UCD_CATEGORY(c);
4517          if (prop_category == ucp_M) break;
4518          while (eptr < md->end_subject)
4519            {
4520            int len = 1;
4521            if (!utf8) c = *eptr; else
4522              {
4523              GETCHARLEN(c, eptr, len);
4524              }
4525            prop_category = UCD_CATEGORY(c);
4526            if (prop_category != ucp_M) break;
4527            eptr += len;
4528            }
4529          }
4530
4531        /* eptr is now past the end of the maximum run */
4532
4533        if (possessive) continue;
4534
4535        for(;;)
4536          {
4537          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4538          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4539          if (eptr-- == pp) break;        /* Stop if tried at original pos */
4540          for (;;)                        /* Move back over one extended */
4541            {
4542            int len = 1;
4543            if (!utf8) c = *eptr; else
4544              {
4545              BACKCHAR(eptr);
4546              GETCHARLEN(c, eptr, len);
4547              }
4548            prop_category = UCD_CATEGORY(c);
4549            if (prop_category != ucp_M) break;
4550            eptr--;
4551            }
4552          }
4553        }
4554
4555      else
4556#endif   /* SUPPORT_UCP */
4557
4558#ifdef SUPPORT_UTF8
4559      /* UTF-8 mode */
4560
4561      if (utf8)
4562        {
4563        switch(ctype)
4564          {
4565          case OP_ANY:
4566          if (max < INT_MAX)
4567            {
4568            for (i = min; i < max; i++)
4569              {
4570              if (eptr >= md->end_subject)
4571                {
4572                SCHECK_PARTIAL();
4573                break;
4574                }
4575              if (IS_NEWLINE(eptr)) break;
4576              eptr++;
4577              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4578              }
4579            }
4580
4581          /* Handle unlimited UTF-8 repeat */
4582
4583          else
4584            {
4585            for (i = min; i < max; i++)
4586              {
4587              if (eptr >= md->end_subject)
4588                {
4589                SCHECK_PARTIAL();
4590                break;
4591                }
4592              if (IS_NEWLINE(eptr)) break;
4593              eptr++;
4594              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4595              }
4596            }
4597          break;
4598
4599          case OP_ALLANY:
4600          if (max < INT_MAX)
4601            {
4602            for (i = min; i < max; i++)
4603              {
4604              if (eptr >= md->end_subject)
4605                {
4606                SCHECK_PARTIAL();
4607                break;
4608                }
4609              eptr++;
4610              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4611              }
4612            }
4613          else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4614          break;
4615
4616          /* The byte case is the same as non-UTF8 */
4617
4618          case OP_ANYBYTE:
4619          c = max - min;
4620          if (c > (unsigned int)(md->end_subject - eptr))
4621            {
4622            eptr = md->end_subject;
4623            SCHECK_PARTIAL();
4624            }
4625          else eptr += c;
4626          break;
4627
4628          case OP_ANYNL:
4629          for (i = min; i < max; i++)
4630            {
4631            int len = 1;
4632            if (eptr >= md->end_subject)
4633              {
4634              SCHECK_PARTIAL();
4635              break;
4636              }
4637            GETCHARLEN(c, eptr, len);
4638            if (c == 0x000d)
4639              {
4640              if (++eptr >= md->end_subject) break;
4641              if (*eptr == 0x000a) eptr++;
4642              }
4643            else
4644              {
4645              if (c != 0x000a &&
4646                  (md->bsr_anycrlf ||
4647                   (c != 0x000b && c != 0x000c &&
4648                    c != 0x0085 && c != 0x2028 && c != 0x2029)))
4649                break;
4650              eptr += len;
4651              }
4652            }
4653          break;
4654
4655          case OP_NOT_HSPACE:
4656          case OP_HSPACE:
4657          for (i = min; i < max; i++)
4658            {
4659            BOOL gotspace;
4660            int len = 1;
4661            if (eptr >= md->end_subject)
4662              {
4663              SCHECK_PARTIAL();
4664              break;
4665              }
4666            GETCHARLEN(c, eptr, len);
4667            switch(c)
4668              {
4669              default: gotspace = FALSE; break;
4670              case 0x09:      /* HT */
4671              case 0x20:      /* SPACE */
4672              case 0xa0:      /* NBSP */
4673              case 0x1680:    /* OGHAM SPACE MARK */
4674              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4675              case 0x2000:    /* EN QUAD */
4676              case 0x2001:    /* EM QUAD */
4677              case 0x2002:    /* EN SPACE */
4678              case 0x2003:    /* EM SPACE */
4679              case 0x2004:    /* THREE-PER-EM SPACE */
4680              case 0x2005:    /* FOUR-PER-EM SPACE */
4681              case 0x2006:    /* SIX-PER-EM SPACE */
4682              case 0x2007:    /* FIGURE SPACE */
4683              case 0x2008:    /* PUNCTUATION SPACE */
4684              case 0x2009:    /* THIN SPACE */
4685              case 0x200A:    /* HAIR SPACE */
4686              case 0x202f:    /* NARROW NO-BREAK SPACE */
4687              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4688              case 0x3000:    /* IDEOGRAPHIC SPACE */
4689              gotspace = TRUE;
4690              break;
4691              }
4692            if (gotspace == (ctype == OP_NOT_HSPACE)) break;
4693            eptr += len;
4694            }
4695          break;
4696
4697          case OP_NOT_VSPACE:
4698          case OP_VSPACE:
4699          for (i = min; i < max; i++)
4700            {
4701            BOOL gotspace;
4702            int len = 1;
4703            if (eptr >= md->end_subject)
4704              {
4705              SCHECK_PARTIAL();
4706              break;
4707              }
4708            GETCHARLEN(c, eptr, len);
4709            switch(c)
4710              {
4711              default: gotspace = FALSE; break;
4712              case 0x0a:      /* LF */
4713              case 0x0b:      /* VT */
4714              case 0x0c:      /* FF */
4715              case 0x0d:      /* CR */
4716              case 0x85:      /* NEL */
4717              case 0x2028:    /* LINE SEPARATOR */
4718              case 0x2029:    /* PARAGRAPH SEPARATOR */
4719              gotspace = TRUE;
4720              break;
4721              }
4722            if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4723            eptr += len;
4724            }
4725          break;
4726
4727          case OP_NOT_DIGIT:
4728          for (i = min; i < max; i++)
4729            {
4730            int len = 1;
4731            if (eptr >= md->end_subject)
4732              {
4733              SCHECK_PARTIAL();
4734              break;
4735              }
4736            GETCHARLEN(c, eptr, len);
4737            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4738            eptr+= len;
4739            }
4740          break;
4741
4742          case OP_DIGIT:
4743          for (i = min; i < max; i++)
4744            {
4745            int len = 1;
4746            if (eptr >= md->end_subject)
4747              {
4748              SCHECK_PARTIAL();
4749              break;
4750              }
4751            GETCHARLEN(c, eptr, len);
4752            if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4753            eptr+= len;
4754            }
4755          break;
4756
4757          case OP_NOT_WHITESPACE:
4758          for (i = min; i < max; i++)
4759            {
4760            int len = 1;
4761            if (eptr >= md->end_subject)
4762              {
4763              SCHECK_PARTIAL();
4764              break;
4765              }
4766            GETCHARLEN(c, eptr, len);
4767            if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4768            eptr+= len;
4769            }
4770          break;
4771
4772          case OP_WHITESPACE:
4773          for (i = min; i < max; i++)
4774            {
4775            int len = 1;
4776            if (eptr >= md->end_subject)
4777              {
4778              SCHECK_PARTIAL();
4779              break;
4780              }
4781            GETCHARLEN(c, eptr, len);
4782            if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4783            eptr+= len;
4784            }
4785          break;
4786
4787          case OP_NOT_WORDCHAR:
4788          for (i = min; i < max; i++)
4789            {
4790            int len = 1;
4791            if (eptr >= md->end_subject)
4792              {
4793              SCHECK_PARTIAL();
4794              break;
4795              }
4796            GETCHARLEN(c, eptr, len);
4797            if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4798            eptr+= len;
4799            }
4800          break;
4801
4802          case OP_WORDCHAR:
4803          for (i = min; i < max; i++)
4804            {
4805            int len = 1;
4806            if (eptr >= md->end_subject)
4807              {
4808              SCHECK_PARTIAL();
4809              break;
4810              }
4811            GETCHARLEN(c, eptr, len);
4812            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4813            eptr+= len;
4814            }
4815          break;
4816
4817          default:
4818          RRETURN(PCRE_ERROR_INTERNAL);
4819          }
4820
4821        /* eptr is now past the end of the maximum run */
4822
4823        if (possessive) continue;
4824        for(;;)
4825          {
4826          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4827          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4828          if (eptr-- == pp) break;        /* Stop if tried at original pos */
4829          BACKCHAR(eptr);
4830          }
4831        }
4832      else
4833#endif  /* SUPPORT_UTF8 */
4834
4835      /* Not UTF-8 mode */
4836        {
4837        switch(ctype)
4838          {
4839          case OP_ANY:
4840          for (i = min; i < max; i++)
4841            {
4842            if (eptr >= md->end_subject)
4843              {
4844              SCHECK_PARTIAL();
4845              break;
4846              }
4847            if (IS_NEWLINE(eptr)) break;
4848            eptr++;
4849            }
4850          break;
4851
4852          case OP_ALLANY:
4853          case OP_ANYBYTE:
4854          c = max - min;
4855          if (c > (unsigned int)(md->end_subject - eptr))
4856            {
4857            eptr = md->end_subject;
4858            SCHECK_PARTIAL();
4859            }
4860          else eptr += c;
4861          break;
4862
4863          case OP_ANYNL:
4864          for (i = min; i < max; i++)
4865            {
4866            if (eptr >= md->end_subject)
4867              {
4868              SCHECK_PARTIAL();
4869              break;
4870              }
4871            c = *eptr;
4872            if (c == 0x000d)
4873              {
4874              if (++eptr >= md->end_subject) break;
4875              if (*eptr == 0x000a) eptr++;
4876              }
4877            else
4878              {
4879              if (c != 0x000a &&
4880                  (md->bsr_anycrlf ||
4881                    (c != 0x000b && c != 0x000c && c != 0x0085)))
4882                break;
4883              eptr++;
4884              }
4885            }
4886          break;
4887
4888          case OP_NOT_HSPACE:
4889          for (i = min; i < max; i++)
4890            {
4891            if (eptr >= md->end_subject)
4892              {
4893              SCHECK_PARTIAL();
4894              break;
4895              }
4896            c = *eptr;
4897            if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4898            eptr++;
4899            }
4900          break;
4901
4902          case OP_HSPACE:
4903          for (i = min; i < max; i++)
4904            {
4905            if (eptr >= md->end_subject)
4906              {
4907              SCHECK_PARTIAL();
4908              break;
4909              }
4910            c = *eptr;
4911            if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4912            eptr++;
4913            }
4914          break;
4915
4916          case OP_NOT_VSPACE:
4917          for (i = min; i < max; i++)
4918            {
4919            if (eptr >= md->end_subject)
4920              {
4921              SCHECK_PARTIAL();
4922              break;
4923              }
4924            c = *eptr;
4925            if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4926              break;
4927            eptr++;
4928            }
4929          break;
4930
4931          case OP_VSPACE:
4932          for (i = min; i < max; i++)
4933            {
4934            if (eptr >= md->end_subject)
4935              {
4936              SCHECK_PARTIAL();
4937              break;
4938              }
4939            c = *eptr;
4940            if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4941              break;
4942            eptr++;
4943            }
4944          break;
4945
4946          case OP_NOT_DIGIT:
4947          for (i = min; i < max; i++)
4948            {
4949            if (eptr >= md->end_subject)
4950              {
4951              SCHECK_PARTIAL();
4952              break;
4953              }
4954            if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
4955            eptr++;
4956            }
4957          break;
4958
4959          case OP_DIGIT:
4960          for (i = min; i < max; i++)
4961            {
4962            if (eptr >= md->end_subject)
4963              {
4964              SCHECK_PARTIAL();
4965              break;
4966              }
4967            if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
4968            eptr++;
4969            }
4970          break;
4971
4972          case OP_NOT_WHITESPACE:
4973          for (i = min; i < max; i++)
4974            {
4975            if (eptr >= md->end_subject)
4976              {
4977              SCHECK_PARTIAL();
4978              break;
4979              }
4980            if ((md->ctypes[*eptr] & ctype_space) != 0) break;
4981            eptr++;
4982            }
4983          break;
4984
4985          case OP_WHITESPACE:
4986          for (i = min; i < max; i++)
4987            {
4988            if (eptr >= md->end_subject)
4989              {
4990              SCHECK_PARTIAL();
4991              break;
4992              }
4993            if ((md->ctypes[*eptr] & ctype_space) == 0) break;
4994            eptr++;
4995            }
4996          break;
4997
4998          case OP_NOT_WORDCHAR:
4999          for (i = min; i < max; i++)
5000            {
5001            if (eptr >= md->end_subject)
5002              {
5003              SCHECK_PARTIAL();
5004              break;
5005              }
5006            if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5007            eptr++;
5008            }
5009          break;
5010
5011          case OP_WORDCHAR:
5012          for (i = min; i < max; i++)
5013            {
5014            if (eptr >= md->end_subject)
5015              {
5016              SCHECK_PARTIAL();
5017              break;
5018              }
5019            if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5020            eptr++;
5021            }
5022          break;
5023
5024          default:
5025          RRETURN(PCRE_ERROR_INTERNAL);
5026          }
5027
5028        /* eptr is now past the end of the maximum run */
5029
5030        if (possessive) continue;
5031        while (eptr >= pp)
5032          {
5033          RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5034          eptr--;
5035          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5036          }
5037        }
5038
5039      /* Get here if we can't make it match with any permitted repetitions */
5040
5041      RRETURN(MATCH_NOMATCH);
5042      }
5043    /* Control never gets here */
5044
5045    /* There's been some horrible disaster. Arrival here can only mean there is
5046    something seriously wrong in the code above or the OP_xxx definitions. */
5047
5048    default:
5049    DPRINTF(("Unknown opcode %d\n", *ecode));
5050    RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5051    }
5052
5053  /* Do not stick any code in here without much thought; it is assumed
5054  that "continue" in the code above comes out to here to repeat the main
5055  loop. */
5056
5057  }             /* End of main loop */
5058/* Control never reaches here */
5059
5060
5061/* When compiling to use the heap rather than the stack for recursive calls to
5062match(), the RRETURN() macro jumps here. The number that is saved in
5063frame->Xwhere indicates which label we actually want to return to. */
5064
5065#ifdef NO_RECURSE
5066#define LBL(val) case val: goto L_RM##val;
5067HEAP_RETURN:
5068switch (frame->Xwhere)
5069  {
5070  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5071  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5072  LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5073  LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5074  LBL(53) LBL(54)
5075#ifdef SUPPORT_UTF8
5076  LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5077  LBL(32) LBL(34) LBL(42) LBL(46)
5078#ifdef SUPPORT_UCP
5079  LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5080#endif  /* SUPPORT_UCP */
5081#endif  /* SUPPORT_UTF8 */
5082  default:
5083  DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5084  return PCRE_ERROR_INTERNAL;
5085  }
5086#undef LBL
5087#endif  /* NO_RECURSE */
5088}
5089
5090
5091/***************************************************************************
5092****************************************************************************
5093                   RECURSION IN THE match() FUNCTION
5094
5095Undefine all the macros that were defined above to handle this. */
5096
5097#ifdef NO_RECURSE
5098#undef eptr
5099#undef ecode
5100#undef mstart
5101#undef offset_top
5102#undef ims
5103#undef eptrb
5104#undef flags
5105
5106#undef callpat
5107#undef charptr
5108#undef data
5109#undef next
5110#undef pp
5111#undef prev
5112#undef saved_eptr
5113
5114#undef new_recursive
5115
5116#undef cur_is_word
5117#undef condition
5118#undef prev_is_word
5119
5120#undef original_ims
5121
5122#undef ctype
5123#undef length
5124#undef max
5125#undef min
5126#undef number
5127#undef offset
5128#undef op
5129#undef save_capture_last
5130#undef save_offset1
5131#undef save_offset2
5132#undef save_offset3
5133#undef stacksave
5134
5135#undef newptrb
5136
5137#endif
5138
5139/* These two are defined as macros in both cases */
5140
5141#undef fc
5142#undef fi
5143
5144/***************************************************************************
5145***************************************************************************/
5146
5147
5148
5149/*************************************************
5150*         Execute a Regular Expression           *
5151*************************************************/
5152
5153/* This function applies a compiled re to a subject string and picks out
5154portions of the string if it matches. Two elements in the vector are set for
5155each substring: the offsets to the start and end of the substring.
5156
5157Arguments:
5158  argument_re     points to the compiled expression
5159  extra_data      points to extra data or is NULL
5160  subject         points to the subject string
5161  length          length of subject string (may contain binary zeros)
5162  start_offset    where to start in the subject string
5163  options         option bits
5164  offsets         points to a vector of ints to be filled in with offsets
5165  offsetcount     the number of elements in the vector
5166
5167Returns:          > 0 => success; value is the number of elements filled in
5168                  = 0 => success, but offsets is not big enough
5169                   -1 => failed to match
5170                 < -1 => some kind of unexpected problem
5171*/
5172
5173PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5174pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5175  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5176  int offsetcount)
5177{
5178int rc, resetcount, ocount;
5179int first_byte = -1;
5180int req_byte = -1;
5181int req_byte2 = -1;
5182int newline;
5183unsigned long int ims;
5184BOOL using_temporary_offsets = FALSE;
5185BOOL anchored;
5186BOOL startline;
5187BOOL firstline;
5188BOOL first_byte_caseless = FALSE;
5189BOOL req_byte_caseless = FALSE;
5190BOOL utf8;
5191match_data match_block;
5192match_data *md = &match_block;
5193const uschar *tables;
5194const uschar *start_bits = NULL;
5195USPTR start_match = (USPTR)subject + start_offset;
5196USPTR end_subject;
5197USPTR start_partial = NULL;
5198USPTR req_byte_ptr = start_match - 1;
5199
5200pcre_study_data internal_study;
5201const pcre_study_data *study;
5202
5203real_pcre internal_re;
5204const real_pcre *external_re = (const real_pcre *)argument_re;
5205const real_pcre *re = external_re;
5206
5207/* Plausibility checks */
5208
5209if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5210if (re == NULL || subject == NULL ||
5211   (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5212if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5213
5214/* This information is for finding all the numbers associated with a given
5215name, for condition testing. */
5216
5217md->name_table = (uschar *)re + re->name_table_offset;
5218md->name_count = re->name_count;
5219md->name_entry_size = re->name_entry_size;
5220
5221/* Fish out the optional data from the extra_data structure, first setting
5222the default values. */
5223
5224study = NULL;
5225md->match_limit = MATCH_LIMIT;
5226md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5227md->callout_data = NULL;
5228
5229/* The table pointer is always in native byte order. */
5230
5231tables = external_re->tables;
5232
5233if (extra_data != NULL)
5234  {
5235  register unsigned int flags = extra_data->flags;
5236  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5237    study = (const pcre_study_data *)extra_data->study_data;
5238  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5239    md->match_limit = extra_data->match_limit;
5240  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5241    md->match_limit_recursion = extra_data->match_limit_recursion;
5242  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5243    md->callout_data = extra_data->callout_data;
5244  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5245  }
5246
5247/* If the exec call supplied NULL for tables, use the inbuilt ones. This
5248is a feature that makes it possible to save compiled regex and re-use them
5249in other programs later. */
5250
5251if (tables == NULL) tables = _pcre_default_tables;
5252
5253/* Check that the first field in the block is the magic number. If it is not,
5254test for a regex that was compiled on a host of opposite endianness. If this is
5255the case, flipped values are put in internal_re and internal_study if there was
5256study data too. */
5257
5258if (re->magic_number != MAGIC_NUMBER)
5259  {
5260  re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5261  if (re == NULL) return PCRE_ERROR_BADMAGIC;
5262  if (study != NULL) study = &internal_study;
5263  }
5264
5265/* Set up other data */
5266
5267anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5268startline = (re->flags & PCRE_STARTLINE) != 0;
5269firstline = (re->options & PCRE_FIRSTLINE) != 0;
5270
5271/* The code starts after the real_pcre block and the capture name table. */
5272
5273md->start_code = (const uschar *)external_re + re->name_table_offset +
5274  re->name_count * re->name_entry_size;
5275
5276md->start_subject = (USPTR)subject;
5277md->start_offset = start_offset;
5278md->end_subject = md->start_subject + length;
5279end_subject = md->end_subject;
5280
5281md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5282utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5283md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5284
5285md->notbol = (options & PCRE_NOTBOL) != 0;
5286md->noteol = (options & PCRE_NOTEOL) != 0;
5287md->notempty = (options & PCRE_NOTEMPTY) != 0;
5288md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5289md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5290              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5291md->hitend = FALSE;
5292
5293md->recursive = NULL;                   /* No recursion at top level */
5294
5295md->lcc = tables + lcc_offset;
5296md->ctypes = tables + ctypes_offset;
5297
5298/* Handle different \R options. */
5299
5300switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5301  {
5302  case 0:
5303  if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5304    md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5305  else
5306#ifdef BSR_ANYCRLF
5307  md->bsr_anycrlf = TRUE;
5308#else
5309  md->bsr_anycrlf = FALSE;
5310#endif
5311  break;
5312
5313  case PCRE_BSR_ANYCRLF:
5314  md->bsr_anycrlf = TRUE;
5315  break;
5316
5317  case PCRE_BSR_UNICODE:
5318  md->bsr_anycrlf = FALSE;
5319  break;
5320
5321  default: return PCRE_ERROR_BADNEWLINE;
5322  }
5323
5324/* Handle different types of newline. The three bits give eight cases. If
5325nothing is set at run time, whatever was used at compile time applies. */
5326
5327switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5328        (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5329  {
5330  case 0: newline = NEWLINE; break;   /* Compile-time default */
5331  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5332  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5333  case PCRE_NEWLINE_CR+
5334       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5335  case PCRE_NEWLINE_ANY: newline = -1; break;
5336  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5337  default: return PCRE_ERROR_BADNEWLINE;
5338  }
5339
5340if (newline == -2)
5341  {
5342  md->nltype = NLTYPE_ANYCRLF;
5343  }
5344else if (newline < 0)
5345  {
5346  md->nltype = NLTYPE_ANY;
5347  }
5348else
5349  {
5350  md->nltype = NLTYPE_FIXED;
5351  if (newline > 255)
5352    {
5353    md->nllen = 2;
5354    md->nl[0] = (newline >> 8) & 255;
5355    md->nl[1] = newline & 255;
5356    }
5357  else
5358    {
5359    md->nllen = 1;
5360    md->nl[0] = newline;
5361    }
5362  }
5363
5364/* Partial matching was originally supported only for a restricted set of
5365regexes; from release 8.00 there are no restrictions, but the bits are still
5366defined (though never set). So there's no harm in leaving this code. */
5367
5368if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5369  return PCRE_ERROR_BADPARTIAL;
5370
5371/* Check a UTF-8 string if required. Unfortunately there's no way of passing
5372back the character offset. */
5373
5374#ifdef SUPPORT_UTF8
5375if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5376  {
5377  if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5378    return PCRE_ERROR_BADUTF8;
5379  if (start_offset > 0 && start_offset < length)
5380    {
5381    int tb = ((USPTR)subject)[start_offset];
5382    if (tb > 127)
5383      {
5384      tb &= 0xc0;
5385      if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
5386      }
5387    }
5388  }
5389#endif
5390
5391/* The ims options can vary during the matching as a result of the presence
5392of (?ims) items in the pattern. They are kept in a local variable so that
5393restoring at the exit of a group is easy. */
5394
5395ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5396
5397/* If the expression has got more back references than the offsets supplied can
5398hold, we get a temporary chunk of working store to use during the matching.
5399Otherwise, we can use the vector supplied, rounding down its size to a multiple
5400of 3. */
5401
5402ocount = offsetcount - (offsetcount % 3);
5403
5404if (re->top_backref > 0 && re->top_backref >= ocount/3)
5405  {
5406  ocount = re->top_backref * 3 + 3;
5407  md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5408  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5409  using_temporary_offsets = TRUE;
5410  DPRINTF(("Got memory to hold back references\n"));
5411  }
5412else md->offset_vector = offsets;
5413
5414md->offset_end = ocount;
5415md->offset_max = (2*ocount)/3;
5416md->offset_overflow = FALSE;
5417md->capture_last = -1;
5418
5419/* Compute the minimum number of offsets that we need to reset each time. Doing
5420this makes a huge difference to execution time when there aren't many brackets
5421in the pattern. */
5422
5423resetcount = 2 + re->top_bracket * 2;
5424if (resetcount > offsetcount) resetcount = ocount;
5425
5426/* Reset the working variable associated with each extraction. These should
5427never be used unless previously set, but they get saved and restored, and so we
5428initialize them to avoid reading uninitialized locations. */
5429
5430if (md->offset_vector != NULL)
5431  {
5432  register int *iptr = md->offset_vector + ocount;
5433  register int *iend = iptr - resetcount/2 + 1;
5434  while (--iptr >= iend) *iptr = -1;
5435  }
5436
5437/* Set up the first character to match, if available. The first_byte value is
5438never set for an anchored regular expression, but the anchoring may be forced
5439at run time, so we have to test for anchoring. The first char may be unset for
5440an unanchored pattern, of course. If there's no first char and the pattern was
5441studied, there may be a bitmap of possible first characters. */
5442
5443if (!anchored)
5444  {
5445  if ((re->flags & PCRE_FIRSTSET) != 0)
5446    {
5447    first_byte = re->first_byte & 255;
5448    if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5449      first_byte = md->lcc[first_byte];
5450    }
5451  else
5452    if (!startline && study != NULL &&
5453      (study->flags & PCRE_STUDY_MAPPED) != 0)
5454        start_bits = study->start_bits;
5455  }
5456
5457/* For anchored or unanchored matches, there may be a "last known required
5458character" set. */
5459
5460if ((re->flags & PCRE_REQCHSET) != 0)
5461  {
5462  req_byte = re->req_byte & 255;
5463  req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5464  req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
5465  }
5466
5467
5468/* ==========================================================================*/
5469
5470/* Loop for handling unanchored repeated matching attempts; for anchored regexs
5471the loop runs just once. */
5472
5473for(;;)
5474  {
5475  USPTR save_end_subject = end_subject;
5476  USPTR new_start_match;
5477
5478  /* Reset the maximum number of extractions we might see. */
5479
5480  if (md->offset_vector != NULL)
5481    {
5482    register int *iptr = md->offset_vector;
5483    register int *iend = iptr + resetcount;
5484    while (iptr < iend) *iptr++ = -1;
5485    }
5486
5487  /* If firstline is TRUE, the start of the match is constrained to the first
5488  line of a multiline string. That is, the match must be before or at the first
5489  newline. Implement this by temporarily adjusting end_subject so that we stop
5490  scanning at a newline. If the match fails at the newline, later code breaks
5491  this loop. */
5492
5493  if (firstline)
5494    {
5495    USPTR t = start_match;
5496#ifdef SUPPORT_UTF8
5497    if (utf8)
5498      {
5499      while (t < md->end_subject && !IS_NEWLINE(t))
5500        {
5501        t++;
5502        while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5503        }
5504      }
5505    else
5506#endif
5507    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5508    end_subject = t;
5509    }
5510
5511  /* There are some optimizations that avoid running the match if a known
5512  starting point is not found, or if a known later character is not present.
5513  However, there is an option that disables these, for testing and for ensuring
5514  that all callouts do actually occur. */
5515
5516  if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5517    {
5518    /* Advance to a unique first byte if there is one. */
5519
5520    if (first_byte >= 0)
5521      {
5522      if (first_byte_caseless)
5523        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5524          start_match++;
5525      else
5526        while (start_match < end_subject && *start_match != first_byte)
5527          start_match++;
5528      }
5529
5530    /* Or to just after a linebreak for a multiline match */
5531
5532    else if (startline)
5533      {
5534      if (start_match > md->start_subject + start_offset)
5535        {
5536#ifdef SUPPORT_UTF8
5537        if (utf8)
5538          {
5539          while (start_match < end_subject && !WAS_NEWLINE(start_match))
5540            {
5541            start_match++;
5542            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5543              start_match++;
5544            }
5545          }
5546        else
5547#endif
5548        while (start_match < end_subject && !WAS_NEWLINE(start_match))
5549          start_match++;
5550
5551        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5552        and we are now at a LF, advance the match position by one more character.
5553        */
5554
5555        if (start_match[-1] == CHAR_CR &&
5556             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5557             start_match < end_subject &&
5558             *start_match == CHAR_NL)
5559          start_match++;
5560        }
5561      }
5562
5563    /* Or to a non-unique first byte after study */
5564
5565    else if (start_bits != NULL)
5566      {
5567      while (start_match < end_subject)
5568        {
5569        register unsigned int c = *start_match;
5570        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5571          else break;
5572        }
5573      }
5574    }   /* Starting optimizations */
5575
5576  /* Restore fudged end_subject */
5577
5578  end_subject = save_end_subject;
5579
5580  /* The following two optimizations are disabled for partial matching or if
5581  disabling is explicitly requested. */
5582
5583  if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5584    {
5585    /* If the pattern was studied, a minimum subject length may be set. This is
5586    a lower bound; no actual string of that length may actually match the
5587    pattern. Although the value is, strictly, in characters, we treat it as
5588    bytes to avoid spending too much time in this optimization. */
5589
5590    if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5591        (pcre_uint32)(end_subject - start_match) < study->minlength)
5592      {
5593      rc = MATCH_NOMATCH;
5594      break;
5595      }
5596
5597    /* If req_byte is set, we know that that character must appear in the
5598    subject for the match to succeed. If the first character is set, req_byte
5599    must be later in the subject; otherwise the test starts at the match point.
5600    This optimization can save a huge amount of backtracking in patterns with
5601    nested unlimited repeats that aren't going to match. Writing separate code
5602    for cased/caseless versions makes it go faster, as does using an
5603    autoincrement and backing off on a match.
5604
5605    HOWEVER: when the subject string is very, very long, searching to its end
5606    can take a long time, and give bad performance on quite ordinary patterns.
5607    This showed up when somebody was matching something like /^\d+C/ on a
5608    32-megabyte string... so we don't do this when the string is sufficiently
5609    long. */
5610
5611    if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5612      {
5613      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5614
5615      /* We don't need to repeat the search if we haven't yet reached the
5616      place we found it at last time. */
5617
5618      if (p > req_byte_ptr)
5619        {
5620        if (req_byte_caseless)
5621          {
5622          while (p < end_subject)
5623            {
5624            register int pp = *p++;
5625            if (pp == req_byte || pp == req_byte2) { p--; break; }
5626            }
5627          }
5628        else
5629          {
5630          while (p < end_subject)
5631            {
5632            if (*p++ == req_byte) { p--; break; }
5633            }
5634          }
5635
5636        /* If we can't find the required character, break the matching loop,
5637        forcing a match failure. */
5638
5639        if (p >= end_subject)
5640          {
5641          rc = MATCH_NOMATCH;
5642          break;
5643          }
5644
5645        /* If we have found the required character, save the point where we
5646        found it, so that we don't search again next time round the loop if
5647        the start hasn't passed this character yet. */
5648
5649        req_byte_ptr = p;
5650        }
5651      }
5652    }
5653
5654#ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
5655  printf(">>>> Match against: ");
5656  pchars(start_match, end_subject - start_match, TRUE, md);
5657  printf("\n");
5658#endif
5659
5660  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5661  first starting point for which a partial match was found. */
5662
5663  md->start_match_ptr = start_match;
5664  md->start_used_ptr = start_match;
5665  md->match_call_count = 0;
5666  rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
5667    0, 0);
5668  if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5669
5670  switch(rc)
5671    {
5672    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
5673    exactly like PRUNE. */
5674
5675    case MATCH_NOMATCH:
5676    case MATCH_PRUNE:
5677    case MATCH_THEN:
5678    new_start_match = start_match + 1;
5679#ifdef SUPPORT_UTF8
5680    if (utf8)
5681      while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
5682        new_start_match++;
5683#endif
5684    break;
5685
5686    /* SKIP passes back the next starting point explicitly. */
5687
5688    case MATCH_SKIP:
5689    new_start_match = md->start_match_ptr;
5690    break;
5691
5692    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
5693
5694    case MATCH_COMMIT:
5695    rc = MATCH_NOMATCH;
5696    goto ENDLOOP;
5697
5698    /* Any other return is either a match, or some kind of error. */
5699
5700    default:
5701    goto ENDLOOP;
5702    }
5703
5704  /* Control reaches here for the various types of "no match at this point"
5705  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
5706
5707  rc = MATCH_NOMATCH;
5708
5709  /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
5710  newline in the subject (though it may continue over the newline). Therefore,
5711  if we have just failed to match, starting at a newline, do not continue. */
5712
5713  if (firstline && IS_NEWLINE(start_match)) break;
5714
5715  /* Advance to new matching position */
5716
5717  start_match = new_start_match;
5718
5719  /* Break the loop if the pattern is anchored or if we have passed the end of
5720  the subject. */
5721
5722  if (anchored || start_match > end_subject) break;
5723
5724  /* If we have just passed a CR and we are now at a LF, and the pattern does
5725  not contain any explicit matches for \r or \n, and the newline option is CRLF
5726  or ANY or ANYCRLF, advance the match position by one more character. */
5727
5728  if (start_match[-1] == CHAR_CR &&
5729      start_match < end_subject &&
5730      *start_match == CHAR_NL &&
5731      (re->flags & PCRE_HASCRORLF) == 0 &&
5732        (md->nltype == NLTYPE_ANY ||
5733         md->nltype == NLTYPE_ANYCRLF ||
5734         md->nllen == 2))
5735    start_match++;
5736
5737  }   /* End of for(;;) "bumpalong" loop */
5738
5739/* ==========================================================================*/
5740
5741/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
5742conditions is true:
5743
5744(1) The pattern is anchored or the match was failed by (*COMMIT);
5745
5746(2) We are past the end of the subject;
5747
5748(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
5749    this option requests that a match occur at or before the first newline in
5750    the subject.
5751
5752When we have a match and the offset vector is big enough to deal with any
5753backreferences, captured substring offsets will already be set up. In the case
5754where we had to get some local store to hold offsets for backreference
5755processing, copy those that we can. In this case there need not be overflow if
5756certain parts of the pattern were not used, even though there are more
5757capturing parentheses than vector slots. */
5758
5759ENDLOOP:
5760
5761if (rc == MATCH_MATCH)
5762  {
5763  if (using_temporary_offsets)
5764    {
5765    if (offsetcount >= 4)
5766      {
5767      memcpy(offsets + 2, md->offset_vector + 2,
5768        (offsetcount - 2) * sizeof(int));
5769      DPRINTF(("Copied offsets from temporary memory\n"));
5770      }
5771    if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
5772    DPRINTF(("Freeing temporary memory\n"));
5773    (pcre_free)(md->offset_vector);
5774    }
5775
5776  /* Set the return code to the number of captured strings, or 0 if there are
5777  too many to fit into the vector. */
5778
5779  rc = md->offset_overflow? 0 : md->end_offset_top/2;
5780
5781  /* If there is space, set up the whole thing as substring 0. The value of
5782  md->start_match_ptr might be modified if \K was encountered on the success
5783  matching path. */
5784
5785  if (offsetcount < 2) rc = 0; else
5786    {
5787    offsets[0] = md->start_match_ptr - md->start_subject;
5788    offsets[1] = md->end_match_ptr - md->start_subject;
5789    }
5790
5791  DPRINTF((">>>> returning %d\n", rc));
5792  return rc;
5793  }
5794
5795/* Control gets here if there has been an error, or if the overall match
5796attempt has failed at all permitted starting positions. */
5797
5798if (using_temporary_offsets)
5799  {
5800  DPRINTF(("Freeing temporary memory\n"));
5801  (pcre_free)(md->offset_vector);
5802  }
5803
5804if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5805  {
5806  DPRINTF((">>>> error: returning %d\n", rc));
5807  return rc;
5808  }
5809else if (start_partial != NULL)
5810  {
5811  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5812  if (offsetcount > 1)
5813    {
5814    offsets[0] = start_partial - (USPTR)subject;
5815    offsets[1] = end_subject - (USPTR)subject;
5816    }
5817  return PCRE_ERROR_PARTIAL;
5818  }
5819else
5820  {
5821  DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
5822  return PCRE_ERROR_NOMATCH;
5823  }
5824}
5825
5826/* End of pcre_exec.c */
5827