1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P1003.2/D11.2, except for some of the
4   internationalization features.)
5   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998
6   Free Software Foundation, Inc.
7
8   NOTE: The canonical source of this file is maintained with the
9   GNU C Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.
10
11   This program is free software; you can redistribute it and/or modify it
12   under the terms of the GNU General Public License as published by the
13   Free Software Foundation; either version 2, or (at your option) any
14   later version.
15
16   This program is distributed in the hope that it will be useful,
17   but WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   GNU General Public License for more details.
20
21   You should have received a copy of the GNU General Public License
22   along with this program; if not, write to the Free Software Foundation,
23   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24
25#ifdef HAVE_CONFIG_H
26# include <config.h>
27#endif
28
29/* GCC LOCAL: we don't need NLS here.  */
30#undef ENABLE_NLS
31/* GCC LOCAL: to handle defining alloca.  */
32#include "libiberty.h"
33
34/* Do not use a C alloca, we will leak memory and crash.  */
35#ifdef C_ALLOCA
36# define REGEX_MALLOC
37#endif
38
39/* AIX requires this to be the first thing in the file. */
40#if defined _AIX && !defined REGEX_MALLOC
41  #pragma alloca
42#endif
43
44#ifndef PARAMS
45# if defined __GNUC__ || (defined __STDC__ && __STDC__)
46#  define PARAMS(args) args
47# else
48#  define PARAMS(args) ()
49# endif  /* GCC.  */
50#endif  /* Not PARAMS.  */
51
52#if defined STDC_HEADERS && !defined emacs
53# include <stddef.h>
54#else
55/* We need this for `gnu-regex.h', and perhaps for the Emacs include files.  */
56# include <sys/types.h>
57#endif
58
59/* For platform which support the ISO C amendement 1 functionality we
60   support user defined character classes.  */
61#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
62 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
63# include <wchar.h>
64# include <wctype.h>
65#endif
66
67/* This is for other GNU distributions with internationalized messages.  */
68/* GCC LOCAL: ../intl will handle this for us */
69#ifdef ENABLE_NLS
70# include <libintl.h>
71#else
72# define gettext(msgid) (msgid)
73#endif
74
75#ifndef gettext_noop
76/* This define is so xgettext can find the internationalizable
77   strings.  */
78# define gettext_noop(String) String
79#endif
80
81# if !defined(volatile) && !defined(HAVE_VOLATILE)
82#  define volatile
83# endif
84
85/* If we are not linking with Emacs proper,
86   we can't use the relocating allocator
87   even if config.h says that we can.  */
88# undef REL_ALLOC
89
90# if defined STDC_HEADERS || defined _LIBC
91#  include <stdlib.h>
92# else
93char *malloc ();
94char *realloc ();
95# endif
96
97/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
98   If nothing else has been done, use the method below.  */
99# ifdef INHIBIT_STRING_HEADER
100#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
101#   if !defined bzero && !defined bcopy
102#    undef INHIBIT_STRING_HEADER
103#   endif
104#  endif
105# endif
106
107/* This is the normal way of making sure we have a bcopy and a bzero.
108   This is used in most programs--a few other programs avoid this
109   by defining INHIBIT_STRING_HEADER.  */
110# ifndef INHIBIT_STRING_HEADER
111#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
112#   include <string.h>
113#   ifndef bzero
114#    ifndef _LIBC
115#     define bzero(s, n)	(memset (s, '\0', n), (s))
116#    else
117#     define bzero(s, n)	__bzero (s, n)
118#    endif
119#   endif
120#  else
121#   include <strings.h>
122#   ifndef memcmp
123#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
124#   endif
125#   ifndef memcpy
126#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
127#   endif
128#  endif
129# endif
130
131/* Define the syntax stuff for \<, \>, etc.  */
132
133/* This must be nonzero for the wordchar and notwordchar pattern
134   commands in re_match_2.  */
135# ifndef Sword
136#  define Sword 1
137# endif
138
139# ifdef SWITCH_ENUM_BUG
140#  define SWITCH_ENUM_CAST(x) ((int)(x))
141# else
142#  define SWITCH_ENUM_CAST(x) (x)
143# endif
144
145/* How many characters in the character set.  */
146# define CHAR_SET_SIZE 256
147
148# ifdef SYNTAX_TABLE
149
150extern char *re_syntax_table;
151
152# else /* not SYNTAX_TABLE */
153
154static char re_syntax_table[CHAR_SET_SIZE];
155
156static void init_syntax_once PARAMS ((void));
157
158static void
159init_syntax_once ()
160{
161   register int c;
162   static int done = 0;
163
164   if (done)
165     return;
166
167   bzero (re_syntax_table, sizeof re_syntax_table);
168
169   for (c = 'a'; c <= 'z'; c++)
170     re_syntax_table[c] = Sword;
171
172   for (c = 'A'; c <= 'Z'; c++)
173     re_syntax_table[c] = Sword;
174
175   for (c = '0'; c <= '9'; c++)
176     re_syntax_table[c] = Sword;
177
178   re_syntax_table['_'] = Sword;
179
180   done = 1;
181}
182
183# endif /* not SYNTAX_TABLE */
184
185# define SYNTAX(c) re_syntax_table[c]
186
187/* Get the interface, including the syntax bits.  */
188/* GCC LOCAL: call it gnu-regex.h, not regex.h, to avoid name conflicts */
189#include "gnu-regex.h"
190
191/* ISALPHA etc. are used for the character classes.  */
192/* GCC LOCAL: use libiberty's safe-ctype.h, don't bother defining
193   wrapper macros ourselves.  */
194#include <safe-ctype.h>
195
196#ifndef NULL
197# define NULL (void *)0
198#endif
199
200/* We remove any previous definition of `SIGN_EXTEND_CHAR',
201   since ours (we hope) works properly with all combinations of
202   machines, compilers, `char' and `unsigned char' argument types.
203   (Per Bothner suggested the basic approach.)  */
204#undef SIGN_EXTEND_CHAR
205#if __STDC__
206# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
207#else  /* not __STDC__ */
208/* As in Harbison and Steele.  */
209# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
210#endif
211
212/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
213   use `alloca' instead of `malloc'.  This is because using malloc in
214   re_search* or re_match* could cause memory leaks when C-g is used in
215   Emacs; also, malloc is slower and causes storage fragmentation.  On
216   the other hand, malloc is more portable, and easier to debug.
217
218   Because we sometimes use alloca, some routines have to be macros,
219   not functions -- `alloca'-allocated space disappears at the end of the
220   function it is called in.  */
221
222#ifdef REGEX_MALLOC
223
224# define REGEX_ALLOCATE malloc
225# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
226# define REGEX_FREE free
227
228#else /* not REGEX_MALLOC  */
229
230/* Emacs already defines alloca, sometimes.  */
231# ifndef alloca
232
233/* Make alloca work the best possible way.  */
234#  ifdef __GNUC__
235#   define alloca __builtin_alloca
236#  else /* not __GNUC__ */
237#   if HAVE_ALLOCA_H
238#    include <alloca.h>
239#   endif /* HAVE_ALLOCA_H */
240#  endif /* not __GNUC__ */
241
242# endif /* not alloca */
243
244# define REGEX_ALLOCATE alloca
245
246/* Assumes a `char *destination' variable.  */
247# define REGEX_REALLOCATE(source, osize, nsize)				\
248  (destination = (char *) alloca (nsize),				\
249   memcpy (destination, source, osize))
250
251/* No need to do anything to free, after alloca.  */
252# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
253
254#endif /* not REGEX_MALLOC */
255
256/* Define how to allocate the failure stack.  */
257
258#if defined REL_ALLOC && defined REGEX_MALLOC
259
260# define REGEX_ALLOCATE_STACK(size)				\
261  r_alloc (&failure_stack_ptr, (size))
262# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
263  r_re_alloc (&failure_stack_ptr, (nsize))
264# define REGEX_FREE_STACK(ptr)					\
265  r_alloc_free (&failure_stack_ptr)
266
267#else /* not using relocating allocator */
268
269# ifdef REGEX_MALLOC
270
271#  define REGEX_ALLOCATE_STACK malloc
272#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
273#  define REGEX_FREE_STACK free
274
275# else /* not REGEX_MALLOC */
276
277#  define REGEX_ALLOCATE_STACK alloca
278
279#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
280   REGEX_REALLOCATE (source, osize, nsize)
281/* No need to explicitly free anything.  */
282#  define REGEX_FREE_STACK(arg)
283
284# endif /* not REGEX_MALLOC */
285#endif /* not using relocating allocator */
286
287
288/* True if `size1' is non-NULL and PTR is pointing anywhere inside
289   `string1' or just past its end.  This works if PTR is NULL, which is
290   a good thing.  */
291#define FIRST_STRING_P(ptr) 					\
292  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
293
294/* (Re)Allocate N items of type T using malloc, or fail.  */
295#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
296#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
297#define RETALLOC_IF(addr, n, t) \
298  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
299#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
300
301#define BYTEWIDTH 8 /* In bits.  */
302
303#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
304
305#undef MAX
306#undef MIN
307#define MAX(a, b) ((a) > (b) ? (a) : (b))
308#define MIN(a, b) ((a) < (b) ? (a) : (b))
309
310typedef char boolean;
311#define false 0
312#define true 1
313
314static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
315					const char *string1, int size1,
316					const char *string2, int size2,
317					int pos,
318					struct re_registers *regs,
319					int stop));
320
321/* These are the command codes that appear in compiled regular
322   expressions.  Some opcodes are followed by argument bytes.  A
323   command code can specify any interpretation whatsoever for its
324   arguments.  Zero bytes may appear in the compiled regular expression.  */
325
326typedef enum
327{
328  no_op = 0,
329
330  /* Succeed right away--no more backtracking.  */
331  succeed,
332
333        /* Followed by one byte giving n, then by n literal bytes.  */
334  exactn,
335
336        /* Matches any (more or less) character.  */
337  anychar,
338
339        /* Matches any one char belonging to specified set.  First
340           following byte is number of bitmap bytes.  Then come bytes
341           for a bitmap saying which chars are in.  Bits in each byte
342           are ordered low-bit-first.  A character is in the set if its
343           bit is 1.  A character too large to have a bit in the map is
344           automatically not in the set.  */
345  charset,
346
347        /* Same parameters as charset, but match any character that is
348           not one of those specified.  */
349  charset_not,
350
351        /* Start remembering the text that is matched, for storing in a
352           register.  Followed by one byte with the register number, in
353           the range 0 to one less than the pattern buffer's re_nsub
354           field.  Then followed by one byte with the number of groups
355           inner to this one.  (This last has to be part of the
356           start_memory only because we need it in the on_failure_jump
357           of re_match_2.)  */
358  start_memory,
359
360        /* Stop remembering the text that is matched and store it in a
361           memory register.  Followed by one byte with the register
362           number, in the range 0 to one less than `re_nsub' in the
363           pattern buffer, and one byte with the number of inner groups,
364           just like `start_memory'.  (We need the number of inner
365           groups here because we don't have any easy way of finding the
366           corresponding start_memory when we're at a stop_memory.)  */
367  stop_memory,
368
369        /* Match a duplicate of something remembered. Followed by one
370           byte containing the register number.  */
371  duplicate,
372
373        /* Fail unless at beginning of line.  */
374  begline,
375
376        /* Fail unless at end of line.  */
377  endline,
378
379        /* Succeeds if at beginning of buffer (if emacs) or at beginning
380           of string to be matched (if not).  */
381  begbuf,
382
383        /* Analogously, for end of buffer/string.  */
384  endbuf,
385
386        /* Followed by two byte relative address to which to jump.  */
387  jump,
388
389	/* Same as jump, but marks the end of an alternative.  */
390  jump_past_alt,
391
392        /* Followed by two-byte relative address of place to resume at
393           in case of failure.  */
394  on_failure_jump,
395
396        /* Like on_failure_jump, but pushes a placeholder instead of the
397           current string position when executed.  */
398  on_failure_keep_string_jump,
399
400        /* Throw away latest failure point and then jump to following
401           two-byte relative address.  */
402  pop_failure_jump,
403
404        /* Change to pop_failure_jump if know won't have to backtrack to
405           match; otherwise change to jump.  This is used to jump
406           back to the beginning of a repeat.  If what follows this jump
407           clearly won't match what the repeat does, such that we can be
408           sure that there is no use backtracking out of repetitions
409           already matched, then we change it to a pop_failure_jump.
410           Followed by two-byte address.  */
411  maybe_pop_jump,
412
413        /* Jump to following two-byte address, and push a dummy failure
414           point. This failure point will be thrown away if an attempt
415           is made to use it for a failure.  A `+' construct makes this
416           before the first repeat.  Also used as an intermediary kind
417           of jump when compiling an alternative.  */
418  dummy_failure_jump,
419
420	/* Push a dummy failure point and continue.  Used at the end of
421	   alternatives.  */
422  push_dummy_failure,
423
424        /* Followed by two-byte relative address and two-byte number n.
425           After matching N times, jump to the address upon failure.  */
426  succeed_n,
427
428        /* Followed by two-byte relative address, and two-byte number n.
429           Jump to the address N times, then fail.  */
430  jump_n,
431
432        /* Set the following two-byte relative address to the
433           subsequent two-byte number.  The address *includes* the two
434           bytes of number.  */
435  set_number_at,
436
437  wordchar,	/* Matches any word-constituent character.  */
438  notwordchar,	/* Matches any char that is not a word-constituent.  */
439
440  wordbeg,	/* Succeeds if at word beginning.  */
441  wordend,	/* Succeeds if at word end.  */
442
443  wordbound,	/* Succeeds if at a word boundary.  */
444  notwordbound	/* Succeeds if not at a word boundary.  */
445
446#ifdef emacs
447  ,before_dot,	/* Succeeds if before point.  */
448  at_dot,	/* Succeeds if at point.  */
449  after_dot,	/* Succeeds if after point.  */
450
451	/* Matches any character whose syntax is specified.  Followed by
452           a byte which contains a syntax code, e.g., Sword.  */
453  syntaxspec,
454
455	/* Matches any character whose syntax is not that specified.  */
456  notsyntaxspec
457#endif /* emacs */
458} re_opcode_t;
459
460/* Common operations on the compiled pattern.  */
461
462/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
463
464#define STORE_NUMBER(destination, number)				\
465  do {									\
466    (destination)[0] = (number) & 0377;					\
467    (destination)[1] = (number) >> 8;					\
468  } while (0)
469
470/* Same as STORE_NUMBER, except increment DESTINATION to
471   the byte after where the number is stored.  Therefore, DESTINATION
472   must be an lvalue.  */
473
474#define STORE_NUMBER_AND_INCR(destination, number)			\
475  do {									\
476    STORE_NUMBER (destination, number);					\
477    (destination) += 2;							\
478  } while (0)
479
480/* Put into DESTINATION a number stored in two contiguous bytes starting
481   at SOURCE.  */
482
483#define EXTRACT_NUMBER(destination, source)				\
484  do {									\
485    (destination) = *(source) & 0377;					\
486    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
487  } while (0)
488
489#ifdef DEBUG
490static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
491static void
492extract_number (dest, source)
493    int *dest;
494    unsigned char *source;
495{
496  int temp = SIGN_EXTEND_CHAR (*(source + 1));
497  *dest = *source & 0377;
498  *dest += temp << 8;
499}
500
501# ifndef EXTRACT_MACROS /* To debug the macros.  */
502#  undef EXTRACT_NUMBER
503#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
504# endif /* not EXTRACT_MACROS */
505
506#endif /* DEBUG */
507
508/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
509   SOURCE must be an lvalue.  */
510
511#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
512  do {									\
513    EXTRACT_NUMBER (destination, source);				\
514    (source) += 2; 							\
515  } while (0)
516
517#ifdef DEBUG
518static void extract_number_and_incr _RE_ARGS ((int *destination,
519					       unsigned char **source));
520static void
521extract_number_and_incr (destination, source)
522    int *destination;
523    unsigned char **source;
524{
525  extract_number (destination, *source);
526  *source += 2;
527}
528
529# ifndef EXTRACT_MACROS
530#  undef EXTRACT_NUMBER_AND_INCR
531#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
532  extract_number_and_incr (&dest, &src)
533# endif /* not EXTRACT_MACROS */
534
535#endif /* DEBUG */
536
537/* If DEBUG is defined, Regex prints many voluminous messages about what
538   it is doing (if the variable `debug' is nonzero).  If linked with the
539   main program in `iregex.c', you can enter patterns and strings
540   interactively.  And if linked with the main program in `main.c' and
541   the other test files, you can run the already-written tests.  */
542
543#ifdef DEBUG
544
545/* We use standard I/O for debugging.  */
546# include <stdio.h>
547
548/* It is useful to test things that ``must'' be true when debugging.  */
549# include <assert.h>
550
551static int debug = 0;
552
553# define DEBUG_STATEMENT(e) e
554# define DEBUG_PRINT1(x) if (debug) printf (x)
555# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
556# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
557# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
558# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
559  if (debug) print_partial_compiled_pattern (s, e)
560# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
561  if (debug) print_double_string (w, s1, sz1, s2, sz2)
562
563
564/* Print the fastmap in human-readable form.  */
565
566void
567print_fastmap (fastmap)
568    char *fastmap;
569{
570  unsigned was_a_range = 0;
571  unsigned i = 0;
572
573  while (i < (1 << BYTEWIDTH))
574    {
575      if (fastmap[i++])
576	{
577	  was_a_range = 0;
578          putchar (i - 1);
579          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
580            {
581              was_a_range = 1;
582              i++;
583            }
584	  if (was_a_range)
585            {
586              printf ("-");
587              putchar (i - 1);
588            }
589        }
590    }
591  putchar ('\n');
592}
593
594
595/* Print a compiled pattern string in human-readable form, starting at
596   the START pointer into it and ending just before the pointer END.  */
597
598void
599print_partial_compiled_pattern (start, end)
600    unsigned char *start;
601    unsigned char *end;
602{
603  int mcnt, mcnt2;
604  unsigned char *p1;
605  unsigned char *p = start;
606  unsigned char *pend = end;
607
608  if (start == NULL)
609    {
610      printf ("(null)\n");
611      return;
612    }
613
614  /* Loop over pattern commands.  */
615  while (p < pend)
616    {
617      printf ("%d:\t", p - start);
618
619      switch ((re_opcode_t) *p++)
620	{
621        case no_op:
622          printf ("/no_op");
623          break;
624
625	case exactn:
626	  mcnt = *p++;
627          printf ("/exactn/%d", mcnt);
628          do
629	    {
630              putchar ('/');
631	      putchar (*p++);
632            }
633          while (--mcnt);
634          break;
635
636	case start_memory:
637          mcnt = *p++;
638          printf ("/start_memory/%d/%d", mcnt, *p++);
639          break;
640
641	case stop_memory:
642          mcnt = *p++;
643	  printf ("/stop_memory/%d/%d", mcnt, *p++);
644          break;
645
646	case duplicate:
647	  printf ("/duplicate/%d", *p++);
648	  break;
649
650	case anychar:
651	  printf ("/anychar");
652	  break;
653
654	case charset:
655        case charset_not:
656          {
657            register int c, last = -100;
658	    register int in_range = 0;
659
660	    printf ("/charset [%s",
661	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
662
663            assert (p + *p < pend);
664
665            for (c = 0; c < 256; c++)
666	      if (c / 8 < *p
667		  && (p[1 + (c/8)] & (1 << (c % 8))))
668		{
669		  /* Are we starting a range?  */
670		  if (last + 1 == c && ! in_range)
671		    {
672		      putchar ('-');
673		      in_range = 1;
674		    }
675		  /* Have we broken a range?  */
676		  else if (last + 1 != c && in_range)
677              {
678		      putchar (last);
679		      in_range = 0;
680		    }
681
682		  if (! in_range)
683		    putchar (c);
684
685		  last = c;
686              }
687
688	    if (in_range)
689	      putchar (last);
690
691	    putchar (']');
692
693	    p += 1 + *p;
694	  }
695	  break;
696
697	case begline:
698	  printf ("/begline");
699          break;
700
701	case endline:
702          printf ("/endline");
703          break;
704
705	case on_failure_jump:
706          extract_number_and_incr (&mcnt, &p);
707  	  printf ("/on_failure_jump to %d", p + mcnt - start);
708          break;
709
710	case on_failure_keep_string_jump:
711          extract_number_and_incr (&mcnt, &p);
712  	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
713          break;
714
715	case dummy_failure_jump:
716          extract_number_and_incr (&mcnt, &p);
717  	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
718          break;
719
720	case push_dummy_failure:
721          printf ("/push_dummy_failure");
722          break;
723
724        case maybe_pop_jump:
725          extract_number_and_incr (&mcnt, &p);
726  	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
727	  break;
728
729        case pop_failure_jump:
730	  extract_number_and_incr (&mcnt, &p);
731  	  printf ("/pop_failure_jump to %d", p + mcnt - start);
732	  break;
733
734        case jump_past_alt:
735	  extract_number_and_incr (&mcnt, &p);
736  	  printf ("/jump_past_alt to %d", p + mcnt - start);
737	  break;
738
739        case jump:
740	  extract_number_and_incr (&mcnt, &p);
741  	  printf ("/jump to %d", p + mcnt - start);
742	  break;
743
744        case succeed_n:
745          extract_number_and_incr (&mcnt, &p);
746	  p1 = p + mcnt;
747          extract_number_and_incr (&mcnt2, &p);
748	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
749          break;
750
751        case jump_n:
752          extract_number_and_incr (&mcnt, &p);
753	  p1 = p + mcnt;
754          extract_number_and_incr (&mcnt2, &p);
755	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
756          break;
757
758        case set_number_at:
759          extract_number_and_incr (&mcnt, &p);
760	  p1 = p + mcnt;
761          extract_number_and_incr (&mcnt2, &p);
762	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
763          break;
764
765        case wordbound:
766	  printf ("/wordbound");
767	  break;
768
769	case notwordbound:
770	  printf ("/notwordbound");
771          break;
772
773	case wordbeg:
774	  printf ("/wordbeg");
775	  break;
776
777	case wordend:
778	  printf ("/wordend");
779
780# ifdef emacs
781	case before_dot:
782	  printf ("/before_dot");
783          break;
784
785	case at_dot:
786	  printf ("/at_dot");
787          break;
788
789	case after_dot:
790	  printf ("/after_dot");
791          break;
792
793	case syntaxspec:
794          printf ("/syntaxspec");
795	  mcnt = *p++;
796	  printf ("/%d", mcnt);
797          break;
798
799	case notsyntaxspec:
800          printf ("/notsyntaxspec");
801	  mcnt = *p++;
802	  printf ("/%d", mcnt);
803	  break;
804# endif /* emacs */
805
806	case wordchar:
807	  printf ("/wordchar");
808          break;
809
810	case notwordchar:
811	  printf ("/notwordchar");
812          break;
813
814	case begbuf:
815	  printf ("/begbuf");
816          break;
817
818	case endbuf:
819	  printf ("/endbuf");
820          break;
821
822        default:
823          printf ("?%d", *(p-1));
824	}
825
826      putchar ('\n');
827    }
828
829  printf ("%d:\tend of pattern.\n", p - start);
830}
831
832
833void
834print_compiled_pattern (bufp)
835    struct re_pattern_buffer *bufp;
836{
837  unsigned char *buffer = bufp->buffer;
838
839  print_partial_compiled_pattern (buffer, buffer + bufp->used);
840  printf ("%ld bytes used/%ld bytes allocated.\n",
841	  bufp->used, bufp->allocated);
842
843  if (bufp->fastmap_accurate && bufp->fastmap)
844    {
845      printf ("fastmap: ");
846      print_fastmap (bufp->fastmap);
847    }
848
849  printf ("re_nsub: %d\t", bufp->re_nsub);
850  printf ("regs_alloc: %d\t", bufp->regs_allocated);
851  printf ("can_be_null: %d\t", bufp->can_be_null);
852  printf ("newline_anchor: %d\n", bufp->newline_anchor);
853  printf ("no_sub: %d\t", bufp->no_sub);
854  printf ("not_bol: %d\t", bufp->not_bol);
855  printf ("not_eol: %d\t", bufp->not_eol);
856  printf ("syntax: %lx\n", bufp->syntax);
857  /* Perhaps we should print the translate table?  */
858}
859
860
861void
862print_double_string (where, string1, size1, string2, size2)
863    const char *where;
864    const char *string1;
865    const char *string2;
866    int size1;
867    int size2;
868{
869  int this_char;
870
871  if (where == NULL)
872    printf ("(null)");
873  else
874    {
875      if (FIRST_STRING_P (where))
876        {
877          for (this_char = where - string1; this_char < size1; this_char++)
878            putchar (string1[this_char]);
879
880          where = string2;
881        }
882
883      for (this_char = where - string2; this_char < size2; this_char++)
884        putchar (string2[this_char]);
885    }
886}
887
888void
889printchar (c)
890     int c;
891{
892  putc (c, stderr);
893}
894
895#else /* not DEBUG */
896
897# undef assert
898# define assert(e)
899
900# define DEBUG_STATEMENT(e)
901# define DEBUG_PRINT1(x)
902# define DEBUG_PRINT2(x1, x2)
903# define DEBUG_PRINT3(x1, x2, x3)
904# define DEBUG_PRINT4(x1, x2, x3, x4)
905# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
906# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
907
908#endif /* not DEBUG */
909
910/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
911   also be assigned to arbitrarily: each pattern buffer stores its own
912   syntax, so it can be changed between regex compilations.  */
913/* This has no initializer because initialized variables in Emacs
914   become read-only after dumping.  */
915reg_syntax_t re_syntax_options;
916
917
918/* Specify the precise syntax of regexps for compilation.  This provides
919   for compatibility for various utilities which historically have
920   different, incompatible syntaxes.
921
922   The argument SYNTAX is a bit mask comprised of the various bits
923   defined in gnu-regex.h.  We return the old syntax.  */
924
925reg_syntax_t
926re_set_syntax (syntax)
927    reg_syntax_t syntax;
928{
929  reg_syntax_t ret = re_syntax_options;
930
931  re_syntax_options = syntax;
932#ifdef DEBUG
933  if (syntax & RE_DEBUG)
934    debug = 1;
935  else if (debug) /* was on but now is not */
936    debug = 0;
937#endif /* DEBUG */
938  return ret;
939}
940#ifdef _LIBC
941weak_alias (__re_set_syntax, re_set_syntax)
942#endif
943
944/* This table gives an error message for each of the error codes listed
945   in gnu-regex.h.  Obviously the order here has to be same as there.
946   POSIX doesn't require that we do anything for REG_NOERROR,
947   but why not be nice?  */
948
949static const char *const re_error_msgid[] =
950  {
951    gettext_noop ("Success"),	/* REG_NOERROR */
952    gettext_noop ("No match"),	/* REG_NOMATCH */
953    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
954    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
955    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
956    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
957    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
958    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
959    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
960    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
961    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
962    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
963    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
964    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
965    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
966    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
967    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
968  };
969
970/* Avoiding alloca during matching, to placate r_alloc.  */
971
972/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
973   searching and matching functions should not call alloca.  On some
974   systems, alloca is implemented in terms of malloc, and if we're
975   using the relocating allocator routines, then malloc could cause a
976   relocation, which might (if the strings being searched are in the
977   ralloc heap) shift the data out from underneath the regexp
978   routines.
979
980   Here's another reason to avoid allocation: Emacs
981   processes input from X in a signal handler; processing X input may
982   call malloc; if input arrives while a matching routine is calling
983   malloc, then we're scrod.  But Emacs can't just block input while
984   calling matching routines; then we don't notice interrupts when
985   they come in.  So, Emacs blocks input around all regexp calls
986   except the matching calls, which it leaves unprotected, in the
987   faith that they will not malloc.  */
988
989/* Normally, this is fine.  */
990#define MATCH_MAY_ALLOCATE
991
992/* When using GNU C, we are not REALLY using the C alloca, no matter
993   what config.h may say.  So don't take precautions for it.  */
994#ifdef __GNUC__
995# undef C_ALLOCA
996#endif
997
998/* The match routines may not allocate if (1) they would do it with malloc
999   and (2) it's not safe for them to use malloc.
1000   Note that if REL_ALLOC is defined, matching would not use malloc for the
1001   failure stack, but we would still use it for the register vectors;
1002   so REL_ALLOC should not affect this.  */
1003#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1004# undef MATCH_MAY_ALLOCATE
1005#endif
1006
1007
1008/* Failure stack declarations and macros; both re_compile_fastmap and
1009   re_match_2 use a failure stack.  These have to be macros because of
1010   REGEX_ALLOCATE_STACK.  */
1011
1012
1013/* Number of failure points for which to initially allocate space
1014   when matching.  If this number is exceeded, we allocate more
1015   space, so it is not a hard limit.  */
1016#ifndef INIT_FAILURE_ALLOC
1017# define INIT_FAILURE_ALLOC 5
1018#endif
1019
1020/* Roughly the maximum number of failure points on the stack.  Would be
1021   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1022   This is a variable only so users of regex can assign to it; we never
1023   change it ourselves.  */
1024
1025#ifdef INT_IS_16BIT
1026
1027# if defined MATCH_MAY_ALLOCATE
1028/* 4400 was enough to cause a crash on Alpha OSF/1,
1029   whose default stack limit is 2mb.  */
1030long int re_max_failures = 4000;
1031# else
1032long int re_max_failures = 2000;
1033# endif
1034
1035union fail_stack_elt
1036{
1037  unsigned char *pointer;
1038  long int integer;
1039};
1040
1041typedef union fail_stack_elt fail_stack_elt_t;
1042
1043typedef struct
1044{
1045  fail_stack_elt_t *stack;
1046  unsigned long int size;
1047  unsigned long int avail;		/* Offset of next open position.  */
1048} fail_stack_type;
1049
1050#else /* not INT_IS_16BIT */
1051
1052# if defined MATCH_MAY_ALLOCATE
1053/* 4400 was enough to cause a crash on Alpha OSF/1,
1054   whose default stack limit is 2mb.  */
1055int re_max_failures = 20000;
1056# else
1057int re_max_failures = 2000;
1058# endif
1059
1060union fail_stack_elt
1061{
1062  unsigned char *pointer;
1063  int integer;
1064};
1065
1066typedef union fail_stack_elt fail_stack_elt_t;
1067
1068typedef struct
1069{
1070  fail_stack_elt_t *stack;
1071  unsigned size;
1072  unsigned avail;			/* Offset of next open position.  */
1073} fail_stack_type;
1074
1075#endif /* INT_IS_16BIT */
1076
1077#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1078#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1079#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1080
1081
1082/* Define macros to initialize and free the failure stack.
1083   Do `return -2' if the alloc fails.  */
1084
1085#ifdef MATCH_MAY_ALLOCATE
1086# define INIT_FAIL_STACK()						\
1087  do {									\
1088    fail_stack.stack = (fail_stack_elt_t *)				\
1089      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1090									\
1091    if (fail_stack.stack == NULL)					\
1092      return -2;							\
1093									\
1094    fail_stack.size = INIT_FAILURE_ALLOC;				\
1095    fail_stack.avail = 0;						\
1096  } while (0)
1097
1098# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1099#else
1100# define INIT_FAIL_STACK()						\
1101  do {									\
1102    fail_stack.avail = 0;						\
1103  } while (0)
1104
1105# define RESET_FAIL_STACK()
1106#endif
1107
1108
1109/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1110
1111   Return 1 if succeeds, and 0 if either ran out of memory
1112   allocating space for it or it was already too large.
1113
1114   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1115
1116#define DOUBLE_FAIL_STACK(fail_stack)					\
1117  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1118   ? 0									\
1119   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1120        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1121          (fail_stack).size * sizeof (fail_stack_elt_t),		\
1122          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1123									\
1124      (fail_stack).stack == NULL					\
1125      ? 0								\
1126      : ((fail_stack).size <<= 1, 					\
1127         1)))
1128
1129
1130/* Push pointer POINTER on FAIL_STACK.
1131   Return 1 if was able to do so and 0 if ran out of memory allocating
1132   space to do so.  */
1133#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1134  ((FAIL_STACK_FULL ()							\
1135    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1136   ? 0									\
1137   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1138      1))
1139
1140/* Push a pointer value onto the failure stack.
1141   Assumes the variable `fail_stack'.  Probably should only
1142   be called from within `PUSH_FAILURE_POINT'.  */
1143#define PUSH_FAILURE_POINTER(item)					\
1144  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1145
1146/* This pushes an integer-valued item onto the failure stack.
1147   Assumes the variable `fail_stack'.  Probably should only
1148   be called from within `PUSH_FAILURE_POINT'.  */
1149#define PUSH_FAILURE_INT(item)					\
1150  fail_stack.stack[fail_stack.avail++].integer = (item)
1151
1152/* Push a fail_stack_elt_t value onto the failure stack.
1153   Assumes the variable `fail_stack'.  Probably should only
1154   be called from within `PUSH_FAILURE_POINT'.  */
1155#define PUSH_FAILURE_ELT(item)					\
1156  fail_stack.stack[fail_stack.avail++] =  (item)
1157
1158/* These three POP... operations complement the three PUSH... operations.
1159   All assume that `fail_stack' is nonempty.  */
1160#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1161#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1162#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1163
1164/* Used to omit pushing failure point id's when we're not debugging.  */
1165#ifdef DEBUG
1166# define DEBUG_PUSH PUSH_FAILURE_INT
1167# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1168#else
1169# define DEBUG_PUSH(item)
1170# define DEBUG_POP(item_addr)
1171#endif
1172
1173
1174/* Push the information about the state we will need
1175   if we ever fail back to it.
1176
1177   Requires variables fail_stack, regstart, regend, reg_info, and
1178   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1179   be declared.
1180
1181   Does `return FAILURE_CODE' if runs out of memory.  */
1182
1183#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1184  do {									\
1185    char *destination;							\
1186    /* Must be int, so when we don't save any registers, the arithmetic	\
1187       of 0 + -1 isn't done as unsigned.  */				\
1188    /* Can't be int, since there is not a shred of a guarantee that int	\
1189       is wide enough to hold a value of something to which pointer can	\
1190       be assigned */							\
1191    active_reg_t this_reg;						\
1192    									\
1193    DEBUG_STATEMENT (failure_id++);					\
1194    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1195    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1196    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1197    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1198									\
1199    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1200    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1201									\
1202    /* Ensure we have enough space allocated for what we will push.  */	\
1203    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1204      {									\
1205        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1206          return failure_code;						\
1207									\
1208        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1209		       (fail_stack).size);				\
1210        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1211      }									\
1212									\
1213    /* Push the info, starting with the registers.  */			\
1214    DEBUG_PRINT1 ("\n");						\
1215									\
1216    if (1)								\
1217      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1218	   this_reg++)							\
1219	{								\
1220	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1221	  DEBUG_STATEMENT (num_regs_pushed++);				\
1222									\
1223	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1224	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1225									\
1226	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1227	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1228									\
1229	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1230			reg_info[this_reg].word.pointer);		\
1231	  DEBUG_PRINT2 (" match_null=%d",				\
1232			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1233	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1234	  DEBUG_PRINT2 (" matched_something=%d",			\
1235			MATCHED_SOMETHING (reg_info[this_reg]));	\
1236	  DEBUG_PRINT2 (" ever_matched=%d",				\
1237			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1238	  DEBUG_PRINT1 ("\n");						\
1239	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1240	}								\
1241									\
1242    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1243    PUSH_FAILURE_INT (lowest_active_reg);				\
1244									\
1245    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1246    PUSH_FAILURE_INT (highest_active_reg);				\
1247									\
1248    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1249    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1250    PUSH_FAILURE_POINTER (pattern_place);				\
1251									\
1252    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1253    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1254				 size2);				\
1255    DEBUG_PRINT1 ("'\n");						\
1256    PUSH_FAILURE_POINTER (string_place);				\
1257									\
1258    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1259    DEBUG_PUSH (failure_id);						\
1260  } while (0)
1261
1262/* This is the number of items that are pushed and popped on the stack
1263   for each register.  */
1264#define NUM_REG_ITEMS  3
1265
1266/* Individual items aside from the registers.  */
1267#ifdef DEBUG
1268# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1269#else
1270# define NUM_NONREG_ITEMS 4
1271#endif
1272
1273/* We push at most this many items on the stack.  */
1274/* We used to use (num_regs - 1), which is the number of registers
1275   this regexp will save; but that was changed to 5
1276   to avoid stack overflow for a regexp with lots of parens.  */
1277#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1278
1279/* We actually push this many items.  */
1280#define NUM_FAILURE_ITEMS				\
1281  (((0							\
1282     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1283    * NUM_REG_ITEMS)					\
1284   + NUM_NONREG_ITEMS)
1285
1286/* How many items can still be added to the stack without overflowing it.  */
1287#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1288
1289
1290/* Pops what PUSH_FAIL_STACK pushes.
1291
1292   We restore into the parameters, all of which should be lvalues:
1293     STR -- the saved data position.
1294     PAT -- the saved pattern position.
1295     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1296     REGSTART, REGEND -- arrays of string positions.
1297     REG_INFO -- array of information about each subexpression.
1298
1299   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1300   `pend', `string1', `size1', `string2', and `size2'.  */
1301
1302#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1303{									\
1304  DEBUG_STATEMENT (unsigned failure_id;)				\
1305  active_reg_t this_reg;						\
1306  const unsigned char *string_temp;					\
1307									\
1308  assert (!FAIL_STACK_EMPTY ());					\
1309									\
1310  /* Remove failure points and point to how many regs pushed.  */	\
1311  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1312  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1313  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1314									\
1315  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1316									\
1317  DEBUG_POP (&failure_id);						\
1318  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1319									\
1320  /* If the saved string location is NULL, it came from an		\
1321     on_failure_keep_string_jump opcode, and we want to throw away the	\
1322     saved NULL, thus retaining our current position in the string.  */	\
1323  string_temp = POP_FAILURE_POINTER ();					\
1324  if (string_temp != NULL)						\
1325    str = (const char *) string_temp;					\
1326									\
1327  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1328  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1329  DEBUG_PRINT1 ("'\n");							\
1330									\
1331  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1332  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1333  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1334									\
1335  /* Restore register info.  */						\
1336  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1337  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1338									\
1339  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1340  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1341									\
1342  if (1)								\
1343    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1344      {									\
1345	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1346									\
1347	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1348	DEBUG_PRINT2 ("      info: %p\n",				\
1349		      reg_info[this_reg].word.pointer);			\
1350									\
1351	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1352	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1353									\
1354	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1355	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1356      }									\
1357  else									\
1358    {									\
1359      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1360	{								\
1361	  reg_info[this_reg].word.integer = 0;				\
1362	  regend[this_reg] = 0;						\
1363	  regstart[this_reg] = 0;					\
1364	}								\
1365      highest_active_reg = high_reg;					\
1366    }									\
1367									\
1368  set_regs_matched_done = 0;						\
1369  DEBUG_STATEMENT (nfailure_points_popped++);				\
1370} /* POP_FAILURE_POINT */
1371
1372
1373
1374/* Structure for per-register (a.k.a. per-group) information.
1375   Other register information, such as the
1376   starting and ending positions (which are addresses), and the list of
1377   inner groups (which is a bits list) are maintained in separate
1378   variables.
1379
1380   We are making a (strictly speaking) nonportable assumption here: that
1381   the compiler will pack our bit fields into something that fits into
1382   the type of `word', i.e., is something that fits into one item on the
1383   failure stack.  */
1384
1385
1386/* Declarations and macros for re_match_2.  */
1387
1388typedef union
1389{
1390  fail_stack_elt_t word;
1391  struct
1392  {
1393      /* This field is one if this group can match the empty string,
1394         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1395#define MATCH_NULL_UNSET_VALUE 3
1396    unsigned match_null_string_p : 2;
1397    unsigned is_active : 1;
1398    unsigned matched_something : 1;
1399    unsigned ever_matched_something : 1;
1400  } bits;
1401} register_info_type;
1402
1403#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1404#define IS_ACTIVE(R)  ((R).bits.is_active)
1405#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1406#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1407
1408
1409/* Call this when have matched a real character; it sets `matched' flags
1410   for the subexpressions which we are currently inside.  Also records
1411   that those subexprs have matched.  */
1412#define SET_REGS_MATCHED()						\
1413  do									\
1414    {									\
1415      if (!set_regs_matched_done)					\
1416	{								\
1417	  active_reg_t r;						\
1418	  set_regs_matched_done = 1;					\
1419	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1420	    {								\
1421	      MATCHED_SOMETHING (reg_info[r])				\
1422		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1423		= 1;							\
1424	    }								\
1425	}								\
1426    }									\
1427  while (0)
1428
1429/* Registers are set to a sentinel when they haven't yet matched.  */
1430static char reg_unset_dummy;
1431#define REG_UNSET_VALUE (&reg_unset_dummy)
1432#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1433
1434/* Subroutine declarations and macros for regex_compile.  */
1435
1436static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1437					      reg_syntax_t syntax,
1438					      struct re_pattern_buffer *bufp));
1439static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1440static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1441				 int arg1, int arg2));
1442static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1443				  int arg, unsigned char *end));
1444static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1445				  int arg1, int arg2, unsigned char *end));
1446static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1447					   reg_syntax_t syntax));
1448static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1449					   reg_syntax_t syntax));
1450static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1451					      const char *pend,
1452					      char *translate,
1453					      reg_syntax_t syntax,
1454					      unsigned char *b));
1455
1456/* Fetch the next character in the uncompiled pattern---translating it
1457   if necessary.  Also cast from a signed character in the constant
1458   string passed to us by the user to an unsigned char that we can use
1459   as an array index (in, e.g., `translate').  */
1460#ifndef PATFETCH
1461# define PATFETCH(c)							\
1462  do {if (p == pend) return REG_EEND;					\
1463    c = (unsigned char) *p++;						\
1464    if (translate) c = (unsigned char) translate[c];			\
1465  } while (0)
1466#endif
1467
1468/* Fetch the next character in the uncompiled pattern, with no
1469   translation.  */
1470#define PATFETCH_RAW(c)							\
1471  do {if (p == pend) return REG_EEND;					\
1472    c = (unsigned char) *p++; 						\
1473  } while (0)
1474
1475/* Go backwards one character in the pattern.  */
1476#define PATUNFETCH p--
1477
1478
1479/* If `translate' is non-null, return translate[D], else just D.  We
1480   cast the subscript to translate because some data is declared as
1481   `char *', to avoid warnings when a string constant is passed.  But
1482   when we use a character as a subscript we must make it unsigned.  */
1483#ifndef TRANSLATE
1484# define TRANSLATE(d) \
1485  (translate ? (char) translate[(unsigned char) (d)] : (d))
1486#endif
1487
1488
1489/* Macros for outputting the compiled pattern into `buffer'.  */
1490
1491/* If the buffer isn't allocated when it comes in, use this.  */
1492#define INIT_BUF_SIZE  32
1493
1494/* Make sure we have at least N more bytes of space in buffer.  */
1495#define GET_BUFFER_SPACE(n)						\
1496    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1497      EXTEND_BUFFER ()
1498
1499/* Make sure we have one more byte of buffer space and then add C to it.  */
1500#define BUF_PUSH(c)							\
1501  do {									\
1502    GET_BUFFER_SPACE (1);						\
1503    *b++ = (unsigned char) (c);						\
1504  } while (0)
1505
1506
1507/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1508#define BUF_PUSH_2(c1, c2)						\
1509  do {									\
1510    GET_BUFFER_SPACE (2);						\
1511    *b++ = (unsigned char) (c1);					\
1512    *b++ = (unsigned char) (c2);					\
1513  } while (0)
1514
1515
1516/* As with BUF_PUSH_2, except for three bytes.  */
1517#define BUF_PUSH_3(c1, c2, c3)						\
1518  do {									\
1519    GET_BUFFER_SPACE (3);						\
1520    *b++ = (unsigned char) (c1);					\
1521    *b++ = (unsigned char) (c2);					\
1522    *b++ = (unsigned char) (c3);					\
1523  } while (0)
1524
1525
1526/* Store a jump with opcode OP at LOC to location TO.  We store a
1527   relative address offset by the three bytes the jump itself occupies.  */
1528#define STORE_JUMP(op, loc, to) \
1529  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1530
1531/* Likewise, for a two-argument jump.  */
1532#define STORE_JUMP2(op, loc, to, arg) \
1533  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1534
1535/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1536#define INSERT_JUMP(op, loc, to) \
1537  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1538
1539/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1540#define INSERT_JUMP2(op, loc, to, arg) \
1541  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1542
1543
1544/* This is not an arbitrary limit: the arguments which represent offsets
1545   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1546   be too small, many things would have to change.  */
1547/* Any other compiler which, like MSC, has allocation limit below 2^16
1548   bytes will have to use approach similar to what was done below for
1549   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1550   reallocating to 0 bytes.  Such thing is not going to work too well.
1551   You have been warned!!  */
1552#if defined _MSC_VER  && !defined WIN32
1553/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1554   The REALLOC define eliminates a flurry of conversion warnings,
1555   but is not required. */
1556# define MAX_BUF_SIZE  65500L
1557# define REALLOC(p,s) realloc ((p), (size_t) (s))
1558#else
1559# define MAX_BUF_SIZE (1L << 16)
1560# define REALLOC(p,s) realloc ((p), (s))
1561#endif
1562
1563/* Extend the buffer by twice its current size via realloc and
1564   reset the pointers that pointed into the old block to point to the
1565   correct places in the new one.  If extending the buffer results in it
1566   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1567#define EXTEND_BUFFER()							\
1568  do { 									\
1569    unsigned char *old_buffer = bufp->buffer;				\
1570    if (bufp->allocated == MAX_BUF_SIZE) 				\
1571      return REG_ESIZE;							\
1572    bufp->allocated <<= 1;						\
1573    if (bufp->allocated > MAX_BUF_SIZE)					\
1574      bufp->allocated = MAX_BUF_SIZE; 					\
1575    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1576    if (bufp->buffer == NULL)						\
1577      return REG_ESPACE;						\
1578    /* If the buffer moved, move all the pointers into it.  */		\
1579    if (old_buffer != bufp->buffer)					\
1580      {									\
1581        b = (b - old_buffer) + bufp->buffer;				\
1582        begalt = (begalt - old_buffer) + bufp->buffer;			\
1583        if (fixup_alt_jump)						\
1584          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1585        if (laststart)							\
1586          laststart = (laststart - old_buffer) + bufp->buffer;		\
1587        if (pending_exact)						\
1588          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1589      }									\
1590  } while (0)
1591
1592
1593/* Since we have one byte reserved for the register number argument to
1594   {start,stop}_memory, the maximum number of groups we can report
1595   things about is what fits in that byte.  */
1596#define MAX_REGNUM 255
1597
1598/* But patterns can have more than `MAX_REGNUM' registers.  We just
1599   ignore the excess.  */
1600typedef unsigned regnum_t;
1601
1602
1603/* Macros for the compile stack.  */
1604
1605/* Since offsets can go either forwards or backwards, this type needs to
1606   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1607/* int may be not enough when sizeof(int) == 2.  */
1608typedef long pattern_offset_t;
1609
1610typedef struct
1611{
1612  pattern_offset_t begalt_offset;
1613  pattern_offset_t fixup_alt_jump;
1614  pattern_offset_t inner_group_offset;
1615  pattern_offset_t laststart_offset;
1616  regnum_t regnum;
1617} compile_stack_elt_t;
1618
1619
1620typedef struct
1621{
1622  compile_stack_elt_t *stack;
1623  unsigned size;
1624  unsigned avail;			/* Offset of next open position.  */
1625} compile_stack_type;
1626
1627
1628#define INIT_COMPILE_STACK_SIZE 32
1629
1630#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1631#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1632
1633/* The next available element.  */
1634#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1635
1636
1637/* Set the bit for character C in a list.  */
1638#define SET_LIST_BIT(c)                               \
1639  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1640   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1641
1642
1643/* Get the next unsigned number in the uncompiled pattern.  */
1644#define GET_UNSIGNED_NUMBER(num) 					\
1645  { if (p != pend)							\
1646     {									\
1647       PATFETCH (c); 							\
1648       while (ISDIGIT (c)) 						\
1649         { 								\
1650           if (num < 0)							\
1651              num = 0;							\
1652           num = num * 10 + c - '0'; 					\
1653           if (p == pend) 						\
1654              break; 							\
1655           PATFETCH (c);						\
1656         } 								\
1657       } 								\
1658    }
1659
1660#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1661/* The GNU C library provides support for user-defined character classes
1662   and the functions from ISO C amendement 1.  */
1663# ifdef CHARCLASS_NAME_MAX
1664#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1665# else
1666/* This shouldn't happen but some implementation might still have this
1667   problem.  Use a reasonable default value.  */
1668#  define CHAR_CLASS_MAX_LENGTH 256
1669# endif
1670
1671# ifdef _LIBC
1672#  define IS_CHAR_CLASS(string) __wctype (string)
1673# else
1674#  define IS_CHAR_CLASS(string) wctype (string)
1675# endif
1676#else
1677# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1678
1679# define IS_CHAR_CLASS(string)						\
1680   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1681    || STREQ (string, "lower") || STREQ (string, "digit")		\
1682    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1683    || STREQ (string, "space") || STREQ (string, "print")		\
1684    || STREQ (string, "punct") || STREQ (string, "graph")		\
1685    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1686#endif
1687
1688#ifndef MATCH_MAY_ALLOCATE
1689
1690/* If we cannot allocate large objects within re_match_2_internal,
1691   we make the fail stack and register vectors global.
1692   The fail stack, we grow to the maximum size when a regexp
1693   is compiled.
1694   The register vectors, we adjust in size each time we
1695   compile a regexp, according to the number of registers it needs.  */
1696
1697static fail_stack_type fail_stack;
1698
1699/* Size with which the following vectors are currently allocated.
1700   That is so we can make them bigger as needed,
1701   but never make them smaller.  */
1702static int regs_allocated_size;
1703
1704static const char **     regstart, **     regend;
1705static const char ** old_regstart, ** old_regend;
1706static const char **best_regstart, **best_regend;
1707static register_info_type *reg_info;
1708static const char **reg_dummy;
1709static register_info_type *reg_info_dummy;
1710
1711/* Make the register vectors big enough for NUM_REGS registers,
1712   but don't make them smaller.  */
1713
1714static
1715regex_grow_registers (num_regs)
1716     int num_regs;
1717{
1718  if (num_regs > regs_allocated_size)
1719    {
1720      RETALLOC_IF (regstart,	 num_regs, const char *);
1721      RETALLOC_IF (regend,	 num_regs, const char *);
1722      RETALLOC_IF (old_regstart, num_regs, const char *);
1723      RETALLOC_IF (old_regend,	 num_regs, const char *);
1724      RETALLOC_IF (best_regstart, num_regs, const char *);
1725      RETALLOC_IF (best_regend,	 num_regs, const char *);
1726      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1727      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1728      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1729
1730      regs_allocated_size = num_regs;
1731    }
1732}
1733
1734#endif /* not MATCH_MAY_ALLOCATE */
1735
1736static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1737						 compile_stack,
1738						 regnum_t regnum));
1739
1740/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1741   Returns one of error codes defined in `gnu-regex.h', or zero for success.
1742
1743   Assumes the `allocated' (and perhaps `buffer') and `translate'
1744   fields are set in BUFP on entry.
1745
1746   If it succeeds, results are put in BUFP (if it returns an error, the
1747   contents of BUFP are undefined):
1748     `buffer' is the compiled pattern;
1749     `syntax' is set to SYNTAX;
1750     `used' is set to the length of the compiled pattern;
1751     `fastmap_accurate' is zero;
1752     `re_nsub' is the number of subexpressions in PATTERN;
1753     `not_bol' and `not_eol' are zero;
1754
1755   The `fastmap' and `newline_anchor' fields are neither
1756   examined nor set.  */
1757
1758/* Return, freeing storage we allocated.  */
1759#define FREE_STACK_RETURN(value)		\
1760  return (free (compile_stack.stack), value)
1761
1762static reg_errcode_t
1763regex_compile (pattern, size, syntax, bufp)
1764     const char *pattern;
1765     size_t size;
1766     reg_syntax_t syntax;
1767     struct re_pattern_buffer *bufp;
1768{
1769  /* We fetch characters from PATTERN here.  Even though PATTERN is
1770     `char *' (i.e., signed), we declare these variables as unsigned, so
1771     they can be reliably used as array indices.  */
1772  register unsigned char c, c1;
1773
1774  /* A random temporary spot in PATTERN.  */
1775  const char *p1;
1776
1777  /* Points to the end of the buffer, where we should append.  */
1778  register unsigned char *b;
1779
1780  /* Keeps track of unclosed groups.  */
1781  compile_stack_type compile_stack;
1782
1783  /* Points to the current (ending) position in the pattern.  */
1784  const char *p = pattern;
1785  const char *pend = pattern + size;
1786
1787  /* How to translate the characters in the pattern.  */
1788  RE_TRANSLATE_TYPE translate = bufp->translate;
1789
1790  /* Address of the count-byte of the most recently inserted `exactn'
1791     command.  This makes it possible to tell if a new exact-match
1792     character can be added to that command or if the character requires
1793     a new `exactn' command.  */
1794  unsigned char *pending_exact = 0;
1795
1796  /* Address of start of the most recently finished expression.
1797     This tells, e.g., postfix * where to find the start of its
1798     operand.  Reset at the beginning of groups and alternatives.  */
1799  unsigned char *laststart = 0;
1800
1801  /* Address of beginning of regexp, or inside of last group.  */
1802  unsigned char *begalt;
1803
1804  /* Place in the uncompiled pattern (i.e., the {) to
1805     which to go back if the interval is invalid.  */
1806  const char *beg_interval;
1807
1808  /* Address of the place where a forward jump should go to the end of
1809     the containing expression.  Each alternative of an `or' -- except the
1810     last -- ends with a forward jump of this sort.  */
1811  unsigned char *fixup_alt_jump = 0;
1812
1813  /* Counts open-groups as they are encountered.  Remembered for the
1814     matching close-group on the compile stack, so the same register
1815     number is put in the stop_memory as the start_memory.  */
1816  regnum_t regnum = 0;
1817
1818#ifdef DEBUG
1819  DEBUG_PRINT1 ("\nCompiling pattern: ");
1820  if (debug)
1821    {
1822      unsigned debug_count;
1823
1824      for (debug_count = 0; debug_count < size; debug_count++)
1825        putchar (pattern[debug_count]);
1826      putchar ('\n');
1827    }
1828#endif /* DEBUG */
1829
1830  /* Initialize the compile stack.  */
1831  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1832  if (compile_stack.stack == NULL)
1833    return REG_ESPACE;
1834
1835  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1836  compile_stack.avail = 0;
1837
1838  /* Initialize the pattern buffer.  */
1839  bufp->syntax = syntax;
1840  bufp->fastmap_accurate = 0;
1841  bufp->not_bol = bufp->not_eol = 0;
1842
1843  /* Set `used' to zero, so that if we return an error, the pattern
1844     printer (for debugging) will think there's no pattern.  We reset it
1845     at the end.  */
1846  bufp->used = 0;
1847
1848  /* Always count groups, whether or not bufp->no_sub is set.  */
1849  bufp->re_nsub = 0;
1850
1851#if !defined emacs && !defined SYNTAX_TABLE
1852  /* Initialize the syntax table.  */
1853   init_syntax_once ();
1854#endif
1855
1856  if (bufp->allocated == 0)
1857    {
1858      if (bufp->buffer)
1859	{ /* If zero allocated, but buffer is non-null, try to realloc
1860             enough space.  This loses if buffer's address is bogus, but
1861             that is the user's responsibility.  */
1862          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1863        }
1864      else
1865        { /* Caller did not allocate a buffer.  Do it for them.  */
1866          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1867        }
1868      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1869
1870      bufp->allocated = INIT_BUF_SIZE;
1871    }
1872
1873  begalt = b = bufp->buffer;
1874
1875  /* Loop through the uncompiled pattern until we're at the end.  */
1876  while (p != pend)
1877    {
1878      PATFETCH (c);
1879
1880      switch (c)
1881        {
1882        case '^':
1883          {
1884            if (   /* If at start of pattern, it's an operator.  */
1885                   p == pattern + 1
1886                   /* If context independent, it's an operator.  */
1887                || syntax & RE_CONTEXT_INDEP_ANCHORS
1888                   /* Otherwise, depends on what's come before.  */
1889                || at_begline_loc_p (pattern, p, syntax))
1890              BUF_PUSH (begline);
1891            else
1892              goto normal_char;
1893          }
1894          break;
1895
1896
1897        case '$':
1898          {
1899            if (   /* If at end of pattern, it's an operator.  */
1900                   p == pend
1901                   /* If context independent, it's an operator.  */
1902                || syntax & RE_CONTEXT_INDEP_ANCHORS
1903                   /* Otherwise, depends on what's next.  */
1904                || at_endline_loc_p (p, pend, syntax))
1905               BUF_PUSH (endline);
1906             else
1907               goto normal_char;
1908           }
1909           break;
1910
1911
1912	case '+':
1913        case '?':
1914          if ((syntax & RE_BK_PLUS_QM)
1915              || (syntax & RE_LIMITED_OPS))
1916            goto normal_char;
1917        handle_plus:
1918        case '*':
1919          /* If there is no previous pattern... */
1920          if (!laststart)
1921            {
1922              if (syntax & RE_CONTEXT_INVALID_OPS)
1923                FREE_STACK_RETURN (REG_BADRPT);
1924              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1925                goto normal_char;
1926            }
1927
1928          {
1929            /* Are we optimizing this jump?  */
1930            boolean keep_string_p = false;
1931
1932            /* 1 means zero (many) matches is allowed.  */
1933            char zero_times_ok = 0, many_times_ok = 0;
1934
1935            /* If there is a sequence of repetition chars, collapse it
1936               down to just one (the right one).  We can't combine
1937               interval operators with these because of, e.g., `a{2}*',
1938               which should only match an even number of `a's.  */
1939
1940            for (;;)
1941              {
1942                zero_times_ok |= c != '+';
1943                many_times_ok |= c != '?';
1944
1945                if (p == pend)
1946                  break;
1947
1948                PATFETCH (c);
1949
1950                if (c == '*'
1951                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1952                  ;
1953
1954                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1955                  {
1956                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1957
1958                    PATFETCH (c1);
1959                    if (!(c1 == '+' || c1 == '?'))
1960                      {
1961                        PATUNFETCH;
1962                        PATUNFETCH;
1963                        break;
1964                      }
1965
1966                    c = c1;
1967                  }
1968                else
1969                  {
1970                    PATUNFETCH;
1971                    break;
1972                  }
1973
1974                /* If we get here, we found another repeat character.  */
1975               }
1976
1977            /* Star, etc. applied to an empty pattern is equivalent
1978               to an empty pattern.  */
1979            if (!laststart)
1980              break;
1981
1982            /* Now we know whether or not zero matches is allowed
1983               and also whether or not two or more matches is allowed.  */
1984            if (many_times_ok)
1985              { /* More than one repetition is allowed, so put in at the
1986                   end a backward relative jump from `b' to before the next
1987                   jump we're going to put in below (which jumps from
1988                   laststart to after this jump).
1989
1990                   But if we are at the `*' in the exact sequence `.*\n',
1991                   insert an unconditional jump backwards to the .,
1992                   instead of the beginning of the loop.  This way we only
1993                   push a failure point once, instead of every time
1994                   through the loop.  */
1995                assert (p - 1 > pattern);
1996
1997                /* Allocate the space for the jump.  */
1998                GET_BUFFER_SPACE (3);
1999
2000                /* We know we are not at the first character of the pattern,
2001                   because laststart was nonzero.  And we've already
2002                   incremented `p', by the way, to be the character after
2003                   the `*'.  Do we have to do something analogous here
2004                   for null bytes, because of RE_DOT_NOT_NULL?  */
2005                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2006		    && zero_times_ok
2007                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2008                    && !(syntax & RE_DOT_NEWLINE))
2009                  { /* We have .*\n.  */
2010                    STORE_JUMP (jump, b, laststart);
2011                    keep_string_p = true;
2012                  }
2013                else
2014                  /* Anything else.  */
2015                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2016
2017                /* We've added more stuff to the buffer.  */
2018                b += 3;
2019              }
2020
2021            /* On failure, jump from laststart to b + 3, which will be the
2022               end of the buffer after this jump is inserted.  */
2023            GET_BUFFER_SPACE (3);
2024            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2025                                       : on_failure_jump,
2026                         laststart, b + 3);
2027            pending_exact = 0;
2028            b += 3;
2029
2030            if (!zero_times_ok)
2031              {
2032                /* At least one repetition is required, so insert a
2033                   `dummy_failure_jump' before the initial
2034                   `on_failure_jump' instruction of the loop. This
2035                   effects a skip over that instruction the first time
2036                   we hit that loop.  */
2037                GET_BUFFER_SPACE (3);
2038                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2039                b += 3;
2040              }
2041            }
2042	  break;
2043
2044
2045	case '.':
2046          laststart = b;
2047          BUF_PUSH (anychar);
2048          break;
2049
2050
2051        case '[':
2052          {
2053            boolean had_char_class = false;
2054
2055            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2056
2057            /* Ensure that we have enough space to push a charset: the
2058               opcode, the length count, and the bitset; 34 bytes in all.  */
2059	    GET_BUFFER_SPACE (34);
2060
2061            laststart = b;
2062
2063            /* We test `*p == '^' twice, instead of using an if
2064               statement, so we only need one BUF_PUSH.  */
2065            BUF_PUSH (*p == '^' ? charset_not : charset);
2066            if (*p == '^')
2067              p++;
2068
2069            /* Remember the first position in the bracket expression.  */
2070            p1 = p;
2071
2072            /* Push the number of bytes in the bitmap.  */
2073            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2074
2075            /* Clear the whole map.  */
2076            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2077
2078            /* charset_not matches newline according to a syntax bit.  */
2079            if ((re_opcode_t) b[-2] == charset_not
2080                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2081              SET_LIST_BIT ('\n');
2082
2083            /* Read in characters and ranges, setting map bits.  */
2084            for (;;)
2085              {
2086                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2087
2088                PATFETCH (c);
2089
2090                /* \ might escape characters inside [...] and [^...].  */
2091                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2092                  {
2093                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2094
2095                    PATFETCH (c1);
2096                    SET_LIST_BIT (c1);
2097                    continue;
2098                  }
2099
2100                /* Could be the end of the bracket expression.  If it's
2101                   not (i.e., when the bracket expression is `[]' so
2102                   far), the ']' character bit gets set way below.  */
2103                if (c == ']' && p != p1 + 1)
2104                  break;
2105
2106                /* Look ahead to see if it's a range when the last thing
2107                   was a character class.  */
2108                if (had_char_class && c == '-' && *p != ']')
2109                  FREE_STACK_RETURN (REG_ERANGE);
2110
2111                /* Look ahead to see if it's a range when the last thing
2112                   was a character: if this is a hyphen not at the
2113                   beginning or the end of a list, then it's the range
2114                   operator.  */
2115                if (c == '-'
2116                    && !(p - 2 >= pattern && p[-2] == '[')
2117                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2118                    && *p != ']')
2119                  {
2120                    reg_errcode_t ret
2121                      = compile_range (&p, pend, translate, syntax, b);
2122                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2123                  }
2124
2125                else if (p[0] == '-' && p[1] != ']')
2126                  { /* This handles ranges made up of characters only.  */
2127                    reg_errcode_t ret;
2128
2129		    /* Move past the `-'.  */
2130                    PATFETCH (c1);
2131
2132                    ret = compile_range (&p, pend, translate, syntax, b);
2133                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2134                  }
2135
2136                /* See if we're at the beginning of a possible character
2137                   class.  */
2138
2139                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2140                  { /* Leave room for the null.  */
2141                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2142
2143                    PATFETCH (c);
2144                    c1 = 0;
2145
2146                    /* If pattern is `[[:'.  */
2147                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2148
2149                    for (;;)
2150                      {
2151                        PATFETCH (c);
2152                        if ((c == ':' && *p == ']') || p == pend
2153                            || c1 == CHAR_CLASS_MAX_LENGTH)
2154                          break;
2155                        str[c1++] = c;
2156                      }
2157                    str[c1] = '\0';
2158
2159                    /* If isn't a word bracketed by `[:' and `:]':
2160                       undo the ending character, the letters, and leave
2161                       the leading `:' and `[' (but set bits for them).  */
2162                    if (c == ':' && *p == ']')
2163                      {
2164/* GCC LOCAL: Skip this code if we don't have btowc().  btowc() is */
2165/* defined in the 1994 Amendment 1 to ISO C and may not be present on */
2166/* systems where we have wchar.h and wctype.h.   */
2167#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
2168                        boolean is_lower = STREQ (str, "lower");
2169                        boolean is_upper = STREQ (str, "upper");
2170			wctype_t wt;
2171                        int ch;
2172
2173			wt = IS_CHAR_CLASS (str);
2174			if (wt == 0)
2175			  FREE_STACK_RETURN (REG_ECTYPE);
2176
2177                        /* Throw away the ] at the end of the character
2178                           class.  */
2179                        PATFETCH (c);
2180
2181                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2182
2183                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2184			  {
2185# ifdef _LIBC
2186			    if (__iswctype (__btowc (ch), wt))
2187			      SET_LIST_BIT (ch);
2188#else
2189			    if (iswctype (btowc (ch), wt))
2190			      SET_LIST_BIT (ch);
2191#endif
2192
2193			    if (translate && (is_upper || is_lower)
2194				&& (ISUPPER (ch) || ISLOWER (ch)))
2195			      SET_LIST_BIT (ch);
2196			  }
2197
2198                        had_char_class = true;
2199#else
2200                        int ch;
2201                        boolean is_alnum = STREQ (str, "alnum");
2202                        boolean is_alpha = STREQ (str, "alpha");
2203                        boolean is_blank = STREQ (str, "blank");
2204                        boolean is_cntrl = STREQ (str, "cntrl");
2205                        boolean is_digit = STREQ (str, "digit");
2206                        boolean is_graph = STREQ (str, "graph");
2207                        boolean is_lower = STREQ (str, "lower");
2208                        boolean is_print = STREQ (str, "print");
2209                        boolean is_punct = STREQ (str, "punct");
2210                        boolean is_space = STREQ (str, "space");
2211                        boolean is_upper = STREQ (str, "upper");
2212                        boolean is_xdigit = STREQ (str, "xdigit");
2213
2214                        if (!IS_CHAR_CLASS (str))
2215			  FREE_STACK_RETURN (REG_ECTYPE);
2216
2217                        /* Throw away the ] at the end of the character
2218                           class.  */
2219                        PATFETCH (c);
2220
2221                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2222
2223                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2224                          {
2225			    /* This was split into 3 if's to
2226			       avoid an arbitrary limit in some compiler.  */
2227                            if (   (is_alnum  && ISALNUM (ch))
2228                                || (is_alpha  && ISALPHA (ch))
2229                                || (is_blank  && ISBLANK (ch))
2230                                || (is_cntrl  && ISCNTRL (ch)))
2231			      SET_LIST_BIT (ch);
2232			    if (   (is_digit  && ISDIGIT (ch))
2233                                || (is_graph  && ISGRAPH (ch))
2234                                || (is_lower  && ISLOWER (ch))
2235                                || (is_print  && ISPRINT (ch)))
2236			      SET_LIST_BIT (ch);
2237			    if (   (is_punct  && ISPUNCT (ch))
2238                                || (is_space  && ISSPACE (ch))
2239                                || (is_upper  && ISUPPER (ch))
2240                                || (is_xdigit && ISXDIGIT (ch)))
2241			      SET_LIST_BIT (ch);
2242			    if (   translate && (is_upper || is_lower)
2243				&& (ISUPPER (ch) || ISLOWER (ch)))
2244			      SET_LIST_BIT (ch);
2245                          }
2246                        had_char_class = true;
2247#endif	/* libc || wctype.h */
2248                      }
2249                    else
2250                      {
2251                        c1++;
2252                        while (c1--)
2253                          PATUNFETCH;
2254                        SET_LIST_BIT ('[');
2255                        SET_LIST_BIT (':');
2256                        had_char_class = false;
2257                      }
2258                  }
2259                else
2260                  {
2261                    had_char_class = false;
2262                    SET_LIST_BIT (c);
2263                  }
2264              }
2265
2266            /* Discard any (non)matching list bytes that are all 0 at the
2267               end of the map.  Decrease the map-length byte too.  */
2268            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2269              b[-1]--;
2270            b += b[-1];
2271          }
2272          break;
2273
2274
2275	case '(':
2276          if (syntax & RE_NO_BK_PARENS)
2277            goto handle_open;
2278          else
2279            goto normal_char;
2280
2281
2282        case ')':
2283          if (syntax & RE_NO_BK_PARENS)
2284            goto handle_close;
2285          else
2286            goto normal_char;
2287
2288
2289        case '\n':
2290          if (syntax & RE_NEWLINE_ALT)
2291            goto handle_alt;
2292          else
2293            goto normal_char;
2294
2295
2296	case '|':
2297          if (syntax & RE_NO_BK_VBAR)
2298            goto handle_alt;
2299          else
2300            goto normal_char;
2301
2302
2303        case '{':
2304           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2305             goto handle_interval;
2306           else
2307             goto normal_char;
2308
2309
2310        case '\\':
2311          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2312
2313          /* Do not translate the character after the \, so that we can
2314             distinguish, e.g., \B from \b, even if we normally would
2315             translate, e.g., B to b.  */
2316          PATFETCH_RAW (c);
2317
2318          switch (c)
2319            {
2320            case '(':
2321              if (syntax & RE_NO_BK_PARENS)
2322                goto normal_backslash;
2323
2324            handle_open:
2325              bufp->re_nsub++;
2326              regnum++;
2327
2328              if (COMPILE_STACK_FULL)
2329                {
2330                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2331                            compile_stack_elt_t);
2332                  if (compile_stack.stack == NULL) return REG_ESPACE;
2333
2334                  compile_stack.size <<= 1;
2335                }
2336
2337              /* These are the values to restore when we hit end of this
2338                 group.  They are all relative offsets, so that if the
2339                 whole pattern moves because of realloc, they will still
2340                 be valid.  */
2341              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2342              COMPILE_STACK_TOP.fixup_alt_jump
2343                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2344              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2345              COMPILE_STACK_TOP.regnum = regnum;
2346
2347              /* We will eventually replace the 0 with the number of
2348                 groups inner to this one.  But do not push a
2349                 start_memory for groups beyond the last one we can
2350                 represent in the compiled pattern.  */
2351              if (regnum <= MAX_REGNUM)
2352                {
2353                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2354                  BUF_PUSH_3 (start_memory, regnum, 0);
2355                }
2356
2357              compile_stack.avail++;
2358
2359              fixup_alt_jump = 0;
2360              laststart = 0;
2361              begalt = b;
2362	      /* If we've reached MAX_REGNUM groups, then this open
2363		 won't actually generate any code, so we'll have to
2364		 clear pending_exact explicitly.  */
2365	      pending_exact = 0;
2366              break;
2367
2368
2369            case ')':
2370              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2371
2372              if (COMPILE_STACK_EMPTY)
2373		{
2374		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2375		    goto normal_backslash;
2376		  else
2377		    FREE_STACK_RETURN (REG_ERPAREN);
2378		}
2379
2380            handle_close:
2381              if (fixup_alt_jump)
2382                { /* Push a dummy failure point at the end of the
2383                     alternative for a possible future
2384                     `pop_failure_jump' to pop.  See comments at
2385                     `push_dummy_failure' in `re_match_2'.  */
2386                  BUF_PUSH (push_dummy_failure);
2387
2388                  /* We allocated space for this jump when we assigned
2389                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2390                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2391                }
2392
2393              /* See similar code for backslashed left paren above.  */
2394              if (COMPILE_STACK_EMPTY)
2395		{
2396		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2397		    goto normal_char;
2398		  else
2399		    FREE_STACK_RETURN (REG_ERPAREN);
2400		}
2401
2402              /* Since we just checked for an empty stack above, this
2403                 ``can't happen''.  */
2404              assert (compile_stack.avail != 0);
2405              {
2406                /* We don't just want to restore into `regnum', because
2407                   later groups should continue to be numbered higher,
2408                   as in `(ab)c(de)' -- the second group is #2.  */
2409                regnum_t this_group_regnum;
2410
2411                compile_stack.avail--;
2412                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2413                fixup_alt_jump
2414                  = COMPILE_STACK_TOP.fixup_alt_jump
2415                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2416                    : 0;
2417                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2418                this_group_regnum = COMPILE_STACK_TOP.regnum;
2419		/* If we've reached MAX_REGNUM groups, then this open
2420		   won't actually generate any code, so we'll have to
2421		   clear pending_exact explicitly.  */
2422		pending_exact = 0;
2423
2424                /* We're at the end of the group, so now we know how many
2425                   groups were inside this one.  */
2426                if (this_group_regnum <= MAX_REGNUM)
2427                  {
2428                    unsigned char *inner_group_loc
2429                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2430
2431                    *inner_group_loc = regnum - this_group_regnum;
2432                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2433                                regnum - this_group_regnum);
2434                  }
2435              }
2436              break;
2437
2438
2439            case '|':					/* `\|'.  */
2440              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2441                goto normal_backslash;
2442            handle_alt:
2443              if (syntax & RE_LIMITED_OPS)
2444                goto normal_char;
2445
2446              /* Insert before the previous alternative a jump which
2447                 jumps to this alternative if the former fails.  */
2448              GET_BUFFER_SPACE (3);
2449              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2450              pending_exact = 0;
2451              b += 3;
2452
2453              /* The alternative before this one has a jump after it
2454                 which gets executed if it gets matched.  Adjust that
2455                 jump so it will jump to this alternative's analogous
2456                 jump (put in below, which in turn will jump to the next
2457                 (if any) alternative's such jump, etc.).  The last such
2458                 jump jumps to the correct final destination.  A picture:
2459                          _____ _____
2460                          |   | |   |
2461                          |   v |   v
2462                         a | b   | c
2463
2464                 If we are at `b', then fixup_alt_jump right now points to a
2465                 three-byte space after `a'.  We'll put in the jump, set
2466                 fixup_alt_jump to right after `b', and leave behind three
2467                 bytes which we'll fill in when we get to after `c'.  */
2468
2469              if (fixup_alt_jump)
2470                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2471
2472              /* Mark and leave space for a jump after this alternative,
2473                 to be filled in later either by next alternative or
2474                 when know we're at the end of a series of alternatives.  */
2475              fixup_alt_jump = b;
2476              GET_BUFFER_SPACE (3);
2477              b += 3;
2478
2479              laststart = 0;
2480              begalt = b;
2481              break;
2482
2483
2484            case '{':
2485              /* If \{ is a literal.  */
2486              if (!(syntax & RE_INTERVALS)
2487                     /* If we're at `\{' and it's not the open-interval
2488                        operator.  */
2489                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2490                  || (p - 2 == pattern  &&  p == pend))
2491                goto normal_backslash;
2492
2493            handle_interval:
2494              {
2495                /* If got here, then the syntax allows intervals.  */
2496
2497                /* At least (most) this many matches must be made.  */
2498                int lower_bound = -1, upper_bound = -1;
2499
2500                beg_interval = p - 1;
2501
2502                if (p == pend)
2503                  {
2504                    if (syntax & RE_NO_BK_BRACES)
2505                      goto unfetch_interval;
2506                    else
2507                      FREE_STACK_RETURN (REG_EBRACE);
2508                  }
2509
2510                GET_UNSIGNED_NUMBER (lower_bound);
2511
2512                if (c == ',')
2513                  {
2514                    GET_UNSIGNED_NUMBER (upper_bound);
2515                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2516                  }
2517                else
2518                  /* Interval such as `{1}' => match exactly once. */
2519                  upper_bound = lower_bound;
2520
2521                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2522                    || lower_bound > upper_bound)
2523                  {
2524                    if (syntax & RE_NO_BK_BRACES)
2525                      goto unfetch_interval;
2526                    else
2527                      FREE_STACK_RETURN (REG_BADBR);
2528                  }
2529
2530                if (!(syntax & RE_NO_BK_BRACES))
2531                  {
2532                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2533
2534                    PATFETCH (c);
2535                  }
2536
2537                if (c != '}')
2538                  {
2539                    if (syntax & RE_NO_BK_BRACES)
2540                      goto unfetch_interval;
2541                    else
2542                      FREE_STACK_RETURN (REG_BADBR);
2543                  }
2544
2545                /* We just parsed a valid interval.  */
2546
2547                /* If it's invalid to have no preceding re.  */
2548                if (!laststart)
2549                  {
2550                    if (syntax & RE_CONTEXT_INVALID_OPS)
2551                      FREE_STACK_RETURN (REG_BADRPT);
2552                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2553                      laststart = b;
2554                    else
2555                      goto unfetch_interval;
2556                  }
2557
2558                /* If the upper bound is zero, don't want to succeed at
2559                   all; jump from `laststart' to `b + 3', which will be
2560                   the end of the buffer after we insert the jump.  */
2561                 if (upper_bound == 0)
2562                   {
2563                     GET_BUFFER_SPACE (3);
2564                     INSERT_JUMP (jump, laststart, b + 3);
2565                     b += 3;
2566                   }
2567
2568                 /* Otherwise, we have a nontrivial interval.  When
2569                    we're all done, the pattern will look like:
2570                      set_number_at <jump count> <upper bound>
2571                      set_number_at <succeed_n count> <lower bound>
2572                      succeed_n <after jump addr> <succeed_n count>
2573                      <body of loop>
2574                      jump_n <succeed_n addr> <jump count>
2575                    (The upper bound and `jump_n' are omitted if
2576                    `upper_bound' is 1, though.)  */
2577                 else
2578                   { /* If the upper bound is > 1, we need to insert
2579                        more at the end of the loop.  */
2580                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2581
2582                     GET_BUFFER_SPACE (nbytes);
2583
2584                     /* Initialize lower bound of the `succeed_n', even
2585                        though it will be set during matching by its
2586                        attendant `set_number_at' (inserted next),
2587                        because `re_compile_fastmap' needs to know.
2588                        Jump to the `jump_n' we might insert below.  */
2589                     INSERT_JUMP2 (succeed_n, laststart,
2590                                   b + 5 + (upper_bound > 1) * 5,
2591                                   lower_bound);
2592                     b += 5;
2593
2594                     /* Code to initialize the lower bound.  Insert
2595                        before the `succeed_n'.  The `5' is the last two
2596                        bytes of this `set_number_at', plus 3 bytes of
2597                        the following `succeed_n'.  */
2598                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2599                     b += 5;
2600
2601                     if (upper_bound > 1)
2602                       { /* More than one repetition is allowed, so
2603                            append a backward jump to the `succeed_n'
2604                            that starts this interval.
2605
2606                            When we've reached this during matching,
2607                            we'll have matched the interval once, so
2608                            jump back only `upper_bound - 1' times.  */
2609                         STORE_JUMP2 (jump_n, b, laststart + 5,
2610                                      upper_bound - 1);
2611                         b += 5;
2612
2613                         /* The location we want to set is the second
2614                            parameter of the `jump_n'; that is `b-2' as
2615                            an absolute address.  `laststart' will be
2616                            the `set_number_at' we're about to insert;
2617                            `laststart+3' the number to set, the source
2618                            for the relative address.  But we are
2619                            inserting into the middle of the pattern --
2620                            so everything is getting moved up by 5.
2621                            Conclusion: (b - 2) - (laststart + 3) + 5,
2622                            i.e., b - laststart.
2623
2624                            We insert this at the beginning of the loop
2625                            so that if we fail during matching, we'll
2626                            reinitialize the bounds.  */
2627                         insert_op2 (set_number_at, laststart, b - laststart,
2628                                     upper_bound - 1, b);
2629                         b += 5;
2630                       }
2631                   }
2632                pending_exact = 0;
2633                beg_interval = NULL;
2634              }
2635              break;
2636
2637            unfetch_interval:
2638              /* If an invalid interval, match the characters as literals.  */
2639               assert (beg_interval);
2640               p = beg_interval;
2641               beg_interval = NULL;
2642
2643               /* normal_char and normal_backslash need `c'.  */
2644               PATFETCH (c);
2645
2646               if (!(syntax & RE_NO_BK_BRACES))
2647                 {
2648                   if (p > pattern  &&  p[-1] == '\\')
2649                     goto normal_backslash;
2650                 }
2651               goto normal_char;
2652
2653#ifdef emacs
2654            /* There is no way to specify the before_dot and after_dot
2655               operators.  rms says this is ok.  --karl  */
2656            case '=':
2657              BUF_PUSH (at_dot);
2658              break;
2659
2660            case 's':
2661              laststart = b;
2662              PATFETCH (c);
2663              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2664              break;
2665
2666            case 'S':
2667              laststart = b;
2668              PATFETCH (c);
2669              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2670              break;
2671#endif /* emacs */
2672
2673
2674            case 'w':
2675	      if (syntax & RE_NO_GNU_OPS)
2676		goto normal_char;
2677              laststart = b;
2678              BUF_PUSH (wordchar);
2679              break;
2680
2681
2682            case 'W':
2683	      if (syntax & RE_NO_GNU_OPS)
2684		goto normal_char;
2685              laststart = b;
2686              BUF_PUSH (notwordchar);
2687              break;
2688
2689
2690            case '<':
2691	      if (syntax & RE_NO_GNU_OPS)
2692		goto normal_char;
2693              BUF_PUSH (wordbeg);
2694              break;
2695
2696            case '>':
2697	      if (syntax & RE_NO_GNU_OPS)
2698		goto normal_char;
2699              BUF_PUSH (wordend);
2700              break;
2701
2702            case 'b':
2703	      if (syntax & RE_NO_GNU_OPS)
2704		goto normal_char;
2705              BUF_PUSH (wordbound);
2706              break;
2707
2708            case 'B':
2709	      if (syntax & RE_NO_GNU_OPS)
2710		goto normal_char;
2711              BUF_PUSH (notwordbound);
2712              break;
2713
2714            case '`':
2715	      if (syntax & RE_NO_GNU_OPS)
2716		goto normal_char;
2717              BUF_PUSH (begbuf);
2718              break;
2719
2720            case '\'':
2721	      if (syntax & RE_NO_GNU_OPS)
2722		goto normal_char;
2723              BUF_PUSH (endbuf);
2724              break;
2725
2726            case '1': case '2': case '3': case '4': case '5':
2727            case '6': case '7': case '8': case '9':
2728              if (syntax & RE_NO_BK_REFS)
2729                goto normal_char;
2730
2731              c1 = c - '0';
2732
2733              if (c1 > regnum)
2734                FREE_STACK_RETURN (REG_ESUBREG);
2735
2736              /* Can't back reference to a subexpression if inside of it.  */
2737              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2738                goto normal_char;
2739
2740              laststart = b;
2741              BUF_PUSH_2 (duplicate, c1);
2742              break;
2743
2744
2745            case '+':
2746            case '?':
2747              if (syntax & RE_BK_PLUS_QM)
2748                goto handle_plus;
2749              else
2750                goto normal_backslash;
2751
2752            default:
2753            normal_backslash:
2754              /* You might think it would be useful for \ to mean
2755                 not to translate; but if we don't translate it
2756                 it will never match anything.  */
2757              c = TRANSLATE (c);
2758              goto normal_char;
2759            }
2760          break;
2761
2762
2763	default:
2764        /* Expects the character in `c'.  */
2765	normal_char:
2766	      /* If no exactn currently being built.  */
2767          if (!pending_exact
2768
2769              /* If last exactn not at current position.  */
2770              || pending_exact + *pending_exact + 1 != b
2771
2772              /* We have only one byte following the exactn for the count.  */
2773	      || *pending_exact == (1 << BYTEWIDTH) - 1
2774
2775              /* If followed by a repetition operator.  */
2776              || *p == '*' || *p == '^'
2777	      || ((syntax & RE_BK_PLUS_QM)
2778		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2779		  : (*p == '+' || *p == '?'))
2780	      || ((syntax & RE_INTERVALS)
2781                  && ((syntax & RE_NO_BK_BRACES)
2782		      ? *p == '{'
2783                      : (p[0] == '\\' && p[1] == '{'))))
2784	    {
2785	      /* Start building a new exactn.  */
2786
2787              laststart = b;
2788
2789	      BUF_PUSH_2 (exactn, 0);
2790	      pending_exact = b - 1;
2791            }
2792
2793	  BUF_PUSH (c);
2794          (*pending_exact)++;
2795	  break;
2796        } /* switch (c) */
2797    } /* while p != pend */
2798
2799
2800  /* Through the pattern now.  */
2801
2802  if (fixup_alt_jump)
2803    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2804
2805  if (!COMPILE_STACK_EMPTY)
2806    FREE_STACK_RETURN (REG_EPAREN);
2807
2808  /* If we don't want backtracking, force success
2809     the first time we reach the end of the compiled pattern.  */
2810  if (syntax & RE_NO_POSIX_BACKTRACKING)
2811    BUF_PUSH (succeed);
2812
2813  free (compile_stack.stack);
2814
2815  /* We have succeeded; set the length of the buffer.  */
2816  bufp->used = b - bufp->buffer;
2817
2818#ifdef DEBUG
2819  if (debug)
2820    {
2821      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2822      print_compiled_pattern (bufp);
2823    }
2824#endif /* DEBUG */
2825
2826#ifndef MATCH_MAY_ALLOCATE
2827  /* Initialize the failure stack to the largest possible stack.  This
2828     isn't necessary unless we're trying to avoid calling alloca in
2829     the search and match routines.  */
2830  {
2831    int num_regs = bufp->re_nsub + 1;
2832
2833    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2834       is strictly greater than re_max_failures, the largest possible stack
2835       is 2 * re_max_failures failure points.  */
2836    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2837      {
2838	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2839
2840# ifdef emacs
2841	if (! fail_stack.stack)
2842	  fail_stack.stack
2843	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2844					    * sizeof (fail_stack_elt_t));
2845	else
2846	  fail_stack.stack
2847	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2848					     (fail_stack.size
2849					      * sizeof (fail_stack_elt_t)));
2850# else /* not emacs */
2851	if (! fail_stack.stack)
2852	  fail_stack.stack
2853	    = (fail_stack_elt_t *) malloc (fail_stack.size
2854					   * sizeof (fail_stack_elt_t));
2855	else
2856	  fail_stack.stack
2857	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2858					    (fail_stack.size
2859					     * sizeof (fail_stack_elt_t)));
2860# endif /* not emacs */
2861      }
2862
2863    regex_grow_registers (num_regs);
2864  }
2865#endif /* not MATCH_MAY_ALLOCATE */
2866
2867  return REG_NOERROR;
2868} /* regex_compile */
2869
2870/* Subroutines for `regex_compile'.  */
2871
2872/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2873
2874static void
2875store_op1 (op, loc, arg)
2876    re_opcode_t op;
2877    unsigned char *loc;
2878    int arg;
2879{
2880  *loc = (unsigned char) op;
2881  STORE_NUMBER (loc + 1, arg);
2882}
2883
2884
2885/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2886
2887static void
2888store_op2 (op, loc, arg1, arg2)
2889    re_opcode_t op;
2890    unsigned char *loc;
2891    int arg1, arg2;
2892{
2893  *loc = (unsigned char) op;
2894  STORE_NUMBER (loc + 1, arg1);
2895  STORE_NUMBER (loc + 3, arg2);
2896}
2897
2898
2899/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2900   for OP followed by two-byte integer parameter ARG.  */
2901
2902static void
2903insert_op1 (op, loc, arg, end)
2904    re_opcode_t op;
2905    unsigned char *loc;
2906    int arg;
2907    unsigned char *end;
2908{
2909  register unsigned char *pfrom = end;
2910  register unsigned char *pto = end + 3;
2911
2912  while (pfrom != loc)
2913    *--pto = *--pfrom;
2914
2915  store_op1 (op, loc, arg);
2916}
2917
2918
2919/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2920
2921static void
2922insert_op2 (op, loc, arg1, arg2, end)
2923    re_opcode_t op;
2924    unsigned char *loc;
2925    int arg1, arg2;
2926    unsigned char *end;
2927{
2928  register unsigned char *pfrom = end;
2929  register unsigned char *pto = end + 5;
2930
2931  while (pfrom != loc)
2932    *--pto = *--pfrom;
2933
2934  store_op2 (op, loc, arg1, arg2);
2935}
2936
2937
2938/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2939   after an alternative or a begin-subexpression.  We assume there is at
2940   least one character before the ^.  */
2941
2942static boolean
2943at_begline_loc_p (pattern, p, syntax)
2944    const char *pattern, *p;
2945    reg_syntax_t syntax;
2946{
2947  const char *prev = p - 2;
2948  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2949
2950  return
2951       /* After a subexpression?  */
2952       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2953       /* After an alternative?  */
2954    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2955}
2956
2957
2958/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2959   at least one character after the $, i.e., `P < PEND'.  */
2960
2961static boolean
2962at_endline_loc_p (p, pend, syntax)
2963    const char *p, *pend;
2964    reg_syntax_t syntax;
2965{
2966  const char *next = p;
2967  boolean next_backslash = *next == '\\';
2968  const char *next_next = p + 1 < pend ? p + 1 : 0;
2969
2970  return
2971       /* Before a subexpression?  */
2972       (syntax & RE_NO_BK_PARENS ? *next == ')'
2973        : next_backslash && next_next && *next_next == ')')
2974       /* Before an alternative?  */
2975    || (syntax & RE_NO_BK_VBAR ? *next == '|'
2976        : next_backslash && next_next && *next_next == '|');
2977}
2978
2979
2980/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2981   false if it's not.  */
2982
2983static boolean
2984group_in_compile_stack (compile_stack, regnum)
2985    compile_stack_type compile_stack;
2986    regnum_t regnum;
2987{
2988  int this_element;
2989
2990  for (this_element = compile_stack.avail - 1;
2991       this_element >= 0;
2992       this_element--)
2993    if (compile_stack.stack[this_element].regnum == regnum)
2994      return true;
2995
2996  return false;
2997}
2998
2999
3000/* Read the ending character of a range (in a bracket expression) from the
3001   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3002   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3003   Then we set the translation of all bits between the starting and
3004   ending characters (inclusive) in the compiled pattern B.
3005
3006   Return an error code.
3007
3008   We use these short variable names so we can use the same macros as
3009   `regex_compile' itself.  */
3010
3011static reg_errcode_t
3012compile_range (p_ptr, pend, translate, syntax, b)
3013    const char **p_ptr, *pend;
3014    RE_TRANSLATE_TYPE translate;
3015    reg_syntax_t syntax;
3016    unsigned char *b;
3017{
3018  unsigned this_char;
3019
3020  const char *p = *p_ptr;
3021  unsigned int range_start, range_end;
3022
3023  if (p == pend)
3024    return REG_ERANGE;
3025
3026  /* Even though the pattern is a signed `char *', we need to fetch
3027     with unsigned char *'s; if the high bit of the pattern character
3028     is set, the range endpoints will be negative if we fetch using a
3029     signed char *.
3030
3031     We also want to fetch the endpoints without translating them; the
3032     appropriate translation is done in the bit-setting loop below.  */
3033  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3034  range_start = ((const unsigned char *) p)[-2];
3035  range_end   = ((const unsigned char *) p)[0];
3036
3037  /* Have to increment the pointer into the pattern string, so the
3038     caller isn't still at the ending character.  */
3039  (*p_ptr)++;
3040
3041  /* If the start is after the end, the range is empty.  */
3042  if (range_start > range_end)
3043    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3044
3045  /* Here we see why `this_char' has to be larger than an `unsigned
3046     char' -- the range is inclusive, so if `range_end' == 0xff
3047     (assuming 8-bit characters), we would otherwise go into an infinite
3048     loop, since all characters <= 0xff.  */
3049  for (this_char = range_start; this_char <= range_end; this_char++)
3050    {
3051      SET_LIST_BIT (TRANSLATE (this_char));
3052    }
3053
3054  return REG_NOERROR;
3055}
3056
3057/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3058   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3059   characters can start a string that matches the pattern.  This fastmap
3060   is used by re_search to skip quickly over impossible starting points.
3061
3062   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3063   area as BUFP->fastmap.
3064
3065   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3066   the pattern buffer.
3067
3068   Returns 0 if we succeed, -2 if an internal error.   */
3069
3070int
3071re_compile_fastmap (bufp)
3072     struct re_pattern_buffer *bufp;
3073{
3074  int j, k;
3075#ifdef MATCH_MAY_ALLOCATE
3076  fail_stack_type fail_stack;
3077#endif
3078#ifndef REGEX_MALLOC
3079  char *destination;
3080#endif
3081
3082  register char *fastmap = bufp->fastmap;
3083  unsigned char *pattern = bufp->buffer;
3084  unsigned char *p = pattern;
3085  register unsigned char *pend = pattern + bufp->used;
3086
3087#ifdef REL_ALLOC
3088  /* This holds the pointer to the failure stack, when
3089     it is allocated relocatably.  */
3090  fail_stack_elt_t *failure_stack_ptr;
3091#endif
3092
3093  /* Assume that each path through the pattern can be null until
3094     proven otherwise.  We set this false at the bottom of switch
3095     statement, to which we get only if a particular path doesn't
3096     match the empty string.  */
3097  boolean path_can_be_null = true;
3098
3099  /* We aren't doing a `succeed_n' to begin with.  */
3100  boolean succeed_n_p = false;
3101
3102  assert (fastmap != NULL && p != NULL);
3103
3104  INIT_FAIL_STACK ();
3105  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3106  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3107  bufp->can_be_null = 0;
3108
3109  while (1)
3110    {
3111      if (p == pend || *p == succeed)
3112	{
3113	  /* We have reached the (effective) end of pattern.  */
3114	  if (!FAIL_STACK_EMPTY ())
3115	    {
3116	      bufp->can_be_null |= path_can_be_null;
3117
3118	      /* Reset for next path.  */
3119	      path_can_be_null = true;
3120
3121	      p = fail_stack.stack[--fail_stack.avail].pointer;
3122
3123	      continue;
3124	    }
3125	  else
3126	    break;
3127	}
3128
3129      /* We should never be about to go beyond the end of the pattern.  */
3130      assert (p < pend);
3131
3132      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3133	{
3134
3135        /* I guess the idea here is to simply not bother with a fastmap
3136           if a backreference is used, since it's too hard to figure out
3137           the fastmap for the corresponding group.  Setting
3138           `can_be_null' stops `re_search_2' from using the fastmap, so
3139           that is all we do.  */
3140	case duplicate:
3141	  bufp->can_be_null = 1;
3142          goto done;
3143
3144
3145      /* Following are the cases which match a character.  These end
3146         with `break'.  */
3147
3148	case exactn:
3149          fastmap[p[1]] = 1;
3150	  break;
3151
3152
3153        case charset:
3154          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3155	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3156              fastmap[j] = 1;
3157	  break;
3158
3159
3160	case charset_not:
3161	  /* Chars beyond end of map must be allowed.  */
3162	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3163            fastmap[j] = 1;
3164
3165	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3166	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3167              fastmap[j] = 1;
3168          break;
3169
3170
3171	case wordchar:
3172	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3173	    if (SYNTAX (j) == Sword)
3174	      fastmap[j] = 1;
3175	  break;
3176
3177
3178	case notwordchar:
3179	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3180	    if (SYNTAX (j) != Sword)
3181	      fastmap[j] = 1;
3182	  break;
3183
3184
3185        case anychar:
3186	  {
3187	    int fastmap_newline = fastmap['\n'];
3188
3189	    /* `.' matches anything ...  */
3190	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3191	      fastmap[j] = 1;
3192
3193	    /* ... except perhaps newline.  */
3194	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3195	      fastmap['\n'] = fastmap_newline;
3196
3197	    /* Return if we have already set `can_be_null'; if we have,
3198	       then the fastmap is irrelevant.  Something's wrong here.  */
3199	    else if (bufp->can_be_null)
3200	      goto done;
3201
3202	    /* Otherwise, have to check alternative paths.  */
3203	    break;
3204	  }
3205
3206#ifdef emacs
3207        case syntaxspec:
3208	  k = *p++;
3209	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3210	    if (SYNTAX (j) == (enum syntaxcode) k)
3211	      fastmap[j] = 1;
3212	  break;
3213
3214
3215	case notsyntaxspec:
3216	  k = *p++;
3217	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3218	    if (SYNTAX (j) != (enum syntaxcode) k)
3219	      fastmap[j] = 1;
3220	  break;
3221
3222
3223      /* All cases after this match the empty string.  These end with
3224         `continue'.  */
3225
3226
3227	case before_dot:
3228	case at_dot:
3229	case after_dot:
3230          continue;
3231#endif /* emacs */
3232
3233
3234        case no_op:
3235        case begline:
3236        case endline:
3237	case begbuf:
3238	case endbuf:
3239	case wordbound:
3240	case notwordbound:
3241	case wordbeg:
3242	case wordend:
3243        case push_dummy_failure:
3244          continue;
3245
3246
3247	case jump_n:
3248        case pop_failure_jump:
3249	case maybe_pop_jump:
3250	case jump:
3251        case jump_past_alt:
3252	case dummy_failure_jump:
3253          EXTRACT_NUMBER_AND_INCR (j, p);
3254	  p += j;
3255	  if (j > 0)
3256	    continue;
3257
3258          /* Jump backward implies we just went through the body of a
3259             loop and matched nothing.  Opcode jumped to should be
3260             `on_failure_jump' or `succeed_n'.  Just treat it like an
3261             ordinary jump.  For a * loop, it has pushed its failure
3262             point already; if so, discard that as redundant.  */
3263          if ((re_opcode_t) *p != on_failure_jump
3264	      && (re_opcode_t) *p != succeed_n)
3265	    continue;
3266
3267          p++;
3268          EXTRACT_NUMBER_AND_INCR (j, p);
3269          p += j;
3270
3271          /* If what's on the stack is where we are now, pop it.  */
3272          if (!FAIL_STACK_EMPTY ()
3273	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3274            fail_stack.avail--;
3275
3276          continue;
3277
3278
3279        case on_failure_jump:
3280        case on_failure_keep_string_jump:
3281	handle_on_failure_jump:
3282          EXTRACT_NUMBER_AND_INCR (j, p);
3283
3284          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3285             end of the pattern.  We don't want to push such a point,
3286             since when we restore it above, entering the switch will
3287             increment `p' past the end of the pattern.  We don't need
3288             to push such a point since we obviously won't find any more
3289             fastmap entries beyond `pend'.  Such a pattern can match
3290             the null string, though.  */
3291          if (p + j < pend)
3292            {
3293              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3294		{
3295		  RESET_FAIL_STACK ();
3296		  return -2;
3297		}
3298            }
3299          else
3300            bufp->can_be_null = 1;
3301
3302          if (succeed_n_p)
3303            {
3304              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3305              succeed_n_p = false;
3306	    }
3307
3308          continue;
3309
3310
3311	case succeed_n:
3312          /* Get to the number of times to succeed.  */
3313          p += 2;
3314
3315          /* Increment p past the n for when k != 0.  */
3316          EXTRACT_NUMBER_AND_INCR (k, p);
3317          if (k == 0)
3318	    {
3319              p -= 4;
3320  	      succeed_n_p = true;  /* Spaghetti code alert.  */
3321              goto handle_on_failure_jump;
3322            }
3323          continue;
3324
3325
3326	case set_number_at:
3327          p += 4;
3328          continue;
3329
3330
3331	case start_memory:
3332        case stop_memory:
3333	  p += 2;
3334	  continue;
3335
3336
3337	default:
3338          abort (); /* We have listed all the cases.  */
3339        } /* switch *p++ */
3340
3341      /* Getting here means we have found the possible starting
3342         characters for one path of the pattern -- and that the empty
3343         string does not match.  We need not follow this path further.
3344         Instead, look at the next alternative (remembered on the
3345         stack), or quit if no more.  The test at the top of the loop
3346         does these things.  */
3347      path_can_be_null = false;
3348      p = pend;
3349    } /* while p */
3350
3351  /* Set `can_be_null' for the last path (also the first path, if the
3352     pattern is empty).  */
3353  bufp->can_be_null |= path_can_be_null;
3354
3355 done:
3356  RESET_FAIL_STACK ();
3357  return 0;
3358} /* re_compile_fastmap */
3359#ifdef _LIBC
3360weak_alias (__re_compile_fastmap, re_compile_fastmap)
3361#endif
3362
3363/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3364   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3365   this memory for recording register information.  STARTS and ENDS
3366   must be allocated using the malloc library routine, and must each
3367   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3368
3369   If NUM_REGS == 0, then subsequent matches should allocate their own
3370   register data.
3371
3372   Unless this function is called, the first search or match using
3373   PATTERN_BUFFER will allocate its own register data, without
3374   freeing the old data.  */
3375
3376void
3377re_set_registers (bufp, regs, num_regs, starts, ends)
3378    struct re_pattern_buffer *bufp;
3379    struct re_registers *regs;
3380    unsigned num_regs;
3381    regoff_t *starts, *ends;
3382{
3383  if (num_regs)
3384    {
3385      bufp->regs_allocated = REGS_REALLOCATE;
3386      regs->num_regs = num_regs;
3387      regs->start = starts;
3388      regs->end = ends;
3389    }
3390  else
3391    {
3392      bufp->regs_allocated = REGS_UNALLOCATED;
3393      regs->num_regs = 0;
3394      regs->start = regs->end = (regoff_t *) 0;
3395    }
3396}
3397#ifdef _LIBC
3398weak_alias (__re_set_registers, re_set_registers)
3399#endif
3400
3401/* Searching routines.  */
3402
3403/* Like re_search_2, below, but only one string is specified, and
3404   doesn't let you say where to stop matching. */
3405
3406int
3407re_search (bufp, string, size, startpos, range, regs)
3408     struct re_pattern_buffer *bufp;
3409     const char *string;
3410     int size, startpos, range;
3411     struct re_registers *regs;
3412{
3413  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3414		      regs, size);
3415}
3416#ifdef _LIBC
3417weak_alias (__re_search, re_search)
3418#endif
3419
3420
3421/* Using the compiled pattern in BUFP->buffer, first tries to match the
3422   virtual concatenation of STRING1 and STRING2, starting first at index
3423   STARTPOS, then at STARTPOS + 1, and so on.
3424
3425   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3426
3427   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3428   only at STARTPOS; in general, the last start tried is STARTPOS +
3429   RANGE.
3430
3431   In REGS, return the indices of the virtual concatenation of STRING1
3432   and STRING2 that matched the entire BUFP->buffer and its contained
3433   subexpressions.
3434
3435   Do not consider matching one past the index STOP in the virtual
3436   concatenation of STRING1 and STRING2.
3437
3438   We return either the position in the strings at which the match was
3439   found, -1 if no match, or -2 if error (such as failure
3440   stack overflow).  */
3441
3442int
3443re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3444     struct re_pattern_buffer *bufp;
3445     const char *string1, *string2;
3446     int size1, size2;
3447     int startpos;
3448     int range;
3449     struct re_registers *regs;
3450     int stop;
3451{
3452  int val;
3453  register char *fastmap = bufp->fastmap;
3454  register RE_TRANSLATE_TYPE translate = bufp->translate;
3455  int total_size = size1 + size2;
3456  int endpos = startpos + range;
3457
3458  /* Check for out-of-range STARTPOS.  */
3459  if (startpos < 0 || startpos > total_size)
3460    return -1;
3461
3462  /* Fix up RANGE if it might eventually take us outside
3463     the virtual concatenation of STRING1 and STRING2.
3464     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3465  if (endpos < 0)
3466    range = 0 - startpos;
3467  else if (endpos > total_size)
3468    range = total_size - startpos;
3469
3470  /* If the search isn't to be a backwards one, don't waste time in a
3471     search for a pattern that must be anchored.  */
3472  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3473    {
3474      if (startpos > 0)
3475	return -1;
3476      else
3477	range = 1;
3478    }
3479
3480#ifdef emacs
3481  /* In a forward search for something that starts with \=.
3482     don't keep searching past point.  */
3483  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3484    {
3485      range = PT - startpos;
3486      if (range <= 0)
3487	return -1;
3488    }
3489#endif /* emacs */
3490
3491  /* Update the fastmap now if not correct already.  */
3492  if (fastmap && !bufp->fastmap_accurate)
3493    if (re_compile_fastmap (bufp) == -2)
3494      return -2;
3495
3496  /* Loop through the string, looking for a place to start matching.  */
3497  for (;;)
3498    {
3499      /* If a fastmap is supplied, skip quickly over characters that
3500         cannot be the start of a match.  If the pattern can match the
3501         null string, however, we don't need to skip characters; we want
3502         the first null string.  */
3503      if (fastmap && startpos < total_size && !bufp->can_be_null)
3504	{
3505	  if (range > 0)	/* Searching forwards.  */
3506	    {
3507	      register const char *d;
3508	      register int lim = 0;
3509	      int irange = range;
3510
3511              if (startpos < size1 && startpos + range >= size1)
3512                lim = range - (size1 - startpos);
3513
3514	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3515
3516              /* Written out as an if-else to avoid testing `translate'
3517                 inside the loop.  */
3518	      if (translate)
3519                while (range > lim
3520                       && !fastmap[(unsigned char)
3521				   translate[(unsigned char) *d++]])
3522                  range--;
3523	      else
3524                while (range > lim && !fastmap[(unsigned char) *d++])
3525                  range--;
3526
3527	      startpos += irange - range;
3528	    }
3529	  else				/* Searching backwards.  */
3530	    {
3531	      register char c = (size1 == 0 || startpos >= size1
3532                                 ? string2[startpos - size1]
3533                                 : string1[startpos]);
3534
3535	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3536		goto advance;
3537	    }
3538	}
3539
3540      /* If can't match the null string, and that's all we have left, fail.  */
3541      if (range >= 0 && startpos == total_size && fastmap
3542          && !bufp->can_be_null)
3543	return -1;
3544
3545      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3546				 startpos, regs, stop);
3547#ifndef REGEX_MALLOC
3548# ifdef C_ALLOCA
3549      alloca (0);
3550# endif
3551#endif
3552
3553      if (val >= 0)
3554	return startpos;
3555
3556      if (val == -2)
3557	return -2;
3558
3559    advance:
3560      if (!range)
3561        break;
3562      else if (range > 0)
3563        {
3564          range--;
3565          startpos++;
3566        }
3567      else
3568        {
3569          range++;
3570          startpos--;
3571        }
3572    }
3573  return -1;
3574} /* re_search_2 */
3575#ifdef _LIBC
3576weak_alias (__re_search_2, re_search_2)
3577#endif
3578
3579/* This converts PTR, a pointer into one of the search strings `string1'
3580   and `string2' into an offset from the beginning of that string.  */
3581#define POINTER_TO_OFFSET(ptr)			\
3582  (FIRST_STRING_P (ptr)				\
3583   ? ((regoff_t) ((ptr) - string1))		\
3584   : ((regoff_t) ((ptr) - string2 + size1)))
3585
3586/* Macros for dealing with the split strings in re_match_2.  */
3587
3588#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3589
3590/* Call before fetching a character with *d.  This switches over to
3591   string2 if necessary.  */
3592#define PREFETCH()							\
3593  while (d == dend)						    	\
3594    {									\
3595      /* End of string2 => fail.  */					\
3596      if (dend == end_match_2) 						\
3597        goto fail;							\
3598      /* End of string1 => advance to string2.  */ 			\
3599      d = string2;						        \
3600      dend = end_match_2;						\
3601    }
3602
3603
3604/* Test if at very beginning or at very end of the virtual concatenation
3605   of `string1' and `string2'.  If only one string, it's `string2'.  */
3606#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3607#define AT_STRINGS_END(d) ((d) == end2)
3608
3609
3610/* Test if D points to a character which is word-constituent.  We have
3611   two special cases to check for: if past the end of string1, look at
3612   the first character in string2; and if before the beginning of
3613   string2, look at the last character in string1.  */
3614#define WORDCHAR_P(d)							\
3615  (SYNTAX ((d) == end1 ? *string2					\
3616           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3617   == Sword)
3618
3619/* Disabled due to a compiler bug -- see comment at case wordbound */
3620#if 0
3621/* Test if the character before D and the one at D differ with respect
3622   to being word-constituent.  */
3623#define AT_WORD_BOUNDARY(d)						\
3624  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3625   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3626#endif
3627
3628/* Free everything we malloc.  */
3629#ifdef MATCH_MAY_ALLOCATE
3630# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3631# define FREE_VARIABLES()						\
3632  do {									\
3633    REGEX_FREE_STACK (fail_stack.stack);				\
3634    FREE_VAR (regstart);						\
3635    FREE_VAR (regend);							\
3636    FREE_VAR (old_regstart);						\
3637    FREE_VAR (old_regend);						\
3638    FREE_VAR (best_regstart);						\
3639    FREE_VAR (best_regend);						\
3640    FREE_VAR (reg_info);						\
3641    FREE_VAR (reg_dummy);						\
3642    FREE_VAR (reg_info_dummy);						\
3643  } while (0)
3644#else
3645# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3646#endif /* not MATCH_MAY_ALLOCATE */
3647
3648/* These values must meet several constraints.  They must not be valid
3649   register values; since we have a limit of 255 registers (because
3650   we use only one byte in the pattern for the register number), we can
3651   use numbers larger than 255.  They must differ by 1, because of
3652   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3653   be larger than the value for the highest register, so we do not try
3654   to actually save any registers when none are active.  */
3655#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3656#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3657
3658/* Matching routines.  */
3659
3660#ifndef emacs   /* Emacs never uses this.  */
3661/* re_match is like re_match_2 except it takes only a single string.  */
3662
3663int
3664re_match (bufp, string, size, pos, regs)
3665     struct re_pattern_buffer *bufp;
3666     const char *string;
3667     int size, pos;
3668     struct re_registers *regs;
3669{
3670  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3671				    pos, regs, size);
3672# ifndef REGEX_MALLOC
3673#  ifdef C_ALLOCA
3674  alloca (0);
3675#  endif
3676# endif
3677  return result;
3678}
3679# ifdef _LIBC
3680weak_alias (__re_match, re_match)
3681# endif
3682#endif /* not emacs */
3683
3684static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3685						    unsigned char *end,
3686						register_info_type *reg_info));
3687static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3688						  unsigned char *end,
3689						register_info_type *reg_info));
3690static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3691							unsigned char *end,
3692						register_info_type *reg_info));
3693static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3694				     int len, char *translate));
3695
3696/* re_match_2 matches the compiled pattern in BUFP against the
3697   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3698   and SIZE2, respectively).  We start matching at POS, and stop
3699   matching at STOP.
3700
3701   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3702   store offsets for the substring each group matched in REGS.  See the
3703   documentation for exactly how many groups we fill.
3704
3705   We return -1 if no match, -2 if an internal error (such as the
3706   failure stack overflowing).  Otherwise, we return the length of the
3707   matched substring.  */
3708
3709int
3710re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3711     struct re_pattern_buffer *bufp;
3712     const char *string1, *string2;
3713     int size1, size2;
3714     int pos;
3715     struct re_registers *regs;
3716     int stop;
3717{
3718  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3719				    pos, regs, stop);
3720#ifndef REGEX_MALLOC
3721# ifdef C_ALLOCA
3722  alloca (0);
3723# endif
3724#endif
3725  return result;
3726}
3727#ifdef _LIBC
3728weak_alias (__re_match_2, re_match_2)
3729#endif
3730
3731/* This is a separate function so that we can force an alloca cleanup
3732   afterwards.  */
3733static int
3734re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3735     struct re_pattern_buffer *bufp;
3736     const char *string1, *string2;
3737     int size1, size2;
3738     int pos;
3739     struct re_registers *regs;
3740     int stop;
3741{
3742  /* General temporaries.  */
3743  int mcnt;
3744  unsigned char *p1;
3745
3746  /* Just past the end of the corresponding string.  */
3747  const char *end1, *end2;
3748
3749  /* Pointers into string1 and string2, just past the last characters in
3750     each to consider matching.  */
3751  const char *end_match_1, *end_match_2;
3752
3753  /* Where we are in the data, and the end of the current string.  */
3754  const char *d, *dend;
3755
3756  /* Where we are in the pattern, and the end of the pattern.  */
3757  unsigned char *p = bufp->buffer;
3758  register unsigned char *pend = p + bufp->used;
3759
3760  /* Mark the opcode just after a start_memory, so we can test for an
3761     empty subpattern when we get to the stop_memory.  */
3762  unsigned char *just_past_start_mem = 0;
3763
3764  /* We use this to map every character in the string.  */
3765  RE_TRANSLATE_TYPE translate = bufp->translate;
3766
3767  /* Failure point stack.  Each place that can handle a failure further
3768     down the line pushes a failure point on this stack.  It consists of
3769     restart, regend, and reg_info for all registers corresponding to
3770     the subexpressions we're currently inside, plus the number of such
3771     registers, and, finally, two char *'s.  The first char * is where
3772     to resume scanning the pattern; the second one is where to resume
3773     scanning the strings.  If the latter is zero, the failure point is
3774     a ``dummy''; if a failure happens and the failure point is a dummy,
3775     it gets discarded and the next next one is tried.  */
3776#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3777  fail_stack_type fail_stack;
3778#endif
3779#ifdef DEBUG
3780  static unsigned failure_id = 0;
3781  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3782#endif
3783
3784#ifdef REL_ALLOC
3785  /* This holds the pointer to the failure stack, when
3786     it is allocated relocatably.  */
3787  fail_stack_elt_t *failure_stack_ptr;
3788#endif
3789
3790  /* We fill all the registers internally, independent of what we
3791     return, for use in backreferences.  The number here includes
3792     an element for register zero.  */
3793  size_t num_regs = bufp->re_nsub + 1;
3794
3795  /* The currently active registers.  */
3796  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3797  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3798
3799  /* Information on the contents of registers. These are pointers into
3800     the input strings; they record just what was matched (on this
3801     attempt) by a subexpression part of the pattern, that is, the
3802     regnum-th regstart pointer points to where in the pattern we began
3803     matching and the regnum-th regend points to right after where we
3804     stopped matching the regnum-th subexpression.  (The zeroth register
3805     keeps track of what the whole pattern matches.)  */
3806#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3807  const char **regstart, **regend;
3808#endif
3809
3810  /* If a group that's operated upon by a repetition operator fails to
3811     match anything, then the register for its start will need to be
3812     restored because it will have been set to wherever in the string we
3813     are when we last see its open-group operator.  Similarly for a
3814     register's end.  */
3815#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3816  const char **old_regstart, **old_regend;
3817#endif
3818
3819  /* The is_active field of reg_info helps us keep track of which (possibly
3820     nested) subexpressions we are currently in. The matched_something
3821     field of reg_info[reg_num] helps us tell whether or not we have
3822     matched any of the pattern so far this time through the reg_num-th
3823     subexpression.  These two fields get reset each time through any
3824     loop their register is in.  */
3825#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3826  register_info_type *reg_info;
3827#endif
3828
3829  /* The following record the register info as found in the above
3830     variables when we find a match better than any we've seen before.
3831     This happens as we backtrack through the failure points, which in
3832     turn happens only if we have not yet matched the entire string. */
3833  unsigned best_regs_set = false;
3834#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3835  const char **best_regstart, **best_regend;
3836#endif
3837
3838  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3839     allocate space for that if we're not allocating space for anything
3840     else (see below).  Also, we never need info about register 0 for
3841     any of the other register vectors, and it seems rather a kludge to
3842     treat `best_regend' differently than the rest.  So we keep track of
3843     the end of the best match so far in a separate variable.  We
3844     initialize this to NULL so that when we backtrack the first time
3845     and need to test it, it's not garbage.  */
3846  const char *match_end = NULL;
3847
3848  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3849  int set_regs_matched_done = 0;
3850
3851  /* Used when we pop values we don't care about.  */
3852#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3853  const char **reg_dummy;
3854  register_info_type *reg_info_dummy;
3855#endif
3856
3857#ifdef DEBUG
3858  /* Counts the total number of registers pushed.  */
3859  unsigned num_regs_pushed = 0;
3860#endif
3861
3862  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3863
3864  INIT_FAIL_STACK ();
3865
3866#ifdef MATCH_MAY_ALLOCATE
3867  /* Do not bother to initialize all the register variables if there are
3868     no groups in the pattern, as it takes a fair amount of time.  If
3869     there are groups, we include space for register 0 (the whole
3870     pattern), even though we never use it, since it simplifies the
3871     array indexing.  We should fix this.  */
3872  if (bufp->re_nsub)
3873    {
3874      regstart = REGEX_TALLOC (num_regs, const char *);
3875      regend = REGEX_TALLOC (num_regs, const char *);
3876      old_regstart = REGEX_TALLOC (num_regs, const char *);
3877      old_regend = REGEX_TALLOC (num_regs, const char *);
3878      best_regstart = REGEX_TALLOC (num_regs, const char *);
3879      best_regend = REGEX_TALLOC (num_regs, const char *);
3880      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3881      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3882      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3883
3884      if (!(regstart && regend && old_regstart && old_regend && reg_info
3885            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3886        {
3887          FREE_VARIABLES ();
3888          return -2;
3889        }
3890    }
3891  else
3892    {
3893      /* We must initialize all our variables to NULL, so that
3894         `FREE_VARIABLES' doesn't try to free them.  */
3895      regstart = regend = old_regstart = old_regend = best_regstart
3896        = best_regend = reg_dummy = NULL;
3897      reg_info = reg_info_dummy = (register_info_type *) NULL;
3898    }
3899#endif /* MATCH_MAY_ALLOCATE */
3900
3901  /* The starting position is bogus.  */
3902  if (pos < 0 || pos > size1 + size2)
3903    {
3904      FREE_VARIABLES ();
3905      return -1;
3906    }
3907
3908  /* Initialize subexpression text positions to -1 to mark ones that no
3909     start_memory/stop_memory has been seen for. Also initialize the
3910     register information struct.  */
3911  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3912    {
3913      regstart[mcnt] = regend[mcnt]
3914        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3915
3916      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3917      IS_ACTIVE (reg_info[mcnt]) = 0;
3918      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3919      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3920    }
3921
3922  /* We move `string1' into `string2' if the latter's empty -- but not if
3923     `string1' is null.  */
3924  if (size2 == 0 && string1 != NULL)
3925    {
3926      string2 = string1;
3927      size2 = size1;
3928      string1 = 0;
3929      size1 = 0;
3930    }
3931  end1 = string1 + size1;
3932  end2 = string2 + size2;
3933
3934  /* Compute where to stop matching, within the two strings.  */
3935  if (stop <= size1)
3936    {
3937      end_match_1 = string1 + stop;
3938      end_match_2 = string2;
3939    }
3940  else
3941    {
3942      end_match_1 = end1;
3943      end_match_2 = string2 + stop - size1;
3944    }
3945
3946  /* `p' scans through the pattern as `d' scans through the data.
3947     `dend' is the end of the input string that `d' points within.  `d'
3948     is advanced into the following input string whenever necessary, but
3949     this happens before fetching; therefore, at the beginning of the
3950     loop, `d' can be pointing at the end of a string, but it cannot
3951     equal `string2'.  */
3952  if (size1 > 0 && pos <= size1)
3953    {
3954      d = string1 + pos;
3955      dend = end_match_1;
3956    }
3957  else
3958    {
3959      d = string2 + pos - size1;
3960      dend = end_match_2;
3961    }
3962
3963  DEBUG_PRINT1 ("The compiled pattern is:\n");
3964  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3965  DEBUG_PRINT1 ("The string to match is: `");
3966  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3967  DEBUG_PRINT1 ("'\n");
3968
3969  /* This loops over pattern commands.  It exits by returning from the
3970     function if the match is complete, or it drops through if the match
3971     fails at this starting point in the input data.  */
3972  for (;;)
3973    {
3974#ifdef _LIBC
3975      DEBUG_PRINT2 ("\n%p: ", p);
3976#else
3977      DEBUG_PRINT2 ("\n0x%x: ", p);
3978#endif
3979
3980      if (p == pend)
3981	{ /* End of pattern means we might have succeeded.  */
3982          DEBUG_PRINT1 ("end of pattern ... ");
3983
3984	  /* If we haven't matched the entire string, and we want the
3985             longest match, try backtracking.  */
3986          if (d != end_match_2)
3987	    {
3988	      /* 1 if this match ends in the same string (string1 or string2)
3989		 as the best previous match.  */
3990	      boolean same_str_p = (FIRST_STRING_P (match_end)
3991				    == MATCHING_IN_FIRST_STRING);
3992	      /* 1 if this match is the best seen so far.  */
3993	      boolean best_match_p;
3994
3995	      /* AIX compiler got confused when this was combined
3996		 with the previous declaration.  */
3997	      if (same_str_p)
3998		best_match_p = d > match_end;
3999	      else
4000		best_match_p = !MATCHING_IN_FIRST_STRING;
4001
4002              DEBUG_PRINT1 ("backtracking.\n");
4003
4004              if (!FAIL_STACK_EMPTY ())
4005                { /* More failure points to try.  */
4006
4007                  /* If exceeds best match so far, save it.  */
4008                  if (!best_regs_set || best_match_p)
4009                    {
4010                      best_regs_set = true;
4011                      match_end = d;
4012
4013                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4014
4015                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4016                        {
4017                          best_regstart[mcnt] = regstart[mcnt];
4018                          best_regend[mcnt] = regend[mcnt];
4019                        }
4020                    }
4021                  goto fail;
4022                }
4023
4024              /* If no failure points, don't restore garbage.  And if
4025                 last match is real best match, don't restore second
4026                 best one. */
4027              else if (best_regs_set && !best_match_p)
4028                {
4029  	        restore_best_regs:
4030                  /* Restore best match.  It may happen that `dend ==
4031                     end_match_1' while the restored d is in string2.
4032                     For example, the pattern `x.*y.*z' against the
4033                     strings `x-' and `y-z-', if the two strings are
4034                     not consecutive in memory.  */
4035                  DEBUG_PRINT1 ("Restoring best registers.\n");
4036
4037                  d = match_end;
4038                  dend = ((d >= string1 && d <= end1)
4039		           ? end_match_1 : end_match_2);
4040
4041		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4042		    {
4043		      regstart[mcnt] = best_regstart[mcnt];
4044		      regend[mcnt] = best_regend[mcnt];
4045		    }
4046                }
4047            } /* d != end_match_2 */
4048
4049	succeed_label:
4050          DEBUG_PRINT1 ("Accepting match.\n");
4051
4052          /* If caller wants register contents data back, do it.  */
4053          if (regs && !bufp->no_sub)
4054	    {
4055              /* Have the register data arrays been allocated?  */
4056              if (bufp->regs_allocated == REGS_UNALLOCATED)
4057                { /* No.  So allocate them with malloc.  We need one
4058                     extra element beyond `num_regs' for the `-1' marker
4059                     GNU code uses.  */
4060                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4061                  regs->start = TALLOC (regs->num_regs, regoff_t);
4062                  regs->end = TALLOC (regs->num_regs, regoff_t);
4063                  if (regs->start == NULL || regs->end == NULL)
4064		    {
4065		      FREE_VARIABLES ();
4066		      return -2;
4067		    }
4068                  bufp->regs_allocated = REGS_REALLOCATE;
4069                }
4070              else if (bufp->regs_allocated == REGS_REALLOCATE)
4071                { /* Yes.  If we need more elements than were already
4072                     allocated, reallocate them.  If we need fewer, just
4073                     leave it alone.  */
4074                  if (regs->num_regs < num_regs + 1)
4075                    {
4076                      regs->num_regs = num_regs + 1;
4077                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4078                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4079                      if (regs->start == NULL || regs->end == NULL)
4080			{
4081			  FREE_VARIABLES ();
4082			  return -2;
4083			}
4084                    }
4085                }
4086              else
4087		{
4088		  /* These braces fend off a "empty body in an else-statement"
4089		     warning under GCC when assert expands to nothing.  */
4090		  assert (bufp->regs_allocated == REGS_FIXED);
4091		}
4092
4093              /* Convert the pointer data in `regstart' and `regend' to
4094                 indices.  Register zero has to be set differently,
4095                 since we haven't kept track of any info for it.  */
4096              if (regs->num_regs > 0)
4097                {
4098                  regs->start[0] = pos;
4099                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4100				  ? ((regoff_t) (d - string1))
4101			          : ((regoff_t) (d - string2 + size1)));
4102                }
4103
4104              /* Go through the first `min (num_regs, regs->num_regs)'
4105                 registers, since that is all we initialized.  */
4106	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4107		   mcnt++)
4108		{
4109                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4110                    regs->start[mcnt] = regs->end[mcnt] = -1;
4111                  else
4112                    {
4113		      regs->start[mcnt]
4114			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4115                      regs->end[mcnt]
4116			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4117                    }
4118		}
4119
4120              /* If the regs structure we return has more elements than
4121                 were in the pattern, set the extra elements to -1.  If
4122                 we (re)allocated the registers, this is the case,
4123                 because we always allocate enough to have at least one
4124                 -1 at the end.  */
4125              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4126                regs->start[mcnt] = regs->end[mcnt] = -1;
4127	    } /* regs && !bufp->no_sub */
4128
4129          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4130                        nfailure_points_pushed, nfailure_points_popped,
4131                        nfailure_points_pushed - nfailure_points_popped);
4132          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4133
4134          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4135			    ? string1
4136			    : string2 - size1);
4137
4138          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4139
4140          FREE_VARIABLES ();
4141          return mcnt;
4142        }
4143
4144      /* Otherwise match next pattern command.  */
4145      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4146	{
4147        /* Ignore these.  Used to ignore the n of succeed_n's which
4148           currently have n == 0.  */
4149        case no_op:
4150          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4151          break;
4152
4153	case succeed:
4154          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4155	  goto succeed_label;
4156
4157        /* Match the next n pattern characters exactly.  The following
4158           byte in the pattern defines n, and the n bytes after that
4159           are the characters to match.  */
4160	case exactn:
4161	  mcnt = *p++;
4162          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4163
4164          /* This is written out as an if-else so we don't waste time
4165             testing `translate' inside the loop.  */
4166          if (translate)
4167	    {
4168	      do
4169		{
4170		  PREFETCH ();
4171		  if ((unsigned char) translate[(unsigned char) *d++]
4172		      != (unsigned char) *p++)
4173                    goto fail;
4174		}
4175	      while (--mcnt);
4176	    }
4177	  else
4178	    {
4179	      do
4180		{
4181		  PREFETCH ();
4182		  if (*d++ != (char) *p++) goto fail;
4183		}
4184	      while (--mcnt);
4185	    }
4186	  SET_REGS_MATCHED ();
4187          break;
4188
4189
4190        /* Match any character except possibly a newline or a null.  */
4191	case anychar:
4192          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4193
4194          PREFETCH ();
4195
4196          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4197              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4198	    goto fail;
4199
4200          SET_REGS_MATCHED ();
4201          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4202          d++;
4203	  break;
4204
4205
4206	case charset:
4207	case charset_not:
4208	  {
4209	    register unsigned char c;
4210	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4211
4212            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4213
4214	    PREFETCH ();
4215	    c = TRANSLATE (*d); /* The character to match.  */
4216
4217            /* Cast to `unsigned' instead of `unsigned char' in case the
4218               bit list is a full 32 bytes long.  */
4219	    if (c < (unsigned) (*p * BYTEWIDTH)
4220		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4221	      not = !not;
4222
4223	    p += 1 + *p;
4224
4225	    if (!not) goto fail;
4226
4227	    SET_REGS_MATCHED ();
4228            d++;
4229	    break;
4230	  }
4231
4232
4233        /* The beginning of a group is represented by start_memory.
4234           The arguments are the register number in the next byte, and the
4235           number of groups inner to this one in the next.  The text
4236           matched within the group is recorded (in the internal
4237           registers data structure) under the register number.  */
4238        case start_memory:
4239	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4240
4241          /* Find out if this group can match the empty string.  */
4242	  p1 = p;		/* To send to group_match_null_string_p.  */
4243
4244          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4245            REG_MATCH_NULL_STRING_P (reg_info[*p])
4246              = group_match_null_string_p (&p1, pend, reg_info);
4247
4248          /* Save the position in the string where we were the last time
4249             we were at this open-group operator in case the group is
4250             operated upon by a repetition operator, e.g., with `(a*)*b'
4251             against `ab'; then we want to ignore where we are now in
4252             the string in case this attempt to match fails.  */
4253          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4254                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4255                             : regstart[*p];
4256	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4257			 POINTER_TO_OFFSET (old_regstart[*p]));
4258
4259          regstart[*p] = d;
4260	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4261
4262          IS_ACTIVE (reg_info[*p]) = 1;
4263          MATCHED_SOMETHING (reg_info[*p]) = 0;
4264
4265	  /* Clear this whenever we change the register activity status.  */
4266	  set_regs_matched_done = 0;
4267
4268          /* This is the new highest active register.  */
4269          highest_active_reg = *p;
4270
4271          /* If nothing was active before, this is the new lowest active
4272             register.  */
4273          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4274            lowest_active_reg = *p;
4275
4276          /* Move past the register number and inner group count.  */
4277          p += 2;
4278	  just_past_start_mem = p;
4279
4280          break;
4281
4282
4283        /* The stop_memory opcode represents the end of a group.  Its
4284           arguments are the same as start_memory's: the register
4285           number, and the number of inner groups.  */
4286	case stop_memory:
4287	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4288
4289          /* We need to save the string position the last time we were at
4290             this close-group operator in case the group is operated
4291             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4292             against `aba'; then we want to ignore where we are now in
4293             the string in case this attempt to match fails.  */
4294          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4295                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4296			   : regend[*p];
4297	  DEBUG_PRINT2 ("      old_regend: %d\n",
4298			 POINTER_TO_OFFSET (old_regend[*p]));
4299
4300          regend[*p] = d;
4301	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4302
4303          /* This register isn't active anymore.  */
4304          IS_ACTIVE (reg_info[*p]) = 0;
4305
4306	  /* Clear this whenever we change the register activity status.  */
4307	  set_regs_matched_done = 0;
4308
4309          /* If this was the only register active, nothing is active
4310             anymore.  */
4311          if (lowest_active_reg == highest_active_reg)
4312            {
4313              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4314              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4315            }
4316          else
4317            { /* We must scan for the new highest active register, since
4318                 it isn't necessarily one less than now: consider
4319                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4320                 new highest active register is 1.  */
4321              unsigned char r = *p - 1;
4322              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4323                r--;
4324
4325              /* If we end up at register zero, that means that we saved
4326                 the registers as the result of an `on_failure_jump', not
4327                 a `start_memory', and we jumped to past the innermost
4328                 `stop_memory'.  For example, in ((.)*) we save
4329                 registers 1 and 2 as a result of the *, but when we pop
4330                 back to the second ), we are at the stop_memory 1.
4331                 Thus, nothing is active.  */
4332	      if (r == 0)
4333                {
4334                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4335                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4336                }
4337              else
4338                highest_active_reg = r;
4339            }
4340
4341          /* If just failed to match something this time around with a
4342             group that's operated on by a repetition operator, try to
4343             force exit from the ``loop'', and restore the register
4344             information for this group that we had before trying this
4345             last match.  */
4346          if ((!MATCHED_SOMETHING (reg_info[*p])
4347               || just_past_start_mem == p - 1)
4348	      && (p + 2) < pend)
4349            {
4350              boolean is_a_jump_n = false;
4351
4352              p1 = p + 2;
4353              mcnt = 0;
4354              switch ((re_opcode_t) *p1++)
4355                {
4356                  case jump_n:
4357		    is_a_jump_n = true;
4358                  case pop_failure_jump:
4359		  case maybe_pop_jump:
4360		  case jump:
4361		  case dummy_failure_jump:
4362                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4363		    if (is_a_jump_n)
4364		      p1 += 2;
4365                    break;
4366
4367                  default:
4368                    /* do nothing */ ;
4369                }
4370	      p1 += mcnt;
4371
4372              /* If the next operation is a jump backwards in the pattern
4373	         to an on_failure_jump right before the start_memory
4374                 corresponding to this stop_memory, exit from the loop
4375                 by forcing a failure after pushing on the stack the
4376                 on_failure_jump's jump in the pattern, and d.  */
4377              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4378                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4379		{
4380                  /* If this group ever matched anything, then restore
4381                     what its registers were before trying this last
4382                     failed match, e.g., with `(a*)*b' against `ab' for
4383                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4384                     against `aba' for regend[3].
4385
4386                     Also restore the registers for inner groups for,
4387                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4388                     otherwise get trashed).  */
4389
4390                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4391		    {
4392		      unsigned r;
4393
4394                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4395
4396		      /* Restore this and inner groups' (if any) registers.  */
4397                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4398			   r++)
4399                        {
4400                          regstart[r] = old_regstart[r];
4401
4402                          /* xx why this test?  */
4403                          if (old_regend[r] >= regstart[r])
4404                            regend[r] = old_regend[r];
4405                        }
4406                    }
4407		  p1++;
4408                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4409                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4410
4411                  goto fail;
4412                }
4413            }
4414
4415          /* Move past the register number and the inner group count.  */
4416          p += 2;
4417          break;
4418
4419
4420	/* \<digit> has been turned into a `duplicate' command which is
4421           followed by the numeric value of <digit> as the register number.  */
4422        case duplicate:
4423	  {
4424	    register const char *d2, *dend2;
4425	    int regno = *p++;   /* Get which register to match against.  */
4426	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4427
4428	    /* Can't back reference a group which we've never matched.  */
4429            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4430              goto fail;
4431
4432            /* Where in input to try to start matching.  */
4433            d2 = regstart[regno];
4434
4435            /* Where to stop matching; if both the place to start and
4436               the place to stop matching are in the same string, then
4437               set to the place to stop, otherwise, for now have to use
4438               the end of the first string.  */
4439
4440            dend2 = ((FIRST_STRING_P (regstart[regno])
4441		      == FIRST_STRING_P (regend[regno]))
4442		     ? regend[regno] : end_match_1);
4443	    for (;;)
4444	      {
4445		/* If necessary, advance to next segment in register
4446                   contents.  */
4447		while (d2 == dend2)
4448		  {
4449		    if (dend2 == end_match_2) break;
4450		    if (dend2 == regend[regno]) break;
4451
4452                    /* End of string1 => advance to string2. */
4453                    d2 = string2;
4454                    dend2 = regend[regno];
4455		  }
4456		/* At end of register contents => success */
4457		if (d2 == dend2) break;
4458
4459		/* If necessary, advance to next segment in data.  */
4460		PREFETCH ();
4461
4462		/* How many characters left in this segment to match.  */
4463		mcnt = dend - d;
4464
4465		/* Want how many consecutive characters we can match in
4466                   one shot, so, if necessary, adjust the count.  */
4467                if (mcnt > dend2 - d2)
4468		  mcnt = dend2 - d2;
4469
4470		/* Compare that many; failure if mismatch, else move
4471                   past them.  */
4472		if (translate
4473                    ? bcmp_translate (d, d2, mcnt, translate)
4474                    : memcmp (d, d2, mcnt))
4475		  goto fail;
4476		d += mcnt, d2 += mcnt;
4477
4478		/* Do this because we've match some characters.  */
4479		SET_REGS_MATCHED ();
4480	      }
4481	  }
4482	  break;
4483
4484
4485        /* begline matches the empty string at the beginning of the string
4486           (unless `not_bol' is set in `bufp'), and, if
4487           `newline_anchor' is set, after newlines.  */
4488	case begline:
4489          DEBUG_PRINT1 ("EXECUTING begline.\n");
4490
4491          if (AT_STRINGS_BEG (d))
4492            {
4493              if (!bufp->not_bol) break;
4494            }
4495          else if (d[-1] == '\n' && bufp->newline_anchor)
4496            {
4497              break;
4498            }
4499          /* In all other cases, we fail.  */
4500          goto fail;
4501
4502
4503        /* endline is the dual of begline.  */
4504	case endline:
4505          DEBUG_PRINT1 ("EXECUTING endline.\n");
4506
4507          if (AT_STRINGS_END (d))
4508            {
4509              if (!bufp->not_eol) break;
4510            }
4511
4512          /* We have to ``prefetch'' the next character.  */
4513          else if ((d == end1 ? *string2 : *d) == '\n'
4514                   && bufp->newline_anchor)
4515            {
4516              break;
4517            }
4518          goto fail;
4519
4520
4521	/* Match at the very beginning of the data.  */
4522        case begbuf:
4523          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4524          if (AT_STRINGS_BEG (d))
4525            break;
4526          goto fail;
4527
4528
4529	/* Match at the very end of the data.  */
4530        case endbuf:
4531          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4532	  if (AT_STRINGS_END (d))
4533	    break;
4534          goto fail;
4535
4536
4537        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4538           pushes NULL as the value for the string on the stack.  Then
4539           `pop_failure_point' will keep the current value for the
4540           string, instead of restoring it.  To see why, consider
4541           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4542           then the . fails against the \n.  But the next thing we want
4543           to do is match the \n against the \n; if we restored the
4544           string value, we would be back at the foo.
4545
4546           Because this is used only in specific cases, we don't need to
4547           check all the things that `on_failure_jump' does, to make
4548           sure the right things get saved on the stack.  Hence we don't
4549           share its code.  The only reason to push anything on the
4550           stack at all is that otherwise we would have to change
4551           `anychar's code to do something besides goto fail in this
4552           case; that seems worse than this.  */
4553        case on_failure_keep_string_jump:
4554          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4555
4556          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4557#ifdef _LIBC
4558          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4559#else
4560          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4561#endif
4562
4563          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4564          break;
4565
4566
4567	/* Uses of on_failure_jump:
4568
4569           Each alternative starts with an on_failure_jump that points
4570           to the beginning of the next alternative.  Each alternative
4571           except the last ends with a jump that in effect jumps past
4572           the rest of the alternatives.  (They really jump to the
4573           ending jump of the following alternative, because tensioning
4574           these jumps is a hassle.)
4575
4576           Repeats start with an on_failure_jump that points past both
4577           the repetition text and either the following jump or
4578           pop_failure_jump back to this on_failure_jump.  */
4579	case on_failure_jump:
4580        on_failure:
4581          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4582
4583          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4584#ifdef _LIBC
4585          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4586#else
4587          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4588#endif
4589
4590          /* If this on_failure_jump comes right before a group (i.e.,
4591             the original * applied to a group), save the information
4592             for that group and all inner ones, so that if we fail back
4593             to this point, the group's information will be correct.
4594             For example, in \(a*\)*\1, we need the preceding group,
4595             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4596
4597          /* We can't use `p' to check ahead because we push
4598             a failure point to `p + mcnt' after we do this.  */
4599          p1 = p;
4600
4601          /* We need to skip no_op's before we look for the
4602             start_memory in case this on_failure_jump is happening as
4603             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4604             against aba.  */
4605          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4606            p1++;
4607
4608          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4609            {
4610              /* We have a new highest active register now.  This will
4611                 get reset at the start_memory we are about to get to,
4612                 but we will have saved all the registers relevant to
4613                 this repetition op, as described above.  */
4614              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4615              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4616                lowest_active_reg = *(p1 + 1);
4617            }
4618
4619          DEBUG_PRINT1 (":\n");
4620          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4621          break;
4622
4623
4624        /* A smart repeat ends with `maybe_pop_jump'.
4625	   We change it to either `pop_failure_jump' or `jump'.  */
4626        case maybe_pop_jump:
4627          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4628          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4629          {
4630	    register unsigned char *p2 = p;
4631
4632            /* Compare the beginning of the repeat with what in the
4633               pattern follows its end. If we can establish that there
4634               is nothing that they would both match, i.e., that we
4635               would have to backtrack because of (as in, e.g., `a*a')
4636               then we can change to pop_failure_jump, because we'll
4637               never have to backtrack.
4638
4639               This is not true in the case of alternatives: in
4640               `(a|ab)*' we do need to backtrack to the `ab' alternative
4641               (e.g., if the string was `ab').  But instead of trying to
4642               detect that here, the alternative has put on a dummy
4643               failure point which is what we will end up popping.  */
4644
4645	    /* Skip over open/close-group commands.
4646	       If what follows this loop is a ...+ construct,
4647	       look at what begins its body, since we will have to
4648	       match at least one of that.  */
4649	    while (1)
4650	      {
4651		if (p2 + 2 < pend
4652		    && ((re_opcode_t) *p2 == stop_memory
4653			|| (re_opcode_t) *p2 == start_memory))
4654		  p2 += 3;
4655		else if (p2 + 6 < pend
4656			 && (re_opcode_t) *p2 == dummy_failure_jump)
4657		  p2 += 6;
4658		else
4659		  break;
4660	      }
4661
4662	    p1 = p + mcnt;
4663	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4664	       to the `maybe_finalize_jump' of this case.  Examine what
4665	       follows.  */
4666
4667            /* If we're at the end of the pattern, we can change.  */
4668            if (p2 == pend)
4669	      {
4670		/* Consider what happens when matching ":\(.*\)"
4671		   against ":/".  I don't really understand this code
4672		   yet.  */
4673  	        p[-3] = (unsigned char) pop_failure_jump;
4674                DEBUG_PRINT1
4675                  ("  End of pattern: change to `pop_failure_jump'.\n");
4676              }
4677
4678            else if ((re_opcode_t) *p2 == exactn
4679		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4680	      {
4681		register unsigned char c
4682                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4683
4684                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4685                  {
4686  		    p[-3] = (unsigned char) pop_failure_jump;
4687                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4688                                  c, p1[5]);
4689                  }
4690
4691		else if ((re_opcode_t) p1[3] == charset
4692			 || (re_opcode_t) p1[3] == charset_not)
4693		  {
4694		    int not = (re_opcode_t) p1[3] == charset_not;
4695
4696		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4697			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4698		      not = !not;
4699
4700                    /* `not' is equal to 1 if c would match, which means
4701                        that we can't change to pop_failure_jump.  */
4702		    if (!not)
4703                      {
4704  		        p[-3] = (unsigned char) pop_failure_jump;
4705                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4706                      }
4707		  }
4708	      }
4709            else if ((re_opcode_t) *p2 == charset)
4710	      {
4711#ifdef DEBUG
4712		register unsigned char c
4713                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4714#endif
4715
4716#if 0
4717                if ((re_opcode_t) p1[3] == exactn
4718		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4719			  && (p2[2 + p1[5] / BYTEWIDTH]
4720			      & (1 << (p1[5] % BYTEWIDTH)))))
4721#else
4722                if ((re_opcode_t) p1[3] == exactn
4723		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4724			  && (p2[2 + p1[4] / BYTEWIDTH]
4725			      & (1 << (p1[4] % BYTEWIDTH)))))
4726#endif
4727                  {
4728  		    p[-3] = (unsigned char) pop_failure_jump;
4729                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4730                                  c, p1[5]);
4731                  }
4732
4733		else if ((re_opcode_t) p1[3] == charset_not)
4734		  {
4735		    int idx;
4736		    /* We win if the charset_not inside the loop
4737		       lists every character listed in the charset after.  */
4738		    for (idx = 0; idx < (int) p2[1]; idx++)
4739		      if (! (p2[2 + idx] == 0
4740			     || (idx < (int) p1[4]
4741				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4742			break;
4743
4744		    if (idx == p2[1])
4745                      {
4746  		        p[-3] = (unsigned char) pop_failure_jump;
4747                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4748                      }
4749		  }
4750		else if ((re_opcode_t) p1[3] == charset)
4751		  {
4752		    int idx;
4753		    /* We win if the charset inside the loop
4754		       has no overlap with the one after the loop.  */
4755		    for (idx = 0;
4756			 idx < (int) p2[1] && idx < (int) p1[4];
4757			 idx++)
4758		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4759			break;
4760
4761		    if (idx == p2[1] || idx == p1[4])
4762                      {
4763  		        p[-3] = (unsigned char) pop_failure_jump;
4764                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4765                      }
4766		  }
4767	      }
4768	  }
4769	  p -= 2;		/* Point at relative address again.  */
4770	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4771	    {
4772	      p[-1] = (unsigned char) jump;
4773              DEBUG_PRINT1 ("  Match => jump.\n");
4774	      goto unconditional_jump;
4775	    }
4776        /* Note fall through.  */
4777
4778
4779	/* The end of a simple repeat has a pop_failure_jump back to
4780           its matching on_failure_jump, where the latter will push a
4781           failure point.  The pop_failure_jump takes off failure
4782           points put on by this pop_failure_jump's matching
4783           on_failure_jump; we got through the pattern to here from the
4784           matching on_failure_jump, so didn't fail.  */
4785        case pop_failure_jump:
4786          {
4787            /* We need to pass separate storage for the lowest and
4788               highest registers, even though we don't care about the
4789               actual values.  Otherwise, we will restore only one
4790               register from the stack, since lowest will == highest in
4791               `pop_failure_point'.  */
4792            active_reg_t dummy_low_reg, dummy_high_reg;
4793            unsigned char *pdummy;
4794            const char *sdummy;
4795
4796            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4797            POP_FAILURE_POINT (sdummy, pdummy,
4798                               dummy_low_reg, dummy_high_reg,
4799                               reg_dummy, reg_dummy, reg_info_dummy);
4800          }
4801	  /* Note fall through.  */
4802
4803	unconditional_jump:
4804#ifdef _LIBC
4805	  DEBUG_PRINT2 ("\n%p: ", p);
4806#else
4807	  DEBUG_PRINT2 ("\n0x%x: ", p);
4808#endif
4809          /* Note fall through.  */
4810
4811        /* Unconditionally jump (without popping any failure points).  */
4812        case jump:
4813	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4814          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4815	  p += mcnt;				/* Do the jump.  */
4816#ifdef _LIBC
4817          DEBUG_PRINT2 ("(to %p).\n", p);
4818#else
4819          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4820#endif
4821	  break;
4822
4823
4824        /* We need this opcode so we can detect where alternatives end
4825           in `group_match_null_string_p' et al.  */
4826        case jump_past_alt:
4827          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4828          goto unconditional_jump;
4829
4830
4831        /* Normally, the on_failure_jump pushes a failure point, which
4832           then gets popped at pop_failure_jump.  We will end up at
4833           pop_failure_jump, also, and with a pattern of, say, `a+', we
4834           are skipping over the on_failure_jump, so we have to push
4835           something meaningless for pop_failure_jump to pop.  */
4836        case dummy_failure_jump:
4837          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4838          /* It doesn't matter what we push for the string here.  What
4839             the code at `fail' tests is the value for the pattern.  */
4840          PUSH_FAILURE_POINT (NULL, NULL, -2);
4841          goto unconditional_jump;
4842
4843
4844        /* At the end of an alternative, we need to push a dummy failure
4845           point in case we are followed by a `pop_failure_jump', because
4846           we don't want the failure point for the alternative to be
4847           popped.  For example, matching `(a|ab)*' against `aab'
4848           requires that we match the `ab' alternative.  */
4849        case push_dummy_failure:
4850          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4851          /* See comments just above at `dummy_failure_jump' about the
4852             two zeroes.  */
4853          PUSH_FAILURE_POINT (NULL, NULL, -2);
4854          break;
4855
4856        /* Have to succeed matching what follows at least n times.
4857           After that, handle like `on_failure_jump'.  */
4858        case succeed_n:
4859          EXTRACT_NUMBER (mcnt, p + 2);
4860          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4861
4862          assert (mcnt >= 0);
4863          /* Originally, this is how many times we HAVE to succeed.  */
4864          if (mcnt > 0)
4865            {
4866               mcnt--;
4867	       p += 2;
4868               STORE_NUMBER_AND_INCR (p, mcnt);
4869#ifdef _LIBC
4870               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4871#else
4872               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4873#endif
4874            }
4875	  else if (mcnt == 0)
4876            {
4877#ifdef _LIBC
4878              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4879#else
4880              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4881#endif
4882	      p[2] = (unsigned char) no_op;
4883              p[3] = (unsigned char) no_op;
4884              goto on_failure;
4885            }
4886          break;
4887
4888        case jump_n:
4889          EXTRACT_NUMBER (mcnt, p + 2);
4890          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4891
4892          /* Originally, this is how many times we CAN jump.  */
4893          if (mcnt)
4894            {
4895               mcnt--;
4896               STORE_NUMBER (p + 2, mcnt);
4897#ifdef _LIBC
4898               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4899#else
4900               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4901#endif
4902	       goto unconditional_jump;
4903            }
4904          /* If don't have to jump any more, skip over the rest of command.  */
4905	  else
4906	    p += 4;
4907          break;
4908
4909	case set_number_at:
4910	  {
4911            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4912
4913            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4914            p1 = p + mcnt;
4915            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4916#ifdef _LIBC
4917            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4918#else
4919            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4920#endif
4921	    STORE_NUMBER (p1, mcnt);
4922            break;
4923          }
4924
4925#if 0
4926	/* The DEC Alpha C compiler 3.x generates incorrect code for the
4927	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4928	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4929	   macro and introducing temporary variables works around the bug.  */
4930
4931	case wordbound:
4932	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4933	  if (AT_WORD_BOUNDARY (d))
4934	    break;
4935	  goto fail;
4936
4937	case notwordbound:
4938	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4939	  if (AT_WORD_BOUNDARY (d))
4940	    goto fail;
4941	  break;
4942#else
4943	case wordbound:
4944	{
4945	  boolean prevchar, thischar;
4946
4947	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4948	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4949	    break;
4950
4951	  prevchar = WORDCHAR_P (d - 1);
4952	  thischar = WORDCHAR_P (d);
4953	  if (prevchar != thischar)
4954	    break;
4955	  goto fail;
4956	}
4957
4958      case notwordbound:
4959	{
4960	  boolean prevchar, thischar;
4961
4962	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4963	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4964	    goto fail;
4965
4966	  prevchar = WORDCHAR_P (d - 1);
4967	  thischar = WORDCHAR_P (d);
4968	  if (prevchar != thischar)
4969	    goto fail;
4970	  break;
4971	}
4972#endif
4973
4974	case wordbeg:
4975          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4976	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4977	    break;
4978          goto fail;
4979
4980	case wordend:
4981          DEBUG_PRINT1 ("EXECUTING wordend.\n");
4982	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4983              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4984	    break;
4985          goto fail;
4986
4987#ifdef emacs
4988  	case before_dot:
4989          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4990 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4991  	    goto fail;
4992  	  break;
4993
4994  	case at_dot:
4995          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4996 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4997  	    goto fail;
4998  	  break;
4999
5000  	case after_dot:
5001          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5002          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5003  	    goto fail;
5004  	  break;
5005
5006	case syntaxspec:
5007          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5008	  mcnt = *p++;
5009	  goto matchsyntax;
5010
5011        case wordchar:
5012          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5013	  mcnt = (int) Sword;
5014        matchsyntax:
5015	  PREFETCH ();
5016	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5017	  d++;
5018	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5019	    goto fail;
5020          SET_REGS_MATCHED ();
5021	  break;
5022
5023	case notsyntaxspec:
5024          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5025	  mcnt = *p++;
5026	  goto matchnotsyntax;
5027
5028        case notwordchar:
5029          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5030	  mcnt = (int) Sword;
5031        matchnotsyntax:
5032	  PREFETCH ();
5033	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5034	  d++;
5035	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5036	    goto fail;
5037	  SET_REGS_MATCHED ();
5038          break;
5039
5040#else /* not emacs */
5041	case wordchar:
5042          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5043	  PREFETCH ();
5044          if (!WORDCHAR_P (d))
5045            goto fail;
5046	  SET_REGS_MATCHED ();
5047          d++;
5048	  break;
5049
5050	case notwordchar:
5051          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5052	  PREFETCH ();
5053	  if (WORDCHAR_P (d))
5054            goto fail;
5055          SET_REGS_MATCHED ();
5056          d++;
5057	  break;
5058#endif /* not emacs */
5059
5060        default:
5061          abort ();
5062	}
5063      continue;  /* Successfully executed one pattern command; keep going.  */
5064
5065
5066    /* We goto here if a matching operation fails. */
5067    fail:
5068      if (!FAIL_STACK_EMPTY ())
5069	{ /* A restart point is known.  Restore to that state.  */
5070          DEBUG_PRINT1 ("\nFAIL:\n");
5071          POP_FAILURE_POINT (d, p,
5072                             lowest_active_reg, highest_active_reg,
5073                             regstart, regend, reg_info);
5074
5075          /* If this failure point is a dummy, try the next one.  */
5076          if (!p)
5077	    goto fail;
5078
5079          /* If we failed to the end of the pattern, don't examine *p.  */
5080	  assert (p <= pend);
5081          if (p < pend)
5082            {
5083              boolean is_a_jump_n = false;
5084
5085              /* If failed to a backwards jump that's part of a repetition
5086                 loop, need to pop this failure point and use the next one.  */
5087              switch ((re_opcode_t) *p)
5088                {
5089                case jump_n:
5090                  is_a_jump_n = true;
5091                case maybe_pop_jump:
5092                case pop_failure_jump:
5093                case jump:
5094                  p1 = p + 1;
5095                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5096                  p1 += mcnt;
5097
5098                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5099                      || (!is_a_jump_n
5100                          && (re_opcode_t) *p1 == on_failure_jump))
5101                    goto fail;
5102                  break;
5103                default:
5104                  /* do nothing */ ;
5105                }
5106            }
5107
5108          if (d >= string1 && d <= end1)
5109	    dend = end_match_1;
5110        }
5111      else
5112        break;   /* Matching at this starting point really fails.  */
5113    } /* for (;;) */
5114
5115  if (best_regs_set)
5116    goto restore_best_regs;
5117
5118  FREE_VARIABLES ();
5119
5120  return -1;         			/* Failure to match.  */
5121} /* re_match_2 */
5122
5123/* Subroutine definitions for re_match_2.  */
5124
5125
5126/* We are passed P pointing to a register number after a start_memory.
5127
5128   Return true if the pattern up to the corresponding stop_memory can
5129   match the empty string, and false otherwise.
5130
5131   If we find the matching stop_memory, sets P to point to one past its number.
5132   Otherwise, sets P to an undefined byte less than or equal to END.
5133
5134   We don't handle duplicates properly (yet).  */
5135
5136static boolean
5137group_match_null_string_p (p, end, reg_info)
5138    unsigned char **p, *end;
5139    register_info_type *reg_info;
5140{
5141  int mcnt;
5142  /* Point to after the args to the start_memory.  */
5143  unsigned char *p1 = *p + 2;
5144
5145  while (p1 < end)
5146    {
5147      /* Skip over opcodes that can match nothing, and return true or
5148	 false, as appropriate, when we get to one that can't, or to the
5149         matching stop_memory.  */
5150
5151      switch ((re_opcode_t) *p1)
5152        {
5153        /* Could be either a loop or a series of alternatives.  */
5154        case on_failure_jump:
5155          p1++;
5156          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5157
5158          /* If the next operation is not a jump backwards in the
5159	     pattern.  */
5160
5161	  if (mcnt >= 0)
5162	    {
5163              /* Go through the on_failure_jumps of the alternatives,
5164                 seeing if any of the alternatives cannot match nothing.
5165                 The last alternative starts with only a jump,
5166                 whereas the rest start with on_failure_jump and end
5167                 with a jump, e.g., here is the pattern for `a|b|c':
5168
5169                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5170                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5171                 /exactn/1/c
5172
5173                 So, we have to first go through the first (n-1)
5174                 alternatives and then deal with the last one separately.  */
5175
5176
5177              /* Deal with the first (n-1) alternatives, which start
5178                 with an on_failure_jump (see above) that jumps to right
5179                 past a jump_past_alt.  */
5180
5181              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5182                {
5183                  /* `mcnt' holds how many bytes long the alternative
5184                     is, including the ending `jump_past_alt' and
5185                     its number.  */
5186
5187                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5188				                      reg_info))
5189                    return false;
5190
5191                  /* Move to right after this alternative, including the
5192		     jump_past_alt.  */
5193                  p1 += mcnt;
5194
5195                  /* Break if it's the beginning of an n-th alternative
5196                     that doesn't begin with an on_failure_jump.  */
5197                  if ((re_opcode_t) *p1 != on_failure_jump)
5198                    break;
5199
5200		  /* Still have to check that it's not an n-th
5201		     alternative that starts with an on_failure_jump.  */
5202		  p1++;
5203                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5204                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5205                    {
5206		      /* Get to the beginning of the n-th alternative.  */
5207                      p1 -= 3;
5208                      break;
5209                    }
5210                }
5211
5212              /* Deal with the last alternative: go back and get number
5213                 of the `jump_past_alt' just before it.  `mcnt' contains
5214                 the length of the alternative.  */
5215              EXTRACT_NUMBER (mcnt, p1 - 2);
5216
5217              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5218                return false;
5219
5220              p1 += mcnt;	/* Get past the n-th alternative.  */
5221            } /* if mcnt > 0 */
5222          break;
5223
5224
5225        case stop_memory:
5226	  assert (p1[1] == **p);
5227          *p = p1 + 2;
5228          return true;
5229
5230
5231        default:
5232          if (!common_op_match_null_string_p (&p1, end, reg_info))
5233            return false;
5234        }
5235    } /* while p1 < end */
5236
5237  return false;
5238} /* group_match_null_string_p */
5239
5240
5241/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5242   It expects P to be the first byte of a single alternative and END one
5243   byte past the last. The alternative can contain groups.  */
5244
5245static boolean
5246alt_match_null_string_p (p, end, reg_info)
5247    unsigned char *p, *end;
5248    register_info_type *reg_info;
5249{
5250  int mcnt;
5251  unsigned char *p1 = p;
5252
5253  while (p1 < end)
5254    {
5255      /* Skip over opcodes that can match nothing, and break when we get
5256         to one that can't.  */
5257
5258      switch ((re_opcode_t) *p1)
5259        {
5260	/* It's a loop.  */
5261        case on_failure_jump:
5262          p1++;
5263          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5264          p1 += mcnt;
5265          break;
5266
5267	default:
5268          if (!common_op_match_null_string_p (&p1, end, reg_info))
5269            return false;
5270        }
5271    }  /* while p1 < end */
5272
5273  return true;
5274} /* alt_match_null_string_p */
5275
5276
5277/* Deals with the ops common to group_match_null_string_p and
5278   alt_match_null_string_p.
5279
5280   Sets P to one after the op and its arguments, if any.  */
5281
5282static boolean
5283common_op_match_null_string_p (p, end, reg_info)
5284    unsigned char **p, *end;
5285    register_info_type *reg_info;
5286{
5287  int mcnt;
5288  boolean ret;
5289  int reg_no;
5290  unsigned char *p1 = *p;
5291
5292  switch ((re_opcode_t) *p1++)
5293    {
5294    case no_op:
5295    case begline:
5296    case endline:
5297    case begbuf:
5298    case endbuf:
5299    case wordbeg:
5300    case wordend:
5301    case wordbound:
5302    case notwordbound:
5303#ifdef emacs
5304    case before_dot:
5305    case at_dot:
5306    case after_dot:
5307#endif
5308      break;
5309
5310    case start_memory:
5311      reg_no = *p1;
5312      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5313      ret = group_match_null_string_p (&p1, end, reg_info);
5314
5315      /* Have to set this here in case we're checking a group which
5316         contains a group and a back reference to it.  */
5317
5318      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5319        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5320
5321      if (!ret)
5322        return false;
5323      break;
5324
5325    /* If this is an optimized succeed_n for zero times, make the jump.  */
5326    case jump:
5327      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5328      if (mcnt >= 0)
5329        p1 += mcnt;
5330      else
5331        return false;
5332      break;
5333
5334    case succeed_n:
5335      /* Get to the number of times to succeed.  */
5336      p1 += 2;
5337      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5338
5339      if (mcnt == 0)
5340        {
5341          p1 -= 4;
5342          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5343          p1 += mcnt;
5344        }
5345      else
5346        return false;
5347      break;
5348
5349    case duplicate:
5350      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5351        return false;
5352      break;
5353
5354    case set_number_at:
5355      p1 += 4;
5356
5357    default:
5358      /* All other opcodes mean we cannot match the empty string.  */
5359      return false;
5360  }
5361
5362  *p = p1;
5363  return true;
5364} /* common_op_match_null_string_p */
5365
5366
5367/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5368   bytes; nonzero otherwise.  */
5369
5370static int
5371bcmp_translate (s1, s2, len, translate)
5372     const char *s1, *s2;
5373     register int len;
5374     RE_TRANSLATE_TYPE translate;
5375{
5376  register const unsigned char *p1 = (const unsigned char *) s1;
5377  register const unsigned char *p2 = (const unsigned char *) s2;
5378  while (len)
5379    {
5380      if (translate[*p1++] != translate[*p2++]) return 1;
5381      len--;
5382    }
5383  return 0;
5384}
5385
5386/* Entry points for GNU code.  */
5387
5388/* re_compile_pattern is the GNU regular expression compiler: it
5389   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5390   Returns 0 if the pattern was valid, otherwise an error string.
5391
5392   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5393   are set in BUFP on entry.
5394
5395   We call regex_compile to do the actual compilation.  */
5396
5397const char *
5398re_compile_pattern (pattern, length, bufp)
5399     const char *pattern;
5400     size_t length;
5401     struct re_pattern_buffer *bufp;
5402{
5403  reg_errcode_t ret;
5404
5405  /* GNU code is written to assume at least RE_NREGS registers will be set
5406     (and at least one extra will be -1).  */
5407  bufp->regs_allocated = REGS_UNALLOCATED;
5408
5409  /* And GNU code determines whether or not to get register information
5410     by passing null for the REGS argument to re_match, etc., not by
5411     setting no_sub.  */
5412  bufp->no_sub = 0;
5413
5414  /* Match anchors at newline.  */
5415  bufp->newline_anchor = 1;
5416
5417  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5418
5419  if (!ret)
5420    return NULL;
5421  return gettext (re_error_msgid[(int) ret]);
5422}
5423#ifdef _LIBC
5424weak_alias (__re_compile_pattern, re_compile_pattern)
5425#endif
5426
5427/* Entry points compatible with 4.2 BSD regex library.  We don't define
5428   them unless specifically requested.  */
5429
5430#if defined _REGEX_RE_COMP || defined _LIBC
5431
5432/* BSD has one and only one pattern buffer.  */
5433static struct re_pattern_buffer re_comp_buf;
5434
5435char *
5436#ifdef _LIBC
5437/* Make these definitions weak in libc, so POSIX programs can redefine
5438   these names if they don't use our functions, and still use
5439   regcomp/regexec below without link errors.  */
5440weak_function
5441#endif
5442re_comp (s)
5443    const char *s;
5444{
5445  reg_errcode_t ret;
5446
5447  if (!s)
5448    {
5449      if (!re_comp_buf.buffer)
5450	return gettext ("No previous regular expression");
5451      return 0;
5452    }
5453
5454  if (!re_comp_buf.buffer)
5455    {
5456      re_comp_buf.buffer = (unsigned char *) malloc (200);
5457      if (re_comp_buf.buffer == NULL)
5458        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5459      re_comp_buf.allocated = 200;
5460
5461      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5462      if (re_comp_buf.fastmap == NULL)
5463	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5464    }
5465
5466  /* Since `re_exec' always passes NULL for the `regs' argument, we
5467     don't need to initialize the pattern buffer fields which affect it.  */
5468
5469  /* Match anchors at newlines.  */
5470  re_comp_buf.newline_anchor = 1;
5471
5472  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5473
5474  if (!ret)
5475    return NULL;
5476
5477  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5478  return (char *) gettext (re_error_msgid[(int) ret]);
5479}
5480
5481
5482int
5483#ifdef _LIBC
5484weak_function
5485#endif
5486re_exec (s)
5487    const char *s;
5488{
5489  const int len = strlen (s);
5490  return
5491    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5492}
5493
5494#endif /* _REGEX_RE_COMP */
5495
5496/* POSIX.2 functions.  Don't define these for Emacs.  */
5497
5498#ifndef emacs
5499
5500/* regcomp takes a regular expression as a string and compiles it.
5501
5502   PREG is a regex_t *.  We do not expect any fields to be initialized,
5503   since POSIX says we shouldn't.  Thus, we set
5504
5505     `buffer' to the compiled pattern;
5506     `used' to the length of the compiled pattern;
5507     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5508       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5509       RE_SYNTAX_POSIX_BASIC;
5510     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5511     `fastmap' to an allocated space for the fastmap;
5512     `fastmap_accurate' to 1;
5513     `re_nsub' to the number of subexpressions in PATTERN.
5514
5515   PATTERN is the address of the pattern string.
5516
5517   CFLAGS is a series of bits which affect compilation.
5518
5519     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5520     use POSIX basic syntax.
5521
5522     If REG_NEWLINE is set, then . and [^...] don't match newline.
5523     Also, regexec will try a match beginning after every newline.
5524
5525     If REG_ICASE is set, then we considers upper- and lowercase
5526     versions of letters to be equivalent when matching.
5527
5528     If REG_NOSUB is set, then when PREG is passed to regexec, that
5529     routine will report only success or failure, and nothing about the
5530     registers.
5531
5532   It returns 0 if it succeeds, nonzero if it doesn't.  (See gnu-regex.h for
5533   the return codes and their meanings.)  */
5534
5535int
5536regcomp (preg, pattern, cflags)
5537    regex_t *preg;
5538    const char *pattern;
5539    int cflags;
5540{
5541  reg_errcode_t ret;
5542  reg_syntax_t syntax
5543    = (cflags & REG_EXTENDED) ?
5544      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5545
5546  /* regex_compile will allocate the space for the compiled pattern.  */
5547  preg->buffer = 0;
5548  preg->allocated = 0;
5549  preg->used = 0;
5550
5551  /* Try to allocate space for the fastmap.  */
5552  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5553
5554  if (cflags & REG_ICASE)
5555    {
5556      unsigned i;
5557
5558      preg->translate
5559	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5560				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5561      if (preg->translate == NULL)
5562        return (int) REG_ESPACE;
5563
5564      /* Map uppercase characters to corresponding lowercase ones.  */
5565      for (i = 0; i < CHAR_SET_SIZE; i++)
5566        preg->translate[i] = TOLOWER (i);
5567    }
5568  else
5569    preg->translate = NULL;
5570
5571  /* If REG_NEWLINE is set, newlines are treated differently.  */
5572  if (cflags & REG_NEWLINE)
5573    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5574      syntax &= ~RE_DOT_NEWLINE;
5575      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5576      /* It also changes the matching behavior.  */
5577      preg->newline_anchor = 1;
5578    }
5579  else
5580    preg->newline_anchor = 0;
5581
5582  preg->no_sub = !!(cflags & REG_NOSUB);
5583
5584  /* POSIX says a null character in the pattern terminates it, so we
5585     can use strlen here in compiling the pattern.  */
5586  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5587
5588  /* POSIX doesn't distinguish between an unmatched open-group and an
5589     unmatched close-group: both are REG_EPAREN.  */
5590  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5591
5592  if (ret == REG_NOERROR && preg->fastmap)
5593    {
5594      /* Compute the fastmap now, since regexec cannot modify the pattern
5595        buffer.  */
5596      if (re_compile_fastmap (preg) == -2)
5597       {
5598         /* Some error occured while computing the fastmap, just forget
5599            about it.  */
5600         free (preg->fastmap);
5601         preg->fastmap = NULL;
5602       }
5603    }
5604
5605  return (int) ret;
5606}
5607#ifdef _LIBC
5608weak_alias (__regcomp, regcomp)
5609#endif
5610
5611
5612/* regexec searches for a given pattern, specified by PREG, in the
5613   string STRING.
5614
5615   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5616   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5617   least NMATCH elements, and we set them to the offsets of the
5618   corresponding matched substrings.
5619
5620   EFLAGS specifies `execution flags' which affect matching: if
5621   REG_NOTBOL is set, then ^ does not match at the beginning of the
5622   string; if REG_NOTEOL is set, then $ does not match at the end.
5623
5624   We return 0 if we find a match and REG_NOMATCH if not.  */
5625
5626int
5627regexec (preg, string, nmatch, pmatch, eflags)
5628    const regex_t *preg;
5629    const char *string;
5630    size_t nmatch;
5631    regmatch_t pmatch[];
5632    int eflags;
5633{
5634  int ret;
5635  struct re_registers regs;
5636  regex_t private_preg;
5637  int len = strlen (string);
5638  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5639
5640  private_preg = *preg;
5641
5642  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5643  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5644
5645  /* The user has told us exactly how many registers to return
5646     information about, via `nmatch'.  We have to pass that on to the
5647     matching routines.  */
5648  private_preg.regs_allocated = REGS_FIXED;
5649
5650  if (want_reg_info)
5651    {
5652      regs.num_regs = nmatch;
5653      regs.start = TALLOC (nmatch, regoff_t);
5654      regs.end = TALLOC (nmatch, regoff_t);
5655      if (regs.start == NULL || regs.end == NULL)
5656        return (int) REG_NOMATCH;
5657    }
5658
5659  /* Perform the searching operation.  */
5660  ret = re_search (&private_preg, string, len,
5661                   /* start: */ 0, /* range: */ len,
5662                   want_reg_info ? &regs : (struct re_registers *) 0);
5663
5664  /* Copy the register information to the POSIX structure.  */
5665  if (want_reg_info)
5666    {
5667      if (ret >= 0)
5668        {
5669          unsigned r;
5670
5671          for (r = 0; r < nmatch; r++)
5672            {
5673              pmatch[r].rm_so = regs.start[r];
5674              pmatch[r].rm_eo = regs.end[r];
5675            }
5676        }
5677
5678      /* If we needed the temporary register info, free the space now.  */
5679      free (regs.start);
5680      free (regs.end);
5681    }
5682
5683  /* We want zero return to mean success, unlike `re_search'.  */
5684  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5685}
5686#ifdef _LIBC
5687weak_alias (__regexec, regexec)
5688#endif
5689
5690
5691/* Returns a message corresponding to an error code, ERRCODE, returned
5692   from either regcomp or regexec.   We don't use PREG here.  */
5693
5694size_t
5695regerror (errcode, preg, errbuf, errbuf_size)
5696    int errcode;
5697    const regex_t *preg;
5698    char *errbuf;
5699    size_t errbuf_size;
5700{
5701  const char *msg;
5702  size_t msg_size;
5703  (void)preg;
5704
5705  if (errcode < 0
5706      || errcode >= (int) (sizeof (re_error_msgid)
5707			   / sizeof (re_error_msgid[0])))
5708    /* Only error codes returned by the rest of the code should be passed
5709       to this routine.  If we are given anything else, or if other regex
5710       code generates an invalid error code, then the program has a bug.
5711       Dump core so we can fix it.  */
5712    abort ();
5713
5714  msg = gettext (re_error_msgid[errcode]);
5715
5716  msg_size = strlen (msg) + 1; /* Includes the null.  */
5717
5718  if (errbuf_size != 0)
5719    {
5720      if (msg_size > errbuf_size)
5721        {
5722#if defined HAVE_MEMPCPY || defined _LIBC
5723	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5724#else
5725          memcpy (errbuf, msg, errbuf_size - 1);
5726          errbuf[errbuf_size - 1] = 0;
5727#endif
5728        }
5729      else
5730        memcpy (errbuf, msg, msg_size);
5731    }
5732
5733  return msg_size;
5734}
5735#ifdef _LIBC
5736weak_alias (__regerror, regerror)
5737#endif
5738
5739
5740/* Free dynamically allocated space used by PREG.  */
5741
5742void
5743regfree (preg)
5744    regex_t *preg;
5745{
5746  if (preg->buffer != NULL)
5747    free (preg->buffer);
5748  preg->buffer = NULL;
5749
5750  preg->allocated = 0;
5751  preg->used = 0;
5752
5753  if (preg->fastmap != NULL)
5754    free (preg->fastmap);
5755  preg->fastmap = NULL;
5756  preg->fastmap_accurate = 0;
5757
5758  if (preg->translate != NULL)
5759    free (preg->translate);
5760  preg->translate = NULL;
5761}
5762#ifdef _LIBC
5763weak_alias (__regfree, regfree)
5764#endif
5765
5766#endif /* not emacs  */
5767