1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P1003.2/D11.2, except for some of the
4   internationalization features.)
5   Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
6
7   NOTE: The canonical source of this file is maintained with the
8   GNU C Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.
9
10   This program is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by the
12   Free Software Foundation; either version 2, or (at your option) any
13   later version.
14
15   This program is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with this program; if not, write to the Free Software Foundation,
22   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
23
24/* AIX requires this to be the first thing in the file. */
25#if defined _AIX && !defined REGEX_MALLOC
26  #pragma alloca
27#endif
28
29#undef	_GNU_SOURCE
30#define _GNU_SOURCE
31
32#ifdef HAVE_CONFIG_H
33# include <config.h>
34#endif
35
36#ifndef PARAMS
37# if defined __GNUC__ || (defined __STDC__ && __STDC__)
38#  define PARAMS(args) args
39# else
40#  define PARAMS(args) ()
41# endif  /* GCC.  */
42#endif  /* Not PARAMS.  */
43
44#if defined STDC_HEADERS && !defined emacs
45# include <stddef.h>
46#else
47/* We need this for `gnu-regex.h', and perhaps for the Emacs include files.  */
48# include <sys/types.h>
49#endif
50
51/* For platform which support the ISO C amendement 1 functionality we
52   support user defined character classes.  */
53#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
54 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
55# include <wchar.h>
56# include <wctype.h>
57#endif
58
59/* This is for other GNU distributions with internationalized messages.  */
60/* CYGNUS LOCAL: ../intl will handle this for us */
61#ifdef ENABLE_NLS
62# include <libintl.h>
63#else
64# define gettext(msgid) (msgid)
65#endif
66
67#ifndef gettext_noop
68/* This define is so xgettext can find the internationalizable
69   strings.  */
70# define gettext_noop(String) String
71#endif
72
73/* The `emacs' switch turns on certain matching commands
74   that make sense only in Emacs. */
75#ifdef emacs
76
77# include "lisp.h"
78# include "buffer.h"
79# include "syntax.h"
80
81#else  /* not emacs */
82
83# include "auto-host.h"
84
85# if !defined(const) && !defined(HAVE_CONST)
86#  define const
87# endif
88
89# if !defined(volatile) && !defined(HAVE_VOLATILE)
90#  define volatile
91# endif
92
93/* If we are not linking with Emacs proper,
94   we can't use the relocating allocator
95   even if config.h says that we can.  */
96# undef REL_ALLOC
97
98# if defined STDC_HEADERS || defined _LIBC
99#  include <stdlib.h>
100# else
101char *malloc ();
102char *realloc ();
103# endif
104
105/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
106   If nothing else has been done, use the method below.  */
107# ifdef INHIBIT_STRING_HEADER
108#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
109#   if !defined bzero && !defined bcopy
110#    undef INHIBIT_STRING_HEADER
111#   endif
112#  endif
113# endif
114
115/* This is the normal way of making sure we have a bcopy and a bzero.
116   This is used in most programs--a few other programs avoid this
117   by defining INHIBIT_STRING_HEADER.  */
118# ifndef INHIBIT_STRING_HEADER
119#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
120#   include <string.h>
121#   ifndef bzero
122#    ifndef _LIBC
123#     define bzero(s, n)	(memset (s, '\0', n), (s))
124#    else
125#     define bzero(s, n)	__bzero (s, n)
126#    endif
127#   endif
128#  else
129#   include <strings.h>
130#   ifndef memcmp
131#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
132#   endif
133#   ifndef memcpy
134#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
135#   endif
136#  endif
137# endif
138
139/* Define the syntax stuff for \<, \>, etc.  */
140
141/* This must be nonzero for the wordchar and notwordchar pattern
142   commands in re_match_2.  */
143# ifndef Sword
144#  define Sword 1
145# endif
146
147# ifdef SWITCH_ENUM_BUG
148#  define SWITCH_ENUM_CAST(x) ((int)(x))
149# else
150#  define SWITCH_ENUM_CAST(x) (x)
151# endif
152
153/* How many characters in the character set.  */
154# define CHAR_SET_SIZE 256
155
156/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
157#ifndef _REGEX_RE_COMP
158#define _REGEX_RE_COMP
159#endif
160
161# ifdef SYNTAX_TABLE
162
163extern char *re_syntax_table;
164
165# else /* not SYNTAX_TABLE */
166
167static char re_syntax_table[CHAR_SET_SIZE];
168
169static void
170init_syntax_once ()
171{
172   register int c;
173   static int done = 0;
174
175   if (done)
176     return;
177
178   bzero (re_syntax_table, sizeof re_syntax_table);
179
180   for (c = 'a'; c <= 'z'; c++)
181     re_syntax_table[c] = Sword;
182
183   for (c = 'A'; c <= 'Z'; c++)
184     re_syntax_table[c] = Sword;
185
186   for (c = '0'; c <= '9'; c++)
187     re_syntax_table[c] = Sword;
188
189   re_syntax_table['_'] = Sword;
190
191   done = 1;
192}
193
194# endif /* not SYNTAX_TABLE */
195
196# define SYNTAX(c) re_syntax_table[c]
197
198#endif /* not emacs */
199
200/* Get the interface, including the syntax bits.  */
201/* CYGNUS LOCAL: call it gnu-regex.h, not regex.h, to avoid name conflicts */
202#include "gnu-regex.h"
203
204/* isalpha etc. are used for the character classes.  */
205#include <ctype.h>
206
207/* Jim Meyering writes:
208
209   "... Some ctype macros are valid only for character codes that
210   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
211   using /bin/cc or gcc but without giving an ansi option).  So, all
212   ctype uses should be through macros like ISPRINT...  If
213   STDC_HEADERS is defined, then autoconf has verified that the ctype
214   macros don't need to be guarded with references to isascii. ...
215   Defining isascii to 1 should let any compiler worth its salt
216   eliminate the && through constant folding."
217   Solaris defines some of these symbols so we must undefine them first.  */
218
219#undef ISASCII
220#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
221# define ISASCII(c) 1
222#else
223# define ISASCII(c) isascii(c)
224#endif
225
226#ifdef isblank
227# define ISBLANK(c) (ISASCII (c) && isblank (c))
228#else
229# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
230#endif
231#ifdef isgraph
232# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
233#else
234# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
235#endif
236
237#undef ISPRINT
238#define ISPRINT(c) (ISASCII (c) && isprint (c))
239#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
240#define ISALNUM(c) (ISASCII (c) && isalnum (c))
241#define ISALPHA(c) (ISASCII (c) && isalpha (c))
242#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
243#define ISLOWER(c) (ISASCII (c) && islower (c))
244#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
245#define ISSPACE(c) (ISASCII (c) && isspace (c))
246#define ISUPPER(c) (ISASCII (c) && isupper (c))
247#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
248
249#ifndef NULL
250# define NULL (void *)0
251#endif
252
253/* We remove any previous definition of `SIGN_EXTEND_CHAR',
254   since ours (we hope) works properly with all combinations of
255   machines, compilers, `char' and `unsigned char' argument types.
256   (Per Bothner suggested the basic approach.)  */
257#undef SIGN_EXTEND_CHAR
258#if __STDC__
259# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
260#else  /* not __STDC__ */
261/* As in Harbison and Steele.  */
262# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
263#endif
264
265/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
266   use `alloca' instead of `malloc'.  This is because using malloc in
267   re_search* or re_match* could cause memory leaks when C-g is used in
268   Emacs; also, malloc is slower and causes storage fragmentation.  On
269   the other hand, malloc is more portable, and easier to debug.
270
271   Because we sometimes use alloca, some routines have to be macros,
272   not functions -- `alloca'-allocated space disappears at the end of the
273   function it is called in.  */
274
275#ifdef REGEX_MALLOC
276
277# define REGEX_ALLOCATE malloc
278# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
279# define REGEX_FREE free
280
281#else /* not REGEX_MALLOC  */
282
283/* Emacs already defines alloca, sometimes.  */
284# ifndef alloca
285
286/* Make alloca work the best possible way.  */
287#  ifdef __GNUC__
288#   define alloca __builtin_alloca
289#  else /* not __GNUC__ */
290#   if HAVE_ALLOCA_H
291#    include <alloca.h>
292#   endif /* HAVE_ALLOCA_H */
293#  endif /* not __GNUC__ */
294
295# endif /* not alloca */
296
297# define REGEX_ALLOCATE alloca
298
299/* Assumes a `char *destination' variable.  */
300# define REGEX_REALLOCATE(source, osize, nsize)				\
301  (destination = (char *) alloca (nsize),				\
302   memcpy (destination, source, osize))
303
304/* No need to do anything to free, after alloca.  */
305# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
306
307#endif /* not REGEX_MALLOC */
308
309/* Define how to allocate the failure stack.  */
310
311#if defined REL_ALLOC && defined REGEX_MALLOC
312
313# define REGEX_ALLOCATE_STACK(size)				\
314  r_alloc (&failure_stack_ptr, (size))
315# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
316  r_re_alloc (&failure_stack_ptr, (nsize))
317# define REGEX_FREE_STACK(ptr)					\
318  r_alloc_free (&failure_stack_ptr)
319
320#else /* not using relocating allocator */
321
322# ifdef REGEX_MALLOC
323
324#  define REGEX_ALLOCATE_STACK malloc
325#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
326#  define REGEX_FREE_STACK free
327
328# else /* not REGEX_MALLOC */
329
330#  define REGEX_ALLOCATE_STACK alloca
331
332#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
333   REGEX_REALLOCATE (source, osize, nsize)
334/* No need to explicitly free anything.  */
335#  define REGEX_FREE_STACK(arg)
336
337# endif /* not REGEX_MALLOC */
338#endif /* not using relocating allocator */
339
340
341/* True if `size1' is non-NULL and PTR is pointing anywhere inside
342   `string1' or just past its end.  This works if PTR is NULL, which is
343   a good thing.  */
344#define FIRST_STRING_P(ptr) 					\
345  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
346
347/* (Re)Allocate N items of type T using malloc, or fail.  */
348#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
349#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
350#define RETALLOC_IF(addr, n, t) \
351  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
352#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
353
354#define BYTEWIDTH 8 /* In bits.  */
355
356#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
357
358#undef MAX
359#undef MIN
360#define MAX(a, b) ((a) > (b) ? (a) : (b))
361#define MIN(a, b) ((a) < (b) ? (a) : (b))
362
363typedef char boolean;
364#define false 0
365#define true 1
366
367static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
368					const char *string1, int size1,
369					const char *string2, int size2,
370					int pos,
371					struct re_registers *regs,
372					int stop));
373
374/* These are the command codes that appear in compiled regular
375   expressions.  Some opcodes are followed by argument bytes.  A
376   command code can specify any interpretation whatsoever for its
377   arguments.  Zero bytes may appear in the compiled regular expression.  */
378
379typedef enum
380{
381  no_op = 0,
382
383  /* Succeed right away--no more backtracking.  */
384  succeed,
385
386        /* Followed by one byte giving n, then by n literal bytes.  */
387  exactn,
388
389        /* Matches any (more or less) character.  */
390  anychar,
391
392        /* Matches any one char belonging to specified set.  First
393           following byte is number of bitmap bytes.  Then come bytes
394           for a bitmap saying which chars are in.  Bits in each byte
395           are ordered low-bit-first.  A character is in the set if its
396           bit is 1.  A character too large to have a bit in the map is
397           automatically not in the set.  */
398  charset,
399
400        /* Same parameters as charset, but match any character that is
401           not one of those specified.  */
402  charset_not,
403
404        /* Start remembering the text that is matched, for storing in a
405           register.  Followed by one byte with the register number, in
406           the range 0 to one less than the pattern buffer's re_nsub
407           field.  Then followed by one byte with the number of groups
408           inner to this one.  (This last has to be part of the
409           start_memory only because we need it in the on_failure_jump
410           of re_match_2.)  */
411  start_memory,
412
413        /* Stop remembering the text that is matched and store it in a
414           memory register.  Followed by one byte with the register
415           number, in the range 0 to one less than `re_nsub' in the
416           pattern buffer, and one byte with the number of inner groups,
417           just like `start_memory'.  (We need the number of inner
418           groups here because we don't have any easy way of finding the
419           corresponding start_memory when we're at a stop_memory.)  */
420  stop_memory,
421
422        /* Match a duplicate of something remembered. Followed by one
423           byte containing the register number.  */
424  duplicate,
425
426        /* Fail unless at beginning of line.  */
427  begline,
428
429        /* Fail unless at end of line.  */
430  endline,
431
432        /* Succeeds if at beginning of buffer (if emacs) or at beginning
433           of string to be matched (if not).  */
434  begbuf,
435
436        /* Analogously, for end of buffer/string.  */
437  endbuf,
438
439        /* Followed by two byte relative address to which to jump.  */
440  jump,
441
442	/* Same as jump, but marks the end of an alternative.  */
443  jump_past_alt,
444
445        /* Followed by two-byte relative address of place to resume at
446           in case of failure.  */
447  on_failure_jump,
448
449        /* Like on_failure_jump, but pushes a placeholder instead of the
450           current string position when executed.  */
451  on_failure_keep_string_jump,
452
453        /* Throw away latest failure point and then jump to following
454           two-byte relative address.  */
455  pop_failure_jump,
456
457        /* Change to pop_failure_jump if know won't have to backtrack to
458           match; otherwise change to jump.  This is used to jump
459           back to the beginning of a repeat.  If what follows this jump
460           clearly won't match what the repeat does, such that we can be
461           sure that there is no use backtracking out of repetitions
462           already matched, then we change it to a pop_failure_jump.
463           Followed by two-byte address.  */
464  maybe_pop_jump,
465
466        /* Jump to following two-byte address, and push a dummy failure
467           point. This failure point will be thrown away if an attempt
468           is made to use it for a failure.  A `+' construct makes this
469           before the first repeat.  Also used as an intermediary kind
470           of jump when compiling an alternative.  */
471  dummy_failure_jump,
472
473	/* Push a dummy failure point and continue.  Used at the end of
474	   alternatives.  */
475  push_dummy_failure,
476
477        /* Followed by two-byte relative address and two-byte number n.
478           After matching N times, jump to the address upon failure.  */
479  succeed_n,
480
481        /* Followed by two-byte relative address, and two-byte number n.
482           Jump to the address N times, then fail.  */
483  jump_n,
484
485        /* Set the following two-byte relative address to the
486           subsequent two-byte number.  The address *includes* the two
487           bytes of number.  */
488  set_number_at,
489
490  wordchar,	/* Matches any word-constituent character.  */
491  notwordchar,	/* Matches any char that is not a word-constituent.  */
492
493  wordbeg,	/* Succeeds if at word beginning.  */
494  wordend,	/* Succeeds if at word end.  */
495
496  wordbound,	/* Succeeds if at a word boundary.  */
497  notwordbound	/* Succeeds if not at a word boundary.  */
498
499#ifdef emacs
500  ,before_dot,	/* Succeeds if before point.  */
501  at_dot,	/* Succeeds if at point.  */
502  after_dot,	/* Succeeds if after point.  */
503
504	/* Matches any character whose syntax is specified.  Followed by
505           a byte which contains a syntax code, e.g., Sword.  */
506  syntaxspec,
507
508	/* Matches any character whose syntax is not that specified.  */
509  notsyntaxspec
510#endif /* emacs */
511} re_opcode_t;
512
513/* Common operations on the compiled pattern.  */
514
515/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
516
517#define STORE_NUMBER(destination, number)				\
518  do {									\
519    (destination)[0] = (number) & 0377;					\
520    (destination)[1] = (number) >> 8;					\
521  } while (0)
522
523/* Same as STORE_NUMBER, except increment DESTINATION to
524   the byte after where the number is stored.  Therefore, DESTINATION
525   must be an lvalue.  */
526
527#define STORE_NUMBER_AND_INCR(destination, number)			\
528  do {									\
529    STORE_NUMBER (destination, number);					\
530    (destination) += 2;							\
531  } while (0)
532
533/* Put into DESTINATION a number stored in two contiguous bytes starting
534   at SOURCE.  */
535
536#define EXTRACT_NUMBER(destination, source)				\
537  do {									\
538    (destination) = *(source) & 0377;					\
539    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
540  } while (0)
541
542#ifdef DEBUG
543static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
544static void
545extract_number (dest, source)
546    int *dest;
547    unsigned char *source;
548{
549  int temp = SIGN_EXTEND_CHAR (*(source + 1));
550  *dest = *source & 0377;
551  *dest += temp << 8;
552}
553
554# ifndef EXTRACT_MACROS /* To debug the macros.  */
555#  undef EXTRACT_NUMBER
556#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
557# endif /* not EXTRACT_MACROS */
558
559#endif /* DEBUG */
560
561/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
562   SOURCE must be an lvalue.  */
563
564#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
565  do {									\
566    EXTRACT_NUMBER (destination, source);				\
567    (source) += 2; 							\
568  } while (0)
569
570#ifdef DEBUG
571static void extract_number_and_incr _RE_ARGS ((int *destination,
572					       unsigned char **source));
573static void
574extract_number_and_incr (destination, source)
575    int *destination;
576    unsigned char **source;
577{
578  extract_number (destination, *source);
579  *source += 2;
580}
581
582# ifndef EXTRACT_MACROS
583#  undef EXTRACT_NUMBER_AND_INCR
584#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
585  extract_number_and_incr (&dest, &src)
586# endif /* not EXTRACT_MACROS */
587
588#endif /* DEBUG */
589
590/* If DEBUG is defined, Regex prints many voluminous messages about what
591   it is doing (if the variable `debug' is nonzero).  If linked with the
592   main program in `iregex.c', you can enter patterns and strings
593   interactively.  And if linked with the main program in `main.c' and
594   the other test files, you can run the already-written tests.  */
595
596#ifdef DEBUG
597
598/* We use standard I/O for debugging.  */
599# include <stdio.h>
600
601/* It is useful to test things that ``must'' be true when debugging.  */
602# include <assert.h>
603
604static int debug = 0;
605
606# define DEBUG_STATEMENT(e) e
607# define DEBUG_PRINT1(x) if (debug) printf (x)
608# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
609# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
610# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
611# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
612  if (debug) print_partial_compiled_pattern (s, e)
613# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
614  if (debug) print_double_string (w, s1, sz1, s2, sz2)
615
616
617/* Print the fastmap in human-readable form.  */
618
619void
620print_fastmap (fastmap)
621    char *fastmap;
622{
623  unsigned was_a_range = 0;
624  unsigned i = 0;
625
626  while (i < (1 << BYTEWIDTH))
627    {
628      if (fastmap[i++])
629	{
630	  was_a_range = 0;
631          putchar (i - 1);
632          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
633            {
634              was_a_range = 1;
635              i++;
636            }
637	  if (was_a_range)
638            {
639              printf ("-");
640              putchar (i - 1);
641            }
642        }
643    }
644  putchar ('\n');
645}
646
647
648/* Print a compiled pattern string in human-readable form, starting at
649   the START pointer into it and ending just before the pointer END.  */
650
651void
652print_partial_compiled_pattern (start, end)
653    unsigned char *start;
654    unsigned char *end;
655{
656  int mcnt, mcnt2;
657  unsigned char *p1;
658  unsigned char *p = start;
659  unsigned char *pend = end;
660
661  if (start == NULL)
662    {
663      printf ("(null)\n");
664      return;
665    }
666
667  /* Loop over pattern commands.  */
668  while (p < pend)
669    {
670      printf ("%d:\t", p - start);
671
672      switch ((re_opcode_t) *p++)
673	{
674        case no_op:
675          printf ("/no_op");
676          break;
677
678	case exactn:
679	  mcnt = *p++;
680          printf ("/exactn/%d", mcnt);
681          do
682	    {
683              putchar ('/');
684	      putchar (*p++);
685            }
686          while (--mcnt);
687          break;
688
689	case start_memory:
690          mcnt = *p++;
691          printf ("/start_memory/%d/%d", mcnt, *p++);
692          break;
693
694	case stop_memory:
695          mcnt = *p++;
696	  printf ("/stop_memory/%d/%d", mcnt, *p++);
697          break;
698
699	case duplicate:
700	  printf ("/duplicate/%d", *p++);
701	  break;
702
703	case anychar:
704	  printf ("/anychar");
705	  break;
706
707	case charset:
708        case charset_not:
709          {
710            register int c, last = -100;
711	    register int in_range = 0;
712
713	    printf ("/charset [%s",
714	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
715
716            assert (p + *p < pend);
717
718            for (c = 0; c < 256; c++)
719	      if (c / 8 < *p
720		  && (p[1 + (c/8)] & (1 << (c % 8))))
721		{
722		  /* Are we starting a range?  */
723		  if (last + 1 == c && ! in_range)
724		    {
725		      putchar ('-');
726		      in_range = 1;
727		    }
728		  /* Have we broken a range?  */
729		  else if (last + 1 != c && in_range)
730              {
731		      putchar (last);
732		      in_range = 0;
733		    }
734
735		  if (! in_range)
736		    putchar (c);
737
738		  last = c;
739              }
740
741	    if (in_range)
742	      putchar (last);
743
744	    putchar (']');
745
746	    p += 1 + *p;
747	  }
748	  break;
749
750	case begline:
751	  printf ("/begline");
752          break;
753
754	case endline:
755          printf ("/endline");
756          break;
757
758	case on_failure_jump:
759          extract_number_and_incr (&mcnt, &p);
760  	  printf ("/on_failure_jump to %d", p + mcnt - start);
761          break;
762
763	case on_failure_keep_string_jump:
764          extract_number_and_incr (&mcnt, &p);
765  	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
766          break;
767
768	case dummy_failure_jump:
769          extract_number_and_incr (&mcnt, &p);
770  	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
771          break;
772
773	case push_dummy_failure:
774          printf ("/push_dummy_failure");
775          break;
776
777        case maybe_pop_jump:
778          extract_number_and_incr (&mcnt, &p);
779  	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
780	  break;
781
782        case pop_failure_jump:
783	  extract_number_and_incr (&mcnt, &p);
784  	  printf ("/pop_failure_jump to %d", p + mcnt - start);
785	  break;
786
787        case jump_past_alt:
788	  extract_number_and_incr (&mcnt, &p);
789  	  printf ("/jump_past_alt to %d", p + mcnt - start);
790	  break;
791
792        case jump:
793	  extract_number_and_incr (&mcnt, &p);
794  	  printf ("/jump to %d", p + mcnt - start);
795	  break;
796
797        case succeed_n:
798          extract_number_and_incr (&mcnt, &p);
799	  p1 = p + mcnt;
800          extract_number_and_incr (&mcnt2, &p);
801	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
802          break;
803
804        case jump_n:
805          extract_number_and_incr (&mcnt, &p);
806	  p1 = p + mcnt;
807          extract_number_and_incr (&mcnt2, &p);
808	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
809          break;
810
811        case set_number_at:
812          extract_number_and_incr (&mcnt, &p);
813	  p1 = p + mcnt;
814          extract_number_and_incr (&mcnt2, &p);
815	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
816          break;
817
818        case wordbound:
819	  printf ("/wordbound");
820	  break;
821
822	case notwordbound:
823	  printf ("/notwordbound");
824          break;
825
826	case wordbeg:
827	  printf ("/wordbeg");
828	  break;
829
830	case wordend:
831	  printf ("/wordend");
832
833# ifdef emacs
834	case before_dot:
835	  printf ("/before_dot");
836          break;
837
838	case at_dot:
839	  printf ("/at_dot");
840          break;
841
842	case after_dot:
843	  printf ("/after_dot");
844          break;
845
846	case syntaxspec:
847          printf ("/syntaxspec");
848	  mcnt = *p++;
849	  printf ("/%d", mcnt);
850          break;
851
852	case notsyntaxspec:
853          printf ("/notsyntaxspec");
854	  mcnt = *p++;
855	  printf ("/%d", mcnt);
856	  break;
857# endif /* emacs */
858
859	case wordchar:
860	  printf ("/wordchar");
861          break;
862
863	case notwordchar:
864	  printf ("/notwordchar");
865          break;
866
867	case begbuf:
868	  printf ("/begbuf");
869          break;
870
871	case endbuf:
872	  printf ("/endbuf");
873          break;
874
875        default:
876          printf ("?%d", *(p-1));
877	}
878
879      putchar ('\n');
880    }
881
882  printf ("%d:\tend of pattern.\n", p - start);
883}
884
885
886void
887print_compiled_pattern (bufp)
888    struct re_pattern_buffer *bufp;
889{
890  unsigned char *buffer = bufp->buffer;
891
892  print_partial_compiled_pattern (buffer, buffer + bufp->used);
893  printf ("%ld bytes used/%ld bytes allocated.\n",
894	  bufp->used, bufp->allocated);
895
896  if (bufp->fastmap_accurate && bufp->fastmap)
897    {
898      printf ("fastmap: ");
899      print_fastmap (bufp->fastmap);
900    }
901
902  printf ("re_nsub: %d\t", bufp->re_nsub);
903  printf ("regs_alloc: %d\t", bufp->regs_allocated);
904  printf ("can_be_null: %d\t", bufp->can_be_null);
905  printf ("newline_anchor: %d\n", bufp->newline_anchor);
906  printf ("no_sub: %d\t", bufp->no_sub);
907  printf ("not_bol: %d\t", bufp->not_bol);
908  printf ("not_eol: %d\t", bufp->not_eol);
909  printf ("syntax: %lx\n", bufp->syntax);
910  /* Perhaps we should print the translate table?  */
911}
912
913
914void
915print_double_string (where, string1, size1, string2, size2)
916    const char *where;
917    const char *string1;
918    const char *string2;
919    int size1;
920    int size2;
921{
922  int this_char;
923
924  if (where == NULL)
925    printf ("(null)");
926  else
927    {
928      if (FIRST_STRING_P (where))
929        {
930          for (this_char = where - string1; this_char < size1; this_char++)
931            putchar (string1[this_char]);
932
933          where = string2;
934        }
935
936      for (this_char = where - string2; this_char < size2; this_char++)
937        putchar (string2[this_char]);
938    }
939}
940
941void
942printchar (c)
943     int c;
944{
945  putc (c, stderr);
946}
947
948#else /* not DEBUG */
949
950# undef assert
951# define assert(e)
952
953# define DEBUG_STATEMENT(e)
954# define DEBUG_PRINT1(x)
955# define DEBUG_PRINT2(x1, x2)
956# define DEBUG_PRINT3(x1, x2, x3)
957# define DEBUG_PRINT4(x1, x2, x3, x4)
958# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
959# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
960
961#endif /* not DEBUG */
962
963/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
964   also be assigned to arbitrarily: each pattern buffer stores its own
965   syntax, so it can be changed between regex compilations.  */
966/* This has no initializer because initialized variables in Emacs
967   become read-only after dumping.  */
968reg_syntax_t re_syntax_options;
969
970
971/* Specify the precise syntax of regexps for compilation.  This provides
972   for compatibility for various utilities which historically have
973   different, incompatible syntaxes.
974
975   The argument SYNTAX is a bit mask comprised of the various bits
976   defined in gnu-regex.h.  We return the old syntax.  */
977
978reg_syntax_t
979re_set_syntax (syntax)
980    reg_syntax_t syntax;
981{
982  reg_syntax_t ret = re_syntax_options;
983
984  re_syntax_options = syntax;
985#ifdef DEBUG
986  if (syntax & RE_DEBUG)
987    debug = 1;
988  else if (debug) /* was on but now is not */
989    debug = 0;
990#endif /* DEBUG */
991  return ret;
992}
993#ifdef _LIBC
994weak_alias (__re_set_syntax, re_set_syntax)
995#endif
996
997/* This table gives an error message for each of the error codes listed
998   in gnu-regex.h.  Obviously the order here has to be same as there.
999   POSIX doesn't require that we do anything for REG_NOERROR,
1000   but why not be nice?  */
1001
1002static const char *re_error_msgid[] =
1003  {
1004    gettext_noop ("Success"),	/* REG_NOERROR */
1005    gettext_noop ("No match"),	/* REG_NOMATCH */
1006    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1007    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1008    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1009    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1010    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1011    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1012    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1013    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1014    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1015    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1016    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1017    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1018    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1019    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1020    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1021  };
1022
1023/* Avoiding alloca during matching, to placate r_alloc.  */
1024
1025/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1026   searching and matching functions should not call alloca.  On some
1027   systems, alloca is implemented in terms of malloc, and if we're
1028   using the relocating allocator routines, then malloc could cause a
1029   relocation, which might (if the strings being searched are in the
1030   ralloc heap) shift the data out from underneath the regexp
1031   routines.
1032
1033   Here's another reason to avoid allocation: Emacs
1034   processes input from X in a signal handler; processing X input may
1035   call malloc; if input arrives while a matching routine is calling
1036   malloc, then we're scrod.  But Emacs can't just block input while
1037   calling matching routines; then we don't notice interrupts when
1038   they come in.  So, Emacs blocks input around all regexp calls
1039   except the matching calls, which it leaves unprotected, in the
1040   faith that they will not malloc.  */
1041
1042/* Normally, this is fine.  */
1043#define MATCH_MAY_ALLOCATE
1044
1045/* When using GNU C, we are not REALLY using the C alloca, no matter
1046   what config.h may say.  So don't take precautions for it.  */
1047#ifdef __GNUC__
1048# undef C_ALLOCA
1049#endif
1050
1051/* The match routines may not allocate if (1) they would do it with malloc
1052   and (2) it's not safe for them to use malloc.
1053   Note that if REL_ALLOC is defined, matching would not use malloc for the
1054   failure stack, but we would still use it for the register vectors;
1055   so REL_ALLOC should not affect this.  */
1056#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1057# undef MATCH_MAY_ALLOCATE
1058#endif
1059
1060
1061/* Failure stack declarations and macros; both re_compile_fastmap and
1062   re_match_2 use a failure stack.  These have to be macros because of
1063   REGEX_ALLOCATE_STACK.  */
1064
1065
1066/* Number of failure points for which to initially allocate space
1067   when matching.  If this number is exceeded, we allocate more
1068   space, so it is not a hard limit.  */
1069#ifndef INIT_FAILURE_ALLOC
1070# define INIT_FAILURE_ALLOC 5
1071#endif
1072
1073/* Roughly the maximum number of failure points on the stack.  Would be
1074   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1075   This is a variable only so users of regex can assign to it; we never
1076   change it ourselves.  */
1077
1078#ifdef INT_IS_16BIT
1079
1080# if defined MATCH_MAY_ALLOCATE
1081/* 4400 was enough to cause a crash on Alpha OSF/1,
1082   whose default stack limit is 2mb.  */
1083long int re_max_failures = 4000;
1084# else
1085long int re_max_failures = 2000;
1086# endif
1087
1088union fail_stack_elt
1089{
1090  unsigned char *pointer;
1091  long int integer;
1092};
1093
1094typedef union fail_stack_elt fail_stack_elt_t;
1095
1096typedef struct
1097{
1098  fail_stack_elt_t *stack;
1099  unsigned long int size;
1100  unsigned long int avail;		/* Offset of next open position.  */
1101} fail_stack_type;
1102
1103#else /* not INT_IS_16BIT */
1104
1105# if defined MATCH_MAY_ALLOCATE
1106/* 4400 was enough to cause a crash on Alpha OSF/1,
1107   whose default stack limit is 2mb.  */
1108int re_max_failures = 20000;
1109# else
1110int re_max_failures = 2000;
1111# endif
1112
1113union fail_stack_elt
1114{
1115  unsigned char *pointer;
1116  int integer;
1117};
1118
1119typedef union fail_stack_elt fail_stack_elt_t;
1120
1121typedef struct
1122{
1123  fail_stack_elt_t *stack;
1124  unsigned size;
1125  unsigned avail;			/* Offset of next open position.  */
1126} fail_stack_type;
1127
1128#endif /* INT_IS_16BIT */
1129
1130#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1131#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1132#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1133
1134
1135/* Define macros to initialize and free the failure stack.
1136   Do `return -2' if the alloc fails.  */
1137
1138#ifdef MATCH_MAY_ALLOCATE
1139# define INIT_FAIL_STACK()						\
1140  do {									\
1141    fail_stack.stack = (fail_stack_elt_t *)				\
1142      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1143									\
1144    if (fail_stack.stack == NULL)					\
1145      return -2;							\
1146									\
1147    fail_stack.size = INIT_FAILURE_ALLOC;				\
1148    fail_stack.avail = 0;						\
1149  } while (0)
1150
1151# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1152#else
1153# define INIT_FAIL_STACK()						\
1154  do {									\
1155    fail_stack.avail = 0;						\
1156  } while (0)
1157
1158# define RESET_FAIL_STACK()
1159#endif
1160
1161
1162/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1163
1164   Return 1 if succeeds, and 0 if either ran out of memory
1165   allocating space for it or it was already too large.
1166
1167   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1168
1169#define DOUBLE_FAIL_STACK(fail_stack)					\
1170  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1171   ? 0									\
1172   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1173        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1174          (fail_stack).size * sizeof (fail_stack_elt_t),		\
1175          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1176									\
1177      (fail_stack).stack == NULL					\
1178      ? 0								\
1179      : ((fail_stack).size <<= 1, 					\
1180         1)))
1181
1182
1183/* Push pointer POINTER on FAIL_STACK.
1184   Return 1 if was able to do so and 0 if ran out of memory allocating
1185   space to do so.  */
1186#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1187  ((FAIL_STACK_FULL ()							\
1188    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1189   ? 0									\
1190   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1191      1))
1192
1193/* Push a pointer value onto the failure stack.
1194   Assumes the variable `fail_stack'.  Probably should only
1195   be called from within `PUSH_FAILURE_POINT'.  */
1196#define PUSH_FAILURE_POINTER(item)					\
1197  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1198
1199/* This pushes an integer-valued item onto the failure stack.
1200   Assumes the variable `fail_stack'.  Probably should only
1201   be called from within `PUSH_FAILURE_POINT'.  */
1202#define PUSH_FAILURE_INT(item)					\
1203  fail_stack.stack[fail_stack.avail++].integer = (item)
1204
1205/* Push a fail_stack_elt_t value onto the failure stack.
1206   Assumes the variable `fail_stack'.  Probably should only
1207   be called from within `PUSH_FAILURE_POINT'.  */
1208#define PUSH_FAILURE_ELT(item)					\
1209  fail_stack.stack[fail_stack.avail++] =  (item)
1210
1211/* These three POP... operations complement the three PUSH... operations.
1212   All assume that `fail_stack' is nonempty.  */
1213#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1214#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1215#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1216
1217/* Used to omit pushing failure point id's when we're not debugging.  */
1218#ifdef DEBUG
1219# define DEBUG_PUSH PUSH_FAILURE_INT
1220# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1221#else
1222# define DEBUG_PUSH(item)
1223# define DEBUG_POP(item_addr)
1224#endif
1225
1226
1227/* Push the information about the state we will need
1228   if we ever fail back to it.
1229
1230   Requires variables fail_stack, regstart, regend, reg_info, and
1231   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1232   be declared.
1233
1234   Does `return FAILURE_CODE' if runs out of memory.  */
1235
1236#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1237  do {									\
1238    char *destination;							\
1239    /* Must be int, so when we don't save any registers, the arithmetic	\
1240       of 0 + -1 isn't done as unsigned.  */				\
1241    /* Can't be int, since there is not a shred of a guarantee that int	\
1242       is wide enough to hold a value of something to which pointer can	\
1243       be assigned */							\
1244    active_reg_t this_reg;						\
1245    									\
1246    DEBUG_STATEMENT (failure_id++);					\
1247    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1248    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1249    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1250    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1251									\
1252    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1253    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1254									\
1255    /* Ensure we have enough space allocated for what we will push.  */	\
1256    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1257      {									\
1258        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1259          return failure_code;						\
1260									\
1261        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1262		       (fail_stack).size);				\
1263        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1264      }									\
1265									\
1266    /* Push the info, starting with the registers.  */			\
1267    DEBUG_PRINT1 ("\n");						\
1268									\
1269    if (1)								\
1270      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1271	   this_reg++)							\
1272	{								\
1273	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1274	  DEBUG_STATEMENT (num_regs_pushed++);				\
1275									\
1276	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1277	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1278									\
1279	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1280	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1281									\
1282	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1283			reg_info[this_reg].word.pointer);		\
1284	  DEBUG_PRINT2 (" match_null=%d",				\
1285			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1286	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1287	  DEBUG_PRINT2 (" matched_something=%d",			\
1288			MATCHED_SOMETHING (reg_info[this_reg]));	\
1289	  DEBUG_PRINT2 (" ever_matched=%d",				\
1290			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1291	  DEBUG_PRINT1 ("\n");						\
1292	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1293	}								\
1294									\
1295    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1296    PUSH_FAILURE_INT (lowest_active_reg);				\
1297									\
1298    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1299    PUSH_FAILURE_INT (highest_active_reg);				\
1300									\
1301    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1302    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1303    PUSH_FAILURE_POINTER (pattern_place);				\
1304									\
1305    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1306    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1307				 size2);				\
1308    DEBUG_PRINT1 ("'\n");						\
1309    PUSH_FAILURE_POINTER (string_place);				\
1310									\
1311    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1312    DEBUG_PUSH (failure_id);						\
1313  } while (0)
1314
1315/* This is the number of items that are pushed and popped on the stack
1316   for each register.  */
1317#define NUM_REG_ITEMS  3
1318
1319/* Individual items aside from the registers.  */
1320#ifdef DEBUG
1321# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1322#else
1323# define NUM_NONREG_ITEMS 4
1324#endif
1325
1326/* We push at most this many items on the stack.  */
1327/* We used to use (num_regs - 1), which is the number of registers
1328   this regexp will save; but that was changed to 5
1329   to avoid stack overflow for a regexp with lots of parens.  */
1330#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1331
1332/* We actually push this many items.  */
1333#define NUM_FAILURE_ITEMS				\
1334  (((0							\
1335     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1336    * NUM_REG_ITEMS)					\
1337   + NUM_NONREG_ITEMS)
1338
1339/* How many items can still be added to the stack without overflowing it.  */
1340#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1341
1342
1343/* Pops what PUSH_FAIL_STACK pushes.
1344
1345   We restore into the parameters, all of which should be lvalues:
1346     STR -- the saved data position.
1347     PAT -- the saved pattern position.
1348     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1349     REGSTART, REGEND -- arrays of string positions.
1350     REG_INFO -- array of information about each subexpression.
1351
1352   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1353   `pend', `string1', `size1', `string2', and `size2'.  */
1354
1355#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1356{									\
1357  DEBUG_STATEMENT (unsigned failure_id;)				\
1358  active_reg_t this_reg;						\
1359  const unsigned char *string_temp;					\
1360									\
1361  assert (!FAIL_STACK_EMPTY ());					\
1362									\
1363  /* Remove failure points and point to how many regs pushed.  */	\
1364  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1365  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1366  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1367									\
1368  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1369									\
1370  DEBUG_POP (&failure_id);						\
1371  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1372									\
1373  /* If the saved string location is NULL, it came from an		\
1374     on_failure_keep_string_jump opcode, and we want to throw away the	\
1375     saved NULL, thus retaining our current position in the string.  */	\
1376  string_temp = POP_FAILURE_POINTER ();					\
1377  if (string_temp != NULL)						\
1378    str = (const char *) string_temp;					\
1379									\
1380  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1381  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1382  DEBUG_PRINT1 ("'\n");							\
1383									\
1384  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1385  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1386  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1387									\
1388  /* Restore register info.  */						\
1389  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1390  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1391									\
1392  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1393  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1394									\
1395  if (1)								\
1396    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1397      {									\
1398	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1399									\
1400	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1401	DEBUG_PRINT2 ("      info: %p\n",				\
1402		      reg_info[this_reg].word.pointer);			\
1403									\
1404	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1405	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1406									\
1407	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1408	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1409      }									\
1410  else									\
1411    {									\
1412      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1413	{								\
1414	  reg_info[this_reg].word.integer = 0;				\
1415	  regend[this_reg] = 0;						\
1416	  regstart[this_reg] = 0;					\
1417	}								\
1418      highest_active_reg = high_reg;					\
1419    }									\
1420									\
1421  set_regs_matched_done = 0;						\
1422  DEBUG_STATEMENT (nfailure_points_popped++);				\
1423} /* POP_FAILURE_POINT */
1424
1425
1426
1427/* Structure for per-register (a.k.a. per-group) information.
1428   Other register information, such as the
1429   starting and ending positions (which are addresses), and the list of
1430   inner groups (which is a bits list) are maintained in separate
1431   variables.
1432
1433   We are making a (strictly speaking) nonportable assumption here: that
1434   the compiler will pack our bit fields into something that fits into
1435   the type of `word', i.e., is something that fits into one item on the
1436   failure stack.  */
1437
1438
1439/* Declarations and macros for re_match_2.  */
1440
1441typedef union
1442{
1443  fail_stack_elt_t word;
1444  struct
1445  {
1446      /* This field is one if this group can match the empty string,
1447         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1448#define MATCH_NULL_UNSET_VALUE 3
1449    unsigned match_null_string_p : 2;
1450    unsigned is_active : 1;
1451    unsigned matched_something : 1;
1452    unsigned ever_matched_something : 1;
1453  } bits;
1454} register_info_type;
1455
1456#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1457#define IS_ACTIVE(R)  ((R).bits.is_active)
1458#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1459#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1460
1461
1462/* Call this when have matched a real character; it sets `matched' flags
1463   for the subexpressions which we are currently inside.  Also records
1464   that those subexprs have matched.  */
1465#define SET_REGS_MATCHED()						\
1466  do									\
1467    {									\
1468      if (!set_regs_matched_done)					\
1469	{								\
1470	  active_reg_t r;						\
1471	  set_regs_matched_done = 1;					\
1472	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1473	    {								\
1474	      MATCHED_SOMETHING (reg_info[r])				\
1475		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1476		= 1;							\
1477	    }								\
1478	}								\
1479    }									\
1480  while (0)
1481
1482/* Registers are set to a sentinel when they haven't yet matched.  */
1483static char reg_unset_dummy;
1484#define REG_UNSET_VALUE (&reg_unset_dummy)
1485#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1486
1487/* Subroutine declarations and macros for regex_compile.  */
1488
1489static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1490					      reg_syntax_t syntax,
1491					      struct re_pattern_buffer *bufp));
1492static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1493static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1494				 int arg1, int arg2));
1495static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1496				  int arg, unsigned char *end));
1497static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1498				  int arg1, int arg2, unsigned char *end));
1499static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1500					   reg_syntax_t syntax));
1501static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1502					   reg_syntax_t syntax));
1503static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1504					      const char *pend,
1505					      char *translate,
1506					      reg_syntax_t syntax,
1507					      unsigned char *b));
1508
1509/* Fetch the next character in the uncompiled pattern---translating it
1510   if necessary.  Also cast from a signed character in the constant
1511   string passed to us by the user to an unsigned char that we can use
1512   as an array index (in, e.g., `translate').  */
1513#ifndef PATFETCH
1514# define PATFETCH(c)							\
1515  do {if (p == pend) return REG_EEND;					\
1516    c = (unsigned char) *p++;						\
1517    if (translate) c = (unsigned char) translate[c];			\
1518  } while (0)
1519#endif
1520
1521/* Fetch the next character in the uncompiled pattern, with no
1522   translation.  */
1523#define PATFETCH_RAW(c)							\
1524  do {if (p == pend) return REG_EEND;					\
1525    c = (unsigned char) *p++; 						\
1526  } while (0)
1527
1528/* Go backwards one character in the pattern.  */
1529#define PATUNFETCH p--
1530
1531
1532/* If `translate' is non-null, return translate[D], else just D.  We
1533   cast the subscript to translate because some data is declared as
1534   `char *', to avoid warnings when a string constant is passed.  But
1535   when we use a character as a subscript we must make it unsigned.  */
1536#ifndef TRANSLATE
1537# define TRANSLATE(d) \
1538  (translate ? (char) translate[(unsigned char) (d)] : (d))
1539#endif
1540
1541
1542/* Macros for outputting the compiled pattern into `buffer'.  */
1543
1544/* If the buffer isn't allocated when it comes in, use this.  */
1545#define INIT_BUF_SIZE  32
1546
1547/* Make sure we have at least N more bytes of space in buffer.  */
1548#define GET_BUFFER_SPACE(n)						\
1549    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1550      EXTEND_BUFFER ()
1551
1552/* Make sure we have one more byte of buffer space and then add C to it.  */
1553#define BUF_PUSH(c)							\
1554  do {									\
1555    GET_BUFFER_SPACE (1);						\
1556    *b++ = (unsigned char) (c);						\
1557  } while (0)
1558
1559
1560/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1561#define BUF_PUSH_2(c1, c2)						\
1562  do {									\
1563    GET_BUFFER_SPACE (2);						\
1564    *b++ = (unsigned char) (c1);					\
1565    *b++ = (unsigned char) (c2);					\
1566  } while (0)
1567
1568
1569/* As with BUF_PUSH_2, except for three bytes.  */
1570#define BUF_PUSH_3(c1, c2, c3)						\
1571  do {									\
1572    GET_BUFFER_SPACE (3);						\
1573    *b++ = (unsigned char) (c1);					\
1574    *b++ = (unsigned char) (c2);					\
1575    *b++ = (unsigned char) (c3);					\
1576  } while (0)
1577
1578
1579/* Store a jump with opcode OP at LOC to location TO.  We store a
1580   relative address offset by the three bytes the jump itself occupies.  */
1581#define STORE_JUMP(op, loc, to) \
1582  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1583
1584/* Likewise, for a two-argument jump.  */
1585#define STORE_JUMP2(op, loc, to, arg) \
1586  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1587
1588/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1589#define INSERT_JUMP(op, loc, to) \
1590  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1591
1592/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1593#define INSERT_JUMP2(op, loc, to, arg) \
1594  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1595
1596
1597/* This is not an arbitrary limit: the arguments which represent offsets
1598   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1599   be too small, many things would have to change.  */
1600/* Any other compiler which, like MSC, has allocation limit below 2^16
1601   bytes will have to use approach similar to what was done below for
1602   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1603   reallocating to 0 bytes.  Such thing is not going to work too well.
1604   You have been warned!!  */
1605#if defined _MSC_VER  && !defined WIN32
1606/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1607   The REALLOC define eliminates a flurry of conversion warnings,
1608   but is not required. */
1609# define MAX_BUF_SIZE  65500L
1610# define REALLOC(p,s) realloc ((p), (size_t) (s))
1611#else
1612# define MAX_BUF_SIZE (1L << 16)
1613# define REALLOC(p,s) realloc ((p), (s))
1614#endif
1615
1616/* Extend the buffer by twice its current size via realloc and
1617   reset the pointers that pointed into the old block to point to the
1618   correct places in the new one.  If extending the buffer results in it
1619   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1620#define EXTEND_BUFFER()							\
1621  do { 									\
1622    unsigned char *old_buffer = bufp->buffer;				\
1623    if (bufp->allocated == MAX_BUF_SIZE) 				\
1624      return REG_ESIZE;							\
1625    bufp->allocated <<= 1;						\
1626    if (bufp->allocated > MAX_BUF_SIZE)					\
1627      bufp->allocated = MAX_BUF_SIZE; 					\
1628    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1629    if (bufp->buffer == NULL)						\
1630      return REG_ESPACE;						\
1631    /* If the buffer moved, move all the pointers into it.  */		\
1632    if (old_buffer != bufp->buffer)					\
1633      {									\
1634        b = (b - old_buffer) + bufp->buffer;				\
1635        begalt = (begalt - old_buffer) + bufp->buffer;			\
1636        if (fixup_alt_jump)						\
1637          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1638        if (laststart)							\
1639          laststart = (laststart - old_buffer) + bufp->buffer;		\
1640        if (pending_exact)						\
1641          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1642      }									\
1643  } while (0)
1644
1645
1646/* Since we have one byte reserved for the register number argument to
1647   {start,stop}_memory, the maximum number of groups we can report
1648   things about is what fits in that byte.  */
1649#define MAX_REGNUM 255
1650
1651/* But patterns can have more than `MAX_REGNUM' registers.  We just
1652   ignore the excess.  */
1653typedef unsigned regnum_t;
1654
1655
1656/* Macros for the compile stack.  */
1657
1658/* Since offsets can go either forwards or backwards, this type needs to
1659   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1660/* int may be not enough when sizeof(int) == 2.  */
1661typedef long pattern_offset_t;
1662
1663typedef struct
1664{
1665  pattern_offset_t begalt_offset;
1666  pattern_offset_t fixup_alt_jump;
1667  pattern_offset_t inner_group_offset;
1668  pattern_offset_t laststart_offset;
1669  regnum_t regnum;
1670} compile_stack_elt_t;
1671
1672
1673typedef struct
1674{
1675  compile_stack_elt_t *stack;
1676  unsigned size;
1677  unsigned avail;			/* Offset of next open position.  */
1678} compile_stack_type;
1679
1680
1681#define INIT_COMPILE_STACK_SIZE 32
1682
1683#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1684#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1685
1686/* The next available element.  */
1687#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1688
1689
1690/* Set the bit for character C in a list.  */
1691#define SET_LIST_BIT(c)                               \
1692  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1693   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1694
1695
1696/* Get the next unsigned number in the uncompiled pattern.  */
1697#define GET_UNSIGNED_NUMBER(num) 					\
1698  { if (p != pend)							\
1699     {									\
1700       PATFETCH (c); 							\
1701       while (ISDIGIT (c)) 						\
1702         { 								\
1703           if (num < 0)							\
1704              num = 0;							\
1705           num = num * 10 + c - '0'; 					\
1706           if (p == pend) 						\
1707              break; 							\
1708           PATFETCH (c);						\
1709         } 								\
1710       } 								\
1711    }
1712
1713#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1714/* The GNU C library provides support for user-defined character classes
1715   and the functions from ISO C amendement 1.  */
1716# ifdef CHARCLASS_NAME_MAX
1717#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1718# else
1719/* This shouldn't happen but some implementation might still have this
1720   problem.  Use a reasonable default value.  */
1721#  define CHAR_CLASS_MAX_LENGTH 256
1722# endif
1723
1724# ifdef _LIBC
1725#  define IS_CHAR_CLASS(string) __wctype (string)
1726# else
1727#  define IS_CHAR_CLASS(string) wctype (string)
1728# endif
1729#else
1730# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1731
1732# define IS_CHAR_CLASS(string)						\
1733   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1734    || STREQ (string, "lower") || STREQ (string, "digit")		\
1735    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1736    || STREQ (string, "space") || STREQ (string, "print")		\
1737    || STREQ (string, "punct") || STREQ (string, "graph")		\
1738    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1739#endif
1740
1741#ifndef MATCH_MAY_ALLOCATE
1742
1743/* If we cannot allocate large objects within re_match_2_internal,
1744   we make the fail stack and register vectors global.
1745   The fail stack, we grow to the maximum size when a regexp
1746   is compiled.
1747   The register vectors, we adjust in size each time we
1748   compile a regexp, according to the number of registers it needs.  */
1749
1750static fail_stack_type fail_stack;
1751
1752/* Size with which the following vectors are currently allocated.
1753   That is so we can make them bigger as needed,
1754   but never make them smaller.  */
1755static int regs_allocated_size;
1756
1757static const char **     regstart, **     regend;
1758static const char ** old_regstart, ** old_regend;
1759static const char **best_regstart, **best_regend;
1760static register_info_type *reg_info;
1761static const char **reg_dummy;
1762static register_info_type *reg_info_dummy;
1763
1764/* Make the register vectors big enough for NUM_REGS registers,
1765   but don't make them smaller.  */
1766
1767static
1768regex_grow_registers (num_regs)
1769     int num_regs;
1770{
1771  if (num_regs > regs_allocated_size)
1772    {
1773      RETALLOC_IF (regstart,	 num_regs, const char *);
1774      RETALLOC_IF (regend,	 num_regs, const char *);
1775      RETALLOC_IF (old_regstart, num_regs, const char *);
1776      RETALLOC_IF (old_regend,	 num_regs, const char *);
1777      RETALLOC_IF (best_regstart, num_regs, const char *);
1778      RETALLOC_IF (best_regend,	 num_regs, const char *);
1779      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1780      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1781      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1782
1783      regs_allocated_size = num_regs;
1784    }
1785}
1786
1787#endif /* not MATCH_MAY_ALLOCATE */
1788
1789static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1790						 compile_stack,
1791						 regnum_t regnum));
1792
1793/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1794   Returns one of error codes defined in `gnu-regex.h', or zero for success.
1795
1796   Assumes the `allocated' (and perhaps `buffer') and `translate'
1797   fields are set in BUFP on entry.
1798
1799   If it succeeds, results are put in BUFP (if it returns an error, the
1800   contents of BUFP are undefined):
1801     `buffer' is the compiled pattern;
1802     `syntax' is set to SYNTAX;
1803     `used' is set to the length of the compiled pattern;
1804     `fastmap_accurate' is zero;
1805     `re_nsub' is the number of subexpressions in PATTERN;
1806     `not_bol' and `not_eol' are zero;
1807
1808   The `fastmap' and `newline_anchor' fields are neither
1809   examined nor set.  */
1810
1811/* Return, freeing storage we allocated.  */
1812#define FREE_STACK_RETURN(value)		\
1813  return (free (compile_stack.stack), value)
1814
1815static reg_errcode_t
1816regex_compile (pattern, size, syntax, bufp)
1817     const char *pattern;
1818     size_t size;
1819     reg_syntax_t syntax;
1820     struct re_pattern_buffer *bufp;
1821{
1822  /* We fetch characters from PATTERN here.  Even though PATTERN is
1823     `char *' (i.e., signed), we declare these variables as unsigned, so
1824     they can be reliably used as array indices.  */
1825  register unsigned char c, c1;
1826
1827  /* A random temporary spot in PATTERN.  */
1828  const char *p1;
1829
1830  /* Points to the end of the buffer, where we should append.  */
1831  register unsigned char *b;
1832
1833  /* Keeps track of unclosed groups.  */
1834  compile_stack_type compile_stack;
1835
1836  /* Points to the current (ending) position in the pattern.  */
1837  const char *p = pattern;
1838  const char *pend = pattern + size;
1839
1840  /* How to translate the characters in the pattern.  */
1841  RE_TRANSLATE_TYPE translate = bufp->translate;
1842
1843  /* Address of the count-byte of the most recently inserted `exactn'
1844     command.  This makes it possible to tell if a new exact-match
1845     character can be added to that command or if the character requires
1846     a new `exactn' command.  */
1847  unsigned char *pending_exact = 0;
1848
1849  /* Address of start of the most recently finished expression.
1850     This tells, e.g., postfix * where to find the start of its
1851     operand.  Reset at the beginning of groups and alternatives.  */
1852  unsigned char *laststart = 0;
1853
1854  /* Address of beginning of regexp, or inside of last group.  */
1855  unsigned char *begalt;
1856
1857  /* Place in the uncompiled pattern (i.e., the {) to
1858     which to go back if the interval is invalid.  */
1859  const char *beg_interval;
1860
1861  /* Address of the place where a forward jump should go to the end of
1862     the containing expression.  Each alternative of an `or' -- except the
1863     last -- ends with a forward jump of this sort.  */
1864  unsigned char *fixup_alt_jump = 0;
1865
1866  /* Counts open-groups as they are encountered.  Remembered for the
1867     matching close-group on the compile stack, so the same register
1868     number is put in the stop_memory as the start_memory.  */
1869  regnum_t regnum = 0;
1870
1871#ifdef DEBUG
1872  DEBUG_PRINT1 ("\nCompiling pattern: ");
1873  if (debug)
1874    {
1875      unsigned debug_count;
1876
1877      for (debug_count = 0; debug_count < size; debug_count++)
1878        putchar (pattern[debug_count]);
1879      putchar ('\n');
1880    }
1881#endif /* DEBUG */
1882
1883  /* Initialize the compile stack.  */
1884  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1885  if (compile_stack.stack == NULL)
1886    return REG_ESPACE;
1887
1888  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1889  compile_stack.avail = 0;
1890
1891  /* Initialize the pattern buffer.  */
1892  bufp->syntax = syntax;
1893  bufp->fastmap_accurate = 0;
1894  bufp->not_bol = bufp->not_eol = 0;
1895
1896  /* Set `used' to zero, so that if we return an error, the pattern
1897     printer (for debugging) will think there's no pattern.  We reset it
1898     at the end.  */
1899  bufp->used = 0;
1900
1901  /* Always count groups, whether or not bufp->no_sub is set.  */
1902  bufp->re_nsub = 0;
1903
1904#if !defined emacs && !defined SYNTAX_TABLE
1905  /* Initialize the syntax table.  */
1906   init_syntax_once ();
1907#endif
1908
1909  if (bufp->allocated == 0)
1910    {
1911      if (bufp->buffer)
1912	{ /* If zero allocated, but buffer is non-null, try to realloc
1913             enough space.  This loses if buffer's address is bogus, but
1914             that is the user's responsibility.  */
1915          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1916        }
1917      else
1918        { /* Caller did not allocate a buffer.  Do it for them.  */
1919          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1920        }
1921      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1922
1923      bufp->allocated = INIT_BUF_SIZE;
1924    }
1925
1926  begalt = b = bufp->buffer;
1927
1928  /* Loop through the uncompiled pattern until we're at the end.  */
1929  while (p != pend)
1930    {
1931      PATFETCH (c);
1932
1933      switch (c)
1934        {
1935        case '^':
1936          {
1937            if (   /* If at start of pattern, it's an operator.  */
1938                   p == pattern + 1
1939                   /* If context independent, it's an operator.  */
1940                || syntax & RE_CONTEXT_INDEP_ANCHORS
1941                   /* Otherwise, depends on what's come before.  */
1942                || at_begline_loc_p (pattern, p, syntax))
1943              BUF_PUSH (begline);
1944            else
1945              goto normal_char;
1946          }
1947          break;
1948
1949
1950        case '$':
1951          {
1952            if (   /* If at end of pattern, it's an operator.  */
1953                   p == pend
1954                   /* If context independent, it's an operator.  */
1955                || syntax & RE_CONTEXT_INDEP_ANCHORS
1956                   /* Otherwise, depends on what's next.  */
1957                || at_endline_loc_p (p, pend, syntax))
1958               BUF_PUSH (endline);
1959             else
1960               goto normal_char;
1961           }
1962           break;
1963
1964
1965	case '+':
1966        case '?':
1967          if ((syntax & RE_BK_PLUS_QM)
1968              || (syntax & RE_LIMITED_OPS))
1969            goto normal_char;
1970        handle_plus:
1971        case '*':
1972          /* If there is no previous pattern... */
1973          if (!laststart)
1974            {
1975              if (syntax & RE_CONTEXT_INVALID_OPS)
1976                FREE_STACK_RETURN (REG_BADRPT);
1977              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1978                goto normal_char;
1979            }
1980
1981          {
1982            /* Are we optimizing this jump?  */
1983            boolean keep_string_p = false;
1984
1985            /* 1 means zero (many) matches is allowed.  */
1986            char zero_times_ok = 0, many_times_ok = 0;
1987
1988            /* If there is a sequence of repetition chars, collapse it
1989               down to just one (the right one).  We can't combine
1990               interval operators with these because of, e.g., `a{2}*',
1991               which should only match an even number of `a's.  */
1992
1993            for (;;)
1994              {
1995                zero_times_ok |= c != '+';
1996                many_times_ok |= c != '?';
1997
1998                if (p == pend)
1999                  break;
2000
2001                PATFETCH (c);
2002
2003                if (c == '*'
2004                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2005                  ;
2006
2007                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2008                  {
2009                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2010
2011                    PATFETCH (c1);
2012                    if (!(c1 == '+' || c1 == '?'))
2013                      {
2014                        PATUNFETCH;
2015                        PATUNFETCH;
2016                        break;
2017                      }
2018
2019                    c = c1;
2020                  }
2021                else
2022                  {
2023                    PATUNFETCH;
2024                    break;
2025                  }
2026
2027                /* If we get here, we found another repeat character.  */
2028               }
2029
2030            /* Star, etc. applied to an empty pattern is equivalent
2031               to an empty pattern.  */
2032            if (!laststart)
2033              break;
2034
2035            /* Now we know whether or not zero matches is allowed
2036               and also whether or not two or more matches is allowed.  */
2037            if (many_times_ok)
2038              { /* More than one repetition is allowed, so put in at the
2039                   end a backward relative jump from `b' to before the next
2040                   jump we're going to put in below (which jumps from
2041                   laststart to after this jump).
2042
2043                   But if we are at the `*' in the exact sequence `.*\n',
2044                   insert an unconditional jump backwards to the .,
2045                   instead of the beginning of the loop.  This way we only
2046                   push a failure point once, instead of every time
2047                   through the loop.  */
2048                assert (p - 1 > pattern);
2049
2050                /* Allocate the space for the jump.  */
2051                GET_BUFFER_SPACE (3);
2052
2053                /* We know we are not at the first character of the pattern,
2054                   because laststart was nonzero.  And we've already
2055                   incremented `p', by the way, to be the character after
2056                   the `*'.  Do we have to do something analogous here
2057                   for null bytes, because of RE_DOT_NOT_NULL?  */
2058                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2059		    && zero_times_ok
2060                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2061                    && !(syntax & RE_DOT_NEWLINE))
2062                  { /* We have .*\n.  */
2063                    STORE_JUMP (jump, b, laststart);
2064                    keep_string_p = true;
2065                  }
2066                else
2067                  /* Anything else.  */
2068                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2069
2070                /* We've added more stuff to the buffer.  */
2071                b += 3;
2072              }
2073
2074            /* On failure, jump from laststart to b + 3, which will be the
2075               end of the buffer after this jump is inserted.  */
2076            GET_BUFFER_SPACE (3);
2077            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2078                                       : on_failure_jump,
2079                         laststart, b + 3);
2080            pending_exact = 0;
2081            b += 3;
2082
2083            if (!zero_times_ok)
2084              {
2085                /* At least one repetition is required, so insert a
2086                   `dummy_failure_jump' before the initial
2087                   `on_failure_jump' instruction of the loop. This
2088                   effects a skip over that instruction the first time
2089                   we hit that loop.  */
2090                GET_BUFFER_SPACE (3);
2091                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2092                b += 3;
2093              }
2094            }
2095	  break;
2096
2097
2098	case '.':
2099          laststart = b;
2100          BUF_PUSH (anychar);
2101          break;
2102
2103
2104        case '[':
2105          {
2106            boolean had_char_class = false;
2107
2108            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2109
2110            /* Ensure that we have enough space to push a charset: the
2111               opcode, the length count, and the bitset; 34 bytes in all.  */
2112	    GET_BUFFER_SPACE (34);
2113
2114            laststart = b;
2115
2116            /* We test `*p == '^' twice, instead of using an if
2117               statement, so we only need one BUF_PUSH.  */
2118            BUF_PUSH (*p == '^' ? charset_not : charset);
2119            if (*p == '^')
2120              p++;
2121
2122            /* Remember the first position in the bracket expression.  */
2123            p1 = p;
2124
2125            /* Push the number of bytes in the bitmap.  */
2126            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2127
2128            /* Clear the whole map.  */
2129            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2130
2131            /* charset_not matches newline according to a syntax bit.  */
2132            if ((re_opcode_t) b[-2] == charset_not
2133                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2134              SET_LIST_BIT ('\n');
2135
2136            /* Read in characters and ranges, setting map bits.  */
2137            for (;;)
2138              {
2139                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2140
2141                PATFETCH (c);
2142
2143                /* \ might escape characters inside [...] and [^...].  */
2144                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2145                  {
2146                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2147
2148                    PATFETCH (c1);
2149                    SET_LIST_BIT (c1);
2150                    continue;
2151                  }
2152
2153                /* Could be the end of the bracket expression.  If it's
2154                   not (i.e., when the bracket expression is `[]' so
2155                   far), the ']' character bit gets set way below.  */
2156                if (c == ']' && p != p1 + 1)
2157                  break;
2158
2159                /* Look ahead to see if it's a range when the last thing
2160                   was a character class.  */
2161                if (had_char_class && c == '-' && *p != ']')
2162                  FREE_STACK_RETURN (REG_ERANGE);
2163
2164                /* Look ahead to see if it's a range when the last thing
2165                   was a character: if this is a hyphen not at the
2166                   beginning or the end of a list, then it's the range
2167                   operator.  */
2168                if (c == '-'
2169                    && !(p - 2 >= pattern && p[-2] == '[')
2170                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2171                    && *p != ']')
2172                  {
2173                    reg_errcode_t ret
2174                      = compile_range (&p, pend, translate, syntax, b);
2175                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2176                  }
2177
2178                else if (p[0] == '-' && p[1] != ']')
2179                  { /* This handles ranges made up of characters only.  */
2180                    reg_errcode_t ret;
2181
2182		    /* Move past the `-'.  */
2183                    PATFETCH (c1);
2184
2185                    ret = compile_range (&p, pend, translate, syntax, b);
2186                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2187                  }
2188
2189                /* See if we're at the beginning of a possible character
2190                   class.  */
2191
2192                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2193                  { /* Leave room for the null.  */
2194                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2195
2196                    PATFETCH (c);
2197                    c1 = 0;
2198
2199                    /* If pattern is `[[:'.  */
2200                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2201
2202                    for (;;)
2203                      {
2204                        PATFETCH (c);
2205                        if ((c == ':' && *p == ']') || p == pend
2206                            || c1 == CHAR_CLASS_MAX_LENGTH)
2207                          break;
2208                        str[c1++] = c;
2209                      }
2210                    str[c1] = '\0';
2211
2212                    /* If isn't a word bracketed by `[:' and `:]':
2213                       undo the ending character, the letters, and leave
2214                       the leading `:' and `[' (but set bits for them).  */
2215                    if (c == ':' && *p == ']')
2216                      {
2217/* CYGNUS LOCAL: Skip this code if we don't have btowc().  btowc() is */
2218/* defined in the 1994 Amendment 1 to ISO C and may not be present on */
2219/* systems where we have wchar.h and wctype.h.   */
2220#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
2221                        boolean is_lower = STREQ (str, "lower");
2222                        boolean is_upper = STREQ (str, "upper");
2223			wctype_t wt;
2224                        int ch;
2225
2226			wt = IS_CHAR_CLASS (str);
2227			if (wt == 0)
2228			  FREE_STACK_RETURN (REG_ECTYPE);
2229
2230                        /* Throw away the ] at the end of the character
2231                           class.  */
2232                        PATFETCH (c);
2233
2234                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2235
2236                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2237			  {
2238# ifdef _LIBC
2239			    if (__iswctype (__btowc (ch), wt))
2240			      SET_LIST_BIT (ch);
2241#else
2242			    if (iswctype (btowc (ch), wt))
2243			      SET_LIST_BIT (ch);
2244#endif
2245
2246			    if (translate && (is_upper || is_lower)
2247				&& (ISUPPER (ch) || ISLOWER (ch)))
2248			      SET_LIST_BIT (ch);
2249			  }
2250
2251                        had_char_class = true;
2252#else
2253                        int ch;
2254                        boolean is_alnum = STREQ (str, "alnum");
2255                        boolean is_alpha = STREQ (str, "alpha");
2256                        boolean is_blank = STREQ (str, "blank");
2257                        boolean is_cntrl = STREQ (str, "cntrl");
2258                        boolean is_digit = STREQ (str, "digit");
2259                        boolean is_graph = STREQ (str, "graph");
2260                        boolean is_lower = STREQ (str, "lower");
2261                        boolean is_print = STREQ (str, "print");
2262                        boolean is_punct = STREQ (str, "punct");
2263                        boolean is_space = STREQ (str, "space");
2264                        boolean is_upper = STREQ (str, "upper");
2265                        boolean is_xdigit = STREQ (str, "xdigit");
2266
2267                        if (!IS_CHAR_CLASS (str))
2268			  FREE_STACK_RETURN (REG_ECTYPE);
2269
2270                        /* Throw away the ] at the end of the character
2271                           class.  */
2272                        PATFETCH (c);
2273
2274                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2275
2276                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2277                          {
2278			    /* This was split into 3 if's to
2279			       avoid an arbitrary limit in some compiler.  */
2280                            if (   (is_alnum  && ISALNUM (ch))
2281                                || (is_alpha  && ISALPHA (ch))
2282                                || (is_blank  && ISBLANK (ch))
2283                                || (is_cntrl  && ISCNTRL (ch)))
2284			      SET_LIST_BIT (ch);
2285			    if (   (is_digit  && ISDIGIT (ch))
2286                                || (is_graph  && ISGRAPH (ch))
2287                                || (is_lower  && ISLOWER (ch))
2288                                || (is_print  && ISPRINT (ch)))
2289			      SET_LIST_BIT (ch);
2290			    if (   (is_punct  && ISPUNCT (ch))
2291                                || (is_space  && ISSPACE (ch))
2292                                || (is_upper  && ISUPPER (ch))
2293                                || (is_xdigit && ISXDIGIT (ch)))
2294			      SET_LIST_BIT (ch);
2295			    if (   translate && (is_upper || is_lower)
2296				&& (ISUPPER (ch) || ISLOWER (ch)))
2297			      SET_LIST_BIT (ch);
2298                          }
2299                        had_char_class = true;
2300#endif	/* libc || wctype.h */
2301                      }
2302                    else
2303                      {
2304                        c1++;
2305                        while (c1--)
2306                          PATUNFETCH;
2307                        SET_LIST_BIT ('[');
2308                        SET_LIST_BIT (':');
2309                        had_char_class = false;
2310                      }
2311                  }
2312                else
2313                  {
2314                    had_char_class = false;
2315                    SET_LIST_BIT (c);
2316                  }
2317              }
2318
2319            /* Discard any (non)matching list bytes that are all 0 at the
2320               end of the map.  Decrease the map-length byte too.  */
2321            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2322              b[-1]--;
2323            b += b[-1];
2324          }
2325          break;
2326
2327
2328	case '(':
2329          if (syntax & RE_NO_BK_PARENS)
2330            goto handle_open;
2331          else
2332            goto normal_char;
2333
2334
2335        case ')':
2336          if (syntax & RE_NO_BK_PARENS)
2337            goto handle_close;
2338          else
2339            goto normal_char;
2340
2341
2342        case '\n':
2343          if (syntax & RE_NEWLINE_ALT)
2344            goto handle_alt;
2345          else
2346            goto normal_char;
2347
2348
2349	case '|':
2350          if (syntax & RE_NO_BK_VBAR)
2351            goto handle_alt;
2352          else
2353            goto normal_char;
2354
2355
2356        case '{':
2357           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2358             goto handle_interval;
2359           else
2360             goto normal_char;
2361
2362
2363        case '\\':
2364          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2365
2366          /* Do not translate the character after the \, so that we can
2367             distinguish, e.g., \B from \b, even if we normally would
2368             translate, e.g., B to b.  */
2369          PATFETCH_RAW (c);
2370
2371          switch (c)
2372            {
2373            case '(':
2374              if (syntax & RE_NO_BK_PARENS)
2375                goto normal_backslash;
2376
2377            handle_open:
2378              bufp->re_nsub++;
2379              regnum++;
2380
2381              if (COMPILE_STACK_FULL)
2382                {
2383                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2384                            compile_stack_elt_t);
2385                  if (compile_stack.stack == NULL) return REG_ESPACE;
2386
2387                  compile_stack.size <<= 1;
2388                }
2389
2390              /* These are the values to restore when we hit end of this
2391                 group.  They are all relative offsets, so that if the
2392                 whole pattern moves because of realloc, they will still
2393                 be valid.  */
2394              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2395              COMPILE_STACK_TOP.fixup_alt_jump
2396                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2397              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2398              COMPILE_STACK_TOP.regnum = regnum;
2399
2400              /* We will eventually replace the 0 with the number of
2401                 groups inner to this one.  But do not push a
2402                 start_memory for groups beyond the last one we can
2403                 represent in the compiled pattern.  */
2404              if (regnum <= MAX_REGNUM)
2405                {
2406                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2407                  BUF_PUSH_3 (start_memory, regnum, 0);
2408                }
2409
2410              compile_stack.avail++;
2411
2412              fixup_alt_jump = 0;
2413              laststart = 0;
2414              begalt = b;
2415	      /* If we've reached MAX_REGNUM groups, then this open
2416		 won't actually generate any code, so we'll have to
2417		 clear pending_exact explicitly.  */
2418	      pending_exact = 0;
2419              break;
2420
2421
2422            case ')':
2423              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2424
2425              if (COMPILE_STACK_EMPTY)
2426		{
2427		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2428		    goto normal_backslash;
2429		  else
2430		    FREE_STACK_RETURN (REG_ERPAREN);
2431		}
2432
2433            handle_close:
2434              if (fixup_alt_jump)
2435                { /* Push a dummy failure point at the end of the
2436                     alternative for a possible future
2437                     `pop_failure_jump' to pop.  See comments at
2438                     `push_dummy_failure' in `re_match_2'.  */
2439                  BUF_PUSH (push_dummy_failure);
2440
2441                  /* We allocated space for this jump when we assigned
2442                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2443                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2444                }
2445
2446              /* See similar code for backslashed left paren above.  */
2447              if (COMPILE_STACK_EMPTY)
2448		{
2449		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2450		    goto normal_char;
2451		  else
2452		    FREE_STACK_RETURN (REG_ERPAREN);
2453		}
2454
2455              /* Since we just checked for an empty stack above, this
2456                 ``can't happen''.  */
2457              assert (compile_stack.avail != 0);
2458              {
2459                /* We don't just want to restore into `regnum', because
2460                   later groups should continue to be numbered higher,
2461                   as in `(ab)c(de)' -- the second group is #2.  */
2462                regnum_t this_group_regnum;
2463
2464                compile_stack.avail--;
2465                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2466                fixup_alt_jump
2467                  = COMPILE_STACK_TOP.fixup_alt_jump
2468                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2469                    : 0;
2470                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2471                this_group_regnum = COMPILE_STACK_TOP.regnum;
2472		/* If we've reached MAX_REGNUM groups, then this open
2473		   won't actually generate any code, so we'll have to
2474		   clear pending_exact explicitly.  */
2475		pending_exact = 0;
2476
2477                /* We're at the end of the group, so now we know how many
2478                   groups were inside this one.  */
2479                if (this_group_regnum <= MAX_REGNUM)
2480                  {
2481                    unsigned char *inner_group_loc
2482                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2483
2484                    *inner_group_loc = regnum - this_group_regnum;
2485                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2486                                regnum - this_group_regnum);
2487                  }
2488              }
2489              break;
2490
2491
2492            case '|':					/* `\|'.  */
2493              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2494                goto normal_backslash;
2495            handle_alt:
2496              if (syntax & RE_LIMITED_OPS)
2497                goto normal_char;
2498
2499              /* Insert before the previous alternative a jump which
2500                 jumps to this alternative if the former fails.  */
2501              GET_BUFFER_SPACE (3);
2502              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2503              pending_exact = 0;
2504              b += 3;
2505
2506              /* The alternative before this one has a jump after it
2507                 which gets executed if it gets matched.  Adjust that
2508                 jump so it will jump to this alternative's analogous
2509                 jump (put in below, which in turn will jump to the next
2510                 (if any) alternative's such jump, etc.).  The last such
2511                 jump jumps to the correct final destination.  A picture:
2512                          _____ _____
2513                          |   | |   |
2514                          |   v |   v
2515                         a | b   | c
2516
2517                 If we are at `b', then fixup_alt_jump right now points to a
2518                 three-byte space after `a'.  We'll put in the jump, set
2519                 fixup_alt_jump to right after `b', and leave behind three
2520                 bytes which we'll fill in when we get to after `c'.  */
2521
2522              if (fixup_alt_jump)
2523                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2524
2525              /* Mark and leave space for a jump after this alternative,
2526                 to be filled in later either by next alternative or
2527                 when know we're at the end of a series of alternatives.  */
2528              fixup_alt_jump = b;
2529              GET_BUFFER_SPACE (3);
2530              b += 3;
2531
2532              laststart = 0;
2533              begalt = b;
2534              break;
2535
2536
2537            case '{':
2538              /* If \{ is a literal.  */
2539              if (!(syntax & RE_INTERVALS)
2540                     /* If we're at `\{' and it's not the open-interval
2541                        operator.  */
2542                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2543                  || (p - 2 == pattern  &&  p == pend))
2544                goto normal_backslash;
2545
2546            handle_interval:
2547              {
2548                /* If got here, then the syntax allows intervals.  */
2549
2550                /* At least (most) this many matches must be made.  */
2551                int lower_bound = -1, upper_bound = -1;
2552
2553                beg_interval = p - 1;
2554
2555                if (p == pend)
2556                  {
2557                    if (syntax & RE_NO_BK_BRACES)
2558                      goto unfetch_interval;
2559                    else
2560                      FREE_STACK_RETURN (REG_EBRACE);
2561                  }
2562
2563                GET_UNSIGNED_NUMBER (lower_bound);
2564
2565                if (c == ',')
2566                  {
2567                    GET_UNSIGNED_NUMBER (upper_bound);
2568                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2569                  }
2570                else
2571                  /* Interval such as `{1}' => match exactly once. */
2572                  upper_bound = lower_bound;
2573
2574                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2575                    || lower_bound > upper_bound)
2576                  {
2577                    if (syntax & RE_NO_BK_BRACES)
2578                      goto unfetch_interval;
2579                    else
2580                      FREE_STACK_RETURN (REG_BADBR);
2581                  }
2582
2583                if (!(syntax & RE_NO_BK_BRACES))
2584                  {
2585                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2586
2587                    PATFETCH (c);
2588                  }
2589
2590                if (c != '}')
2591                  {
2592                    if (syntax & RE_NO_BK_BRACES)
2593                      goto unfetch_interval;
2594                    else
2595                      FREE_STACK_RETURN (REG_BADBR);
2596                  }
2597
2598                /* We just parsed a valid interval.  */
2599
2600                /* If it's invalid to have no preceding re.  */
2601                if (!laststart)
2602                  {
2603                    if (syntax & RE_CONTEXT_INVALID_OPS)
2604                      FREE_STACK_RETURN (REG_BADRPT);
2605                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2606                      laststart = b;
2607                    else
2608                      goto unfetch_interval;
2609                  }
2610
2611                /* If the upper bound is zero, don't want to succeed at
2612                   all; jump from `laststart' to `b + 3', which will be
2613                   the end of the buffer after we insert the jump.  */
2614                 if (upper_bound == 0)
2615                   {
2616                     GET_BUFFER_SPACE (3);
2617                     INSERT_JUMP (jump, laststart, b + 3);
2618                     b += 3;
2619                   }
2620
2621                 /* Otherwise, we have a nontrivial interval.  When
2622                    we're all done, the pattern will look like:
2623                      set_number_at <jump count> <upper bound>
2624                      set_number_at <succeed_n count> <lower bound>
2625                      succeed_n <after jump addr> <succeed_n count>
2626                      <body of loop>
2627                      jump_n <succeed_n addr> <jump count>
2628                    (The upper bound and `jump_n' are omitted if
2629                    `upper_bound' is 1, though.)  */
2630                 else
2631                   { /* If the upper bound is > 1, we need to insert
2632                        more at the end of the loop.  */
2633                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2634
2635                     GET_BUFFER_SPACE (nbytes);
2636
2637                     /* Initialize lower bound of the `succeed_n', even
2638                        though it will be set during matching by its
2639                        attendant `set_number_at' (inserted next),
2640                        because `re_compile_fastmap' needs to know.
2641                        Jump to the `jump_n' we might insert below.  */
2642                     INSERT_JUMP2 (succeed_n, laststart,
2643                                   b + 5 + (upper_bound > 1) * 5,
2644                                   lower_bound);
2645                     b += 5;
2646
2647                     /* Code to initialize the lower bound.  Insert
2648                        before the `succeed_n'.  The `5' is the last two
2649                        bytes of this `set_number_at', plus 3 bytes of
2650                        the following `succeed_n'.  */
2651                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2652                     b += 5;
2653
2654                     if (upper_bound > 1)
2655                       { /* More than one repetition is allowed, so
2656                            append a backward jump to the `succeed_n'
2657                            that starts this interval.
2658
2659                            When we've reached this during matching,
2660                            we'll have matched the interval once, so
2661                            jump back only `upper_bound - 1' times.  */
2662                         STORE_JUMP2 (jump_n, b, laststart + 5,
2663                                      upper_bound - 1);
2664                         b += 5;
2665
2666                         /* The location we want to set is the second
2667                            parameter of the `jump_n'; that is `b-2' as
2668                            an absolute address.  `laststart' will be
2669                            the `set_number_at' we're about to insert;
2670                            `laststart+3' the number to set, the source
2671                            for the relative address.  But we are
2672                            inserting into the middle of the pattern --
2673                            so everything is getting moved up by 5.
2674                            Conclusion: (b - 2) - (laststart + 3) + 5,
2675                            i.e., b - laststart.
2676
2677                            We insert this at the beginning of the loop
2678                            so that if we fail during matching, we'll
2679                            reinitialize the bounds.  */
2680                         insert_op2 (set_number_at, laststart, b - laststart,
2681                                     upper_bound - 1, b);
2682                         b += 5;
2683                       }
2684                   }
2685                pending_exact = 0;
2686                beg_interval = NULL;
2687              }
2688              break;
2689
2690            unfetch_interval:
2691              /* If an invalid interval, match the characters as literals.  */
2692               assert (beg_interval);
2693               p = beg_interval;
2694               beg_interval = NULL;
2695
2696               /* normal_char and normal_backslash need `c'.  */
2697               PATFETCH (c);
2698
2699               if (!(syntax & RE_NO_BK_BRACES))
2700                 {
2701                   if (p > pattern  &&  p[-1] == '\\')
2702                     goto normal_backslash;
2703                 }
2704               goto normal_char;
2705
2706#ifdef emacs
2707            /* There is no way to specify the before_dot and after_dot
2708               operators.  rms says this is ok.  --karl  */
2709            case '=':
2710              BUF_PUSH (at_dot);
2711              break;
2712
2713            case 's':
2714              laststart = b;
2715              PATFETCH (c);
2716              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2717              break;
2718
2719            case 'S':
2720              laststart = b;
2721              PATFETCH (c);
2722              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2723              break;
2724#endif /* emacs */
2725
2726
2727            case 'w':
2728	      if (syntax & RE_NO_GNU_OPS)
2729		goto normal_char;
2730              laststart = b;
2731              BUF_PUSH (wordchar);
2732              break;
2733
2734
2735            case 'W':
2736	      if (syntax & RE_NO_GNU_OPS)
2737		goto normal_char;
2738              laststart = b;
2739              BUF_PUSH (notwordchar);
2740              break;
2741
2742
2743            case '<':
2744	      if (syntax & RE_NO_GNU_OPS)
2745		goto normal_char;
2746              BUF_PUSH (wordbeg);
2747              break;
2748
2749            case '>':
2750	      if (syntax & RE_NO_GNU_OPS)
2751		goto normal_char;
2752              BUF_PUSH (wordend);
2753              break;
2754
2755            case 'b':
2756	      if (syntax & RE_NO_GNU_OPS)
2757		goto normal_char;
2758              BUF_PUSH (wordbound);
2759              break;
2760
2761            case 'B':
2762	      if (syntax & RE_NO_GNU_OPS)
2763		goto normal_char;
2764              BUF_PUSH (notwordbound);
2765              break;
2766
2767            case '`':
2768	      if (syntax & RE_NO_GNU_OPS)
2769		goto normal_char;
2770              BUF_PUSH (begbuf);
2771              break;
2772
2773            case '\'':
2774	      if (syntax & RE_NO_GNU_OPS)
2775		goto normal_char;
2776              BUF_PUSH (endbuf);
2777              break;
2778
2779            case '1': case '2': case '3': case '4': case '5':
2780            case '6': case '7': case '8': case '9':
2781              if (syntax & RE_NO_BK_REFS)
2782                goto normal_char;
2783
2784              c1 = c - '0';
2785
2786              if (c1 > regnum)
2787                FREE_STACK_RETURN (REG_ESUBREG);
2788
2789              /* Can't back reference to a subexpression if inside of it.  */
2790              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2791                goto normal_char;
2792
2793              laststart = b;
2794              BUF_PUSH_2 (duplicate, c1);
2795              break;
2796
2797
2798            case '+':
2799            case '?':
2800              if (syntax & RE_BK_PLUS_QM)
2801                goto handle_plus;
2802              else
2803                goto normal_backslash;
2804
2805            default:
2806            normal_backslash:
2807              /* You might think it would be useful for \ to mean
2808                 not to translate; but if we don't translate it
2809                 it will never match anything.  */
2810              c = TRANSLATE (c);
2811              goto normal_char;
2812            }
2813          break;
2814
2815
2816	default:
2817        /* Expects the character in `c'.  */
2818	normal_char:
2819	      /* If no exactn currently being built.  */
2820          if (!pending_exact
2821
2822              /* If last exactn not at current position.  */
2823              || pending_exact + *pending_exact + 1 != b
2824
2825              /* We have only one byte following the exactn for the count.  */
2826	      || *pending_exact == (1 << BYTEWIDTH) - 1
2827
2828              /* If followed by a repetition operator.  */
2829              || *p == '*' || *p == '^'
2830	      || ((syntax & RE_BK_PLUS_QM)
2831		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2832		  : (*p == '+' || *p == '?'))
2833	      || ((syntax & RE_INTERVALS)
2834                  && ((syntax & RE_NO_BK_BRACES)
2835		      ? *p == '{'
2836                      : (p[0] == '\\' && p[1] == '{'))))
2837	    {
2838	      /* Start building a new exactn.  */
2839
2840              laststart = b;
2841
2842	      BUF_PUSH_2 (exactn, 0);
2843	      pending_exact = b - 1;
2844            }
2845
2846	  BUF_PUSH (c);
2847          (*pending_exact)++;
2848	  break;
2849        } /* switch (c) */
2850    } /* while p != pend */
2851
2852
2853  /* Through the pattern now.  */
2854
2855  if (fixup_alt_jump)
2856    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2857
2858  if (!COMPILE_STACK_EMPTY)
2859    FREE_STACK_RETURN (REG_EPAREN);
2860
2861  /* If we don't want backtracking, force success
2862     the first time we reach the end of the compiled pattern.  */
2863  if (syntax & RE_NO_POSIX_BACKTRACKING)
2864    BUF_PUSH (succeed);
2865
2866  free (compile_stack.stack);
2867
2868  /* We have succeeded; set the length of the buffer.  */
2869  bufp->used = b - bufp->buffer;
2870
2871#ifdef DEBUG
2872  if (debug)
2873    {
2874      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2875      print_compiled_pattern (bufp);
2876    }
2877#endif /* DEBUG */
2878
2879#ifndef MATCH_MAY_ALLOCATE
2880  /* Initialize the failure stack to the largest possible stack.  This
2881     isn't necessary unless we're trying to avoid calling alloca in
2882     the search and match routines.  */
2883  {
2884    int num_regs = bufp->re_nsub + 1;
2885
2886    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2887       is strictly greater than re_max_failures, the largest possible stack
2888       is 2 * re_max_failures failure points.  */
2889    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2890      {
2891	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2892
2893# ifdef emacs
2894	if (! fail_stack.stack)
2895	  fail_stack.stack
2896	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2897					    * sizeof (fail_stack_elt_t));
2898	else
2899	  fail_stack.stack
2900	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2901					     (fail_stack.size
2902					      * sizeof (fail_stack_elt_t)));
2903# else /* not emacs */
2904	if (! fail_stack.stack)
2905	  fail_stack.stack
2906	    = (fail_stack_elt_t *) malloc (fail_stack.size
2907					   * sizeof (fail_stack_elt_t));
2908	else
2909	  fail_stack.stack
2910	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2911					    (fail_stack.size
2912					     * sizeof (fail_stack_elt_t)));
2913# endif /* not emacs */
2914      }
2915
2916    regex_grow_registers (num_regs);
2917  }
2918#endif /* not MATCH_MAY_ALLOCATE */
2919
2920  return REG_NOERROR;
2921} /* regex_compile */
2922
2923/* Subroutines for `regex_compile'.  */
2924
2925/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2926
2927static void
2928store_op1 (op, loc, arg)
2929    re_opcode_t op;
2930    unsigned char *loc;
2931    int arg;
2932{
2933  *loc = (unsigned char) op;
2934  STORE_NUMBER (loc + 1, arg);
2935}
2936
2937
2938/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2939
2940static void
2941store_op2 (op, loc, arg1, arg2)
2942    re_opcode_t op;
2943    unsigned char *loc;
2944    int arg1, arg2;
2945{
2946  *loc = (unsigned char) op;
2947  STORE_NUMBER (loc + 1, arg1);
2948  STORE_NUMBER (loc + 3, arg2);
2949}
2950
2951
2952/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2953   for OP followed by two-byte integer parameter ARG.  */
2954
2955static void
2956insert_op1 (op, loc, arg, end)
2957    re_opcode_t op;
2958    unsigned char *loc;
2959    int arg;
2960    unsigned char *end;
2961{
2962  register unsigned char *pfrom = end;
2963  register unsigned char *pto = end + 3;
2964
2965  while (pfrom != loc)
2966    *--pto = *--pfrom;
2967
2968  store_op1 (op, loc, arg);
2969}
2970
2971
2972/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2973
2974static void
2975insert_op2 (op, loc, arg1, arg2, end)
2976    re_opcode_t op;
2977    unsigned char *loc;
2978    int arg1, arg2;
2979    unsigned char *end;
2980{
2981  register unsigned char *pfrom = end;
2982  register unsigned char *pto = end + 5;
2983
2984  while (pfrom != loc)
2985    *--pto = *--pfrom;
2986
2987  store_op2 (op, loc, arg1, arg2);
2988}
2989
2990
2991/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2992   after an alternative or a begin-subexpression.  We assume there is at
2993   least one character before the ^.  */
2994
2995static boolean
2996at_begline_loc_p (pattern, p, syntax)
2997    const char *pattern, *p;
2998    reg_syntax_t syntax;
2999{
3000  const char *prev = p - 2;
3001  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3002
3003  return
3004       /* After a subexpression?  */
3005       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3006       /* After an alternative?  */
3007    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3008}
3009
3010
3011/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3012   at least one character after the $, i.e., `P < PEND'.  */
3013
3014static boolean
3015at_endline_loc_p (p, pend, syntax)
3016    const char *p, *pend;
3017    reg_syntax_t syntax;
3018{
3019  const char *next = p;
3020  boolean next_backslash = *next == '\\';
3021  const char *next_next = p + 1 < pend ? p + 1 : 0;
3022
3023  return
3024       /* Before a subexpression?  */
3025       (syntax & RE_NO_BK_PARENS ? *next == ')'
3026        : next_backslash && next_next && *next_next == ')')
3027       /* Before an alternative?  */
3028    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3029        : next_backslash && next_next && *next_next == '|');
3030}
3031
3032
3033/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3034   false if it's not.  */
3035
3036static boolean
3037group_in_compile_stack (compile_stack, regnum)
3038    compile_stack_type compile_stack;
3039    regnum_t regnum;
3040{
3041  int this_element;
3042
3043  for (this_element = compile_stack.avail - 1;
3044       this_element >= 0;
3045       this_element--)
3046    if (compile_stack.stack[this_element].regnum == regnum)
3047      return true;
3048
3049  return false;
3050}
3051
3052
3053/* Read the ending character of a range (in a bracket expression) from the
3054   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3055   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3056   Then we set the translation of all bits between the starting and
3057   ending characters (inclusive) in the compiled pattern B.
3058
3059   Return an error code.
3060
3061   We use these short variable names so we can use the same macros as
3062   `regex_compile' itself.  */
3063
3064static reg_errcode_t
3065compile_range (p_ptr, pend, translate, syntax, b)
3066    const char **p_ptr, *pend;
3067    RE_TRANSLATE_TYPE translate;
3068    reg_syntax_t syntax;
3069    unsigned char *b;
3070{
3071  unsigned this_char;
3072
3073  const char *p = *p_ptr;
3074  unsigned int range_start, range_end;
3075
3076  if (p == pend)
3077    return REG_ERANGE;
3078
3079  /* Even though the pattern is a signed `char *', we need to fetch
3080     with unsigned char *'s; if the high bit of the pattern character
3081     is set, the range endpoints will be negative if we fetch using a
3082     signed char *.
3083
3084     We also want to fetch the endpoints without translating them; the
3085     appropriate translation is done in the bit-setting loop below.  */
3086  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3087  range_start = ((const unsigned char *) p)[-2];
3088  range_end   = ((const unsigned char *) p)[0];
3089
3090  /* Have to increment the pointer into the pattern string, so the
3091     caller isn't still at the ending character.  */
3092  (*p_ptr)++;
3093
3094  /* If the start is after the end, the range is empty.  */
3095  if (range_start > range_end)
3096    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3097
3098  /* Here we see why `this_char' has to be larger than an `unsigned
3099     char' -- the range is inclusive, so if `range_end' == 0xff
3100     (assuming 8-bit characters), we would otherwise go into an infinite
3101     loop, since all characters <= 0xff.  */
3102  for (this_char = range_start; this_char <= range_end; this_char++)
3103    {
3104      SET_LIST_BIT (TRANSLATE (this_char));
3105    }
3106
3107  return REG_NOERROR;
3108}
3109
3110/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3111   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3112   characters can start a string that matches the pattern.  This fastmap
3113   is used by re_search to skip quickly over impossible starting points.
3114
3115   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3116   area as BUFP->fastmap.
3117
3118   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3119   the pattern buffer.
3120
3121   Returns 0 if we succeed, -2 if an internal error.   */
3122
3123int
3124re_compile_fastmap (bufp)
3125     struct re_pattern_buffer *bufp;
3126{
3127  int j, k;
3128#ifdef MATCH_MAY_ALLOCATE
3129  fail_stack_type fail_stack;
3130#endif
3131#ifndef REGEX_MALLOC
3132  char *destination;
3133#endif
3134
3135  register char *fastmap = bufp->fastmap;
3136  unsigned char *pattern = bufp->buffer;
3137  unsigned char *p = pattern;
3138  register unsigned char *pend = pattern + bufp->used;
3139
3140#ifdef REL_ALLOC
3141  /* This holds the pointer to the failure stack, when
3142     it is allocated relocatably.  */
3143  fail_stack_elt_t *failure_stack_ptr;
3144#endif
3145
3146  /* Assume that each path through the pattern can be null until
3147     proven otherwise.  We set this false at the bottom of switch
3148     statement, to which we get only if a particular path doesn't
3149     match the empty string.  */
3150  boolean path_can_be_null = true;
3151
3152  /* We aren't doing a `succeed_n' to begin with.  */
3153  boolean succeed_n_p = false;
3154
3155  assert (fastmap != NULL && p != NULL);
3156
3157  INIT_FAIL_STACK ();
3158  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3159  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3160  bufp->can_be_null = 0;
3161
3162  while (1)
3163    {
3164      if (p == pend || *p == succeed)
3165	{
3166	  /* We have reached the (effective) end of pattern.  */
3167	  if (!FAIL_STACK_EMPTY ())
3168	    {
3169	      bufp->can_be_null |= path_can_be_null;
3170
3171	      /* Reset for next path.  */
3172	      path_can_be_null = true;
3173
3174	      p = fail_stack.stack[--fail_stack.avail].pointer;
3175
3176	      continue;
3177	    }
3178	  else
3179	    break;
3180	}
3181
3182      /* We should never be about to go beyond the end of the pattern.  */
3183      assert (p < pend);
3184
3185      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3186	{
3187
3188        /* I guess the idea here is to simply not bother with a fastmap
3189           if a backreference is used, since it's too hard to figure out
3190           the fastmap for the corresponding group.  Setting
3191           `can_be_null' stops `re_search_2' from using the fastmap, so
3192           that is all we do.  */
3193	case duplicate:
3194	  bufp->can_be_null = 1;
3195          goto done;
3196
3197
3198      /* Following are the cases which match a character.  These end
3199         with `break'.  */
3200
3201	case exactn:
3202          fastmap[p[1]] = 1;
3203	  break;
3204
3205
3206        case charset:
3207          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3208	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3209              fastmap[j] = 1;
3210	  break;
3211
3212
3213	case charset_not:
3214	  /* Chars beyond end of map must be allowed.  */
3215	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3216            fastmap[j] = 1;
3217
3218	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3219	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3220              fastmap[j] = 1;
3221          break;
3222
3223
3224	case wordchar:
3225	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3226	    if (SYNTAX (j) == Sword)
3227	      fastmap[j] = 1;
3228	  break;
3229
3230
3231	case notwordchar:
3232	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3233	    if (SYNTAX (j) != Sword)
3234	      fastmap[j] = 1;
3235	  break;
3236
3237
3238        case anychar:
3239	  {
3240	    int fastmap_newline = fastmap['\n'];
3241
3242	    /* `.' matches anything ...  */
3243	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3244	      fastmap[j] = 1;
3245
3246	    /* ... except perhaps newline.  */
3247	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3248	      fastmap['\n'] = fastmap_newline;
3249
3250	    /* Return if we have already set `can_be_null'; if we have,
3251	       then the fastmap is irrelevant.  Something's wrong here.  */
3252	    else if (bufp->can_be_null)
3253	      goto done;
3254
3255	    /* Otherwise, have to check alternative paths.  */
3256	    break;
3257	  }
3258
3259#ifdef emacs
3260        case syntaxspec:
3261	  k = *p++;
3262	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3263	    if (SYNTAX (j) == (enum syntaxcode) k)
3264	      fastmap[j] = 1;
3265	  break;
3266
3267
3268	case notsyntaxspec:
3269	  k = *p++;
3270	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3271	    if (SYNTAX (j) != (enum syntaxcode) k)
3272	      fastmap[j] = 1;
3273	  break;
3274
3275
3276      /* All cases after this match the empty string.  These end with
3277         `continue'.  */
3278
3279
3280	case before_dot:
3281	case at_dot:
3282	case after_dot:
3283          continue;
3284#endif /* emacs */
3285
3286
3287        case no_op:
3288        case begline:
3289        case endline:
3290	case begbuf:
3291	case endbuf:
3292	case wordbound:
3293	case notwordbound:
3294	case wordbeg:
3295	case wordend:
3296        case push_dummy_failure:
3297          continue;
3298
3299
3300	case jump_n:
3301        case pop_failure_jump:
3302	case maybe_pop_jump:
3303	case jump:
3304        case jump_past_alt:
3305	case dummy_failure_jump:
3306          EXTRACT_NUMBER_AND_INCR (j, p);
3307	  p += j;
3308	  if (j > 0)
3309	    continue;
3310
3311          /* Jump backward implies we just went through the body of a
3312             loop and matched nothing.  Opcode jumped to should be
3313             `on_failure_jump' or `succeed_n'.  Just treat it like an
3314             ordinary jump.  For a * loop, it has pushed its failure
3315             point already; if so, discard that as redundant.  */
3316          if ((re_opcode_t) *p != on_failure_jump
3317	      && (re_opcode_t) *p != succeed_n)
3318	    continue;
3319
3320          p++;
3321          EXTRACT_NUMBER_AND_INCR (j, p);
3322          p += j;
3323
3324          /* If what's on the stack is where we are now, pop it.  */
3325          if (!FAIL_STACK_EMPTY ()
3326	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3327            fail_stack.avail--;
3328
3329          continue;
3330
3331
3332        case on_failure_jump:
3333        case on_failure_keep_string_jump:
3334	handle_on_failure_jump:
3335          EXTRACT_NUMBER_AND_INCR (j, p);
3336
3337          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3338             end of the pattern.  We don't want to push such a point,
3339             since when we restore it above, entering the switch will
3340             increment `p' past the end of the pattern.  We don't need
3341             to push such a point since we obviously won't find any more
3342             fastmap entries beyond `pend'.  Such a pattern can match
3343             the null string, though.  */
3344          if (p + j < pend)
3345            {
3346              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3347		{
3348		  RESET_FAIL_STACK ();
3349		  return -2;
3350		}
3351            }
3352          else
3353            bufp->can_be_null = 1;
3354
3355          if (succeed_n_p)
3356            {
3357              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3358              succeed_n_p = false;
3359	    }
3360
3361          continue;
3362
3363
3364	case succeed_n:
3365          /* Get to the number of times to succeed.  */
3366          p += 2;
3367
3368          /* Increment p past the n for when k != 0.  */
3369          EXTRACT_NUMBER_AND_INCR (k, p);
3370          if (k == 0)
3371	    {
3372              p -= 4;
3373  	      succeed_n_p = true;  /* Spaghetti code alert.  */
3374              goto handle_on_failure_jump;
3375            }
3376          continue;
3377
3378
3379	case set_number_at:
3380          p += 4;
3381          continue;
3382
3383
3384	case start_memory:
3385        case stop_memory:
3386	  p += 2;
3387	  continue;
3388
3389
3390	default:
3391          abort (); /* We have listed all the cases.  */
3392        } /* switch *p++ */
3393
3394      /* Getting here means we have found the possible starting
3395         characters for one path of the pattern -- and that the empty
3396         string does not match.  We need not follow this path further.
3397         Instead, look at the next alternative (remembered on the
3398         stack), or quit if no more.  The test at the top of the loop
3399         does these things.  */
3400      path_can_be_null = false;
3401      p = pend;
3402    } /* while p */
3403
3404  /* Set `can_be_null' for the last path (also the first path, if the
3405     pattern is empty).  */
3406  bufp->can_be_null |= path_can_be_null;
3407
3408 done:
3409  RESET_FAIL_STACK ();
3410  return 0;
3411} /* re_compile_fastmap */
3412#ifdef _LIBC
3413weak_alias (__re_compile_fastmap, re_compile_fastmap)
3414#endif
3415
3416/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3417   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3418   this memory for recording register information.  STARTS and ENDS
3419   must be allocated using the malloc library routine, and must each
3420   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3421
3422   If NUM_REGS == 0, then subsequent matches should allocate their own
3423   register data.
3424
3425   Unless this function is called, the first search or match using
3426   PATTERN_BUFFER will allocate its own register data, without
3427   freeing the old data.  */
3428
3429void
3430re_set_registers (bufp, regs, num_regs, starts, ends)
3431    struct re_pattern_buffer *bufp;
3432    struct re_registers *regs;
3433    unsigned num_regs;
3434    regoff_t *starts, *ends;
3435{
3436  if (num_regs)
3437    {
3438      bufp->regs_allocated = REGS_REALLOCATE;
3439      regs->num_regs = num_regs;
3440      regs->start = starts;
3441      regs->end = ends;
3442    }
3443  else
3444    {
3445      bufp->regs_allocated = REGS_UNALLOCATED;
3446      regs->num_regs = 0;
3447      regs->start = regs->end = (regoff_t *) 0;
3448    }
3449}
3450#ifdef _LIBC
3451weak_alias (__re_set_registers, re_set_registers)
3452#endif
3453
3454/* Searching routines.  */
3455
3456/* Like re_search_2, below, but only one string is specified, and
3457   doesn't let you say where to stop matching. */
3458
3459int
3460re_search (bufp, string, size, startpos, range, regs)
3461     struct re_pattern_buffer *bufp;
3462     const char *string;
3463     int size, startpos, range;
3464     struct re_registers *regs;
3465{
3466  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3467		      regs, size);
3468}
3469#ifdef _LIBC
3470weak_alias (__re_search, re_search)
3471#endif
3472
3473
3474/* Using the compiled pattern in BUFP->buffer, first tries to match the
3475   virtual concatenation of STRING1 and STRING2, starting first at index
3476   STARTPOS, then at STARTPOS + 1, and so on.
3477
3478   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3479
3480   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3481   only at STARTPOS; in general, the last start tried is STARTPOS +
3482   RANGE.
3483
3484   In REGS, return the indices of the virtual concatenation of STRING1
3485   and STRING2 that matched the entire BUFP->buffer and its contained
3486   subexpressions.
3487
3488   Do not consider matching one past the index STOP in the virtual
3489   concatenation of STRING1 and STRING2.
3490
3491   We return either the position in the strings at which the match was
3492   found, -1 if no match, or -2 if error (such as failure
3493   stack overflow).  */
3494
3495int
3496re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3497     struct re_pattern_buffer *bufp;
3498     const char *string1, *string2;
3499     int size1, size2;
3500     int startpos;
3501     int range;
3502     struct re_registers *regs;
3503     int stop;
3504{
3505  int val;
3506  register char *fastmap = bufp->fastmap;
3507  register RE_TRANSLATE_TYPE translate = bufp->translate;
3508  int total_size = size1 + size2;
3509  int endpos = startpos + range;
3510
3511  /* Check for out-of-range STARTPOS.  */
3512  if (startpos < 0 || startpos > total_size)
3513    return -1;
3514
3515  /* Fix up RANGE if it might eventually take us outside
3516     the virtual concatenation of STRING1 and STRING2.
3517     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3518  if (endpos < 0)
3519    range = 0 - startpos;
3520  else if (endpos > total_size)
3521    range = total_size - startpos;
3522
3523  /* If the search isn't to be a backwards one, don't waste time in a
3524     search for a pattern that must be anchored.  */
3525  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3526    {
3527      if (startpos > 0)
3528	return -1;
3529      else
3530	range = 1;
3531    }
3532
3533#ifdef emacs
3534  /* In a forward search for something that starts with \=.
3535     don't keep searching past point.  */
3536  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3537    {
3538      range = PT - startpos;
3539      if (range <= 0)
3540	return -1;
3541    }
3542#endif /* emacs */
3543
3544  /* Update the fastmap now if not correct already.  */
3545  if (fastmap && !bufp->fastmap_accurate)
3546    if (re_compile_fastmap (bufp) == -2)
3547      return -2;
3548
3549  /* Loop through the string, looking for a place to start matching.  */
3550  for (;;)
3551    {
3552      /* If a fastmap is supplied, skip quickly over characters that
3553         cannot be the start of a match.  If the pattern can match the
3554         null string, however, we don't need to skip characters; we want
3555         the first null string.  */
3556      if (fastmap && startpos < total_size && !bufp->can_be_null)
3557	{
3558	  if (range > 0)	/* Searching forwards.  */
3559	    {
3560	      register const char *d;
3561	      register int lim = 0;
3562	      int irange = range;
3563
3564              if (startpos < size1 && startpos + range >= size1)
3565                lim = range - (size1 - startpos);
3566
3567	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3568
3569              /* Written out as an if-else to avoid testing `translate'
3570                 inside the loop.  */
3571	      if (translate)
3572                while (range > lim
3573                       && !fastmap[(unsigned char)
3574				   translate[(unsigned char) *d++]])
3575                  range--;
3576	      else
3577                while (range > lim && !fastmap[(unsigned char) *d++])
3578                  range--;
3579
3580	      startpos += irange - range;
3581	    }
3582	  else				/* Searching backwards.  */
3583	    {
3584	      register char c = (size1 == 0 || startpos >= size1
3585                                 ? string2[startpos - size1]
3586                                 : string1[startpos]);
3587
3588	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3589		goto advance;
3590	    }
3591	}
3592
3593      /* If can't match the null string, and that's all we have left, fail.  */
3594      if (range >= 0 && startpos == total_size && fastmap
3595          && !bufp->can_be_null)
3596	return -1;
3597
3598      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3599				 startpos, regs, stop);
3600#ifndef REGEX_MALLOC
3601# ifdef C_ALLOCA
3602      alloca (0);
3603# endif
3604#endif
3605
3606      if (val >= 0)
3607	return startpos;
3608
3609      if (val == -2)
3610	return -2;
3611
3612    advance:
3613      if (!range)
3614        break;
3615      else if (range > 0)
3616        {
3617          range--;
3618          startpos++;
3619        }
3620      else
3621        {
3622          range++;
3623          startpos--;
3624        }
3625    }
3626  return -1;
3627} /* re_search_2 */
3628#ifdef _LIBC
3629weak_alias (__re_search_2, re_search_2)
3630#endif
3631
3632/* This converts PTR, a pointer into one of the search strings `string1'
3633   and `string2' into an offset from the beginning of that string.  */
3634#define POINTER_TO_OFFSET(ptr)			\
3635  (FIRST_STRING_P (ptr)				\
3636   ? ((regoff_t) ((ptr) - string1))		\
3637   : ((regoff_t) ((ptr) - string2 + size1)))
3638
3639/* Macros for dealing with the split strings in re_match_2.  */
3640
3641#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3642
3643/* Call before fetching a character with *d.  This switches over to
3644   string2 if necessary.  */
3645#define PREFETCH()							\
3646  while (d == dend)						    	\
3647    {									\
3648      /* End of string2 => fail.  */					\
3649      if (dend == end_match_2) 						\
3650        goto fail;							\
3651      /* End of string1 => advance to string2.  */ 			\
3652      d = string2;						        \
3653      dend = end_match_2;						\
3654    }
3655
3656
3657/* Test if at very beginning or at very end of the virtual concatenation
3658   of `string1' and `string2'.  If only one string, it's `string2'.  */
3659#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3660#define AT_STRINGS_END(d) ((d) == end2)
3661
3662
3663/* Test if D points to a character which is word-constituent.  We have
3664   two special cases to check for: if past the end of string1, look at
3665   the first character in string2; and if before the beginning of
3666   string2, look at the last character in string1.  */
3667#define WORDCHAR_P(d)							\
3668  (SYNTAX ((d) == end1 ? *string2					\
3669           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3670   == Sword)
3671
3672/* Disabled due to a compiler bug -- see comment at case wordbound */
3673#if 0
3674/* Test if the character before D and the one at D differ with respect
3675   to being word-constituent.  */
3676#define AT_WORD_BOUNDARY(d)						\
3677  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3678   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3679#endif
3680
3681/* Free everything we malloc.  */
3682#ifdef MATCH_MAY_ALLOCATE
3683# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3684# define FREE_VARIABLES()						\
3685  do {									\
3686    REGEX_FREE_STACK (fail_stack.stack);				\
3687    FREE_VAR (regstart);						\
3688    FREE_VAR (regend);							\
3689    FREE_VAR (old_regstart);						\
3690    FREE_VAR (old_regend);						\
3691    FREE_VAR (best_regstart);						\
3692    FREE_VAR (best_regend);						\
3693    FREE_VAR (reg_info);						\
3694    FREE_VAR (reg_dummy);						\
3695    FREE_VAR (reg_info_dummy);						\
3696  } while (0)
3697#else
3698# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3699#endif /* not MATCH_MAY_ALLOCATE */
3700
3701/* These values must meet several constraints.  They must not be valid
3702   register values; since we have a limit of 255 registers (because
3703   we use only one byte in the pattern for the register number), we can
3704   use numbers larger than 255.  They must differ by 1, because of
3705   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3706   be larger than the value for the highest register, so we do not try
3707   to actually save any registers when none are active.  */
3708#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3709#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3710
3711/* Matching routines.  */
3712
3713#ifndef emacs   /* Emacs never uses this.  */
3714/* re_match is like re_match_2 except it takes only a single string.  */
3715
3716int
3717re_match (bufp, string, size, pos, regs)
3718     struct re_pattern_buffer *bufp;
3719     const char *string;
3720     int size, pos;
3721     struct re_registers *regs;
3722{
3723  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3724				    pos, regs, size);
3725# ifndef REGEX_MALLOC
3726#  ifdef C_ALLOCA
3727  alloca (0);
3728#  endif
3729# endif
3730  return result;
3731}
3732# ifdef _LIBC
3733weak_alias (__re_match, re_match)
3734# endif
3735#endif /* not emacs */
3736
3737static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3738						    unsigned char *end,
3739						register_info_type *reg_info));
3740static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3741						  unsigned char *end,
3742						register_info_type *reg_info));
3743static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3744							unsigned char *end,
3745						register_info_type *reg_info));
3746static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3747				     int len, char *translate));
3748
3749/* re_match_2 matches the compiled pattern in BUFP against the
3750   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3751   and SIZE2, respectively).  We start matching at POS, and stop
3752   matching at STOP.
3753
3754   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3755   store offsets for the substring each group matched in REGS.  See the
3756   documentation for exactly how many groups we fill.
3757
3758   We return -1 if no match, -2 if an internal error (such as the
3759   failure stack overflowing).  Otherwise, we return the length of the
3760   matched substring.  */
3761
3762int
3763re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3764     struct re_pattern_buffer *bufp;
3765     const char *string1, *string2;
3766     int size1, size2;
3767     int pos;
3768     struct re_registers *regs;
3769     int stop;
3770{
3771  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3772				    pos, regs, stop);
3773#ifndef REGEX_MALLOC
3774# ifdef C_ALLOCA
3775  alloca (0);
3776# endif
3777#endif
3778  return result;
3779}
3780#ifdef _LIBC
3781weak_alias (__re_match_2, re_match_2)
3782#endif
3783
3784/* This is a separate function so that we can force an alloca cleanup
3785   afterwards.  */
3786static int
3787re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3788     struct re_pattern_buffer *bufp;
3789     const char *string1, *string2;
3790     int size1, size2;
3791     int pos;
3792     struct re_registers *regs;
3793     int stop;
3794{
3795  /* General temporaries.  */
3796  int mcnt;
3797  unsigned char *p1;
3798
3799  /* Just past the end of the corresponding string.  */
3800  const char *end1, *end2;
3801
3802  /* Pointers into string1 and string2, just past the last characters in
3803     each to consider matching.  */
3804  const char *end_match_1, *end_match_2;
3805
3806  /* Where we are in the data, and the end of the current string.  */
3807  const char *d, *dend;
3808
3809  /* Where we are in the pattern, and the end of the pattern.  */
3810  unsigned char *p = bufp->buffer;
3811  register unsigned char *pend = p + bufp->used;
3812
3813  /* Mark the opcode just after a start_memory, so we can test for an
3814     empty subpattern when we get to the stop_memory.  */
3815  unsigned char *just_past_start_mem = 0;
3816
3817  /* We use this to map every character in the string.  */
3818  RE_TRANSLATE_TYPE translate = bufp->translate;
3819
3820  /* Failure point stack.  Each place that can handle a failure further
3821     down the line pushes a failure point on this stack.  It consists of
3822     restart, regend, and reg_info for all registers corresponding to
3823     the subexpressions we're currently inside, plus the number of such
3824     registers, and, finally, two char *'s.  The first char * is where
3825     to resume scanning the pattern; the second one is where to resume
3826     scanning the strings.  If the latter is zero, the failure point is
3827     a ``dummy''; if a failure happens and the failure point is a dummy,
3828     it gets discarded and the next next one is tried.  */
3829#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3830  fail_stack_type fail_stack;
3831#endif
3832#ifdef DEBUG
3833  static unsigned failure_id = 0;
3834  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3835#endif
3836
3837#ifdef REL_ALLOC
3838  /* This holds the pointer to the failure stack, when
3839     it is allocated relocatably.  */
3840  fail_stack_elt_t *failure_stack_ptr;
3841#endif
3842
3843  /* We fill all the registers internally, independent of what we
3844     return, for use in backreferences.  The number here includes
3845     an element for register zero.  */
3846  size_t num_regs = bufp->re_nsub + 1;
3847
3848  /* The currently active registers.  */
3849  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3850  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3851
3852  /* Information on the contents of registers. These are pointers into
3853     the input strings; they record just what was matched (on this
3854     attempt) by a subexpression part of the pattern, that is, the
3855     regnum-th regstart pointer points to where in the pattern we began
3856     matching and the regnum-th regend points to right after where we
3857     stopped matching the regnum-th subexpression.  (The zeroth register
3858     keeps track of what the whole pattern matches.)  */
3859#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3860  const char **regstart, **regend;
3861#endif
3862
3863  /* If a group that's operated upon by a repetition operator fails to
3864     match anything, then the register for its start will need to be
3865     restored because it will have been set to wherever in the string we
3866     are when we last see its open-group operator.  Similarly for a
3867     register's end.  */
3868#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3869  const char **old_regstart, **old_regend;
3870#endif
3871
3872  /* The is_active field of reg_info helps us keep track of which (possibly
3873     nested) subexpressions we are currently in. The matched_something
3874     field of reg_info[reg_num] helps us tell whether or not we have
3875     matched any of the pattern so far this time through the reg_num-th
3876     subexpression.  These two fields get reset each time through any
3877     loop their register is in.  */
3878#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3879  register_info_type *reg_info;
3880#endif
3881
3882  /* The following record the register info as found in the above
3883     variables when we find a match better than any we've seen before.
3884     This happens as we backtrack through the failure points, which in
3885     turn happens only if we have not yet matched the entire string. */
3886  unsigned best_regs_set = false;
3887#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3888  const char **best_regstart, **best_regend;
3889#endif
3890
3891  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3892     allocate space for that if we're not allocating space for anything
3893     else (see below).  Also, we never need info about register 0 for
3894     any of the other register vectors, and it seems rather a kludge to
3895     treat `best_regend' differently than the rest.  So we keep track of
3896     the end of the best match so far in a separate variable.  We
3897     initialize this to NULL so that when we backtrack the first time
3898     and need to test it, it's not garbage.  */
3899  const char *match_end = NULL;
3900
3901  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3902  int set_regs_matched_done = 0;
3903
3904  /* Used when we pop values we don't care about.  */
3905#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3906  const char **reg_dummy;
3907  register_info_type *reg_info_dummy;
3908#endif
3909
3910#ifdef DEBUG
3911  /* Counts the total number of registers pushed.  */
3912  unsigned num_regs_pushed = 0;
3913#endif
3914
3915  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3916
3917  INIT_FAIL_STACK ();
3918
3919#ifdef MATCH_MAY_ALLOCATE
3920  /* Do not bother to initialize all the register variables if there are
3921     no groups in the pattern, as it takes a fair amount of time.  If
3922     there are groups, we include space for register 0 (the whole
3923     pattern), even though we never use it, since it simplifies the
3924     array indexing.  We should fix this.  */
3925  if (bufp->re_nsub)
3926    {
3927      regstart = REGEX_TALLOC (num_regs, const char *);
3928      regend = REGEX_TALLOC (num_regs, const char *);
3929      old_regstart = REGEX_TALLOC (num_regs, const char *);
3930      old_regend = REGEX_TALLOC (num_regs, const char *);
3931      best_regstart = REGEX_TALLOC (num_regs, const char *);
3932      best_regend = REGEX_TALLOC (num_regs, const char *);
3933      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3934      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3935      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3936
3937      if (!(regstart && regend && old_regstart && old_regend && reg_info
3938            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3939        {
3940          FREE_VARIABLES ();
3941          return -2;
3942        }
3943    }
3944  else
3945    {
3946      /* We must initialize all our variables to NULL, so that
3947         `FREE_VARIABLES' doesn't try to free them.  */
3948      regstart = regend = old_regstart = old_regend = best_regstart
3949        = best_regend = reg_dummy = NULL;
3950      reg_info = reg_info_dummy = (register_info_type *) NULL;
3951    }
3952#endif /* MATCH_MAY_ALLOCATE */
3953
3954  /* The starting position is bogus.  */
3955  if (pos < 0 || pos > size1 + size2)
3956    {
3957      FREE_VARIABLES ();
3958      return -1;
3959    }
3960
3961  /* Initialize subexpression text positions to -1 to mark ones that no
3962     start_memory/stop_memory has been seen for. Also initialize the
3963     register information struct.  */
3964  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3965    {
3966      regstart[mcnt] = regend[mcnt]
3967        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3968
3969      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3970      IS_ACTIVE (reg_info[mcnt]) = 0;
3971      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3972      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3973    }
3974
3975  /* We move `string1' into `string2' if the latter's empty -- but not if
3976     `string1' is null.  */
3977  if (size2 == 0 && string1 != NULL)
3978    {
3979      string2 = string1;
3980      size2 = size1;
3981      string1 = 0;
3982      size1 = 0;
3983    }
3984  end1 = string1 + size1;
3985  end2 = string2 + size2;
3986
3987  /* Compute where to stop matching, within the two strings.  */
3988  if (stop <= size1)
3989    {
3990      end_match_1 = string1 + stop;
3991      end_match_2 = string2;
3992    }
3993  else
3994    {
3995      end_match_1 = end1;
3996      end_match_2 = string2 + stop - size1;
3997    }
3998
3999  /* `p' scans through the pattern as `d' scans through the data.
4000     `dend' is the end of the input string that `d' points within.  `d'
4001     is advanced into the following input string whenever necessary, but
4002     this happens before fetching; therefore, at the beginning of the
4003     loop, `d' can be pointing at the end of a string, but it cannot
4004     equal `string2'.  */
4005  if (size1 > 0 && pos <= size1)
4006    {
4007      d = string1 + pos;
4008      dend = end_match_1;
4009    }
4010  else
4011    {
4012      d = string2 + pos - size1;
4013      dend = end_match_2;
4014    }
4015
4016  DEBUG_PRINT1 ("The compiled pattern is:\n");
4017  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4018  DEBUG_PRINT1 ("The string to match is: `");
4019  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4020  DEBUG_PRINT1 ("'\n");
4021
4022  /* This loops over pattern commands.  It exits by returning from the
4023     function if the match is complete, or it drops through if the match
4024     fails at this starting point in the input data.  */
4025  for (;;)
4026    {
4027#ifdef _LIBC
4028      DEBUG_PRINT2 ("\n%p: ", p);
4029#else
4030      DEBUG_PRINT2 ("\n0x%x: ", p);
4031#endif
4032
4033      if (p == pend)
4034	{ /* End of pattern means we might have succeeded.  */
4035          DEBUG_PRINT1 ("end of pattern ... ");
4036
4037	  /* If we haven't matched the entire string, and we want the
4038             longest match, try backtracking.  */
4039          if (d != end_match_2)
4040	    {
4041	      /* 1 if this match ends in the same string (string1 or string2)
4042		 as the best previous match.  */
4043	      boolean same_str_p = (FIRST_STRING_P (match_end)
4044				    == MATCHING_IN_FIRST_STRING);
4045	      /* 1 if this match is the best seen so far.  */
4046	      boolean best_match_p;
4047
4048	      /* AIX compiler got confused when this was combined
4049		 with the previous declaration.  */
4050	      if (same_str_p)
4051		best_match_p = d > match_end;
4052	      else
4053		best_match_p = !MATCHING_IN_FIRST_STRING;
4054
4055              DEBUG_PRINT1 ("backtracking.\n");
4056
4057              if (!FAIL_STACK_EMPTY ())
4058                { /* More failure points to try.  */
4059
4060                  /* If exceeds best match so far, save it.  */
4061                  if (!best_regs_set || best_match_p)
4062                    {
4063                      best_regs_set = true;
4064                      match_end = d;
4065
4066                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4067
4068                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4069                        {
4070                          best_regstart[mcnt] = regstart[mcnt];
4071                          best_regend[mcnt] = regend[mcnt];
4072                        }
4073                    }
4074                  goto fail;
4075                }
4076
4077              /* If no failure points, don't restore garbage.  And if
4078                 last match is real best match, don't restore second
4079                 best one. */
4080              else if (best_regs_set && !best_match_p)
4081                {
4082  	        restore_best_regs:
4083                  /* Restore best match.  It may happen that `dend ==
4084                     end_match_1' while the restored d is in string2.
4085                     For example, the pattern `x.*y.*z' against the
4086                     strings `x-' and `y-z-', if the two strings are
4087                     not consecutive in memory.  */
4088                  DEBUG_PRINT1 ("Restoring best registers.\n");
4089
4090                  d = match_end;
4091                  dend = ((d >= string1 && d <= end1)
4092		           ? end_match_1 : end_match_2);
4093
4094		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4095		    {
4096		      regstart[mcnt] = best_regstart[mcnt];
4097		      regend[mcnt] = best_regend[mcnt];
4098		    }
4099                }
4100            } /* d != end_match_2 */
4101
4102	succeed_label:
4103          DEBUG_PRINT1 ("Accepting match.\n");
4104
4105          /* If caller wants register contents data back, do it.  */
4106          if (regs && !bufp->no_sub)
4107	    {
4108              /* Have the register data arrays been allocated?  */
4109              if (bufp->regs_allocated == REGS_UNALLOCATED)
4110                { /* No.  So allocate them with malloc.  We need one
4111                     extra element beyond `num_regs' for the `-1' marker
4112                     GNU code uses.  */
4113                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4114                  regs->start = TALLOC (regs->num_regs, regoff_t);
4115                  regs->end = TALLOC (regs->num_regs, regoff_t);
4116                  if (regs->start == NULL || regs->end == NULL)
4117		    {
4118		      FREE_VARIABLES ();
4119		      return -2;
4120		    }
4121                  bufp->regs_allocated = REGS_REALLOCATE;
4122                }
4123              else if (bufp->regs_allocated == REGS_REALLOCATE)
4124                { /* Yes.  If we need more elements than were already
4125                     allocated, reallocate them.  If we need fewer, just
4126                     leave it alone.  */
4127                  if (regs->num_regs < num_regs + 1)
4128                    {
4129                      regs->num_regs = num_regs + 1;
4130                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4131                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4132                      if (regs->start == NULL || regs->end == NULL)
4133			{
4134			  FREE_VARIABLES ();
4135			  return -2;
4136			}
4137                    }
4138                }
4139              else
4140		{
4141		  /* These braces fend off a "empty body in an else-statement"
4142		     warning under GCC when assert expands to nothing.  */
4143		  assert (bufp->regs_allocated == REGS_FIXED);
4144		}
4145
4146              /* Convert the pointer data in `regstart' and `regend' to
4147                 indices.  Register zero has to be set differently,
4148                 since we haven't kept track of any info for it.  */
4149              if (regs->num_regs > 0)
4150                {
4151                  regs->start[0] = pos;
4152                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4153				  ? ((regoff_t) (d - string1))
4154			          : ((regoff_t) (d - string2 + size1)));
4155                }
4156
4157              /* Go through the first `min (num_regs, regs->num_regs)'
4158                 registers, since that is all we initialized.  */
4159	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4160		   mcnt++)
4161		{
4162                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4163                    regs->start[mcnt] = regs->end[mcnt] = -1;
4164                  else
4165                    {
4166		      regs->start[mcnt]
4167			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4168                      regs->end[mcnt]
4169			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4170                    }
4171		}
4172
4173              /* If the regs structure we return has more elements than
4174                 were in the pattern, set the extra elements to -1.  If
4175                 we (re)allocated the registers, this is the case,
4176                 because we always allocate enough to have at least one
4177                 -1 at the end.  */
4178              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4179                regs->start[mcnt] = regs->end[mcnt] = -1;
4180	    } /* regs && !bufp->no_sub */
4181
4182          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4183                        nfailure_points_pushed, nfailure_points_popped,
4184                        nfailure_points_pushed - nfailure_points_popped);
4185          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4186
4187          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4188			    ? string1
4189			    : string2 - size1);
4190
4191          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4192
4193          FREE_VARIABLES ();
4194          return mcnt;
4195        }
4196
4197      /* Otherwise match next pattern command.  */
4198      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4199	{
4200        /* Ignore these.  Used to ignore the n of succeed_n's which
4201           currently have n == 0.  */
4202        case no_op:
4203          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4204          break;
4205
4206	case succeed:
4207          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4208	  goto succeed_label;
4209
4210        /* Match the next n pattern characters exactly.  The following
4211           byte in the pattern defines n, and the n bytes after that
4212           are the characters to match.  */
4213	case exactn:
4214	  mcnt = *p++;
4215          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4216
4217          /* This is written out as an if-else so we don't waste time
4218             testing `translate' inside the loop.  */
4219          if (translate)
4220	    {
4221	      do
4222		{
4223		  PREFETCH ();
4224		  if ((unsigned char) translate[(unsigned char) *d++]
4225		      != (unsigned char) *p++)
4226                    goto fail;
4227		}
4228	      while (--mcnt);
4229	    }
4230	  else
4231	    {
4232	      do
4233		{
4234		  PREFETCH ();
4235		  if (*d++ != (char) *p++) goto fail;
4236		}
4237	      while (--mcnt);
4238	    }
4239	  SET_REGS_MATCHED ();
4240          break;
4241
4242
4243        /* Match any character except possibly a newline or a null.  */
4244	case anychar:
4245          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4246
4247          PREFETCH ();
4248
4249          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4250              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4251	    goto fail;
4252
4253          SET_REGS_MATCHED ();
4254          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4255          d++;
4256	  break;
4257
4258
4259	case charset:
4260	case charset_not:
4261	  {
4262	    register unsigned char c;
4263	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4264
4265            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4266
4267	    PREFETCH ();
4268	    c = TRANSLATE (*d); /* The character to match.  */
4269
4270            /* Cast to `unsigned' instead of `unsigned char' in case the
4271               bit list is a full 32 bytes long.  */
4272	    if (c < (unsigned) (*p * BYTEWIDTH)
4273		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4274	      not = !not;
4275
4276	    p += 1 + *p;
4277
4278	    if (!not) goto fail;
4279
4280	    SET_REGS_MATCHED ();
4281            d++;
4282	    break;
4283	  }
4284
4285
4286        /* The beginning of a group is represented by start_memory.
4287           The arguments are the register number in the next byte, and the
4288           number of groups inner to this one in the next.  The text
4289           matched within the group is recorded (in the internal
4290           registers data structure) under the register number.  */
4291        case start_memory:
4292	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4293
4294          /* Find out if this group can match the empty string.  */
4295	  p1 = p;		/* To send to group_match_null_string_p.  */
4296
4297          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4298            REG_MATCH_NULL_STRING_P (reg_info[*p])
4299              = group_match_null_string_p (&p1, pend, reg_info);
4300
4301          /* Save the position in the string where we were the last time
4302             we were at this open-group operator in case the group is
4303             operated upon by a repetition operator, e.g., with `(a*)*b'
4304             against `ab'; then we want to ignore where we are now in
4305             the string in case this attempt to match fails.  */
4306          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4307                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4308                             : regstart[*p];
4309	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4310			 POINTER_TO_OFFSET (old_regstart[*p]));
4311
4312          regstart[*p] = d;
4313	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4314
4315          IS_ACTIVE (reg_info[*p]) = 1;
4316          MATCHED_SOMETHING (reg_info[*p]) = 0;
4317
4318	  /* Clear this whenever we change the register activity status.  */
4319	  set_regs_matched_done = 0;
4320
4321          /* This is the new highest active register.  */
4322          highest_active_reg = *p;
4323
4324          /* If nothing was active before, this is the new lowest active
4325             register.  */
4326          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4327            lowest_active_reg = *p;
4328
4329          /* Move past the register number and inner group count.  */
4330          p += 2;
4331	  just_past_start_mem = p;
4332
4333          break;
4334
4335
4336        /* The stop_memory opcode represents the end of a group.  Its
4337           arguments are the same as start_memory's: the register
4338           number, and the number of inner groups.  */
4339	case stop_memory:
4340	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4341
4342          /* We need to save the string position the last time we were at
4343             this close-group operator in case the group is operated
4344             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4345             against `aba'; then we want to ignore where we are now in
4346             the string in case this attempt to match fails.  */
4347          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4348                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4349			   : regend[*p];
4350	  DEBUG_PRINT2 ("      old_regend: %d\n",
4351			 POINTER_TO_OFFSET (old_regend[*p]));
4352
4353          regend[*p] = d;
4354	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4355
4356          /* This register isn't active anymore.  */
4357          IS_ACTIVE (reg_info[*p]) = 0;
4358
4359	  /* Clear this whenever we change the register activity status.  */
4360	  set_regs_matched_done = 0;
4361
4362          /* If this was the only register active, nothing is active
4363             anymore.  */
4364          if (lowest_active_reg == highest_active_reg)
4365            {
4366              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4367              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4368            }
4369          else
4370            { /* We must scan for the new highest active register, since
4371                 it isn't necessarily one less than now: consider
4372                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4373                 new highest active register is 1.  */
4374              unsigned char r = *p - 1;
4375              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4376                r--;
4377
4378              /* If we end up at register zero, that means that we saved
4379                 the registers as the result of an `on_failure_jump', not
4380                 a `start_memory', and we jumped to past the innermost
4381                 `stop_memory'.  For example, in ((.)*) we save
4382                 registers 1 and 2 as a result of the *, but when we pop
4383                 back to the second ), we are at the stop_memory 1.
4384                 Thus, nothing is active.  */
4385	      if (r == 0)
4386                {
4387                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4388                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4389                }
4390              else
4391                highest_active_reg = r;
4392            }
4393
4394          /* If just failed to match something this time around with a
4395             group that's operated on by a repetition operator, try to
4396             force exit from the ``loop'', and restore the register
4397             information for this group that we had before trying this
4398             last match.  */
4399          if ((!MATCHED_SOMETHING (reg_info[*p])
4400               || just_past_start_mem == p - 1)
4401	      && (p + 2) < pend)
4402            {
4403              boolean is_a_jump_n = false;
4404
4405              p1 = p + 2;
4406              mcnt = 0;
4407              switch ((re_opcode_t) *p1++)
4408                {
4409                  case jump_n:
4410		    is_a_jump_n = true;
4411                  case pop_failure_jump:
4412		  case maybe_pop_jump:
4413		  case jump:
4414		  case dummy_failure_jump:
4415                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4416		    if (is_a_jump_n)
4417		      p1 += 2;
4418                    break;
4419
4420                  default:
4421                    /* do nothing */ ;
4422                }
4423	      p1 += mcnt;
4424
4425              /* If the next operation is a jump backwards in the pattern
4426	         to an on_failure_jump right before the start_memory
4427                 corresponding to this stop_memory, exit from the loop
4428                 by forcing a failure after pushing on the stack the
4429                 on_failure_jump's jump in the pattern, and d.  */
4430              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4431                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4432		{
4433                  /* If this group ever matched anything, then restore
4434                     what its registers were before trying this last
4435                     failed match, e.g., with `(a*)*b' against `ab' for
4436                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4437                     against `aba' for regend[3].
4438
4439                     Also restore the registers for inner groups for,
4440                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4441                     otherwise get trashed).  */
4442
4443                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4444		    {
4445		      unsigned r;
4446
4447                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4448
4449		      /* Restore this and inner groups' (if any) registers.  */
4450                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4451			   r++)
4452                        {
4453                          regstart[r] = old_regstart[r];
4454
4455                          /* xx why this test?  */
4456                          if (old_regend[r] >= regstart[r])
4457                            regend[r] = old_regend[r];
4458                        }
4459                    }
4460		  p1++;
4461                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4462                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4463
4464                  goto fail;
4465                }
4466            }
4467
4468          /* Move past the register number and the inner group count.  */
4469          p += 2;
4470          break;
4471
4472
4473	/* \<digit> has been turned into a `duplicate' command which is
4474           followed by the numeric value of <digit> as the register number.  */
4475        case duplicate:
4476	  {
4477	    register const char *d2, *dend2;
4478	    int regno = *p++;   /* Get which register to match against.  */
4479	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4480
4481	    /* Can't back reference a group which we've never matched.  */
4482            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4483              goto fail;
4484
4485            /* Where in input to try to start matching.  */
4486            d2 = regstart[regno];
4487
4488            /* Where to stop matching; if both the place to start and
4489               the place to stop matching are in the same string, then
4490               set to the place to stop, otherwise, for now have to use
4491               the end of the first string.  */
4492
4493            dend2 = ((FIRST_STRING_P (regstart[regno])
4494		      == FIRST_STRING_P (regend[regno]))
4495		     ? regend[regno] : end_match_1);
4496	    for (;;)
4497	      {
4498		/* If necessary, advance to next segment in register
4499                   contents.  */
4500		while (d2 == dend2)
4501		  {
4502		    if (dend2 == end_match_2) break;
4503		    if (dend2 == regend[regno]) break;
4504
4505                    /* End of string1 => advance to string2. */
4506                    d2 = string2;
4507                    dend2 = regend[regno];
4508		  }
4509		/* At end of register contents => success */
4510		if (d2 == dend2) break;
4511
4512		/* If necessary, advance to next segment in data.  */
4513		PREFETCH ();
4514
4515		/* How many characters left in this segment to match.  */
4516		mcnt = dend - d;
4517
4518		/* Want how many consecutive characters we can match in
4519                   one shot, so, if necessary, adjust the count.  */
4520                if (mcnt > dend2 - d2)
4521		  mcnt = dend2 - d2;
4522
4523		/* Compare that many; failure if mismatch, else move
4524                   past them.  */
4525		if (translate
4526                    ? bcmp_translate (d, d2, mcnt, translate)
4527                    : memcmp (d, d2, mcnt))
4528		  goto fail;
4529		d += mcnt, d2 += mcnt;
4530
4531		/* Do this because we've match some characters.  */
4532		SET_REGS_MATCHED ();
4533	      }
4534	  }
4535	  break;
4536
4537
4538        /* begline matches the empty string at the beginning of the string
4539           (unless `not_bol' is set in `bufp'), and, if
4540           `newline_anchor' is set, after newlines.  */
4541	case begline:
4542          DEBUG_PRINT1 ("EXECUTING begline.\n");
4543
4544          if (AT_STRINGS_BEG (d))
4545            {
4546              if (!bufp->not_bol) break;
4547            }
4548          else if (d[-1] == '\n' && bufp->newline_anchor)
4549            {
4550              break;
4551            }
4552          /* In all other cases, we fail.  */
4553          goto fail;
4554
4555
4556        /* endline is the dual of begline.  */
4557	case endline:
4558          DEBUG_PRINT1 ("EXECUTING endline.\n");
4559
4560          if (AT_STRINGS_END (d))
4561            {
4562              if (!bufp->not_eol) break;
4563            }
4564
4565          /* We have to ``prefetch'' the next character.  */
4566          else if ((d == end1 ? *string2 : *d) == '\n'
4567                   && bufp->newline_anchor)
4568            {
4569              break;
4570            }
4571          goto fail;
4572
4573
4574	/* Match at the very beginning of the data.  */
4575        case begbuf:
4576          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4577          if (AT_STRINGS_BEG (d))
4578            break;
4579          goto fail;
4580
4581
4582	/* Match at the very end of the data.  */
4583        case endbuf:
4584          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4585	  if (AT_STRINGS_END (d))
4586	    break;
4587          goto fail;
4588
4589
4590        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4591           pushes NULL as the value for the string on the stack.  Then
4592           `pop_failure_point' will keep the current value for the
4593           string, instead of restoring it.  To see why, consider
4594           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4595           then the . fails against the \n.  But the next thing we want
4596           to do is match the \n against the \n; if we restored the
4597           string value, we would be back at the foo.
4598
4599           Because this is used only in specific cases, we don't need to
4600           check all the things that `on_failure_jump' does, to make
4601           sure the right things get saved on the stack.  Hence we don't
4602           share its code.  The only reason to push anything on the
4603           stack at all is that otherwise we would have to change
4604           `anychar's code to do something besides goto fail in this
4605           case; that seems worse than this.  */
4606        case on_failure_keep_string_jump:
4607          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4608
4609          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4610#ifdef _LIBC
4611          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4612#else
4613          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4614#endif
4615
4616          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4617          break;
4618
4619
4620	/* Uses of on_failure_jump:
4621
4622           Each alternative starts with an on_failure_jump that points
4623           to the beginning of the next alternative.  Each alternative
4624           except the last ends with a jump that in effect jumps past
4625           the rest of the alternatives.  (They really jump to the
4626           ending jump of the following alternative, because tensioning
4627           these jumps is a hassle.)
4628
4629           Repeats start with an on_failure_jump that points past both
4630           the repetition text and either the following jump or
4631           pop_failure_jump back to this on_failure_jump.  */
4632	case on_failure_jump:
4633        on_failure:
4634          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4635
4636          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4637#ifdef _LIBC
4638          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4639#else
4640          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4641#endif
4642
4643          /* If this on_failure_jump comes right before a group (i.e.,
4644             the original * applied to a group), save the information
4645             for that group and all inner ones, so that if we fail back
4646             to this point, the group's information will be correct.
4647             For example, in \(a*\)*\1, we need the preceding group,
4648             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4649
4650          /* We can't use `p' to check ahead because we push
4651             a failure point to `p + mcnt' after we do this.  */
4652          p1 = p;
4653
4654          /* We need to skip no_op's before we look for the
4655             start_memory in case this on_failure_jump is happening as
4656             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4657             against aba.  */
4658          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4659            p1++;
4660
4661          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4662            {
4663              /* We have a new highest active register now.  This will
4664                 get reset at the start_memory we are about to get to,
4665                 but we will have saved all the registers relevant to
4666                 this repetition op, as described above.  */
4667              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4668              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4669                lowest_active_reg = *(p1 + 1);
4670            }
4671
4672          DEBUG_PRINT1 (":\n");
4673          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4674          break;
4675
4676
4677        /* A smart repeat ends with `maybe_pop_jump'.
4678	   We change it to either `pop_failure_jump' or `jump'.  */
4679        case maybe_pop_jump:
4680          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4681          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4682          {
4683	    register unsigned char *p2 = p;
4684
4685            /* Compare the beginning of the repeat with what in the
4686               pattern follows its end. If we can establish that there
4687               is nothing that they would both match, i.e., that we
4688               would have to backtrack because of (as in, e.g., `a*a')
4689               then we can change to pop_failure_jump, because we'll
4690               never have to backtrack.
4691
4692               This is not true in the case of alternatives: in
4693               `(a|ab)*' we do need to backtrack to the `ab' alternative
4694               (e.g., if the string was `ab').  But instead of trying to
4695               detect that here, the alternative has put on a dummy
4696               failure point which is what we will end up popping.  */
4697
4698	    /* Skip over open/close-group commands.
4699	       If what follows this loop is a ...+ construct,
4700	       look at what begins its body, since we will have to
4701	       match at least one of that.  */
4702	    while (1)
4703	      {
4704		if (p2 + 2 < pend
4705		    && ((re_opcode_t) *p2 == stop_memory
4706			|| (re_opcode_t) *p2 == start_memory))
4707		  p2 += 3;
4708		else if (p2 + 6 < pend
4709			 && (re_opcode_t) *p2 == dummy_failure_jump)
4710		  p2 += 6;
4711		else
4712		  break;
4713	      }
4714
4715	    p1 = p + mcnt;
4716	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4717	       to the `maybe_finalize_jump' of this case.  Examine what
4718	       follows.  */
4719
4720            /* If we're at the end of the pattern, we can change.  */
4721            if (p2 == pend)
4722	      {
4723		/* Consider what happens when matching ":\(.*\)"
4724		   against ":/".  I don't really understand this code
4725		   yet.  */
4726  	        p[-3] = (unsigned char) pop_failure_jump;
4727                DEBUG_PRINT1
4728                  ("  End of pattern: change to `pop_failure_jump'.\n");
4729              }
4730
4731            else if ((re_opcode_t) *p2 == exactn
4732		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4733	      {
4734		register unsigned char c
4735                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4736
4737                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4738                  {
4739  		    p[-3] = (unsigned char) pop_failure_jump;
4740                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4741                                  c, p1[5]);
4742                  }
4743
4744		else if ((re_opcode_t) p1[3] == charset
4745			 || (re_opcode_t) p1[3] == charset_not)
4746		  {
4747		    int not = (re_opcode_t) p1[3] == charset_not;
4748
4749		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4750			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4751		      not = !not;
4752
4753                    /* `not' is equal to 1 if c would match, which means
4754                        that we can't change to pop_failure_jump.  */
4755		    if (!not)
4756                      {
4757  		        p[-3] = (unsigned char) pop_failure_jump;
4758                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4759                      }
4760		  }
4761	      }
4762            else if ((re_opcode_t) *p2 == charset)
4763	      {
4764#ifdef DEBUG
4765		register unsigned char c
4766                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4767#endif
4768
4769#if 0
4770                if ((re_opcode_t) p1[3] == exactn
4771		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4772			  && (p2[2 + p1[5] / BYTEWIDTH]
4773			      & (1 << (p1[5] % BYTEWIDTH)))))
4774#else
4775                if ((re_opcode_t) p1[3] == exactn
4776		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4777			  && (p2[2 + p1[4] / BYTEWIDTH]
4778			      & (1 << (p1[4] % BYTEWIDTH)))))
4779#endif
4780                  {
4781  		    p[-3] = (unsigned char) pop_failure_jump;
4782                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4783                                  c, p1[5]);
4784                  }
4785
4786		else if ((re_opcode_t) p1[3] == charset_not)
4787		  {
4788		    int idx;
4789		    /* We win if the charset_not inside the loop
4790		       lists every character listed in the charset after.  */
4791		    for (idx = 0; idx < (int) p2[1]; idx++)
4792		      if (! (p2[2 + idx] == 0
4793			     || (idx < (int) p1[4]
4794				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4795			break;
4796
4797		    if (idx == p2[1])
4798                      {
4799  		        p[-3] = (unsigned char) pop_failure_jump;
4800                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4801                      }
4802		  }
4803		else if ((re_opcode_t) p1[3] == charset)
4804		  {
4805		    int idx;
4806		    /* We win if the charset inside the loop
4807		       has no overlap with the one after the loop.  */
4808		    for (idx = 0;
4809			 idx < (int) p2[1] && idx < (int) p1[4];
4810			 idx++)
4811		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4812			break;
4813
4814		    if (idx == p2[1] || idx == p1[4])
4815                      {
4816  		        p[-3] = (unsigned char) pop_failure_jump;
4817                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4818                      }
4819		  }
4820	      }
4821	  }
4822	  p -= 2;		/* Point at relative address again.  */
4823	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4824	    {
4825	      p[-1] = (unsigned char) jump;
4826              DEBUG_PRINT1 ("  Match => jump.\n");
4827	      goto unconditional_jump;
4828	    }
4829        /* Note fall through.  */
4830
4831
4832	/* The end of a simple repeat has a pop_failure_jump back to
4833           its matching on_failure_jump, where the latter will push a
4834           failure point.  The pop_failure_jump takes off failure
4835           points put on by this pop_failure_jump's matching
4836           on_failure_jump; we got through the pattern to here from the
4837           matching on_failure_jump, so didn't fail.  */
4838        case pop_failure_jump:
4839          {
4840            /* We need to pass separate storage for the lowest and
4841               highest registers, even though we don't care about the
4842               actual values.  Otherwise, we will restore only one
4843               register from the stack, since lowest will == highest in
4844               `pop_failure_point'.  */
4845            active_reg_t dummy_low_reg, dummy_high_reg;
4846            unsigned char *pdummy;
4847            const char *sdummy;
4848
4849            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4850            POP_FAILURE_POINT (sdummy, pdummy,
4851                               dummy_low_reg, dummy_high_reg,
4852                               reg_dummy, reg_dummy, reg_info_dummy);
4853          }
4854	  /* Note fall through.  */
4855
4856	unconditional_jump:
4857#ifdef _LIBC
4858	  DEBUG_PRINT2 ("\n%p: ", p);
4859#else
4860	  DEBUG_PRINT2 ("\n0x%x: ", p);
4861#endif
4862          /* Note fall through.  */
4863
4864        /* Unconditionally jump (without popping any failure points).  */
4865        case jump:
4866	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4867          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4868	  p += mcnt;				/* Do the jump.  */
4869#ifdef _LIBC
4870          DEBUG_PRINT2 ("(to %p).\n", p);
4871#else
4872          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4873#endif
4874	  break;
4875
4876
4877        /* We need this opcode so we can detect where alternatives end
4878           in `group_match_null_string_p' et al.  */
4879        case jump_past_alt:
4880          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4881          goto unconditional_jump;
4882
4883
4884        /* Normally, the on_failure_jump pushes a failure point, which
4885           then gets popped at pop_failure_jump.  We will end up at
4886           pop_failure_jump, also, and with a pattern of, say, `a+', we
4887           are skipping over the on_failure_jump, so we have to push
4888           something meaningless for pop_failure_jump to pop.  */
4889        case dummy_failure_jump:
4890          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4891          /* It doesn't matter what we push for the string here.  What
4892             the code at `fail' tests is the value for the pattern.  */
4893          PUSH_FAILURE_POINT (NULL, NULL, -2);
4894          goto unconditional_jump;
4895
4896
4897        /* At the end of an alternative, we need to push a dummy failure
4898           point in case we are followed by a `pop_failure_jump', because
4899           we don't want the failure point for the alternative to be
4900           popped.  For example, matching `(a|ab)*' against `aab'
4901           requires that we match the `ab' alternative.  */
4902        case push_dummy_failure:
4903          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4904          /* See comments just above at `dummy_failure_jump' about the
4905             two zeroes.  */
4906          PUSH_FAILURE_POINT (NULL, NULL, -2);
4907          break;
4908
4909        /* Have to succeed matching what follows at least n times.
4910           After that, handle like `on_failure_jump'.  */
4911        case succeed_n:
4912          EXTRACT_NUMBER (mcnt, p + 2);
4913          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4914
4915          assert (mcnt >= 0);
4916          /* Originally, this is how many times we HAVE to succeed.  */
4917          if (mcnt > 0)
4918            {
4919               mcnt--;
4920	       p += 2;
4921               STORE_NUMBER_AND_INCR (p, mcnt);
4922#ifdef _LIBC
4923               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4924#else
4925               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4926#endif
4927            }
4928	  else if (mcnt == 0)
4929            {
4930#ifdef _LIBC
4931              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4932#else
4933              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4934#endif
4935	      p[2] = (unsigned char) no_op;
4936              p[3] = (unsigned char) no_op;
4937              goto on_failure;
4938            }
4939          break;
4940
4941        case jump_n:
4942          EXTRACT_NUMBER (mcnt, p + 2);
4943          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4944
4945          /* Originally, this is how many times we CAN jump.  */
4946          if (mcnt)
4947            {
4948               mcnt--;
4949               STORE_NUMBER (p + 2, mcnt);
4950#ifdef _LIBC
4951               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4952#else
4953               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4954#endif
4955	       goto unconditional_jump;
4956            }
4957          /* If don't have to jump any more, skip over the rest of command.  */
4958	  else
4959	    p += 4;
4960          break;
4961
4962	case set_number_at:
4963	  {
4964            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4965
4966            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4967            p1 = p + mcnt;
4968            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4969#ifdef _LIBC
4970            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4971#else
4972            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4973#endif
4974	    STORE_NUMBER (p1, mcnt);
4975            break;
4976          }
4977
4978#if 0
4979	/* The DEC Alpha C compiler 3.x generates incorrect code for the
4980	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4981	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4982	   macro and introducing temporary variables works around the bug.  */
4983
4984	case wordbound:
4985	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4986	  if (AT_WORD_BOUNDARY (d))
4987	    break;
4988	  goto fail;
4989
4990	case notwordbound:
4991	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4992	  if (AT_WORD_BOUNDARY (d))
4993	    goto fail;
4994	  break;
4995#else
4996	case wordbound:
4997	{
4998	  boolean prevchar, thischar;
4999
5000	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5001	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5002	    break;
5003
5004	  prevchar = WORDCHAR_P (d - 1);
5005	  thischar = WORDCHAR_P (d);
5006	  if (prevchar != thischar)
5007	    break;
5008	  goto fail;
5009	}
5010
5011      case notwordbound:
5012	{
5013	  boolean prevchar, thischar;
5014
5015	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5016	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5017	    goto fail;
5018
5019	  prevchar = WORDCHAR_P (d - 1);
5020	  thischar = WORDCHAR_P (d);
5021	  if (prevchar != thischar)
5022	    goto fail;
5023	  break;
5024	}
5025#endif
5026
5027	case wordbeg:
5028          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5029	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5030	    break;
5031          goto fail;
5032
5033	case wordend:
5034          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5035	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5036              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5037	    break;
5038          goto fail;
5039
5040#ifdef emacs
5041  	case before_dot:
5042          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5043 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5044  	    goto fail;
5045  	  break;
5046
5047  	case at_dot:
5048          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5049 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5050  	    goto fail;
5051  	  break;
5052
5053  	case after_dot:
5054          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5055          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5056  	    goto fail;
5057  	  break;
5058
5059	case syntaxspec:
5060          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5061	  mcnt = *p++;
5062	  goto matchsyntax;
5063
5064        case wordchar:
5065          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5066	  mcnt = (int) Sword;
5067        matchsyntax:
5068	  PREFETCH ();
5069	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5070	  d++;
5071	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5072	    goto fail;
5073          SET_REGS_MATCHED ();
5074	  break;
5075
5076	case notsyntaxspec:
5077          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5078	  mcnt = *p++;
5079	  goto matchnotsyntax;
5080
5081        case notwordchar:
5082          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5083	  mcnt = (int) Sword;
5084        matchnotsyntax:
5085	  PREFETCH ();
5086	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5087	  d++;
5088	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5089	    goto fail;
5090	  SET_REGS_MATCHED ();
5091          break;
5092
5093#else /* not emacs */
5094	case wordchar:
5095          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5096	  PREFETCH ();
5097          if (!WORDCHAR_P (d))
5098            goto fail;
5099	  SET_REGS_MATCHED ();
5100          d++;
5101	  break;
5102
5103	case notwordchar:
5104          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5105	  PREFETCH ();
5106	  if (WORDCHAR_P (d))
5107            goto fail;
5108          SET_REGS_MATCHED ();
5109          d++;
5110	  break;
5111#endif /* not emacs */
5112
5113        default:
5114          abort ();
5115	}
5116      continue;  /* Successfully executed one pattern command; keep going.  */
5117
5118
5119    /* We goto here if a matching operation fails. */
5120    fail:
5121      if (!FAIL_STACK_EMPTY ())
5122	{ /* A restart point is known.  Restore to that state.  */
5123          DEBUG_PRINT1 ("\nFAIL:\n");
5124          POP_FAILURE_POINT (d, p,
5125                             lowest_active_reg, highest_active_reg,
5126                             regstart, regend, reg_info);
5127
5128          /* If this failure point is a dummy, try the next one.  */
5129          if (!p)
5130	    goto fail;
5131
5132          /* If we failed to the end of the pattern, don't examine *p.  */
5133	  assert (p <= pend);
5134          if (p < pend)
5135            {
5136              boolean is_a_jump_n = false;
5137
5138              /* If failed to a backwards jump that's part of a repetition
5139                 loop, need to pop this failure point and use the next one.  */
5140              switch ((re_opcode_t) *p)
5141                {
5142                case jump_n:
5143                  is_a_jump_n = true;
5144                case maybe_pop_jump:
5145                case pop_failure_jump:
5146                case jump:
5147                  p1 = p + 1;
5148                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5149                  p1 += mcnt;
5150
5151                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5152                      || (!is_a_jump_n
5153                          && (re_opcode_t) *p1 == on_failure_jump))
5154                    goto fail;
5155                  break;
5156                default:
5157                  /* do nothing */ ;
5158                }
5159            }
5160
5161          if (d >= string1 && d <= end1)
5162	    dend = end_match_1;
5163        }
5164      else
5165        break;   /* Matching at this starting point really fails.  */
5166    } /* for (;;) */
5167
5168  if (best_regs_set)
5169    goto restore_best_regs;
5170
5171  FREE_VARIABLES ();
5172
5173  return -1;         			/* Failure to match.  */
5174} /* re_match_2 */
5175
5176/* Subroutine definitions for re_match_2.  */
5177
5178
5179/* We are passed P pointing to a register number after a start_memory.
5180
5181   Return true if the pattern up to the corresponding stop_memory can
5182   match the empty string, and false otherwise.
5183
5184   If we find the matching stop_memory, sets P to point to one past its number.
5185   Otherwise, sets P to an undefined byte less than or equal to END.
5186
5187   We don't handle duplicates properly (yet).  */
5188
5189static boolean
5190group_match_null_string_p (p, end, reg_info)
5191    unsigned char **p, *end;
5192    register_info_type *reg_info;
5193{
5194  int mcnt;
5195  /* Point to after the args to the start_memory.  */
5196  unsigned char *p1 = *p + 2;
5197
5198  while (p1 < end)
5199    {
5200      /* Skip over opcodes that can match nothing, and return true or
5201	 false, as appropriate, when we get to one that can't, or to the
5202         matching stop_memory.  */
5203
5204      switch ((re_opcode_t) *p1)
5205        {
5206        /* Could be either a loop or a series of alternatives.  */
5207        case on_failure_jump:
5208          p1++;
5209          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5210
5211          /* If the next operation is not a jump backwards in the
5212	     pattern.  */
5213
5214	  if (mcnt >= 0)
5215	    {
5216              /* Go through the on_failure_jumps of the alternatives,
5217                 seeing if any of the alternatives cannot match nothing.
5218                 The last alternative starts with only a jump,
5219                 whereas the rest start with on_failure_jump and end
5220                 with a jump, e.g., here is the pattern for `a|b|c':
5221
5222                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5223                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5224                 /exactn/1/c
5225
5226                 So, we have to first go through the first (n-1)
5227                 alternatives and then deal with the last one separately.  */
5228
5229
5230              /* Deal with the first (n-1) alternatives, which start
5231                 with an on_failure_jump (see above) that jumps to right
5232                 past a jump_past_alt.  */
5233
5234              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5235                {
5236                  /* `mcnt' holds how many bytes long the alternative
5237                     is, including the ending `jump_past_alt' and
5238                     its number.  */
5239
5240                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5241				                      reg_info))
5242                    return false;
5243
5244                  /* Move to right after this alternative, including the
5245		     jump_past_alt.  */
5246                  p1 += mcnt;
5247
5248                  /* Break if it's the beginning of an n-th alternative
5249                     that doesn't begin with an on_failure_jump.  */
5250                  if ((re_opcode_t) *p1 != on_failure_jump)
5251                    break;
5252
5253		  /* Still have to check that it's not an n-th
5254		     alternative that starts with an on_failure_jump.  */
5255		  p1++;
5256                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5257                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5258                    {
5259		      /* Get to the beginning of the n-th alternative.  */
5260                      p1 -= 3;
5261                      break;
5262                    }
5263                }
5264
5265              /* Deal with the last alternative: go back and get number
5266                 of the `jump_past_alt' just before it.  `mcnt' contains
5267                 the length of the alternative.  */
5268              EXTRACT_NUMBER (mcnt, p1 - 2);
5269
5270              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5271                return false;
5272
5273              p1 += mcnt;	/* Get past the n-th alternative.  */
5274            } /* if mcnt > 0 */
5275          break;
5276
5277
5278        case stop_memory:
5279	  assert (p1[1] == **p);
5280          *p = p1 + 2;
5281          return true;
5282
5283
5284        default:
5285          if (!common_op_match_null_string_p (&p1, end, reg_info))
5286            return false;
5287        }
5288    } /* while p1 < end */
5289
5290  return false;
5291} /* group_match_null_string_p */
5292
5293
5294/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5295   It expects P to be the first byte of a single alternative and END one
5296   byte past the last. The alternative can contain groups.  */
5297
5298static boolean
5299alt_match_null_string_p (p, end, reg_info)
5300    unsigned char *p, *end;
5301    register_info_type *reg_info;
5302{
5303  int mcnt;
5304  unsigned char *p1 = p;
5305
5306  while (p1 < end)
5307    {
5308      /* Skip over opcodes that can match nothing, and break when we get
5309         to one that can't.  */
5310
5311      switch ((re_opcode_t) *p1)
5312        {
5313	/* It's a loop.  */
5314        case on_failure_jump:
5315          p1++;
5316          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5317          p1 += mcnt;
5318          break;
5319
5320	default:
5321          if (!common_op_match_null_string_p (&p1, end, reg_info))
5322            return false;
5323        }
5324    }  /* while p1 < end */
5325
5326  return true;
5327} /* alt_match_null_string_p */
5328
5329
5330/* Deals with the ops common to group_match_null_string_p and
5331   alt_match_null_string_p.
5332
5333   Sets P to one after the op and its arguments, if any.  */
5334
5335static boolean
5336common_op_match_null_string_p (p, end, reg_info)
5337    unsigned char **p, *end;
5338    register_info_type *reg_info;
5339{
5340  int mcnt;
5341  boolean ret;
5342  int reg_no;
5343  unsigned char *p1 = *p;
5344
5345  switch ((re_opcode_t) *p1++)
5346    {
5347    case no_op:
5348    case begline:
5349    case endline:
5350    case begbuf:
5351    case endbuf:
5352    case wordbeg:
5353    case wordend:
5354    case wordbound:
5355    case notwordbound:
5356#ifdef emacs
5357    case before_dot:
5358    case at_dot:
5359    case after_dot:
5360#endif
5361      break;
5362
5363    case start_memory:
5364      reg_no = *p1;
5365      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5366      ret = group_match_null_string_p (&p1, end, reg_info);
5367
5368      /* Have to set this here in case we're checking a group which
5369         contains a group and a back reference to it.  */
5370
5371      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5372        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5373
5374      if (!ret)
5375        return false;
5376      break;
5377
5378    /* If this is an optimized succeed_n for zero times, make the jump.  */
5379    case jump:
5380      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5381      if (mcnt >= 0)
5382        p1 += mcnt;
5383      else
5384        return false;
5385      break;
5386
5387    case succeed_n:
5388      /* Get to the number of times to succeed.  */
5389      p1 += 2;
5390      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5391
5392      if (mcnt == 0)
5393        {
5394          p1 -= 4;
5395          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5396          p1 += mcnt;
5397        }
5398      else
5399        return false;
5400      break;
5401
5402    case duplicate:
5403      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5404        return false;
5405      break;
5406
5407    case set_number_at:
5408      p1 += 4;
5409
5410    default:
5411      /* All other opcodes mean we cannot match the empty string.  */
5412      return false;
5413  }
5414
5415  *p = p1;
5416  return true;
5417} /* common_op_match_null_string_p */
5418
5419
5420/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5421   bytes; nonzero otherwise.  */
5422
5423static int
5424bcmp_translate (s1, s2, len, translate)
5425     const char *s1, *s2;
5426     register int len;
5427     RE_TRANSLATE_TYPE translate;
5428{
5429  register const unsigned char *p1 = (const unsigned char *) s1;
5430  register const unsigned char *p2 = (const unsigned char *) s2;
5431  while (len)
5432    {
5433      if (translate[*p1++] != translate[*p2++]) return 1;
5434      len--;
5435    }
5436  return 0;
5437}
5438
5439/* Entry points for GNU code.  */
5440
5441/* re_compile_pattern is the GNU regular expression compiler: it
5442   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5443   Returns 0 if the pattern was valid, otherwise an error string.
5444
5445   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5446   are set in BUFP on entry.
5447
5448   We call regex_compile to do the actual compilation.  */
5449
5450const char *
5451re_compile_pattern (pattern, length, bufp)
5452     const char *pattern;
5453     size_t length;
5454     struct re_pattern_buffer *bufp;
5455{
5456  reg_errcode_t ret;
5457
5458  /* GNU code is written to assume at least RE_NREGS registers will be set
5459     (and at least one extra will be -1).  */
5460  bufp->regs_allocated = REGS_UNALLOCATED;
5461
5462  /* And GNU code determines whether or not to get register information
5463     by passing null for the REGS argument to re_match, etc., not by
5464     setting no_sub.  */
5465  bufp->no_sub = 0;
5466
5467  /* Match anchors at newline.  */
5468  bufp->newline_anchor = 1;
5469
5470  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5471
5472  if (!ret)
5473    return NULL;
5474  return gettext (re_error_msgid[(int) ret]);
5475}
5476#ifdef _LIBC
5477weak_alias (__re_compile_pattern, re_compile_pattern)
5478#endif
5479
5480/* Entry points compatible with 4.2 BSD regex library.  We don't define
5481   them unless specifically requested.  */
5482
5483#if defined _REGEX_RE_COMP || defined _LIBC
5484
5485/* BSD has one and only one pattern buffer.  */
5486static struct re_pattern_buffer re_comp_buf;
5487
5488char *
5489#ifdef _LIBC
5490/* Make these definitions weak in libc, so POSIX programs can redefine
5491   these names if they don't use our functions, and still use
5492   regcomp/regexec below without link errors.  */
5493weak_function
5494#endif
5495re_comp (s)
5496    const char *s;
5497{
5498  reg_errcode_t ret;
5499
5500  if (!s)
5501    {
5502      if (!re_comp_buf.buffer)
5503	return gettext ("No previous regular expression");
5504      return 0;
5505    }
5506
5507  if (!re_comp_buf.buffer)
5508    {
5509      re_comp_buf.buffer = (unsigned char *) malloc (200);
5510      if (re_comp_buf.buffer == NULL)
5511        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5512      re_comp_buf.allocated = 200;
5513
5514      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5515      if (re_comp_buf.fastmap == NULL)
5516	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5517    }
5518
5519  /* Since `re_exec' always passes NULL for the `regs' argument, we
5520     don't need to initialize the pattern buffer fields which affect it.  */
5521
5522  /* Match anchors at newlines.  */
5523  re_comp_buf.newline_anchor = 1;
5524
5525  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5526
5527  if (!ret)
5528    return NULL;
5529
5530  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5531  return (char *) gettext (re_error_msgid[(int) ret]);
5532}
5533
5534
5535int
5536#ifdef _LIBC
5537weak_function
5538#endif
5539re_exec (s)
5540    const char *s;
5541{
5542  const int len = strlen (s);
5543  return
5544    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5545}
5546
5547#endif /* _REGEX_RE_COMP */
5548
5549/* POSIX.2 functions.  Don't define these for Emacs.  */
5550
5551#ifndef emacs
5552
5553/* regcomp takes a regular expression as a string and compiles it.
5554
5555   PREG is a regex_t *.  We do not expect any fields to be initialized,
5556   since POSIX says we shouldn't.  Thus, we set
5557
5558     `buffer' to the compiled pattern;
5559     `used' to the length of the compiled pattern;
5560     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5561       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5562       RE_SYNTAX_POSIX_BASIC;
5563     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5564     `fastmap' and `fastmap_accurate' to zero;
5565     `re_nsub' to the number of subexpressions in PATTERN.
5566
5567   PATTERN is the address of the pattern string.
5568
5569   CFLAGS is a series of bits which affect compilation.
5570
5571     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5572     use POSIX basic syntax.
5573
5574     If REG_NEWLINE is set, then . and [^...] don't match newline.
5575     Also, regexec will try a match beginning after every newline.
5576
5577     If REG_ICASE is set, then we considers upper- and lowercase
5578     versions of letters to be equivalent when matching.
5579
5580     If REG_NOSUB is set, then when PREG is passed to regexec, that
5581     routine will report only success or failure, and nothing about the
5582     registers.
5583
5584   It returns 0 if it succeeds, nonzero if it doesn't.  (See gnu-regex.h for
5585   the return codes and their meanings.)  */
5586
5587int
5588regcomp (preg, pattern, cflags)
5589    regex_t *preg;
5590    const char *pattern;
5591    int cflags;
5592{
5593  reg_errcode_t ret;
5594  reg_syntax_t syntax
5595    = (cflags & REG_EXTENDED) ?
5596      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5597
5598  /* regex_compile will allocate the space for the compiled pattern.  */
5599  preg->buffer = 0;
5600  preg->allocated = 0;
5601  preg->used = 0;
5602
5603  /* Don't bother to use a fastmap when searching.  This simplifies the
5604     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5605     characters after newlines into the fastmap.  This way, we just try
5606     every character.  */
5607  preg->fastmap = 0;
5608
5609  if (cflags & REG_ICASE)
5610    {
5611      unsigned i;
5612
5613      preg->translate
5614	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5615				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5616      if (preg->translate == NULL)
5617        return (int) REG_ESPACE;
5618
5619      /* Map uppercase characters to corresponding lowercase ones.  */
5620      for (i = 0; i < CHAR_SET_SIZE; i++)
5621        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5622    }
5623  else
5624    preg->translate = NULL;
5625
5626  /* If REG_NEWLINE is set, newlines are treated differently.  */
5627  if (cflags & REG_NEWLINE)
5628    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5629      syntax &= ~RE_DOT_NEWLINE;
5630      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5631      /* It also changes the matching behavior.  */
5632      preg->newline_anchor = 1;
5633    }
5634  else
5635    preg->newline_anchor = 0;
5636
5637  preg->no_sub = !!(cflags & REG_NOSUB);
5638
5639  /* POSIX says a null character in the pattern terminates it, so we
5640     can use strlen here in compiling the pattern.  */
5641  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5642
5643  /* POSIX doesn't distinguish between an unmatched open-group and an
5644     unmatched close-group: both are REG_EPAREN.  */
5645  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5646
5647  return (int) ret;
5648}
5649#ifdef _LIBC
5650weak_alias (__regcomp, regcomp)
5651#endif
5652
5653
5654/* regexec searches for a given pattern, specified by PREG, in the
5655   string STRING.
5656
5657   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5658   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5659   least NMATCH elements, and we set them to the offsets of the
5660   corresponding matched substrings.
5661
5662   EFLAGS specifies `execution flags' which affect matching: if
5663   REG_NOTBOL is set, then ^ does not match at the beginning of the
5664   string; if REG_NOTEOL is set, then $ does not match at the end.
5665
5666   We return 0 if we find a match and REG_NOMATCH if not.  */
5667
5668int
5669regexec (preg, string, nmatch, pmatch, eflags)
5670    const regex_t *preg;
5671    const char *string;
5672    size_t nmatch;
5673    regmatch_t pmatch[];
5674    int eflags;
5675{
5676  int ret;
5677  struct re_registers regs;
5678  regex_t private_preg;
5679  int len = strlen (string);
5680  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5681
5682  private_preg = *preg;
5683
5684  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5685  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5686
5687  /* The user has told us exactly how many registers to return
5688     information about, via `nmatch'.  We have to pass that on to the
5689     matching routines.  */
5690  private_preg.regs_allocated = REGS_FIXED;
5691
5692  if (want_reg_info)
5693    {
5694      regs.num_regs = nmatch;
5695      regs.start = TALLOC (nmatch, regoff_t);
5696      regs.end = TALLOC (nmatch, regoff_t);
5697      if (regs.start == NULL || regs.end == NULL)
5698        return (int) REG_NOMATCH;
5699    }
5700
5701  /* Perform the searching operation.  */
5702  ret = re_search (&private_preg, string, len,
5703                   /* start: */ 0, /* range: */ len,
5704                   want_reg_info ? &regs : (struct re_registers *) 0);
5705
5706  /* Copy the register information to the POSIX structure.  */
5707  if (want_reg_info)
5708    {
5709      if (ret >= 0)
5710        {
5711          unsigned r;
5712
5713          for (r = 0; r < nmatch; r++)
5714            {
5715              pmatch[r].rm_so = regs.start[r];
5716              pmatch[r].rm_eo = regs.end[r];
5717            }
5718        }
5719
5720      /* If we needed the temporary register info, free the space now.  */
5721      free (regs.start);
5722      free (regs.end);
5723    }
5724
5725  /* We want zero return to mean success, unlike `re_search'.  */
5726  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5727}
5728#ifdef _LIBC
5729weak_alias (__regexec, regexec)
5730#endif
5731
5732
5733/* Returns a message corresponding to an error code, ERRCODE, returned
5734   from either regcomp or regexec.   We don't use PREG here.  */
5735
5736size_t
5737__regerror (errcode, preg, errbuf, errbuf_size)
5738    int errcode;
5739    const regex_t *preg;
5740    char *errbuf;
5741    size_t errbuf_size;
5742{
5743  const char *msg;
5744  size_t msg_size;
5745
5746  if (errcode < 0
5747      || errcode >= (int) (sizeof (re_error_msgid)
5748			   / sizeof (re_error_msgid[0])))
5749    /* Only error codes returned by the rest of the code should be passed
5750       to this routine.  If we are given anything else, or if other regex
5751       code generates an invalid error code, then the program has a bug.
5752       Dump core so we can fix it.  */
5753    abort ();
5754
5755  msg = gettext (re_error_msgid[errcode]);
5756
5757  msg_size = strlen (msg) + 1; /* Includes the null.  */
5758
5759  if (errbuf_size != 0)
5760    {
5761      if (msg_size > errbuf_size)
5762        {
5763#if defined HAVE_MEMPCPY || defined _LIBC
5764	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5765#else
5766          memcpy (errbuf, msg, errbuf_size - 1);
5767          errbuf[errbuf_size - 1] = 0;
5768#endif
5769        }
5770      else
5771        memcpy (errbuf, msg, msg_size);
5772    }
5773
5774  return msg_size;
5775}
5776#ifdef _LIBC
5777weak_alias (__regerror, regerror)
5778#endif
5779
5780
5781/* Free dynamically allocated space used by PREG.  */
5782
5783void
5784regfree (preg)
5785    regex_t *preg;
5786{
5787  if (preg->buffer != NULL)
5788    free (preg->buffer);
5789  preg->buffer = NULL;
5790
5791  preg->allocated = 0;
5792  preg->used = 0;
5793
5794  if (preg->fastmap != NULL)
5795    free (preg->fastmap);
5796  preg->fastmap = NULL;
5797  preg->fastmap_accurate = 0;
5798
5799  if (preg->translate != NULL)
5800    free (preg->translate);
5801  preg->translate = NULL;
5802}
5803#ifdef _LIBC
5804weak_alias (__regfree, regfree)
5805#endif
5806
5807#endif /* not emacs  */
5808