1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P1003.2/D11.2, except for some of the
4   internationalization features.)
5   Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Library General Public License as
9   published by the Free Software Foundation; either version 2 of the
10   License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Library General Public License for more details.
16
17   You should have received a copy of the GNU Library General Public
18   License along with the GNU C Library; see the file COPYING.LIB.  If not,
19   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20   Boston, MA 02111-1307, USA.  */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined _AIX && !defined REGEX_MALLOC
24  #pragma alloca
25#endif
26
27#undef	_GNU_SOURCE
28#define _GNU_SOURCE
29
30#ifdef HAVE_CONFIG_H
31# include <config.h>
32#endif
33
34#ifndef PARAMS
35# if defined __GNUC__ || (defined __STDC__ && __STDC__)
36#  define PARAMS(args) args
37# else
38#  define PARAMS(args) ()
39# endif  /* GCC.  */
40#endif  /* Not PARAMS.  */
41
42#if defined STDC_HEADERS && !defined emacs
43# include <stddef.h>
44#else
45/* We need this for `regex.h', and perhaps for the Emacs include files.  */
46# include <sys/types.h>
47#endif
48
49#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
50
51/* For platform which support the ISO C amendement 1 functionality we
52   support user defined character classes.  */
53#if defined _LIBC || WIDE_CHAR_SUPPORT
54/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
55# include <wchar.h>
56# include <wctype.h>
57#endif
58
59#ifdef _LIBC
60/* We have to keep the namespace clean.  */
61# define regfree(preg) __regfree (preg)
62# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
63# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
64# define regerror(errcode, preg, errbuf, errbuf_size) \
65	__regerror(errcode, preg, errbuf, errbuf_size)
66# define re_set_registers(bu, re, nu, st, en) \
67	__re_set_registers (bu, re, nu, st, en)
68# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
69	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
70# define re_match(bufp, string, size, pos, regs) \
71	__re_match (bufp, string, size, pos, regs)
72# define re_search(bufp, string, size, startpos, range, regs) \
73	__re_search (bufp, string, size, startpos, range, regs)
74# define re_compile_pattern(pattern, length, bufp) \
75	__re_compile_pattern (pattern, length, bufp)
76# define re_set_syntax(syntax) __re_set_syntax (syntax)
77# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
78	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
79# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
80
81#define btowc __btowc
82#endif
83
84/* This is for other GNU distributions with internationalized messages.  */
85#if HAVE_LIBINTL_H || defined _LIBC
86# include <libintl.h>
87#else
88# define gettext(msgid) (msgid)
89#endif
90
91#ifndef gettext_noop
92/* This define is so xgettext can find the internationalizable
93   strings.  */
94# define gettext_noop(String) String
95#endif
96
97/* The `emacs' switch turns on certain matching commands
98   that make sense only in Emacs. */
99#ifdef emacs
100
101# include "lisp.h"
102# include "buffer.h"
103# include "syntax.h"
104
105#else  /* not emacs */
106
107/* If we are not linking with Emacs proper,
108   we can't use the relocating allocator
109   even if config.h says that we can.  */
110# undef REL_ALLOC
111
112# if defined STDC_HEADERS || defined _LIBC
113#  include <stdlib.h>
114# else
115char *malloc ();
116char *realloc ();
117# endif
118
119/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
120   If nothing else has been done, use the method below.  */
121# ifdef INHIBIT_STRING_HEADER
122#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
123#   if !defined bzero && !defined bcopy
124#    undef INHIBIT_STRING_HEADER
125#   endif
126#  endif
127# endif
128
129/* This is the normal way of making sure we have a bcopy and a bzero.
130   This is used in most programs--a few other programs avoid this
131   by defining INHIBIT_STRING_HEADER.  */
132# ifndef INHIBIT_STRING_HEADER
133#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
134#   include <string.h>
135#   ifndef bzero
136#    ifndef _LIBC
137#     define bzero(s, n)	(memset (s, '\0', n), (s))
138#    else
139#     define bzero(s, n)	__bzero (s, n)
140#    endif
141#   endif
142#  else
143#   include <strings.h>
144#   ifndef memcmp
145#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
146#   endif
147#   ifndef memcpy
148#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
149#   endif
150#  endif
151# endif
152
153/* Define the syntax stuff for \<, \>, etc.  */
154
155/* This must be nonzero for the wordchar and notwordchar pattern
156   commands in re_match_2.  */
157# ifndef Sword
158#  define Sword 1
159# endif
160
161# ifdef SWITCH_ENUM_BUG
162#  define SWITCH_ENUM_CAST(x) ((int)(x))
163# else
164#  define SWITCH_ENUM_CAST(x) (x)
165# endif
166
167/* How many characters in the character set.  */
168# define CHAR_SET_SIZE 256
169
170# ifdef SYNTAX_TABLE
171
172extern char *re_syntax_table;
173
174# else /* not SYNTAX_TABLE */
175
176static char re_syntax_table[CHAR_SET_SIZE];
177
178static void
179init_syntax_once ()
180{
181   register int c;
182   static int done;
183
184   if (done)
185     return;
186
187   bzero (re_syntax_table, sizeof re_syntax_table);
188
189   for (c = 'a'; c <= 'z'; c++)
190     re_syntax_table[c] = Sword;
191
192   for (c = 'A'; c <= 'Z'; c++)
193     re_syntax_table[c] = Sword;
194
195   for (c = '0'; c <= '9'; c++)
196     re_syntax_table[c] = Sword;
197
198   re_syntax_table['_'] = Sword;
199
200   done = 1;
201}
202
203# endif /* not SYNTAX_TABLE */
204
205# define SYNTAX(c) re_syntax_table[c]
206
207#endif /* not emacs */
208
209/* Get the interface, including the syntax bits.  */
210#include <regex-gnu.h>
211
212/* isalpha etc. are used for the character classes.  */
213#include <ctype.h>
214
215/* Jim Meyering writes:
216
217   "... Some ctype macros are valid only for character codes that
218   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
219   using /bin/cc or gcc but without giving an ansi option).  So, all
220   ctype uses should be through macros like ISPRINT...  If
221   STDC_HEADERS is defined, then autoconf has verified that the ctype
222   macros don't need to be guarded with references to isascii. ...
223   Defining isascii to 1 should let any compiler worth its salt
224   eliminate the && through constant folding."
225   Solaris defines some of these symbols so we must undefine them first.  */
226
227#undef ISASCII
228#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
229# define ISASCII(c) 1
230#else
231# define ISASCII(c) isascii(c)
232#endif
233
234#ifdef isblank
235# define ISBLANK(c) (ISASCII (c) && isblank (c))
236#else
237# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
238#endif
239#ifdef isgraph
240# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
241#else
242# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
243#endif
244
245#undef ISPRINT
246#define ISPRINT(c) (ISASCII (c) && isprint (c))
247#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
248#define ISALNUM(c) (ISASCII (c) && isalnum (c))
249#define ISALPHA(c) (ISASCII (c) && isalpha (c))
250#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
251#define ISLOWER(c) (ISASCII (c) && islower (c))
252#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
253#define ISSPACE(c) (ISASCII (c) && isspace (c))
254#define ISUPPER(c) (ISASCII (c) && isupper (c))
255#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
256
257#ifdef _tolower
258# define TOLOWER(c) _tolower(c)
259#else
260# define TOLOWER(c) tolower(c)
261#endif
262
263#ifndef NULL
264# define NULL (void *)0
265#endif
266
267/* We remove any previous definition of `SIGN_EXTEND_CHAR',
268   since ours (we hope) works properly with all combinations of
269   machines, compilers, `char' and `unsigned char' argument types.
270   (Per Bothner suggested the basic approach.)  */
271#undef SIGN_EXTEND_CHAR
272#if __STDC__
273# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
274#else  /* not __STDC__ */
275/* As in Harbison and Steele.  */
276# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
277#endif
278
279/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
280   use `alloca' instead of `malloc'.  This is because using malloc in
281   re_search* or re_match* could cause memory leaks when C-g is used in
282   Emacs; also, malloc is slower and causes storage fragmentation.  On
283   the other hand, malloc is more portable, and easier to debug.
284
285   Because we sometimes use alloca, some routines have to be macros,
286   not functions -- `alloca'-allocated space disappears at the end of the
287   function it is called in.  */
288
289#ifdef REGEX_MALLOC
290
291# define REGEX_ALLOCATE malloc
292# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
293# define REGEX_FREE free
294
295#else /* not REGEX_MALLOC  */
296
297/* Emacs already defines alloca, sometimes.  */
298# ifndef alloca
299
300/* Make alloca work the best possible way.  */
301#  ifdef __GNUC__
302#   define alloca __builtin_alloca
303#  else /* not __GNUC__ */
304#   if HAVE_ALLOCA_H
305#    include <alloca.h>
306#   endif /* HAVE_ALLOCA_H */
307#  endif /* not __GNUC__ */
308
309# endif /* not alloca */
310
311# define REGEX_ALLOCATE alloca
312
313/* Assumes a `char *destination' variable.  */
314# define REGEX_REALLOCATE(source, osize, nsize)				\
315  (destination = (char *) alloca (nsize),				\
316   memcpy (destination, source, osize))
317
318/* No need to do anything to free, after alloca.  */
319# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
320
321#endif /* not REGEX_MALLOC */
322
323/* Define how to allocate the failure stack.  */
324
325#if defined REL_ALLOC && defined REGEX_MALLOC
326
327# define REGEX_ALLOCATE_STACK(size)				\
328  r_alloc (&failure_stack_ptr, (size))
329# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
330  r_re_alloc (&failure_stack_ptr, (nsize))
331# define REGEX_FREE_STACK(ptr)					\
332  r_alloc_free (&failure_stack_ptr)
333
334#else /* not using relocating allocator */
335
336# ifdef REGEX_MALLOC
337
338#  define REGEX_ALLOCATE_STACK malloc
339#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
340#  define REGEX_FREE_STACK free
341
342# else /* not REGEX_MALLOC */
343
344#  define REGEX_ALLOCATE_STACK alloca
345
346#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
347   REGEX_REALLOCATE (source, osize, nsize)
348/* No need to explicitly free anything.  */
349#  define REGEX_FREE_STACK(arg)
350
351# endif /* not REGEX_MALLOC */
352#endif /* not using relocating allocator */
353
354
355/* True if `size1' is non-NULL and PTR is pointing anywhere inside
356   `string1' or just past its end.  This works if PTR is NULL, which is
357   a good thing.  */
358#define FIRST_STRING_P(ptr) 					\
359  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
360
361/* (Re)Allocate N items of type T using malloc, or fail.  */
362#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
363#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
364#define RETALLOC_IF(addr, n, t) \
365  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
366#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
367
368#define BYTEWIDTH 8 /* In bits.  */
369
370#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
371
372#undef MAX
373#undef MIN
374#define MAX(a, b) ((a) > (b) ? (a) : (b))
375#define MIN(a, b) ((a) < (b) ? (a) : (b))
376
377typedef char boolean;
378#define false 0
379#define true 1
380
381static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
382					const char *string1, int size1,
383					const char *string2, int size2,
384					int pos,
385					struct re_registers *regs,
386					int stop));
387
388/* These are the command codes that appear in compiled regular
389   expressions.  Some opcodes are followed by argument bytes.  A
390   command code can specify any interpretation whatsoever for its
391   arguments.  Zero bytes may appear in the compiled regular expression.  */
392
393typedef enum
394{
395  no_op = 0,
396
397  /* Succeed right away--no more backtracking.  */
398  succeed,
399
400        /* Followed by one byte giving n, then by n literal bytes.  */
401  exactn,
402
403        /* Matches any (more or less) character.  */
404  anychar,
405
406        /* Matches any one char belonging to specified set.  First
407           following byte is number of bitmap bytes.  Then come bytes
408           for a bitmap saying which chars are in.  Bits in each byte
409           are ordered low-bit-first.  A character is in the set if its
410           bit is 1.  A character too large to have a bit in the map is
411           automatically not in the set.  */
412  charset,
413
414        /* Same parameters as charset, but match any character that is
415           not one of those specified.  */
416  charset_not,
417
418        /* Start remembering the text that is matched, for storing in a
419           register.  Followed by one byte with the register number, in
420           the range 0 to one less than the pattern buffer's re_nsub
421           field.  Then followed by one byte with the number of groups
422           inner to this one.  (This last has to be part of the
423           start_memory only because we need it in the on_failure_jump
424           of re_match_2.)  */
425  start_memory,
426
427        /* Stop remembering the text that is matched and store it in a
428           memory register.  Followed by one byte with the register
429           number, in the range 0 to one less than `re_nsub' in the
430           pattern buffer, and one byte with the number of inner groups,
431           just like `start_memory'.  (We need the number of inner
432           groups here because we don't have any easy way of finding the
433           corresponding start_memory when we're at a stop_memory.)  */
434  stop_memory,
435
436        /* Match a duplicate of something remembered. Followed by one
437           byte containing the register number.  */
438  duplicate,
439
440        /* Fail unless at beginning of line.  */
441  begline,
442
443        /* Fail unless at end of line.  */
444  endline,
445
446        /* Succeeds if at beginning of buffer (if emacs) or at beginning
447           of string to be matched (if not).  */
448  begbuf,
449
450        /* Analogously, for end of buffer/string.  */
451  endbuf,
452
453        /* Followed by two byte relative address to which to jump.  */
454  jump,
455
456	/* Same as jump, but marks the end of an alternative.  */
457  jump_past_alt,
458
459        /* Followed by two-byte relative address of place to resume at
460           in case of failure.  */
461  on_failure_jump,
462
463        /* Like on_failure_jump, but pushes a placeholder instead of the
464           current string position when executed.  */
465  on_failure_keep_string_jump,
466
467        /* Throw away latest failure point and then jump to following
468           two-byte relative address.  */
469  pop_failure_jump,
470
471        /* Change to pop_failure_jump if know won't have to backtrack to
472           match; otherwise change to jump.  This is used to jump
473           back to the beginning of a repeat.  If what follows this jump
474           clearly won't match what the repeat does, such that we can be
475           sure that there is no use backtracking out of repetitions
476           already matched, then we change it to a pop_failure_jump.
477           Followed by two-byte address.  */
478  maybe_pop_jump,
479
480        /* Jump to following two-byte address, and push a dummy failure
481           point. This failure point will be thrown away if an attempt
482           is made to use it for a failure.  A `+' construct makes this
483           before the first repeat.  Also used as an intermediary kind
484           of jump when compiling an alternative.  */
485  dummy_failure_jump,
486
487	/* Push a dummy failure point and continue.  Used at the end of
488	   alternatives.  */
489  push_dummy_failure,
490
491        /* Followed by two-byte relative address and two-byte number n.
492           After matching N times, jump to the address upon failure.  */
493  succeed_n,
494
495        /* Followed by two-byte relative address, and two-byte number n.
496           Jump to the address N times, then fail.  */
497  jump_n,
498
499        /* Set the following two-byte relative address to the
500           subsequent two-byte number.  The address *includes* the two
501           bytes of number.  */
502  set_number_at,
503
504  wordchar,	/* Matches any word-constituent character.  */
505  notwordchar,	/* Matches any char that is not a word-constituent.  */
506
507  wordbeg,	/* Succeeds if at word beginning.  */
508  wordend,	/* Succeeds if at word end.  */
509
510  wordbound,	/* Succeeds if at a word boundary.  */
511  notwordbound	/* Succeeds if not at a word boundary.  */
512
513#ifdef emacs
514  ,before_dot,	/* Succeeds if before point.  */
515  at_dot,	/* Succeeds if at point.  */
516  after_dot,	/* Succeeds if after point.  */
517
518	/* Matches any character whose syntax is specified.  Followed by
519           a byte which contains a syntax code, e.g., Sword.  */
520  syntaxspec,
521
522	/* Matches any character whose syntax is not that specified.  */
523  notsyntaxspec
524#endif /* emacs */
525} re_opcode_t;
526
527/* Common operations on the compiled pattern.  */
528
529/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
530
531#define STORE_NUMBER(destination, number)				\
532  do {									\
533    (destination)[0] = (number) & 0377;					\
534    (destination)[1] = (number) >> 8;					\
535  } while (0)
536
537/* Same as STORE_NUMBER, except increment DESTINATION to
538   the byte after where the number is stored.  Therefore, DESTINATION
539   must be an lvalue.  */
540
541#define STORE_NUMBER_AND_INCR(destination, number)			\
542  do {									\
543    STORE_NUMBER (destination, number);					\
544    (destination) += 2;							\
545  } while (0)
546
547/* Put into DESTINATION a number stored in two contiguous bytes starting
548   at SOURCE.  */
549
550#define EXTRACT_NUMBER(destination, source)				\
551  do {									\
552    (destination) = *(source) & 0377;					\
553    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
554  } while (0)
555
556#ifdef DEBUG
557static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
558static void
559extract_number (dest, source)
560    int *dest;
561    unsigned char *source;
562{
563  int temp = SIGN_EXTEND_CHAR (*(source + 1));
564  *dest = *source & 0377;
565  *dest += temp << 8;
566}
567
568# ifndef EXTRACT_MACROS /* To debug the macros.  */
569#  undef EXTRACT_NUMBER
570#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
571# endif /* not EXTRACT_MACROS */
572
573#endif /* DEBUG */
574
575/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
576   SOURCE must be an lvalue.  */
577
578#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
579  do {									\
580    EXTRACT_NUMBER (destination, source);				\
581    (source) += 2; 							\
582  } while (0)
583
584#ifdef DEBUG
585static void extract_number_and_incr _RE_ARGS ((int *destination,
586					       unsigned char **source));
587static void
588extract_number_and_incr (destination, source)
589    int *destination;
590    unsigned char **source;
591{
592  extract_number (destination, *source);
593  *source += 2;
594}
595
596# ifndef EXTRACT_MACROS
597#  undef EXTRACT_NUMBER_AND_INCR
598#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
599  extract_number_and_incr (&dest, &src)
600# endif /* not EXTRACT_MACROS */
601
602#endif /* DEBUG */
603
604/* If DEBUG is defined, Regex prints many voluminous messages about what
605   it is doing (if the variable `debug' is nonzero).  If linked with the
606   main program in `iregex.c', you can enter patterns and strings
607   interactively.  And if linked with the main program in `main.c' and
608   the other test files, you can run the already-written tests.  */
609
610#ifdef DEBUG
611
612/* We use standard I/O for debugging.  */
613# include <stdio.h>
614
615/* It is useful to test things that ``must'' be true when debugging.  */
616# include <assert.h>
617
618static int debug;
619
620# define DEBUG_STATEMENT(e) e
621# define DEBUG_PRINT1(x) if (debug) printf (x)
622# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
623# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
624# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
625# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
626  if (debug) print_partial_compiled_pattern (s, e)
627# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
628  if (debug) print_double_string (w, s1, sz1, s2, sz2)
629
630
631/* Print the fastmap in human-readable form.  */
632
633void
634print_fastmap (fastmap)
635    char *fastmap;
636{
637  unsigned was_a_range = 0;
638  unsigned i = 0;
639
640  while (i < (1 << BYTEWIDTH))
641    {
642      if (fastmap[i++])
643	{
644	  was_a_range = 0;
645          putchar (i - 1);
646          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
647            {
648              was_a_range = 1;
649              i++;
650            }
651	  if (was_a_range)
652            {
653              printf ("-");
654              putchar (i - 1);
655            }
656        }
657    }
658  putchar ('\n');
659}
660
661
662/* Print a compiled pattern string in human-readable form, starting at
663   the START pointer into it and ending just before the pointer END.  */
664
665void
666print_partial_compiled_pattern (start, end)
667    unsigned char *start;
668    unsigned char *end;
669{
670  int mcnt, mcnt2;
671  unsigned char *p1;
672  unsigned char *p = start;
673  unsigned char *pend = end;
674
675  if (start == NULL)
676    {
677      printf ("(null)\n");
678      return;
679    }
680
681  /* Loop over pattern commands.  */
682  while (p < pend)
683    {
684      printf ("%d:\t", p - start);
685
686      switch ((re_opcode_t) *p++)
687	{
688        case no_op:
689          printf ("/no_op");
690          break;
691
692	case exactn:
693	  mcnt = *p++;
694          printf ("/exactn/%d", mcnt);
695          do
696	    {
697              putchar ('/');
698	      putchar (*p++);
699            }
700          while (--mcnt);
701          break;
702
703	case start_memory:
704          mcnt = *p++;
705          printf ("/start_memory/%d/%d", mcnt, *p++);
706          break;
707
708	case stop_memory:
709          mcnt = *p++;
710	  printf ("/stop_memory/%d/%d", mcnt, *p++);
711          break;
712
713	case duplicate:
714	  printf ("/duplicate/%d", *p++);
715	  break;
716
717	case anychar:
718	  printf ("/anychar");
719	  break;
720
721	case charset:
722        case charset_not:
723          {
724            register int c, last = -100;
725	    register int in_range = 0;
726
727	    printf ("/charset [%s",
728	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
729
730            assert (p + *p < pend);
731
732            for (c = 0; c < 256; c++)
733	      if (c / 8 < *p
734		  && (p[1 + (c/8)] & (1 << (c % 8))))
735		{
736		  /* Are we starting a range?  */
737		  if (last + 1 == c && ! in_range)
738		    {
739		      putchar ('-');
740		      in_range = 1;
741		    }
742		  /* Have we broken a range?  */
743		  else if (last + 1 != c && in_range)
744              {
745		      putchar (last);
746		      in_range = 0;
747		    }
748
749		  if (! in_range)
750		    putchar (c);
751
752		  last = c;
753              }
754
755	    if (in_range)
756	      putchar (last);
757
758	    putchar (']');
759
760	    p += 1 + *p;
761	  }
762	  break;
763
764	case begline:
765	  printf ("/begline");
766          break;
767
768	case endline:
769          printf ("/endline");
770          break;
771
772	case on_failure_jump:
773          extract_number_and_incr (&mcnt, &p);
774  	  printf ("/on_failure_jump to %d", p + mcnt - start);
775          break;
776
777	case on_failure_keep_string_jump:
778          extract_number_and_incr (&mcnt, &p);
779  	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
780          break;
781
782	case dummy_failure_jump:
783          extract_number_and_incr (&mcnt, &p);
784  	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
785          break;
786
787	case push_dummy_failure:
788          printf ("/push_dummy_failure");
789          break;
790
791        case maybe_pop_jump:
792          extract_number_and_incr (&mcnt, &p);
793  	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
794	  break;
795
796        case pop_failure_jump:
797	  extract_number_and_incr (&mcnt, &p);
798  	  printf ("/pop_failure_jump to %d", p + mcnt - start);
799	  break;
800
801        case jump_past_alt:
802	  extract_number_and_incr (&mcnt, &p);
803  	  printf ("/jump_past_alt to %d", p + mcnt - start);
804	  break;
805
806        case jump:
807	  extract_number_and_incr (&mcnt, &p);
808  	  printf ("/jump to %d", p + mcnt - start);
809	  break;
810
811        case succeed_n:
812          extract_number_and_incr (&mcnt, &p);
813	  p1 = p + mcnt;
814          extract_number_and_incr (&mcnt2, &p);
815	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
816          break;
817
818        case jump_n:
819          extract_number_and_incr (&mcnt, &p);
820	  p1 = p + mcnt;
821          extract_number_and_incr (&mcnt2, &p);
822	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
823          break;
824
825        case set_number_at:
826          extract_number_and_incr (&mcnt, &p);
827	  p1 = p + mcnt;
828          extract_number_and_incr (&mcnt2, &p);
829	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
830          break;
831
832        case wordbound:
833	  printf ("/wordbound");
834	  break;
835
836	case notwordbound:
837	  printf ("/notwordbound");
838          break;
839
840	case wordbeg:
841	  printf ("/wordbeg");
842	  break;
843
844	case wordend:
845	  printf ("/wordend");
846
847# ifdef emacs
848	case before_dot:
849	  printf ("/before_dot");
850          break;
851
852	case at_dot:
853	  printf ("/at_dot");
854          break;
855
856	case after_dot:
857	  printf ("/after_dot");
858          break;
859
860	case syntaxspec:
861          printf ("/syntaxspec");
862	  mcnt = *p++;
863	  printf ("/%d", mcnt);
864          break;
865
866	case notsyntaxspec:
867          printf ("/notsyntaxspec");
868	  mcnt = *p++;
869	  printf ("/%d", mcnt);
870	  break;
871# endif /* emacs */
872
873	case wordchar:
874	  printf ("/wordchar");
875          break;
876
877	case notwordchar:
878	  printf ("/notwordchar");
879          break;
880
881	case begbuf:
882	  printf ("/begbuf");
883          break;
884
885	case endbuf:
886	  printf ("/endbuf");
887          break;
888
889        default:
890          printf ("?%d", *(p-1));
891	}
892
893      putchar ('\n');
894    }
895
896  printf ("%d:\tend of pattern.\n", p - start);
897}
898
899
900void
901print_compiled_pattern (bufp)
902    struct re_pattern_buffer *bufp;
903{
904  unsigned char *buffer = bufp->buffer;
905
906  print_partial_compiled_pattern (buffer, buffer + bufp->used);
907  printf ("%ld bytes used/%ld bytes allocated.\n",
908	  bufp->used, bufp->allocated);
909
910  if (bufp->fastmap_accurate && bufp->fastmap)
911    {
912      printf ("fastmap: ");
913      print_fastmap (bufp->fastmap);
914    }
915
916  printf ("re_nsub: %d\t", bufp->re_nsub);
917  printf ("regs_alloc: %d\t", bufp->regs_allocated);
918  printf ("can_be_null: %d\t", bufp->can_be_null);
919  printf ("newline_anchor: %d\n", bufp->newline_anchor);
920  printf ("no_sub: %d\t", bufp->no_sub);
921  printf ("not_bol: %d\t", bufp->not_bol);
922  printf ("not_eol: %d\t", bufp->not_eol);
923  printf ("syntax: %lx\n", bufp->syntax);
924  /* Perhaps we should print the translate table?  */
925}
926
927
928void
929print_double_string (where, string1, size1, string2, size2)
930    const char *where;
931    const char *string1;
932    const char *string2;
933    int size1;
934    int size2;
935{
936  int this_char;
937
938  if (where == NULL)
939    printf ("(null)");
940  else
941    {
942      if (FIRST_STRING_P (where))
943        {
944          for (this_char = where - string1; this_char < size1; this_char++)
945            putchar (string1[this_char]);
946
947          where = string2;
948        }
949
950      for (this_char = where - string2; this_char < size2; this_char++)
951        putchar (string2[this_char]);
952    }
953}
954
955void
956printchar (c)
957     int c;
958{
959  putc (c, stderr);
960}
961
962#else /* not DEBUG */
963
964# undef assert
965# define assert(e)
966
967# define DEBUG_STATEMENT(e)
968# define DEBUG_PRINT1(x)
969# define DEBUG_PRINT2(x1, x2)
970# define DEBUG_PRINT3(x1, x2, x3)
971# define DEBUG_PRINT4(x1, x2, x3, x4)
972# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
973# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
974
975#endif /* not DEBUG */
976
977/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
978   also be assigned to arbitrarily: each pattern buffer stores its own
979   syntax, so it can be changed between regex compilations.  */
980/* This has no initializer because initialized variables in Emacs
981   become read-only after dumping.  */
982reg_syntax_t re_syntax_options;
983
984
985/* Specify the precise syntax of regexps for compilation.  This provides
986   for compatibility for various utilities which historically have
987   different, incompatible syntaxes.
988
989   The argument SYNTAX is a bit mask comprised of the various bits
990   defined in regex.h.  We return the old syntax.  */
991
992reg_syntax_t
993re_set_syntax (syntax)
994    reg_syntax_t syntax;
995{
996  reg_syntax_t ret = re_syntax_options;
997
998  re_syntax_options = syntax;
999#ifdef DEBUG
1000  if (syntax & RE_DEBUG)
1001    debug = 1;
1002  else if (debug) /* was on but now is not */
1003    debug = 0;
1004#endif /* DEBUG */
1005  return ret;
1006}
1007#ifdef _LIBC
1008weak_alias (__re_set_syntax, re_set_syntax)
1009#endif
1010
1011/* This table gives an error message for each of the error codes listed
1012   in regex.h.  Obviously the order here has to be same as there.
1013   POSIX doesn't require that we do anything for REG_NOERROR,
1014   but why not be nice?  */
1015
1016static const char re_error_msgid[] =
1017  {
1018#define REG_NOERROR_IDX	0
1019    gettext_noop ("Success")	/* REG_NOERROR */
1020    "\0"
1021#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1022    gettext_noop ("No match")	/* REG_NOMATCH */
1023    "\0"
1024#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
1025    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1026    "\0"
1027#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1028    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1029    "\0"
1030#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1031    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1032    "\0"
1033#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
1034    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1035    "\0"
1036#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
1037    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1038    "\0"
1039#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
1040    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
1041    "\0"
1042#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1043    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1044    "\0"
1045#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1046    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1047    "\0"
1048#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
1049    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1050    "\0"
1051#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1052    gettext_noop ("Invalid range end")	/* REG_ERANGE */
1053    "\0"
1054#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
1055    gettext_noop ("Memory exhausted") /* REG_ESPACE */
1056    "\0"
1057#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
1058    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1059    "\0"
1060#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1061    gettext_noop ("Premature end of regular expression") /* REG_EEND */
1062    "\0"
1063#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
1064    gettext_noop ("Regular expression too big") /* REG_ESIZE */
1065    "\0"
1066#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
1067    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1068  };
1069
1070static const size_t re_error_msgid_idx[] =
1071  {
1072    REG_NOERROR_IDX,
1073    REG_NOMATCH_IDX,
1074    REG_BADPAT_IDX,
1075    REG_ECOLLATE_IDX,
1076    REG_ECTYPE_IDX,
1077    REG_EESCAPE_IDX,
1078    REG_ESUBREG_IDX,
1079    REG_EBRACK_IDX,
1080    REG_EPAREN_IDX,
1081    REG_EBRACE_IDX,
1082    REG_BADBR_IDX,
1083    REG_ERANGE_IDX,
1084    REG_ESPACE_IDX,
1085    REG_BADRPT_IDX,
1086    REG_EEND_IDX,
1087    REG_ESIZE_IDX,
1088    REG_ERPAREN_IDX
1089  };
1090
1091/* Avoiding alloca during matching, to placate r_alloc.  */
1092
1093/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1094   searching and matching functions should not call alloca.  On some
1095   systems, alloca is implemented in terms of malloc, and if we're
1096   using the relocating allocator routines, then malloc could cause a
1097   relocation, which might (if the strings being searched are in the
1098   ralloc heap) shift the data out from underneath the regexp
1099   routines.
1100
1101   Here's another reason to avoid allocation: Emacs
1102   processes input from X in a signal handler; processing X input may
1103   call malloc; if input arrives while a matching routine is calling
1104   malloc, then we're scrod.  But Emacs can't just block input while
1105   calling matching routines; then we don't notice interrupts when
1106   they come in.  So, Emacs blocks input around all regexp calls
1107   except the matching calls, which it leaves unprotected, in the
1108   faith that they will not malloc.  */
1109
1110/* Normally, this is fine.  */
1111#define MATCH_MAY_ALLOCATE
1112
1113/* When using GNU C, we are not REALLY using the C alloca, no matter
1114   what config.h may say.  So don't take precautions for it.  */
1115#ifdef __GNUC__
1116# undef C_ALLOCA
1117#endif
1118
1119/* The match routines may not allocate if (1) they would do it with malloc
1120   and (2) it's not safe for them to use malloc.
1121   Note that if REL_ALLOC is defined, matching would not use malloc for the
1122   failure stack, but we would still use it for the register vectors;
1123   so REL_ALLOC should not affect this.  */
1124#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1125# undef MATCH_MAY_ALLOCATE
1126#endif
1127
1128
1129/* Failure stack declarations and macros; both re_compile_fastmap and
1130   re_match_2 use a failure stack.  These have to be macros because of
1131   REGEX_ALLOCATE_STACK.  */
1132
1133
1134/* Number of failure points for which to initially allocate space
1135   when matching.  If this number is exceeded, we allocate more
1136   space, so it is not a hard limit.  */
1137#ifndef INIT_FAILURE_ALLOC
1138# define INIT_FAILURE_ALLOC 5
1139#endif
1140
1141/* Roughly the maximum number of failure points on the stack.  Would be
1142   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1143   This is a variable only so users of regex can assign to it; we never
1144   change it ourselves.  */
1145
1146#ifdef INT_IS_16BIT
1147
1148# if defined MATCH_MAY_ALLOCATE
1149/* 4400 was enough to cause a crash on Alpha OSF/1,
1150   whose default stack limit is 2mb.  */
1151long int re_max_failures = 4000;
1152# else
1153long int re_max_failures = 2000;
1154# endif
1155
1156union fail_stack_elt
1157{
1158  unsigned char *pointer;
1159  long int integer;
1160};
1161
1162typedef union fail_stack_elt fail_stack_elt_t;
1163
1164typedef struct
1165{
1166  fail_stack_elt_t *stack;
1167  unsigned long int size;
1168  unsigned long int avail;		/* Offset of next open position.  */
1169} fail_stack_type;
1170
1171#else /* not INT_IS_16BIT */
1172
1173# if defined MATCH_MAY_ALLOCATE
1174/* 4400 was enough to cause a crash on Alpha OSF/1,
1175   whose default stack limit is 2mb.  */
1176int re_max_failures = 20000;
1177# else
1178int re_max_failures = 2000;
1179# endif
1180
1181union fail_stack_elt
1182{
1183  unsigned char *pointer;
1184  int integer;
1185};
1186
1187typedef union fail_stack_elt fail_stack_elt_t;
1188
1189typedef struct
1190{
1191  fail_stack_elt_t *stack;
1192  unsigned size;
1193  unsigned avail;			/* Offset of next open position.  */
1194} fail_stack_type;
1195
1196#endif /* INT_IS_16BIT */
1197
1198#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1199#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1200#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1201
1202
1203/* Define macros to initialize and free the failure stack.
1204   Do `return -2' if the alloc fails.  */
1205
1206#ifdef MATCH_MAY_ALLOCATE
1207# define INIT_FAIL_STACK()						\
1208  do {									\
1209    fail_stack.stack = (fail_stack_elt_t *)				\
1210      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1211									\
1212    if (fail_stack.stack == NULL)					\
1213      return -2;							\
1214									\
1215    fail_stack.size = INIT_FAILURE_ALLOC;				\
1216    fail_stack.avail = 0;						\
1217  } while (0)
1218
1219# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1220#else
1221# define INIT_FAIL_STACK()						\
1222  do {									\
1223    fail_stack.avail = 0;						\
1224  } while (0)
1225
1226# define RESET_FAIL_STACK()
1227#endif
1228
1229
1230/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1231
1232   Return 1 if succeeds, and 0 if either ran out of memory
1233   allocating space for it or it was already too large.
1234
1235   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1236
1237#define DOUBLE_FAIL_STACK(fail_stack)					\
1238  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1239   ? 0									\
1240   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1241        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1242          (fail_stack).size * sizeof (fail_stack_elt_t),		\
1243          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1244									\
1245      (fail_stack).stack == NULL					\
1246      ? 0								\
1247      : ((fail_stack).size <<= 1, 					\
1248         1)))
1249
1250
1251/* Push pointer POINTER on FAIL_STACK.
1252   Return 1 if was able to do so and 0 if ran out of memory allocating
1253   space to do so.  */
1254#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1255  ((FAIL_STACK_FULL ()							\
1256    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1257   ? 0									\
1258   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1259      1))
1260
1261/* Push a pointer value onto the failure stack.
1262   Assumes the variable `fail_stack'.  Probably should only
1263   be called from within `PUSH_FAILURE_POINT'.  */
1264#define PUSH_FAILURE_POINTER(item)					\
1265  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1266
1267/* This pushes an integer-valued item onto the failure stack.
1268   Assumes the variable `fail_stack'.  Probably should only
1269   be called from within `PUSH_FAILURE_POINT'.  */
1270#define PUSH_FAILURE_INT(item)					\
1271  fail_stack.stack[fail_stack.avail++].integer = (item)
1272
1273/* Push a fail_stack_elt_t value onto the failure stack.
1274   Assumes the variable `fail_stack'.  Probably should only
1275   be called from within `PUSH_FAILURE_POINT'.  */
1276#define PUSH_FAILURE_ELT(item)					\
1277  fail_stack.stack[fail_stack.avail++] =  (item)
1278
1279/* These three POP... operations complement the three PUSH... operations.
1280   All assume that `fail_stack' is nonempty.  */
1281#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1282#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1283#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1284
1285/* Used to omit pushing failure point id's when we're not debugging.  */
1286#ifdef DEBUG
1287# define DEBUG_PUSH PUSH_FAILURE_INT
1288# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1289#else
1290# define DEBUG_PUSH(item)
1291# define DEBUG_POP(item_addr)
1292#endif
1293
1294
1295/* Push the information about the state we will need
1296   if we ever fail back to it.
1297
1298   Requires variables fail_stack, regstart, regend, reg_info, and
1299   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1300   be declared.
1301
1302   Does `return FAILURE_CODE' if runs out of memory.  */
1303
1304#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1305  do {									\
1306    char *destination;							\
1307    /* Must be int, so when we don't save any registers, the arithmetic	\
1308       of 0 + -1 isn't done as unsigned.  */				\
1309    /* Can't be int, since there is not a shred of a guarantee that int	\
1310       is wide enough to hold a value of something to which pointer can	\
1311       be assigned */							\
1312    active_reg_t this_reg;						\
1313    									\
1314    DEBUG_STATEMENT (failure_id++);					\
1315    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1316    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1317    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1318    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1319									\
1320    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1321    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1322									\
1323    /* Ensure we have enough space allocated for what we will push.  */	\
1324    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1325      {									\
1326        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1327          return failure_code;						\
1328									\
1329        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1330		       (fail_stack).size);				\
1331        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1332      }									\
1333									\
1334    /* Push the info, starting with the registers.  */			\
1335    DEBUG_PRINT1 ("\n");						\
1336									\
1337    if (1)								\
1338      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1339	   this_reg++)							\
1340	{								\
1341	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1342	  DEBUG_STATEMENT (num_regs_pushed++);				\
1343									\
1344	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1345	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1346									\
1347	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1348	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1349									\
1350	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1351			reg_info[this_reg].word.pointer);		\
1352	  DEBUG_PRINT2 (" match_null=%d",				\
1353			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1354	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1355	  DEBUG_PRINT2 (" matched_something=%d",			\
1356			MATCHED_SOMETHING (reg_info[this_reg]));	\
1357	  DEBUG_PRINT2 (" ever_matched=%d",				\
1358			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1359	  DEBUG_PRINT1 ("\n");						\
1360	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1361	}								\
1362									\
1363    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1364    PUSH_FAILURE_INT (lowest_active_reg);				\
1365									\
1366    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1367    PUSH_FAILURE_INT (highest_active_reg);				\
1368									\
1369    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1370    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1371    PUSH_FAILURE_POINTER (pattern_place);				\
1372									\
1373    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1374    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1375				 size2);				\
1376    DEBUG_PRINT1 ("'\n");						\
1377    PUSH_FAILURE_POINTER (string_place);				\
1378									\
1379    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1380    DEBUG_PUSH (failure_id);						\
1381  } while (0)
1382
1383/* This is the number of items that are pushed and popped on the stack
1384   for each register.  */
1385#define NUM_REG_ITEMS  3
1386
1387/* Individual items aside from the registers.  */
1388#ifdef DEBUG
1389# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1390#else
1391# define NUM_NONREG_ITEMS 4
1392#endif
1393
1394/* We push at most this many items on the stack.  */
1395/* We used to use (num_regs - 1), which is the number of registers
1396   this regexp will save; but that was changed to 5
1397   to avoid stack overflow for a regexp with lots of parens.  */
1398#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1399
1400/* We actually push this many items.  */
1401#define NUM_FAILURE_ITEMS				\
1402  (((0							\
1403     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1404    * NUM_REG_ITEMS)					\
1405   + NUM_NONREG_ITEMS)
1406
1407/* How many items can still be added to the stack without overflowing it.  */
1408#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1409
1410
1411/* Pops what PUSH_FAIL_STACK pushes.
1412
1413   We restore into the parameters, all of which should be lvalues:
1414     STR -- the saved data position.
1415     PAT -- the saved pattern position.
1416     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1417     REGSTART, REGEND -- arrays of string positions.
1418     REG_INFO -- array of information about each subexpression.
1419
1420   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1421   `pend', `string1', `size1', `string2', and `size2'.  */
1422
1423#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1424{									\
1425  DEBUG_STATEMENT (unsigned failure_id;)				\
1426  active_reg_t this_reg;						\
1427  const unsigned char *string_temp;					\
1428									\
1429  assert (!FAIL_STACK_EMPTY ());					\
1430									\
1431  /* Remove failure points and point to how many regs pushed.  */	\
1432  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1433  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1434  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1435									\
1436  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1437									\
1438  DEBUG_POP (&failure_id);						\
1439  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1440									\
1441  /* If the saved string location is NULL, it came from an		\
1442     on_failure_keep_string_jump opcode, and we want to throw away the	\
1443     saved NULL, thus retaining our current position in the string.  */	\
1444  string_temp = POP_FAILURE_POINTER ();					\
1445  if (string_temp != NULL)						\
1446    str = (const char *) string_temp;					\
1447									\
1448  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1449  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1450  DEBUG_PRINT1 ("'\n");							\
1451									\
1452  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1453  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1454  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1455									\
1456  /* Restore register info.  */						\
1457  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1458  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1459									\
1460  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1461  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1462									\
1463  if (1)								\
1464    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1465      {									\
1466	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1467									\
1468	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1469	DEBUG_PRINT2 ("      info: %p\n",				\
1470		      reg_info[this_reg].word.pointer);			\
1471									\
1472	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1473	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1474									\
1475	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1476	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1477      }									\
1478  else									\
1479    {									\
1480      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1481	{								\
1482	  reg_info[this_reg].word.integer = 0;				\
1483	  regend[this_reg] = 0;						\
1484	  regstart[this_reg] = 0;					\
1485	}								\
1486      highest_active_reg = high_reg;					\
1487    }									\
1488									\
1489  set_regs_matched_done = 0;						\
1490  DEBUG_STATEMENT (nfailure_points_popped++);				\
1491} /* POP_FAILURE_POINT */
1492
1493
1494
1495/* Structure for per-register (a.k.a. per-group) information.
1496   Other register information, such as the
1497   starting and ending positions (which are addresses), and the list of
1498   inner groups (which is a bits list) are maintained in separate
1499   variables.
1500
1501   We are making a (strictly speaking) nonportable assumption here: that
1502   the compiler will pack our bit fields into something that fits into
1503   the type of `word', i.e., is something that fits into one item on the
1504   failure stack.  */
1505
1506
1507/* Declarations and macros for re_match_2.  */
1508
1509typedef union
1510{
1511  fail_stack_elt_t word;
1512  struct
1513  {
1514      /* This field is one if this group can match the empty string,
1515         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1516#define MATCH_NULL_UNSET_VALUE 3
1517    unsigned match_null_string_p : 2;
1518    unsigned is_active : 1;
1519    unsigned matched_something : 1;
1520    unsigned ever_matched_something : 1;
1521  } bits;
1522} register_info_type;
1523
1524#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1525#define IS_ACTIVE(R)  ((R).bits.is_active)
1526#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1527#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1528
1529
1530/* Call this when have matched a real character; it sets `matched' flags
1531   for the subexpressions which we are currently inside.  Also records
1532   that those subexprs have matched.  */
1533#define SET_REGS_MATCHED()						\
1534  do									\
1535    {									\
1536      if (!set_regs_matched_done)					\
1537	{								\
1538	  active_reg_t r;						\
1539	  set_regs_matched_done = 1;					\
1540	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1541	    {								\
1542	      MATCHED_SOMETHING (reg_info[r])				\
1543		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1544		= 1;							\
1545	    }								\
1546	}								\
1547    }									\
1548  while (0)
1549
1550/* Registers are set to a sentinel when they haven't yet matched.  */
1551static char reg_unset_dummy;
1552#define REG_UNSET_VALUE (&reg_unset_dummy)
1553#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1554
1555/* Subroutine declarations and macros for regex_compile.  */
1556
1557static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1558					      reg_syntax_t syntax,
1559					      struct re_pattern_buffer *bufp));
1560static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1561static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1562				 int arg1, int arg2));
1563static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1564				  int arg, unsigned char *end));
1565static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1566				  int arg1, int arg2, unsigned char *end));
1567static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1568					   reg_syntax_t syntax));
1569static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1570					   reg_syntax_t syntax));
1571static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1572					      const char *pend,
1573					      char *translate,
1574					      reg_syntax_t syntax,
1575					      unsigned char *b));
1576
1577/* Fetch the next character in the uncompiled pattern---translating it
1578   if necessary.  Also cast from a signed character in the constant
1579   string passed to us by the user to an unsigned char that we can use
1580   as an array index (in, e.g., `translate').  */
1581#ifndef PATFETCH
1582# define PATFETCH(c)							\
1583  do {if (p == pend) return REG_EEND;					\
1584    c = (unsigned char) *p++;						\
1585    if (translate) c = (unsigned char) translate[c];			\
1586  } while (0)
1587#endif
1588
1589/* Fetch the next character in the uncompiled pattern, with no
1590   translation.  */
1591#define PATFETCH_RAW(c)							\
1592  do {if (p == pend) return REG_EEND;					\
1593    c = (unsigned char) *p++; 						\
1594  } while (0)
1595
1596/* Go backwards one character in the pattern.  */
1597#define PATUNFETCH p--
1598
1599
1600/* If `translate' is non-null, return translate[D], else just D.  We
1601   cast the subscript to translate because some data is declared as
1602   `char *', to avoid warnings when a string constant is passed.  But
1603   when we use a character as a subscript we must make it unsigned.  */
1604#ifndef TRANSLATE
1605# define TRANSLATE(d) \
1606  (translate ? (char) translate[(unsigned char) (d)] : (d))
1607#endif
1608
1609
1610/* Macros for outputting the compiled pattern into `buffer'.  */
1611
1612/* If the buffer isn't allocated when it comes in, use this.  */
1613#define INIT_BUF_SIZE  32
1614
1615/* Make sure we have at least N more bytes of space in buffer.  */
1616#define GET_BUFFER_SPACE(n)						\
1617    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1618      EXTEND_BUFFER ()
1619
1620/* Make sure we have one more byte of buffer space and then add C to it.  */
1621#define BUF_PUSH(c)							\
1622  do {									\
1623    GET_BUFFER_SPACE (1);						\
1624    *b++ = (unsigned char) (c);						\
1625  } while (0)
1626
1627
1628/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1629#define BUF_PUSH_2(c1, c2)						\
1630  do {									\
1631    GET_BUFFER_SPACE (2);						\
1632    *b++ = (unsigned char) (c1);					\
1633    *b++ = (unsigned char) (c2);					\
1634  } while (0)
1635
1636
1637/* As with BUF_PUSH_2, except for three bytes.  */
1638#define BUF_PUSH_3(c1, c2, c3)						\
1639  do {									\
1640    GET_BUFFER_SPACE (3);						\
1641    *b++ = (unsigned char) (c1);					\
1642    *b++ = (unsigned char) (c2);					\
1643    *b++ = (unsigned char) (c3);					\
1644  } while (0)
1645
1646
1647/* Store a jump with opcode OP at LOC to location TO.  We store a
1648   relative address offset by the three bytes the jump itself occupies.  */
1649#define STORE_JUMP(op, loc, to) \
1650  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1651
1652/* Likewise, for a two-argument jump.  */
1653#define STORE_JUMP2(op, loc, to, arg) \
1654  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1655
1656/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1657#define INSERT_JUMP(op, loc, to) \
1658  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1659
1660/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1661#define INSERT_JUMP2(op, loc, to, arg) \
1662  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1663
1664
1665/* This is not an arbitrary limit: the arguments which represent offsets
1666   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1667   be too small, many things would have to change.  */
1668/* Any other compiler which, like MSC, has allocation limit below 2^16
1669   bytes will have to use approach similar to what was done below for
1670   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1671   reallocating to 0 bytes.  Such thing is not going to work too well.
1672   You have been warned!!  */
1673#if defined _MSC_VER  && !defined WIN32
1674/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1675   The REALLOC define eliminates a flurry of conversion warnings,
1676   but is not required. */
1677# define MAX_BUF_SIZE  65500L
1678# define REALLOC(p,s) realloc ((p), (size_t) (s))
1679#else
1680# define MAX_BUF_SIZE (1L << 16)
1681# define REALLOC(p,s) realloc ((p), (s))
1682#endif
1683
1684/* Extend the buffer by twice its current size via realloc and
1685   reset the pointers that pointed into the old block to point to the
1686   correct places in the new one.  If extending the buffer results in it
1687   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1688#define EXTEND_BUFFER()							\
1689  do { 									\
1690    unsigned char *old_buffer = bufp->buffer;				\
1691    if (bufp->allocated == MAX_BUF_SIZE) 				\
1692      return REG_ESIZE;							\
1693    bufp->allocated <<= 1;						\
1694    if (bufp->allocated > MAX_BUF_SIZE)					\
1695      bufp->allocated = MAX_BUF_SIZE; 					\
1696    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1697    if (bufp->buffer == NULL)						\
1698      return REG_ESPACE;						\
1699    /* If the buffer moved, move all the pointers into it.  */		\
1700    if (old_buffer != bufp->buffer)					\
1701      {									\
1702        b = (b - old_buffer) + bufp->buffer;				\
1703        begalt = (begalt - old_buffer) + bufp->buffer;			\
1704        if (fixup_alt_jump)						\
1705          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1706        if (laststart)							\
1707          laststart = (laststart - old_buffer) + bufp->buffer;		\
1708        if (pending_exact)						\
1709          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1710      }									\
1711  } while (0)
1712
1713
1714/* Since we have one byte reserved for the register number argument to
1715   {start,stop}_memory, the maximum number of groups we can report
1716   things about is what fits in that byte.  */
1717#define MAX_REGNUM 255
1718
1719/* But patterns can have more than `MAX_REGNUM' registers.  We just
1720   ignore the excess.  */
1721typedef unsigned regnum_t;
1722
1723
1724/* Macros for the compile stack.  */
1725
1726/* Since offsets can go either forwards or backwards, this type needs to
1727   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1728/* int may be not enough when sizeof(int) == 2.  */
1729typedef long pattern_offset_t;
1730
1731typedef struct
1732{
1733  pattern_offset_t begalt_offset;
1734  pattern_offset_t fixup_alt_jump;
1735  pattern_offset_t inner_group_offset;
1736  pattern_offset_t laststart_offset;
1737  regnum_t regnum;
1738} compile_stack_elt_t;
1739
1740
1741typedef struct
1742{
1743  compile_stack_elt_t *stack;
1744  unsigned size;
1745  unsigned avail;			/* Offset of next open position.  */
1746} compile_stack_type;
1747
1748
1749#define INIT_COMPILE_STACK_SIZE 32
1750
1751#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1752#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1753
1754/* The next available element.  */
1755#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1756
1757
1758/* Set the bit for character C in a list.  */
1759#define SET_LIST_BIT(c)                               \
1760  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1761   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1762
1763
1764/* Get the next unsigned number in the uncompiled pattern.  */
1765#define GET_UNSIGNED_NUMBER(num) 					\
1766  { if (p != pend)							\
1767     {									\
1768       PATFETCH (c); 							\
1769       while (ISDIGIT (c)) 						\
1770         { 								\
1771           if (num < 0)							\
1772              num = 0;							\
1773           num = num * 10 + c - '0'; 					\
1774           if (p == pend) 						\
1775              break; 							\
1776           PATFETCH (c);						\
1777         } 								\
1778       } 								\
1779    }
1780
1781#if defined _LIBC || WIDE_CHAR_SUPPORT
1782/* The GNU C library provides support for user-defined character classes
1783   and the functions from ISO C amendement 1.  */
1784# ifdef CHARCLASS_NAME_MAX
1785#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1786# else
1787/* This shouldn't happen but some implementation might still have this
1788   problem.  Use a reasonable default value.  */
1789#  define CHAR_CLASS_MAX_LENGTH 256
1790# endif
1791
1792# ifdef _LIBC
1793#  define IS_CHAR_CLASS(string) __wctype (string)
1794# else
1795#  define IS_CHAR_CLASS(string) wctype (string)
1796# endif
1797#else
1798# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1799
1800# define IS_CHAR_CLASS(string)						\
1801   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1802    || STREQ (string, "lower") || STREQ (string, "digit")		\
1803    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1804    || STREQ (string, "space") || STREQ (string, "print")		\
1805    || STREQ (string, "punct") || STREQ (string, "graph")		\
1806    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1807#endif
1808
1809#ifndef MATCH_MAY_ALLOCATE
1810
1811/* If we cannot allocate large objects within re_match_2_internal,
1812   we make the fail stack and register vectors global.
1813   The fail stack, we grow to the maximum size when a regexp
1814   is compiled.
1815   The register vectors, we adjust in size each time we
1816   compile a regexp, according to the number of registers it needs.  */
1817
1818static fail_stack_type fail_stack;
1819
1820/* Size with which the following vectors are currently allocated.
1821   That is so we can make them bigger as needed,
1822   but never make them smaller.  */
1823static int regs_allocated_size;
1824
1825static const char **     regstart, **     regend;
1826static const char ** old_regstart, ** old_regend;
1827static const char **best_regstart, **best_regend;
1828static register_info_type *reg_info;
1829static const char **reg_dummy;
1830static register_info_type *reg_info_dummy;
1831
1832/* Make the register vectors big enough for NUM_REGS registers,
1833   but don't make them smaller.  */
1834
1835static
1836regex_grow_registers (num_regs)
1837     int num_regs;
1838{
1839  if (num_regs > regs_allocated_size)
1840    {
1841      RETALLOC_IF (regstart,	 num_regs, const char *);
1842      RETALLOC_IF (regend,	 num_regs, const char *);
1843      RETALLOC_IF (old_regstart, num_regs, const char *);
1844      RETALLOC_IF (old_regend,	 num_regs, const char *);
1845      RETALLOC_IF (best_regstart, num_regs, const char *);
1846      RETALLOC_IF (best_regend,	 num_regs, const char *);
1847      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1848      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1849      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1850
1851      regs_allocated_size = num_regs;
1852    }
1853}
1854
1855#endif /* not MATCH_MAY_ALLOCATE */
1856
1857static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1858						 compile_stack,
1859						 regnum_t regnum));
1860
1861/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1862   Returns one of error codes defined in `regex.h', or zero for success.
1863
1864   Assumes the `allocated' (and perhaps `buffer') and `translate'
1865   fields are set in BUFP on entry.
1866
1867   If it succeeds, results are put in BUFP (if it returns an error, the
1868   contents of BUFP are undefined):
1869     `buffer' is the compiled pattern;
1870     `syntax' is set to SYNTAX;
1871     `used' is set to the length of the compiled pattern;
1872     `fastmap_accurate' is zero;
1873     `re_nsub' is the number of subexpressions in PATTERN;
1874     `not_bol' and `not_eol' are zero;
1875
1876   The `fastmap' and `newline_anchor' fields are neither
1877   examined nor set.  */
1878
1879/* Return, freeing storage we allocated.  */
1880#define FREE_STACK_RETURN(value)		\
1881  return (free (compile_stack.stack), value)
1882
1883static reg_errcode_t
1884regex_compile (pattern, size, syntax, bufp)
1885     const char *pattern;
1886     size_t size;
1887     reg_syntax_t syntax;
1888     struct re_pattern_buffer *bufp;
1889{
1890  /* We fetch characters from PATTERN here.  Even though PATTERN is
1891     `char *' (i.e., signed), we declare these variables as unsigned, so
1892     they can be reliably used as array indices.  */
1893  register unsigned char c, c1;
1894
1895  /* A random temporary spot in PATTERN.  */
1896  const char *p1;
1897
1898  /* Points to the end of the buffer, where we should append.  */
1899  register unsigned char *b;
1900
1901  /* Keeps track of unclosed groups.  */
1902  compile_stack_type compile_stack;
1903
1904  /* Points to the current (ending) position in the pattern.  */
1905  const char *p = pattern;
1906  const char *pend = pattern + size;
1907
1908  /* How to translate the characters in the pattern.  */
1909  RE_TRANSLATE_TYPE translate = bufp->translate;
1910
1911  /* Address of the count-byte of the most recently inserted `exactn'
1912     command.  This makes it possible to tell if a new exact-match
1913     character can be added to that command or if the character requires
1914     a new `exactn' command.  */
1915  unsigned char *pending_exact = 0;
1916
1917  /* Address of start of the most recently finished expression.
1918     This tells, e.g., postfix * where to find the start of its
1919     operand.  Reset at the beginning of groups and alternatives.  */
1920  unsigned char *laststart = 0;
1921
1922  /* Address of beginning of regexp, or inside of last group.  */
1923  unsigned char *begalt;
1924
1925  /* Place in the uncompiled pattern (i.e., the {) to
1926     which to go back if the interval is invalid.  */
1927  const char *beg_interval;
1928
1929  /* Address of the place where a forward jump should go to the end of
1930     the containing expression.  Each alternative of an `or' -- except the
1931     last -- ends with a forward jump of this sort.  */
1932  unsigned char *fixup_alt_jump = 0;
1933
1934  /* Counts open-groups as they are encountered.  Remembered for the
1935     matching close-group on the compile stack, so the same register
1936     number is put in the stop_memory as the start_memory.  */
1937  regnum_t regnum = 0;
1938
1939#ifdef DEBUG
1940  DEBUG_PRINT1 ("\nCompiling pattern: ");
1941  if (debug)
1942    {
1943      unsigned debug_count;
1944
1945      for (debug_count = 0; debug_count < size; debug_count++)
1946        putchar (pattern[debug_count]);
1947      putchar ('\n');
1948    }
1949#endif /* DEBUG */
1950
1951  /* Initialize the compile stack.  */
1952  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1953  if (compile_stack.stack == NULL)
1954    return REG_ESPACE;
1955
1956  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1957  compile_stack.avail = 0;
1958
1959  /* Initialize the pattern buffer.  */
1960  bufp->syntax = syntax;
1961  bufp->fastmap_accurate = 0;
1962  bufp->not_bol = bufp->not_eol = 0;
1963
1964  /* Set `used' to zero, so that if we return an error, the pattern
1965     printer (for debugging) will think there's no pattern.  We reset it
1966     at the end.  */
1967  bufp->used = 0;
1968
1969  /* Always count groups, whether or not bufp->no_sub is set.  */
1970  bufp->re_nsub = 0;
1971
1972#if !defined emacs && !defined SYNTAX_TABLE
1973  /* Initialize the syntax table.  */
1974   init_syntax_once ();
1975#endif
1976
1977  if (bufp->allocated == 0)
1978    {
1979      if (bufp->buffer)
1980	{ /* If zero allocated, but buffer is non-null, try to realloc
1981             enough space.  This loses if buffer's address is bogus, but
1982             that is the user's responsibility.  */
1983          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1984        }
1985      else
1986        { /* Caller did not allocate a buffer.  Do it for them.  */
1987          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1988        }
1989      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1990
1991      bufp->allocated = INIT_BUF_SIZE;
1992    }
1993
1994  begalt = b = bufp->buffer;
1995
1996  /* Loop through the uncompiled pattern until we're at the end.  */
1997  while (p != pend)
1998    {
1999      PATFETCH (c);
2000
2001      switch (c)
2002        {
2003        case '^':
2004          {
2005            if (   /* If at start of pattern, it's an operator.  */
2006                   p == pattern + 1
2007                   /* If context independent, it's an operator.  */
2008                || syntax & RE_CONTEXT_INDEP_ANCHORS
2009                   /* Otherwise, depends on what's come before.  */
2010                || at_begline_loc_p (pattern, p, syntax))
2011              BUF_PUSH (begline);
2012            else
2013              goto normal_char;
2014          }
2015          break;
2016
2017
2018        case '$':
2019          {
2020            if (   /* If at end of pattern, it's an operator.  */
2021                   p == pend
2022                   /* If context independent, it's an operator.  */
2023                || syntax & RE_CONTEXT_INDEP_ANCHORS
2024                   /* Otherwise, depends on what's next.  */
2025                || at_endline_loc_p (p, pend, syntax))
2026               BUF_PUSH (endline);
2027             else
2028               goto normal_char;
2029           }
2030           break;
2031
2032
2033	case '+':
2034        case '?':
2035          if ((syntax & RE_BK_PLUS_QM)
2036              || (syntax & RE_LIMITED_OPS))
2037            goto normal_char;
2038        handle_plus:
2039        case '*':
2040          /* If there is no previous pattern... */
2041          if (!laststart)
2042            {
2043              if (syntax & RE_CONTEXT_INVALID_OPS)
2044                FREE_STACK_RETURN (REG_BADRPT);
2045              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2046                goto normal_char;
2047            }
2048
2049          {
2050            /* Are we optimizing this jump?  */
2051            boolean keep_string_p = false;
2052
2053            /* 1 means zero (many) matches is allowed.  */
2054            char zero_times_ok = 0, many_times_ok = 0;
2055
2056            /* If there is a sequence of repetition chars, collapse it
2057               down to just one (the right one).  We can't combine
2058               interval operators with these because of, e.g., `a{2}*',
2059               which should only match an even number of `a's.  */
2060
2061            for (;;)
2062              {
2063                zero_times_ok |= c != '+';
2064                many_times_ok |= c != '?';
2065
2066                if (p == pend)
2067                  break;
2068
2069                PATFETCH (c);
2070
2071                if (c == '*'
2072                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2073                  ;
2074
2075                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2076                  {
2077                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2078
2079                    PATFETCH (c1);
2080                    if (!(c1 == '+' || c1 == '?'))
2081                      {
2082                        PATUNFETCH;
2083                        PATUNFETCH;
2084                        break;
2085                      }
2086
2087                    c = c1;
2088                  }
2089                else
2090                  {
2091                    PATUNFETCH;
2092                    break;
2093                  }
2094
2095                /* If we get here, we found another repeat character.  */
2096               }
2097
2098            /* Star, etc. applied to an empty pattern is equivalent
2099               to an empty pattern.  */
2100            if (!laststart)
2101              break;
2102
2103            /* Now we know whether or not zero matches is allowed
2104               and also whether or not two or more matches is allowed.  */
2105            if (many_times_ok)
2106              { /* More than one repetition is allowed, so put in at the
2107                   end a backward relative jump from `b' to before the next
2108                   jump we're going to put in below (which jumps from
2109                   laststart to after this jump).
2110
2111                   But if we are at the `*' in the exact sequence `.*\n',
2112                   insert an unconditional jump backwards to the .,
2113                   instead of the beginning of the loop.  This way we only
2114                   push a failure point once, instead of every time
2115                   through the loop.  */
2116                assert (p - 1 > pattern);
2117
2118                /* Allocate the space for the jump.  */
2119                GET_BUFFER_SPACE (3);
2120
2121                /* We know we are not at the first character of the pattern,
2122                   because laststart was nonzero.  And we've already
2123                   incremented `p', by the way, to be the character after
2124                   the `*'.  Do we have to do something analogous here
2125                   for null bytes, because of RE_DOT_NOT_NULL?  */
2126                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2127		    && zero_times_ok
2128                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2129                    && !(syntax & RE_DOT_NEWLINE))
2130                  { /* We have .*\n.  */
2131                    STORE_JUMP (jump, b, laststart);
2132                    keep_string_p = true;
2133                  }
2134                else
2135                  /* Anything else.  */
2136                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2137
2138                /* We've added more stuff to the buffer.  */
2139                b += 3;
2140              }
2141
2142            /* On failure, jump from laststart to b + 3, which will be the
2143               end of the buffer after this jump is inserted.  */
2144            GET_BUFFER_SPACE (3);
2145            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2146                                       : on_failure_jump,
2147                         laststart, b + 3);
2148            pending_exact = 0;
2149            b += 3;
2150
2151            if (!zero_times_ok)
2152              {
2153                /* At least one repetition is required, so insert a
2154                   `dummy_failure_jump' before the initial
2155                   `on_failure_jump' instruction of the loop. This
2156                   effects a skip over that instruction the first time
2157                   we hit that loop.  */
2158                GET_BUFFER_SPACE (3);
2159                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2160                b += 3;
2161              }
2162            }
2163	  break;
2164
2165
2166	case '.':
2167          laststart = b;
2168          BUF_PUSH (anychar);
2169          break;
2170
2171
2172        case '[':
2173          {
2174            boolean had_char_class = false;
2175
2176            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2177
2178            /* Ensure that we have enough space to push a charset: the
2179               opcode, the length count, and the bitset; 34 bytes in all.  */
2180	    GET_BUFFER_SPACE (34);
2181
2182            laststart = b;
2183
2184            /* We test `*p == '^' twice, instead of using an if
2185               statement, so we only need one BUF_PUSH.  */
2186            BUF_PUSH (*p == '^' ? charset_not : charset);
2187            if (*p == '^')
2188              p++;
2189
2190            /* Remember the first position in the bracket expression.  */
2191            p1 = p;
2192
2193            /* Push the number of bytes in the bitmap.  */
2194            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2195
2196            /* Clear the whole map.  */
2197            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2198
2199            /* charset_not matches newline according to a syntax bit.  */
2200            if ((re_opcode_t) b[-2] == charset_not
2201                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2202              SET_LIST_BIT ('\n');
2203
2204            /* Read in characters and ranges, setting map bits.  */
2205            for (;;)
2206              {
2207                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2208
2209                PATFETCH (c);
2210
2211                /* \ might escape characters inside [...] and [^...].  */
2212                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2213                  {
2214                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2215
2216                    PATFETCH (c1);
2217                    SET_LIST_BIT (c1);
2218                    continue;
2219                  }
2220
2221                /* Could be the end of the bracket expression.  If it's
2222                   not (i.e., when the bracket expression is `[]' so
2223                   far), the ']' character bit gets set way below.  */
2224                if (c == ']' && p != p1 + 1)
2225                  break;
2226
2227                /* Look ahead to see if it's a range when the last thing
2228                   was a character class.  */
2229                if (had_char_class && c == '-' && *p != ']')
2230                  FREE_STACK_RETURN (REG_ERANGE);
2231
2232                /* Look ahead to see if it's a range when the last thing
2233                   was a character: if this is a hyphen not at the
2234                   beginning or the end of a list, then it's the range
2235                   operator.  */
2236                if (c == '-'
2237                    && !(p - 2 >= pattern && p[-2] == '[')
2238                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2239                    && *p != ']')
2240                  {
2241                    reg_errcode_t ret
2242                      = compile_range (&p, pend, translate, syntax, b);
2243                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2244                  }
2245
2246                else if (p[0] == '-' && p[1] != ']')
2247                  { /* This handles ranges made up of characters only.  */
2248                    reg_errcode_t ret;
2249
2250		    /* Move past the `-'.  */
2251                    PATFETCH (c1);
2252
2253                    ret = compile_range (&p, pend, translate, syntax, b);
2254                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2255                  }
2256
2257                /* See if we're at the beginning of a possible character
2258                   class.  */
2259
2260                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2261                  { /* Leave room for the null.  */
2262                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2263
2264                    PATFETCH (c);
2265                    c1 = 0;
2266
2267                    /* If pattern is `[[:'.  */
2268                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2269
2270                    for (;;)
2271                      {
2272                        PATFETCH (c);
2273                        if ((c == ':' && *p == ']') || p == pend)
2274                          break;
2275			if (c1 < CHAR_CLASS_MAX_LENGTH)
2276			  str[c1++] = c;
2277			else
2278			  /* This is in any case an invalid class name.  */
2279			  str[0] = '\0';
2280                      }
2281                    str[c1] = '\0';
2282
2283                    /* If isn't a word bracketed by `[:' and `:]':
2284                       undo the ending character, the letters, and leave
2285                       the leading `:' and `[' (but set bits for them).  */
2286                    if (c == ':' && *p == ']')
2287                      {
2288#if defined _LIBC || WIDE_CHAR_SUPPORT
2289                        boolean is_lower = STREQ (str, "lower");
2290                        boolean is_upper = STREQ (str, "upper");
2291			wctype_t wt;
2292                        int ch;
2293
2294			wt = IS_CHAR_CLASS (str);
2295			if (wt == 0)
2296			  FREE_STACK_RETURN (REG_ECTYPE);
2297
2298                        /* Throw away the ] at the end of the character
2299                           class.  */
2300                        PATFETCH (c);
2301
2302                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2303
2304                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2305			  {
2306# ifdef _LIBC
2307			    if (__iswctype (__btowc (ch), wt))
2308			      SET_LIST_BIT (ch);
2309# else
2310			    if (iswctype (btowc (ch), wt))
2311			      SET_LIST_BIT (ch);
2312# endif
2313
2314			    if (translate && (is_upper || is_lower)
2315				&& (ISUPPER (ch) || ISLOWER (ch)))
2316			      SET_LIST_BIT (ch);
2317			  }
2318
2319                        had_char_class = true;
2320#else
2321                        int ch;
2322                        boolean is_alnum = STREQ (str, "alnum");
2323                        boolean is_alpha = STREQ (str, "alpha");
2324                        boolean is_blank = STREQ (str, "blank");
2325                        boolean is_cntrl = STREQ (str, "cntrl");
2326                        boolean is_digit = STREQ (str, "digit");
2327                        boolean is_graph = STREQ (str, "graph");
2328                        boolean is_lower = STREQ (str, "lower");
2329                        boolean is_print = STREQ (str, "print");
2330                        boolean is_punct = STREQ (str, "punct");
2331                        boolean is_space = STREQ (str, "space");
2332                        boolean is_upper = STREQ (str, "upper");
2333                        boolean is_xdigit = STREQ (str, "xdigit");
2334
2335                        if (!IS_CHAR_CLASS (str))
2336			  FREE_STACK_RETURN (REG_ECTYPE);
2337
2338                        /* Throw away the ] at the end of the character
2339                           class.  */
2340                        PATFETCH (c);
2341
2342                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2343
2344                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2345                          {
2346			    /* This was split into 3 if's to
2347			       avoid an arbitrary limit in some compiler.  */
2348                            if (   (is_alnum  && ISALNUM (ch))
2349                                || (is_alpha  && ISALPHA (ch))
2350                                || (is_blank  && ISBLANK (ch))
2351                                || (is_cntrl  && ISCNTRL (ch)))
2352			      SET_LIST_BIT (ch);
2353			    if (   (is_digit  && ISDIGIT (ch))
2354                                || (is_graph  && ISGRAPH (ch))
2355                                || (is_lower  && ISLOWER (ch))
2356                                || (is_print  && ISPRINT (ch)))
2357			      SET_LIST_BIT (ch);
2358			    if (   (is_punct  && ISPUNCT (ch))
2359                                || (is_space  && ISSPACE (ch))
2360                                || (is_upper  && ISUPPER (ch))
2361                                || (is_xdigit && ISXDIGIT (ch)))
2362			      SET_LIST_BIT (ch);
2363			    if (   translate && (is_upper || is_lower)
2364				&& (ISUPPER (ch) || ISLOWER (ch)))
2365			      SET_LIST_BIT (ch);
2366                          }
2367                        had_char_class = true;
2368#endif	/* libc || wctype.h */
2369                      }
2370                    else
2371                      {
2372                        c1++;
2373                        while (c1--)
2374                          PATUNFETCH;
2375                        SET_LIST_BIT ('[');
2376                        SET_LIST_BIT (':');
2377                        had_char_class = false;
2378                      }
2379                  }
2380                else
2381                  {
2382                    had_char_class = false;
2383                    SET_LIST_BIT (c);
2384                  }
2385              }
2386
2387            /* Discard any (non)matching list bytes that are all 0 at the
2388               end of the map.  Decrease the map-length byte too.  */
2389            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2390              b[-1]--;
2391            b += b[-1];
2392          }
2393          break;
2394
2395
2396	case '(':
2397          if (syntax & RE_NO_BK_PARENS)
2398            goto handle_open;
2399          else
2400            goto normal_char;
2401
2402
2403        case ')':
2404          if (syntax & RE_NO_BK_PARENS)
2405            goto handle_close;
2406          else
2407            goto normal_char;
2408
2409
2410        case '\n':
2411          if (syntax & RE_NEWLINE_ALT)
2412            goto handle_alt;
2413          else
2414            goto normal_char;
2415
2416
2417	case '|':
2418          if (syntax & RE_NO_BK_VBAR)
2419            goto handle_alt;
2420          else
2421            goto normal_char;
2422
2423
2424        case '{':
2425           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2426             goto handle_interval;
2427           else
2428             goto normal_char;
2429
2430
2431        case '\\':
2432          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2433
2434          /* Do not translate the character after the \, so that we can
2435             distinguish, e.g., \B from \b, even if we normally would
2436             translate, e.g., B to b.  */
2437          PATFETCH_RAW (c);
2438
2439          switch (c)
2440            {
2441            case '(':
2442              if (syntax & RE_NO_BK_PARENS)
2443                goto normal_backslash;
2444
2445            handle_open:
2446              bufp->re_nsub++;
2447              regnum++;
2448
2449              if (COMPILE_STACK_FULL)
2450                {
2451                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2452                            compile_stack_elt_t);
2453                  if (compile_stack.stack == NULL) return REG_ESPACE;
2454
2455                  compile_stack.size <<= 1;
2456                }
2457
2458              /* These are the values to restore when we hit end of this
2459                 group.  They are all relative offsets, so that if the
2460                 whole pattern moves because of realloc, they will still
2461                 be valid.  */
2462              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2463              COMPILE_STACK_TOP.fixup_alt_jump
2464                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2465              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2466              COMPILE_STACK_TOP.regnum = regnum;
2467
2468              /* We will eventually replace the 0 with the number of
2469                 groups inner to this one.  But do not push a
2470                 start_memory for groups beyond the last one we can
2471                 represent in the compiled pattern.  */
2472              if (regnum <= MAX_REGNUM)
2473                {
2474                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2475                  BUF_PUSH_3 (start_memory, regnum, 0);
2476                }
2477
2478              compile_stack.avail++;
2479
2480              fixup_alt_jump = 0;
2481              laststart = 0;
2482              begalt = b;
2483	      /* If we've reached MAX_REGNUM groups, then this open
2484		 won't actually generate any code, so we'll have to
2485		 clear pending_exact explicitly.  */
2486	      pending_exact = 0;
2487              break;
2488
2489
2490            case ')':
2491              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2492
2493              if (COMPILE_STACK_EMPTY)
2494		{
2495		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2496		    goto normal_backslash;
2497		  else
2498		    FREE_STACK_RETURN (REG_ERPAREN);
2499		}
2500
2501            handle_close:
2502              if (fixup_alt_jump)
2503                { /* Push a dummy failure point at the end of the
2504                     alternative for a possible future
2505                     `pop_failure_jump' to pop.  See comments at
2506                     `push_dummy_failure' in `re_match_2'.  */
2507                  BUF_PUSH (push_dummy_failure);
2508
2509                  /* We allocated space for this jump when we assigned
2510                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2511                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2512                }
2513
2514              /* See similar code for backslashed left paren above.  */
2515              if (COMPILE_STACK_EMPTY)
2516		{
2517		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2518		    goto normal_char;
2519		  else
2520		    FREE_STACK_RETURN (REG_ERPAREN);
2521		}
2522
2523              /* Since we just checked for an empty stack above, this
2524                 ``can't happen''.  */
2525              assert (compile_stack.avail != 0);
2526              {
2527                /* We don't just want to restore into `regnum', because
2528                   later groups should continue to be numbered higher,
2529                   as in `(ab)c(de)' -- the second group is #2.  */
2530                regnum_t this_group_regnum;
2531
2532                compile_stack.avail--;
2533                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2534                fixup_alt_jump
2535                  = COMPILE_STACK_TOP.fixup_alt_jump
2536                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2537                    : 0;
2538                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2539                this_group_regnum = COMPILE_STACK_TOP.regnum;
2540		/* If we've reached MAX_REGNUM groups, then this open
2541		   won't actually generate any code, so we'll have to
2542		   clear pending_exact explicitly.  */
2543		pending_exact = 0;
2544
2545                /* We're at the end of the group, so now we know how many
2546                   groups were inside this one.  */
2547                if (this_group_regnum <= MAX_REGNUM)
2548                  {
2549                    unsigned char *inner_group_loc
2550                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2551
2552                    *inner_group_loc = regnum - this_group_regnum;
2553                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2554                                regnum - this_group_regnum);
2555                  }
2556              }
2557              break;
2558
2559
2560            case '|':					/* `\|'.  */
2561              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2562                goto normal_backslash;
2563            handle_alt:
2564              if (syntax & RE_LIMITED_OPS)
2565                goto normal_char;
2566
2567              /* Insert before the previous alternative a jump which
2568                 jumps to this alternative if the former fails.  */
2569              GET_BUFFER_SPACE (3);
2570              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2571              pending_exact = 0;
2572              b += 3;
2573
2574              /* The alternative before this one has a jump after it
2575                 which gets executed if it gets matched.  Adjust that
2576                 jump so it will jump to this alternative's analogous
2577                 jump (put in below, which in turn will jump to the next
2578                 (if any) alternative's such jump, etc.).  The last such
2579                 jump jumps to the correct final destination.  A picture:
2580                          _____ _____
2581                          |   | |   |
2582                          |   v |   v
2583                         a | b   | c
2584
2585                 If we are at `b', then fixup_alt_jump right now points to a
2586                 three-byte space after `a'.  We'll put in the jump, set
2587                 fixup_alt_jump to right after `b', and leave behind three
2588                 bytes which we'll fill in when we get to after `c'.  */
2589
2590              if (fixup_alt_jump)
2591                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2592
2593              /* Mark and leave space for a jump after this alternative,
2594                 to be filled in later either by next alternative or
2595                 when know we're at the end of a series of alternatives.  */
2596              fixup_alt_jump = b;
2597              GET_BUFFER_SPACE (3);
2598              b += 3;
2599
2600              laststart = 0;
2601              begalt = b;
2602              break;
2603
2604
2605            case '{':
2606              /* If \{ is a literal.  */
2607              if (!(syntax & RE_INTERVALS)
2608                     /* If we're at `\{' and it's not the open-interval
2609                        operator.  */
2610                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2611                  || (p - 2 == pattern  &&  p == pend))
2612                goto normal_backslash;
2613
2614            handle_interval:
2615              {
2616                /* If got here, then the syntax allows intervals.  */
2617
2618                /* At least (most) this many matches must be made.  */
2619                int lower_bound = -1, upper_bound = -1;
2620
2621                beg_interval = p - 1;
2622
2623                if (p == pend)
2624                  {
2625                    if (syntax & RE_NO_BK_BRACES)
2626                      goto unfetch_interval;
2627                    else
2628                      FREE_STACK_RETURN (REG_EBRACE);
2629                  }
2630
2631                GET_UNSIGNED_NUMBER (lower_bound);
2632
2633                if (c == ',')
2634                  {
2635                    GET_UNSIGNED_NUMBER (upper_bound);
2636                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2637                  }
2638                else
2639                  /* Interval such as `{1}' => match exactly once. */
2640                  upper_bound = lower_bound;
2641
2642                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2643                    || lower_bound > upper_bound)
2644                  {
2645                    if (syntax & RE_NO_BK_BRACES)
2646                      goto unfetch_interval;
2647                    else
2648                      FREE_STACK_RETURN (REG_BADBR);
2649                  }
2650
2651                if (!(syntax & RE_NO_BK_BRACES))
2652                  {
2653                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2654
2655                    PATFETCH (c);
2656                  }
2657
2658                if (c != '}')
2659                  {
2660                    if (syntax & RE_NO_BK_BRACES)
2661                      goto unfetch_interval;
2662                    else
2663                      FREE_STACK_RETURN (REG_BADBR);
2664                  }
2665
2666                /* We just parsed a valid interval.  */
2667
2668                /* If it's invalid to have no preceding re.  */
2669                if (!laststart)
2670                  {
2671                    if (syntax & RE_CONTEXT_INVALID_OPS)
2672                      FREE_STACK_RETURN (REG_BADRPT);
2673                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2674                      laststart = b;
2675                    else
2676                      goto unfetch_interval;
2677                  }
2678
2679                /* If the upper bound is zero, don't want to succeed at
2680                   all; jump from `laststart' to `b + 3', which will be
2681                   the end of the buffer after we insert the jump.  */
2682                 if (upper_bound == 0)
2683                   {
2684                     GET_BUFFER_SPACE (3);
2685                     INSERT_JUMP (jump, laststart, b + 3);
2686                     b += 3;
2687                   }
2688
2689                 /* Otherwise, we have a nontrivial interval.  When
2690                    we're all done, the pattern will look like:
2691                      set_number_at <jump count> <upper bound>
2692                      set_number_at <succeed_n count> <lower bound>
2693                      succeed_n <after jump addr> <succeed_n count>
2694                      <body of loop>
2695                      jump_n <succeed_n addr> <jump count>
2696                    (The upper bound and `jump_n' are omitted if
2697                    `upper_bound' is 1, though.)  */
2698                 else
2699                   { /* If the upper bound is > 1, we need to insert
2700                        more at the end of the loop.  */
2701                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2702
2703                     GET_BUFFER_SPACE (nbytes);
2704
2705                     /* Initialize lower bound of the `succeed_n', even
2706                        though it will be set during matching by its
2707                        attendant `set_number_at' (inserted next),
2708                        because `re_compile_fastmap' needs to know.
2709                        Jump to the `jump_n' we might insert below.  */
2710                     INSERT_JUMP2 (succeed_n, laststart,
2711                                   b + 5 + (upper_bound > 1) * 5,
2712                                   lower_bound);
2713                     b += 5;
2714
2715                     /* Code to initialize the lower bound.  Insert
2716                        before the `succeed_n'.  The `5' is the last two
2717                        bytes of this `set_number_at', plus 3 bytes of
2718                        the following `succeed_n'.  */
2719                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2720                     b += 5;
2721
2722                     if (upper_bound > 1)
2723                       { /* More than one repetition is allowed, so
2724                            append a backward jump to the `succeed_n'
2725                            that starts this interval.
2726
2727                            When we've reached this during matching,
2728                            we'll have matched the interval once, so
2729                            jump back only `upper_bound - 1' times.  */
2730                         STORE_JUMP2 (jump_n, b, laststart + 5,
2731                                      upper_bound - 1);
2732                         b += 5;
2733
2734                         /* The location we want to set is the second
2735                            parameter of the `jump_n'; that is `b-2' as
2736                            an absolute address.  `laststart' will be
2737                            the `set_number_at' we're about to insert;
2738                            `laststart+3' the number to set, the source
2739                            for the relative address.  But we are
2740                            inserting into the middle of the pattern --
2741                            so everything is getting moved up by 5.
2742                            Conclusion: (b - 2) - (laststart + 3) + 5,
2743                            i.e., b - laststart.
2744
2745                            We insert this at the beginning of the loop
2746                            so that if we fail during matching, we'll
2747                            reinitialize the bounds.  */
2748                         insert_op2 (set_number_at, laststart, b - laststart,
2749                                     upper_bound - 1, b);
2750                         b += 5;
2751                       }
2752                   }
2753                pending_exact = 0;
2754                beg_interval = NULL;
2755              }
2756              break;
2757
2758            unfetch_interval:
2759              /* If an invalid interval, match the characters as literals.  */
2760               assert (beg_interval);
2761               p = beg_interval;
2762               beg_interval = NULL;
2763
2764               /* normal_char and normal_backslash need `c'.  */
2765               PATFETCH (c);
2766
2767               if (!(syntax & RE_NO_BK_BRACES))
2768                 {
2769                   if (p > pattern  &&  p[-1] == '\\')
2770                     goto normal_backslash;
2771                 }
2772               goto normal_char;
2773
2774#ifdef emacs
2775            /* There is no way to specify the before_dot and after_dot
2776               operators.  rms says this is ok.  --karl  */
2777            case '=':
2778              BUF_PUSH (at_dot);
2779              break;
2780
2781            case 's':
2782              laststart = b;
2783              PATFETCH (c);
2784              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2785              break;
2786
2787            case 'S':
2788              laststart = b;
2789              PATFETCH (c);
2790              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2791              break;
2792#endif /* emacs */
2793
2794
2795            case 'w':
2796	      if (syntax & RE_NO_GNU_OPS)
2797		goto normal_char;
2798              laststart = b;
2799              BUF_PUSH (wordchar);
2800              break;
2801
2802
2803            case 'W':
2804	      if (syntax & RE_NO_GNU_OPS)
2805		goto normal_char;
2806              laststart = b;
2807              BUF_PUSH (notwordchar);
2808              break;
2809
2810
2811            case '<':
2812	      if (syntax & RE_NO_GNU_OPS)
2813		goto normal_char;
2814              BUF_PUSH (wordbeg);
2815              break;
2816
2817            case '>':
2818	      if (syntax & RE_NO_GNU_OPS)
2819		goto normal_char;
2820              BUF_PUSH (wordend);
2821              break;
2822
2823            case 'b':
2824	      if (syntax & RE_NO_GNU_OPS)
2825		goto normal_char;
2826              BUF_PUSH (wordbound);
2827              break;
2828
2829            case 'B':
2830	      if (syntax & RE_NO_GNU_OPS)
2831		goto normal_char;
2832              BUF_PUSH (notwordbound);
2833              break;
2834
2835            case '`':
2836	      if (syntax & RE_NO_GNU_OPS)
2837		goto normal_char;
2838              BUF_PUSH (begbuf);
2839              break;
2840
2841            case '\'':
2842	      if (syntax & RE_NO_GNU_OPS)
2843		goto normal_char;
2844              BUF_PUSH (endbuf);
2845              break;
2846
2847            case '1': case '2': case '3': case '4': case '5':
2848            case '6': case '7': case '8': case '9':
2849              if (syntax & RE_NO_BK_REFS)
2850                goto normal_char;
2851
2852              c1 = c - '0';
2853
2854              if (c1 > regnum)
2855                FREE_STACK_RETURN (REG_ESUBREG);
2856
2857              /* Can't back reference to a subexpression if inside of it.  */
2858              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2859                goto normal_char;
2860
2861              laststart = b;
2862              BUF_PUSH_2 (duplicate, c1);
2863              break;
2864
2865
2866            case '+':
2867            case '?':
2868              if (syntax & RE_BK_PLUS_QM)
2869                goto handle_plus;
2870              else
2871                goto normal_backslash;
2872
2873            default:
2874            normal_backslash:
2875              /* You might think it would be useful for \ to mean
2876                 not to translate; but if we don't translate it
2877                 it will never match anything.  */
2878              c = TRANSLATE (c);
2879              goto normal_char;
2880            }
2881          break;
2882
2883
2884	default:
2885        /* Expects the character in `c'.  */
2886	normal_char:
2887	      /* If no exactn currently being built.  */
2888          if (!pending_exact
2889
2890              /* If last exactn not at current position.  */
2891              || pending_exact + *pending_exact + 1 != b
2892
2893              /* We have only one byte following the exactn for the count.  */
2894	      || *pending_exact == (1 << BYTEWIDTH) - 1
2895
2896              /* If followed by a repetition operator.  */
2897              || *p == '*' || *p == '^'
2898	      || ((syntax & RE_BK_PLUS_QM)
2899		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2900		  : (*p == '+' || *p == '?'))
2901	      || ((syntax & RE_INTERVALS)
2902                  && ((syntax & RE_NO_BK_BRACES)
2903		      ? *p == '{'
2904                      : (p[0] == '\\' && p[1] == '{'))))
2905	    {
2906	      /* Start building a new exactn.  */
2907
2908              laststart = b;
2909
2910	      BUF_PUSH_2 (exactn, 0);
2911	      pending_exact = b - 1;
2912            }
2913
2914	  BUF_PUSH (c);
2915          (*pending_exact)++;
2916	  break;
2917        } /* switch (c) */
2918    } /* while p != pend */
2919
2920
2921  /* Through the pattern now.  */
2922
2923  if (fixup_alt_jump)
2924    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2925
2926  if (!COMPILE_STACK_EMPTY)
2927    FREE_STACK_RETURN (REG_EPAREN);
2928
2929  /* If we don't want backtracking, force success
2930     the first time we reach the end of the compiled pattern.  */
2931  if (syntax & RE_NO_POSIX_BACKTRACKING)
2932    BUF_PUSH (succeed);
2933
2934  free (compile_stack.stack);
2935
2936  /* We have succeeded; set the length of the buffer.  */
2937  bufp->used = b - bufp->buffer;
2938
2939#ifdef DEBUG
2940  if (debug)
2941    {
2942      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2943      print_compiled_pattern (bufp);
2944    }
2945#endif /* DEBUG */
2946
2947#ifndef MATCH_MAY_ALLOCATE
2948  /* Initialize the failure stack to the largest possible stack.  This
2949     isn't necessary unless we're trying to avoid calling alloca in
2950     the search and match routines.  */
2951  {
2952    int num_regs = bufp->re_nsub + 1;
2953
2954    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2955       is strictly greater than re_max_failures, the largest possible stack
2956       is 2 * re_max_failures failure points.  */
2957    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2958      {
2959	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2960
2961# ifdef emacs
2962	if (! fail_stack.stack)
2963	  fail_stack.stack
2964	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2965					    * sizeof (fail_stack_elt_t));
2966	else
2967	  fail_stack.stack
2968	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2969					     (fail_stack.size
2970					      * sizeof (fail_stack_elt_t)));
2971# else /* not emacs */
2972	if (! fail_stack.stack)
2973	  fail_stack.stack
2974	    = (fail_stack_elt_t *) malloc (fail_stack.size
2975					   * sizeof (fail_stack_elt_t));
2976	else
2977	  fail_stack.stack
2978	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2979					    (fail_stack.size
2980					     * sizeof (fail_stack_elt_t)));
2981# endif /* not emacs */
2982      }
2983
2984    regex_grow_registers (num_regs);
2985  }
2986#endif /* not MATCH_MAY_ALLOCATE */
2987
2988  return REG_NOERROR;
2989} /* regex_compile */
2990
2991/* Subroutines for `regex_compile'.  */
2992
2993/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2994
2995static void
2996store_op1 (op, loc, arg)
2997    re_opcode_t op;
2998    unsigned char *loc;
2999    int arg;
3000{
3001  *loc = (unsigned char) op;
3002  STORE_NUMBER (loc + 1, arg);
3003}
3004
3005
3006/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3007
3008static void
3009store_op2 (op, loc, arg1, arg2)
3010    re_opcode_t op;
3011    unsigned char *loc;
3012    int arg1, arg2;
3013{
3014  *loc = (unsigned char) op;
3015  STORE_NUMBER (loc + 1, arg1);
3016  STORE_NUMBER (loc + 3, arg2);
3017}
3018
3019
3020/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3021   for OP followed by two-byte integer parameter ARG.  */
3022
3023static void
3024insert_op1 (op, loc, arg, end)
3025    re_opcode_t op;
3026    unsigned char *loc;
3027    int arg;
3028    unsigned char *end;
3029{
3030  register unsigned char *pfrom = end;
3031  register unsigned char *pto = end + 3;
3032
3033  while (pfrom != loc)
3034    *--pto = *--pfrom;
3035
3036  store_op1 (op, loc, arg);
3037}
3038
3039
3040/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3041
3042static void
3043insert_op2 (op, loc, arg1, arg2, end)
3044    re_opcode_t op;
3045    unsigned char *loc;
3046    int arg1, arg2;
3047    unsigned char *end;
3048{
3049  register unsigned char *pfrom = end;
3050  register unsigned char *pto = end + 5;
3051
3052  while (pfrom != loc)
3053    *--pto = *--pfrom;
3054
3055  store_op2 (op, loc, arg1, arg2);
3056}
3057
3058
3059/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3060   after an alternative or a begin-subexpression.  We assume there is at
3061   least one character before the ^.  */
3062
3063static boolean
3064at_begline_loc_p (pattern, p, syntax)
3065    const char *pattern, *p;
3066    reg_syntax_t syntax;
3067{
3068  const char *prev = p - 2;
3069  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3070
3071  return
3072       /* After a subexpression?  */
3073       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3074       /* After an alternative?  */
3075    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3076}
3077
3078
3079/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3080   at least one character after the $, i.e., `P < PEND'.  */
3081
3082static boolean
3083at_endline_loc_p (p, pend, syntax)
3084    const char *p, *pend;
3085    reg_syntax_t syntax;
3086{
3087  const char *next = p;
3088  boolean next_backslash = *next == '\\';
3089  const char *next_next = p + 1 < pend ? p + 1 : 0;
3090
3091  return
3092       /* Before a subexpression?  */
3093       (syntax & RE_NO_BK_PARENS ? *next == ')'
3094        : next_backslash && next_next && *next_next == ')')
3095       /* Before an alternative?  */
3096    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3097        : next_backslash && next_next && *next_next == '|');
3098}
3099
3100
3101/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3102   false if it's not.  */
3103
3104static boolean
3105group_in_compile_stack (compile_stack, regnum)
3106    compile_stack_type compile_stack;
3107    regnum_t regnum;
3108{
3109  int this_element;
3110
3111  for (this_element = compile_stack.avail - 1;
3112       this_element >= 0;
3113       this_element--)
3114    if (compile_stack.stack[this_element].regnum == regnum)
3115      return true;
3116
3117  return false;
3118}
3119
3120
3121/* Read the ending character of a range (in a bracket expression) from the
3122   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3123   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3124   Then we set the translation of all bits between the starting and
3125   ending characters (inclusive) in the compiled pattern B.
3126
3127   Return an error code.
3128
3129   We use these short variable names so we can use the same macros as
3130   `regex_compile' itself.  */
3131
3132static reg_errcode_t
3133compile_range (p_ptr, pend, translate, syntax, b)
3134    const char **p_ptr, *pend;
3135    RE_TRANSLATE_TYPE translate;
3136    reg_syntax_t syntax;
3137    unsigned char *b;
3138{
3139  unsigned this_char;
3140
3141  const char *p = *p_ptr;
3142  unsigned int range_start, range_end;
3143
3144  if (p == pend)
3145    return REG_ERANGE;
3146
3147  /* Even though the pattern is a signed `char *', we need to fetch
3148     with unsigned char *'s; if the high bit of the pattern character
3149     is set, the range endpoints will be negative if we fetch using a
3150     signed char *.
3151
3152     We also want to fetch the endpoints without translating them; the
3153     appropriate translation is done in the bit-setting loop below.  */
3154  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3155  range_start = ((const unsigned char *) p)[-2];
3156  range_end   = ((const unsigned char *) p)[0];
3157
3158  /* Have to increment the pointer into the pattern string, so the
3159     caller isn't still at the ending character.  */
3160  (*p_ptr)++;
3161
3162  /* If the start is after the end, the range is empty.  */
3163  if (range_start > range_end)
3164    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3165
3166  /* Here we see why `this_char' has to be larger than an `unsigned
3167     char' -- the range is inclusive, so if `range_end' == 0xff
3168     (assuming 8-bit characters), we would otherwise go into an infinite
3169     loop, since all characters <= 0xff.  */
3170  for (this_char = range_start; this_char <= range_end; this_char++)
3171    {
3172      SET_LIST_BIT (TRANSLATE (this_char));
3173    }
3174
3175  return REG_NOERROR;
3176}
3177
3178/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3179   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3180   characters can start a string that matches the pattern.  This fastmap
3181   is used by re_search to skip quickly over impossible starting points.
3182
3183   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3184   area as BUFP->fastmap.
3185
3186   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3187   the pattern buffer.
3188
3189   Returns 0 if we succeed, -2 if an internal error.   */
3190
3191int
3192re_compile_fastmap (bufp)
3193     struct re_pattern_buffer *bufp;
3194{
3195  int j, k;
3196#ifdef MATCH_MAY_ALLOCATE
3197  fail_stack_type fail_stack;
3198#endif
3199#ifndef REGEX_MALLOC
3200  char *destination;
3201#endif
3202
3203  register char *fastmap = bufp->fastmap;
3204  unsigned char *pattern = bufp->buffer;
3205  unsigned char *p = pattern;
3206  register unsigned char *pend = pattern + bufp->used;
3207
3208#ifdef REL_ALLOC
3209  /* This holds the pointer to the failure stack, when
3210     it is allocated relocatably.  */
3211  fail_stack_elt_t *failure_stack_ptr;
3212#endif
3213
3214  /* Assume that each path through the pattern can be null until
3215     proven otherwise.  We set this false at the bottom of switch
3216     statement, to which we get only if a particular path doesn't
3217     match the empty string.  */
3218  boolean path_can_be_null = true;
3219
3220  /* We aren't doing a `succeed_n' to begin with.  */
3221  boolean succeed_n_p = false;
3222
3223  assert (fastmap != NULL && p != NULL);
3224
3225  INIT_FAIL_STACK ();
3226  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3227  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3228  bufp->can_be_null = 0;
3229
3230  while (1)
3231    {
3232      if (p == pend || *p == succeed)
3233	{
3234	  /* We have reached the (effective) end of pattern.  */
3235	  if (!FAIL_STACK_EMPTY ())
3236	    {
3237	      bufp->can_be_null |= path_can_be_null;
3238
3239	      /* Reset for next path.  */
3240	      path_can_be_null = true;
3241
3242	      p = fail_stack.stack[--fail_stack.avail].pointer;
3243
3244	      continue;
3245	    }
3246	  else
3247	    break;
3248	}
3249
3250      /* We should never be about to go beyond the end of the pattern.  */
3251      assert (p < pend);
3252
3253      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3254	{
3255
3256        /* I guess the idea here is to simply not bother with a fastmap
3257           if a backreference is used, since it's too hard to figure out
3258           the fastmap for the corresponding group.  Setting
3259           `can_be_null' stops `re_search_2' from using the fastmap, so
3260           that is all we do.  */
3261	case duplicate:
3262	  bufp->can_be_null = 1;
3263          goto done;
3264
3265
3266      /* Following are the cases which match a character.  These end
3267         with `break'.  */
3268
3269	case exactn:
3270          fastmap[p[1]] = 1;
3271	  break;
3272
3273
3274        case charset:
3275          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3276	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3277              fastmap[j] = 1;
3278	  break;
3279
3280
3281	case charset_not:
3282	  /* Chars beyond end of map must be allowed.  */
3283	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3284            fastmap[j] = 1;
3285
3286	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3287	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3288              fastmap[j] = 1;
3289          break;
3290
3291
3292	case wordchar:
3293	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3294	    if (SYNTAX (j) == Sword)
3295	      fastmap[j] = 1;
3296	  break;
3297
3298
3299	case notwordchar:
3300	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3301	    if (SYNTAX (j) != Sword)
3302	      fastmap[j] = 1;
3303	  break;
3304
3305
3306        case anychar:
3307	  {
3308	    int fastmap_newline = fastmap['\n'];
3309
3310	    /* `.' matches anything ...  */
3311	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3312	      fastmap[j] = 1;
3313
3314	    /* ... except perhaps newline.  */
3315	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3316	      fastmap['\n'] = fastmap_newline;
3317
3318	    /* Return if we have already set `can_be_null'; if we have,
3319	       then the fastmap is irrelevant.  Something's wrong here.  */
3320	    else if (bufp->can_be_null)
3321	      goto done;
3322
3323	    /* Otherwise, have to check alternative paths.  */
3324	    break;
3325	  }
3326
3327#ifdef emacs
3328        case syntaxspec:
3329	  k = *p++;
3330	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3331	    if (SYNTAX (j) == (enum syntaxcode) k)
3332	      fastmap[j] = 1;
3333	  break;
3334
3335
3336	case notsyntaxspec:
3337	  k = *p++;
3338	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3339	    if (SYNTAX (j) != (enum syntaxcode) k)
3340	      fastmap[j] = 1;
3341	  break;
3342
3343
3344      /* All cases after this match the empty string.  These end with
3345         `continue'.  */
3346
3347
3348	case before_dot:
3349	case at_dot:
3350	case after_dot:
3351          continue;
3352#endif /* emacs */
3353
3354
3355        case no_op:
3356        case begline:
3357        case endline:
3358	case begbuf:
3359	case endbuf:
3360	case wordbound:
3361	case notwordbound:
3362	case wordbeg:
3363	case wordend:
3364        case push_dummy_failure:
3365          continue;
3366
3367
3368	case jump_n:
3369        case pop_failure_jump:
3370	case maybe_pop_jump:
3371	case jump:
3372        case jump_past_alt:
3373	case dummy_failure_jump:
3374          EXTRACT_NUMBER_AND_INCR (j, p);
3375	  p += j;
3376	  if (j > 0)
3377	    continue;
3378
3379          /* Jump backward implies we just went through the body of a
3380             loop and matched nothing.  Opcode jumped to should be
3381             `on_failure_jump' or `succeed_n'.  Just treat it like an
3382             ordinary jump.  For a * loop, it has pushed its failure
3383             point already; if so, discard that as redundant.  */
3384          if ((re_opcode_t) *p != on_failure_jump
3385	      && (re_opcode_t) *p != succeed_n)
3386	    continue;
3387
3388          p++;
3389          EXTRACT_NUMBER_AND_INCR (j, p);
3390          p += j;
3391
3392          /* If what's on the stack is where we are now, pop it.  */
3393          if (!FAIL_STACK_EMPTY ()
3394	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3395            fail_stack.avail--;
3396
3397          continue;
3398
3399
3400        case on_failure_jump:
3401        case on_failure_keep_string_jump:
3402	handle_on_failure_jump:
3403          EXTRACT_NUMBER_AND_INCR (j, p);
3404
3405          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3406             end of the pattern.  We don't want to push such a point,
3407             since when we restore it above, entering the switch will
3408             increment `p' past the end of the pattern.  We don't need
3409             to push such a point since we obviously won't find any more
3410             fastmap entries beyond `pend'.  Such a pattern can match
3411             the null string, though.  */
3412          if (p + j < pend)
3413            {
3414              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3415		{
3416		  RESET_FAIL_STACK ();
3417		  return -2;
3418		}
3419            }
3420          else
3421            bufp->can_be_null = 1;
3422
3423          if (succeed_n_p)
3424            {
3425              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3426              succeed_n_p = false;
3427	    }
3428
3429          continue;
3430
3431
3432	case succeed_n:
3433          /* Get to the number of times to succeed.  */
3434          p += 2;
3435
3436          /* Increment p past the n for when k != 0.  */
3437          EXTRACT_NUMBER_AND_INCR (k, p);
3438          if (k == 0)
3439	    {
3440              p -= 4;
3441  	      succeed_n_p = true;  /* Spaghetti code alert.  */
3442              goto handle_on_failure_jump;
3443            }
3444          continue;
3445
3446
3447	case set_number_at:
3448          p += 4;
3449          continue;
3450
3451
3452	case start_memory:
3453        case stop_memory:
3454	  p += 2;
3455	  continue;
3456
3457
3458	default:
3459          abort (); /* We have listed all the cases.  */
3460        } /* switch *p++ */
3461
3462      /* Getting here means we have found the possible starting
3463         characters for one path of the pattern -- and that the empty
3464         string does not match.  We need not follow this path further.
3465         Instead, look at the next alternative (remembered on the
3466         stack), or quit if no more.  The test at the top of the loop
3467         does these things.  */
3468      path_can_be_null = false;
3469      p = pend;
3470    } /* while p */
3471
3472  /* Set `can_be_null' for the last path (also the first path, if the
3473     pattern is empty).  */
3474  bufp->can_be_null |= path_can_be_null;
3475
3476 done:
3477  RESET_FAIL_STACK ();
3478  return 0;
3479} /* re_compile_fastmap */
3480#ifdef _LIBC
3481weak_alias (__re_compile_fastmap, re_compile_fastmap)
3482#endif
3483
3484/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3485   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3486   this memory for recording register information.  STARTS and ENDS
3487   must be allocated using the malloc library routine, and must each
3488   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3489
3490   If NUM_REGS == 0, then subsequent matches should allocate their own
3491   register data.
3492
3493   Unless this function is called, the first search or match using
3494   PATTERN_BUFFER will allocate its own register data, without
3495   freeing the old data.  */
3496
3497void
3498re_set_registers (bufp, regs, num_regs, starts, ends)
3499    struct re_pattern_buffer *bufp;
3500    struct re_registers *regs;
3501    unsigned num_regs;
3502    regoff_t *starts, *ends;
3503{
3504  if (num_regs)
3505    {
3506      bufp->regs_allocated = REGS_REALLOCATE;
3507      regs->num_regs = num_regs;
3508      regs->start = starts;
3509      regs->end = ends;
3510    }
3511  else
3512    {
3513      bufp->regs_allocated = REGS_UNALLOCATED;
3514      regs->num_regs = 0;
3515      regs->start = regs->end = (regoff_t *) 0;
3516    }
3517}
3518#ifdef _LIBC
3519weak_alias (__re_set_registers, re_set_registers)
3520#endif
3521
3522/* Searching routines.  */
3523
3524/* Like re_search_2, below, but only one string is specified, and
3525   doesn't let you say where to stop matching. */
3526
3527int
3528re_search (bufp, string, size, startpos, range, regs)
3529     struct re_pattern_buffer *bufp;
3530     const char *string;
3531     int size, startpos, range;
3532     struct re_registers *regs;
3533{
3534  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3535		      regs, size);
3536}
3537#ifdef _LIBC
3538weak_alias (__re_search, re_search)
3539#endif
3540
3541
3542/* Using the compiled pattern in BUFP->buffer, first tries to match the
3543   virtual concatenation of STRING1 and STRING2, starting first at index
3544   STARTPOS, then at STARTPOS + 1, and so on.
3545
3546   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3547
3548   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3549   only at STARTPOS; in general, the last start tried is STARTPOS +
3550   RANGE.
3551
3552   In REGS, return the indices of the virtual concatenation of STRING1
3553   and STRING2 that matched the entire BUFP->buffer and its contained
3554   subexpressions.
3555
3556   Do not consider matching one past the index STOP in the virtual
3557   concatenation of STRING1 and STRING2.
3558
3559   We return either the position in the strings at which the match was
3560   found, -1 if no match, or -2 if error (such as failure
3561   stack overflow).  */
3562
3563int
3564re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3565     struct re_pattern_buffer *bufp;
3566     const char *string1, *string2;
3567     int size1, size2;
3568     int startpos;
3569     int range;
3570     struct re_registers *regs;
3571     int stop;
3572{
3573  int val;
3574  register char *fastmap = bufp->fastmap;
3575  register RE_TRANSLATE_TYPE translate = bufp->translate;
3576  int total_size = size1 + size2;
3577  int endpos = startpos + range;
3578
3579  /* Check for out-of-range STARTPOS.  */
3580  if (startpos < 0 || startpos > total_size)
3581    return -1;
3582
3583  /* Fix up RANGE if it might eventually take us outside
3584     the virtual concatenation of STRING1 and STRING2.
3585     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3586  if (endpos < 0)
3587    range = 0 - startpos;
3588  else if (endpos > total_size)
3589    range = total_size - startpos;
3590
3591  /* If the search isn't to be a backwards one, don't waste time in a
3592     search for a pattern that must be anchored.  */
3593  if (bufp->used > 0 && range > 0
3594      && ((re_opcode_t) bufp->buffer[0] == begbuf
3595	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
3596	  || ((re_opcode_t) bufp->buffer[0] == begline
3597	      && !bufp->newline_anchor)))
3598    {
3599      if (startpos > 0)
3600	return -1;
3601      else
3602	range = 1;
3603    }
3604
3605#ifdef emacs
3606  /* In a forward search for something that starts with \=.
3607     don't keep searching past point.  */
3608  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3609    {
3610      range = PT - startpos;
3611      if (range <= 0)
3612	return -1;
3613    }
3614#endif /* emacs */
3615
3616  /* Update the fastmap now if not correct already.  */
3617  if (fastmap && !bufp->fastmap_accurate)
3618    if (re_compile_fastmap (bufp) == -2)
3619      return -2;
3620
3621  /* Loop through the string, looking for a place to start matching.  */
3622  for (;;)
3623    {
3624      /* If a fastmap is supplied, skip quickly over characters that
3625         cannot be the start of a match.  If the pattern can match the
3626         null string, however, we don't need to skip characters; we want
3627         the first null string.  */
3628      if (fastmap && startpos < total_size && !bufp->can_be_null)
3629	{
3630	  if (range > 0)	/* Searching forwards.  */
3631	    {
3632	      register const char *d;
3633	      register int lim = 0;
3634	      int irange = range;
3635
3636              if (startpos < size1 && startpos + range >= size1)
3637                lim = range - (size1 - startpos);
3638
3639	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3640
3641              /* Written out as an if-else to avoid testing `translate'
3642                 inside the loop.  */
3643	      if (translate)
3644                while (range > lim
3645                       && !fastmap[(unsigned char)
3646				   translate[(unsigned char) *d++]])
3647                  range--;
3648	      else
3649                while (range > lim && !fastmap[(unsigned char) *d++])
3650                  range--;
3651
3652	      startpos += irange - range;
3653	    }
3654	  else				/* Searching backwards.  */
3655	    {
3656	      register char c = (size1 == 0 || startpos >= size1
3657                                 ? string2[startpos - size1]
3658                                 : string1[startpos]);
3659
3660	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3661		goto advance;
3662	    }
3663	}
3664
3665      /* If can't match the null string, and that's all we have left, fail.  */
3666      if (range >= 0 && startpos == total_size && fastmap
3667          && !bufp->can_be_null)
3668	return -1;
3669
3670      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3671				 startpos, regs, stop);
3672#ifndef REGEX_MALLOC
3673# ifdef C_ALLOCA
3674      alloca (0);
3675# endif
3676#endif
3677
3678      if (val >= 0)
3679	return startpos;
3680
3681      if (val == -2)
3682	return -2;
3683
3684    advance:
3685      if (!range)
3686        break;
3687      else if (range > 0)
3688        {
3689          range--;
3690          startpos++;
3691        }
3692      else
3693        {
3694          range++;
3695          startpos--;
3696        }
3697    }
3698  return -1;
3699} /* re_search_2 */
3700#ifdef _LIBC
3701weak_alias (__re_search_2, re_search_2)
3702#endif
3703
3704/* This converts PTR, a pointer into one of the search strings `string1'
3705   and `string2' into an offset from the beginning of that string.  */
3706#define POINTER_TO_OFFSET(ptr)			\
3707  (FIRST_STRING_P (ptr)				\
3708   ? ((regoff_t) ((ptr) - string1))		\
3709   : ((regoff_t) ((ptr) - string2 + size1)))
3710
3711/* Macros for dealing with the split strings in re_match_2.  */
3712
3713#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3714
3715/* Call before fetching a character with *d.  This switches over to
3716   string2 if necessary.  */
3717#define PREFETCH()							\
3718  while (d == dend)						    	\
3719    {									\
3720      /* End of string2 => fail.  */					\
3721      if (dend == end_match_2) 						\
3722        goto fail;							\
3723      /* End of string1 => advance to string2.  */ 			\
3724      d = string2;						        \
3725      dend = end_match_2;						\
3726    }
3727
3728
3729/* Test if at very beginning or at very end of the virtual concatenation
3730   of `string1' and `string2'.  If only one string, it's `string2'.  */
3731#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3732#define AT_STRINGS_END(d) ((d) == end2)
3733
3734
3735/* Test if D points to a character which is word-constituent.  We have
3736   two special cases to check for: if past the end of string1, look at
3737   the first character in string2; and if before the beginning of
3738   string2, look at the last character in string1.  */
3739#define WORDCHAR_P(d)							\
3740  (SYNTAX ((d) == end1 ? *string2					\
3741           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3742   == Sword)
3743
3744/* Disabled due to a compiler bug -- see comment at case wordbound */
3745#if 0
3746/* Test if the character before D and the one at D differ with respect
3747   to being word-constituent.  */
3748#define AT_WORD_BOUNDARY(d)						\
3749  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3750   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3751#endif
3752
3753/* Free everything we malloc.  */
3754#ifdef MATCH_MAY_ALLOCATE
3755# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3756# define FREE_VARIABLES()						\
3757  do {									\
3758    REGEX_FREE_STACK (fail_stack.stack);				\
3759    FREE_VAR (regstart);						\
3760    FREE_VAR (regend);							\
3761    FREE_VAR (old_regstart);						\
3762    FREE_VAR (old_regend);						\
3763    FREE_VAR (best_regstart);						\
3764    FREE_VAR (best_regend);						\
3765    FREE_VAR (reg_info);						\
3766    FREE_VAR (reg_dummy);						\
3767    FREE_VAR (reg_info_dummy);						\
3768  } while (0)
3769#else
3770# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3771#endif /* not MATCH_MAY_ALLOCATE */
3772
3773/* These values must meet several constraints.  They must not be valid
3774   register values; since we have a limit of 255 registers (because
3775   we use only one byte in the pattern for the register number), we can
3776   use numbers larger than 255.  They must differ by 1, because of
3777   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3778   be larger than the value for the highest register, so we do not try
3779   to actually save any registers when none are active.  */
3780#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3781#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3782
3783/* Matching routines.  */
3784
3785#ifndef emacs   /* Emacs never uses this.  */
3786/* re_match is like re_match_2 except it takes only a single string.  */
3787
3788int
3789re_match (bufp, string, size, pos, regs)
3790     struct re_pattern_buffer *bufp;
3791     const char *string;
3792     int size, pos;
3793     struct re_registers *regs;
3794{
3795  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3796				    pos, regs, size);
3797# ifndef REGEX_MALLOC
3798#  ifdef C_ALLOCA
3799  alloca (0);
3800#  endif
3801# endif
3802  return result;
3803}
3804# ifdef _LIBC
3805weak_alias (__re_match, re_match)
3806# endif
3807#endif /* not emacs */
3808
3809static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3810						    unsigned char *end,
3811						register_info_type *reg_info));
3812static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3813						  unsigned char *end,
3814						register_info_type *reg_info));
3815static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3816							unsigned char *end,
3817						register_info_type *reg_info));
3818static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3819				     int len, char *translate));
3820
3821/* re_match_2 matches the compiled pattern in BUFP against the
3822   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3823   and SIZE2, respectively).  We start matching at POS, and stop
3824   matching at STOP.
3825
3826   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3827   store offsets for the substring each group matched in REGS.  See the
3828   documentation for exactly how many groups we fill.
3829
3830   We return -1 if no match, -2 if an internal error (such as the
3831   failure stack overflowing).  Otherwise, we return the length of the
3832   matched substring.  */
3833
3834int
3835re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3836     struct re_pattern_buffer *bufp;
3837     const char *string1, *string2;
3838     int size1, size2;
3839     int pos;
3840     struct re_registers *regs;
3841     int stop;
3842{
3843  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3844				    pos, regs, stop);
3845#ifndef REGEX_MALLOC
3846# ifdef C_ALLOCA
3847  alloca (0);
3848# endif
3849#endif
3850  return result;
3851}
3852#ifdef _LIBC
3853weak_alias (__re_match_2, re_match_2)
3854#endif
3855
3856/* This is a separate function so that we can force an alloca cleanup
3857   afterwards.  */
3858static int
3859re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3860     struct re_pattern_buffer *bufp;
3861     const char *string1, *string2;
3862     int size1, size2;
3863     int pos;
3864     struct re_registers *regs;
3865     int stop;
3866{
3867  /* General temporaries.  */
3868  int mcnt;
3869  unsigned char *p1;
3870
3871  /* Just past the end of the corresponding string.  */
3872  const char *end1, *end2;
3873
3874  /* Pointers into string1 and string2, just past the last characters in
3875     each to consider matching.  */
3876  const char *end_match_1, *end_match_2;
3877
3878  /* Where we are in the data, and the end of the current string.  */
3879  const char *d, *dend;
3880
3881  /* Where we are in the pattern, and the end of the pattern.  */
3882  unsigned char *p = bufp->buffer;
3883  register unsigned char *pend = p + bufp->used;
3884
3885  /* Mark the opcode just after a start_memory, so we can test for an
3886     empty subpattern when we get to the stop_memory.  */
3887  unsigned char *just_past_start_mem = 0;
3888
3889  /* We use this to map every character in the string.  */
3890  RE_TRANSLATE_TYPE translate = bufp->translate;
3891
3892  /* Failure point stack.  Each place that can handle a failure further
3893     down the line pushes a failure point on this stack.  It consists of
3894     restart, regend, and reg_info for all registers corresponding to
3895     the subexpressions we're currently inside, plus the number of such
3896     registers, and, finally, two char *'s.  The first char * is where
3897     to resume scanning the pattern; the second one is where to resume
3898     scanning the strings.  If the latter is zero, the failure point is
3899     a ``dummy''; if a failure happens and the failure point is a dummy,
3900     it gets discarded and the next next one is tried.  */
3901#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3902  fail_stack_type fail_stack;
3903#endif
3904#ifdef DEBUG
3905  static unsigned failure_id;
3906  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3907#endif
3908
3909#ifdef REL_ALLOC
3910  /* This holds the pointer to the failure stack, when
3911     it is allocated relocatably.  */
3912  fail_stack_elt_t *failure_stack_ptr;
3913#endif
3914
3915  /* We fill all the registers internally, independent of what we
3916     return, for use in backreferences.  The number here includes
3917     an element for register zero.  */
3918  size_t num_regs = bufp->re_nsub + 1;
3919
3920  /* The currently active registers.  */
3921  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3922  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3923
3924  /* Information on the contents of registers. These are pointers into
3925     the input strings; they record just what was matched (on this
3926     attempt) by a subexpression part of the pattern, that is, the
3927     regnum-th regstart pointer points to where in the pattern we began
3928     matching and the regnum-th regend points to right after where we
3929     stopped matching the regnum-th subexpression.  (The zeroth register
3930     keeps track of what the whole pattern matches.)  */
3931#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3932  const char **regstart, **regend;
3933#endif
3934
3935  /* If a group that's operated upon by a repetition operator fails to
3936     match anything, then the register for its start will need to be
3937     restored because it will have been set to wherever in the string we
3938     are when we last see its open-group operator.  Similarly for a
3939     register's end.  */
3940#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3941  const char **old_regstart, **old_regend;
3942#endif
3943
3944  /* The is_active field of reg_info helps us keep track of which (possibly
3945     nested) subexpressions we are currently in. The matched_something
3946     field of reg_info[reg_num] helps us tell whether or not we have
3947     matched any of the pattern so far this time through the reg_num-th
3948     subexpression.  These two fields get reset each time through any
3949     loop their register is in.  */
3950#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3951  register_info_type *reg_info;
3952#endif
3953
3954  /* The following record the register info as found in the above
3955     variables when we find a match better than any we've seen before.
3956     This happens as we backtrack through the failure points, which in
3957     turn happens only if we have not yet matched the entire string. */
3958  unsigned best_regs_set = false;
3959#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3960  const char **best_regstart, **best_regend;
3961#endif
3962
3963  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3964     allocate space for that if we're not allocating space for anything
3965     else (see below).  Also, we never need info about register 0 for
3966     any of the other register vectors, and it seems rather a kludge to
3967     treat `best_regend' differently than the rest.  So we keep track of
3968     the end of the best match so far in a separate variable.  We
3969     initialize this to NULL so that when we backtrack the first time
3970     and need to test it, it's not garbage.  */
3971  const char *match_end = NULL;
3972
3973  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3974  int set_regs_matched_done = 0;
3975
3976  /* Used when we pop values we don't care about.  */
3977#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3978  const char **reg_dummy;
3979  register_info_type *reg_info_dummy;
3980#endif
3981
3982#ifdef DEBUG
3983  /* Counts the total number of registers pushed.  */
3984  unsigned num_regs_pushed = 0;
3985#endif
3986
3987  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3988
3989  INIT_FAIL_STACK ();
3990
3991#ifdef MATCH_MAY_ALLOCATE
3992  /* Do not bother to initialize all the register variables if there are
3993     no groups in the pattern, as it takes a fair amount of time.  If
3994     there are groups, we include space for register 0 (the whole
3995     pattern), even though we never use it, since it simplifies the
3996     array indexing.  We should fix this.  */
3997  if (bufp->re_nsub)
3998    {
3999      regstart = REGEX_TALLOC (num_regs, const char *);
4000      regend = REGEX_TALLOC (num_regs, const char *);
4001      old_regstart = REGEX_TALLOC (num_regs, const char *);
4002      old_regend = REGEX_TALLOC (num_regs, const char *);
4003      best_regstart = REGEX_TALLOC (num_regs, const char *);
4004      best_regend = REGEX_TALLOC (num_regs, const char *);
4005      reg_info = REGEX_TALLOC (num_regs, register_info_type);
4006      reg_dummy = REGEX_TALLOC (num_regs, const char *);
4007      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4008
4009      if (!(regstart && regend && old_regstart && old_regend && reg_info
4010            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4011        {
4012          FREE_VARIABLES ();
4013          return -2;
4014        }
4015    }
4016  else
4017    {
4018      /* We must initialize all our variables to NULL, so that
4019         `FREE_VARIABLES' doesn't try to free them.  */
4020      regstart = regend = old_regstart = old_regend = best_regstart
4021        = best_regend = reg_dummy = NULL;
4022      reg_info = reg_info_dummy = (register_info_type *) NULL;
4023    }
4024#endif /* MATCH_MAY_ALLOCATE */
4025
4026  /* The starting position is bogus.  */
4027  if (pos < 0 || pos > size1 + size2)
4028    {
4029      FREE_VARIABLES ();
4030      return -1;
4031    }
4032
4033  /* Initialize subexpression text positions to -1 to mark ones that no
4034     start_memory/stop_memory has been seen for. Also initialize the
4035     register information struct.  */
4036  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4037    {
4038      regstart[mcnt] = regend[mcnt]
4039        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4040
4041      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4042      IS_ACTIVE (reg_info[mcnt]) = 0;
4043      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4044      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4045    }
4046
4047  /* We move `string1' into `string2' if the latter's empty -- but not if
4048     `string1' is null.  */
4049  if (size2 == 0 && string1 != NULL)
4050    {
4051      string2 = string1;
4052      size2 = size1;
4053      string1 = 0;
4054      size1 = 0;
4055    }
4056  end1 = string1 + size1;
4057  end2 = string2 + size2;
4058
4059  /* Compute where to stop matching, within the two strings.  */
4060  if (stop <= size1)
4061    {
4062      end_match_1 = string1 + stop;
4063      end_match_2 = string2;
4064    }
4065  else
4066    {
4067      end_match_1 = end1;
4068      end_match_2 = string2 + stop - size1;
4069    }
4070
4071  /* `p' scans through the pattern as `d' scans through the data.
4072     `dend' is the end of the input string that `d' points within.  `d'
4073     is advanced into the following input string whenever necessary, but
4074     this happens before fetching; therefore, at the beginning of the
4075     loop, `d' can be pointing at the end of a string, but it cannot
4076     equal `string2'.  */
4077  if (size1 > 0 && pos <= size1)
4078    {
4079      d = string1 + pos;
4080      dend = end_match_1;
4081    }
4082  else
4083    {
4084      d = string2 + pos - size1;
4085      dend = end_match_2;
4086    }
4087
4088  DEBUG_PRINT1 ("The compiled pattern is:\n");
4089  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4090  DEBUG_PRINT1 ("The string to match is: `");
4091  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4092  DEBUG_PRINT1 ("'\n");
4093
4094  /* This loops over pattern commands.  It exits by returning from the
4095     function if the match is complete, or it drops through if the match
4096     fails at this starting point in the input data.  */
4097  for (;;)
4098    {
4099#ifdef _LIBC
4100      DEBUG_PRINT2 ("\n%p: ", p);
4101#else
4102      DEBUG_PRINT2 ("\n0x%x: ", p);
4103#endif
4104
4105      if (p == pend)
4106	{ /* End of pattern means we might have succeeded.  */
4107          DEBUG_PRINT1 ("end of pattern ... ");
4108
4109	  /* If we haven't matched the entire string, and we want the
4110             longest match, try backtracking.  */
4111          if (d != end_match_2)
4112	    {
4113	      /* 1 if this match ends in the same string (string1 or string2)
4114		 as the best previous match.  */
4115	      boolean same_str_p = (FIRST_STRING_P (match_end)
4116				    == MATCHING_IN_FIRST_STRING);
4117	      /* 1 if this match is the best seen so far.  */
4118	      boolean best_match_p;
4119
4120	      /* AIX compiler got confused when this was combined
4121		 with the previous declaration.  */
4122	      if (same_str_p)
4123		best_match_p = d > match_end;
4124	      else
4125		best_match_p = !MATCHING_IN_FIRST_STRING;
4126
4127              DEBUG_PRINT1 ("backtracking.\n");
4128
4129              if (!FAIL_STACK_EMPTY ())
4130                { /* More failure points to try.  */
4131
4132                  /* If exceeds best match so far, save it.  */
4133                  if (!best_regs_set || best_match_p)
4134                    {
4135                      best_regs_set = true;
4136                      match_end = d;
4137
4138                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4139
4140                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4141                        {
4142                          best_regstart[mcnt] = regstart[mcnt];
4143                          best_regend[mcnt] = regend[mcnt];
4144                        }
4145                    }
4146                  goto fail;
4147                }
4148
4149              /* If no failure points, don't restore garbage.  And if
4150                 last match is real best match, don't restore second
4151                 best one. */
4152              else if (best_regs_set && !best_match_p)
4153                {
4154  	        restore_best_regs:
4155                  /* Restore best match.  It may happen that `dend ==
4156                     end_match_1' while the restored d is in string2.
4157                     For example, the pattern `x.*y.*z' against the
4158                     strings `x-' and `y-z-', if the two strings are
4159                     not consecutive in memory.  */
4160                  DEBUG_PRINT1 ("Restoring best registers.\n");
4161
4162                  d = match_end;
4163                  dend = ((d >= string1 && d <= end1)
4164		           ? end_match_1 : end_match_2);
4165
4166		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4167		    {
4168		      regstart[mcnt] = best_regstart[mcnt];
4169		      regend[mcnt] = best_regend[mcnt];
4170		    }
4171                }
4172            } /* d != end_match_2 */
4173
4174	succeed_label:
4175          DEBUG_PRINT1 ("Accepting match.\n");
4176
4177          /* If caller wants register contents data back, do it.  */
4178          if (regs && !bufp->no_sub)
4179	    {
4180              /* Have the register data arrays been allocated?  */
4181              if (bufp->regs_allocated == REGS_UNALLOCATED)
4182                { /* No.  So allocate them with malloc.  We need one
4183                     extra element beyond `num_regs' for the `-1' marker
4184                     GNU code uses.  */
4185                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4186                  regs->start = TALLOC (regs->num_regs, regoff_t);
4187                  regs->end = TALLOC (regs->num_regs, regoff_t);
4188                  if (regs->start == NULL || regs->end == NULL)
4189		    {
4190		      FREE_VARIABLES ();
4191		      return -2;
4192		    }
4193                  bufp->regs_allocated = REGS_REALLOCATE;
4194                }
4195              else if (bufp->regs_allocated == REGS_REALLOCATE)
4196                { /* Yes.  If we need more elements than were already
4197                     allocated, reallocate them.  If we need fewer, just
4198                     leave it alone.  */
4199                  if (regs->num_regs < num_regs + 1)
4200                    {
4201                      regs->num_regs = num_regs + 1;
4202                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4203                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4204                      if (regs->start == NULL || regs->end == NULL)
4205			{
4206			  FREE_VARIABLES ();
4207			  return -2;
4208			}
4209                    }
4210                }
4211              else
4212		{
4213		  /* These braces fend off a "empty body in an else-statement"
4214		     warning under GCC when assert expands to nothing.  */
4215		  assert (bufp->regs_allocated == REGS_FIXED);
4216		}
4217
4218              /* Convert the pointer data in `regstart' and `regend' to
4219                 indices.  Register zero has to be set differently,
4220                 since we haven't kept track of any info for it.  */
4221              if (regs->num_regs > 0)
4222                {
4223                  regs->start[0] = pos;
4224                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4225				  ? ((regoff_t) (d - string1))
4226			          : ((regoff_t) (d - string2 + size1)));
4227                }
4228
4229              /* Go through the first `min (num_regs, regs->num_regs)'
4230                 registers, since that is all we initialized.  */
4231	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4232		   mcnt++)
4233		{
4234                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4235                    regs->start[mcnt] = regs->end[mcnt] = -1;
4236                  else
4237                    {
4238		      regs->start[mcnt]
4239			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4240                      regs->end[mcnt]
4241			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4242                    }
4243		}
4244
4245              /* If the regs structure we return has more elements than
4246                 were in the pattern, set the extra elements to -1.  If
4247                 we (re)allocated the registers, this is the case,
4248                 because we always allocate enough to have at least one
4249                 -1 at the end.  */
4250              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4251                regs->start[mcnt] = regs->end[mcnt] = -1;
4252	    } /* regs && !bufp->no_sub */
4253
4254          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4255                        nfailure_points_pushed, nfailure_points_popped,
4256                        nfailure_points_pushed - nfailure_points_popped);
4257          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4258
4259          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4260			    ? string1
4261			    : string2 - size1);
4262
4263          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4264
4265          FREE_VARIABLES ();
4266          return mcnt;
4267        }
4268
4269      /* Otherwise match next pattern command.  */
4270      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4271	{
4272        /* Ignore these.  Used to ignore the n of succeed_n's which
4273           currently have n == 0.  */
4274        case no_op:
4275          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4276          break;
4277
4278	case succeed:
4279          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4280	  goto succeed_label;
4281
4282        /* Match the next n pattern characters exactly.  The following
4283           byte in the pattern defines n, and the n bytes after that
4284           are the characters to match.  */
4285	case exactn:
4286	  mcnt = *p++;
4287          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4288
4289          /* This is written out as an if-else so we don't waste time
4290             testing `translate' inside the loop.  */
4291          if (translate)
4292	    {
4293	      do
4294		{
4295		  PREFETCH ();
4296		  if ((unsigned char) translate[(unsigned char) *d++]
4297		      != (unsigned char) *p++)
4298                    goto fail;
4299		}
4300	      while (--mcnt);
4301	    }
4302	  else
4303	    {
4304	      do
4305		{
4306		  PREFETCH ();
4307		  if (*d++ != (char) *p++) goto fail;
4308		}
4309	      while (--mcnt);
4310	    }
4311	  SET_REGS_MATCHED ();
4312          break;
4313
4314
4315        /* Match any character except possibly a newline or a null.  */
4316	case anychar:
4317          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4318
4319          PREFETCH ();
4320
4321          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4322              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4323	    goto fail;
4324
4325          SET_REGS_MATCHED ();
4326          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4327          d++;
4328	  break;
4329
4330
4331	case charset:
4332	case charset_not:
4333	  {
4334	    register unsigned char c;
4335	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4336
4337            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4338
4339	    PREFETCH ();
4340	    c = TRANSLATE (*d); /* The character to match.  */
4341
4342            /* Cast to `unsigned' instead of `unsigned char' in case the
4343               bit list is a full 32 bytes long.  */
4344	    if (c < (unsigned) (*p * BYTEWIDTH)
4345		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4346	      not = !not;
4347
4348	    p += 1 + *p;
4349
4350	    if (!not) goto fail;
4351
4352	    SET_REGS_MATCHED ();
4353            d++;
4354	    break;
4355	  }
4356
4357
4358        /* The beginning of a group is represented by start_memory.
4359           The arguments are the register number in the next byte, and the
4360           number of groups inner to this one in the next.  The text
4361           matched within the group is recorded (in the internal
4362           registers data structure) under the register number.  */
4363        case start_memory:
4364	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4365
4366          /* Find out if this group can match the empty string.  */
4367	  p1 = p;		/* To send to group_match_null_string_p.  */
4368
4369          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4370            REG_MATCH_NULL_STRING_P (reg_info[*p])
4371              = group_match_null_string_p (&p1, pend, reg_info);
4372
4373          /* Save the position in the string where we were the last time
4374             we were at this open-group operator in case the group is
4375             operated upon by a repetition operator, e.g., with `(a*)*b'
4376             against `ab'; then we want to ignore where we are now in
4377             the string in case this attempt to match fails.  */
4378          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4379                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4380                             : regstart[*p];
4381	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4382			 POINTER_TO_OFFSET (old_regstart[*p]));
4383
4384          regstart[*p] = d;
4385	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4386
4387          IS_ACTIVE (reg_info[*p]) = 1;
4388          MATCHED_SOMETHING (reg_info[*p]) = 0;
4389
4390	  /* Clear this whenever we change the register activity status.  */
4391	  set_regs_matched_done = 0;
4392
4393          /* This is the new highest active register.  */
4394          highest_active_reg = *p;
4395
4396          /* If nothing was active before, this is the new lowest active
4397             register.  */
4398          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4399            lowest_active_reg = *p;
4400
4401          /* Move past the register number and inner group count.  */
4402          p += 2;
4403	  just_past_start_mem = p;
4404
4405          break;
4406
4407
4408        /* The stop_memory opcode represents the end of a group.  Its
4409           arguments are the same as start_memory's: the register
4410           number, and the number of inner groups.  */
4411	case stop_memory:
4412	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4413
4414          /* We need to save the string position the last time we were at
4415             this close-group operator in case the group is operated
4416             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4417             against `aba'; then we want to ignore where we are now in
4418             the string in case this attempt to match fails.  */
4419          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4420                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4421			   : regend[*p];
4422	  DEBUG_PRINT2 ("      old_regend: %d\n",
4423			 POINTER_TO_OFFSET (old_regend[*p]));
4424
4425          regend[*p] = d;
4426	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4427
4428          /* This register isn't active anymore.  */
4429          IS_ACTIVE (reg_info[*p]) = 0;
4430
4431	  /* Clear this whenever we change the register activity status.  */
4432	  set_regs_matched_done = 0;
4433
4434          /* If this was the only register active, nothing is active
4435             anymore.  */
4436          if (lowest_active_reg == highest_active_reg)
4437            {
4438              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4439              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4440            }
4441          else
4442            { /* We must scan for the new highest active register, since
4443                 it isn't necessarily one less than now: consider
4444                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4445                 new highest active register is 1.  */
4446              unsigned char r = *p - 1;
4447              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4448                r--;
4449
4450              /* If we end up at register zero, that means that we saved
4451                 the registers as the result of an `on_failure_jump', not
4452                 a `start_memory', and we jumped to past the innermost
4453                 `stop_memory'.  For example, in ((.)*) we save
4454                 registers 1 and 2 as a result of the *, but when we pop
4455                 back to the second ), we are at the stop_memory 1.
4456                 Thus, nothing is active.  */
4457	      if (r == 0)
4458                {
4459                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4460                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4461                }
4462              else
4463                highest_active_reg = r;
4464            }
4465
4466          /* If just failed to match something this time around with a
4467             group that's operated on by a repetition operator, try to
4468             force exit from the ``loop'', and restore the register
4469             information for this group that we had before trying this
4470             last match.  */
4471          if ((!MATCHED_SOMETHING (reg_info[*p])
4472               || just_past_start_mem == p - 1)
4473	      && (p + 2) < pend)
4474            {
4475              boolean is_a_jump_n = false;
4476
4477              p1 = p + 2;
4478              mcnt = 0;
4479              switch ((re_opcode_t) *p1++)
4480                {
4481                  case jump_n:
4482		    is_a_jump_n = true;
4483                  case pop_failure_jump:
4484		  case maybe_pop_jump:
4485		  case jump:
4486		  case dummy_failure_jump:
4487                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4488		    if (is_a_jump_n)
4489		      p1 += 2;
4490                    break;
4491
4492                  default:
4493                    /* do nothing */ ;
4494                }
4495	      p1 += mcnt;
4496
4497              /* If the next operation is a jump backwards in the pattern
4498	         to an on_failure_jump right before the start_memory
4499                 corresponding to this stop_memory, exit from the loop
4500                 by forcing a failure after pushing on the stack the
4501                 on_failure_jump's jump in the pattern, and d.  */
4502              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4503                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4504		{
4505                  /* If this group ever matched anything, then restore
4506                     what its registers were before trying this last
4507                     failed match, e.g., with `(a*)*b' against `ab' for
4508                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4509                     against `aba' for regend[3].
4510
4511                     Also restore the registers for inner groups for,
4512                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4513                     otherwise get trashed).  */
4514
4515                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4516		    {
4517		      unsigned r;
4518
4519                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4520
4521		      /* Restore this and inner groups' (if any) registers.  */
4522                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4523			   r++)
4524                        {
4525                          regstart[r] = old_regstart[r];
4526
4527                          /* xx why this test?  */
4528                          if (old_regend[r] >= regstart[r])
4529                            regend[r] = old_regend[r];
4530                        }
4531                    }
4532		  p1++;
4533                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4534                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4535
4536                  goto fail;
4537                }
4538            }
4539
4540          /* Move past the register number and the inner group count.  */
4541          p += 2;
4542          break;
4543
4544
4545	/* \<digit> has been turned into a `duplicate' command which is
4546           followed by the numeric value of <digit> as the register number.  */
4547        case duplicate:
4548	  {
4549	    register const char *d2, *dend2;
4550	    int regno = *p++;   /* Get which register to match against.  */
4551	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4552
4553	    /* Can't back reference a group which we've never matched.  */
4554            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4555              goto fail;
4556
4557            /* Where in input to try to start matching.  */
4558            d2 = regstart[regno];
4559
4560            /* Where to stop matching; if both the place to start and
4561               the place to stop matching are in the same string, then
4562               set to the place to stop, otherwise, for now have to use
4563               the end of the first string.  */
4564
4565            dend2 = ((FIRST_STRING_P (regstart[regno])
4566		      == FIRST_STRING_P (regend[regno]))
4567		     ? regend[regno] : end_match_1);
4568	    for (;;)
4569	      {
4570		/* If necessary, advance to next segment in register
4571                   contents.  */
4572		while (d2 == dend2)
4573		  {
4574		    if (dend2 == end_match_2) break;
4575		    if (dend2 == regend[regno]) break;
4576
4577                    /* End of string1 => advance to string2. */
4578                    d2 = string2;
4579                    dend2 = regend[regno];
4580		  }
4581		/* At end of register contents => success */
4582		if (d2 == dend2) break;
4583
4584		/* If necessary, advance to next segment in data.  */
4585		PREFETCH ();
4586
4587		/* How many characters left in this segment to match.  */
4588		mcnt = dend - d;
4589
4590		/* Want how many consecutive characters we can match in
4591                   one shot, so, if necessary, adjust the count.  */
4592                if (mcnt > dend2 - d2)
4593		  mcnt = dend2 - d2;
4594
4595		/* Compare that many; failure if mismatch, else move
4596                   past them.  */
4597		if (translate
4598                    ? bcmp_translate (d, d2, mcnt, translate)
4599                    : memcmp (d, d2, mcnt))
4600		  goto fail;
4601		d += mcnt, d2 += mcnt;
4602
4603		/* Do this because we've match some characters.  */
4604		SET_REGS_MATCHED ();
4605	      }
4606	  }
4607	  break;
4608
4609
4610        /* begline matches the empty string at the beginning of the string
4611           (unless `not_bol' is set in `bufp'), and, if
4612           `newline_anchor' is set, after newlines.  */
4613	case begline:
4614          DEBUG_PRINT1 ("EXECUTING begline.\n");
4615
4616          if (AT_STRINGS_BEG (d))
4617            {
4618              if (!bufp->not_bol) break;
4619            }
4620          else if (d[-1] == '\n' && bufp->newline_anchor)
4621            {
4622              break;
4623            }
4624          /* In all other cases, we fail.  */
4625          goto fail;
4626
4627
4628        /* endline is the dual of begline.  */
4629	case endline:
4630          DEBUG_PRINT1 ("EXECUTING endline.\n");
4631
4632          if (AT_STRINGS_END (d))
4633            {
4634              if (!bufp->not_eol) break;
4635            }
4636
4637          /* We have to ``prefetch'' the next character.  */
4638          else if ((d == end1 ? *string2 : *d) == '\n'
4639                   && bufp->newline_anchor)
4640            {
4641              break;
4642            }
4643          goto fail;
4644
4645
4646	/* Match at the very beginning of the data.  */
4647        case begbuf:
4648          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4649          if (AT_STRINGS_BEG (d))
4650            break;
4651          goto fail;
4652
4653
4654	/* Match at the very end of the data.  */
4655        case endbuf:
4656          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4657	  if (AT_STRINGS_END (d))
4658	    break;
4659          goto fail;
4660
4661
4662        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4663           pushes NULL as the value for the string on the stack.  Then
4664           `pop_failure_point' will keep the current value for the
4665           string, instead of restoring it.  To see why, consider
4666           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4667           then the . fails against the \n.  But the next thing we want
4668           to do is match the \n against the \n; if we restored the
4669           string value, we would be back at the foo.
4670
4671           Because this is used only in specific cases, we don't need to
4672           check all the things that `on_failure_jump' does, to make
4673           sure the right things get saved on the stack.  Hence we don't
4674           share its code.  The only reason to push anything on the
4675           stack at all is that otherwise we would have to change
4676           `anychar's code to do something besides goto fail in this
4677           case; that seems worse than this.  */
4678        case on_failure_keep_string_jump:
4679          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4680
4681          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4682#ifdef _LIBC
4683          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4684#else
4685          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4686#endif
4687
4688          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4689          break;
4690
4691
4692	/* Uses of on_failure_jump:
4693
4694           Each alternative starts with an on_failure_jump that points
4695           to the beginning of the next alternative.  Each alternative
4696           except the last ends with a jump that in effect jumps past
4697           the rest of the alternatives.  (They really jump to the
4698           ending jump of the following alternative, because tensioning
4699           these jumps is a hassle.)
4700
4701           Repeats start with an on_failure_jump that points past both
4702           the repetition text and either the following jump or
4703           pop_failure_jump back to this on_failure_jump.  */
4704	case on_failure_jump:
4705        on_failure:
4706          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4707
4708          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4709#ifdef _LIBC
4710          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4711#else
4712          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4713#endif
4714
4715          /* If this on_failure_jump comes right before a group (i.e.,
4716             the original * applied to a group), save the information
4717             for that group and all inner ones, so that if we fail back
4718             to this point, the group's information will be correct.
4719             For example, in \(a*\)*\1, we need the preceding group,
4720             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4721
4722          /* We can't use `p' to check ahead because we push
4723             a failure point to `p + mcnt' after we do this.  */
4724          p1 = p;
4725
4726          /* We need to skip no_op's before we look for the
4727             start_memory in case this on_failure_jump is happening as
4728             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4729             against aba.  */
4730          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4731            p1++;
4732
4733          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4734            {
4735              /* We have a new highest active register now.  This will
4736                 get reset at the start_memory we are about to get to,
4737                 but we will have saved all the registers relevant to
4738                 this repetition op, as described above.  */
4739              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4740              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4741                lowest_active_reg = *(p1 + 1);
4742            }
4743
4744          DEBUG_PRINT1 (":\n");
4745          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4746          break;
4747
4748
4749        /* A smart repeat ends with `maybe_pop_jump'.
4750	   We change it to either `pop_failure_jump' or `jump'.  */
4751        case maybe_pop_jump:
4752          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4753          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4754          {
4755	    register unsigned char *p2 = p;
4756
4757            /* Compare the beginning of the repeat with what in the
4758               pattern follows its end. If we can establish that there
4759               is nothing that they would both match, i.e., that we
4760               would have to backtrack because of (as in, e.g., `a*a')
4761               then we can change to pop_failure_jump, because we'll
4762               never have to backtrack.
4763
4764               This is not true in the case of alternatives: in
4765               `(a|ab)*' we do need to backtrack to the `ab' alternative
4766               (e.g., if the string was `ab').  But instead of trying to
4767               detect that here, the alternative has put on a dummy
4768               failure point which is what we will end up popping.  */
4769
4770	    /* Skip over open/close-group commands.
4771	       If what follows this loop is a ...+ construct,
4772	       look at what begins its body, since we will have to
4773	       match at least one of that.  */
4774	    while (1)
4775	      {
4776		if (p2 + 2 < pend
4777		    && ((re_opcode_t) *p2 == stop_memory
4778			|| (re_opcode_t) *p2 == start_memory))
4779		  p2 += 3;
4780		else if (p2 + 6 < pend
4781			 && (re_opcode_t) *p2 == dummy_failure_jump)
4782		  p2 += 6;
4783		else
4784		  break;
4785	      }
4786
4787	    p1 = p + mcnt;
4788	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4789	       to the `maybe_finalize_jump' of this case.  Examine what
4790	       follows.  */
4791
4792            /* If we're at the end of the pattern, we can change.  */
4793            if (p2 == pend)
4794	      {
4795		/* Consider what happens when matching ":\(.*\)"
4796		   against ":/".  I don't really understand this code
4797		   yet.  */
4798  	        p[-3] = (unsigned char) pop_failure_jump;
4799                DEBUG_PRINT1
4800                  ("  End of pattern: change to `pop_failure_jump'.\n");
4801              }
4802
4803            else if ((re_opcode_t) *p2 == exactn
4804		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4805	      {
4806		register unsigned char c
4807                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4808
4809                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4810                  {
4811  		    p[-3] = (unsigned char) pop_failure_jump;
4812                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4813                                  c, p1[5]);
4814                  }
4815
4816		else if ((re_opcode_t) p1[3] == charset
4817			 || (re_opcode_t) p1[3] == charset_not)
4818		  {
4819		    int not = (re_opcode_t) p1[3] == charset_not;
4820
4821		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4822			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4823		      not = !not;
4824
4825                    /* `not' is equal to 1 if c would match, which means
4826                        that we can't change to pop_failure_jump.  */
4827		    if (!not)
4828                      {
4829  		        p[-3] = (unsigned char) pop_failure_jump;
4830                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4831                      }
4832		  }
4833	      }
4834            else if ((re_opcode_t) *p2 == charset)
4835	      {
4836#ifdef DEBUG
4837		register unsigned char c
4838                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4839#endif
4840
4841#if 0
4842                if ((re_opcode_t) p1[3] == exactn
4843		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4844			  && (p2[2 + p1[5] / BYTEWIDTH]
4845			      & (1 << (p1[5] % BYTEWIDTH)))))
4846#else
4847                if ((re_opcode_t) p1[3] == exactn
4848		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4849			  && (p2[2 + p1[4] / BYTEWIDTH]
4850			      & (1 << (p1[4] % BYTEWIDTH)))))
4851#endif
4852                  {
4853  		    p[-3] = (unsigned char) pop_failure_jump;
4854                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4855                                  c, p1[5]);
4856                  }
4857
4858		else if ((re_opcode_t) p1[3] == charset_not)
4859		  {
4860		    int idx;
4861		    /* We win if the charset_not inside the loop
4862		       lists every character listed in the charset after.  */
4863		    for (idx = 0; idx < (int) p2[1]; idx++)
4864		      if (! (p2[2 + idx] == 0
4865			     || (idx < (int) p1[4]
4866				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4867			break;
4868
4869		    if (idx == p2[1])
4870                      {
4871  		        p[-3] = (unsigned char) pop_failure_jump;
4872                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4873                      }
4874		  }
4875		else if ((re_opcode_t) p1[3] == charset)
4876		  {
4877		    int idx;
4878		    /* We win if the charset inside the loop
4879		       has no overlap with the one after the loop.  */
4880		    for (idx = 0;
4881			 idx < (int) p2[1] && idx < (int) p1[4];
4882			 idx++)
4883		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4884			break;
4885
4886		    if (idx == p2[1] || idx == p1[4])
4887                      {
4888  		        p[-3] = (unsigned char) pop_failure_jump;
4889                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4890                      }
4891		  }
4892	      }
4893	  }
4894	  p -= 2;		/* Point at relative address again.  */
4895	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4896	    {
4897	      p[-1] = (unsigned char) jump;
4898              DEBUG_PRINT1 ("  Match => jump.\n");
4899	      goto unconditional_jump;
4900	    }
4901        /* Note fall through.  */
4902
4903
4904	/* The end of a simple repeat has a pop_failure_jump back to
4905           its matching on_failure_jump, where the latter will push a
4906           failure point.  The pop_failure_jump takes off failure
4907           points put on by this pop_failure_jump's matching
4908           on_failure_jump; we got through the pattern to here from the
4909           matching on_failure_jump, so didn't fail.  */
4910        case pop_failure_jump:
4911          {
4912            /* We need to pass separate storage for the lowest and
4913               highest registers, even though we don't care about the
4914               actual values.  Otherwise, we will restore only one
4915               register from the stack, since lowest will == highest in
4916               `pop_failure_point'.  */
4917            active_reg_t dummy_low_reg, dummy_high_reg;
4918            unsigned char *pdummy;
4919            const char *sdummy;
4920
4921            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4922            POP_FAILURE_POINT (sdummy, pdummy,
4923                               dummy_low_reg, dummy_high_reg,
4924                               reg_dummy, reg_dummy, reg_info_dummy);
4925          }
4926	  /* Note fall through.  */
4927
4928	unconditional_jump:
4929#ifdef _LIBC
4930	  DEBUG_PRINT2 ("\n%p: ", p);
4931#else
4932	  DEBUG_PRINT2 ("\n0x%x: ", p);
4933#endif
4934          /* Note fall through.  */
4935
4936        /* Unconditionally jump (without popping any failure points).  */
4937        case jump:
4938	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4939          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4940	  p += mcnt;				/* Do the jump.  */
4941#ifdef _LIBC
4942          DEBUG_PRINT2 ("(to %p).\n", p);
4943#else
4944          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4945#endif
4946	  break;
4947
4948
4949        /* We need this opcode so we can detect where alternatives end
4950           in `group_match_null_string_p' et al.  */
4951        case jump_past_alt:
4952          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4953          goto unconditional_jump;
4954
4955
4956        /* Normally, the on_failure_jump pushes a failure point, which
4957           then gets popped at pop_failure_jump.  We will end up at
4958           pop_failure_jump, also, and with a pattern of, say, `a+', we
4959           are skipping over the on_failure_jump, so we have to push
4960           something meaningless for pop_failure_jump to pop.  */
4961        case dummy_failure_jump:
4962          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4963          /* It doesn't matter what we push for the string here.  What
4964             the code at `fail' tests is the value for the pattern.  */
4965          PUSH_FAILURE_POINT (NULL, NULL, -2);
4966          goto unconditional_jump;
4967
4968
4969        /* At the end of an alternative, we need to push a dummy failure
4970           point in case we are followed by a `pop_failure_jump', because
4971           we don't want the failure point for the alternative to be
4972           popped.  For example, matching `(a|ab)*' against `aab'
4973           requires that we match the `ab' alternative.  */
4974        case push_dummy_failure:
4975          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4976          /* See comments just above at `dummy_failure_jump' about the
4977             two zeroes.  */
4978          PUSH_FAILURE_POINT (NULL, NULL, -2);
4979          break;
4980
4981        /* Have to succeed matching what follows at least n times.
4982           After that, handle like `on_failure_jump'.  */
4983        case succeed_n:
4984          EXTRACT_NUMBER (mcnt, p + 2);
4985          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4986
4987          assert (mcnt >= 0);
4988          /* Originally, this is how many times we HAVE to succeed.  */
4989          if (mcnt > 0)
4990            {
4991               mcnt--;
4992	       p += 2;
4993               STORE_NUMBER_AND_INCR (p, mcnt);
4994#ifdef _LIBC
4995               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4996#else
4997               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4998#endif
4999            }
5000	  else if (mcnt == 0)
5001            {
5002#ifdef _LIBC
5003              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
5004#else
5005              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
5006#endif
5007	      p[2] = (unsigned char) no_op;
5008              p[3] = (unsigned char) no_op;
5009              goto on_failure;
5010            }
5011          break;
5012
5013        case jump_n:
5014          EXTRACT_NUMBER (mcnt, p + 2);
5015          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5016
5017          /* Originally, this is how many times we CAN jump.  */
5018          if (mcnt)
5019            {
5020               mcnt--;
5021               STORE_NUMBER (p + 2, mcnt);
5022#ifdef _LIBC
5023               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
5024#else
5025               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
5026#endif
5027	       goto unconditional_jump;
5028            }
5029          /* If don't have to jump any more, skip over the rest of command.  */
5030	  else
5031	    p += 4;
5032          break;
5033
5034	case set_number_at:
5035	  {
5036            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5037
5038            EXTRACT_NUMBER_AND_INCR (mcnt, p);
5039            p1 = p + mcnt;
5040            EXTRACT_NUMBER_AND_INCR (mcnt, p);
5041#ifdef _LIBC
5042            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
5043#else
5044            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
5045#endif
5046	    STORE_NUMBER (p1, mcnt);
5047            break;
5048          }
5049
5050#if 0
5051	/* The DEC Alpha C compiler 3.x generates incorrect code for the
5052	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
5053	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
5054	   macro and introducing temporary variables works around the bug.  */
5055
5056	case wordbound:
5057	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5058	  if (AT_WORD_BOUNDARY (d))
5059	    break;
5060	  goto fail;
5061
5062	case notwordbound:
5063	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5064	  if (AT_WORD_BOUNDARY (d))
5065	    goto fail;
5066	  break;
5067#else
5068	case wordbound:
5069	{
5070	  boolean prevchar, thischar;
5071
5072	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5073	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5074	    break;
5075
5076	  prevchar = WORDCHAR_P (d - 1);
5077	  thischar = WORDCHAR_P (d);
5078	  if (prevchar != thischar)
5079	    break;
5080	  goto fail;
5081	}
5082
5083      case notwordbound:
5084	{
5085	  boolean prevchar, thischar;
5086
5087	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5088	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5089	    goto fail;
5090
5091	  prevchar = WORDCHAR_P (d - 1);
5092	  thischar = WORDCHAR_P (d);
5093	  if (prevchar != thischar)
5094	    goto fail;
5095	  break;
5096	}
5097#endif
5098
5099	case wordbeg:
5100          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5101	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5102	    break;
5103          goto fail;
5104
5105	case wordend:
5106          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5107	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5108              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5109	    break;
5110          goto fail;
5111
5112#ifdef emacs
5113  	case before_dot:
5114          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5115 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5116  	    goto fail;
5117  	  break;
5118
5119  	case at_dot:
5120          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5121 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5122  	    goto fail;
5123  	  break;
5124
5125  	case after_dot:
5126          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5127          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5128  	    goto fail;
5129  	  break;
5130
5131	case syntaxspec:
5132          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5133	  mcnt = *p++;
5134	  goto matchsyntax;
5135
5136        case wordchar:
5137          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5138	  mcnt = (int) Sword;
5139        matchsyntax:
5140	  PREFETCH ();
5141	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5142	  d++;
5143	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5144	    goto fail;
5145          SET_REGS_MATCHED ();
5146	  break;
5147
5148	case notsyntaxspec:
5149          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5150	  mcnt = *p++;
5151	  goto matchnotsyntax;
5152
5153        case notwordchar:
5154          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5155	  mcnt = (int) Sword;
5156        matchnotsyntax:
5157	  PREFETCH ();
5158	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5159	  d++;
5160	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5161	    goto fail;
5162	  SET_REGS_MATCHED ();
5163          break;
5164
5165#else /* not emacs */
5166	case wordchar:
5167          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5168	  PREFETCH ();
5169          if (!WORDCHAR_P (d))
5170            goto fail;
5171	  SET_REGS_MATCHED ();
5172          d++;
5173	  break;
5174
5175	case notwordchar:
5176          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5177	  PREFETCH ();
5178	  if (WORDCHAR_P (d))
5179            goto fail;
5180          SET_REGS_MATCHED ();
5181          d++;
5182	  break;
5183#endif /* not emacs */
5184
5185        default:
5186          abort ();
5187	}
5188      continue;  /* Successfully executed one pattern command; keep going.  */
5189
5190
5191    /* We goto here if a matching operation fails. */
5192    fail:
5193      if (!FAIL_STACK_EMPTY ())
5194	{ /* A restart point is known.  Restore to that state.  */
5195          DEBUG_PRINT1 ("\nFAIL:\n");
5196          POP_FAILURE_POINT (d, p,
5197                             lowest_active_reg, highest_active_reg,
5198                             regstart, regend, reg_info);
5199
5200          /* If this failure point is a dummy, try the next one.  */
5201          if (!p)
5202	    goto fail;
5203
5204          /* If we failed to the end of the pattern, don't examine *p.  */
5205	  assert (p <= pend);
5206          if (p < pend)
5207            {
5208              boolean is_a_jump_n = false;
5209
5210              /* If failed to a backwards jump that's part of a repetition
5211                 loop, need to pop this failure point and use the next one.  */
5212              switch ((re_opcode_t) *p)
5213                {
5214                case jump_n:
5215                  is_a_jump_n = true;
5216                case maybe_pop_jump:
5217                case pop_failure_jump:
5218                case jump:
5219                  p1 = p + 1;
5220                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5221                  p1 += mcnt;
5222
5223                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5224                      || (!is_a_jump_n
5225                          && (re_opcode_t) *p1 == on_failure_jump))
5226                    goto fail;
5227                  break;
5228                default:
5229                  /* do nothing */ ;
5230                }
5231            }
5232
5233          if (d >= string1 && d <= end1)
5234	    dend = end_match_1;
5235        }
5236      else
5237        break;   /* Matching at this starting point really fails.  */
5238    } /* for (;;) */
5239
5240  if (best_regs_set)
5241    goto restore_best_regs;
5242
5243  FREE_VARIABLES ();
5244
5245  return -1;         			/* Failure to match.  */
5246} /* re_match_2 */
5247
5248/* Subroutine definitions for re_match_2.  */
5249
5250
5251/* We are passed P pointing to a register number after a start_memory.
5252
5253   Return true if the pattern up to the corresponding stop_memory can
5254   match the empty string, and false otherwise.
5255
5256   If we find the matching stop_memory, sets P to point to one past its number.
5257   Otherwise, sets P to an undefined byte less than or equal to END.
5258
5259   We don't handle duplicates properly (yet).  */
5260
5261static boolean
5262group_match_null_string_p (p, end, reg_info)
5263    unsigned char **p, *end;
5264    register_info_type *reg_info;
5265{
5266  int mcnt;
5267  /* Point to after the args to the start_memory.  */
5268  unsigned char *p1 = *p + 2;
5269
5270  while (p1 < end)
5271    {
5272      /* Skip over opcodes that can match nothing, and return true or
5273	 false, as appropriate, when we get to one that can't, or to the
5274         matching stop_memory.  */
5275
5276      switch ((re_opcode_t) *p1)
5277        {
5278        /* Could be either a loop or a series of alternatives.  */
5279        case on_failure_jump:
5280          p1++;
5281          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5282
5283          /* If the next operation is not a jump backwards in the
5284	     pattern.  */
5285
5286	  if (mcnt >= 0)
5287	    {
5288              /* Go through the on_failure_jumps of the alternatives,
5289                 seeing if any of the alternatives cannot match nothing.
5290                 The last alternative starts with only a jump,
5291                 whereas the rest start with on_failure_jump and end
5292                 with a jump, e.g., here is the pattern for `a|b|c':
5293
5294                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5295                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5296                 /exactn/1/c
5297
5298                 So, we have to first go through the first (n-1)
5299                 alternatives and then deal with the last one separately.  */
5300
5301
5302              /* Deal with the first (n-1) alternatives, which start
5303                 with an on_failure_jump (see above) that jumps to right
5304                 past a jump_past_alt.  */
5305
5306              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5307                {
5308                  /* `mcnt' holds how many bytes long the alternative
5309                     is, including the ending `jump_past_alt' and
5310                     its number.  */
5311
5312                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5313				                      reg_info))
5314                    return false;
5315
5316                  /* Move to right after this alternative, including the
5317		     jump_past_alt.  */
5318                  p1 += mcnt;
5319
5320                  /* Break if it's the beginning of an n-th alternative
5321                     that doesn't begin with an on_failure_jump.  */
5322                  if ((re_opcode_t) *p1 != on_failure_jump)
5323                    break;
5324
5325		  /* Still have to check that it's not an n-th
5326		     alternative that starts with an on_failure_jump.  */
5327		  p1++;
5328                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5329                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5330                    {
5331		      /* Get to the beginning of the n-th alternative.  */
5332                      p1 -= 3;
5333                      break;
5334                    }
5335                }
5336
5337              /* Deal with the last alternative: go back and get number
5338                 of the `jump_past_alt' just before it.  `mcnt' contains
5339                 the length of the alternative.  */
5340              EXTRACT_NUMBER (mcnt, p1 - 2);
5341
5342              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5343                return false;
5344
5345              p1 += mcnt;	/* Get past the n-th alternative.  */
5346            } /* if mcnt > 0 */
5347          break;
5348
5349
5350        case stop_memory:
5351	  assert (p1[1] == **p);
5352          *p = p1 + 2;
5353          return true;
5354
5355
5356        default:
5357          if (!common_op_match_null_string_p (&p1, end, reg_info))
5358            return false;
5359        }
5360    } /* while p1 < end */
5361
5362  return false;
5363} /* group_match_null_string_p */
5364
5365
5366/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5367   It expects P to be the first byte of a single alternative and END one
5368   byte past the last. The alternative can contain groups.  */
5369
5370static boolean
5371alt_match_null_string_p (p, end, reg_info)
5372    unsigned char *p, *end;
5373    register_info_type *reg_info;
5374{
5375  int mcnt;
5376  unsigned char *p1 = p;
5377
5378  while (p1 < end)
5379    {
5380      /* Skip over opcodes that can match nothing, and break when we get
5381         to one that can't.  */
5382
5383      switch ((re_opcode_t) *p1)
5384        {
5385	/* It's a loop.  */
5386        case on_failure_jump:
5387          p1++;
5388          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5389          p1 += mcnt;
5390          break;
5391
5392	default:
5393          if (!common_op_match_null_string_p (&p1, end, reg_info))
5394            return false;
5395        }
5396    }  /* while p1 < end */
5397
5398  return true;
5399} /* alt_match_null_string_p */
5400
5401
5402/* Deals with the ops common to group_match_null_string_p and
5403   alt_match_null_string_p.
5404
5405   Sets P to one after the op and its arguments, if any.  */
5406
5407static boolean
5408common_op_match_null_string_p (p, end, reg_info)
5409    unsigned char **p, *end;
5410    register_info_type *reg_info;
5411{
5412  int mcnt;
5413  boolean ret;
5414  int reg_no;
5415  unsigned char *p1 = *p;
5416
5417  switch ((re_opcode_t) *p1++)
5418    {
5419    case no_op:
5420    case begline:
5421    case endline:
5422    case begbuf:
5423    case endbuf:
5424    case wordbeg:
5425    case wordend:
5426    case wordbound:
5427    case notwordbound:
5428#ifdef emacs
5429    case before_dot:
5430    case at_dot:
5431    case after_dot:
5432#endif
5433      break;
5434
5435    case start_memory:
5436      reg_no = *p1;
5437      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5438      ret = group_match_null_string_p (&p1, end, reg_info);
5439
5440      /* Have to set this here in case we're checking a group which
5441         contains a group and a back reference to it.  */
5442
5443      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5444        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5445
5446      if (!ret)
5447        return false;
5448      break;
5449
5450    /* If this is an optimized succeed_n for zero times, make the jump.  */
5451    case jump:
5452      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5453      if (mcnt >= 0)
5454        p1 += mcnt;
5455      else
5456        return false;
5457      break;
5458
5459    case succeed_n:
5460      /* Get to the number of times to succeed.  */
5461      p1 += 2;
5462      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5463
5464      if (mcnt == 0)
5465        {
5466          p1 -= 4;
5467          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5468          p1 += mcnt;
5469        }
5470      else
5471        return false;
5472      break;
5473
5474    case duplicate:
5475      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5476        return false;
5477      break;
5478
5479    case set_number_at:
5480      p1 += 4;
5481
5482    default:
5483      /* All other opcodes mean we cannot match the empty string.  */
5484      return false;
5485  }
5486
5487  *p = p1;
5488  return true;
5489} /* common_op_match_null_string_p */
5490
5491
5492/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5493   bytes; nonzero otherwise.  */
5494
5495static int
5496bcmp_translate (s1, s2, len, translate)
5497     const char *s1, *s2;
5498     register int len;
5499     RE_TRANSLATE_TYPE translate;
5500{
5501  register const unsigned char *p1 = (const unsigned char *) s1;
5502  register const unsigned char *p2 = (const unsigned char *) s2;
5503  while (len)
5504    {
5505      if (translate[*p1++] != translate[*p2++]) return 1;
5506      len--;
5507    }
5508  return 0;
5509}
5510
5511/* Entry points for GNU code.  */
5512
5513/* re_compile_pattern is the GNU regular expression compiler: it
5514   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5515   Returns 0 if the pattern was valid, otherwise an error string.
5516
5517   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5518   are set in BUFP on entry.
5519
5520   We call regex_compile to do the actual compilation.  */
5521
5522const char *
5523re_compile_pattern (pattern, length, bufp)
5524     const char *pattern;
5525     size_t length;
5526     struct re_pattern_buffer *bufp;
5527{
5528  reg_errcode_t ret;
5529
5530  /* GNU code is written to assume at least RE_NREGS registers will be set
5531     (and at least one extra will be -1).  */
5532  bufp->regs_allocated = REGS_UNALLOCATED;
5533
5534  /* And GNU code determines whether or not to get register information
5535     by passing null for the REGS argument to re_match, etc., not by
5536     setting no_sub.  */
5537  bufp->no_sub = 0;
5538
5539  /* Match anchors at newline.  */
5540  bufp->newline_anchor = 1;
5541
5542  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5543
5544  if (!ret)
5545    return NULL;
5546  return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
5547}
5548#ifdef _LIBC
5549weak_alias (__re_compile_pattern, re_compile_pattern)
5550#endif
5551
5552/* Entry points compatible with 4.2 BSD regex library.  We don't define
5553   them unless specifically requested.  */
5554
5555#if defined _REGEX_RE_COMP || defined _LIBC
5556
5557/* BSD has one and only one pattern buffer.  */
5558static struct re_pattern_buffer re_comp_buf;
5559
5560char *
5561#ifdef _LIBC
5562/* Make these definitions weak in libc, so POSIX programs can redefine
5563   these names if they don't use our functions, and still use
5564   regcomp/regexec below without link errors.  */
5565weak_function
5566#endif
5567re_comp (s)
5568    const char *s;
5569{
5570  reg_errcode_t ret;
5571
5572  if (!s)
5573    {
5574      if (!re_comp_buf.buffer)
5575	return gettext ("No previous regular expression");
5576      return 0;
5577    }
5578
5579  if (!re_comp_buf.buffer)
5580    {
5581      re_comp_buf.buffer = (unsigned char *) malloc (200);
5582      if (re_comp_buf.buffer == NULL)
5583        return (char *) gettext (re_error_msgid
5584				 + re_error_msgid_idx[(int) REG_ESPACE]);
5585      re_comp_buf.allocated = 200;
5586
5587      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5588      if (re_comp_buf.fastmap == NULL)
5589	return (char *) gettext (re_error_msgid
5590				 + re_error_msgid_idx[(int) REG_ESPACE]);
5591    }
5592
5593  /* Since `re_exec' always passes NULL for the `regs' argument, we
5594     don't need to initialize the pattern buffer fields which affect it.  */
5595
5596  /* Match anchors at newlines.  */
5597  re_comp_buf.newline_anchor = 1;
5598
5599  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5600
5601  if (!ret)
5602    return NULL;
5603
5604  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5605  return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
5606}
5607
5608
5609int
5610#ifdef _LIBC
5611weak_function
5612#endif
5613re_exec (s)
5614    const char *s;
5615{
5616  const int len = strlen (s);
5617  return
5618    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5619}
5620
5621#endif /* _REGEX_RE_COMP */
5622
5623/* POSIX.2 functions.  Don't define these for Emacs.  */
5624
5625#ifndef emacs
5626
5627/* regcomp takes a regular expression as a string and compiles it.
5628
5629   PREG is a regex_t *.  We do not expect any fields to be initialized,
5630   since POSIX says we shouldn't.  Thus, we set
5631
5632     `buffer' to the compiled pattern;
5633     `used' to the length of the compiled pattern;
5634     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5635       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5636       RE_SYNTAX_POSIX_BASIC;
5637     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5638     `fastmap' to an allocated space for the fastmap;
5639     `fastmap_accurate' to zero;
5640     `re_nsub' to the number of subexpressions in PATTERN.
5641
5642   PATTERN is the address of the pattern string.
5643
5644   CFLAGS is a series of bits which affect compilation.
5645
5646     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5647     use POSIX basic syntax.
5648
5649     If REG_NEWLINE is set, then . and [^...] don't match newline.
5650     Also, regexec will try a match beginning after every newline.
5651
5652     If REG_ICASE is set, then we considers upper- and lowercase
5653     versions of letters to be equivalent when matching.
5654
5655     If REG_NOSUB is set, then when PREG is passed to regexec, that
5656     routine will report only success or failure, and nothing about the
5657     registers.
5658
5659   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5660   the return codes and their meanings.)  */
5661
5662int
5663regcomp (preg, pattern, cflags)
5664    regex_t *preg;
5665    const char *pattern;
5666    int cflags;
5667{
5668  reg_errcode_t ret;
5669  reg_syntax_t syntax
5670    = (cflags & REG_EXTENDED) ?
5671      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5672
5673  /* regex_compile will allocate the space for the compiled pattern.  */
5674  preg->buffer = 0;
5675  preg->allocated = 0;
5676  preg->used = 0;
5677
5678  /* Try to allocate space for the fastmap.  */
5679  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5680
5681  if (cflags & REG_ICASE)
5682    {
5683      unsigned i;
5684
5685      preg->translate
5686	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5687				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5688      if (preg->translate == NULL)
5689        return (int) REG_ESPACE;
5690
5691      /* Map uppercase characters to corresponding lowercase ones.  */
5692      for (i = 0; i < CHAR_SET_SIZE; i++)
5693        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
5694    }
5695  else
5696    preg->translate = NULL;
5697
5698  /* If REG_NEWLINE is set, newlines are treated differently.  */
5699  if (cflags & REG_NEWLINE)
5700    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5701      syntax &= ~RE_DOT_NEWLINE;
5702      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5703      /* It also changes the matching behavior.  */
5704      preg->newline_anchor = 1;
5705    }
5706  else
5707    preg->newline_anchor = 0;
5708
5709  preg->no_sub = !!(cflags & REG_NOSUB);
5710
5711  /* POSIX says a null character in the pattern terminates it, so we
5712     can use strlen here in compiling the pattern.  */
5713  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5714
5715  /* POSIX doesn't distinguish between an unmatched open-group and an
5716     unmatched close-group: both are REG_EPAREN.  */
5717  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5718
5719  if (ret == REG_NOERROR && preg->fastmap)
5720    {
5721      /* Compute the fastmap now, since regexec cannot modify the pattern
5722	 buffer.  */
5723      if (re_compile_fastmap (preg) == -2)
5724	{
5725	  /* Some error occured while computing the fastmap, just forget
5726	     about it.  */
5727	  free (preg->fastmap);
5728	  preg->fastmap = NULL;
5729	}
5730    }
5731
5732  return (int) ret;
5733}
5734#ifdef _LIBC
5735weak_alias (__regcomp, regcomp)
5736#endif
5737
5738
5739/* regexec searches for a given pattern, specified by PREG, in the
5740   string STRING.
5741
5742   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5743   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5744   least NMATCH elements, and we set them to the offsets of the
5745   corresponding matched substrings.
5746
5747   EFLAGS specifies `execution flags' which affect matching: if
5748   REG_NOTBOL is set, then ^ does not match at the beginning of the
5749   string; if REG_NOTEOL is set, then $ does not match at the end.
5750
5751   We return 0 if we find a match and REG_NOMATCH if not.  */
5752
5753int
5754regexec (preg, string, nmatch, pmatch, eflags)
5755    const regex_t *preg;
5756    const char *string;
5757    size_t nmatch;
5758    regmatch_t pmatch[];
5759    int eflags;
5760{
5761  int ret;
5762  struct re_registers regs;
5763  regex_t private_preg;
5764  int len = strlen (string);
5765  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5766
5767  private_preg = *preg;
5768
5769  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5770  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5771
5772  /* The user has told us exactly how many registers to return
5773     information about, via `nmatch'.  We have to pass that on to the
5774     matching routines.  */
5775  private_preg.regs_allocated = REGS_FIXED;
5776
5777  if (want_reg_info)
5778    {
5779      regs.num_regs = nmatch;
5780      regs.start = TALLOC (nmatch * 2, regoff_t);
5781      if (regs.start == NULL)
5782        return (int) REG_NOMATCH;
5783      regs.end = regs.start + nmatch;
5784    }
5785
5786  /* Perform the searching operation.  */
5787  ret = re_search (&private_preg, string, len,
5788                   /* start: */ 0, /* range: */ len,
5789                   want_reg_info ? &regs : (struct re_registers *) 0);
5790
5791  /* Copy the register information to the POSIX structure.  */
5792  if (want_reg_info)
5793    {
5794      if (ret >= 0)
5795        {
5796          unsigned r;
5797
5798          for (r = 0; r < nmatch; r++)
5799            {
5800              pmatch[r].rm_so = regs.start[r];
5801              pmatch[r].rm_eo = regs.end[r];
5802            }
5803        }
5804
5805      /* If we needed the temporary register info, free the space now.  */
5806      free (regs.start);
5807    }
5808
5809  /* We want zero return to mean success, unlike `re_search'.  */
5810  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5811}
5812#ifdef _LIBC
5813weak_alias (__regexec, regexec)
5814#endif
5815
5816
5817/* Returns a message corresponding to an error code, ERRCODE, returned
5818   from either regcomp or regexec.   We don't use PREG here.  */
5819
5820size_t
5821regerror (errcode, preg, errbuf, errbuf_size)
5822    int errcode;
5823    const regex_t *preg;
5824    char *errbuf;
5825    size_t errbuf_size;
5826{
5827  const char *msg;
5828  size_t msg_size;
5829
5830  if (errcode < 0
5831      || errcode >= (int) (sizeof (re_error_msgid_idx)
5832			   / sizeof (re_error_msgid_idx[0])))
5833    /* Only error codes returned by the rest of the code should be passed
5834       to this routine.  If we are given anything else, or if other regex
5835       code generates an invalid error code, then the program has a bug.
5836       Dump core so we can fix it.  */
5837    abort ();
5838
5839  msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
5840
5841  msg_size = strlen (msg) + 1; /* Includes the null.  */
5842
5843  if (errbuf_size != 0)
5844    {
5845      if (msg_size > errbuf_size)
5846        {
5847#if defined HAVE_MEMPCPY || defined _LIBC
5848	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5849#else
5850          memcpy (errbuf, msg, errbuf_size - 1);
5851          errbuf[errbuf_size - 1] = 0;
5852#endif
5853        }
5854      else
5855        memcpy (errbuf, msg, msg_size);
5856    }
5857
5858  return msg_size;
5859}
5860#ifdef _LIBC
5861weak_alias (__regerror, regerror)
5862#endif
5863
5864
5865/* Free dynamically allocated space used by PREG.  */
5866
5867void
5868regfree (preg)
5869    regex_t *preg;
5870{
5871  if (preg->buffer != NULL)
5872    free (preg->buffer);
5873  preg->buffer = NULL;
5874
5875  preg->allocated = 0;
5876  preg->used = 0;
5877
5878  if (preg->fastmap != NULL)
5879    free (preg->fastmap);
5880  preg->fastmap = NULL;
5881  preg->fastmap_accurate = 0;
5882
5883  if (preg->translate != NULL)
5884    free (preg->translate);
5885  preg->translate = NULL;
5886}
5887#ifdef _LIBC
5888weak_alias (__regfree, regfree)
5889#endif
5890
5891#endif /* not emacs  */
5892