regex.c revision 126209
1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P1003.2/D11.2, except for some of the
4   internationalization features.)
5   Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Library General Public License as
9   published by the Free Software Foundation; either version 2 of the
10   License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Library General Public License for more details.
16
17   You should have received a copy of the GNU Library General Public
18   License along with the GNU C Library; see the file COPYING.LIB.  If not,
19   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20   Boston, MA 02111-1307, USA.  */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined _AIX && !defined REGEX_MALLOC
24  #pragma alloca
25#endif
26
27#undef	_GNU_SOURCE
28#define _GNU_SOURCE
29
30#ifdef HAVE_CONFIG_H
31# include <config.h>
32#endif
33
34#ifndef PARAMS
35# if defined __GNUC__ || (defined __STDC__ && __STDC__)
36#  define PARAMS(args) args
37# else
38#  define PARAMS(args) ()
39# endif  /* GCC.  */
40#endif  /* Not PARAMS.  */
41
42#if defined STDC_HEADERS && !defined emacs
43# include <stddef.h>
44#else
45/* We need this for `regex.h', and perhaps for the Emacs include files.  */
46# include <sys/types.h>
47#endif
48
49#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
50
51/* For platform which support the ISO C amendement 1 functionality we
52   support user defined character classes.  */
53#if defined _LIBC || WIDE_CHAR_SUPPORT
54/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
55# include <wchar.h>
56# include <wctype.h>
57#endif
58
59#ifdef _LIBC
60/* We have to keep the namespace clean.  */
61# define regfree(preg) __regfree (preg)
62# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
63# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
64# define regerror(errcode, preg, errbuf, errbuf_size) \
65	__regerror(errcode, preg, errbuf, errbuf_size)
66# define re_set_registers(bu, re, nu, st, en) \
67	__re_set_registers (bu, re, nu, st, en)
68# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
69	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
70# define re_match(bufp, string, size, pos, regs) \
71	__re_match (bufp, string, size, pos, regs)
72# define re_search(bufp, string, size, startpos, range, regs) \
73	__re_search (bufp, string, size, startpos, range, regs)
74# define re_compile_pattern(pattern, length, bufp) \
75	__re_compile_pattern (pattern, length, bufp)
76# define re_set_syntax(syntax) __re_set_syntax (syntax)
77# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
78	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
79# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
80
81#define btowc __btowc
82#endif
83
84
85#ifndef _
86/* This is for other GNU distributions with internationalized messages.
87   When compiling libc, the _ and N_ macros are predefined.  */
88# ifdef HAVE_LIBINTL_H
89#  include <libintl.h>
90# else
91#  define gettext(msgid) (msgid)
92# endif
93# define N_(msgid) (msgid)
94#endif
95
96/* The `emacs' switch turns on certain matching commands
97   that make sense only in Emacs. */
98#ifdef emacs
99
100# include "lisp.h"
101# include "buffer.h"
102# include "syntax.h"
103
104#else  /* not emacs */
105
106/* If we are not linking with Emacs proper,
107   we can't use the relocating allocator
108   even if config.h says that we can.  */
109# undef REL_ALLOC
110
111# if defined STDC_HEADERS || defined _LIBC
112#  include <stdlib.h>
113# else
114char *malloc ();
115char *realloc ();
116# endif
117
118/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
119   If nothing else has been done, use the method below.  */
120# ifdef INHIBIT_STRING_HEADER
121#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
122#   if !defined bzero && !defined bcopy
123#    undef INHIBIT_STRING_HEADER
124#   endif
125#  endif
126# endif
127
128/* This is the normal way of making sure we have a bcopy and a bzero.
129   This is used in most programs--a few other programs avoid this
130   by defining INHIBIT_STRING_HEADER.  */
131# ifndef INHIBIT_STRING_HEADER
132#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
133#   include <string.h>
134#   ifndef bzero
135#    ifndef _LIBC
136#     define bzero(s, n)	(memset (s, '\0', n), (s))
137#    else
138#     define bzero(s, n)	__bzero (s, n)
139#    endif
140#   endif
141#  else
142#   include <strings.h>
143#   ifndef memcmp
144#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
145#   endif
146#   ifndef memcpy
147#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
148#   endif
149#  endif
150# endif
151
152/* Define the syntax stuff for \<, \>, etc.  */
153
154/* This must be nonzero for the wordchar and notwordchar pattern
155   commands in re_match_2.  */
156# ifndef Sword
157#  define Sword 1
158# endif
159
160# ifdef SWITCH_ENUM_BUG
161#  define SWITCH_ENUM_CAST(x) ((int)(x))
162# else
163#  define SWITCH_ENUM_CAST(x) (x)
164# endif
165
166#endif /* not emacs */
167
168/* Get the interface, including the syntax bits.  */
169#include <regex.h>
170
171/* isalpha etc. are used for the character classes.  */
172#include <ctype.h>
173
174/* Jim Meyering writes:
175
176   "... Some ctype macros are valid only for character codes that
177   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
178   using /bin/cc or gcc but without giving an ansi option).  So, all
179   ctype uses should be through macros like ISPRINT...  If
180   STDC_HEADERS is defined, then autoconf has verified that the ctype
181   macros don't need to be guarded with references to isascii. ...
182   Defining isascii to 1 should let any compiler worth its salt
183   eliminate the && through constant folding."
184   Solaris defines some of these symbols so we must undefine them first.  */
185
186#undef ISASCII
187#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
188# define ISASCII(c) 1
189#else
190# define ISASCII(c) isascii(c)
191#endif
192
193#ifdef isblank
194# define ISBLANK(c) (ISASCII (c) && isblank (c))
195#else
196# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
197#endif
198#ifdef isgraph
199# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
200#else
201# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
202#endif
203
204#undef ISPRINT
205#define ISPRINT(c) (ISASCII (c) && isprint (c))
206#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
207#define ISALNUM(c) (ISASCII (c) && isalnum (c))
208#define ISALPHA(c) (ISASCII (c) && isalpha (c))
209#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
210#define ISLOWER(c) (ISASCII (c) && islower (c))
211#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
212#define ISSPACE(c) (ISASCII (c) && isspace (c))
213#define ISUPPER(c) (ISASCII (c) && isupper (c))
214#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
215
216#ifdef _tolower
217# define TOLOWER(c) _tolower(c)
218#else
219# define TOLOWER(c) tolower(c)
220#endif
221
222#ifndef NULL
223# define NULL (void *)0
224#endif
225
226/* We remove any previous definition of `SIGN_EXTEND_CHAR',
227   since ours (we hope) works properly with all combinations of
228   machines, compilers, `char' and `unsigned char' argument types.
229   (Per Bothner suggested the basic approach.)  */
230#undef SIGN_EXTEND_CHAR
231#if __STDC__
232# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
233#else  /* not __STDC__ */
234/* As in Harbison and Steele.  */
235# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
236#endif
237
238#ifndef emacs
239/* How many characters in the character set.  */
240# define CHAR_SET_SIZE 256
241
242# ifdef SYNTAX_TABLE
243
244extern char *re_syntax_table;
245
246# else /* not SYNTAX_TABLE */
247
248static char re_syntax_table[CHAR_SET_SIZE];
249
250static void
251init_syntax_once ()
252{
253   register int c;
254   static int done = 0;
255
256   if (done)
257     return;
258   bzero (re_syntax_table, sizeof re_syntax_table);
259
260   for (c = 0; c < CHAR_SET_SIZE; ++c)
261     if (ISALNUM (c))
262	re_syntax_table[c] = Sword;
263
264   re_syntax_table['_'] = Sword;
265
266   done = 1;
267}
268
269# endif /* not SYNTAX_TABLE */
270
271# define SYNTAX(c) re_syntax_table[((c) & 0xFF)]
272
273#endif /* emacs */
274
275/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
276   use `alloca' instead of `malloc'.  This is because using malloc in
277   re_search* or re_match* could cause memory leaks when C-g is used in
278   Emacs; also, malloc is slower and causes storage fragmentation.  On
279   the other hand, malloc is more portable, and easier to debug.
280
281   Because we sometimes use alloca, some routines have to be macros,
282   not functions -- `alloca'-allocated space disappears at the end of the
283   function it is called in.  */
284
285#ifdef REGEX_MALLOC
286
287# define REGEX_ALLOCATE malloc
288# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
289# define REGEX_FREE free
290
291#else /* not REGEX_MALLOC  */
292
293/* Emacs already defines alloca, sometimes.  */
294# ifndef alloca
295
296/* Make alloca work the best possible way.  */
297#  ifdef __GNUC__
298#   define alloca __builtin_alloca
299#  else /* not __GNUC__ */
300#   if HAVE_ALLOCA_H
301#    include <alloca.h>
302#   endif /* HAVE_ALLOCA_H */
303#  endif /* not __GNUC__ */
304
305# endif /* not alloca */
306
307# define REGEX_ALLOCATE alloca
308
309/* Assumes a `char *destination' variable.  */
310# define REGEX_REALLOCATE(source, osize, nsize)				\
311  (destination = (char *) alloca (nsize),				\
312   memcpy (destination, source, osize))
313
314/* No need to do anything to free, after alloca.  */
315# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
316
317#endif /* not REGEX_MALLOC */
318
319/* Define how to allocate the failure stack.  */
320
321#if defined REL_ALLOC && defined REGEX_MALLOC
322
323# define REGEX_ALLOCATE_STACK(size)				\
324  r_alloc (&failure_stack_ptr, (size))
325# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
326  r_re_alloc (&failure_stack_ptr, (nsize))
327# define REGEX_FREE_STACK(ptr)					\
328  r_alloc_free (&failure_stack_ptr)
329
330#else /* not using relocating allocator */
331
332# ifdef REGEX_MALLOC
333
334#  define REGEX_ALLOCATE_STACK malloc
335#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
336#  define REGEX_FREE_STACK free
337
338# else /* not REGEX_MALLOC */
339
340#  define REGEX_ALLOCATE_STACK alloca
341
342#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
343   REGEX_REALLOCATE (source, osize, nsize)
344/* No need to explicitly free anything.  */
345#  define REGEX_FREE_STACK(arg)
346
347# endif /* not REGEX_MALLOC */
348#endif /* not using relocating allocator */
349
350
351/* True if `size1' is non-NULL and PTR is pointing anywhere inside
352   `string1' or just past its end.  This works if PTR is NULL, which is
353   a good thing.  */
354#define FIRST_STRING_P(ptr) 					\
355  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
356
357/* (Re)Allocate N items of type T using malloc, or fail.  */
358#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
359#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
360#define RETALLOC_IF(addr, n, t) \
361  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
362#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
363
364#define BYTEWIDTH 8 /* In bits.  */
365
366#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
367
368#undef MAX
369#undef MIN
370#define MAX(a, b) ((a) > (b) ? (a) : (b))
371#define MIN(a, b) ((a) < (b) ? (a) : (b))
372
373typedef char boolean;
374#define false 0
375#define true 1
376
377static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
378					const char *string1, int size1,
379					const char *string2, int size2,
380					int pos,
381					struct re_registers *regs,
382					int stop));
383
384/* These are the command codes that appear in compiled regular
385   expressions.  Some opcodes are followed by argument bytes.  A
386   command code can specify any interpretation whatsoever for its
387   arguments.  Zero bytes may appear in the compiled regular expression.  */
388
389typedef enum
390{
391  no_op = 0,
392
393  /* Succeed right away--no more backtracking.  */
394  succeed,
395
396        /* Followed by one byte giving n, then by n literal bytes.  */
397  exactn,
398
399        /* Matches any (more or less) character.  */
400  anychar,
401
402        /* Matches any one char belonging to specified set.  First
403           following byte is number of bitmap bytes.  Then come bytes
404           for a bitmap saying which chars are in.  Bits in each byte
405           are ordered low-bit-first.  A character is in the set if its
406           bit is 1.  A character too large to have a bit in the map is
407           automatically not in the set.  */
408  charset,
409
410        /* Same parameters as charset, but match any character that is
411           not one of those specified.  */
412  charset_not,
413
414        /* Start remembering the text that is matched, for storing in a
415           register.  Followed by one byte with the register number, in
416           the range 0 to one less than the pattern buffer's re_nsub
417           field.  Then followed by one byte with the number of groups
418           inner to this one.  (This last has to be part of the
419           start_memory only because we need it in the on_failure_jump
420           of re_match_2.)  */
421  start_memory,
422
423        /* Stop remembering the text that is matched and store it in a
424           memory register.  Followed by one byte with the register
425           number, in the range 0 to one less than `re_nsub' in the
426           pattern buffer, and one byte with the number of inner groups,
427           just like `start_memory'.  (We need the number of inner
428           groups here because we don't have any easy way of finding the
429           corresponding start_memory when we're at a stop_memory.)  */
430  stop_memory,
431
432        /* Match a duplicate of something remembered. Followed by one
433           byte containing the register number.  */
434  duplicate,
435
436        /* Fail unless at beginning of line.  */
437  begline,
438
439        /* Fail unless at end of line.  */
440  endline,
441
442        /* Succeeds if at beginning of buffer (if emacs) or at beginning
443           of string to be matched (if not).  */
444  begbuf,
445
446        /* Analogously, for end of buffer/string.  */
447  endbuf,
448
449        /* Followed by two byte relative address to which to jump.  */
450  jump,
451
452	/* Same as jump, but marks the end of an alternative.  */
453  jump_past_alt,
454
455        /* Followed by two-byte relative address of place to resume at
456           in case of failure.  */
457  on_failure_jump,
458
459        /* Like on_failure_jump, but pushes a placeholder instead of the
460           current string position when executed.  */
461  on_failure_keep_string_jump,
462
463        /* Throw away latest failure point and then jump to following
464           two-byte relative address.  */
465  pop_failure_jump,
466
467        /* Change to pop_failure_jump if know won't have to backtrack to
468           match; otherwise change to jump.  This is used to jump
469           back to the beginning of a repeat.  If what follows this jump
470           clearly won't match what the repeat does, such that we can be
471           sure that there is no use backtracking out of repetitions
472           already matched, then we change it to a pop_failure_jump.
473           Followed by two-byte address.  */
474  maybe_pop_jump,
475
476        /* Jump to following two-byte address, and push a dummy failure
477           point. This failure point will be thrown away if an attempt
478           is made to use it for a failure.  A `+' construct makes this
479           before the first repeat.  Also used as an intermediary kind
480           of jump when compiling an alternative.  */
481  dummy_failure_jump,
482
483	/* Push a dummy failure point and continue.  Used at the end of
484	   alternatives.  */
485  push_dummy_failure,
486
487        /* Followed by two-byte relative address and two-byte number n.
488           After matching N times, jump to the address upon failure.  */
489  succeed_n,
490
491        /* Followed by two-byte relative address, and two-byte number n.
492           Jump to the address N times, then fail.  */
493  jump_n,
494
495        /* Set the following two-byte relative address to the
496           subsequent two-byte number.  The address *includes* the two
497           bytes of number.  */
498  set_number_at,
499
500  wordchar,	/* Matches any word-constituent character.  */
501  notwordchar,	/* Matches any char that is not a word-constituent.  */
502
503  wordbeg,	/* Succeeds if at word beginning.  */
504  wordend,	/* Succeeds if at word end.  */
505
506  wordbound,	/* Succeeds if at a word boundary.  */
507  notwordbound	/* Succeeds if not at a word boundary.  */
508
509#ifdef emacs
510  ,before_dot,	/* Succeeds if before point.  */
511  at_dot,	/* Succeeds if at point.  */
512  after_dot,	/* Succeeds if after point.  */
513
514	/* Matches any character whose syntax is specified.  Followed by
515           a byte which contains a syntax code, e.g., Sword.  */
516  syntaxspec,
517
518	/* Matches any character whose syntax is not that specified.  */
519  notsyntaxspec
520#endif /* emacs */
521} re_opcode_t;
522
523/* Common operations on the compiled pattern.  */
524
525/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
526
527#define STORE_NUMBER(destination, number)				\
528  do {									\
529    (destination)[0] = (number) & 0377;					\
530    (destination)[1] = (number) >> 8;					\
531  } while (0)
532
533/* Same as STORE_NUMBER, except increment DESTINATION to
534   the byte after where the number is stored.  Therefore, DESTINATION
535   must be an lvalue.  */
536
537#define STORE_NUMBER_AND_INCR(destination, number)			\
538  do {									\
539    STORE_NUMBER (destination, number);					\
540    (destination) += 2;							\
541  } while (0)
542
543/* Put into DESTINATION a number stored in two contiguous bytes starting
544   at SOURCE.  */
545
546#define EXTRACT_NUMBER(destination, source)				\
547  do {									\
548    (destination) = *(source) & 0377;					\
549    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
550  } while (0)
551
552#ifdef DEBUG
553static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
554static void
555extract_number (dest, source)
556    int *dest;
557    unsigned char *source;
558{
559  int temp = SIGN_EXTEND_CHAR (*(source + 1));
560  *dest = *source & 0377;
561  *dest += temp << 8;
562}
563
564# ifndef EXTRACT_MACROS /* To debug the macros.  */
565#  undef EXTRACT_NUMBER
566#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
567# endif /* not EXTRACT_MACROS */
568
569#endif /* DEBUG */
570
571/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
572   SOURCE must be an lvalue.  */
573
574#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
575  do {									\
576    EXTRACT_NUMBER (destination, source);				\
577    (source) += 2; 							\
578  } while (0)
579
580#ifdef DEBUG
581static void extract_number_and_incr _RE_ARGS ((int *destination,
582					       unsigned char **source));
583static void
584extract_number_and_incr (destination, source)
585    int *destination;
586    unsigned char **source;
587{
588  extract_number (destination, *source);
589  *source += 2;
590}
591
592# ifndef EXTRACT_MACROS
593#  undef EXTRACT_NUMBER_AND_INCR
594#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
595  extract_number_and_incr (&dest, &src)
596# endif /* not EXTRACT_MACROS */
597
598#endif /* DEBUG */
599
600/* If DEBUG is defined, Regex prints many voluminous messages about what
601   it is doing (if the variable `debug' is nonzero).  If linked with the
602   main program in `iregex.c', you can enter patterns and strings
603   interactively.  And if linked with the main program in `main.c' and
604   the other test files, you can run the already-written tests.  */
605
606#ifdef DEBUG
607
608/* We use standard I/O for debugging.  */
609# include <stdio.h>
610
611/* It is useful to test things that ``must'' be true when debugging.  */
612# include <assert.h>
613
614static int debug;
615
616# define DEBUG_STATEMENT(e) e
617# define DEBUG_PRINT1(x) if (debug) printf (x)
618# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
619# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
620# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
621# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
622  if (debug) print_partial_compiled_pattern (s, e)
623# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
624  if (debug) print_double_string (w, s1, sz1, s2, sz2)
625
626
627/* Print the fastmap in human-readable form.  */
628
629void
630print_fastmap (fastmap)
631    char *fastmap;
632{
633  unsigned was_a_range = 0;
634  unsigned i = 0;
635
636  while (i < (1 << BYTEWIDTH))
637    {
638      if (fastmap[i++])
639	{
640	  was_a_range = 0;
641          putchar (i - 1);
642          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
643            {
644              was_a_range = 1;
645              i++;
646            }
647	  if (was_a_range)
648            {
649              printf ("-");
650              putchar (i - 1);
651            }
652        }
653    }
654  putchar ('\n');
655}
656
657
658/* Print a compiled pattern string in human-readable form, starting at
659   the START pointer into it and ending just before the pointer END.  */
660
661void
662print_partial_compiled_pattern (start, end)
663    unsigned char *start;
664    unsigned char *end;
665{
666  int mcnt, mcnt2;
667  unsigned char *p1;
668  unsigned char *p = start;
669  unsigned char *pend = end;
670
671  if (start == NULL)
672    {
673      printf ("(null)\n");
674      return;
675    }
676
677  /* Loop over pattern commands.  */
678  while (p < pend)
679    {
680      printf ("%d:\t", p - start);
681
682      switch ((re_opcode_t) *p++)
683	{
684        case no_op:
685          printf ("/no_op");
686          break;
687
688	case exactn:
689	  mcnt = *p++;
690          printf ("/exactn/%d", mcnt);
691          do
692	    {
693              putchar ('/');
694	      putchar (*p++);
695            }
696          while (--mcnt);
697          break;
698
699	case start_memory:
700          mcnt = *p++;
701          printf ("/start_memory/%d/%d", mcnt, *p++);
702          break;
703
704	case stop_memory:
705          mcnt = *p++;
706	  printf ("/stop_memory/%d/%d", mcnt, *p++);
707          break;
708
709	case duplicate:
710	  printf ("/duplicate/%d", *p++);
711	  break;
712
713	case anychar:
714	  printf ("/anychar");
715	  break;
716
717	case charset:
718        case charset_not:
719          {
720            register int c, last = -100;
721	    register int in_range = 0;
722
723	    printf ("/charset [%s",
724	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
725
726            assert (p + *p < pend);
727
728            for (c = 0; c < 256; c++)
729	      if (c / 8 < *p
730		  && (p[1 + (c/8)] & (1 << (c % 8))))
731		{
732		  /* Are we starting a range?  */
733		  if (last + 1 == c && ! in_range)
734		    {
735		      putchar ('-');
736		      in_range = 1;
737		    }
738		  /* Have we broken a range?  */
739		  else if (last + 1 != c && in_range)
740              {
741		      putchar (last);
742		      in_range = 0;
743		    }
744
745		  if (! in_range)
746		    putchar (c);
747
748		  last = c;
749              }
750
751	    if (in_range)
752	      putchar (last);
753
754	    putchar (']');
755
756	    p += 1 + *p;
757	  }
758	  break;
759
760	case begline:
761	  printf ("/begline");
762          break;
763
764	case endline:
765          printf ("/endline");
766          break;
767
768	case on_failure_jump:
769          extract_number_and_incr (&mcnt, &p);
770  	  printf ("/on_failure_jump to %d", p + mcnt - start);
771          break;
772
773	case on_failure_keep_string_jump:
774          extract_number_and_incr (&mcnt, &p);
775  	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
776          break;
777
778	case dummy_failure_jump:
779          extract_number_and_incr (&mcnt, &p);
780  	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
781          break;
782
783	case push_dummy_failure:
784          printf ("/push_dummy_failure");
785          break;
786
787        case maybe_pop_jump:
788          extract_number_and_incr (&mcnt, &p);
789  	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
790	  break;
791
792        case pop_failure_jump:
793	  extract_number_and_incr (&mcnt, &p);
794  	  printf ("/pop_failure_jump to %d", p + mcnt - start);
795	  break;
796
797        case jump_past_alt:
798	  extract_number_and_incr (&mcnt, &p);
799  	  printf ("/jump_past_alt to %d", p + mcnt - start);
800	  break;
801
802        case jump:
803	  extract_number_and_incr (&mcnt, &p);
804  	  printf ("/jump to %d", p + mcnt - start);
805	  break;
806
807        case succeed_n:
808          extract_number_and_incr (&mcnt, &p);
809	  p1 = p + mcnt;
810          extract_number_and_incr (&mcnt2, &p);
811	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
812          break;
813
814        case jump_n:
815          extract_number_and_incr (&mcnt, &p);
816	  p1 = p + mcnt;
817          extract_number_and_incr (&mcnt2, &p);
818	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
819          break;
820
821        case set_number_at:
822          extract_number_and_incr (&mcnt, &p);
823	  p1 = p + mcnt;
824          extract_number_and_incr (&mcnt2, &p);
825	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
826          break;
827
828        case wordbound:
829	  printf ("/wordbound");
830	  break;
831
832	case notwordbound:
833	  printf ("/notwordbound");
834          break;
835
836	case wordbeg:
837	  printf ("/wordbeg");
838	  break;
839
840	case wordend:
841	  printf ("/wordend");
842
843# ifdef emacs
844	case before_dot:
845	  printf ("/before_dot");
846          break;
847
848	case at_dot:
849	  printf ("/at_dot");
850          break;
851
852	case after_dot:
853	  printf ("/after_dot");
854          break;
855
856	case syntaxspec:
857          printf ("/syntaxspec");
858	  mcnt = *p++;
859	  printf ("/%d", mcnt);
860          break;
861
862	case notsyntaxspec:
863          printf ("/notsyntaxspec");
864	  mcnt = *p++;
865	  printf ("/%d", mcnt);
866	  break;
867# endif /* emacs */
868
869	case wordchar:
870	  printf ("/wordchar");
871          break;
872
873	case notwordchar:
874	  printf ("/notwordchar");
875          break;
876
877	case begbuf:
878	  printf ("/begbuf");
879          break;
880
881	case endbuf:
882	  printf ("/endbuf");
883          break;
884
885        default:
886          printf ("?%d", *(p-1));
887	}
888
889      putchar ('\n');
890    }
891
892  printf ("%d:\tend of pattern.\n", p - start);
893}
894
895
896void
897print_compiled_pattern (bufp)
898    struct re_pattern_buffer *bufp;
899{
900  unsigned char *buffer = bufp->buffer;
901
902  print_partial_compiled_pattern (buffer, buffer + bufp->used);
903  printf ("%ld bytes used/%ld bytes allocated.\n",
904	  bufp->used, bufp->allocated);
905
906  if (bufp->fastmap_accurate && bufp->fastmap)
907    {
908      printf ("fastmap: ");
909      print_fastmap (bufp->fastmap);
910    }
911
912  printf ("re_nsub: %d\t", bufp->re_nsub);
913  printf ("regs_alloc: %d\t", bufp->regs_allocated);
914  printf ("can_be_null: %d\t", bufp->can_be_null);
915  printf ("newline_anchor: %d\n", bufp->newline_anchor);
916  printf ("no_sub: %d\t", bufp->no_sub);
917  printf ("not_bol: %d\t", bufp->not_bol);
918  printf ("not_eol: %d\t", bufp->not_eol);
919  printf ("syntax: %lx\n", bufp->syntax);
920  /* Perhaps we should print the translate table?  */
921}
922
923
924void
925print_double_string (where, string1, size1, string2, size2)
926    const char *where;
927    const char *string1;
928    const char *string2;
929    int size1;
930    int size2;
931{
932  int this_char;
933
934  if (where == NULL)
935    printf ("(null)");
936  else
937    {
938      if (FIRST_STRING_P (where))
939        {
940          for (this_char = where - string1; this_char < size1; this_char++)
941            putchar (string1[this_char]);
942
943          where = string2;
944        }
945
946      for (this_char = where - string2; this_char < size2; this_char++)
947        putchar (string2[this_char]);
948    }
949}
950
951void
952printchar (c)
953     int c;
954{
955  putc (c, stderr);
956}
957
958#else /* not DEBUG */
959
960# undef assert
961# define assert(e)
962
963# define DEBUG_STATEMENT(e)
964# define DEBUG_PRINT1(x)
965# define DEBUG_PRINT2(x1, x2)
966# define DEBUG_PRINT3(x1, x2, x3)
967# define DEBUG_PRINT4(x1, x2, x3, x4)
968# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
969# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
970
971#endif /* not DEBUG */
972
973/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
974   also be assigned to arbitrarily: each pattern buffer stores its own
975   syntax, so it can be changed between regex compilations.  */
976/* This has no initializer because initialized variables in Emacs
977   become read-only after dumping.  */
978reg_syntax_t re_syntax_options;
979
980
981/* Specify the precise syntax of regexps for compilation.  This provides
982   for compatibility for various utilities which historically have
983   different, incompatible syntaxes.
984
985   The argument SYNTAX is a bit mask comprised of the various bits
986   defined in regex.h.  We return the old syntax.  */
987
988reg_syntax_t
989re_set_syntax (syntax)
990    reg_syntax_t syntax;
991{
992  reg_syntax_t ret = re_syntax_options;
993
994  re_syntax_options = syntax;
995#ifdef DEBUG
996  if (syntax & RE_DEBUG)
997    debug = 1;
998  else if (debug) /* was on but now is not */
999    debug = 0;
1000#endif /* DEBUG */
1001  return ret;
1002}
1003#ifdef _LIBC
1004weak_alias (__re_set_syntax, re_set_syntax)
1005#endif
1006
1007/* This table gives an error message for each of the error codes listed
1008   in regex.h.  Obviously the order here has to be same as there.
1009   POSIX doesn't require that we do anything for REG_NOERROR,
1010   but why not be nice?  */
1011
1012#if 0
1013  /* This section is for xgettext; it sees the strings wrapped inside
1014     N_() and marks them as needing translation.  They should match
1015     the strings in re_error_msgid.  We can't use the usual string
1016     concatenation trick to initialize re_error_msgid, since other GNU
1017     distributions use this file with traditional C, and traditional C
1018     lacks string concatenation.  */
1019  N_("Success") /* REG_NOERROR */
1020  N_("No match") /* REG_NOMATCH */
1021  N_("Invalid regular expression") /* REG_BADPAT */
1022  N_("Invalid collation character") /* REG_ECOLLATE */
1023  N_("Invalid character class name") /* REG_ECTYPE */
1024  N_("Trailing backslash") /* REG_EESCAPE */
1025  N_("Invalid back reference") /* REG_ESUBREG */
1026  N_("Unmatched [ or [^") /* REG_EBRACK */
1027  N_("Unmatched ( or \\(") /* REG_EPAREN */
1028  N_("Unmatched \\{") /* REG_EBRACE */
1029  N_("Invalid content of \\{\\}") /* REG_BADBR */
1030  N_("Invalid range end") /* REG_ERANGE */
1031  N_("Memory exhausted") /* REG_ESPACE */
1032  N_("Invalid preceding regular expression") /* REG_BADRPT */
1033  N_("Premature end of regular expression") /* REG_EEND */
1034  N_("Regular expression too big") /* REG_ESIZE */
1035  N_("Unmatched ) or \\)") /* REG_ERPAREN */
1036#endif
1037
1038static const char re_error_msgid[] = "\
1039Success\0\
1040No match\0\
1041Invalid regular expression\0\
1042Invalid collation character\0\
1043Invalid character class name\0\
1044Trailing backslash\0\
1045Invalid back reference\0\
1046Unmatched [ or [^\0\
1047Unmatched ( or \\(\0\
1048Unmatched \\{\0\
1049Invalid content of \\{\\}\0\
1050Invalid range end\0\
1051Memory exhausted\0\
1052Invalid preceding regular expression\0\
1053Premature end of regular expression\0\
1054Regular expression too big\0\
1055Unmatched ) or \\)";
1056
1057#define REG_NOERROR_IDX	0
1058#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1059#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
1060#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1061#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1062#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
1063#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
1064#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
1065#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1066#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1067#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
1068#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1069#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
1070#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
1071#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1072#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
1073#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
1074
1075static const size_t re_error_msgid_idx[] =
1076  {
1077    REG_NOERROR_IDX,
1078    REG_NOMATCH_IDX,
1079    REG_BADPAT_IDX,
1080    REG_ECOLLATE_IDX,
1081    REG_ECTYPE_IDX,
1082    REG_EESCAPE_IDX,
1083    REG_ESUBREG_IDX,
1084    REG_EBRACK_IDX,
1085    REG_EPAREN_IDX,
1086    REG_EBRACE_IDX,
1087    REG_BADBR_IDX,
1088    REG_ERANGE_IDX,
1089    REG_ESPACE_IDX,
1090    REG_BADRPT_IDX,
1091    REG_EEND_IDX,
1092    REG_ESIZE_IDX,
1093    REG_ERPAREN_IDX
1094  };
1095
1096/* Avoiding alloca during matching, to placate r_alloc.  */
1097
1098/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1099   searching and matching functions should not call alloca.  On some
1100   systems, alloca is implemented in terms of malloc, and if we're
1101   using the relocating allocator routines, then malloc could cause a
1102   relocation, which might (if the strings being searched are in the
1103   ralloc heap) shift the data out from underneath the regexp
1104   routines.
1105
1106   Here's another reason to avoid allocation: Emacs
1107   processes input from X in a signal handler; processing X input may
1108   call malloc; if input arrives while a matching routine is calling
1109   malloc, then we're scrod.  But Emacs can't just block input while
1110   calling matching routines; then we don't notice interrupts when
1111   they come in.  So, Emacs blocks input around all regexp calls
1112   except the matching calls, which it leaves unprotected, in the
1113   faith that they will not malloc.  */
1114
1115/* Normally, this is fine.  */
1116#define MATCH_MAY_ALLOCATE
1117
1118/* When using GNU C, we are not REALLY using the C alloca, no matter
1119   what config.h may say.  So don't take precautions for it.  */
1120#ifdef __GNUC__
1121# undef C_ALLOCA
1122#endif
1123
1124/* The match routines may not allocate if (1) they would do it with malloc
1125   and (2) it's not safe for them to use malloc.
1126   Note that if REL_ALLOC is defined, matching would not use malloc for the
1127   failure stack, but we would still use it for the register vectors;
1128   so REL_ALLOC should not affect this.  */
1129#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1130# undef MATCH_MAY_ALLOCATE
1131#endif
1132
1133
1134/* Failure stack declarations and macros; both re_compile_fastmap and
1135   re_match_2 use a failure stack.  These have to be macros because of
1136   REGEX_ALLOCATE_STACK.  */
1137
1138
1139/* Number of failure points for which to initially allocate space
1140   when matching.  If this number is exceeded, we allocate more
1141   space, so it is not a hard limit.  */
1142#ifndef INIT_FAILURE_ALLOC
1143# define INIT_FAILURE_ALLOC 5
1144#endif
1145
1146/* Roughly the maximum number of failure points on the stack.  Would be
1147   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1148   This is a variable only so users of regex can assign to it; we never
1149   change it ourselves.  */
1150
1151#ifdef INT_IS_16BIT
1152
1153# if defined MATCH_MAY_ALLOCATE
1154/* 4400 was enough to cause a crash on Alpha OSF/1,
1155   whose default stack limit is 2mb.  */
1156long int re_max_failures = 4000;
1157# else
1158long int re_max_failures = 2000;
1159# endif
1160
1161union fail_stack_elt
1162{
1163  unsigned char *pointer;
1164  long int integer;
1165};
1166
1167typedef union fail_stack_elt fail_stack_elt_t;
1168
1169typedef struct
1170{
1171  fail_stack_elt_t *stack;
1172  unsigned long int size;
1173  unsigned long int avail;		/* Offset of next open position.  */
1174} fail_stack_type;
1175
1176#else /* not INT_IS_16BIT */
1177
1178# if defined MATCH_MAY_ALLOCATE
1179/* 4400 was enough to cause a crash on Alpha OSF/1,
1180   whose default stack limit is 2mb.  */
1181int re_max_failures = 20000;
1182# else
1183int re_max_failures = 2000;
1184# endif
1185
1186union fail_stack_elt
1187{
1188  unsigned char *pointer;
1189  int integer;
1190};
1191
1192typedef union fail_stack_elt fail_stack_elt_t;
1193
1194typedef struct
1195{
1196  fail_stack_elt_t *stack;
1197  unsigned size;
1198  unsigned avail;			/* Offset of next open position.  */
1199} fail_stack_type;
1200
1201#endif /* INT_IS_16BIT */
1202
1203#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1204#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1205#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1206
1207
1208/* Define macros to initialize and free the failure stack.
1209   Do `return -2' if the alloc fails.  */
1210
1211#ifdef MATCH_MAY_ALLOCATE
1212# define INIT_FAIL_STACK()						\
1213  do {									\
1214    fail_stack.stack = (fail_stack_elt_t *)				\
1215      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1216									\
1217    if (fail_stack.stack == NULL)					\
1218      return -2;							\
1219									\
1220    fail_stack.size = INIT_FAILURE_ALLOC;				\
1221    fail_stack.avail = 0;						\
1222  } while (0)
1223
1224# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1225#else
1226# define INIT_FAIL_STACK()						\
1227  do {									\
1228    fail_stack.avail = 0;						\
1229  } while (0)
1230
1231# define RESET_FAIL_STACK()
1232#endif
1233
1234
1235/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1236
1237   Return 1 if succeeds, and 0 if either ran out of memory
1238   allocating space for it or it was already too large.
1239
1240   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1241
1242#define DOUBLE_FAIL_STACK(fail_stack)					\
1243  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1244   ? 0									\
1245   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1246        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1247          (fail_stack).size * sizeof (fail_stack_elt_t),		\
1248          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1249									\
1250      (fail_stack).stack == NULL					\
1251      ? 0								\
1252      : ((fail_stack).size <<= 1, 					\
1253         1)))
1254
1255
1256/* Push pointer POINTER on FAIL_STACK.
1257   Return 1 if was able to do so and 0 if ran out of memory allocating
1258   space to do so.  */
1259#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1260  ((FAIL_STACK_FULL ()							\
1261    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1262   ? 0									\
1263   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1264      1))
1265
1266/* Push a pointer value onto the failure stack.
1267   Assumes the variable `fail_stack'.  Probably should only
1268   be called from within `PUSH_FAILURE_POINT'.  */
1269#define PUSH_FAILURE_POINTER(item)					\
1270  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1271
1272/* This pushes an integer-valued item onto the failure stack.
1273   Assumes the variable `fail_stack'.  Probably should only
1274   be called from within `PUSH_FAILURE_POINT'.  */
1275#define PUSH_FAILURE_INT(item)					\
1276  fail_stack.stack[fail_stack.avail++].integer = (item)
1277
1278/* Push a fail_stack_elt_t value onto the failure stack.
1279   Assumes the variable `fail_stack'.  Probably should only
1280   be called from within `PUSH_FAILURE_POINT'.  */
1281#define PUSH_FAILURE_ELT(item)					\
1282  fail_stack.stack[fail_stack.avail++] =  (item)
1283
1284/* These three POP... operations complement the three PUSH... operations.
1285   All assume that `fail_stack' is nonempty.  */
1286#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1287#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1288#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1289
1290/* Used to omit pushing failure point id's when we're not debugging.  */
1291#ifdef DEBUG
1292# define DEBUG_PUSH PUSH_FAILURE_INT
1293# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1294#else
1295# define DEBUG_PUSH(item)
1296# define DEBUG_POP(item_addr)
1297#endif
1298
1299
1300/* Push the information about the state we will need
1301   if we ever fail back to it.
1302
1303   Requires variables fail_stack, regstart, regend, reg_info, and
1304   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1305   be declared.
1306
1307   Does `return FAILURE_CODE' if runs out of memory.  */
1308
1309#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1310  do {									\
1311    char *destination;							\
1312    /* Must be int, so when we don't save any registers, the arithmetic	\
1313       of 0 + -1 isn't done as unsigned.  */				\
1314    /* Can't be int, since there is not a shred of a guarantee that int	\
1315       is wide enough to hold a value of something to which pointer can	\
1316       be assigned */							\
1317    active_reg_t this_reg;						\
1318    									\
1319    DEBUG_STATEMENT (failure_id++);					\
1320    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1321    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1322    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1323    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1324									\
1325    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1326    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1327									\
1328    /* Ensure we have enough space allocated for what we will push.  */	\
1329    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1330      {									\
1331        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1332          return failure_code;						\
1333									\
1334        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1335		       (fail_stack).size);				\
1336        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1337      }									\
1338									\
1339    /* Push the info, starting with the registers.  */			\
1340    DEBUG_PRINT1 ("\n");						\
1341									\
1342    if (1)								\
1343      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1344	   this_reg++)							\
1345	{								\
1346	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1347	  DEBUG_STATEMENT (num_regs_pushed++);				\
1348									\
1349	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1350	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1351									\
1352	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1353	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1354									\
1355	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1356			reg_info[this_reg].word.pointer);		\
1357	  DEBUG_PRINT2 (" match_null=%d",				\
1358			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1359	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1360	  DEBUG_PRINT2 (" matched_something=%d",			\
1361			MATCHED_SOMETHING (reg_info[this_reg]));	\
1362	  DEBUG_PRINT2 (" ever_matched=%d",				\
1363			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1364	  DEBUG_PRINT1 ("\n");						\
1365	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1366	}								\
1367									\
1368    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1369    PUSH_FAILURE_INT (lowest_active_reg);				\
1370									\
1371    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1372    PUSH_FAILURE_INT (highest_active_reg);				\
1373									\
1374    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1375    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1376    PUSH_FAILURE_POINTER (pattern_place);				\
1377									\
1378    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1379    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1380				 size2);				\
1381    DEBUG_PRINT1 ("'\n");						\
1382    PUSH_FAILURE_POINTER (string_place);				\
1383									\
1384    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1385    DEBUG_PUSH (failure_id);						\
1386  } while (0)
1387
1388/* This is the number of items that are pushed and popped on the stack
1389   for each register.  */
1390#define NUM_REG_ITEMS  3
1391
1392/* Individual items aside from the registers.  */
1393#ifdef DEBUG
1394# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1395#else
1396# define NUM_NONREG_ITEMS 4
1397#endif
1398
1399/* We push at most this many items on the stack.  */
1400/* We used to use (num_regs - 1), which is the number of registers
1401   this regexp will save; but that was changed to 5
1402   to avoid stack overflow for a regexp with lots of parens.  */
1403#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1404
1405/* We actually push this many items.  */
1406#define NUM_FAILURE_ITEMS				\
1407  (((0							\
1408     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1409    * NUM_REG_ITEMS)					\
1410   + NUM_NONREG_ITEMS)
1411
1412/* How many items can still be added to the stack without overflowing it.  */
1413#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1414
1415
1416/* Pops what PUSH_FAIL_STACK pushes.
1417
1418   We restore into the parameters, all of which should be lvalues:
1419     STR -- the saved data position.
1420     PAT -- the saved pattern position.
1421     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1422     REGSTART, REGEND -- arrays of string positions.
1423     REG_INFO -- array of information about each subexpression.
1424
1425   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1426   `pend', `string1', `size1', `string2', and `size2'.  */
1427
1428#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1429{									\
1430  DEBUG_STATEMENT (unsigned failure_id;)				\
1431  active_reg_t this_reg;						\
1432  const unsigned char *string_temp;					\
1433									\
1434  assert (!FAIL_STACK_EMPTY ());					\
1435									\
1436  /* Remove failure points and point to how many regs pushed.  */	\
1437  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1438  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1439  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1440									\
1441  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1442									\
1443  DEBUG_POP (&failure_id);						\
1444  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1445									\
1446  /* If the saved string location is NULL, it came from an		\
1447     on_failure_keep_string_jump opcode, and we want to throw away the	\
1448     saved NULL, thus retaining our current position in the string.  */	\
1449  string_temp = POP_FAILURE_POINTER ();					\
1450  if (string_temp != NULL)						\
1451    str = (const char *) string_temp;					\
1452									\
1453  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1454  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1455  DEBUG_PRINT1 ("'\n");							\
1456									\
1457  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1458  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1459  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1460									\
1461  /* Restore register info.  */						\
1462  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1463  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1464									\
1465  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1466  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1467									\
1468  if (1)								\
1469    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1470      {									\
1471	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1472									\
1473	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1474	DEBUG_PRINT2 ("      info: %p\n",				\
1475		      reg_info[this_reg].word.pointer);			\
1476									\
1477	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1478	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1479									\
1480	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1481	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1482      }									\
1483  else									\
1484    {									\
1485      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1486	{								\
1487	  reg_info[this_reg].word.integer = 0;				\
1488	  regend[this_reg] = 0;						\
1489	  regstart[this_reg] = 0;					\
1490	}								\
1491      highest_active_reg = high_reg;					\
1492    }									\
1493									\
1494  set_regs_matched_done = 0;						\
1495  DEBUG_STATEMENT (nfailure_points_popped++);				\
1496} /* POP_FAILURE_POINT */
1497
1498
1499
1500/* Structure for per-register (a.k.a. per-group) information.
1501   Other register information, such as the
1502   starting and ending positions (which are addresses), and the list of
1503   inner groups (which is a bits list) are maintained in separate
1504   variables.
1505
1506   We are making a (strictly speaking) nonportable assumption here: that
1507   the compiler will pack our bit fields into something that fits into
1508   the type of `word', i.e., is something that fits into one item on the
1509   failure stack.  */
1510
1511
1512/* Declarations and macros for re_match_2.  */
1513
1514typedef union
1515{
1516  fail_stack_elt_t word;
1517  struct
1518  {
1519      /* This field is one if this group can match the empty string,
1520         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1521#define MATCH_NULL_UNSET_VALUE 3
1522    unsigned match_null_string_p : 2;
1523    unsigned is_active : 1;
1524    unsigned matched_something : 1;
1525    unsigned ever_matched_something : 1;
1526  } bits;
1527} register_info_type;
1528
1529#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1530#define IS_ACTIVE(R)  ((R).bits.is_active)
1531#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1532#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1533
1534
1535/* Call this when have matched a real character; it sets `matched' flags
1536   for the subexpressions which we are currently inside.  Also records
1537   that those subexprs have matched.  */
1538#define SET_REGS_MATCHED()						\
1539  do									\
1540    {									\
1541      if (!set_regs_matched_done)					\
1542	{								\
1543	  active_reg_t r;						\
1544	  set_regs_matched_done = 1;					\
1545	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1546	    {								\
1547	      MATCHED_SOMETHING (reg_info[r])				\
1548		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1549		= 1;							\
1550	    }								\
1551	}								\
1552    }									\
1553  while (0)
1554
1555/* Registers are set to a sentinel when they haven't yet matched.  */
1556static char reg_unset_dummy;
1557#define REG_UNSET_VALUE (&reg_unset_dummy)
1558#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1559
1560/* Subroutine declarations and macros for regex_compile.  */
1561
1562static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1563					      reg_syntax_t syntax,
1564					      struct re_pattern_buffer *bufp));
1565static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1566static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1567				 int arg1, int arg2));
1568static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1569				  int arg, unsigned char *end));
1570static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1571				  int arg1, int arg2, unsigned char *end));
1572static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1573					   reg_syntax_t syntax));
1574static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1575					   reg_syntax_t syntax));
1576static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1577					      const char *pend,
1578					      char *translate,
1579					      reg_syntax_t syntax,
1580					      unsigned char *b));
1581
1582/* Fetch the next character in the uncompiled pattern---translating it
1583   if necessary.  Also cast from a signed character in the constant
1584   string passed to us by the user to an unsigned char that we can use
1585   as an array index (in, e.g., `translate').  */
1586#ifndef PATFETCH
1587# define PATFETCH(c)							\
1588  do {if (p == pend) return REG_EEND;					\
1589    c = (unsigned char) *p++;						\
1590    if (translate) c = (unsigned char) translate[c];			\
1591  } while (0)
1592#endif
1593
1594/* Fetch the next character in the uncompiled pattern, with no
1595   translation.  */
1596#define PATFETCH_RAW(c)							\
1597  do {if (p == pend) return REG_EEND;					\
1598    c = (unsigned char) *p++; 						\
1599  } while (0)
1600
1601/* Go backwards one character in the pattern.  */
1602#define PATUNFETCH p--
1603
1604
1605/* If `translate' is non-null, return translate[D], else just D.  We
1606   cast the subscript to translate because some data is declared as
1607   `char *', to avoid warnings when a string constant is passed.  But
1608   when we use a character as a subscript we must make it unsigned.  */
1609#ifndef TRANSLATE
1610# define TRANSLATE(d) \
1611  (translate ? (char) translate[(unsigned char) (d)] : (d))
1612#endif
1613
1614
1615/* Macros for outputting the compiled pattern into `buffer'.  */
1616
1617/* If the buffer isn't allocated when it comes in, use this.  */
1618#define INIT_BUF_SIZE  32
1619
1620/* Make sure we have at least N more bytes of space in buffer.  */
1621#define GET_BUFFER_SPACE(n)						\
1622    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1623      EXTEND_BUFFER ()
1624
1625/* Make sure we have one more byte of buffer space and then add C to it.  */
1626#define BUF_PUSH(c)							\
1627  do {									\
1628    GET_BUFFER_SPACE (1);						\
1629    *b++ = (unsigned char) (c);						\
1630  } while (0)
1631
1632
1633/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1634#define BUF_PUSH_2(c1, c2)						\
1635  do {									\
1636    GET_BUFFER_SPACE (2);						\
1637    *b++ = (unsigned char) (c1);					\
1638    *b++ = (unsigned char) (c2);					\
1639  } while (0)
1640
1641
1642/* As with BUF_PUSH_2, except for three bytes.  */
1643#define BUF_PUSH_3(c1, c2, c3)						\
1644  do {									\
1645    GET_BUFFER_SPACE (3);						\
1646    *b++ = (unsigned char) (c1);					\
1647    *b++ = (unsigned char) (c2);					\
1648    *b++ = (unsigned char) (c3);					\
1649  } while (0)
1650
1651
1652/* Store a jump with opcode OP at LOC to location TO.  We store a
1653   relative address offset by the three bytes the jump itself occupies.  */
1654#define STORE_JUMP(op, loc, to) \
1655  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1656
1657/* Likewise, for a two-argument jump.  */
1658#define STORE_JUMP2(op, loc, to, arg) \
1659  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1660
1661/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1662#define INSERT_JUMP(op, loc, to) \
1663  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1664
1665/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1666#define INSERT_JUMP2(op, loc, to, arg) \
1667  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1668
1669
1670/* This is not an arbitrary limit: the arguments which represent offsets
1671   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1672   be too small, many things would have to change.  */
1673/* Any other compiler which, like MSC, has allocation limit below 2^16
1674   bytes will have to use approach similar to what was done below for
1675   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1676   reallocating to 0 bytes.  Such thing is not going to work too well.
1677   You have been warned!!  */
1678#if defined _MSC_VER  && !defined WIN32
1679/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1680   The REALLOC define eliminates a flurry of conversion warnings,
1681   but is not required. */
1682# define MAX_BUF_SIZE  65500L
1683# define REALLOC(p,s) realloc ((p), (size_t) (s))
1684#else
1685# define MAX_BUF_SIZE (1L << 16)
1686# define REALLOC(p,s) realloc ((p), (s))
1687#endif
1688
1689/* Extend the buffer by twice its current size via realloc and
1690   reset the pointers that pointed into the old block to point to the
1691   correct places in the new one.  If extending the buffer results in it
1692   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1693#define EXTEND_BUFFER()							\
1694  do { 									\
1695    unsigned char *old_buffer = bufp->buffer;				\
1696    if (bufp->allocated == MAX_BUF_SIZE) 				\
1697      return REG_ESIZE;							\
1698    bufp->allocated <<= 1;						\
1699    if (bufp->allocated > MAX_BUF_SIZE)					\
1700      bufp->allocated = MAX_BUF_SIZE; 					\
1701    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1702    if (bufp->buffer == NULL)						\
1703      return REG_ESPACE;						\
1704    /* If the buffer moved, move all the pointers into it.  */		\
1705    if (old_buffer != bufp->buffer)					\
1706      {									\
1707        b = (b - old_buffer) + bufp->buffer;				\
1708        begalt = (begalt - old_buffer) + bufp->buffer;			\
1709        if (fixup_alt_jump)						\
1710          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1711        if (laststart)							\
1712          laststart = (laststart - old_buffer) + bufp->buffer;		\
1713        if (pending_exact)						\
1714          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1715      }									\
1716  } while (0)
1717
1718
1719/* Since we have one byte reserved for the register number argument to
1720   {start,stop}_memory, the maximum number of groups we can report
1721   things about is what fits in that byte.  */
1722#define MAX_REGNUM 255
1723
1724/* But patterns can have more than `MAX_REGNUM' registers.  We just
1725   ignore the excess.  */
1726typedef unsigned regnum_t;
1727
1728
1729/* Macros for the compile stack.  */
1730
1731/* Since offsets can go either forwards or backwards, this type needs to
1732   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1733/* int may be not enough when sizeof(int) == 2.  */
1734typedef long pattern_offset_t;
1735
1736typedef struct
1737{
1738  pattern_offset_t begalt_offset;
1739  pattern_offset_t fixup_alt_jump;
1740  pattern_offset_t inner_group_offset;
1741  pattern_offset_t laststart_offset;
1742  regnum_t regnum;
1743} compile_stack_elt_t;
1744
1745
1746typedef struct
1747{
1748  compile_stack_elt_t *stack;
1749  unsigned size;
1750  unsigned avail;			/* Offset of next open position.  */
1751} compile_stack_type;
1752
1753
1754#define INIT_COMPILE_STACK_SIZE 32
1755
1756#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1757#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1758
1759/* The next available element.  */
1760#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1761
1762
1763/* Set the bit for character C in a list.  */
1764#define SET_LIST_BIT(c)                               \
1765  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1766   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1767
1768
1769/* Get the next unsigned number in the uncompiled pattern.  */
1770#define GET_UNSIGNED_NUMBER(num) 					\
1771  { if (p != pend)							\
1772     {									\
1773       PATFETCH (c); 							\
1774       while ('0' <= c && c <= '9')					\
1775         { 								\
1776           if (num < 0)							\
1777              num = 0;							\
1778           num = num * 10 + c - '0'; 					\
1779           if (p == pend) 						\
1780              break; 							\
1781           PATFETCH (c);						\
1782         } 								\
1783       } 								\
1784    }
1785
1786#if defined _LIBC || WIDE_CHAR_SUPPORT
1787/* The GNU C library provides support for user-defined character classes
1788   and the functions from ISO C amendement 1.  */
1789# ifdef CHARCLASS_NAME_MAX
1790#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1791# else
1792/* This shouldn't happen but some implementation might still have this
1793   problem.  Use a reasonable default value.  */
1794#  define CHAR_CLASS_MAX_LENGTH 256
1795# endif
1796
1797# ifdef _LIBC
1798#  define IS_CHAR_CLASS(string) __wctype (string)
1799# else
1800#  define IS_CHAR_CLASS(string) wctype (string)
1801# endif
1802#else
1803# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1804
1805# define IS_CHAR_CLASS(string)						\
1806   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1807    || STREQ (string, "lower") || STREQ (string, "digit")		\
1808    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1809    || STREQ (string, "space") || STREQ (string, "print")		\
1810    || STREQ (string, "punct") || STREQ (string, "graph")		\
1811    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1812#endif
1813
1814#ifndef MATCH_MAY_ALLOCATE
1815
1816/* If we cannot allocate large objects within re_match_2_internal,
1817   we make the fail stack and register vectors global.
1818   The fail stack, we grow to the maximum size when a regexp
1819   is compiled.
1820   The register vectors, we adjust in size each time we
1821   compile a regexp, according to the number of registers it needs.  */
1822
1823static fail_stack_type fail_stack;
1824
1825/* Size with which the following vectors are currently allocated.
1826   That is so we can make them bigger as needed,
1827   but never make them smaller.  */
1828static int regs_allocated_size;
1829
1830static const char **     regstart, **     regend;
1831static const char ** old_regstart, ** old_regend;
1832static const char **best_regstart, **best_regend;
1833static register_info_type *reg_info;
1834static const char **reg_dummy;
1835static register_info_type *reg_info_dummy;
1836
1837/* Make the register vectors big enough for NUM_REGS registers,
1838   but don't make them smaller.  */
1839
1840static
1841regex_grow_registers (num_regs)
1842     int num_regs;
1843{
1844  if (num_regs > regs_allocated_size)
1845    {
1846      RETALLOC_IF (regstart,	 num_regs, const char *);
1847      RETALLOC_IF (regend,	 num_regs, const char *);
1848      RETALLOC_IF (old_regstart, num_regs, const char *);
1849      RETALLOC_IF (old_regend,	 num_regs, const char *);
1850      RETALLOC_IF (best_regstart, num_regs, const char *);
1851      RETALLOC_IF (best_regend,	 num_regs, const char *);
1852      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1853      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1854      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1855
1856      regs_allocated_size = num_regs;
1857    }
1858}
1859
1860#endif /* not MATCH_MAY_ALLOCATE */
1861
1862static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1863						 compile_stack,
1864						 regnum_t regnum));
1865
1866/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1867   Returns one of error codes defined in `regex.h', or zero for success.
1868
1869   Assumes the `allocated' (and perhaps `buffer') and `translate'
1870   fields are set in BUFP on entry.
1871
1872   If it succeeds, results are put in BUFP (if it returns an error, the
1873   contents of BUFP are undefined):
1874     `buffer' is the compiled pattern;
1875     `syntax' is set to SYNTAX;
1876     `used' is set to the length of the compiled pattern;
1877     `fastmap_accurate' is zero;
1878     `re_nsub' is the number of subexpressions in PATTERN;
1879     `not_bol' and `not_eol' are zero;
1880
1881   The `fastmap' and `newline_anchor' fields are neither
1882   examined nor set.  */
1883
1884/* Return, freeing storage we allocated.  */
1885#define FREE_STACK_RETURN(value)		\
1886  return (free (compile_stack.stack), value)
1887
1888static reg_errcode_t
1889regex_compile (pattern, size, syntax, bufp)
1890     const char *pattern;
1891     size_t size;
1892     reg_syntax_t syntax;
1893     struct re_pattern_buffer *bufp;
1894{
1895  /* We fetch characters from PATTERN here.  Even though PATTERN is
1896     `char *' (i.e., signed), we declare these variables as unsigned, so
1897     they can be reliably used as array indices.  */
1898  register unsigned char c, c1;
1899
1900  /* A random temporary spot in PATTERN.  */
1901  const char *p1;
1902
1903  /* Points to the end of the buffer, where we should append.  */
1904  register unsigned char *b;
1905
1906  /* Keeps track of unclosed groups.  */
1907  compile_stack_type compile_stack;
1908
1909  /* Points to the current (ending) position in the pattern.  */
1910  const char *p = pattern;
1911  const char *pend = pattern + size;
1912
1913  /* How to translate the characters in the pattern.  */
1914  RE_TRANSLATE_TYPE translate = bufp->translate;
1915
1916  /* Address of the count-byte of the most recently inserted `exactn'
1917     command.  This makes it possible to tell if a new exact-match
1918     character can be added to that command or if the character requires
1919     a new `exactn' command.  */
1920  unsigned char *pending_exact = 0;
1921
1922  /* Address of start of the most recently finished expression.
1923     This tells, e.g., postfix * where to find the start of its
1924     operand.  Reset at the beginning of groups and alternatives.  */
1925  unsigned char *laststart = 0;
1926
1927  /* Address of beginning of regexp, or inside of last group.  */
1928  unsigned char *begalt;
1929
1930  /* Place in the uncompiled pattern (i.e., the {) to
1931     which to go back if the interval is invalid.  */
1932  const char *beg_interval;
1933
1934  /* Address of the place where a forward jump should go to the end of
1935     the containing expression.  Each alternative of an `or' -- except the
1936     last -- ends with a forward jump of this sort.  */
1937  unsigned char *fixup_alt_jump = 0;
1938
1939  /* Counts open-groups as they are encountered.  Remembered for the
1940     matching close-group on the compile stack, so the same register
1941     number is put in the stop_memory as the start_memory.  */
1942  regnum_t regnum = 0;
1943
1944#ifdef DEBUG
1945  DEBUG_PRINT1 ("\nCompiling pattern: ");
1946  if (debug)
1947    {
1948      unsigned debug_count;
1949
1950      for (debug_count = 0; debug_count < size; debug_count++)
1951        putchar (pattern[debug_count]);
1952      putchar ('\n');
1953    }
1954#endif /* DEBUG */
1955
1956  /* Initialize the compile stack.  */
1957  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1958  if (compile_stack.stack == NULL)
1959    return REG_ESPACE;
1960
1961  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1962  compile_stack.avail = 0;
1963
1964  /* Initialize the pattern buffer.  */
1965  bufp->syntax = syntax;
1966  bufp->fastmap_accurate = 0;
1967  bufp->not_bol = bufp->not_eol = 0;
1968
1969  /* Set `used' to zero, so that if we return an error, the pattern
1970     printer (for debugging) will think there's no pattern.  We reset it
1971     at the end.  */
1972  bufp->used = 0;
1973
1974  /* Always count groups, whether or not bufp->no_sub is set.  */
1975  bufp->re_nsub = 0;
1976
1977#if !defined emacs && !defined SYNTAX_TABLE
1978  /* Initialize the syntax table.  */
1979   init_syntax_once ();
1980#endif
1981
1982  if (bufp->allocated == 0)
1983    {
1984      if (bufp->buffer)
1985	{ /* If zero allocated, but buffer is non-null, try to realloc
1986             enough space.  This loses if buffer's address is bogus, but
1987             that is the user's responsibility.  */
1988          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1989        }
1990      else
1991        { /* Caller did not allocate a buffer.  Do it for them.  */
1992          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1993        }
1994      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1995
1996      bufp->allocated = INIT_BUF_SIZE;
1997    }
1998
1999  begalt = b = bufp->buffer;
2000
2001  /* Loop through the uncompiled pattern until we're at the end.  */
2002  while (p != pend)
2003    {
2004      PATFETCH (c);
2005
2006      switch (c)
2007        {
2008        case '^':
2009          {
2010            if (   /* If at start of pattern, it's an operator.  */
2011                   p == pattern + 1
2012                   /* If context independent, it's an operator.  */
2013                || syntax & RE_CONTEXT_INDEP_ANCHORS
2014                   /* Otherwise, depends on what's come before.  */
2015                || at_begline_loc_p (pattern, p, syntax))
2016              BUF_PUSH (begline);
2017            else
2018              goto normal_char;
2019          }
2020          break;
2021
2022
2023        case '$':
2024          {
2025            if (   /* If at end of pattern, it's an operator.  */
2026                   p == pend
2027                   /* If context independent, it's an operator.  */
2028                || syntax & RE_CONTEXT_INDEP_ANCHORS
2029                   /* Otherwise, depends on what's next.  */
2030                || at_endline_loc_p (p, pend, syntax))
2031               BUF_PUSH (endline);
2032             else
2033               goto normal_char;
2034           }
2035           break;
2036
2037
2038	case '+':
2039        case '?':
2040          if ((syntax & RE_BK_PLUS_QM)
2041              || (syntax & RE_LIMITED_OPS))
2042            goto normal_char;
2043        handle_plus:
2044        case '*':
2045          /* If there is no previous pattern... */
2046          if (!laststart)
2047            {
2048              if (syntax & RE_CONTEXT_INVALID_OPS)
2049                FREE_STACK_RETURN (REG_BADRPT);
2050              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2051                goto normal_char;
2052            }
2053
2054          {
2055            /* Are we optimizing this jump?  */
2056            boolean keep_string_p = false;
2057
2058            /* 1 means zero (many) matches is allowed.  */
2059            char zero_times_ok = 0, many_times_ok = 0;
2060
2061            /* If there is a sequence of repetition chars, collapse it
2062               down to just one (the right one).  We can't combine
2063               interval operators with these because of, e.g., `a{2}*',
2064               which should only match an even number of `a's.  */
2065
2066            for (;;)
2067              {
2068                zero_times_ok |= c != '+';
2069                many_times_ok |= c != '?';
2070
2071                if (p == pend)
2072                  break;
2073
2074                PATFETCH (c);
2075
2076                if (c == '*'
2077                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2078                  ;
2079
2080                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2081                  {
2082                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2083
2084                    PATFETCH (c1);
2085                    if (!(c1 == '+' || c1 == '?'))
2086                      {
2087                        PATUNFETCH;
2088                        PATUNFETCH;
2089                        break;
2090                      }
2091
2092                    c = c1;
2093                  }
2094                else
2095                  {
2096                    PATUNFETCH;
2097                    break;
2098                  }
2099
2100                /* If we get here, we found another repeat character.  */
2101               }
2102
2103            /* Star, etc. applied to an empty pattern is equivalent
2104               to an empty pattern.  */
2105            if (!laststart)
2106              break;
2107
2108            /* Now we know whether or not zero matches is allowed
2109               and also whether or not two or more matches is allowed.  */
2110            if (many_times_ok)
2111              { /* More than one repetition is allowed, so put in at the
2112                   end a backward relative jump from `b' to before the next
2113                   jump we're going to put in below (which jumps from
2114                   laststart to after this jump).
2115
2116                   But if we are at the `*' in the exact sequence `.*\n',
2117                   insert an unconditional jump backwards to the .,
2118                   instead of the beginning of the loop.  This way we only
2119                   push a failure point once, instead of every time
2120                   through the loop.  */
2121                assert (p - 1 > pattern);
2122
2123                /* Allocate the space for the jump.  */
2124                GET_BUFFER_SPACE (3);
2125
2126                /* We know we are not at the first character of the pattern,
2127                   because laststart was nonzero.  And we've already
2128                   incremented `p', by the way, to be the character after
2129                   the `*'.  Do we have to do something analogous here
2130                   for null bytes, because of RE_DOT_NOT_NULL?  */
2131                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2132		    && zero_times_ok
2133                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2134                    && !(syntax & RE_DOT_NEWLINE))
2135                  { /* We have .*\n.  */
2136                    STORE_JUMP (jump, b, laststart);
2137                    keep_string_p = true;
2138                  }
2139                else
2140                  /* Anything else.  */
2141                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2142
2143                /* We've added more stuff to the buffer.  */
2144                b += 3;
2145              }
2146
2147            /* On failure, jump from laststart to b + 3, which will be the
2148               end of the buffer after this jump is inserted.  */
2149            GET_BUFFER_SPACE (3);
2150            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2151                                       : on_failure_jump,
2152                         laststart, b + 3);
2153            pending_exact = 0;
2154            b += 3;
2155
2156            if (!zero_times_ok)
2157              {
2158                /* At least one repetition is required, so insert a
2159                   `dummy_failure_jump' before the initial
2160                   `on_failure_jump' instruction of the loop. This
2161                   effects a skip over that instruction the first time
2162                   we hit that loop.  */
2163                GET_BUFFER_SPACE (3);
2164                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2165                b += 3;
2166              }
2167            }
2168	  break;
2169
2170
2171	case '.':
2172          laststart = b;
2173          BUF_PUSH (anychar);
2174          break;
2175
2176
2177        case '[':
2178          {
2179            boolean had_char_class = false;
2180
2181            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2182
2183            /* Ensure that we have enough space to push a charset: the
2184               opcode, the length count, and the bitset; 34 bytes in all.  */
2185	    GET_BUFFER_SPACE (34);
2186
2187            laststart = b;
2188
2189            /* We test `*p == '^' twice, instead of using an if
2190               statement, so we only need one BUF_PUSH.  */
2191            BUF_PUSH (*p == '^' ? charset_not : charset);
2192            if (*p == '^')
2193              p++;
2194
2195            /* Remember the first position in the bracket expression.  */
2196            p1 = p;
2197
2198            /* Push the number of bytes in the bitmap.  */
2199            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2200
2201            /* Clear the whole map.  */
2202            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2203
2204            /* charset_not matches newline according to a syntax bit.  */
2205            if ((re_opcode_t) b[-2] == charset_not
2206                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2207              SET_LIST_BIT ('\n');
2208
2209            /* Read in characters and ranges, setting map bits.  */
2210            for (;;)
2211              {
2212                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2213
2214                PATFETCH (c);
2215
2216                /* \ might escape characters inside [...] and [^...].  */
2217                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2218                  {
2219                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2220
2221                    PATFETCH (c1);
2222                    SET_LIST_BIT (c1);
2223                    continue;
2224                  }
2225
2226                /* Could be the end of the bracket expression.  If it's
2227                   not (i.e., when the bracket expression is `[]' so
2228                   far), the ']' character bit gets set way below.  */
2229                if (c == ']' && p != p1 + 1)
2230                  break;
2231
2232                /* Look ahead to see if it's a range when the last thing
2233                   was a character class.  */
2234                if (had_char_class && c == '-' && *p != ']')
2235                  FREE_STACK_RETURN (REG_ERANGE);
2236
2237                /* Look ahead to see if it's a range when the last thing
2238                   was a character: if this is a hyphen not at the
2239                   beginning or the end of a list, then it's the range
2240                   operator.  */
2241                if (c == '-'
2242                    && !(p - 2 >= pattern && p[-2] == '[')
2243                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2244                    && *p != ']')
2245                  {
2246                    reg_errcode_t ret
2247                      = compile_range (&p, pend, translate, syntax, b);
2248                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2249                  }
2250
2251                else if (p[0] == '-' && p[1] != ']')
2252                  { /* This handles ranges made up of characters only.  */
2253                    reg_errcode_t ret;
2254
2255		    /* Move past the `-'.  */
2256                    PATFETCH (c1);
2257
2258                    ret = compile_range (&p, pend, translate, syntax, b);
2259                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2260                  }
2261
2262                /* See if we're at the beginning of a possible character
2263                   class.  */
2264
2265                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2266                  { /* Leave room for the null.  */
2267                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2268
2269                    PATFETCH (c);
2270                    c1 = 0;
2271
2272                    /* If pattern is `[[:'.  */
2273                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2274
2275                    for (;;)
2276                      {
2277                        PATFETCH (c);
2278                        if ((c == ':' && *p == ']') || p == pend)
2279                          break;
2280			if (c1 < CHAR_CLASS_MAX_LENGTH)
2281			  str[c1++] = c;
2282			else
2283			  /* This is in any case an invalid class name.  */
2284			  str[0] = '\0';
2285                      }
2286                    str[c1] = '\0';
2287
2288                    /* If isn't a word bracketed by `[:' and `:]':
2289                       undo the ending character, the letters, and leave
2290                       the leading `:' and `[' (but set bits for them).  */
2291                    if (c == ':' && *p == ']')
2292                      {
2293#if defined _LIBC || WIDE_CHAR_SUPPORT
2294                        boolean is_lower = STREQ (str, "lower");
2295                        boolean is_upper = STREQ (str, "upper");
2296			wctype_t wt;
2297                        int ch;
2298
2299			wt = IS_CHAR_CLASS (str);
2300			if (wt == 0)
2301			  FREE_STACK_RETURN (REG_ECTYPE);
2302
2303                        /* Throw away the ] at the end of the character
2304                           class.  */
2305                        PATFETCH (c);
2306
2307                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2308
2309                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2310			  {
2311# ifdef _LIBC
2312			    if (__iswctype (__btowc (ch), wt))
2313			      SET_LIST_BIT (ch);
2314# else
2315			    if (iswctype (btowc (ch), wt))
2316			      SET_LIST_BIT (ch);
2317# endif
2318
2319			    if (translate && (is_upper || is_lower)
2320				&& (ISUPPER (ch) || ISLOWER (ch)))
2321			      SET_LIST_BIT (ch);
2322			  }
2323
2324                        had_char_class = true;
2325#else
2326                        int ch;
2327                        boolean is_alnum = STREQ (str, "alnum");
2328                        boolean is_alpha = STREQ (str, "alpha");
2329                        boolean is_blank = STREQ (str, "blank");
2330                        boolean is_cntrl = STREQ (str, "cntrl");
2331                        boolean is_digit = STREQ (str, "digit");
2332                        boolean is_graph = STREQ (str, "graph");
2333                        boolean is_lower = STREQ (str, "lower");
2334                        boolean is_print = STREQ (str, "print");
2335                        boolean is_punct = STREQ (str, "punct");
2336                        boolean is_space = STREQ (str, "space");
2337                        boolean is_upper = STREQ (str, "upper");
2338                        boolean is_xdigit = STREQ (str, "xdigit");
2339
2340                        if (!IS_CHAR_CLASS (str))
2341			  FREE_STACK_RETURN (REG_ECTYPE);
2342
2343                        /* Throw away the ] at the end of the character
2344                           class.  */
2345                        PATFETCH (c);
2346
2347                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2348
2349                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2350                          {
2351			    /* This was split into 3 if's to
2352			       avoid an arbitrary limit in some compiler.  */
2353                            if (   (is_alnum  && ISALNUM (ch))
2354                                || (is_alpha  && ISALPHA (ch))
2355                                || (is_blank  && ISBLANK (ch))
2356                                || (is_cntrl  && ISCNTRL (ch)))
2357			      SET_LIST_BIT (ch);
2358			    if (   (is_digit  && ISDIGIT (ch))
2359                                || (is_graph  && ISGRAPH (ch))
2360                                || (is_lower  && ISLOWER (ch))
2361                                || (is_print  && ISPRINT (ch)))
2362			      SET_LIST_BIT (ch);
2363			    if (   (is_punct  && ISPUNCT (ch))
2364                                || (is_space  && ISSPACE (ch))
2365                                || (is_upper  && ISUPPER (ch))
2366                                || (is_xdigit && ISXDIGIT (ch)))
2367			      SET_LIST_BIT (ch);
2368			    if (   translate && (is_upper || is_lower)
2369				&& (ISUPPER (ch) || ISLOWER (ch)))
2370			      SET_LIST_BIT (ch);
2371                          }
2372                        had_char_class = true;
2373#endif	/* libc || wctype.h */
2374                      }
2375                    else
2376                      {
2377                        c1++;
2378                        while (c1--)
2379                          PATUNFETCH;
2380                        SET_LIST_BIT ('[');
2381                        SET_LIST_BIT (':');
2382                        had_char_class = false;
2383                      }
2384                  }
2385                else
2386                  {
2387                    had_char_class = false;
2388                    SET_LIST_BIT (c);
2389                  }
2390              }
2391
2392            /* Discard any (non)matching list bytes that are all 0 at the
2393               end of the map.  Decrease the map-length byte too.  */
2394            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2395              b[-1]--;
2396            b += b[-1];
2397          }
2398          break;
2399
2400
2401	case '(':
2402          if (syntax & RE_NO_BK_PARENS)
2403            goto handle_open;
2404          else
2405            goto normal_char;
2406
2407
2408        case ')':
2409          if (syntax & RE_NO_BK_PARENS)
2410            goto handle_close;
2411          else
2412            goto normal_char;
2413
2414
2415        case '\n':
2416          if (syntax & RE_NEWLINE_ALT)
2417            goto handle_alt;
2418          else
2419            goto normal_char;
2420
2421
2422	case '|':
2423          if (syntax & RE_NO_BK_VBAR)
2424            goto handle_alt;
2425          else
2426            goto normal_char;
2427
2428
2429        case '{':
2430           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2431             goto handle_interval;
2432           else
2433             goto normal_char;
2434
2435
2436        case '\\':
2437          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2438
2439          /* Do not translate the character after the \, so that we can
2440             distinguish, e.g., \B from \b, even if we normally would
2441             translate, e.g., B to b.  */
2442          PATFETCH_RAW (c);
2443
2444          switch (c)
2445            {
2446            case '(':
2447              if (syntax & RE_NO_BK_PARENS)
2448                goto normal_backslash;
2449
2450            handle_open:
2451              bufp->re_nsub++;
2452              regnum++;
2453
2454              if (COMPILE_STACK_FULL)
2455                {
2456                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2457                            compile_stack_elt_t);
2458                  if (compile_stack.stack == NULL) return REG_ESPACE;
2459
2460                  compile_stack.size <<= 1;
2461                }
2462
2463              /* These are the values to restore when we hit end of this
2464                 group.  They are all relative offsets, so that if the
2465                 whole pattern moves because of realloc, they will still
2466                 be valid.  */
2467              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2468              COMPILE_STACK_TOP.fixup_alt_jump
2469                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2470              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2471              COMPILE_STACK_TOP.regnum = regnum;
2472
2473              /* We will eventually replace the 0 with the number of
2474                 groups inner to this one.  But do not push a
2475                 start_memory for groups beyond the last one we can
2476                 represent in the compiled pattern.  */
2477              if (regnum <= MAX_REGNUM)
2478                {
2479                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2480                  BUF_PUSH_3 (start_memory, regnum, 0);
2481                }
2482
2483              compile_stack.avail++;
2484
2485              fixup_alt_jump = 0;
2486              laststart = 0;
2487              begalt = b;
2488	      /* If we've reached MAX_REGNUM groups, then this open
2489		 won't actually generate any code, so we'll have to
2490		 clear pending_exact explicitly.  */
2491	      pending_exact = 0;
2492              break;
2493
2494
2495            case ')':
2496              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2497
2498              if (COMPILE_STACK_EMPTY)
2499		{
2500		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2501		    goto normal_backslash;
2502		  else
2503		    FREE_STACK_RETURN (REG_ERPAREN);
2504		}
2505
2506            handle_close:
2507              if (fixup_alt_jump)
2508                { /* Push a dummy failure point at the end of the
2509                     alternative for a possible future
2510                     `pop_failure_jump' to pop.  See comments at
2511                     `push_dummy_failure' in `re_match_2'.  */
2512                  BUF_PUSH (push_dummy_failure);
2513
2514                  /* We allocated space for this jump when we assigned
2515                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2516                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2517                }
2518
2519              /* See similar code for backslashed left paren above.  */
2520              if (COMPILE_STACK_EMPTY)
2521		{
2522		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2523		    goto normal_char;
2524		  else
2525		    FREE_STACK_RETURN (REG_ERPAREN);
2526		}
2527
2528              /* Since we just checked for an empty stack above, this
2529                 ``can't happen''.  */
2530              assert (compile_stack.avail != 0);
2531              {
2532                /* We don't just want to restore into `regnum', because
2533                   later groups should continue to be numbered higher,
2534                   as in `(ab)c(de)' -- the second group is #2.  */
2535                regnum_t this_group_regnum;
2536
2537                compile_stack.avail--;
2538                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2539                fixup_alt_jump
2540                  = COMPILE_STACK_TOP.fixup_alt_jump
2541                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2542                    : 0;
2543                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2544                this_group_regnum = COMPILE_STACK_TOP.regnum;
2545		/* If we've reached MAX_REGNUM groups, then this open
2546		   won't actually generate any code, so we'll have to
2547		   clear pending_exact explicitly.  */
2548		pending_exact = 0;
2549
2550                /* We're at the end of the group, so now we know how many
2551                   groups were inside this one.  */
2552                if (this_group_regnum <= MAX_REGNUM)
2553                  {
2554                    unsigned char *inner_group_loc
2555                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2556
2557                    *inner_group_loc = regnum - this_group_regnum;
2558                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2559                                regnum - this_group_regnum);
2560                  }
2561              }
2562              break;
2563
2564
2565            case '|':					/* `\|'.  */
2566              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2567                goto normal_backslash;
2568            handle_alt:
2569              if (syntax & RE_LIMITED_OPS)
2570                goto normal_char;
2571
2572              /* Insert before the previous alternative a jump which
2573                 jumps to this alternative if the former fails.  */
2574              GET_BUFFER_SPACE (3);
2575              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2576              pending_exact = 0;
2577              b += 3;
2578
2579              /* The alternative before this one has a jump after it
2580                 which gets executed if it gets matched.  Adjust that
2581                 jump so it will jump to this alternative's analogous
2582                 jump (put in below, which in turn will jump to the next
2583                 (if any) alternative's such jump, etc.).  The last such
2584                 jump jumps to the correct final destination.  A picture:
2585                          _____ _____
2586                          |   | |   |
2587                          |   v |   v
2588                         a | b   | c
2589
2590                 If we are at `b', then fixup_alt_jump right now points to a
2591                 three-byte space after `a'.  We'll put in the jump, set
2592                 fixup_alt_jump to right after `b', and leave behind three
2593                 bytes which we'll fill in when we get to after `c'.  */
2594
2595              if (fixup_alt_jump)
2596                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2597
2598              /* Mark and leave space for a jump after this alternative,
2599                 to be filled in later either by next alternative or
2600                 when know we're at the end of a series of alternatives.  */
2601              fixup_alt_jump = b;
2602              GET_BUFFER_SPACE (3);
2603              b += 3;
2604
2605              laststart = 0;
2606              begalt = b;
2607              break;
2608
2609
2610            case '{':
2611              /* If \{ is a literal.  */
2612              if (!(syntax & RE_INTERVALS)
2613                     /* If we're at `\{' and it's not the open-interval
2614                        operator.  */
2615                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2616                  || (p - 2 == pattern  &&  p == pend))
2617                goto normal_backslash;
2618
2619            handle_interval:
2620              {
2621                /* If got here, then the syntax allows intervals.  */
2622
2623                /* At least (most) this many matches must be made.  */
2624                int lower_bound = -1, upper_bound = -1;
2625
2626                beg_interval = p - 1;
2627
2628                if (p == pend)
2629                  {
2630                    if (syntax & RE_NO_BK_BRACES)
2631                      goto unfetch_interval;
2632                    else
2633                      FREE_STACK_RETURN (REG_EBRACE);
2634                  }
2635
2636                GET_UNSIGNED_NUMBER (lower_bound);
2637
2638                if (c == ',')
2639                  {
2640                    GET_UNSIGNED_NUMBER (upper_bound);
2641                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2642                  }
2643                else
2644                  /* Interval such as `{1}' => match exactly once. */
2645                  upper_bound = lower_bound;
2646
2647                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2648                    || lower_bound > upper_bound)
2649                  {
2650                    if (syntax & RE_NO_BK_BRACES)
2651                      goto unfetch_interval;
2652                    else
2653                      FREE_STACK_RETURN (REG_BADBR);
2654                  }
2655
2656                if (!(syntax & RE_NO_BK_BRACES))
2657                  {
2658                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2659
2660                    PATFETCH (c);
2661                  }
2662
2663                if (c != '}')
2664                  {
2665                    if (syntax & RE_NO_BK_BRACES)
2666                      goto unfetch_interval;
2667                    else
2668                      FREE_STACK_RETURN (REG_BADBR);
2669                  }
2670
2671                /* We just parsed a valid interval.  */
2672
2673                /* If it's invalid to have no preceding re.  */
2674                if (!laststart)
2675                  {
2676                    if (syntax & RE_CONTEXT_INVALID_OPS)
2677                      FREE_STACK_RETURN (REG_BADRPT);
2678                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2679                      laststart = b;
2680                    else
2681                      goto unfetch_interval;
2682                  }
2683
2684                /* If the upper bound is zero, don't want to succeed at
2685                   all; jump from `laststart' to `b + 3', which will be
2686                   the end of the buffer after we insert the jump.  */
2687                 if (upper_bound == 0)
2688                   {
2689                     GET_BUFFER_SPACE (3);
2690                     INSERT_JUMP (jump, laststart, b + 3);
2691                     b += 3;
2692                   }
2693
2694                 /* Otherwise, we have a nontrivial interval.  When
2695                    we're all done, the pattern will look like:
2696                      set_number_at <jump count> <upper bound>
2697                      set_number_at <succeed_n count> <lower bound>
2698                      succeed_n <after jump addr> <succeed_n count>
2699                      <body of loop>
2700                      jump_n <succeed_n addr> <jump count>
2701                    (The upper bound and `jump_n' are omitted if
2702                    `upper_bound' is 1, though.)  */
2703                 else
2704                   { /* If the upper bound is > 1, we need to insert
2705                        more at the end of the loop.  */
2706                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2707
2708                     GET_BUFFER_SPACE (nbytes);
2709
2710                     /* Initialize lower bound of the `succeed_n', even
2711                        though it will be set during matching by its
2712                        attendant `set_number_at' (inserted next),
2713                        because `re_compile_fastmap' needs to know.
2714                        Jump to the `jump_n' we might insert below.  */
2715                     INSERT_JUMP2 (succeed_n, laststart,
2716                                   b + 5 + (upper_bound > 1) * 5,
2717                                   lower_bound);
2718                     b += 5;
2719
2720                     /* Code to initialize the lower bound.  Insert
2721                        before the `succeed_n'.  The `5' is the last two
2722                        bytes of this `set_number_at', plus 3 bytes of
2723                        the following `succeed_n'.  */
2724                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2725                     b += 5;
2726
2727                     if (upper_bound > 1)
2728                       { /* More than one repetition is allowed, so
2729                            append a backward jump to the `succeed_n'
2730                            that starts this interval.
2731
2732                            When we've reached this during matching,
2733                            we'll have matched the interval once, so
2734                            jump back only `upper_bound - 1' times.  */
2735                         STORE_JUMP2 (jump_n, b, laststart + 5,
2736                                      upper_bound - 1);
2737                         b += 5;
2738
2739                         /* The location we want to set is the second
2740                            parameter of the `jump_n'; that is `b-2' as
2741                            an absolute address.  `laststart' will be
2742                            the `set_number_at' we're about to insert;
2743                            `laststart+3' the number to set, the source
2744                            for the relative address.  But we are
2745                            inserting into the middle of the pattern --
2746                            so everything is getting moved up by 5.
2747                            Conclusion: (b - 2) - (laststart + 3) + 5,
2748                            i.e., b - laststart.
2749
2750                            We insert this at the beginning of the loop
2751                            so that if we fail during matching, we'll
2752                            reinitialize the bounds.  */
2753                         insert_op2 (set_number_at, laststart, b - laststart,
2754                                     upper_bound - 1, b);
2755                         b += 5;
2756                       }
2757                   }
2758                pending_exact = 0;
2759                beg_interval = NULL;
2760              }
2761              break;
2762
2763            unfetch_interval:
2764              /* If an invalid interval, match the characters as literals.  */
2765               assert (beg_interval);
2766               p = beg_interval;
2767               beg_interval = NULL;
2768
2769               /* normal_char and normal_backslash need `c'.  */
2770               PATFETCH (c);
2771
2772               if (!(syntax & RE_NO_BK_BRACES))
2773                 {
2774                   if (p > pattern  &&  p[-1] == '\\')
2775                     goto normal_backslash;
2776                 }
2777               goto normal_char;
2778
2779#ifdef emacs
2780            /* There is no way to specify the before_dot and after_dot
2781               operators.  rms says this is ok.  --karl  */
2782            case '=':
2783              BUF_PUSH (at_dot);
2784              break;
2785
2786            case 's':
2787              laststart = b;
2788              PATFETCH (c);
2789              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2790              break;
2791
2792            case 'S':
2793              laststart = b;
2794              PATFETCH (c);
2795              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2796              break;
2797#endif /* emacs */
2798
2799
2800            case 'w':
2801	      if (syntax & RE_NO_GNU_OPS)
2802		goto normal_char;
2803              laststart = b;
2804              BUF_PUSH (wordchar);
2805              break;
2806
2807
2808            case 'W':
2809	      if (syntax & RE_NO_GNU_OPS)
2810		goto normal_char;
2811              laststart = b;
2812              BUF_PUSH (notwordchar);
2813              break;
2814
2815
2816            case '<':
2817	      if (syntax & RE_NO_GNU_OPS)
2818		goto normal_char;
2819              BUF_PUSH (wordbeg);
2820              break;
2821
2822            case '>':
2823	      if (syntax & RE_NO_GNU_OPS)
2824		goto normal_char;
2825              BUF_PUSH (wordend);
2826              break;
2827
2828            case 'b':
2829	      if (syntax & RE_NO_GNU_OPS)
2830		goto normal_char;
2831              BUF_PUSH (wordbound);
2832              break;
2833
2834            case 'B':
2835	      if (syntax & RE_NO_GNU_OPS)
2836		goto normal_char;
2837              BUF_PUSH (notwordbound);
2838              break;
2839
2840            case '`':
2841	      if (syntax & RE_NO_GNU_OPS)
2842		goto normal_char;
2843              BUF_PUSH (begbuf);
2844              break;
2845
2846            case '\'':
2847	      if (syntax & RE_NO_GNU_OPS)
2848		goto normal_char;
2849              BUF_PUSH (endbuf);
2850              break;
2851
2852            case '1': case '2': case '3': case '4': case '5':
2853            case '6': case '7': case '8': case '9':
2854              if (syntax & RE_NO_BK_REFS)
2855                goto normal_char;
2856
2857              c1 = c - '0';
2858
2859              if (c1 > regnum)
2860                FREE_STACK_RETURN (REG_ESUBREG);
2861
2862              /* Can't back reference to a subexpression if inside of it.  */
2863              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2864                goto normal_char;
2865
2866              laststart = b;
2867              BUF_PUSH_2 (duplicate, c1);
2868              break;
2869
2870
2871            case '+':
2872            case '?':
2873              if (syntax & RE_BK_PLUS_QM)
2874                goto handle_plus;
2875              else
2876                goto normal_backslash;
2877
2878            default:
2879            normal_backslash:
2880              /* You might think it would be useful for \ to mean
2881                 not to translate; but if we don't translate it
2882                 it will never match anything.  */
2883              c = TRANSLATE (c);
2884              goto normal_char;
2885            }
2886          break;
2887
2888
2889	default:
2890        /* Expects the character in `c'.  */
2891	normal_char:
2892	      /* If no exactn currently being built.  */
2893          if (!pending_exact
2894
2895              /* If last exactn not at current position.  */
2896              || pending_exact + *pending_exact + 1 != b
2897
2898              /* We have only one byte following the exactn for the count.  */
2899	      || *pending_exact == (1 << BYTEWIDTH) - 1
2900
2901              /* If followed by a repetition operator.  */
2902              || *p == '*' || *p == '^'
2903	      || ((syntax & RE_BK_PLUS_QM)
2904		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2905		  : (*p == '+' || *p == '?'))
2906	      || ((syntax & RE_INTERVALS)
2907                  && ((syntax & RE_NO_BK_BRACES)
2908		      ? *p == '{'
2909                      : (p[0] == '\\' && p[1] == '{'))))
2910	    {
2911	      /* Start building a new exactn.  */
2912
2913              laststart = b;
2914
2915	      BUF_PUSH_2 (exactn, 0);
2916	      pending_exact = b - 1;
2917            }
2918
2919	  BUF_PUSH (c);
2920          (*pending_exact)++;
2921	  break;
2922        } /* switch (c) */
2923    } /* while p != pend */
2924
2925
2926  /* Through the pattern now.  */
2927
2928  if (fixup_alt_jump)
2929    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2930
2931  if (!COMPILE_STACK_EMPTY)
2932    FREE_STACK_RETURN (REG_EPAREN);
2933
2934  /* If we don't want backtracking, force success
2935     the first time we reach the end of the compiled pattern.  */
2936  if (syntax & RE_NO_POSIX_BACKTRACKING)
2937    BUF_PUSH (succeed);
2938
2939  free (compile_stack.stack);
2940
2941  /* We have succeeded; set the length of the buffer.  */
2942  bufp->used = b - bufp->buffer;
2943
2944#ifdef DEBUG
2945  if (debug)
2946    {
2947      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2948      print_compiled_pattern (bufp);
2949    }
2950#endif /* DEBUG */
2951
2952#ifndef MATCH_MAY_ALLOCATE
2953  /* Initialize the failure stack to the largest possible stack.  This
2954     isn't necessary unless we're trying to avoid calling alloca in
2955     the search and match routines.  */
2956  {
2957    int num_regs = bufp->re_nsub + 1;
2958
2959    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2960       is strictly greater than re_max_failures, the largest possible stack
2961       is 2 * re_max_failures failure points.  */
2962    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2963      {
2964	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2965
2966# ifdef emacs
2967	if (! fail_stack.stack)
2968	  fail_stack.stack
2969	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2970					    * sizeof (fail_stack_elt_t));
2971	else
2972	  fail_stack.stack
2973	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2974					     (fail_stack.size
2975					      * sizeof (fail_stack_elt_t)));
2976# else /* not emacs */
2977	if (! fail_stack.stack)
2978	  fail_stack.stack
2979	    = (fail_stack_elt_t *) malloc (fail_stack.size
2980					   * sizeof (fail_stack_elt_t));
2981	else
2982	  fail_stack.stack
2983	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2984					    (fail_stack.size
2985					     * sizeof (fail_stack_elt_t)));
2986# endif /* not emacs */
2987      }
2988
2989    regex_grow_registers (num_regs);
2990  }
2991#endif /* not MATCH_MAY_ALLOCATE */
2992
2993  return REG_NOERROR;
2994} /* regex_compile */
2995
2996/* Subroutines for `regex_compile'.  */
2997
2998/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2999
3000static void
3001store_op1 (op, loc, arg)
3002    re_opcode_t op;
3003    unsigned char *loc;
3004    int arg;
3005{
3006  *loc = (unsigned char) op;
3007  STORE_NUMBER (loc + 1, arg);
3008}
3009
3010
3011/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3012
3013static void
3014store_op2 (op, loc, arg1, arg2)
3015    re_opcode_t op;
3016    unsigned char *loc;
3017    int arg1, arg2;
3018{
3019  *loc = (unsigned char) op;
3020  STORE_NUMBER (loc + 1, arg1);
3021  STORE_NUMBER (loc + 3, arg2);
3022}
3023
3024
3025/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3026   for OP followed by two-byte integer parameter ARG.  */
3027
3028static void
3029insert_op1 (op, loc, arg, end)
3030    re_opcode_t op;
3031    unsigned char *loc;
3032    int arg;
3033    unsigned char *end;
3034{
3035  register unsigned char *pfrom = end;
3036  register unsigned char *pto = end + 3;
3037
3038  while (pfrom != loc)
3039    *--pto = *--pfrom;
3040
3041  store_op1 (op, loc, arg);
3042}
3043
3044
3045/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3046
3047static void
3048insert_op2 (op, loc, arg1, arg2, end)
3049    re_opcode_t op;
3050    unsigned char *loc;
3051    int arg1, arg2;
3052    unsigned char *end;
3053{
3054  register unsigned char *pfrom = end;
3055  register unsigned char *pto = end + 5;
3056
3057  while (pfrom != loc)
3058    *--pto = *--pfrom;
3059
3060  store_op2 (op, loc, arg1, arg2);
3061}
3062
3063
3064/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3065   after an alternative or a begin-subexpression.  We assume there is at
3066   least one character before the ^.  */
3067
3068static boolean
3069at_begline_loc_p (pattern, p, syntax)
3070    const char *pattern, *p;
3071    reg_syntax_t syntax;
3072{
3073  const char *prev = p - 2;
3074  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3075
3076  return
3077       /* After a subexpression?  */
3078       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3079       /* After an alternative?  */
3080    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3081}
3082
3083
3084/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3085   at least one character after the $, i.e., `P < PEND'.  */
3086
3087static boolean
3088at_endline_loc_p (p, pend, syntax)
3089    const char *p, *pend;
3090    reg_syntax_t syntax;
3091{
3092  const char *next = p;
3093  boolean next_backslash = *next == '\\';
3094  const char *next_next = p + 1 < pend ? p + 1 : 0;
3095
3096  return
3097       /* Before a subexpression?  */
3098       (syntax & RE_NO_BK_PARENS ? *next == ')'
3099        : next_backslash && next_next && *next_next == ')')
3100       /* Before an alternative?  */
3101    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3102        : next_backslash && next_next && *next_next == '|');
3103}
3104
3105
3106/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3107   false if it's not.  */
3108
3109static boolean
3110group_in_compile_stack (compile_stack, regnum)
3111    compile_stack_type compile_stack;
3112    regnum_t regnum;
3113{
3114  int this_element;
3115
3116  for (this_element = compile_stack.avail - 1;
3117       this_element >= 0;
3118       this_element--)
3119    if (compile_stack.stack[this_element].regnum == regnum)
3120      return true;
3121
3122  return false;
3123}
3124
3125
3126/* Read the ending character of a range (in a bracket expression) from the
3127   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3128   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3129   Then we set the translation of all bits between the starting and
3130   ending characters (inclusive) in the compiled pattern B.
3131
3132   Return an error code.
3133
3134   We use these short variable names so we can use the same macros as
3135   `regex_compile' itself.  */
3136
3137static reg_errcode_t
3138compile_range (p_ptr, pend, translate, syntax, b)
3139    const char **p_ptr, *pend;
3140    RE_TRANSLATE_TYPE translate;
3141    reg_syntax_t syntax;
3142    unsigned char *b;
3143{
3144  unsigned this_char;
3145
3146  const char *p = *p_ptr;
3147  reg_errcode_t ret;
3148  char range_start[2];
3149  char range_end[2];
3150
3151  if (p == pend)
3152    return REG_ERANGE;
3153
3154  /* Fetch the endpoints without translating them; the
3155     appropriate translation is done in the bit-setting loop below.  */
3156  range_start[0] = p[-2];  range_start[1] = '\0';
3157  range_end[0]   = p[ 0];  range_end[1]   = '\0';
3158
3159  /* Have to increment the pointer into the pattern string, so the
3160     caller isn't still at the ending character.  */
3161  (*p_ptr)++;
3162
3163  /* Report an error if the range is empty and the syntax prohibits this.  */
3164  ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3165
3166  /* Here we see why `this_char' has to be larger than an `unsigned
3167     char' -- we would otherwise go into an infinite
3168     loop, since all characters <= 0xff.  */
3169  for (this_char = 0; this_char <= (unsigned char) -1; this_char++)
3170    {
3171      char ch[2];
3172      ch[0] = this_char;  ch[1] = '\0';
3173      if (strcoll (range_start, ch) <= 0 && strcoll (ch, range_end) <= 0)
3174	{
3175	  SET_LIST_BIT (TRANSLATE (this_char));
3176	  ret = REG_NOERROR;
3177	}
3178    }
3179
3180  return ret;
3181}
3182
3183/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3184   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3185   characters can start a string that matches the pattern.  This fastmap
3186   is used by re_search to skip quickly over impossible starting points.
3187
3188   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3189   area as BUFP->fastmap.
3190
3191   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3192   the pattern buffer.
3193
3194   Returns 0 if we succeed, -2 if an internal error.   */
3195
3196int
3197re_compile_fastmap (bufp)
3198     struct re_pattern_buffer *bufp;
3199{
3200  int j, k;
3201#ifdef MATCH_MAY_ALLOCATE
3202  fail_stack_type fail_stack;
3203#endif
3204#ifndef REGEX_MALLOC
3205  char *destination;
3206#endif
3207
3208  register char *fastmap = bufp->fastmap;
3209  unsigned char *pattern = bufp->buffer;
3210  unsigned char *p = pattern;
3211  register unsigned char *pend = pattern + bufp->used;
3212
3213#ifdef REL_ALLOC
3214  /* This holds the pointer to the failure stack, when
3215     it is allocated relocatably.  */
3216  fail_stack_elt_t *failure_stack_ptr;
3217#endif
3218
3219  /* Assume that each path through the pattern can be null until
3220     proven otherwise.  We set this false at the bottom of switch
3221     statement, to which we get only if a particular path doesn't
3222     match the empty string.  */
3223  boolean path_can_be_null = true;
3224
3225  /* We aren't doing a `succeed_n' to begin with.  */
3226  boolean succeed_n_p = false;
3227
3228  assert (fastmap != NULL && p != NULL);
3229
3230  INIT_FAIL_STACK ();
3231  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3232  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3233  bufp->can_be_null = 0;
3234
3235  while (1)
3236    {
3237      if (p == pend || *p == succeed)
3238	{
3239	  /* We have reached the (effective) end of pattern.  */
3240	  if (!FAIL_STACK_EMPTY ())
3241	    {
3242	      bufp->can_be_null |= path_can_be_null;
3243
3244	      /* Reset for next path.  */
3245	      path_can_be_null = true;
3246
3247	      p = fail_stack.stack[--fail_stack.avail].pointer;
3248
3249	      continue;
3250	    }
3251	  else
3252	    break;
3253	}
3254
3255      /* We should never be about to go beyond the end of the pattern.  */
3256      assert (p < pend);
3257
3258      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3259	{
3260
3261        /* I guess the idea here is to simply not bother with a fastmap
3262           if a backreference is used, since it's too hard to figure out
3263           the fastmap for the corresponding group.  Setting
3264           `can_be_null' stops `re_search_2' from using the fastmap, so
3265           that is all we do.  */
3266	case duplicate:
3267	  bufp->can_be_null = 1;
3268          goto done;
3269
3270
3271      /* Following are the cases which match a character.  These end
3272         with `break'.  */
3273
3274	case exactn:
3275          fastmap[p[1]] = 1;
3276	  break;
3277
3278
3279        case charset:
3280          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3281	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3282              fastmap[j] = 1;
3283	  break;
3284
3285
3286	case charset_not:
3287	  /* Chars beyond end of map must be allowed.  */
3288	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3289            fastmap[j] = 1;
3290
3291	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3292	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3293              fastmap[j] = 1;
3294          break;
3295
3296
3297	case wordchar:
3298	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3299	    if (SYNTAX (j) == Sword)
3300	      fastmap[j] = 1;
3301	  break;
3302
3303
3304	case notwordchar:
3305	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3306	    if (SYNTAX (j) != Sword)
3307	      fastmap[j] = 1;
3308	  break;
3309
3310
3311        case anychar:
3312	  {
3313	    int fastmap_newline = fastmap['\n'];
3314
3315	    /* `.' matches anything ...  */
3316	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3317	      fastmap[j] = 1;
3318
3319	    /* ... except perhaps newline.  */
3320	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3321	      fastmap['\n'] = fastmap_newline;
3322
3323	    /* Return if we have already set `can_be_null'; if we have,
3324	       then the fastmap is irrelevant.  Something's wrong here.  */
3325	    else if (bufp->can_be_null)
3326	      goto done;
3327
3328	    /* Otherwise, have to check alternative paths.  */
3329	    break;
3330	  }
3331
3332#ifdef emacs
3333        case syntaxspec:
3334	  k = *p++;
3335	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3336	    if (SYNTAX (j) == (enum syntaxcode) k)
3337	      fastmap[j] = 1;
3338	  break;
3339
3340
3341	case notsyntaxspec:
3342	  k = *p++;
3343	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3344	    if (SYNTAX (j) != (enum syntaxcode) k)
3345	      fastmap[j] = 1;
3346	  break;
3347
3348
3349      /* All cases after this match the empty string.  These end with
3350         `continue'.  */
3351
3352
3353	case before_dot:
3354	case at_dot:
3355	case after_dot:
3356          continue;
3357#endif /* emacs */
3358
3359
3360        case no_op:
3361        case begline:
3362        case endline:
3363	case begbuf:
3364	case endbuf:
3365	case wordbound:
3366	case notwordbound:
3367	case wordbeg:
3368	case wordend:
3369        case push_dummy_failure:
3370          continue;
3371
3372
3373	case jump_n:
3374        case pop_failure_jump:
3375	case maybe_pop_jump:
3376	case jump:
3377        case jump_past_alt:
3378	case dummy_failure_jump:
3379          EXTRACT_NUMBER_AND_INCR (j, p);
3380	  p += j;
3381	  if (j > 0)
3382	    continue;
3383
3384          /* Jump backward implies we just went through the body of a
3385             loop and matched nothing.  Opcode jumped to should be
3386             `on_failure_jump' or `succeed_n'.  Just treat it like an
3387             ordinary jump.  For a * loop, it has pushed its failure
3388             point already; if so, discard that as redundant.  */
3389          if ((re_opcode_t) *p != on_failure_jump
3390	      && (re_opcode_t) *p != succeed_n)
3391	    continue;
3392
3393          p++;
3394          EXTRACT_NUMBER_AND_INCR (j, p);
3395          p += j;
3396
3397          /* If what's on the stack is where we are now, pop it.  */
3398          if (!FAIL_STACK_EMPTY ()
3399	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3400            fail_stack.avail--;
3401
3402          continue;
3403
3404
3405        case on_failure_jump:
3406        case on_failure_keep_string_jump:
3407	handle_on_failure_jump:
3408          EXTRACT_NUMBER_AND_INCR (j, p);
3409
3410          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3411             end of the pattern.  We don't want to push such a point,
3412             since when we restore it above, entering the switch will
3413             increment `p' past the end of the pattern.  We don't need
3414             to push such a point since we obviously won't find any more
3415             fastmap entries beyond `pend'.  Such a pattern can match
3416             the null string, though.  */
3417          if (p + j < pend)
3418            {
3419              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3420		{
3421		  RESET_FAIL_STACK ();
3422		  return -2;
3423		}
3424            }
3425          else
3426            bufp->can_be_null = 1;
3427
3428          if (succeed_n_p)
3429            {
3430              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3431              succeed_n_p = false;
3432	    }
3433
3434          continue;
3435
3436
3437	case succeed_n:
3438          /* Get to the number of times to succeed.  */
3439          p += 2;
3440
3441          /* Increment p past the n for when k != 0.  */
3442          EXTRACT_NUMBER_AND_INCR (k, p);
3443          if (k == 0)
3444	    {
3445              p -= 4;
3446  	      succeed_n_p = true;  /* Spaghetti code alert.  */
3447              goto handle_on_failure_jump;
3448            }
3449          continue;
3450
3451
3452	case set_number_at:
3453          p += 4;
3454          continue;
3455
3456
3457	case start_memory:
3458        case stop_memory:
3459	  p += 2;
3460	  continue;
3461
3462
3463	default:
3464          abort (); /* We have listed all the cases.  */
3465        } /* switch *p++ */
3466
3467      /* Getting here means we have found the possible starting
3468         characters for one path of the pattern -- and that the empty
3469         string does not match.  We need not follow this path further.
3470         Instead, look at the next alternative (remembered on the
3471         stack), or quit if no more.  The test at the top of the loop
3472         does these things.  */
3473      path_can_be_null = false;
3474      p = pend;
3475    } /* while p */
3476
3477  /* Set `can_be_null' for the last path (also the first path, if the
3478     pattern is empty).  */
3479  bufp->can_be_null |= path_can_be_null;
3480
3481 done:
3482  RESET_FAIL_STACK ();
3483  return 0;
3484} /* re_compile_fastmap */
3485#ifdef _LIBC
3486weak_alias (__re_compile_fastmap, re_compile_fastmap)
3487#endif
3488
3489/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3490   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3491   this memory for recording register information.  STARTS and ENDS
3492   must be allocated using the malloc library routine, and must each
3493   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3494
3495   If NUM_REGS == 0, then subsequent matches should allocate their own
3496   register data.
3497
3498   Unless this function is called, the first search or match using
3499   PATTERN_BUFFER will allocate its own register data, without
3500   freeing the old data.  */
3501
3502void
3503re_set_registers (bufp, regs, num_regs, starts, ends)
3504    struct re_pattern_buffer *bufp;
3505    struct re_registers *regs;
3506    unsigned num_regs;
3507    regoff_t *starts, *ends;
3508{
3509  if (num_regs)
3510    {
3511      bufp->regs_allocated = REGS_REALLOCATE;
3512      regs->num_regs = num_regs;
3513      regs->start = starts;
3514      regs->end = ends;
3515    }
3516  else
3517    {
3518      bufp->regs_allocated = REGS_UNALLOCATED;
3519      regs->num_regs = 0;
3520      regs->start = regs->end = (regoff_t *) 0;
3521    }
3522}
3523#ifdef _LIBC
3524weak_alias (__re_set_registers, re_set_registers)
3525#endif
3526
3527/* Searching routines.  */
3528
3529/* Like re_search_2, below, but only one string is specified, and
3530   doesn't let you say where to stop matching. */
3531
3532int
3533re_search (bufp, string, size, startpos, range, regs)
3534     struct re_pattern_buffer *bufp;
3535     const char *string;
3536     int size, startpos, range;
3537     struct re_registers *regs;
3538{
3539  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3540		      regs, size);
3541}
3542#ifdef _LIBC
3543weak_alias (__re_search, re_search)
3544#endif
3545
3546
3547/* Using the compiled pattern in BUFP->buffer, first tries to match the
3548   virtual concatenation of STRING1 and STRING2, starting first at index
3549   STARTPOS, then at STARTPOS + 1, and so on.
3550
3551   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3552
3553   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3554   only at STARTPOS; in general, the last start tried is STARTPOS +
3555   RANGE.
3556
3557   In REGS, return the indices of the virtual concatenation of STRING1
3558   and STRING2 that matched the entire BUFP->buffer and its contained
3559   subexpressions.
3560
3561   Do not consider matching one past the index STOP in the virtual
3562   concatenation of STRING1 and STRING2.
3563
3564   We return either the position in the strings at which the match was
3565   found, -1 if no match, or -2 if error (such as failure
3566   stack overflow).  */
3567
3568int
3569re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3570     struct re_pattern_buffer *bufp;
3571     const char *string1, *string2;
3572     int size1, size2;
3573     int startpos;
3574     int range;
3575     struct re_registers *regs;
3576     int stop;
3577{
3578  int val;
3579  register char *fastmap = bufp->fastmap;
3580  register RE_TRANSLATE_TYPE translate = bufp->translate;
3581  int total_size = size1 + size2;
3582  int endpos = startpos + range;
3583
3584  /* Check for out-of-range STARTPOS.  */
3585  if (startpos < 0 || startpos > total_size)
3586    return -1;
3587
3588  /* Fix up RANGE if it might eventually take us outside
3589     the virtual concatenation of STRING1 and STRING2.
3590     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3591  if (endpos < 0)
3592    range = 0 - startpos;
3593  else if (endpos > total_size)
3594    range = total_size - startpos;
3595
3596  /* If the search isn't to be a backwards one, don't waste time in a
3597     search for a pattern that must be anchored.  */
3598  if (bufp->used > 0 && range > 0
3599      && ((re_opcode_t) bufp->buffer[0] == begbuf
3600	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
3601	  || ((re_opcode_t) bufp->buffer[0] == begline
3602	      && !bufp->newline_anchor)))
3603    {
3604      if (startpos > 0)
3605	return -1;
3606      else
3607	range = 1;
3608    }
3609
3610#ifdef emacs
3611  /* In a forward search for something that starts with \=.
3612     don't keep searching past point.  */
3613  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3614    {
3615      range = PT - startpos;
3616      if (range <= 0)
3617	return -1;
3618    }
3619#endif /* emacs */
3620
3621  /* Update the fastmap now if not correct already.  */
3622  if (fastmap && !bufp->fastmap_accurate)
3623    if (re_compile_fastmap (bufp) == -2)
3624      return -2;
3625
3626  /* Loop through the string, looking for a place to start matching.  */
3627  for (;;)
3628    {
3629      /* If a fastmap is supplied, skip quickly over characters that
3630         cannot be the start of a match.  If the pattern can match the
3631         null string, however, we don't need to skip characters; we want
3632         the first null string.  */
3633      if (fastmap && startpos < total_size && !bufp->can_be_null)
3634	{
3635	  if (range > 0)	/* Searching forwards.  */
3636	    {
3637	      register const char *d;
3638	      register int lim = 0;
3639	      int irange = range;
3640
3641              if (startpos < size1 && startpos + range >= size1)
3642                lim = range - (size1 - startpos);
3643
3644	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3645
3646              /* Written out as an if-else to avoid testing `translate'
3647                 inside the loop.  */
3648	      if (translate)
3649                while (range > lim
3650                       && !fastmap[(unsigned char)
3651				   translate[(unsigned char) *d++]])
3652                  range--;
3653	      else
3654                while (range > lim && !fastmap[(unsigned char) *d++])
3655                  range--;
3656
3657	      startpos += irange - range;
3658	    }
3659	  else				/* Searching backwards.  */
3660	    {
3661	      register char c = (size1 == 0 || startpos >= size1
3662                                 ? string2[startpos - size1]
3663                                 : string1[startpos]);
3664
3665	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3666		goto advance;
3667	    }
3668	}
3669
3670      /* If can't match the null string, and that's all we have left, fail.  */
3671      if (range >= 0 && startpos == total_size && fastmap
3672          && !bufp->can_be_null)
3673	return -1;
3674
3675      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3676				 startpos, regs, stop);
3677#ifndef REGEX_MALLOC
3678# ifdef C_ALLOCA
3679      alloca (0);
3680# endif
3681#endif
3682
3683      if (val >= 0)
3684	return startpos;
3685
3686      if (val == -2)
3687	return -2;
3688
3689    advance:
3690      if (!range)
3691        break;
3692      else if (range > 0)
3693        {
3694          range--;
3695          startpos++;
3696        }
3697      else
3698        {
3699          range++;
3700          startpos--;
3701        }
3702    }
3703  return -1;
3704} /* re_search_2 */
3705#ifdef _LIBC
3706weak_alias (__re_search_2, re_search_2)
3707#endif
3708
3709/* This converts PTR, a pointer into one of the search strings `string1'
3710   and `string2' into an offset from the beginning of that string.  */
3711#define POINTER_TO_OFFSET(ptr)			\
3712  (FIRST_STRING_P (ptr)				\
3713   ? ((regoff_t) ((ptr) - string1))		\
3714   : ((regoff_t) ((ptr) - string2 + size1)))
3715
3716/* Macros for dealing with the split strings in re_match_2.  */
3717
3718#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3719
3720/* Call before fetching a character with *d.  This switches over to
3721   string2 if necessary.  */
3722#define PREFETCH()							\
3723  while (d == dend)						    	\
3724    {									\
3725      /* End of string2 => fail.  */					\
3726      if (dend == end_match_2) 						\
3727        goto fail;							\
3728      /* End of string1 => advance to string2.  */ 			\
3729      d = string2;						        \
3730      dend = end_match_2;						\
3731    }
3732
3733
3734/* Test if at very beginning or at very end of the virtual concatenation
3735   of `string1' and `string2'.  If only one string, it's `string2'.  */
3736#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3737#define AT_STRINGS_END(d) ((d) == end2)
3738
3739
3740/* Test if D points to a character which is word-constituent.  We have
3741   two special cases to check for: if past the end of string1, look at
3742   the first character in string2; and if before the beginning of
3743   string2, look at the last character in string1.  */
3744#define WORDCHAR_P(d)							\
3745  (SYNTAX ((d) == end1 ? *string2					\
3746           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3747   == Sword)
3748
3749/* Disabled due to a compiler bug -- see comment at case wordbound */
3750#if 0
3751/* Test if the character before D and the one at D differ with respect
3752   to being word-constituent.  */
3753#define AT_WORD_BOUNDARY(d)						\
3754  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3755   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3756#endif
3757
3758/* Free everything we malloc.  */
3759#ifdef MATCH_MAY_ALLOCATE
3760# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3761# define FREE_VARIABLES()						\
3762  do {									\
3763    REGEX_FREE_STACK (fail_stack.stack);				\
3764    FREE_VAR (regstart);						\
3765    FREE_VAR (regend);							\
3766    FREE_VAR (old_regstart);						\
3767    FREE_VAR (old_regend);						\
3768    FREE_VAR (best_regstart);						\
3769    FREE_VAR (best_regend);						\
3770    FREE_VAR (reg_info);						\
3771    FREE_VAR (reg_dummy);						\
3772    FREE_VAR (reg_info_dummy);						\
3773  } while (0)
3774#else
3775# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3776#endif /* not MATCH_MAY_ALLOCATE */
3777
3778/* These values must meet several constraints.  They must not be valid
3779   register values; since we have a limit of 255 registers (because
3780   we use only one byte in the pattern for the register number), we can
3781   use numbers larger than 255.  They must differ by 1, because of
3782   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3783   be larger than the value for the highest register, so we do not try
3784   to actually save any registers when none are active.  */
3785#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3786#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3787
3788/* Matching routines.  */
3789
3790#ifndef emacs   /* Emacs never uses this.  */
3791/* re_match is like re_match_2 except it takes only a single string.  */
3792
3793int
3794re_match (bufp, string, size, pos, regs)
3795     struct re_pattern_buffer *bufp;
3796     const char *string;
3797     int size, pos;
3798     struct re_registers *regs;
3799{
3800  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3801				    pos, regs, size);
3802# ifndef REGEX_MALLOC
3803#  ifdef C_ALLOCA
3804  alloca (0);
3805#  endif
3806# endif
3807  return result;
3808}
3809# ifdef _LIBC
3810weak_alias (__re_match, re_match)
3811# endif
3812#endif /* not emacs */
3813
3814static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3815						    unsigned char *end,
3816						register_info_type *reg_info));
3817static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3818						  unsigned char *end,
3819						register_info_type *reg_info));
3820static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3821							unsigned char *end,
3822						register_info_type *reg_info));
3823static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3824				     int len, char *translate));
3825
3826/* re_match_2 matches the compiled pattern in BUFP against the
3827   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3828   and SIZE2, respectively).  We start matching at POS, and stop
3829   matching at STOP.
3830
3831   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3832   store offsets for the substring each group matched in REGS.  See the
3833   documentation for exactly how many groups we fill.
3834
3835   We return -1 if no match, -2 if an internal error (such as the
3836   failure stack overflowing).  Otherwise, we return the length of the
3837   matched substring.  */
3838
3839int
3840re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3841     struct re_pattern_buffer *bufp;
3842     const char *string1, *string2;
3843     int size1, size2;
3844     int pos;
3845     struct re_registers *regs;
3846     int stop;
3847{
3848  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3849				    pos, regs, stop);
3850#ifndef REGEX_MALLOC
3851# ifdef C_ALLOCA
3852  alloca (0);
3853# endif
3854#endif
3855  return result;
3856}
3857#ifdef _LIBC
3858weak_alias (__re_match_2, re_match_2)
3859#endif
3860
3861/* This is a separate function so that we can force an alloca cleanup
3862   afterwards.  */
3863static int
3864re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3865     struct re_pattern_buffer *bufp;
3866     const char *string1, *string2;
3867     int size1, size2;
3868     int pos;
3869     struct re_registers *regs;
3870     int stop;
3871{
3872  /* General temporaries.  */
3873  int mcnt;
3874  unsigned char *p1;
3875
3876  /* Just past the end of the corresponding string.  */
3877  const char *end1, *end2;
3878
3879  /* Pointers into string1 and string2, just past the last characters in
3880     each to consider matching.  */
3881  const char *end_match_1, *end_match_2;
3882
3883  /* Where we are in the data, and the end of the current string.  */
3884  const char *d, *dend;
3885
3886  /* Where we are in the pattern, and the end of the pattern.  */
3887  unsigned char *p = bufp->buffer;
3888  register unsigned char *pend = p + bufp->used;
3889
3890  /* Mark the opcode just after a start_memory, so we can test for an
3891     empty subpattern when we get to the stop_memory.  */
3892  unsigned char *just_past_start_mem = 0;
3893
3894  /* We use this to map every character in the string.  */
3895  RE_TRANSLATE_TYPE translate = bufp->translate;
3896
3897  /* Failure point stack.  Each place that can handle a failure further
3898     down the line pushes a failure point on this stack.  It consists of
3899     restart, regend, and reg_info for all registers corresponding to
3900     the subexpressions we're currently inside, plus the number of such
3901     registers, and, finally, two char *'s.  The first char * is where
3902     to resume scanning the pattern; the second one is where to resume
3903     scanning the strings.  If the latter is zero, the failure point is
3904     a ``dummy''; if a failure happens and the failure point is a dummy,
3905     it gets discarded and the next next one is tried.  */
3906#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3907  fail_stack_type fail_stack;
3908#endif
3909#ifdef DEBUG
3910  static unsigned failure_id;
3911  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3912#endif
3913
3914#ifdef REL_ALLOC
3915  /* This holds the pointer to the failure stack, when
3916     it is allocated relocatably.  */
3917  fail_stack_elt_t *failure_stack_ptr;
3918#endif
3919
3920  /* We fill all the registers internally, independent of what we
3921     return, for use in backreferences.  The number here includes
3922     an element for register zero.  */
3923  size_t num_regs = bufp->re_nsub + 1;
3924
3925  /* The currently active registers.  */
3926  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3927  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3928
3929  /* Information on the contents of registers. These are pointers into
3930     the input strings; they record just what was matched (on this
3931     attempt) by a subexpression part of the pattern, that is, the
3932     regnum-th regstart pointer points to where in the pattern we began
3933     matching and the regnum-th regend points to right after where we
3934     stopped matching the regnum-th subexpression.  (The zeroth register
3935     keeps track of what the whole pattern matches.)  */
3936#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3937  const char **regstart, **regend;
3938#endif
3939
3940  /* If a group that's operated upon by a repetition operator fails to
3941     match anything, then the register for its start will need to be
3942     restored because it will have been set to wherever in the string we
3943     are when we last see its open-group operator.  Similarly for a
3944     register's end.  */
3945#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3946  const char **old_regstart, **old_regend;
3947#endif
3948
3949  /* The is_active field of reg_info helps us keep track of which (possibly
3950     nested) subexpressions we are currently in. The matched_something
3951     field of reg_info[reg_num] helps us tell whether or not we have
3952     matched any of the pattern so far this time through the reg_num-th
3953     subexpression.  These two fields get reset each time through any
3954     loop their register is in.  */
3955#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3956  register_info_type *reg_info;
3957#endif
3958
3959  /* The following record the register info as found in the above
3960     variables when we find a match better than any we've seen before.
3961     This happens as we backtrack through the failure points, which in
3962     turn happens only if we have not yet matched the entire string. */
3963  unsigned best_regs_set = false;
3964#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3965  const char **best_regstart, **best_regend;
3966#endif
3967
3968  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3969     allocate space for that if we're not allocating space for anything
3970     else (see below).  Also, we never need info about register 0 for
3971     any of the other register vectors, and it seems rather a kludge to
3972     treat `best_regend' differently than the rest.  So we keep track of
3973     the end of the best match so far in a separate variable.  We
3974     initialize this to NULL so that when we backtrack the first time
3975     and need to test it, it's not garbage.  */
3976  const char *match_end = NULL;
3977
3978  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3979  int set_regs_matched_done = 0;
3980
3981  /* Used when we pop values we don't care about.  */
3982#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3983  const char **reg_dummy;
3984  register_info_type *reg_info_dummy;
3985#endif
3986
3987#ifdef DEBUG
3988  /* Counts the total number of registers pushed.  */
3989  unsigned num_regs_pushed = 0;
3990#endif
3991
3992  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3993
3994  INIT_FAIL_STACK ();
3995
3996#ifdef MATCH_MAY_ALLOCATE
3997  /* Do not bother to initialize all the register variables if there are
3998     no groups in the pattern, as it takes a fair amount of time.  If
3999     there are groups, we include space for register 0 (the whole
4000     pattern), even though we never use it, since it simplifies the
4001     array indexing.  We should fix this.  */
4002  if (bufp->re_nsub)
4003    {
4004      regstart = REGEX_TALLOC (num_regs, const char *);
4005      regend = REGEX_TALLOC (num_regs, const char *);
4006      old_regstart = REGEX_TALLOC (num_regs, const char *);
4007      old_regend = REGEX_TALLOC (num_regs, const char *);
4008      best_regstart = REGEX_TALLOC (num_regs, const char *);
4009      best_regend = REGEX_TALLOC (num_regs, const char *);
4010      reg_info = REGEX_TALLOC (num_regs, register_info_type);
4011      reg_dummy = REGEX_TALLOC (num_regs, const char *);
4012      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4013
4014      if (!(regstart && regend && old_regstart && old_regend && reg_info
4015            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4016        {
4017          FREE_VARIABLES ();
4018          return -2;
4019        }
4020    }
4021  else
4022    {
4023      /* We must initialize all our variables to NULL, so that
4024         `FREE_VARIABLES' doesn't try to free them.  */
4025      regstart = regend = old_regstart = old_regend = best_regstart
4026        = best_regend = reg_dummy = NULL;
4027      reg_info = reg_info_dummy = (register_info_type *) NULL;
4028    }
4029#endif /* MATCH_MAY_ALLOCATE */
4030
4031  /* The starting position is bogus.  */
4032  if (pos < 0 || pos > size1 + size2)
4033    {
4034      FREE_VARIABLES ();
4035      return -1;
4036    }
4037
4038  /* Initialize subexpression text positions to -1 to mark ones that no
4039     start_memory/stop_memory has been seen for. Also initialize the
4040     register information struct.  */
4041  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4042    {
4043      regstart[mcnt] = regend[mcnt]
4044        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4045
4046      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4047      IS_ACTIVE (reg_info[mcnt]) = 0;
4048      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4049      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4050    }
4051
4052  /* We move `string1' into `string2' if the latter's empty -- but not if
4053     `string1' is null.  */
4054  if (size2 == 0 && string1 != NULL)
4055    {
4056      string2 = string1;
4057      size2 = size1;
4058      string1 = 0;
4059      size1 = 0;
4060    }
4061  end1 = string1 + size1;
4062  end2 = string2 + size2;
4063
4064  /* Compute where to stop matching, within the two strings.  */
4065  if (stop <= size1)
4066    {
4067      end_match_1 = string1 + stop;
4068      end_match_2 = string2;
4069    }
4070  else
4071    {
4072      end_match_1 = end1;
4073      end_match_2 = string2 + stop - size1;
4074    }
4075
4076  /* `p' scans through the pattern as `d' scans through the data.
4077     `dend' is the end of the input string that `d' points within.  `d'
4078     is advanced into the following input string whenever necessary, but
4079     this happens before fetching; therefore, at the beginning of the
4080     loop, `d' can be pointing at the end of a string, but it cannot
4081     equal `string2'.  */
4082  if (size1 > 0 && pos <= size1)
4083    {
4084      d = string1 + pos;
4085      dend = end_match_1;
4086    }
4087  else
4088    {
4089      d = string2 + pos - size1;
4090      dend = end_match_2;
4091    }
4092
4093  DEBUG_PRINT1 ("The compiled pattern is:\n");
4094  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4095  DEBUG_PRINT1 ("The string to match is: `");
4096  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4097  DEBUG_PRINT1 ("'\n");
4098
4099  /* This loops over pattern commands.  It exits by returning from the
4100     function if the match is complete, or it drops through if the match
4101     fails at this starting point in the input data.  */
4102  for (;;)
4103    {
4104#ifdef _LIBC
4105      DEBUG_PRINT2 ("\n%p: ", p);
4106#else
4107      DEBUG_PRINT2 ("\n0x%x: ", p);
4108#endif
4109
4110      if (p == pend)
4111	{ /* End of pattern means we might have succeeded.  */
4112          DEBUG_PRINT1 ("end of pattern ... ");
4113
4114	  /* If we haven't matched the entire string, and we want the
4115             longest match, try backtracking.  */
4116          if (d != end_match_2)
4117	    {
4118	      /* 1 if this match ends in the same string (string1 or string2)
4119		 as the best previous match.  */
4120	      boolean same_str_p = (FIRST_STRING_P (match_end)
4121				    == MATCHING_IN_FIRST_STRING);
4122	      /* 1 if this match is the best seen so far.  */
4123	      boolean best_match_p;
4124
4125	      /* AIX compiler got confused when this was combined
4126		 with the previous declaration.  */
4127	      if (same_str_p)
4128		best_match_p = d > match_end;
4129	      else
4130		best_match_p = !MATCHING_IN_FIRST_STRING;
4131
4132              DEBUG_PRINT1 ("backtracking.\n");
4133
4134              if (!FAIL_STACK_EMPTY ())
4135                { /* More failure points to try.  */
4136
4137                  /* If exceeds best match so far, save it.  */
4138                  if (!best_regs_set || best_match_p)
4139                    {
4140                      best_regs_set = true;
4141                      match_end = d;
4142
4143                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4144
4145                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4146                        {
4147                          best_regstart[mcnt] = regstart[mcnt];
4148                          best_regend[mcnt] = regend[mcnt];
4149                        }
4150                    }
4151                  goto fail;
4152                }
4153
4154              /* If no failure points, don't restore garbage.  And if
4155                 last match is real best match, don't restore second
4156                 best one. */
4157              else if (best_regs_set && !best_match_p)
4158                {
4159  	        restore_best_regs:
4160                  /* Restore best match.  It may happen that `dend ==
4161                     end_match_1' while the restored d is in string2.
4162                     For example, the pattern `x.*y.*z' against the
4163                     strings `x-' and `y-z-', if the two strings are
4164                     not consecutive in memory.  */
4165                  DEBUG_PRINT1 ("Restoring best registers.\n");
4166
4167                  d = match_end;
4168                  dend = ((d >= string1 && d <= end1)
4169		           ? end_match_1 : end_match_2);
4170
4171		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4172		    {
4173		      regstart[mcnt] = best_regstart[mcnt];
4174		      regend[mcnt] = best_regend[mcnt];
4175		    }
4176                }
4177            } /* d != end_match_2 */
4178
4179	succeed_label:
4180          DEBUG_PRINT1 ("Accepting match.\n");
4181
4182          /* If caller wants register contents data back, do it.  */
4183          if (regs && !bufp->no_sub)
4184	    {
4185              /* Have the register data arrays been allocated?  */
4186              if (bufp->regs_allocated == REGS_UNALLOCATED)
4187                { /* No.  So allocate them with malloc.  We need one
4188                     extra element beyond `num_regs' for the `-1' marker
4189                     GNU code uses.  */
4190                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4191                  regs->start = TALLOC (regs->num_regs, regoff_t);
4192                  regs->end = TALLOC (regs->num_regs, regoff_t);
4193                  if (regs->start == NULL || regs->end == NULL)
4194		    {
4195		      FREE_VARIABLES ();
4196		      return -2;
4197		    }
4198                  bufp->regs_allocated = REGS_REALLOCATE;
4199                }
4200              else if (bufp->regs_allocated == REGS_REALLOCATE)
4201                { /* Yes.  If we need more elements than were already
4202                     allocated, reallocate them.  If we need fewer, just
4203                     leave it alone.  */
4204                  if (regs->num_regs < num_regs + 1)
4205                    {
4206                      regs->num_regs = num_regs + 1;
4207                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4208                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4209                      if (regs->start == NULL || regs->end == NULL)
4210			{
4211			  FREE_VARIABLES ();
4212			  return -2;
4213			}
4214                    }
4215                }
4216              else
4217		{
4218		  /* These braces fend off a "empty body in an else-statement"
4219		     warning under GCC when assert expands to nothing.  */
4220		  assert (bufp->regs_allocated == REGS_FIXED);
4221		}
4222
4223              /* Convert the pointer data in `regstart' and `regend' to
4224                 indices.  Register zero has to be set differently,
4225                 since we haven't kept track of any info for it.  */
4226              if (regs->num_regs > 0)
4227                {
4228                  regs->start[0] = pos;
4229                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4230				  ? ((regoff_t) (d - string1))
4231			          : ((regoff_t) (d - string2 + size1)));
4232                }
4233
4234              /* Go through the first `min (num_regs, regs->num_regs)'
4235                 registers, since that is all we initialized.  */
4236	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4237		   mcnt++)
4238		{
4239                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4240                    regs->start[mcnt] = regs->end[mcnt] = -1;
4241                  else
4242                    {
4243		      regs->start[mcnt]
4244			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4245                      regs->end[mcnt]
4246			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4247                    }
4248		}
4249
4250              /* If the regs structure we return has more elements than
4251                 were in the pattern, set the extra elements to -1.  If
4252                 we (re)allocated the registers, this is the case,
4253                 because we always allocate enough to have at least one
4254                 -1 at the end.  */
4255              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4256                regs->start[mcnt] = regs->end[mcnt] = -1;
4257	    } /* regs && !bufp->no_sub */
4258
4259          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4260                        nfailure_points_pushed, nfailure_points_popped,
4261                        nfailure_points_pushed - nfailure_points_popped);
4262          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4263
4264          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4265			    ? string1
4266			    : string2 - size1);
4267
4268          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4269
4270          FREE_VARIABLES ();
4271          return mcnt;
4272        }
4273
4274      /* Otherwise match next pattern command.  */
4275      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4276	{
4277        /* Ignore these.  Used to ignore the n of succeed_n's which
4278           currently have n == 0.  */
4279        case no_op:
4280          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4281          break;
4282
4283	case succeed:
4284          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4285	  goto succeed_label;
4286
4287        /* Match the next n pattern characters exactly.  The following
4288           byte in the pattern defines n, and the n bytes after that
4289           are the characters to match.  */
4290	case exactn:
4291	  mcnt = *p++;
4292          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4293
4294          /* This is written out as an if-else so we don't waste time
4295             testing `translate' inside the loop.  */
4296          if (translate)
4297	    {
4298	      do
4299		{
4300		  PREFETCH ();
4301		  if ((unsigned char) translate[(unsigned char) *d++]
4302		      != (unsigned char) *p++)
4303                    goto fail;
4304		}
4305	      while (--mcnt);
4306	    }
4307	  else
4308	    {
4309	      do
4310		{
4311		  PREFETCH ();
4312		  if (*d++ != (char) *p++) goto fail;
4313		}
4314	      while (--mcnt);
4315	    }
4316	  SET_REGS_MATCHED ();
4317          break;
4318
4319
4320        /* Match any character except possibly a newline or a null.  */
4321	case anychar:
4322          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4323
4324          PREFETCH ();
4325
4326          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4327              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4328	    goto fail;
4329
4330          SET_REGS_MATCHED ();
4331          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4332          d++;
4333	  break;
4334
4335
4336	case charset:
4337	case charset_not:
4338	  {
4339	    register unsigned char c;
4340	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4341
4342            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4343
4344	    PREFETCH ();
4345	    c = TRANSLATE (*d); /* The character to match.  */
4346
4347            /* Cast to `unsigned' instead of `unsigned char' in case the
4348               bit list is a full 32 bytes long.  */
4349	    if (c < (unsigned) (*p * BYTEWIDTH)
4350		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4351	      not = !not;
4352
4353	    p += 1 + *p;
4354
4355	    if (!not) goto fail;
4356
4357	    SET_REGS_MATCHED ();
4358            d++;
4359	    break;
4360	  }
4361
4362
4363        /* The beginning of a group is represented by start_memory.
4364           The arguments are the register number in the next byte, and the
4365           number of groups inner to this one in the next.  The text
4366           matched within the group is recorded (in the internal
4367           registers data structure) under the register number.  */
4368        case start_memory:
4369	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4370
4371          /* Find out if this group can match the empty string.  */
4372	  p1 = p;		/* To send to group_match_null_string_p.  */
4373
4374          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4375            REG_MATCH_NULL_STRING_P (reg_info[*p])
4376              = group_match_null_string_p (&p1, pend, reg_info);
4377
4378          /* Save the position in the string where we were the last time
4379             we were at this open-group operator in case the group is
4380             operated upon by a repetition operator, e.g., with `(a*)*b'
4381             against `ab'; then we want to ignore where we are now in
4382             the string in case this attempt to match fails.  */
4383          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4384                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4385                             : regstart[*p];
4386	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4387			 POINTER_TO_OFFSET (old_regstart[*p]));
4388
4389          regstart[*p] = d;
4390	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4391
4392          IS_ACTIVE (reg_info[*p]) = 1;
4393          MATCHED_SOMETHING (reg_info[*p]) = 0;
4394
4395	  /* Clear this whenever we change the register activity status.  */
4396	  set_regs_matched_done = 0;
4397
4398          /* This is the new highest active register.  */
4399          highest_active_reg = *p;
4400
4401          /* If nothing was active before, this is the new lowest active
4402             register.  */
4403          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4404            lowest_active_reg = *p;
4405
4406          /* Move past the register number and inner group count.  */
4407          p += 2;
4408	  just_past_start_mem = p;
4409
4410          break;
4411
4412
4413        /* The stop_memory opcode represents the end of a group.  Its
4414           arguments are the same as start_memory's: the register
4415           number, and the number of inner groups.  */
4416	case stop_memory:
4417	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4418
4419          /* We need to save the string position the last time we were at
4420             this close-group operator in case the group is operated
4421             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4422             against `aba'; then we want to ignore where we are now in
4423             the string in case this attempt to match fails.  */
4424          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4425                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4426			   : regend[*p];
4427	  DEBUG_PRINT2 ("      old_regend: %d\n",
4428			 POINTER_TO_OFFSET (old_regend[*p]));
4429
4430          regend[*p] = d;
4431	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4432
4433          /* This register isn't active anymore.  */
4434          IS_ACTIVE (reg_info[*p]) = 0;
4435
4436	  /* Clear this whenever we change the register activity status.  */
4437	  set_regs_matched_done = 0;
4438
4439          /* If this was the only register active, nothing is active
4440             anymore.  */
4441          if (lowest_active_reg == highest_active_reg)
4442            {
4443              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4444              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4445            }
4446          else
4447            { /* We must scan for the new highest active register, since
4448                 it isn't necessarily one less than now: consider
4449                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4450                 new highest active register is 1.  */
4451              unsigned char r = *p - 1;
4452              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4453                r--;
4454
4455              /* If we end up at register zero, that means that we saved
4456                 the registers as the result of an `on_failure_jump', not
4457                 a `start_memory', and we jumped to past the innermost
4458                 `stop_memory'.  For example, in ((.)*) we save
4459                 registers 1 and 2 as a result of the *, but when we pop
4460                 back to the second ), we are at the stop_memory 1.
4461                 Thus, nothing is active.  */
4462	      if (r == 0)
4463                {
4464                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4465                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4466                }
4467              else
4468                highest_active_reg = r;
4469            }
4470
4471          /* If just failed to match something this time around with a
4472             group that's operated on by a repetition operator, try to
4473             force exit from the ``loop'', and restore the register
4474             information for this group that we had before trying this
4475             last match.  */
4476          if ((!MATCHED_SOMETHING (reg_info[*p])
4477               || just_past_start_mem == p - 1)
4478	      && (p + 2) < pend)
4479            {
4480              boolean is_a_jump_n = false;
4481
4482              p1 = p + 2;
4483              mcnt = 0;
4484              switch ((re_opcode_t) *p1++)
4485                {
4486                  case jump_n:
4487		    is_a_jump_n = true;
4488                  case pop_failure_jump:
4489		  case maybe_pop_jump:
4490		  case jump:
4491		  case dummy_failure_jump:
4492                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4493		    if (is_a_jump_n)
4494		      p1 += 2;
4495                    break;
4496
4497                  default:
4498                    /* do nothing */ ;
4499                }
4500	      p1 += mcnt;
4501
4502              /* If the next operation is a jump backwards in the pattern
4503	         to an on_failure_jump right before the start_memory
4504                 corresponding to this stop_memory, exit from the loop
4505                 by forcing a failure after pushing on the stack the
4506                 on_failure_jump's jump in the pattern, and d.  */
4507              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4508                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4509		{
4510                  /* If this group ever matched anything, then restore
4511                     what its registers were before trying this last
4512                     failed match, e.g., with `(a*)*b' against `ab' for
4513                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4514                     against `aba' for regend[3].
4515
4516                     Also restore the registers for inner groups for,
4517                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4518                     otherwise get trashed).  */
4519
4520                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4521		    {
4522		      unsigned r;
4523
4524                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4525
4526		      /* Restore this and inner groups' (if any) registers.  */
4527                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4528			   r++)
4529                        {
4530                          regstart[r] = old_regstart[r];
4531
4532                          /* xx why this test?  */
4533                          if (old_regend[r] >= regstart[r])
4534                            regend[r] = old_regend[r];
4535                        }
4536                    }
4537		  p1++;
4538                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4539                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4540
4541                  goto fail;
4542                }
4543            }
4544
4545          /* Move past the register number and the inner group count.  */
4546          p += 2;
4547          break;
4548
4549
4550	/* \<digit> has been turned into a `duplicate' command which is
4551           followed by the numeric value of <digit> as the register number.  */
4552        case duplicate:
4553	  {
4554	    register const char *d2, *dend2;
4555	    int regno = *p++;   /* Get which register to match against.  */
4556	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4557
4558	    /* Can't back reference a group which we've never matched.  */
4559            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4560              goto fail;
4561
4562            /* Where in input to try to start matching.  */
4563            d2 = regstart[regno];
4564
4565            /* Where to stop matching; if both the place to start and
4566               the place to stop matching are in the same string, then
4567               set to the place to stop, otherwise, for now have to use
4568               the end of the first string.  */
4569
4570            dend2 = ((FIRST_STRING_P (regstart[regno])
4571		      == FIRST_STRING_P (regend[regno]))
4572		     ? regend[regno] : end_match_1);
4573	    for (;;)
4574	      {
4575		/* If necessary, advance to next segment in register
4576                   contents.  */
4577		while (d2 == dend2)
4578		  {
4579		    if (dend2 == end_match_2) break;
4580		    if (dend2 == regend[regno]) break;
4581
4582                    /* End of string1 => advance to string2. */
4583                    d2 = string2;
4584                    dend2 = regend[regno];
4585		  }
4586		/* At end of register contents => success */
4587		if (d2 == dend2) break;
4588
4589		/* If necessary, advance to next segment in data.  */
4590		PREFETCH ();
4591
4592		/* How many characters left in this segment to match.  */
4593		mcnt = dend - d;
4594
4595		/* Want how many consecutive characters we can match in
4596                   one shot, so, if necessary, adjust the count.  */
4597                if (mcnt > dend2 - d2)
4598		  mcnt = dend2 - d2;
4599
4600		/* Compare that many; failure if mismatch, else move
4601                   past them.  */
4602		if (translate
4603                    ? bcmp_translate (d, d2, mcnt, translate)
4604                    : memcmp (d, d2, mcnt))
4605		  goto fail;
4606		d += mcnt, d2 += mcnt;
4607
4608		/* Do this because we've match some characters.  */
4609		SET_REGS_MATCHED ();
4610	      }
4611	  }
4612	  break;
4613
4614
4615        /* begline matches the empty string at the beginning of the string
4616           (unless `not_bol' is set in `bufp'), and, if
4617           `newline_anchor' is set, after newlines.  */
4618	case begline:
4619          DEBUG_PRINT1 ("EXECUTING begline.\n");
4620
4621          if (AT_STRINGS_BEG (d))
4622            {
4623              if (!bufp->not_bol) break;
4624            }
4625          else if (d[-1] == '\n' && bufp->newline_anchor)
4626            {
4627              break;
4628            }
4629          /* In all other cases, we fail.  */
4630          goto fail;
4631
4632
4633        /* endline is the dual of begline.  */
4634	case endline:
4635          DEBUG_PRINT1 ("EXECUTING endline.\n");
4636
4637          if (AT_STRINGS_END (d))
4638            {
4639              if (!bufp->not_eol) break;
4640            }
4641
4642          /* We have to ``prefetch'' the next character.  */
4643          else if ((d == end1 ? *string2 : *d) == '\n'
4644                   && bufp->newline_anchor)
4645            {
4646              break;
4647            }
4648          goto fail;
4649
4650
4651	/* Match at the very beginning of the data.  */
4652        case begbuf:
4653          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4654          if (AT_STRINGS_BEG (d))
4655            break;
4656          goto fail;
4657
4658
4659	/* Match at the very end of the data.  */
4660        case endbuf:
4661          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4662	  if (AT_STRINGS_END (d))
4663	    break;
4664          goto fail;
4665
4666
4667        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4668           pushes NULL as the value for the string on the stack.  Then
4669           `pop_failure_point' will keep the current value for the
4670           string, instead of restoring it.  To see why, consider
4671           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4672           then the . fails against the \n.  But the next thing we want
4673           to do is match the \n against the \n; if we restored the
4674           string value, we would be back at the foo.
4675
4676           Because this is used only in specific cases, we don't need to
4677           check all the things that `on_failure_jump' does, to make
4678           sure the right things get saved on the stack.  Hence we don't
4679           share its code.  The only reason to push anything on the
4680           stack at all is that otherwise we would have to change
4681           `anychar's code to do something besides goto fail in this
4682           case; that seems worse than this.  */
4683        case on_failure_keep_string_jump:
4684          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4685
4686          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4687#ifdef _LIBC
4688          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4689#else
4690          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4691#endif
4692
4693          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4694          break;
4695
4696
4697	/* Uses of on_failure_jump:
4698
4699           Each alternative starts with an on_failure_jump that points
4700           to the beginning of the next alternative.  Each alternative
4701           except the last ends with a jump that in effect jumps past
4702           the rest of the alternatives.  (They really jump to the
4703           ending jump of the following alternative, because tensioning
4704           these jumps is a hassle.)
4705
4706           Repeats start with an on_failure_jump that points past both
4707           the repetition text and either the following jump or
4708           pop_failure_jump back to this on_failure_jump.  */
4709	case on_failure_jump:
4710        on_failure:
4711          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4712
4713          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4714#ifdef _LIBC
4715          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4716#else
4717          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4718#endif
4719
4720          /* If this on_failure_jump comes right before a group (i.e.,
4721             the original * applied to a group), save the information
4722             for that group and all inner ones, so that if we fail back
4723             to this point, the group's information will be correct.
4724             For example, in \(a*\)*\1, we need the preceding group,
4725             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4726
4727          /* We can't use `p' to check ahead because we push
4728             a failure point to `p + mcnt' after we do this.  */
4729          p1 = p;
4730
4731          /* We need to skip no_op's before we look for the
4732             start_memory in case this on_failure_jump is happening as
4733             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4734             against aba.  */
4735          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4736            p1++;
4737
4738          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4739            {
4740              /* We have a new highest active register now.  This will
4741                 get reset at the start_memory we are about to get to,
4742                 but we will have saved all the registers relevant to
4743                 this repetition op, as described above.  */
4744              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4745              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4746                lowest_active_reg = *(p1 + 1);
4747            }
4748
4749          DEBUG_PRINT1 (":\n");
4750          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4751          break;
4752
4753
4754        /* A smart repeat ends with `maybe_pop_jump'.
4755	   We change it to either `pop_failure_jump' or `jump'.  */
4756        case maybe_pop_jump:
4757          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4758          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4759          {
4760	    register unsigned char *p2 = p;
4761
4762            /* Compare the beginning of the repeat with what in the
4763               pattern follows its end. If we can establish that there
4764               is nothing that they would both match, i.e., that we
4765               would have to backtrack because of (as in, e.g., `a*a')
4766               then we can change to pop_failure_jump, because we'll
4767               never have to backtrack.
4768
4769               This is not true in the case of alternatives: in
4770               `(a|ab)*' we do need to backtrack to the `ab' alternative
4771               (e.g., if the string was `ab').  But instead of trying to
4772               detect that here, the alternative has put on a dummy
4773               failure point which is what we will end up popping.  */
4774
4775	    /* Skip over open/close-group commands.
4776	       If what follows this loop is a ...+ construct,
4777	       look at what begins its body, since we will have to
4778	       match at least one of that.  */
4779	    while (1)
4780	      {
4781		if (p2 + 2 < pend
4782		    && ((re_opcode_t) *p2 == stop_memory
4783			|| (re_opcode_t) *p2 == start_memory))
4784		  p2 += 3;
4785		else if (p2 + 6 < pend
4786			 && (re_opcode_t) *p2 == dummy_failure_jump)
4787		  p2 += 6;
4788		else
4789		  break;
4790	      }
4791
4792	    p1 = p + mcnt;
4793	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4794	       to the `maybe_finalize_jump' of this case.  Examine what
4795	       follows.  */
4796
4797            /* If we're at the end of the pattern, we can change.  */
4798            if (p2 == pend)
4799	      {
4800		/* Consider what happens when matching ":\(.*\)"
4801		   against ":/".  I don't really understand this code
4802		   yet.  */
4803  	        p[-3] = (unsigned char) pop_failure_jump;
4804                DEBUG_PRINT1
4805                  ("  End of pattern: change to `pop_failure_jump'.\n");
4806              }
4807
4808            else if ((re_opcode_t) *p2 == exactn
4809		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4810	      {
4811		register unsigned char c
4812                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4813
4814                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4815                  {
4816  		    p[-3] = (unsigned char) pop_failure_jump;
4817                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4818                                  c, p1[5]);
4819                  }
4820
4821		else if ((re_opcode_t) p1[3] == charset
4822			 || (re_opcode_t) p1[3] == charset_not)
4823		  {
4824		    int not = (re_opcode_t) p1[3] == charset_not;
4825
4826		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4827			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4828		      not = !not;
4829
4830                    /* `not' is equal to 1 if c would match, which means
4831                        that we can't change to pop_failure_jump.  */
4832		    if (!not)
4833                      {
4834  		        p[-3] = (unsigned char) pop_failure_jump;
4835                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4836                      }
4837		  }
4838	      }
4839            else if ((re_opcode_t) *p2 == charset)
4840	      {
4841		/* We win if the first character of the loop is not part
4842                   of the charset.  */
4843                if ((re_opcode_t) p1[3] == exactn
4844 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4845 			  && (p2[2 + p1[5] / BYTEWIDTH]
4846 			      & (1 << (p1[5] % BYTEWIDTH)))))
4847		  {
4848		    p[-3] = (unsigned char) pop_failure_jump;
4849		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4850                  }
4851
4852		else if ((re_opcode_t) p1[3] == charset_not)
4853		  {
4854		    int idx;
4855		    /* We win if the charset_not inside the loop
4856		       lists every character listed in the charset after.  */
4857		    for (idx = 0; idx < (int) p2[1]; idx++)
4858		      if (! (p2[2 + idx] == 0
4859			     || (idx < (int) p1[4]
4860				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4861			break;
4862
4863		    if (idx == p2[1])
4864                      {
4865  		        p[-3] = (unsigned char) pop_failure_jump;
4866                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4867                      }
4868		  }
4869		else if ((re_opcode_t) p1[3] == charset)
4870		  {
4871		    int idx;
4872		    /* We win if the charset inside the loop
4873		       has no overlap with the one after the loop.  */
4874		    for (idx = 0;
4875			 idx < (int) p2[1] && idx < (int) p1[4];
4876			 idx++)
4877		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4878			break;
4879
4880		    if (idx == p2[1] || idx == p1[4])
4881                      {
4882  		        p[-3] = (unsigned char) pop_failure_jump;
4883                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4884                      }
4885		  }
4886	      }
4887	  }
4888	  p -= 2;		/* Point at relative address again.  */
4889	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4890	    {
4891	      p[-1] = (unsigned char) jump;
4892              DEBUG_PRINT1 ("  Match => jump.\n");
4893	      goto unconditional_jump;
4894	    }
4895        /* Note fall through.  */
4896
4897
4898	/* The end of a simple repeat has a pop_failure_jump back to
4899           its matching on_failure_jump, where the latter will push a
4900           failure point.  The pop_failure_jump takes off failure
4901           points put on by this pop_failure_jump's matching
4902           on_failure_jump; we got through the pattern to here from the
4903           matching on_failure_jump, so didn't fail.  */
4904        case pop_failure_jump:
4905          {
4906            /* We need to pass separate storage for the lowest and
4907               highest registers, even though we don't care about the
4908               actual values.  Otherwise, we will restore only one
4909               register from the stack, since lowest will == highest in
4910               `pop_failure_point'.  */
4911            active_reg_t dummy_low_reg, dummy_high_reg;
4912            unsigned char *pdummy;
4913            const char *sdummy;
4914
4915            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4916            POP_FAILURE_POINT (sdummy, pdummy,
4917                               dummy_low_reg, dummy_high_reg,
4918                               reg_dummy, reg_dummy, reg_info_dummy);
4919          }
4920	  /* Note fall through.  */
4921
4922	unconditional_jump:
4923#ifdef _LIBC
4924	  DEBUG_PRINT2 ("\n%p: ", p);
4925#else
4926	  DEBUG_PRINT2 ("\n0x%x: ", p);
4927#endif
4928          /* Note fall through.  */
4929
4930        /* Unconditionally jump (without popping any failure points).  */
4931        case jump:
4932	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4933          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4934	  p += mcnt;				/* Do the jump.  */
4935#ifdef _LIBC
4936          DEBUG_PRINT2 ("(to %p).\n", p);
4937#else
4938          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4939#endif
4940	  break;
4941
4942
4943        /* We need this opcode so we can detect where alternatives end
4944           in `group_match_null_string_p' et al.  */
4945        case jump_past_alt:
4946          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4947          goto unconditional_jump;
4948
4949
4950        /* Normally, the on_failure_jump pushes a failure point, which
4951           then gets popped at pop_failure_jump.  We will end up at
4952           pop_failure_jump, also, and with a pattern of, say, `a+', we
4953           are skipping over the on_failure_jump, so we have to push
4954           something meaningless for pop_failure_jump to pop.  */
4955        case dummy_failure_jump:
4956          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4957          /* It doesn't matter what we push for the string here.  What
4958             the code at `fail' tests is the value for the pattern.  */
4959          PUSH_FAILURE_POINT (NULL, NULL, -2);
4960          goto unconditional_jump;
4961
4962
4963        /* At the end of an alternative, we need to push a dummy failure
4964           point in case we are followed by a `pop_failure_jump', because
4965           we don't want the failure point for the alternative to be
4966           popped.  For example, matching `(a|ab)*' against `aab'
4967           requires that we match the `ab' alternative.  */
4968        case push_dummy_failure:
4969          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4970          /* See comments just above at `dummy_failure_jump' about the
4971             two zeroes.  */
4972          PUSH_FAILURE_POINT (NULL, NULL, -2);
4973          break;
4974
4975        /* Have to succeed matching what follows at least n times.
4976           After that, handle like `on_failure_jump'.  */
4977        case succeed_n:
4978          EXTRACT_NUMBER (mcnt, p + 2);
4979          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4980
4981          assert (mcnt >= 0);
4982          /* Originally, this is how many times we HAVE to succeed.  */
4983          if (mcnt > 0)
4984            {
4985               mcnt--;
4986	       p += 2;
4987               STORE_NUMBER_AND_INCR (p, mcnt);
4988#ifdef _LIBC
4989               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4990#else
4991               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4992#endif
4993            }
4994	  else if (mcnt == 0)
4995            {
4996#ifdef _LIBC
4997              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4998#else
4999              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
5000#endif
5001	      p[2] = (unsigned char) no_op;
5002              p[3] = (unsigned char) no_op;
5003              goto on_failure;
5004            }
5005          break;
5006
5007        case jump_n:
5008          EXTRACT_NUMBER (mcnt, p + 2);
5009          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5010
5011          /* Originally, this is how many times we CAN jump.  */
5012          if (mcnt)
5013            {
5014               mcnt--;
5015               STORE_NUMBER (p + 2, mcnt);
5016#ifdef _LIBC
5017               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
5018#else
5019               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
5020#endif
5021	       goto unconditional_jump;
5022            }
5023          /* If don't have to jump any more, skip over the rest of command.  */
5024	  else
5025	    p += 4;
5026          break;
5027
5028	case set_number_at:
5029	  {
5030            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5031
5032            EXTRACT_NUMBER_AND_INCR (mcnt, p);
5033            p1 = p + mcnt;
5034            EXTRACT_NUMBER_AND_INCR (mcnt, p);
5035#ifdef _LIBC
5036            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
5037#else
5038            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
5039#endif
5040	    STORE_NUMBER (p1, mcnt);
5041            break;
5042          }
5043
5044#if 0
5045	/* The DEC Alpha C compiler 3.x generates incorrect code for the
5046	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
5047	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
5048	   macro and introducing temporary variables works around the bug.  */
5049
5050	case wordbound:
5051	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5052	  if (AT_WORD_BOUNDARY (d))
5053	    break;
5054	  goto fail;
5055
5056	case notwordbound:
5057	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5058	  if (AT_WORD_BOUNDARY (d))
5059	    goto fail;
5060	  break;
5061#else
5062	case wordbound:
5063	{
5064	  boolean prevchar, thischar;
5065
5066	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5067	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5068	    break;
5069
5070	  prevchar = WORDCHAR_P (d - 1);
5071	  thischar = WORDCHAR_P (d);
5072	  if (prevchar != thischar)
5073	    break;
5074	  goto fail;
5075	}
5076
5077      case notwordbound:
5078	{
5079	  boolean prevchar, thischar;
5080
5081	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5082	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5083	    goto fail;
5084
5085	  prevchar = WORDCHAR_P (d - 1);
5086	  thischar = WORDCHAR_P (d);
5087	  if (prevchar != thischar)
5088	    goto fail;
5089	  break;
5090	}
5091#endif
5092
5093	case wordbeg:
5094          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5095	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5096	    break;
5097          goto fail;
5098
5099	case wordend:
5100          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5101	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5102              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5103	    break;
5104          goto fail;
5105
5106#ifdef emacs
5107  	case before_dot:
5108          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5109 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5110  	    goto fail;
5111  	  break;
5112
5113  	case at_dot:
5114          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5115 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5116  	    goto fail;
5117  	  break;
5118
5119  	case after_dot:
5120          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5121          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5122  	    goto fail;
5123  	  break;
5124
5125	case syntaxspec:
5126          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5127	  mcnt = *p++;
5128	  goto matchsyntax;
5129
5130        case wordchar:
5131          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5132	  mcnt = (int) Sword;
5133        matchsyntax:
5134	  PREFETCH ();
5135	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5136	  d++;
5137	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5138	    goto fail;
5139          SET_REGS_MATCHED ();
5140	  break;
5141
5142	case notsyntaxspec:
5143          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5144	  mcnt = *p++;
5145	  goto matchnotsyntax;
5146
5147        case notwordchar:
5148          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5149	  mcnt = (int) Sword;
5150        matchnotsyntax:
5151	  PREFETCH ();
5152	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5153	  d++;
5154	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5155	    goto fail;
5156	  SET_REGS_MATCHED ();
5157          break;
5158
5159#else /* not emacs */
5160	case wordchar:
5161          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5162	  PREFETCH ();
5163          if (!WORDCHAR_P (d))
5164            goto fail;
5165	  SET_REGS_MATCHED ();
5166          d++;
5167	  break;
5168
5169	case notwordchar:
5170          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5171	  PREFETCH ();
5172	  if (WORDCHAR_P (d))
5173            goto fail;
5174          SET_REGS_MATCHED ();
5175          d++;
5176	  break;
5177#endif /* not emacs */
5178
5179        default:
5180          abort ();
5181	}
5182      continue;  /* Successfully executed one pattern command; keep going.  */
5183
5184
5185    /* We goto here if a matching operation fails. */
5186    fail:
5187      if (!FAIL_STACK_EMPTY ())
5188	{ /* A restart point is known.  Restore to that state.  */
5189          DEBUG_PRINT1 ("\nFAIL:\n");
5190          POP_FAILURE_POINT (d, p,
5191                             lowest_active_reg, highest_active_reg,
5192                             regstart, regend, reg_info);
5193
5194          /* If this failure point is a dummy, try the next one.  */
5195          if (!p)
5196	    goto fail;
5197
5198          /* If we failed to the end of the pattern, don't examine *p.  */
5199	  assert (p <= pend);
5200          if (p < pend)
5201            {
5202              boolean is_a_jump_n = false;
5203
5204              /* If failed to a backwards jump that's part of a repetition
5205                 loop, need to pop this failure point and use the next one.  */
5206              switch ((re_opcode_t) *p)
5207                {
5208                case jump_n:
5209                  is_a_jump_n = true;
5210                case maybe_pop_jump:
5211                case pop_failure_jump:
5212                case jump:
5213                  p1 = p + 1;
5214                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5215                  p1 += mcnt;
5216
5217                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5218                      || (!is_a_jump_n
5219                          && (re_opcode_t) *p1 == on_failure_jump))
5220                    goto fail;
5221                  break;
5222                default:
5223                  /* do nothing */ ;
5224                }
5225            }
5226
5227          if (d >= string1 && d <= end1)
5228	    dend = end_match_1;
5229        }
5230      else
5231        break;   /* Matching at this starting point really fails.  */
5232    } /* for (;;) */
5233
5234  if (best_regs_set)
5235    goto restore_best_regs;
5236
5237  FREE_VARIABLES ();
5238
5239  return -1;         			/* Failure to match.  */
5240} /* re_match_2 */
5241
5242/* Subroutine definitions for re_match_2.  */
5243
5244
5245/* We are passed P pointing to a register number after a start_memory.
5246
5247   Return true if the pattern up to the corresponding stop_memory can
5248   match the empty string, and false otherwise.
5249
5250   If we find the matching stop_memory, sets P to point to one past its number.
5251   Otherwise, sets P to an undefined byte less than or equal to END.
5252
5253   We don't handle duplicates properly (yet).  */
5254
5255static boolean
5256group_match_null_string_p (p, end, reg_info)
5257    unsigned char **p, *end;
5258    register_info_type *reg_info;
5259{
5260  int mcnt;
5261  /* Point to after the args to the start_memory.  */
5262  unsigned char *p1 = *p + 2;
5263
5264  while (p1 < end)
5265    {
5266      /* Skip over opcodes that can match nothing, and return true or
5267	 false, as appropriate, when we get to one that can't, or to the
5268         matching stop_memory.  */
5269
5270      switch ((re_opcode_t) *p1)
5271        {
5272        /* Could be either a loop or a series of alternatives.  */
5273        case on_failure_jump:
5274          p1++;
5275          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5276
5277          /* If the next operation is not a jump backwards in the
5278	     pattern.  */
5279
5280	  if (mcnt >= 0)
5281	    {
5282              /* Go through the on_failure_jumps of the alternatives,
5283                 seeing if any of the alternatives cannot match nothing.
5284                 The last alternative starts with only a jump,
5285                 whereas the rest start with on_failure_jump and end
5286                 with a jump, e.g., here is the pattern for `a|b|c':
5287
5288                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5289                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5290                 /exactn/1/c
5291
5292                 So, we have to first go through the first (n-1)
5293                 alternatives and then deal with the last one separately.  */
5294
5295
5296              /* Deal with the first (n-1) alternatives, which start
5297                 with an on_failure_jump (see above) that jumps to right
5298                 past a jump_past_alt.  */
5299
5300              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5301                {
5302                  /* `mcnt' holds how many bytes long the alternative
5303                     is, including the ending `jump_past_alt' and
5304                     its number.  */
5305
5306                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5307				                      reg_info))
5308                    return false;
5309
5310                  /* Move to right after this alternative, including the
5311		     jump_past_alt.  */
5312                  p1 += mcnt;
5313
5314                  /* Break if it's the beginning of an n-th alternative
5315                     that doesn't begin with an on_failure_jump.  */
5316                  if ((re_opcode_t) *p1 != on_failure_jump)
5317                    break;
5318
5319		  /* Still have to check that it's not an n-th
5320		     alternative that starts with an on_failure_jump.  */
5321		  p1++;
5322                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5323                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5324                    {
5325		      /* Get to the beginning of the n-th alternative.  */
5326                      p1 -= 3;
5327                      break;
5328                    }
5329                }
5330
5331              /* Deal with the last alternative: go back and get number
5332                 of the `jump_past_alt' just before it.  `mcnt' contains
5333                 the length of the alternative.  */
5334              EXTRACT_NUMBER (mcnt, p1 - 2);
5335
5336              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5337                return false;
5338
5339              p1 += mcnt;	/* Get past the n-th alternative.  */
5340            } /* if mcnt > 0 */
5341          break;
5342
5343
5344        case stop_memory:
5345	  assert (p1[1] == **p);
5346          *p = p1 + 2;
5347          return true;
5348
5349
5350        default:
5351          if (!common_op_match_null_string_p (&p1, end, reg_info))
5352            return false;
5353        }
5354    } /* while p1 < end */
5355
5356  return false;
5357} /* group_match_null_string_p */
5358
5359
5360/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5361   It expects P to be the first byte of a single alternative and END one
5362   byte past the last. The alternative can contain groups.  */
5363
5364static boolean
5365alt_match_null_string_p (p, end, reg_info)
5366    unsigned char *p, *end;
5367    register_info_type *reg_info;
5368{
5369  int mcnt;
5370  unsigned char *p1 = p;
5371
5372  while (p1 < end)
5373    {
5374      /* Skip over opcodes that can match nothing, and break when we get
5375         to one that can't.  */
5376
5377      switch ((re_opcode_t) *p1)
5378        {
5379	/* It's a loop.  */
5380        case on_failure_jump:
5381          p1++;
5382          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5383          p1 += mcnt;
5384          break;
5385
5386	default:
5387          if (!common_op_match_null_string_p (&p1, end, reg_info))
5388            return false;
5389        }
5390    }  /* while p1 < end */
5391
5392  return true;
5393} /* alt_match_null_string_p */
5394
5395
5396/* Deals with the ops common to group_match_null_string_p and
5397   alt_match_null_string_p.
5398
5399   Sets P to one after the op and its arguments, if any.  */
5400
5401static boolean
5402common_op_match_null_string_p (p, end, reg_info)
5403    unsigned char **p, *end;
5404    register_info_type *reg_info;
5405{
5406  int mcnt;
5407  boolean ret;
5408  int reg_no;
5409  unsigned char *p1 = *p;
5410
5411  switch ((re_opcode_t) *p1++)
5412    {
5413    case no_op:
5414    case begline:
5415    case endline:
5416    case begbuf:
5417    case endbuf:
5418    case wordbeg:
5419    case wordend:
5420    case wordbound:
5421    case notwordbound:
5422#ifdef emacs
5423    case before_dot:
5424    case at_dot:
5425    case after_dot:
5426#endif
5427      break;
5428
5429    case start_memory:
5430      reg_no = *p1;
5431      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5432      ret = group_match_null_string_p (&p1, end, reg_info);
5433
5434      /* Have to set this here in case we're checking a group which
5435         contains a group and a back reference to it.  */
5436
5437      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5438        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5439
5440      if (!ret)
5441        return false;
5442      break;
5443
5444    /* If this is an optimized succeed_n for zero times, make the jump.  */
5445    case jump:
5446      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5447      if (mcnt >= 0)
5448        p1 += mcnt;
5449      else
5450        return false;
5451      break;
5452
5453    case succeed_n:
5454      /* Get to the number of times to succeed.  */
5455      p1 += 2;
5456      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5457
5458      if (mcnt == 0)
5459        {
5460          p1 -= 4;
5461          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5462          p1 += mcnt;
5463        }
5464      else
5465        return false;
5466      break;
5467
5468    case duplicate:
5469      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5470        return false;
5471      break;
5472
5473    case set_number_at:
5474      p1 += 4;
5475
5476    default:
5477      /* All other opcodes mean we cannot match the empty string.  */
5478      return false;
5479  }
5480
5481  *p = p1;
5482  return true;
5483} /* common_op_match_null_string_p */
5484
5485
5486/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5487   bytes; nonzero otherwise.  */
5488
5489static int
5490bcmp_translate (s1, s2, len, translate)
5491     const char *s1, *s2;
5492     register int len;
5493     RE_TRANSLATE_TYPE translate;
5494{
5495  register const unsigned char *p1 = (const unsigned char *) s1;
5496  register const unsigned char *p2 = (const unsigned char *) s2;
5497  while (len)
5498    {
5499      if (translate[*p1++] != translate[*p2++]) return 1;
5500      len--;
5501    }
5502  return 0;
5503}
5504
5505/* Entry points for GNU code.  */
5506
5507/* re_compile_pattern is the GNU regular expression compiler: it
5508   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5509   Returns 0 if the pattern was valid, otherwise an error string.
5510
5511   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5512   are set in BUFP on entry.
5513
5514   We call regex_compile to do the actual compilation.  */
5515
5516const char *
5517re_compile_pattern (pattern, length, bufp)
5518     const char *pattern;
5519     size_t length;
5520     struct re_pattern_buffer *bufp;
5521{
5522  reg_errcode_t ret;
5523
5524  /* GNU code is written to assume at least RE_NREGS registers will be set
5525     (and at least one extra will be -1).  */
5526  bufp->regs_allocated = REGS_UNALLOCATED;
5527
5528  /* And GNU code determines whether or not to get register information
5529     by passing null for the REGS argument to re_match, etc., not by
5530     setting no_sub.  */
5531  bufp->no_sub = 0;
5532
5533  /* Match anchors at newline.  */
5534  bufp->newline_anchor = 1;
5535
5536  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5537
5538  if (!ret)
5539    return NULL;
5540  return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
5541}
5542#ifdef _LIBC
5543weak_alias (__re_compile_pattern, re_compile_pattern)
5544#endif
5545
5546/* Entry points compatible with 4.2 BSD regex library.  We don't define
5547   them unless specifically requested.  */
5548
5549#if defined _REGEX_RE_COMP || defined _LIBC
5550
5551/* BSD has one and only one pattern buffer.  */
5552static struct re_pattern_buffer re_comp_buf;
5553
5554char *
5555#ifdef _LIBC
5556/* Make these definitions weak in libc, so POSIX programs can redefine
5557   these names if they don't use our functions, and still use
5558   regcomp/regexec below without link errors.  */
5559weak_function
5560#endif
5561re_comp (s)
5562    const char *s;
5563{
5564  reg_errcode_t ret;
5565
5566  if (!s)
5567    {
5568      if (!re_comp_buf.buffer)
5569	return gettext ("No previous regular expression");
5570      return 0;
5571    }
5572
5573  if (!re_comp_buf.buffer)
5574    {
5575      re_comp_buf.buffer = (unsigned char *) malloc (200);
5576      if (re_comp_buf.buffer == NULL)
5577        return (char *) gettext (re_error_msgid
5578				 + re_error_msgid_idx[(int) REG_ESPACE]);
5579      re_comp_buf.allocated = 200;
5580
5581      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5582      if (re_comp_buf.fastmap == NULL)
5583	return (char *) gettext (re_error_msgid
5584				 + re_error_msgid_idx[(int) REG_ESPACE]);
5585    }
5586
5587  /* Since `re_exec' always passes NULL for the `regs' argument, we
5588     don't need to initialize the pattern buffer fields which affect it.  */
5589
5590  /* Match anchors at newlines.  */
5591  re_comp_buf.newline_anchor = 1;
5592
5593  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5594
5595  if (!ret)
5596    return NULL;
5597
5598  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5599  return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
5600}
5601
5602
5603int
5604#ifdef _LIBC
5605weak_function
5606#endif
5607re_exec (s)
5608    const char *s;
5609{
5610  const int len = strlen (s);
5611  return
5612    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5613}
5614
5615#endif /* _REGEX_RE_COMP */
5616
5617/* POSIX.2 functions.  Don't define these for Emacs.  */
5618
5619#ifndef emacs
5620
5621/* regcomp takes a regular expression as a string and compiles it.
5622
5623   PREG is a regex_t *.  We do not expect any fields to be initialized,
5624   since POSIX says we shouldn't.  Thus, we set
5625
5626     `buffer' to the compiled pattern;
5627     `used' to the length of the compiled pattern;
5628     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5629       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5630       RE_SYNTAX_POSIX_BASIC;
5631     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5632     `fastmap' to an allocated space for the fastmap;
5633     `fastmap_accurate' to zero;
5634     `re_nsub' to the number of subexpressions in PATTERN.
5635
5636   PATTERN is the address of the pattern string.
5637
5638   CFLAGS is a series of bits which affect compilation.
5639
5640     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5641     use POSIX basic syntax.
5642
5643     If REG_NEWLINE is set, then . and [^...] don't match newline.
5644     Also, regexec will try a match beginning after every newline.
5645
5646     If REG_ICASE is set, then we considers upper- and lowercase
5647     versions of letters to be equivalent when matching.
5648
5649     If REG_NOSUB is set, then when PREG is passed to regexec, that
5650     routine will report only success or failure, and nothing about the
5651     registers.
5652
5653   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5654   the return codes and their meanings.)  */
5655
5656int
5657regcomp (preg, pattern, cflags)
5658    regex_t *preg;
5659    const char *pattern;
5660    int cflags;
5661{
5662  reg_errcode_t ret;
5663  reg_syntax_t syntax
5664    = (cflags & REG_EXTENDED) ?
5665      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5666
5667  /* regex_compile will allocate the space for the compiled pattern.  */
5668  preg->buffer = 0;
5669  preg->allocated = 0;
5670  preg->used = 0;
5671
5672  /* Try to allocate space for the fastmap.  */
5673  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5674
5675  if (cflags & REG_ICASE)
5676    {
5677      unsigned i;
5678
5679      preg->translate
5680	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5681				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5682      if (preg->translate == NULL)
5683        return (int) REG_ESPACE;
5684
5685      /* Map uppercase characters to corresponding lowercase ones.  */
5686      for (i = 0; i < CHAR_SET_SIZE; i++)
5687        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
5688    }
5689  else
5690    preg->translate = NULL;
5691
5692  /* If REG_NEWLINE is set, newlines are treated differently.  */
5693  if (cflags & REG_NEWLINE)
5694    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5695      syntax &= ~RE_DOT_NEWLINE;
5696      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5697      /* It also changes the matching behavior.  */
5698      preg->newline_anchor = 1;
5699    }
5700  else
5701    preg->newline_anchor = 0;
5702
5703  preg->no_sub = !!(cflags & REG_NOSUB);
5704
5705  /* POSIX says a null character in the pattern terminates it, so we
5706     can use strlen here in compiling the pattern.  */
5707  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5708
5709  /* POSIX doesn't distinguish between an unmatched open-group and an
5710     unmatched close-group: both are REG_EPAREN.  */
5711  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5712
5713  if (ret == REG_NOERROR && preg->fastmap)
5714    {
5715      /* Compute the fastmap now, since regexec cannot modify the pattern
5716	 buffer.  */
5717      if (re_compile_fastmap (preg) == -2)
5718	{
5719	  /* Some error occured while computing the fastmap, just forget
5720	     about it.  */
5721	  free (preg->fastmap);
5722	  preg->fastmap = NULL;
5723	}
5724    }
5725
5726  return (int) ret;
5727}
5728#ifdef _LIBC
5729weak_alias (__regcomp, regcomp)
5730#endif
5731
5732
5733/* regexec searches for a given pattern, specified by PREG, in the
5734   string STRING.
5735
5736   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5737   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5738   least NMATCH elements, and we set them to the offsets of the
5739   corresponding matched substrings.
5740
5741   EFLAGS specifies `execution flags' which affect matching: if
5742   REG_NOTBOL is set, then ^ does not match at the beginning of the
5743   string; if REG_NOTEOL is set, then $ does not match at the end.
5744
5745   We return 0 if we find a match and REG_NOMATCH if not.  */
5746
5747int
5748regexec (preg, string, nmatch, pmatch, eflags)
5749    const regex_t *preg;
5750    const char *string;
5751    size_t nmatch;
5752    regmatch_t pmatch[];
5753    int eflags;
5754{
5755  int ret;
5756  struct re_registers regs;
5757  regex_t private_preg;
5758  int len = strlen (string);
5759  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5760
5761  private_preg = *preg;
5762
5763  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5764  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5765
5766  /* The user has told us exactly how many registers to return
5767     information about, via `nmatch'.  We have to pass that on to the
5768     matching routines.  */
5769  private_preg.regs_allocated = REGS_FIXED;
5770
5771  if (want_reg_info)
5772    {
5773      regs.num_regs = nmatch;
5774      regs.start = TALLOC (nmatch * 2, regoff_t);
5775      if (regs.start == NULL)
5776        return (int) REG_NOMATCH;
5777      regs.end = regs.start + nmatch;
5778    }
5779
5780  /* Perform the searching operation.  */
5781  ret = re_search (&private_preg, string, len,
5782                   /* start: */ 0, /* range: */ len,
5783                   want_reg_info ? &regs : (struct re_registers *) 0);
5784
5785  /* Copy the register information to the POSIX structure.  */
5786  if (want_reg_info)
5787    {
5788      if (ret >= 0)
5789        {
5790          unsigned r;
5791
5792          for (r = 0; r < nmatch; r++)
5793            {
5794              pmatch[r].rm_so = regs.start[r];
5795              pmatch[r].rm_eo = regs.end[r];
5796            }
5797        }
5798
5799      /* If we needed the temporary register info, free the space now.  */
5800      free (regs.start);
5801    }
5802
5803  /* We want zero return to mean success, unlike `re_search'.  */
5804  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5805}
5806#ifdef _LIBC
5807weak_alias (__regexec, regexec)
5808#endif
5809
5810
5811/* Returns a message corresponding to an error code, ERRCODE, returned
5812   from either regcomp or regexec.   We don't use PREG here.  */
5813
5814size_t
5815regerror (errcode, preg, errbuf, errbuf_size)
5816    int errcode;
5817    const regex_t *preg;
5818    char *errbuf;
5819    size_t errbuf_size;
5820{
5821  const char *msg;
5822  size_t msg_size;
5823
5824  if (errcode < 0
5825      || errcode >= (int) (sizeof (re_error_msgid_idx)
5826			   / sizeof (re_error_msgid_idx[0])))
5827    /* Only error codes returned by the rest of the code should be passed
5828       to this routine.  If we are given anything else, or if other regex
5829       code generates an invalid error code, then the program has a bug.
5830       Dump core so we can fix it.  */
5831    abort ();
5832
5833  msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
5834
5835  msg_size = strlen (msg) + 1; /* Includes the null.  */
5836
5837  if (errbuf_size != 0)
5838    {
5839      if (msg_size > errbuf_size)
5840        {
5841#if defined HAVE_MEMPCPY || defined _LIBC
5842	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5843#else
5844          memcpy (errbuf, msg, errbuf_size - 1);
5845          errbuf[errbuf_size - 1] = 0;
5846#endif
5847        }
5848      else
5849        memcpy (errbuf, msg, msg_size);
5850    }
5851
5852  return msg_size;
5853}
5854#ifdef _LIBC
5855weak_alias (__regerror, regerror)
5856#endif
5857
5858
5859/* Free dynamically allocated space used by PREG.  */
5860
5861void
5862regfree (preg)
5863    regex_t *preg;
5864{
5865  if (preg->buffer != NULL)
5866    free (preg->buffer);
5867  preg->buffer = NULL;
5868
5869  preg->allocated = 0;
5870  preg->used = 0;
5871
5872  if (preg->fastmap != NULL)
5873    free (preg->fastmap);
5874  preg->fastmap = NULL;
5875  preg->fastmap_accurate = 0;
5876
5877  if (preg->translate != NULL)
5878    free (preg->translate);
5879  preg->translate = NULL;
5880}
5881#ifdef _LIBC
5882weak_alias (__regfree, regfree)
5883#endif
5884
5885#endif /* not emacs  */
5886