189857Sobrien/* Extended regular expression matching and search library,
289857Sobrien   version 0.12.
389857Sobrien   (Implements POSIX draft P1003.2/D11.2, except for some of the
489857Sobrien   internationalization features.)
5218822Sdim
6218822Sdim   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
7218822Sdim   2002, 2005 Free Software Foundation, Inc.
889857Sobrien   This file is part of the GNU C Library.
989857Sobrien
1089857Sobrien   The GNU C Library is free software; you can redistribute it and/or
1189857Sobrien   modify it under the terms of the GNU Lesser General Public
1289857Sobrien   License as published by the Free Software Foundation; either
1389857Sobrien   version 2.1 of the License, or (at your option) any later version.
1489857Sobrien
1589857Sobrien   The GNU C Library is distributed in the hope that it will be useful,
1689857Sobrien   but WITHOUT ANY WARRANTY; without even the implied warranty of
1789857Sobrien   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1889857Sobrien   Lesser General Public License for more details.
1989857Sobrien
2089857Sobrien   You should have received a copy of the GNU Lesser General Public
2189857Sobrien   License along with the GNU C Library; if not, write to the Free
22218822Sdim   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23218822Sdim   02110-1301 USA.  */
2489857Sobrien
2589857Sobrien/* This file has been modified for usage in libiberty.  It includes "xregex.h"
2689857Sobrien   instead of <regex.h>.  The "xregex.h" header file renames all external
2789857Sobrien   routines with an "x" prefix so they do not collide with the native regex
2889857Sobrien   routines or with other components regex routines. */
2989857Sobrien/* AIX requires this to be the first thing in the file. */
30130561Sobrien#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
3189857Sobrien  #pragma alloca
3289857Sobrien#endif
3389857Sobrien
3489857Sobrien#undef	_GNU_SOURCE
3589857Sobrien#define _GNU_SOURCE
3689857Sobrien
37218822Sdim#ifndef INSIDE_RECURSION
38218822Sdim# ifdef HAVE_CONFIG_H
39218822Sdim#  include <config.h>
40218822Sdim# endif
4189857Sobrien#endif
4289857Sobrien
43130561Sobrien#include <ansidecl.h>
44130561Sobrien
4589857Sobrien#ifndef INSIDE_RECURSION
4689857Sobrien
4789857Sobrien# if defined STDC_HEADERS && !defined emacs
4889857Sobrien#  include <stddef.h>
4989857Sobrien# else
5089857Sobrien/* We need this for `regex.h', and perhaps for the Emacs include files.  */
5189857Sobrien#  include <sys/types.h>
5289857Sobrien# endif
5389857Sobrien
5489857Sobrien# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
5589857Sobrien
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
5889857Sobrien# if defined _LIBC || WIDE_CHAR_SUPPORT
5989857Sobrien/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
6089857Sobrien#  include <wchar.h>
6189857Sobrien#  include <wctype.h>
6289857Sobrien# endif
6389857Sobrien
6489857Sobrien# ifdef _LIBC
6589857Sobrien/* We have to keep the namespace clean.  */
6689857Sobrien#  define regfree(preg) __regfree (preg)
6789857Sobrien#  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
6889857Sobrien#  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
6989857Sobrien#  define regerror(errcode, preg, errbuf, errbuf_size) \
7089857Sobrien	__regerror(errcode, preg, errbuf, errbuf_size)
7189857Sobrien#  define re_set_registers(bu, re, nu, st, en) \
7289857Sobrien	__re_set_registers (bu, re, nu, st, en)
7389857Sobrien#  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
7489857Sobrien	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
7589857Sobrien#  define re_match(bufp, string, size, pos, regs) \
7689857Sobrien	__re_match (bufp, string, size, pos, regs)
7789857Sobrien#  define re_search(bufp, string, size, startpos, range, regs) \
7889857Sobrien	__re_search (bufp, string, size, startpos, range, regs)
7989857Sobrien#  define re_compile_pattern(pattern, length, bufp) \
8089857Sobrien	__re_compile_pattern (pattern, length, bufp)
8189857Sobrien#  define re_set_syntax(syntax) __re_set_syntax (syntax)
8289857Sobrien#  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
8389857Sobrien	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
8489857Sobrien#  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
8589857Sobrien
8689857Sobrien#  define btowc __btowc
8789857Sobrien
8889857Sobrien/* We are also using some library internals.  */
8989857Sobrien#  include <locale/localeinfo.h>
9089857Sobrien#  include <locale/elem-hash.h>
9189857Sobrien#  include <langinfo.h>
9289857Sobrien#  include <locale/coll-lookup.h>
9389857Sobrien# endif
9489857Sobrien
9589857Sobrien/* This is for other GNU distributions with internationalized messages.  */
9689857Sobrien# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
9789857Sobrien#  include <libintl.h>
9889857Sobrien#  ifdef _LIBC
9989857Sobrien#   undef gettext
10089857Sobrien#   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
10189857Sobrien#  endif
10289857Sobrien# else
10389857Sobrien#  define gettext(msgid) (msgid)
10489857Sobrien# endif
10589857Sobrien
10689857Sobrien# ifndef gettext_noop
10789857Sobrien/* This define is so xgettext can find the internationalizable
10889857Sobrien   strings.  */
10989857Sobrien#  define gettext_noop(String) String
11089857Sobrien# endif
11189857Sobrien
11289857Sobrien/* The `emacs' switch turns on certain matching commands
11389857Sobrien   that make sense only in Emacs. */
11489857Sobrien# ifdef emacs
11589857Sobrien
11689857Sobrien#  include "lisp.h"
11789857Sobrien#  include "buffer.h"
11889857Sobrien#  include "syntax.h"
11989857Sobrien
12089857Sobrien# else  /* not emacs */
12189857Sobrien
12289857Sobrien/* If we are not linking with Emacs proper,
12389857Sobrien   we can't use the relocating allocator
12489857Sobrien   even if config.h says that we can.  */
12589857Sobrien#  undef REL_ALLOC
12689857Sobrien
12789857Sobrien#  if defined STDC_HEADERS || defined _LIBC
12889857Sobrien#   include <stdlib.h>
12989857Sobrien#  else
13089857Sobrienchar *malloc ();
13189857Sobrienchar *realloc ();
13289857Sobrien#  endif
13389857Sobrien
13489857Sobrien/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
13589857Sobrien   If nothing else has been done, use the method below.  */
13689857Sobrien#  ifdef INHIBIT_STRING_HEADER
13789857Sobrien#   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
13889857Sobrien#    if !defined bzero && !defined bcopy
13989857Sobrien#     undef INHIBIT_STRING_HEADER
14089857Sobrien#    endif
14189857Sobrien#   endif
14289857Sobrien#  endif
14389857Sobrien
14489857Sobrien/* This is the normal way of making sure we have a bcopy and a bzero.
14589857Sobrien   This is used in most programs--a few other programs avoid this
14689857Sobrien   by defining INHIBIT_STRING_HEADER.  */
14789857Sobrien#  ifndef INHIBIT_STRING_HEADER
14889857Sobrien#   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
14989857Sobrien#    include <string.h>
15089857Sobrien#    ifndef bzero
15189857Sobrien#     ifndef _LIBC
15289857Sobrien#      define bzero(s, n)	(memset (s, '\0', n), (s))
15389857Sobrien#     else
15489857Sobrien#      define bzero(s, n)	__bzero (s, n)
15589857Sobrien#     endif
15689857Sobrien#    endif
15789857Sobrien#   else
15889857Sobrien#    include <strings.h>
15989857Sobrien#    ifndef memcmp
16089857Sobrien#     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
16189857Sobrien#    endif
16289857Sobrien#    ifndef memcpy
16389857Sobrien#     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
16489857Sobrien#    endif
16589857Sobrien#   endif
16689857Sobrien#  endif
16789857Sobrien
16889857Sobrien/* Define the syntax stuff for \<, \>, etc.  */
16989857Sobrien
17089857Sobrien/* This must be nonzero for the wordchar and notwordchar pattern
17189857Sobrien   commands in re_match_2.  */
17289857Sobrien#  ifndef Sword
17389857Sobrien#   define Sword 1
17489857Sobrien#  endif
17589857Sobrien
17689857Sobrien#  ifdef SWITCH_ENUM_BUG
17789857Sobrien#   define SWITCH_ENUM_CAST(x) ((int)(x))
17889857Sobrien#  else
17989857Sobrien#   define SWITCH_ENUM_CAST(x) (x)
18089857Sobrien#  endif
18189857Sobrien
18289857Sobrien# endif /* not emacs */
18389857Sobrien
18489857Sobrien# if defined _LIBC || HAVE_LIMITS_H
18589857Sobrien#  include <limits.h>
18689857Sobrien# endif
18789857Sobrien
18889857Sobrien# ifndef MB_LEN_MAX
18989857Sobrien#  define MB_LEN_MAX 1
19089857Sobrien# endif
19189857Sobrien
19289857Sobrien/* Get the interface, including the syntax bits.  */
19389857Sobrien# include "xregex.h"  /* change for libiberty */
19489857Sobrien
19589857Sobrien/* isalpha etc. are used for the character classes.  */
19689857Sobrien# include <ctype.h>
19789857Sobrien
19889857Sobrien/* Jim Meyering writes:
19989857Sobrien
20089857Sobrien   "... Some ctype macros are valid only for character codes that
20189857Sobrien   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
20289857Sobrien   using /bin/cc or gcc but without giving an ansi option).  So, all
20389857Sobrien   ctype uses should be through macros like ISPRINT...  If
20489857Sobrien   STDC_HEADERS is defined, then autoconf has verified that the ctype
20589857Sobrien   macros don't need to be guarded with references to isascii. ...
20689857Sobrien   Defining isascii to 1 should let any compiler worth its salt
20789857Sobrien   eliminate the && through constant folding."
20889857Sobrien   Solaris defines some of these symbols so we must undefine them first.  */
20989857Sobrien
21089857Sobrien# undef ISASCII
21189857Sobrien# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
21289857Sobrien#  define ISASCII(c) 1
21389857Sobrien# else
21489857Sobrien#  define ISASCII(c) isascii(c)
21589857Sobrien# endif
21689857Sobrien
21789857Sobrien# ifdef isblank
21889857Sobrien#  define ISBLANK(c) (ISASCII (c) && isblank (c))
21989857Sobrien# else
22089857Sobrien#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
22189857Sobrien# endif
22289857Sobrien# ifdef isgraph
22389857Sobrien#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
22489857Sobrien# else
22589857Sobrien#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
22689857Sobrien# endif
22789857Sobrien
22889857Sobrien# undef ISPRINT
22989857Sobrien# define ISPRINT(c) (ISASCII (c) && isprint (c))
23089857Sobrien# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
23189857Sobrien# define ISALNUM(c) (ISASCII (c) && isalnum (c))
23289857Sobrien# define ISALPHA(c) (ISASCII (c) && isalpha (c))
23389857Sobrien# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
23489857Sobrien# define ISLOWER(c) (ISASCII (c) && islower (c))
23589857Sobrien# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
23689857Sobrien# define ISSPACE(c) (ISASCII (c) && isspace (c))
23789857Sobrien# define ISUPPER(c) (ISASCII (c) && isupper (c))
23889857Sobrien# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
23989857Sobrien
24089857Sobrien# ifdef _tolower
24189857Sobrien#  define TOLOWER(c) _tolower(c)
24289857Sobrien# else
24389857Sobrien#  define TOLOWER(c) tolower(c)
24489857Sobrien# endif
24589857Sobrien
# ifndef NULL
/* Fallback null pointer constant for environments whose headers did not
   provide one.  The expansion is fully parenthesized so that it always
   behaves as a single operand (e.g. `sizeof NULL' parses as intended).  */
#  define NULL ((void *)0)
# endif
24989857Sobrien
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
   since ours (we hope) works properly with all combinations of
   machines, compilers, `char' and `unsigned char' argument types.
   (Per Bothner suggested the basic approach.)

   SIGN_EXTEND_CHAR (c) yields C viewed as a signed 8-bit quantity.
   When __STDC__ is nonzero the argument is simply converted to
   `signed char'.  Otherwise the argument is first narrowed to
   `unsigned char', XORed with 128 and then 128 is subtracted, which
   leaves 0..127 unchanged and maps 128..255 onto -128..-1.  */
# undef SIGN_EXTEND_CHAR
# if __STDC__
#  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
# else  /* not __STDC__ */
/* As in Harbison and Steele.  */
#  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
# endif
26189857Sobrien
26289857Sobrien# ifndef emacs
26389857Sobrien/* How many characters in the character set.  */
26489857Sobrien#  define CHAR_SET_SIZE 256
26589857Sobrien
26689857Sobrien#  ifdef SYNTAX_TABLE
26789857Sobrien
26889857Sobrienextern char *re_syntax_table;
26989857Sobrien
27089857Sobrien#  else /* not SYNTAX_TABLE */
27189857Sobrien
27289857Sobrienstatic char re_syntax_table[CHAR_SET_SIZE];
27389857Sobrien
274218822Sdimstatic void init_syntax_once (void);
27589857Sobrien
27689857Sobrienstatic void
277218822Sdiminit_syntax_once (void)
27889857Sobrien{
27989857Sobrien   register int c;
28089857Sobrien   static int done = 0;
28189857Sobrien
28289857Sobrien   if (done)
28389857Sobrien     return;
28489857Sobrien   bzero (re_syntax_table, sizeof re_syntax_table);
28589857Sobrien
28689857Sobrien   for (c = 0; c < CHAR_SET_SIZE; ++c)
28789857Sobrien     if (ISALNUM (c))
28889857Sobrien	re_syntax_table[c] = Sword;
28989857Sobrien
29089857Sobrien   re_syntax_table['_'] = Sword;
29189857Sobrien
29289857Sobrien   done = 1;
29389857Sobrien}
29489857Sobrien
29589857Sobrien#  endif /* not SYNTAX_TABLE */
29689857Sobrien
29789857Sobrien#  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
29889857Sobrien
29989857Sobrien# endif /* emacs */
30089857Sobrien
30189857Sobrien/* Integer type for pointers.  */
30289857Sobrien# if !defined _LIBC && !defined HAVE_UINTPTR_T
30389857Sobrientypedef unsigned long int uintptr_t;
30489857Sobrien# endif
30589857Sobrien
30689857Sobrien/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
30789857Sobrien   use `alloca' instead of `malloc'.  This is because using malloc in
30889857Sobrien   re_search* or re_match* could cause memory leaks when C-g is used in
30989857Sobrien   Emacs; also, malloc is slower and causes storage fragmentation.  On
31089857Sobrien   the other hand, malloc is more portable, and easier to debug.
31189857Sobrien
31289857Sobrien   Because we sometimes use alloca, some routines have to be macros,
31389857Sobrien   not functions -- `alloca'-allocated space disappears at the end of the
31489857Sobrien   function it is called in.  */
31589857Sobrien
31689857Sobrien# ifdef REGEX_MALLOC
31789857Sobrien
31889857Sobrien#  define REGEX_ALLOCATE malloc
31989857Sobrien#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
32089857Sobrien#  define REGEX_FREE free
32189857Sobrien
32289857Sobrien# else /* not REGEX_MALLOC  */
32389857Sobrien
32489857Sobrien/* Emacs already defines alloca, sometimes.  */
32589857Sobrien#  ifndef alloca
32689857Sobrien
32789857Sobrien/* Make alloca work the best possible way.  */
32889857Sobrien#   ifdef __GNUC__
32989857Sobrien#    define alloca __builtin_alloca
33089857Sobrien#   else /* not __GNUC__ */
33189857Sobrien#    if HAVE_ALLOCA_H
33289857Sobrien#     include <alloca.h>
33389857Sobrien#    endif /* HAVE_ALLOCA_H */
33489857Sobrien#   endif /* not __GNUC__ */
33589857Sobrien
33689857Sobrien#  endif /* not alloca */
33789857Sobrien
33889857Sobrien#  define REGEX_ALLOCATE alloca
33989857Sobrien
34089857Sobrien/* Assumes a `char *destination' variable.  */
34189857Sobrien#  define REGEX_REALLOCATE(source, osize, nsize)			\
34289857Sobrien  (destination = (char *) alloca (nsize),				\
34389857Sobrien   memcpy (destination, source, osize))
34489857Sobrien
34589857Sobrien/* No need to do anything to free, after alloca.  */
34689857Sobrien#  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
34789857Sobrien
34889857Sobrien# endif /* not REGEX_MALLOC */
34989857Sobrien
35089857Sobrien/* Define how to allocate the failure stack.  */
35189857Sobrien
# if defined REL_ALLOC && defined REGEX_MALLOC

/* Relocating allocator: every operation works through the address of
   `failure_stack_ptr', so the SOURCE, OSIZE and PTR arguments are
   accepted only to keep the three variants interchangeable and are
   not used in the expansions.  */
#  define REGEX_ALLOCATE_STACK(size)				\
  r_alloc (&failure_stack_ptr, (size))
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
  r_re_alloc (&failure_stack_ptr, (nsize))
#  define REGEX_FREE_STACK(ptr)					\
  r_alloc_free (&failure_stack_ptr)

# else /* not using relocating allocator */

#  ifdef REGEX_MALLOC

/* Failure stack obtained from malloc/realloc and released with free.  */
#   define REGEX_ALLOCATE_STACK malloc
#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#   define REGEX_FREE_STACK free

#  else /* not REGEX_MALLOC */

/* Failure stack obtained from alloca.  */
#   define REGEX_ALLOCATE_STACK alloca

#   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything.  Expand to a void expression
   (as REGEX_FREE does) rather than to nothing, so the macro stays a
   valid operand wherever an expression is required.  */
#   define REGEX_FREE_STACK(arg) ((void)0)

#  endif /* not REGEX_MALLOC */
# endif /* not using relocating allocator */
38089857Sobrien
38189857Sobrien
38289857Sobrien/* True if `size1' is non-NULL and PTR is pointing anywhere inside
38389857Sobrien   `string1' or just past its end.  This works if PTR is NULL, which is
38489857Sobrien   a good thing.  */
38589857Sobrien# define FIRST_STRING_P(ptr) 					\
38689857Sobrien  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
38789857Sobrien
38889857Sobrien/* (Re)Allocate N items of type T using malloc, or fail.  */
38989857Sobrien# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
39089857Sobrien# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
39189857Sobrien# define RETALLOC_IF(addr, n, t) \
39289857Sobrien  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
39389857Sobrien# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
39489857Sobrien
39589857Sobrien# define BYTEWIDTH 8 /* In bits.  */
39689857Sobrien
39789857Sobrien# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
39889857Sobrien
39989857Sobrien# undef MAX
40089857Sobrien# undef MIN
40189857Sobrien# define MAX(a, b) ((a) > (b) ? (a) : (b))
40289857Sobrien# define MIN(a, b) ((a) < (b) ? (a) : (b))
40389857Sobrien
40489857Sobrientypedef char boolean;
40589857Sobrien# define false 0
40689857Sobrien# define true 1
40789857Sobrien
408218822Sdimstatic reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
409218822Sdim                                         reg_syntax_t syntax,
410218822Sdim                                         struct re_pattern_buffer *bufp);
41189857Sobrien
412218822Sdimstatic int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
413218822Sdim                                     const char *string1, int size1,
414218822Sdim                                     const char *string2, int size2,
415218822Sdim                                     int pos,
416218822Sdim                                     struct re_registers *regs,
417218822Sdim                                     int stop);
418218822Sdimstatic int byte_re_search_2 (struct re_pattern_buffer *bufp,
419218822Sdim                             const char *string1, int size1,
420218822Sdim                             const char *string2, int size2,
421218822Sdim                             int startpos, int range,
422218822Sdim                             struct re_registers *regs, int stop);
423218822Sdimstatic int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
42489857Sobrien
42589857Sobrien#ifdef MBS_SUPPORT
426218822Sdimstatic reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
427218822Sdim                                        reg_syntax_t syntax,
428218822Sdim                                        struct re_pattern_buffer *bufp);
42989857Sobrien
43089857Sobrien
431218822Sdimstatic int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
432218822Sdim                                    const char *cstring1, int csize1,
433218822Sdim                                    const char *cstring2, int csize2,
434218822Sdim                                    int pos,
435218822Sdim                                    struct re_registers *regs,
436218822Sdim                                    int stop,
437218822Sdim                                    wchar_t *string1, int size1,
438218822Sdim                                    wchar_t *string2, int size2,
439218822Sdim                                    int *mbs_offset1, int *mbs_offset2);
440218822Sdimstatic int wcs_re_search_2 (struct re_pattern_buffer *bufp,
441218822Sdim                            const char *string1, int size1,
442218822Sdim                            const char *string2, int size2,
443218822Sdim                            int startpos, int range,
444218822Sdim                            struct re_registers *regs, int stop);
445218822Sdimstatic int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
44689857Sobrien#endif
44789857Sobrien
/* These are the command codes that appear in compiled regular
   expressions.  Some opcodes are followed by argument bytes.  A
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.

   Several comments below say that under MBS_SUPPORT "the size of
   address is 1": in the WCHAR build OFFSET_ADDRESS_SIZE is 1, i.e. a
   number occupies a single element instead of two bytes.

   The enumerators are numbered implicitly from no_op = 0, so their
   order (including the conditionally compiled ones) determines their
   values.  */

typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Same as exactn, but contains binary data.  */
  exactn_bin,
# endif

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to specified set.  First
     following byte is number of bitmap bytes.  Then come bytes
     for a bitmap saying which chars are in.  Bits in each byte
     are ordered low-bit-first.  A character is in the set if its
     bit is 1.  A character too large to have a bit in the map is
     automatically not in the set.  */
  /* ifdef MBS_SUPPORT, following element is length of character
     classes, length of collating symbols, length of equivalence
     classes, length of character ranges, and length of characters.
     Next, character class element, collating symbols elements,
     equivalence class elements, range elements, and character
     elements follow.
     See regex_compile function.  */
  charset,

  /* Same parameters as charset, but match any character that is
     not one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in
     the range 0 to one less than the pattern buffer's re_nsub
     field.  Then followed by one byte with the number of groups
     inner to this one.  (This last has to be part of the
     start_memory only because we need it in the on_failure_jump
     of re_match_2.)  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a
     memory register.  Followed by one byte with the register
     number, in the range 0 to one less than `re_nsub' in the
     pattern buffer, and one byte with the number of inner groups,
     just like `start_memory'.  (We need the number of inner
     groups here because we don't have any easy way of finding the
     corresponding start_memory when we're at a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one
     byte containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning
     of string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by two-byte relative address to which to jump.  */
  jump,

  /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Followed by two-byte relative address of place to resume at
     in case of failure.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Throw away latest failure point and then jump to following
     two-byte relative address.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  pop_failure_jump,

  /* Change to pop_failure_jump if know won't have to backtrack to
     match; otherwise change to jump.  This is used to jump
     back to the beginning of a repeat.  If what follows this jump
     clearly won't match what the repeat does, such that we can be
     sure that there is no use backtracking out of repetitions
     already matched, then we change it to a pop_failure_jump.
     Followed by two-byte address.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  maybe_pop_jump,

  /* Jump to following two-byte address, and push a dummy failure
     point.  This failure point will be thrown away if an attempt
     is made to use it for a failure.  A `+' construct makes this
     before the first repeat.  Also used as an intermediary kind
     of jump when compiling an alternative.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Followed by two-byte relative address and two-byte number n.
     After matching N times, jump to the address upon failure.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  succeed_n,

  /* Followed by two-byte relative address, and two-byte number n.
     Jump to the address N times, then fail.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  jump_n,

  /* Set the following two-byte relative address to the
     subsequent two-byte number.  The address *includes* the two
     bytes of number.  */
  /* ifdef MBS_SUPPORT, the size of address is 1.  */
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any char that is not a word-constituent.  */

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
  notwordbound	/* Succeeds if not at a word boundary.  */

  /* The Emacs-only opcodes follow; the separating comma is supplied by
     the first of them so the list stays valid without them.  */
# ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

  /* Matches any character whose syntax is specified.  Followed by
     a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
60589857Sobrien#endif /* not INSIDE_RECURSION */
60689857Sobrien
60789857Sobrien
60889857Sobrien#ifdef BYTE
60989857Sobrien# define CHAR_T char
61089857Sobrien# define UCHAR_T unsigned char
61189857Sobrien# define COMPILED_BUFFER_VAR bufp->buffer
61289857Sobrien# define OFFSET_ADDRESS_SIZE 2
613218822Sdim# define PREFIX(name) byte_##name
61489857Sobrien# define ARG_PREFIX(name) name
61589857Sobrien# define PUT_CHAR(c) putchar (c)
61689857Sobrien#else
61789857Sobrien# ifdef WCHAR
61889857Sobrien#  define CHAR_T wchar_t
61989857Sobrien#  define UCHAR_T wchar_t
62089857Sobrien#  define COMPILED_BUFFER_VAR wc_buffer
62189857Sobrien#  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
62289857Sobrien#  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
623218822Sdim#  define PREFIX(name) wcs_##name
624218822Sdim#  define ARG_PREFIX(name) c##name
62589857Sobrien/* Should we use wide stream??  */
62689857Sobrien#  define PUT_CHAR(c) printf ("%C", c);
62789857Sobrien#  define TRUE 1
62889857Sobrien#  define FALSE 0
62989857Sobrien# else
63089857Sobrien#  ifdef MBS_SUPPORT
63189857Sobrien#   define WCHAR
63289857Sobrien#   define INSIDE_RECURSION
63389857Sobrien#   include "regex.c"
63489857Sobrien#   undef INSIDE_RECURSION
63589857Sobrien#  endif
63689857Sobrien#  define BYTE
63789857Sobrien#  define INSIDE_RECURSION
63889857Sobrien#  include "regex.c"
63989857Sobrien#  undef INSIDE_RECURSION
64089857Sobrien# endif
64189857Sobrien#endif
64289857Sobrien
64389857Sobrien#ifdef INSIDE_RECURSION
64489857Sobrien/* Common operations on the compiled pattern.  */
64589857Sobrien
/* Store NUMBER at DESTINATION in the compiled pattern.
   BYTE build: NUMBER is written into two contiguous bytes, low-order
   byte first.  WCHAR build (MBS_SUPPORT): NUMBER is written into a
   single element.  */

# ifdef WCHAR
#  define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (UCHAR_T) (number);				\
  } while (0)
# else /* BYTE */
#  define STORE_NUMBER(destination, number)				\
  do {									\
    *(destination) = (number) & 0xff;					\
    *((destination) + 1) = (number) >> 8;				\
  } while (0)
# endif /* WCHAR */

/* Like STORE_NUMBER, but afterwards advance DESTINATION past the
   stored number (by OFFSET_ADDRESS_SIZE elements).  DESTINATION is
   assigned to, so it must be an lvalue.  */

# define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += OFFSET_ADDRESS_SIZE;				\
  } while (0)
67289857Sobrien
/* Put into DESTINATION the number stored at SOURCE in the compiled
   pattern.  In the BYTE variant the number is read from two contiguous
   bytes: the first is the low-order byte, the second is sign-extended
   and supplies the high-order part.  In the WCHAR variant the number is
   a single element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* The high byte is scaled by multiplication rather than `<< 8': it may
   be negative after sign extension, and left-shifting a negative value
   is undefined behavior in C.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
  } while (0)
# endif
68989857Sobrien
69089857Sobrien# ifdef DEBUG
691218822Sdimstatic void PREFIX(extract_number) (int *dest, UCHAR_T *source);
69289857Sobrienstatic void
693218822SdimPREFIX(extract_number) (int *dest, UCHAR_T *source)
69489857Sobrien{
69589857Sobrien#  ifdef WCHAR
69689857Sobrien  *dest = *source;
69789857Sobrien#  else /* BYTE */
69889857Sobrien  int temp = SIGN_EXTEND_CHAR (*(source + 1));
69989857Sobrien  *dest = *source & 0377;
70089857Sobrien  *dest += temp << 8;
70189857Sobrien#  endif
70289857Sobrien}
70389857Sobrien
70489857Sobrien#  ifndef EXTRACT_MACROS /* To debug the macros.  */
70589857Sobrien#   undef EXTRACT_NUMBER
70689857Sobrien#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
70789857Sobrien#  endif /* not EXTRACT_MACROS */
70889857Sobrien
70989857Sobrien# endif /* DEBUG */
71089857Sobrien
/* Like EXTRACT_NUMBER, but afterwards advance SOURCE past the number,
   i.e. by OFFSET_ADDRESS_SIZE elements.  SOURCE is assigned to, so it
   must be an lvalue.  */

# define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += OFFSET_ADDRESS_SIZE;					\
  } while (0)
71989857Sobrien
72089857Sobrien# ifdef DEBUG
721218822Sdimstatic void PREFIX(extract_number_and_incr) (int *destination,
722218822Sdim                                             UCHAR_T **source);
72389857Sobrienstatic void
724218822SdimPREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
72589857Sobrien{
72689857Sobrien  PREFIX(extract_number) (destination, *source);
72789857Sobrien  *source += OFFSET_ADDRESS_SIZE;
72889857Sobrien}
72989857Sobrien
73089857Sobrien#  ifndef EXTRACT_MACROS
73189857Sobrien#   undef EXTRACT_NUMBER_AND_INCR
73289857Sobrien#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
73389857Sobrien  PREFIX(extract_number_and_incr) (&dest, &src)
73489857Sobrien#  endif /* not EXTRACT_MACROS */
73589857Sobrien
73689857Sobrien# endif /* DEBUG */
73789857Sobrien
73889857Sobrien
73989857Sobrien
74089857Sobrien/* If DEBUG is defined, Regex prints many voluminous messages about what
74189857Sobrien   it is doing (if the variable `debug' is nonzero).  If linked with the
74289857Sobrien   main program in `iregex.c', you can enter patterns and strings
74389857Sobrien   interactively.  And if linked with the main program in `main.c' and
74489857Sobrien   the other test files, you can run the already-written tests.  */
74589857Sobrien
74689857Sobrien# ifdef DEBUG
74789857Sobrien
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* Nonzero enables the DEBUG_PRINT* output below.  */
static int debug;

/* Evaluate E only in debugging builds.  */
#   define DEBUG_STATEMENT(e) e

/* printf-style tracing that is active only while `debug' is nonzero.
   Each macro expands to a single do/while statement, so it is safe as
   the unbraced body of an if that has an else branch; a bare
   `if (debug) ...' expansion would capture that else.  */
#   define DEBUG_PRINT1(x)						\
  do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2)						\
  do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3)					\
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4)					\
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */
76489857Sobrien
/* Dump the compiled pattern between S and E, or a pair of subject
   strings, but only while `debug' is nonzero.  The P argument of
   DEBUG_PRINT_COMPILED_PATTERN is accepted but not used.  Both macros
   expand to a single do/while statement so that a following `else'
   cannot attach to the internal `if (debug)'.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
  do {									\
    if (debug)								\
      PREFIX(print_partial_compiled_pattern) (s, e);			\
  } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
  do {									\
    if (debug)								\
      PREFIX(print_double_string) (w, s1, sz1, s2, sz2);		\
  } while (0)
76989857Sobrien
77089857Sobrien
77189857Sobrien/* Print the fastmap in human-readable form.  */
77289857Sobrien
77389857Sobrien#  ifndef DEFINED_ONCE
77489857Sobrienvoid
775218822Sdimprint_fastmap (char *fastmap)
77689857Sobrien{
77789857Sobrien  unsigned was_a_range = 0;
77889857Sobrien  unsigned i = 0;
77989857Sobrien
78089857Sobrien  while (i < (1 << BYTEWIDTH))
78189857Sobrien    {
78289857Sobrien      if (fastmap[i++])
78389857Sobrien	{
78489857Sobrien	  was_a_range = 0;
78589857Sobrien          putchar (i - 1);
78689857Sobrien          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
78789857Sobrien            {
78889857Sobrien              was_a_range = 1;
78989857Sobrien              i++;
79089857Sobrien            }
79189857Sobrien	  if (was_a_range)
79289857Sobrien            {
79389857Sobrien              printf ("-");
79489857Sobrien              putchar (i - 1);
79589857Sobrien            }
79689857Sobrien        }
79789857Sobrien    }
79889857Sobrien  putchar ('\n');
79989857Sobrien}
80089857Sobrien#  endif /* not DEFINED_ONCE */
80189857Sobrien
80289857Sobrien
80389857Sobrien/* Print a compiled pattern string in human-readable form, starting at
80489857Sobrien   the START pointer into it and ending just before the pointer END.  */
80589857Sobrien
80689857Sobrienvoid
807218822SdimPREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
80889857Sobrien{
80989857Sobrien  int mcnt, mcnt2;
81089857Sobrien  UCHAR_T *p1;
81189857Sobrien  UCHAR_T *p = start;
81289857Sobrien  UCHAR_T *pend = end;
81389857Sobrien
81489857Sobrien  if (start == NULL)
81589857Sobrien    {
81689857Sobrien      printf ("(null)\n");
81789857Sobrien      return;
81889857Sobrien    }
81989857Sobrien
82089857Sobrien  /* Loop over pattern commands.  */
82189857Sobrien  while (p < pend)
82289857Sobrien    {
82389857Sobrien#  ifdef _LIBC
82489857Sobrien      printf ("%td:\t", p - start);
82589857Sobrien#  else
82689857Sobrien      printf ("%ld:\t", (long int) (p - start));
82789857Sobrien#  endif
82889857Sobrien
82989857Sobrien      switch ((re_opcode_t) *p++)
83089857Sobrien	{
83189857Sobrien        case no_op:
83289857Sobrien          printf ("/no_op");
83389857Sobrien          break;
83489857Sobrien
83589857Sobrien	case exactn:
83689857Sobrien	  mcnt = *p++;
83789857Sobrien          printf ("/exactn/%d", mcnt);
83889857Sobrien          do
83989857Sobrien	    {
84089857Sobrien              putchar ('/');
84189857Sobrien	      PUT_CHAR (*p++);
84289857Sobrien            }
84389857Sobrien          while (--mcnt);
84489857Sobrien          break;
84589857Sobrien
84689857Sobrien#  ifdef MBS_SUPPORT
84789857Sobrien	case exactn_bin:
84889857Sobrien	  mcnt = *p++;
84989857Sobrien	  printf ("/exactn_bin/%d", mcnt);
85089857Sobrien          do
85189857Sobrien	    {
85289857Sobrien	      printf("/%lx", (long int) *p++);
85389857Sobrien            }
85489857Sobrien          while (--mcnt);
85589857Sobrien          break;
85689857Sobrien#  endif /* MBS_SUPPORT */
85789857Sobrien
85889857Sobrien	case start_memory:
85989857Sobrien          mcnt = *p++;
86089857Sobrien          printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
86189857Sobrien          break;
86289857Sobrien
86389857Sobrien	case stop_memory:
86489857Sobrien          mcnt = *p++;
86589857Sobrien	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
86689857Sobrien          break;
86789857Sobrien
86889857Sobrien	case duplicate:
86989857Sobrien	  printf ("/duplicate/%ld", (long int) *p++);
87089857Sobrien	  break;
87189857Sobrien
87289857Sobrien	case anychar:
87389857Sobrien	  printf ("/anychar");
87489857Sobrien	  break;
87589857Sobrien
87689857Sobrien	case charset:
87789857Sobrien        case charset_not:
87889857Sobrien          {
87989857Sobrien#  ifdef WCHAR
88089857Sobrien	    int i, length;
88189857Sobrien	    wchar_t *workp = p;
88289857Sobrien	    printf ("/charset [%s",
88389857Sobrien	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
88489857Sobrien	    p += 5;
88589857Sobrien	    length = *workp++; /* the length of char_classes */
88689857Sobrien	    for (i=0 ; i<length ; i++)
88789857Sobrien	      printf("[:%lx:]", (long int) *p++);
88889857Sobrien	    length = *workp++; /* the length of collating_symbol */
88989857Sobrien	    for (i=0 ; i<length ;)
89089857Sobrien	      {
89189857Sobrien		printf("[.");
89289857Sobrien		while(*p != 0)
89389857Sobrien		  PUT_CHAR((i++,*p++));
89489857Sobrien		i++,p++;
89589857Sobrien		printf(".]");
89689857Sobrien	      }
89789857Sobrien	    length = *workp++; /* the length of equivalence_class */
89889857Sobrien	    for (i=0 ; i<length ;)
89989857Sobrien	      {
90089857Sobrien		printf("[=");
90189857Sobrien		while(*p != 0)
90289857Sobrien		  PUT_CHAR((i++,*p++));
90389857Sobrien		i++,p++;
90489857Sobrien		printf("=]");
90589857Sobrien	      }
90689857Sobrien	    length = *workp++; /* the length of char_range */
90789857Sobrien	    for (i=0 ; i<length ; i++)
90889857Sobrien	      {
90989857Sobrien		wchar_t range_start = *p++;
91089857Sobrien		wchar_t range_end = *p++;
91189857Sobrien		printf("%C-%C", range_start, range_end);
91289857Sobrien	      }
91389857Sobrien	    length = *workp++; /* the length of char */
91489857Sobrien	    for (i=0 ; i<length ; i++)
91589857Sobrien	      printf("%C", *p++);
91689857Sobrien	    putchar (']');
91789857Sobrien#  else
91889857Sobrien            register int c, last = -100;
91989857Sobrien	    register int in_range = 0;
92089857Sobrien
92189857Sobrien	    printf ("/charset [%s",
92289857Sobrien	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
92389857Sobrien
92489857Sobrien            assert (p + *p < pend);
92589857Sobrien
92689857Sobrien            for (c = 0; c < 256; c++)
92789857Sobrien	      if (c / 8 < *p
92889857Sobrien		  && (p[1 + (c/8)] & (1 << (c % 8))))
92989857Sobrien		{
93089857Sobrien		  /* Are we starting a range?  */
93189857Sobrien		  if (last + 1 == c && ! in_range)
93289857Sobrien		    {
93389857Sobrien		      putchar ('-');
93489857Sobrien		      in_range = 1;
93589857Sobrien		    }
93689857Sobrien		  /* Have we broken a range?  */
93789857Sobrien		  else if (last + 1 != c && in_range)
93889857Sobrien              {
93989857Sobrien		      putchar (last);
94089857Sobrien		      in_range = 0;
94189857Sobrien		    }
94289857Sobrien
94389857Sobrien		  if (! in_range)
94489857Sobrien		    putchar (c);
94589857Sobrien
94689857Sobrien		  last = c;
94789857Sobrien              }
94889857Sobrien
94989857Sobrien	    if (in_range)
95089857Sobrien	      putchar (last);
95189857Sobrien
95289857Sobrien	    putchar (']');
95389857Sobrien
95489857Sobrien	    p += 1 + *p;
95589857Sobrien#  endif /* WCHAR */
95689857Sobrien	  }
95789857Sobrien	  break;
95889857Sobrien
95989857Sobrien	case begline:
96089857Sobrien	  printf ("/begline");
96189857Sobrien          break;
96289857Sobrien
96389857Sobrien	case endline:
96489857Sobrien          printf ("/endline");
96589857Sobrien          break;
96689857Sobrien
96789857Sobrien	case on_failure_jump:
96889857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
96989857Sobrien#  ifdef _LIBC
97089857Sobrien  	  printf ("/on_failure_jump to %td", p + mcnt - start);
97189857Sobrien#  else
97289857Sobrien  	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
97389857Sobrien#  endif
97489857Sobrien          break;
97589857Sobrien
97689857Sobrien	case on_failure_keep_string_jump:
97789857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
97889857Sobrien#  ifdef _LIBC
97989857Sobrien  	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
98089857Sobrien#  else
98189857Sobrien  	  printf ("/on_failure_keep_string_jump to %ld",
98289857Sobrien		  (long int) (p + mcnt - start));
98389857Sobrien#  endif
98489857Sobrien          break;
98589857Sobrien
98689857Sobrien	case dummy_failure_jump:
98789857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
98889857Sobrien#  ifdef _LIBC
98989857Sobrien  	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
99089857Sobrien#  else
99189857Sobrien  	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
99289857Sobrien#  endif
99389857Sobrien          break;
99489857Sobrien
99589857Sobrien	case push_dummy_failure:
99689857Sobrien          printf ("/push_dummy_failure");
99789857Sobrien          break;
99889857Sobrien
99989857Sobrien        case maybe_pop_jump:
100089857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
100189857Sobrien#  ifdef _LIBC
100289857Sobrien  	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
100389857Sobrien#  else
100489857Sobrien  	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
100589857Sobrien#  endif
100689857Sobrien	  break;
100789857Sobrien
100889857Sobrien        case pop_failure_jump:
100989857Sobrien	  PREFIX(extract_number_and_incr) (&mcnt, &p);
101089857Sobrien#  ifdef _LIBC
101189857Sobrien  	  printf ("/pop_failure_jump to %td", p + mcnt - start);
101289857Sobrien#  else
101389857Sobrien  	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
101489857Sobrien#  endif
101589857Sobrien	  break;
101689857Sobrien
101789857Sobrien        case jump_past_alt:
101889857Sobrien	  PREFIX(extract_number_and_incr) (&mcnt, &p);
101989857Sobrien#  ifdef _LIBC
102089857Sobrien  	  printf ("/jump_past_alt to %td", p + mcnt - start);
102189857Sobrien#  else
102289857Sobrien  	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
102389857Sobrien#  endif
102489857Sobrien	  break;
102589857Sobrien
102689857Sobrien        case jump:
102789857Sobrien	  PREFIX(extract_number_and_incr) (&mcnt, &p);
102889857Sobrien#  ifdef _LIBC
102989857Sobrien  	  printf ("/jump to %td", p + mcnt - start);
103089857Sobrien#  else
103189857Sobrien  	  printf ("/jump to %ld", (long int) (p + mcnt - start));
103289857Sobrien#  endif
103389857Sobrien	  break;
103489857Sobrien
103589857Sobrien        case succeed_n:
103689857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
103789857Sobrien	  p1 = p + mcnt;
103889857Sobrien          PREFIX(extract_number_and_incr) (&mcnt2, &p);
103989857Sobrien#  ifdef _LIBC
104089857Sobrien	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
104189857Sobrien#  else
104289857Sobrien	  printf ("/succeed_n to %ld, %d times",
104389857Sobrien		  (long int) (p1 - start), mcnt2);
104489857Sobrien#  endif
104589857Sobrien          break;
104689857Sobrien
104789857Sobrien        case jump_n:
104889857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
104989857Sobrien	  p1 = p + mcnt;
105089857Sobrien          PREFIX(extract_number_and_incr) (&mcnt2, &p);
105189857Sobrien	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
105289857Sobrien          break;
105389857Sobrien
105489857Sobrien        case set_number_at:
105589857Sobrien          PREFIX(extract_number_and_incr) (&mcnt, &p);
105689857Sobrien	  p1 = p + mcnt;
105789857Sobrien          PREFIX(extract_number_and_incr) (&mcnt2, &p);
105889857Sobrien#  ifdef _LIBC
105989857Sobrien	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
106089857Sobrien#  else
106189857Sobrien	  printf ("/set_number_at location %ld to %d",
106289857Sobrien		  (long int) (p1 - start), mcnt2);
106389857Sobrien#  endif
106489857Sobrien          break;
106589857Sobrien
106689857Sobrien        case wordbound:
106789857Sobrien	  printf ("/wordbound");
106889857Sobrien	  break;
106989857Sobrien
107089857Sobrien	case notwordbound:
107189857Sobrien	  printf ("/notwordbound");
107289857Sobrien          break;
107389857Sobrien
107489857Sobrien	case wordbeg:
107589857Sobrien	  printf ("/wordbeg");
107689857Sobrien	  break;
107789857Sobrien
107889857Sobrien	case wordend:
107989857Sobrien	  printf ("/wordend");
108089857Sobrien	  break;
108189857Sobrien
108289857Sobrien#  ifdef emacs
108389857Sobrien	case before_dot:
108489857Sobrien	  printf ("/before_dot");
108589857Sobrien          break;
108689857Sobrien
108789857Sobrien	case at_dot:
108889857Sobrien	  printf ("/at_dot");
108989857Sobrien          break;
109089857Sobrien
109189857Sobrien	case after_dot:
109289857Sobrien	  printf ("/after_dot");
109389857Sobrien          break;
109489857Sobrien
109589857Sobrien	case syntaxspec:
109689857Sobrien          printf ("/syntaxspec");
109789857Sobrien	  mcnt = *p++;
109889857Sobrien	  printf ("/%d", mcnt);
109989857Sobrien          break;
110089857Sobrien
110189857Sobrien	case notsyntaxspec:
110289857Sobrien          printf ("/notsyntaxspec");
110389857Sobrien	  mcnt = *p++;
110489857Sobrien	  printf ("/%d", mcnt);
110589857Sobrien	  break;
110689857Sobrien#  endif /* emacs */
110789857Sobrien
110889857Sobrien	case wordchar:
110989857Sobrien	  printf ("/wordchar");
111089857Sobrien          break;
111189857Sobrien
111289857Sobrien	case notwordchar:
111389857Sobrien	  printf ("/notwordchar");
111489857Sobrien          break;
111589857Sobrien
111689857Sobrien	case begbuf:
111789857Sobrien	  printf ("/begbuf");
111889857Sobrien          break;
111989857Sobrien
112089857Sobrien	case endbuf:
112189857Sobrien	  printf ("/endbuf");
112289857Sobrien          break;
112389857Sobrien
112489857Sobrien        default:
112589857Sobrien          printf ("?%ld", (long int) *(p-1));
112689857Sobrien	}
112789857Sobrien
112889857Sobrien      putchar ('\n');
112989857Sobrien    }
113089857Sobrien
113189857Sobrien#  ifdef _LIBC
113289857Sobrien  printf ("%td:\tend of pattern.\n", p - start);
113389857Sobrien#  else
113489857Sobrien  printf ("%ld:\tend of pattern.\n", (long int) (p - start));
113589857Sobrien#  endif
113689857Sobrien}
113789857Sobrien
113889857Sobrien
113989857Sobrienvoid
1140218822SdimPREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
114189857Sobrien{
114289857Sobrien  UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
114389857Sobrien
114489857Sobrien  PREFIX(print_partial_compiled_pattern) (buffer, buffer
114589857Sobrien				  + bufp->used / sizeof(UCHAR_T));
114689857Sobrien  printf ("%ld bytes used/%ld bytes allocated.\n",
114789857Sobrien	  bufp->used, bufp->allocated);
114889857Sobrien
114989857Sobrien  if (bufp->fastmap_accurate && bufp->fastmap)
115089857Sobrien    {
115189857Sobrien      printf ("fastmap: ");
115289857Sobrien      print_fastmap (bufp->fastmap);
115389857Sobrien    }
115489857Sobrien
115589857Sobrien#  ifdef _LIBC
115689857Sobrien  printf ("re_nsub: %Zd\t", bufp->re_nsub);
115789857Sobrien#  else
115889857Sobrien  printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
115989857Sobrien#  endif
116089857Sobrien  printf ("regs_alloc: %d\t", bufp->regs_allocated);
116189857Sobrien  printf ("can_be_null: %d\t", bufp->can_be_null);
116289857Sobrien  printf ("newline_anchor: %d\n", bufp->newline_anchor);
116389857Sobrien  printf ("no_sub: %d\t", bufp->no_sub);
116489857Sobrien  printf ("not_bol: %d\t", bufp->not_bol);
116589857Sobrien  printf ("not_eol: %d\t", bufp->not_eol);
116689857Sobrien  printf ("syntax: %lx\n", bufp->syntax);
116789857Sobrien  /* Perhaps we should print the translate table?  */
116889857Sobrien}
116989857Sobrien
117089857Sobrien
117189857Sobrienvoid
1172218822SdimPREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
1173218822Sdim                             int size1, const CHAR_T *string2, int size2)
117489857Sobrien{
117589857Sobrien  int this_char;
117689857Sobrien
117789857Sobrien  if (where == NULL)
117889857Sobrien    printf ("(null)");
117989857Sobrien  else
118089857Sobrien    {
118189857Sobrien      int cnt;
118289857Sobrien
118389857Sobrien      if (FIRST_STRING_P (where))
118489857Sobrien        {
118589857Sobrien          for (this_char = where - string1; this_char < size1; this_char++)
118689857Sobrien	    PUT_CHAR (string1[this_char]);
118789857Sobrien
118889857Sobrien          where = string2;
118989857Sobrien        }
119089857Sobrien
119189857Sobrien      cnt = 0;
119289857Sobrien      for (this_char = where - string2; this_char < size2; this_char++)
119389857Sobrien	{
119489857Sobrien	  PUT_CHAR (string2[this_char]);
119589857Sobrien	  if (++cnt > 100)
119689857Sobrien	    {
119789857Sobrien	      fputs ("...", stdout);
119889857Sobrien	      break;
119989857Sobrien	    }
120089857Sobrien	}
120189857Sobrien    }
120289857Sobrien}
120389857Sobrien
120489857Sobrien#  ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (int c)
{
  fputc (c, stderr);
}
121089857Sobrien#  endif
121189857Sobrien
121289857Sobrien# else /* not DEBUG */
121389857Sobrien
121489857Sobrien#  ifndef DEFINED_ONCE
121589857Sobrien#   undef assert
121689857Sobrien#   define assert(e)
121789857Sobrien
121889857Sobrien#   define DEBUG_STATEMENT(e)
121989857Sobrien#   define DEBUG_PRINT1(x)
122089857Sobrien#   define DEBUG_PRINT2(x1, x2)
122189857Sobrien#   define DEBUG_PRINT3(x1, x2, x3)
122289857Sobrien#   define DEBUG_PRINT4(x1, x2, x3, x4)
122389857Sobrien#  endif /* not DEFINED_ONCE */
122489857Sobrien#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
122589857Sobrien#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
122689857Sobrien
122789857Sobrien# endif /* not DEBUG */
122889857Sobrien
122989857Sobrien
123089857Sobrien
123189857Sobrien# ifdef WCHAR
123289857Sobrien/* This  convert a multibyte string to a wide character string.
123389857Sobrien   And write their correspondances to offset_buffer(see below)
123489857Sobrien   and write whether each wchar_t is binary data to is_binary.
123589857Sobrien   This assume invalid multibyte sequences as binary data.
123689857Sobrien   We assume offset_buffer and is_binary is already allocated
123789857Sobrien   enough space.  */
123889857Sobrien
123989857Sobrienstatic size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
124089857Sobrien				  size_t len, int *offset_buffer,
124189857Sobrien				  char *is_binary);
124289857Sobrienstatic size_t
1243218822Sdimconvert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
1244218822Sdim                    int *offset_buffer, char *is_binary)
124589857Sobrien     /* It hold correspondances between src(char string) and
124689857Sobrien	dest(wchar_t string) for optimization.
124789857Sobrien	e.g. src  = "xxxyzz"
124889857Sobrien             dest = {'X', 'Y', 'Z'}
124989857Sobrien	      (each "xxx", "y" and "zz" represent one multibyte character
125089857Sobrien	       corresponding to 'X', 'Y' and 'Z'.)
125189857Sobrien	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
125289857Sobrien	  	        = {0, 3, 4, 6}
125389857Sobrien     */
125489857Sobrien{
125589857Sobrien  wchar_t *pdest = dest;
125689857Sobrien  const unsigned char *psrc = src;
125789857Sobrien  size_t wc_count = 0;
125889857Sobrien
125989857Sobrien  mbstate_t mbs;
126089857Sobrien  int i, consumed;
126189857Sobrien  size_t mb_remain = len;
126289857Sobrien  size_t mb_count = 0;
126389857Sobrien
126489857Sobrien  /* Initialize the conversion state.  */
126589857Sobrien  memset (&mbs, 0, sizeof (mbstate_t));
126689857Sobrien
126789857Sobrien  offset_buffer[0] = 0;
126889857Sobrien  for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
126989857Sobrien	 psrc += consumed)
127089857Sobrien    {
127189857Sobrien#ifdef _LIBC
127289857Sobrien      consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
127389857Sobrien#else
127489857Sobrien      consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
127589857Sobrien#endif
127689857Sobrien
127789857Sobrien      if (consumed <= 0)
127889857Sobrien	/* failed to convert. maybe src contains binary data.
127989857Sobrien	   So we consume 1 byte manualy.  */
128089857Sobrien	{
128189857Sobrien	  *pdest = *psrc;
128289857Sobrien	  consumed = 1;
128389857Sobrien	  is_binary[wc_count] = TRUE;
128489857Sobrien	}
128589857Sobrien      else
128689857Sobrien	is_binary[wc_count] = FALSE;
128789857Sobrien      /* In sjis encoding, we use yen sign as escape character in
128889857Sobrien	 place of reverse solidus. So we convert 0x5c(yen sign in
128989857Sobrien	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
129089857Sobrien	 solidus in UCS2).  */
129189857Sobrien      if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
129289857Sobrien	*pdest = (wchar_t) *psrc;
129389857Sobrien
129489857Sobrien      offset_buffer[wc_count + 1] = mb_count += consumed;
129589857Sobrien    }
129689857Sobrien
129789857Sobrien  /* Fill remain of the buffer with sentinel.  */
129889857Sobrien  for (i = wc_count + 1 ; i <= len ; i++)
129989857Sobrien    offset_buffer[i] = mb_count + 1;
130089857Sobrien
130189857Sobrien  return wc_count;
130289857Sobrien}
130389857Sobrien
130489857Sobrien# endif /* WCHAR */
130589857Sobrien
130689857Sobrien#else /* not INSIDE_RECURSION */
130789857Sobrien
130889857Sobrien/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
130989857Sobrien   also be assigned to arbitrarily: each pattern buffer stores its own
131089857Sobrien   syntax, so it can be changed between regex compilations.  */
131189857Sobrien/* This has no initializer because initialized variables in Emacs
131289857Sobrien   become read-only after dumping.  */
131389857Sobrienreg_syntax_t re_syntax_options;
131489857Sobrien
131589857Sobrien
131689857Sobrien/* Specify the precise syntax of regexps for compilation.  This provides
131789857Sobrien   for compatibility for various utilities which historically have
131889857Sobrien   different, incompatible syntaxes.
131989857Sobrien
132089857Sobrien   The argument SYNTAX is a bit mask comprised of the various bits
132189857Sobrien   defined in regex.h.  We return the old syntax.  */
132289857Sobrien
132389857Sobrienreg_syntax_t
1324218822Sdimre_set_syntax (reg_syntax_t syntax)
132589857Sobrien{
132689857Sobrien  reg_syntax_t ret = re_syntax_options;
132789857Sobrien
132889857Sobrien  re_syntax_options = syntax;
132989857Sobrien# ifdef DEBUG
133089857Sobrien  if (syntax & RE_DEBUG)
133189857Sobrien    debug = 1;
133289857Sobrien  else if (debug) /* was on but now is not */
133389857Sobrien    debug = 0;
133489857Sobrien# endif /* DEBUG */
133589857Sobrien  return ret;
133689857Sobrien}
133789857Sobrien# ifdef _LIBC
133889857Sobrienweak_alias (__re_set_syntax, re_set_syntax)
133989857Sobrien# endif
134089857Sobrien
134189857Sobrien/* This table gives an error message for each of the error codes listed
134289857Sobrien   in regex.h.  Obviously the order here has to be same as there.
134389857Sobrien   POSIX doesn't require that we do anything for REG_NOERROR,
134489857Sobrien   but why not be nice?  */
134589857Sobrien
1346130561Sobrienstatic const char *re_error_msgid[] =
134789857Sobrien  {
1348130561Sobrien    gettext_noop ("Success"),	/* REG_NOERROR */
1349130561Sobrien    gettext_noop ("No match"),	/* REG_NOMATCH */
1350130561Sobrien    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1351130561Sobrien    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1352130561Sobrien    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1353130561Sobrien    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1354130561Sobrien    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1355130561Sobrien    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1356130561Sobrien    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1357130561Sobrien    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1358130561Sobrien    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1359130561Sobrien    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1360130561Sobrien    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1361130561Sobrien    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1362130561Sobrien    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1363130561Sobrien    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
136489857Sobrien    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
136589857Sobrien  };
136689857Sobrien
136789857Sobrien#endif /* INSIDE_RECURSION */
136889857Sobrien
136989857Sobrien#ifndef DEFINED_ONCE
137089857Sobrien/* Avoiding alloca during matching, to placate r_alloc.  */
137189857Sobrien
137289857Sobrien/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
137389857Sobrien   searching and matching functions should not call alloca.  On some
137489857Sobrien   systems, alloca is implemented in terms of malloc, and if we're
137589857Sobrien   using the relocating allocator routines, then malloc could cause a
137689857Sobrien   relocation, which might (if the strings being searched are in the
137789857Sobrien   ralloc heap) shift the data out from underneath the regexp
137889857Sobrien   routines.
137989857Sobrien
138089857Sobrien   Here's another reason to avoid allocation: Emacs
138189857Sobrien   processes input from X in a signal handler; processing X input may
138289857Sobrien   call malloc; if input arrives while a matching routine is calling
138389857Sobrien   malloc, then we're scrod.  But Emacs can't just block input while
138489857Sobrien   calling matching routines; then we don't notice interrupts when
138589857Sobrien   they come in.  So, Emacs blocks input around all regexp calls
138689857Sobrien   except the matching calls, which it leaves unprotected, in the
138789857Sobrien   faith that they will not malloc.  */
138889857Sobrien
138989857Sobrien/* Normally, this is fine.  */
139089857Sobrien# define MATCH_MAY_ALLOCATE
139189857Sobrien
139289857Sobrien/* When using GNU C, we are not REALLY using the C alloca, no matter
139389857Sobrien   what config.h may say.  So don't take precautions for it.  */
139489857Sobrien# ifdef __GNUC__
139589857Sobrien#  undef C_ALLOCA
139689857Sobrien# endif
139789857Sobrien
139889857Sobrien/* The match routines may not allocate if (1) they would do it with malloc
139989857Sobrien   and (2) it's not safe for them to use malloc.
140089857Sobrien   Note that if REL_ALLOC is defined, matching would not use malloc for the
140189857Sobrien   failure stack, but we would still use it for the register vectors;
140289857Sobrien   so REL_ALLOC should not affect this.  */
140389857Sobrien# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
140489857Sobrien#  undef MATCH_MAY_ALLOCATE
140589857Sobrien# endif
140689857Sobrien#endif /* not DEFINED_ONCE */
140789857Sobrien
140889857Sobrien#ifdef INSIDE_RECURSION
140989857Sobrien/* Failure stack declarations and macros; both re_compile_fastmap and
141089857Sobrien   re_match_2 use a failure stack.  These have to be macros because of
141189857Sobrien   REGEX_ALLOCATE_STACK.  */
141289857Sobrien
141389857Sobrien
141489857Sobrien/* Number of failure points for which to initially allocate space
141589857Sobrien   when matching.  If this number is exceeded, we allocate more
141689857Sobrien   space, so it is not a hard limit.  */
141789857Sobrien# ifndef INIT_FAILURE_ALLOC
141889857Sobrien#  define INIT_FAILURE_ALLOC 5
141989857Sobrien# endif
142089857Sobrien
142189857Sobrien/* Roughly the maximum number of failure points on the stack.  It would be
142289857Sobrien   exactly that if we always used MAX_FAILURE_ITEMS items each time we failed.
142389857Sobrien   This is a variable only so users of regex can assign to it; we never
142489857Sobrien   change it ourselves.  */
142589857Sobrien
142689857Sobrien# ifdef INT_IS_16BIT
142789857Sobrien/* INT_IS_16BIT build: the limit and the stack counters use long types.  */
142889857Sobrien#  ifndef DEFINED_ONCE
142989857Sobrien#   if defined MATCH_MAY_ALLOCATE
143089857Sobrien/* 4400 was enough to cause a crash on Alpha OSF/1,
143189857Sobrien   whose default stack limit is 2mb.  */
143289857Sobrienlong int re_max_failures = 4000;
143389857Sobrien#   else
143489857Sobrienlong int re_max_failures = 2000;
143589857Sobrien#   endif
143689857Sobrien#  endif
143789857Sobrien/* One failure-stack slot: holds either a pointer or an integer.  */
143889857Sobrienunion PREFIX(fail_stack_elt)
143989857Sobrien{
144089857Sobrien  UCHAR_T *pointer;
144189857Sobrien  long int integer;
144289857Sobrien};
144389857Sobrien
144489857Sobrientypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
144589857Sobrien/* The failure stack: an array of slots plus its capacity and fill level.  */
144689857Sobrientypedef struct
144789857Sobrien{
144889857Sobrien  PREFIX(fail_stack_elt_t) *stack;
144989857Sobrien  unsigned long int size;
145089857Sobrien  unsigned long int avail;		/* Offset of next open position.  */
145189857Sobrien} PREFIX(fail_stack_type);
145289857Sobrien
145389857Sobrien# else /* not INT_IS_16BIT */
145489857Sobrien/* Otherwise plain int / unsigned are used for the same declarations.  */
145589857Sobrien#  ifndef DEFINED_ONCE
145689857Sobrien#   if defined MATCH_MAY_ALLOCATE
145789857Sobrien/* 4400 was enough to cause a crash on Alpha OSF/1,
145889857Sobrien   whose default stack limit is 2mb.  */
145989857Sobrienint re_max_failures = 4000;
146089857Sobrien#   else
146189857Sobrienint re_max_failures = 2000;
146289857Sobrien#   endif
146389857Sobrien#  endif
146489857Sobrien/* One failure-stack slot: holds either a pointer or an integer.  */
146589857Sobrienunion PREFIX(fail_stack_elt)
146689857Sobrien{
146789857Sobrien  UCHAR_T *pointer;
146889857Sobrien  int integer;
146989857Sobrien};
147089857Sobrien
147189857Sobrientypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
147289857Sobrien/* The failure stack: an array of slots plus its capacity and fill level.  */
147389857Sobrientypedef struct
147489857Sobrien{
147589857Sobrien  PREFIX(fail_stack_elt_t) *stack;
147689857Sobrien  unsigned size;
147789857Sobrien  unsigned avail;			/* Offset of next open position.  */
147889857Sobrien} PREFIX(fail_stack_type);
147989857Sobrien
148089857Sobrien# endif /* INT_IS_16BIT */
148189857Sobrien/* Predicates on the caller's `fail_stack' (or `fail_stack_ptr') variable.  */
148289857Sobrien# ifndef DEFINED_ONCE
148389857Sobrien#  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
148489857Sobrien#  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
148589857Sobrien#  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
148689857Sobrien# endif
148789857Sobrien
148889857Sobrien
148989857Sobrien/* Define macros to initialize and free the failure stack `fail_stack'.
149089857Sobrien   INIT_FAIL_STACK does `return -2' from the caller if the alloc fails.  */
149189857Sobrien/* Without MATCH_MAY_ALLOCATE, INIT only resets `avail' and RESET is empty.  */
149289857Sobrien# ifdef MATCH_MAY_ALLOCATE
149389857Sobrien#  define INIT_FAIL_STACK()						\
149489857Sobrien  do {									\
149589857Sobrien    fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
149689857Sobrien      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
149789857Sobrien									\
149889857Sobrien    if (fail_stack.stack == NULL)				\
149989857Sobrien      return -2;							\
150089857Sobrien									\
150189857Sobrien    fail_stack.size = INIT_FAILURE_ALLOC;			\
150289857Sobrien    fail_stack.avail = 0;					\
150389857Sobrien  } while (0)
150489857Sobrien
150589857Sobrien#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
150689857Sobrien# else
150789857Sobrien#  define INIT_FAIL_STACK()						\
150889857Sobrien  do {									\
150989857Sobrien    fail_stack.avail = 0;					\
151089857Sobrien  } while (0)
151189857Sobrien
151289857Sobrien#  define RESET_FAIL_STACK()
151389857Sobrien# endif
151489857Sobrien
151589857Sobrien
151689857Sobrien/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
151789857Sobrien   The size limit is evaluated in unsigned long, so it is neither truncated
151889857Sobrien   where int is 16 bits nor computed with signed overflow.  Return 1 if
151989857Sobrien   succeeds, and 0 if either ran out of memory or it was already too large.
152089857Sobrien
152189857Sobrien   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
152289857Sobrien
152389857Sobrien# define DOUBLE_FAIL_STACK(fail_stack)					\
152489857Sobrien  ((fail_stack).size > ((unsigned long) re_max_failures * MAX_FAILURE_ITEMS) \
152589857Sobrien   ? 0									\
152689857Sobrien   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
152789857Sobrien        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
152889857Sobrien          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
152989857Sobrien          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
153089857Sobrien									\
153189857Sobrien      (fail_stack).stack == NULL					\
153289857Sobrien      ? 0								\
153389857Sobrien      : ((fail_stack).size <<= 1, 					\
153489857Sobrien         1)))
153589857Sobrien
153689857Sobrien
153789857Sobrien/* Push pointer POINTER on FAIL_STACK, doubling FAIL_STACK first if it is full.
153889857Sobrien   Return 1 if was able to do so and 0 if ran out of memory allocating
153989857Sobrien   space to do so.  Fullness is tested on FAIL_STACK itself.  */
154089857Sobrien# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
154189857Sobrien  (((FAIL_STACK).avail == (FAIL_STACK).size				\
154289857Sobrien    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
154389857Sobrien   ? 0									\
154489857Sobrien   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
154589857Sobrien      1))
154689857Sobrien
154789857Sobrien/* Push a pointer value onto the failure stack without checking for room.
154889857Sobrien   Assumes the variable `fail_stack'.  Probably should only
154989857Sobrien   be called from within `PUSH_FAILURE_POINT'.  */
155089857Sobrien# define PUSH_FAILURE_POINTER(item)					\
155189857Sobrien  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
155289857Sobrien
155389857Sobrien/* This pushes an integer-valued item onto the failure stack, unchecked.
155489857Sobrien   Assumes the variable `fail_stack'.  Probably should only
155589857Sobrien   be called from within `PUSH_FAILURE_POINT'.  */
155689857Sobrien# define PUSH_FAILURE_INT(item)					\
155789857Sobrien  fail_stack.stack[fail_stack.avail++].integer = (item)
155889857Sobrien
155989857Sobrien/* Push a whole fail_stack_elt_t value onto the failure stack, unchecked.
156089857Sobrien   Assumes the variable `fail_stack'.  Probably should only
156189857Sobrien   be called from within `PUSH_FAILURE_POINT'.  */
156289857Sobrien# define PUSH_FAILURE_ELT(item)					\
156389857Sobrien  fail_stack.stack[fail_stack.avail++] =  (item)
156489857Sobrien
156589857Sobrien/* These three POP... operations complement the three PUSH... operations.
156689857Sobrien   All assume that `fail_stack' is nonempty; none of them checks it.  */
156789857Sobrien# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
156889857Sobrien# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
156989857Sobrien# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
157089857Sobrien
157189857Sobrien/* Push/pop the failure point id only under DEBUG; otherwise expand to nothing.  */
157289857Sobrien# ifdef DEBUG
157389857Sobrien#  define DEBUG_PUSH PUSH_FAILURE_INT
157489857Sobrien#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
157589857Sobrien# else
157689857Sobrien#  define DEBUG_PUSH(item)
157789857Sobrien#  define DEBUG_POP(item_addr)
157889857Sobrien# endif
157989857Sobrien
158089857Sobrien
158189857Sobrien/* Push the information about the state we will need
158289857Sobrien   if we ever fail back to it: the active registers, then the low and high
158389857Sobrien   active register numbers, the pattern and string positions.
158489857Sobrien   Requires variables fail_stack, regstart, regend, reg_info,
158589857Sobrien   lowest_active_reg, highest_active_reg, and num_regs_pushed be
158689857Sobrien   declared.  The `destination' DOUBLE_FAIL_STACK needs is declared here.
158789857Sobrien
158889857Sobrien   Does `return FAILURE_CODE' if the failure stack cannot be grown.  */
158989857Sobrien
159089857Sobrien# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
159189857Sobrien  do {									\
159289857Sobrien    char *destination;							\
159389857Sobrien    /* An older note: must be int, so when we don't save any registers,	\
159489857Sobrien       the arithmetic of 0 + -1 isn't done as unsigned.  */		\
159589857Sobrien    /* A later note overrides it: can't be int, since nothing guarantees	\
159689857Sobrien       int is wide enough to hold a value to which a pointer can be	\
159789857Sobrien       assigned; hence active_reg_t below.  */				\
159889857Sobrien    active_reg_t this_reg;						\
159989857Sobrien    									\
160089857Sobrien    DEBUG_STATEMENT (failure_id++);					\
160189857Sobrien    DEBUG_STATEMENT (nfailure_points_pushed++);				\
160289857Sobrien    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
160389857Sobrien    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
160489857Sobrien    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
160589857Sobrien									\
160689857Sobrien    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
160789857Sobrien    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
160889857Sobrien									\
160989857Sobrien    /* Ensure we have enough space allocated for what we will push.  */	\
161089857Sobrien    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
161189857Sobrien      {									\
161289857Sobrien        if (!DOUBLE_FAIL_STACK (fail_stack))				\
161389857Sobrien          return failure_code;						\
161489857Sobrien									\
161589857Sobrien        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
161689857Sobrien		       (fail_stack).size);				\
161789857Sobrien        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
161889857Sobrien      }									\
161989857Sobrien									\
162089857Sobrien    /* Push the info, starting with the registers.  */			\
162189857Sobrien    DEBUG_PRINT1 ("\n");						\
162289857Sobrien    /* The constant condition makes the register loop unconditional.  */ \
162389857Sobrien    if (1)								\
162489857Sobrien      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
162589857Sobrien	   this_reg++)							\
162689857Sobrien	{								\
162789857Sobrien	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
162889857Sobrien	  DEBUG_STATEMENT (num_regs_pushed++);				\
162989857Sobrien									\
163089857Sobrien	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
163189857Sobrien	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
163289857Sobrien									\
163389857Sobrien	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
163489857Sobrien	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
163589857Sobrien									\
163689857Sobrien	  DEBUG_PRINT2 ("    info: %p\n      ",				\
163789857Sobrien			reg_info[this_reg].word.pointer);		\
163889857Sobrien	  DEBUG_PRINT2 (" match_null=%d",				\
163989857Sobrien			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
164089857Sobrien	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
164189857Sobrien	  DEBUG_PRINT2 (" matched_something=%d",			\
164289857Sobrien			MATCHED_SOMETHING (reg_info[this_reg]));	\
164389857Sobrien	  DEBUG_PRINT2 (" ever_matched=%d",				\
164489857Sobrien			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
164589857Sobrien	  DEBUG_PRINT1 ("\n");						\
164689857Sobrien	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
164789857Sobrien	}								\
164889857Sobrien									\
164989857Sobrien    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
165089857Sobrien    PUSH_FAILURE_INT (lowest_active_reg);				\
165189857Sobrien									\
165289857Sobrien    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
165389857Sobrien    PUSH_FAILURE_INT (highest_active_reg);				\
165489857Sobrien									\
165589857Sobrien    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
165689857Sobrien    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
165789857Sobrien    PUSH_FAILURE_POINTER (pattern_place);				\
165889857Sobrien									\
165989857Sobrien    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
166089857Sobrien    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
166189857Sobrien				 size2);				\
166289857Sobrien    DEBUG_PRINT1 ("'\n");						\
166389857Sobrien    PUSH_FAILURE_POINTER (string_place);				\
166489857Sobrien									\
166589857Sobrien    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
166689857Sobrien    DEBUG_PUSH (failure_id);						\
166789857Sobrien  } while (0)
166889857Sobrien
166989857Sobrien# ifndef DEFINED_ONCE
167089857Sobrien/* This is the number of items that are pushed and popped on the stack
167189857Sobrien   for each register: its start, its end, and its info word.  */
167289857Sobrien#  define NUM_REG_ITEMS  3
167389857Sobrien
167489857Sobrien/* Non-register items: low/high active reg, pattern and string positions.  */
167589857Sobrien#  ifdef DEBUG
167689857Sobrien#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
167789857Sobrien#  else
167889857Sobrien#   define NUM_NONREG_ITEMS 4
167989857Sobrien#  endif
168089857Sobrien
168189857Sobrien/* We push at most this many items on the stack.  */
168289857Sobrien/* We used to use (num_regs - 1), which is the number of registers
168389857Sobrien   this regexp will save; but that was changed to 5
168489857Sobrien   to avoid stack overflow for a regexp with lots of parens.  */
168589857Sobrien#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
168689857Sobrien
168789857Sobrien/* We actually push this many items (the `0 ? 0' arm is never selected).  */
168889857Sobrien#  define NUM_FAILURE_ITEMS				\
168989857Sobrien  (((0							\
169089857Sobrien     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
169189857Sobrien    * NUM_REG_ITEMS)					\
169289857Sobrien   + NUM_NONREG_ITEMS)
169389857Sobrien
169489857Sobrien/* How many items can still be added to the stack without overflowing it.  */
169589857Sobrien#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
169689857Sobrien# endif /* not DEFINED_ONCE */
169789857Sobrien
169889857Sobrien
169989857Sobrien/* Pops what PUSH_FAILURE_POINT pushes.
170089857Sobrien
170189857Sobrien   We restore into the parameters, all of which should be lvalues:
170289857Sobrien     STR -- the saved data position (left alone if the saved one is NULL).
170389857Sobrien     PAT -- the saved pattern position.
170489857Sobrien     LOW_REG, HIGH_REG -- the lowest and highest active registers.
170589857Sobrien     REGSTART, REGEND -- arrays of string positions.
170689857Sobrien     REG_INFO -- array of information about each subexpression.
170789857Sobrien
170889857Sobrien   Also assumes the variables `fail_stack', `set_regs_matched_done' and (if
170989857Sobrien   debugging) `bufp', `pend', `string1', `size1', `string2', and `size2'.  */
171089857Sobrien# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
171189857Sobrien{									\
171289857Sobrien  DEBUG_STATEMENT (unsigned failure_id;)				\
171389857Sobrien  active_reg_t this_reg;						\
171489857Sobrien  const UCHAR_T *string_temp;						\
171589857Sobrien									\
171689857Sobrien  assert (!FAIL_STACK_EMPTY ());					\
171789857Sobrien									\
171889857Sobrien  /* Remove failure points and point to how many regs pushed.  */	\
171989857Sobrien  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
172089857Sobrien  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
172189857Sobrien  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
172289857Sobrien									\
172389857Sobrien  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
172489857Sobrien									\
172589857Sobrien  DEBUG_POP (&failure_id);						\
172689857Sobrien  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
172789857Sobrien									\
172889857Sobrien  /* If the saved string location is NULL, it came from an		\
172989857Sobrien     on_failure_keep_string_jump opcode, and we want to throw away the	\
173089857Sobrien     saved NULL, thus retaining our current position in the string.  */	\
173189857Sobrien  string_temp = POP_FAILURE_POINTER ();					\
173289857Sobrien  if (string_temp != NULL)						\
173389857Sobrien    str = (const CHAR_T *) string_temp;					\
173489857Sobrien									\
173589857Sobrien  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
173689857Sobrien  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
173789857Sobrien  DEBUG_PRINT1 ("'\n");							\
173889857Sobrien									\
173989857Sobrien  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
174089857Sobrien  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
174189857Sobrien  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
174289857Sobrien									\
174389857Sobrien  /* Restore register info.  */						\
174489857Sobrien  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
174589857Sobrien  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
174689857Sobrien									\
174789857Sobrien  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
174889857Sobrien  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
174989857Sobrien  /* The condition is constant, so the `else' arm below never runs.  */	\
175089857Sobrien  if (1)								\
175189857Sobrien    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
175289857Sobrien      {									\
175389857Sobrien	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
175489857Sobrien									\
175589857Sobrien	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
175689857Sobrien	DEBUG_PRINT2 ("      info: %p\n",				\
175789857Sobrien		      reg_info[this_reg].word.pointer);			\
175889857Sobrien									\
175989857Sobrien	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
176089857Sobrien	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
176189857Sobrien									\
176289857Sobrien	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
176389857Sobrien	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
176489857Sobrien      }									\
176589857Sobrien  else									\
176689857Sobrien    {									\
176789857Sobrien      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
176889857Sobrien	{								\
176989857Sobrien	  reg_info[this_reg].word.integer = 0;				\
177089857Sobrien	  regend[this_reg] = 0;						\
177189857Sobrien	  regstart[this_reg] = 0;					\
177289857Sobrien	}								\
177389857Sobrien      highest_active_reg = high_reg;					\
177489857Sobrien    }									\
177589857Sobrien									\
177689857Sobrien  set_regs_matched_done = 0;						\
177789857Sobrien  DEBUG_STATEMENT (nfailure_points_popped++);				\
177889857Sobrien} /* POP_FAILURE_POINT */
177989857Sobrien
178089857Sobrien/* Structure for per-register (a.k.a. per-group) information.
178189857Sobrien   Other register information, such as the
178289857Sobrien   starting and ending positions (which are addresses), and the list of
178389857Sobrien   inner groups (which is a bit list) are maintained in separate
178489857Sobrien   variables.
178589857Sobrien
178689857Sobrien   We are making a (strictly speaking) nonportable assumption here: that
178789857Sobrien   the compiler will pack our bit fields into something that fits into
178889857Sobrien   the type of `word', i.e., is something that fits into one item on the
178989857Sobrien   failure stack.  */
179089857Sobrien
179189857Sobrien
179289857Sobrien/* Declarations and macros for re_match_2.  */
179389857Sobrien
179489857Sobrientypedef union
179589857Sobrien{
179689857Sobrien  PREFIX(fail_stack_elt_t) word;	/* Whole-slot view, as pushed on the failure stack.  */
179789857Sobrien  struct
179889857Sobrien  {
179989857Sobrien      /* This field is one if this group can match the empty string,
180089857Sobrien         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
180189857Sobrien# define MATCH_NULL_UNSET_VALUE 3
180289857Sobrien    unsigned match_null_string_p : 2;
180389857Sobrien    unsigned is_active : 1;
180489857Sobrien    unsigned matched_something : 1;
180589857Sobrien    unsigned ever_matched_something : 1;
180689857Sobrien  } bits;
180789857Sobrien} PREFIX(register_info_type);
180889857Sobrien/* Accessors for the bit fields of a register-info object R.  */
180989857Sobrien# ifndef DEFINED_ONCE
181089857Sobrien#  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
181189857Sobrien#  define IS_ACTIVE(R)  ((R).bits.is_active)
181289857Sobrien#  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
181389857Sobrien#  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
181489857Sobrien
181589857Sobrien
181689857Sobrien/* Call this when we have matched a real character; unless
181789857Sobrien   `set_regs_matched_done' is already set, it sets both `matched' flags of
181889857Sobrien   every register from lowest_active_reg to highest_active_reg.  */
181989857Sobrien#  define SET_REGS_MATCHED()						\
182089857Sobrien  do									\
182189857Sobrien    {									\
182289857Sobrien      if (!set_regs_matched_done)					\
182389857Sobrien	{								\
182489857Sobrien	  active_reg_t r;						\
182589857Sobrien	  set_regs_matched_done = 1;					\
182689857Sobrien	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
182789857Sobrien	    {								\
182889857Sobrien	      MATCHED_SOMETHING (reg_info[r])				\
182989857Sobrien		= EVER_MATCHED_SOMETHING (reg_info[r])			\
183089857Sobrien		= 1;							\
183189857Sobrien	    }								\
183289857Sobrien	}								\
183389857Sobrien    }									\
183489857Sobrien  while (0)
183589857Sobrien# endif /* not DEFINED_ONCE */
183689857Sobrien
183789857Sobrien/* Registers are set to a sentinel (a dummy's address) when they haven't yet matched.  */
183889857Sobrienstatic CHAR_T PREFIX(reg_unset_dummy);
183989857Sobrien# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
184089857Sobrien# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
184189857Sobrien
184289857Sobrien/* Forward declarations of static subroutines, and macros, for regex_compile.  */
1843218822Sdimstatic void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
1844218822Sdimstatic void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
1845218822Sdim                               int arg1, int arg2);
1846218822Sdimstatic void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
1847218822Sdim                                int arg, UCHAR_T *end);
1848218822Sdimstatic void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
1849218822Sdim                                int arg1, int arg2, UCHAR_T *end);
1850218822Sdimstatic boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
1851218822Sdim                                         const CHAR_T *p,
1852218822Sdim                                         reg_syntax_t syntax);
1853218822Sdimstatic boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
1854218822Sdim                                         const CHAR_T *pend,
1855218822Sdim                                         reg_syntax_t syntax);
185689857Sobrien# ifdef WCHAR
1857218822Sdimstatic reg_errcode_t wcs_compile_range (CHAR_T range_start,
1858218822Sdim                                        const CHAR_T **p_ptr,
1859218822Sdim                                        const CHAR_T *pend,
1860218822Sdim                                        char *translate,
1861218822Sdim                                        reg_syntax_t syntax,
1862218822Sdim                                        UCHAR_T *b,
1863218822Sdim                                        CHAR_T *char_set);
1864218822Sdimstatic void insert_space (int num, CHAR_T *loc, CHAR_T *end);
186589857Sobrien# else /* BYTE */
1866218822Sdimstatic reg_errcode_t byte_compile_range (unsigned int range_start,
1867218822Sdim                                         const char **p_ptr,
1868218822Sdim                                         const char *pend,
1869218822Sdim                                         char *translate,
1870218822Sdim                                         reg_syntax_t syntax,
1871218822Sdim                                         unsigned char *b);
187289857Sobrien# endif /* WCHAR */
187389857Sobrien
187489857Sobrien/* Fetch the next character in the uncompiled pattern---translating it
187589857Sobrien   if necessary---or `return REG_EEND' at the end of the pattern.  Also cast
187689857Sobrien   from a possibly signed character to an unsigned one that we can use
187789857Sobrien   as an array index (in, e.g., `translate').  Assumes `p' and `pend'.  */
187889857Sobrien/* In the WCHAR case, we translate only if character <= 0xff,
187989857Sobrien   because it is impossible to allocate 4GB array for some encodings
188089857Sobrien   which have 4 byte character_set like UCS4.  */
188189857Sobrien# ifndef PATFETCH
188289857Sobrien#  ifdef WCHAR
188389857Sobrien#   define PATFETCH(c)							\
188489857Sobrien  do {if (p == pend) return REG_EEND;					\
188589857Sobrien    c = (UCHAR_T) *p++;							\
188689857Sobrien    if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
188789857Sobrien  } while (0)
188889857Sobrien#  else /* BYTE */
188989857Sobrien#   define PATFETCH(c)							\
189089857Sobrien  do {if (p == pend) return REG_EEND;					\
189189857Sobrien    c = (unsigned char) *p++;						\
189289857Sobrien    if (translate) c = (unsigned char) translate[c];			\
189389857Sobrien  } while (0)
189489857Sobrien#  endif /* WCHAR */
189589857Sobrien# endif
189689857Sobrien
189789857Sobrien/* Fetch the next character in the uncompiled pattern, with no
189889857Sobrien   translation; `return REG_EEND' if the pattern is exhausted.  */
189989857Sobrien# define PATFETCH_RAW(c)						\
190089857Sobrien  do {if (p == pend) return REG_EEND;					\
190189857Sobrien    c = (UCHAR_T) *p++; 	       					\
190289857Sobrien  } while (0)
190389857Sobrien
190489857Sobrien/* Go backwards one character in the pattern by decrementing `p'.  */
190589857Sobrien# define PATUNFETCH p--
190689857Sobrien
190789857Sobrien
190889857Sobrien/* If `translate' is non-null, return translate[D], else just D.  We
190989857Sobrien   cast the subscript to translate because some data is declared as
191089857Sobrien   `char *', to avoid warnings when a string constant is passed.  But
191189857Sobrien   when we use a character as a subscript we must make it unsigned.  */
191289857Sobrien/* In the WCHAR case, we translate only if character <= 0xff,
191389857Sobrien   because it is impossible to allocate 4GB array for some encodings
191489857Sobrien   which have 4 byte character_set like UCS4.  */
191589857Sobrien
191689857Sobrien# ifndef TRANSLATE
191789857Sobrien#  ifdef WCHAR
191889857Sobrien#   define TRANSLATE(d) \
191989857Sobrien  ((translate && ((UCHAR_T) (d)) <= 0xff) \
192089857Sobrien   ? (char) translate[(unsigned char) (d)] : (d))
192189857Sobrien# else /* BYTE */
192289857Sobrien#   define TRANSLATE(d) \
1923218822Sdim  (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
192489857Sobrien#  endif /* WCHAR */
192589857Sobrien# endif
192689857Sobrien
192789857Sobrien
192889857Sobrien/* Macros for outputting the compiled pattern into `buffer'.  */
192989857Sobrien
193089857Sobrien/* If the buffer isn't allocated when it comes in, use this.  */
193189857Sobrien# define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
193289857Sobrien/* GET_BUFFER_SPACE expands to a bare `while' statement, not an expression.  */
193389857Sobrien/* Make sure we have room for N more elements (N * sizeof (CHAR_T) bytes under WCHAR).  */
193489857Sobrien# ifdef WCHAR
193589857Sobrien#  define GET_BUFFER_SPACE(n)						\
193689857Sobrien    while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
193789857Sobrien            + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
193889857Sobrien      EXTEND_BUFFER ()
193989857Sobrien# else /* BYTE */
194089857Sobrien#  define GET_BUFFER_SPACE(n)						\
194189857Sobrien    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
194289857Sobrien      EXTEND_BUFFER ()
194389857Sobrien# endif /* WCHAR */
194489857Sobrien
194589857Sobrien/* Make sure we have one more element of buffer space and then store C at `b'.  */
194689857Sobrien# define BUF_PUSH(c)							\
194789857Sobrien  do {									\
194889857Sobrien    GET_BUFFER_SPACE (1);						\
194989857Sobrien    *b++ = (UCHAR_T) (c);						\
195089857Sobrien  } while (0)
195189857Sobrien
195289857Sobrien
195389857Sobrien/* Ensure we have two more elements of buffer space and then append C1 and C2.  */
195489857Sobrien# define BUF_PUSH_2(c1, c2)						\
195589857Sobrien  do {									\
195689857Sobrien    GET_BUFFER_SPACE (2);						\
195789857Sobrien    *b++ = (UCHAR_T) (c1);						\
195889857Sobrien    *b++ = (UCHAR_T) (c2);						\
195989857Sobrien  } while (0)
196089857Sobrien
196189857Sobrien
196289857Sobrien/* As with BUF_PUSH_2, except for three elements.  */
196389857Sobrien# define BUF_PUSH_3(c1, c2, c3)						\
196489857Sobrien  do {									\
196589857Sobrien    GET_BUFFER_SPACE (3);						\
196689857Sobrien    *b++ = (UCHAR_T) (c1);						\
196789857Sobrien    *b++ = (UCHAR_T) (c2);						\
196889857Sobrien    *b++ = (UCHAR_T) (c3);						\
196989857Sobrien  } while (0)
197089857Sobrien
197189857Sobrien/* Store a jump with opcode OP at LOC to location TO.  We store a relative
197289857Sobrien   address offset by the 1 + OFFSET_ADDRESS_SIZE units the jump occupies.  */
197389857Sobrien# define STORE_JUMP(op, loc, to) \
197489857Sobrien PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
197589857Sobrien
197689857Sobrien/* Likewise, for a two-argument jump; ARG is passed through unchanged.  */
197789857Sobrien# define STORE_JUMP2(op, loc, to, arg) \
197889857Sobrien  PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
197989857Sobrien
198089857Sobrien/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
198189857Sobrien# define INSERT_JUMP(op, loc, to) \
198289857Sobrien  PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
198389857Sobrien
198489857Sobrien/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
198589857Sobrien# define INSERT_JUMP2(op, loc, to, arg) \
198689857Sobrien  PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
198789857Sobrien	      arg, b)
198889857Sobrien
198989857Sobrien/* This is not an arbitrary limit: the arguments which represent offsets
199089857Sobrien   into the pattern are two bytes long.  So if 2^16 bytes turns out to
199189857Sobrien   be too small, many things would have to change.  */
199289857Sobrien/* Any other compiler which, like MSC, has an allocation limit below 2^16
199389857Sobrien   bytes will have to use an approach similar to what was done below for
199489857Sobrien   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
199589857Sobrien   reallocating to 0 bytes.  Such a thing is not going to work too well.
199689857Sobrien   You have been warned!!  */
199789857Sobrien# ifndef DEFINED_ONCE
199889857Sobrien#  if defined _MSC_VER  && !defined WIN32
199989857Sobrien/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
200089857Sobrien   The REALLOC define eliminates a flurry of conversion warnings,
200189857Sobrien   but is not required. */
200289857Sobrien#   define MAX_BUF_SIZE  65500L
200389857Sobrien#   define REALLOC(p,s) realloc ((p), (size_t) (s))
200489857Sobrien#  else
200589857Sobrien#   define MAX_BUF_SIZE (1L << 16)
200689857Sobrien#   define REALLOC(p,s) realloc ((p), (s))
200789857Sobrien#  endif
200889857Sobrien
200989857Sobrien/* Extend the buffer to twice its current size via realloc and
201089857Sobrien   reset the pointers that pointed into the old block to point to the
201189857Sobrien   correct places in the new one.  If the buffer is already at
201289857Sobrien   MAX_BUF_SIZE and cannot grow, then flag the size error.  */
201389857Sobrien#  if __BOUNDED_POINTERS__
201489857Sobrien#   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
201589857Sobrien#   define MOVE_BUFFER_POINTER(P) \
201689857Sobrien  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
201789857Sobrien#   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
201889857Sobrien  else						\
201989857Sobrien    {						\
202089857Sobrien      SET_HIGH_BOUND (b);			\
202189857Sobrien      SET_HIGH_BOUND (begalt);			\
202289857Sobrien      if (fixup_alt_jump)			\
202389857Sobrien	SET_HIGH_BOUND (fixup_alt_jump);	\
202489857Sobrien      if (laststart)				\
202589857Sobrien	SET_HIGH_BOUND (laststart);		\
202689857Sobrien      if (pending_exact)			\
202789857Sobrien	SET_HIGH_BOUND (pending_exact);		\
202889857Sobrien    }
202989857Sobrien#  else
203089857Sobrien#   define MOVE_BUFFER_POINTER(P) (P) += incr
203189857Sobrien#   define ELSE_EXTEND_BUFFER_HIGH_BOUND
203289857Sobrien#  endif
203389857Sobrien# endif /* not DEFINED_ONCE */
203489857Sobrien
/* EXTEND_BUFFER grows the compiled-pattern buffer: the allocation is
   doubled (but never beyond MAX_BUF_SIZE) with realloc and, if the
   block moved, every local pointer into it (b, begalt, fixup_alt_jump,
   laststart, pending_exact) is rebased onto the new block.

   It makes the *enclosing function* return REG_ESIZE when the buffer
   is already at its maximum size, and REG_ESPACE when the reallocation
   fails.  On failure the old block is freed here: realloc leaves it
   allocated, and bufp->buffer has just been overwritten with NULL, so
   nothing else could release it.

   The displacement `incr' is a long rather than an int, because the
   old and new blocks can be further apart than an int can express on
   platforms where pointers are wider than int.

   NOTE(review): these early returns do not go through
   FREE_STACK_RETURN -- confirm whether the caller's other allocations
   need releasing on these paths.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    int wchar_count;							\
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    /* How many characters the new buffer can have?  */			\
    wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
    if (wchar_count == 0) wchar_count = 1;				\
    /* Truncate the buffer to CHAR_T align.  */				\
    bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
    bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
    if (COMPILED_BUFFER_VAR == NULL)					\
      {									\
	/* The old block is still allocated; do not leak it.  */	\
	free (old_buffer);						\
	return REG_ESPACE;						\
      }									\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
						bufp->allocated);	\
    if (COMPILED_BUFFER_VAR == NULL)					\
      {									\
	/* The old block is still allocated; do not leak it.  */	\
	free (old_buffer);						\
	return REG_ESPACE;						\
      }									\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
209889857Sobrien
# ifndef DEFINED_ONCE
/* {start,stop}_memory keep the register number argument in a single
   byte, so this is the largest group number we can report things
   about.  */
#  define MAX_REGNUM 255

/* Patterns may nevertheless contain more than `MAX_REGNUM' groups; the
   excess is simply ignored.  */
typedef unsigned int regnum_t;


/* Support for the compile stack.  */

/* Offsets can point forwards as well as backwards, so this type has to
   cover -(MAX_BUF_SIZE - 1) through MAX_BUF_SIZE - 1.  A plain int is
   not wide enough when sizeof (int) == 2.  */
typedef long int pattern_offset_t;

/* One entry of the compile stack.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack itself: an array of entries plus bookkeeping.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned int size;
  unsigned int avail;			/* Offset of next open position.  */
} compile_stack_type;


#  define INIT_COMPILE_STACK_SIZE 32

/* Both tests look at a variable named `compile_stack' in the scope
   where they are expanded.  */
#  define COMPILE_STACK_EMPTY  (!compile_stack.avail)
#  define COMPILE_STACK_FULL  (compile_stack.size == compile_stack.avail)

/* The next available element (an lvalue).  */
#  define COMPILE_STACK_TOP (*(compile_stack.stack + compile_stack.avail))

# endif /* not DEFINED_ONCE */
214489857Sobrien
/* Set the bit for character C in a list.  The list is the byte array
   `b' in scope at the point of use, holding BYTEWIDTH bits per element.
   C is reduced to an unsigned char first; note that it is evaluated
   twice, so it must not have side effects.  Both uses are fully
   parenthesized so that the cast applies to the whole argument even
   when C is an expression such as `x - 1'.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* DEFINED_ONCE */
215189857Sobrien
/* Read the next unsigned decimal number from the uncompiled pattern
   into NUM.  Characters are fetched with PATFETCH into `c' until the
   pattern ends or a non-digit has been fetched (that non-digit is
   consumed and left in `c').  NUM is reset from a negative value to 0
   by the first digit, and once it exceeds RE_DUP_MAX further digits are
   consumed without being accumulated.  With no digits at all NUM is
   left untouched.  */
# define GET_UNSIGNED_NUMBER(num) \
  {									\
    for (; p != pend; )							\
      {									\
	PATFETCH (c);							\
	if (!(c >= '0' && c <= '9'))					\
	  break;							\
	/* Already too large: keep scanning, stop accumulating.  */	\
	if (num > RE_DUP_MAX)						\
	  continue;							\
	if (num < 0)							\
	  num = 0;							\
	num = num * 10 + c - '0';					\
      }									\
  }
216889857Sobrien
# ifndef DEFINED_ONCE
#  if !(defined _LIBC || WIDE_CHAR_SUPPORT)
/* Without the wide-character functions only the fixed set of class
   names tested below is recognized.  */
#   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */

#   define IS_CHAR_CLASS(string)					\
   (STREQ (string, "alnum") || STREQ (string, "alpha")			\
    || STREQ (string, "blank") || STREQ (string, "cntrl")		\
    || STREQ (string, "digit") || STREQ (string, "graph")		\
    || STREQ (string, "lower") || STREQ (string, "print")		\
    || STREQ (string, "punct") || STREQ (string, "space")		\
    || STREQ (string, "upper") || STREQ (string, "xdigit"))
#  else
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#   ifdef CHARCLASS_NAME_MAX
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   else
/* This shouldn't happen but some implementation might still have this
   problem.  Use a reasonable default value.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   endif

/* A name is a character class exactly when wctype knows it.  */
#   ifdef _LIBC
#    define IS_CHAR_CLASS(string) __wctype (string)
#   else
#    define IS_CHAR_CLASS(string) wctype (string)
#   endif
#  endif
# endif /* DEFINED_ONCE */
219889857Sobrien
219989857Sobrien# ifndef MATCH_MAY_ALLOCATE
220089857Sobrien
220189857Sobrien/* If we cannot allocate large objects within re_match_2_internal,
220289857Sobrien   we make the fail stack and register vectors global.
220389857Sobrien   The fail stack, we grow to the maximum size when a regexp
220489857Sobrien   is compiled.
220589857Sobrien   The register vectors, we adjust in size each time we
220689857Sobrien   compile a regexp, according to the number of registers it needs.  */
220789857Sobrien
220889857Sobrienstatic PREFIX(fail_stack_type) fail_stack;
220989857Sobrien
221089857Sobrien/* Size with which the following vectors are currently allocated.
221189857Sobrien   That is so we can make them bigger as needed,
221289857Sobrien   but never make them smaller.  */
221389857Sobrien#  ifdef DEFINED_ONCE
221489857Sobrienstatic int regs_allocated_size;
221589857Sobrien
221689857Sobrienstatic const char **     regstart, **     regend;
221789857Sobrienstatic const char ** old_regstart, ** old_regend;
221889857Sobrienstatic const char **best_regstart, **best_regend;
221989857Sobrienstatic const char **reg_dummy;
222089857Sobrien#  endif /* DEFINED_ONCE */
222189857Sobrien
222289857Sobrienstatic PREFIX(register_info_type) *PREFIX(reg_info);
222389857Sobrienstatic PREFIX(register_info_type) *PREFIX(reg_info_dummy);
222489857Sobrien
222589857Sobrien/* Make the register vectors big enough for NUM_REGS registers,
222689857Sobrien   but don't make them smaller.  */
222789857Sobrien
222889857Sobrienstatic void
2229218822SdimPREFIX(regex_grow_registers) (int num_regs)
223089857Sobrien{
223189857Sobrien  if (num_regs > regs_allocated_size)
223289857Sobrien    {
223389857Sobrien      RETALLOC_IF (regstart,	 num_regs, const char *);
223489857Sobrien      RETALLOC_IF (regend,	 num_regs, const char *);
223589857Sobrien      RETALLOC_IF (old_regstart, num_regs, const char *);
223689857Sobrien      RETALLOC_IF (old_regend,	 num_regs, const char *);
223789857Sobrien      RETALLOC_IF (best_regstart, num_regs, const char *);
223889857Sobrien      RETALLOC_IF (best_regend,	 num_regs, const char *);
223989857Sobrien      RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
224089857Sobrien      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
224189857Sobrien      RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
224289857Sobrien
224389857Sobrien      regs_allocated_size = num_regs;
224489857Sobrien    }
224589857Sobrien}
224689857Sobrien
224789857Sobrien# endif /* not MATCH_MAY_ALLOCATE */
224889857Sobrien
224989857Sobrien# ifndef DEFINED_ONCE
2250218822Sdimstatic boolean group_in_compile_stack (compile_stack_type compile_stack,
2251218822Sdim                                       regnum_t regnum);
225289857Sobrien# endif /* not DEFINED_ONCE */
225389857Sobrien
225489857Sobrien/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
225589857Sobrien   Returns one of error codes defined in `regex.h', or zero for success.
225689857Sobrien
225789857Sobrien   Assumes the `allocated' (and perhaps `buffer') and `translate'
225889857Sobrien   fields are set in BUFP on entry.
225989857Sobrien
226089857Sobrien   If it succeeds, results are put in BUFP (if it returns an error, the
226189857Sobrien   contents of BUFP are undefined):
226289857Sobrien     `buffer' is the compiled pattern;
226389857Sobrien     `syntax' is set to SYNTAX;
226489857Sobrien     `used' is set to the length of the compiled pattern;
226589857Sobrien     `fastmap_accurate' is zero;
226689857Sobrien     `re_nsub' is the number of subexpressions in PATTERN;
226789857Sobrien     `not_bol' and `not_eol' are zero;
226889857Sobrien
226989857Sobrien   The `fastmap' and `newline_anchor' fields are neither
227089857Sobrien   examined nor set.  */
227189857Sobrien
/* Make the enclosing function return VALUE after releasing the storage
   it allocated: always the compile stack and, in the wide-character
   build, also the converted pattern together with its offset and
   binary-flag arrays.  VALUE is evaluated after the storage has been
   freed.  Use it as a statement, followed by a semicolon.  */
# ifdef WCHAR
#  define FREE_STACK_RETURN(value)		\
  do						\
    {						\
      free (pattern);				\
      free (mbs_offset);			\
      free (is_binary);				\
      free (compile_stack.stack);		\
      return (value);				\
    }						\
  while (0)
# else
#  define FREE_STACK_RETURN(value)		\
  do						\
    {						\
      free (compile_stack.stack);		\
      return (value);				\
    }						\
  while (0)
# endif /* WCHAR */
228089857Sobrien
228189857Sobrienstatic reg_errcode_t
2282218822SdimPREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
2283218822Sdim                       size_t ARG_PREFIX(size), reg_syntax_t syntax,
2284218822Sdim                       struct re_pattern_buffer *bufp)
228589857Sobrien{
228689857Sobrien  /* We fetch characters from PATTERN here.  Even though PATTERN is
228789857Sobrien     `char *' (i.e., signed), we declare these variables as unsigned, so
228889857Sobrien     they can be reliably used as array indices.  */
228989857Sobrien  register UCHAR_T c, c1;
229089857Sobrien
229189857Sobrien#ifdef WCHAR
229289857Sobrien  /* A temporary space to keep wchar_t pattern and compiled pattern.  */
229389857Sobrien  CHAR_T *pattern, *COMPILED_BUFFER_VAR;
229489857Sobrien  size_t size;
229589857Sobrien  /* offset buffer for optimization. See convert_mbs_to_wc.  */
229689857Sobrien  int *mbs_offset = NULL;
229789857Sobrien  /* It hold whether each wchar_t is binary data or not.  */
229889857Sobrien  char *is_binary = NULL;
229989857Sobrien  /* A flag whether exactn is handling binary data or not.  */
230089857Sobrien  char is_exactn_bin = FALSE;
230189857Sobrien#endif /* WCHAR */
230289857Sobrien
230389857Sobrien  /* A random temporary spot in PATTERN.  */
230489857Sobrien  const CHAR_T *p1;
230589857Sobrien
230689857Sobrien  /* Points to the end of the buffer, where we should append.  */
230789857Sobrien  register UCHAR_T *b;
230889857Sobrien
230989857Sobrien  /* Keeps track of unclosed groups.  */
231089857Sobrien  compile_stack_type compile_stack;
231189857Sobrien
231289857Sobrien  /* Points to the current (ending) position in the pattern.  */
231389857Sobrien#ifdef WCHAR
231489857Sobrien  const CHAR_T *p;
231589857Sobrien  const CHAR_T *pend;
231689857Sobrien#else /* BYTE */
231789857Sobrien  const CHAR_T *p = pattern;
231889857Sobrien  const CHAR_T *pend = pattern + size;
231989857Sobrien#endif /* WCHAR */
232089857Sobrien
232189857Sobrien  /* How to translate the characters in the pattern.  */
232289857Sobrien  RE_TRANSLATE_TYPE translate = bufp->translate;
232389857Sobrien
232489857Sobrien  /* Address of the count-byte of the most recently inserted `exactn'
232589857Sobrien     command.  This makes it possible to tell if a new exact-match
232689857Sobrien     character can be added to that command or if the character requires
232789857Sobrien     a new `exactn' command.  */
232889857Sobrien  UCHAR_T *pending_exact = 0;
232989857Sobrien
233089857Sobrien  /* Address of start of the most recently finished expression.
233189857Sobrien     This tells, e.g., postfix * where to find the start of its
233289857Sobrien     operand.  Reset at the beginning of groups and alternatives.  */
233389857Sobrien  UCHAR_T *laststart = 0;
233489857Sobrien
233589857Sobrien  /* Address of beginning of regexp, or inside of last group.  */
233689857Sobrien  UCHAR_T *begalt;
233789857Sobrien
233889857Sobrien  /* Address of the place where a forward jump should go to the end of
233989857Sobrien     the containing expression.  Each alternative of an `or' -- except the
234089857Sobrien     last -- ends with a forward jump of this sort.  */
234189857Sobrien  UCHAR_T *fixup_alt_jump = 0;
234289857Sobrien
234389857Sobrien  /* Counts open-groups as they are encountered.  Remembered for the
234489857Sobrien     matching close-group on the compile stack, so the same register
234589857Sobrien     number is put in the stop_memory as the start_memory.  */
234689857Sobrien  regnum_t regnum = 0;
234789857Sobrien
234889857Sobrien#ifdef WCHAR
234989857Sobrien  /* Initialize the wchar_t PATTERN and offset_buffer.  */
235089857Sobrien  p = pend = pattern = TALLOC(csize + 1, CHAR_T);
235189857Sobrien  mbs_offset = TALLOC(csize + 1, int);
235289857Sobrien  is_binary = TALLOC(csize + 1, char);
235389857Sobrien  if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
235489857Sobrien    {
235589857Sobrien      free(pattern);
235689857Sobrien      free(mbs_offset);
235789857Sobrien      free(is_binary);
235889857Sobrien      return REG_ESPACE;
235989857Sobrien    }
236089857Sobrien  pattern[csize] = L'\0';	/* sentinel */
236189857Sobrien  size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
236289857Sobrien  pend = p + size;
236389857Sobrien  if (size < 0)
236489857Sobrien    {
236589857Sobrien      free(pattern);
236689857Sobrien      free(mbs_offset);
236789857Sobrien      free(is_binary);
236889857Sobrien      return REG_BADPAT;
236989857Sobrien    }
237089857Sobrien#endif
237189857Sobrien
237289857Sobrien#ifdef DEBUG
237389857Sobrien  DEBUG_PRINT1 ("\nCompiling pattern: ");
237489857Sobrien  if (debug)
237589857Sobrien    {
237689857Sobrien      unsigned debug_count;
237789857Sobrien
237889857Sobrien      for (debug_count = 0; debug_count < size; debug_count++)
237989857Sobrien        PUT_CHAR (pattern[debug_count]);
238089857Sobrien      putchar ('\n');
238189857Sobrien    }
238289857Sobrien#endif /* DEBUG */
238389857Sobrien
238489857Sobrien  /* Initialize the compile stack.  */
238589857Sobrien  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
238689857Sobrien  if (compile_stack.stack == NULL)
238789857Sobrien    {
238889857Sobrien#ifdef WCHAR
238989857Sobrien      free(pattern);
239089857Sobrien      free(mbs_offset);
239189857Sobrien      free(is_binary);
239289857Sobrien#endif
239389857Sobrien      return REG_ESPACE;
239489857Sobrien    }
239589857Sobrien
239689857Sobrien  compile_stack.size = INIT_COMPILE_STACK_SIZE;
239789857Sobrien  compile_stack.avail = 0;
239889857Sobrien
239989857Sobrien  /* Initialize the pattern buffer.  */
240089857Sobrien  bufp->syntax = syntax;
240189857Sobrien  bufp->fastmap_accurate = 0;
240289857Sobrien  bufp->not_bol = bufp->not_eol = 0;
240389857Sobrien
240489857Sobrien  /* Set `used' to zero, so that if we return an error, the pattern
240589857Sobrien     printer (for debugging) will think there's no pattern.  We reset it
240689857Sobrien     at the end.  */
240789857Sobrien  bufp->used = 0;
240889857Sobrien
240989857Sobrien  /* Always count groups, whether or not bufp->no_sub is set.  */
241089857Sobrien  bufp->re_nsub = 0;
241189857Sobrien
241289857Sobrien#if !defined emacs && !defined SYNTAX_TABLE
241389857Sobrien  /* Initialize the syntax table.  */
241489857Sobrien   init_syntax_once ();
241589857Sobrien#endif
241689857Sobrien
241789857Sobrien  if (bufp->allocated == 0)
241889857Sobrien    {
241989857Sobrien      if (bufp->buffer)
242089857Sobrien	{ /* If zero allocated, but buffer is non-null, try to realloc
242189857Sobrien             enough space.  This loses if buffer's address is bogus, but
242289857Sobrien             that is the user's responsibility.  */
242389857Sobrien#ifdef WCHAR
242489857Sobrien	  /* Free bufp->buffer and allocate an array for wchar_t pattern
242589857Sobrien	     buffer.  */
242689857Sobrien          free(bufp->buffer);
242789857Sobrien          COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
242889857Sobrien					UCHAR_T);
242989857Sobrien#else
243089857Sobrien          RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
243189857Sobrien#endif /* WCHAR */
243289857Sobrien        }
243389857Sobrien      else
243489857Sobrien        { /* Caller did not allocate a buffer.  Do it for them.  */
243589857Sobrien          COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
243689857Sobrien					UCHAR_T);
243789857Sobrien        }
243889857Sobrien
243989857Sobrien      if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
244089857Sobrien#ifdef WCHAR
244189857Sobrien      bufp->buffer = (char*)COMPILED_BUFFER_VAR;
244289857Sobrien#endif /* WCHAR */
244389857Sobrien      bufp->allocated = INIT_BUF_SIZE;
244489857Sobrien    }
244589857Sobrien#ifdef WCHAR
244689857Sobrien  else
244789857Sobrien    COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
244889857Sobrien#endif
244989857Sobrien
245089857Sobrien  begalt = b = COMPILED_BUFFER_VAR;
245189857Sobrien
245289857Sobrien  /* Loop through the uncompiled pattern until we're at the end.  */
245389857Sobrien  while (p != pend)
245489857Sobrien    {
245589857Sobrien      PATFETCH (c);
245689857Sobrien
245789857Sobrien      switch (c)
245889857Sobrien        {
245989857Sobrien        case '^':
246089857Sobrien          {
246189857Sobrien            if (   /* If at start of pattern, it's an operator.  */
246289857Sobrien                   p == pattern + 1
246389857Sobrien                   /* If context independent, it's an operator.  */
246489857Sobrien                || syntax & RE_CONTEXT_INDEP_ANCHORS
246589857Sobrien                   /* Otherwise, depends on what's come before.  */
246689857Sobrien                || PREFIX(at_begline_loc_p) (pattern, p, syntax))
246789857Sobrien              BUF_PUSH (begline);
246889857Sobrien            else
246989857Sobrien              goto normal_char;
247089857Sobrien          }
247189857Sobrien          break;
247289857Sobrien
247389857Sobrien
247489857Sobrien        case '$':
247589857Sobrien          {
247689857Sobrien            if (   /* If at end of pattern, it's an operator.  */
247789857Sobrien                   p == pend
247889857Sobrien                   /* If context independent, it's an operator.  */
247989857Sobrien                || syntax & RE_CONTEXT_INDEP_ANCHORS
248089857Sobrien                   /* Otherwise, depends on what's next.  */
248189857Sobrien                || PREFIX(at_endline_loc_p) (p, pend, syntax))
248289857Sobrien               BUF_PUSH (endline);
248389857Sobrien             else
248489857Sobrien               goto normal_char;
248589857Sobrien           }
248689857Sobrien           break;
248789857Sobrien
248889857Sobrien
248989857Sobrien	case '+':
249089857Sobrien        case '?':
249189857Sobrien          if ((syntax & RE_BK_PLUS_QM)
249289857Sobrien              || (syntax & RE_LIMITED_OPS))
249389857Sobrien            goto normal_char;
249489857Sobrien        handle_plus:
249589857Sobrien        case '*':
249689857Sobrien          /* If there is no previous pattern... */
249789857Sobrien          if (!laststart)
249889857Sobrien            {
249989857Sobrien              if (syntax & RE_CONTEXT_INVALID_OPS)
250089857Sobrien                FREE_STACK_RETURN (REG_BADRPT);
250189857Sobrien              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
250289857Sobrien                goto normal_char;
250389857Sobrien            }
250489857Sobrien
250589857Sobrien          {
250689857Sobrien            /* Are we optimizing this jump?  */
250789857Sobrien            boolean keep_string_p = false;
250889857Sobrien
250989857Sobrien            /* 1 means zero (many) matches is allowed.  */
251089857Sobrien            char zero_times_ok = 0, many_times_ok = 0;
251189857Sobrien
251289857Sobrien            /* If there is a sequence of repetition chars, collapse it
251389857Sobrien               down to just one (the right one).  We can't combine
251489857Sobrien               interval operators with these because of, e.g., `a{2}*',
251589857Sobrien               which should only match an even number of `a's.  */
251689857Sobrien
251789857Sobrien            for (;;)
251889857Sobrien              {
251989857Sobrien                zero_times_ok |= c != '+';
252089857Sobrien                many_times_ok |= c != '?';
252189857Sobrien
252289857Sobrien                if (p == pend)
252389857Sobrien                  break;
252489857Sobrien
252589857Sobrien                PATFETCH (c);
252689857Sobrien
252789857Sobrien                if (c == '*'
252889857Sobrien                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
252989857Sobrien                  ;
253089857Sobrien
253189857Sobrien                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
253289857Sobrien                  {
253389857Sobrien                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
253489857Sobrien
253589857Sobrien                    PATFETCH (c1);
253689857Sobrien                    if (!(c1 == '+' || c1 == '?'))
253789857Sobrien                      {
253889857Sobrien                        PATUNFETCH;
253989857Sobrien                        PATUNFETCH;
254089857Sobrien                        break;
254189857Sobrien                      }
254289857Sobrien
254389857Sobrien                    c = c1;
254489857Sobrien                  }
254589857Sobrien                else
254689857Sobrien                  {
254789857Sobrien                    PATUNFETCH;
254889857Sobrien                    break;
254989857Sobrien                  }
255089857Sobrien
255189857Sobrien                /* If we get here, we found another repeat character.  */
255289857Sobrien               }
255389857Sobrien
255489857Sobrien            /* Star, etc. applied to an empty pattern is equivalent
255589857Sobrien               to an empty pattern.  */
255689857Sobrien            if (!laststart)
255789857Sobrien              break;
255889857Sobrien
255989857Sobrien            /* Now we know whether or not zero matches is allowed
256089857Sobrien               and also whether or not two or more matches is allowed.  */
256189857Sobrien            if (many_times_ok)
256289857Sobrien              { /* More than one repetition is allowed, so put in at the
256389857Sobrien                   end a backward relative jump from `b' to before the next
256489857Sobrien                   jump we're going to put in below (which jumps from
256589857Sobrien                   laststart to after this jump).
256689857Sobrien
256789857Sobrien                   But if we are at the `*' in the exact sequence `.*\n',
256889857Sobrien                   insert an unconditional jump backwards to the .,
256989857Sobrien                   instead of the beginning of the loop.  This way we only
257089857Sobrien                   push a failure point once, instead of every time
257189857Sobrien                   through the loop.  */
257289857Sobrien                assert (p - 1 > pattern);
257389857Sobrien
257489857Sobrien                /* Allocate the space for the jump.  */
257589857Sobrien                GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
257689857Sobrien
257789857Sobrien                /* We know we are not at the first character of the pattern,
257889857Sobrien                   because laststart was nonzero.  And we've already
257989857Sobrien                   incremented `p', by the way, to be the character after
258089857Sobrien                   the `*'.  Do we have to do something analogous here
258189857Sobrien                   for null bytes, because of RE_DOT_NOT_NULL?  */
258289857Sobrien                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
258389857Sobrien		    && zero_times_ok
258489857Sobrien                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
258589857Sobrien                    && !(syntax & RE_DOT_NEWLINE))
258689857Sobrien                  { /* We have .*\n.  */
258789857Sobrien                    STORE_JUMP (jump, b, laststart);
258889857Sobrien                    keep_string_p = true;
258989857Sobrien                  }
259089857Sobrien                else
259189857Sobrien                  /* Anything else.  */
259289857Sobrien                  STORE_JUMP (maybe_pop_jump, b, laststart -
259389857Sobrien			      (1 + OFFSET_ADDRESS_SIZE));
259489857Sobrien
259589857Sobrien                /* We've added more stuff to the buffer.  */
259689857Sobrien                b += 1 + OFFSET_ADDRESS_SIZE;
259789857Sobrien              }
259889857Sobrien
259989857Sobrien            /* On failure, jump from laststart to b + 3, which will be the
260089857Sobrien               end of the buffer after this jump is inserted.  */
260189857Sobrien	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
260289857Sobrien	       'b + 3'.  */
260389857Sobrien            GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
260489857Sobrien            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
260589857Sobrien                                       : on_failure_jump,
260689857Sobrien                         laststart, b + 1 + OFFSET_ADDRESS_SIZE);
260789857Sobrien            pending_exact = 0;
260889857Sobrien            b += 1 + OFFSET_ADDRESS_SIZE;
260989857Sobrien
261089857Sobrien            if (!zero_times_ok)
261189857Sobrien              {
261289857Sobrien                /* At least one repetition is required, so insert a
261389857Sobrien                   `dummy_failure_jump' before the initial
261489857Sobrien                   `on_failure_jump' instruction of the loop. This
261589857Sobrien                   effects a skip over that instruction the first time
261689857Sobrien                   we hit that loop.  */
261789857Sobrien                GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
261889857Sobrien                INSERT_JUMP (dummy_failure_jump, laststart, laststart +
261989857Sobrien			     2 + 2 * OFFSET_ADDRESS_SIZE);
262089857Sobrien                b += 1 + OFFSET_ADDRESS_SIZE;
262189857Sobrien              }
262289857Sobrien            }
262389857Sobrien	  break;
262489857Sobrien
262589857Sobrien
262689857Sobrien	case '.':
262789857Sobrien          laststart = b;
262889857Sobrien          BUF_PUSH (anychar);
262989857Sobrien          break;
263089857Sobrien
263189857Sobrien
263289857Sobrien        case '[':
263389857Sobrien          {
263489857Sobrien            boolean had_char_class = false;
263589857Sobrien#ifdef WCHAR
263689857Sobrien	    CHAR_T range_start = 0xffffffff;
263789857Sobrien#else
263889857Sobrien	    unsigned int range_start = 0xffffffff;
263989857Sobrien#endif
264089857Sobrien            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
264189857Sobrien
264289857Sobrien#ifdef WCHAR
264389857Sobrien	    /* We assume a charset(_not) structure as a wchar_t array.
264489857Sobrien	       charset[0] = (re_opcode_t) charset(_not)
264589857Sobrien               charset[1] = l (= length of char_classes)
264689857Sobrien               charset[2] = m (= length of collating_symbols)
264789857Sobrien               charset[3] = n (= length of equivalence_classes)
264889857Sobrien	       charset[4] = o (= length of char_ranges)
264989857Sobrien	       charset[5] = p (= length of chars)
265089857Sobrien
265189857Sobrien               charset[6] = char_class (wctype_t)
265289857Sobrien               charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
265389857Sobrien                         ...
265489857Sobrien               charset[l+5]  = char_class (wctype_t)
265589857Sobrien
265689857Sobrien               charset[l+6]  = collating_symbol (wchar_t)
265789857Sobrien                            ...
265889857Sobrien               charset[l+m+5]  = collating_symbol (wchar_t)
265989857Sobrien					ifdef _LIBC we use the index if
266089857Sobrien					_NL_COLLATE_SYMB_EXTRAMB instead of
266189857Sobrien					wchar_t string.
266289857Sobrien
266389857Sobrien               charset[l+m+6]  = equivalence_classes (wchar_t)
266489857Sobrien                              ...
266589857Sobrien               charset[l+m+n+5]  = equivalence_classes (wchar_t)
266689857Sobrien					ifdef _LIBC we use the index in
266789857Sobrien					_NL_COLLATE_WEIGHT instead of
266889857Sobrien					wchar_t string.
266989857Sobrien
267089857Sobrien	       charset[l+m+n+6] = range_start
267189857Sobrien	       charset[l+m+n+7] = range_end
267289857Sobrien	                       ...
267389857Sobrien	       charset[l+m+n+2o+4] = range_start
267489857Sobrien	       charset[l+m+n+2o+5] = range_end
267589857Sobrien					ifdef _LIBC we use the value looked up
267689857Sobrien					in _NL_COLLATE_COLLSEQ instead of
267789857Sobrien					wchar_t character.
267889857Sobrien
267989857Sobrien	       charset[l+m+n+2o+6] = char
268089857Sobrien	                          ...
268189857Sobrien	       charset[l+m+n+2o+p+5] = char
268289857Sobrien
268389857Sobrien	     */
268489857Sobrien
268589857Sobrien	    /* We need at least 6 spaces: the opcode, the length of
268689857Sobrien               char_classes, the length of collating_symbols, the length of
268789857Sobrien               equivalence_classes, the length of char_ranges, the length of
268889857Sobrien               chars.  */
268989857Sobrien	    GET_BUFFER_SPACE (6);
269089857Sobrien
269189857Sobrien	    /* Save b as laststart.  We also use laststart as the pointer
269289857Sobrien	       to the first element of the charset here.
269389857Sobrien	       In other words, laststart[i] indicates charset[i].  */
269489857Sobrien            laststart = b;
269589857Sobrien
269689857Sobrien            /* We test `*p == '^' twice, instead of using an if
269789857Sobrien               statement, so we only need one BUF_PUSH.  */
269889857Sobrien            BUF_PUSH (*p == '^' ? charset_not : charset);
269989857Sobrien            if (*p == '^')
270089857Sobrien              p++;
270189857Sobrien
270289857Sobrien            /* Push the length of char_classes, the length of
270389857Sobrien               collating_symbols, the length of equivalence_classes, the
270489857Sobrien               length of char_ranges and the length of chars.  */
270589857Sobrien            BUF_PUSH_3 (0, 0, 0);
270689857Sobrien            BUF_PUSH_2 (0, 0);
270789857Sobrien
270889857Sobrien            /* Remember the first position in the bracket expression.  */
270989857Sobrien            p1 = p;
271089857Sobrien
271189857Sobrien            /* charset_not matches newline according to a syntax bit.  */
271289857Sobrien            if ((re_opcode_t) b[-6] == charset_not
271389857Sobrien                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
271489857Sobrien	      {
271589857Sobrien		BUF_PUSH('\n');
271689857Sobrien		laststart[5]++; /* Update the length of characters  */
271789857Sobrien	      }
271889857Sobrien
271989857Sobrien            /* Read in characters and ranges, setting map bits.  */
272089857Sobrien            for (;;)
272189857Sobrien              {
272289857Sobrien                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
272389857Sobrien
272489857Sobrien                PATFETCH (c);
272589857Sobrien
272689857Sobrien                /* \ might escape characters inside [...] and [^...].  */
272789857Sobrien                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
272889857Sobrien                  {
272989857Sobrien                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
273089857Sobrien
273189857Sobrien                    PATFETCH (c1);
273289857Sobrien		    BUF_PUSH(c1);
273389857Sobrien		    laststart[5]++; /* Update the length of chars  */
273489857Sobrien		    range_start = c1;
273589857Sobrien                    continue;
273689857Sobrien                  }
273789857Sobrien
273889857Sobrien                /* Could be the end of the bracket expression.  If it's
273989857Sobrien                   not (i.e., when the bracket expression is `[]' so
274089857Sobrien                   far), the ']' character bit gets set way below.  */
274189857Sobrien                if (c == ']' && p != p1 + 1)
274289857Sobrien                  break;
274389857Sobrien
274489857Sobrien                /* Look ahead to see if it's a range when the last thing
274589857Sobrien                   was a character class.  */
274689857Sobrien                if (had_char_class && c == '-' && *p != ']')
274789857Sobrien                  FREE_STACK_RETURN (REG_ERANGE);
274889857Sobrien
274989857Sobrien                /* Look ahead to see if it's a range when the last thing
275089857Sobrien                   was a character: if this is a hyphen not at the
275189857Sobrien                   beginning or the end of a list, then it's the range
275289857Sobrien                   operator.  */
275389857Sobrien                if (c == '-'
275489857Sobrien                    && !(p - 2 >= pattern && p[-2] == '[')
275589857Sobrien                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
275689857Sobrien                    && *p != ']')
275789857Sobrien                  {
275889857Sobrien                    reg_errcode_t ret;
275989857Sobrien		    /* Allocate the space for range_start and range_end.  */
276089857Sobrien		    GET_BUFFER_SPACE (2);
276189857Sobrien		    /* Update the pointer to indicate end of buffer.  */
276289857Sobrien                    b += 2;
276389857Sobrien                    ret = wcs_compile_range (range_start, &p, pend, translate,
276489857Sobrien                                         syntax, b, laststart);
276589857Sobrien                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
276689857Sobrien                    range_start = 0xffffffff;
276789857Sobrien                  }
276889857Sobrien                else if (p[0] == '-' && p[1] != ']')
276989857Sobrien                  { /* This handles ranges made up of characters only.  */
277089857Sobrien                    reg_errcode_t ret;
277189857Sobrien
277289857Sobrien		    /* Move past the `-'.  */
277389857Sobrien                    PATFETCH (c1);
277489857Sobrien		    /* Allocate the space for range_start and range_end.  */
277589857Sobrien		    GET_BUFFER_SPACE (2);
277689857Sobrien		    /* Update the pointer to indicate end of buffer.  */
277789857Sobrien                    b += 2;
277889857Sobrien                    ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
277989857Sobrien                                         laststart);
278089857Sobrien                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
278189857Sobrien		    range_start = 0xffffffff;
278289857Sobrien                  }
278389857Sobrien
278489857Sobrien                /* See if we're at the beginning of a possible character
278589857Sobrien                   class.  */
278689857Sobrien                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
278789857Sobrien                  { /* Leave room for the null.  */
278889857Sobrien                    char str[CHAR_CLASS_MAX_LENGTH + 1];
278989857Sobrien
279089857Sobrien                    PATFETCH (c);
279189857Sobrien                    c1 = 0;
279289857Sobrien
279389857Sobrien                    /* If pattern is `[[:'.  */
279489857Sobrien                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
279589857Sobrien
279689857Sobrien                    for (;;)
279789857Sobrien                      {
279889857Sobrien                        PATFETCH (c);
279989857Sobrien                        if ((c == ':' && *p == ']') || p == pend)
280089857Sobrien                          break;
280189857Sobrien			if (c1 < CHAR_CLASS_MAX_LENGTH)
280289857Sobrien			  str[c1++] = c;
280389857Sobrien			else
280489857Sobrien			  /* This is in any case an invalid class name.  */
280589857Sobrien			  str[0] = '\0';
280689857Sobrien                      }
280789857Sobrien                    str[c1] = '\0';
280889857Sobrien
280989857Sobrien                    /* A word bracketed by `[:' and `:]' is a character class.
281089857Sobrien                       If it isn't one: undo the ending character, the letters,
281189857Sobrien                       and leave the leading `:' and `[' (but store them as characters).  */
281289857Sobrien                    if (c == ':' && *p == ']')
281389857Sobrien                      {
281489857Sobrien			wctype_t wt;
281589857Sobrien			uintptr_t alignedp;
281689857Sobrien
281789857Sobrien			/* Query the character class as wctype_t.  */
281889857Sobrien			wt = IS_CHAR_CLASS (str);
281989857Sobrien			if (wt == 0)
282089857Sobrien			  FREE_STACK_RETURN (REG_ECTYPE);
282189857Sobrien
282289857Sobrien                        /* Throw away the ] at the end of the character
282389857Sobrien                           class.  */
282489857Sobrien                        PATFETCH (c);
282589857Sobrien
282689857Sobrien                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
282789857Sobrien
282889857Sobrien			/* Allocate the space for character class.  */
282989857Sobrien                        GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
283089857Sobrien			/* Update the pointer to indicate end of buffer.  */
283189857Sobrien                        b += CHAR_CLASS_SIZE;
283289857Sobrien			/* Move data which follow character classes
283389857Sobrien			    not to violate the data.  */
283489857Sobrien                        insert_space(CHAR_CLASS_SIZE,
283589857Sobrien				     laststart + 6 + laststart[1],
283689857Sobrien				     b - 1);
283789857Sobrien			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
283889857Sobrien				    + __alignof__(wctype_t) - 1)
283989857Sobrien			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
284089857Sobrien			/* Store the character class.  */
284189857Sobrien                        *((wctype_t*)alignedp) = wt;
284289857Sobrien                        /* Update length of char_classes */
284389857Sobrien                        laststart[1] += CHAR_CLASS_SIZE;
284489857Sobrien
284589857Sobrien                        had_char_class = true;
284689857Sobrien                      }
284789857Sobrien                    else
284889857Sobrien                      {
284989857Sobrien                        c1++;
285089857Sobrien                        while (c1--)
285189857Sobrien                          PATUNFETCH;
285289857Sobrien                        BUF_PUSH ('[');
285389857Sobrien                        BUF_PUSH (':');
285489857Sobrien                        laststart[5] += 2; /* Update the length of characters  */
285589857Sobrien			range_start = ':';
285689857Sobrien                        had_char_class = false;
285789857Sobrien                      }
285889857Sobrien                  }
285989857Sobrien                else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
286089857Sobrien							  || *p == '.'))
286189857Sobrien		  {
286289857Sobrien		    CHAR_T str[128];	/* Should be large enough.  */
286389857Sobrien		    CHAR_T delim = *p; /* '=' or '.'  */
286489857Sobrien# ifdef _LIBC
286589857Sobrien		    uint32_t nrules =
286689857Sobrien		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
286789857Sobrien# endif
286889857Sobrien		    PATFETCH (c);
286989857Sobrien		    c1 = 0;
287089857Sobrien
287189857Sobrien		    /* If pattern is `[[=' or '[[.'.  */
287289857Sobrien		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
287389857Sobrien
287489857Sobrien		    for (;;)
287589857Sobrien		      {
287689857Sobrien			PATFETCH (c);
287789857Sobrien			if ((c == delim && *p == ']') || p == pend)
287889857Sobrien			  break;
287989857Sobrien			if (c1 < sizeof (str) - 1)
288089857Sobrien			  str[c1++] = c;
288189857Sobrien			else
288289857Sobrien			  /* This is in any case an invalid class name.  */
288389857Sobrien			  str[0] = '\0';
288489857Sobrien                      }
288589857Sobrien		    str[c1] = '\0';
288689857Sobrien
288789857Sobrien		    if (c == delim && *p == ']' && str[0] != '\0')
288889857Sobrien		      {
288989857Sobrien                        unsigned int i, offset;
289089857Sobrien			/* If we have no collation data we use the default
289189857Sobrien			   collation in which each character is in a class
289289857Sobrien			   by itself.  It also means that ASCII is the
289389857Sobrien			   character set and therefore we cannot have character
289489857Sobrien			   with more than one byte in the multibyte
289589857Sobrien			   representation.  */
289689857Sobrien
289789857Sobrien                        /* If not defined _LIBC, we push the name and
289889857Sobrien			   `\0' for the sake of matching performance.  */
289989857Sobrien			int datasize = c1 + 1;
290089857Sobrien
290189857Sobrien# ifdef _LIBC
290289857Sobrien			int32_t idx = 0;
290389857Sobrien			if (nrules == 0)
290489857Sobrien# endif
290589857Sobrien			  {
290689857Sobrien			    if (c1 != 1)
290789857Sobrien			      FREE_STACK_RETURN (REG_ECOLLATE);
290889857Sobrien			  }
290989857Sobrien# ifdef _LIBC
291089857Sobrien			else
291189857Sobrien			  {
291289857Sobrien			    const int32_t *table;
291389857Sobrien			    const int32_t *weights;
291489857Sobrien			    const int32_t *extra;
291589857Sobrien			    const int32_t *indirect;
291689857Sobrien			    wint_t *cp;
291789857Sobrien
291889857Sobrien			    /* This #include defines a local function!  */
291989857Sobrien#  include <locale/weightwc.h>
292089857Sobrien
292189857Sobrien			    if(delim == '=')
292289857Sobrien			      {
292389857Sobrien				/* We push the index for equivalence class.  */
292489857Sobrien				cp = (wint_t*)str;
292589857Sobrien
292689857Sobrien				table = (const int32_t *)
292789857Sobrien				  _NL_CURRENT (LC_COLLATE,
292889857Sobrien					       _NL_COLLATE_TABLEWC);
292989857Sobrien				weights = (const int32_t *)
293089857Sobrien				  _NL_CURRENT (LC_COLLATE,
293189857Sobrien					       _NL_COLLATE_WEIGHTWC);
293289857Sobrien				extra = (const int32_t *)
293389857Sobrien				  _NL_CURRENT (LC_COLLATE,
293489857Sobrien					       _NL_COLLATE_EXTRAWC);
293589857Sobrien				indirect = (const int32_t *)
293689857Sobrien				  _NL_CURRENT (LC_COLLATE,
293789857Sobrien					       _NL_COLLATE_INDIRECTWC);
293889857Sobrien
293989857Sobrien				idx = findidx ((const wint_t**)&cp);
294089857Sobrien				if (idx == 0 || cp < (wint_t*) str + c1)
294189857Sobrien				  /* This is no valid character.  */
294289857Sobrien				  FREE_STACK_RETURN (REG_ECOLLATE);
294389857Sobrien
294489857Sobrien				str[0] = (wchar_t)idx;
294589857Sobrien			      }
294689857Sobrien			    else /* delim == '.' */
294789857Sobrien			      {
294889857Sobrien				/* We push collation sequence value
294989857Sobrien				   for collating symbol.  */
295089857Sobrien				int32_t table_size;
295189857Sobrien				const int32_t *symb_table;
295289857Sobrien				const unsigned char *extra;
295389857Sobrien				int32_t idx;
295489857Sobrien				int32_t elem;
295589857Sobrien				int32_t second;
295689857Sobrien				int32_t hash;
295789857Sobrien				char char_str[c1];
295889857Sobrien
295989857Sobrien				/* We have to convert the name to a single-byte
296089857Sobrien				   string.  This is possible since the names
296189857Sobrien				   consist of ASCII characters and the internal
296289857Sobrien				   representation is UCS4.  */
296389857Sobrien				for (i = 0; i < c1; ++i)
296489857Sobrien				  char_str[i] = str[i];
296589857Sobrien
296689857Sobrien				table_size =
296789857Sobrien				  _NL_CURRENT_WORD (LC_COLLATE,
296889857Sobrien						    _NL_COLLATE_SYMB_HASH_SIZEMB);
296989857Sobrien				symb_table = (const int32_t *)
297089857Sobrien				  _NL_CURRENT (LC_COLLATE,
297189857Sobrien					       _NL_COLLATE_SYMB_TABLEMB);
297289857Sobrien				extra = (const unsigned char *)
297389857Sobrien				  _NL_CURRENT (LC_COLLATE,
297489857Sobrien					       _NL_COLLATE_SYMB_EXTRAMB);
297589857Sobrien
297689857Sobrien				/* Locate the character in the hashing table.  */
297789857Sobrien				hash = elem_hash (char_str, c1);
297889857Sobrien
297989857Sobrien				idx = 0;
298089857Sobrien				elem = hash % table_size;
298189857Sobrien				second = hash % (table_size - 2);
298289857Sobrien				while (symb_table[2 * elem] != 0)
298389857Sobrien				  {
298489857Sobrien				    /* First compare the hashing value.  */
298589857Sobrien				    if (symb_table[2 * elem] == hash
298689857Sobrien					&& c1 == extra[symb_table[2 * elem + 1]]
298789857Sobrien					&& memcmp (char_str,
298889857Sobrien						   &extra[symb_table[2 * elem + 1]
298989857Sobrien							 + 1], c1) == 0)
299089857Sobrien				      {
299189857Sobrien					/* Yep, this is the entry.  */
299289857Sobrien					idx = symb_table[2 * elem + 1];
299389857Sobrien					idx += 1 + extra[idx];
299489857Sobrien					break;
299589857Sobrien				      }
299689857Sobrien
299789857Sobrien				    /* Next entry.  */
299889857Sobrien				    elem += second;
299989857Sobrien				  }
300089857Sobrien
300189857Sobrien				if (symb_table[2 * elem] != 0)
300289857Sobrien				  {
300389857Sobrien				    /* Compute the index of the byte sequence
300489857Sobrien				       in the table.  */
300589857Sobrien				    idx += 1 + extra[idx];
300689857Sobrien				    /* Adjust for the alignment.  */
300789857Sobrien				    idx = (idx + 3) & ~3;
300889857Sobrien
300989857Sobrien				    str[0] = (wchar_t) idx + 4;
301089857Sobrien				  }
301189857Sobrien				else if (symb_table[2 * elem] == 0 && c1 == 1)
301289857Sobrien				  {
301389857Sobrien				    /* No valid character.  Match it as a
301489857Sobrien				       single byte character.  */
301589857Sobrien				    had_char_class = false;
301689857Sobrien				    BUF_PUSH(str[0]);
301789857Sobrien				    /* Update the length of characters  */
301889857Sobrien				    laststart[5]++;
301989857Sobrien				    range_start = str[0];
302089857Sobrien
302189857Sobrien				    /* Throw away the ] at the end of the
302289857Sobrien				       collating symbol.  */
302389857Sobrien				    PATFETCH (c);
302489857Sobrien				    /* Resume the enclosing for (;;) loop.  */
302589857Sobrien				    continue;
302689857Sobrien				  }
302789857Sobrien				else
302889857Sobrien				  FREE_STACK_RETURN (REG_ECOLLATE);
302989857Sobrien			      }
303089857Sobrien			    datasize = 1;
303189857Sobrien			  }
303289857Sobrien# endif
303389857Sobrien                        /* Throw away the ] at the end of the equivalence
303489857Sobrien                           class (or collating symbol).  */
303589857Sobrien                        PATFETCH (c);
303689857Sobrien
303789857Sobrien			/* Allocate the space for the equivalence class
303889857Sobrien			   (or collating symbol) (and '\0' if needed).  */
303989857Sobrien                        GET_BUFFER_SPACE(datasize);
304089857Sobrien			/* Update the pointer to indicate end of buffer.  */
304189857Sobrien                        b += datasize;
304289857Sobrien
304389857Sobrien			if (delim == '=')
304489857Sobrien			  { /* equivalence class  */
304589857Sobrien			    /* Calculate the offset of char_ranges,
304689857Sobrien			       which is next to equivalence_classes.  */
304789857Sobrien			    offset = laststart[1] + laststart[2]
304889857Sobrien			      + laststart[3] +6;
304989857Sobrien			    /* Insert space.  */
305089857Sobrien			    insert_space(datasize, laststart + offset, b - 1);
305189857Sobrien
305289857Sobrien			    /* Write the equivalence_class and \0.  */
305389857Sobrien			    for (i = 0 ; i < datasize ; i++)
305489857Sobrien			      laststart[offset + i] = str[i];
305589857Sobrien
305689857Sobrien			    /* Update the length of equivalence_classes.  */
305789857Sobrien			    laststart[3] += datasize;
305889857Sobrien			    had_char_class = true;
305989857Sobrien			  }
306089857Sobrien			else /* delim == '.' */
306189857Sobrien			  { /* collating symbol  */
306289857Sobrien			    /* Calculate the offset of the equivalence_classes,
306389857Sobrien			       which is next to collating_symbols.  */
306489857Sobrien			    offset = laststart[1] + laststart[2] + 6;
306589857Sobrien			    /* Insert space and write the collating_symbol
306689857Sobrien			       and \0.  */
306789857Sobrien			    insert_space(datasize, laststart + offset, b-1);
306889857Sobrien			    for (i = 0 ; i < datasize ; i++)
306989857Sobrien			      laststart[offset + i] = str[i];
307089857Sobrien
307189857Sobrien			    /* In re_match_2_internal if range_start < -1, we
307289857Sobrien			       assume -range_start is the offset of the
307389857Sobrien			       collating symbol which is specified as
307489857Sobrien			       the character of the range start.  So we assign
307589857Sobrien			       -(laststart[1] + laststart[2] + 6) to
307689857Sobrien			       range_start.  */
307789857Sobrien			    range_start = -(laststart[1] + laststart[2] + 6);
307889857Sobrien			    /* Update the length of collating_symbol.  */
307989857Sobrien			    laststart[2] += datasize;
308089857Sobrien			    had_char_class = false;
308189857Sobrien			  }
308289857Sobrien		      }
308389857Sobrien                    else
308489857Sobrien                      {
308589857Sobrien                        c1++;
308689857Sobrien                        while (c1--)
308789857Sobrien                          PATUNFETCH;
308889857Sobrien                        BUF_PUSH ('[');
308989857Sobrien                        BUF_PUSH (delim);
309089857Sobrien                        laststart[5] += 2; /* Update the length of characters  */
309189857Sobrien			range_start = delim;
309289857Sobrien                        had_char_class = false;
309389857Sobrien                      }
309489857Sobrien		  }
309589857Sobrien                else
309689857Sobrien                  {
309789857Sobrien                    had_char_class = false;
309889857Sobrien		    BUF_PUSH(c);
309989857Sobrien		    laststart[5]++;  /* Update the length of characters  */
310089857Sobrien		    range_start = c;
310189857Sobrien                  }
310289857Sobrien	      }
310389857Sobrien
310489857Sobrien#else /* BYTE */
310589857Sobrien            /* Ensure that we have enough space to push a charset: the
310689857Sobrien               opcode, the length count, and the bitset; 34 bytes in all.  */
310789857Sobrien	    GET_BUFFER_SPACE (34);
310889857Sobrien
310989857Sobrien            laststart = b;
311089857Sobrien
311189857Sobrien            /* We test `*p == '^' twice, instead of using an if
311289857Sobrien               statement, so we only need one BUF_PUSH.  */
311389857Sobrien            BUF_PUSH (*p == '^' ? charset_not : charset);
311489857Sobrien            if (*p == '^')
311589857Sobrien              p++;
311689857Sobrien
311789857Sobrien            /* Remember the first position in the bracket expression.  */
311889857Sobrien            p1 = p;
311989857Sobrien
312089857Sobrien            /* Push the number of bytes in the bitmap.  */
312189857Sobrien            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
312289857Sobrien
312389857Sobrien            /* Clear the whole map.  */
312489857Sobrien            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
312589857Sobrien
312689857Sobrien            /* charset_not matches newline according to a syntax bit.  */
312789857Sobrien            if ((re_opcode_t) b[-2] == charset_not
312889857Sobrien                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
312989857Sobrien              SET_LIST_BIT ('\n');
313089857Sobrien
313189857Sobrien            /* Read in characters and ranges, setting map bits.  */
313289857Sobrien            for (;;)
313389857Sobrien              {
313489857Sobrien                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
313589857Sobrien
313689857Sobrien                PATFETCH (c);
313789857Sobrien
313889857Sobrien                /* \ might escape characters inside [...] and [^...].  */
313989857Sobrien                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
314089857Sobrien                  {
314189857Sobrien                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
314289857Sobrien
314389857Sobrien                    PATFETCH (c1);
314489857Sobrien                    SET_LIST_BIT (c1);
314589857Sobrien		    range_start = c1;
314689857Sobrien                    continue;
314789857Sobrien                  }
314889857Sobrien
314989857Sobrien                /* Could be the end of the bracket expression.  If it's
315089857Sobrien                   not (i.e., when the bracket expression is `[]' so
315189857Sobrien                   far), the ']' character bit gets set way below.  */
315289857Sobrien                if (c == ']' && p != p1 + 1)
315389857Sobrien                  break;
315489857Sobrien
315589857Sobrien                /* Look ahead to see if it's a range when the last thing
315689857Sobrien                   was a character class.  */
315789857Sobrien                if (had_char_class && c == '-' && *p != ']')
315889857Sobrien                  FREE_STACK_RETURN (REG_ERANGE);
315989857Sobrien
316089857Sobrien                /* Look ahead to see if it's a range when the last thing
316189857Sobrien                   was a character: if this is a hyphen not at the
316289857Sobrien                   beginning or the end of a list, then it's the range
316389857Sobrien                   operator.  */
316489857Sobrien                if (c == '-'
316589857Sobrien                    && !(p - 2 >= pattern && p[-2] == '[')
316689857Sobrien                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
316789857Sobrien                    && *p != ']')
316889857Sobrien                  {
316989857Sobrien                    reg_errcode_t ret
317089857Sobrien                      = byte_compile_range (range_start, &p, pend, translate,
317189857Sobrien					    syntax, b);
317289857Sobrien                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
317389857Sobrien		    range_start = 0xffffffff;
317489857Sobrien                  }
317589857Sobrien
317689857Sobrien                else if (p[0] == '-' && p[1] != ']')
317789857Sobrien                  { /* This handles ranges made up of characters only.  */
317889857Sobrien                    reg_errcode_t ret;
317989857Sobrien
318089857Sobrien		    /* Move past the `-'.  */
318189857Sobrien                    PATFETCH (c1);
318289857Sobrien
318389857Sobrien                    ret = byte_compile_range (c, &p, pend, translate, syntax, b);
318489857Sobrien                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
318589857Sobrien		    range_start = 0xffffffff;
318689857Sobrien                  }
318789857Sobrien
318889857Sobrien                /* See if we're at the beginning of a possible character
318989857Sobrien                   class.  */
319089857Sobrien
319189857Sobrien                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
319289857Sobrien                  { /* Leave room for the null.  */
319389857Sobrien                    char str[CHAR_CLASS_MAX_LENGTH + 1];
319489857Sobrien
319589857Sobrien                    PATFETCH (c);
319689857Sobrien                    c1 = 0;
319789857Sobrien
319889857Sobrien                    /* If pattern is `[[:'.  */
319989857Sobrien                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
320089857Sobrien
320189857Sobrien                    for (;;)
320289857Sobrien                      {
320389857Sobrien                        PATFETCH (c);
320489857Sobrien                        if ((c == ':' && *p == ']') || p == pend)
320589857Sobrien                          break;
320689857Sobrien			if (c1 < CHAR_CLASS_MAX_LENGTH)
320789857Sobrien			  str[c1++] = c;
320889857Sobrien			else
320989857Sobrien			  /* This is in any case an invalid class name.  */
321089857Sobrien			  str[0] = '\0';
321189857Sobrien                      }
321289857Sobrien                    str[c1] = '\0';
321389857Sobrien
321489857Sobrien                    /* A word bracketed by `[:' and `:]' is a character class.
321589857Sobrien                       If it isn't one: undo the ending character, the letters,
321689857Sobrien                       and leave the leading `:' and `[' (but set bits for them).  */
321789857Sobrien                    if (c == ':' && *p == ']')
321889857Sobrien                      {
321989857Sobrien# if defined _LIBC || WIDE_CHAR_SUPPORT
322089857Sobrien                        boolean is_lower = STREQ (str, "lower");
322189857Sobrien                        boolean is_upper = STREQ (str, "upper");
322289857Sobrien			wctype_t wt;
322389857Sobrien                        int ch;
322489857Sobrien
322589857Sobrien			wt = IS_CHAR_CLASS (str);
322689857Sobrien			if (wt == 0)
322789857Sobrien			  FREE_STACK_RETURN (REG_ECTYPE);
322889857Sobrien
322989857Sobrien                        /* Throw away the ] at the end of the character
323089857Sobrien                           class.  */
323189857Sobrien                        PATFETCH (c);
323289857Sobrien
323389857Sobrien                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
323489857Sobrien
323589857Sobrien                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
323689857Sobrien			  {
323789857Sobrien#  ifdef _LIBC
323889857Sobrien			    if (__iswctype (__btowc (ch), wt))
323989857Sobrien			      SET_LIST_BIT (ch);
324089857Sobrien#  else
324189857Sobrien			    if (iswctype (btowc (ch), wt))
324289857Sobrien			      SET_LIST_BIT (ch);
324389857Sobrien#  endif
324489857Sobrien
324589857Sobrien			    if (translate && (is_upper || is_lower)
324689857Sobrien				&& (ISUPPER (ch) || ISLOWER (ch)))
324789857Sobrien			      SET_LIST_BIT (ch);
324889857Sobrien			  }
324989857Sobrien
325089857Sobrien                        had_char_class = true;
325189857Sobrien# else
325289857Sobrien                        int ch;
325389857Sobrien                        boolean is_alnum = STREQ (str, "alnum");
325489857Sobrien                        boolean is_alpha = STREQ (str, "alpha");
325589857Sobrien                        boolean is_blank = STREQ (str, "blank");
325689857Sobrien                        boolean is_cntrl = STREQ (str, "cntrl");
325789857Sobrien                        boolean is_digit = STREQ (str, "digit");
325889857Sobrien                        boolean is_graph = STREQ (str, "graph");
325989857Sobrien                        boolean is_lower = STREQ (str, "lower");
326089857Sobrien                        boolean is_print = STREQ (str, "print");
326189857Sobrien                        boolean is_punct = STREQ (str, "punct");
326289857Sobrien                        boolean is_space = STREQ (str, "space");
326389857Sobrien                        boolean is_upper = STREQ (str, "upper");
326489857Sobrien                        boolean is_xdigit = STREQ (str, "xdigit");
326589857Sobrien
326689857Sobrien                        if (!IS_CHAR_CLASS (str))
326789857Sobrien			  FREE_STACK_RETURN (REG_ECTYPE);
326889857Sobrien
326989857Sobrien                        /* Throw away the ] at the end of the character
327089857Sobrien                           class.  */
327189857Sobrien                        PATFETCH (c);
327289857Sobrien
327389857Sobrien                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
327489857Sobrien
327589857Sobrien                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
327689857Sobrien                          {
327789857Sobrien			    /* This was split into 3 if's to
327889857Sobrien			       avoid an arbitrary limit in some compiler.  */
327989857Sobrien                            if (   (is_alnum  && ISALNUM (ch))
328089857Sobrien                                || (is_alpha  && ISALPHA (ch))
328189857Sobrien                                || (is_blank  && ISBLANK (ch))
328289857Sobrien                                || (is_cntrl  && ISCNTRL (ch)))
328389857Sobrien			      SET_LIST_BIT (ch);
328489857Sobrien			    if (   (is_digit  && ISDIGIT (ch))
328589857Sobrien                                || (is_graph  && ISGRAPH (ch))
328689857Sobrien                                || (is_lower  && ISLOWER (ch))
328789857Sobrien                                || (is_print  && ISPRINT (ch)))
328889857Sobrien			      SET_LIST_BIT (ch);
328989857Sobrien			    if (   (is_punct  && ISPUNCT (ch))
329089857Sobrien                                || (is_space  && ISSPACE (ch))
329189857Sobrien                                || (is_upper  && ISUPPER (ch))
329289857Sobrien                                || (is_xdigit && ISXDIGIT (ch)))
329389857Sobrien			      SET_LIST_BIT (ch);
329489857Sobrien			    if (   translate && (is_upper || is_lower)
329589857Sobrien				&& (ISUPPER (ch) || ISLOWER (ch)))
329689857Sobrien			      SET_LIST_BIT (ch);
329789857Sobrien                          }
329889857Sobrien                        had_char_class = true;
329989857Sobrien# endif	/* libc || wctype.h */
330089857Sobrien                      }
330189857Sobrien                    else
330289857Sobrien                      {
330389857Sobrien                        c1++;
330489857Sobrien                        while (c1--)
330589857Sobrien                          PATUNFETCH;
330689857Sobrien                        SET_LIST_BIT ('[');
330789857Sobrien                        SET_LIST_BIT (':');
330889857Sobrien			range_start = ':';
330989857Sobrien                        had_char_class = false;
331089857Sobrien                      }
331189857Sobrien                  }
331289857Sobrien                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
331389857Sobrien		  {
331489857Sobrien		    unsigned char str[MB_LEN_MAX + 1];
331589857Sobrien# ifdef _LIBC
331689857Sobrien		    uint32_t nrules =
331789857Sobrien		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
331889857Sobrien# endif
331989857Sobrien
332089857Sobrien		    PATFETCH (c);
332189857Sobrien		    c1 = 0;
332289857Sobrien
332389857Sobrien		    /* If pattern is `[[='.  */
332489857Sobrien		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
332589857Sobrien
332689857Sobrien		    for (;;)
332789857Sobrien		      {
332889857Sobrien			PATFETCH (c);
332989857Sobrien			if ((c == '=' && *p == ']') || p == pend)
333089857Sobrien			  break;
333189857Sobrien			if (c1 < MB_LEN_MAX)
333289857Sobrien			  str[c1++] = c;
333389857Sobrien			else
333489857Sobrien			  /* This is in any case an invalid class name.  */
333589857Sobrien			  str[0] = '\0';
333689857Sobrien                      }
333789857Sobrien		    str[c1] = '\0';
333889857Sobrien
333989857Sobrien		    if (c == '=' && *p == ']' && str[0] != '\0')
334089857Sobrien		      {
334189857Sobrien			/* If we have no collation data we use the default
334289857Sobrien			   collation in which each character is in a class
334389857Sobrien			   by itself.  It also means that ASCII is the
334489857Sobrien			   character set and therefore we cannot have character
334589857Sobrien			   with more than one byte in the multibyte
334689857Sobrien			   representation.  */
334789857Sobrien# ifdef _LIBC
334889857Sobrien			if (nrules == 0)
334989857Sobrien# endif
335089857Sobrien			  {
335189857Sobrien			    if (c1 != 1)
335289857Sobrien			      FREE_STACK_RETURN (REG_ECOLLATE);
335389857Sobrien
335489857Sobrien			    /* Throw away the ] at the end of the equivalence
335589857Sobrien			       class.  */
335689857Sobrien			    PATFETCH (c);
335789857Sobrien
335889857Sobrien			    /* Set the bit for the character.  */
335989857Sobrien			    SET_LIST_BIT (str[0]);
336089857Sobrien			  }
336189857Sobrien# ifdef _LIBC
336289857Sobrien			else
336389857Sobrien			  {
336489857Sobrien			    /* Try to match the byte sequence in `str' against
336589857Sobrien			       those known to the collate implementation.
336689857Sobrien			       First find out whether the bytes in `str' are
336789857Sobrien			       actually from exactly one character.  */
336889857Sobrien			    const int32_t *table;
336989857Sobrien			    const unsigned char *weights;
337089857Sobrien			    const unsigned char *extra;
337189857Sobrien			    const int32_t *indirect;
337289857Sobrien			    int32_t idx;
337389857Sobrien			    const unsigned char *cp = str;
337489857Sobrien			    int ch;
337589857Sobrien
337689857Sobrien			    /* This #include defines a local function!  */
337789857Sobrien#  include <locale/weight.h>
337889857Sobrien
337989857Sobrien			    table = (const int32_t *)
338089857Sobrien			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
338189857Sobrien			    weights = (const unsigned char *)
338289857Sobrien			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
338389857Sobrien			    extra = (const unsigned char *)
338489857Sobrien			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
338589857Sobrien			    indirect = (const int32_t *)
338689857Sobrien			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
338789857Sobrien
338889857Sobrien			    idx = findidx (&cp);
338989857Sobrien			    if (idx == 0 || cp < str + c1)
339089857Sobrien			      /* This is no valid character.  */
339189857Sobrien			      FREE_STACK_RETURN (REG_ECOLLATE);
339289857Sobrien
339389857Sobrien			    /* Throw away the ] at the end of the equivalence
339489857Sobrien			       class.  */
339589857Sobrien			    PATFETCH (c);
339689857Sobrien
339789857Sobrien			    /* Now we have to go through the whole table
339889857Sobrien			       and find all characters which have the same
339989857Sobrien			       first level weight.
340089857Sobrien
340189857Sobrien			       XXX Note that this is not entirely correct.
340289857Sobrien			       we would have to match multibyte sequences
340389857Sobrien			       but this is not possible with the current
340489857Sobrien			       implementation.  */
340589857Sobrien			    for (ch = 1; ch < 256; ++ch)
340689857Sobrien			      /* XXX This test would have to be changed if we
340789857Sobrien				 would allow matching multibyte sequences.  */
340889857Sobrien			      if (table[ch] > 0)
340989857Sobrien				{
341089857Sobrien				  int32_t idx2 = table[ch];
341189857Sobrien				  size_t len = weights[idx2];
341289857Sobrien
341389857Sobrien				  /* Test whether the lengths match.  */
341489857Sobrien				  if (weights[idx] == len)
341589857Sobrien				    {
341689857Sobrien				      /* They do.  Now compare the bytes of
341789857Sobrien					 the weight.  */
341889857Sobrien				      size_t cnt = 0;
341989857Sobrien
342089857Sobrien				      while (cnt < len
342189857Sobrien					     && (weights[idx + 1 + cnt]
342289857Sobrien						 == weights[idx2 + 1 + cnt]))
342389857Sobrien					++cnt;
342489857Sobrien
342589857Sobrien				      if (cnt == len)
342689857Sobrien					/* They match.  Mark the character as
342789857Sobrien					   acceptable.  */
342889857Sobrien					SET_LIST_BIT (ch);
342989857Sobrien				    }
343089857Sobrien				}
343189857Sobrien			  }
343289857Sobrien# endif
343389857Sobrien			had_char_class = true;
343489857Sobrien		      }
343589857Sobrien                    else
343689857Sobrien                      {
343789857Sobrien                        c1++;
343889857Sobrien                        while (c1--)
343989857Sobrien                          PATUNFETCH;
344089857Sobrien                        SET_LIST_BIT ('[');
344189857Sobrien                        SET_LIST_BIT ('=');
344289857Sobrien			range_start = '=';
344389857Sobrien                        had_char_class = false;
344489857Sobrien                      }
344589857Sobrien		  }
344689857Sobrien                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
344789857Sobrien		  {
344889857Sobrien		    unsigned char str[128];	/* Should be large enough.  */
344989857Sobrien# ifdef _LIBC
345089857Sobrien		    uint32_t nrules =
345189857Sobrien		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
345289857Sobrien# endif
345389857Sobrien
345489857Sobrien		    PATFETCH (c);
345589857Sobrien		    c1 = 0;
345689857Sobrien
345789857Sobrien		    /* If pattern is `[[.'.  */
345889857Sobrien		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
345989857Sobrien
346089857Sobrien		    for (;;)
346189857Sobrien		      {
346289857Sobrien			PATFETCH (c);
346389857Sobrien			if ((c == '.' && *p == ']') || p == pend)
346489857Sobrien			  break;
346589857Sobrien			if (c1 < sizeof (str))
346689857Sobrien			  str[c1++] = c;
346789857Sobrien			else
346889857Sobrien			  /* This is in any case an invalid class name.  */
346989857Sobrien			  str[0] = '\0';
347089857Sobrien                      }
347189857Sobrien		    str[c1] = '\0';
347289857Sobrien
347389857Sobrien		    if (c == '.' && *p == ']' && str[0] != '\0')
347489857Sobrien		      {
347589857Sobrien			/* If we have no collation data we use the default
347689857Sobrien			   collation in which each character is the name
347789857Sobrien			   for its own class which contains only the one
347889857Sobrien			   character.  It also means that ASCII is the
347989857Sobrien			   character set and therefore we cannot have character
348089857Sobrien			   with more than one byte in the multibyte
348189857Sobrien			   representation.  */
348289857Sobrien# ifdef _LIBC
348389857Sobrien			if (nrules == 0)
348489857Sobrien# endif
348589857Sobrien			  {
348689857Sobrien			    if (c1 != 1)
348789857Sobrien			      FREE_STACK_RETURN (REG_ECOLLATE);
348889857Sobrien
348989857Sobrien			    /* Throw away the ] at the end of the collating
349089857Sobrien			       symbol.  */
349189857Sobrien			    PATFETCH (c);
349289857Sobrien
349389857Sobrien			    /* Set the bit for the character.  */
349489857Sobrien			    SET_LIST_BIT (str[0]);
349589857Sobrien			    range_start = ((const unsigned char *) str)[0];
349689857Sobrien			  }
349789857Sobrien# ifdef _LIBC
349889857Sobrien			else
349989857Sobrien			  {
350089857Sobrien			    /* Try to match the byte sequence in `str' against
350189857Sobrien			       those known to the collate implementation.
350289857Sobrien			       First find out whether the bytes in `str' are
350389857Sobrien			       actually from exactly one character.  */
350489857Sobrien			    int32_t table_size;
350589857Sobrien			    const int32_t *symb_table;
350689857Sobrien			    const unsigned char *extra;
350789857Sobrien			    int32_t idx;
350889857Sobrien			    int32_t elem;
350989857Sobrien			    int32_t second;
351089857Sobrien			    int32_t hash;
351189857Sobrien
351289857Sobrien			    table_size =
351389857Sobrien			      _NL_CURRENT_WORD (LC_COLLATE,
351489857Sobrien						_NL_COLLATE_SYMB_HASH_SIZEMB);
351589857Sobrien			    symb_table = (const int32_t *)
351689857Sobrien			      _NL_CURRENT (LC_COLLATE,
351789857Sobrien					   _NL_COLLATE_SYMB_TABLEMB);
351889857Sobrien			    extra = (const unsigned char *)
351989857Sobrien			      _NL_CURRENT (LC_COLLATE,
352089857Sobrien					   _NL_COLLATE_SYMB_EXTRAMB);
352189857Sobrien
352289857Sobrien			    /* Locate the character in the hashing table.  */
352389857Sobrien			    hash = elem_hash (str, c1);
352489857Sobrien
352589857Sobrien			    idx = 0;
352689857Sobrien			    elem = hash % table_size;
352789857Sobrien			    second = hash % (table_size - 2);
352889857Sobrien			    while (symb_table[2 * elem] != 0)
352989857Sobrien			      {
353089857Sobrien				/* First compare the hashing value.  */
353189857Sobrien				if (symb_table[2 * elem] == hash
353289857Sobrien				    && c1 == extra[symb_table[2 * elem + 1]]
353389857Sobrien				    && memcmp (str,
353489857Sobrien					       &extra[symb_table[2 * elem + 1]
353589857Sobrien						     + 1],
353689857Sobrien					       c1) == 0)
353789857Sobrien				  {
353889857Sobrien				    /* Yep, this is the entry.  */
353989857Sobrien				    idx = symb_table[2 * elem + 1];
354089857Sobrien				    idx += 1 + extra[idx];
354189857Sobrien				    break;
354289857Sobrien				  }
354389857Sobrien
354489857Sobrien				/* Next entry.  */
354589857Sobrien				elem += second;
354689857Sobrien			      }
354789857Sobrien
354889857Sobrien			    if (symb_table[2 * elem] == 0)
354989857Sobrien			      /* This is no valid character.  */
355089857Sobrien			      FREE_STACK_RETURN (REG_ECOLLATE);
355189857Sobrien
355289857Sobrien			    /* Throw away the ] at the end of the collating
355389857Sobrien			       symbol.  */
355489857Sobrien			    PATFETCH (c);
355589857Sobrien
355689857Sobrien			    /* Now add the multibyte character(s) we found
355789857Sobrien			       to the accept list.
355889857Sobrien
355989857Sobrien			       XXX Note that this is not entirely correct.
356089857Sobrien			       we would have to match multibyte sequences
356189857Sobrien			       but this is not possible with the current
356289857Sobrien			       implementation.  Also, we have to match
356389857Sobrien			       collating symbols, which expand to more than
356489857Sobrien			       one byte, as a whole and not allow the
356589857Sobrien			       individual bytes.  */
356689857Sobrien			    c1 = extra[idx++];
356789857Sobrien			    if (c1 == 1)
356889857Sobrien			      range_start = extra[idx];
356989857Sobrien			    while (c1-- > 0)
357089857Sobrien			      {
357189857Sobrien				SET_LIST_BIT (extra[idx]);
357289857Sobrien				++idx;
357389857Sobrien			      }
357489857Sobrien			  }
357589857Sobrien# endif
357689857Sobrien			had_char_class = false;
357789857Sobrien		      }
357889857Sobrien                    else
357989857Sobrien                      {
358089857Sobrien                        c1++;
358189857Sobrien                        while (c1--)
358289857Sobrien                          PATUNFETCH;
358389857Sobrien                        SET_LIST_BIT ('[');
358489857Sobrien                        SET_LIST_BIT ('.');
358589857Sobrien			range_start = '.';
358689857Sobrien                        had_char_class = false;
358789857Sobrien                      }
358889857Sobrien		  }
358989857Sobrien                else
359089857Sobrien                  {
359189857Sobrien                    had_char_class = false;
359289857Sobrien                    SET_LIST_BIT (c);
359389857Sobrien		    range_start = c;
359489857Sobrien                  }
359589857Sobrien              }
359689857Sobrien
359789857Sobrien            /* Discard any (non)matching list bytes that are all 0 at the
359889857Sobrien               end of the map.  Decrease the map-length byte too.  */
359989857Sobrien            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
360089857Sobrien              b[-1]--;
360189857Sobrien            b += b[-1];
360289857Sobrien#endif /* WCHAR */
360389857Sobrien          }
360489857Sobrien          break;
360589857Sobrien
360689857Sobrien
360789857Sobrien	case '(':
360889857Sobrien          if (syntax & RE_NO_BK_PARENS)
360989857Sobrien            goto handle_open;
361089857Sobrien          else
361189857Sobrien            goto normal_char;
361289857Sobrien
361389857Sobrien
361489857Sobrien        case ')':
361589857Sobrien          if (syntax & RE_NO_BK_PARENS)
361689857Sobrien            goto handle_close;
361789857Sobrien          else
361889857Sobrien            goto normal_char;
361989857Sobrien
362089857Sobrien
362189857Sobrien        case '\n':
362289857Sobrien          if (syntax & RE_NEWLINE_ALT)
362389857Sobrien            goto handle_alt;
362489857Sobrien          else
362589857Sobrien            goto normal_char;
362689857Sobrien
362789857Sobrien
362889857Sobrien	case '|':
362989857Sobrien          if (syntax & RE_NO_BK_VBAR)
363089857Sobrien            goto handle_alt;
363189857Sobrien          else
363289857Sobrien            goto normal_char;
363389857Sobrien
363489857Sobrien
363589857Sobrien        case '{':
363689857Sobrien           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
363789857Sobrien             goto handle_interval;
363889857Sobrien           else
363989857Sobrien             goto normal_char;
364089857Sobrien
364189857Sobrien
364289857Sobrien        case '\\':
364389857Sobrien          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
364489857Sobrien
364589857Sobrien          /* Do not translate the character after the \, so that we can
364689857Sobrien             distinguish, e.g., \B from \b, even if we normally would
364789857Sobrien             translate, e.g., B to b.  */
364889857Sobrien          PATFETCH_RAW (c);
364989857Sobrien
365089857Sobrien          switch (c)
365189857Sobrien            {
365289857Sobrien            case '(':
365389857Sobrien              if (syntax & RE_NO_BK_PARENS)
365489857Sobrien                goto normal_backslash;
365589857Sobrien
365689857Sobrien            handle_open:
365789857Sobrien              bufp->re_nsub++;
365889857Sobrien              regnum++;
365989857Sobrien
366089857Sobrien              if (COMPILE_STACK_FULL)
366189857Sobrien                {
366289857Sobrien                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
366389857Sobrien                            compile_stack_elt_t);
366489857Sobrien                  if (compile_stack.stack == NULL) return REG_ESPACE;
366589857Sobrien
366689857Sobrien                  compile_stack.size <<= 1;
366789857Sobrien                }
366889857Sobrien
366989857Sobrien              /* These are the values to restore when we hit end of this
367089857Sobrien                 group.  They are all relative offsets, so that if the
367189857Sobrien                 whole pattern moves because of realloc, they will still
367289857Sobrien                 be valid.  */
367389857Sobrien              COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
367489857Sobrien              COMPILE_STACK_TOP.fixup_alt_jump
367589857Sobrien                = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
367689857Sobrien              COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
367789857Sobrien              COMPILE_STACK_TOP.regnum = regnum;
367889857Sobrien
367989857Sobrien              /* We will eventually replace the 0 with the number of
368089857Sobrien                 groups inner to this one.  But do not push a
368189857Sobrien                 start_memory for groups beyond the last one we can
368289857Sobrien                 represent in the compiled pattern.  */
368389857Sobrien              if (regnum <= MAX_REGNUM)
368489857Sobrien                {
368589857Sobrien                  COMPILE_STACK_TOP.inner_group_offset = b
368689857Sobrien		    - COMPILED_BUFFER_VAR + 2;
368789857Sobrien                  BUF_PUSH_3 (start_memory, regnum, 0);
368889857Sobrien                }
368989857Sobrien
369089857Sobrien              compile_stack.avail++;
369189857Sobrien
369289857Sobrien              fixup_alt_jump = 0;
369389857Sobrien              laststart = 0;
369489857Sobrien              begalt = b;
369589857Sobrien	      /* If we've reached MAX_REGNUM groups, then this open
369689857Sobrien		 won't actually generate any code, so we'll have to
369789857Sobrien		 clear pending_exact explicitly.  */
369889857Sobrien	      pending_exact = 0;
369989857Sobrien              break;
370089857Sobrien
370189857Sobrien
370289857Sobrien            case ')':
370389857Sobrien              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
370489857Sobrien
370589857Sobrien              if (COMPILE_STACK_EMPTY)
370689857Sobrien		{
370789857Sobrien		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
370889857Sobrien		    goto normal_backslash;
370989857Sobrien		  else
371089857Sobrien		    FREE_STACK_RETURN (REG_ERPAREN);
371189857Sobrien		}
371289857Sobrien
371389857Sobrien            handle_close:
371489857Sobrien              if (fixup_alt_jump)
371589857Sobrien                { /* Push a dummy failure point at the end of the
371689857Sobrien                     alternative for a possible future
371789857Sobrien                     `pop_failure_jump' to pop.  See comments at
371889857Sobrien                     `push_dummy_failure' in `re_match_2'.  */
371989857Sobrien                  BUF_PUSH (push_dummy_failure);
372089857Sobrien
372189857Sobrien                  /* We allocated space for this jump when we assigned
372289857Sobrien                     to `fixup_alt_jump', in the `handle_alt' case below.  */
372389857Sobrien                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
372489857Sobrien                }
372589857Sobrien
372689857Sobrien              /* See similar code for backslashed right paren above.  */
372789857Sobrien              if (COMPILE_STACK_EMPTY)
372889857Sobrien		{
372989857Sobrien		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
373089857Sobrien		    goto normal_char;
373189857Sobrien		  else
373289857Sobrien		    FREE_STACK_RETURN (REG_ERPAREN);
373389857Sobrien		}
373489857Sobrien
373589857Sobrien              /* Since we just checked for an empty stack above, this
373689857Sobrien                 ``can't happen''.  */
373789857Sobrien              assert (compile_stack.avail != 0);
373889857Sobrien              {
373989857Sobrien                /* We don't just want to restore into `regnum', because
374089857Sobrien                   later groups should continue to be numbered higher,
374189857Sobrien                   as in `(ab)c(de)' -- the second group is #2.  */
374289857Sobrien                regnum_t this_group_regnum;
374389857Sobrien
374489857Sobrien                compile_stack.avail--;
374589857Sobrien                begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
374689857Sobrien                fixup_alt_jump
374789857Sobrien                  = COMPILE_STACK_TOP.fixup_alt_jump
374889857Sobrien                    ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
374989857Sobrien                    : 0;
375089857Sobrien                laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
375189857Sobrien                this_group_regnum = COMPILE_STACK_TOP.regnum;
375289857Sobrien		/* If we've reached MAX_REGNUM groups, then this close
375389857Sobrien		   won't actually generate any code, so we'll have to
375489857Sobrien		   clear pending_exact explicitly.  */
375589857Sobrien		pending_exact = 0;
375689857Sobrien
375789857Sobrien                /* We're at the end of the group, so now we know how many
375889857Sobrien                   groups were inside this one.  */
375989857Sobrien                if (this_group_regnum <= MAX_REGNUM)
376089857Sobrien                  {
376189857Sobrien		    UCHAR_T *inner_group_loc
376289857Sobrien                      = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
376389857Sobrien
376489857Sobrien                    *inner_group_loc = regnum - this_group_regnum;
376589857Sobrien                    BUF_PUSH_3 (stop_memory, this_group_regnum,
376689857Sobrien                                regnum - this_group_regnum);
376789857Sobrien                  }
376889857Sobrien              }
376989857Sobrien              break;
377089857Sobrien
377189857Sobrien
377289857Sobrien            case '|':					/* `\|'.  */
377389857Sobrien              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
377489857Sobrien                goto normal_backslash;
377589857Sobrien            handle_alt:
377689857Sobrien              if (syntax & RE_LIMITED_OPS)
377789857Sobrien                goto normal_char;
377889857Sobrien
377989857Sobrien              /* Insert before the previous alternative a jump which
378089857Sobrien                 jumps to this alternative if the former fails.  */
378189857Sobrien              GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
378289857Sobrien              INSERT_JUMP (on_failure_jump, begalt,
378389857Sobrien			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
378489857Sobrien              pending_exact = 0;
378589857Sobrien              b += 1 + OFFSET_ADDRESS_SIZE;
378689857Sobrien
378789857Sobrien              /* The alternative before this one has a jump after it
378889857Sobrien                 which gets executed if it gets matched.  Adjust that
378989857Sobrien                 jump so it will jump to this alternative's analogous
379089857Sobrien                 jump (put in below, which in turn will jump to the next
379189857Sobrien                 (if any) alternative's such jump, etc.).  The last such
379289857Sobrien                 jump jumps to the correct final destination.  A picture:
379389857Sobrien                          _____ _____
379489857Sobrien                          |   | |   |
379589857Sobrien                          |   v |   v
379689857Sobrien                         a | b   | c
379789857Sobrien
379889857Sobrien                 If we are at `b', then fixup_alt_jump right now points to a
379989857Sobrien                 three-byte space after `a'.  We'll put in the jump, set
380089857Sobrien                 fixup_alt_jump to right after `b', and leave behind three
380189857Sobrien                 bytes which we'll fill in when we get to after `c'.  */
380289857Sobrien
380389857Sobrien              if (fixup_alt_jump)
380489857Sobrien                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
380589857Sobrien
380689857Sobrien              /* Mark and leave space for a jump after this alternative,
380789857Sobrien                 to be filled in later either by next alternative or
380889857Sobrien                 when we know we're at the end of a series of alternatives.  */
380989857Sobrien              fixup_alt_jump = b;
381089857Sobrien              GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
381189857Sobrien              b += 1 + OFFSET_ADDRESS_SIZE;
381289857Sobrien
381389857Sobrien              laststart = 0;
381489857Sobrien              begalt = b;
381589857Sobrien              break;
381689857Sobrien
381789857Sobrien
381889857Sobrien            case '{':
381989857Sobrien              /* If \{ is a literal.  */
382089857Sobrien              if (!(syntax & RE_INTERVALS)
382189857Sobrien                     /* If we're at `\{' and it's not the open-interval
382289857Sobrien                        operator.  */
382389857Sobrien		  || (syntax & RE_NO_BK_BRACES))
382489857Sobrien                goto normal_backslash;
382589857Sobrien
382689857Sobrien            handle_interval:
382789857Sobrien              {
382889857Sobrien                /* If got here, then the syntax allows intervals.  */
382989857Sobrien
383089857Sobrien                /* At least (most) this many matches must be made.  */
383189857Sobrien                int lower_bound = -1, upper_bound = -1;
383289857Sobrien
383389857Sobrien		/* Place in the uncompiled pattern (i.e., just after
383489857Sobrien		   the '{') to go back to if the interval is invalid.  */
383589857Sobrien		const CHAR_T *beg_interval = p;
383689857Sobrien
383789857Sobrien                if (p == pend)
383889857Sobrien		  goto invalid_interval;
383989857Sobrien
384089857Sobrien                GET_UNSIGNED_NUMBER (lower_bound);
384189857Sobrien
384289857Sobrien                if (c == ',')
384389857Sobrien                  {
384489857Sobrien                    GET_UNSIGNED_NUMBER (upper_bound);
384589857Sobrien		    if (upper_bound < 0)
384689857Sobrien		      upper_bound = RE_DUP_MAX;
384789857Sobrien                  }
384889857Sobrien                else
384989857Sobrien                  /* Interval such as `{1}' => match exactly once. */
385089857Sobrien                  upper_bound = lower_bound;
385189857Sobrien
385289857Sobrien                if (! (0 <= lower_bound && lower_bound <= upper_bound))
385389857Sobrien		  goto invalid_interval;
385489857Sobrien
385589857Sobrien                if (!(syntax & RE_NO_BK_BRACES))
385689857Sobrien                  {
385789857Sobrien		    if (c != '\\' || p == pend)
385889857Sobrien		      goto invalid_interval;
385989857Sobrien                    PATFETCH (c);
386089857Sobrien                  }
386189857Sobrien
386289857Sobrien                if (c != '}')
386389857Sobrien		  goto invalid_interval;
386489857Sobrien
386589857Sobrien                /* If it's invalid to have no preceding re.  */
386689857Sobrien                if (!laststart)
386789857Sobrien                  {
386889857Sobrien		    if (syntax & RE_CONTEXT_INVALID_OPS
386989857Sobrien			&& !(syntax & RE_INVALID_INTERVAL_ORD))
387089857Sobrien                      FREE_STACK_RETURN (REG_BADRPT);
387189857Sobrien                    else if (syntax & RE_CONTEXT_INDEP_OPS)
387289857Sobrien                      laststart = b;
387389857Sobrien                    else
387489857Sobrien                      goto unfetch_interval;
387589857Sobrien                  }
387689857Sobrien
387789857Sobrien                /* We just parsed a valid interval.  */
387889857Sobrien
387989857Sobrien                if (RE_DUP_MAX < upper_bound)
388089857Sobrien		  FREE_STACK_RETURN (REG_BADBR);
388189857Sobrien
388289857Sobrien                /* If the upper bound is zero, don't want to succeed at
388389857Sobrien                   all; jump from `laststart' to `b + 3', which will be
388489857Sobrien		   the end of the buffer after we insert the jump.  */
388589857Sobrien		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
388689857Sobrien		   instead of 'b + 3'.  */
388789857Sobrien                 if (upper_bound == 0)
388889857Sobrien                   {
388989857Sobrien                     GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
389089857Sobrien                     INSERT_JUMP (jump, laststart, b + 1
389189857Sobrien				  + OFFSET_ADDRESS_SIZE);
389289857Sobrien                     b += 1 + OFFSET_ADDRESS_SIZE;
389389857Sobrien                   }
389489857Sobrien
389589857Sobrien                 /* Otherwise, we have a nontrivial interval.  When
389689857Sobrien                    we're all done, the pattern will look like:
389789857Sobrien                      set_number_at <jump count> <upper bound>
389889857Sobrien                      set_number_at <succeed_n count> <lower bound>
389989857Sobrien                      succeed_n <after jump addr> <succeed_n count>
390089857Sobrien                      <body of loop>
390189857Sobrien                      jump_n <succeed_n addr> <jump count>
390289857Sobrien                    (The upper bound and `jump_n' are omitted if
390389857Sobrien                    `upper_bound' is 1, though.)  */
390489857Sobrien                 else
390589857Sobrien                   { /* If the upper bound is > 1, we need to insert
390689857Sobrien                        more at the end of the loop.  */
390789857Sobrien                     unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
390889857Sobrien		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
390989857Sobrien
391089857Sobrien                     GET_BUFFER_SPACE (nbytes);
391189857Sobrien
391289857Sobrien                     /* Initialize lower bound of the `succeed_n', even
391389857Sobrien                        though it will be set during matching by its
391489857Sobrien                        attendant `set_number_at' (inserted next),
391589857Sobrien                        because `re_compile_fastmap' needs to know.
391689857Sobrien                        Jump to the `jump_n' we might insert below.  */
391789857Sobrien                     INSERT_JUMP2 (succeed_n, laststart,
391889857Sobrien                                   b + 1 + 2 * OFFSET_ADDRESS_SIZE
391989857Sobrien				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
392089857Sobrien				   , lower_bound);
392189857Sobrien                     b += 1 + 2 * OFFSET_ADDRESS_SIZE;
392289857Sobrien
392389857Sobrien                     /* Code to initialize the lower bound.  Insert
392489857Sobrien                        before the `succeed_n'.  The `5' is the last two
392589857Sobrien                        bytes of this `set_number_at', plus 3 bytes of
392689857Sobrien                        the following `succeed_n'.  */
392789857Sobrien		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
392889857Sobrien			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
392989857Sobrien			of the following `succeed_n'.  */
393089857Sobrien                     PREFIX(insert_op2) (set_number_at, laststart, 1
393189857Sobrien				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
393289857Sobrien                     b += 1 + 2 * OFFSET_ADDRESS_SIZE;
393389857Sobrien
393489857Sobrien                     if (upper_bound > 1)
393589857Sobrien                       { /* More than one repetition is allowed, so
393689857Sobrien                            append a backward jump to the `succeed_n'
393789857Sobrien                            that starts this interval.
393889857Sobrien
393989857Sobrien                            When we've reached this during matching,
394089857Sobrien                            we'll have matched the interval once, so
394189857Sobrien                            jump back only `upper_bound - 1' times.  */
394289857Sobrien                         STORE_JUMP2 (jump_n, b, laststart
394389857Sobrien				      + 2 * OFFSET_ADDRESS_SIZE + 1,
394489857Sobrien                                      upper_bound - 1);
394589857Sobrien                         b += 1 + 2 * OFFSET_ADDRESS_SIZE;
394689857Sobrien
394789857Sobrien                         /* The location we want to set is the second
394889857Sobrien                            parameter of the `jump_n'; that is `b-2' as
394989857Sobrien                            an absolute address.  `laststart' will be
395089857Sobrien                            the `set_number_at' we're about to insert;
395189857Sobrien                            `laststart+3' the number to set, the source
395289857Sobrien                            for the relative address.  But we are
395389857Sobrien                            inserting into the middle of the pattern --
395489857Sobrien                            so everything is getting moved up by 5.
395589857Sobrien                            Conclusion: (b - 2) - (laststart + 3) + 5,
395689857Sobrien                            i.e., b - laststart.
395789857Sobrien
395889857Sobrien                            We insert this at the beginning of the loop
395989857Sobrien                            so that if we fail during matching, we'll
396089857Sobrien                            reinitialize the bounds.  */
396189857Sobrien                         PREFIX(insert_op2) (set_number_at, laststart,
396289857Sobrien					     b - laststart,
396389857Sobrien					     upper_bound - 1, b);
396489857Sobrien                         b += 1 + 2 * OFFSET_ADDRESS_SIZE;
396589857Sobrien                       }
396689857Sobrien                   }
396789857Sobrien                pending_exact = 0;
396889857Sobrien		break;
396989857Sobrien
397089857Sobrien	      invalid_interval:
397189857Sobrien		if (!(syntax & RE_INVALID_INTERVAL_ORD))
397289857Sobrien		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
397389857Sobrien	      unfetch_interval:
397489857Sobrien		/* Match the characters as literals.  */
397589857Sobrien		p = beg_interval;
397689857Sobrien		c = '{';
397789857Sobrien		if (syntax & RE_NO_BK_BRACES)
397889857Sobrien		  goto normal_char;
397989857Sobrien		else
398089857Sobrien		  goto normal_backslash;
398189857Sobrien	      }
398289857Sobrien
398389857Sobrien#ifdef emacs
398489857Sobrien            /* There is no way to specify the before_dot and after_dot
398589857Sobrien               operators.  rms says this is ok.  --karl  */
398689857Sobrien            case '=':
398789857Sobrien              BUF_PUSH (at_dot);
398889857Sobrien              break;
398989857Sobrien
399089857Sobrien            case 's':
399189857Sobrien              laststart = b;
399289857Sobrien              PATFETCH (c);
399389857Sobrien              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
399489857Sobrien              break;
399589857Sobrien
399689857Sobrien            case 'S':
399789857Sobrien              laststart = b;
399889857Sobrien              PATFETCH (c);
399989857Sobrien              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
400089857Sobrien              break;
400189857Sobrien#endif /* emacs */
400289857Sobrien
400389857Sobrien
400489857Sobrien            case 'w':
400589857Sobrien	      if (syntax & RE_NO_GNU_OPS)
400689857Sobrien		goto normal_char;
400789857Sobrien              laststart = b;
400889857Sobrien              BUF_PUSH (wordchar);
400989857Sobrien              break;
401089857Sobrien
401189857Sobrien
401289857Sobrien            case 'W':
401389857Sobrien	      if (syntax & RE_NO_GNU_OPS)
401489857Sobrien		goto normal_char;
401589857Sobrien              laststart = b;
401689857Sobrien              BUF_PUSH (notwordchar);
401789857Sobrien              break;
401889857Sobrien
401989857Sobrien
402089857Sobrien            case '<':
402189857Sobrien	      if (syntax & RE_NO_GNU_OPS)
402289857Sobrien		goto normal_char;
402389857Sobrien              BUF_PUSH (wordbeg);
402489857Sobrien              break;
402589857Sobrien
402689857Sobrien            case '>':
402789857Sobrien	      if (syntax & RE_NO_GNU_OPS)
402889857Sobrien		goto normal_char;
402989857Sobrien              BUF_PUSH (wordend);
403089857Sobrien              break;
403189857Sobrien
403289857Sobrien            case 'b':
403389857Sobrien	      if (syntax & RE_NO_GNU_OPS)
403489857Sobrien		goto normal_char;
403589857Sobrien              BUF_PUSH (wordbound);
403689857Sobrien              break;
403789857Sobrien
403889857Sobrien            case 'B':
403989857Sobrien	      if (syntax & RE_NO_GNU_OPS)
404089857Sobrien		goto normal_char;
404189857Sobrien              BUF_PUSH (notwordbound);
404289857Sobrien              break;
404389857Sobrien
404489857Sobrien            case '`':
404589857Sobrien	      if (syntax & RE_NO_GNU_OPS)
404689857Sobrien		goto normal_char;
404789857Sobrien              BUF_PUSH (begbuf);
404889857Sobrien              break;
404989857Sobrien
405089857Sobrien            case '\'':
405189857Sobrien	      if (syntax & RE_NO_GNU_OPS)
405289857Sobrien		goto normal_char;
405389857Sobrien              BUF_PUSH (endbuf);
405489857Sobrien              break;
405589857Sobrien
405689857Sobrien            case '1': case '2': case '3': case '4': case '5':
405789857Sobrien            case '6': case '7': case '8': case '9':
405889857Sobrien              if (syntax & RE_NO_BK_REFS)
405989857Sobrien                goto normal_char;
406089857Sobrien
406189857Sobrien              c1 = c - '0';
406289857Sobrien
406389857Sobrien              if (c1 > regnum)
406489857Sobrien                FREE_STACK_RETURN (REG_ESUBREG);
406589857Sobrien
406689857Sobrien              /* Can't back reference to a subexpression if inside of it.  */
406789857Sobrien              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
406889857Sobrien                goto normal_char;
406989857Sobrien
407089857Sobrien              laststart = b;
407189857Sobrien              BUF_PUSH_2 (duplicate, c1);
407289857Sobrien              break;
407389857Sobrien
407489857Sobrien
407589857Sobrien            case '+':
407689857Sobrien            case '?':
407789857Sobrien              if (syntax & RE_BK_PLUS_QM)
407889857Sobrien                goto handle_plus;
407989857Sobrien              else
408089857Sobrien                goto normal_backslash;
408189857Sobrien
408289857Sobrien            default:
408389857Sobrien            normal_backslash:
408489857Sobrien              /* You might think it would be useful for \ to mean
408589857Sobrien                 not to translate; but if we don't translate it
408689857Sobrien                 it will never match anything.  */
408789857Sobrien              c = TRANSLATE (c);
408889857Sobrien              goto normal_char;
408989857Sobrien            }
409089857Sobrien          break;
409189857Sobrien
409289857Sobrien
409389857Sobrien	default:
409489857Sobrien        /* Expects the character in `c'.  */
409589857Sobrien	normal_char:
409689857Sobrien	      /* If no exactn currently being built.  */
409789857Sobrien          if (!pending_exact
409889857Sobrien#ifdef WCHAR
409989857Sobrien	      /* If last exactn handle binary(or character) and
410089857Sobrien		 new exactn handle character(or binary).  */
410189857Sobrien	      || is_exactn_bin != is_binary[p - 1 - pattern]
410289857Sobrien#endif /* WCHAR */
410389857Sobrien
410489857Sobrien              /* If last exactn not at current position.  */
410589857Sobrien              || pending_exact + *pending_exact + 1 != b
410689857Sobrien
410789857Sobrien              /* We have only one byte following the exactn for the count.  */
410889857Sobrien	      || *pending_exact == (1 << BYTEWIDTH) - 1
410989857Sobrien
411089857Sobrien              /* If followed by a repetition operator.  */
411189857Sobrien              || *p == '*' || *p == '^'
411289857Sobrien	      || ((syntax & RE_BK_PLUS_QM)
411389857Sobrien		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
411489857Sobrien		  : (*p == '+' || *p == '?'))
411589857Sobrien	      || ((syntax & RE_INTERVALS)
411689857Sobrien                  && ((syntax & RE_NO_BK_BRACES)
411789857Sobrien		      ? *p == '{'
411889857Sobrien                      : (p[0] == '\\' && p[1] == '{'))))
411989857Sobrien	    {
412089857Sobrien	      /* Start building a new exactn.  */
412189857Sobrien
412289857Sobrien              laststart = b;
412389857Sobrien
412489857Sobrien#ifdef WCHAR
412589857Sobrien	      /* Is this exactn binary data or character? */
412689857Sobrien	      is_exactn_bin = is_binary[p - 1 - pattern];
412789857Sobrien	      if (is_exactn_bin)
412889857Sobrien		  BUF_PUSH_2 (exactn_bin, 0);
412989857Sobrien	      else
413089857Sobrien		  BUF_PUSH_2 (exactn, 0);
413189857Sobrien#else
413289857Sobrien	      BUF_PUSH_2 (exactn, 0);
413389857Sobrien#endif /* WCHAR */
413489857Sobrien	      pending_exact = b - 1;
413589857Sobrien            }
413689857Sobrien
413789857Sobrien	  BUF_PUSH (c);
413889857Sobrien          (*pending_exact)++;
413989857Sobrien	  break;
414089857Sobrien        } /* switch (c) */
414189857Sobrien    } /* while p != pend */
414289857Sobrien
414389857Sobrien
414489857Sobrien  /* Through the pattern now.  */
414589857Sobrien
414689857Sobrien  if (fixup_alt_jump)
414789857Sobrien    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
414889857Sobrien
414989857Sobrien  if (!COMPILE_STACK_EMPTY)
415089857Sobrien    FREE_STACK_RETURN (REG_EPAREN);
415189857Sobrien
415289857Sobrien  /* If we don't want backtracking, force success
415389857Sobrien     the first time we reach the end of the compiled pattern.  */
415489857Sobrien  if (syntax & RE_NO_POSIX_BACKTRACKING)
415589857Sobrien    BUF_PUSH (succeed);
415689857Sobrien
415789857Sobrien#ifdef WCHAR
415889857Sobrien  free (pattern);
415989857Sobrien  free (mbs_offset);
416089857Sobrien  free (is_binary);
416189857Sobrien#endif
416289857Sobrien  free (compile_stack.stack);
416389857Sobrien
416489857Sobrien  /* We have succeeded; set the length of the buffer.  */
416589857Sobrien#ifdef WCHAR
416689857Sobrien  bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
416789857Sobrien#else
416889857Sobrien  bufp->used = b - bufp->buffer;
416989857Sobrien#endif
417089857Sobrien
417189857Sobrien#ifdef DEBUG
417289857Sobrien  if (debug)
417389857Sobrien    {
417489857Sobrien      DEBUG_PRINT1 ("\nCompiled pattern: \n");
417589857Sobrien      PREFIX(print_compiled_pattern) (bufp);
417689857Sobrien    }
417789857Sobrien#endif /* DEBUG */
417889857Sobrien
417989857Sobrien#ifndef MATCH_MAY_ALLOCATE
418089857Sobrien  /* Initialize the failure stack to the largest possible stack.  This
418189857Sobrien     isn't necessary unless we're trying to avoid calling alloca in
418289857Sobrien     the search and match routines.  */
418389857Sobrien  {
418489857Sobrien    int num_regs = bufp->re_nsub + 1;
418589857Sobrien
418689857Sobrien    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
418789857Sobrien       is strictly greater than re_max_failures, the largest possible stack
418889857Sobrien       is 2 * re_max_failures failure points.  */
418989857Sobrien    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
419089857Sobrien      {
419189857Sobrien	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
419289857Sobrien
419389857Sobrien# ifdef emacs
419489857Sobrien	if (! fail_stack.stack)
419589857Sobrien	  fail_stack.stack
419689857Sobrien	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
419789857Sobrien				    * sizeof (PREFIX(fail_stack_elt_t)));
419889857Sobrien	else
419989857Sobrien	  fail_stack.stack
420089857Sobrien	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
420189857Sobrien				     (fail_stack.size
420289857Sobrien				      * sizeof (PREFIX(fail_stack_elt_t))));
420389857Sobrien# else /* not emacs */
420489857Sobrien	if (! fail_stack.stack)
420589857Sobrien	  fail_stack.stack
420689857Sobrien	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
420789857Sobrien				   * sizeof (PREFIX(fail_stack_elt_t)));
420889857Sobrien	else
420989857Sobrien	  fail_stack.stack
421089857Sobrien	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
421189857Sobrien					    (fail_stack.size
421289857Sobrien				     * sizeof (PREFIX(fail_stack_elt_t))));
421389857Sobrien# endif /* not emacs */
421489857Sobrien      }
421589857Sobrien
421689857Sobrien   PREFIX(regex_grow_registers) (num_regs);
421789857Sobrien  }
421889857Sobrien#endif /* not MATCH_MAY_ALLOCATE */
421989857Sobrien
422089857Sobrien  return REG_NOERROR;
422189857Sobrien} /* regex_compile */
422289857Sobrien
422389857Sobrien/* Subroutines for `regex_compile'.  */
422489857Sobrien
422589857Sobrien/* Store OP at LOC followed by two-byte integer parameter ARG.  */
422689857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
422789857Sobrien
422889857Sobrienstatic void
4229218822SdimPREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
423089857Sobrien{
423189857Sobrien  *loc = (UCHAR_T) op;
423289857Sobrien  STORE_NUMBER (loc + 1, arg);
423389857Sobrien}
423489857Sobrien
423589857Sobrien
423689857Sobrien/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
423789857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
423889857Sobrien
423989857Sobrienstatic void
4240218822SdimPREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
424189857Sobrien{
424289857Sobrien  *loc = (UCHAR_T) op;
424389857Sobrien  STORE_NUMBER (loc + 1, arg1);
424489857Sobrien  STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
424589857Sobrien}
424689857Sobrien
424789857Sobrien
424889857Sobrien/* Copy the bytes from LOC to END to open up three bytes of space at LOC
424989857Sobrien   for OP followed by two-byte integer parameter ARG.  */
425089857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
425189857Sobrien
425289857Sobrienstatic void
4253218822SdimPREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
425489857Sobrien{
425589857Sobrien  register UCHAR_T *pfrom = end;
425689857Sobrien  register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
425789857Sobrien
425889857Sobrien  while (pfrom != loc)
425989857Sobrien    *--pto = *--pfrom;
426089857Sobrien
426189857Sobrien  PREFIX(store_op1) (op, loc, arg);
426289857Sobrien}
426389857Sobrien
426489857Sobrien
426589857Sobrien/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
426689857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
426789857Sobrien
426889857Sobrienstatic void
4269218822SdimPREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
4270218822Sdim                    int arg2, UCHAR_T *end)
427189857Sobrien{
427289857Sobrien  register UCHAR_T *pfrom = end;
427389857Sobrien  register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
427489857Sobrien
427589857Sobrien  while (pfrom != loc)
427689857Sobrien    *--pto = *--pfrom;
427789857Sobrien
427889857Sobrien  PREFIX(store_op2) (op, loc, arg1, arg2);
427989857Sobrien}
428089857Sobrien
428189857Sobrien
428289857Sobrien/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
428389857Sobrien   after an alternative or a begin-subexpression.  We assume there is at
428489857Sobrien   least one character before the ^.  */
428589857Sobrien
428689857Sobrienstatic boolean
4287218822SdimPREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
4288218822Sdim                          reg_syntax_t syntax)
428989857Sobrien{
429089857Sobrien  const CHAR_T *prev = p - 2;
429189857Sobrien  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
429289857Sobrien
429389857Sobrien  return
429489857Sobrien       /* After a subexpression?  */
429589857Sobrien       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
429689857Sobrien       /* After an alternative?  */
429789857Sobrien    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
429889857Sobrien}
429989857Sobrien
430089857Sobrien
430189857Sobrien/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
430289857Sobrien   at least one character after the $, i.e., `P < PEND'.  */
430389857Sobrien
430489857Sobrienstatic boolean
4305218822SdimPREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
4306218822Sdim                          reg_syntax_t syntax)
430789857Sobrien{
430889857Sobrien  const CHAR_T *next = p;
430989857Sobrien  boolean next_backslash = *next == '\\';
431089857Sobrien  const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
431189857Sobrien
431289857Sobrien  return
431389857Sobrien       /* Before a subexpression?  */
431489857Sobrien       (syntax & RE_NO_BK_PARENS ? *next == ')'
431589857Sobrien        : next_backslash && next_next && *next_next == ')')
431689857Sobrien       /* Before an alternative?  */
431789857Sobrien    || (syntax & RE_NO_BK_VBAR ? *next == '|'
431889857Sobrien        : next_backslash && next_next && *next_next == '|');
431989857Sobrien}
432089857Sobrien
432189857Sobrien#else /* not INSIDE_RECURSION */
432289857Sobrien
432389857Sobrien/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
432489857Sobrien   false if it's not.  */
432589857Sobrien
432689857Sobrienstatic boolean
4327218822Sdimgroup_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
432889857Sobrien{
432989857Sobrien  int this_element;
433089857Sobrien
433189857Sobrien  for (this_element = compile_stack.avail - 1;
433289857Sobrien       this_element >= 0;
433389857Sobrien       this_element--)
433489857Sobrien    if (compile_stack.stack[this_element].regnum == regnum)
433589857Sobrien      return true;
433689857Sobrien
433789857Sobrien  return false;
433889857Sobrien}
433989857Sobrien#endif /* not INSIDE_RECURSION */
434089857Sobrien
434189857Sobrien#ifdef INSIDE_RECURSION
434289857Sobrien
434389857Sobrien#ifdef WCHAR
434489857Sobrien/* This insert space, which size is "num", into the pattern at "loc".
434589857Sobrien   "end" must point the end of the allocated buffer.  */
434689857Sobrienstatic void
4347218822Sdiminsert_space (int num, CHAR_T *loc, CHAR_T *end)
434889857Sobrien{
434989857Sobrien  register CHAR_T *pto = end;
435089857Sobrien  register CHAR_T *pfrom = end - num;
435189857Sobrien
435289857Sobrien  while (pfrom >= loc)
435389857Sobrien    *pto-- = *pfrom--;
435489857Sobrien}
435589857Sobrien#endif /* WCHAR */
435689857Sobrien
435789857Sobrien#ifdef WCHAR
435889857Sobrienstatic reg_errcode_t
4359218822Sdimwcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
4360218822Sdim                   const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
4361218822Sdim                   reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
436289857Sobrien{
436389857Sobrien  const CHAR_T *p = *p_ptr;
436489857Sobrien  CHAR_T range_start, range_end;
436589857Sobrien  reg_errcode_t ret;
436689857Sobrien# ifdef _LIBC
436789857Sobrien  uint32_t nrules;
436889857Sobrien  uint32_t start_val, end_val;
436989857Sobrien# endif
437089857Sobrien  if (p == pend)
437189857Sobrien    return REG_ERANGE;
437289857Sobrien
437389857Sobrien# ifdef _LIBC
437489857Sobrien  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
437589857Sobrien  if (nrules != 0)
437689857Sobrien    {
437789857Sobrien      const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
437889857Sobrien						       _NL_COLLATE_COLLSEQWC);
437989857Sobrien      const unsigned char *extra = (const unsigned char *)
438089857Sobrien	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
438189857Sobrien
438289857Sobrien      if (range_start_char < -1)
438389857Sobrien	{
438489857Sobrien	  /* range_start is a collating symbol.  */
438589857Sobrien	  int32_t *wextra;
438689857Sobrien	  /* Retreive the index and get collation sequence value.  */
438789857Sobrien	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
438889857Sobrien	  start_val = wextra[1 + *wextra];
438989857Sobrien	}
439089857Sobrien      else
439189857Sobrien	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
439289857Sobrien
439389857Sobrien      end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
439489857Sobrien
439589857Sobrien      /* Report an error if the range is empty and the syntax prohibits
439689857Sobrien	 this.  */
439789857Sobrien      ret = ((syntax & RE_NO_EMPTY_RANGES)
439889857Sobrien	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
439989857Sobrien
440089857Sobrien      /* Insert space to the end of the char_ranges.  */
440189857Sobrien      insert_space(2, b - char_set[5] - 2, b - 1);
440289857Sobrien      *(b - char_set[5] - 2) = (wchar_t)start_val;
440389857Sobrien      *(b - char_set[5] - 1) = (wchar_t)end_val;
440489857Sobrien      char_set[4]++; /* ranges_index */
440589857Sobrien    }
440689857Sobrien  else
440789857Sobrien# endif
440889857Sobrien    {
440989857Sobrien      range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
441089857Sobrien	range_start_char;
441189857Sobrien      range_end = TRANSLATE (p[0]);
441289857Sobrien      /* Report an error if the range is empty and the syntax prohibits
441389857Sobrien	 this.  */
441489857Sobrien      ret = ((syntax & RE_NO_EMPTY_RANGES)
441589857Sobrien	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
441689857Sobrien
441789857Sobrien      /* Insert space to the end of the char_ranges.  */
441889857Sobrien      insert_space(2, b - char_set[5] - 2, b - 1);
441989857Sobrien      *(b - char_set[5] - 2) = range_start;
442089857Sobrien      *(b - char_set[5] - 1) = range_end;
442189857Sobrien      char_set[4]++; /* ranges_index */
442289857Sobrien    }
442389857Sobrien  /* Have to increment the pointer into the pattern string, so the
442489857Sobrien     caller isn't still at the ending character.  */
442589857Sobrien  (*p_ptr)++;
442689857Sobrien
442789857Sobrien  return ret;
442889857Sobrien}
442989857Sobrien#else /* BYTE */
443089857Sobrien/* Read the ending character of a range (in a bracket expression) from the
443189857Sobrien   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
443289857Sobrien   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
443389857Sobrien   Then we set the translation of all bits between the starting and
443489857Sobrien   ending characters (inclusive) in the compiled pattern B.
443589857Sobrien
443689857Sobrien   Return an error code.
443789857Sobrien
443889857Sobrien   We use these short variable names so we can use the same macros as
443989857Sobrien   `regex_compile' itself.  */
444089857Sobrien
444189857Sobrienstatic reg_errcode_t
4442218822Sdimbyte_compile_range (unsigned int range_start_char, const char **p_ptr,
4443218822Sdim                    const char *pend, RE_TRANSLATE_TYPE translate,
4444218822Sdim                    reg_syntax_t syntax, unsigned char *b)
444589857Sobrien{
444689857Sobrien  unsigned this_char;
444789857Sobrien  const char *p = *p_ptr;
444889857Sobrien  reg_errcode_t ret;
444989857Sobrien# if _LIBC
445089857Sobrien  const unsigned char *collseq;
445189857Sobrien  unsigned int start_colseq;
445289857Sobrien  unsigned int end_colseq;
445389857Sobrien# else
445489857Sobrien  unsigned end_char;
445589857Sobrien# endif
445689857Sobrien
445789857Sobrien  if (p == pend)
445889857Sobrien    return REG_ERANGE;
445989857Sobrien
446089857Sobrien  /* Have to increment the pointer into the pattern string, so the
446189857Sobrien     caller isn't still at the ending character.  */
446289857Sobrien  (*p_ptr)++;
446389857Sobrien
446489857Sobrien  /* Report an error if the range is empty and the syntax prohibits this.  */
446589857Sobrien  ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
446689857Sobrien
446789857Sobrien# if _LIBC
446889857Sobrien  collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
446989857Sobrien						 _NL_COLLATE_COLLSEQMB);
447089857Sobrien
447189857Sobrien  start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
447289857Sobrien  end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
447389857Sobrien  for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
447489857Sobrien    {
447589857Sobrien      unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
447689857Sobrien
447789857Sobrien      if (start_colseq <= this_colseq && this_colseq <= end_colseq)
447889857Sobrien	{
447989857Sobrien	  SET_LIST_BIT (TRANSLATE (this_char));
448089857Sobrien	  ret = REG_NOERROR;
448189857Sobrien	}
448289857Sobrien    }
448389857Sobrien# else
448489857Sobrien  /* Here we see why `this_char' has to be larger than an `unsigned
448589857Sobrien     char' -- we would otherwise go into an infinite loop, since all
448689857Sobrien     characters <= 0xff.  */
448789857Sobrien  range_start_char = TRANSLATE (range_start_char);
448889857Sobrien  /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
448989857Sobrien     and some compilers cast it to int implicitly, so following for_loop
449089857Sobrien     may fall to (almost) infinite loop.
449189857Sobrien     e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
449289857Sobrien     To avoid this, we cast p[0] to unsigned int and truncate it.  */
449389857Sobrien  end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
449489857Sobrien
449589857Sobrien  for (this_char = range_start_char; this_char <= end_char; ++this_char)
449689857Sobrien    {
449789857Sobrien      SET_LIST_BIT (TRANSLATE (this_char));
449889857Sobrien      ret = REG_NOERROR;
449989857Sobrien    }
450089857Sobrien# endif
450189857Sobrien
450289857Sobrien  return ret;
450389857Sobrien}
450489857Sobrien#endif /* WCHAR */
450589857Sobrien
450689857Sobrien/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
450789857Sobrien   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
450889857Sobrien   characters can start a string that matches the pattern.  This fastmap
450989857Sobrien   is used by re_search to skip quickly over impossible starting points.
451089857Sobrien
451189857Sobrien   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
451289857Sobrien   area as BUFP->fastmap.
451389857Sobrien
451489857Sobrien   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
451589857Sobrien   the pattern buffer.
451689857Sobrien
451789857Sobrien   Returns 0 if we succeed, -2 if an internal error.   */
451889857Sobrien
451989857Sobrien#ifdef WCHAR
452089857Sobrien/* local function for re_compile_fastmap.
452189857Sobrien   truncate wchar_t character to char.  */
452289857Sobrienstatic unsigned char truncate_wchar (CHAR_T c);
452389857Sobrien
452489857Sobrienstatic unsigned char
4525218822Sdimtruncate_wchar (CHAR_T c)
452689857Sobrien{
452789857Sobrien  unsigned char buf[MB_CUR_MAX];
452889857Sobrien  mbstate_t state;
452989857Sobrien  int retval;
453089857Sobrien  memset (&state, '\0', sizeof (state));
453189857Sobrien# ifdef _LIBC
453289857Sobrien  retval = __wcrtomb (buf, c, &state);
453389857Sobrien# else
453489857Sobrien  retval = wcrtomb (buf, c, &state);
453589857Sobrien# endif
453689857Sobrien  return retval > 0 ? buf[0] : (unsigned char) c;
453789857Sobrien}
453889857Sobrien#endif /* WCHAR */
453989857Sobrien
454089857Sobrienstatic int
4541218822SdimPREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
454289857Sobrien{
454389857Sobrien  int j, k;
454489857Sobrien#ifdef MATCH_MAY_ALLOCATE
454589857Sobrien  PREFIX(fail_stack_type) fail_stack;
454689857Sobrien#endif
454789857Sobrien#ifndef REGEX_MALLOC
454889857Sobrien  char *destination;
454989857Sobrien#endif
455089857Sobrien
455189857Sobrien  register char *fastmap = bufp->fastmap;
455289857Sobrien
455389857Sobrien#ifdef WCHAR
455489857Sobrien  /* We need to cast pattern to (wchar_t*), because we casted this compiled
455589857Sobrien     pattern to (char*) in regex_compile.  */
455689857Sobrien  UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
455789857Sobrien  register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
455889857Sobrien#else /* BYTE */
455989857Sobrien  UCHAR_T *pattern = bufp->buffer;
456089857Sobrien  register UCHAR_T *pend = pattern + bufp->used;
456189857Sobrien#endif /* WCHAR */
456289857Sobrien  UCHAR_T *p = pattern;
456389857Sobrien
456489857Sobrien#ifdef REL_ALLOC
456589857Sobrien  /* This holds the pointer to the failure stack, when
456689857Sobrien     it is allocated relocatably.  */
456789857Sobrien  fail_stack_elt_t *failure_stack_ptr;
456889857Sobrien#endif
456989857Sobrien
457089857Sobrien  /* Assume that each path through the pattern can be null until
457189857Sobrien     proven otherwise.  We set this false at the bottom of switch
457289857Sobrien     statement, to which we get only if a particular path doesn't
457389857Sobrien     match the empty string.  */
457489857Sobrien  boolean path_can_be_null = true;
457589857Sobrien
457689857Sobrien  /* We aren't doing a `succeed_n' to begin with.  */
457789857Sobrien  boolean succeed_n_p = false;
457889857Sobrien
457989857Sobrien  assert (fastmap != NULL && p != NULL);
458089857Sobrien
458189857Sobrien  INIT_FAIL_STACK ();
458289857Sobrien  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
458389857Sobrien  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
458489857Sobrien  bufp->can_be_null = 0;
458589857Sobrien
458689857Sobrien  while (1)
458789857Sobrien    {
4588130561Sobrien      if (p == pend || *p == (UCHAR_T) succeed)
458989857Sobrien	{
459089857Sobrien	  /* We have reached the (effective) end of pattern.  */
459189857Sobrien	  if (!FAIL_STACK_EMPTY ())
459289857Sobrien	    {
459389857Sobrien	      bufp->can_be_null |= path_can_be_null;
459489857Sobrien
459589857Sobrien	      /* Reset for next path.  */
459689857Sobrien	      path_can_be_null = true;
459789857Sobrien
459889857Sobrien	      p = fail_stack.stack[--fail_stack.avail].pointer;
459989857Sobrien
460089857Sobrien	      continue;
460189857Sobrien	    }
460289857Sobrien	  else
460389857Sobrien	    break;
460489857Sobrien	}
460589857Sobrien
460689857Sobrien      /* We should never be about to go beyond the end of the pattern.  */
460789857Sobrien      assert (p < pend);
460889857Sobrien
460989857Sobrien      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
461089857Sobrien	{
461189857Sobrien
461289857Sobrien        /* I guess the idea here is to simply not bother with a fastmap
461389857Sobrien           if a backreference is used, since it's too hard to figure out
461489857Sobrien           the fastmap for the corresponding group.  Setting
461589857Sobrien           `can_be_null' stops `re_search_2' from using the fastmap, so
461689857Sobrien           that is all we do.  */
461789857Sobrien	case duplicate:
461889857Sobrien	  bufp->can_be_null = 1;
461989857Sobrien          goto done;
462089857Sobrien
462189857Sobrien
462289857Sobrien      /* Following are the cases which match a character.  These end
462389857Sobrien         with `break'.  */
462489857Sobrien
462589857Sobrien#ifdef WCHAR
462689857Sobrien	case exactn:
462789857Sobrien          fastmap[truncate_wchar(p[1])] = 1;
462889857Sobrien	  break;
462989857Sobrien#else /* BYTE */
463089857Sobrien	case exactn:
463189857Sobrien          fastmap[p[1]] = 1;
463289857Sobrien	  break;
463389857Sobrien#endif /* WCHAR */
463489857Sobrien#ifdef MBS_SUPPORT
463589857Sobrien	case exactn_bin:
463689857Sobrien	  fastmap[p[1]] = 1;
463789857Sobrien	  break;
463889857Sobrien#endif
463989857Sobrien
464089857Sobrien#ifdef WCHAR
464189857Sobrien        /* It is hard to distinguish fastmap from (multi byte) characters
464289857Sobrien           which depends on current locale.  */
464389857Sobrien        case charset:
464489857Sobrien	case charset_not:
464589857Sobrien	case wordchar:
464689857Sobrien	case notwordchar:
464789857Sobrien          bufp->can_be_null = 1;
464889857Sobrien          goto done;
464989857Sobrien#else /* BYTE */
465089857Sobrien        case charset:
465189857Sobrien          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
465289857Sobrien	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
465389857Sobrien              fastmap[j] = 1;
465489857Sobrien	  break;
465589857Sobrien
465689857Sobrien
465789857Sobrien	case charset_not:
465889857Sobrien	  /* Chars beyond end of map must be allowed.  */
465989857Sobrien	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
466089857Sobrien            fastmap[j] = 1;
466189857Sobrien
466289857Sobrien	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
466389857Sobrien	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
466489857Sobrien              fastmap[j] = 1;
466589857Sobrien          break;
466689857Sobrien
466789857Sobrien
466889857Sobrien	case wordchar:
466989857Sobrien	  for (j = 0; j < (1 << BYTEWIDTH); j++)
467089857Sobrien	    if (SYNTAX (j) == Sword)
467189857Sobrien	      fastmap[j] = 1;
467289857Sobrien	  break;
467389857Sobrien
467489857Sobrien
467589857Sobrien	case notwordchar:
467689857Sobrien	  for (j = 0; j < (1 << BYTEWIDTH); j++)
467789857Sobrien	    if (SYNTAX (j) != Sword)
467889857Sobrien	      fastmap[j] = 1;
467989857Sobrien	  break;
468089857Sobrien#endif /* WCHAR */
468189857Sobrien
468289857Sobrien        case anychar:
468389857Sobrien	  {
468489857Sobrien	    int fastmap_newline = fastmap['\n'];
468589857Sobrien
468689857Sobrien	    /* `.' matches anything ...  */
468789857Sobrien	    for (j = 0; j < (1 << BYTEWIDTH); j++)
468889857Sobrien	      fastmap[j] = 1;
468989857Sobrien
469089857Sobrien	    /* ... except perhaps newline.  */
469189857Sobrien	    if (!(bufp->syntax & RE_DOT_NEWLINE))
469289857Sobrien	      fastmap['\n'] = fastmap_newline;
469389857Sobrien
469489857Sobrien	    /* Return if we have already set `can_be_null'; if we have,
469589857Sobrien	       then the fastmap is irrelevant.  Something's wrong here.  */
469689857Sobrien	    else if (bufp->can_be_null)
469789857Sobrien	      goto done;
469889857Sobrien
469989857Sobrien	    /* Otherwise, have to check alternative paths.  */
470089857Sobrien	    break;
470189857Sobrien	  }
470289857Sobrien
470389857Sobrien#ifdef emacs
470489857Sobrien        case syntaxspec:
470589857Sobrien	  k = *p++;
470689857Sobrien	  for (j = 0; j < (1 << BYTEWIDTH); j++)
470789857Sobrien	    if (SYNTAX (j) == (enum syntaxcode) k)
470889857Sobrien	      fastmap[j] = 1;
470989857Sobrien	  break;
471089857Sobrien
471189857Sobrien
471289857Sobrien	case notsyntaxspec:
471389857Sobrien	  k = *p++;
471489857Sobrien	  for (j = 0; j < (1 << BYTEWIDTH); j++)
471589857Sobrien	    if (SYNTAX (j) != (enum syntaxcode) k)
471689857Sobrien	      fastmap[j] = 1;
471789857Sobrien	  break;
471889857Sobrien
471989857Sobrien
472089857Sobrien      /* All cases after this match the empty string.  These end with
472189857Sobrien         `continue'.  */
472289857Sobrien
472389857Sobrien
472489857Sobrien	case before_dot:
472589857Sobrien	case at_dot:
472689857Sobrien	case after_dot:
472789857Sobrien          continue;
472889857Sobrien#endif /* emacs */
472989857Sobrien
473089857Sobrien
473189857Sobrien        case no_op:
473289857Sobrien        case begline:
473389857Sobrien        case endline:
473489857Sobrien	case begbuf:
473589857Sobrien	case endbuf:
473689857Sobrien	case wordbound:
473789857Sobrien	case notwordbound:
473889857Sobrien	case wordbeg:
473989857Sobrien	case wordend:
474089857Sobrien        case push_dummy_failure:
474189857Sobrien          continue;
474289857Sobrien
474389857Sobrien
474489857Sobrien	case jump_n:
474589857Sobrien        case pop_failure_jump:
474689857Sobrien	case maybe_pop_jump:
474789857Sobrien	case jump:
474889857Sobrien        case jump_past_alt:
474989857Sobrien	case dummy_failure_jump:
475089857Sobrien          EXTRACT_NUMBER_AND_INCR (j, p);
475189857Sobrien	  p += j;
475289857Sobrien	  if (j > 0)
475389857Sobrien	    continue;
475489857Sobrien
475589857Sobrien          /* Jump backward implies we just went through the body of a
475689857Sobrien             loop and matched nothing.  Opcode jumped to should be
475789857Sobrien             `on_failure_jump' or `succeed_n'.  Just treat it like an
475889857Sobrien             ordinary jump.  For a * loop, it has pushed its failure
475989857Sobrien             point already; if so, discard that as redundant.  */
476089857Sobrien          if ((re_opcode_t) *p != on_failure_jump
476189857Sobrien	      && (re_opcode_t) *p != succeed_n)
476289857Sobrien	    continue;
476389857Sobrien
476489857Sobrien          p++;
476589857Sobrien          EXTRACT_NUMBER_AND_INCR (j, p);
476689857Sobrien          p += j;
476789857Sobrien
476889857Sobrien          /* If what's on the stack is where we are now, pop it.  */
476989857Sobrien          if (!FAIL_STACK_EMPTY ()
477089857Sobrien	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
477189857Sobrien            fail_stack.avail--;
477289857Sobrien
477389857Sobrien          continue;
477489857Sobrien
477589857Sobrien
477689857Sobrien        case on_failure_jump:
477789857Sobrien        case on_failure_keep_string_jump:
477889857Sobrien	handle_on_failure_jump:
477989857Sobrien          EXTRACT_NUMBER_AND_INCR (j, p);
478089857Sobrien
478189857Sobrien          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
478289857Sobrien             end of the pattern.  We don't want to push such a point,
478389857Sobrien             since when we restore it above, entering the switch will
478489857Sobrien             increment `p' past the end of the pattern.  We don't need
478589857Sobrien             to push such a point since we obviously won't find any more
478689857Sobrien             fastmap entries beyond `pend'.  Such a pattern can match
478789857Sobrien             the null string, though.  */
478889857Sobrien          if (p + j < pend)
478989857Sobrien            {
479089857Sobrien              if (!PUSH_PATTERN_OP (p + j, fail_stack))
479189857Sobrien		{
479289857Sobrien		  RESET_FAIL_STACK ();
479389857Sobrien		  return -2;
479489857Sobrien		}
479589857Sobrien            }
479689857Sobrien          else
479789857Sobrien            bufp->can_be_null = 1;
479889857Sobrien
479989857Sobrien          if (succeed_n_p)
480089857Sobrien            {
480189857Sobrien              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
480289857Sobrien              succeed_n_p = false;
480389857Sobrien	    }
480489857Sobrien
480589857Sobrien          continue;
480689857Sobrien
480789857Sobrien
480889857Sobrien	case succeed_n:
480989857Sobrien          /* Get to the number of times to succeed.  */
481089857Sobrien          p += OFFSET_ADDRESS_SIZE;
481189857Sobrien
481289857Sobrien          /* Increment p past the n for when k != 0.  */
481389857Sobrien          EXTRACT_NUMBER_AND_INCR (k, p);
481489857Sobrien          if (k == 0)
481589857Sobrien	    {
481689857Sobrien              p -= 2 * OFFSET_ADDRESS_SIZE;
481789857Sobrien  	      succeed_n_p = true;  /* Spaghetti code alert.  */
481889857Sobrien              goto handle_on_failure_jump;
481989857Sobrien            }
482089857Sobrien          continue;
482189857Sobrien
482289857Sobrien
482389857Sobrien	case set_number_at:
482489857Sobrien          p += 2 * OFFSET_ADDRESS_SIZE;
482589857Sobrien          continue;
482689857Sobrien
482789857Sobrien
482889857Sobrien	case start_memory:
482989857Sobrien        case stop_memory:
483089857Sobrien	  p += 2;
483189857Sobrien	  continue;
483289857Sobrien
483389857Sobrien
483489857Sobrien	default:
483589857Sobrien          abort (); /* We have listed all the cases.  */
483689857Sobrien        } /* switch *p++ */
483789857Sobrien
483889857Sobrien      /* Getting here means we have found the possible starting
483989857Sobrien         characters for one path of the pattern -- and that the empty
484089857Sobrien         string does not match.  We need not follow this path further.
484189857Sobrien         Instead, look at the next alternative (remembered on the
484289857Sobrien         stack), or quit if no more.  The test at the top of the loop
484389857Sobrien         does these things.  */
484489857Sobrien      path_can_be_null = false;
484589857Sobrien      p = pend;
484689857Sobrien    } /* while p */
484789857Sobrien
484889857Sobrien  /* Set `can_be_null' for the last path (also the first path, if the
484989857Sobrien     pattern is empty).  */
485089857Sobrien  bufp->can_be_null |= path_can_be_null;
485189857Sobrien
485289857Sobrien done:
485389857Sobrien  RESET_FAIL_STACK ();
485489857Sobrien  return 0;
485589857Sobrien}
485689857Sobrien
485789857Sobrien#else /* not INSIDE_RECURSION */
485889857Sobrien
/* Public entry point for building BUFP's fastmap.  When MBS_SUPPORT is
   compiled in and MB_CUR_MAX != 1, the wide-character implementation
   (wcs_re_compile_fastmap) is called; otherwise the byte implementation
   (byte_re_compile_fastmap) is.  The value returned by the selected
   implementation is passed through unchanged.  */
485989857Sobrienint
4860218822Sdimre_compile_fastmap (struct re_pattern_buffer *bufp)
486189857Sobrien{
486289857Sobrien# ifdef MBS_SUPPORT
486389857Sobrien  if (MB_CUR_MAX != 1)
486489857Sobrien    return wcs_re_compile_fastmap(bufp);
486589857Sobrien  else
486689857Sobrien# endif
  /* Without MBS_SUPPORT the `if'/`else' above is compiled out and this
     return is unconditional.  */
486789857Sobrien    return byte_re_compile_fastmap(bufp);
486889857Sobrien} /* re_compile_fastmap */
486989857Sobrien#ifdef _LIBC
487089857Sobrienweak_alias (__re_compile_fastmap, re_compile_fastmap)
487189857Sobrien#endif
487289857Sobrien
487389857Sobrien
487489857Sobrien/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
487589857Sobrien   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
487689857Sobrien   this memory for recording register information.  STARTS and ENDS
487789857Sobrien   must be allocated using the malloc library routine, and must each
487889857Sobrien   be at least NUM_REGS * sizeof (regoff_t) bytes long.
487989857Sobrien
488089857Sobrien   If NUM_REGS == 0, then subsequent matches should allocate their own
488189857Sobrien   register data.
488289857Sobrien
488389857Sobrien   Unless this function is called, the first search or match using
488489857Sobrien   PATTERN_BUFFER will allocate its own register data, without
488589857Sobrien   freeing the old data.  */
488689857Sobrien
488789857Sobrienvoid
4888218822Sdimre_set_registers (struct re_pattern_buffer *bufp,
4889218822Sdim                  struct re_registers *regs, unsigned num_regs,
4890218822Sdim                  regoff_t *starts, regoff_t *ends)
489189857Sobrien{
489289857Sobrien  if (num_regs)
489389857Sobrien    {
      /* Caller supplied storage: record it in REGS and mark the pattern
         buffer REGS_REALLOCATE.  */
489489857Sobrien      bufp->regs_allocated = REGS_REALLOCATE;
489589857Sobrien      regs->num_regs = num_regs;
489689857Sobrien      regs->start = starts;
489789857Sobrien      regs->end = ends;
489889857Sobrien    }
489989857Sobrien  else
490089857Sobrien    {
      /* No storage supplied: mark the pattern buffer REGS_UNALLOCATED and
         clear REGS.  STARTS and ENDS are ignored on this path.  */
490189857Sobrien      bufp->regs_allocated = REGS_UNALLOCATED;
490289857Sobrien      regs->num_regs = 0;
490389857Sobrien      regs->start = regs->end = (regoff_t *) 0;
490489857Sobrien    }
490589857Sobrien}
490689857Sobrien#ifdef _LIBC
490789857Sobrienweak_alias (__re_set_registers, re_set_registers)
490889857Sobrien#endif
490989857Sobrien
491089857Sobrien/* Searching routines.  */
491189857Sobrien
491289857Sobrien/* Like re_search_2, below, but only one string is specified, and
491389857Sobrien   doesn't let you say where to stop matching.  */
491489857Sobrien
491589857Sobrienint
4916218822Sdimre_search (struct re_pattern_buffer *bufp, const char *string, int size,
4917218822Sdim           int startpos, int range, struct re_registers *regs)
491889857Sobrien{
  /* STRING is handed over as the second string, with an empty (NULL, 0)
     first string, and SIZE is passed as the STOP argument.  The result of
     re_search_2 is returned unchanged.  */
491989857Sobrien  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
492089857Sobrien		      regs, size);
492189857Sobrien}
492289857Sobrien#ifdef _LIBC
492389857Sobrienweak_alias (__re_search, re_search)
492489857Sobrien#endif
492589857Sobrien
492689857Sobrien
492789857Sobrien/* Using the compiled pattern in BUFP->buffer, first tries to match the
492889857Sobrien   virtual concatenation of STRING1 and STRING2, starting first at index
492989857Sobrien   STARTPOS, then at STARTPOS + 1, and so on.
493089857Sobrien
493189857Sobrien   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
493289857Sobrien
493389857Sobrien   RANGE is how far to scan while trying to match.  RANGE = 0 means try
493489857Sobrien   only at STARTPOS; in general, the last start tried is STARTPOS +
493589857Sobrien   RANGE.
493689857Sobrien
493789857Sobrien   In REGS, return the indices of the virtual concatenation of STRING1
493889857Sobrien   and STRING2 that matched the entire BUFP->buffer and its contained
493989857Sobrien   subexpressions.
494089857Sobrien
494189857Sobrien   Do not consider matching one past the index STOP in the virtual
494289857Sobrien   concatenation of STRING1 and STRING2.
494389857Sobrien
494489857Sobrien   We return either the position in the strings at which the match was
494589857Sobrien   found, -1 if no match, or -2 if error (such as failure
494689857Sobrien   stack overflow).  */
494789857Sobrien
494889857Sobrienint
4949218822Sdimre_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
4950218822Sdim             const char *string2, int size2, int startpos, int range,
4951218822Sdim             struct re_registers *regs, int stop)
495289857Sobrien{
  /* Pure dispatcher: all arguments are forwarded untouched.  With
     MBS_SUPPORT and MB_CUR_MAX != 1 the wide-character searcher is used;
     otherwise (or when MBS_SUPPORT is not compiled in) the byte searcher
     is.  */
495389857Sobrien# ifdef MBS_SUPPORT
495489857Sobrien  if (MB_CUR_MAX != 1)
495589857Sobrien    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
495689857Sobrien			    range, regs, stop);
495789857Sobrien  else
495889857Sobrien# endif
495989857Sobrien    return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
496089857Sobrien			     range, regs, stop);
496189857Sobrien} /* re_search_2 */
496289857Sobrien#ifdef _LIBC
496389857Sobrienweak_alias (__re_search_2, re_search_2)
496489857Sobrien#endif
496589857Sobrien
496689857Sobrien#endif /* not INSIDE_RECURSION */
496789857Sobrien
496889857Sobrien#ifdef INSIDE_RECURSION
496989857Sobrien
/* FREE_VAR (var): release VAR if it is non-null, then set it to NULL.
   The release goes through REGEX_FREE when MATCH_MAY_ALLOCATE is defined
   and through free otherwise.

   NOTE(review): the expansion is two statements (an `if' followed by an
   assignment), not one, and VAR is not parenthesized.  When the macro is
   used as the unbraced body of an `if' or `else', only the release is
   governed by that condition; `var = NULL' always executes.  VAR is also
   evaluated more than once, so it must not have side effects.  */
497089857Sobrien#ifdef MATCH_MAY_ALLOCATE
497189857Sobrien# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
497289857Sobrien#else
497389857Sobrien# define FREE_VAR(var) if (var) free (var); var = NULL
497489857Sobrien#endif
497589857Sobrien
497689857Sobrien#ifdef WCHAR
/* Size threshold used by the WCHAR search routine: conversion buffers for
   an input string whose size exceeds this are obtained with TALLOC and
   released with free; buffers for shorter strings are obtained with
   REGEX_TALLOC and released with FREE_VAR.  */
497789857Sobrien# define MAX_ALLOCA_SIZE	2000
497889857Sobrien
/* Release the wide-character copies (wcs_string1, wcs_string2) and the
   offset tables (mbs_offset1, mbs_offset2) of both input strings, picking
   the release method for each string from its size as described above.
   Expects size1, size2 and the four buffer variables to be in scope.  */
497989857Sobrien# define FREE_WCS_BUFFERS() \
498089857Sobrien  do {									      \
498189857Sobrien    if (size1 > MAX_ALLOCA_SIZE)					      \
498289857Sobrien      {									      \
498389857Sobrien	free (wcs_string1);						      \
498489857Sobrien	free (mbs_offset1);						      \
498589857Sobrien      }									      \
498689857Sobrien    else								      \
498789857Sobrien      {									      \
498889857Sobrien	FREE_VAR (wcs_string1);						      \
498989857Sobrien	FREE_VAR (mbs_offset1);						      \
499089857Sobrien      }									      \
499189857Sobrien    if (size2 > MAX_ALLOCA_SIZE) 					      \
499289857Sobrien      {									      \
499389857Sobrien	free (wcs_string2);						      \
499489857Sobrien	free (mbs_offset2);						      \
499589857Sobrien      }									      \
499689857Sobrien    else								      \
499789857Sobrien      {									      \
499889857Sobrien	FREE_VAR (wcs_string2);						      \
499989857Sobrien	FREE_VAR (mbs_offset2);						      \
500089857Sobrien      }									      \
500189857Sobrien  } while (0)
500289857Sobrien
500389857Sobrien#endif
500489857Sobrien
500589857Sobrien
/* Search for a match of BUFP's compiled pattern in the virtual
   concatenation of STRING1 (length SIZE1) and STRING2 (length SIZE2).
   Candidate start positions run from STARTPOS towards STARTPOS + RANGE,
   forwards when RANGE is positive and backwards when it is negative.

   Returns the start position at which the matcher first succeeded, -1
   when no position matched (or STARTPOS is out of range), and -2 when the
   fastmap could not be compiled, a conversion buffer could not be
   allocated (WCHAR build only), or the matcher itself returned -2.  */
500689857Sobrienstatic int
5007218822SdimPREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
5008218822Sdim                     int size1, const char *string2, int size2,
5009218822Sdim                     int startpos, int range,
5010218822Sdim                     struct re_registers *regs, int stop)
501189857Sobrien{
501289857Sobrien  int val;
501389857Sobrien  register char *fastmap = bufp->fastmap;
501489857Sobrien  register RE_TRANSLATE_TYPE translate = bufp->translate;
  /* NOTE(review): both sums below are plain `int' additions evaluated
     before STARTPOS has been validated; extreme SIZE or RANGE arguments
     could overflow here -- confirm that callers bound them.  */
501589857Sobrien  int total_size = size1 + size2;
501689857Sobrien  int endpos = startpos + range;
501789857Sobrien#ifdef WCHAR
501889857Sobrien  /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
501989857Sobrien  wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
502089857Sobrien  /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
502189857Sobrien  int wcs_size1 = 0, wcs_size2 = 0;
502289857Sobrien  /* offset buffer for optimization. See convert_mbs_to_wcs.  */
502389857Sobrien  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
502489857Sobrien  /* They hold whether each wchar_t is binary data or not.  */
502589857Sobrien  char *is_binary = NULL;
502689857Sobrien#endif /* WCHAR */
502789857Sobrien
502889857Sobrien  /* Check for out-of-range STARTPOS.  */
502989857Sobrien  if (startpos < 0 || startpos > total_size)
503089857Sobrien    return -1;
503189857Sobrien
503289857Sobrien  /* Fix up RANGE if it might eventually take us outside
503389857Sobrien     the virtual concatenation of STRING1 and STRING2.
503489857Sobrien     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
503589857Sobrien  if (endpos < 0)
503689857Sobrien    range = 0 - startpos;
503789857Sobrien  else if (endpos > total_size)
503889857Sobrien    range = total_size - startpos;
503989857Sobrien
504089857Sobrien  /* If the search isn't to be a backwards one, don't waste time in a
504189857Sobrien     search for a pattern that must be anchored.  */
504289857Sobrien  if (bufp->used > 0 && range > 0
504389857Sobrien      && ((re_opcode_t) bufp->buffer[0] == begbuf
504489857Sobrien	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
504589857Sobrien	  || ((re_opcode_t) bufp->buffer[0] == begline
504689857Sobrien	      && !bufp->newline_anchor)))
504789857Sobrien    {
504889857Sobrien      if (startpos > 0)
504989857Sobrien	return -1;
505089857Sobrien      else
505189857Sobrien	range = 1;
505289857Sobrien    }
505389857Sobrien
505489857Sobrien#ifdef emacs
505589857Sobrien  /* In a forward search for something that starts with \=.
505689857Sobrien     don't keep searching past point.  */
505789857Sobrien  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
505889857Sobrien    {
505989857Sobrien      range = PT - startpos;
506089857Sobrien      if (range <= 0)
506189857Sobrien	return -1;
506289857Sobrien    }
506389857Sobrien#endif /* emacs */
506489857Sobrien
506589857Sobrien  /* Update the fastmap now if not correct already.  */
506689857Sobrien  if (fastmap && !bufp->fastmap_accurate)
506789857Sobrien    if (re_compile_fastmap (bufp) == -2)
506889857Sobrien      return -2;
506989857Sobrien
507089857Sobrien#ifdef WCHAR
507189857Sobrien  /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
507289857Sobrien     fill them with converted string.  */
507389857Sobrien  if (size1 != 0)
507489857Sobrien    {
507589857Sobrien      if (size1 > MAX_ALLOCA_SIZE)
507689857Sobrien	{
507789857Sobrien	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
507889857Sobrien	  mbs_offset1 = TALLOC (size1 + 1, int);
507989857Sobrien	  is_binary = TALLOC (size1 + 1, char);
508089857Sobrien	}
508189857Sobrien      else
508289857Sobrien	{
508389857Sobrien	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
508489857Sobrien	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
508589857Sobrien	  is_binary = REGEX_TALLOC (size1 + 1, char);
508689857Sobrien	}
508789857Sobrien      if (!wcs_string1 || !mbs_offset1 || !is_binary)
508889857Sobrien	{
508989857Sobrien	  if (size1 > MAX_ALLOCA_SIZE)
509089857Sobrien	    {
509189857Sobrien	      free (wcs_string1);
509289857Sobrien	      free (mbs_offset1);
509389857Sobrien	      free (is_binary);
509489857Sobrien	    }
509589857Sobrien	  else
509689857Sobrien	    {
509789857Sobrien	      FREE_VAR (wcs_string1);
509889857Sobrien	      FREE_VAR (mbs_offset1);
509989857Sobrien	      FREE_VAR (is_binary);
510089857Sobrien	    }
510189857Sobrien	  return -2;
510289857Sobrien	}
510389857Sobrien      wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
510489857Sobrien				     mbs_offset1, is_binary);
510589857Sobrien      wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
      /* IS_BINARY is only needed during the conversion; release it now.
         FREE_VAR expands to two statements, so the trailing
         `is_binary = NULL' runs on both branches of this if/else.  */
510689857Sobrien      if (size1 > MAX_ALLOCA_SIZE)
510789857Sobrien	free (is_binary);
510889857Sobrien      else
510989857Sobrien	FREE_VAR (is_binary);
511089857Sobrien    }
511189857Sobrien  if (size2 != 0)
511289857Sobrien    {
511389857Sobrien      if (size2 > MAX_ALLOCA_SIZE)
511489857Sobrien	{
511589857Sobrien	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
511689857Sobrien	  mbs_offset2 = TALLOC (size2 + 1, int);
511789857Sobrien	  is_binary = TALLOC (size2 + 1, char);
511889857Sobrien	}
511989857Sobrien      else
512089857Sobrien	{
512189857Sobrien	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
512289857Sobrien	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
512389857Sobrien	  is_binary = REGEX_TALLOC (size2 + 1, char);
512489857Sobrien	}
512589857Sobrien      if (!wcs_string2 || !mbs_offset2 || !is_binary)
512689857Sobrien	{
512789857Sobrien	  FREE_WCS_BUFFERS ();
512889857Sobrien	  if (size2 > MAX_ALLOCA_SIZE)
512989857Sobrien	    free (is_binary);
513089857Sobrien	  else
513189857Sobrien	    FREE_VAR (is_binary);
513289857Sobrien	  return -2;
513389857Sobrien	}
513489857Sobrien      wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
513589857Sobrien				     mbs_offset2, is_binary);
513689857Sobrien      wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
513789857Sobrien      if (size2 > MAX_ALLOCA_SIZE)
513889857Sobrien	free (is_binary);
513989857Sobrien      else
514089857Sobrien	FREE_VAR (is_binary);
514189857Sobrien    }
514289857Sobrien#endif /* WCHAR */
514389857Sobrien
514489857Sobrien
514589857Sobrien  /* Loop through the string, looking for a place to start matching.  */
514689857Sobrien  for (;;)
514789857Sobrien    {
514889857Sobrien      /* If a fastmap is supplied, skip quickly over characters that
514989857Sobrien         cannot be the start of a match.  If the pattern can match the
515089857Sobrien         null string, however, we don't need to skip characters; we want
515189857Sobrien         the first null string.  */
515289857Sobrien      if (fastmap && startpos < total_size && !bufp->can_be_null)
515389857Sobrien	{
515489857Sobrien	  if (range > 0)	/* Searching forwards.  */
515589857Sobrien	    {
515689857Sobrien	      register const char *d;
515789857Sobrien	      register int lim = 0;
515889857Sobrien	      int irange = range;
515989857Sobrien
              /* When the scan starts in string1 but RANGE reaches into
                 string2, LIM stops the skip loop at the end of string1.  */
516089857Sobrien              if (startpos < size1 && startpos + range >= size1)
516189857Sobrien                lim = range - (size1 - startpos);
516289857Sobrien
              /* `string2 - size1' biases the base so that adding STARTPOS
                 yields the right element of string2.  */
516389857Sobrien	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
516489857Sobrien
516589857Sobrien              /* Written out as an if-else to avoid testing `translate'
516689857Sobrien                 inside the loop.  */
516789857Sobrien	      if (translate)
516889857Sobrien                while (range > lim
516989857Sobrien                       && !fastmap[(unsigned char)
517089857Sobrien				   translate[(unsigned char) *d++]])
517189857Sobrien                  range--;
517289857Sobrien	      else
517389857Sobrien                while (range > lim && !fastmap[(unsigned char) *d++])
517489857Sobrien                  range--;
517589857Sobrien
              /* Advance STARTPOS by the number of characters skipped.  */
517689857Sobrien	      startpos += irange - range;
517789857Sobrien	    }
517889857Sobrien	  else				/* Searching backwards.  */
517989857Sobrien	    {
518089857Sobrien	      register CHAR_T c = (size1 == 0 || startpos >= size1
518189857Sobrien				      ? string2[startpos - size1]
518289857Sobrien				      : string1[startpos]);
518389857Sobrien
518489857Sobrien	      if (!fastmap[(unsigned char) TRANSLATE (c)])
518589857Sobrien		goto advance;
518689857Sobrien	    }
518789857Sobrien	}
518889857Sobrien
518989857Sobrien      /* If can't match the null string, and that's all we have left, fail.  */
519089857Sobrien      if (range >= 0 && startpos == total_size && fastmap
519189857Sobrien          && !bufp->can_be_null)
519289857Sobrien       {
519389857Sobrien#ifdef WCHAR
519489857Sobrien         FREE_WCS_BUFFERS ();
519589857Sobrien#endif
519689857Sobrien         return -1;
519789857Sobrien       }
519889857Sobrien
      /* Try an anchored match at STARTPOS.  */
519989857Sobrien#ifdef WCHAR
520089857Sobrien      val = wcs_re_match_2_internal (bufp, string1, size1, string2,
520189857Sobrien				     size2, startpos, regs, stop,
520289857Sobrien				     wcs_string1, wcs_size1,
520389857Sobrien				     wcs_string2, wcs_size2,
520489857Sobrien				     mbs_offset1, mbs_offset2);
520589857Sobrien#else /* BYTE */
520689857Sobrien      val = byte_re_match_2_internal (bufp, string1, size1, string2,
520789857Sobrien				      size2, startpos, regs, stop);
520889857Sobrien#endif /* BYTE */
520989857Sobrien
521089857Sobrien#ifndef REGEX_MALLOC
521189857Sobrien# ifdef C_ALLOCA
521289857Sobrien      alloca (0);
521389857Sobrien# endif
521489857Sobrien#endif
521589857Sobrien
      /* Any non-negative result counts as a match; the caller is given
         the start position, not VAL.  */
521689857Sobrien      if (val >= 0)
521789857Sobrien	{
521889857Sobrien#ifdef WCHAR
521989857Sobrien	  FREE_WCS_BUFFERS ();
522089857Sobrien#endif
522189857Sobrien	  return startpos;
522289857Sobrien	}
522389857Sobrien
522489857Sobrien      if (val == -2)
522589857Sobrien	{
522689857Sobrien#ifdef WCHAR
522789857Sobrien	  FREE_WCS_BUFFERS ();
522889857Sobrien#endif
522989857Sobrien	  return -2;
523089857Sobrien	}
523189857Sobrien
      /* Step to the next candidate position in the search direction, or
         leave the loop once RANGE has been used up.  */
523289857Sobrien    advance:
523389857Sobrien      if (!range)
523489857Sobrien        break;
523589857Sobrien      else if (range > 0)
523689857Sobrien        {
523789857Sobrien          range--;
523889857Sobrien          startpos++;
523989857Sobrien        }
524089857Sobrien      else
524189857Sobrien        {
524289857Sobrien          range++;
524389857Sobrien          startpos--;
524489857Sobrien        }
524589857Sobrien    }
524689857Sobrien#ifdef WCHAR
524789857Sobrien  FREE_WCS_BUFFERS ();
524889857Sobrien#endif
524989857Sobrien  return -1;
525089857Sobrien}
525189857Sobrien
525289857Sobrien#ifdef WCHAR
525389857Sobrien/* This converts PTR, a pointer into one of the search wchar_t strings
525489857Sobrien   `string1' and `string2' into a multibyte string offset from the
525589857Sobrien   beginning of that string. We use mbs_offset to optimize.
525689857Sobrien   See convert_mbs_to_wcs.  */
/* A null offset table contributes 0.  For a pointer into `string2' the
   result is additionally shifted by `csize1'.  Expects string1, string2,
   mbs_offset1, mbs_offset2 and csize1 to be in scope at the point of
   use.  */
525789857Sobrien# define POINTER_TO_OFFSET(ptr)						\
525889857Sobrien  (FIRST_STRING_P (ptr)							\
525989857Sobrien   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
526089857Sobrien   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
526189857Sobrien		 + csize1)))
526289857Sobrien#else /* BYTE */
526389857Sobrien/* This converts PTR, a pointer into one of the search strings `string1'
526489857Sobrien   and `string2' into an offset from the beginning of that string.  */
/* For a pointer into `string2' the offset is shifted by `size1', i.e. it
   is relative to the start of the virtual concatenation.  */
526589857Sobrien# define POINTER_TO_OFFSET(ptr)			\
526689857Sobrien  (FIRST_STRING_P (ptr)				\
526789857Sobrien   ? ((regoff_t) ((ptr) - string1))		\
526889857Sobrien   : ((regoff_t) ((ptr) - string2 + size1)))
526989857Sobrien#endif /* WCHAR */
527089857Sobrien
527189857Sobrien/* Macros for dealing with the split strings in re_match_2.  */
527289857Sobrien
/* True while the current data limit `dend' is `end_match_1'.  PREFETCH
   below replaces `dend' with `end_match_2' when it moves on to string2.  */
527389857Sobrien#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
527489857Sobrien
527589857Sobrien/* Call before fetching a character with *d.  This switches over to
527689857Sobrien   string2 if necessary.  */
/* Jumps to the label `fail' when the data is exhausted; `d', `dend',
   `string2' and `end_match_2' must be in scope.  Written as a `while' so
   that the limit is re-tested after switching strings.  */
527789857Sobrien#define PREFETCH()							\
527889857Sobrien  while (d == dend)						    	\
527989857Sobrien    {									\
528089857Sobrien      /* End of string2 => fail.  */					\
528189857Sobrien      if (dend == end_match_2) 						\
528289857Sobrien        goto fail;							\
528389857Sobrien      /* End of string1 => advance to string2.  */ 			\
528489857Sobrien      d = string2;						        \
528589857Sobrien      dend = end_match_2;						\
528689857Sobrien    }
528789857Sobrien
528889857Sobrien/* Test if at very beginning or at very end of the virtual concatenation
528989857Sobrien   of `string1' and `string2'.  If only one string, it's `string2'.  */
/* NOTE(review): because of the `|| !size2' term, AT_STRINGS_BEG is true
   for every D whenever size2 is zero -- presumably the matcher ensures
   string2 is only empty when there is no data at all; confirm.  */
529089857Sobrien#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
529189857Sobrien#define AT_STRINGS_END(d) ((d) == end2)
529289857Sobrien
529389857Sobrien
529489857Sobrien/* Test if D points to a character which is word-constituent.  We have
529589857Sobrien   two special cases to check for: if past the end of string1, look at
529689857Sobrien   the first character in string2; and if before the beginning of
529789857Sobrien   string2, look at the last character in string1.  */
529889857Sobrien#ifdef WCHAR
529989857Sobrien/* Use internationalized API instead of SYNTAX.  */
/* A wide character is word-constituent when iswalnum accepts it or it is
   L'_'.  The argument D is evaluated several times.  */
530089857Sobrien# define WORDCHAR_P(d)							\
530189857Sobrien  (iswalnum ((wint_t)((d) == end1 ? *string2				\
530289857Sobrien           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
530389857Sobrien   || ((d) == end1 ? *string2						\
530489857Sobrien       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
530589857Sobrien#else /* BYTE */
/* A byte is word-constituent when its SYNTAX class is Sword.  */
530689857Sobrien# define WORDCHAR_P(d)							\
530789857Sobrien  (SYNTAX ((d) == end1 ? *string2					\
530889857Sobrien           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
530989857Sobrien   == Sword)
531089857Sobrien#endif /* WCHAR */
531189857Sobrien
531289857Sobrien/* Disabled due to a compiler bug -- see comment at case wordbound */
531389857Sobrien#if 0
531489857Sobrien/* Test if the character before D and the one at D differ with respect
531589857Sobrien   to being word-constituent.  */
531689857Sobrien#define AT_WORD_BOUNDARY(d)						\
531789857Sobrien  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
531889857Sobrien   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
531989857Sobrien#endif
532089857Sobrien
532189857Sobrien/* Free everything we malloc.  */
/* Four variants, selected by MATCH_MAY_ALLOCATE and WCHAR:
   - With MATCH_MAY_ALLOCATE the failure stack and the register
     bookkeeping arrays are released.
   - In WCHAR builds the converted strings (string1, string2) and their
     offset tables (mbs_offset1, mbs_offset2) are released as well, but
     only when `cant_free_wcs_buf' is zero.
   - Without MATCH_MAY_ALLOCATE in a BYTE build the macro does nothing.
   All named variables must be in scope where the macro is used.  */
532289857Sobrien#ifdef MATCH_MAY_ALLOCATE
532389857Sobrien# ifdef WCHAR
532489857Sobrien#  define FREE_VARIABLES()						\
532589857Sobrien  do {									\
532689857Sobrien    REGEX_FREE_STACK (fail_stack.stack);				\
532789857Sobrien    FREE_VAR (regstart);						\
532889857Sobrien    FREE_VAR (regend);							\
532989857Sobrien    FREE_VAR (old_regstart);						\
533089857Sobrien    FREE_VAR (old_regend);						\
533189857Sobrien    FREE_VAR (best_regstart);						\
533289857Sobrien    FREE_VAR (best_regend);						\
533389857Sobrien    FREE_VAR (reg_info);						\
533489857Sobrien    FREE_VAR (reg_dummy);						\
533589857Sobrien    FREE_VAR (reg_info_dummy);						\
533689857Sobrien    if (!cant_free_wcs_buf)						\
533789857Sobrien      {									\
533889857Sobrien        FREE_VAR (string1);						\
533989857Sobrien        FREE_VAR (string2);						\
534089857Sobrien        FREE_VAR (mbs_offset1);						\
534189857Sobrien        FREE_VAR (mbs_offset2);						\
534289857Sobrien      }									\
534389857Sobrien  } while (0)
534489857Sobrien# else /* BYTE */
534589857Sobrien#  define FREE_VARIABLES()						\
534689857Sobrien  do {									\
534789857Sobrien    REGEX_FREE_STACK (fail_stack.stack);				\
534889857Sobrien    FREE_VAR (regstart);						\
534989857Sobrien    FREE_VAR (regend);							\
535089857Sobrien    FREE_VAR (old_regstart);						\
535189857Sobrien    FREE_VAR (old_regend);						\
535289857Sobrien    FREE_VAR (best_regstart);						\
535389857Sobrien    FREE_VAR (best_regend);						\
535489857Sobrien    FREE_VAR (reg_info);						\
535589857Sobrien    FREE_VAR (reg_dummy);						\
535689857Sobrien    FREE_VAR (reg_info_dummy);						\
535789857Sobrien  } while (0)
535889857Sobrien# endif /* WCHAR */
535989857Sobrien#else
536089857Sobrien# ifdef WCHAR
536189857Sobrien#  define FREE_VARIABLES()						\
536289857Sobrien  do {									\
536389857Sobrien    if (!cant_free_wcs_buf)						\
536489857Sobrien      {									\
536589857Sobrien        FREE_VAR (string1);						\
536689857Sobrien        FREE_VAR (string2);						\
536789857Sobrien        FREE_VAR (mbs_offset1);						\
536889857Sobrien        FREE_VAR (mbs_offset2);						\
536989857Sobrien      }									\
537089857Sobrien  } while (0)
537189857Sobrien# else /* BYTE */
537289857Sobrien#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
537389857Sobrien# endif /* WCHAR */
537489857Sobrien#endif /* not MATCH_MAY_ALLOCATE */
537589857Sobrien
537689857Sobrien/* These values must meet several constraints.  They must not be valid
537789857Sobrien   register values; since we have a limit of 255 registers (because
537889857Sobrien   we use only one byte in the pattern for the register number), we can
537989857Sobrien   use numbers larger than 255.  They must differ by 1, because of
538089857Sobrien   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
538189857Sobrien   be larger than the value for the highest register, so we do not try
538289857Sobrien   to actually save any registers when none are active.  */
538389857Sobrien#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
538489857Sobrien#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
538589857Sobrien
538689857Sobrien#else /* not INSIDE_RECURSION */
538789857Sobrien/* Matching routines.  */
538889857Sobrien
538989857Sobrien#ifndef emacs   /* Emacs never uses this.  */
/* Match the compiled pattern in BUFP against the single string STRING
   of length SIZE, starting at POS.  This is re_match_2 with no first
   string: STRING is handed to the internal matcher as the second
   string and SIZE is used as the stop position.  The internal
   matcher's result is returned unchanged.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int match_status;

# ifdef MBS_SUPPORT
  /* Use the byte matcher when MB_CUR_MAX is 1, the wide-character
     matcher otherwise.  */
  if (MB_CUR_MAX == 1)
    match_status = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                             pos, regs, size);
  else
    match_status = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                            pos, regs, size,
                                            NULL, 0, NULL, 0, NULL, NULL);
# else
  match_status = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                           pos, regs, size);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
# endif

  return match_status;
}
541389857Sobrien# ifdef _LIBC
541489857Sobrienweak_alias (__re_match, re_match)
541589857Sobrien# endif
541689857Sobrien#endif /* not emacs */
541789857Sobrien
541889857Sobrien#endif /* not INSIDE_RECURSION */
541989857Sobrien
542089857Sobrien#ifdef INSIDE_RECURSION
5421218822Sdimstatic boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
5422218822Sdim                                                  UCHAR_T *end,
5423218822Sdim					PREFIX(register_info_type) *reg_info);
5424218822Sdimstatic boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
5425218822Sdim                                                UCHAR_T *end,
5426218822Sdim					PREFIX(register_info_type) *reg_info);
5427218822Sdimstatic boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
5428218822Sdim                                                      UCHAR_T *end,
5429218822Sdim					PREFIX(register_info_type) *reg_info);
5430218822Sdimstatic int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
5431218822Sdim                                   int len, char *translate);
543289857Sobrien#else /* not INSIDE_RECURSION */
543389857Sobrien
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */
544689857Sobrien
int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int match_status;

# ifdef MBS_SUPPORT
  /* Use the byte matcher when MB_CUR_MAX is 1, the wide-character
     matcher otherwise.  The wide-character matcher is passed no
     pre-converted buffers (NULL/0 for all of them).  */
  if (MB_CUR_MAX == 1)
    match_status = byte_re_match_2_internal (bufp, string1, size1,
                                             string2, size2,
                                             pos, regs, stop);
  else
    match_status = wcs_re_match_2_internal (bufp, string1, size1,
                                            string2, size2,
                                            pos, regs, stop,
                                            NULL, 0, NULL, 0, NULL, NULL);
# else
  match_status = byte_re_match_2_internal (bufp, string1, size1,
                                           string2, size2,
                                           pos, regs, stop);
# endif

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#endif

  return match_status;
}
547089857Sobrien#ifdef _LIBC
547189857Sobrienweak_alias (__re_match_2, re_match_2)
547289857Sobrien#endif
547389857Sobrien
547489857Sobrien#endif /* not INSIDE_RECURSION */
547589857Sobrien
547689857Sobrien#ifdef INSIDE_RECURSION
547789857Sobrien
547889857Sobrien#ifdef WCHAR
static int count_mbs_length (int *, int);

/* Return the number of wide characters occupied by the first LENGTH
   bytes of a multibyte string.

   OFFSET_BUFFER describes that string: entry I holds the byte offset
   that corresponds to wide-character index I (see convert_mbs_to_wcs).
   The lookup relies on the entries being in ascending order and on
   OFFSET_BUFFER[LENGTH] being readable.

   Return value:
     -1      if LENGTH is negative, or if no entry equals LENGTH
             (presumably LENGTH then falls inside a multibyte
             character -- confirm against convert_mbs_to_wcs);
     0       if OFFSET_BUFFER is NULL;
     LENGTH  if OFFSET_BUFFER[LENGTH] == LENGTH, i.e. the prefix
             contains no multibyte character;
     otherwise the index I for which OFFSET_BUFFER[I] == LENGTH.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* Reject an invalid size.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* Without multibyte characters offset_buffer[i] == i for every i,
     so this single probe answers the common case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Binary search between index 0 and index LENGTH.  LENGTH is a valid
     upper bound because offset_buffer[i] >= i for all i.  Neither end
     point needs to be probed again, so the search stops once no index
     lies strictly between them.  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Written this way so that the sum cannot overflow `int' when
         LENGTH is very large.  */
      int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] > length)
        upper = middle;
      else if (offset_buffer[middle] < length)
        lower = middle;
      else
        return middle;
    }

  /* No wide-character boundary sits at byte offset LENGTH.  */
  return -1;
}
552289857Sobrien#endif /* WCHAR */
552389857Sobrien
552489857Sobrien/* This is a separate function so that we can force an alloca cleanup
552589857Sobrien   afterwards.  */
552689857Sobrien#ifdef WCHAR
552789857Sobrienstatic int
5528218822Sdimwcs_re_match_2_internal (struct re_pattern_buffer *bufp,
5529218822Sdim                         const char *cstring1, int csize1,
5530218822Sdim                         const char *cstring2, int csize2,
5531218822Sdim                         int pos,
5532218822Sdim			 struct re_registers *regs,
5533218822Sdim                         int stop,
     /* string1 == string2 == NULL means string1/2, size1/2 and
	mbs_offset1/2 need setting up in this function.  */
     /* We need wchar_t* buffers corresponding to cstring1, cstring2.  */
5537218822Sdim                         wchar_t *string1, int size1,
5538218822Sdim                         wchar_t *string2, int size2,
     /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
5540218822Sdim			 int *mbs_offset1, int *mbs_offset2)
554189857Sobrien#else /* BYTE */
554289857Sobrienstatic int
5543218822Sdimbyte_re_match_2_internal (struct re_pattern_buffer *bufp,
5544218822Sdim                          const char *string1, int size1,
5545218822Sdim                          const char *string2, int size2,
5546218822Sdim                          int pos,
5547218822Sdim			  struct re_registers *regs, int stop)
554889857Sobrien#endif /* BYTE */
554989857Sobrien{
555089857Sobrien  /* General temporaries.  */
555189857Sobrien  int mcnt;
555289857Sobrien  UCHAR_T *p1;
555389857Sobrien#ifdef WCHAR
555489857Sobrien  /* They hold whether each wchar_t is binary data or not.  */
555589857Sobrien  char *is_binary = NULL;
555689857Sobrien  /* If true, we can't free string1/2, mbs_offset1/2.  */
555789857Sobrien  int cant_free_wcs_buf = 1;
555889857Sobrien#endif /* WCHAR */
555989857Sobrien
556089857Sobrien  /* Just past the end of the corresponding string.  */
556189857Sobrien  const CHAR_T *end1, *end2;
556289857Sobrien
556389857Sobrien  /* Pointers into string1 and string2, just past the last characters in
556489857Sobrien     each to consider matching.  */
556589857Sobrien  const CHAR_T *end_match_1, *end_match_2;
556689857Sobrien
556789857Sobrien  /* Where we are in the data, and the end of the current string.  */
556889857Sobrien  const CHAR_T *d, *dend;
556989857Sobrien
557089857Sobrien  /* Where we are in the pattern, and the end of the pattern.  */
557189857Sobrien#ifdef WCHAR
557289857Sobrien  UCHAR_T *pattern, *p;
557389857Sobrien  register UCHAR_T *pend;
557489857Sobrien#else /* BYTE */
557589857Sobrien  UCHAR_T *p = bufp->buffer;
557689857Sobrien  register UCHAR_T *pend = p + bufp->used;
557789857Sobrien#endif /* WCHAR */
557889857Sobrien
557989857Sobrien  /* Mark the opcode just after a start_memory, so we can test for an
558089857Sobrien     empty subpattern when we get to the stop_memory.  */
558189857Sobrien  UCHAR_T *just_past_start_mem = 0;
558289857Sobrien
558389857Sobrien  /* We use this to map every character in the string.  */
558489857Sobrien  RE_TRANSLATE_TYPE translate = bufp->translate;
558589857Sobrien
558689857Sobrien  /* Failure point stack.  Each place that can handle a failure further
558789857Sobrien     down the line pushes a failure point on this stack.  It consists of
558889857Sobrien     restart, regend, and reg_info for all registers corresponding to
558989857Sobrien     the subexpressions we're currently inside, plus the number of such
559089857Sobrien     registers, and, finally, two char *'s.  The first char * is where
559189857Sobrien     to resume scanning the pattern; the second one is where to resume
559289857Sobrien     scanning the strings.  If the latter is zero, the failure point is
     a ``dummy''; if a failure happens and the failure point is a dummy,
     it gets discarded and the next one is tried.  */
559589857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
559689857Sobrien  PREFIX(fail_stack_type) fail_stack;
559789857Sobrien#endif
559889857Sobrien#ifdef DEBUG
559989857Sobrien  static unsigned failure_id;
560089857Sobrien  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
560189857Sobrien#endif
560289857Sobrien
560389857Sobrien#ifdef REL_ALLOC
560489857Sobrien  /* This holds the pointer to the failure stack, when
560589857Sobrien     it is allocated relocatably.  */
560689857Sobrien  fail_stack_elt_t *failure_stack_ptr;
560789857Sobrien#endif
560889857Sobrien
560989857Sobrien  /* We fill all the registers internally, independent of what we
561089857Sobrien     return, for use in backreferences.  The number here includes
561189857Sobrien     an element for register zero.  */
561289857Sobrien  size_t num_regs = bufp->re_nsub + 1;
561389857Sobrien
561489857Sobrien  /* The currently active registers.  */
561589857Sobrien  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
561689857Sobrien  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
561789857Sobrien
561889857Sobrien  /* Information on the contents of registers. These are pointers into
561989857Sobrien     the input strings; they record just what was matched (on this
562089857Sobrien     attempt) by a subexpression part of the pattern, that is, the
562189857Sobrien     regnum-th regstart pointer points to where in the pattern we began
562289857Sobrien     matching and the regnum-th regend points to right after where we
562389857Sobrien     stopped matching the regnum-th subexpression.  (The zeroth register
562489857Sobrien     keeps track of what the whole pattern matches.)  */
562589857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
562689857Sobrien  const CHAR_T **regstart, **regend;
562789857Sobrien#endif
562889857Sobrien
562989857Sobrien  /* If a group that's operated upon by a repetition operator fails to
563089857Sobrien     match anything, then the register for its start will need to be
563189857Sobrien     restored because it will have been set to wherever in the string we
563289857Sobrien     are when we last see its open-group operator.  Similarly for a
563389857Sobrien     register's end.  */
563489857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
563589857Sobrien  const CHAR_T **old_regstart, **old_regend;
563689857Sobrien#endif
563789857Sobrien
563889857Sobrien  /* The is_active field of reg_info helps us keep track of which (possibly
563989857Sobrien     nested) subexpressions we are currently in. The matched_something
564089857Sobrien     field of reg_info[reg_num] helps us tell whether or not we have
564189857Sobrien     matched any of the pattern so far this time through the reg_num-th
564289857Sobrien     subexpression.  These two fields get reset each time through any
564389857Sobrien     loop their register is in.  */
564489857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
564589857Sobrien  PREFIX(register_info_type) *reg_info;
564689857Sobrien#endif
564789857Sobrien
564889857Sobrien  /* The following record the register info as found in the above
564989857Sobrien     variables when we find a match better than any we've seen before.
565089857Sobrien     This happens as we backtrack through the failure points, which in
565189857Sobrien     turn happens only if we have not yet matched the entire string. */
565289857Sobrien  unsigned best_regs_set = false;
565389857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
565489857Sobrien  const CHAR_T **best_regstart, **best_regend;
565589857Sobrien#endif
565689857Sobrien
565789857Sobrien  /* Logically, this is `best_regend[0]'.  But we don't want to have to
565889857Sobrien     allocate space for that if we're not allocating space for anything
565989857Sobrien     else (see below).  Also, we never need info about register 0 for
566089857Sobrien     any of the other register vectors, and it seems rather a kludge to
566189857Sobrien     treat `best_regend' differently than the rest.  So we keep track of
566289857Sobrien     the end of the best match so far in a separate variable.  We
566389857Sobrien     initialize this to NULL so that when we backtrack the first time
566489857Sobrien     and need to test it, it's not garbage.  */
566589857Sobrien  const CHAR_T *match_end = NULL;
566689857Sobrien
566789857Sobrien  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
566889857Sobrien  int set_regs_matched_done = 0;
566989857Sobrien
567089857Sobrien  /* Used when we pop values we don't care about.  */
567189857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
567289857Sobrien  const CHAR_T **reg_dummy;
567389857Sobrien  PREFIX(register_info_type) *reg_info_dummy;
567489857Sobrien#endif
567589857Sobrien
567689857Sobrien#ifdef DEBUG
567789857Sobrien  /* Counts the total number of registers pushed.  */
567889857Sobrien  unsigned num_regs_pushed = 0;
567989857Sobrien#endif
568089857Sobrien
568189857Sobrien  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
568289857Sobrien
568389857Sobrien  INIT_FAIL_STACK ();
568489857Sobrien
568589857Sobrien#ifdef MATCH_MAY_ALLOCATE
568689857Sobrien  /* Do not bother to initialize all the register variables if there are
568789857Sobrien     no groups in the pattern, as it takes a fair amount of time.  If
568889857Sobrien     there are groups, we include space for register 0 (the whole
568989857Sobrien     pattern), even though we never use it, since it simplifies the
569089857Sobrien     array indexing.  We should fix this.  */
569189857Sobrien  if (bufp->re_nsub)
569289857Sobrien    {
569389857Sobrien      regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
569489857Sobrien      regend = REGEX_TALLOC (num_regs, const CHAR_T *);
569589857Sobrien      old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
569689857Sobrien      old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
569789857Sobrien      best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
569889857Sobrien      best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
569989857Sobrien      reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
570089857Sobrien      reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
570189857Sobrien      reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
570289857Sobrien
570389857Sobrien      if (!(regstart && regend && old_regstart && old_regend && reg_info
570489857Sobrien            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
570589857Sobrien        {
570689857Sobrien          FREE_VARIABLES ();
570789857Sobrien          return -2;
570889857Sobrien        }
570989857Sobrien    }
571089857Sobrien  else
571189857Sobrien    {
571289857Sobrien      /* We must initialize all our variables to NULL, so that
571389857Sobrien         `FREE_VARIABLES' doesn't try to free them.  */
571489857Sobrien      regstart = regend = old_regstart = old_regend = best_regstart
571589857Sobrien        = best_regend = reg_dummy = NULL;
571689857Sobrien      reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
571789857Sobrien    }
571889857Sobrien#endif /* MATCH_MAY_ALLOCATE */
571989857Sobrien
572089857Sobrien  /* The starting position is bogus.  */
572189857Sobrien#ifdef WCHAR
572289857Sobrien  if (pos < 0 || pos > csize1 + csize2)
572389857Sobrien#else /* BYTE */
572489857Sobrien  if (pos < 0 || pos > size1 + size2)
572589857Sobrien#endif
572689857Sobrien    {
572789857Sobrien      FREE_VARIABLES ();
572889857Sobrien      return -1;
572989857Sobrien    }
573089857Sobrien
573189857Sobrien#ifdef WCHAR
573289857Sobrien  /* Allocate wchar_t array for string1 and string2 and
573389857Sobrien     fill them with converted string.  */
573489857Sobrien  if (string1 == NULL && string2 == NULL)
573589857Sobrien    {
      /* We need to set up the buffers here.  */
573789857Sobrien
573889857Sobrien      /* We must free wcs buffers in this function.  */
573989857Sobrien      cant_free_wcs_buf = 0;
574089857Sobrien
574189857Sobrien      if (csize1 != 0)
574289857Sobrien	{
574389857Sobrien	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
574489857Sobrien	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
574589857Sobrien	  is_binary = REGEX_TALLOC (csize1 + 1, char);
574689857Sobrien	  if (!string1 || !mbs_offset1 || !is_binary)
574789857Sobrien	    {
574889857Sobrien	      FREE_VAR (string1);
574989857Sobrien	      FREE_VAR (mbs_offset1);
575089857Sobrien	      FREE_VAR (is_binary);
575189857Sobrien	      return -2;
575289857Sobrien	    }
575389857Sobrien	}
575489857Sobrien      if (csize2 != 0)
575589857Sobrien	{
575689857Sobrien	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
575789857Sobrien	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
575889857Sobrien	  is_binary = REGEX_TALLOC (csize2 + 1, char);
575989857Sobrien	  if (!string2 || !mbs_offset2 || !is_binary)
576089857Sobrien	    {
576189857Sobrien	      FREE_VAR (string1);
576289857Sobrien	      FREE_VAR (mbs_offset1);
576389857Sobrien	      FREE_VAR (string2);
576489857Sobrien	      FREE_VAR (mbs_offset2);
576589857Sobrien	      FREE_VAR (is_binary);
576689857Sobrien	      return -2;
576789857Sobrien	    }
576889857Sobrien	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
576989857Sobrien				     mbs_offset2, is_binary);
577089857Sobrien	  string2[size2] = L'\0'; /* for a sentinel  */
577189857Sobrien	  FREE_VAR (is_binary);
577289857Sobrien	}
577389857Sobrien    }
577489857Sobrien
577589857Sobrien  /* We need to cast pattern to (wchar_t*), because we casted this compiled
577689857Sobrien     pattern to (char*) in regex_compile.  */
577789857Sobrien  p = pattern = (CHAR_T*)bufp->buffer;
577889857Sobrien  pend = (CHAR_T*)(bufp->buffer + bufp->used);
577989857Sobrien
578089857Sobrien#endif /* WCHAR */
578189857Sobrien
578289857Sobrien  /* Initialize subexpression text positions to -1 to mark ones that no
578389857Sobrien     start_memory/stop_memory has been seen for. Also initialize the
578489857Sobrien     register information struct.  */
578589857Sobrien  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
578689857Sobrien    {
578789857Sobrien      regstart[mcnt] = regend[mcnt]
578889857Sobrien        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
578989857Sobrien
579089857Sobrien      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
579189857Sobrien      IS_ACTIVE (reg_info[mcnt]) = 0;
579289857Sobrien      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
579389857Sobrien      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
579489857Sobrien    }
579589857Sobrien
579689857Sobrien  /* We move `string1' into `string2' if the latter's empty -- but not if
579789857Sobrien     `string1' is null.  */
579889857Sobrien  if (size2 == 0 && string1 != NULL)
579989857Sobrien    {
580089857Sobrien      string2 = string1;
580189857Sobrien      size2 = size1;
580289857Sobrien      string1 = 0;
580389857Sobrien      size1 = 0;
580489857Sobrien#ifdef WCHAR
580589857Sobrien      mbs_offset2 = mbs_offset1;
580689857Sobrien      csize2 = csize1;
580789857Sobrien      mbs_offset1 = NULL;
580889857Sobrien      csize1 = 0;
580989857Sobrien#endif
581089857Sobrien    }
581189857Sobrien  end1 = string1 + size1;
581289857Sobrien  end2 = string2 + size2;
581389857Sobrien
581489857Sobrien  /* Compute where to stop matching, within the two strings.  */
581589857Sobrien#ifdef WCHAR
581689857Sobrien  if (stop <= csize1)
581789857Sobrien    {
581889857Sobrien      mcnt = count_mbs_length(mbs_offset1, stop);
581989857Sobrien      end_match_1 = string1 + mcnt;
582089857Sobrien      end_match_2 = string2;
582189857Sobrien    }
582289857Sobrien  else
582389857Sobrien    {
582489857Sobrien      if (stop > csize1 + csize2)
582589857Sobrien	stop = csize1 + csize2;
582689857Sobrien      end_match_1 = end1;
582789857Sobrien      mcnt = count_mbs_length(mbs_offset2, stop-csize1);
582889857Sobrien      end_match_2 = string2 + mcnt;
582989857Sobrien    }
583089857Sobrien  if (mcnt < 0)
583189857Sobrien    { /* count_mbs_length return error.  */
583289857Sobrien      FREE_VARIABLES ();
583389857Sobrien      return -1;
583489857Sobrien    }
583589857Sobrien#else
583689857Sobrien  if (stop <= size1)
583789857Sobrien    {
583889857Sobrien      end_match_1 = string1 + stop;
583989857Sobrien      end_match_2 = string2;
584089857Sobrien    }
584189857Sobrien  else
584289857Sobrien    {
584389857Sobrien      end_match_1 = end1;
584489857Sobrien      end_match_2 = string2 + stop - size1;
584589857Sobrien    }
584689857Sobrien#endif /* WCHAR */
584789857Sobrien
584889857Sobrien  /* `p' scans through the pattern as `d' scans through the data.
584989857Sobrien     `dend' is the end of the input string that `d' points within.  `d'
585089857Sobrien     is advanced into the following input string whenever necessary, but
585189857Sobrien     this happens before fetching; therefore, at the beginning of the
585289857Sobrien     loop, `d' can be pointing at the end of a string, but it cannot
585389857Sobrien     equal `string2'.  */
585489857Sobrien#ifdef WCHAR
585589857Sobrien  if (size1 > 0 && pos <= csize1)
585689857Sobrien    {
585789857Sobrien      mcnt = count_mbs_length(mbs_offset1, pos);
585889857Sobrien      d = string1 + mcnt;
585989857Sobrien      dend = end_match_1;
586089857Sobrien    }
586189857Sobrien  else
586289857Sobrien    {
586389857Sobrien      mcnt = count_mbs_length(mbs_offset2, pos-csize1);
586489857Sobrien      d = string2 + mcnt;
586589857Sobrien      dend = end_match_2;
586689857Sobrien    }
586789857Sobrien
586889857Sobrien  if (mcnt < 0)
586989857Sobrien    { /* count_mbs_length return error.  */
587089857Sobrien      FREE_VARIABLES ();
587189857Sobrien      return -1;
587289857Sobrien    }
587389857Sobrien#else
587489857Sobrien  if (size1 > 0 && pos <= size1)
587589857Sobrien    {
587689857Sobrien      d = string1 + pos;
587789857Sobrien      dend = end_match_1;
587889857Sobrien    }
587989857Sobrien  else
588089857Sobrien    {
588189857Sobrien      d = string2 + pos - size1;
588289857Sobrien      dend = end_match_2;
588389857Sobrien    }
588489857Sobrien#endif /* WCHAR */
588589857Sobrien
588689857Sobrien  DEBUG_PRINT1 ("The compiled pattern is:\n");
588789857Sobrien  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
588889857Sobrien  DEBUG_PRINT1 ("The string to match is: `");
588989857Sobrien  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
589089857Sobrien  DEBUG_PRINT1 ("'\n");
589189857Sobrien
589289857Sobrien  /* This loops over pattern commands.  It exits by returning from the
589389857Sobrien     function if the match is complete, or it drops through if the match
589489857Sobrien     fails at this starting point in the input data.  */
589589857Sobrien  for (;;)
589689857Sobrien    {
589789857Sobrien#ifdef _LIBC
589889857Sobrien      DEBUG_PRINT2 ("\n%p: ", p);
589989857Sobrien#else
590089857Sobrien      DEBUG_PRINT2 ("\n0x%x: ", p);
590189857Sobrien#endif
590289857Sobrien
590389857Sobrien      if (p == pend)
590489857Sobrien	{ /* End of pattern means we might have succeeded.  */
590589857Sobrien          DEBUG_PRINT1 ("end of pattern ... ");
590689857Sobrien
590789857Sobrien	  /* If we haven't matched the entire string, and we want the
590889857Sobrien             longest match, try backtracking.  */
590989857Sobrien          if (d != end_match_2)
591089857Sobrien	    {
591189857Sobrien	      /* 1 if this match ends in the same string (string1 or string2)
591289857Sobrien		 as the best previous match.  */
591389857Sobrien	      boolean same_str_p = (FIRST_STRING_P (match_end)
591489857Sobrien				    == MATCHING_IN_FIRST_STRING);
591589857Sobrien	      /* 1 if this match is the best seen so far.  */
591689857Sobrien	      boolean best_match_p;
591789857Sobrien
591889857Sobrien	      /* AIX compiler got confused when this was combined
591989857Sobrien		 with the previous declaration.  */
592089857Sobrien	      if (same_str_p)
592189857Sobrien		best_match_p = d > match_end;
592289857Sobrien	      else
592389857Sobrien		best_match_p = !MATCHING_IN_FIRST_STRING;
592489857Sobrien
592589857Sobrien              DEBUG_PRINT1 ("backtracking.\n");
592689857Sobrien
592789857Sobrien              if (!FAIL_STACK_EMPTY ())
592889857Sobrien                { /* More failure points to try.  */
592989857Sobrien
593089857Sobrien                  /* If exceeds best match so far, save it.  */
593189857Sobrien                  if (!best_regs_set || best_match_p)
593289857Sobrien                    {
593389857Sobrien                      best_regs_set = true;
593489857Sobrien                      match_end = d;
593589857Sobrien
593689857Sobrien                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
593789857Sobrien
593889857Sobrien                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
593989857Sobrien                        {
594089857Sobrien                          best_regstart[mcnt] = regstart[mcnt];
594189857Sobrien                          best_regend[mcnt] = regend[mcnt];
594289857Sobrien                        }
594389857Sobrien                    }
594489857Sobrien                  goto fail;
594589857Sobrien                }
594689857Sobrien
594789857Sobrien              /* If no failure points, don't restore garbage.  And if
594889857Sobrien                 last match is real best match, don't restore second
594989857Sobrien                 best one. */
595089857Sobrien              else if (best_regs_set && !best_match_p)
595189857Sobrien                {
595289857Sobrien  	        restore_best_regs:
595389857Sobrien                  /* Restore best match.  It may happen that `dend ==
595489857Sobrien                     end_match_1' while the restored d is in string2.
595589857Sobrien                     For example, the pattern `x.*y.*z' against the
595689857Sobrien                     strings `x-' and `y-z-', if the two strings are
595789857Sobrien                     not consecutive in memory.  */
595889857Sobrien                  DEBUG_PRINT1 ("Restoring best registers.\n");
595989857Sobrien
596089857Sobrien                  d = match_end;
596189857Sobrien                  dend = ((d >= string1 && d <= end1)
596289857Sobrien		           ? end_match_1 : end_match_2);
596389857Sobrien
596489857Sobrien		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
596589857Sobrien		    {
596689857Sobrien		      regstart[mcnt] = best_regstart[mcnt];
596789857Sobrien		      regend[mcnt] = best_regend[mcnt];
596889857Sobrien		    }
596989857Sobrien                }
597089857Sobrien            } /* d != end_match_2 */
597189857Sobrien
597289857Sobrien	succeed_label:
597389857Sobrien          DEBUG_PRINT1 ("Accepting match.\n");
597489857Sobrien          /* If caller wants register contents data back, do it.  */
597589857Sobrien          if (regs && !bufp->no_sub)
597689857Sobrien	    {
597789857Sobrien	      /* Have the register data arrays been allocated?  */
597889857Sobrien              if (bufp->regs_allocated == REGS_UNALLOCATED)
597989857Sobrien                { /* No.  So allocate them with malloc.  We need one
598089857Sobrien                     extra element beyond `num_regs' for the `-1' marker
598189857Sobrien                     GNU code uses.  */
598289857Sobrien                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
598389857Sobrien                  regs->start = TALLOC (regs->num_regs, regoff_t);
598489857Sobrien                  regs->end = TALLOC (regs->num_regs, regoff_t);
598589857Sobrien                  if (regs->start == NULL || regs->end == NULL)
598689857Sobrien		    {
598789857Sobrien		      FREE_VARIABLES ();
598889857Sobrien		      return -2;
598989857Sobrien		    }
599089857Sobrien                  bufp->regs_allocated = REGS_REALLOCATE;
599189857Sobrien                }
599289857Sobrien              else if (bufp->regs_allocated == REGS_REALLOCATE)
599389857Sobrien                { /* Yes.  If we need more elements than were already
599489857Sobrien                     allocated, reallocate them.  If we need fewer, just
599589857Sobrien                     leave it alone.  */
599689857Sobrien                  if (regs->num_regs < num_regs + 1)
599789857Sobrien                    {
599889857Sobrien                      regs->num_regs = num_regs + 1;
599989857Sobrien                      RETALLOC (regs->start, regs->num_regs, regoff_t);
600089857Sobrien                      RETALLOC (regs->end, regs->num_regs, regoff_t);
600189857Sobrien                      if (regs->start == NULL || regs->end == NULL)
600289857Sobrien			{
600389857Sobrien			  FREE_VARIABLES ();
600489857Sobrien			  return -2;
600589857Sobrien			}
600689857Sobrien                    }
600789857Sobrien                }
600889857Sobrien              else
600989857Sobrien		{
601089857Sobrien		  /* These braces fend off a "empty body in an else-statement"
601189857Sobrien		     warning under GCC when assert expands to nothing.  */
601289857Sobrien		  assert (bufp->regs_allocated == REGS_FIXED);
601389857Sobrien		}
601489857Sobrien
601589857Sobrien              /* Convert the pointer data in `regstart' and `regend' to
601689857Sobrien                 indices.  Register zero has to be set differently,
601789857Sobrien                 since we haven't kept track of any info for it.  */
601889857Sobrien              if (regs->num_regs > 0)
601989857Sobrien                {
602089857Sobrien                  regs->start[0] = pos;
602189857Sobrien#ifdef WCHAR
602289857Sobrien		  if (MATCHING_IN_FIRST_STRING)
602389857Sobrien		    regs->end[0] = mbs_offset1 != NULL ?
602489857Sobrien					mbs_offset1[d-string1] : 0;
602589857Sobrien		  else
602689857Sobrien		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
602789857Sobrien					     mbs_offset2[d-string2] : 0);
602889857Sobrien#else
602989857Sobrien                  regs->end[0] = (MATCHING_IN_FIRST_STRING
603089857Sobrien				  ? ((regoff_t) (d - string1))
603189857Sobrien			          : ((regoff_t) (d - string2 + size1)));
603289857Sobrien#endif /* WCHAR */
603389857Sobrien                }
603489857Sobrien
603589857Sobrien              /* Go through the first `min (num_regs, regs->num_regs)'
603689857Sobrien                 registers, since that is all we initialized.  */
603789857Sobrien	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
603889857Sobrien		   mcnt++)
603989857Sobrien		{
604089857Sobrien                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
604189857Sobrien                    regs->start[mcnt] = regs->end[mcnt] = -1;
604289857Sobrien                  else
604389857Sobrien                    {
604489857Sobrien		      regs->start[mcnt]
604589857Sobrien			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
604689857Sobrien                      regs->end[mcnt]
604789857Sobrien			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
604889857Sobrien                    }
604989857Sobrien		}
605089857Sobrien
605189857Sobrien              /* If the regs structure we return has more elements than
605289857Sobrien                 were in the pattern, set the extra elements to -1.  If
605389857Sobrien                 we (re)allocated the registers, this is the case,
605489857Sobrien                 because we always allocate enough to have at least one
605589857Sobrien                 -1 at the end.  */
605689857Sobrien              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
605789857Sobrien                regs->start[mcnt] = regs->end[mcnt] = -1;
605889857Sobrien	    } /* regs && !bufp->no_sub */
605989857Sobrien
606089857Sobrien          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
606189857Sobrien                        nfailure_points_pushed, nfailure_points_popped,
606289857Sobrien                        nfailure_points_pushed - nfailure_points_popped);
606389857Sobrien          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
606489857Sobrien
606589857Sobrien#ifdef WCHAR
606689857Sobrien	  if (MATCHING_IN_FIRST_STRING)
606789857Sobrien	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
606889857Sobrien	  else
606989857Sobrien	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
607089857Sobrien			csize1;
607189857Sobrien          mcnt -= pos;
607289857Sobrien#else
607389857Sobrien          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
607489857Sobrien			    ? string1
607589857Sobrien			    : string2 - size1);
607689857Sobrien#endif /* WCHAR */
607789857Sobrien
607889857Sobrien          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
607989857Sobrien
608089857Sobrien          FREE_VARIABLES ();
608189857Sobrien          return mcnt;
608289857Sobrien        }
608389857Sobrien
608489857Sobrien      /* Otherwise match next pattern command.  */
608589857Sobrien      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
608689857Sobrien	{
608789857Sobrien        /* Ignore these.  Used to ignore the n of succeed_n's which
608889857Sobrien           currently have n == 0.  */
608989857Sobrien        case no_op:
609089857Sobrien          DEBUG_PRINT1 ("EXECUTING no_op.\n");
609189857Sobrien          break;
609289857Sobrien
609389857Sobrien	case succeed:
609489857Sobrien          DEBUG_PRINT1 ("EXECUTING succeed.\n");
609589857Sobrien	  goto succeed_label;
609689857Sobrien
609789857Sobrien        /* Match the next n pattern characters exactly.  The following
609889857Sobrien           byte in the pattern defines n, and the n bytes after that
609989857Sobrien           are the characters to match.  */
610089857Sobrien	case exactn:
610189857Sobrien#ifdef MBS_SUPPORT
610289857Sobrien	case exactn_bin:
610389857Sobrien#endif
610489857Sobrien	  mcnt = *p++;
610589857Sobrien          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
610689857Sobrien
610789857Sobrien          /* This is written out as an if-else so we don't waste time
610889857Sobrien             testing `translate' inside the loop.  */
610989857Sobrien          if (translate)
611089857Sobrien	    {
611189857Sobrien	      do
611289857Sobrien		{
611389857Sobrien		  PREFETCH ();
611489857Sobrien#ifdef WCHAR
611589857Sobrien		  if (*d <= 0xff)
611689857Sobrien		    {
611789857Sobrien		      if ((UCHAR_T) translate[(unsigned char) *d++]
611889857Sobrien			  != (UCHAR_T) *p++)
611989857Sobrien			goto fail;
612089857Sobrien		    }
612189857Sobrien		  else
612289857Sobrien		    {
612389857Sobrien		      if (*d++ != (CHAR_T) *p++)
612489857Sobrien			goto fail;
612589857Sobrien		    }
612689857Sobrien#else
612789857Sobrien		  if ((UCHAR_T) translate[(unsigned char) *d++]
612889857Sobrien		      != (UCHAR_T) *p++)
612989857Sobrien                    goto fail;
613089857Sobrien#endif /* WCHAR */
613189857Sobrien		}
613289857Sobrien	      while (--mcnt);
613389857Sobrien	    }
613489857Sobrien	  else
613589857Sobrien	    {
613689857Sobrien	      do
613789857Sobrien		{
613889857Sobrien		  PREFETCH ();
613989857Sobrien		  if (*d++ != (CHAR_T) *p++) goto fail;
614089857Sobrien		}
614189857Sobrien	      while (--mcnt);
614289857Sobrien	    }
614389857Sobrien	  SET_REGS_MATCHED ();
614489857Sobrien          break;
614589857Sobrien
614689857Sobrien
614789857Sobrien        /* Match any character except possibly a newline or a null.  */
614889857Sobrien	case anychar:
614989857Sobrien          DEBUG_PRINT1 ("EXECUTING anychar.\n");
615089857Sobrien
615189857Sobrien          PREFETCH ();
615289857Sobrien
615389857Sobrien          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
615489857Sobrien              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
615589857Sobrien	    goto fail;
615689857Sobrien
615789857Sobrien          SET_REGS_MATCHED ();
615889857Sobrien          DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
615989857Sobrien          d++;
616089857Sobrien	  break;
616189857Sobrien
616289857Sobrien
616389857Sobrien	case charset:
616489857Sobrien	case charset_not:
616589857Sobrien	  {
616689857Sobrien	    register UCHAR_T c;
616789857Sobrien#ifdef WCHAR
616889857Sobrien	    unsigned int i, char_class_length, coll_symbol_length,
616989857Sobrien              equiv_class_length, ranges_length, chars_length, length;
617089857Sobrien	    CHAR_T *workp, *workp2, *charset_top;
617189857Sobrien#define WORK_BUFFER_SIZE 128
617289857Sobrien            CHAR_T str_buf[WORK_BUFFER_SIZE];
617389857Sobrien# ifdef _LIBC
617489857Sobrien	    uint32_t nrules;
617589857Sobrien# endif /* _LIBC */
617689857Sobrien#endif /* WCHAR */
6177218822Sdim	    boolean negate = (re_opcode_t) *(p - 1) == charset_not;
617889857Sobrien
6179218822Sdim            DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
618089857Sobrien	    PREFETCH ();
618189857Sobrien	    c = TRANSLATE (*d); /* The character to match.  */
618289857Sobrien#ifdef WCHAR
618389857Sobrien# ifdef _LIBC
618489857Sobrien	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
618589857Sobrien# endif /* _LIBC */
618689857Sobrien	    charset_top = p - 1;
618789857Sobrien	    char_class_length = *p++;
618889857Sobrien	    coll_symbol_length = *p++;
618989857Sobrien	    equiv_class_length = *p++;
619089857Sobrien	    ranges_length = *p++;
619189857Sobrien	    chars_length = *p++;
619289857Sobrien	    /* p points to charset[6], so the address of the next instruction
619389857Sobrien	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
619489857Sobrien	       where l=length of char_classes, m=length of collating_symbol,
619589857Sobrien	       n=equivalence_class, o=length of char_range,
619689857Sobrien	       p'=length of character.  */
619789857Sobrien	    workp = p;
619889857Sobrien	    /* Update p to indicate the next instruction.  */
619989857Sobrien	    p += char_class_length + coll_symbol_length+ equiv_class_length +
620089857Sobrien              2*ranges_length + chars_length;
620189857Sobrien
620289857Sobrien            /* match with char_class?  */
620389857Sobrien	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
620489857Sobrien	      {
620589857Sobrien		wctype_t wctype;
620689857Sobrien		uintptr_t alignedp = ((uintptr_t)workp
620789857Sobrien				      + __alignof__(wctype_t) - 1)
620889857Sobrien		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
620989857Sobrien		wctype = *((wctype_t*)alignedp);
621089857Sobrien		workp += CHAR_CLASS_SIZE;
621189857Sobrien# ifdef _LIBC
621289857Sobrien		if (__iswctype((wint_t)c, wctype))
621389857Sobrien		  goto char_set_matched;
621489857Sobrien# else
621589857Sobrien		if (iswctype((wint_t)c, wctype))
621689857Sobrien		  goto char_set_matched;
621789857Sobrien# endif
621889857Sobrien	      }
621989857Sobrien
622089857Sobrien            /* match with collating_symbol?  */
622189857Sobrien# ifdef _LIBC
622289857Sobrien	    if (nrules != 0)
622389857Sobrien	      {
622489857Sobrien		const unsigned char *extra = (const unsigned char *)
622589857Sobrien		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
622689857Sobrien
622789857Sobrien		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
622889857Sobrien		     workp++)
622989857Sobrien		  {
623089857Sobrien		    int32_t *wextra;
623189857Sobrien		    wextra = (int32_t*)(extra + *workp++);
623289857Sobrien		    for (i = 0; i < *wextra; ++i)
623389857Sobrien		      if (TRANSLATE(d[i]) != wextra[1 + i])
623489857Sobrien			break;
623589857Sobrien
623689857Sobrien		    if (i == *wextra)
623789857Sobrien		      {
623889857Sobrien			/* Advance d past the match; since d is incremented again
623989857Sobrien			   at char_set_matched:, back off by one here.  */
624089857Sobrien			d += i - 1;
624189857Sobrien			goto char_set_matched;
624289857Sobrien		      }
624389857Sobrien		  }
624489857Sobrien	      }
624589857Sobrien	    else /* (nrules == 0) */
624689857Sobrien# endif
624789857Sobrien	      /* If we can't look up collation data, we use wcscoll
624889857Sobrien		 instead.  */
624989857Sobrien	      {
625089857Sobrien		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
625189857Sobrien		  {
625289857Sobrien		    const CHAR_T *backup_d = d, *backup_dend = dend;
625389857Sobrien# ifdef _LIBC
625489857Sobrien		    length = __wcslen (workp);
625589857Sobrien# else
625689857Sobrien		    length = wcslen (workp);
625789857Sobrien# endif
625889857Sobrien
625989857Sobrien		    /* If wcscoll(the collating symbol, whole string) > 0,
626089857Sobrien		       no substring of the string can ever match the
626189857Sobrien		       collating symbol.  */
626289857Sobrien# ifdef _LIBC
626389857Sobrien		    if (__wcscoll (workp, d) > 0)
626489857Sobrien# else
626589857Sobrien		    if (wcscoll (workp, d) > 0)
626689857Sobrien# endif
626789857Sobrien		      {
626889857Sobrien			workp += length + 1;
626989857Sobrien			continue;
627089857Sobrien		      }
627189857Sobrien
627289857Sobrien		    /* First, we compare the collating symbol with
627389857Sobrien		       the first character of the string.
627489857Sobrien		       If it doesn't match, we add the next character to
627589857Sobrien		       the compare buffer in turn.  */
627689857Sobrien		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
627789857Sobrien		      {
627889857Sobrien			int match;
627989857Sobrien			if (d == dend)
628089857Sobrien			  {
628189857Sobrien			    if (dend == end_match_2)
628289857Sobrien			      break;
628389857Sobrien			    d = string2;
628489857Sobrien			    dend = end_match_2;
628589857Sobrien			  }
628689857Sobrien
628789857Sobrien			/* add next character to the compare buffer.  */
628889857Sobrien			str_buf[i] = TRANSLATE(*d);
628989857Sobrien			str_buf[i+1] = '\0';
629089857Sobrien
629189857Sobrien# ifdef _LIBC
629289857Sobrien			match = __wcscoll (workp, str_buf);
629389857Sobrien# else
629489857Sobrien			match = wcscoll (workp, str_buf);
629589857Sobrien# endif
629689857Sobrien			if (match == 0)
629789857Sobrien			  goto char_set_matched;
629889857Sobrien
629989857Sobrien			if (match < 0)
630089857Sobrien			  /* (str_buf > workp) implies (str_buf + X > workp),
630189857Sobrien			     because for all X (str_buf + X > str_buf).
630289857Sobrien			     So we don't need to continue this loop.  */
630389857Sobrien			  break;
630489857Sobrien
630589857Sobrien			/* Otherwise(str_buf < workp),
630689857Sobrien			   (str_buf+next_character) may equal (workp).
630789857Sobrien			   So we continue this loop.  */
630889857Sobrien		      }
630989857Sobrien		    /* not matched */
631089857Sobrien		    d = backup_d;
631189857Sobrien		    dend = backup_dend;
631289857Sobrien		    workp += length + 1;
631389857Sobrien		  }
631489857Sobrien              }
631589857Sobrien            /* match with equivalence_class?  */
631689857Sobrien# ifdef _LIBC
631789857Sobrien	    if (nrules != 0)
631889857Sobrien	      {
631989857Sobrien                const CHAR_T *backup_d = d, *backup_dend = dend;
632089857Sobrien		/* Try to match the equivalence class against
632189857Sobrien		   those known to the collate implementation.  */
632289857Sobrien		const int32_t *table;
632389857Sobrien		const int32_t *weights;
632489857Sobrien		const int32_t *extra;
632589857Sobrien		const int32_t *indirect;
632689857Sobrien		int32_t idx, idx2;
632789857Sobrien		wint_t *cp;
632889857Sobrien		size_t len;
632989857Sobrien
633089857Sobrien		/* This #include defines a local function!  */
633189857Sobrien#  include <locale/weightwc.h>
633289857Sobrien
633389857Sobrien		table = (const int32_t *)
633489857Sobrien		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
633589857Sobrien		weights = (const wint_t *)
633689857Sobrien		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
633789857Sobrien		extra = (const wint_t *)
633889857Sobrien		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
633989857Sobrien		indirect = (const int32_t *)
634089857Sobrien		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
634189857Sobrien
634289857Sobrien		/* Write 1 collating element to str_buf, and
634389857Sobrien		   get its index.  */
634489857Sobrien		idx2 = 0;
634589857Sobrien
634689857Sobrien		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
634789857Sobrien		  {
634889857Sobrien		    cp = (wint_t*)str_buf;
634989857Sobrien		    if (d == dend)
635089857Sobrien		      {
635189857Sobrien			if (dend == end_match_2)
635289857Sobrien			  break;
635389857Sobrien			d = string2;
635489857Sobrien			dend = end_match_2;
635589857Sobrien		      }
635689857Sobrien		    str_buf[i] = TRANSLATE(*(d+i));
635789857Sobrien		    str_buf[i+1] = '\0'; /* sentinel */
635889857Sobrien		    idx2 = findidx ((const wint_t**)&cp);
635989857Sobrien		  }
636089857Sobrien
636189857Sobrien		/* Advance d past the element; since d is incremented again
636289857Sobrien		   at char_set_matched:, back off by one here.  */
636389857Sobrien		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
636489857Sobrien		if (d >= dend)
636589857Sobrien		  {
636689857Sobrien		    if (dend == end_match_2)
636789857Sobrien			d = dend;
636889857Sobrien		    else
636989857Sobrien		      {
637089857Sobrien			d = string2;
637189857Sobrien			dend = end_match_2;
637289857Sobrien		      }
637389857Sobrien		  }
637489857Sobrien
637589857Sobrien		len = weights[idx2];
637689857Sobrien
637789857Sobrien		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
637889857Sobrien		     workp++)
637989857Sobrien		  {
638089857Sobrien		    idx = (int32_t)*workp;
638189857Sobrien		    /* We already checked idx != 0 in regex_compile. */
638289857Sobrien
638389857Sobrien		    if (idx2 != 0 && len == weights[idx])
638489857Sobrien		      {
638589857Sobrien			int cnt = 0;
638689857Sobrien			while (cnt < len && (weights[idx + 1 + cnt]
638789857Sobrien					     == weights[idx2 + 1 + cnt]))
638889857Sobrien			  ++cnt;
638989857Sobrien
639089857Sobrien			if (cnt == len)
639189857Sobrien			  goto char_set_matched;
639289857Sobrien		      }
639389857Sobrien		  }
639489857Sobrien		/* not matched */
639589857Sobrien                d = backup_d;
639689857Sobrien                dend = backup_dend;
639789857Sobrien	      }
639889857Sobrien	    else /* (nrules == 0) */
639989857Sobrien# endif
640089857Sobrien	      /* If we can't look up collation data, we use wcscoll
640189857Sobrien		 instead.  */
640289857Sobrien	      {
640389857Sobrien		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
640489857Sobrien		  {
640589857Sobrien		    const CHAR_T *backup_d = d, *backup_dend = dend;
640689857Sobrien# ifdef _LIBC
640789857Sobrien		    length = __wcslen (workp);
640889857Sobrien# else
640989857Sobrien		    length = wcslen (workp);
641089857Sobrien# endif
641189857Sobrien
641289857Sobrien		    /* If wcscoll(the equivalence class, whole string) > 0,
641389857Sobrien		       no substring of the string can ever match the
641489857Sobrien		       equivalence class.  */
641589857Sobrien# ifdef _LIBC
641689857Sobrien		    if (__wcscoll (workp, d) > 0)
641789857Sobrien# else
641889857Sobrien		    if (wcscoll (workp, d) > 0)
641989857Sobrien# endif
642089857Sobrien		      {
642189857Sobrien			workp += length + 1;
642289857Sobrien			break;
642389857Sobrien		      }
642489857Sobrien
642589857Sobrien		    /* First, we compare the equivalence class with
642689857Sobrien		       the first character of the string.
642789857Sobrien		       If it doesn't match, we add the next character to
642889857Sobrien		       the compare buffer in turn.  */
642989857Sobrien		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
643089857Sobrien		      {
643189857Sobrien			int match;
643289857Sobrien			if (d == dend)
643389857Sobrien			  {
643489857Sobrien			    if (dend == end_match_2)
643589857Sobrien			      break;
643689857Sobrien			    d = string2;
643789857Sobrien			    dend = end_match_2;
643889857Sobrien			  }
643989857Sobrien
644089857Sobrien			/* add next character to the compare buffer.  */
644189857Sobrien			str_buf[i] = TRANSLATE(*d);
644289857Sobrien			str_buf[i+1] = '\0';
644389857Sobrien
644489857Sobrien# ifdef _LIBC
644589857Sobrien			match = __wcscoll (workp, str_buf);
644689857Sobrien# else
644789857Sobrien			match = wcscoll (workp, str_buf);
644889857Sobrien# endif
644989857Sobrien
645089857Sobrien			if (match == 0)
645189857Sobrien			  goto char_set_matched;
645289857Sobrien
645389857Sobrien			if (match < 0)
645489857Sobrien			/* (str_buf > workp) implies (str_buf + X > workp),
645589857Sobrien			   because for all X (str_buf + X > str_buf).
645689857Sobrien			   So we don't need to continue this loop.  */
645789857Sobrien			  break;
645889857Sobrien
645989857Sobrien			/* Otherwise(str_buf < workp),
646089857Sobrien			   (str_buf+next_character) may equal (workp).
646189857Sobrien			   So we continue this loop.  */
646289857Sobrien		      }
646389857Sobrien		    /* not matched */
646489857Sobrien		    d = backup_d;
646589857Sobrien		    dend = backup_dend;
646689857Sobrien		    workp += length + 1;
646789857Sobrien		  }
646889857Sobrien	      }
646989857Sobrien
647089857Sobrien            /* match with char_range?  */
647189857Sobrien# ifdef _LIBC
647289857Sobrien	    if (nrules != 0)
647389857Sobrien	      {
647489857Sobrien		uint32_t collseqval;
647589857Sobrien		const char *collseq = (const char *)
647689857Sobrien		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
647789857Sobrien
647889857Sobrien		collseqval = collseq_table_lookup (collseq, c);
647989857Sobrien
648089857Sobrien		for (; workp < p - chars_length ;)
648189857Sobrien		  {
648289857Sobrien		    uint32_t start_val, end_val;
648389857Sobrien
648489857Sobrien		    /* We already compute the collation sequence value
648589857Sobrien		       of the characters (or collating symbols).  */
648689857Sobrien		    start_val = (uint32_t) *workp++; /* range_start */
648789857Sobrien		    end_val = (uint32_t) *workp++; /* range_end */
648889857Sobrien
648989857Sobrien		    if (start_val <= collseqval && collseqval <= end_val)
649089857Sobrien		      goto char_set_matched;
649189857Sobrien		  }
649289857Sobrien	      }
649389857Sobrien	    else
649489857Sobrien# endif
649589857Sobrien	      {
649689857Sobrien		/* We set range_start_char at str_buf[0], range_end_char
649789857Sobrien		   at str_buf[4], and compared char at str_buf[2].  */
649889857Sobrien		str_buf[1] = 0;
649989857Sobrien		str_buf[2] = c;
650089857Sobrien		str_buf[3] = 0;
650189857Sobrien		str_buf[5] = 0;
650289857Sobrien		for (; workp < p - chars_length ;)
650389857Sobrien		  {
650489857Sobrien		    wchar_t *range_start_char, *range_end_char;
650589857Sobrien
650689857Sobrien		    /* match if (range_start_char <= c <= range_end_char).  */
650789857Sobrien
650889857Sobrien		    /* If range_start(or end) < 0, we assume -range_start(end)
650989857Sobrien		       is the offset of the collating symbol which is specified
651089857Sobrien		       as the character of the range start(end).  */
651189857Sobrien
651289857Sobrien		    /* range_start */
651389857Sobrien		    if (*workp < 0)
651489857Sobrien		      range_start_char = charset_top - (*workp++);
651589857Sobrien		    else
651689857Sobrien		      {
651789857Sobrien			str_buf[0] = *workp++;
651889857Sobrien			range_start_char = str_buf;
651989857Sobrien		      }
652089857Sobrien
652189857Sobrien		    /* range_end */
652289857Sobrien		    if (*workp < 0)
652389857Sobrien		      range_end_char = charset_top - (*workp++);
652489857Sobrien		    else
652589857Sobrien		      {
652689857Sobrien			str_buf[4] = *workp++;
652789857Sobrien			range_end_char = str_buf + 4;
652889857Sobrien		      }
652989857Sobrien
653089857Sobrien# ifdef _LIBC
653189857Sobrien		    if (__wcscoll (range_start_char, str_buf+2) <= 0
653289857Sobrien			&& __wcscoll (str_buf+2, range_end_char) <= 0)
653389857Sobrien# else
653489857Sobrien		    if (wcscoll (range_start_char, str_buf+2) <= 0
653589857Sobrien			&& wcscoll (str_buf+2, range_end_char) <= 0)
653689857Sobrien# endif
653789857Sobrien		      goto char_set_matched;
653889857Sobrien		  }
653989857Sobrien	      }
654089857Sobrien
654189857Sobrien            /* match with char?  */
654289857Sobrien	    for (; workp < p ; workp++)
654389857Sobrien	      if (c == *workp)
654489857Sobrien		goto char_set_matched;
654589857Sobrien
6546218822Sdim	    negate = !negate;
654789857Sobrien
654889857Sobrien	  char_set_matched:
6549218822Sdim	    if (negate) goto fail;
655089857Sobrien#else
655189857Sobrien            /* Cast to `unsigned' instead of `unsigned char' in case the
655289857Sobrien               bit list is a full 32 bytes long.  */
655389857Sobrien	    if (c < (unsigned) (*p * BYTEWIDTH)
655489857Sobrien		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6555218822Sdim	      negate = !negate;
655689857Sobrien
655789857Sobrien	    p += 1 + *p;
655889857Sobrien
6559218822Sdim	    if (!negate) goto fail;
656089857Sobrien#undef WORK_BUFFER_SIZE
656189857Sobrien#endif /* WCHAR */
656289857Sobrien	    SET_REGS_MATCHED ();
656389857Sobrien            d++;
656489857Sobrien	    break;
656589857Sobrien	  }
656689857Sobrien
656789857Sobrien
656889857Sobrien        /* The beginning of a group is represented by start_memory.
656989857Sobrien           The arguments are the register number in the next byte, and the
657089857Sobrien           number of groups inner to this one in the next.  The text
657189857Sobrien           matched within the group is recorded (in the internal
657289857Sobrien           registers data structure) under the register number.  */
657389857Sobrien        case start_memory:
657489857Sobrien	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
657589857Sobrien			(long int) *p, (long int) p[1]);
657689857Sobrien
657789857Sobrien          /* Find out if this group can match the empty string.  */
657889857Sobrien	  p1 = p;		/* To send to group_match_null_string_p.  */
657989857Sobrien
658089857Sobrien          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
658189857Sobrien            REG_MATCH_NULL_STRING_P (reg_info[*p])
658289857Sobrien              = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
658389857Sobrien
658489857Sobrien          /* Save the position in the string where we were the last time
658589857Sobrien             we were at this open-group operator in case the group is
658689857Sobrien             operated upon by a repetition operator, e.g., with `(a*)*b'
658789857Sobrien             against `ab'; then we want to ignore where we are now in
658889857Sobrien             the string in case this attempt to match fails.  */
658989857Sobrien          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
659089857Sobrien                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
659189857Sobrien                             : regstart[*p];
659289857Sobrien	  DEBUG_PRINT2 ("  old_regstart: %d\n",
659389857Sobrien			 POINTER_TO_OFFSET (old_regstart[*p]));
659489857Sobrien
659589857Sobrien          regstart[*p] = d;
659689857Sobrien	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
659789857Sobrien
659889857Sobrien          IS_ACTIVE (reg_info[*p]) = 1;
659989857Sobrien          MATCHED_SOMETHING (reg_info[*p]) = 0;
660089857Sobrien
660189857Sobrien	  /* Clear this whenever we change the register activity status.  */
660289857Sobrien	  set_regs_matched_done = 0;
660389857Sobrien
660489857Sobrien          /* This is the new highest active register.  */
660589857Sobrien          highest_active_reg = *p;
660689857Sobrien
660789857Sobrien          /* If nothing was active before, this is the new lowest active
660889857Sobrien             register.  */
660989857Sobrien          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
661089857Sobrien            lowest_active_reg = *p;
661189857Sobrien
661289857Sobrien          /* Move past the register number and inner group count.  */
661389857Sobrien          p += 2;
661489857Sobrien	  just_past_start_mem = p;
661589857Sobrien
661689857Sobrien          break;
661789857Sobrien
661889857Sobrien
661989857Sobrien        /* The stop_memory opcode represents the end of a group.  Its
662089857Sobrien           arguments are the same as start_memory's: the register
662189857Sobrien           number, and the number of inner groups.  */
662289857Sobrien	case stop_memory:
662389857Sobrien	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
662489857Sobrien			(long int) *p, (long int) p[1]);
662589857Sobrien
662689857Sobrien          /* We need to save the string position the last time we were at
662789857Sobrien             this close-group operator in case the group is operated
662889857Sobrien             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
662989857Sobrien             against `aba'; then we want to ignore where we are now in
663089857Sobrien             the string in case this attempt to match fails.  */
663189857Sobrien          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
663289857Sobrien                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
663389857Sobrien			   : regend[*p];
663489857Sobrien	  DEBUG_PRINT2 ("      old_regend: %d\n",
663589857Sobrien			 POINTER_TO_OFFSET (old_regend[*p]));
663689857Sobrien
663789857Sobrien          regend[*p] = d;
663889857Sobrien	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
663989857Sobrien
664089857Sobrien          /* This register isn't active anymore.  */
664189857Sobrien          IS_ACTIVE (reg_info[*p]) = 0;
664289857Sobrien
664389857Sobrien	  /* Clear this whenever we change the register activity status.  */
664489857Sobrien	  set_regs_matched_done = 0;
664589857Sobrien
664689857Sobrien          /* If this was the only register active, nothing is active
664789857Sobrien             anymore.  */
664889857Sobrien          if (lowest_active_reg == highest_active_reg)
664989857Sobrien            {
665089857Sobrien              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
665189857Sobrien              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
665289857Sobrien            }
665389857Sobrien          else
665489857Sobrien            { /* We must scan for the new highest active register, since
665589857Sobrien                 it isn't necessarily one less than now: consider
665689857Sobrien                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
665789857Sobrien                 new highest active register is 1.  */
665889857Sobrien              UCHAR_T r = *p - 1;
665989857Sobrien              while (r > 0 && !IS_ACTIVE (reg_info[r]))
666089857Sobrien                r--;
666189857Sobrien
666289857Sobrien              /* If we end up at register zero, that means that we saved
666389857Sobrien                 the registers as the result of an `on_failure_jump', not
666489857Sobrien                 a `start_memory', and we jumped past the innermost
666589857Sobrien                 `stop_memory'.  For example, in ((.)*) we save
666689857Sobrien                 registers 1 and 2 as a result of the *, but when we pop
666789857Sobrien                 back to the second ), we are at the stop_memory 1.
666889857Sobrien                 Thus, nothing is active.  */
666989857Sobrien	      if (r == 0)
667089857Sobrien                {
667189857Sobrien                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
667289857Sobrien                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
667389857Sobrien                }
667489857Sobrien              else
667589857Sobrien                highest_active_reg = r;
667689857Sobrien            }
667789857Sobrien
667889857Sobrien          /* If just failed to match something this time around with a
667989857Sobrien             group that's operated on by a repetition operator, try to
668089857Sobrien             force exit from the ``loop'', and restore the register
668189857Sobrien             information for this group that we had before trying this
668289857Sobrien             last match.  */
668389857Sobrien          if ((!MATCHED_SOMETHING (reg_info[*p])
668489857Sobrien               || just_past_start_mem == p - 1)
668589857Sobrien	      && (p + 2) < pend)
668689857Sobrien            {
668789857Sobrien              boolean is_a_jump_n = false;
668889857Sobrien
668989857Sobrien              p1 = p + 2;
669089857Sobrien              mcnt = 0;
669189857Sobrien              switch ((re_opcode_t) *p1++)
669289857Sobrien                {
669389857Sobrien                  case jump_n:
669489857Sobrien		    is_a_jump_n = true;
669589857Sobrien                  case pop_failure_jump:
669689857Sobrien		  case maybe_pop_jump:
669789857Sobrien		  case jump:
669889857Sobrien		  case dummy_failure_jump:
669989857Sobrien                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
670089857Sobrien		    if (is_a_jump_n)
670189857Sobrien		      p1 += OFFSET_ADDRESS_SIZE;
670289857Sobrien                    break;
670389857Sobrien
670489857Sobrien                  default:
670589857Sobrien                    /* do nothing */ ;
670689857Sobrien                }
670789857Sobrien	      p1 += mcnt;
670889857Sobrien
670989857Sobrien              /* If the next operation is a jump backwards in the pattern
671089857Sobrien	         to an on_failure_jump right before the start_memory
671189857Sobrien                 corresponding to this stop_memory, exit from the loop
671289857Sobrien                 by forcing a failure after pushing on the stack the
671389857Sobrien                 on_failure_jump's jump in the pattern, and d.  */
671489857Sobrien              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
671589857Sobrien                  && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
671689857Sobrien		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
671789857Sobrien		{
671889857Sobrien                  /* If this group ever matched anything, then restore
671989857Sobrien                     what its registers were before trying this last
672089857Sobrien                     failed match, e.g., with `(a*)*b' against `ab' for
672189857Sobrien                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
672289857Sobrien                     against `aba' for regend[3].
672389857Sobrien
672489857Sobrien                     Also restore the registers for inner groups for,
672589857Sobrien                     e.g., `((a*)(b*))*' against `aba' (register 3 would
672689857Sobrien                     otherwise get trashed).  */
672789857Sobrien
672889857Sobrien                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
672989857Sobrien		    {
673089857Sobrien		      unsigned r;
673189857Sobrien
673289857Sobrien                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
673389857Sobrien
673489857Sobrien		      /* Restore this and inner groups' (if any) registers.  */
673589857Sobrien                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
673689857Sobrien			   r++)
673789857Sobrien                        {
673889857Sobrien                          regstart[r] = old_regstart[r];
673989857Sobrien
674089857Sobrien                          /* xx why this test?  */
674189857Sobrien                          if (old_regend[r] >= regstart[r])
674289857Sobrien                            regend[r] = old_regend[r];
674389857Sobrien                        }
674489857Sobrien                    }
674589857Sobrien		  p1++;
674689857Sobrien                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
674789857Sobrien                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
674889857Sobrien
674989857Sobrien                  goto fail;
675089857Sobrien                }
675189857Sobrien            }
675289857Sobrien
675389857Sobrien          /* Move past the register number and the inner group count.  */
675489857Sobrien          p += 2;
675589857Sobrien          break;
675689857Sobrien
675789857Sobrien
675889857Sobrien	/* \<digit> has been turned into a `duplicate' command which is
675989857Sobrien           followed by the numeric value of <digit> as the register number.  */
676089857Sobrien        case duplicate:
676189857Sobrien	  {
676289857Sobrien	    register const CHAR_T *d2, *dend2;
676389857Sobrien	    int regno = *p++;   /* Get which register to match against.  */
676489857Sobrien	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
676589857Sobrien
676689857Sobrien	    /* Can't back reference a group which we've never matched.  */
676789857Sobrien            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
676889857Sobrien              goto fail;
676989857Sobrien
677089857Sobrien            /* Where in input to try to start matching.  */
677189857Sobrien            d2 = regstart[regno];
677289857Sobrien
677389857Sobrien            /* Where to stop matching; if both the place to start and
677489857Sobrien               the place to stop matching are in the same string, then
677589857Sobrien               set to the place to stop, otherwise, for now have to use
677689857Sobrien               the end of the first string.  */
677789857Sobrien
677889857Sobrien            dend2 = ((FIRST_STRING_P (regstart[regno])
677989857Sobrien		      == FIRST_STRING_P (regend[regno]))
678089857Sobrien		     ? regend[regno] : end_match_1);
678189857Sobrien	    for (;;)
678289857Sobrien	      {
678389857Sobrien		/* If necessary, advance to next segment in register
678489857Sobrien                   contents.  */
678589857Sobrien		while (d2 == dend2)
678689857Sobrien		  {
678789857Sobrien		    if (dend2 == end_match_2) break;
678889857Sobrien		    if (dend2 == regend[regno]) break;
678989857Sobrien
679089857Sobrien                    /* End of string1 => advance to string2. */
679189857Sobrien                    d2 = string2;
679289857Sobrien                    dend2 = regend[regno];
679389857Sobrien		  }
679489857Sobrien		/* At end of register contents => success */
679589857Sobrien		if (d2 == dend2) break;
679689857Sobrien
679789857Sobrien		/* If necessary, advance to next segment in data.  */
679889857Sobrien		PREFETCH ();
679989857Sobrien
680089857Sobrien		/* How many characters left in this segment to match.  */
680189857Sobrien		mcnt = dend - d;
680289857Sobrien
680389857Sobrien		/* Want how many consecutive characters we can match in
680489857Sobrien                   one shot, so, if necessary, adjust the count.  */
680589857Sobrien                if (mcnt > dend2 - d2)
680689857Sobrien		  mcnt = dend2 - d2;
680789857Sobrien
680889857Sobrien		/* Compare that many; failure if mismatch, else move
680989857Sobrien                   past them.  */
681089857Sobrien		if (translate
681189857Sobrien                    ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
681289857Sobrien                    : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
681389857Sobrien		  goto fail;
681489857Sobrien		d += mcnt, d2 += mcnt;
681589857Sobrien
681689857Sobrien		/* Do this because we've matched some characters.  */
681789857Sobrien		SET_REGS_MATCHED ();
681889857Sobrien	      }
681989857Sobrien	  }
682089857Sobrien	  break;
682189857Sobrien
682289857Sobrien
682389857Sobrien        /* begline matches the empty string at the beginning of the string
682489857Sobrien           (unless `not_bol' is set in `bufp'), and, if
682589857Sobrien           `newline_anchor' is set, after newlines.  */
682689857Sobrien	case begline:
682789857Sobrien          DEBUG_PRINT1 ("EXECUTING begline.\n");
682889857Sobrien
682989857Sobrien          if (AT_STRINGS_BEG (d))
683089857Sobrien            {
683189857Sobrien              if (!bufp->not_bol) break;
683289857Sobrien            }
683389857Sobrien          else if (d[-1] == '\n' && bufp->newline_anchor)
683489857Sobrien            {
683589857Sobrien              break;
683689857Sobrien            }
683789857Sobrien          /* In all other cases, we fail.  */
683889857Sobrien          goto fail;
683989857Sobrien
684089857Sobrien
684189857Sobrien        /* endline is the dual of begline.  */
684289857Sobrien	case endline:
684389857Sobrien          DEBUG_PRINT1 ("EXECUTING endline.\n");
684489857Sobrien
684589857Sobrien          if (AT_STRINGS_END (d))
684689857Sobrien            {
684789857Sobrien              if (!bufp->not_eol) break;
684889857Sobrien            }
684989857Sobrien
685089857Sobrien          /* We have to ``prefetch'' the next character.  */
685189857Sobrien          else if ((d == end1 ? *string2 : *d) == '\n'
685289857Sobrien                   && bufp->newline_anchor)
685389857Sobrien            {
685489857Sobrien              break;
685589857Sobrien            }
685689857Sobrien          goto fail;
685789857Sobrien
685889857Sobrien
685989857Sobrien	/* Match at the very beginning of the data.  */
686089857Sobrien        case begbuf:
686189857Sobrien          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
686289857Sobrien          if (AT_STRINGS_BEG (d))
686389857Sobrien            break;
686489857Sobrien          goto fail;
686589857Sobrien
686689857Sobrien
686789857Sobrien	/* Match at the very end of the data.  */
686889857Sobrien        case endbuf:
686989857Sobrien          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
687089857Sobrien	  if (AT_STRINGS_END (d))
687189857Sobrien	    break;
687289857Sobrien          goto fail;
687389857Sobrien
687489857Sobrien
687589857Sobrien        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
687689857Sobrien           pushes NULL as the value for the string on the stack.  Then
687789857Sobrien           `pop_failure_point' will keep the current value for the
687889857Sobrien           string, instead of restoring it.  To see why, consider
687989857Sobrien           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
688089857Sobrien           then the . fails against the \n.  But the next thing we want
688189857Sobrien           to do is match the \n against the \n; if we restored the
688289857Sobrien           string value, we would be back at the foo.
688389857Sobrien
688489857Sobrien           Because this is used only in specific cases, we don't need to
688589857Sobrien           check all the things that `on_failure_jump' does, to make
688689857Sobrien           sure the right things get saved on the stack.  Hence we don't
688789857Sobrien           share its code.  The only reason to push anything on the
688889857Sobrien           stack at all is that otherwise we would have to change
688989857Sobrien           `anychar's code to do something besides goto fail in this
689089857Sobrien           case; that seems worse than this.  */
689189857Sobrien        case on_failure_keep_string_jump:
689289857Sobrien          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
689389857Sobrien
689489857Sobrien          EXTRACT_NUMBER_AND_INCR (mcnt, p);
689589857Sobrien#ifdef _LIBC
689689857Sobrien          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
689789857Sobrien#else
689889857Sobrien          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
689989857Sobrien#endif
690089857Sobrien
690189857Sobrien          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
690289857Sobrien          break;
690389857Sobrien
690489857Sobrien
690589857Sobrien	/* Uses of on_failure_jump:
690689857Sobrien
690789857Sobrien           Each alternative starts with an on_failure_jump that points
690889857Sobrien           to the beginning of the next alternative.  Each alternative
690989857Sobrien           except the last ends with a jump that in effect jumps past
691089857Sobrien           the rest of the alternatives.  (They really jump to the
691189857Sobrien           ending jump of the following alternative, because tensioning
691289857Sobrien           these jumps is a hassle.)
691389857Sobrien
691489857Sobrien           Repeats start with an on_failure_jump that points past both
691589857Sobrien           the repetition text and either the following jump or
691689857Sobrien           pop_failure_jump back to this on_failure_jump.  */
691789857Sobrien	case on_failure_jump:
691889857Sobrien        on_failure:
691989857Sobrien          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
692089857Sobrien
692189857Sobrien          EXTRACT_NUMBER_AND_INCR (mcnt, p);
692289857Sobrien#ifdef _LIBC
692389857Sobrien          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
692489857Sobrien#else
692589857Sobrien          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
692689857Sobrien#endif
692789857Sobrien
692889857Sobrien          /* If this on_failure_jump comes right before a group (i.e.,
692989857Sobrien             the original * applied to a group), save the information
693089857Sobrien             for that group and all inner ones, so that if we fail back
693189857Sobrien             to this point, the group's information will be correct.
693289857Sobrien             For example, in \(a*\)*\1, we need the preceding group,
693389857Sobrien             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
693489857Sobrien
693589857Sobrien          /* We can't use `p' to check ahead because we push
693689857Sobrien             a failure point to `p + mcnt' after we do this.  */
693789857Sobrien          p1 = p;
693889857Sobrien
693989857Sobrien          /* We need to skip no_op's before we look for the
694089857Sobrien             start_memory in case this on_failure_jump is happening as
694189857Sobrien             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
694289857Sobrien             against aba.  */
694389857Sobrien          while (p1 < pend && (re_opcode_t) *p1 == no_op)
694489857Sobrien            p1++;
694589857Sobrien
694689857Sobrien          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
694789857Sobrien            {
694889857Sobrien              /* We have a new highest active register now.  This will
694989857Sobrien                 get reset at the start_memory we are about to get to,
695089857Sobrien                 but we will have saved all the registers relevant to
695189857Sobrien                 this repetition op, as described above.  */
695289857Sobrien              highest_active_reg = *(p1 + 1) + *(p1 + 2);
695389857Sobrien              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
695489857Sobrien                lowest_active_reg = *(p1 + 1);
695589857Sobrien            }
695689857Sobrien
695789857Sobrien          DEBUG_PRINT1 (":\n");
695889857Sobrien          PUSH_FAILURE_POINT (p + mcnt, d, -2);
695989857Sobrien          break;
696089857Sobrien
696189857Sobrien
696289857Sobrien        /* A smart repeat ends with `maybe_pop_jump'.
696389857Sobrien	   We change it to either `pop_failure_jump' or `jump'.  */
696489857Sobrien        case maybe_pop_jump:
696589857Sobrien          EXTRACT_NUMBER_AND_INCR (mcnt, p);
696689857Sobrien          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
696789857Sobrien          {
696889857Sobrien	    register UCHAR_T *p2 = p;
696989857Sobrien
697089857Sobrien            /* Compare the beginning of the repeat with what follows its
697189857Sobrien               end in the pattern.  If we can establish that there is
697289857Sobrien               nothing that they would both match, i.e., nothing that
697389857Sobrien               could force us to backtrack (as in, e.g., `a*a'), then
697489857Sobrien               we can change to pop_failure_jump, because we'll never
697589857Sobrien               have to backtrack.
697689857Sobrien
697789857Sobrien               This is not true in the case of alternatives: in
697889857Sobrien               `(a|ab)*' we do need to backtrack to the `ab' alternative
697989857Sobrien               (e.g., if the string was `ab').  But instead of trying to
698089857Sobrien               detect that here, the alternative has put on a dummy
698189857Sobrien               failure point which is what we will end up popping.  */
698289857Sobrien
698389857Sobrien	    /* Skip over open/close-group commands.
698489857Sobrien	       If what follows this loop is a ...+ construct,
698589857Sobrien	       look at what begins its body, since we will have to
698689857Sobrien	       match at least one of that.  */
698789857Sobrien	    while (1)
698889857Sobrien	      {
698989857Sobrien		if (p2 + 2 < pend
699089857Sobrien		    && ((re_opcode_t) *p2 == stop_memory
699189857Sobrien			|| (re_opcode_t) *p2 == start_memory))
699289857Sobrien		  p2 += 3;
699389857Sobrien		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
699489857Sobrien			 && (re_opcode_t) *p2 == dummy_failure_jump)
699589857Sobrien		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
699689857Sobrien		else
699789857Sobrien		  break;
699889857Sobrien	      }
699989857Sobrien
700089857Sobrien	    p1 = p + mcnt;
700189857Sobrien	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
700289857Sobrien	       to the `maybe_pop_jump' of this case.  Examine what
700389857Sobrien	       follows.  */
700489857Sobrien
700589857Sobrien            /* If we're at the end of the pattern, we can change.  */
700689857Sobrien            if (p2 == pend)
700789857Sobrien	      {
700889857Sobrien		/* Consider what happens when matching ":\(.*\)"
700989857Sobrien		   against ":/".  I don't really understand this code
701089857Sobrien		   yet.  */
701189857Sobrien  	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
701289857Sobrien		  pop_failure_jump;
701389857Sobrien                DEBUG_PRINT1
701489857Sobrien                  ("  End of pattern: change to `pop_failure_jump'.\n");
701589857Sobrien              }
701689857Sobrien
701789857Sobrien            else if ((re_opcode_t) *p2 == exactn
701889857Sobrien#ifdef MBS_SUPPORT
701989857Sobrien		     || (re_opcode_t) *p2 == exactn_bin
702089857Sobrien#endif
702189857Sobrien		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
702289857Sobrien	      {
702389857Sobrien		register UCHAR_T c
702489857Sobrien                  = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
702589857Sobrien
702689857Sobrien                if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
702789857Sobrien#ifdef MBS_SUPPORT
702889857Sobrien		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
702989857Sobrien#endif
703089857Sobrien		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
703189857Sobrien                  {
703289857Sobrien  		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
703389857Sobrien		      pop_failure_jump;
703489857Sobrien#ifdef WCHAR
703589857Sobrien		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
703689857Sobrien				    (wint_t) c,
703789857Sobrien				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
703889857Sobrien#else
703989857Sobrien		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
704089857Sobrien				    (char) c,
704189857Sobrien				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
704289857Sobrien#endif
704389857Sobrien                  }
704489857Sobrien
704589857Sobrien#ifndef WCHAR
704689857Sobrien		else if ((re_opcode_t) p1[3] == charset
704789857Sobrien			 || (re_opcode_t) p1[3] == charset_not)
704889857Sobrien		  {
7049218822Sdim		    int negate = (re_opcode_t) p1[3] == charset_not;
705089857Sobrien
705189857Sobrien		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
705289857Sobrien			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7053218822Sdim		      negate = !negate;
705489857Sobrien
7055218822Sdim                    /* `negate' is equal to 1 if c would match, which means
705689857Sobrien                        that we can't change to pop_failure_jump.  */
7057218822Sdim		    if (!negate)
705889857Sobrien                      {
705989857Sobrien  		        p[-3] = (unsigned char) pop_failure_jump;
706089857Sobrien                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
706189857Sobrien                      }
706289857Sobrien		  }
706389857Sobrien#endif /* not WCHAR */
706489857Sobrien	      }
706589857Sobrien#ifndef WCHAR
706689857Sobrien            else if ((re_opcode_t) *p2 == charset)
706789857Sobrien	      {
706889857Sobrien		/* We win if the first character of the loop is not part
706989857Sobrien                   of the charset.  */
707089857Sobrien                if ((re_opcode_t) p1[3] == exactn
707189857Sobrien 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
707289857Sobrien 			  && (p2[2 + p1[5] / BYTEWIDTH]
707389857Sobrien 			      & (1 << (p1[5] % BYTEWIDTH)))))
707489857Sobrien		  {
707589857Sobrien		    p[-3] = (unsigned char) pop_failure_jump;
707689857Sobrien		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
707789857Sobrien                  }
707889857Sobrien
707989857Sobrien		else if ((re_opcode_t) p1[3] == charset_not)
708089857Sobrien		  {
708189857Sobrien		    int idx;
708289857Sobrien		    /* We win if the charset_not inside the loop
708389857Sobrien		       lists every character listed in the charset after.  */
708489857Sobrien		    for (idx = 0; idx < (int) p2[1]; idx++)
708589857Sobrien		      if (! (p2[2 + idx] == 0
708689857Sobrien			     || (idx < (int) p1[4]
708789857Sobrien				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
708889857Sobrien			break;
708989857Sobrien
709089857Sobrien		    if (idx == p2[1])
709189857Sobrien                      {
709289857Sobrien  		        p[-3] = (unsigned char) pop_failure_jump;
709389857Sobrien                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
709489857Sobrien                      }
709589857Sobrien		  }
709689857Sobrien		else if ((re_opcode_t) p1[3] == charset)
709789857Sobrien		  {
709889857Sobrien		    int idx;
709989857Sobrien		    /* We win if the charset inside the loop
710089857Sobrien		       has no overlap with the one after the loop.  */
710189857Sobrien		    for (idx = 0;
710289857Sobrien			 idx < (int) p2[1] && idx < (int) p1[4];
710389857Sobrien			 idx++)
710489857Sobrien		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
710589857Sobrien			break;
710689857Sobrien
710789857Sobrien		    if (idx == p2[1] || idx == p1[4])
710889857Sobrien                      {
710989857Sobrien  		        p[-3] = (unsigned char) pop_failure_jump;
711089857Sobrien                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
711189857Sobrien                      }
711289857Sobrien		  }
711389857Sobrien	      }
711489857Sobrien#endif /* not WCHAR */
711589857Sobrien	  }
711689857Sobrien	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
711789857Sobrien	  if ((re_opcode_t) p[-1] != pop_failure_jump)
711889857Sobrien	    {
711989857Sobrien	      p[-1] = (UCHAR_T) jump;
712089857Sobrien              DEBUG_PRINT1 ("  Match => jump.\n");
712189857Sobrien	      goto unconditional_jump;
712289857Sobrien	    }
712389857Sobrien        /* Note fall through.  */
712489857Sobrien
712589857Sobrien
712689857Sobrien	/* The end of a simple repeat has a pop_failure_jump back to
712789857Sobrien           its matching on_failure_jump, where the latter will push a
712889857Sobrien           failure point.  The pop_failure_jump takes off failure
712989857Sobrien           points put on by this pop_failure_jump's matching
713089857Sobrien           on_failure_jump; we got through the pattern to here from the
713189857Sobrien           matching on_failure_jump, so didn't fail.  */
713289857Sobrien        case pop_failure_jump:
713389857Sobrien          {
713489857Sobrien            /* We need to pass separate storage for the lowest and
713589857Sobrien               highest registers, even though we don't care about the
713689857Sobrien               actual values.  Otherwise, we will restore only one
713789857Sobrien               register from the stack, since lowest will == highest in
713889857Sobrien               `pop_failure_point'.  */
713989857Sobrien            active_reg_t dummy_low_reg, dummy_high_reg;
714089857Sobrien            UCHAR_T *pdummy = NULL;
714189857Sobrien            const CHAR_T *sdummy = NULL;
714289857Sobrien
714389857Sobrien            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
714489857Sobrien            POP_FAILURE_POINT (sdummy, pdummy,
714589857Sobrien                               dummy_low_reg, dummy_high_reg,
714689857Sobrien                               reg_dummy, reg_dummy, reg_info_dummy);
714789857Sobrien          }
714889857Sobrien	  /* Note fall through.  */
714989857Sobrien
715089857Sobrien	unconditional_jump:
715189857Sobrien#ifdef _LIBC
715289857Sobrien	  DEBUG_PRINT2 ("\n%p: ", p);
715389857Sobrien#else
715489857Sobrien	  DEBUG_PRINT2 ("\n0x%x: ", p);
715589857Sobrien#endif
715689857Sobrien          /* Note fall through.  */
715789857Sobrien
715889857Sobrien        /* Unconditionally jump (without popping any failure points).  */
715989857Sobrien        case jump:
716089857Sobrien	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
716189857Sobrien          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
716289857Sobrien	  p += mcnt;				/* Do the jump.  */
716389857Sobrien#ifdef _LIBC
716489857Sobrien          DEBUG_PRINT2 ("(to %p).\n", p);
716589857Sobrien#else
716689857Sobrien          DEBUG_PRINT2 ("(to 0x%x).\n", p);
716789857Sobrien#endif
716889857Sobrien	  break;
716989857Sobrien
717089857Sobrien
717189857Sobrien        /* We need this opcode so we can detect where alternatives end
717289857Sobrien           in `group_match_null_string_p' et al.  */
717389857Sobrien        case jump_past_alt:
717489857Sobrien          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
717589857Sobrien          goto unconditional_jump;
717689857Sobrien
717789857Sobrien
717889857Sobrien        /* Normally, the on_failure_jump pushes a failure point, which
717989857Sobrien           then gets popped at pop_failure_jump.  We will end up at
718089857Sobrien           pop_failure_jump, also, and with a pattern of, say, `a+', we
718189857Sobrien           are skipping over the on_failure_jump, so we have to push
718289857Sobrien           something meaningless for pop_failure_jump to pop.  */
718389857Sobrien        case dummy_failure_jump:
718489857Sobrien          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
718589857Sobrien          /* It doesn't matter what we push for the string here.  What
718689857Sobrien             the code at `fail' tests is the value for the pattern.  */
718789857Sobrien          PUSH_FAILURE_POINT (NULL, NULL, -2);
718889857Sobrien          goto unconditional_jump;
718989857Sobrien
719089857Sobrien
719189857Sobrien        /* At the end of an alternative, we need to push a dummy failure
719289857Sobrien           point in case we are followed by a `pop_failure_jump', because
719389857Sobrien           we don't want the failure point for the alternative to be
719489857Sobrien           popped.  For example, matching `(a|ab)*' against `aab'
719589857Sobrien           requires that we match the `ab' alternative.  */
719689857Sobrien        case push_dummy_failure:
719789857Sobrien          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
719889857Sobrien          /* See comments just above at `dummy_failure_jump' about the
719989857Sobrien             two NULL arguments.  */
720089857Sobrien          PUSH_FAILURE_POINT (NULL, NULL, -2);
720189857Sobrien          break;
720289857Sobrien
720389857Sobrien        /* Have to succeed matching what follows at least n times.
720489857Sobrien           After that, handle like `on_failure_jump'.  */
720589857Sobrien        case succeed_n:
720689857Sobrien          EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
720789857Sobrien          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
720889857Sobrien
720989857Sobrien          assert (mcnt >= 0);
721089857Sobrien          /* Originally, this is how many times we HAVE to succeed.  */
721189857Sobrien          if (mcnt > 0)
721289857Sobrien            {
721389857Sobrien               mcnt--;
721489857Sobrien	       p += OFFSET_ADDRESS_SIZE;
721589857Sobrien               STORE_NUMBER_AND_INCR (p, mcnt);
721689857Sobrien#ifdef _LIBC
721789857Sobrien               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
721889857Sobrien			     , mcnt);
721989857Sobrien#else
722089857Sobrien               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
722189857Sobrien			     , mcnt);
722289857Sobrien#endif
722389857Sobrien            }
722489857Sobrien	  else if (mcnt == 0)
722589857Sobrien            {
722689857Sobrien#ifdef _LIBC
722789857Sobrien              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
722889857Sobrien			    p + OFFSET_ADDRESS_SIZE);
722989857Sobrien#else
723089857Sobrien              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
723189857Sobrien			    p + OFFSET_ADDRESS_SIZE);
723289857Sobrien#endif /* _LIBC */
723389857Sobrien
723489857Sobrien#ifdef WCHAR
723589857Sobrien	      p[1] = (UCHAR_T) no_op;
723689857Sobrien#else
723789857Sobrien	      p[2] = (UCHAR_T) no_op;
723889857Sobrien              p[3] = (UCHAR_T) no_op;
723989857Sobrien#endif /* WCHAR */
724089857Sobrien              goto on_failure;
724189857Sobrien            }
724289857Sobrien          break;
724389857Sobrien
724489857Sobrien        case jump_n:
724589857Sobrien          EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
724689857Sobrien          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
724789857Sobrien
724889857Sobrien          /* Originally, this is how many times we CAN jump.  */
724989857Sobrien          if (mcnt)
725089857Sobrien            {
725189857Sobrien               mcnt--;
725289857Sobrien               STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
725389857Sobrien
725489857Sobrien#ifdef _LIBC
725589857Sobrien               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
725689857Sobrien			     mcnt);
725789857Sobrien#else
725889857Sobrien               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
725989857Sobrien			     mcnt);
726089857Sobrien#endif /* _LIBC */
726189857Sobrien	       goto unconditional_jump;
726289857Sobrien            }
726389857Sobrien          /* If we don't have to jump any more, skip over the rest of the command.  */
726489857Sobrien	  else
726589857Sobrien	    p += 2 * OFFSET_ADDRESS_SIZE;
726689857Sobrien          break;
726789857Sobrien
726889857Sobrien	case set_number_at:
726989857Sobrien	  {
727089857Sobrien            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
727189857Sobrien
727289857Sobrien            EXTRACT_NUMBER_AND_INCR (mcnt, p);
727389857Sobrien            p1 = p + mcnt;
727489857Sobrien            EXTRACT_NUMBER_AND_INCR (mcnt, p);
727589857Sobrien#ifdef _LIBC
727689857Sobrien            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
727789857Sobrien#else
727889857Sobrien            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
727989857Sobrien#endif
728089857Sobrien	    STORE_NUMBER (p1, mcnt);
728189857Sobrien            break;
728289857Sobrien          }
728389857Sobrien
728489857Sobrien#if 0
728589857Sobrien	/* The DEC Alpha C compiler 3.x generates incorrect code for the
728689857Sobrien	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
728789857Sobrien	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
728889857Sobrien	   macro and introducing temporary variables works around the bug.  */
728989857Sobrien
729089857Sobrien	case wordbound:
729189857Sobrien	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
729289857Sobrien	  if (AT_WORD_BOUNDARY (d))
729389857Sobrien	    break;
729489857Sobrien	  goto fail;
729589857Sobrien
729689857Sobrien	case notwordbound:
729789857Sobrien	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
729889857Sobrien	  if (AT_WORD_BOUNDARY (d))
729989857Sobrien	    goto fail;
730089857Sobrien	  break;
730189857Sobrien#else
730289857Sobrien	case wordbound:
730389857Sobrien	{
730489857Sobrien	  boolean prevchar, thischar;
730589857Sobrien
730689857Sobrien	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
730789857Sobrien	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
730889857Sobrien	    break;
730989857Sobrien
731089857Sobrien	  prevchar = WORDCHAR_P (d - 1);
731189857Sobrien	  thischar = WORDCHAR_P (d);
731289857Sobrien	  if (prevchar != thischar)
731389857Sobrien	    break;
731489857Sobrien	  goto fail;
731589857Sobrien	}
731689857Sobrien
731789857Sobrien      case notwordbound:
731889857Sobrien	{
731989857Sobrien	  boolean prevchar, thischar;
732089857Sobrien
732189857Sobrien	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
732289857Sobrien	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
732389857Sobrien	    goto fail;
732489857Sobrien
732589857Sobrien	  prevchar = WORDCHAR_P (d - 1);
732689857Sobrien	  thischar = WORDCHAR_P (d);
732789857Sobrien	  if (prevchar != thischar)
732889857Sobrien	    goto fail;
732989857Sobrien	  break;
733089857Sobrien	}
733189857Sobrien#endif
733289857Sobrien
733389857Sobrien	case wordbeg:
733489857Sobrien          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
733589857Sobrien	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
733689857Sobrien	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
733789857Sobrien	    break;
733889857Sobrien          goto fail;
733989857Sobrien
734089857Sobrien	case wordend:
734189857Sobrien          DEBUG_PRINT1 ("EXECUTING wordend.\n");
734289857Sobrien	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
734389857Sobrien              && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
734489857Sobrien	    break;
734589857Sobrien          goto fail;
734689857Sobrien
734789857Sobrien#ifdef emacs
734889857Sobrien  	case before_dot:
734989857Sobrien          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
735089857Sobrien 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
735189857Sobrien  	    goto fail;
735289857Sobrien  	  break;
735389857Sobrien
735489857Sobrien  	case at_dot:
735589857Sobrien          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
735689857Sobrien 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
735789857Sobrien  	    goto fail;
735889857Sobrien  	  break;
735989857Sobrien
736089857Sobrien  	case after_dot:
736189857Sobrien          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
736289857Sobrien          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
736389857Sobrien  	    goto fail;
736489857Sobrien  	  break;
736589857Sobrien
736689857Sobrien	case syntaxspec:
736789857Sobrien          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
736889857Sobrien	  mcnt = *p++;
736989857Sobrien	  goto matchsyntax;
737089857Sobrien
737189857Sobrien        case wordchar:
737289857Sobrien          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
737389857Sobrien	  mcnt = (int) Sword;
737489857Sobrien        matchsyntax:
737589857Sobrien	  PREFETCH ();
737689857Sobrien	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
737789857Sobrien	  d++;
737889857Sobrien	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
737989857Sobrien	    goto fail;
738089857Sobrien          SET_REGS_MATCHED ();
738189857Sobrien	  break;
738289857Sobrien
738389857Sobrien	case notsyntaxspec:
738489857Sobrien          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
738589857Sobrien	  mcnt = *p++;
738689857Sobrien	  goto matchnotsyntax;
738789857Sobrien
738889857Sobrien        case notwordchar:
738989857Sobrien          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
739089857Sobrien	  mcnt = (int) Sword;
739189857Sobrien        matchnotsyntax:
739289857Sobrien	  PREFETCH ();
739389857Sobrien	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
739489857Sobrien	  d++;
739589857Sobrien	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
739689857Sobrien	    goto fail;
739789857Sobrien	  SET_REGS_MATCHED ();
739889857Sobrien          break;
739989857Sobrien
740089857Sobrien#else /* not emacs */
740189857Sobrien	case wordchar:
740289857Sobrien          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
740389857Sobrien	  PREFETCH ();
740489857Sobrien          if (!WORDCHAR_P (d))
740589857Sobrien            goto fail;
740689857Sobrien	  SET_REGS_MATCHED ();
740789857Sobrien          d++;
740889857Sobrien	  break;
740989857Sobrien
741089857Sobrien	case notwordchar:
741189857Sobrien          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
741289857Sobrien	  PREFETCH ();
741389857Sobrien	  if (WORDCHAR_P (d))
741489857Sobrien            goto fail;
741589857Sobrien          SET_REGS_MATCHED ();
741689857Sobrien          d++;
741789857Sobrien	  break;
741889857Sobrien#endif /* not emacs */
741989857Sobrien
742089857Sobrien        default:
742189857Sobrien          abort ();
742289857Sobrien	}
742389857Sobrien      continue;  /* Successfully executed one pattern command; keep going.  */
742489857Sobrien
742589857Sobrien
742689857Sobrien    /* We goto here if a matching operation fails. */
742789857Sobrien    fail:
742889857Sobrien      if (!FAIL_STACK_EMPTY ())
742989857Sobrien	{ /* A restart point is known.  Restore to that state.  */
743089857Sobrien          DEBUG_PRINT1 ("\nFAIL:\n");
743189857Sobrien          POP_FAILURE_POINT (d, p,
743289857Sobrien                             lowest_active_reg, highest_active_reg,
743389857Sobrien                             regstart, regend, reg_info);
743489857Sobrien
743589857Sobrien          /* If this failure point is a dummy, try the next one.  */
743689857Sobrien          if (!p)
743789857Sobrien	    goto fail;
743889857Sobrien
743989857Sobrien          /* If we failed to the end of the pattern, don't examine *p.  */
744089857Sobrien	  assert (p <= pend);
744189857Sobrien          if (p < pend)
744289857Sobrien            {
744389857Sobrien              boolean is_a_jump_n = false;
744489857Sobrien
744589857Sobrien              /* If failed to a backwards jump that's part of a repetition
744689857Sobrien                 loop, need to pop this failure point and use the next one.  */
744789857Sobrien              switch ((re_opcode_t) *p)
744889857Sobrien                {
744989857Sobrien                case jump_n:
745089857Sobrien                  is_a_jump_n = true;
745189857Sobrien                case maybe_pop_jump:
745289857Sobrien                case pop_failure_jump:
745389857Sobrien                case jump:
745489857Sobrien                  p1 = p + 1;
745589857Sobrien                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
745689857Sobrien                  p1 += mcnt;
745789857Sobrien
745889857Sobrien                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
745989857Sobrien                      || (!is_a_jump_n
746089857Sobrien                          && (re_opcode_t) *p1 == on_failure_jump))
746189857Sobrien                    goto fail;
746289857Sobrien                  break;
746389857Sobrien                default:
746489857Sobrien                  /* do nothing */ ;
746589857Sobrien                }
746689857Sobrien            }
746789857Sobrien
746889857Sobrien          if (d >= string1 && d <= end1)
746989857Sobrien	    dend = end_match_1;
747089857Sobrien        }
747189857Sobrien      else
747289857Sobrien        break;   /* Matching at this starting point really fails.  */
747389857Sobrien    } /* for (;;) */
747489857Sobrien
747589857Sobrien  if (best_regs_set)
747689857Sobrien    goto restore_best_regs;
747789857Sobrien
747889857Sobrien  FREE_VARIABLES ();
747989857Sobrien
748089857Sobrien  return -1;         			/* Failure to match.  */
748189857Sobrien} /* re_match_2 */
748289857Sobrien
748389857Sobrien/* Subroutine definitions for re_match_2.  */
748489857Sobrien
748589857Sobrien
748689857Sobrien/* We are passed P pointing to a register number after a start_memory.
748789857Sobrien
748889857Sobrien   Return true if the pattern up to the corresponding stop_memory can
748989857Sobrien   match the empty string, and false otherwise.
749089857Sobrien
749189857Sobrien   If we find the matching stop_memory, sets P to point to one past its number.
749289857Sobrien   Otherwise, sets P to an undefined byte less than or equal to END.
749389857Sobrien
749489857Sobrien   We don't handle duplicates properly (yet).  */
749589857Sobrien
749689857Sobrienstatic boolean
7497218822SdimPREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7498218822Sdim                                   PREFIX(register_info_type) *reg_info)
749989857Sobrien{
750089857Sobrien  int mcnt;
750189857Sobrien  /* Point to after the args to the start_memory.  */
750289857Sobrien  UCHAR_T *p1 = *p + 2;
750389857Sobrien
750489857Sobrien  while (p1 < end)
750589857Sobrien    {
750689857Sobrien      /* Skip over opcodes that can match nothing, and return true or
750789857Sobrien	 false, as appropriate, when we get to one that can't, or to the
750889857Sobrien         matching stop_memory.  */
750989857Sobrien
751089857Sobrien      switch ((re_opcode_t) *p1)
751189857Sobrien        {
751289857Sobrien        /* Could be either a loop or a series of alternatives.  */
751389857Sobrien        case on_failure_jump:
751489857Sobrien          p1++;
751589857Sobrien          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
751689857Sobrien
751789857Sobrien          /* If the next operation is not a jump backwards in the
751889857Sobrien	     pattern.  */
751989857Sobrien
752089857Sobrien	  if (mcnt >= 0)
752189857Sobrien	    {
752289857Sobrien              /* Go through the on_failure_jumps of the alternatives,
752389857Sobrien                 seeing if any of the alternatives cannot match nothing.
752489857Sobrien                 The last alternative starts with only a jump,
752589857Sobrien                 whereas the rest start with on_failure_jump and end
752689857Sobrien                 with a jump, e.g., here is the pattern for `a|b|c':
752789857Sobrien
752889857Sobrien                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
752989857Sobrien                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
753089857Sobrien                 /exactn/1/c
753189857Sobrien
753289857Sobrien                 So, we have to first go through the first (n-1)
753389857Sobrien                 alternatives and then deal with the last one separately.  */
753489857Sobrien
753589857Sobrien
753689857Sobrien              /* Deal with the first (n-1) alternatives, which start
753789857Sobrien                 with an on_failure_jump (see above) that jumps to right
753889857Sobrien                 past a jump_past_alt.  */
753989857Sobrien
754089857Sobrien              while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
754189857Sobrien		     jump_past_alt)
754289857Sobrien                {
754389857Sobrien                  /* `mcnt' holds how many bytes long the alternative
754489857Sobrien                     is, including the ending `jump_past_alt' and
754589857Sobrien                     its number.  */
754689857Sobrien
754789857Sobrien                  if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
754889857Sobrien						(1 + OFFSET_ADDRESS_SIZE),
754989857Sobrien						reg_info))
755089857Sobrien                    return false;
755189857Sobrien
755289857Sobrien                  /* Move to right after this alternative, including the
755389857Sobrien		     jump_past_alt.  */
755489857Sobrien                  p1 += mcnt;
755589857Sobrien
755689857Sobrien                  /* Break if it's the beginning of an n-th alternative
755789857Sobrien                     that doesn't begin with an on_failure_jump.  */
755889857Sobrien                  if ((re_opcode_t) *p1 != on_failure_jump)
755989857Sobrien                    break;
756089857Sobrien
756189857Sobrien		  /* Still have to check that it's not an n-th
756289857Sobrien		     alternative that starts with an on_failure_jump.  */
756389857Sobrien		  p1++;
756489857Sobrien                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
756589857Sobrien                  if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
756689857Sobrien		      jump_past_alt)
756789857Sobrien                    {
756889857Sobrien		      /* Get to the beginning of the n-th alternative.  */
756989857Sobrien                      p1 -= 1 + OFFSET_ADDRESS_SIZE;
757089857Sobrien                      break;
757189857Sobrien                    }
757289857Sobrien                }
757389857Sobrien
757489857Sobrien              /* Deal with the last alternative: go back and get number
757589857Sobrien                 of the `jump_past_alt' just before it.  `mcnt' contains
757689857Sobrien                 the length of the alternative.  */
757789857Sobrien              EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
757889857Sobrien
757989857Sobrien              if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
758089857Sobrien                return false;
758189857Sobrien
758289857Sobrien              p1 += mcnt;	/* Get past the n-th alternative.  */
758389857Sobrien            } /* if mcnt > 0 */
758489857Sobrien          break;
758589857Sobrien
758689857Sobrien
758789857Sobrien        case stop_memory:
758889857Sobrien	  assert (p1[1] == **p);
758989857Sobrien          *p = p1 + 2;
759089857Sobrien          return true;
759189857Sobrien
759289857Sobrien
759389857Sobrien        default:
759489857Sobrien          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
759589857Sobrien            return false;
759689857Sobrien        }
759789857Sobrien    } /* while p1 < end */
759889857Sobrien
759989857Sobrien  return false;
760089857Sobrien} /* group_match_null_string_p */
760189857Sobrien
760289857Sobrien
760389857Sobrien/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
760489857Sobrien   It expects P to be the first byte of a single alternative and END one
760589857Sobrien   byte past the last. The alternative can contain groups.  */
760689857Sobrien
760789857Sobrienstatic boolean
7608218822SdimPREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
7609218822Sdim                                 PREFIX(register_info_type) *reg_info)
761089857Sobrien{
761189857Sobrien  int mcnt;
761289857Sobrien  UCHAR_T *p1 = p;
761389857Sobrien
761489857Sobrien  while (p1 < end)
761589857Sobrien    {
761689857Sobrien      /* Skip over opcodes that can match nothing, and break when we get
761789857Sobrien         to one that can't.  */
761889857Sobrien
761989857Sobrien      switch ((re_opcode_t) *p1)
762089857Sobrien        {
762189857Sobrien	/* It's a loop.  */
762289857Sobrien        case on_failure_jump:
762389857Sobrien          p1++;
762489857Sobrien          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
762589857Sobrien          p1 += mcnt;
762689857Sobrien          break;
762789857Sobrien
762889857Sobrien	default:
762989857Sobrien          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
763089857Sobrien            return false;
763189857Sobrien        }
763289857Sobrien    }  /* while p1 < end */
763389857Sobrien
763489857Sobrien  return true;
763589857Sobrien} /* alt_match_null_string_p */
763689857Sobrien
763789857Sobrien
763889857Sobrien/* Deals with the ops common to group_match_null_string_p and
763989857Sobrien   alt_match_null_string_p.
764089857Sobrien
764189857Sobrien   Sets P to one after the op and its arguments, if any.  */
764289857Sobrien
764389857Sobrienstatic boolean
7644218822SdimPREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7645218822Sdim                                       PREFIX(register_info_type) *reg_info)
764689857Sobrien{
764789857Sobrien  int mcnt;
764889857Sobrien  boolean ret;
764989857Sobrien  int reg_no;
765089857Sobrien  UCHAR_T *p1 = *p;
765189857Sobrien
765289857Sobrien  switch ((re_opcode_t) *p1++)
765389857Sobrien    {
765489857Sobrien    case no_op:
765589857Sobrien    case begline:
765689857Sobrien    case endline:
765789857Sobrien    case begbuf:
765889857Sobrien    case endbuf:
765989857Sobrien    case wordbeg:
766089857Sobrien    case wordend:
766189857Sobrien    case wordbound:
766289857Sobrien    case notwordbound:
766389857Sobrien#ifdef emacs
766489857Sobrien    case before_dot:
766589857Sobrien    case at_dot:
766689857Sobrien    case after_dot:
766789857Sobrien#endif
766889857Sobrien      break;
766989857Sobrien
767089857Sobrien    case start_memory:
767189857Sobrien      reg_no = *p1;
767289857Sobrien      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
767389857Sobrien      ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
767489857Sobrien
767589857Sobrien      /* Have to set this here in case we're checking a group which
767689857Sobrien         contains a group and a back reference to it.  */
767789857Sobrien
767889857Sobrien      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
767989857Sobrien        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
768089857Sobrien
768189857Sobrien      if (!ret)
768289857Sobrien        return false;
768389857Sobrien      break;
768489857Sobrien
768589857Sobrien    /* If this is an optimized succeed_n for zero times, make the jump.  */
768689857Sobrien    case jump:
768789857Sobrien      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
768889857Sobrien      if (mcnt >= 0)
768989857Sobrien        p1 += mcnt;
769089857Sobrien      else
769189857Sobrien        return false;
769289857Sobrien      break;
769389857Sobrien
769489857Sobrien    case succeed_n:
769589857Sobrien      /* Get to the number of times to succeed.  */
769689857Sobrien      p1 += OFFSET_ADDRESS_SIZE;
769789857Sobrien      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
769889857Sobrien
769989857Sobrien      if (mcnt == 0)
770089857Sobrien        {
770189857Sobrien          p1 -= 2 * OFFSET_ADDRESS_SIZE;
770289857Sobrien          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
770389857Sobrien          p1 += mcnt;
770489857Sobrien        }
770589857Sobrien      else
770689857Sobrien        return false;
770789857Sobrien      break;
770889857Sobrien
770989857Sobrien    case duplicate:
771089857Sobrien      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
771189857Sobrien        return false;
771289857Sobrien      break;
771389857Sobrien
771489857Sobrien    case set_number_at:
771589857Sobrien      p1 += 2 * OFFSET_ADDRESS_SIZE;
771689857Sobrien
771789857Sobrien    default:
771889857Sobrien      /* All other opcodes mean we cannot match the empty string.  */
771989857Sobrien      return false;
772089857Sobrien  }
772189857Sobrien
772289857Sobrien  *p = p1;
772389857Sobrien  return true;
772489857Sobrien} /* common_op_match_null_string_p */
772589857Sobrien
772689857Sobrien
772789857Sobrien/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
772889857Sobrien   bytes; nonzero otherwise.  */
772989857Sobrien
773089857Sobrienstatic int
7731218822SdimPREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
7732218822Sdim                        RE_TRANSLATE_TYPE translate)
773389857Sobrien{
773489857Sobrien  register const UCHAR_T *p1 = (const UCHAR_T *) s1;
773589857Sobrien  register const UCHAR_T *p2 = (const UCHAR_T *) s2;
773689857Sobrien  while (len)
773789857Sobrien    {
773889857Sobrien#ifdef WCHAR
773989857Sobrien      if (((*p1<=0xff)?translate[*p1++]:*p1++)
774089857Sobrien	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
774189857Sobrien	return 1;
774289857Sobrien#else /* BYTE */
774389857Sobrien      if (translate[*p1++] != translate[*p2++]) return 1;
774489857Sobrien#endif /* WCHAR */
774589857Sobrien      len--;
774689857Sobrien    }
774789857Sobrien  return 0;
774889857Sobrien}
774989857Sobrien
775089857Sobrien
775189857Sobrien#else /* not INSIDE_RECURSION */
775289857Sobrien
775389857Sobrien/* Entry points for GNU code.  */
775489857Sobrien
775589857Sobrien/* re_compile_pattern is the GNU regular expression compiler: it
775689857Sobrien   compiles PATTERN (of length SIZE) and puts the result in BUFP.
775789857Sobrien   Returns 0 if the pattern was valid, otherwise an error string.
775889857Sobrien
775989857Sobrien   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
776089857Sobrien   are set in BUFP on entry.
776189857Sobrien
776289857Sobrien   We call regex_compile to do the actual compilation.  */
776389857Sobrien
776489857Sobrienconst char *
7765218822Sdimre_compile_pattern (const char *pattern, size_t length,
7766218822Sdim                    struct re_pattern_buffer *bufp)
776789857Sobrien{
776889857Sobrien  reg_errcode_t ret;
776989857Sobrien
777089857Sobrien  /* GNU code is written to assume at least RE_NREGS registers will be set
777189857Sobrien     (and at least one extra will be -1).  */
777289857Sobrien  bufp->regs_allocated = REGS_UNALLOCATED;
777389857Sobrien
777489857Sobrien  /* And GNU code determines whether or not to get register information
777589857Sobrien     by passing null for the REGS argument to re_match, etc., not by
777689857Sobrien     setting no_sub.  */
777789857Sobrien  bufp->no_sub = 0;
777889857Sobrien
777989857Sobrien  /* Match anchors at newline.  */
778089857Sobrien  bufp->newline_anchor = 1;
778189857Sobrien
778289857Sobrien# ifdef MBS_SUPPORT
778389857Sobrien  if (MB_CUR_MAX != 1)
778489857Sobrien    ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
778589857Sobrien  else
778689857Sobrien# endif
778789857Sobrien    ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
778889857Sobrien
778989857Sobrien  if (!ret)
779089857Sobrien    return NULL;
7791130561Sobrien  return gettext (re_error_msgid[(int) ret]);
779289857Sobrien}
779389857Sobrien#ifdef _LIBC
779489857Sobrienweak_alias (__re_compile_pattern, re_compile_pattern)
779589857Sobrien#endif
779689857Sobrien
779789857Sobrien/* Entry points compatible with 4.2 BSD regex library.  We don't define
779889857Sobrien   them unless specifically requested.  */
779989857Sobrien
780089857Sobrien#if defined _REGEX_RE_COMP || defined _LIBC
780189857Sobrien
780289857Sobrien/* BSD has one and only one pattern buffer.  */
780389857Sobrienstatic struct re_pattern_buffer re_comp_buf;
780489857Sobrien
780589857Sobrienchar *
780689857Sobrien#ifdef _LIBC
780789857Sobrien/* Make these definitions weak in libc, so POSIX programs can redefine
780889857Sobrien   these names if they don't use our functions, and still use
780989857Sobrien   regcomp/regexec below without link errors.  */
781089857Sobrienweak_function
781189857Sobrien#endif
7812218822Sdimre_comp (const char *s)
781389857Sobrien{
781489857Sobrien  reg_errcode_t ret;
781589857Sobrien
781689857Sobrien  if (!s)
781789857Sobrien    {
781889857Sobrien      if (!re_comp_buf.buffer)
7819218822Sdim	return (char *) gettext ("No previous regular expression");
782089857Sobrien      return 0;
782189857Sobrien    }
782289857Sobrien
782389857Sobrien  if (!re_comp_buf.buffer)
782489857Sobrien    {
782589857Sobrien      re_comp_buf.buffer = (unsigned char *) malloc (200);
782689857Sobrien      if (re_comp_buf.buffer == NULL)
7827130561Sobrien        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
782889857Sobrien      re_comp_buf.allocated = 200;
782989857Sobrien
783089857Sobrien      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
783189857Sobrien      if (re_comp_buf.fastmap == NULL)
7832130561Sobrien	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
783389857Sobrien    }
783489857Sobrien
783589857Sobrien  /* Since `re_exec' always passes NULL for the `regs' argument, we
783689857Sobrien     don't need to initialize the pattern buffer fields which affect it.  */
783789857Sobrien
783889857Sobrien  /* Match anchors at newlines.  */
783989857Sobrien  re_comp_buf.newline_anchor = 1;
784089857Sobrien
784189857Sobrien# ifdef MBS_SUPPORT
784289857Sobrien  if (MB_CUR_MAX != 1)
784389857Sobrien    ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
784489857Sobrien  else
784589857Sobrien# endif
784689857Sobrien    ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
784789857Sobrien
784889857Sobrien  if (!ret)
784989857Sobrien    return NULL;
785089857Sobrien
785189857Sobrien  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
7852130561Sobrien  return (char *) gettext (re_error_msgid[(int) ret]);
785389857Sobrien}
785489857Sobrien
785589857Sobrien
785689857Sobrienint
785789857Sobrien#ifdef _LIBC
785889857Sobrienweak_function
785989857Sobrien#endif
7860218822Sdimre_exec (const char *s)
786189857Sobrien{
786289857Sobrien  const int len = strlen (s);
786389857Sobrien  return
786489857Sobrien    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
786589857Sobrien}
786689857Sobrien
786789857Sobrien#endif /* _REGEX_RE_COMP */
786889857Sobrien
786989857Sobrien/* POSIX.2 functions.  Don't define these for Emacs.  */
787089857Sobrien
787189857Sobrien#ifndef emacs
787289857Sobrien
787389857Sobrien/* regcomp takes a regular expression as a string and compiles it.
787489857Sobrien
787589857Sobrien   PREG is a regex_t *.  We do not expect any fields to be initialized,
787689857Sobrien   since POSIX says we shouldn't.  Thus, we set
787789857Sobrien
787889857Sobrien     `buffer' to the compiled pattern;
787989857Sobrien     `used' to the length of the compiled pattern;
788089857Sobrien     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
788189857Sobrien       REG_EXTENDED bit in CFLAGS is set; otherwise, to
788289857Sobrien       RE_SYNTAX_POSIX_BASIC;
788389857Sobrien     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
788489857Sobrien     `fastmap' to an allocated space for the fastmap;
788589857Sobrien     `fastmap_accurate' to zero;
788689857Sobrien     `re_nsub' to the number of subexpressions in PATTERN.
788789857Sobrien
788889857Sobrien   PATTERN is the address of the pattern string.
788989857Sobrien
789089857Sobrien   CFLAGS is a series of bits which affect compilation.
789189857Sobrien
789289857Sobrien     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
789389857Sobrien     use POSIX basic syntax.
789489857Sobrien
789589857Sobrien     If REG_NEWLINE is set, then . and [^...] don't match newline.
789689857Sobrien     Also, regexec will try a match beginning after every newline.
789789857Sobrien
789889857Sobrien     If REG_ICASE is set, then we considers upper- and lowercase
789989857Sobrien     versions of letters to be equivalent when matching.
790089857Sobrien
790189857Sobrien     If REG_NOSUB is set, then when PREG is passed to regexec, that
790289857Sobrien     routine will report only success or failure, and nothing about the
790389857Sobrien     registers.
790489857Sobrien
790589857Sobrien   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
790689857Sobrien   the return codes and their meanings.)  */
790789857Sobrien
790889857Sobrienint
7909218822Sdimregcomp (regex_t *preg, const char *pattern, int cflags)
791089857Sobrien{
791189857Sobrien  reg_errcode_t ret;
791289857Sobrien  reg_syntax_t syntax
791389857Sobrien    = (cflags & REG_EXTENDED) ?
791489857Sobrien      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
791589857Sobrien
791689857Sobrien  /* regex_compile will allocate the space for the compiled pattern.  */
791789857Sobrien  preg->buffer = 0;
791889857Sobrien  preg->allocated = 0;
791989857Sobrien  preg->used = 0;
792089857Sobrien
792189857Sobrien  /* Try to allocate space for the fastmap.  */
792289857Sobrien  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
792389857Sobrien
792489857Sobrien  if (cflags & REG_ICASE)
792589857Sobrien    {
7926218822Sdim      int i;
792789857Sobrien
792889857Sobrien      preg->translate
792989857Sobrien	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
793089857Sobrien				      * sizeof (*(RE_TRANSLATE_TYPE)0));
793189857Sobrien      if (preg->translate == NULL)
793289857Sobrien        return (int) REG_ESPACE;
793389857Sobrien
793489857Sobrien      /* Map uppercase characters to corresponding lowercase ones.  */
793589857Sobrien      for (i = 0; i < CHAR_SET_SIZE; i++)
7936218822Sdim        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
793789857Sobrien    }
793889857Sobrien  else
793989857Sobrien    preg->translate = NULL;
794089857Sobrien
794189857Sobrien  /* If REG_NEWLINE is set, newlines are treated differently.  */
794289857Sobrien  if (cflags & REG_NEWLINE)
794389857Sobrien    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
794489857Sobrien      syntax &= ~RE_DOT_NEWLINE;
794589857Sobrien      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
794689857Sobrien      /* It also changes the matching behavior.  */
794789857Sobrien      preg->newline_anchor = 1;
794889857Sobrien    }
794989857Sobrien  else
795089857Sobrien    preg->newline_anchor = 0;
795189857Sobrien
795289857Sobrien  preg->no_sub = !!(cflags & REG_NOSUB);
795389857Sobrien
795489857Sobrien  /* POSIX says a null character in the pattern terminates it, so we
795589857Sobrien     can use strlen here in compiling the pattern.  */
795689857Sobrien# ifdef MBS_SUPPORT
795789857Sobrien  if (MB_CUR_MAX != 1)
795889857Sobrien    ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
795989857Sobrien  else
796089857Sobrien# endif
796189857Sobrien    ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
796289857Sobrien
796389857Sobrien  /* POSIX doesn't distinguish between an unmatched open-group and an
796489857Sobrien     unmatched close-group: both are REG_EPAREN.  */
796589857Sobrien  if (ret == REG_ERPAREN) ret = REG_EPAREN;
796689857Sobrien
796789857Sobrien  if (ret == REG_NOERROR && preg->fastmap)
796889857Sobrien    {
796989857Sobrien      /* Compute the fastmap now, since regexec cannot modify the pattern
797089857Sobrien	 buffer.  */
797189857Sobrien      if (re_compile_fastmap (preg) == -2)
797289857Sobrien	{
797389857Sobrien	  /* Some error occurred while computing the fastmap, just forget
797489857Sobrien	     about it.  */
797589857Sobrien	  free (preg->fastmap);
797689857Sobrien	  preg->fastmap = NULL;
797789857Sobrien	}
797889857Sobrien    }
797989857Sobrien
798089857Sobrien  return (int) ret;
798189857Sobrien}
798289857Sobrien#ifdef _LIBC
798389857Sobrienweak_alias (__regcomp, regcomp)
798489857Sobrien#endif
798589857Sobrien
798689857Sobrien
798789857Sobrien/* regexec searches for a given pattern, specified by PREG, in the
798889857Sobrien   string STRING.
798989857Sobrien
799089857Sobrien   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
799189857Sobrien   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
799289857Sobrien   least NMATCH elements, and we set them to the offsets of the
799389857Sobrien   corresponding matched substrings.
799489857Sobrien
799589857Sobrien   EFLAGS specifies `execution flags' which affect matching: if
799689857Sobrien   REG_NOTBOL is set, then ^ does not match at the beginning of the
799789857Sobrien   string; if REG_NOTEOL is set, then $ does not match at the end.
799889857Sobrien
799989857Sobrien   We return 0 if we find a match and REG_NOMATCH if not.  */
800089857Sobrien
800189857Sobrienint
8002218822Sdimregexec (const regex_t *preg, const char *string, size_t nmatch,
8003218822Sdim         regmatch_t pmatch[], int eflags)
800489857Sobrien{
800589857Sobrien  int ret;
800689857Sobrien  struct re_registers regs;
800789857Sobrien  regex_t private_preg;
800889857Sobrien  int len = strlen (string);
800989857Sobrien  boolean want_reg_info = !preg->no_sub && nmatch > 0;
801089857Sobrien
801189857Sobrien  private_preg = *preg;
801289857Sobrien
801389857Sobrien  private_preg.not_bol = !!(eflags & REG_NOTBOL);
801489857Sobrien  private_preg.not_eol = !!(eflags & REG_NOTEOL);
801589857Sobrien
801689857Sobrien  /* The user has told us exactly how many registers to return
801789857Sobrien     information about, via `nmatch'.  We have to pass that on to the
801889857Sobrien     matching routines.  */
801989857Sobrien  private_preg.regs_allocated = REGS_FIXED;
802089857Sobrien
802189857Sobrien  if (want_reg_info)
802289857Sobrien    {
802389857Sobrien      regs.num_regs = nmatch;
802489857Sobrien      regs.start = TALLOC (nmatch * 2, regoff_t);
802589857Sobrien      if (regs.start == NULL)
802689857Sobrien        return (int) REG_NOMATCH;
802789857Sobrien      regs.end = regs.start + nmatch;
802889857Sobrien    }
802989857Sobrien
803089857Sobrien  /* Perform the searching operation.  */
803189857Sobrien  ret = re_search (&private_preg, string, len,
803289857Sobrien                   /* start: */ 0, /* range: */ len,
803389857Sobrien                   want_reg_info ? &regs : (struct re_registers *) 0);
803489857Sobrien
803589857Sobrien  /* Copy the register information to the POSIX structure.  */
803689857Sobrien  if (want_reg_info)
803789857Sobrien    {
803889857Sobrien      if (ret >= 0)
803989857Sobrien        {
804089857Sobrien          unsigned r;
804189857Sobrien
804289857Sobrien          for (r = 0; r < nmatch; r++)
804389857Sobrien            {
804489857Sobrien              pmatch[r].rm_so = regs.start[r];
804589857Sobrien              pmatch[r].rm_eo = regs.end[r];
804689857Sobrien            }
804789857Sobrien        }
804889857Sobrien
804989857Sobrien      /* If we needed the temporary register info, free the space now.  */
805089857Sobrien      free (regs.start);
805189857Sobrien    }
805289857Sobrien
805389857Sobrien  /* We want zero return to mean success, unlike `re_search'.  */
805489857Sobrien  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
805589857Sobrien}
805689857Sobrien#ifdef _LIBC
805789857Sobrienweak_alias (__regexec, regexec)
805889857Sobrien#endif
805989857Sobrien
806089857Sobrien
806189857Sobrien/* Returns a message corresponding to an error code, ERRCODE, returned
806289857Sobrien   from either regcomp or regexec.   We don't use PREG here.  */
806389857Sobrien
806489857Sobriensize_t
8065218822Sdimregerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
8066218822Sdim          char *errbuf, size_t errbuf_size)
806789857Sobrien{
806889857Sobrien  const char *msg;
806989857Sobrien  size_t msg_size;
807089857Sobrien
807189857Sobrien  if (errcode < 0
8072130561Sobrien      || errcode >= (int) (sizeof (re_error_msgid)
8073130561Sobrien			   / sizeof (re_error_msgid[0])))
807489857Sobrien    /* Only error codes returned by the rest of the code should be passed
807589857Sobrien       to this routine.  If we are given anything else, or if other regex
807689857Sobrien       code generates an invalid error code, then the program has a bug.
807789857Sobrien       Dump core so we can fix it.  */
807889857Sobrien    abort ();
807989857Sobrien
8080130561Sobrien  msg = gettext (re_error_msgid[errcode]);
808189857Sobrien
808289857Sobrien  msg_size = strlen (msg) + 1; /* Includes the null.  */
808389857Sobrien
808489857Sobrien  if (errbuf_size != 0)
808589857Sobrien    {
808689857Sobrien      if (msg_size > errbuf_size)
808789857Sobrien        {
808889857Sobrien#if defined HAVE_MEMPCPY || defined _LIBC
8089130561Sobrien	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
809089857Sobrien#else
809189857Sobrien          memcpy (errbuf, msg, errbuf_size - 1);
809289857Sobrien          errbuf[errbuf_size - 1] = 0;
809389857Sobrien#endif
809489857Sobrien        }
809589857Sobrien      else
809689857Sobrien        memcpy (errbuf, msg, msg_size);
809789857Sobrien    }
809889857Sobrien
809989857Sobrien  return msg_size;
810089857Sobrien}
810189857Sobrien#ifdef _LIBC
810289857Sobrienweak_alias (__regerror, regerror)
810389857Sobrien#endif
810489857Sobrien
810589857Sobrien
810689857Sobrien/* Free dynamically allocated space used by PREG.  */
810789857Sobrien
810889857Sobrienvoid
8109218822Sdimregfree (regex_t *preg)
811089857Sobrien{
811189857Sobrien  if (preg->buffer != NULL)
811289857Sobrien    free (preg->buffer);
811389857Sobrien  preg->buffer = NULL;
811489857Sobrien
811589857Sobrien  preg->allocated = 0;
811689857Sobrien  preg->used = 0;
811789857Sobrien
811889857Sobrien  if (preg->fastmap != NULL)
811989857Sobrien    free (preg->fastmap);
812089857Sobrien  preg->fastmap = NULL;
812189857Sobrien  preg->fastmap_accurate = 0;
812289857Sobrien
812389857Sobrien  if (preg->translate != NULL)
812489857Sobrien    free (preg->translate);
812589857Sobrien  preg->translate = NULL;
812689857Sobrien}
812789857Sobrien#ifdef _LIBC
812889857Sobrienweak_alias (__regfree, regfree)
812989857Sobrien#endif
813089857Sobrien
813189857Sobrien#endif /* not emacs  */
813289857Sobrien
813389857Sobrien#endif /* not INSIDE_RECURSION */
813489857Sobrien
813589857Sobrien
/* Drop the helper macros named below so that none of them is still
   defined past this point, then record in DEFINED_ONCE that this
   cleanup has been reached.
   NOTE(review): presumably this lets the file be compiled again with
   different CHAR_T/PREFIX settings (see INSIDE_RECURSION) -- confirm
   against the top of the file.  */

#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
/* NOTE(review): spelled MATCHING_IN_FIRST_STRING here on the assumption
   that this is the name the macro is defined under; undefining a name
   that is not a macro has no effect, so this is harmless either way.
   Confirm against the definition.  */
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

# define DEFINED_ONCE
8203