134461Speter/* Extended regular expression matching and search library, version
234461Speter   0.12.  (Implements POSIX draft P10003.2/D11.2, except for
317721Speter   internationalization features.)
417721Speter
566525Speter   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
617721Speter
717721Speter   This program is free software; you can redistribute it and/or modify
817721Speter   it under the terms of the GNU General Public License as published by
917721Speter   the Free Software Foundation; either version 2, or (at your option)
1017721Speter   any later version.
1117721Speter
1217721Speter   This program is distributed in the hope that it will be useful,
1317721Speter   but WITHOUT ANY WARRANTY; without even the implied warranty of
1466525Speter   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
1566525Speter   GNU General Public License for more details.
1617721Speter
1766525Speter   You should have received a copy of the GNU General Public License
1866525Speter   along with this program; if not, write to the Free Software
1966525Speter   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
2066525Speter   USA.	 */
2117721Speter
2217721Speter/* AIX requires this to be the first thing in the file. */
2317721Speter#if defined (_AIX) && !defined (REGEX_MALLOC)
2417721Speter  #pragma alloca
2517721Speter#endif
2617721Speter
2766525Speter#undef	_GNU_SOURCE
2817721Speter#define _GNU_SOURCE
2917721Speter
3066525Speter#ifdef emacs
3166525Speter/* Converts the pointer to the char to BEG-based offset from the start.	 */
3266525Speter#define PTR_TO_OFFSET(d)						\
3366525Speter	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
3466525Speter			  ? (d) - string1 : (d) - (string2 - size1))
3566525Speter#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
3666525Speter#else
3766525Speter#define PTR_TO_OFFSET(d) 0
3866525Speter#endif
3917721Speter
4017721Speter#ifdef HAVE_CONFIG_H
4166525Speter#include <config.h>
4217721Speter#endif
4317721Speter
4466525Speter/* We need this for `regex.h', and perhaps for the Emacs include files.	 */
4566525Speter#include <sys/types.h>
4666525Speter
4766525Speter/* This is for other GNU distributions with internationalized messages.	 */
4866525Speter#if HAVE_LIBINTL_H || defined (_LIBC)
4966525Speter# include <libintl.h>
5066525Speter#else
5166525Speter# define gettext(msgid) (msgid)
5266525Speter#endif
5366525Speter
5466525Speter#ifndef gettext_noop
5566525Speter/* This define is so xgettext can find the internationalizable
5666525Speter   strings.  */
5766525Speter#define gettext_noop(String) String
5866525Speter#endif
5966525Speter
6017721Speter/* The `emacs' switch turns on certain matching commands
6117721Speter   that make sense only in Emacs. */
6217721Speter#ifdef emacs
6317721Speter
6417721Speter#include "lisp.h"
6517721Speter#include "buffer.h"
6634461Speter
6734461Speter/* Make syntax table lookup grant data in gl_state.  */
6834461Speter#define SYNTAX_ENTRY_VIA_PROPERTY
6934461Speter
7017721Speter#include "syntax.h"
7134461Speter#include "charset.h"
7234461Speter#include "category.h"
7317721Speter
7434461Speter#define malloc xmalloc
7566525Speter#define realloc xrealloc
7634461Speter#define free xfree
7717721Speter
7817721Speter#else  /* not emacs */
7917721Speter
8066525Speter/* If we are not linking with Emacs proper,
8166525Speter   we can't use the relocating allocator
8266525Speter   even if config.h says that we can.  */
8366525Speter#undef REL_ALLOC
8466525Speter
8566525Speter#if defined (STDC_HEADERS) || defined (_LIBC)
8666525Speter#include <stdlib.h>
8766525Speter#else
8866525Speterchar *malloc ();
8966525Speterchar *realloc ();
9066525Speter#endif
9166525Speter
9266525Speter/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
9366525Speter   If nothing else has been done, use the method below.	 */
9466525Speter#ifdef INHIBIT_STRING_HEADER
9566525Speter#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
9666525Speter#if !defined (bzero) && !defined (bcopy)
9766525Speter#undef INHIBIT_STRING_HEADER
9866525Speter#endif
9966525Speter#endif
10066525Speter#endif
10166525Speter
10266525Speter/* This is the normal way of making sure we have a bcopy and a bzero.
10366525Speter   This is used in most programs--a few other programs avoid this
10466525Speter   by defining INHIBIT_STRING_HEADER.  */
10566525Speter#ifndef INHIBIT_STRING_HEADER
10666525Speter#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
10717721Speter#include <string.h>
10817721Speter#ifndef bcmp
10917721Speter#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
11017721Speter#endif
11117721Speter#ifndef bcopy
11217721Speter#define bcopy(s, d, n)	memcpy ((d), (s), (n))
11317721Speter#endif
11417721Speter#ifndef bzero
11517721Speter#define bzero(s, n)	memset ((s), 0, (n))
11617721Speter#endif
11717721Speter#else
11817721Speter#include <strings.h>
11917721Speter#endif
12017721Speter#endif
12117721Speter
12217721Speter/* Define the syntax stuff for \<, \>, etc.  */
12317721Speter
12417721Speter/* This must be nonzero for the wordchar and notwordchar pattern
12517721Speter   commands in re_match_2.  */
12625839Speter#ifndef Sword
12717721Speter#define Sword 1
12817721Speter#endif
12917721Speter
13066525Speter#ifdef SWITCH_ENUM_BUG
13166525Speter#define SWITCH_ENUM_CAST(x) ((int)(x))
13266525Speter#else
13366525Speter#define SWITCH_ENUM_CAST(x) (x)
13466525Speter#endif
13566525Speter
13617721Speter#ifdef SYNTAX_TABLE
13717721Speter
13817721Speterextern char *re_syntax_table;
13917721Speter
14017721Speter#else /* not SYNTAX_TABLE */
14117721Speter
14217721Speter/* How many characters in the character set.  */
14317721Speter#define CHAR_SET_SIZE 256
14417721Speter
14517721Speterstatic char re_syntax_table[CHAR_SET_SIZE];
14617721Speter
14717721Speterstatic void
14817721Speterinit_syntax_once ()
14917721Speter{
15017721Speter   register int c;
15117721Speter   static int done = 0;
15217721Speter
15317721Speter   if (done)
15417721Speter     return;
15517721Speter
15617721Speter   bzero (re_syntax_table, sizeof re_syntax_table);
15717721Speter
15817721Speter   for (c = 'a'; c <= 'z'; c++)
15917721Speter     re_syntax_table[c] = Sword;
16017721Speter
16117721Speter   for (c = 'A'; c <= 'Z'; c++)
16217721Speter     re_syntax_table[c] = Sword;
16317721Speter
16417721Speter   for (c = '0'; c <= '9'; c++)
16517721Speter     re_syntax_table[c] = Sword;
16617721Speter
16717721Speter   re_syntax_table['_'] = Sword;
16817721Speter
16917721Speter   done = 1;
17017721Speter}
17117721Speter
17217721Speter#endif /* not SYNTAX_TABLE */
17317721Speter
17417721Speter#define SYNTAX(c) re_syntax_table[c]
17517721Speter
17666525Speter/* Dummy macros for non-Emacs environments.  */
17766525Speter#define BASE_LEADING_CODE_P(c) (0)
17866525Speter#define WORD_BOUNDARY_P(c1, c2) (0)
17966525Speter#define CHAR_HEAD_P(p) (1)
18066525Speter#define SINGLE_BYTE_CHAR_P(c) (1)
18166525Speter#define SAME_CHARSET_P(c1, c2) (1)
18266525Speter#define MULTIBYTE_FORM_LENGTH(p, s) (1)
18366525Speter#define STRING_CHAR(p, s) (*(p))
18466525Speter#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
18566525Speter#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
18666525Speter  (c = ((p) == (end1) ? *(str2) : *(p)))
18766525Speter#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
18866525Speter  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
18917721Speter#endif /* not emacs */
19017721Speter
19117721Speter/* Get the interface, including the syntax bits.  */
19217721Speter#include "regex.h"
19317721Speter
19417721Speter/* isalpha etc. are used for the character classes.  */
19517721Speter#include <ctype.h>
19617721Speter
19766525Speter/* Jim Meyering writes:
19866525Speter
19966525Speter   "... Some ctype macros are valid only for character codes that
20066525Speter   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
20166525Speter   using /bin/cc or gcc but without giving an ansi option).  So, all
20266525Speter   ctype uses should be through macros like ISPRINT...	If
20366525Speter   STDC_HEADERS is defined, then autoconf has verified that the ctype
20466525Speter   macros don't need to be guarded with references to isascii. ...
20566525Speter   Defining isascii to 1 should let any compiler worth its salt
20666525Speter   eliminate the && through constant folding."	*/
20766525Speter
20866525Speter#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
20966525Speter#define ISASCII(c) 1
21066525Speter#else
21166525Speter#define ISASCII(c) isascii(c)
21217721Speter#endif
21317721Speter
21417721Speter#ifdef isblank
21566525Speter#define ISBLANK(c) (ISASCII (c) && isblank (c))
21617721Speter#else
21717721Speter#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
21817721Speter#endif
21917721Speter#ifdef isgraph
22066525Speter#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
22117721Speter#else
22266525Speter#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
22317721Speter#endif
22417721Speter
22566525Speter#define ISPRINT(c) (ISASCII (c) && isprint (c))
22666525Speter#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
22766525Speter#define ISALNUM(c) (ISASCII (c) && isalnum (c))
22866525Speter#define ISALPHA(c) (ISASCII (c) && isalpha (c))
22966525Speter#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
23066525Speter#define ISLOWER(c) (ISASCII (c) && islower (c))
23166525Speter#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
23266525Speter#define ISSPACE(c) (ISASCII (c) && isspace (c))
23366525Speter#define ISUPPER(c) (ISASCII (c) && isupper (c))
23466525Speter#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
23517721Speter
23617721Speter#ifndef NULL
23766525Speter#define NULL (void *)0
23817721Speter#endif
23917721Speter
24017721Speter/* We remove any previous definition of `SIGN_EXTEND_CHAR',
24117721Speter   since ours (we hope) works properly with all combinations of
24217721Speter   machines, compilers, `char' and `unsigned char' argument types.
24334461Speter   (Per Bothner suggested the basic approach.)	*/
24417721Speter#undef SIGN_EXTEND_CHAR
24517721Speter#if __STDC__
24617721Speter#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
24717721Speter#else  /* not __STDC__ */
24817721Speter/* As in Harbison and Steele.  */
24917721Speter#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
25017721Speter#endif
25117721Speter
25217721Speter/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
25317721Speter   use `alloca' instead of `malloc'.  This is because using malloc in
25417721Speter   re_search* or re_match* could cause memory leaks when C-g is used in
25517721Speter   Emacs; also, malloc is slower and causes storage fragmentation.  On
25625839Speter   the other hand, malloc is more portable, and easier to debug.
25725839Speter
25817721Speter   Because we sometimes use alloca, some routines have to be macros,
25917721Speter   not functions -- `alloca'-allocated space disappears at the end of the
26017721Speter   function it is called in.  */
26117721Speter
26217721Speter#ifdef REGEX_MALLOC
26317721Speter
26417721Speter#define REGEX_ALLOCATE malloc
26517721Speter#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
26666525Speter#define REGEX_FREE free
26717721Speter
26817721Speter#else /* not REGEX_MALLOC  */
26917721Speter
27017721Speter/* Emacs already defines alloca, sometimes.  */
27117721Speter#ifndef alloca
27217721Speter
27317721Speter/* Make alloca work the best possible way.  */
27417721Speter#ifdef __GNUC__
27517721Speter#define alloca __builtin_alloca
27617721Speter#else /* not __GNUC__ */
27717721Speter#if HAVE_ALLOCA_H
27817721Speter#include <alloca.h>
27917721Speter#else /* not __GNUC__ or HAVE_ALLOCA_H */
28066525Speter#if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
28166525Speter#ifndef _AIX /* Already did AIX, up at the top.	 */
28217721Speterchar *alloca ();
28317721Speter#endif /* not _AIX */
28466525Speter#endif
28566525Speter#endif /* not HAVE_ALLOCA_H */
28617721Speter#endif /* not __GNUC__ */
28717721Speter
28817721Speter#endif /* not alloca */
28917721Speter
29017721Speter#define REGEX_ALLOCATE alloca
29117721Speter
29217721Speter/* Assumes a `char *destination' variable.  */
29317721Speter#define REGEX_REALLOCATE(source, osize, nsize)				\
29417721Speter  (destination = (char *) alloca (nsize),				\
29517721Speter   bcopy (source, destination, osize),					\
29617721Speter   destination)
29717721Speter
29866525Speter/* No need to do anything to free, after alloca.  */
29966525Speter#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
30066525Speter
30117721Speter#endif /* not REGEX_MALLOC */
30217721Speter
30366525Speter/* Define how to allocate the failure stack.  */
30417721Speter
30566525Speter#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
30666525Speter
30766525Speter#define REGEX_ALLOCATE_STACK(size)				\
30866525Speter  r_alloc (&failure_stack_ptr, (size))
30966525Speter#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
31066525Speter  r_re_alloc (&failure_stack_ptr, (nsize))
31166525Speter#define REGEX_FREE_STACK(ptr)					\
31266525Speter  r_alloc_free (&failure_stack_ptr)
31366525Speter
31466525Speter#else /* not using relocating allocator */
31566525Speter
31666525Speter#ifdef REGEX_MALLOC
31766525Speter
31866525Speter#define REGEX_ALLOCATE_STACK malloc
31966525Speter#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
32066525Speter#define REGEX_FREE_STACK free
32166525Speter
32266525Speter#else /* not REGEX_MALLOC */
32366525Speter
32466525Speter#define REGEX_ALLOCATE_STACK alloca
32566525Speter
32666525Speter#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
32766525Speter   REGEX_REALLOCATE (source, osize, nsize)
32866525Speter/* No need to explicitly free anything.	 */
32966525Speter#define REGEX_FREE_STACK(arg)
33066525Speter
33166525Speter#endif /* not REGEX_MALLOC */
33266525Speter#endif /* not using relocating allocator */
33366525Speter
33466525Speter
33517721Speter/* True if `size1' is non-NULL and PTR is pointing anywhere inside
33617721Speter   `string1' or just past its end.  This works if PTR is NULL, which is
33717721Speter   a good thing.  */
33866525Speter#define FIRST_STRING_P(ptr)					\
33917721Speter  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
34017721Speter
34117721Speter/* (Re)Allocate N items of type T using malloc, or fail.  */
34217721Speter#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
34317721Speter#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
34466525Speter#define RETALLOC_IF(addr, n, t) \
34566525Speter  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
34617721Speter#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
34717721Speter
34866525Speter#define BYTEWIDTH 8 /* In bits.	 */
34917721Speter
35017721Speter#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
35117721Speter
35225839Speter#undef MAX
35325839Speter#undef MIN
35417721Speter#define MAX(a, b) ((a) > (b) ? (a) : (b))
35517721Speter#define MIN(a, b) ((a) < (b) ? (a) : (b))
35617721Speter
35717721Spetertypedef char boolean;
35817721Speter#define false 0
35917721Speter#define true 1
36066525Speter
36166525Speterstatic int re_match_2_internal ();
36217721Speter
36317721Speter/* These are the command codes that appear in compiled regular
36466525Speter   expressions.	 Some opcodes are followed by argument bytes.  A
36517721Speter   command code can specify any interpretation whatsoever for its
36666525Speter   arguments.  Zero bytes may appear in the compiled regular expression.  */
36717721Speter
36817721Spetertypedef enum
36917721Speter{
37017721Speter  no_op = 0,
37117721Speter
37266525Speter  /* Succeed right away--no more backtracking.	*/
37366525Speter  succeed,
37417721Speter
37566525Speter	/* Followed by one byte giving n, then by n literal bytes.  */
37666525Speter  exactn,
37766525Speter
37866525Speter	/* Matches any (more or less) character.  */
37917721Speter  anychar,
38017721Speter
38166525Speter	/* Matches any one char belonging to specified set.  First
38266525Speter	   following byte is number of bitmap bytes.  Then come bytes
38366525Speter	   for a bitmap saying which chars are in.  Bits in each byte
38466525Speter	   are ordered low-bit-first.  A character is in the set if its
38566525Speter	   bit is 1.  A character too large to have a bit in the map is
38666525Speter	   automatically not in the set.  */
38717721Speter  charset,
38817721Speter
38966525Speter	/* Same parameters as charset, but match any character that is
39066525Speter	   not one of those specified.	*/
39117721Speter  charset_not,
39217721Speter
39366525Speter	/* Start remembering the text that is matched, for storing in a
39466525Speter	   register.  Followed by one byte with the register number, in
39566525Speter	   the range 0 to one less than the pattern buffer's re_nsub
39666525Speter	   field.  Then followed by one byte with the number of groups
39766525Speter	   inner to this one.  (This last has to be part of the
39866525Speter	   start_memory only because we need it in the on_failure_jump
39966525Speter	   of re_match_2.)  */
40017721Speter  start_memory,
40117721Speter
40266525Speter	/* Stop remembering the text that is matched and store it in a
40366525Speter	   memory register.  Followed by one byte with the register
40466525Speter	   number, in the range 0 to one less than `re_nsub' in the
40566525Speter	   pattern buffer, and one byte with the number of inner groups,
40666525Speter	   just like `start_memory'.  (We need the number of inner
40766525Speter	   groups here because we don't have any easy way of finding the
40866525Speter	   corresponding start_memory when we're at a stop_memory.)  */
40917721Speter  stop_memory,
41017721Speter
41166525Speter	/* Match a duplicate of something remembered. Followed by one
41266525Speter	   byte containing the register number.	 */
41317721Speter  duplicate,
41417721Speter
41566525Speter	/* Fail unless at beginning of line.  */
41617721Speter  begline,
41717721Speter
41866525Speter	/* Fail unless at end of line.	*/
41917721Speter  endline,
42017721Speter
42166525Speter	/* Succeeds if at beginning of buffer (if emacs) or at beginning
42266525Speter	   of string to be matched (if not).  */
42317721Speter  begbuf,
42417721Speter
42566525Speter	/* Analogously, for end of buffer/string.  */
42617721Speter  endbuf,
42725839Speter
42866525Speter	/* Followed by two byte relative address to which to jump.  */
42925839Speter  jump,
43017721Speter
43117721Speter	/* Same as jump, but marks the end of an alternative.  */
43217721Speter  jump_past_alt,
43317721Speter
43466525Speter	/* Followed by two-byte relative address of place to resume at
43566525Speter	   in case of failure.	*/
43617721Speter  on_failure_jump,
43725839Speter
43866525Speter	/* Like on_failure_jump, but pushes a placeholder instead of the
43966525Speter	   current string position when executed.  */
44017721Speter  on_failure_keep_string_jump,
44125839Speter
44266525Speter	/* Throw away latest failure point and then jump to following
44366525Speter	   two-byte relative address.  */
44417721Speter  pop_failure_jump,
44517721Speter
44666525Speter	/* Change to pop_failure_jump if know won't have to backtrack to
44766525Speter	   match; otherwise change to jump.  This is used to jump
44866525Speter	   back to the beginning of a repeat.  If what follows this jump
44966525Speter	   clearly won't match what the repeat does, such that we can be
45066525Speter	   sure that there is no use backtracking out of repetitions
45166525Speter	   already matched, then we change it to a pop_failure_jump.
45266525Speter	   Followed by two-byte address.  */
45317721Speter  maybe_pop_jump,
45417721Speter
45566525Speter	/* Jump to following two-byte address, and push a dummy failure
45666525Speter	   point. This failure point will be thrown away if an attempt
45766525Speter	   is made to use it for a failure.  A `+' construct makes this
45866525Speter	   before the first repeat.  Also used as an intermediary kind
45966525Speter	   of jump when compiling an alternative.  */
46017721Speter  dummy_failure_jump,
46117721Speter
46217721Speter	/* Push a dummy failure point and continue.  Used at the end of
46317721Speter	   alternatives.  */
46417721Speter  push_dummy_failure,
46517721Speter
46666525Speter	/* Followed by two-byte relative address and two-byte number n.
46766525Speter	   After matching N times, jump to the address upon failure.  */
46817721Speter  succeed_n,
46917721Speter
47066525Speter	/* Followed by two-byte relative address, and two-byte number n.
47166525Speter	   Jump to the address N times, then fail.  */
47217721Speter  jump_n,
47317721Speter
47466525Speter	/* Set the following two-byte relative address to the
47566525Speter	   subsequent two-byte number.	The address *includes* the two
47666525Speter	   bytes of number.  */
47717721Speter  set_number_at,
47817721Speter
47917721Speter  wordchar,	/* Matches any word-constituent character.  */
48017721Speter  notwordchar,	/* Matches any char that is not a word-constituent.  */
48117721Speter
48217721Speter  wordbeg,	/* Succeeds if at word beginning.  */
48317721Speter  wordend,	/* Succeeds if at word end.  */
48417721Speter
48517721Speter  wordbound,	/* Succeeds if at a word boundary.  */
48666525Speter  notwordbound	/* Succeeds if not at a word boundary.	*/
48717721Speter
48817721Speter#ifdef emacs
48917721Speter  ,before_dot,	/* Succeeds if before point.  */
49017721Speter  at_dot,	/* Succeeds if at point.  */
49117721Speter  after_dot,	/* Succeeds if after point.  */
49217721Speter
49317721Speter	/* Matches any character whose syntax is specified.  Followed by
49466525Speter	   a byte which contains a syntax code, e.g., Sword.  */
49517721Speter  syntaxspec,
49617721Speter
49717721Speter	/* Matches any character whose syntax is not that specified.  */
49866525Speter  notsyntaxspec,
49966525Speter
50066525Speter  /* Matches any character whose category-set contains the specified
50166525Speter     category.	The operator is followed by a byte which contains a
50266525Speter     category code (mnemonic ASCII character).	*/
50366525Speter  categoryspec,
50466525Speter
50566525Speter  /* Matches any character whose category-set does not contain the
50666525Speter     specified category.  The operator is followed by a byte which
50766525Speter     contains the category code (mnemonic ASCII character).  */
50866525Speter  notcategoryspec
50917721Speter#endif /* emacs */
51017721Speter} re_opcode_t;
51117721Speter
51217721Speter/* Common operations on the compiled pattern.  */
51317721Speter
51417721Speter/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
51517721Speter
51617721Speter#define STORE_NUMBER(destination, number)				\
51717721Speter  do {									\
51817721Speter    (destination)[0] = (number) & 0377;					\
51917721Speter    (destination)[1] = (number) >> 8;					\
52017721Speter  } while (0)
52117721Speter
52217721Speter/* Same as STORE_NUMBER, except increment DESTINATION to
52317721Speter   the byte after where the number is stored.  Therefore, DESTINATION
52417721Speter   must be an lvalue.  */
52517721Speter
52617721Speter#define STORE_NUMBER_AND_INCR(destination, number)			\
52717721Speter  do {									\
52817721Speter    STORE_NUMBER (destination, number);					\
52917721Speter    (destination) += 2;							\
53017721Speter  } while (0)
53117721Speter
53217721Speter/* Put into DESTINATION a number stored in two contiguous bytes starting
53317721Speter   at SOURCE.  */
53417721Speter
53517721Speter#define EXTRACT_NUMBER(destination, source)				\
53617721Speter  do {									\
53717721Speter    (destination) = *(source) & 0377;					\
53817721Speter    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
53917721Speter  } while (0)
54017721Speter
54117721Speter#ifdef DEBUG
54217721Speterstatic void
54317721Speterextract_number (dest, source)
54417721Speter    int *dest;
54517721Speter    unsigned char *source;
54617721Speter{
54725839Speter  int temp = SIGN_EXTEND_CHAR (*(source + 1));
54817721Speter  *dest = *source & 0377;
54917721Speter  *dest += temp << 8;
55017721Speter}
55117721Speter
55266525Speter#ifndef EXTRACT_MACROS /* To debug the macros.	*/
55317721Speter#undef EXTRACT_NUMBER
55417721Speter#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
55517721Speter#endif /* not EXTRACT_MACROS */
55617721Speter
55717721Speter#endif /* DEBUG */
55817721Speter
55917721Speter/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
56017721Speter   SOURCE must be an lvalue.  */
56117721Speter
56217721Speter#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
56317721Speter  do {									\
56417721Speter    EXTRACT_NUMBER (destination, source);				\
56566525Speter    (source) += 2;							\
56617721Speter  } while (0)
56717721Speter
56817721Speter#ifdef DEBUG
56917721Speterstatic void
57017721Speterextract_number_and_incr (destination, source)
57117721Speter    int *destination;
57217721Speter    unsigned char **source;
57325839Speter{
57417721Speter  extract_number (destination, *source);
57517721Speter  *source += 2;
57617721Speter}
57717721Speter
57817721Speter#ifndef EXTRACT_MACROS
57917721Speter#undef EXTRACT_NUMBER_AND_INCR
58017721Speter#define EXTRACT_NUMBER_AND_INCR(dest, src) \
58117721Speter  extract_number_and_incr (&dest, &src)
58217721Speter#endif /* not EXTRACT_MACROS */
58317721Speter
58417721Speter#endif /* DEBUG */
58517721Speter
58666525Speter/* Store a multibyte character in three contiguous bytes starting
58766525Speter   DESTINATION, and increment DESTINATION to the byte after where the
58866525Speter   character is stored.	 Therefore, DESTINATION must be an lvalue.  */
58966525Speter
59066525Speter#define STORE_CHARACTER_AND_INCR(destination, character)	\
59166525Speter  do {								\
59266525Speter    (destination)[0] = (character) & 0377;			\
59366525Speter    (destination)[1] = ((character) >> 8) & 0377;		\
59466525Speter    (destination)[2] = (character) >> 16;			\
59566525Speter    (destination) += 3;						\
59666525Speter  } while (0)
59766525Speter
59866525Speter/* Put into DESTINATION a character stored in three contiguous bytes
59966525Speter   starting at SOURCE.	*/
60066525Speter
60166525Speter#define EXTRACT_CHARACTER(destination, source)	\
60266525Speter  do {						\
60366525Speter    (destination) = ((source)[0]		\
60466525Speter		     | ((source)[1] << 8)	\
60566525Speter		     | ((source)[2] << 16));	\
60666525Speter  } while (0)
60766525Speter
60866525Speter
60966525Speter/* Macros for charset. */
61066525Speter
61166525Speter/* Size of bitmap of charset P in bytes.  P is a start of charset,
61266525Speter   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
61366525Speter#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
61466525Speter
61566525Speter/* Nonzero if charset P has range table.  */
61666525Speter#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
61766525Speter
61866525Speter/* Return the address of range table of charset P.  But not the start
61966525Speter   of table itself, but the before where the number of ranges is
62066525Speter   stored.  `2 +' means to skip re_opcode_t and size of bitmap.	 */
62166525Speter#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
62266525Speter
62366525Speter/* Test if C is listed in the bitmap of charset P.  */
62466525Speter#define CHARSET_LOOKUP_BITMAP(p, c)				\
62566525Speter  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
62666525Speter   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
62766525Speter
62866525Speter/* Return the address of end of RANGE_TABLE.  COUNT is number of
62966525Speter   ranges (which is a pair of (start, end)) in the RANGE_TABLE.	 `* 2'
63066525Speter   is start of range and end of range.	`* 3' is size of each start
63166525Speter   and end.  */
63266525Speter#define CHARSET_RANGE_TABLE_END(range_table, count)	\
63366525Speter  ((range_table) + (count) * 2 * 3)
63466525Speter
63566525Speter/* Test if C is in RANGE_TABLE.	 A flag NOT is negated if C is in.
63666525Speter   COUNT is number of ranges in RANGE_TABLE.  */
63766525Speter#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
63866525Speter  do									\
63966525Speter    {									\
64066525Speter      int range_start, range_end;					\
64166525Speter      unsigned char *p;							\
64266525Speter      unsigned char *range_table_end					\
64366525Speter	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
64466525Speter									\
64566525Speter      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
64666525Speter	{								\
64766525Speter	  EXTRACT_CHARACTER (range_start, p);				\
64866525Speter	  EXTRACT_CHARACTER (range_end, p + 3);				\
64966525Speter									\
65066525Speter	  if (range_start <= (c) && (c) <= range_end)			\
65166525Speter	    {								\
65266525Speter	      (not) = !(not);						\
65366525Speter	      break;							\
65466525Speter	    }								\
65566525Speter	}								\
65666525Speter    }									\
65766525Speter  while (0)
65866525Speter
65966525Speter/* Test if C is in range table of CHARSET.  The flag NOT is negated if
66066525Speter   C is listed in it.  */
66166525Speter#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
66266525Speter  do									\
66366525Speter    {									\
66466525Speter      /* Number of ranges in range table. */				\
66566525Speter      int count;							\
66666525Speter      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
66766525Speter									\
66866525Speter      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
66966525Speter      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
67066525Speter    }									\
67166525Speter  while (0)
67266525Speter
67317721Speter/* If DEBUG is defined, Regex prints many voluminous messages about what
67417721Speter   it is doing (if the variable `debug' is nonzero).  If linked with the
67517721Speter   main program in `iregex.c', you can enter patterns and strings
67617721Speter   interactively.  And if linked with the main program in `main.c' and
67766525Speter   the other test files, you can run the already-written tests.	 */
67817721Speter
67917721Speter#ifdef DEBUG
68017721Speter
68117721Speter/* We use standard I/O for debugging.  */
68217721Speter#include <stdio.h>
68317721Speter
68417721Speter/* It is useful to test things that ``must'' be true when debugging.  */
68517721Speter#include <assert.h>
68617721Speter
68717721Speterstatic int debug = 0;
68817721Speter
68917721Speter#define DEBUG_STATEMENT(e) e
69017721Speter#define DEBUG_PRINT1(x) if (debug) printf (x)
69117721Speter#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
69217721Speter#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
69317721Speter#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
69466525Speter#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
69517721Speter  if (debug) print_partial_compiled_pattern (s, e)
69617721Speter#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
69717721Speter  if (debug) print_double_string (w, s1, sz1, s2, sz2)
69817721Speter
69917721Speter
70017721Speter/* Print the fastmap in human-readable form.  */
70117721Speter
70217721Spetervoid
70317721Speterprint_fastmap (fastmap)
70417721Speter    char *fastmap;
70517721Speter{
70617721Speter  unsigned was_a_range = 0;
70725839Speter  unsigned i = 0;
70825839Speter
70917721Speter  while (i < (1 << BYTEWIDTH))
71017721Speter    {
71117721Speter      if (fastmap[i++])
71217721Speter	{
71317721Speter	  was_a_range = 0;
71466525Speter	  putchar (i - 1);
71566525Speter	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
71666525Speter	    {
71766525Speter	      was_a_range = 1;
71866525Speter	      i++;
71966525Speter	    }
72017721Speter	  if (was_a_range)
72166525Speter	    {
72266525Speter	      printf ("-");
72366525Speter	      putchar (i - 1);
72466525Speter	    }
72566525Speter	}
72617721Speter    }
72725839Speter  putchar ('\n');
72817721Speter}
72917721Speter
73017721Speter
73117721Speter/* Print a compiled pattern string in human-readable form, starting at
73217721Speter   the START pointer into it and ending just before the pointer END.  */
73317721Speter
73417721Spetervoid
73517721Speterprint_partial_compiled_pattern (start, end)
73617721Speter    unsigned char *start;
73717721Speter    unsigned char *end;
73817721Speter{
73917721Speter  int mcnt, mcnt2;
74017721Speter  unsigned char *p = start;
74117721Speter  unsigned char *pend = end;
74217721Speter
74317721Speter  if (start == NULL)
74417721Speter    {
74517721Speter      printf ("(null)\n");
74617721Speter      return;
74717721Speter    }
74825839Speter
74917721Speter  /* Loop over pattern commands.  */
75017721Speter  while (p < pend)
75117721Speter    {
75266525Speter      printf ("%d:\t", p - start);
75366525Speter
75417721Speter      switch ((re_opcode_t) *p++)
75517721Speter	{
75666525Speter	case no_op:
75766525Speter	  printf ("/no_op");
75866525Speter	  break;
75917721Speter
76017721Speter	case exactn:
76117721Speter	  mcnt = *p++;
76266525Speter	  printf ("/exactn/%d", mcnt);
76366525Speter	  do
76417721Speter	    {
76566525Speter	      putchar ('/');
76666525Speter	      putchar (*p++);
76766525Speter	    }
76866525Speter	  while (--mcnt);
76966525Speter	  break;
77017721Speter
77117721Speter	case start_memory:
77266525Speter	  mcnt = *p++;
77366525Speter	  printf ("/start_memory/%d/%d", mcnt, *p++);
77466525Speter	  break;
77517721Speter
77617721Speter	case stop_memory:
77766525Speter	  mcnt = *p++;
77817721Speter	  printf ("/stop_memory/%d/%d", mcnt, *p++);
77966525Speter	  break;
78017721Speter
78117721Speter	case duplicate:
78217721Speter	  printf ("/duplicate/%d", *p++);
78317721Speter	  break;
78417721Speter
78517721Speter	case anychar:
78617721Speter	  printf ("/anychar");
78717721Speter	  break;
78817721Speter
78917721Speter	case charset:
79066525Speter	case charset_not:
79166525Speter	  {
79266525Speter	    register int c, last = -100;
79366525Speter	    register int in_range = 0;
79417721Speter
79566525Speter	    printf ("/charset [%s",
79666525Speter		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
79717721Speter
79866525Speter	    assert (p + *p < pend);
79917721Speter
80066525Speter	    for (c = 0; c < 256; c++)
80166525Speter	      if (c / 8 < *p
80266525Speter		  && (p[1 + (c/8)] & (1 << (c % 8))))
80366525Speter		{
80466525Speter		  /* Are we starting a range?  */
80566525Speter		  if (last + 1 == c && ! in_range)
80666525Speter		    {
80766525Speter		      putchar ('-');
80866525Speter		      in_range = 1;
80966525Speter		    }
81066525Speter		  /* Have we broken a range?  */
81166525Speter		  else if (last + 1 != c && in_range)
81266525Speter	      {
81366525Speter		      putchar (last);
81466525Speter		      in_range = 0;
81566525Speter		    }
81666525Speter
81766525Speter		  if (! in_range)
81866525Speter		    putchar (c);
81966525Speter
82066525Speter		  last = c;
82166525Speter	      }
82266525Speter
82366525Speter	    if (in_range)
82466525Speter	      putchar (last);
82566525Speter
82666525Speter	    putchar (']');
82766525Speter
82817721Speter	    p += 1 + *p;
82917721Speter	  }
83066525Speter	  break;
83117721Speter
83217721Speter	case begline:
83317721Speter	  printf ("/begline");
83466525Speter	  break;
83517721Speter
83617721Speter	case endline:
83766525Speter	  printf ("/endline");
83866525Speter	  break;
83917721Speter
84017721Speter	case on_failure_jump:
84166525Speter	  extract_number_and_incr (&mcnt, &p);
84266525Speter	  printf ("/on_failure_jump to %d", p + mcnt - start);
84366525Speter	  break;
84417721Speter
84517721Speter	case on_failure_keep_string_jump:
84666525Speter	  extract_number_and_incr (&mcnt, &p);
84766525Speter	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
84866525Speter	  break;
84917721Speter
85017721Speter	case dummy_failure_jump:
85166525Speter	  extract_number_and_incr (&mcnt, &p);
85266525Speter	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
85366525Speter	  break;
85417721Speter
85517721Speter	case push_dummy_failure:
85666525Speter	  printf ("/push_dummy_failure");
85717721Speter	  break;
85817721Speter
85966525Speter	case maybe_pop_jump:
86017721Speter	  extract_number_and_incr (&mcnt, &p);
86166525Speter	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
86266525Speter	  break;
86366525Speter
86466525Speter	case pop_failure_jump:
86517721Speter	  extract_number_and_incr (&mcnt, &p);
86666525Speter	  printf ("/pop_failure_jump to %d", p + mcnt - start);
86766525Speter	  break;
86866525Speter
86966525Speter	case jump_past_alt:
87017721Speter	  extract_number_and_incr (&mcnt, &p);
87166525Speter	  printf ("/jump_past_alt to %d", p + mcnt - start);
87217721Speter	  break;
87317721Speter
87466525Speter	case jump:
87566525Speter	  extract_number_and_incr (&mcnt, &p);
87666525Speter	  printf ("/jump to %d", p + mcnt - start);
87766525Speter	  break;
87825839Speter
87966525Speter	case succeed_n:
88066525Speter	  extract_number_and_incr (&mcnt, &p);
88166525Speter	  extract_number_and_incr (&mcnt2, &p);
88266525Speter	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
88366525Speter	  break;
88425839Speter
88566525Speter	case jump_n:
88666525Speter	  extract_number_and_incr (&mcnt, &p);
88766525Speter	  extract_number_and_incr (&mcnt2, &p);
88866525Speter	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
88966525Speter	  break;
89025839Speter
89166525Speter	case set_number_at:
89266525Speter	  extract_number_and_incr (&mcnt, &p);
89366525Speter	  extract_number_and_incr (&mcnt2, &p);
89466525Speter	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
89566525Speter	  break;
89666525Speter
89766525Speter	case wordbound:
89817721Speter	  printf ("/wordbound");
89917721Speter	  break;
90017721Speter
90117721Speter	case notwordbound:
90217721Speter	  printf ("/notwordbound");
90366525Speter	  break;
90417721Speter
90517721Speter	case wordbeg:
90617721Speter	  printf ("/wordbeg");
90717721Speter	  break;
90825839Speter
90917721Speter	case wordend:
91017721Speter	  printf ("/wordend");
91125839Speter
91217721Speter#ifdef emacs
91317721Speter	case before_dot:
91417721Speter	  printf ("/before_dot");
91566525Speter	  break;
91617721Speter
91717721Speter	case at_dot:
91817721Speter	  printf ("/at_dot");
91966525Speter	  break;
92017721Speter
92117721Speter	case after_dot:
92217721Speter	  printf ("/after_dot");
92366525Speter	  break;
92417721Speter
92517721Speter	case syntaxspec:
92666525Speter	  printf ("/syntaxspec");
92717721Speter	  mcnt = *p++;
92817721Speter	  printf ("/%d", mcnt);
92966525Speter	  break;
93025839Speter
93117721Speter	case notsyntaxspec:
93266525Speter	  printf ("/notsyntaxspec");
93317721Speter	  mcnt = *p++;
93417721Speter	  printf ("/%d", mcnt);
93517721Speter	  break;
93617721Speter#endif /* emacs */
93717721Speter
93817721Speter	case wordchar:
93917721Speter	  printf ("/wordchar");
94066525Speter	  break;
94125839Speter
94217721Speter	case notwordchar:
94317721Speter	  printf ("/notwordchar");
94466525Speter	  break;
94517721Speter
94617721Speter	case begbuf:
94717721Speter	  printf ("/begbuf");
94866525Speter	  break;
94917721Speter
95017721Speter	case endbuf:
95117721Speter	  printf ("/endbuf");
95266525Speter	  break;
95317721Speter
95466525Speter	default:
95566525Speter	  printf ("?%d", *(p-1));
95617721Speter	}
95766525Speter
95866525Speter      putchar ('\n');
95917721Speter    }
96066525Speter
96166525Speter  printf ("%d:\tend of pattern.\n", p - start);
96217721Speter}
96317721Speter
96417721Speter
96517721Spetervoid
96617721Speterprint_compiled_pattern (bufp)
96717721Speter    struct re_pattern_buffer *bufp;
96817721Speter{
96917721Speter  unsigned char *buffer = bufp->buffer;
97017721Speter
97117721Speter  print_partial_compiled_pattern (buffer, buffer + bufp->used);
97217721Speter  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
97317721Speter
97417721Speter  if (bufp->fastmap_accurate && bufp->fastmap)
97517721Speter    {
97617721Speter      printf ("fastmap: ");
97717721Speter      print_fastmap (bufp->fastmap);
97817721Speter    }
97917721Speter
98017721Speter  printf ("re_nsub: %d\t", bufp->re_nsub);
98117721Speter  printf ("regs_alloc: %d\t", bufp->regs_allocated);
98217721Speter  printf ("can_be_null: %d\t", bufp->can_be_null);
98317721Speter  printf ("newline_anchor: %d\n", bufp->newline_anchor);
98417721Speter  printf ("no_sub: %d\t", bufp->no_sub);
98517721Speter  printf ("not_bol: %d\t", bufp->not_bol);
98617721Speter  printf ("not_eol: %d\t", bufp->not_eol);
98717721Speter  printf ("syntax: %d\n", bufp->syntax);
98817721Speter  /* Perhaps we should print the translate table?  */
98917721Speter}
99017721Speter
99117721Speter
99217721Spetervoid
99317721Speterprint_double_string (where, string1, size1, string2, size2)
99417721Speter    const char *where;
99517721Speter    const char *string1;
99617721Speter    const char *string2;
99717721Speter    int size1;
99817721Speter    int size2;
99917721Speter{
100017721Speter  unsigned this_char;
100125839Speter
100217721Speter  if (where == NULL)
100317721Speter    printf ("(null)");
100417721Speter  else
100517721Speter    {
100617721Speter      if (FIRST_STRING_P (where))
100766525Speter	{
100866525Speter	  for (this_char = where - string1; this_char < size1; this_char++)
100966525Speter	    putchar (string1[this_char]);
101017721Speter
101166525Speter	  where = string2;
101266525Speter	}
101317721Speter
101417721Speter      for (this_char = where - string2; this_char < size2; this_char++)
101566525Speter	putchar (string2[this_char]);
101617721Speter    }
101717721Speter}
101817721Speter
101917721Speter#else /* not DEBUG */
102017721Speter
102117721Speter#undef assert
102217721Speter#define assert(e)
102317721Speter
102417721Speter#define DEBUG_STATEMENT(e)
102517721Speter#define DEBUG_PRINT1(x)
102617721Speter#define DEBUG_PRINT2(x1, x2)
102717721Speter#define DEBUG_PRINT3(x1, x2, x3)
102817721Speter#define DEBUG_PRINT4(x1, x2, x3, x4)
102917721Speter#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
103017721Speter#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
103117721Speter
103217721Speter#endif /* not DEBUG */
103317721Speter
103417721Speter/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
103517721Speter   also be assigned to arbitrarily: each pattern buffer stores its own
103617721Speter   syntax, so it can be changed between regex compilations.  */
103766525Speter/* This has no initializer because initialized variables in Emacs
103866525Speter   become read-only after dumping.  */
103966525Speterreg_syntax_t re_syntax_options;
104017721Speter
104117721Speter
104217721Speter/* Specify the precise syntax of regexps for compilation.  This provides
104317721Speter   for compatibility for various utilities which historically have
104417721Speter   different, incompatible syntaxes.
104517721Speter
104617721Speter   The argument SYNTAX is a bit mask comprised of the various bits
104766525Speter   defined in regex.h.	We return the old syntax.  */
104817721Speter
104917721Speterreg_syntax_t
105017721Speterre_set_syntax (syntax)
105117721Speter    reg_syntax_t syntax;
105217721Speter{
105317721Speter  reg_syntax_t ret = re_syntax_options;
105425839Speter
105517721Speter  re_syntax_options = syntax;
105617721Speter  return ret;
105717721Speter}
105817721Speter
105917721Speter/* This table gives an error message for each of the error codes listed
106066525Speter   in regex.h.	Obviously the order here has to be same as there.
106166525Speter   POSIX doesn't require that we do anything for REG_NOERROR,
106266525Speter   but why not be nice?	 */
106317721Speter
106466525Speterstatic const char *re_error_msgid[] =
106566525Speter  {
106666525Speter    gettext_noop ("Success"),	/* REG_NOERROR */
106766525Speter    gettext_noop ("No match"),	/* REG_NOMATCH */
106866525Speter    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
106966525Speter    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
107066525Speter    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
107166525Speter    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
107266525Speter    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
107366525Speter    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
107466525Speter    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
107566525Speter    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
107666525Speter    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
107766525Speter    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
107866525Speter    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
107966525Speter    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
108066525Speter    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
108166525Speter    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
108266525Speter    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
108317721Speter  };
108417721Speter
108566525Speter/* Avoiding alloca during matching, to placate r_alloc.	 */
108666525Speter
108766525Speter/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
108866525Speter   searching and matching functions should not call alloca.  On some
108966525Speter   systems, alloca is implemented in terms of malloc, and if we're
109066525Speter   using the relocating allocator routines, then malloc could cause a
109166525Speter   relocation, which might (if the strings being searched are in the
109266525Speter   ralloc heap) shift the data out from underneath the regexp
109366525Speter   routines.
109466525Speter
109566525Speter   Here's another reason to avoid allocation: Emacs
109666525Speter   processes input from X in a signal handler; processing X input may
109766525Speter   call malloc; if input arrives while a matching routine is calling
109866525Speter   malloc, then we're scrod.  But Emacs can't just block input while
109966525Speter   calling matching routines; then we don't notice interrupts when
110066525Speter   they come in.  So, Emacs blocks input around all regexp calls
110166525Speter   except the matching calls, which it leaves unprotected, in the
110266525Speter   faith that they will not malloc.  */
110366525Speter
110466525Speter/* Normally, this is fine.  */
110566525Speter#define MATCH_MAY_ALLOCATE
110666525Speter
110766525Speter/* When using GNU C, we are not REALLY using the C alloca, no matter
110866525Speter   what config.h may say.  So don't take precautions for it.  */
110966525Speter#ifdef __GNUC__
111066525Speter#undef C_ALLOCA
111166525Speter#endif
111266525Speter
111366525Speter/* The match routines may not allocate if (1) they would do it with malloc
111466525Speter   and (2) it's not safe for them to use malloc.
111566525Speter   Note that if REL_ALLOC is defined, matching would not use malloc for the
111666525Speter   failure stack, but we would still use it for the register vectors;
111766525Speter   so REL_ALLOC should not affect this.	 */
111866525Speter#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
111966525Speter#undef MATCH_MAY_ALLOCATE
112066525Speter#endif
112166525Speter
112266525Speter
112366525Speter/* Failure stack declarations and macros; both re_compile_fastmap and
112466525Speter   re_match_2 use a failure stack.  These have to be macros because of
112566525Speter   REGEX_ALLOCATE_STACK.  */
112666525Speter
112766525Speter
112866525Speter/* Approximate number of failure points for which to initially allocate space
112966525Speter   when matching.  If this number is exceeded, we allocate more
113066525Speter   space, so it is not a hard limit.  */
113166525Speter#ifndef INIT_FAILURE_ALLOC
113266525Speter#define INIT_FAILURE_ALLOC 20
113366525Speter#endif
113466525Speter
113566525Speter/* Roughly the maximum number of failure points on the stack.  Would be
113666525Speter   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
113766525Speter   This is a variable only so users of regex can assign to it; we never
113866525Speter   change it ourselves.	 */
113966525Speter#if defined (MATCH_MAY_ALLOCATE)
114066525Speter/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
114166525Speter   whose default stack limit is 2mb.  In order for a larger
114266525Speter   value to work reliably, you have to try to make it accord
114366525Speter   with the process stack limit.  */
114466525Speterint re_max_failures = 40000;
114566525Speter#else
114666525Speterint re_max_failures = 4000;
114766525Speter#endif
114866525Speter
114966525Speterunion fail_stack_elt
115066525Speter{
115166525Speter  unsigned char *pointer;
115266525Speter  int integer;
115366525Speter};
115466525Speter
115566525Spetertypedef union fail_stack_elt fail_stack_elt_t;
115666525Speter
115766525Spetertypedef struct
115866525Speter{
115966525Speter  fail_stack_elt_t *stack;
116066525Speter  unsigned size;
116166525Speter  unsigned avail;			/* Offset of next open position.  */
116266525Speter} fail_stack_type;
116366525Speter
116466525Speter#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
116566525Speter#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
116666525Speter#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
116766525Speter
116866525Speter
116966525Speter/* Define macros to initialize and free the failure stack.
117066525Speter   Do `return -2' if the alloc fails.  */
117166525Speter
117266525Speter#ifdef MATCH_MAY_ALLOCATE
117366525Speter#define INIT_FAIL_STACK()						\
117466525Speter  do {									\
117566525Speter    fail_stack.stack = (fail_stack_elt_t *)				\
117666525Speter      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
117766525Speter			    * sizeof (fail_stack_elt_t));		\
117866525Speter									\
117966525Speter    if (fail_stack.stack == NULL)					\
118066525Speter      return -2;							\
118166525Speter									\
118266525Speter    fail_stack.size = INIT_FAILURE_ALLOC;				\
118366525Speter    fail_stack.avail = 0;						\
118466525Speter  } while (0)
118566525Speter
118666525Speter#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
118766525Speter#else
118866525Speter#define INIT_FAIL_STACK()						\
118966525Speter  do {									\
119066525Speter    fail_stack.avail = 0;						\
119166525Speter  } while (0)
119266525Speter
119366525Speter#define RESET_FAIL_STACK()
119466525Speter#endif
119566525Speter
119666525Speter
119766525Speter/* Double the size of FAIL_STACK, up to a limit
119866525Speter   which allows approximately `re_max_failures' items.
119966525Speter
120066525Speter   Return 1 if succeeds, and 0 if either ran out of memory
120166525Speter   allocating space for it or it was already too large.
120266525Speter
120366525Speter   REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
120466525Speter
120566525Speter/* Factor to increase the failure stack size by
120666525Speter   when we increase it.
120766525Speter   This used to be 2, but 2 was too wasteful
120866525Speter   because the old discarded stacks added up to as much space
120966525Speter   were as ultimate, maximum-size stack.  */
121066525Speter#define FAIL_STACK_GROWTH_FACTOR 4
121166525Speter
121266525Speter#define GROW_FAIL_STACK(fail_stack)					\
121366525Speter  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
121466525Speter    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
121566525Speter   ? 0									\
121666525Speter   : ((fail_stack).stack						\
121766525Speter      = (fail_stack_elt_t *)						\
121866525Speter	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
121966525Speter	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
122066525Speter	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
122166525Speter	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
122266525Speter		* FAIL_STACK_GROWTH_FACTOR))),				\
122366525Speter									\
122466525Speter      (fail_stack).stack == NULL					\
122566525Speter      ? 0								\
122666525Speter      : ((fail_stack).size						\
122766525Speter	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
122866525Speter		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
122966525Speter		  * FAIL_STACK_GROWTH_FACTOR))				\
123066525Speter	    / sizeof (fail_stack_elt_t)),				\
123166525Speter	 1)))
123266525Speter
123366525Speter
123466525Speter/* Push pointer POINTER on FAIL_STACK.
123566525Speter   Return 1 if was able to do so and 0 if ran out of memory allocating
123666525Speter   space to do so.  */
123766525Speter#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
123866525Speter  ((FAIL_STACK_FULL ()							\
123966525Speter    && !GROW_FAIL_STACK (FAIL_STACK))					\
124066525Speter   ? 0									\
124166525Speter   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
124266525Speter      1))
124366525Speter
124466525Speter/* Push a pointer value onto the failure stack.
124566525Speter   Assumes the variable `fail_stack'.  Probably should only
124666525Speter   be called from within `PUSH_FAILURE_POINT'.	*/
124766525Speter#define PUSH_FAILURE_POINTER(item)					\
124866525Speter  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
124966525Speter
125066525Speter/* This pushes an integer-valued item onto the failure stack.
125166525Speter   Assumes the variable `fail_stack'.  Probably should only
125266525Speter   be called from within `PUSH_FAILURE_POINT'.	*/
125366525Speter#define PUSH_FAILURE_INT(item)					\
125466525Speter  fail_stack.stack[fail_stack.avail++].integer = (item)
125566525Speter
125666525Speter/* Push a fail_stack_elt_t value onto the failure stack.
125766525Speter   Assumes the variable `fail_stack'.  Probably should only
125866525Speter   be called from within `PUSH_FAILURE_POINT'.	*/
125966525Speter#define PUSH_FAILURE_ELT(item)					\
126066525Speter  fail_stack.stack[fail_stack.avail++] =  (item)
126166525Speter
126266525Speter/* These three POP... operations complement the three PUSH... operations.
126366525Speter   All assume that `fail_stack' is nonempty.  */
126466525Speter#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
126566525Speter#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
126666525Speter#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
126766525Speter
126866525Speter/* Used to omit pushing failure point id's when we're not debugging.  */
126966525Speter#ifdef DEBUG
127066525Speter#define DEBUG_PUSH PUSH_FAILURE_INT
127166525Speter#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
127266525Speter#else
127366525Speter#define DEBUG_PUSH(item)
127466525Speter#define DEBUG_POP(item_addr)
127566525Speter#endif
127666525Speter
127766525Speter
127866525Speter/* Push the information about the state we will need
127966525Speter   if we ever fail back to it.
128066525Speter
128166525Speter   Requires variables fail_stack, regstart, regend, reg_info, and
128266525Speter   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
128366525Speter   declared.
128466525Speter
128566525Speter   Does `return FAILURE_CODE' if runs out of memory.  */
128666525Speter
128766525Speter#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
128866525Speter  do {									\
128966525Speter    char *destination;							\
129066525Speter    /* Must be int, so when we don't save any registers, the arithmetic	\
129166525Speter       of 0 + -1 isn't done as unsigned.  */				\
129266525Speter    int this_reg;							\
129366525Speter									\
129466525Speter    DEBUG_STATEMENT (failure_id++);					\
129566525Speter    DEBUG_STATEMENT (nfailure_points_pushed++);				\
129666525Speter    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
129766525Speter    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
129866525Speter    DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
129966525Speter									\
130066525Speter    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
130166525Speter    DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
130266525Speter									\
130366525Speter    /* Ensure we have enough space allocated for what we will push.  */	\
130466525Speter    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
130566525Speter      {									\
130666525Speter	if (!GROW_FAIL_STACK (fail_stack))				\
130766525Speter	  return failure_code;						\
130866525Speter									\
130966525Speter	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
131066525Speter		       (fail_stack).size);				\
131166525Speter	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
131266525Speter      }									\
131366525Speter									\
131466525Speter    /* Push the info, starting with the registers.  */			\
131566525Speter    DEBUG_PRINT1 ("\n");						\
131666525Speter									\
131766525Speter    if (1)								\
131866525Speter      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
131966525Speter	   this_reg++)							\
132066525Speter	{								\
132166525Speter	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
132266525Speter	  DEBUG_STATEMENT (num_regs_pushed++);				\
132366525Speter									\
132466525Speter	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
132566525Speter	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
132666525Speter									\
132766525Speter	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
132866525Speter	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
132966525Speter									\
133066525Speter	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
133166525Speter	  DEBUG_PRINT2 (" match_null=%d",				\
133266525Speter			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
133366525Speter	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
133466525Speter	  DEBUG_PRINT2 (" matched_something=%d",			\
133566525Speter			MATCHED_SOMETHING (reg_info[this_reg]));	\
133666525Speter	  DEBUG_PRINT2 (" ever_matched=%d",				\
133766525Speter			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
133866525Speter	  DEBUG_PRINT1 ("\n");						\
133966525Speter	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
134066525Speter	}								\
134166525Speter									\
134266525Speter    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
134366525Speter    PUSH_FAILURE_INT (lowest_active_reg);				\
134466525Speter									\
134566525Speter    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
134666525Speter    PUSH_FAILURE_INT (highest_active_reg);				\
134766525Speter									\
134866525Speter    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
134966525Speter    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
135066525Speter    PUSH_FAILURE_POINTER (pattern_place);				\
135166525Speter									\
135266525Speter    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
135366525Speter    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
135466525Speter				 size2);				\
135566525Speter    DEBUG_PRINT1 ("'\n");						\
135666525Speter    PUSH_FAILURE_POINTER (string_place);				\
135766525Speter									\
135866525Speter    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
135966525Speter    DEBUG_PUSH (failure_id);						\
136066525Speter  } while (0)
136166525Speter
136266525Speter/* This is the number of items that are pushed and popped on the stack
136366525Speter   for each register.  */
136466525Speter#define NUM_REG_ITEMS  3
136566525Speter
136666525Speter/* Individual items aside from the registers.  */
136766525Speter#ifdef DEBUG
136866525Speter#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
136966525Speter#else
137066525Speter#define NUM_NONREG_ITEMS 4
137166525Speter#endif
137266525Speter
137366525Speter/* Estimate the size of data pushed by a typical failure stack entry.
137466525Speter   An estimate is all we need, because all we use this for
137566525Speter   is to choose a limit for how big to make the failure stack.  */
137666525Speter
137766525Speter#define TYPICAL_FAILURE_SIZE 20
137866525Speter
137966525Speter/* This is how many items we actually use for a failure point.
138066525Speter   It depends on the regexp.  */
138166525Speter#define NUM_FAILURE_ITEMS				\
138266525Speter  (((0							\
138366525Speter     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
138466525Speter    * NUM_REG_ITEMS)					\
138566525Speter   + NUM_NONREG_ITEMS)
138666525Speter
138766525Speter/* How many items can still be added to the stack without overflowing it.  */
138866525Speter#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
138966525Speter
139066525Speter
139166525Speter/* Pops what PUSH_FAIL_STACK pushes.
139266525Speter
139366525Speter   We restore into the parameters, all of which should be lvalues:
139466525Speter     STR -- the saved data position.
139566525Speter     PAT -- the saved pattern position.
139666525Speter     LOW_REG, HIGH_REG -- the highest and lowest active registers.
139766525Speter     REGSTART, REGEND -- arrays of string positions.
139866525Speter     REG_INFO -- array of information about each subexpression.
139966525Speter
140066525Speter   Also assumes the variables `fail_stack' and (if debugging), `bufp',
140166525Speter   `pend', `string1', `size1', `string2', and `size2'.	*/
140266525Speter
140366525Speter#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
140466525Speter{									\
140566525Speter  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
140666525Speter  int this_reg;								\
140766525Speter  const unsigned char *string_temp;					\
140866525Speter									\
140966525Speter  assert (!FAIL_STACK_EMPTY ());					\
141066525Speter									\
141166525Speter  /* Remove failure points and point to how many regs pushed.  */	\
141266525Speter  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
141366525Speter  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
141466525Speter  DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
141566525Speter									\
141666525Speter  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
141766525Speter									\
141866525Speter  DEBUG_POP (&failure_id);						\
141966525Speter  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
142066525Speter									\
142166525Speter  /* If the saved string location is NULL, it came from an		\
142266525Speter     on_failure_keep_string_jump opcode, and we want to throw away the	\
142366525Speter     saved NULL, thus retaining our current position in the string.  */	\
142466525Speter  string_temp = POP_FAILURE_POINTER ();					\
142566525Speter  if (string_temp != NULL)						\
142666525Speter    str = (const char *) string_temp;					\
142766525Speter									\
142866525Speter  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
142966525Speter  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
143066525Speter  DEBUG_PRINT1 ("'\n");							\
143166525Speter									\
143266525Speter  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
143366525Speter  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
143466525Speter  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
143566525Speter									\
143666525Speter  /* Restore register info.  */						\
143766525Speter  high_reg = (unsigned) POP_FAILURE_INT ();				\
143866525Speter  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
143966525Speter									\
144066525Speter  low_reg = (unsigned) POP_FAILURE_INT ();				\
144166525Speter  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
144266525Speter									\
144366525Speter  if (1)								\
144466525Speter    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
144566525Speter      {									\
144666525Speter	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
144766525Speter									\
144866525Speter	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
144966525Speter	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
145066525Speter									\
145166525Speter	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
145266525Speter	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
145366525Speter									\
145466525Speter	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
145566525Speter	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
145666525Speter      }									\
145766525Speter  else									\
145866525Speter    {									\
145966525Speter      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
146066525Speter	{								\
146166525Speter	  reg_info[this_reg].word.integer = 0;				\
146266525Speter	  regend[this_reg] = 0;						\
146366525Speter	  regstart[this_reg] = 0;					\
146466525Speter	}								\
146566525Speter      highest_active_reg = high_reg;					\
146666525Speter    }									\
146766525Speter									\
146866525Speter  set_regs_matched_done = 0;						\
146966525Speter  DEBUG_STATEMENT (nfailure_points_popped++);				\
147066525Speter} /* POP_FAILURE_POINT */
147166525Speter
147266525Speter
147366525Speter
147466525Speter/* Structure for per-register (a.k.a. per-group) information.
147566525Speter   Other register information, such as the
147666525Speter   starting and ending positions (which are addresses), and the list of
147766525Speter   inner groups (which is a bits list) are maintained in separate
147866525Speter   variables.
147966525Speter
148066525Speter   We are making a (strictly speaking) nonportable assumption here: that
148166525Speter   the compiler will pack our bit fields into something that fits into
148266525Speter   the type of `word', i.e., is something that fits into one item on the
148366525Speter   failure stack.  */
148466525Speter
148566525Spetertypedef union
148666525Speter{
148766525Speter  fail_stack_elt_t word;
148866525Speter  struct
148966525Speter  {
149066525Speter      /* This field is one if this group can match the empty string,
149166525Speter	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
149266525Speter#define MATCH_NULL_UNSET_VALUE 3
149366525Speter    unsigned match_null_string_p : 2;
149466525Speter    unsigned is_active : 1;
149566525Speter    unsigned matched_something : 1;
149666525Speter    unsigned ever_matched_something : 1;
149766525Speter  } bits;
149866525Speter} register_info_type;
149966525Speter
150066525Speter#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
150166525Speter#define IS_ACTIVE(R)  ((R).bits.is_active)
150266525Speter#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
150366525Speter#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
150466525Speter
150566525Speter
150666525Speter/* Call this when have matched a real character; it sets `matched' flags
150766525Speter   for the subexpressions which we are currently inside.  Also records
150866525Speter   that those subexprs have matched.  */
150966525Speter#define SET_REGS_MATCHED()						\
151066525Speter  do									\
151166525Speter    {									\
151266525Speter      if (!set_regs_matched_done)					\
151366525Speter	{								\
151466525Speter	  unsigned r;							\
151566525Speter	  set_regs_matched_done = 1;					\
151666525Speter	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
151766525Speter	    {								\
151866525Speter	      MATCHED_SOMETHING (reg_info[r])				\
151966525Speter		= EVER_MATCHED_SOMETHING (reg_info[r])			\
152066525Speter		= 1;							\
152166525Speter	    }								\
152266525Speter	}								\
152366525Speter    }									\
152466525Speter  while (0)
152566525Speter
152666525Speter/* Registers are set to a sentinel when they haven't yet matched.  */
152766525Speterstatic char reg_unset_dummy;
152866525Speter#define REG_UNSET_VALUE (&reg_unset_dummy)
152966525Speter#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
153066525Speter
153117721Speter/* Subroutine declarations and macros for regex_compile.  */
153217721Speter
153317721Speterstatic void store_op1 (), store_op2 ();
153417721Speterstatic void insert_op1 (), insert_op2 ();
153517721Speterstatic boolean at_begline_loc_p (), at_endline_loc_p ();
153617721Speterstatic boolean group_in_compile_stack ();
153717721Speter
153866525Speter/* Fetch the next character in the uncompiled pattern---translating it
153917721Speter   if necessary.  Also cast from a signed character in the constant
154017721Speter   string passed to us by the user to an unsigned char that we can use
154117721Speter   as an array index (in, e.g., `translate').  */
154266525Speter#ifndef PATFETCH
154317721Speter#define PATFETCH(c)							\
154417721Speter  do {if (p == pend) return REG_EEND;					\
154517721Speter    c = (unsigned char) *p++;						\
154666525Speter    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
154717721Speter  } while (0)
154866525Speter#endif
154917721Speter
155017721Speter/* Fetch the next character in the uncompiled pattern, with no
155166525Speter   translation.	 */
155217721Speter#define PATFETCH_RAW(c)							\
155317721Speter  do {if (p == pend) return REG_EEND;					\
155466525Speter    c = (unsigned char) *p++;						\
155517721Speter  } while (0)
155617721Speter
155717721Speter/* Go backwards one character in the pattern.  */
155817721Speter#define PATUNFETCH p--
155917721Speter
156017721Speter
156117721Speter/* If `translate' is non-null, return translate[D], else just D.  We
156217721Speter   cast the subscript to translate because some data is declared as
156317721Speter   `char *', to avoid warnings when a string constant is passed.  But
156417721Speter   when we use a character as a subscript we must make it unsigned.  */
156566525Speter#ifndef TRANSLATE
156666525Speter#define TRANSLATE(d) \
156766525Speter  (RE_TRANSLATE_P (translate) \
156866525Speter   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
156966525Speter#endif
157017721Speter
157117721Speter
157217721Speter/* Macros for outputting the compiled pattern into `buffer'.  */
157317721Speter
157417721Speter/* If the buffer isn't allocated when it comes in, use this.  */
157517721Speter#define INIT_BUF_SIZE  32
157617721Speter
157766525Speter/* Make sure we have at least N more bytes of space in buffer.	*/
157817721Speter#define GET_BUFFER_SPACE(n)						\
157917721Speter    while (b - bufp->buffer + (n) > bufp->allocated)			\
158017721Speter      EXTEND_BUFFER ()
158117721Speter
158217721Speter/* Make sure we have one more byte of buffer space and then add C to it.  */
158317721Speter#define BUF_PUSH(c)							\
158417721Speter  do {									\
158517721Speter    GET_BUFFER_SPACE (1);						\
158617721Speter    *b++ = (unsigned char) (c);						\
158717721Speter  } while (0)
158817721Speter
158917721Speter
159017721Speter/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
159117721Speter#define BUF_PUSH_2(c1, c2)						\
159217721Speter  do {									\
159317721Speter    GET_BUFFER_SPACE (2);						\
159417721Speter    *b++ = (unsigned char) (c1);					\
159517721Speter    *b++ = (unsigned char) (c2);					\
159617721Speter  } while (0)
159717721Speter
159817721Speter
159966525Speter/* As with BUF_PUSH_2, except for three bytes.	*/
160017721Speter#define BUF_PUSH_3(c1, c2, c3)						\
160117721Speter  do {									\
160217721Speter    GET_BUFFER_SPACE (3);						\
160317721Speter    *b++ = (unsigned char) (c1);					\
160417721Speter    *b++ = (unsigned char) (c2);					\
160517721Speter    *b++ = (unsigned char) (c3);					\
160617721Speter  } while (0)
160717721Speter
160817721Speter
160917721Speter/* Store a jump with opcode OP at LOC to location TO.  We store a
161066525Speter   relative address offset by the three bytes the jump itself occupies.	 */
161117721Speter#define STORE_JUMP(op, loc, to) \
161217721Speter  store_op1 (op, loc, (to) - (loc) - 3)
161317721Speter
161417721Speter/* Likewise, for a two-argument jump.  */
161517721Speter#define STORE_JUMP2(op, loc, to, arg) \
161617721Speter  store_op2 (op, loc, (to) - (loc) - 3, arg)
161717721Speter
161866525Speter/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.	 */
161917721Speter#define INSERT_JUMP(op, loc, to) \
162017721Speter  insert_op1 (op, loc, (to) - (loc) - 3, b)
162117721Speter
162217721Speter/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
162317721Speter#define INSERT_JUMP2(op, loc, to, arg) \
162417721Speter  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
162517721Speter
162617721Speter
162717721Speter/* This is not an arbitrary limit: the arguments which represent offsets
162866525Speter   into the pattern are two bytes long.	 So if 2^16 bytes turns out to
162917721Speter   be too small, many things would have to change.  */
163017721Speter#define MAX_BUF_SIZE (1L << 16)
163117721Speter
163217721Speter
163317721Speter/* Extend the buffer by twice its current size via realloc and
163417721Speter   reset the pointers that pointed into the old block to point to the
163517721Speter   correct places in the new one.  If extending the buffer results in it
163666525Speter   being larger than MAX_BUF_SIZE, then flag memory exhausted.	*/
163717721Speter#define EXTEND_BUFFER()							\
163866525Speter  do {									\
163917721Speter    unsigned char *old_buffer = bufp->buffer;				\
164066525Speter    if (bufp->allocated == MAX_BUF_SIZE)				\
164117721Speter      return REG_ESIZE;							\
164217721Speter    bufp->allocated <<= 1;						\
164317721Speter    if (bufp->allocated > MAX_BUF_SIZE)					\
164466525Speter      bufp->allocated = MAX_BUF_SIZE;					\
164517721Speter    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
164617721Speter    if (bufp->buffer == NULL)						\
164717721Speter      return REG_ESPACE;						\
164817721Speter    /* If the buffer moved, move all the pointers into it.  */		\
164917721Speter    if (old_buffer != bufp->buffer)					\
165017721Speter      {									\
165166525Speter	b = (b - old_buffer) + bufp->buffer;				\
165266525Speter	begalt = (begalt - old_buffer) + bufp->buffer;			\
165366525Speter	if (fixup_alt_jump)						\
165466525Speter	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
165566525Speter	if (laststart)							\
165666525Speter	  laststart = (laststart - old_buffer) + bufp->buffer;		\
165766525Speter	if (pending_exact)						\
165866525Speter	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
165917721Speter      }									\
166017721Speter  } while (0)
166117721Speter
166217721Speter
166317721Speter/* Since we have one byte reserved for the register number argument to
166417721Speter   {start,stop}_memory, the maximum number of groups we can report
166517721Speter   things about is what fits in that byte.  */
166617721Speter#define MAX_REGNUM 255
166717721Speter
166817721Speter/* But patterns can have more than `MAX_REGNUM' registers.  We just
166917721Speter   ignore the excess.  */
167017721Spetertypedef unsigned regnum_t;
167117721Speter
167217721Speter
167317721Speter/* Macros for the compile stack.  */
167417721Speter
167517721Speter/* Since offsets can go either forwards or backwards, this type needs to
167666525Speter   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.	 */
167717721Spetertypedef int pattern_offset_t;
167817721Speter
167917721Spetertypedef struct
168017721Speter{
168117721Speter  pattern_offset_t begalt_offset;
168217721Speter  pattern_offset_t fixup_alt_jump;
168317721Speter  pattern_offset_t inner_group_offset;
168425839Speter  pattern_offset_t laststart_offset;
168517721Speter  regnum_t regnum;
168617721Speter} compile_stack_elt_t;
168717721Speter
168817721Speter
168917721Spetertypedef struct
169017721Speter{
169117721Speter  compile_stack_elt_t *stack;
169217721Speter  unsigned size;
169317721Speter  unsigned avail;			/* Offset of next open position.  */
169417721Speter} compile_stack_type;
169517721Speter
169617721Speter
169717721Speter#define INIT_COMPILE_STACK_SIZE 32
169817721Speter
169917721Speter#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
170017721Speter#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
170117721Speter
170266525Speter/* The next available element.	*/
170317721Speter#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
170417721Speter
170517721Speter
170666525Speter/* Structure to manage work area for range table.  */
170766525Speterstruct range_table_work_area
170866525Speter{
170966525Speter  int *table;			/* actual work area.  */
171066525Speter  int allocated;		/* allocated size for work area in bytes.  */
171166525Speter  int used;			/* actually used size in words.	 */
171266525Speter};
171366525Speter
171466525Speter/* Make sure that WORK_AREA can hold more N multibyte characters.  */
171566525Speter#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
171666525Speter  do {									  \
171766525Speter    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
171866525Speter      {									  \
171966525Speter	(work_area).allocated += 16 * sizeof (int);			  \
172066525Speter	if ((work_area).table)						  \
172166525Speter	  (work_area).table						  \
172266525Speter	    = (int *) realloc ((work_area).table, (work_area).allocated); \
172366525Speter	else								  \
172466525Speter	  (work_area).table						  \
172566525Speter	    = (int *) malloc ((work_area).allocated);			  \
172666525Speter	if ((work_area).table == 0)					  \
172766525Speter	  FREE_STACK_RETURN (REG_ESPACE);				  \
172866525Speter      }									  \
172966525Speter  } while (0)
173066525Speter
173166525Speter/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
173266525Speter#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
173366525Speter  do {									\
173466525Speter    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
173566525Speter    (work_area).table[(work_area).used++] = (range_start);		\
173666525Speter    (work_area).table[(work_area).used++] = (range_end);		\
173766525Speter  } while (0)
173866525Speter
173966525Speter/* Free allocated memory for WORK_AREA.	 */
174066525Speter#define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
174166525Speter  do {						\
174266525Speter    if ((work_area).table)			\
174366525Speter      free ((work_area).table);			\
174466525Speter  } while (0)
174566525Speter
174666525Speter#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
174766525Speter#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
174866525Speter#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
174966525Speter
175066525Speter
175117721Speter/* Set the bit for character C in a list.  */
175266525Speter#define SET_LIST_BIT(c)				      \
175366525Speter  (b[((unsigned char) (c)) / BYTEWIDTH]		      \
175417721Speter   |= 1 << (((unsigned char) c) % BYTEWIDTH))
175517721Speter
175617721Speter
175717721Speter/* Get the next unsigned number in the uncompiled pattern.  */
175866525Speter#define GET_UNSIGNED_NUMBER(num)					\
175917721Speter  { if (p != pend)							\
176017721Speter     {									\
176166525Speter       PATFETCH (c);							\
176266525Speter       while (ISDIGIT (c))						\
176366525Speter	 {								\
176466525Speter	   if (num < 0)							\
176566525Speter	      num = 0;							\
176666525Speter	   num = num * 10 + c - '0';					\
176766525Speter	   if (p == pend)						\
176866525Speter	      break;							\
176966525Speter	   PATFETCH (c);						\
177066525Speter	 }								\
177166525Speter       }								\
177225839Speter    }
177317721Speter
177417721Speter#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
177517721Speter
177617721Speter#define IS_CHAR_CLASS(string)						\
177717721Speter   (STREQ (string, "alpha") || STREQ (string, "upper")			\
177817721Speter    || STREQ (string, "lower") || STREQ (string, "digit")		\
177917721Speter    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
178017721Speter    || STREQ (string, "space") || STREQ (string, "print")		\
178117721Speter    || STREQ (string, "punct") || STREQ (string, "graph")		\
178217721Speter    || STREQ (string, "cntrl") || STREQ (string, "blank"))
178317721Speter
178466525Speter#ifndef MATCH_MAY_ALLOCATE
178566525Speter
178666525Speter/* If we cannot allocate large objects within re_match_2_internal,
178766525Speter   we make the fail stack and register vectors global.
178866525Speter   The fail stack, we grow to the maximum size when a regexp
178966525Speter   is compiled.
179066525Speter   The register vectors, we adjust in size each time we
179166525Speter   compile a regexp, according to the number of registers it needs.  */
179266525Speter
179366525Speterstatic fail_stack_type fail_stack;
179466525Speter
179566525Speter/* Size with which the following vectors are currently allocated.
179666525Speter   That is so we can make them bigger as needed,
179766525Speter   but never make them smaller.	 */
179866525Speterstatic int regs_allocated_size;
179966525Speter
180066525Speterstatic const char **	 regstart, **	  regend;
180166525Speterstatic const char ** old_regstart, ** old_regend;
180266525Speterstatic const char **best_regstart, **best_regend;
180366525Speterstatic register_info_type *reg_info;
180466525Speterstatic const char **reg_dummy;
180566525Speterstatic register_info_type *reg_info_dummy;
180666525Speter
180766525Speter/* Make the register vectors big enough for NUM_REGS registers,
180866525Speter   but don't make them smaller.	 */
180966525Speter
181066525Speterstatic
181166525Speterregex_grow_registers (num_regs)
181266525Speter     int num_regs;
181366525Speter{
181466525Speter  if (num_regs > regs_allocated_size)
181566525Speter    {
181666525Speter      RETALLOC_IF (regstart,	 num_regs, const char *);
181766525Speter      RETALLOC_IF (regend,	 num_regs, const char *);
181866525Speter      RETALLOC_IF (old_regstart, num_regs, const char *);
181966525Speter      RETALLOC_IF (old_regend,	 num_regs, const char *);
182066525Speter      RETALLOC_IF (best_regstart, num_regs, const char *);
182166525Speter      RETALLOC_IF (best_regend,	 num_regs, const char *);
182266525Speter      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
182366525Speter      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
182466525Speter      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
182566525Speter
182666525Speter      regs_allocated_size = num_regs;
182766525Speter    }
182866525Speter}
182966525Speter
183066525Speter#endif /* not MATCH_MAY_ALLOCATE */
183166525Speter
183217721Speter/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
183317721Speter   Returns one of error codes defined in `regex.h', or zero for success.
183417721Speter
183517721Speter   Assumes the `allocated' (and perhaps `buffer') and `translate'
183617721Speter   fields are set in BUFP on entry.
183717721Speter
183817721Speter   If it succeeds, results are put in BUFP (if it returns an error, the
183917721Speter   contents of BUFP are undefined):
184017721Speter     `buffer' is the compiled pattern;
184117721Speter     `syntax' is set to SYNTAX;
184217721Speter     `used' is set to the length of the compiled pattern;
184317721Speter     `fastmap_accurate' is zero;
184417721Speter     `re_nsub' is the number of subexpressions in PATTERN;
184517721Speter     `not_bol' and `not_eol' are zero;
184625839Speter
184717721Speter   The `fastmap' and `newline_anchor' fields are neither
184817721Speter   examined nor set.  */
184917721Speter
185066525Speter/* Return, freeing storage we allocated.  */
185166525Speter#define FREE_STACK_RETURN(value)		\
185266525Speter  do {							\
185366525Speter    FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
185466525Speter    free (compile_stack.stack);				\
185566525Speter    return value;					\
185666525Speter  } while (0)
185766525Speter
185817721Speterstatic reg_errcode_t
185917721Speterregex_compile (pattern, size, syntax, bufp)
186017721Speter     const char *pattern;
186117721Speter     int size;
186217721Speter     reg_syntax_t syntax;
186317721Speter     struct re_pattern_buffer *bufp;
186417721Speter{
186517721Speter  /* We fetch characters from PATTERN here.  Even though PATTERN is
186617721Speter     `char *' (i.e., signed), we declare these variables as unsigned, so
186717721Speter     they can be reliably used as array indices.  */
186866525Speter  register unsigned int c, c1;
186925839Speter
187025839Speter  /* A random temporary spot in PATTERN.  */
187117721Speter  const char *p1;
187217721Speter
187317721Speter  /* Points to the end of the buffer, where we should append.  */
187417721Speter  register unsigned char *b;
187525839Speter
187617721Speter  /* Keeps track of unclosed groups.  */
187717721Speter  compile_stack_type compile_stack;
187817721Speter
187917721Speter  /* Points to the current (ending) position in the pattern.  */
188066525Speter#ifdef AIX
188166525Speter  /* `const' makes AIX compiler fail.  */
188266525Speter  char *p = pattern;
188366525Speter#else
188417721Speter  const char *p = pattern;
188566525Speter#endif
188617721Speter  const char *pend = pattern + size;
188725839Speter
188817721Speter  /* How to translate the characters in the pattern.  */
188966525Speter  RE_TRANSLATE_TYPE translate = bufp->translate;
189017721Speter
189117721Speter  /* Address of the count-byte of the most recently inserted `exactn'
189217721Speter     command.  This makes it possible to tell if a new exact-match
189317721Speter     character can be added to that command or if the character requires
189417721Speter     a new `exactn' command.  */
189517721Speter  unsigned char *pending_exact = 0;
189617721Speter
189717721Speter  /* Address of start of the most recently finished expression.
189817721Speter     This tells, e.g., postfix * where to find the start of its
189917721Speter     operand.  Reset at the beginning of groups and alternatives.  */
190017721Speter  unsigned char *laststart = 0;
190117721Speter
190217721Speter  /* Address of beginning of regexp, or inside of last group.  */
190317721Speter  unsigned char *begalt;
190417721Speter
190517721Speter  /* Place in the uncompiled pattern (i.e., the {) to
190617721Speter     which to go back if the interval is invalid.  */
190717721Speter  const char *beg_interval;
190825839Speter
190917721Speter  /* Address of the place where a forward jump should go to the end of
191066525Speter     the containing expression.	 Each alternative of an `or' -- except the
191117721Speter     last -- ends with a forward jump of this sort.  */
191217721Speter  unsigned char *fixup_alt_jump = 0;
191317721Speter
191417721Speter  /* Counts open-groups as they are encountered.  Remembered for the
191517721Speter     matching close-group on the compile stack, so the same register
191617721Speter     number is put in the stop_memory as the start_memory.  */
191717721Speter  regnum_t regnum = 0;
191817721Speter
191966525Speter  /* Work area for range table of charset.  */
192066525Speter  struct range_table_work_area range_table_work;
192166525Speter
192217721Speter#ifdef DEBUG
192317721Speter  DEBUG_PRINT1 ("\nCompiling pattern: ");
192417721Speter  if (debug)
192517721Speter    {
192617721Speter      unsigned debug_count;
192725839Speter
192817721Speter      for (debug_count = 0; debug_count < size; debug_count++)
192966525Speter	putchar (pattern[debug_count]);
193017721Speter      putchar ('\n');
193117721Speter    }
193217721Speter#endif /* DEBUG */
193317721Speter
193417721Speter  /* Initialize the compile stack.  */
193517721Speter  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
193617721Speter  if (compile_stack.stack == NULL)
193717721Speter    return REG_ESPACE;
193817721Speter
193917721Speter  compile_stack.size = INIT_COMPILE_STACK_SIZE;
194017721Speter  compile_stack.avail = 0;
194117721Speter
194266525Speter  range_table_work.table = 0;
194366525Speter  range_table_work.allocated = 0;
194466525Speter
194517721Speter  /* Initialize the pattern buffer.  */
194617721Speter  bufp->syntax = syntax;
194717721Speter  bufp->fastmap_accurate = 0;
194817721Speter  bufp->not_bol = bufp->not_eol = 0;
194917721Speter
195017721Speter  /* Set `used' to zero, so that if we return an error, the pattern
195117721Speter     printer (for debugging) will think there's no pattern.  We reset it
195217721Speter     at the end.  */
195317721Speter  bufp->used = 0;
195425839Speter
195517721Speter  /* Always count groups, whether or not bufp->no_sub is set.  */
195625839Speter  bufp->re_nsub = 0;
195717721Speter
195866525Speter#ifdef emacs
195966525Speter  /* bufp->multibyte is set before regex_compile is called, so don't alter
196066525Speter     it. */
196166525Speter#else  /* not emacs */
196266525Speter  /* Nothing is recognized as a multibyte character.  */
196366525Speter  bufp->multibyte = 0;
196466525Speter#endif
196566525Speter
196617721Speter#if !defined (emacs) && !defined (SYNTAX_TABLE)
196717721Speter  /* Initialize the syntax table.  */
196817721Speter   init_syntax_once ();
196917721Speter#endif
197017721Speter
197117721Speter  if (bufp->allocated == 0)
197217721Speter    {
197317721Speter      if (bufp->buffer)
197417721Speter	{ /* If zero allocated, but buffer is non-null, try to realloc
197566525Speter	     enough space.  This loses if buffer's address is bogus, but
197666525Speter	     that is the user's responsibility.	 */
197766525Speter	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
197866525Speter	}
197917721Speter      else
198066525Speter	{ /* Caller did not allocate a buffer.	Do it for them.	 */
198166525Speter	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
198266525Speter	}
198366525Speter      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
198417721Speter
198517721Speter      bufp->allocated = INIT_BUF_SIZE;
198617721Speter    }
198717721Speter
198817721Speter  begalt = b = bufp->buffer;
198917721Speter
199017721Speter  /* Loop through the uncompiled pattern until we're at the end.  */
199117721Speter  while (p != pend)
199217721Speter    {
199317721Speter      PATFETCH (c);
199417721Speter
199517721Speter      switch (c)
199666525Speter	{
199766525Speter	case '^':
199866525Speter	  {
199966525Speter	    if (   /* If at start of pattern, it's an operator.	 */
200066525Speter		   p == pattern + 1
200166525Speter		   /* If context independent, it's an operator.	 */
200266525Speter		|| syntax & RE_CONTEXT_INDEP_ANCHORS
200366525Speter		   /* Otherwise, depends on what's come before.	 */
200466525Speter		|| at_begline_loc_p (pattern, p, syntax))
200566525Speter	      BUF_PUSH (begline);
200666525Speter	    else
200766525Speter	      goto normal_char;
200866525Speter	  }
200966525Speter	  break;
201017721Speter
201117721Speter
201266525Speter	case '$':
201366525Speter	  {
201466525Speter	    if (   /* If at end of pattern, it's an operator.  */
201566525Speter		   p == pend
201666525Speter		   /* If context independent, it's an operator.	 */
201766525Speter		|| syntax & RE_CONTEXT_INDEP_ANCHORS
201866525Speter		   /* Otherwise, depends on what's next.  */
201966525Speter		|| at_endline_loc_p (p, pend, syntax))
202066525Speter	       BUF_PUSH (endline);
202166525Speter	     else
202266525Speter	       goto normal_char;
202366525Speter	   }
202466525Speter	   break;
202517721Speter
202617721Speter
202717721Speter	case '+':
202866525Speter	case '?':
202966525Speter	  if ((syntax & RE_BK_PLUS_QM)
203066525Speter	      || (syntax & RE_LIMITED_OPS))
203166525Speter	    goto normal_char;
203266525Speter	handle_plus:
203366525Speter	case '*':
203466525Speter	  /* If there is no previous pattern... */
203566525Speter	  if (!laststart)
203666525Speter	    {
203766525Speter	      if (syntax & RE_CONTEXT_INVALID_OPS)
203866525Speter		FREE_STACK_RETURN (REG_BADRPT);
203966525Speter	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
204066525Speter		goto normal_char;
204166525Speter	    }
204217721Speter
204366525Speter	  {
204466525Speter	    /* Are we optimizing this jump?  */
204566525Speter	    boolean keep_string_p = false;
204625839Speter
204766525Speter	    /* 1 means zero (many) matches is allowed.	*/
204866525Speter	    char zero_times_ok = 0, many_times_ok = 0;
204917721Speter
205066525Speter	    /* If there is a sequence of repetition chars, collapse it
205166525Speter	       down to just one (the right one).  We can't combine
205266525Speter	       interval operators with these because of, e.g., `a{2}*',
205366525Speter	       which should only match an even number of `a's.	*/
205417721Speter
205566525Speter	    for (;;)
205666525Speter	      {
205766525Speter		zero_times_ok |= c != '+';
205866525Speter		many_times_ok |= c != '?';
205917721Speter
206066525Speter		if (p == pend)
206166525Speter		  break;
206217721Speter
206366525Speter		PATFETCH (c);
206417721Speter
206566525Speter		if (c == '*'
206666525Speter		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
206766525Speter		  ;
206817721Speter
206966525Speter		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
207066525Speter		  {
207166525Speter		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
207217721Speter
207366525Speter		    PATFETCH (c1);
207466525Speter		    if (!(c1 == '+' || c1 == '?'))
207566525Speter		      {
207666525Speter			PATUNFETCH;
207766525Speter			PATUNFETCH;
207866525Speter			break;
207966525Speter		      }
208017721Speter
208166525Speter		    c = c1;
208266525Speter		  }
208366525Speter		else
208466525Speter		  {
208566525Speter		    PATUNFETCH;
208666525Speter		    break;
208766525Speter		  }
208817721Speter
208966525Speter		/* If we get here, we found another repeat character.  */
209066525Speter	       }
209117721Speter
209266525Speter	    /* Star, etc. applied to an empty pattern is equivalent
209366525Speter	       to an empty pattern.  */
209466525Speter	    if (!laststart)
209566525Speter	      break;
209617721Speter
209766525Speter	    /* Now we know whether or not zero matches is allowed
209866525Speter	       and also whether or not two or more matches is allowed.	*/
209966525Speter	    if (many_times_ok)
210066525Speter	      { /* More than one repetition is allowed, so put in at the
210166525Speter		   end a backward relative jump from `b' to before the next
210266525Speter		   jump we're going to put in below (which jumps from
210366525Speter		   laststart to after this jump).
210417721Speter
210566525Speter		   But if we are at the `*' in the exact sequence `.*\n',
210666525Speter		   insert an unconditional jump backwards to the .,
210766525Speter		   instead of the beginning of the loop.  This way we only
210866525Speter		   push a failure point once, instead of every time
210966525Speter		   through the loop.  */
211066525Speter		assert (p - 1 > pattern);
211117721Speter
211266525Speter		/* Allocate the space for the jump.  */
211366525Speter		GET_BUFFER_SPACE (3);
211417721Speter
211566525Speter		/* We know we are not at the first character of the pattern,
211666525Speter		   because laststart was nonzero.  And we've already
211766525Speter		   incremented `p', by the way, to be the character after
211866525Speter		   the `*'.  Do we have to do something analogous here
211966525Speter		   for null bytes, because of RE_DOT_NOT_NULL?	*/
212066525Speter		if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
212117721Speter		    && zero_times_ok
212266525Speter		    && p < pend
212366525Speter		    && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
212466525Speter		    && !(syntax & RE_DOT_NEWLINE))
212566525Speter		  { /* We have .*\n.  */
212666525Speter		    STORE_JUMP (jump, b, laststart);
212766525Speter		    keep_string_p = true;
212866525Speter		  }
212966525Speter		else
213066525Speter		  /* Anything else.  */
213166525Speter		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
213217721Speter
213366525Speter		/* We've added more stuff to the buffer.  */
213466525Speter		b += 3;
213566525Speter	      }
213617721Speter
213766525Speter	    /* On failure, jump from laststart to b + 3, which will be the
213866525Speter	       end of the buffer after this jump is inserted.  */
213966525Speter	    GET_BUFFER_SPACE (3);
214066525Speter	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
214166525Speter				       : on_failure_jump,
214266525Speter			 laststart, b + 3);
214366525Speter	    pending_exact = 0;
214466525Speter	    b += 3;
214517721Speter
214666525Speter	    if (!zero_times_ok)
214766525Speter	      {
214866525Speter		/* At least one repetition is required, so insert a
214966525Speter		   `dummy_failure_jump' before the initial
215066525Speter		   `on_failure_jump' instruction of the loop. This
215166525Speter		   effects a skip over that instruction the first time
215266525Speter		   we hit that loop.  */
215366525Speter		GET_BUFFER_SPACE (3);
215466525Speter		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
215566525Speter		b += 3;
215666525Speter	      }
215766525Speter	    }
215817721Speter	  break;
215917721Speter
216017721Speter
216117721Speter	case '.':
216266525Speter	  laststart = b;
216366525Speter	  BUF_PUSH (anychar);
216466525Speter	  break;
216517721Speter
216617721Speter
216766525Speter	case '[':
216866525Speter	  {
216966525Speter	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
217017721Speter
217166525Speter	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
217217721Speter
217366525Speter	    /* Ensure that we have enough space to push a charset: the
217466525Speter	       opcode, the length count, and the bitset; 34 bytes in all.  */
217517721Speter	    GET_BUFFER_SPACE (34);
217617721Speter
217766525Speter	    laststart = b;
217817721Speter
217966525Speter	    /* We test `*p == '^' twice, instead of using an if
218066525Speter	       statement, so we only need one BUF_PUSH.	 */
218166525Speter	    BUF_PUSH (*p == '^' ? charset_not : charset);
218266525Speter	    if (*p == '^')
218366525Speter	      p++;
218417721Speter
218566525Speter	    /* Remember the first position in the bracket expression.  */
218666525Speter	    p1 = p;
218717721Speter
218866525Speter	    /* Push the number of bytes in the bitmap.	*/
218966525Speter	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
219017721Speter
219166525Speter	    /* Clear the whole map.  */
219266525Speter	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
219317721Speter
219466525Speter	    /* charset_not matches newline according to a syntax bit.  */
219566525Speter	    if ((re_opcode_t) b[-2] == charset_not
219666525Speter		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
219766525Speter	      SET_LIST_BIT ('\n');
219817721Speter
219966525Speter	    /* Read in characters and ranges, setting map bits.	 */
220066525Speter	    for (;;)
220166525Speter	      {
220266525Speter		int len;
220366525Speter		boolean escaped_char = false;
220417721Speter
220566525Speter		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
220617721Speter
220766525Speter		PATFETCH (c);
220817721Speter
220966525Speter		/* \ might escape characters inside [...] and [^...].  */
221066525Speter		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
221166525Speter		  {
221266525Speter		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
221317721Speter
221466525Speter		    PATFETCH (c);
221566525Speter		    escaped_char = true;
221666525Speter		  }
221766525Speter		else
221866525Speter		  {
221966525Speter		    /* Could be the end of the bracket expression.	If it's
222066525Speter		       not (i.e., when the bracket expression is `[]' so
222166525Speter		       far), the ']' character bit gets set way below.  */
222266525Speter		    if (c == ']' && p != p1 + 1)
222366525Speter		      break;
222466525Speter		  }
222517721Speter
222666525Speter		/* If C indicates start of multibyte char, get the
222766525Speter		   actual character code in C, and set the pattern
222866525Speter		   pointer P to the next character boundary.  */
222966525Speter		if (bufp->multibyte && BASE_LEADING_CODE_P (c))
223066525Speter		  {
223166525Speter		    PATUNFETCH;
223266525Speter		    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
223366525Speter		    p += len;
223466525Speter		  }
223566525Speter		/* What should we do for the character which is
223666525Speter		   greater than 0x7F, but not BASE_LEADING_CODE_P?
223766525Speter		   XXX */
223817721Speter
223966525Speter		/* See if we're at the beginning of a possible character
224066525Speter		   class.  */
224117721Speter
224266525Speter		else if (!escaped_char &&
224366525Speter			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
224466525Speter		  {
224566525Speter		    /* Leave room for the null.	 */
224666525Speter		    char str[CHAR_CLASS_MAX_LENGTH + 1];
224717721Speter
224866525Speter		    PATFETCH (c);
224966525Speter		    c1 = 0;
225025839Speter
225166525Speter		    /* If pattern is `[[:'.  */
225266525Speter		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
225317721Speter
225466525Speter		    for (;;)
225566525Speter		      {
225666525Speter			PATFETCH (c);
225766525Speter			if (c == ':' || c == ']' || p == pend
225866525Speter			    || c1 == CHAR_CLASS_MAX_LENGTH)
225966525Speter			  break;
226066525Speter			str[c1++] = c;
226166525Speter		      }
226266525Speter		    str[c1] = '\0';
226317721Speter
226466525Speter		    /* If isn't a word bracketed by `[:' and `:]':
226566525Speter		       undo the ending character, the letters, and
226666525Speter		       leave the leading `:' and `[' (but set bits for
226766525Speter		       them).  */
226866525Speter		    if (c == ':' && *p == ']')
226966525Speter		      {
227066525Speter			int ch;
227166525Speter			boolean is_alnum = STREQ (str, "alnum");
227266525Speter			boolean is_alpha = STREQ (str, "alpha");
227366525Speter			boolean is_blank = STREQ (str, "blank");
227466525Speter			boolean is_cntrl = STREQ (str, "cntrl");
227566525Speter			boolean is_digit = STREQ (str, "digit");
227666525Speter			boolean is_graph = STREQ (str, "graph");
227766525Speter			boolean is_lower = STREQ (str, "lower");
227866525Speter			boolean is_print = STREQ (str, "print");
227966525Speter			boolean is_punct = STREQ (str, "punct");
228066525Speter			boolean is_space = STREQ (str, "space");
228166525Speter			boolean is_upper = STREQ (str, "upper");
228266525Speter			boolean is_xdigit = STREQ (str, "xdigit");
228317721Speter
228466525Speter			if (!IS_CHAR_CLASS (str))
228566525Speter			  FREE_STACK_RETURN (REG_ECTYPE);
228617721Speter
228766525Speter			/* Throw away the ] at the end of the character
228866525Speter			   class.  */
228966525Speter			PATFETCH (c);
229017721Speter
229166525Speter			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
229217721Speter
229366525Speter			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
229466525Speter			  {
229566525Speter			    int translated = TRANSLATE (ch);
229666525Speter			    /* This was split into 3 if's to
229766525Speter			       avoid an arbitrary limit in some compiler.  */
229866525Speter			    if (   (is_alnum  && ISALNUM (ch))
229966525Speter				|| (is_alpha  && ISALPHA (ch))
230066525Speter				|| (is_blank  && ISBLANK (ch))
230166525Speter				|| (is_cntrl  && ISCNTRL (ch)))
230266525Speter			      SET_LIST_BIT (translated);
230366525Speter			    if (   (is_digit  && ISDIGIT (ch))
230466525Speter				|| (is_graph  && ISGRAPH (ch))
230566525Speter				|| (is_lower  && ISLOWER (ch))
230666525Speter				|| (is_print  && ISPRINT (ch)))
230766525Speter			      SET_LIST_BIT (translated);
230866525Speter			    if (   (is_punct  && ISPUNCT (ch))
230966525Speter				|| (is_space  && ISSPACE (ch))
231066525Speter				|| (is_upper  && ISUPPER (ch))
231166525Speter				|| (is_xdigit && ISXDIGIT (ch)))
231266525Speter			      SET_LIST_BIT (translated);
231366525Speter			  }
231425839Speter
231566525Speter			/* Repeat the loop. */
231666525Speter			continue;
231766525Speter		      }
231866525Speter		    else
231966525Speter		      {
232066525Speter			c1++;
232166525Speter			while (c1--)
232266525Speter			  PATUNFETCH;
232366525Speter			SET_LIST_BIT ('[');
232417721Speter
232566525Speter			/* Because the `:' may starts the range, we
232666525Speter			   can't simply set bit and repeat the loop.
232766525Speter			   Instead, just set it to C and handle below.	*/
232866525Speter			c = ':';
232966525Speter		      }
233066525Speter		  }
233117721Speter
233266525Speter		if (p < pend && p[0] == '-' && p[1] != ']')
233366525Speter		  {
233417721Speter
233566525Speter		    /* Discard the `-'. */
233666525Speter		    PATFETCH (c1);
233717721Speter
233866525Speter		    /* Fetch the character which ends the range. */
233966525Speter		    PATFETCH (c1);
234066525Speter		    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
234166525Speter		      {
234266525Speter			PATUNFETCH;
234366525Speter			c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
234466525Speter			p += len;
234566525Speter		      }
234617721Speter
234766525Speter		    if (SINGLE_BYTE_CHAR_P (c)
234866525Speter			&& ! SINGLE_BYTE_CHAR_P (c1))
234966525Speter		      {
235066525Speter			/* Handle a range such as \177-\377 in multibyte mode.
235166525Speter			   Split that into two ranges,,
235266525Speter			   the low one ending at 0237, and the high one
235366525Speter			   starting at ...040.  */
235466525Speter			int c1_base = (c1 & ~0177) | 040;
235566525Speter			SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
235666525Speter			c1 = 0237;
235766525Speter		      }
235866525Speter		    else if (!SAME_CHARSET_P (c, c1))
235966525Speter		      FREE_STACK_RETURN (REG_ERANGE);
236066525Speter		  }
236166525Speter		else
236266525Speter		  /* Range from C to C. */
236366525Speter		  c1 = c;
236417721Speter
236566525Speter		/* Set the range ... */
236666525Speter		if (SINGLE_BYTE_CHAR_P (c))
236766525Speter		  /* ... into bitmap.  */
236866525Speter		  {
236966525Speter		    unsigned this_char;
237066525Speter		    int range_start = c, range_end = c1;
237166525Speter
237266525Speter		    /* If the start is after the end, the range is empty.  */
237366525Speter		    if (range_start > range_end)
237466525Speter		      {
237566525Speter			if (syntax & RE_NO_EMPTY_RANGES)
237666525Speter			  FREE_STACK_RETURN (REG_ERANGE);
237766525Speter			/* Else, repeat the loop.  */
237866525Speter		      }
237966525Speter		    else
238066525Speter		      {
238166525Speter			for (this_char = range_start; this_char <= range_end;
238266525Speter			     this_char++)
238366525Speter			  SET_LIST_BIT (TRANSLATE (this_char));
238466525Speter		      }
238566525Speter		  }
238666525Speter		else
238766525Speter		  /* ... into range table.  */
238866525Speter		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
238966525Speter	      }
239066525Speter
239166525Speter	    /* Discard any (non)matching list bytes that are all 0 at the
239266525Speter	       end of the map.	Decrease the map-length byte too.  */
239366525Speter	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
239466525Speter	      b[-1]--;
239566525Speter	    b += b[-1];
239666525Speter
239766525Speter	    /* Build real range table from work area. */
239866525Speter	    if (RANGE_TABLE_WORK_USED (range_table_work))
239966525Speter	      {
240066525Speter		int i;
240166525Speter		int used = RANGE_TABLE_WORK_USED (range_table_work);
240266525Speter
240366525Speter		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
240466525Speter		   bytes for COUNT and three bytes for each character.	*/
240566525Speter		GET_BUFFER_SPACE (2 + used * 3);
240666525Speter
240766525Speter		/* Indicate the existence of range table.  */
240866525Speter		laststart[1] |= 0x80;
240966525Speter
241066525Speter		STORE_NUMBER_AND_INCR (b, used / 2);
241166525Speter		for (i = 0; i < used; i++)
241266525Speter		  STORE_CHARACTER_AND_INCR
241366525Speter		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
241466525Speter	      }
241566525Speter	  }
241666525Speter	  break;
241766525Speter
241866525Speter
241917721Speter	case '(':
242066525Speter	  if (syntax & RE_NO_BK_PARENS)
242166525Speter	    goto handle_open;
242266525Speter	  else
242366525Speter	    goto normal_char;
242417721Speter
242517721Speter
242666525Speter	case ')':
242766525Speter	  if (syntax & RE_NO_BK_PARENS)
242866525Speter	    goto handle_close;
242966525Speter	  else
243066525Speter	    goto normal_char;
243117721Speter
243217721Speter
243366525Speter	case '\n':
243466525Speter	  if (syntax & RE_NEWLINE_ALT)
243566525Speter	    goto handle_alt;
243666525Speter	  else
243766525Speter	    goto normal_char;
243817721Speter
243917721Speter
244017721Speter	case '|':
244166525Speter	  if (syntax & RE_NO_BK_VBAR)
244266525Speter	    goto handle_alt;
244366525Speter	  else
244466525Speter	    goto normal_char;
244517721Speter
244617721Speter
244766525Speter	case '{':
244866525Speter	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
244966525Speter	     goto handle_interval;
245066525Speter	   else
245166525Speter	     goto normal_char;
245217721Speter
245317721Speter
245466525Speter	case '\\':
245566525Speter	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
245617721Speter
245766525Speter	  /* Do not translate the character after the \, so that we can
245866525Speter	     distinguish, e.g., \B from \b, even if we normally would
245966525Speter	     translate, e.g., B to b.  */
246066525Speter	  PATFETCH_RAW (c);
246117721Speter
246266525Speter	  switch (c)
246366525Speter	    {
246466525Speter	    case '(':
246566525Speter	      if (syntax & RE_NO_BK_PARENS)
246666525Speter		goto normal_backslash;
246717721Speter
246866525Speter	    handle_open:
246966525Speter	      bufp->re_nsub++;
247066525Speter	      regnum++;
247117721Speter
247266525Speter	      if (COMPILE_STACK_FULL)
247366525Speter		{
247466525Speter		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
247566525Speter			    compile_stack_elt_t);
247666525Speter		  if (compile_stack.stack == NULL) return REG_ESPACE;
247717721Speter
247866525Speter		  compile_stack.size <<= 1;
247966525Speter		}
248017721Speter
248166525Speter	      /* These are the values to restore when we hit end of this
248266525Speter		 group.	 They are all relative offsets, so that if the
248366525Speter		 whole pattern moves because of realloc, they will still
248466525Speter		 be valid.  */
248566525Speter	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
248666525Speter	      COMPILE_STACK_TOP.fixup_alt_jump
248766525Speter		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
248866525Speter	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
248966525Speter	      COMPILE_STACK_TOP.regnum = regnum;
249017721Speter
249166525Speter	      /* We will eventually replace the 0 with the number of
249266525Speter		 groups inner to this one.  But do not push a
249366525Speter		 start_memory for groups beyond the last one we can
249466525Speter		 represent in the compiled pattern.  */
249566525Speter	      if (regnum <= MAX_REGNUM)
249666525Speter		{
249766525Speter		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
249866525Speter		  BUF_PUSH_3 (start_memory, regnum, 0);
249966525Speter		}
250025839Speter
250166525Speter	      compile_stack.avail++;
250217721Speter
250366525Speter	      fixup_alt_jump = 0;
250466525Speter	      laststart = 0;
250566525Speter	      begalt = b;
250617721Speter	      /* If we've reached MAX_REGNUM groups, then this open
250717721Speter		 won't actually generate any code, so we'll have to
250817721Speter		 clear pending_exact explicitly.  */
250917721Speter	      pending_exact = 0;
251066525Speter	      break;
251117721Speter
251217721Speter
251366525Speter	    case ')':
251466525Speter	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
251517721Speter
251666525Speter	      if (COMPILE_STACK_EMPTY)
251766525Speter		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
251866525Speter		  goto normal_backslash;
251966525Speter		else
252066525Speter		  FREE_STACK_RETURN (REG_ERPAREN);
252117721Speter
252266525Speter	    handle_close:
252366525Speter	      if (fixup_alt_jump)
252466525Speter		{ /* Push a dummy failure point at the end of the
252566525Speter		     alternative for a possible future
252666525Speter		     `pop_failure_jump' to pop.	 See comments at
252766525Speter		     `push_dummy_failure' in `re_match_2'.  */
252866525Speter		  BUF_PUSH (push_dummy_failure);
252925839Speter
253066525Speter		  /* We allocated space for this jump when we assigned
253166525Speter		     to `fixup_alt_jump', in the `handle_alt' case below.  */
253266525Speter		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
253366525Speter		}
253417721Speter
253566525Speter	      /* See similar code for backslashed left paren above.  */
253666525Speter	      if (COMPILE_STACK_EMPTY)
253766525Speter		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
253866525Speter		  goto normal_char;
253966525Speter		else
254066525Speter		  FREE_STACK_RETURN (REG_ERPAREN);
254117721Speter
254266525Speter	      /* Since we just checked for an empty stack above, this
254366525Speter		 ``can't happen''.  */
254466525Speter	      assert (compile_stack.avail != 0);
254566525Speter	      {
254666525Speter		/* We don't just want to restore into `regnum', because
254766525Speter		   later groups should continue to be numbered higher,
254866525Speter		   as in `(ab)c(de)' -- the second group is #2.	 */
254966525Speter		regnum_t this_group_regnum;
255017721Speter
255166525Speter		compile_stack.avail--;
255266525Speter		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
255366525Speter		fixup_alt_jump
255466525Speter		  = COMPILE_STACK_TOP.fixup_alt_jump
255566525Speter		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
255666525Speter		    : 0;
255766525Speter		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
255866525Speter		this_group_regnum = COMPILE_STACK_TOP.regnum;
255917721Speter		/* If we've reached MAX_REGNUM groups, then this open
256017721Speter		   won't actually generate any code, so we'll have to
256117721Speter		   clear pending_exact explicitly.  */
256217721Speter		pending_exact = 0;
256317721Speter
256466525Speter		/* We're at the end of the group, so now we know how many
256566525Speter		   groups were inside this one.	 */
256666525Speter		if (this_group_regnum <= MAX_REGNUM)
256766525Speter		  {
256866525Speter		    unsigned char *inner_group_loc
256966525Speter		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
257025839Speter
257166525Speter		    *inner_group_loc = regnum - this_group_regnum;
257266525Speter		    BUF_PUSH_3 (stop_memory, this_group_regnum,
257366525Speter				regnum - this_group_regnum);
257466525Speter		  }
257566525Speter	      }
257666525Speter	      break;
257717721Speter
257817721Speter
257966525Speter	    case '|':					/* `\|'.  */
258066525Speter	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
258166525Speter		goto normal_backslash;
258266525Speter	    handle_alt:
258366525Speter	      if (syntax & RE_LIMITED_OPS)
258466525Speter		goto normal_char;
258517721Speter
258666525Speter	      /* Insert before the previous alternative a jump which
258766525Speter		 jumps to this alternative if the former fails.	 */
258866525Speter	      GET_BUFFER_SPACE (3);
258966525Speter	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
259066525Speter	      pending_exact = 0;
259166525Speter	      b += 3;
259217721Speter
259366525Speter	      /* The alternative before this one has a jump after it
259466525Speter		 which gets executed if it gets matched.  Adjust that
259566525Speter		 jump so it will jump to this alternative's analogous
259666525Speter		 jump (put in below, which in turn will jump to the next
259766525Speter		 (if any) alternative's such jump, etc.).  The last such
259866525Speter		 jump jumps to the correct final destination.  A picture:
259966525Speter			  _____ _____
260066525Speter			  |   | |   |
260166525Speter			  |   v |   v
260266525Speter			 a | b	 | c
260317721Speter
260466525Speter		 If we are at `b', then fixup_alt_jump right now points to a
260566525Speter		 three-byte space after `a'.  We'll put in the jump, set
260666525Speter		 fixup_alt_jump to right after `b', and leave behind three
260766525Speter		 bytes which we'll fill in when we get to after `c'.  */
260817721Speter
260966525Speter	      if (fixup_alt_jump)
261066525Speter		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
261117721Speter
261266525Speter	      /* Mark and leave space for a jump after this alternative,
261366525Speter		 to be filled in later either by next alternative or
261466525Speter		 when know we're at the end of a series of alternatives.  */
261566525Speter	      fixup_alt_jump = b;
261666525Speter	      GET_BUFFER_SPACE (3);
261766525Speter	      b += 3;
261817721Speter
261966525Speter	      laststart = 0;
262066525Speter	      begalt = b;
262166525Speter	      break;
262217721Speter
262317721Speter
262466525Speter	    case '{':
262566525Speter	      /* If \{ is a literal.  */
262666525Speter	      if (!(syntax & RE_INTERVALS)
262766525Speter		     /* If we're at `\{' and it's not the open-interval
262866525Speter			operator.  */
262966525Speter		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
263066525Speter		  || (p - 2 == pattern	&&  p == pend))
263166525Speter		goto normal_backslash;
263217721Speter
263366525Speter	    handle_interval:
263466525Speter	      {
263566525Speter		/* If got here, then the syntax allows intervals.  */
263617721Speter
263766525Speter		/* At least (most) this many matches must be made.  */
263866525Speter		int lower_bound = -1, upper_bound = -1;
263917721Speter
264066525Speter		beg_interval = p - 1;
264117721Speter
264266525Speter		if (p == pend)
264366525Speter		  {
264466525Speter		    if (syntax & RE_NO_BK_BRACES)
264566525Speter		      goto unfetch_interval;
264666525Speter		    else
264766525Speter		      FREE_STACK_RETURN (REG_EBRACE);
264866525Speter		  }
264917721Speter
265066525Speter		GET_UNSIGNED_NUMBER (lower_bound);
265117721Speter
265266525Speter		if (c == ',')
265366525Speter		  {
265466525Speter		    GET_UNSIGNED_NUMBER (upper_bound);
265566525Speter		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
265666525Speter		  }
265766525Speter		else
265866525Speter		  /* Interval such as `{1}' => match exactly once. */
265966525Speter		  upper_bound = lower_bound;
266017721Speter
266166525Speter		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
266266525Speter		    || lower_bound > upper_bound)
266366525Speter		  {
266466525Speter		    if (syntax & RE_NO_BK_BRACES)
266566525Speter		      goto unfetch_interval;
266666525Speter		    else
266766525Speter		      FREE_STACK_RETURN (REG_BADBR);
266866525Speter		  }
266917721Speter
267066525Speter		if (!(syntax & RE_NO_BK_BRACES))
267166525Speter		  {
267266525Speter		    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
267317721Speter
267466525Speter		    PATFETCH (c);
267566525Speter		  }
267617721Speter
267766525Speter		if (c != '}')
267866525Speter		  {
267966525Speter		    if (syntax & RE_NO_BK_BRACES)
268066525Speter		      goto unfetch_interval;
268166525Speter		    else
268266525Speter		      FREE_STACK_RETURN (REG_BADBR);
268366525Speter		  }
268417721Speter
268566525Speter		/* We just parsed a valid interval.  */
268617721Speter
268766525Speter		/* If it's invalid to have no preceding re.  */
268866525Speter		if (!laststart)
268966525Speter		  {
269066525Speter		    if (syntax & RE_CONTEXT_INVALID_OPS)
269166525Speter		      FREE_STACK_RETURN (REG_BADRPT);
269266525Speter		    else if (syntax & RE_CONTEXT_INDEP_OPS)
269366525Speter		      laststart = b;
269466525Speter		    else
269566525Speter		      goto unfetch_interval;
269666525Speter		  }
269717721Speter
269866525Speter		/* If the upper bound is zero, don't want to succeed at
269966525Speter		   all; jump from `laststart' to `b + 3', which will be
270066525Speter		   the end of the buffer after we insert the jump.  */
270166525Speter		 if (upper_bound == 0)
270266525Speter		   {
270366525Speter		     GET_BUFFER_SPACE (3);
270466525Speter		     INSERT_JUMP (jump, laststart, b + 3);
270566525Speter		     b += 3;
270666525Speter		   }
270717721Speter
270866525Speter		 /* Otherwise, we have a nontrivial interval.  When
270966525Speter		    we're all done, the pattern will look like:
271066525Speter		      set_number_at <jump count> <upper bound>
271166525Speter		      set_number_at <succeed_n count> <lower bound>
271266525Speter		      succeed_n <after jump addr> <succeed_n count>
271366525Speter		      <body of loop>
271466525Speter		      jump_n <succeed_n addr> <jump count>
271566525Speter		    (The upper bound and `jump_n' are omitted if
271666525Speter		    `upper_bound' is 1, though.)  */
271766525Speter		 else
271866525Speter		   { /* If the upper bound is > 1, we need to insert
271966525Speter			more at the end of the loop.  */
272066525Speter		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
272117721Speter
272266525Speter		     GET_BUFFER_SPACE (nbytes);
272317721Speter
272466525Speter		     /* Initialize lower bound of the `succeed_n', even
272566525Speter			though it will be set during matching by its
272666525Speter			attendant `set_number_at' (inserted next),
272766525Speter			because `re_compile_fastmap' needs to know.
272866525Speter			Jump to the `jump_n' we might insert below.  */
272966525Speter		     INSERT_JUMP2 (succeed_n, laststart,
273066525Speter				   b + 5 + (upper_bound > 1) * 5,
273166525Speter				   lower_bound);
273266525Speter		     b += 5;
273317721Speter
273466525Speter		     /* Code to initialize the lower bound.  Insert
273566525Speter			before the `succeed_n'.	 The `5' is the last two
273666525Speter			bytes of this `set_number_at', plus 3 bytes of
273766525Speter			the following `succeed_n'.  */
273866525Speter		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
273966525Speter		     b += 5;
274017721Speter
274166525Speter		     if (upper_bound > 1)
274266525Speter		       { /* More than one repetition is allowed, so
274366525Speter			    append a backward jump to the `succeed_n'
274466525Speter			    that starts this interval.
274525839Speter
274666525Speter			    When we've reached this during matching,
274766525Speter			    we'll have matched the interval once, so
274866525Speter			    jump back only `upper_bound - 1' times.  */
274966525Speter			 STORE_JUMP2 (jump_n, b, laststart + 5,
275066525Speter				      upper_bound - 1);
275166525Speter			 b += 5;
275217721Speter
275366525Speter			 /* The location we want to set is the second
275466525Speter			    parameter of the `jump_n'; that is `b-2' as
275566525Speter			    an absolute address.  `laststart' will be
275666525Speter			    the `set_number_at' we're about to insert;
275766525Speter			    `laststart+3' the number to set, the source
275866525Speter			    for the relative address.  But we are
275966525Speter			    inserting into the middle of the pattern --
276066525Speter			    so everything is getting moved up by 5.
276166525Speter			    Conclusion: (b - 2) - (laststart + 3) + 5,
276266525Speter			    i.e., b - laststart.
276325839Speter
276466525Speter			    We insert this at the beginning of the loop
276566525Speter			    so that if we fail during matching, we'll
276666525Speter			    reinitialize the bounds.  */
276766525Speter			 insert_op2 (set_number_at, laststart, b - laststart,
276866525Speter				     upper_bound - 1, b);
276966525Speter			 b += 5;
277066525Speter		       }
277166525Speter		   }
277266525Speter		pending_exact = 0;
277366525Speter		beg_interval = NULL;
277466525Speter	      }
277566525Speter	      break;
277617721Speter
277766525Speter	    unfetch_interval:
277866525Speter	      /* If an invalid interval, match the characters as literals.  */
277966525Speter	       assert (beg_interval);
278066525Speter	       p = beg_interval;
278166525Speter	       beg_interval = NULL;
278217721Speter
278366525Speter	       /* normal_char and normal_backslash need `c'.  */
278466525Speter	       PATFETCH (c);
278517721Speter
278666525Speter	       if (!(syntax & RE_NO_BK_BRACES))
278766525Speter		 {
278866525Speter		   if (p > pattern  &&	p[-1] == '\\')
278966525Speter		     goto normal_backslash;
279066525Speter		 }
279166525Speter	       goto normal_char;
279217721Speter
279317721Speter#ifdef emacs
279466525Speter	    /* There is no way to specify the before_dot and after_dot
279566525Speter	       operators.  rms says this is ok.	 --karl	 */
279666525Speter	    case '=':
279766525Speter	      BUF_PUSH (at_dot);
279866525Speter	      break;
279917721Speter
280066525Speter	    case 's':
280166525Speter	      laststart = b;
280266525Speter	      PATFETCH (c);
280366525Speter	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
280466525Speter	      break;
280517721Speter
280666525Speter	    case 'S':
280766525Speter	      laststart = b;
280866525Speter	      PATFETCH (c);
280966525Speter	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
281066525Speter	      break;
281166525Speter
281266525Speter	    case 'c':
281366525Speter	      laststart = b;
281466525Speter	      PATFETCH_RAW (c);
281566525Speter	      BUF_PUSH_2 (categoryspec, c);
281666525Speter	      break;
281766525Speter
281866525Speter	    case 'C':
281966525Speter	      laststart = b;
282066525Speter	      PATFETCH_RAW (c);
282166525Speter	      BUF_PUSH_2 (notcategoryspec, c);
282266525Speter	      break;
282317721Speter#endif /* emacs */
282417721Speter
282517721Speter
282666525Speter	    case 'w':
282766525Speter	      laststart = b;
282866525Speter	      BUF_PUSH (wordchar);
282966525Speter	      break;
283017721Speter
283117721Speter
283266525Speter	    case 'W':
283366525Speter	      laststart = b;
283466525Speter	      BUF_PUSH (notwordchar);
283566525Speter	      break;
283617721Speter
283717721Speter
283866525Speter	    case '<':
283966525Speter	      BUF_PUSH (wordbeg);
284066525Speter	      break;
284117721Speter
284266525Speter	    case '>':
284366525Speter	      BUF_PUSH (wordend);
284466525Speter	      break;
284517721Speter
284666525Speter	    case 'b':
284766525Speter	      BUF_PUSH (wordbound);
284866525Speter	      break;
284917721Speter
285066525Speter	    case 'B':
285166525Speter	      BUF_PUSH (notwordbound);
285266525Speter	      break;
285317721Speter
285466525Speter	    case '`':
285566525Speter	      BUF_PUSH (begbuf);
285666525Speter	      break;
285717721Speter
285866525Speter	    case '\'':
285966525Speter	      BUF_PUSH (endbuf);
286066525Speter	      break;
286117721Speter
286266525Speter	    case '1': case '2': case '3': case '4': case '5':
286366525Speter	    case '6': case '7': case '8': case '9':
286466525Speter	      if (syntax & RE_NO_BK_REFS)
286566525Speter		goto normal_char;
286617721Speter
286766525Speter	      c1 = c - '0';
286817721Speter
286966525Speter	      if (c1 > regnum)
287066525Speter		FREE_STACK_RETURN (REG_ESUBREG);
287117721Speter
287266525Speter	      /* Can't back reference to a subexpression if inside of it.  */
287366525Speter	      if (group_in_compile_stack (compile_stack, c1))
287466525Speter		goto normal_char;
287517721Speter
287666525Speter	      laststart = b;
287766525Speter	      BUF_PUSH_2 (duplicate, c1);
287866525Speter	      break;
287917721Speter
288017721Speter
288166525Speter	    case '+':
288266525Speter	    case '?':
288366525Speter	      if (syntax & RE_BK_PLUS_QM)
288466525Speter		goto handle_plus;
288566525Speter	      else
288666525Speter		goto normal_backslash;
288717721Speter
288866525Speter	    default:
288966525Speter	    normal_backslash:
289066525Speter	      /* You might think it would be useful for \ to mean
289166525Speter		 not to translate; but if we don't translate it
289266525Speter		 it will never match anything.	*/
289366525Speter	      c = TRANSLATE (c);
289466525Speter	      goto normal_char;
289566525Speter	    }
289666525Speter	  break;
289717721Speter
289817721Speter
289917721Speter	default:
290066525Speter	/* Expects the character in `c'.  */
290117721Speter	normal_char:
290266525Speter	  p1 = p - 1;		/* P1 points the head of C.  */
290366525Speter#ifdef emacs
290466525Speter	  if (bufp->multibyte)
290566525Speter	    {
290666525Speter	      c = STRING_CHAR (p1, pend - p1);
290766525Speter	      c = TRANSLATE (c);
290866525Speter	      /* Set P to the next character boundary.  */
290966525Speter	      p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
291066525Speter	    }
291166525Speter#endif
291217721Speter	      /* If no exactn currently being built.  */
291366525Speter	  if (!pending_exact
291417721Speter
291566525Speter	      /* If last exactn not at current position.  */
291666525Speter	      || pending_exact + *pending_exact + 1 != b
291725839Speter
291866525Speter	      /* We have only one byte following the exactn for the count.  */
291966525Speter	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
292017721Speter
292166525Speter	      /* If followed by a repetition operator.	*/
292266525Speter	      || (p != pend && (*p == '*' || *p == '^'))
292317721Speter	      || ((syntax & RE_BK_PLUS_QM)
292466525Speter		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
292566525Speter		  : p != pend && (*p == '+' || *p == '?'))
292617721Speter	      || ((syntax & RE_INTERVALS)
292766525Speter		  && ((syntax & RE_NO_BK_BRACES)
292866525Speter		      ? p != pend && *p == '{'
292966525Speter		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
293017721Speter	    {
293117721Speter	      /* Start building a new exactn.  */
293225839Speter
293366525Speter	      laststart = b;
293417721Speter
293517721Speter	      BUF_PUSH_2 (exactn, 0);
293617721Speter	      pending_exact = b - 1;
293766525Speter	    }
293825839Speter
293966525Speter#ifdef emacs
294066525Speter	  if (! SINGLE_BYTE_CHAR_P (c))
294166525Speter	    {
294266525Speter	      unsigned char work[4], *str;
294366525Speter	      int i = CHAR_STRING (c, work, str);
294466525Speter	      int j;
294566525Speter	      for (j = 0; j < i; j++)
294666525Speter		{
294766525Speter		  BUF_PUSH (str[j]);
294866525Speter		  (*pending_exact)++;
294966525Speter		}
295066525Speter	    }
295166525Speter	  else
295266525Speter#endif
295366525Speter	    {
295466525Speter	      BUF_PUSH (c);
295566525Speter	      (*pending_exact)++;
295666525Speter	    }
295717721Speter	  break;
295866525Speter	} /* switch (c) */
295917721Speter    } /* while p != pend */
296017721Speter
296125839Speter
296217721Speter  /* Through the pattern now.  */
296325839Speter
296417721Speter  if (fixup_alt_jump)
296517721Speter    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
296617721Speter
296725839Speter  if (!COMPILE_STACK_EMPTY)
296866525Speter    FREE_STACK_RETURN (REG_EPAREN);
296917721Speter
297066525Speter  /* If we don't want backtracking, force success
297166525Speter     the first time we reach the end of the compiled pattern.  */
297266525Speter  if (syntax & RE_NO_POSIX_BACKTRACKING)
297366525Speter    BUF_PUSH (succeed);
297466525Speter
297517721Speter  free (compile_stack.stack);
297617721Speter
297717721Speter  /* We have succeeded; set the length of the buffer.  */
297817721Speter  bufp->used = b - bufp->buffer;
297917721Speter
298017721Speter#ifdef DEBUG
298117721Speter  if (debug)
298217721Speter    {
298366525Speter      DEBUG_PRINT1 ("\nCompiled pattern: \n");
298417721Speter      print_compiled_pattern (bufp);
298517721Speter    }
298617721Speter#endif /* DEBUG */
298717721Speter
298866525Speter#ifndef MATCH_MAY_ALLOCATE
298966525Speter  /* Initialize the failure stack to the largest possible stack.  This
299066525Speter     isn't necessary unless we're trying to avoid calling alloca in
299166525Speter     the search and match routines.  */
299266525Speter  {
299366525Speter    int num_regs = bufp->re_nsub + 1;
299466525Speter
299566525Speter    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
299666525Speter      {
299766525Speter	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
299866525Speter
299966525Speter#ifdef emacs
300066525Speter	if (! fail_stack.stack)
300166525Speter	  fail_stack.stack
300266525Speter	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
300366525Speter					    * sizeof (fail_stack_elt_t));
300466525Speter	else
300566525Speter	  fail_stack.stack
300666525Speter	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
300766525Speter					     (fail_stack.size
300866525Speter					      * sizeof (fail_stack_elt_t)));
300966525Speter#else /* not emacs */
301066525Speter	if (! fail_stack.stack)
301166525Speter	  fail_stack.stack
301266525Speter	    = (fail_stack_elt_t *) malloc (fail_stack.size
301366525Speter					   * sizeof (fail_stack_elt_t));
301466525Speter	else
301566525Speter	  fail_stack.stack
301666525Speter	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
301766525Speter					    (fail_stack.size
301866525Speter					     * sizeof (fail_stack_elt_t)));
301966525Speter#endif /* not emacs */
302066525Speter      }
302166525Speter
302266525Speter    regex_grow_registers (num_regs);
302366525Speter  }
302466525Speter#endif /* not MATCH_MAY_ALLOCATE */
302566525Speter
302617721Speter  return REG_NOERROR;
302717721Speter} /* regex_compile */
302817721Speter
302917721Speter/* Subroutines for `regex_compile'.  */
303017721Speter
303166525Speter/* Store OP at LOC followed by two-byte integer parameter ARG.	*/
303217721Speter
303317721Speterstatic void
303417721Speterstore_op1 (op, loc, arg)
303517721Speter    re_opcode_t op;
303617721Speter    unsigned char *loc;
303717721Speter    int arg;
303817721Speter{
303917721Speter  *loc = (unsigned char) op;
304017721Speter  STORE_NUMBER (loc + 1, arg);
304117721Speter}
304217721Speter
304317721Speter
304417721Speter/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
304517721Speter
304617721Speterstatic void
304717721Speterstore_op2 (op, loc, arg1, arg2)
304817721Speter    re_opcode_t op;
304917721Speter    unsigned char *loc;
305017721Speter    int arg1, arg2;
305117721Speter{
305217721Speter  *loc = (unsigned char) op;
305317721Speter  STORE_NUMBER (loc + 1, arg1);
305417721Speter  STORE_NUMBER (loc + 3, arg2);
305517721Speter}
305617721Speter
305717721Speter
305817721Speter/* Copy the bytes from LOC to END to open up three bytes of space at LOC
305917721Speter   for OP followed by two-byte integer parameter ARG.  */
306017721Speter
306117721Speterstatic void
306217721Speterinsert_op1 (op, loc, arg, end)
306317721Speter    re_opcode_t op;
306417721Speter    unsigned char *loc;
306517721Speter    int arg;
306625839Speter    unsigned char *end;
306717721Speter{
306817721Speter  register unsigned char *pfrom = end;
306917721Speter  register unsigned char *pto = end + 3;
307017721Speter
307117721Speter  while (pfrom != loc)
307217721Speter    *--pto = *--pfrom;
307325839Speter
307417721Speter  store_op1 (op, loc, arg);
307517721Speter}
307617721Speter
307717721Speter
307817721Speter/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
307917721Speter
308017721Speterstatic void
308117721Speterinsert_op2 (op, loc, arg1, arg2, end)
308217721Speter    re_opcode_t op;
308317721Speter    unsigned char *loc;
308417721Speter    int arg1, arg2;
308525839Speter    unsigned char *end;
308617721Speter{
308717721Speter  register unsigned char *pfrom = end;
308817721Speter  register unsigned char *pto = end + 5;
308917721Speter
309017721Speter  while (pfrom != loc)
309117721Speter    *--pto = *--pfrom;
309225839Speter
309317721Speter  store_op2 (op, loc, arg1, arg2);
309417721Speter}
309517721Speter
309617721Speter
309717721Speter/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
309817721Speter   after an alternative or a begin-subexpression.  We assume there is at
309917721Speter   least one character before the ^.  */
310017721Speter
310117721Speterstatic boolean
310217721Speterat_begline_loc_p (pattern, p, syntax)
310317721Speter    const char *pattern, *p;
310417721Speter    reg_syntax_t syntax;
310517721Speter{
310617721Speter  const char *prev = p - 2;
310717721Speter  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
310825839Speter
310917721Speter  return
311017721Speter       /* After a subexpression?  */
311117721Speter       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
311266525Speter       /* After an alternative?	 */
311317721Speter    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
311417721Speter}
311517721Speter
311617721Speter
311717721Speter/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
311817721Speter   at least one character after the $, i.e., `P < PEND'.  */
311917721Speter
312017721Speterstatic boolean
312117721Speterat_endline_loc_p (p, pend, syntax)
312217721Speter    const char *p, *pend;
312317721Speter    int syntax;
312417721Speter{
312517721Speter  const char *next = p;
312617721Speter  boolean next_backslash = *next == '\\';
312766525Speter  const char *next_next = p + 1 < pend ? p + 1 : 0;
312866525Speter
312917721Speter  return
313017721Speter       /* Before a subexpression?  */
313117721Speter       (syntax & RE_NO_BK_PARENS ? *next == ')'
313266525Speter	: next_backslash && next_next && *next_next == ')')
313317721Speter       /* Before an alternative?  */
313417721Speter    || (syntax & RE_NO_BK_VBAR ? *next == '|'
313566525Speter	: next_backslash && next_next && *next_next == '|');
313617721Speter}
313717721Speter
313817721Speter
313966525Speter/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
314017721Speter   false if it's not.  */
314117721Speter
314217721Speterstatic boolean
314317721Spetergroup_in_compile_stack (compile_stack, regnum)
314417721Speter    compile_stack_type compile_stack;
314517721Speter    regnum_t regnum;
314617721Speter{
314717721Speter  int this_element;
314817721Speter
314966525Speter  for (this_element = compile_stack.avail - 1;
315066525Speter       this_element >= 0;
315117721Speter       this_element--)
315217721Speter    if (compile_stack.stack[this_element].regnum == regnum)
315317721Speter      return true;
315417721Speter
315517721Speter  return false;
315617721Speter}
315717721Speter
315817721Speter/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
315917721Speter   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
316017721Speter   characters can start a string that matches the pattern.  This fastmap
316117721Speter   is used by re_search to skip quickly over impossible starting points.
316217721Speter
316317721Speter   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
316417721Speter   area as BUFP->fastmap.
316525839Speter
316617721Speter   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
316717721Speter   the pattern buffer.
316817721Speter
316917721Speter   Returns 0 if we succeed, -2 if an internal error.   */
317017721Speter
317117721Speterint
317217721Speterre_compile_fastmap (bufp)
317317721Speter     struct re_pattern_buffer *bufp;
317417721Speter{
317566525Speter  int i, j, k;
317666525Speter#ifdef MATCH_MAY_ALLOCATE
317717721Speter  fail_stack_type fail_stack;
317866525Speter#endif
317917721Speter#ifndef REGEX_MALLOC
318017721Speter  char *destination;
318117721Speter#endif
318217721Speter  /* We don't push any register information onto the failure stack.  */
318317721Speter  unsigned num_regs = 0;
318425839Speter
318517721Speter  register char *fastmap = bufp->fastmap;
318617721Speter  unsigned char *pattern = bufp->buffer;
318717721Speter  unsigned long size = bufp->used;
318825839Speter  unsigned char *p = pattern;
318917721Speter  register unsigned char *pend = pattern + size;
319017721Speter
319166525Speter  /* This holds the pointer to the failure stack, when
319266525Speter     it is allocated relocatably.  */
319366525Speter  fail_stack_elt_t *failure_stack_ptr;
319466525Speter
319517721Speter  /* Assume that each path through the pattern can be null until
319666525Speter     proven otherwise.	We set this false at the bottom of switch
319717721Speter     statement, to which we get only if a particular path doesn't
319817721Speter     match the empty string.  */
319917721Speter  boolean path_can_be_null = true;
320017721Speter
320117721Speter  /* We aren't doing a `succeed_n' to begin with.  */
320217721Speter  boolean succeed_n_p = false;
320317721Speter
320466525Speter  /* If all elements for base leading-codes in fastmap is set, this
320566525Speter     flag is set true.	*/
320666525Speter  boolean match_any_multibyte_characters = false;
320766525Speter
320866525Speter  /* Maximum code of simple (single byte) character. */
320966525Speter  int simple_char_max;
321066525Speter
321117721Speter  assert (fastmap != NULL && p != NULL);
321225839Speter
321317721Speter  INIT_FAIL_STACK ();
321466525Speter  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
321517721Speter  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
321617721Speter  bufp->can_be_null = 0;
321725839Speter
321866525Speter  while (1)
321917721Speter    {
322066525Speter      if (p == pend || *p == succeed)
322166525Speter	{
322266525Speter	  /* We have reached the (effective) end of pattern.  */
322366525Speter	  if (!FAIL_STACK_EMPTY ())
322466525Speter	    {
322566525Speter	      bufp->can_be_null |= path_can_be_null;
322666525Speter
322766525Speter	      /* Reset for next path.  */
322866525Speter	      path_can_be_null = true;
322966525Speter
323066525Speter	      p = fail_stack.stack[--fail_stack.avail].pointer;
323166525Speter
323266525Speter	      continue;
323366525Speter	    }
323466525Speter	  else
323566525Speter	    break;
323617721Speter	}
323717721Speter
323866525Speter      /* We should never be about to go beyond the end of the pattern.	*/
323917721Speter      assert (p < pend);
324066525Speter
324166525Speter      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
324217721Speter	{
324317721Speter
324466525Speter	/* I guess the idea here is to simply not bother with a fastmap
324566525Speter	   if a backreference is used, since it's too hard to figure out
324666525Speter	   the fastmap for the corresponding group.  Setting
324766525Speter	   `can_be_null' stops `re_search_2' from using the fastmap, so
324866525Speter	   that is all we do.  */
324917721Speter	case duplicate:
325017721Speter	  bufp->can_be_null = 1;
325166525Speter	  goto done;
325217721Speter
325317721Speter
325417721Speter      /* Following are the cases which match a character.  These end
325566525Speter	 with `break'.	*/
325617721Speter
325717721Speter	case exactn:
325866525Speter	  fastmap[p[1]] = 1;
325917721Speter	  break;
326017721Speter
326117721Speter
326266525Speter#ifndef emacs
326366525Speter	case charset:
326466525Speter	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
326517721Speter	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
326666525Speter	      fastmap[j] = 1;
326717721Speter	  break;
326817721Speter
326917721Speter
327017721Speter	case charset_not:
327117721Speter	  /* Chars beyond end of map must be allowed.  */
327217721Speter	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
327366525Speter	    fastmap[j] = 1;
327417721Speter
327517721Speter	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
327617721Speter	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
327766525Speter	      fastmap[j] = 1;
327866525Speter	  break;
327917721Speter
328017721Speter
328117721Speter	case wordchar:
328217721Speter	  for (j = 0; j < (1 << BYTEWIDTH); j++)
328317721Speter	    if (SYNTAX (j) == Sword)
328417721Speter	      fastmap[j] = 1;
328517721Speter	  break;
328617721Speter
328717721Speter
328817721Speter	case notwordchar:
328917721Speter	  for (j = 0; j < (1 << BYTEWIDTH); j++)
329017721Speter	    if (SYNTAX (j) != Sword)
329117721Speter	      fastmap[j] = 1;
329217721Speter	  break;
329366525Speter#else  /* emacs */
329466525Speter	case charset:
329566525Speter	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
329666525Speter	       j >= 0; j--)
329766525Speter	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
329866525Speter	      fastmap[j] = 1;
329917721Speter
330066525Speter	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
330166525Speter	      && match_any_multibyte_characters == false)
330266525Speter	    {
330366525Speter	      /* Set fastmap[I] 1 where I is a base leading code of each
330466525Speter		 multibyte character in the range table. */
330566525Speter	      int c, count;
330617721Speter
330766525Speter	      /* Make P points the range table. */
330866525Speter	      p += CHARSET_BITMAP_SIZE (&p[-2]);
330917721Speter
331066525Speter	      /* Extract the number of ranges in range table into
331166525Speter		 COUNT.	 */
331266525Speter	      EXTRACT_NUMBER_AND_INCR (count, p);
331366525Speter	      for (; count > 0; count--, p += 2 * 3) /* XXX */
331466525Speter		{
331566525Speter		  /* Extract the start of each range.  */
331666525Speter		  EXTRACT_CHARACTER (c, p);
331766525Speter		  j = CHAR_CHARSET (c);
331866525Speter		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
331966525Speter		}
332066525Speter	    }
332166525Speter	  break;
332217721Speter
332317721Speter
332466525Speter	case charset_not:
332566525Speter	  /* Chars beyond end of bitmap are possible matches.
332666525Speter	     All the single-byte codes can occur in multibyte buffers.
332766525Speter	     So any that are not listed in the charset
332866525Speter	     are possible matches, even in multibyte buffers.  */
332966525Speter	  simple_char_max = (1 << BYTEWIDTH);
333066525Speter	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
333166525Speter	       j < simple_char_max; j++)
333266525Speter	    fastmap[j] = 1;
333366525Speter
333466525Speter	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
333566525Speter	       j >= 0; j--)
333666525Speter	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
333766525Speter	      fastmap[j] = 1;
333866525Speter
333966525Speter	  if (bufp->multibyte)
334066525Speter	    /* Any character set can possibly contain a character
334166525Speter	       which doesn't match the specified set of characters.  */
334266525Speter	    {
334366525Speter	    set_fastmap_for_multibyte_characters:
334466525Speter	      if (match_any_multibyte_characters == false)
334566525Speter		{
334666525Speter		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
334766525Speter		    if (BASE_LEADING_CODE_P (j))
334866525Speter		      fastmap[j] = 1;
334966525Speter		  match_any_multibyte_characters = true;
335066525Speter		}
335166525Speter	    }
335217721Speter	  break;
335317721Speter
335417721Speter
335566525Speter	case wordchar:
335666525Speter	  /* All the single-byte codes can occur in multibyte buffers,
335766525Speter	     and they may have word syntax.  So do consider them.  */
335866525Speter	  simple_char_max = (1 << BYTEWIDTH);
335966525Speter	  for (j = 0; j < simple_char_max; j++)
336066525Speter	    if (SYNTAX (j) == Sword)
336166525Speter	      fastmap[j] = 1;
336266525Speter
336366525Speter	  if (bufp->multibyte)
336466525Speter	    /* Any character set can possibly contain a character
336566525Speter	       whose syntax is `Sword'.	 */
336666525Speter	    goto set_fastmap_for_multibyte_characters;
336766525Speter	  break;
336866525Speter
336966525Speter
337066525Speter	case notwordchar:
337166525Speter	  /* All the single-byte codes can occur in multibyte buffers,
337266525Speter	     and they may not have word syntax.  So do consider them.  */
337366525Speter	  simple_char_max = (1 << BYTEWIDTH);
337466525Speter	  for (j = 0; j < simple_char_max; j++)
337566525Speter	    if (SYNTAX (j) != Sword)
337666525Speter	      fastmap[j] = 1;
337766525Speter
337866525Speter	  if (bufp->multibyte)
337966525Speter	    /* Any character set can possibly contain a character
338066525Speter	       whose syntax is not `Sword'.  */
338166525Speter	    goto set_fastmap_for_multibyte_characters;
338266525Speter	  break;
338366525Speter#endif
338466525Speter
338566525Speter	case anychar:
338666525Speter	  {
338766525Speter	    int fastmap_newline = fastmap['\n'];
338866525Speter
338966525Speter	    /* `.' matches anything, except perhaps newline.
339066525Speter	       Even in a multibyte buffer, it should match any
339166525Speter	       conceivable byte value for the fastmap.  */
339266525Speter	    if (bufp->multibyte)
339366525Speter	      match_any_multibyte_characters = true;
339466525Speter
339566525Speter	    simple_char_max = (1 << BYTEWIDTH);
339666525Speter	    for (j = 0; j < simple_char_max; j++)
339766525Speter	      fastmap[j] = 1;
339866525Speter
339966525Speter	    /* ... except perhaps newline.  */
340066525Speter	    if (!(bufp->syntax & RE_DOT_NEWLINE))
340166525Speter	      fastmap['\n'] = fastmap_newline;
340266525Speter
340366525Speter	    /* Return if we have already set `can_be_null'; if we have,
340466525Speter	       then the fastmap is irrelevant.	Something's wrong here.	 */
340566525Speter	    else if (bufp->can_be_null)
340666525Speter	      goto done;
340766525Speter
340866525Speter	    /* Otherwise, have to check alternative paths.  */
340966525Speter	    break;
341066525Speter	  }
341166525Speter
341217721Speter#ifdef emacs
341366525Speter	case wordbound:
341466525Speter	case notwordbound:
341566525Speter	case wordbeg:
341666525Speter	case wordend:
341766525Speter	case notsyntaxspec:
341866525Speter	case syntaxspec:
341966525Speter	  /* This match depends on text properties.  These end with
342066525Speter	     aborting optimizations.  */
342166525Speter	  bufp->can_be_null = 1;
342266525Speter	  goto done;
342366525Speter#if 0
342417721Speter	  k = *p++;
342566525Speter	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
342666525Speter	  for (j = 0; j < simple_char_max; j++)
342717721Speter	    if (SYNTAX (j) == (enum syntaxcode) k)
342817721Speter	      fastmap[j] = 1;
342966525Speter
343066525Speter	  if (bufp->multibyte)
343166525Speter	    /* Any character set can possibly contain a character
343266525Speter	       whose syntax is K.  */
343366525Speter	    goto set_fastmap_for_multibyte_characters;
343417721Speter	  break;
343517721Speter
343617721Speter	case notsyntaxspec:
343717721Speter	  k = *p++;
343866525Speter	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
343966525Speter	  for (j = 0; j < simple_char_max; j++)
344017721Speter	    if (SYNTAX (j) != (enum syntaxcode) k)
344117721Speter	      fastmap[j] = 1;
344266525Speter
344366525Speter	  if (bufp->multibyte)
344466525Speter	    /* Any character set can possibly contain a character
344566525Speter	       whose syntax is not K.  */
344666525Speter	    goto set_fastmap_for_multibyte_characters;
344717721Speter	  break;
344866525Speter#endif
344917721Speter
345017721Speter
345166525Speter	case categoryspec:
345266525Speter	  k = *p++;
345366525Speter	  simple_char_max = (1 << BYTEWIDTH);
345466525Speter	  for (j = 0; j < simple_char_max; j++)
345566525Speter	    if (CHAR_HAS_CATEGORY (j, k))
345666525Speter	      fastmap[j] = 1;
345766525Speter
345866525Speter	  if (bufp->multibyte)
345966525Speter	    /* Any character set can possibly contain a character
346066525Speter	       whose category is K.  */
346166525Speter	    goto set_fastmap_for_multibyte_characters;
346266525Speter	  break;
346366525Speter
346466525Speter
346566525Speter	case notcategoryspec:
346666525Speter	  k = *p++;
346766525Speter	  simple_char_max = (1 << BYTEWIDTH);
346866525Speter	  for (j = 0; j < simple_char_max; j++)
346966525Speter	    if (!CHAR_HAS_CATEGORY (j, k))
347066525Speter	      fastmap[j] = 1;
347166525Speter
347266525Speter	  if (bufp->multibyte)
347366525Speter	    /* Any character set can possibly contain a character
347466525Speter	       whose category is not K.	 */
347566525Speter	    goto set_fastmap_for_multibyte_characters;
347666525Speter	  break;
347766525Speter
347817721Speter      /* All cases after this match the empty string.  These end with
347966525Speter	 `continue'.  */
348017721Speter
348117721Speter
348217721Speter	case before_dot:
348317721Speter	case at_dot:
348417721Speter	case after_dot:
348566525Speter	  continue;
348666525Speter#endif /* emacs */
348717721Speter
348817721Speter
348966525Speter	case no_op:
349066525Speter	case begline:
349166525Speter	case endline:
349217721Speter	case begbuf:
349317721Speter	case endbuf:
349466525Speter#ifndef emacs
349517721Speter	case wordbound:
349617721Speter	case notwordbound:
349717721Speter	case wordbeg:
349817721Speter	case wordend:
349966525Speter#endif
350066525Speter	case push_dummy_failure:
350166525Speter	  continue;
350217721Speter
350317721Speter
350417721Speter	case jump_n:
350566525Speter	case pop_failure_jump:
350617721Speter	case maybe_pop_jump:
350717721Speter	case jump:
350866525Speter	case jump_past_alt:
350917721Speter	case dummy_failure_jump:
351066525Speter	  EXTRACT_NUMBER_AND_INCR (j, p);
351125839Speter	  p += j;
351217721Speter	  if (j > 0)
351317721Speter	    continue;
351425839Speter
351566525Speter	  /* Jump backward implies we just went through the body of a
351666525Speter	     loop and matched nothing.	Opcode jumped to should be
351766525Speter	     `on_failure_jump' or `succeed_n'.	Just treat it like an
351866525Speter	     ordinary jump.  For a * loop, it has pushed its failure
351966525Speter	     point already; if so, discard that as redundant.  */
352066525Speter	  if ((re_opcode_t) *p != on_failure_jump
352117721Speter	      && (re_opcode_t) *p != succeed_n)
352217721Speter	    continue;
352317721Speter
352466525Speter	  p++;
352566525Speter	  EXTRACT_NUMBER_AND_INCR (j, p);
352666525Speter	  p += j;
352725839Speter
352866525Speter	  /* If what's on the stack is where we are now, pop it.  */
352966525Speter	  if (!FAIL_STACK_EMPTY ()
353025839Speter	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
353166525Speter	    fail_stack.avail--;
353217721Speter
353366525Speter	  continue;
353417721Speter
353517721Speter
353666525Speter	case on_failure_jump:
353766525Speter	case on_failure_keep_string_jump:
353817721Speter	handle_on_failure_jump:
353966525Speter	  EXTRACT_NUMBER_AND_INCR (j, p);
354017721Speter
354166525Speter	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
354266525Speter	     end of the pattern.  We don't want to push such a point,
354366525Speter	     since when we restore it above, entering the switch will
354466525Speter	     increment `p' past the end of the pattern.	 We don't need
354566525Speter	     to push such a point since we obviously won't find any more
354666525Speter	     fastmap entries beyond `pend'.  Such a pattern can match
354766525Speter	     the null string, though.  */
354866525Speter	  if (p + j < pend)
354966525Speter	    {
355066525Speter	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
355166525Speter		{
355266525Speter		  RESET_FAIL_STACK ();
355366525Speter		  return -2;
355466525Speter		}
355566525Speter	    }
355666525Speter	  else
355766525Speter	    bufp->can_be_null = 1;
355817721Speter
355966525Speter	  if (succeed_n_p)
356066525Speter	    {
356166525Speter	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
356266525Speter	      succeed_n_p = false;
356317721Speter	    }
356417721Speter
356566525Speter	  continue;
356617721Speter
356717721Speter
356817721Speter	case succeed_n:
356966525Speter	  /* Get to the number of times to succeed.  */
357066525Speter	  p += 2;
357117721Speter
357266525Speter	  /* Increment p past the n for when k != 0.  */
357366525Speter	  EXTRACT_NUMBER_AND_INCR (k, p);
357466525Speter	  if (k == 0)
357517721Speter	    {
357666525Speter	      p -= 4;
357766525Speter	      succeed_n_p = true;  /* Spaghetti code alert.  */
357866525Speter	      goto handle_on_failure_jump;
357966525Speter	    }
358066525Speter	  continue;
358117721Speter
358217721Speter
358317721Speter	case set_number_at:
358466525Speter	  p += 4;
358566525Speter	  continue;
358617721Speter
358717721Speter
358817721Speter	case start_memory:
358966525Speter	case stop_memory:
359017721Speter	  p += 2;
359117721Speter	  continue;
359217721Speter
359317721Speter
359417721Speter	default:
359566525Speter	  abort (); /* We have listed all the cases.  */
359666525Speter	} /* switch *p++ */
359717721Speter
359817721Speter      /* Getting here means we have found the possible starting
359966525Speter	 characters for one path of the pattern -- and that the empty
360066525Speter	 string does not match.	 We need not follow this path further.
360166525Speter	 Instead, look at the next alternative (remembered on the
360266525Speter	 stack), or quit if no more.  The test at the top of the loop
360366525Speter	 does these things.  */
360417721Speter      path_can_be_null = false;
360517721Speter      p = pend;
360617721Speter    } /* while p */
360717721Speter
360817721Speter  /* Set `can_be_null' for the last path (also the first path, if the
360966525Speter     pattern is empty).	 */
361017721Speter  bufp->can_be_null |= path_can_be_null;
361166525Speter
361266525Speter done:
361366525Speter  RESET_FAIL_STACK ();
361417721Speter  return 0;
361517721Speter} /* re_compile_fastmap */
361617721Speter
361717721Speter/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
361817721Speter   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
361917721Speter   this memory for recording register information.  STARTS and ENDS
362017721Speter   must be allocated using the malloc library routine, and must each
362117721Speter   be at least NUM_REGS * sizeof (regoff_t) bytes long.
362217721Speter
362317721Speter   If NUM_REGS == 0, then subsequent matches should allocate their own
362417721Speter   register data.
362517721Speter
362617721Speter   Unless this function is called, the first search or match using
362717721Speter   PATTERN_BUFFER will allocate its own register data, without
362817721Speter   freeing the old data.  */
362917721Speter
363017721Spetervoid
363117721Speterre_set_registers (bufp, regs, num_regs, starts, ends)
363217721Speter    struct re_pattern_buffer *bufp;
363317721Speter    struct re_registers *regs;
363417721Speter    unsigned num_regs;
363517721Speter    regoff_t *starts, *ends;
363617721Speter{
363717721Speter  if (num_regs)
363817721Speter    {
363917721Speter      bufp->regs_allocated = REGS_REALLOCATE;
364017721Speter      regs->num_regs = num_regs;
364117721Speter      regs->start = starts;
364217721Speter      regs->end = ends;
364317721Speter    }
364417721Speter  else
364517721Speter    {
364617721Speter      bufp->regs_allocated = REGS_UNALLOCATED;
364717721Speter      regs->num_regs = 0;
364817721Speter      regs->start = regs->end = (regoff_t *) 0;
364917721Speter    }
365017721Speter}
365117721Speter
365266525Speter/* Searching routines.	*/
365317721Speter
365417721Speter/* Like re_search_2, below, but only one string is specified, and
365517721Speter   doesn't let you say where to stop matching. */
365617721Speter
365717721Speterint
365817721Speterre_search (bufp, string, size, startpos, range, regs)
365917721Speter     struct re_pattern_buffer *bufp;
366017721Speter     const char *string;
366117721Speter     int size, startpos, range;
366217721Speter     struct re_registers *regs;
366317721Speter{
366425839Speter  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
366517721Speter		      regs, size);
366617721Speter}
366717721Speter
366866525Speter/* End address of virtual concatenation of string.  */
366966525Speter#define STOP_ADDR_VSTRING(P)				\
367066525Speter  (((P) >= size1 ? string2 + size2 : string1 + size1))
367117721Speter
367266525Speter/* Address of POS in the concatenation of virtual string. */
367366525Speter#define POS_ADDR_VSTRING(POS)					\
367466525Speter  (((POS) >= size1 ? string2 - size1 : string1) + (POS))
367566525Speter
367617721Speter/* Using the compiled pattern in BUFP->buffer, first tries to match the
367717721Speter   virtual concatenation of STRING1 and STRING2, starting first at index
367817721Speter   STARTPOS, then at STARTPOS + 1, and so on.
367925839Speter
368017721Speter   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
368125839Speter
368217721Speter   RANGE is how far to scan while trying to match.  RANGE = 0 means try
368317721Speter   only at STARTPOS; in general, the last start tried is STARTPOS +
368417721Speter   RANGE.
368525839Speter
368617721Speter   In REGS, return the indices of the virtual concatenation of STRING1
368717721Speter   and STRING2 that matched the entire BUFP->buffer and its contained
368817721Speter   subexpressions.
368925839Speter
369017721Speter   Do not consider matching one past the index STOP in the virtual
369117721Speter   concatenation of STRING1 and STRING2.
369217721Speter
369317721Speter   We return either the position in the strings at which the match was
369417721Speter   found, -1 if no match, or -2 if error (such as failure
369517721Speter   stack overflow).  */
369617721Speter
369717721Speterint
369817721Speterre_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
369917721Speter     struct re_pattern_buffer *bufp;
370017721Speter     const char *string1, *string2;
370117721Speter     int size1, size2;
370217721Speter     int startpos;
370317721Speter     int range;
370417721Speter     struct re_registers *regs;
370517721Speter     int stop;
370617721Speter{
370717721Speter  int val;
370817721Speter  register char *fastmap = bufp->fastmap;
370966525Speter  register RE_TRANSLATE_TYPE translate = bufp->translate;
371017721Speter  int total_size = size1 + size2;
371117721Speter  int endpos = startpos + range;
371266525Speter  int anchored_start = 0;
371317721Speter
371466525Speter  /* Nonzero if we have to concern multibyte character.	 */
371566525Speter  int multibyte = bufp->multibyte;
371666525Speter
371717721Speter  /* Check for out-of-range STARTPOS.  */
371817721Speter  if (startpos < 0 || startpos > total_size)
371917721Speter    return -1;
372025839Speter
372117721Speter  /* Fix up RANGE if it might eventually take us outside
372266525Speter     the virtual concatenation of STRING1 and STRING2.
372366525Speter     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
372466525Speter  if (endpos < 0)
372566525Speter    range = 0 - startpos;
372617721Speter  else if (endpos > total_size)
372717721Speter    range = total_size - startpos;
372817721Speter
372917721Speter  /* If the search isn't to be a backwards one, don't waste time in a
373066525Speter     search for a pattern anchored at beginning of buffer.  */
373117721Speter  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
373217721Speter    {
373317721Speter      if (startpos > 0)
373417721Speter	return -1;
373517721Speter      else
373666525Speter	range = 0;
373717721Speter    }
373817721Speter
373925839Speter#ifdef emacs
374025839Speter  /* In a forward search for something that starts with \=.
374125839Speter     don't keep searching past point.  */
374225839Speter  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
374325839Speter    {
374466525Speter      range = PT_BYTE - BEGV_BYTE - startpos;
374566525Speter      if (range < 0)
374625839Speter	return -1;
374725839Speter    }
374825839Speter#endif /* emacs */
374925839Speter
375017721Speter  /* Update the fastmap now if not correct already.  */
375117721Speter  if (fastmap && !bufp->fastmap_accurate)
375217721Speter    if (re_compile_fastmap (bufp) == -2)
375317721Speter      return -2;
375425839Speter
375566525Speter  /* See whether the pattern is anchored.  */
375666525Speter  if (bufp->buffer[0] == begline)
375766525Speter    anchored_start = 1;
375866525Speter
375966525Speter#ifdef emacs
376066525Speter  gl_state.object = re_match_object;
376166525Speter  {
376266525Speter    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
376366525Speter    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
376466525Speter
376566525Speter    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
376666525Speter  }
376766525Speter#endif
376866525Speter
376917721Speter  /* Loop through the string, looking for a place to start matching.  */
377017721Speter  for (;;)
377125839Speter    {
377266525Speter      /* If the pattern is anchored,
377366525Speter	 skip quickly past places we cannot match.
377466525Speter	 We don't bother to treat startpos == 0 specially
377566525Speter	 because that case doesn't repeat.  */
377666525Speter      if (anchored_start && startpos > 0)
377766525Speter	{
377866525Speter	  if (! (bufp->newline_anchor
377966525Speter		 && ((startpos <= size1 ? string1[startpos - 1]
378066525Speter		      : string2[startpos - size1 - 1])
378166525Speter		     == '\n')))
378266525Speter	    goto advance;
378366525Speter	}
378466525Speter
378517721Speter      /* If a fastmap is supplied, skip quickly over characters that
378666525Speter	 cannot be the start of a match.  If the pattern can match the
378766525Speter	 null string, however, we don't need to skip characters; we want
378866525Speter	 the first null string.	 */
378917721Speter      if (fastmap && startpos < total_size && !bufp->can_be_null)
379017721Speter	{
379166525Speter	  register const char *d;
379266525Speter	  register unsigned int buf_ch;
379366525Speter
379466525Speter	  d = POS_ADDR_VSTRING (startpos);
379566525Speter
379666525Speter	  if (range > 0)	/* Searching forwards.	*/
379717721Speter	    {
379817721Speter	      register int lim = 0;
379917721Speter	      int irange = range;
380017721Speter
380166525Speter	      if (startpos < size1 && startpos + range >= size1)
380266525Speter		lim = range - (size1 - startpos);
380317721Speter
380466525Speter	      /* Written out as an if-else to avoid testing `translate'
380566525Speter		 inside the loop.  */
380666525Speter	      if (RE_TRANSLATE_P (translate))
380766525Speter		{
380866525Speter		  if (multibyte)
380966525Speter		    while (range > lim)
381066525Speter		      {
381166525Speter			int buf_charlen;
381225839Speter
381366525Speter			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
381466525Speter							 buf_charlen);
381566525Speter
381666525Speter			buf_ch = RE_TRANSLATE (translate, buf_ch);
381766525Speter			if (buf_ch >= 0400
381866525Speter			    || fastmap[buf_ch])
381966525Speter			  break;
382066525Speter
382166525Speter			range -= buf_charlen;
382266525Speter			d += buf_charlen;
382366525Speter		      }
382466525Speter		  else
382566525Speter		    while (range > lim
382666525Speter			   && !fastmap[(unsigned char)
382766525Speter				       RE_TRANSLATE (translate, (unsigned char) *d)])
382866525Speter		      {
382966525Speter			d++;
383066525Speter			range--;
383166525Speter		      }
383266525Speter		}
383317721Speter	      else
383466525Speter		while (range > lim && !fastmap[(unsigned char) *d])
383566525Speter		  {
383666525Speter		    d++;
383766525Speter		    range--;
383866525Speter		  }
383917721Speter
384017721Speter	      startpos += irange - range;
384117721Speter	    }
384266525Speter	  else				/* Searching backwards.	 */
384317721Speter	    {
384466525Speter	      int room = (size1 == 0 || startpos >= size1
384566525Speter			  ? size2 + size1 - startpos
384666525Speter			  : size1 - startpos);
384717721Speter
384866525Speter	      buf_ch = STRING_CHAR (d, room);
384966525Speter	      if (RE_TRANSLATE_P (translate))
385066525Speter		buf_ch = RE_TRANSLATE (translate, buf_ch);
385166525Speter
385266525Speter	      if (! (buf_ch >= 0400
385366525Speter		     || fastmap[buf_ch]))
385417721Speter		goto advance;
385517721Speter	    }
385617721Speter	}
385717721Speter
385817721Speter      /* If can't match the null string, and that's all we have left, fail.  */
385917721Speter      if (range >= 0 && startpos == total_size && fastmap
386066525Speter	  && !bufp->can_be_null)
386117721Speter	return -1;
386217721Speter
386366525Speter      val = re_match_2_internal (bufp, string1, size1, string2, size2,
386466525Speter				 startpos, regs, stop);
386566525Speter#ifndef REGEX_MALLOC
386666525Speter#ifdef C_ALLOCA
386766525Speter      alloca (0);
386866525Speter#endif
386966525Speter#endif
387066525Speter
387117721Speter      if (val >= 0)
387217721Speter	return startpos;
387325839Speter
387417721Speter      if (val == -2)
387517721Speter	return -2;
387617721Speter
387717721Speter    advance:
387825839Speter      if (!range)
387966525Speter	break;
388025839Speter      else if (range > 0)
388166525Speter	{
388266525Speter	  /* Update STARTPOS to the next character boundary.  */
388366525Speter	  if (multibyte)
388466525Speter	    {
388566525Speter	      const unsigned char *p
388666525Speter		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
388766525Speter	      const unsigned char *pend
388866525Speter		= (const unsigned char *) STOP_ADDR_VSTRING (startpos);
388966525Speter	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
389066525Speter
389166525Speter	      range -= len;
389266525Speter	      if (range < 0)
389366525Speter		break;
389466525Speter	      startpos += len;
389566525Speter	    }
389666525Speter	  else
389766525Speter	    {
389866525Speter	      range--;
389966525Speter	      startpos++;
390066525Speter	    }
390166525Speter	}
390217721Speter      else
390366525Speter	{
390466525Speter	  range++;
390566525Speter	  startpos--;
390666525Speter
390766525Speter	  /* Update STARTPOS to the previous character boundary.  */
390866525Speter	  if (multibyte)
390966525Speter	    {
391066525Speter	      const unsigned char *p
391166525Speter		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
391266525Speter	      int len = 0;
391366525Speter
391466525Speter	      /* Find the head of multibyte form.  */
391566525Speter	      while (!CHAR_HEAD_P (*p))
391666525Speter		p--, len++;
391766525Speter
391866525Speter	      /* Adjust it. */
391966525Speter#if 0				/* XXX */
392066525Speter	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
392166525Speter		;
392266525Speter	      else
392366525Speter#endif
392466525Speter		{
392566525Speter		  range += len;
392666525Speter		  if (range > 0)
392766525Speter		    break;
392866525Speter
392966525Speter		  startpos -= len;
393066525Speter		}
393166525Speter	    }
393266525Speter	}
393317721Speter    }
393417721Speter  return -1;
393517721Speter} /* re_search_2 */
393617721Speter
393717721Speter/* Declarations and macros for re_match_2.  */
393817721Speter
393917721Speterstatic int bcmp_translate ();
394017721Speterstatic boolean alt_match_null_string_p (),
394166525Speter	       common_op_match_null_string_p (),
394266525Speter	       group_match_null_string_p ();
394317721Speter
394417721Speter/* This converts PTR, a pointer into one of the search strings `string1'
394517721Speter   and `string2' into an offset from the beginning of that string.  */
394666525Speter#define POINTER_TO_OFFSET(ptr)			\
394766525Speter  (FIRST_STRING_P (ptr)				\
394866525Speter   ? ((regoff_t) ((ptr) - string1))		\
394966525Speter   : ((regoff_t) ((ptr) - string2 + size1)))
395017721Speter
395117721Speter/* Macros for dealing with the split strings in re_match_2.  */
395217721Speter
395317721Speter#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
395417721Speter
395517721Speter/* Call before fetching a character with *d.  This switches over to
395617721Speter   string2 if necessary.  */
395717721Speter#define PREFETCH()							\
395866525Speter  while (d == dend)							\
395917721Speter    {									\
396017721Speter      /* End of string2 => fail.  */					\
396166525Speter      if (dend == end_match_2)						\
396266525Speter	goto fail;							\
396366525Speter      /* End of string1 => advance to string2.	*/			\
396466525Speter      d = string2;							\
396517721Speter      dend = end_match_2;						\
396617721Speter    }
396717721Speter
396817721Speter
396917721Speter/* Test if at very beginning or at very end of the virtual concatenation
397066525Speter   of `string1' and `string2'.	If only one string, it's `string2'.  */
397117721Speter#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
397225839Speter#define AT_STRINGS_END(d) ((d) == end2)
397317721Speter
397417721Speter
397517721Speter/* Test if D points to a character which is word-constituent.  We have
397617721Speter   two special cases to check for: if past the end of string1, look at
397717721Speter   the first character in string2; and if before the beginning of
397817721Speter   string2, look at the last character in string1.  */
397917721Speter#define WORDCHAR_P(d)							\
398017721Speter  (SYNTAX ((d) == end1 ? *string2					\
398166525Speter	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
398217721Speter   == Sword)
398317721Speter
398466525Speter/* Disabled due to a compiler bug -- see comment at case wordbound */
398566525Speter
398666525Speter/* The comment at case wordbound is following one, but we don't use
398766525Speter   AT_WORD_BOUNDARY anymore to support multibyte form.
398866525Speter
398966525Speter   The DEC Alpha C compiler 3.x generates incorrect code for the
399066525Speter   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
399166525Speter   AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
399266525Speter   macro and introducing temporary variables works around the bug.  */
399366525Speter
399466525Speter#if 0
399517721Speter/* Test if the character before D and the one at D differ with respect
399617721Speter   to being word-constituent.  */
399717721Speter#define AT_WORD_BOUNDARY(d)						\
399817721Speter  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
399917721Speter   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
400066525Speter#endif
400117721Speter
400217721Speter/* Free everything we malloc.  */
400366525Speter#ifdef MATCH_MAY_ALLOCATE
4004175261Sobrien#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
400517721Speter#define FREE_VARIABLES()						\
400617721Speter  do {									\
400766525Speter    REGEX_FREE_STACK (fail_stack.stack);				\
400817721Speter    FREE_VAR (regstart);						\
400917721Speter    FREE_VAR (regend);							\
401017721Speter    FREE_VAR (old_regstart);						\
401117721Speter    FREE_VAR (old_regend);						\
401217721Speter    FREE_VAR (best_regstart);						\
401317721Speter    FREE_VAR (best_regend);						\
401417721Speter    FREE_VAR (reg_info);						\
401517721Speter    FREE_VAR (reg_dummy);						\
401617721Speter    FREE_VAR (reg_info_dummy);						\
401717721Speter  } while (0)
401866525Speter#else
401966525Speter#define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
402066525Speter#endif /* not MATCH_MAY_ALLOCATE */
402117721Speter
402266525Speter/* These values must meet several constraints.	They must not be valid
402317721Speter   register values; since we have a limit of 255 registers (because
402417721Speter   we use only one byte in the pattern for the register number), we can
402566525Speter   use numbers larger than 255.	 They must differ by 1, because of
402617721Speter   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
402717721Speter   be larger than the value for the highest register, so we do not try
402866525Speter   to actually save any registers when none are active.	 */
402917721Speter#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
403017721Speter#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
403117721Speter
403217721Speter/* Matching routines.  */
403317721Speter
403466525Speter#ifndef emacs	/* Emacs never uses this.  */
403517721Speter/* re_match is like re_match_2 except it takes only a single string.  */
403617721Speter
403717721Speterint
403817721Speterre_match (bufp, string, size, pos, regs)
403917721Speter     struct re_pattern_buffer *bufp;
404017721Speter     const char *string;
404117721Speter     int size, pos;
404217721Speter     struct re_registers *regs;
404366525Speter{
404466525Speter  int result = re_match_2_internal (bufp, NULL, 0, string, size,
404566525Speter				    pos, regs, size);
404666525Speter#ifndef REGEX_MALLOC	/* CVS */
404766525Speter#ifdef C_ALLOCA		/* CVS */
404866525Speter  alloca (0);
404966525Speter#endif			/* CVS */
405066525Speter#endif			/* CVS */
405166525Speter  return result;
405217721Speter}
405317721Speter#endif /* not emacs */
405417721Speter
405566525Speter#ifdef emacs
405666525Speter/* In Emacs, this is the string or buffer in which we
405766525Speter   are matching.  It is used for looking up syntax properties.	*/
405866525SpeterLisp_Object re_match_object;
405966525Speter#endif
406017721Speter
406117721Speter/* re_match_2 matches the compiled pattern in BUFP against the
406217721Speter   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
406317721Speter   and SIZE2, respectively).  We start matching at POS, and stop
406417721Speter   matching at STOP.
406525839Speter
406617721Speter   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
406766525Speter   store offsets for the substring each group matched in REGS.	See the
406817721Speter   documentation for exactly how many groups we fill.
406917721Speter
407017721Speter   We return -1 if no match, -2 if an internal error (such as the
407166525Speter   failure stack overflowing).	Otherwise, we return the length of the
407217721Speter   matched substring.  */
407317721Speter
407417721Speterint
407517721Speterre_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
407617721Speter     struct re_pattern_buffer *bufp;
407717721Speter     const char *string1, *string2;
407817721Speter     int size1, size2;
407917721Speter     int pos;
408017721Speter     struct re_registers *regs;
408117721Speter     int stop;
408217721Speter{
408366525Speter  int result;
408466525Speter
408566525Speter#ifdef emacs
408666525Speter  int charpos;
408766525Speter  int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
408866525Speter  gl_state.object = re_match_object;
408966525Speter  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
409066525Speter  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
409166525Speter#endif
409266525Speter
409366525Speter  result = re_match_2_internal (bufp, string1, size1, string2, size2,
409466525Speter				pos, regs, stop);
409566525Speter#ifndef REGEX_MALLOC	/* CVS */
409666525Speter#ifdef C_ALLOCA		/* CVS */
409766525Speter  alloca (0);
409866525Speter#endif			/* CVS */
409966525Speter#endif			/* CVS */
410066525Speter  return result;
410166525Speter}
410266525Speter
410366525Speter/* This is a separate function so that we can force an alloca cleanup
410466525Speter   afterwards.	*/
410566525Speterstatic int
410666525Speterre_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
410766525Speter     struct re_pattern_buffer *bufp;
410866525Speter     const char *string1, *string2;
410966525Speter     int size1, size2;
411066525Speter     int pos;
411166525Speter     struct re_registers *regs;
411266525Speter     int stop;
411366525Speter{
411417721Speter  /* General temporaries.  */
411517721Speter  int mcnt;
411617721Speter  unsigned char *p1;
411717721Speter
411817721Speter  /* Just past the end of the corresponding string.  */
411917721Speter  const char *end1, *end2;
412017721Speter
412117721Speter  /* Pointers into string1 and string2, just past the last characters in
412266525Speter     each to consider matching.	 */
412317721Speter  const char *end_match_1, *end_match_2;
412417721Speter
412517721Speter  /* Where we are in the data, and the end of the current string.  */
412617721Speter  const char *d, *dend;
412725839Speter
412817721Speter  /* Where we are in the pattern, and the end of the pattern.  */
412917721Speter  unsigned char *p = bufp->buffer;
413017721Speter  register unsigned char *pend = p + bufp->used;
413117721Speter
413266525Speter  /* Mark the opcode just after a start_memory, so we can test for an
413366525Speter     empty subpattern when we get to the stop_memory.  */
413466525Speter  unsigned char *just_past_start_mem = 0;
413517721Speter
413666525Speter  /* We use this to map every character in the string.	*/
413766525Speter  RE_TRANSLATE_TYPE translate = bufp->translate;
413866525Speter
413966525Speter  /* Nonzero if we have to concern multibyte character.	 */
414066525Speter  int multibyte = bufp->multibyte;
414166525Speter
414217721Speter  /* Failure point stack.  Each place that can handle a failure further
414317721Speter     down the line pushes a failure point on this stack.  It consists of
414417721Speter     restart, regend, and reg_info for all registers corresponding to
414517721Speter     the subexpressions we're currently inside, plus the number of such
414617721Speter     registers, and, finally, two char *'s.  The first char * is where
414717721Speter     to resume scanning the pattern; the second one is where to resume
414817721Speter     scanning the strings.  If the latter is zero, the failure point is
414917721Speter     a ``dummy''; if a failure happens and the failure point is a dummy,
415066525Speter     it gets discarded and the next next one is tried.	*/
415166525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
415217721Speter  fail_stack_type fail_stack;
415366525Speter#endif
415417721Speter#ifdef DEBUG
415517721Speter  static unsigned failure_id = 0;
415617721Speter  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
415717721Speter#endif
415817721Speter
415966525Speter  /* This holds the pointer to the failure stack, when
416066525Speter     it is allocated relocatably.  */
416166525Speter  fail_stack_elt_t *failure_stack_ptr;
416266525Speter
416317721Speter  /* We fill all the registers internally, independent of what we
416466525Speter     return, for use in backreferences.	 The number here includes
416517721Speter     an element for register zero.  */
416617721Speter  unsigned num_regs = bufp->re_nsub + 1;
416725839Speter
416817721Speter  /* The currently active registers.  */
416917721Speter  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
417017721Speter  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
417117721Speter
417217721Speter  /* Information on the contents of registers. These are pointers into
417317721Speter     the input strings; they record just what was matched (on this
417417721Speter     attempt) by a subexpression part of the pattern, that is, the
417517721Speter     regnum-th regstart pointer points to where in the pattern we began
417617721Speter     matching and the regnum-th regend points to right after where we
417717721Speter     stopped matching the regnum-th subexpression.  (The zeroth register
417817721Speter     keeps track of what the whole pattern matches.)  */
417966525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
418017721Speter  const char **regstart, **regend;
418166525Speter#endif
418217721Speter
418317721Speter  /* If a group that's operated upon by a repetition operator fails to
418417721Speter     match anything, then the register for its start will need to be
418517721Speter     restored because it will have been set to wherever in the string we
418617721Speter     are when we last see its open-group operator.  Similarly for a
418717721Speter     register's end.  */
418866525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
418917721Speter  const char **old_regstart, **old_regend;
419066525Speter#endif
419117721Speter
419217721Speter  /* The is_active field of reg_info helps us keep track of which (possibly
419317721Speter     nested) subexpressions we are currently in. The matched_something
419417721Speter     field of reg_info[reg_num] helps us tell whether or not we have
419517721Speter     matched any of the pattern so far this time through the reg_num-th
419617721Speter     subexpression.  These two fields get reset each time through any
419766525Speter     loop their register is in.	 */
419866525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
419925839Speter  register_info_type *reg_info;
420066525Speter#endif
420117721Speter
420217721Speter  /* The following record the register info as found in the above
420325839Speter     variables when we find a match better than any we've seen before.
420417721Speter     This happens as we backtrack through the failure points, which in
420517721Speter     turn happens only if we have not yet matched the entire string. */
420617721Speter  unsigned best_regs_set = false;
420766525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
420817721Speter  const char **best_regstart, **best_regend;
420966525Speter#endif
421066525Speter
421117721Speter  /* Logically, this is `best_regend[0]'.  But we don't want to have to
421217721Speter     allocate space for that if we're not allocating space for anything
421366525Speter     else (see below).	Also, we never need info about register 0 for
421417721Speter     any of the other register vectors, and it seems rather a kludge to
421517721Speter     treat `best_regend' differently than the rest.  So we keep track of
421617721Speter     the end of the best match so far in a separate variable.  We
421717721Speter     initialize this to NULL so that when we backtrack the first time
421817721Speter     and need to test it, it's not garbage.  */
421917721Speter  const char *match_end = NULL;
422017721Speter
422166525Speter  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
422266525Speter  int set_regs_matched_done = 0;
422366525Speter
422417721Speter  /* Used when we pop values we don't care about.  */
422566525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
422617721Speter  const char **reg_dummy;
422717721Speter  register_info_type *reg_info_dummy;
422866525Speter#endif
422917721Speter
423017721Speter#ifdef DEBUG
423117721Speter  /* Counts the total number of registers pushed.  */
423225839Speter  unsigned num_regs_pushed = 0;
423317721Speter#endif
423417721Speter
423517721Speter  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
423625839Speter
423717721Speter  INIT_FAIL_STACK ();
423825839Speter
423966525Speter#ifdef MATCH_MAY_ALLOCATE
424017721Speter  /* Do not bother to initialize all the register variables if there are
424117721Speter     no groups in the pattern, as it takes a fair amount of time.  If
424217721Speter     there are groups, we include space for register 0 (the whole
424317721Speter     pattern), even though we never use it, since it simplifies the
424417721Speter     array indexing.  We should fix this.  */
424517721Speter  if (bufp->re_nsub)
424617721Speter    {
424717721Speter      regstart = REGEX_TALLOC (num_regs, const char *);
424817721Speter      regend = REGEX_TALLOC (num_regs, const char *);
424917721Speter      old_regstart = REGEX_TALLOC (num_regs, const char *);
425017721Speter      old_regend = REGEX_TALLOC (num_regs, const char *);
425117721Speter      best_regstart = REGEX_TALLOC (num_regs, const char *);
425217721Speter      best_regend = REGEX_TALLOC (num_regs, const char *);
425317721Speter      reg_info = REGEX_TALLOC (num_regs, register_info_type);
425417721Speter      reg_dummy = REGEX_TALLOC (num_regs, const char *);
425517721Speter      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
425617721Speter
425725839Speter      if (!(regstart && regend && old_regstart && old_regend && reg_info
425866525Speter	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
425966525Speter	{
426066525Speter	  FREE_VARIABLES ();
426166525Speter	  return -2;
426266525Speter	}
426317721Speter    }
426417721Speter  else
426517721Speter    {
426617721Speter      /* We must initialize all our variables to NULL, so that
426766525Speter	 `FREE_VARIABLES' doesn't try to free them.  */
426817721Speter      regstart = regend = old_regstart = old_regend = best_regstart
426966525Speter	= best_regend = reg_dummy = NULL;
427017721Speter      reg_info = reg_info_dummy = (register_info_type *) NULL;
427117721Speter    }
427266525Speter#endif /* MATCH_MAY_ALLOCATE */
427317721Speter
427417721Speter  /* The starting position is bogus.  */
427517721Speter  if (pos < 0 || pos > size1 + size2)
427617721Speter    {
427717721Speter      FREE_VARIABLES ();
427817721Speter      return -1;
427917721Speter    }
428025839Speter
428117721Speter  /* Initialize subexpression text positions to -1 to mark ones that no
428217721Speter     start_memory/stop_memory has been seen for. Also initialize the
428317721Speter     register information struct.  */
428417721Speter  for (mcnt = 1; mcnt < num_regs; mcnt++)
428517721Speter    {
428625839Speter      regstart[mcnt] = regend[mcnt]
428766525Speter	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
428825839Speter
428917721Speter      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
429017721Speter      IS_ACTIVE (reg_info[mcnt]) = 0;
429117721Speter      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
429217721Speter      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
429317721Speter    }
429425839Speter
429517721Speter  /* We move `string1' into `string2' if the latter's empty -- but not if
429666525Speter     `string1' is null.	 */
429717721Speter  if (size2 == 0 && string1 != NULL)
429817721Speter    {
429917721Speter      string2 = string1;
430017721Speter      size2 = size1;
430117721Speter      string1 = 0;
430217721Speter      size1 = 0;
430317721Speter    }
430417721Speter  end1 = string1 + size1;
430517721Speter  end2 = string2 + size2;
430617721Speter
430717721Speter  /* Compute where to stop matching, within the two strings.  */
430817721Speter  if (stop <= size1)
430917721Speter    {
431017721Speter      end_match_1 = string1 + stop;
431117721Speter      end_match_2 = string2;
431217721Speter    }
431317721Speter  else
431417721Speter    {
431517721Speter      end_match_1 = end1;
431617721Speter      end_match_2 = string2 + stop - size1;
431717721Speter    }
431817721Speter
431925839Speter  /* `p' scans through the pattern as `d' scans through the data.
432017721Speter     `dend' is the end of the input string that `d' points within.  `d'
432117721Speter     is advanced into the following input string whenever necessary, but
432217721Speter     this happens before fetching; therefore, at the beginning of the
432317721Speter     loop, `d' can be pointing at the end of a string, but it cannot
432417721Speter     equal `string2'.  */
432517721Speter  if (size1 > 0 && pos <= size1)
432617721Speter    {
432717721Speter      d = string1 + pos;
432817721Speter      dend = end_match_1;
432917721Speter    }
433017721Speter  else
433117721Speter    {
433217721Speter      d = string2 + pos - size1;
433317721Speter      dend = end_match_2;
433417721Speter    }
433517721Speter
433617721Speter  DEBUG_PRINT1 ("The compiled pattern is: ");
433717721Speter  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
433817721Speter  DEBUG_PRINT1 ("The string to match is: `");
433917721Speter  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
434017721Speter  DEBUG_PRINT1 ("'\n");
434125839Speter
434266525Speter  /* This loops over pattern commands.	It exits by returning from the
434317721Speter     function if the match is complete, or it drops through if the match
434417721Speter     fails at this starting point in the input data.  */
434517721Speter  for (;;)
434617721Speter    {
434717721Speter      DEBUG_PRINT2 ("\n0x%x: ", p);
434817721Speter
434917721Speter      if (p == pend)
435017721Speter	{ /* End of pattern means we might have succeeded.  */
435166525Speter	  DEBUG_PRINT1 ("end of pattern ... ");
435225839Speter
435317721Speter	  /* If we haven't matched the entire string, and we want the
435466525Speter	     longest match, try backtracking.  */
435566525Speter	  if (d != end_match_2)
435617721Speter	    {
435766525Speter	      /* 1 if this match ends in the same string (string1 or string2)
435866525Speter		 as the best previous match.  */
435966525Speter	      boolean same_str_p = (FIRST_STRING_P (match_end)
436066525Speter				    == MATCHING_IN_FIRST_STRING);
436166525Speter	      /* 1 if this match is the best seen so far.  */
436266525Speter	      boolean best_match_p;
436325839Speter
436466525Speter	      /* AIX compiler got confused when this was combined
436566525Speter		 with the previous declaration.	 */
436666525Speter	      if (same_str_p)
436766525Speter		best_match_p = d > match_end;
436866525Speter	      else
436966525Speter		best_match_p = !MATCHING_IN_FIRST_STRING;
437017721Speter
437166525Speter	      DEBUG_PRINT1 ("backtracking.\n");
437225839Speter
437366525Speter	      if (!FAIL_STACK_EMPTY ())
437466525Speter		{ /* More failure points to try.  */
437525839Speter
437666525Speter		  /* If exceeds best match so far, save it.  */
437766525Speter		  if (!best_regs_set || best_match_p)
437866525Speter		    {
437966525Speter		      best_regs_set = true;
438066525Speter		      match_end = d;
438117721Speter
438266525Speter		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
438325839Speter
438466525Speter		      for (mcnt = 1; mcnt < num_regs; mcnt++)
438566525Speter			{
438666525Speter			  best_regstart[mcnt] = regstart[mcnt];
438766525Speter			  best_regend[mcnt] = regend[mcnt];
438866525Speter			}
438966525Speter		    }
439066525Speter		  goto fail;
439166525Speter		}
439217721Speter
439366525Speter	      /* If no failure points, don't restore garbage.  And if
439466525Speter		 last match is real best match, don't restore second
439566525Speter		 best one. */
439666525Speter	      else if (best_regs_set && !best_match_p)
439766525Speter		{
439866525Speter		restore_best_regs:
439966525Speter		  /* Restore best match.  It may happen that `dend ==
440066525Speter		     end_match_1' while the restored d is in string2.
440166525Speter		     For example, the pattern `x.*y.*z' against the
440266525Speter		     strings `x-' and `y-z-', if the two strings are
440366525Speter		     not consecutive in memory.	 */
440466525Speter		  DEBUG_PRINT1 ("Restoring best registers.\n");
440566525Speter
440666525Speter		  d = match_end;
440766525Speter		  dend = ((d >= string1 && d <= end1)
440866525Speter			   ? end_match_1 : end_match_2);
440966525Speter
441017721Speter		  for (mcnt = 1; mcnt < num_regs; mcnt++)
441117721Speter		    {
441217721Speter		      regstart[mcnt] = best_regstart[mcnt];
441317721Speter		      regend[mcnt] = best_regend[mcnt];
441417721Speter		    }
441566525Speter		}
441666525Speter	    } /* d != end_match_2 */
441717721Speter
441866525Speter	succeed_label:
441966525Speter	  DEBUG_PRINT1 ("Accepting match.\n");
442017721Speter
442166525Speter	  /* If caller wants register contents data back, do it.  */
442266525Speter	  if (regs && !bufp->no_sub)
442317721Speter	    {
442466525Speter	      /* Have the register data arrays been allocated?	*/
442566525Speter	      if (bufp->regs_allocated == REGS_UNALLOCATED)
442666525Speter		{ /* No.  So allocate them with malloc.	 We need one
442766525Speter		     extra element beyond `num_regs' for the `-1' marker
442866525Speter		     GNU code uses.  */
442966525Speter		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
443066525Speter		  regs->start = TALLOC (regs->num_regs, regoff_t);
443166525Speter		  regs->end = TALLOC (regs->num_regs, regoff_t);
443266525Speter		  if (regs->start == NULL || regs->end == NULL)
443366525Speter		    {
443466525Speter		      FREE_VARIABLES ();
443566525Speter		      return -2;
443666525Speter		    }
443766525Speter		  bufp->regs_allocated = REGS_REALLOCATE;
443866525Speter		}
443966525Speter	      else if (bufp->regs_allocated == REGS_REALLOCATE)
444066525Speter		{ /* Yes.  If we need more elements than were already
444166525Speter		     allocated, reallocate them.  If we need fewer, just
444266525Speter		     leave it alone.  */
444366525Speter		  if (regs->num_regs < num_regs + 1)
444466525Speter		    {
444566525Speter		      regs->num_regs = num_regs + 1;
444666525Speter		      RETALLOC (regs->start, regs->num_regs, regoff_t);
444766525Speter		      RETALLOC (regs->end, regs->num_regs, regoff_t);
444866525Speter		      if (regs->start == NULL || regs->end == NULL)
444966525Speter			{
445066525Speter			  FREE_VARIABLES ();
445166525Speter			  return -2;
445266525Speter			}
445366525Speter		    }
445466525Speter		}
445566525Speter	      else
445625839Speter		{
445725839Speter		  /* These braces fend off a "empty body in an else-statement"
445866525Speter		     warning under GCC when assert expands to nothing.	*/
445925839Speter		  assert (bufp->regs_allocated == REGS_FIXED);
446025839Speter		}
446117721Speter
446266525Speter	      /* Convert the pointer data in `regstart' and `regend' to
446366525Speter		 indices.  Register zero has to be set differently,
446466525Speter		 since we haven't kept track of any info for it.  */
446566525Speter	      if (regs->num_regs > 0)
446666525Speter		{
446766525Speter		  regs->start[0] = pos;
446866525Speter		  regs->end[0] = (MATCHING_IN_FIRST_STRING
446966525Speter				  ? ((regoff_t) (d - string1))
447066525Speter				  : ((regoff_t) (d - string2 + size1)));
447166525Speter		}
447225839Speter
447366525Speter	      /* Go through the first `min (num_regs, regs->num_regs)'
447466525Speter		 registers, since that is all we initialized.  */
447517721Speter	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
447617721Speter		{
447766525Speter		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
447866525Speter		    regs->start[mcnt] = regs->end[mcnt] = -1;
447966525Speter		  else
448066525Speter		    {
448166525Speter		      regs->start[mcnt]
448266525Speter			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
448366525Speter		      regs->end[mcnt]
448466525Speter			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
448566525Speter		    }
448617721Speter		}
448725839Speter
448866525Speter	      /* If the regs structure we return has more elements than
448966525Speter		 were in the pattern, set the extra elements to -1.  If
449066525Speter		 we (re)allocated the registers, this is the case,
449166525Speter		 because we always allocate enough to have at least one
449266525Speter		 -1 at the end.	 */
449366525Speter	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
449466525Speter		regs->start[mcnt] = regs->end[mcnt] = -1;
449517721Speter	    } /* regs && !bufp->no_sub */
449617721Speter
449766525Speter	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
449866525Speter			nfailure_points_pushed, nfailure_points_popped,
449966525Speter			nfailure_points_pushed - nfailure_points_popped);
450066525Speter	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
450117721Speter
450266525Speter	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
450325839Speter			    ? string1
450417721Speter			    : string2 - size1);
450517721Speter
450666525Speter	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
450717721Speter
450866525Speter	  FREE_VARIABLES ();
450966525Speter	  return mcnt;
451066525Speter	}
451117721Speter
451266525Speter      /* Otherwise match next pattern command.	*/
451366525Speter      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
451417721Speter	{
451566525Speter	/* Ignore these.  Used to ignore the n of succeed_n's which
451666525Speter	   currently have n == 0.  */
451766525Speter	case no_op:
451866525Speter	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
451966525Speter	  break;
452017721Speter
452166525Speter	case succeed:
452266525Speter	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
452366525Speter	  goto succeed_label;
452417721Speter
452566525Speter	/* Match the next n pattern characters exactly.	 The following
452666525Speter	   byte in the pattern defines n, and the n bytes after that
452766525Speter	   are the characters to match.	 */
452817721Speter	case exactn:
452917721Speter	  mcnt = *p++;
453066525Speter	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
453117721Speter
453266525Speter	  /* This is written out as an if-else so we don't waste time
453366525Speter	     testing `translate' inside the loop.  */
453466525Speter	  if (RE_TRANSLATE_P (translate))
453517721Speter	    {
453666525Speter#ifdef emacs
453766525Speter	      if (multibyte)
453866525Speter		do
453966525Speter		  {
454066525Speter		    int pat_charlen, buf_charlen;
454166525Speter		    unsigned int pat_ch, buf_ch;
454266525Speter
454366525Speter		    PREFETCH ();
454466525Speter		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
454566525Speter		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
454666525Speter
454766525Speter		    if (RE_TRANSLATE (translate, buf_ch)
454866525Speter			!= pat_ch)
454966525Speter		      goto fail;
455066525Speter
455166525Speter		    p += pat_charlen;
455266525Speter		    d += buf_charlen;
455366525Speter		    mcnt -= pat_charlen;
455466525Speter		  }
455566525Speter		while (mcnt > 0);
455666525Speter	      else
455766525Speter#endif /* not emacs */
455866525Speter		do
455966525Speter		  {
456066525Speter		    PREFETCH ();
456166525Speter		    if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
456266525Speter			!= (unsigned char) *p++)
456366525Speter		      goto fail;
456466525Speter		    d++;
456566525Speter		  }
456666525Speter		while (--mcnt);
456717721Speter	    }
456817721Speter	  else
456917721Speter	    {
457017721Speter	      do
457117721Speter		{
457217721Speter		  PREFETCH ();
457317721Speter		  if (*d++ != (char) *p++) goto fail;
457417721Speter		}
457517721Speter	      while (--mcnt);
457617721Speter	    }
457717721Speter	  SET_REGS_MATCHED ();
457866525Speter	  break;
457917721Speter
458017721Speter
458166525Speter	/* Match any character except possibly a newline or a null.  */
458217721Speter	case anychar:
458366525Speter	  {
458466525Speter	    int buf_charlen;
458566525Speter	    unsigned int buf_ch;
458617721Speter
458766525Speter	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
458817721Speter
458966525Speter	    PREFETCH ();
459017721Speter
459166525Speter#ifdef emacs
459266525Speter	    if (multibyte)
459366525Speter	      buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
459466525Speter	    else
459566525Speter#endif /* not emacs */
459666525Speter	      {
459766525Speter		buf_ch = (unsigned char) *d;
459866525Speter		buf_charlen = 1;
459966525Speter	      }
460066525Speter
460166525Speter	    buf_ch = TRANSLATE (buf_ch);
460266525Speter
460366525Speter	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
460466525Speter		 && buf_ch == '\n')
460566525Speter		|| ((bufp->syntax & RE_DOT_NOT_NULL)
460666525Speter		    && buf_ch == '\000'))
460766525Speter	      goto fail;
460866525Speter
460966525Speter	    SET_REGS_MATCHED ();
461066525Speter	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
461166525Speter	    d += buf_charlen;
461266525Speter	  }
461317721Speter	  break;
461417721Speter
461517721Speter
461617721Speter	case charset:
461717721Speter	case charset_not:
461817721Speter	  {
461966525Speter	    register unsigned int c;
462017721Speter	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
462166525Speter	    int len;
462217721Speter
462366525Speter	    /* Start of actual range_table, or end of bitmap if there is no
462466525Speter	       range table.  */
462566525Speter	    unsigned char *range_table;
462617721Speter
462766525Speter	    /* Nonzero if there is range table.	 */
462866525Speter	    int range_table_exists;
462966525Speter
463066525Speter	    /* Number of ranges of range table.	 Not in bytes.	*/
463166525Speter	    int count;
463266525Speter
463366525Speter	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
463466525Speter
463517721Speter	    PREFETCH ();
463666525Speter	    c = (unsigned char) *d;
463717721Speter
463866525Speter	    range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
463966525Speter	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
464066525Speter	    if (range_table_exists)
464166525Speter	      EXTRACT_NUMBER_AND_INCR (count, range_table);
464266525Speter	    else
464366525Speter	      count = 0;
464466525Speter
464566525Speter	    if (multibyte && BASE_LEADING_CODE_P (c))
464666525Speter	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
464766525Speter
464866525Speter	    if (SINGLE_BYTE_CHAR_P (c))
464966525Speter	      {			/* Lookup bitmap.  */
465066525Speter		c = TRANSLATE (c); /* The character to match.  */
465166525Speter		len = 1;
465266525Speter
465366525Speter		/* Cast to `unsigned' instead of `unsigned char' in
465466525Speter		   case the bit list is a full 32 bytes long.  */
465566525Speter		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
465617721Speter		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
465717721Speter	      not = !not;
465866525Speter	      }
465966525Speter	    else if (range_table_exists)
466066525Speter	      CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
466117721Speter
466266525Speter	    p = CHARSET_RANGE_TABLE_END (range_table, count);
466317721Speter
466417721Speter	    if (!not) goto fail;
466525839Speter
466617721Speter	    SET_REGS_MATCHED ();
466766525Speter	    d += len;
466817721Speter	    break;
466917721Speter	  }
467017721Speter
467117721Speter
467266525Speter	/* The beginning of a group is represented by start_memory.
467366525Speter	   The arguments are the register number in the next byte, and the
467466525Speter	   number of groups inner to this one in the next.  The text
467566525Speter	   matched within the group is recorded (in the internal
467666525Speter	   registers data structure) under the register number.	 */
467766525Speter	case start_memory:
467817721Speter	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
467917721Speter
468066525Speter	  /* Find out if this group can match the empty string.	 */
468117721Speter	  p1 = p;		/* To send to group_match_null_string_p.  */
468225839Speter
468366525Speter	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
468466525Speter	    REG_MATCH_NULL_STRING_P (reg_info[*p])
468566525Speter	      = group_match_null_string_p (&p1, pend, reg_info);
468617721Speter
468766525Speter	  /* Save the position in the string where we were the last time
468866525Speter	     we were at this open-group operator in case the group is
468966525Speter	     operated upon by a repetition operator, e.g., with `(a*)*b'
469066525Speter	     against `ab'; then we want to ignore where we are now in
469166525Speter	     the string in case this attempt to match fails.  */
469266525Speter	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
469366525Speter			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
469466525Speter			     : regstart[*p];
469525839Speter	  DEBUG_PRINT2 ("  old_regstart: %d\n",
469617721Speter			 POINTER_TO_OFFSET (old_regstart[*p]));
469717721Speter
469866525Speter	  regstart[*p] = d;
469917721Speter	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
470017721Speter
470166525Speter	  IS_ACTIVE (reg_info[*p]) = 1;
470266525Speter	  MATCHED_SOMETHING (reg_info[*p]) = 0;
470325839Speter
470466525Speter	  /* Clear this whenever we change the register activity status.  */
470566525Speter	  set_regs_matched_done = 0;
470625839Speter
470766525Speter	  /* This is the new highest active register.  */
470866525Speter	  highest_active_reg = *p;
470917721Speter
471066525Speter	  /* If nothing was active before, this is the new lowest active
471166525Speter	     register.	*/
471266525Speter	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
471366525Speter	    lowest_active_reg = *p;
471417721Speter
471566525Speter	  /* Move past the register number and inner group count.  */
471666525Speter	  p += 2;
471766525Speter	  just_past_start_mem = p;
471817721Speter
471966525Speter	  break;
472066525Speter
472166525Speter
472266525Speter	/* The stop_memory opcode represents the end of a group.  Its
472366525Speter	   arguments are the same as start_memory's: the register
472466525Speter	   number, and the number of inner groups.  */
472517721Speter	case stop_memory:
472617721Speter	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
472725839Speter
472866525Speter	  /* We need to save the string position the last time we were at
472966525Speter	     this close-group operator in case the group is operated
473066525Speter	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
473166525Speter	     against `aba'; then we want to ignore where we are now in
473266525Speter	     the string in case this attempt to match fails.  */
473366525Speter	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
473466525Speter			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
473517721Speter			   : regend[*p];
473625839Speter	  DEBUG_PRINT2 ("      old_regend: %d\n",
473717721Speter			 POINTER_TO_OFFSET (old_regend[*p]));
473817721Speter
473966525Speter	  regend[*p] = d;
474017721Speter	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
474117721Speter
474266525Speter	  /* This register isn't active anymore.  */
474366525Speter	  IS_ACTIVE (reg_info[*p]) = 0;
474425839Speter
474566525Speter	  /* Clear this whenever we change the register activity status.  */
474666525Speter	  set_regs_matched_done = 0;
474725839Speter
474866525Speter	  /* If this was the only register active, nothing is active
474966525Speter	     anymore.  */
475066525Speter	  if (lowest_active_reg == highest_active_reg)
475166525Speter	    {
475266525Speter	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
475366525Speter	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
475466525Speter	    }
475566525Speter	  else
475666525Speter	    { /* We must scan for the new highest active register, since
475766525Speter		 it isn't necessarily one less than now: consider
475866525Speter		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
475966525Speter		 new highest active register is 1.  */
476066525Speter	      unsigned char r = *p - 1;
476166525Speter	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
476266525Speter		r--;
476366525Speter
476466525Speter	      /* If we end up at register zero, that means that we saved
476566525Speter		 the registers as the result of an `on_failure_jump', not
476666525Speter		 a `start_memory', and we jumped to past the innermost
476766525Speter		 `stop_memory'.	 For example, in ((.)*) we save
476866525Speter		 registers 1 and 2 as a result of the *, but when we pop
476966525Speter		 back to the second ), we are at the stop_memory 1.
477066525Speter		 Thus, nothing is active.  */
477117721Speter	      if (r == 0)
477266525Speter		{
477366525Speter		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
477466525Speter		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
477566525Speter		}
477666525Speter	      else
477766525Speter		highest_active_reg = r;
477866525Speter	    }
477925839Speter
478066525Speter	  /* If just failed to match something this time around with a
478166525Speter	     group that's operated on by a repetition operator, try to
478266525Speter	     force exit from the ``loop'', and restore the register
478366525Speter	     information for this group that we had before trying this
478466525Speter	     last match.  */
478566525Speter	  if ((!MATCHED_SOMETHING (reg_info[*p])
478666525Speter	       || just_past_start_mem == p - 1)
478766525Speter	      && (p + 2) < pend)
478866525Speter	    {
478966525Speter	      boolean is_a_jump_n = false;
479025839Speter
479166525Speter	      p1 = p + 2;
479266525Speter	      mcnt = 0;
479366525Speter	      switch ((re_opcode_t) *p1++)
479466525Speter		{
479566525Speter		  case jump_n:
479617721Speter		    is_a_jump_n = true;
479766525Speter		  case pop_failure_jump:
479817721Speter		  case maybe_pop_jump:
479917721Speter		  case jump:
480017721Speter		  case dummy_failure_jump:
480166525Speter		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
480217721Speter		    if (is_a_jump_n)
480317721Speter		      p1 += 2;
480466525Speter		    break;
480525839Speter
480666525Speter		  default:
480766525Speter		    /* do nothing */ ;
480866525Speter		}
480917721Speter	      p1 += mcnt;
481025839Speter
481166525Speter	      /* If the next operation is a jump backwards in the pattern
481266525Speter		 to an on_failure_jump right before the start_memory
481366525Speter		 corresponding to this stop_memory, exit from the loop
481466525Speter		 by forcing a failure after pushing on the stack the
481566525Speter		 on_failure_jump's jump in the pattern, and d.	*/
481666525Speter	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
481766525Speter		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
481817721Speter		{
481966525Speter		  /* If this group ever matched anything, then restore
482066525Speter		     what its registers were before trying this last
482166525Speter		     failed match, e.g., with `(a*)*b' against `ab' for
482266525Speter		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
482366525Speter		     against `aba' for regend[3].
482425839Speter
482566525Speter		     Also restore the registers for inner groups for,
482666525Speter		     e.g., `((a*)(b*))*' against `aba' (register 3 would
482766525Speter		     otherwise get trashed).  */
482825839Speter
482966525Speter		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
483017721Speter		    {
483125839Speter		      unsigned r;
483225839Speter
483366525Speter		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
483425839Speter
483517721Speter		      /* Restore this and inner groups' (if any) registers.  */
483634461Speter		      for (r = *p; r < *p + *(p + 1); r++)
483734461Speter			{
483834461Speter			  regstart[r] = old_regstart[r];
483917721Speter
484034461Speter			  /* xx why this test?	*/
484134461Speter			  if (old_regend[r] >= regstart[r])
484234461Speter			    regend[r] = old_regend[r];
484334461Speter			}
484434461Speter		    }
484517721Speter		  p1++;
484666525Speter		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
484766525Speter		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
484817721Speter
484966525Speter		  goto fail;
485066525Speter		}
485166525Speter	    }
485225839Speter
485366525Speter	  /* Move past the register number and the inner group count.  */
485466525Speter	  p += 2;
485566525Speter	  break;
485617721Speter
485717721Speter
485817721Speter	/* \<digit> has been turned into a `duplicate' command which is
485966525Speter	   followed by the numeric value of <digit> as the register number.  */
486066525Speter	case duplicate:
486117721Speter	  {
486217721Speter	    register const char *d2, *dend2;
486366525Speter	    int regno = *p++;	/* Get which register to match against.	 */
486417721Speter	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
486517721Speter
486666525Speter	    /* Can't back reference a group which we've never matched.	*/
486766525Speter	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
486866525Speter	      goto fail;
486925839Speter
487066525Speter	    /* Where in input to try to start matching.	 */
487166525Speter	    d2 = regstart[regno];
487225839Speter
487366525Speter	    /* Where to stop matching; if both the place to start and
487466525Speter	       the place to stop matching are in the same string, then
487566525Speter	       set to the place to stop, otherwise, for now have to use
487666525Speter	       the end of the first string.  */
487717721Speter
487866525Speter	    dend2 = ((FIRST_STRING_P (regstart[regno])
487917721Speter		      == FIRST_STRING_P (regend[regno]))
488017721Speter		     ? regend[regno] : end_match_1);
488117721Speter	    for (;;)
488217721Speter	      {
488317721Speter		/* If necessary, advance to next segment in register
488466525Speter		   contents.  */
488517721Speter		while (d2 == dend2)
488617721Speter		  {
488717721Speter		    if (dend2 == end_match_2) break;
488817721Speter		    if (dend2 == regend[regno]) break;
488917721Speter
489066525Speter		    /* End of string1 => advance to string2. */
489166525Speter		    d2 = string2;
489266525Speter		    dend2 = regend[regno];
489317721Speter		  }
489417721Speter		/* At end of register contents => success */
489517721Speter		if (d2 == dend2) break;
489617721Speter
489717721Speter		/* If necessary, advance to next segment in data.  */
489817721Speter		PREFETCH ();
489917721Speter
490017721Speter		/* How many characters left in this segment to match.  */
490117721Speter		mcnt = dend - d;
490225839Speter
490317721Speter		/* Want how many consecutive characters we can match in
490466525Speter		   one shot, so, if necessary, adjust the count.  */
490566525Speter		if (mcnt > dend2 - d2)
490617721Speter		  mcnt = dend2 - d2;
490725839Speter
490817721Speter		/* Compare that many; failure if mismatch, else move
490966525Speter		   past them.  */
491066525Speter		if (RE_TRANSLATE_P (translate)
491166525Speter		    ? bcmp_translate (d, d2, mcnt, translate)
491266525Speter		    : bcmp (d, d2, mcnt))
491317721Speter		  goto fail;
491417721Speter		d += mcnt, d2 += mcnt;
491566525Speter
491666525Speter		/* Do this because we've match some characters.	 */
491766525Speter		SET_REGS_MATCHED ();
491817721Speter	      }
491917721Speter	  }
492017721Speter	  break;
492117721Speter
492217721Speter
492366525Speter	/* begline matches the empty string at the beginning of the string
492466525Speter	   (unless `not_bol' is set in `bufp'), and, if
492566525Speter	   `newline_anchor' is set, after newlines.  */
492617721Speter	case begline:
492766525Speter	  DEBUG_PRINT1 ("EXECUTING begline.\n");
492825839Speter
492966525Speter	  if (AT_STRINGS_BEG (d))
493066525Speter	    {
493166525Speter	      if (!bufp->not_bol) break;
493266525Speter	    }
493366525Speter	  else if (d[-1] == '\n' && bufp->newline_anchor)
493466525Speter	    {
493566525Speter	      break;
493666525Speter	    }
493766525Speter	  /* In all other cases, we fail.  */
493866525Speter	  goto fail;
493917721Speter
494017721Speter
494166525Speter	/* endline is the dual of begline.  */
494217721Speter	case endline:
494366525Speter	  DEBUG_PRINT1 ("EXECUTING endline.\n");
494417721Speter
494566525Speter	  if (AT_STRINGS_END (d))
494666525Speter	    {
494766525Speter	      if (!bufp->not_eol) break;
494866525Speter	    }
494925839Speter
495066525Speter	  /* We have to ``prefetch'' the next character.  */
495166525Speter	  else if ((d == end1 ? *string2 : *d) == '\n'
495266525Speter		   && bufp->newline_anchor)
495366525Speter	    {
495466525Speter	      break;
495566525Speter	    }
495666525Speter	  goto fail;
495717721Speter
495817721Speter
495917721Speter	/* Match at the very beginning of the data.  */
496066525Speter	case begbuf:
496166525Speter	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
496266525Speter	  if (AT_STRINGS_BEG (d))
496366525Speter	    break;
496466525Speter	  goto fail;
496517721Speter
496617721Speter
496717721Speter	/* Match at the very end of the data.  */
496866525Speter	case endbuf:
496966525Speter	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
497017721Speter	  if (AT_STRINGS_END (d))
497117721Speter	    break;
497266525Speter	  goto fail;
497317721Speter
497417721Speter
497566525Speter	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
497666525Speter	   pushes NULL as the value for the string on the stack.  Then
497766525Speter	   `pop_failure_point' will keep the current value for the
497866525Speter	   string, instead of restoring it.  To see why, consider
497966525Speter	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
498066525Speter	   then the . fails against the \n.  But the next thing we want
498166525Speter	   to do is match the \n against the \n; if we restored the
498266525Speter	   string value, we would be back at the foo.
498325839Speter
498466525Speter	   Because this is used only in specific cases, we don't need to
498566525Speter	   check all the things that `on_failure_jump' does, to make
498666525Speter	   sure the right things get saved on the stack.  Hence we don't
498766525Speter	   share its code.  The only reason to push anything on the
498866525Speter	   stack at all is that otherwise we would have to change
498966525Speter	   `anychar's code to do something besides goto fail in this
499066525Speter	   case; that seems worse than this.  */
499166525Speter	case on_failure_keep_string_jump:
499266525Speter	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
499325839Speter
499466525Speter	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
499566525Speter	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
499617721Speter
499766525Speter	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
499866525Speter	  break;
499917721Speter
500017721Speter
500117721Speter	/* Uses of on_failure_jump:
500225839Speter
500366525Speter	   Each alternative starts with an on_failure_jump that points
500466525Speter	   to the beginning of the next alternative.  Each alternative
500566525Speter	   except the last ends with a jump that in effect jumps past
500666525Speter	   the rest of the alternatives.  (They really jump to the
500766525Speter	   ending jump of the following alternative, because tensioning
500866525Speter	   these jumps is a hassle.)
500917721Speter
501066525Speter	   Repeats start with an on_failure_jump that points past both
501166525Speter	   the repetition text and either the following jump or
501266525Speter	   pop_failure_jump back to this on_failure_jump.  */
501317721Speter	case on_failure_jump:
501466525Speter	on_failure:
501566525Speter	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
501617721Speter
501766525Speter#if defined (WINDOWSNT) && defined (emacs)
501866525Speter	  QUIT;
501966525Speter#endif
502017721Speter
502166525Speter	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
502266525Speter	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
502317721Speter
502466525Speter	  /* If this on_failure_jump comes right before a group (i.e.,
502566525Speter	     the original * applied to a group), save the information
502666525Speter	     for that group and all inner ones, so that if we fail back
502766525Speter	     to this point, the group's information will be correct.
502866525Speter	     For example, in \(a*\)*\1, we need the preceding group,
502966525Speter	     and in \(zz\(a*\)b*\)\2, we need the inner group.	*/
503017721Speter
503166525Speter	  /* We can't use `p' to check ahead because we push
503266525Speter	     a failure point to `p + mcnt' after we do this.  */
503366525Speter	  p1 = p;
503417721Speter
503566525Speter	  /* We need to skip no_op's before we look for the
503666525Speter	     start_memory in case this on_failure_jump is happening as
503766525Speter	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
503866525Speter	     against aba.  */
503966525Speter	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
504066525Speter	    p1++;
504117721Speter
504266525Speter	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
504366525Speter	    {
504466525Speter	      /* We have a new highest active register now.  This will
504566525Speter		 get reset at the start_memory we are about to get to,
504666525Speter		 but we will have saved all the registers relevant to
504766525Speter		 this repetition op, as described above.  */
504866525Speter	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
504966525Speter	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
505066525Speter		lowest_active_reg = *(p1 + 1);
505166525Speter	    }
505217721Speter
505366525Speter	  DEBUG_PRINT1 (":\n");
505466525Speter	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
505566525Speter	  break;
505617721Speter
505766525Speter
505866525Speter	/* A smart repeat ends with `maybe_pop_jump'.
505966525Speter	   We change it to either `pop_failure_jump' or `jump'.	 */
506066525Speter	case maybe_pop_jump:
506166525Speter#if defined (WINDOWSNT) && defined (emacs)
506266525Speter	  QUIT;
506366525Speter#endif
506466525Speter	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
506566525Speter	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
506666525Speter	  {
506717721Speter	    register unsigned char *p2 = p;
506817721Speter
506966525Speter	    /* Compare the beginning of the repeat with what in the
507066525Speter	       pattern follows its end. If we can establish that there
507166525Speter	       is nothing that they would both match, i.e., that we
507266525Speter	       would have to backtrack because of (as in, e.g., `a*a')
507366525Speter	       then we can change to pop_failure_jump, because we'll
507466525Speter	       never have to backtrack.
507525839Speter
507666525Speter	       This is not true in the case of alternatives: in
507766525Speter	       `(a|ab)*' we do need to backtrack to the `ab' alternative
507866525Speter	       (e.g., if the string was `ab').	But instead of trying to
507966525Speter	       detect that here, the alternative has put on a dummy
508066525Speter	       failure point which is what we will end up popping.  */
508117721Speter
508266525Speter	    /* Skip over open/close-group commands.
508366525Speter	       If what follows this loop is a ...+ construct,
508466525Speter	       look at what begins its body, since we will have to
508566525Speter	       match at least one of that.  */
508666525Speter	    while (1)
508766525Speter	      {
508866525Speter		if (p2 + 2 < pend
508966525Speter		    && ((re_opcode_t) *p2 == stop_memory
509066525Speter			|| (re_opcode_t) *p2 == start_memory))
509166525Speter		  p2 += 3;
509266525Speter		else if (p2 + 6 < pend
509366525Speter			 && (re_opcode_t) *p2 == dummy_failure_jump)
509466525Speter		  p2 += 6;
509566525Speter		else
509666525Speter		  break;
509766525Speter	      }
509817721Speter
509966525Speter	    p1 = p + mcnt;
510066525Speter	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
510166525Speter	       to the `maybe_finalize_jump' of this case.  Examine what
510266525Speter	       follows.	 */
510366525Speter
510466525Speter	    /* If we're at the end of the pattern, we can change.  */
510566525Speter	    if (p2 == pend)
510617721Speter	      {
510717721Speter		/* Consider what happens when matching ":\(.*\)"
510817721Speter		   against ":/".  I don't really understand this code
510966525Speter		   yet.	 */
511066525Speter		p[-3] = (unsigned char) pop_failure_jump;
511166525Speter		DEBUG_PRINT1
511266525Speter		  ("  End of pattern: change to `pop_failure_jump'.\n");
511366525Speter	      }
511417721Speter
511566525Speter	    else if ((re_opcode_t) *p2 == exactn
511617721Speter		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
511717721Speter	      {
511866525Speter		register unsigned int c
511966525Speter		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
512017721Speter
512166525Speter		if ((re_opcode_t) p1[3] == exactn)
512266525Speter		  {
512366525Speter		    if (!(multibyte /* && (c != '\n') */
512466525Speter			  && BASE_LEADING_CODE_P (c))
512566525Speter			? c != p1[5]
512666525Speter			: (STRING_CHAR (&p2[2], pend - &p2[2])
512766525Speter			   != STRING_CHAR (&p1[5], pend - &p1[5])))
512866525Speter		  {
512966525Speter		    p[-3] = (unsigned char) pop_failure_jump;
513066525Speter		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
513166525Speter				  c, p1[5]);
513266525Speter		  }
513366525Speter		  }
513425839Speter
513517721Speter		else if ((re_opcode_t) p1[3] == charset
513617721Speter			 || (re_opcode_t) p1[3] == charset_not)
513717721Speter		  {
513817721Speter		    int not = (re_opcode_t) p1[3] == charset_not;
513925839Speter
514066525Speter		    if (multibyte /* && (c != '\n') */
514166525Speter			&& BASE_LEADING_CODE_P (c))
514266525Speter		      c = STRING_CHAR (&p2[2], pend - &p2[2]);
514366525Speter
514466525Speter		    /* Test if C is listed in charset (or charset_not)
514566525Speter		       at `&p1[3]'.  */
514666525Speter		    if (SINGLE_BYTE_CHAR_P (c))
514766525Speter		      {
514866525Speter			if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
514917721Speter			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
515017721Speter		      not = !not;
515166525Speter		      }
515266525Speter		    else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
515366525Speter		      CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
515417721Speter
515566525Speter		    /* `not' is equal to 1 if c would match, which means
515666525Speter			that we can't change to pop_failure_jump.  */
515717721Speter		    if (!not)
515866525Speter		      {
515966525Speter			p[-3] = (unsigned char) pop_failure_jump;
516066525Speter			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
516166525Speter		      }
516217721Speter		  }
516317721Speter	      }
516466525Speter	    else if ((re_opcode_t) *p2 == charset)
516566525Speter	      {
516666525Speter		if ((re_opcode_t) p1[3] == exactn)
516766525Speter		  {
516866525Speter		    register unsigned int c = p1[5];
516966525Speter		    int not = 0;
517066525Speter
517166525Speter		    if (multibyte && BASE_LEADING_CODE_P (c))
517266525Speter		      c = STRING_CHAR (&p1[5], pend - &p1[5]);
517366525Speter
517466525Speter		    /* Test if C is listed in charset at `p2'.	*/
517566525Speter		    if (SINGLE_BYTE_CHAR_P (c))
517666525Speter		      {
517766525Speter			if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
517866525Speter			    && (p2[2 + c / BYTEWIDTH]
517966525Speter				& (1 << (c % BYTEWIDTH))))
518066525Speter			  not = !not;
518166525Speter		      }
518266525Speter		    else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
518366525Speter		      CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
518466525Speter
518566525Speter		    if (!not)
518666525Speter		  {
518766525Speter		    p[-3] = (unsigned char) pop_failure_jump;
518866525Speter			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
518966525Speter		      }
519066525Speter		  }
519166525Speter
519266525Speter		/* It is hard to list up all the character in charset
519366525Speter		   P2 if it includes multibyte character.  Give up in
519466525Speter		   such case.  */
519566525Speter		else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
519666525Speter		  {
519766525Speter		    /* Now, we are sure that P2 has no range table.
519866525Speter		       So, for the size of bitmap in P2, `p2[1]' is
519966525Speter		       enough.	But P1 may have range table, so the
520066525Speter		       size of bitmap table of P1 is extracted by
520166525Speter		       using macro `CHARSET_BITMAP_SIZE'.
520266525Speter
520366525Speter		       Since we know that all the character listed in
520466525Speter		       P2 is ASCII, it is enough to test only bitmap
520566525Speter		       table of P1.  */
520666525Speter
520766525Speter		    if ((re_opcode_t) p1[3] == charset_not)
520866525Speter		  {
520966525Speter		    int idx;
521066525Speter			/* We win if the charset_not inside the loop lists
521166525Speter			   every character listed in the charset after.	 */
521266525Speter		    for (idx = 0; idx < (int) p2[1]; idx++)
521366525Speter		      if (! (p2[2 + idx] == 0
521466525Speter				 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
521566525Speter				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
521666525Speter			break;
521766525Speter
521866525Speter		    if (idx == p2[1])
521966525Speter		      {
522066525Speter			p[-3] = (unsigned char) pop_failure_jump;
522166525Speter			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
522266525Speter		      }
522366525Speter		  }
522466525Speter		else if ((re_opcode_t) p1[3] == charset)
522566525Speter		  {
522666525Speter		    int idx;
522766525Speter		    /* We win if the charset inside the loop
522866525Speter		       has no overlap with the one after the loop.  */
522966525Speter		    for (idx = 0;
523066525Speter			     (idx < (int) p2[1]
523166525Speter			      && idx < CHARSET_BITMAP_SIZE (&p1[3]));
523266525Speter			 idx++)
523366525Speter		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
523466525Speter			break;
523566525Speter
523666525Speter			if (idx == p2[1]
523766525Speter			    || idx == CHARSET_BITMAP_SIZE (&p1[3]))
523866525Speter		      {
523966525Speter			p[-3] = (unsigned char) pop_failure_jump;
524066525Speter			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
524166525Speter		      }
524266525Speter		  }
524366525Speter	      }
524417721Speter	  }
524566525Speter	  }
524617721Speter	  p -= 2;		/* Point at relative address again.  */
524717721Speter	  if ((re_opcode_t) p[-1] != pop_failure_jump)
524817721Speter	    {
524917721Speter	      p[-1] = (unsigned char) jump;
525066525Speter	      DEBUG_PRINT1 ("  Match => jump.\n");
525117721Speter	      goto unconditional_jump;
525217721Speter	    }
525366525Speter	/* Note fall through.  */
525417721Speter
525517721Speter
525617721Speter	/* The end of a simple repeat has a pop_failure_jump back to
525766525Speter	   its matching on_failure_jump, where the latter will push a
525866525Speter	   failure point.  The pop_failure_jump takes off failure
525966525Speter	   points put on by this pop_failure_jump's matching
526066525Speter	   on_failure_jump; we got through the pattern to here from the
526166525Speter	   matching on_failure_jump, so didn't fail.  */
526266525Speter	case pop_failure_jump:
526366525Speter	  {
526466525Speter	    /* We need to pass separate storage for the lowest and
526566525Speter	       highest registers, even though we don't care about the
526666525Speter	       actual values.  Otherwise, we will restore only one
526766525Speter	       register from the stack, since lowest will == highest in
526866525Speter	       `pop_failure_point'.  */
526966525Speter	    unsigned dummy_low_reg, dummy_high_reg;
527066525Speter	    unsigned char *pdummy;
527166525Speter	    const char *sdummy;
527217721Speter
527366525Speter	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
527466525Speter	    POP_FAILURE_POINT (sdummy, pdummy,
527566525Speter			       dummy_low_reg, dummy_high_reg,
527666525Speter			       reg_dummy, reg_dummy, reg_info_dummy);
527766525Speter	  }
527866525Speter	  /* Note fall through.	 */
527917721Speter
528025839Speter
528166525Speter	/* Unconditionally jump (without popping any failure points).  */
528266525Speter	case jump:
528317721Speter	unconditional_jump:
528466525Speter#if defined (WINDOWSNT) && defined (emacs)
528566525Speter	  QUIT;
528666525Speter#endif
528717721Speter	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
528866525Speter	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
528966525Speter	  p += mcnt;				/* Do the jump.	 */
529066525Speter	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
529117721Speter	  break;
529217721Speter
529325839Speter
529466525Speter	/* We need this opcode so we can detect where alternatives end
529566525Speter	   in `group_match_null_string_p' et al.  */
529666525Speter	case jump_past_alt:
529766525Speter	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
529866525Speter	  goto unconditional_jump;
529917721Speter
530017721Speter
530166525Speter	/* Normally, the on_failure_jump pushes a failure point, which
530266525Speter	   then gets popped at pop_failure_jump.  We will end up at
530366525Speter	   pop_failure_jump, also, and with a pattern of, say, `a+', we
530466525Speter	   are skipping over the on_failure_jump, so we have to push
530566525Speter	   something meaningless for pop_failure_jump to pop.  */
530666525Speter	case dummy_failure_jump:
530766525Speter	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
530866525Speter	  /* It doesn't matter what we push for the string here.  What
530966525Speter	     the code at `fail' tests is the value for the pattern.  */
531066525Speter	  PUSH_FAILURE_POINT (0, 0, -2);
531166525Speter	  goto unconditional_jump;
531217721Speter
531317721Speter
531466525Speter	/* At the end of an alternative, we need to push a dummy failure
531566525Speter	   point in case we are followed by a `pop_failure_jump', because
531666525Speter	   we don't want the failure point for the alternative to be
531766525Speter	   popped.  For example, matching `(a|ab)*' against `aab'
531866525Speter	   requires that we match the `ab' alternative.	 */
531966525Speter	case push_dummy_failure:
532066525Speter	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
532166525Speter	  /* See comments just above at `dummy_failure_jump' about the
532266525Speter	     two zeroes.  */
532366525Speter	  PUSH_FAILURE_POINT (0, 0, -2);
532466525Speter	  break;
532517721Speter
532666525Speter	/* Have to succeed matching what follows at least n times.
532766525Speter	   After that, handle like `on_failure_jump'.  */
532866525Speter	case succeed_n:
532966525Speter	  EXTRACT_NUMBER (mcnt, p + 2);
533066525Speter	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
533117721Speter
533266525Speter	  assert (mcnt >= 0);
533366525Speter	  /* Originally, this is how many times we HAVE to succeed.  */
533466525Speter	  if (mcnt > 0)
533566525Speter	    {
533666525Speter	       mcnt--;
533717721Speter	       p += 2;
533866525Speter	       STORE_NUMBER_AND_INCR (p, mcnt);
533966525Speter	       DEBUG_PRINT3 ("	Setting 0x%x to %d.\n", p, mcnt);
534066525Speter	    }
534117721Speter	  else if (mcnt == 0)
534266525Speter	    {
534366525Speter	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
534417721Speter	      p[2] = (unsigned char) no_op;
534566525Speter	      p[3] = (unsigned char) no_op;
534666525Speter	      goto on_failure;
534766525Speter	    }
534866525Speter	  break;
534925839Speter
535066525Speter	case jump_n:
535166525Speter	  EXTRACT_NUMBER (mcnt, p + 2);
535266525Speter	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
535317721Speter
535466525Speter	  /* Originally, this is how many times we CAN jump.  */
535566525Speter	  if (mcnt)
535666525Speter	    {
535766525Speter	       mcnt--;
535866525Speter	       STORE_NUMBER (p + 2, mcnt);
535925839Speter	       goto unconditional_jump;
536066525Speter	    }
536166525Speter	  /* If don't have to jump any more, skip over the rest of command.  */
536225839Speter	  else
536325839Speter	    p += 4;
536466525Speter	  break;
536525839Speter
536617721Speter	case set_number_at:
536717721Speter	  {
536866525Speter	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
536917721Speter
537066525Speter	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
537166525Speter	    p1 = p + mcnt;
537266525Speter	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
537366525Speter	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
537417721Speter	    STORE_NUMBER (p1, mcnt);
537566525Speter	    break;
537666525Speter	  }
537717721Speter
537866525Speter	case wordbound:
537966525Speter	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
538066525Speter
538166525Speter	  /* We SUCCEED in one of the following cases: */
538266525Speter
538366525Speter	  /* Case 1: D is at the beginning or the end of string.  */
538466525Speter	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
538517721Speter	    break;
538666525Speter	  else
538766525Speter	    {
538866525Speter	      /* C1 is the character before D, S1 is the syntax of C1, C2
538966525Speter		 is the character at D, and S2 is the syntax of C2.  */
539066525Speter	      int c1, c2, s1, s2;
539166525Speter	      int pos1 = PTR_TO_OFFSET (d - 1);
539266525Speter	      int charpos;
539317721Speter
539466525Speter	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
539566525Speter	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
539666525Speter#ifdef emacs
539766525Speter	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
539866525Speter	      UPDATE_SYNTAX_TABLE (charpos);
539966525Speter#endif
540066525Speter	      s1 = SYNTAX (c1);
540166525Speter#ifdef emacs
540266525Speter	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
540366525Speter#endif
540466525Speter	      s2 = SYNTAX (c2);
540566525Speter
540666525Speter	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
540766525Speter		  ((s1 == Sword) != (s2 == Sword))
540866525Speter		  /* Case 3: Both of S1 and S2 are Sword, and macro
540966525Speter		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
541066525Speter		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
541166525Speter	    break;
541266525Speter	}
541366525Speter	  goto fail;
541466525Speter
541566525Speter      case notwordbound:
541666525Speter	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
541766525Speter
541866525Speter	  /* We FAIL in one of the following cases: */
541966525Speter
542066525Speter	  /* Case 1: D is at the beginning or the end of string.  */
542166525Speter	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
542217721Speter	    goto fail;
542366525Speter	  else
542466525Speter	    {
542566525Speter	      /* C1 is the character before D, S1 is the syntax of C1, C2
542666525Speter		 is the character at D, and S2 is the syntax of C2.  */
542766525Speter	      int c1, c2, s1, s2;
542866525Speter	      int pos1 = PTR_TO_OFFSET (d - 1);
542966525Speter	      int charpos;
543017721Speter
543166525Speter	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
543266525Speter	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
543366525Speter#ifdef emacs
543466525Speter	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
543566525Speter	      UPDATE_SYNTAX_TABLE (charpos);
543666525Speter#endif
543766525Speter	      s1 = SYNTAX (c1);
543866525Speter#ifdef emacs
543966525Speter	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
544066525Speter#endif
544166525Speter	      s2 = SYNTAX (c2);
544266525Speter
544366525Speter	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
544466525Speter		  ((s1 == Sword) != (s2 == Sword))
544566525Speter		  /* Case 3: Both of S1 and S2 are Sword, and macro
544666525Speter		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
544766525Speter		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
544866525Speter	    goto fail;
544966525Speter	}
545066525Speter	  break;
545166525Speter
545217721Speter	case wordbeg:
545366525Speter	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
545417721Speter
545566525Speter	  /* We FAIL in one of the following cases: */
545666525Speter
545766525Speter	  /* Case 1: D is at the end of string.	 */
545866525Speter	  if (AT_STRINGS_END (d))
545966525Speter	  goto fail;
546066525Speter	  else
546166525Speter	    {
546266525Speter	      /* C1 is the character before D, S1 is the syntax of C1, C2
546366525Speter		 is the character at D, and S2 is the syntax of C2.  */
546466525Speter	      int c1, c2, s1, s2;
546566525Speter	      int pos1 = PTR_TO_OFFSET (d);
546666525Speter	      int charpos;
546766525Speter
546866525Speter	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
546966525Speter#ifdef emacs
547066525Speter	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
547166525Speter	      UPDATE_SYNTAX_TABLE (charpos);
547266525Speter#endif
547366525Speter	      s2 = SYNTAX (c2);
5474107484Speter
547566525Speter	      /* Case 2: S2 is not Sword. */
547666525Speter	      if (s2 != Sword)
547766525Speter		goto fail;
547866525Speter
547966525Speter	      /* Case 3: D is not at the beginning of string ... */
548066525Speter	      if (!AT_STRINGS_BEG (d))
548166525Speter		{
548266525Speter		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
548366525Speter#ifdef emacs
548466525Speter		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
548566525Speter#endif
548666525Speter		  s1 = SYNTAX (c1);
548766525Speter
548866525Speter		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
548966525Speter		     returns 0.	 */
549066525Speter		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
549166525Speter		    goto fail;
549266525Speter		}
549366525Speter	    }
549466525Speter	  break;
549566525Speter
549617721Speter	case wordend:
549766525Speter	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
549817721Speter
549966525Speter	  /* We FAIL in one of the following cases: */
550066525Speter
550166525Speter	  /* Case 1: D is at the beginning of string.  */
550266525Speter	  if (AT_STRINGS_BEG (d))
550366525Speter	    goto fail;
550466525Speter	  else
550566525Speter	    {
550666525Speter	      /* C1 is the character before D, S1 is the syntax of C1, C2
550766525Speter		 is the character at D, and S2 is the syntax of C2.  */
550866525Speter	      int c1, c2, s1, s2;
550966525Speter	      int pos1 = PTR_TO_OFFSET (d);
551066525Speter	      int charpos;
551166525Speter
551266525Speter	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
551317721Speter#ifdef emacs
551466525Speter	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
551566525Speter	      UPDATE_SYNTAX_TABLE (charpos);
551666525Speter#endif
551766525Speter	      s1 = SYNTAX (c1);
551825839Speter
551966525Speter	      /* Case 2: S1 is not Sword.  */
552066525Speter	      if (s1 != Sword)
552166525Speter		goto fail;
552225839Speter
552366525Speter	      /* Case 3: D is not at the end of string ... */
552466525Speter	      if (!AT_STRINGS_END (d))
552566525Speter		{
552666525Speter		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
552766525Speter#ifdef emacs
552866525Speter		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
552966525Speter#endif
553066525Speter		  s2 = SYNTAX (c2);
553117721Speter
553266525Speter		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
553366525Speter		     returns 0.	 */
553466525Speter		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
553566525Speter	  goto fail;
553666525Speter		}
553766525Speter	    }
553866525Speter	  break;
553966525Speter
554066525Speter#ifdef emacs
554166525Speter	case before_dot:
554266525Speter	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
554366525Speter	  if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
554466525Speter	    goto fail;
554566525Speter	  break;
554666525Speter
554766525Speter	case at_dot:
554866525Speter	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
554966525Speter	  if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
555066525Speter	    goto fail;
555166525Speter	  break;
555266525Speter
555366525Speter	case after_dot:
555466525Speter	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
555566525Speter	  if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
555666525Speter	    goto fail;
555766525Speter	  break;
555866525Speter
555917721Speter	case syntaxspec:
556066525Speter	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
556117721Speter	  mcnt = *p++;
556217721Speter	  goto matchsyntax;
556317721Speter
556466525Speter	case wordchar:
556566525Speter	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
556617721Speter	  mcnt = (int) Sword;
556766525Speter	matchsyntax:
556817721Speter	  PREFETCH ();
556966525Speter#ifdef emacs
557066525Speter	  {
557166525Speter	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
557266525Speter	    UPDATE_SYNTAX_TABLE (pos1);
557366525Speter	  }
557466525Speter#endif
557566525Speter	  {
557666525Speter	    int c, len;
557766525Speter
557866525Speter	    if (multibyte)
557966525Speter	      /* we must concern about multibyte form, ... */
558066525Speter	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
558166525Speter	    else
558266525Speter	      /* everything should be handled as ASCII, even though it
558366525Speter		 looks like multibyte form.  */
558466525Speter	      c = *d, len = 1;
558566525Speter
558666525Speter	    if (SYNTAX (c) != (enum syntaxcode) mcnt)
558725839Speter	    goto fail;
558866525Speter	    d += len;
558966525Speter	  }
559066525Speter	  SET_REGS_MATCHED ();
559117721Speter	  break;
559217721Speter
559317721Speter	case notsyntaxspec:
559466525Speter	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
559517721Speter	  mcnt = *p++;
559617721Speter	  goto matchnotsyntax;
559717721Speter
559866525Speter	case notwordchar:
559966525Speter	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
560017721Speter	  mcnt = (int) Sword;
560166525Speter	matchnotsyntax:
560217721Speter	  PREFETCH ();
560366525Speter#ifdef emacs
560466525Speter	  {
560566525Speter	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
560666525Speter	    UPDATE_SYNTAX_TABLE (pos1);
560766525Speter	  }
560866525Speter#endif
560966525Speter	  {
561066525Speter	    int c, len;
561166525Speter
561266525Speter	    if (multibyte)
561366525Speter	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
561466525Speter	    else
561566525Speter	      c = *d, len = 1;
561666525Speter
561766525Speter	    if (SYNTAX (c) == (enum syntaxcode) mcnt)
561825839Speter	    goto fail;
561966525Speter	    d += len;
562066525Speter	  }
562117721Speter	  SET_REGS_MATCHED ();
562266525Speter	  break;
562366525Speter
562466525Speter	case categoryspec:
562566525Speter	  DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
562666525Speter	  mcnt = *p++;
562766525Speter	  PREFETCH ();
562866525Speter	  {
562966525Speter	    int c, len;
563066525Speter
563166525Speter	    if (multibyte)
563266525Speter	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
563366525Speter	    else
563466525Speter	      c = *d, len = 1;
563566525Speter
563666525Speter	    if (!CHAR_HAS_CATEGORY (c, mcnt))
563766525Speter	      goto fail;
563866525Speter	    d += len;
563966525Speter	  }
564066525Speter	  SET_REGS_MATCHED ();
564166525Speter	  break;
564266525Speter
564366525Speter	case notcategoryspec:
564466525Speter	  DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
564566525Speter	  mcnt = *p++;
564666525Speter	  PREFETCH ();
564766525Speter	  {
564866525Speter	    int c, len;
564966525Speter
565066525Speter	    if (multibyte)
565166525Speter	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
565266525Speter	    else
565366525Speter	      c = *d, len = 1;
565466525Speter
565566525Speter	    if (CHAR_HAS_CATEGORY (c, mcnt))
565666525Speter	      goto fail;
565766525Speter	    d += len;
565866525Speter	  }
565966525Speter	  SET_REGS_MATCHED ();
566017721Speter          break;
566117721Speter
566217721Speter#else /* not emacs */
566317721Speter	case wordchar:
566417721Speter          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
566517721Speter	  PREFETCH ();
566617721Speter          if (!WORDCHAR_P (d))
566717721Speter            goto fail;
566817721Speter	  SET_REGS_MATCHED ();
566917721Speter          d++;
567017721Speter	  break;
567125839Speter
567217721Speter	case notwordchar:
567317721Speter          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
567417721Speter	  PREFETCH ();
567517721Speter	  if (WORDCHAR_P (d))
567617721Speter            goto fail;
567717721Speter          SET_REGS_MATCHED ();
567817721Speter          d++;
567917721Speter	  break;
568017721Speter#endif /* not emacs */
568125839Speter
568217721Speter        default:
568317721Speter          abort ();
568417721Speter	}
568517721Speter      continue;  /* Successfully executed one pattern command; keep going.  */
568617721Speter
568717721Speter
568817721Speter    /* We goto here if a matching operation fails. */
568917721Speter    fail:
569066525Speter#if defined (WINDOWSNT) && defined (emacs)
569166525Speter      QUIT;
569266525Speter#endif
569317721Speter      if (!FAIL_STACK_EMPTY ())
569417721Speter	{ /* A restart point is known.  Restore to that state.  */
569517721Speter          DEBUG_PRINT1 ("\nFAIL:\n");
569617721Speter          POP_FAILURE_POINT (d, p,
569717721Speter                             lowest_active_reg, highest_active_reg,
569817721Speter                             regstart, regend, reg_info);
569917721Speter
570017721Speter          /* If this failure point is a dummy, try the next one.  */
570117721Speter          if (!p)
570217721Speter	    goto fail;
570317721Speter
570417721Speter          /* If we failed to the end of the pattern, don't examine *p.  */
570517721Speter	  assert (p <= pend);
570617721Speter          if (p < pend)
570717721Speter            {
570817721Speter              boolean is_a_jump_n = false;
570925839Speter
571017721Speter              /* If failed to a backwards jump that's part of a repetition
571117721Speter                 loop, need to pop this failure point and use the next one.  */
571217721Speter              switch ((re_opcode_t) *p)
571317721Speter                {
571417721Speter                case jump_n:
571517721Speter                  is_a_jump_n = true;
571617721Speter                case maybe_pop_jump:
571717721Speter                case pop_failure_jump:
571817721Speter                case jump:
571917721Speter                  p1 = p + 1;
572017721Speter                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
572125839Speter                  p1 += mcnt;
572217721Speter
572317721Speter                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
572417721Speter                      || (!is_a_jump_n
572517721Speter                          && (re_opcode_t) *p1 == on_failure_jump))
572617721Speter                    goto fail;
572717721Speter                  break;
572817721Speter                default:
572917721Speter                  /* do nothing */ ;
573017721Speter                }
573117721Speter            }
573217721Speter
573317721Speter          if (d >= string1 && d <= end1)
573417721Speter	    dend = end_match_1;
573517721Speter        }
573617721Speter      else
573717721Speter        break;   /* Matching at this starting point really fails.  */
573817721Speter    } /* for (;;) */
573917721Speter
574017721Speter  if (best_regs_set)
574117721Speter    goto restore_best_regs;
574217721Speter
574317721Speter  FREE_VARIABLES ();
574417721Speter
574517721Speter  return -1;         			/* Failure to match.  */
574617721Speter} /* re_match_2 */
574717721Speter
574817721Speter/* Subroutine definitions for re_match_2.  */
574917721Speter
575017721Speter
575117721Speter/* We are passed P pointing to a register number after a start_memory.
575225839Speter
575317721Speter   Return true if the pattern up to the corresponding stop_memory can
575417721Speter   match the empty string, and false otherwise.
575525839Speter
575617721Speter   If we find the matching stop_memory, sets P to point to one past its number.
575717721Speter   Otherwise, sets P to an undefined byte less than or equal to END.
575817721Speter
575917721Speter   We don't handle duplicates properly (yet).  */
576017721Speter
576117721Speterstatic boolean
576217721Spetergroup_match_null_string_p (p, end, reg_info)
576317721Speter    unsigned char **p, *end;
576417721Speter    register_info_type *reg_info;
576517721Speter{
576617721Speter  int mcnt;
576717721Speter  /* Point to after the args to the start_memory.  */
576817721Speter  unsigned char *p1 = *p + 2;
576925839Speter
577017721Speter  while (p1 < end)
577117721Speter    {
577217721Speter      /* Skip over opcodes that can match nothing, and return true or
577317721Speter	 false, as appropriate, when we get to one that can't, or to the
577417721Speter         matching stop_memory.  */
577525839Speter
577617721Speter      switch ((re_opcode_t) *p1)
577717721Speter        {
577817721Speter        /* Could be either a loop or a series of alternatives.  */
577917721Speter        case on_failure_jump:
578017721Speter          p1++;
578117721Speter          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
578225839Speter
578317721Speter          /* If the next operation is not a jump backwards in the
578417721Speter	     pattern.  */
578517721Speter
578617721Speter	  if (mcnt >= 0)
578717721Speter	    {
578817721Speter              /* Go through the on_failure_jumps of the alternatives,
578917721Speter                 seeing if any of the alternatives cannot match nothing.
579017721Speter                 The last alternative starts with only a jump,
579117721Speter                 whereas the rest start with on_failure_jump and end
579217721Speter                 with a jump, e.g., here is the pattern for `a|b|c':
579317721Speter
579417721Speter                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
579517721Speter                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
579625839Speter                 /exactn/1/c
579717721Speter
579817721Speter                 So, we have to first go through the first (n-1)
579917721Speter                 alternatives and then deal with the last one separately.  */
580017721Speter
580117721Speter
580217721Speter              /* Deal with the first (n-1) alternatives, which start
580317721Speter                 with an on_failure_jump (see above) that jumps to right
580417721Speter                 past a jump_past_alt.  */
580517721Speter
580617721Speter              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
580717721Speter                {
580817721Speter                  /* `mcnt' holds how many bytes long the alternative
580917721Speter                     is, including the ending `jump_past_alt' and
581017721Speter                     its number.  */
581117721Speter
581225839Speter                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
581317721Speter				                      reg_info))
581417721Speter                    return false;
581517721Speter
581617721Speter                  /* Move to right after this alternative, including the
581717721Speter		     jump_past_alt.  */
581825839Speter                  p1 += mcnt;
581917721Speter
582017721Speter                  /* Break if it's the beginning of an n-th alternative
582117721Speter                     that doesn't begin with an on_failure_jump.  */
582217721Speter                  if ((re_opcode_t) *p1 != on_failure_jump)
582317721Speter                    break;
582425839Speter
582517721Speter		  /* Still have to check that it's not an n-th
582617721Speter		     alternative that starts with an on_failure_jump.  */
582717721Speter		  p1++;
582817721Speter                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
582917721Speter                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
583017721Speter                    {
583117721Speter		      /* Get to the beginning of the n-th alternative.  */
583217721Speter                      p1 -= 3;
583317721Speter                      break;
583417721Speter                    }
583517721Speter                }
583617721Speter
583717721Speter              /* Deal with the last alternative: go back and get number
583817721Speter                 of the `jump_past_alt' just before it.  `mcnt' contains
583917721Speter                 the length of the alternative.  */
584017721Speter              EXTRACT_NUMBER (mcnt, p1 - 2);
584117721Speter
584217721Speter              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
584317721Speter                return false;
584417721Speter
584517721Speter              p1 += mcnt;	/* Get past the n-th alternative.  */
584617721Speter            } /* if mcnt > 0 */
584717721Speter          break;
584817721Speter
584925839Speter
585017721Speter        case stop_memory:
585117721Speter	  assert (p1[1] == **p);
585217721Speter          *p = p1 + 2;
585317721Speter          return true;
585417721Speter
585525839Speter
585625839Speter        default:
585717721Speter          if (!common_op_match_null_string_p (&p1, end, reg_info))
585817721Speter            return false;
585917721Speter        }
586017721Speter    } /* while p1 < end */
586117721Speter
586217721Speter  return false;
586317721Speter} /* group_match_null_string_p */
586417721Speter
586517721Speter
586617721Speter/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
586717721Speter   It expects P to be the first byte of a single alternative and END one
586817721Speter   byte past the last. The alternative can contain groups.  */
586925839Speter
587017721Speterstatic boolean
587117721Speteralt_match_null_string_p (p, end, reg_info)
587217721Speter    unsigned char *p, *end;
587317721Speter    register_info_type *reg_info;
587417721Speter{
587517721Speter  int mcnt;
587617721Speter  unsigned char *p1 = p;
587725839Speter
587817721Speter  while (p1 < end)
587917721Speter    {
588025839Speter      /* Skip over opcodes that can match nothing, and break when we get
588117721Speter         to one that can't.  */
588225839Speter
588317721Speter      switch ((re_opcode_t) *p1)
588417721Speter        {
588517721Speter	/* It's a loop.  */
588617721Speter        case on_failure_jump:
588717721Speter          p1++;
588817721Speter          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
588917721Speter          p1 += mcnt;
589017721Speter          break;
589125839Speter
589225839Speter	default:
589317721Speter          if (!common_op_match_null_string_p (&p1, end, reg_info))
589417721Speter            return false;
589517721Speter        }
589617721Speter    }  /* while p1 < end */
589717721Speter
589817721Speter  return true;
589917721Speter} /* alt_match_null_string_p */
590017721Speter
590117721Speter
590217721Speter/* Deals with the ops common to group_match_null_string_p and
590325839Speter   alt_match_null_string_p.
590425839Speter
590517721Speter   Sets P to one after the op and its arguments, if any.  */
590617721Speter
590717721Speterstatic boolean
590817721Spetercommon_op_match_null_string_p (p, end, reg_info)
590917721Speter    unsigned char **p, *end;
591017721Speter    register_info_type *reg_info;
591117721Speter{
591217721Speter  int mcnt;
591317721Speter  boolean ret;
591417721Speter  int reg_no;
591517721Speter  unsigned char *p1 = *p;
591617721Speter
591717721Speter  switch ((re_opcode_t) *p1++)
591817721Speter    {
591917721Speter    case no_op:
592017721Speter    case begline:
592117721Speter    case endline:
592217721Speter    case begbuf:
592317721Speter    case endbuf:
592417721Speter    case wordbeg:
592517721Speter    case wordend:
592617721Speter    case wordbound:
592717721Speter    case notwordbound:
592817721Speter#ifdef emacs
592917721Speter    case before_dot:
593017721Speter    case at_dot:
593117721Speter    case after_dot:
593217721Speter#endif
593317721Speter      break;
593417721Speter
593517721Speter    case start_memory:
593617721Speter      reg_no = *p1;
593717721Speter      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
593817721Speter      ret = group_match_null_string_p (&p1, end, reg_info);
593925839Speter
594017721Speter      /* Have to set this here in case we're checking a group which
594117721Speter         contains a group and a back reference to it.  */
594217721Speter
594317721Speter      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
594417721Speter        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
594517721Speter
594617721Speter      if (!ret)
594717721Speter        return false;
594817721Speter      break;
594925839Speter
595017721Speter    /* If this is an optimized succeed_n for zero times, make the jump.  */
595117721Speter    case jump:
595217721Speter      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
595317721Speter      if (mcnt >= 0)
595417721Speter        p1 += mcnt;
595517721Speter      else
595617721Speter        return false;
595717721Speter      break;
595817721Speter
595917721Speter    case succeed_n:
596017721Speter      /* Get to the number of times to succeed.  */
596125839Speter      p1 += 2;
596217721Speter      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
596317721Speter
596417721Speter      if (mcnt == 0)
596517721Speter        {
596617721Speter          p1 -= 4;
596717721Speter          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
596817721Speter          p1 += mcnt;
596917721Speter        }
597017721Speter      else
597117721Speter        return false;
597217721Speter      break;
597317721Speter
597425839Speter    case duplicate:
597517721Speter      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
597617721Speter        return false;
597717721Speter      break;
597817721Speter
597917721Speter    case set_number_at:
598017721Speter      p1 += 4;
598117721Speter
598217721Speter    default:
598317721Speter      /* All other opcodes mean we cannot match the empty string.  */
598417721Speter      return false;
598517721Speter  }
598617721Speter
598717721Speter  *p = p1;
598817721Speter  return true;
598917721Speter} /* common_op_match_null_string_p */
599017721Speter
599117721Speter
599217721Speter/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
599317721Speter   bytes; nonzero otherwise.  */
599425839Speter
599517721Speterstatic int
599617721Speterbcmp_translate (s1, s2, len, translate)
599717721Speter     unsigned char *s1, *s2;
599817721Speter     register int len;
599966525Speter     RE_TRANSLATE_TYPE translate;
600017721Speter{
600117721Speter  register unsigned char *p1 = s1, *p2 = s2;
600266525Speter  unsigned char *p1_end = s1 + len;
600366525Speter  unsigned char *p2_end = s2 + len;
600466525Speter
600566525Speter  while (p1 != p1_end && p2 != p2_end)
600617721Speter    {
600766525Speter      int p1_charlen, p2_charlen;
600866525Speter      int p1_ch, p2_ch;
600966525Speter
601066525Speter      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
601166525Speter      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
601266525Speter
601366525Speter      if (RE_TRANSLATE (translate, p1_ch)
601466525Speter	  != RE_TRANSLATE (translate, p2_ch))
601566525Speter	return 1;
601666525Speter
601766525Speter      p1 += p1_charlen, p2 += p2_charlen;
601817721Speter    }
601966525Speter
602066525Speter  if (p1 != p1_end || p2 != p2_end)
602166525Speter    return 1;
602266525Speter
602317721Speter  return 0;
602417721Speter}
602517721Speter
602617721Speter/* Entry points for GNU code.  */
602717721Speter
602817721Speter/* re_compile_pattern is the GNU regular expression compiler: it
602917721Speter   compiles PATTERN (of length SIZE) and puts the result in BUFP.
603017721Speter   Returns 0 if the pattern was valid, otherwise an error string.
603125839Speter
603217721Speter   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
603317721Speter   are set in BUFP on entry.
603425839Speter
603517721Speter   We call regex_compile to do the actual compilation.  */
603617721Speter
603717721Speterconst char *
603817721Speterre_compile_pattern (pattern, length, bufp)
603917721Speter     const char *pattern;
604017721Speter     int length;
604117721Speter     struct re_pattern_buffer *bufp;
604217721Speter{
604317721Speter  reg_errcode_t ret;
604425839Speter
604517721Speter  /* GNU code is written to assume at least RE_NREGS registers will be set
604617721Speter     (and at least one extra will be -1).  */
604717721Speter  bufp->regs_allocated = REGS_UNALLOCATED;
604825839Speter
604917721Speter  /* And GNU code determines whether or not to get register information
605017721Speter     by passing null for the REGS argument to re_match, etc., not by
605117721Speter     setting no_sub.  */
605217721Speter  bufp->no_sub = 0;
605325839Speter
605417721Speter  /* Match anchors at newline.  */
605517721Speter  bufp->newline_anchor = 1;
605625839Speter
605717721Speter  ret = regex_compile (pattern, length, re_syntax_options, bufp);
605817721Speter
605966525Speter  if (!ret)
606066525Speter    return NULL;
606166525Speter  return gettext (re_error_msgid[(int) ret]);
606266525Speter}
606317721Speter
606417721Speter/* Entry points compatible with 4.2 BSD regex library.  We don't define
606566525Speter   them unless specifically requested.  */
606617721Speter
606766525Speter#if defined (_REGEX_RE_COMP) || defined (_LIBC)
606817721Speter
606917721Speter/* BSD has one and only one pattern buffer.  */
607017721Speterstatic struct re_pattern_buffer re_comp_buf;
607117721Speter
607217721Speterchar *
607366525Speter#ifdef _LIBC
607466525Speter/* Make these definitions weak in libc, so POSIX programs can redefine
607566525Speter   these names if they don't use our functions, and still use
607666525Speter   regcomp/regexec below without link errors.  */
607766525Speterweak_function
607866525Speter#endif
607917721Speterre_comp (s)
608017721Speter    const char *s;
608117721Speter{
608217721Speter  reg_errcode_t ret;
608325839Speter
608417721Speter  if (!s)
608517721Speter    {
608617721Speter      if (!re_comp_buf.buffer)
6087175261Sobrien	return (char *) gettext ("No previous regular expression");
608817721Speter      return 0;
608917721Speter    }
609017721Speter
609117721Speter  if (!re_comp_buf.buffer)
609217721Speter    {
609317721Speter      re_comp_buf.buffer = (unsigned char *) malloc (200);
609417721Speter      if (re_comp_buf.buffer == NULL)
609566525Speter        /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
609666525Speter        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
609717721Speter      re_comp_buf.allocated = 200;
609817721Speter
609917721Speter      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
610017721Speter      if (re_comp_buf.fastmap == NULL)
610166525Speter	/* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
610266525Speter	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
610317721Speter    }
610417721Speter
610517721Speter  /* Since `re_exec' always passes NULL for the `regs' argument, we
610617721Speter     don't need to initialize the pattern buffer fields which affect it.  */
610717721Speter
610817721Speter  /* Match anchors at newlines.  */
610917721Speter  re_comp_buf.newline_anchor = 1;
611017721Speter
611117721Speter  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
611266525Speter
611366525Speter  if (!ret)
611466525Speter    return NULL;
611566525Speter
611666525Speter  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
611766525Speter  return (char *) gettext (re_error_msgid[(int) ret]);
611817721Speter}
611917721Speter
612017721Speter
612117721Speterint
612266525Speter#ifdef _LIBC
612366525Speterweak_function
612466525Speter#endif
612517721Speterre_exec (s)
612617721Speter    const char *s;
612717721Speter{
612817721Speter  const int len = strlen (s);
612917721Speter  return
613017721Speter    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
613117721Speter}
613266525Speter#endif /* _REGEX_RE_COMP */
613317721Speter
613417721Speter/* POSIX.2 functions.  Don't define these for Emacs.  */
613517721Speter
613617721Speter#ifndef emacs
613717721Speter
613817721Speter/* regcomp takes a regular expression as a string and compiles it.
613917721Speter
614017721Speter   PREG is a regex_t *.  We do not expect any fields to be initialized,
614117721Speter   since POSIX says we shouldn't.  Thus, we set
614217721Speter
614317721Speter     `buffer' to the compiled pattern;
614417721Speter     `used' to the length of the compiled pattern;
614517721Speter     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
614617721Speter       REG_EXTENDED bit in CFLAGS is set; otherwise, to
614717721Speter       RE_SYNTAX_POSIX_BASIC;
614817721Speter     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
614917721Speter     `fastmap' and `fastmap_accurate' to zero;
615017721Speter     `re_nsub' to the number of subexpressions in PATTERN.
615117721Speter
615217721Speter   PATTERN is the address of the pattern string.
615317721Speter
615417721Speter   CFLAGS is a series of bits which affect compilation.
615517721Speter
615617721Speter     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
615717721Speter     use POSIX basic syntax.
615817721Speter
615917721Speter     If REG_NEWLINE is set, then . and [^...] don't match newline.
616017721Speter     Also, regexec will try a match beginning after every newline.
616117721Speter
616217721Speter     If REG_ICASE is set, then we considers upper- and lowercase
616317721Speter     versions of letters to be equivalent when matching.
616417721Speter
616517721Speter     If REG_NOSUB is set, then when PREG is passed to regexec, that
616617721Speter     routine will report only success or failure, and nothing about the
616717721Speter     registers.
616817721Speter
616917721Speter   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
617017721Speter   the return codes and their meanings.)  */
617117721Speter
617217721Speterint
617317721Speterregcomp (preg, pattern, cflags)
617417721Speter    regex_t *preg;
617525839Speter    const char *pattern;
617617721Speter    int cflags;
617717721Speter{
617817721Speter  reg_errcode_t ret;
617917721Speter  unsigned syntax
618017721Speter    = (cflags & REG_EXTENDED) ?
618117721Speter      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
618217721Speter
618317721Speter  /* regex_compile will allocate the space for the compiled pattern.  */
618417721Speter  preg->buffer = 0;
618517721Speter  preg->allocated = 0;
618666525Speter  preg->used = 0;
618725839Speter
618817721Speter  /* Don't bother to use a fastmap when searching.  This simplifies the
618917721Speter     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
619017721Speter     characters after newlines into the fastmap.  This way, we just try
619117721Speter     every character.  */
619217721Speter  preg->fastmap = 0;
619325839Speter
619417721Speter  if (cflags & REG_ICASE)
619517721Speter    {
619617721Speter      unsigned i;
619725839Speter
619866525Speter      preg->translate
619966525Speter	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
620066525Speter				      * sizeof (*(RE_TRANSLATE_TYPE)0));
620117721Speter      if (preg->translate == NULL)
620217721Speter        return (int) REG_ESPACE;
620317721Speter
620417721Speter      /* Map uppercase characters to corresponding lowercase ones.  */
620517721Speter      for (i = 0; i < CHAR_SET_SIZE; i++)
620617721Speter        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
620717721Speter    }
620817721Speter  else
620917721Speter    preg->translate = NULL;
621017721Speter
621117721Speter  /* If REG_NEWLINE is set, newlines are treated differently.  */
621217721Speter  if (cflags & REG_NEWLINE)
621317721Speter    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
621417721Speter      syntax &= ~RE_DOT_NEWLINE;
621517721Speter      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
621617721Speter      /* It also changes the matching behavior.  */
621717721Speter      preg->newline_anchor = 1;
621817721Speter    }
621917721Speter  else
622017721Speter    preg->newline_anchor = 0;
622117721Speter
622217721Speter  preg->no_sub = !!(cflags & REG_NOSUB);
622317721Speter
622425839Speter  /* POSIX says a null character in the pattern terminates it, so we
622517721Speter     can use strlen here in compiling the pattern.  */
622617721Speter  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
622725839Speter
622817721Speter  /* POSIX doesn't distinguish between an unmatched open-group and an
622917721Speter     unmatched close-group: both are REG_EPAREN.  */
623017721Speter  if (ret == REG_ERPAREN) ret = REG_EPAREN;
623125839Speter
623217721Speter  return (int) ret;
623317721Speter}
623417721Speter
623517721Speter
623617721Speter/* regexec searches for a given pattern, specified by PREG, in the
623717721Speter   string STRING.
623825839Speter
623917721Speter   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
624017721Speter   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
624117721Speter   least NMATCH elements, and we set them to the offsets of the
624217721Speter   corresponding matched substrings.
624325839Speter
624417721Speter   EFLAGS specifies `execution flags' which affect matching: if
624517721Speter   REG_NOTBOL is set, then ^ does not match at the beginning of the
624617721Speter   string; if REG_NOTEOL is set, then $ does not match at the end.
624725839Speter
624817721Speter   We return 0 if we find a match and REG_NOMATCH if not.  */
624917721Speter
625017721Speterint
625117721Speterregexec (preg, string, nmatch, pmatch, eflags)
625217721Speter    const regex_t *preg;
625325839Speter    const char *string;
625425839Speter    size_t nmatch;
625525839Speter    regmatch_t pmatch[];
625617721Speter    int eflags;
625717721Speter{
625817721Speter  int ret;
625917721Speter  struct re_registers regs;
626017721Speter  regex_t private_preg;
626117721Speter  int len = strlen (string);
626217721Speter  boolean want_reg_info = !preg->no_sub && nmatch > 0;
626317721Speter
626417721Speter  private_preg = *preg;
626525839Speter
626617721Speter  private_preg.not_bol = !!(eflags & REG_NOTBOL);
626717721Speter  private_preg.not_eol = !!(eflags & REG_NOTEOL);
626825839Speter
626917721Speter  /* The user has told us exactly how many registers to return
627017721Speter     information about, via `nmatch'.  We have to pass that on to the
627117721Speter     matching routines.  */
627217721Speter  private_preg.regs_allocated = REGS_FIXED;
627325839Speter
627417721Speter  if (want_reg_info)
627517721Speter    {
627617721Speter      regs.num_regs = nmatch;
627717721Speter      regs.start = TALLOC (nmatch, regoff_t);
627817721Speter      regs.end = TALLOC (nmatch, regoff_t);
627917721Speter      if (regs.start == NULL || regs.end == NULL)
628017721Speter        return (int) REG_NOMATCH;
628117721Speter    }
628217721Speter
628317721Speter  /* Perform the searching operation.  */
628417721Speter  ret = re_search (&private_preg, string, len,
628517721Speter                   /* start: */ 0, /* range: */ len,
628617721Speter                   want_reg_info ? &regs : (struct re_registers *) 0);
628725839Speter
628817721Speter  /* Copy the register information to the POSIX structure.  */
628917721Speter  if (want_reg_info)
629017721Speter    {
629117721Speter      if (ret >= 0)
629217721Speter        {
629317721Speter          unsigned r;
629417721Speter
629517721Speter          for (r = 0; r < nmatch; r++)
629617721Speter            {
629717721Speter              pmatch[r].rm_so = regs.start[r];
629817721Speter              pmatch[r].rm_eo = regs.end[r];
629917721Speter            }
630017721Speter        }
630117721Speter
630217721Speter      /* If we needed the temporary register info, free the space now.  */
630317721Speter      free (regs.start);
630417721Speter      free (regs.end);
630517721Speter    }
630617721Speter
630717721Speter  /* We want zero return to mean success, unlike `re_search'.  */
630817721Speter  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
630917721Speter}
631017721Speter
631117721Speter
631217721Speter/* Returns a message corresponding to an error code, ERRCODE, returned
631317721Speter   from either regcomp or regexec.   We don't use PREG here.  */
631417721Speter
631517721Spetersize_t
631617721Speterregerror (errcode, preg, errbuf, errbuf_size)
631717721Speter    int errcode;
631817721Speter    const regex_t *preg;
631917721Speter    char *errbuf;
632017721Speter    size_t errbuf_size;
632117721Speter{
632217721Speter  const char *msg;
632317721Speter  size_t msg_size;
632417721Speter
632517721Speter  if (errcode < 0
632666525Speter      || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
632725839Speter    /* Only error codes returned by the rest of the code should be passed
632817721Speter       to this routine.  If we are given anything else, or if other regex
632917721Speter       code generates an invalid error code, then the program has a bug.
633017721Speter       Dump core so we can fix it.  */
633117721Speter    abort ();
633217721Speter
633366525Speter  msg = gettext (re_error_msgid[errcode]);
633417721Speter
633517721Speter  msg_size = strlen (msg) + 1; /* Includes the null.  */
633625839Speter
633717721Speter  if (errbuf_size != 0)
633817721Speter    {
633917721Speter      if (msg_size > errbuf_size)
634017721Speter        {
634117721Speter          strncpy (errbuf, msg, errbuf_size - 1);
634217721Speter          errbuf[errbuf_size - 1] = 0;
634317721Speter        }
634417721Speter      else
634517721Speter        strcpy (errbuf, msg);
634617721Speter    }
634717721Speter
634817721Speter  return msg_size;
634917721Speter}
635017721Speter
635117721Speter
635217721Speter/* Free dynamically allocated space used by PREG.  */
635317721Speter
635417721Spetervoid
635517721Speterregfree (preg)
635617721Speter    regex_t *preg;
635717721Speter{
635817721Speter  if (preg->buffer != NULL)
635917721Speter    free (preg->buffer);
636017721Speter  preg->buffer = NULL;
636125839Speter
636217721Speter  preg->allocated = 0;
636317721Speter  preg->used = 0;
636417721Speter
636517721Speter  if (preg->fastmap != NULL)
636617721Speter    free (preg->fastmap);
636717721Speter  preg->fastmap = NULL;
636817721Speter  preg->fastmap_accurate = 0;
636917721Speter
637017721Speter  if (preg->translate != NULL)
637117721Speter    free (preg->translate);
637217721Speter  preg->translate = NULL;
637317721Speter}
637417721Speter
637517721Speter#endif /* not emacs  */
6376