1/* Extended regular expression matching and search library, version
2   0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
3   internationalization features.)
4
5   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
6                 2002, 2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
21   USA.  */
22
23/* TODO:
24   - structure the opcode space into opcode+flag.
25   - merge with glibc's regex.[ch].
26   - replace (succeed_n + jump_n + set_number_at) with something that doesn't
27     need to modify the compiled regexp so that re_match can be reentrant.
28   - get rid of on_failure_jump_smart by doing the optimization in re_comp
29     rather than at run-time, so that re_match can be reentrant.
30*/
31
32/* AIX requires this to be the first thing in the file. */
33#if defined _AIX && !defined REGEX_MALLOC
34  #pragma alloca
35#endif
36
37#ifdef HAVE_CONFIG_H
38# include <config.h>
39#endif
40
41#if defined STDC_HEADERS
42# include <stddef.h>
43#else
44/* We need this for `regex.h', and perhaps for the Emacs include files.  */
45# include <sys/types.h>
46#endif
47
48/* Whether to use ISO C Amendment 1 wide char functions.
49   Those should not be used for Emacs since it uses its own.  */
50#if defined _LIBC
51#define WIDE_CHAR_SUPPORT 1
52#else
53#define WIDE_CHAR_SUPPORT \
54	(HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
55#endif
56
57/* For platform which support the ISO C amendement 1 functionality we
58   support user defined character classes.  */
59#if WIDE_CHAR_SUPPORT
60/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
61# include <wchar.h>
62# include <wctype.h>
63#endif
64
65#ifdef _LIBC
66/* We have to keep the namespace clean.  */
67# define regfree(preg) __regfree (preg)
68# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
69# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
70# define regerror(err_code, preg, errbuf, errbuf_size) \
71	__regerror(err_code, preg, errbuf, errbuf_size)
72# define re_set_registers(bu, re, nu, st, en) \
73	__re_set_registers (bu, re, nu, st, en)
74# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
75	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
76# define re_match(bufp, string, size, pos, regs) \
77	__re_match (bufp, string, size, pos, regs)
78# define re_search(bufp, string, size, startpos, range, regs) \
79	__re_search (bufp, string, size, startpos, range, regs)
80# define re_compile_pattern(pattern, length, bufp) \
81	__re_compile_pattern (pattern, length, bufp)
82# define re_set_syntax(syntax) __re_set_syntax (syntax)
83# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
84	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
85# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
86
87/* Make sure we call libc's function even if the user overrides them.  */
88# define btowc __btowc
89# define iswctype __iswctype
90# define wctype __wctype
91
92# define WEAK_ALIAS(a,b) weak_alias (a, b)
93
94/* We are also using some library internals.  */
95# include <locale/localeinfo.h>
96# include <locale/elem-hash.h>
97# include <langinfo.h>
98#else
99# define WEAK_ALIAS(a,b)
100#endif
101
102/* This is for other GNU distributions with internationalized messages.  */
103#if HAVE_LIBINTL_H || defined _LIBC
104# include <libintl.h>
105#else
106# define gettext(msgid) (msgid)
107#endif
108
109#ifndef gettext_noop
110/* This define is so xgettext can find the internationalizable
111   strings.  */
112# define gettext_noop(String) String
113#endif
114
115/* The `emacs' switch turns on certain matching commands
116   that make sense only in Emacs. */
117#ifdef emacs
118
119# include "lisp.h"
120# include "buffer.h"
121
122/* Make syntax table lookup grant data in gl_state.  */
123# define SYNTAX_ENTRY_VIA_PROPERTY
124
125# include "syntax.h"
126# include "charset.h"
127# include "category.h"
128
129# ifdef malloc
130#  undef malloc
131# endif
132# define malloc xmalloc
133# ifdef realloc
134#  undef realloc
135# endif
136# define realloc xrealloc
137# ifdef free
138#  undef free
139# endif
140# define free xfree
141
142/* Converts the pointer to the char to BEG-based offset from the start.  */
143# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
144# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
145
146# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
147# define RE_STRING_CHAR(p, s) \
148  (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
149# define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
150  (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
151
152/* Set C a (possibly multibyte) character before P.  P points into a
153   string which is the virtual concatenation of STR1 (which ends at
154   END1) or STR2 (which ends at END2).  */
155# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)		\
156  do {									\
157    if (multibyte)							\
158       {						    		\
159	 re_char *dtemp = (p) == (str2) ? (end1) : (p);		    	\
160	 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
161	 re_char *d0 = dtemp;						\
162	 PREV_CHAR_BOUNDARY (d0, dlimit);				\
163	 c = STRING_CHAR (d0, dtemp - d0);				\
164       }						    		\
165     else						    		\
166       (c = ((p) == (str2) ? (end1) : (p))[-1]);			\
167  } while (0)
168
169
170#else  /* not emacs */
171
172/* If we are not linking with Emacs proper,
173   we can't use the relocating allocator
174   even if config.h says that we can.  */
175# undef REL_ALLOC
176
177# if defined STDC_HEADERS || defined _LIBC
178#  include <stdlib.h>
179# else
180char *malloc ();
181char *realloc ();
182# endif
183
184/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
185
186void *
187xmalloc (size)
188     size_t size;
189{
190  register void *val;
191  val = (void *) malloc (size);
192  if (!val && size)
193    {
194      write (2, "virtual memory exhausted\n", 25);
195      exit (1);
196    }
197  return val;
198}
199
200void *
201xrealloc (block, size)
202     void *block;
203     size_t size;
204{
205  register void *val;
206  /* We must call malloc explicitly when BLOCK is 0, since some
207     reallocs don't do this.  */
208  if (! block)
209    val = (void *) malloc (size);
210  else
211    val = (void *) realloc (block, size);
212  if (!val && size)
213    {
214      write (2, "virtual memory exhausted\n", 25);
215      exit (1);
216    }
217  return val;
218}
219
220# ifdef malloc
221#  undef malloc
222# endif
223# define malloc xmalloc
224# ifdef realloc
225#  undef realloc
226# endif
227# define realloc xrealloc
228
229/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
230   If nothing else has been done, use the method below.  */
231# ifdef INHIBIT_STRING_HEADER
232#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
233#   if !defined bzero && !defined bcopy
234#    undef INHIBIT_STRING_HEADER
235#   endif
236#  endif
237# endif
238
239/* This is the normal way of making sure we have memcpy, memcmp and bzero.
240   This is used in most programs--a few other programs avoid this
241   by defining INHIBIT_STRING_HEADER.  */
242# ifndef INHIBIT_STRING_HEADER
243#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
244#   include <string.h>
245#   ifndef bzero
246#    ifndef _LIBC
247#     define bzero(s, n)	(memset (s, '\0', n), (s))
248#    else
249#     define bzero(s, n)	__bzero (s, n)
250#    endif
251#   endif
252#  else
253#   include <strings.h>
254#   ifndef memcmp
255#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
256#   endif
257#   ifndef memcpy
258#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
259#   endif
260#  endif
261# endif
262
263/* Define the syntax stuff for \<, \>, etc.  */
264
265/* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
266enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
267
268# ifdef SWITCH_ENUM_BUG
269#  define SWITCH_ENUM_CAST(x) ((int)(x))
270# else
271#  define SWITCH_ENUM_CAST(x) (x)
272# endif
273
274/* Dummy macros for non-Emacs environments.  */
275# define BASE_LEADING_CODE_P(c) (0)
276# define CHAR_CHARSET(c) 0
277# define CHARSET_LEADING_CODE_BASE(c) 0
278# define MAX_MULTIBYTE_LENGTH 1
279# define RE_MULTIBYTE_P(x) 0
280# define WORD_BOUNDARY_P(c1, c2) (0)
281# define CHAR_HEAD_P(p) (1)
282# define SINGLE_BYTE_CHAR_P(c) (1)
283# define SAME_CHARSET_P(c1, c2) (1)
284# define MULTIBYTE_FORM_LENGTH(p, s) (1)
285# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
286# define STRING_CHAR(p, s) (*(p))
287# define RE_STRING_CHAR STRING_CHAR
288# define CHAR_STRING(c, s) (*(s) = (c), 1)
289# define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
290# define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
291# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
292  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
293# define MAKE_CHAR(charset, c1, c2) (c1)
294#endif /* not emacs */
295
296#ifndef RE_TRANSLATE
297# define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
298# define RE_TRANSLATE_P(TBL) (TBL)
299#endif
300
301/* Get the interface, including the syntax bits.  */
302#include "regex.h"
303
304/* isalpha etc. are used for the character classes.  */
305#include <ctype.h>
306
307#ifdef emacs
308
309/* 1 if C is an ASCII character.  */
310# define IS_REAL_ASCII(c) ((c) < 0200)
311
312/* 1 if C is a unibyte character.  */
313# define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
314
315/* The Emacs definitions should not be directly affected by locales.  */
316
317/* In Emacs, these are only used for single-byte characters.  */
318# define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
319# define ISCNTRL(c) ((c) < ' ')
320# define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')		\
321		     || ((c) >= 'a' && (c) <= 'f')	\
322		     || ((c) >= 'A' && (c) <= 'F'))
323
324/* This is only used for single-byte characters.  */
325# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
326
327/* The rest must handle multibyte characters.  */
328
329# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)				\
330		    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)	\
331		    : 1)
332
333# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)				\
334		    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)	\
335		    : 1)
336
337# define ISALNUM(c) (IS_REAL_ASCII (c)			\
338		    ? (((c) >= 'a' && (c) <= 'z')	\
339		       || ((c) >= 'A' && (c) <= 'Z')	\
340		       || ((c) >= '0' && (c) <= '9'))	\
341		    : SYNTAX (c) == Sword)
342
343# define ISALPHA(c) (IS_REAL_ASCII (c)			\
344		    ? (((c) >= 'a' && (c) <= 'z')	\
345		       || ((c) >= 'A' && (c) <= 'Z'))	\
346		    : SYNTAX (c) == Sword)
347
348# define ISLOWER(c) (LOWERCASEP (c))
349
350# define ISPUNCT(c) (IS_REAL_ASCII (c)				\
351		    ? ((c) > ' ' && (c) < 0177			\
352		       && !(((c) >= 'a' && (c) <= 'z')		\
353		            || ((c) >= 'A' && (c) <= 'Z')	\
354		            || ((c) >= '0' && (c) <= '9')))	\
355		    : SYNTAX (c) != Sword)
356
357# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
358
359# define ISUPPER(c) (UPPERCASEP (c))
360
361# define ISWORD(c) (SYNTAX (c) == Sword)
362
363#else /* not emacs */
364
365/* Jim Meyering writes:
366
367   "... Some ctype macros are valid only for character codes that
368   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
369   using /bin/cc or gcc but without giving an ansi option).  So, all
370   ctype uses should be through macros like ISPRINT...  If
371   STDC_HEADERS is defined, then autoconf has verified that the ctype
372   macros don't need to be guarded with references to isascii. ...
373   Defining isascii to 1 should let any compiler worth its salt
374   eliminate the && through constant folding."
375   Solaris defines some of these symbols so we must undefine them first.  */
376
377# undef ISASCII
378# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
379#  define ISASCII(c) 1
380# else
381#  define ISASCII(c) isascii(c)
382# endif
383
384/* 1 if C is an ASCII character.  */
385# define IS_REAL_ASCII(c) ((c) < 0200)
386
387/* This distinction is not meaningful, except in Emacs.  */
388# define ISUNIBYTE(c) 1
389
390# ifdef isblank
391#  define ISBLANK(c) (ISASCII (c) && isblank (c))
392# else
393#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
394# endif
395# ifdef isgraph
396#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
397# else
398#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
399# endif
400
401# undef ISPRINT
402# define ISPRINT(c) (ISASCII (c) && isprint (c))
403# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
404# define ISALNUM(c) (ISASCII (c) && isalnum (c))
405# define ISALPHA(c) (ISASCII (c) && isalpha (c))
406# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
407# define ISLOWER(c) (ISASCII (c) && islower (c))
408# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
409# define ISSPACE(c) (ISASCII (c) && isspace (c))
410# define ISUPPER(c) (ISASCII (c) && isupper (c))
411# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
412
413# define ISWORD(c) ISALPHA(c)
414
415# ifdef _tolower
416#  define TOLOWER(c) _tolower(c)
417# else
418#  define TOLOWER(c) tolower(c)
419# endif
420
421/* How many characters in the character set.  */
422# define CHAR_SET_SIZE 256
423
424# ifdef SYNTAX_TABLE
425
426extern char *re_syntax_table;
427
428# else /* not SYNTAX_TABLE */
429
430static char re_syntax_table[CHAR_SET_SIZE];
431
432static void
433init_syntax_once ()
434{
435   register int c;
436   static int done = 0;
437
438   if (done)
439     return;
440
441   bzero (re_syntax_table, sizeof re_syntax_table);
442
443   for (c = 0; c < CHAR_SET_SIZE; ++c)
444     if (ISALNUM (c))
445	re_syntax_table[c] = Sword;
446
447   re_syntax_table['_'] = Ssymbol;
448
449   done = 1;
450}
451
452# endif /* not SYNTAX_TABLE */
453
454# define SYNTAX(c) re_syntax_table[(c)]
455
456#endif /* not emacs */
457
458#ifndef NULL
459# define NULL (void *)0
460#endif
461
462/* We remove any previous definition of `SIGN_EXTEND_CHAR',
463   since ours (we hope) works properly with all combinations of
464   machines, compilers, `char' and `unsigned char' argument types.
465   (Per Bothner suggested the basic approach.)  */
466#undef SIGN_EXTEND_CHAR
467#if __STDC__
468# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
469#else  /* not __STDC__ */
470/* As in Harbison and Steele.  */
471# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
472#endif
473
474/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
475   use `alloca' instead of `malloc'.  This is because using malloc in
476   re_search* or re_match* could cause memory leaks when C-g is used in
477   Emacs; also, malloc is slower and causes storage fragmentation.  On
478   the other hand, malloc is more portable, and easier to debug.
479
480   Because we sometimes use alloca, some routines have to be macros,
481   not functions -- `alloca'-allocated space disappears at the end of the
482   function it is called in.  */
483
484#ifdef REGEX_MALLOC
485
486# define REGEX_ALLOCATE malloc
487# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
488# define REGEX_FREE free
489
490#else /* not REGEX_MALLOC  */
491
492/* Emacs already defines alloca, sometimes.  */
493# ifndef alloca
494
495/* Make alloca work the best possible way.  */
496#  ifdef __GNUC__
497#   define alloca __builtin_alloca
498#  else /* not __GNUC__ */
499#   if HAVE_ALLOCA_H
500#    include <alloca.h>
501#   endif /* HAVE_ALLOCA_H */
502#  endif /* not __GNUC__ */
503
504# endif /* not alloca */
505
506# define REGEX_ALLOCATE alloca
507
508/* Assumes a `char *destination' variable.  */
509# define REGEX_REALLOCATE(source, osize, nsize)				\
510  (destination = (char *) alloca (nsize),				\
511   memcpy (destination, source, osize))
512
513/* No need to do anything to free, after alloca.  */
514# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
515
516#endif /* not REGEX_MALLOC */
517
518/* Define how to allocate the failure stack.  */
519
520#if defined REL_ALLOC && defined REGEX_MALLOC
521
522# define REGEX_ALLOCATE_STACK(size)				\
523  r_alloc (&failure_stack_ptr, (size))
524# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
525  r_re_alloc (&failure_stack_ptr, (nsize))
526# define REGEX_FREE_STACK(ptr)					\
527  r_alloc_free (&failure_stack_ptr)
528
529#else /* not using relocating allocator */
530
531# ifdef REGEX_MALLOC
532
533#  define REGEX_ALLOCATE_STACK malloc
534#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
535#  define REGEX_FREE_STACK free
536
537# else /* not REGEX_MALLOC */
538
539#  define REGEX_ALLOCATE_STACK alloca
540
541#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
542   REGEX_REALLOCATE (source, osize, nsize)
543/* No need to explicitly free anything.  */
544#  define REGEX_FREE_STACK(arg) ((void)0)
545
546# endif /* not REGEX_MALLOC */
547#endif /* not using relocating allocator */
548
549
550/* True if `size1' is non-NULL and PTR is pointing anywhere inside
551   `string1' or just past its end.  This works if PTR is NULL, which is
552   a good thing.  */
553#define FIRST_STRING_P(ptr)					\
554  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
555
556/* (Re)Allocate N items of type T using malloc, or fail.  */
557#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
558#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
559#define RETALLOC_IF(addr, n, t) \
560  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
561#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
562
563#define BYTEWIDTH 8 /* In bits.  */
564
565#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
566
567#undef MAX
568#undef MIN
569#define MAX(a, b) ((a) > (b) ? (a) : (b))
570#define MIN(a, b) ((a) < (b) ? (a) : (b))
571
572/* Type of source-pattern and string chars.  */
573typedef const unsigned char re_char;
574
575typedef char boolean;
576#define false 0
577#define true 1
578
579static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
580					re_char *string1, int size1,
581					re_char *string2, int size2,
582					int pos,
583					struct re_registers *regs,
584					int stop));
585
586/* These are the command codes that appear in compiled regular
587   expressions.  Some opcodes are followed by argument bytes.  A
588   command code can specify any interpretation whatsoever for its
589   arguments.  Zero bytes may appear in the compiled regular expression.  */
590
591typedef enum
592{
593  no_op = 0,
594
595  /* Succeed right away--no more backtracking.  */
596  succeed,
597
598	/* Followed by one byte giving n, then by n literal bytes.  */
599  exactn,
600
601	/* Matches any (more or less) character.  */
602  anychar,
603
604	/* Matches any one char belonging to specified set.  First
605	   following byte is number of bitmap bytes.  Then come bytes
606	   for a bitmap saying which chars are in.  Bits in each byte
607	   are ordered low-bit-first.  A character is in the set if its
608	   bit is 1.  A character too large to have a bit in the map is
609	   automatically not in the set.
610
611	   If the length byte has the 0x80 bit set, then that stuff
612	   is followed by a range table:
613	       2 bytes of flags for character sets (low 8 bits, high 8 bits)
614		   See RANGE_TABLE_WORK_BITS below.
615	       2 bytes, the number of pairs that follow (upto 32767)
616	       pairs, each 2 multibyte characters,
617		   each multibyte character represented as 3 bytes.  */
618  charset,
619
620	/* Same parameters as charset, but match any character that is
621	   not one of those specified.  */
622  charset_not,
623
624	/* Start remembering the text that is matched, for storing in a
625	   register.  Followed by one byte with the register number, in
626	   the range 0 to one less than the pattern buffer's re_nsub
627	   field.  */
628  start_memory,
629
630	/* Stop remembering the text that is matched and store it in a
631	   memory register.  Followed by one byte with the register
632	   number, in the range 0 to one less than `re_nsub' in the
633	   pattern buffer.  */
634  stop_memory,
635
636	/* Match a duplicate of something remembered. Followed by one
637	   byte containing the register number.  */
638  duplicate,
639
640	/* Fail unless at beginning of line.  */
641  begline,
642
643	/* Fail unless at end of line.  */
644  endline,
645
646	/* Succeeds if at beginning of buffer (if emacs) or at beginning
647	   of string to be matched (if not).  */
648  begbuf,
649
650	/* Analogously, for end of buffer/string.  */
651  endbuf,
652
653	/* Followed by two byte relative address to which to jump.  */
654  jump,
655
656	/* Followed by two-byte relative address of place to resume at
657	   in case of failure.  */
658  on_failure_jump,
659
660	/* Like on_failure_jump, but pushes a placeholder instead of the
661	   current string position when executed.  */
662  on_failure_keep_string_jump,
663
664	/* Just like `on_failure_jump', except that it checks that we
665	   don't get stuck in an infinite loop (matching an empty string
666	   indefinitely).  */
667  on_failure_jump_loop,
668
669	/* Just like `on_failure_jump_loop', except that it checks for
670	   a different kind of loop (the kind that shows up with non-greedy
671	   operators).  This operation has to be immediately preceded
672	   by a `no_op'.  */
673  on_failure_jump_nastyloop,
674
675	/* A smart `on_failure_jump' used for greedy * and + operators.
676	   It analyses the loop before which it is put and if the
677	   loop does not require backtracking, it changes itself to
678	   `on_failure_keep_string_jump' and short-circuits the loop,
679	   else it just defaults to changing itself into `on_failure_jump'.
680	   It assumes that it is pointing to just past a `jump'.  */
681  on_failure_jump_smart,
682
683	/* Followed by two-byte relative address and two-byte number n.
684	   After matching N times, jump to the address upon failure.
685	   Does not work if N starts at 0: use on_failure_jump_loop
686	   instead.  */
687  succeed_n,
688
689	/* Followed by two-byte relative address, and two-byte number n.
690	   Jump to the address N times, then fail.  */
691  jump_n,
692
693	/* Set the following two-byte relative address to the
694	   subsequent two-byte number.  The address *includes* the two
695	   bytes of number.  */
696  set_number_at,
697
698  wordbeg,	/* Succeeds if at word beginning.  */
699  wordend,	/* Succeeds if at word end.  */
700
701  wordbound,	/* Succeeds if at a word boundary.  */
702  notwordbound,	/* Succeeds if not at a word boundary.  */
703
704  symbeg,       /* Succeeds if at symbol beginning.  */
705  symend,       /* Succeeds if at symbol end.  */
706
707	/* Matches any character whose syntax is specified.  Followed by
708	   a byte which contains a syntax code, e.g., Sword.  */
709  syntaxspec,
710
711	/* Matches any character whose syntax is not that specified.  */
712  notsyntaxspec
713
714#ifdef emacs
715  ,before_dot,	/* Succeeds if before point.  */
716  at_dot,	/* Succeeds if at point.  */
717  after_dot,	/* Succeeds if after point.  */
718
719  /* Matches any character whose category-set contains the specified
720     category.  The operator is followed by a byte which contains a
721     category code (mnemonic ASCII character).  */
722  categoryspec,
723
724  /* Matches any character whose category-set does not contain the
725     specified category.  The operator is followed by a byte which
726     contains the category code (mnemonic ASCII character).  */
727  notcategoryspec
728#endif /* emacs */
729} re_opcode_t;
730
731/* Common operations on the compiled pattern.  */
732
733/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
734
735#define STORE_NUMBER(destination, number)				\
736  do {									\
737    (destination)[0] = (number) & 0377;					\
738    (destination)[1] = (number) >> 8;					\
739  } while (0)
740
741/* Same as STORE_NUMBER, except increment DESTINATION to
742   the byte after where the number is stored.  Therefore, DESTINATION
743   must be an lvalue.  */
744
745#define STORE_NUMBER_AND_INCR(destination, number)			\
746  do {									\
747    STORE_NUMBER (destination, number);					\
748    (destination) += 2;							\
749  } while (0)
750
751/* Put into DESTINATION a number stored in two contiguous bytes starting
752   at SOURCE.  */
753
754#define EXTRACT_NUMBER(destination, source)				\
755  do {									\
756    (destination) = *(source) & 0377;					\
757    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
758  } while (0)
759
760#ifdef DEBUG
761static void extract_number _RE_ARGS ((int *dest, re_char *source));
762static void
763extract_number (dest, source)
764    int *dest;
765    re_char *source;
766{
767  int temp = SIGN_EXTEND_CHAR (*(source + 1));
768  *dest = *source & 0377;
769  *dest += temp << 8;
770}
771
772# ifndef EXTRACT_MACROS /* To debug the macros.  */
773#  undef EXTRACT_NUMBER
774#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
775# endif /* not EXTRACT_MACROS */
776
777#endif /* DEBUG */
778
779/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
780   SOURCE must be an lvalue.  */
781
782#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
783  do {									\
784    EXTRACT_NUMBER (destination, source);				\
785    (source) += 2;							\
786  } while (0)
787
788#ifdef DEBUG
789static void extract_number_and_incr _RE_ARGS ((int *destination,
790					       re_char **source));
791static void
792extract_number_and_incr (destination, source)
793    int *destination;
794    re_char **source;
795{
796  extract_number (destination, *source);
797  *source += 2;
798}
799
800# ifndef EXTRACT_MACROS
801#  undef EXTRACT_NUMBER_AND_INCR
802#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
803  extract_number_and_incr (&dest, &src)
804# endif /* not EXTRACT_MACROS */
805
806#endif /* DEBUG */
807
808/* Store a multibyte character in three contiguous bytes starting
809   DESTINATION, and increment DESTINATION to the byte after where the
810   character is stored.  Therefore, DESTINATION must be an lvalue.  */
811
812#define STORE_CHARACTER_AND_INCR(destination, character)	\
813  do {								\
814    (destination)[0] = (character) & 0377;			\
815    (destination)[1] = ((character) >> 8) & 0377;		\
816    (destination)[2] = (character) >> 16;			\
817    (destination) += 3;						\
818  } while (0)
819
820/* Put into DESTINATION a character stored in three contiguous bytes
821   starting at SOURCE.  */
822
823#define EXTRACT_CHARACTER(destination, source)	\
824  do {						\
825    (destination) = ((source)[0]		\
826		     | ((source)[1] << 8)	\
827		     | ((source)[2] << 16));	\
828  } while (0)
829
830
831/* Macros for charset. */
832
833/* Size of bitmap of charset P in bytes.  P is a start of charset,
834   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
835#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
836
837/* Nonzero if charset P has range table.  */
838#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
839
840/* Return the address of range table of charset P.  But not the start
841   of table itself, but the before where the number of ranges is
842   stored.  `2 +' means to skip re_opcode_t and size of bitmap,
843   and the 2 bytes of flags at the start of the range table.  */
844#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
845
846/* Extract the bit flags that start a range table.  */
847#define CHARSET_RANGE_TABLE_BITS(p)		\
848  ((p)[2 + CHARSET_BITMAP_SIZE (p)]		\
849   + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
850
851/* Test if C is listed in the bitmap of charset P.  */
852#define CHARSET_LOOKUP_BITMAP(p, c)				\
853  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
854   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
855
856/* Return the address of end of RANGE_TABLE.  COUNT is number of
857   ranges (which is a pair of (start, end)) in the RANGE_TABLE.  `* 2'
858   is start of range and end of range.  `* 3' is size of each start
859   and end.  */
860#define CHARSET_RANGE_TABLE_END(range_table, count)	\
861  ((range_table) + (count) * 2 * 3)
862
863/* Test if C is in RANGE_TABLE.  A flag NOT is negated if C is in.
864   COUNT is number of ranges in RANGE_TABLE.  */
865#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
866  do									\
867    {									\
868      re_wchar_t range_start, range_end;				\
869      re_char *p;							\
870      re_char *range_table_end						\
871	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
872									\
873      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
874	{								\
875	  EXTRACT_CHARACTER (range_start, p);				\
876	  EXTRACT_CHARACTER (range_end, p + 3);				\
877									\
878	  if (range_start <= (c) && (c) <= range_end)			\
879	    {								\
880	      (not) = !(not);						\
881	      break;							\
882	    }								\
883	}								\
884    }									\
885  while (0)
886
887/* Test if C is in range table of CHARSET.  The flag NOT is negated if
888   C is listed in it.  */
889#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
890  do									\
891    {									\
892      /* Number of ranges in range table. */				\
893      int count;							\
894      re_char *range_table = CHARSET_RANGE_TABLE (charset);		\
895      									\
896      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
897      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
898    }									\
899  while (0)
900
901/* If DEBUG is defined, Regex prints many voluminous messages about what
902   it is doing (if the variable `debug' is nonzero).  If linked with the
903   main program in `iregex.c', you can enter patterns and strings
904   interactively.  And if linked with the main program in `main.c' and
905   the other test files, you can run the already-written tests.  */
906
907#ifdef DEBUG
908
909/* We use standard I/O for debugging.  */
910# include <stdio.h>
911
912/* It is useful to test things that ``must'' be true when debugging.  */
913# include <assert.h>
914
915static int debug = -100000;
916
917# define DEBUG_STATEMENT(e) e
918# define DEBUG_PRINT1(x) if (debug > 0) printf (x)
919# define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
920# define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
921# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
922# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
923  if (debug > 0) print_partial_compiled_pattern (s, e)
924# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
925  if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
926
927
928/* Print the fastmap in human-readable form.  */
929
930void
931print_fastmap (fastmap)
932    char *fastmap;
933{
934  unsigned was_a_range = 0;
935  unsigned i = 0;
936
937  while (i < (1 << BYTEWIDTH))
938    {
939      if (fastmap[i++])
940	{
941	  was_a_range = 0;
942	  putchar (i - 1);
943	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
944	    {
945	      was_a_range = 1;
946	      i++;
947	    }
948	  if (was_a_range)
949	    {
950	      printf ("-");
951	      putchar (i - 1);
952	    }
953	}
954    }
955  putchar ('\n');
956}
957
958
959/* Print a compiled pattern string in human-readable form, starting at
960   the START pointer into it and ending just before the pointer END.  */
961
962void
963print_partial_compiled_pattern (start, end)
964    re_char *start;
965    re_char *end;
966{
967  int mcnt, mcnt2;
968  re_char *p = start;
969  re_char *pend = end;
970
971  if (start == NULL)
972    {
973      fprintf (stderr, "(null)\n");
974      return;
975    }
976
977  /* Loop over pattern commands.  */
978  while (p < pend)
979    {
980      fprintf (stderr, "%d:\t", p - start);
981
982      switch ((re_opcode_t) *p++)
983	{
984	case no_op:
985	  fprintf (stderr, "/no_op");
986	  break;
987
988	case succeed:
989	  fprintf (stderr, "/succeed");
990	  break;
991
992	case exactn:
993	  mcnt = *p++;
994	  fprintf (stderr, "/exactn/%d", mcnt);
995	  do
996	    {
997	      fprintf (stderr, "/%c", *p++);
998	    }
999	  while (--mcnt);
1000	  break;
1001
1002	case start_memory:
1003	  fprintf (stderr, "/start_memory/%d", *p++);
1004	  break;
1005
1006	case stop_memory:
1007	  fprintf (stderr, "/stop_memory/%d", *p++);
1008	  break;
1009
1010	case duplicate:
1011	  fprintf (stderr, "/duplicate/%d", *p++);
1012	  break;
1013
1014	case anychar:
1015	  fprintf (stderr, "/anychar");
1016	  break;
1017
1018	case charset:
1019	case charset_not:
1020	  {
1021	    register int c, last = -100;
1022	    register int in_range = 0;
1023	    int length = CHARSET_BITMAP_SIZE (p - 1);
1024	    int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
1025
1026	    fprintf (stderr, "/charset [%s",
1027		     (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
1028
1029	    if (p + *p >= pend)
1030	      fprintf (stderr, " !extends past end of pattern! ");
1031
1032	    for (c = 0; c < 256; c++)
1033	      if (c / 8 < length
1034		  && (p[1 + (c/8)] & (1 << (c % 8))))
1035		{
1036		  /* Are we starting a range?  */
1037		  if (last + 1 == c && ! in_range)
1038		    {
1039		      fprintf (stderr, "-");
1040		      in_range = 1;
1041		    }
1042		  /* Have we broken a range?  */
1043		  else if (last + 1 != c && in_range)
1044		    {
1045		      fprintf (stderr, "%c", last);
1046		      in_range = 0;
1047		    }
1048
1049		  if (! in_range)
1050		    fprintf (stderr, "%c", c);
1051
1052		  last = c;
1053	      }
1054
1055	    if (in_range)
1056	      fprintf (stderr, "%c", last);
1057
1058	    fprintf (stderr, "]");
1059
1060	    p += 1 + length;
1061
1062	    if (has_range_table)
1063	      {
1064		int count;
1065		fprintf (stderr, "has-range-table");
1066
1067		/* ??? Should print the range table; for now, just skip it.  */
1068		p += 2;		/* skip range table bits */
1069		EXTRACT_NUMBER_AND_INCR (count, p);
1070		p = CHARSET_RANGE_TABLE_END (p, count);
1071	      }
1072	  }
1073	  break;
1074
1075	case begline:
1076	  fprintf (stderr, "/begline");
1077	  break;
1078
1079	case endline:
1080	  fprintf (stderr, "/endline");
1081	  break;
1082
1083	case on_failure_jump:
1084	  extract_number_and_incr (&mcnt, &p);
1085	  fprintf (stderr, "/on_failure_jump to %d", p + mcnt - start);
1086	  break;
1087
1088	case on_failure_keep_string_jump:
1089	  extract_number_and_incr (&mcnt, &p);
1090	  fprintf (stderr, "/on_failure_keep_string_jump to %d", p + mcnt - start);
1091	  break;
1092
1093	case on_failure_jump_nastyloop:
1094	  extract_number_and_incr (&mcnt, &p);
1095	  fprintf (stderr, "/on_failure_jump_nastyloop to %d", p + mcnt - start);
1096	  break;
1097
1098	case on_failure_jump_loop:
1099	  extract_number_and_incr (&mcnt, &p);
1100	  fprintf (stderr, "/on_failure_jump_loop to %d", p + mcnt - start);
1101	  break;
1102
1103	case on_failure_jump_smart:
1104	  extract_number_and_incr (&mcnt, &p);
1105	  fprintf (stderr, "/on_failure_jump_smart to %d", p + mcnt - start);
1106	  break;
1107
1108	case jump:
1109	  extract_number_and_incr (&mcnt, &p);
1110	  fprintf (stderr, "/jump to %d", p + mcnt - start);
1111	  break;
1112
1113	case succeed_n:
1114	  extract_number_and_incr (&mcnt, &p);
1115	  extract_number_and_incr (&mcnt2, &p);
1116	  fprintf (stderr, "/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1117	  break;
1118
1119	case jump_n:
1120	  extract_number_and_incr (&mcnt, &p);
1121	  extract_number_and_incr (&mcnt2, &p);
1122	  fprintf (stderr, "/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1123	  break;
1124
1125	case set_number_at:
1126	  extract_number_and_incr (&mcnt, &p);
1127	  extract_number_and_incr (&mcnt2, &p);
1128	  fprintf (stderr, "/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
1129	  break;
1130
1131	case wordbound:
1132	  fprintf (stderr, "/wordbound");
1133	  break;
1134
1135	case notwordbound:
1136	  fprintf (stderr, "/notwordbound");
1137	  break;
1138
1139	case wordbeg:
1140	  fprintf (stderr, "/wordbeg");
1141	  break;
1142
1143	case wordend:
1144	  fprintf (stderr, "/wordend");
1145	  break;
1146
1147	case symbeg:
1148	  fprintf (stderr, "/symbeg");
1149	  break;
1150
1151	case symend:
1152	  fprintf (stderr, "/symend");
1153	  break;
1154
1155	case syntaxspec:
1156	  fprintf (stderr, "/syntaxspec");
1157	  mcnt = *p++;
1158	  fprintf (stderr, "/%d", mcnt);
1159	  break;
1160
1161	case notsyntaxspec:
1162	  fprintf (stderr, "/notsyntaxspec");
1163	  mcnt = *p++;
1164	  fprintf (stderr, "/%d", mcnt);
1165	  break;
1166
1167# ifdef emacs
1168	case before_dot:
1169	  fprintf (stderr, "/before_dot");
1170	  break;
1171
1172	case at_dot:
1173	  fprintf (stderr, "/at_dot");
1174	  break;
1175
1176	case after_dot:
1177	  fprintf (stderr, "/after_dot");
1178	  break;
1179
1180	case categoryspec:
1181	  fprintf (stderr, "/categoryspec");
1182	  mcnt = *p++;
1183	  fprintf (stderr, "/%d", mcnt);
1184	  break;
1185
1186	case notcategoryspec:
1187	  fprintf (stderr, "/notcategoryspec");
1188	  mcnt = *p++;
1189	  fprintf (stderr, "/%d", mcnt);
1190	  break;
1191# endif /* emacs */
1192
1193	case begbuf:
1194	  fprintf (stderr, "/begbuf");
1195	  break;
1196
1197	case endbuf:
1198	  fprintf (stderr, "/endbuf");
1199	  break;
1200
1201	default:
1202	  fprintf (stderr, "?%d", *(p-1));
1203	}
1204
1205      fprintf (stderr, "\n");
1206    }
1207
1208  fprintf (stderr, "%d:\tend of pattern.\n", p - start);
1209}
1210
1211
1212void
1213print_compiled_pattern (bufp)
1214    struct re_pattern_buffer *bufp;
1215{
1216  re_char *buffer = bufp->buffer;
1217
1218  print_partial_compiled_pattern (buffer, buffer + bufp->used);
1219  printf ("%ld bytes used/%ld bytes allocated.\n",
1220	  bufp->used, bufp->allocated);
1221
1222  if (bufp->fastmap_accurate && bufp->fastmap)
1223    {
1224      printf ("fastmap: ");
1225      print_fastmap (bufp->fastmap);
1226    }
1227
1228  printf ("re_nsub: %d\t", bufp->re_nsub);
1229  printf ("regs_alloc: %d\t", bufp->regs_allocated);
1230  printf ("can_be_null: %d\t", bufp->can_be_null);
1231  printf ("no_sub: %d\t", bufp->no_sub);
1232  printf ("not_bol: %d\t", bufp->not_bol);
1233  printf ("not_eol: %d\t", bufp->not_eol);
1234  printf ("syntax: %lx\n", bufp->syntax);
1235  fflush (stdout);
1236  /* Perhaps we should print the translate table?  */
1237}
1238
1239
1240void
1241print_double_string (where, string1, size1, string2, size2)
1242    re_char *where;
1243    re_char *string1;
1244    re_char *string2;
1245    int size1;
1246    int size2;
1247{
1248  int this_char;
1249
1250  if (where == NULL)
1251    printf ("(null)");
1252  else
1253    {
1254      if (FIRST_STRING_P (where))
1255	{
1256	  for (this_char = where - string1; this_char < size1; this_char++)
1257	    putchar (string1[this_char]);
1258
1259	  where = string2;
1260	}
1261
1262      for (this_char = where - string2; this_char < size2; this_char++)
1263	putchar (string2[this_char]);
1264    }
1265}
1266
1267#else /* not DEBUG */
1268
1269# undef assert
1270# define assert(e)
1271
1272# define DEBUG_STATEMENT(e)
1273# define DEBUG_PRINT1(x)
1274# define DEBUG_PRINT2(x1, x2)
1275# define DEBUG_PRINT3(x1, x2, x3)
1276# define DEBUG_PRINT4(x1, x2, x3, x4)
1277# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1278# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1279
1280#endif /* not DEBUG */
1281
1282/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1283   also be assigned to arbitrarily: each pattern buffer stores its own
1284   syntax, so it can be changed between regex compilations.  */
1285/* This has no initializer because initialized variables in Emacs
1286   become read-only after dumping.  */
1287reg_syntax_t re_syntax_options;
1288
1289
1290/* Specify the precise syntax of regexps for compilation.  This provides
1291   for compatibility for various utilities which historically have
1292   different, incompatible syntaxes.
1293
1294   The argument SYNTAX is a bit mask comprised of the various bits
1295   defined in regex.h.  We return the old syntax.  */
1296
1297reg_syntax_t
1298re_set_syntax (syntax)
1299     reg_syntax_t syntax;
1300{
1301  reg_syntax_t ret = re_syntax_options;
1302
1303  re_syntax_options = syntax;
1304  return ret;
1305}
1306WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1307
1308/* Regexp to use to replace spaces, or NULL meaning don't.  */
1309static re_char *whitespace_regexp;
1310
1311void
1312re_set_whitespace_regexp (regexp)
1313     const char *regexp;
1314{
1315  whitespace_regexp = (re_char *) regexp;
1316}
1317WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1318
1319/* This table gives an error message for each of the error codes listed
1320   in regex.h.  Obviously the order here has to be same as there.
1321   POSIX doesn't require that we do anything for REG_NOERROR,
1322   but why not be nice?  */
1323
1324static const char *re_error_msgid[] =
1325  {
1326    gettext_noop ("Success"),	/* REG_NOERROR */
1327    gettext_noop ("No match"),	/* REG_NOMATCH */
1328    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1329    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1330    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1331    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1332    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1333    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1334    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1335    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1336    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1337    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1338    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1339    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1340    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1341    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1342    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1343    gettext_noop ("Range striding over charsets") /* REG_ERANGEX  */
1344  };
1345
1346/* Avoiding alloca during matching, to placate r_alloc.  */
1347
1348/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1349   searching and matching functions should not call alloca.  On some
1350   systems, alloca is implemented in terms of malloc, and if we're
1351   using the relocating allocator routines, then malloc could cause a
1352   relocation, which might (if the strings being searched are in the
1353   ralloc heap) shift the data out from underneath the regexp
1354   routines.
1355
1356   Here's another reason to avoid allocation: Emacs
1357   processes input from X in a signal handler; processing X input may
1358   call malloc; if input arrives while a matching routine is calling
1359   malloc, then we're scrod.  But Emacs can't just block input while
1360   calling matching routines; then we don't notice interrupts when
1361   they come in.  So, Emacs blocks input around all regexp calls
1362   except the matching calls, which it leaves unprotected, in the
1363   faith that they will not malloc.  */
1364
1365/* Normally, this is fine.  */
1366#define MATCH_MAY_ALLOCATE
1367
1368/* When using GNU C, we are not REALLY using the C alloca, no matter
1369   what config.h may say.  So don't take precautions for it.  */
1370#ifdef __GNUC__
1371# undef C_ALLOCA
1372#endif
1373
1374/* The match routines may not allocate if (1) they would do it with malloc
1375   and (2) it's not safe for them to use malloc.
1376   Note that if REL_ALLOC is defined, matching would not use malloc for the
1377   failure stack, but we would still use it for the register vectors;
1378   so REL_ALLOC should not affect this.  */
1379#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1380# undef MATCH_MAY_ALLOCATE
1381#endif
1382
1383
1384/* Failure stack declarations and macros; both re_compile_fastmap and
1385   re_match_2 use a failure stack.  These have to be macros because of
1386   REGEX_ALLOCATE_STACK.  */
1387
1388
1389/* Approximate number of failure points for which to initially allocate space
1390   when matching.  If this number is exceeded, we allocate more
1391   space, so it is not a hard limit.  */
1392#ifndef INIT_FAILURE_ALLOC
1393# define INIT_FAILURE_ALLOC 20
1394#endif
1395
1396/* Roughly the maximum number of failure points on the stack.  Would be
1397   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1398   This is a variable only so users of regex can assign to it; we never
1399   change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
1400   before using it, so it should probably be a byte-count instead.  */
1401# if defined MATCH_MAY_ALLOCATE
1402/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
1403   whose default stack limit is 2mb.  In order for a larger
1404   value to work reliably, you have to try to make it accord
1405   with the process stack limit.  */
1406size_t re_max_failures = 40000;
1407# else
1408size_t re_max_failures = 4000;
1409# endif
1410
1411union fail_stack_elt
1412{
1413  re_char *pointer;
1414  /* This should be the biggest `int' that's no bigger than a pointer.  */
1415  long integer;
1416};
1417
1418typedef union fail_stack_elt fail_stack_elt_t;
1419
1420typedef struct
1421{
1422  fail_stack_elt_t *stack;
1423  size_t size;
1424  size_t avail;	/* Offset of next open position.  */
1425  size_t frame;	/* Offset of the cur constructed frame.  */
1426} fail_stack_type;
1427
1428#define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
1429#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1430
1431
1432/* Define macros to initialize and free the failure stack.
1433   Do `return -2' if the alloc fails.  */
1434
1435#ifdef MATCH_MAY_ALLOCATE
1436# define INIT_FAIL_STACK()						\
1437  do {									\
1438    fail_stack.stack = (fail_stack_elt_t *)				\
1439      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
1440			    * sizeof (fail_stack_elt_t));		\
1441									\
1442    if (fail_stack.stack == NULL)					\
1443      return -2;							\
1444									\
1445    fail_stack.size = INIT_FAILURE_ALLOC;				\
1446    fail_stack.avail = 0;						\
1447    fail_stack.frame = 0;						\
1448  } while (0)
1449
1450# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1451#else
1452# define INIT_FAIL_STACK()						\
1453  do {									\
1454    fail_stack.avail = 0;						\
1455    fail_stack.frame = 0;						\
1456  } while (0)
1457
1458# define RESET_FAIL_STACK() ((void)0)
1459#endif
1460
1461
1462/* Double the size of FAIL_STACK, up to a limit
1463   which allows approximately `re_max_failures' items.
1464
1465   Return 1 if succeeds, and 0 if either ran out of memory
1466   allocating space for it or it was already too large.
1467
1468   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1469
1470/* Factor to increase the failure stack size by
1471   when we increase it.
1472   This used to be 2, but 2 was too wasteful
1473   because the old discarded stacks added up to as much space
1474   were as ultimate, maximum-size stack.  */
1475#define FAIL_STACK_GROWTH_FACTOR 4
1476
1477#define GROW_FAIL_STACK(fail_stack)					\
1478  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
1479    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
1480   ? 0									\
1481   : ((fail_stack).stack						\
1482      = (fail_stack_elt_t *)						\
1483	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
1484	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
1485	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
1486	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1487		* FAIL_STACK_GROWTH_FACTOR))),				\
1488									\
1489      (fail_stack).stack == NULL					\
1490      ? 0								\
1491      : ((fail_stack).size						\
1492	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
1493		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1494		  * FAIL_STACK_GROWTH_FACTOR))				\
1495	    / sizeof (fail_stack_elt_t)),				\
1496	 1)))
1497
1498
1499/* Push a pointer value onto the failure stack.
1500   Assumes the variable `fail_stack'.  Probably should only
1501   be called from within `PUSH_FAILURE_POINT'.  */
1502#define PUSH_FAILURE_POINTER(item)					\
1503  fail_stack.stack[fail_stack.avail++].pointer = (item)
1504
1505/* This pushes an integer-valued item onto the failure stack.
1506   Assumes the variable `fail_stack'.  Probably should only
1507   be called from within `PUSH_FAILURE_POINT'.  */
1508#define PUSH_FAILURE_INT(item)					\
1509  fail_stack.stack[fail_stack.avail++].integer = (item)
1510
1511/* Push a fail_stack_elt_t value onto the failure stack.
1512   Assumes the variable `fail_stack'.  Probably should only
1513   be called from within `PUSH_FAILURE_POINT'.  */
1514#define PUSH_FAILURE_ELT(item)					\
1515  fail_stack.stack[fail_stack.avail++] =  (item)
1516
1517/* These three POP... operations complement the three PUSH... operations.
1518   All assume that `fail_stack' is nonempty.  */
1519#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1520#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1521#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1522
1523/* Individual items aside from the registers.  */
1524#define NUM_NONREG_ITEMS 3
1525
1526/* Used to examine the stack (to detect infinite loops).  */
1527#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
1528#define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
1529#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1530#define TOP_FAILURE_HANDLE() fail_stack.frame
1531
1532
1533#define ENSURE_FAIL_STACK(space)					\
1534while (REMAINING_AVAIL_SLOTS <= space) {				\
1535  if (!GROW_FAIL_STACK (fail_stack))					\
1536    return -2;								\
1537  DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n", (fail_stack).size);\
1538  DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1539}
1540
1541/* Push register NUM onto the stack.  */
1542#define PUSH_FAILURE_REG(num)						\
1543do {									\
1544  char *destination;							\
1545  ENSURE_FAIL_STACK(3);							\
1546  DEBUG_PRINT4 ("    Push reg %d (spanning %p -> %p)\n",		\
1547		num, regstart[num], regend[num]);			\
1548  PUSH_FAILURE_POINTER (regstart[num]);					\
1549  PUSH_FAILURE_POINTER (regend[num]);					\
1550  PUSH_FAILURE_INT (num);						\
1551} while (0)
1552
1553/* Change the counter's value to VAL, but make sure that it will
1554   be reset when backtracking.  */
1555#define PUSH_NUMBER(ptr,val)						\
1556do {									\
1557  char *destination;							\
1558  int c;								\
1559  ENSURE_FAIL_STACK(3);							\
1560  EXTRACT_NUMBER (c, ptr);						\
1561  DEBUG_PRINT4 ("    Push number %p = %d -> %d\n", ptr, c, val);	\
1562  PUSH_FAILURE_INT (c);							\
1563  PUSH_FAILURE_POINTER (ptr);						\
1564  PUSH_FAILURE_INT (-1);						\
1565  STORE_NUMBER (ptr, val);						\
1566} while (0)
1567
1568/* Pop a saved register off the stack.  */
1569#define POP_FAILURE_REG_OR_COUNT()					\
1570do {									\
1571  int reg = POP_FAILURE_INT ();						\
1572  if (reg == -1)							\
1573    {									\
1574      /* It's a counter.  */						\
1575      /* Here, we discard `const', making re_match non-reentrant.  */	\
1576      unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();	\
1577      reg = POP_FAILURE_INT ();						\
1578      STORE_NUMBER (ptr, reg);						\
1579      DEBUG_PRINT3 ("     Pop counter %p = %d\n", ptr, reg);		\
1580    }									\
1581  else									\
1582    {									\
1583      regend[reg] = POP_FAILURE_POINTER ();				\
1584      regstart[reg] = POP_FAILURE_POINTER ();				\
1585      DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",		\
1586		    reg, regstart[reg], regend[reg]);			\
1587    }									\
1588} while (0)
1589
1590/* Check that we are not stuck in an infinite loop.  */
1591#define CHECK_INFINITE_LOOP(pat_cur, string_place)			\
1592do {									\
1593  int failure = TOP_FAILURE_HANDLE ();					\
1594  /* Check for infinite matching loops */				\
1595  while (failure > 0							\
1596	 && (FAILURE_STR (failure) == string_place			\
1597	     || FAILURE_STR (failure) == NULL))				\
1598    {									\
1599      assert (FAILURE_PAT (failure) >= bufp->buffer			\
1600	      && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);	\
1601      if (FAILURE_PAT (failure) == pat_cur)				\
1602	{								\
1603	  cycle = 1;							\
1604	  break;							\
1605	}								\
1606      DEBUG_PRINT2 ("  Other pattern: %p\n", FAILURE_PAT (failure));	\
1607      failure = NEXT_FAILURE_HANDLE(failure);				\
1608    }									\
1609  DEBUG_PRINT2 ("  Other string: %p\n", FAILURE_STR (failure));		\
1610} while (0)
1611
1612/* Push the information about the state we will need
1613   if we ever fail back to it.
1614
1615   Requires variables fail_stack, regstart, regend and
1616   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
1617   declared.
1618
1619   Does `return FAILURE_CODE' if runs out of memory.  */
1620
1621#define PUSH_FAILURE_POINT(pattern, string_place)			\
1622do {									\
1623  char *destination;							\
1624  /* Must be int, so when we don't save any registers, the arithmetic	\
1625     of 0 + -1 isn't done as unsigned.  */				\
1626  									\
1627  DEBUG_STATEMENT (nfailure_points_pushed++);				\
1628  DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n");				\
1629  DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);	\
1630  DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
1631  									\
1632  ENSURE_FAIL_STACK (NUM_NONREG_ITEMS);					\
1633  									\
1634  DEBUG_PRINT1 ("\n");							\
1635  									\
1636  DEBUG_PRINT2 ("  Push frame index: %d\n", fail_stack.frame);		\
1637  PUSH_FAILURE_INT (fail_stack.frame);					\
1638  									\
1639  DEBUG_PRINT2 ("  Push string %p: `", string_place);			\
1640  DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1641  DEBUG_PRINT1 ("'\n");							\
1642  PUSH_FAILURE_POINTER (string_place);					\
1643  									\
1644  DEBUG_PRINT2 ("  Push pattern %p: ", pattern);			\
1645  DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend);			\
1646  PUSH_FAILURE_POINTER (pattern);					\
1647  									\
1648  /* Close the frame by moving the frame pointer past it.  */		\
1649  fail_stack.frame = fail_stack.avail;					\
1650} while (0)
1651
1652/* Estimate the size of data pushed by a typical failure stack entry.
1653   An estimate is all we need, because all we use this for
1654   is to choose a limit for how big to make the failure stack.  */
1655/* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
1656#define TYPICAL_FAILURE_SIZE 20
1657
1658/* How many items can still be added to the stack without overflowing it.  */
1659#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1660
1661
1662/* Pops what PUSH_FAIL_STACK pushes.
1663
1664   We restore into the parameters, all of which should be lvalues:
1665     STR -- the saved data position.
1666     PAT -- the saved pattern position.
1667     REGSTART, REGEND -- arrays of string positions.
1668
1669   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1670   `pend', `string1', `size1', `string2', and `size2'.  */
1671
1672#define POP_FAILURE_POINT(str, pat)                                     \
1673do {									\
1674  assert (!FAIL_STACK_EMPTY ());					\
1675									\
1676  /* Remove failure points and point to how many regs pushed.  */	\
1677  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1678  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1679  DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
1680									\
1681  /* Pop the saved registers.  */					\
1682  while (fail_stack.frame < fail_stack.avail)				\
1683    POP_FAILURE_REG_OR_COUNT ();					\
1684									\
1685  pat = POP_FAILURE_POINTER ();				\
1686  DEBUG_PRINT2 ("  Popping pattern %p: ", pat);				\
1687  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1688									\
1689  /* If the saved string location is NULL, it came from an		\
1690     on_failure_keep_string_jump opcode, and we want to throw away the	\
1691     saved NULL, thus retaining our current position in the string.  */	\
1692  str = POP_FAILURE_POINTER ();						\
1693  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1694  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1695  DEBUG_PRINT1 ("'\n");							\
1696									\
1697  fail_stack.frame = POP_FAILURE_INT ();				\
1698  DEBUG_PRINT2 ("  Popping  frame index: %d\n", fail_stack.frame);	\
1699									\
1700  assert (fail_stack.avail >= 0);					\
1701  assert (fail_stack.frame <= fail_stack.avail);			\
1702									\
1703  DEBUG_STATEMENT (nfailure_points_popped++);				\
1704} while (0) /* POP_FAILURE_POINT */
1705
1706
1707
1708/* Registers are set to a sentinel when they haven't yet matched.  */
1709#define REG_UNSET(e) ((e) == NULL)
1710
1711/* Subroutine declarations and macros for regex_compile.  */
1712
1713static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
1714					      reg_syntax_t syntax,
1715					      struct re_pattern_buffer *bufp));
1716static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1717static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1718				 int arg1, int arg2));
1719static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1720				  int arg, unsigned char *end));
1721static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1722				  int arg1, int arg2, unsigned char *end));
1723static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
1724					   re_char *p,
1725					   reg_syntax_t syntax));
1726static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
1727					   re_char *pend,
1728					   reg_syntax_t syntax));
1729static re_char *skip_one_char _RE_ARGS ((re_char *p));
1730static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1731				    char *fastmap, const int multibyte));
1732
1733/* Fetch the next character in the uncompiled pattern, with no
1734   translation.  */
1735#define PATFETCH(c)							\
1736  do {									\
1737    int len;								\
1738    if (p == pend) return REG_EEND;					\
1739    c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len);			\
1740    p += len;								\
1741  } while (0)
1742
1743
1744/* If `translate' is non-null, return translate[D], else just D.  We
1745   cast the subscript to translate because some data is declared as
1746   `char *', to avoid warnings when a string constant is passed.  But
1747   when we use a character as a subscript we must make it unsigned.  */
1748#ifndef TRANSLATE
1749# define TRANSLATE(d) \
1750  (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1751#endif
1752
1753
1754/* Macros for outputting the compiled pattern into `buffer'.  */
1755
1756/* If the buffer isn't allocated when it comes in, use this.  */
1757#define INIT_BUF_SIZE  32
1758
1759/* Make sure we have at least N more bytes of space in buffer.  */
1760#define GET_BUFFER_SPACE(n)						\
1761    while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated)		\
1762      EXTEND_BUFFER ()
1763
1764/* Make sure we have one more byte of buffer space and then add C to it.  */
1765#define BUF_PUSH(c)							\
1766  do {									\
1767    GET_BUFFER_SPACE (1);						\
1768    *b++ = (unsigned char) (c);						\
1769  } while (0)
1770
1771
1772/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1773#define BUF_PUSH_2(c1, c2)						\
1774  do {									\
1775    GET_BUFFER_SPACE (2);						\
1776    *b++ = (unsigned char) (c1);					\
1777    *b++ = (unsigned char) (c2);					\
1778  } while (0)
1779
1780
1781/* As with BUF_PUSH_2, except for three bytes.  */
1782#define BUF_PUSH_3(c1, c2, c3)						\
1783  do {									\
1784    GET_BUFFER_SPACE (3);						\
1785    *b++ = (unsigned char) (c1);					\
1786    *b++ = (unsigned char) (c2);					\
1787    *b++ = (unsigned char) (c3);					\
1788  } while (0)
1789
1790
1791/* Store a jump with opcode OP at LOC to location TO.  We store a
1792   relative address offset by the three bytes the jump itself occupies.  */
1793#define STORE_JUMP(op, loc, to) \
1794  store_op1 (op, loc, (to) - (loc) - 3)
1795
1796/* Likewise, for a two-argument jump.  */
1797#define STORE_JUMP2(op, loc, to, arg) \
1798  store_op2 (op, loc, (to) - (loc) - 3, arg)
1799
1800/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1801#define INSERT_JUMP(op, loc, to) \
1802  insert_op1 (op, loc, (to) - (loc) - 3, b)
1803
1804/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1805#define INSERT_JUMP2(op, loc, to, arg) \
1806  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1807
1808
1809/* This is not an arbitrary limit: the arguments which represent offsets
1810   into the pattern are two bytes long.  So if 2^15 bytes turns out to
1811   be too small, many things would have to change.  */
1812# define MAX_BUF_SIZE (1L << 15)
1813
1814#if 0  /* This is when we thought it could be 2^16 bytes.  */
1815/* Any other compiler which, like MSC, has allocation limit below 2^16
1816   bytes will have to use approach similar to what was done below for
1817   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1818   reallocating to 0 bytes.  Such thing is not going to work too well.
1819   You have been warned!!  */
1820#if defined _MSC_VER  && !defined WIN32
1821/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.  */
1822# define MAX_BUF_SIZE  65500L
1823#else
1824# define MAX_BUF_SIZE (1L << 16)
1825#endif
1826#endif /* 0 */
1827
1828/* Extend the buffer by twice its current size via realloc and
1829   reset the pointers that pointed into the old block to point to the
1830   correct places in the new one.  If extending the buffer results in it
1831   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1832#if __BOUNDED_POINTERS__
1833# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1834# define MOVE_BUFFER_POINTER(P) \
1835  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
1836# define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
1837  else						\
1838    {						\
1839      SET_HIGH_BOUND (b);			\
1840      SET_HIGH_BOUND (begalt);			\
1841      if (fixup_alt_jump)			\
1842	SET_HIGH_BOUND (fixup_alt_jump);	\
1843      if (laststart)				\
1844	SET_HIGH_BOUND (laststart);		\
1845      if (pending_exact)			\
1846	SET_HIGH_BOUND (pending_exact);		\
1847    }
1848#else
1849# define MOVE_BUFFER_POINTER(P) (P) += incr
1850# define ELSE_EXTEND_BUFFER_HIGH_BOUND
1851#endif
1852#define EXTEND_BUFFER()							\
1853  do {									\
1854    re_char *old_buffer = bufp->buffer;					\
1855    if (bufp->allocated == MAX_BUF_SIZE)				\
1856      return REG_ESIZE;							\
1857    bufp->allocated <<= 1;						\
1858    if (bufp->allocated > MAX_BUF_SIZE)					\
1859      bufp->allocated = MAX_BUF_SIZE;					\
1860    RETALLOC (bufp->buffer, bufp->allocated, unsigned char);		\
1861    if (bufp->buffer == NULL)						\
1862      return REG_ESPACE;						\
1863    /* If the buffer moved, move all the pointers into it.  */		\
1864    if (old_buffer != bufp->buffer)					\
1865      {									\
1866	ptrdiff_t incr = bufp->buffer - old_buffer;			\
1867	MOVE_BUFFER_POINTER (b);					\
1868	MOVE_BUFFER_POINTER (begalt);					\
1869	if (fixup_alt_jump)						\
1870	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
1871	if (laststart)							\
1872	  MOVE_BUFFER_POINTER (laststart);				\
1873	if (pending_exact)						\
1874	  MOVE_BUFFER_POINTER (pending_exact);				\
1875      }									\
1876    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
1877  } while (0)
1878
1879
1880/* Since we have one byte reserved for the register number argument to
1881   {start,stop}_memory, the maximum number of groups we can report
1882   things about is what fits in that byte.  */
1883#define MAX_REGNUM 255
1884
1885/* But patterns can have more than `MAX_REGNUM' registers.  We just
1886   ignore the excess.  */
1887typedef int regnum_t;
1888
1889
1890/* Macros for the compile stack.  */
1891
1892/* Since offsets can go either forwards or backwards, this type needs to
1893   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1894/* int may be not enough when sizeof(int) == 2.  */
1895typedef long pattern_offset_t;
1896
1897typedef struct
1898{
1899  pattern_offset_t begalt_offset;
1900  pattern_offset_t fixup_alt_jump;
1901  pattern_offset_t laststart_offset;
1902  regnum_t regnum;
1903} compile_stack_elt_t;
1904
1905
1906typedef struct
1907{
1908  compile_stack_elt_t *stack;
1909  unsigned size;
1910  unsigned avail;			/* Offset of next open position.  */
1911} compile_stack_type;
1912
1913
1914#define INIT_COMPILE_STACK_SIZE 32
1915
1916#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1917#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1918
1919/* The next available element.  */
1920#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1921
1922/* Explicit quit checking is only used on NTemacs and whenever we
1923   use polling to process input events.  */
1924#if defined emacs && (defined WINDOWSNT || defined SYNC_INPUT) && defined QUIT
1925extern int immediate_quit;
1926# define IMMEDIATE_QUIT_CHECK			\
1927    do {					\
1928      if (immediate_quit) QUIT;			\
1929    } while (0)
1930#else
1931# define IMMEDIATE_QUIT_CHECK    ((void)0)
1932#endif
1933
1934/* Structure to manage work area for range table.  */
1935struct range_table_work_area
1936{
1937  int *table;			/* actual work area.  */
1938  int allocated;		/* allocated size for work area in bytes.  */
1939  int used;			/* actually used size in words.  */
1940  int bits;			/* flag to record character classes */
1941};
1942
1943/* Make sure that WORK_AREA can hold more N multibyte characters.
1944   This is used only in set_image_of_range and set_image_of_range_1.
1945   It expects WORK_AREA to be a pointer.
1946   If it can't get the space, it returns from the surrounding function.  */
1947
1948#define EXTEND_RANGE_TABLE(work_area, n)				\
1949  do {									\
1950    if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \
1951      {									\
1952        extend_range_table_work_area (work_area);			\
1953        if ((work_area)->table == 0)					\
1954          return (REG_ESPACE);						\
1955      }									\
1956  } while (0)
1957
1958#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)		\
1959  (work_area).bits |= (bit)
1960
1961/* Bits used to implement the multibyte-part of the various character classes
1962   such as [:alnum:] in a charset's range table.  */
1963#define BIT_WORD	0x1
1964#define BIT_LOWER	0x2
1965#define BIT_PUNCT	0x4
1966#define BIT_SPACE	0x8
1967#define BIT_UPPER	0x10
1968#define BIT_MULTIBYTE	0x20
1969
1970/* Set a range START..END to WORK_AREA.
1971   The range is passed through TRANSLATE, so START and END
1972   should be untranslated.  */
1973#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end)		\
1974  do {									\
1975    int tem;								\
1976    tem = set_image_of_range (&work_area, start, end, translate);	\
1977    if (tem > 0)							\
1978      FREE_STACK_RETURN (tem);						\
1979  } while (0)
1980
1981/* Free allocated memory for WORK_AREA.  */
1982#define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
1983  do {						\
1984    if ((work_area).table)			\
1985      free ((work_area).table);			\
1986  } while (0)
1987
1988#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
1989#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1990#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1991#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1992
1993
1994/* Set the bit for character C in a list.  */
1995#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
1996
1997
1998/* Get the next unsigned number in the uncompiled pattern.  */
1999#define GET_UNSIGNED_NUMBER(num)					\
2000  do {									\
2001    if (p == pend)							\
2002      FREE_STACK_RETURN (REG_EBRACE);					\
2003    else								\
2004      {									\
2005	PATFETCH (c);							\
2006	while ('0' <= c && c <= '9')					\
2007	  {								\
2008	    int prev;							\
2009	    if (num < 0)						\
2010	      num = 0;							\
2011	    prev = num;							\
2012	    num = num * 10 + c - '0';					\
2013	    if (num / 10 != prev)					\
2014	      FREE_STACK_RETURN (REG_BADBR);				\
2015	    if (p == pend)						\
2016	      FREE_STACK_RETURN (REG_EBRACE);				\
2017	    PATFETCH (c);						\
2018	  }								\
2019      }									\
2020  } while (0)
2021
2022#if ! WIDE_CHAR_SUPPORT
2023
2024/* Map a string to the char class it names (if any).  */
2025re_wctype_t
2026re_wctype (str)
2027     re_char *str;
2028{
2029  const char *string = str;
2030  if      (STREQ (string, "alnum"))	return RECC_ALNUM;
2031  else if (STREQ (string, "alpha"))	return RECC_ALPHA;
2032  else if (STREQ (string, "word"))	return RECC_WORD;
2033  else if (STREQ (string, "ascii"))	return RECC_ASCII;
2034  else if (STREQ (string, "nonascii"))	return RECC_NONASCII;
2035  else if (STREQ (string, "graph"))	return RECC_GRAPH;
2036  else if (STREQ (string, "lower"))	return RECC_LOWER;
2037  else if (STREQ (string, "print"))	return RECC_PRINT;
2038  else if (STREQ (string, "punct"))	return RECC_PUNCT;
2039  else if (STREQ (string, "space"))	return RECC_SPACE;
2040  else if (STREQ (string, "upper"))	return RECC_UPPER;
2041  else if (STREQ (string, "unibyte"))	return RECC_UNIBYTE;
2042  else if (STREQ (string, "multibyte"))	return RECC_MULTIBYTE;
2043  else if (STREQ (string, "digit"))	return RECC_DIGIT;
2044  else if (STREQ (string, "xdigit"))	return RECC_XDIGIT;
2045  else if (STREQ (string, "cntrl"))	return RECC_CNTRL;
2046  else if (STREQ (string, "blank"))	return RECC_BLANK;
2047  else return 0;
2048}
2049
2050/* True iff CH is in the char class CC.  */
2051boolean
2052re_iswctype (ch, cc)
2053     int ch;
2054     re_wctype_t cc;
2055{
2056  switch (cc)
2057    {
2058    case RECC_ALNUM: return ISALNUM (ch);
2059    case RECC_ALPHA: return ISALPHA (ch);
2060    case RECC_BLANK: return ISBLANK (ch);
2061    case RECC_CNTRL: return ISCNTRL (ch);
2062    case RECC_DIGIT: return ISDIGIT (ch);
2063    case RECC_GRAPH: return ISGRAPH (ch);
2064    case RECC_LOWER: return ISLOWER (ch);
2065    case RECC_PRINT: return ISPRINT (ch);
2066    case RECC_PUNCT: return ISPUNCT (ch);
2067    case RECC_SPACE: return ISSPACE (ch);
2068    case RECC_UPPER: return ISUPPER (ch);
2069    case RECC_XDIGIT: return ISXDIGIT (ch);
2070    case RECC_ASCII: return IS_REAL_ASCII (ch);
2071    case RECC_NONASCII: return !IS_REAL_ASCII (ch);
2072    case RECC_UNIBYTE: return ISUNIBYTE (ch);
2073    case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
2074    case RECC_WORD: return ISWORD (ch);
2075    case RECC_ERROR: return false;
2076    default:
2077      abort();
2078    }
2079}
2080
2081/* Return a bit-pattern to use in the range-table bits to match multibyte
2082   chars of class CC.  */
2083static int
2084re_wctype_to_bit (cc)
2085     re_wctype_t cc;
2086{
2087  switch (cc)
2088    {
2089    case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
2090    case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2091    case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
2092    case RECC_LOWER: return BIT_LOWER;
2093    case RECC_UPPER: return BIT_UPPER;
2094    case RECC_PUNCT: return BIT_PUNCT;
2095    case RECC_SPACE: return BIT_SPACE;
2096    case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2097    case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2098    default:
2099      abort();
2100    }
2101}
2102#endif
2103
2104/* Filling in the work area of a range.  */
2105
2106/* Actually extend the space in WORK_AREA.  */
2107
2108static void
2109extend_range_table_work_area (work_area)
2110     struct range_table_work_area *work_area;
2111{
2112  work_area->allocated += 16 * sizeof (int);
2113  if (work_area->table)
2114    work_area->table
2115      = (int *) realloc (work_area->table, work_area->allocated);
2116  else
2117    work_area->table
2118      = (int *) malloc (work_area->allocated);
2119}
2120
2121#ifdef emacs
2122
2123/* Carefully find the ranges of codes that are equivalent
2124   under case conversion to the range start..end when passed through
2125   TRANSLATE.  Handle the case where non-letters can come in between
2126   two upper-case letters (which happens in Latin-1).
2127   Also handle the case of groups of more than 2 case-equivalent chars.
2128
2129   The basic method is to look at consecutive characters and see
2130   if they can form a run that can be handled as one.
2131
2132   Returns -1 if successful, REG_ESPACE if ran out of space.  */
2133
2134static int
2135set_image_of_range_1 (work_area, start, end, translate)
2136     RE_TRANSLATE_TYPE translate;
2137     struct range_table_work_area *work_area;
2138     re_wchar_t start, end;
2139{
2140  /* `one_case' indicates a character, or a run of characters,
2141     each of which is an isolate (no case-equivalents).
2142     This includes all ASCII non-letters.
2143
2144     `two_case' indicates a character, or a run of characters,
2145     each of which has two case-equivalent forms.
2146     This includes all ASCII letters.
2147
2148     `strange' indicates a character that has more than one
2149     case-equivalent.  */
2150
2151  enum case_type {one_case, two_case, strange};
2152
2153  /* Describe the run that is in progress,
2154     which the next character can try to extend.
2155     If run_type is strange, that means there really is no run.
2156     If run_type is one_case, then run_start...run_end is the run.
2157     If run_type is two_case, then the run is run_start...run_end,
2158     and the case-equivalents end at run_eqv_end.  */
2159
2160  enum case_type run_type = strange;
2161  int run_start, run_end, run_eqv_end;
2162
2163  Lisp_Object eqv_table;
2164
2165  if (!RE_TRANSLATE_P (translate))
2166    {
2167      EXTEND_RANGE_TABLE (work_area, 2);
2168      work_area->table[work_area->used++] = (start);
2169      work_area->table[work_area->used++] = (end);
2170      return -1;
2171    }
2172
2173  eqv_table = XCHAR_TABLE (translate)->extras[2];
2174
2175  for (; start <= end; start++)
2176    {
2177      enum case_type this_type;
2178      int eqv = RE_TRANSLATE (eqv_table, start);
2179      int minchar, maxchar;
2180
2181      /* Classify this character */
2182      if (eqv == start)
2183	this_type = one_case;
2184      else if (RE_TRANSLATE (eqv_table, eqv) == start)
2185	this_type = two_case;
2186      else
2187	this_type = strange;
2188
2189      if (start < eqv)
2190	minchar = start, maxchar = eqv;
2191      else
2192	minchar = eqv, maxchar = start;
2193
2194      /* Can this character extend the run in progress?  */
2195      if (this_type == strange || this_type != run_type
2196	  || !(minchar == run_end + 1
2197	       && (run_type == two_case
2198		   ? maxchar == run_eqv_end + 1 : 1)))
2199	{
2200	  /* No, end the run.
2201	     Record each of its equivalent ranges.  */
2202	  if (run_type == one_case)
2203	    {
2204	      EXTEND_RANGE_TABLE (work_area, 2);
2205	      work_area->table[work_area->used++] = run_start;
2206	      work_area->table[work_area->used++] = run_end;
2207	    }
2208	  else if (run_type == two_case)
2209	    {
2210	      EXTEND_RANGE_TABLE (work_area, 4);
2211	      work_area->table[work_area->used++] = run_start;
2212	      work_area->table[work_area->used++] = run_end;
2213	      work_area->table[work_area->used++]
2214		= RE_TRANSLATE (eqv_table, run_start);
2215	      work_area->table[work_area->used++]
2216		= RE_TRANSLATE (eqv_table, run_end);
2217	    }
2218	  run_type = strange;
2219	}
2220
2221      if (this_type == strange)
2222	{
2223	  /* For a strange character, add each of its equivalents, one
2224	     by one.  Don't start a range.  */
2225	  do
2226	    {
2227	      EXTEND_RANGE_TABLE (work_area, 2);
2228	      work_area->table[work_area->used++] = eqv;
2229	      work_area->table[work_area->used++] = eqv;
2230	      eqv = RE_TRANSLATE (eqv_table, eqv);
2231	    }
2232	  while (eqv != start);
2233	}
2234
2235      /* Add this char to the run, or start a new run.  */
2236      else if (run_type == strange)
2237	{
2238	  /* Initialize a new range.  */
2239	  run_type = this_type;
2240	  run_start = start;
2241	  run_end = start;
2242	  run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2243	}
2244      else
2245	{
2246	  /* Extend a running range.  */
2247	  run_end = minchar;
2248	  run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2249	}
2250    }
2251
2252  /* If a run is still in progress at the end, finish it now
2253     by recording its equivalent ranges.  */
2254  if (run_type == one_case)
2255    {
2256      EXTEND_RANGE_TABLE (work_area, 2);
2257      work_area->table[work_area->used++] = run_start;
2258      work_area->table[work_area->used++] = run_end;
2259    }
2260  else if (run_type == two_case)
2261    {
2262      EXTEND_RANGE_TABLE (work_area, 4);
2263      work_area->table[work_area->used++] = run_start;
2264      work_area->table[work_area->used++] = run_end;
2265      work_area->table[work_area->used++]
2266	= RE_TRANSLATE (eqv_table, run_start);
2267      work_area->table[work_area->used++]
2268	= RE_TRANSLATE (eqv_table, run_end);
2269    }
2270
2271  return -1;
2272}
2273
2274#endif /* emacs */
2275
2276/* Record the the image of the range start..end when passed through
2277   TRANSLATE.  This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2278   and is not even necessarily contiguous.
2279   Normally we approximate it with the smallest contiguous range that contains
2280   all the chars we need.  However, for Latin-1 we go to extra effort
2281   to do a better job.
2282
2283   This function is not called for ASCII ranges.
2284
2285   Returns -1 if successful, REG_ESPACE if ran out of space.  */
2286
2287static int
2288set_image_of_range (work_area, start, end, translate)
2289     RE_TRANSLATE_TYPE translate;
2290     struct range_table_work_area *work_area;
2291     re_wchar_t start, end;
2292{
2293  re_wchar_t cmin, cmax;
2294
2295#ifdef emacs
2296  /* For Latin-1 ranges, use set_image_of_range_1
2297     to get proper handling of ranges that include letters and nonletters.
2298     For a range that includes the whole of Latin-1, this is not necessary.
2299     For other character sets, we don't bother to get this right.  */
2300  if (RE_TRANSLATE_P (translate) && start < 04400
2301      && !(start < 04200 && end >= 04377))
2302    {
2303      int newend;
2304      int tem;
2305      newend = end;
2306      if (newend > 04377)
2307	newend = 04377;
2308      tem = set_image_of_range_1 (work_area, start, newend, translate);
2309      if (tem > 0)
2310	return tem;
2311
2312      start = 04400;
2313      if (end < 04400)
2314	return -1;
2315    }
2316#endif
2317
2318  EXTEND_RANGE_TABLE (work_area, 2);
2319  work_area->table[work_area->used++] = (start);
2320  work_area->table[work_area->used++] = (end);
2321
2322  cmin = -1, cmax = -1;
2323
2324  if (RE_TRANSLATE_P (translate))
2325    {
2326      int ch;
2327
2328      for (ch = start; ch <= end; ch++)
2329	{
2330	  re_wchar_t c = TRANSLATE (ch);
2331	  if (! (start <= c && c <= end))
2332	    {
2333	      if (cmin == -1)
2334		cmin = c, cmax = c;
2335	      else
2336		{
2337		  cmin = MIN (cmin, c);
2338		  cmax = MAX (cmax, c);
2339		}
2340	    }
2341	}
2342
2343      if (cmin != -1)
2344	{
2345	  EXTEND_RANGE_TABLE (work_area, 2);
2346	  work_area->table[work_area->used++] = (cmin);
2347	  work_area->table[work_area->used++] = (cmax);
2348	}
2349    }
2350
2351  return -1;
2352}
2353
2354#ifndef MATCH_MAY_ALLOCATE
2355
2356/* If we cannot allocate large objects within re_match_2_internal,
2357   we make the fail stack and register vectors global.
2358   The fail stack, we grow to the maximum size when a regexp
2359   is compiled.
2360   The register vectors, we adjust in size each time we
2361   compile a regexp, according to the number of registers it needs.  */
2362
2363static fail_stack_type fail_stack;
2364
2365/* Size with which the following vectors are currently allocated.
2366   That is so we can make them bigger as needed,
2367   but never make them smaller.  */
2368static int regs_allocated_size;
2369
2370static re_char **     regstart, **     regend;
2371static re_char **best_regstart, **best_regend;
2372
2373/* Make the register vectors big enough for NUM_REGS registers,
2374   but don't make them smaller.  */
2375
2376static
2377regex_grow_registers (num_regs)
2378     int num_regs;
2379{
2380  if (num_regs > regs_allocated_size)
2381    {
2382      RETALLOC_IF (regstart,	 num_regs, re_char *);
2383      RETALLOC_IF (regend,	 num_regs, re_char *);
2384      RETALLOC_IF (best_regstart, num_regs, re_char *);
2385      RETALLOC_IF (best_regend,	 num_regs, re_char *);
2386
2387      regs_allocated_size = num_regs;
2388    }
2389}
2390
2391#endif /* not MATCH_MAY_ALLOCATE */
2392
2393static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2394						 compile_stack,
2395						 regnum_t regnum));
2396
2397/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2398   Returns one of error codes defined in `regex.h', or zero for success.
2399
2400   Assumes the `allocated' (and perhaps `buffer') and `translate'
2401   fields are set in BUFP on entry.
2402
2403   If it succeeds, results are put in BUFP (if it returns an error, the
2404   contents of BUFP are undefined):
2405     `buffer' is the compiled pattern;
2406     `syntax' is set to SYNTAX;
2407     `used' is set to the length of the compiled pattern;
2408     `fastmap_accurate' is zero;
2409     `re_nsub' is the number of subexpressions in PATTERN;
2410     `not_bol' and `not_eol' are zero;
2411
2412   The `fastmap' field is neither examined nor set.  */
2413
2414/* Insert the `jump' from the end of last alternative to "here".
2415   The space for the jump has already been allocated. */
2416#define FIXUP_ALT_JUMP()						\
2417do {									\
2418  if (fixup_alt_jump)							\
2419    STORE_JUMP (jump, fixup_alt_jump, b);				\
2420} while (0)
2421
2422
2423/* Return, freeing storage we allocated.  */
2424#define FREE_STACK_RETURN(value)		\
2425  do {							\
2426    FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
2427    free (compile_stack.stack);				\
2428    return value;					\
2429  } while (0)
2430
2431static reg_errcode_t
2432regex_compile (pattern, size, syntax, bufp)
2433     re_char *pattern;
2434     size_t size;
2435     reg_syntax_t syntax;
2436     struct re_pattern_buffer *bufp;
2437{
2438  /* We fetch characters from PATTERN here.  */
2439  register re_wchar_t c, c1;
2440
2441  /* A random temporary spot in PATTERN.  */
2442  re_char *p1;
2443
2444  /* Points to the end of the buffer, where we should append.  */
2445  register unsigned char *b;
2446
2447  /* Keeps track of unclosed groups.  */
2448  compile_stack_type compile_stack;
2449
2450  /* Points to the current (ending) position in the pattern.  */
2451#ifdef AIX
2452  /* `const' makes AIX compiler fail.  */
2453  unsigned char *p = pattern;
2454#else
2455  re_char *p = pattern;
2456#endif
2457  re_char *pend = pattern + size;
2458
2459  /* How to translate the characters in the pattern.  */
2460  RE_TRANSLATE_TYPE translate = bufp->translate;
2461
2462  /* Address of the count-byte of the most recently inserted `exactn'
2463     command.  This makes it possible to tell if a new exact-match
2464     character can be added to that command or if the character requires
2465     a new `exactn' command.  */
2466  unsigned char *pending_exact = 0;
2467
2468  /* Address of start of the most recently finished expression.
2469     This tells, e.g., postfix * where to find the start of its
2470     operand.  Reset at the beginning of groups and alternatives.  */
2471  unsigned char *laststart = 0;
2472
2473  /* Address of beginning of regexp, or inside of last group.  */
2474  unsigned char *begalt;
2475
2476  /* Place in the uncompiled pattern (i.e., the {) to
2477     which to go back if the interval is invalid.  */
2478  re_char *beg_interval;
2479
2480  /* Address of the place where a forward jump should go to the end of
2481     the containing expression.  Each alternative of an `or' -- except the
2482     last -- ends with a forward jump of this sort.  */
2483  unsigned char *fixup_alt_jump = 0;
2484
2485  /* Counts open-groups as they are encountered.  Remembered for the
2486     matching close-group on the compile stack, so the same register
2487     number is put in the stop_memory as the start_memory.  */
2488  regnum_t regnum = 0;
2489
2490  /* Work area for range table of charset.  */
2491  struct range_table_work_area range_table_work;
2492
2493  /* If the object matched can contain multibyte characters.  */
2494  const boolean multibyte = RE_MULTIBYTE_P (bufp);
2495
2496  /* Nonzero if we have pushed down into a subpattern.  */
2497  int in_subpattern = 0;
2498
2499  /* These hold the values of p, pattern, and pend from the main
2500     pattern when we have pushed into a subpattern.  */
2501  re_char *main_p;
2502  re_char *main_pattern;
2503  re_char *main_pend;
2504
2505#ifdef DEBUG
2506  debug++;
2507  DEBUG_PRINT1 ("\nCompiling pattern: ");
2508  if (debug > 0)
2509    {
2510      unsigned debug_count;
2511
2512      for (debug_count = 0; debug_count < size; debug_count++)
2513	putchar (pattern[debug_count]);
2514      putchar ('\n');
2515    }
2516#endif /* DEBUG */
2517
2518  /* Initialize the compile stack.  */
2519  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2520  if (compile_stack.stack == NULL)
2521    return REG_ESPACE;
2522
2523  compile_stack.size = INIT_COMPILE_STACK_SIZE;
2524  compile_stack.avail = 0;
2525
2526  range_table_work.table = 0;
2527  range_table_work.allocated = 0;
2528
2529  /* Initialize the pattern buffer.  */
2530  bufp->syntax = syntax;
2531  bufp->fastmap_accurate = 0;
2532  bufp->not_bol = bufp->not_eol = 0;
2533  bufp->used_syntax = 0;
2534
2535  /* Set `used' to zero, so that if we return an error, the pattern
2536     printer (for debugging) will think there's no pattern.  We reset it
2537     at the end.  */
2538  bufp->used = 0;
2539
2540  /* Always count groups, whether or not bufp->no_sub is set.  */
2541  bufp->re_nsub = 0;
2542
2543#if !defined emacs && !defined SYNTAX_TABLE
2544  /* Initialize the syntax table.  */
2545   init_syntax_once ();
2546#endif
2547
2548  if (bufp->allocated == 0)
2549    {
2550      if (bufp->buffer)
2551	{ /* If zero allocated, but buffer is non-null, try to realloc
2552	     enough space.  This loses if buffer's address is bogus, but
2553	     that is the user's responsibility.  */
2554	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2555	}
2556      else
2557	{ /* Caller did not allocate a buffer.  Do it for them.  */
2558	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2559	}
2560      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2561
2562      bufp->allocated = INIT_BUF_SIZE;
2563    }
2564
2565  begalt = b = bufp->buffer;
2566
2567  /* Loop through the uncompiled pattern until we're at the end.  */
2568  while (1)
2569    {
2570      if (p == pend)
2571	{
2572	  /* If this is the end of an included regexp,
2573	     pop back to the main regexp and try again.  */
2574	  if (in_subpattern)
2575	    {
2576	      in_subpattern = 0;
2577	      pattern = main_pattern;
2578	      p = main_p;
2579	      pend = main_pend;
2580	      continue;
2581	    }
2582	  /* If this is the end of the main regexp, we are done.  */
2583	  break;
2584	}
2585
2586      PATFETCH (c);
2587
2588      switch (c)
2589	{
2590	case ' ':
2591	  {
2592	    re_char *p1 = p;
2593
2594	    /* If there's no special whitespace regexp, treat
2595	       spaces normally.  And don't try to do this recursively.  */
2596	    if (!whitespace_regexp || in_subpattern)
2597	      goto normal_char;
2598
2599	    /* Peek past following spaces.  */
2600	    while (p1 != pend)
2601	      {
2602		if (*p1 != ' ')
2603		  break;
2604		p1++;
2605	      }
2606	    /* If the spaces are followed by a repetition op,
2607	       treat them normally.  */
2608	    if (p1 != pend
2609		&& (*p1 == '*' || *p1 == '+' || *p1 == '?'
2610		    || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
2611	      goto normal_char;
2612
2613	    /* Replace the spaces with the whitespace regexp.  */
2614	    in_subpattern = 1;
2615	    main_p = p1;
2616	    main_pend = pend;
2617	    main_pattern = pattern;
2618	    p = pattern = whitespace_regexp;
2619	    pend = p + strlen (p);
2620	    break;
2621	  }
2622
2623	case '^':
2624	  {
2625	    if (   /* If at start of pattern, it's an operator.  */
2626		   p == pattern + 1
2627		   /* If context independent, it's an operator.  */
2628		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2629		   /* Otherwise, depends on what's come before.  */
2630		|| at_begline_loc_p (pattern, p, syntax))
2631	      BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2632	    else
2633	      goto normal_char;
2634	  }
2635	  break;
2636
2637
2638	case '$':
2639	  {
2640	    if (   /* If at end of pattern, it's an operator.  */
2641		   p == pend
2642		   /* If context independent, it's an operator.  */
2643		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2644		   /* Otherwise, depends on what's next.  */
2645		|| at_endline_loc_p (p, pend, syntax))
2646	       BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2647	     else
2648	       goto normal_char;
2649	   }
2650	   break;
2651
2652
2653	case '+':
2654	case '?':
2655	  if ((syntax & RE_BK_PLUS_QM)
2656	      || (syntax & RE_LIMITED_OPS))
2657	    goto normal_char;
2658	handle_plus:
2659	case '*':
2660	  /* If there is no previous pattern... */
2661	  if (!laststart)
2662	    {
2663	      if (syntax & RE_CONTEXT_INVALID_OPS)
2664		FREE_STACK_RETURN (REG_BADRPT);
2665	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2666		goto normal_char;
2667	    }
2668
2669	  {
2670	    /* 1 means zero (many) matches is allowed.  */
2671	    boolean zero_times_ok = 0, many_times_ok = 0;
2672	    boolean greedy = 1;
2673
2674	    /* If there is a sequence of repetition chars, collapse it
2675	       down to just one (the right one).  We can't combine
2676	       interval operators with these because of, e.g., `a{2}*',
2677	       which should only match an even number of `a's.  */
2678
2679	    for (;;)
2680	      {
2681		if ((syntax & RE_FRUGAL)
2682		    && c == '?' && (zero_times_ok || many_times_ok))
2683		  greedy = 0;
2684		else
2685		  {
2686		    zero_times_ok |= c != '+';
2687		    many_times_ok |= c != '?';
2688		  }
2689
2690		if (p == pend)
2691		  break;
2692		else if (*p == '*'
2693			 || (!(syntax & RE_BK_PLUS_QM)
2694			     && (*p == '+' || *p == '?')))
2695		  ;
2696		else if (syntax & RE_BK_PLUS_QM	 && *p == '\\')
2697		  {
2698		    if (p+1 == pend)
2699		      FREE_STACK_RETURN (REG_EESCAPE);
2700		    if (p[1] == '+' || p[1] == '?')
2701		      PATFETCH (c); /* Gobble up the backslash.  */
2702		    else
2703		      break;
2704		  }
2705		else
2706		  break;
2707		/* If we get here, we found another repeat character.  */
2708		PATFETCH (c);
2709	       }
2710
2711	    /* Star, etc. applied to an empty pattern is equivalent
2712	       to an empty pattern.  */
2713	    if (!laststart || laststart == b)
2714	      break;
2715
2716	    /* Now we know whether or not zero matches is allowed
2717	       and also whether or not two or more matches is allowed.  */
2718	    if (greedy)
2719	      {
2720		if (many_times_ok)
2721		  {
2722		    boolean simple = skip_one_char (laststart) == b;
2723		    unsigned int startoffset = 0;
2724		    re_opcode_t ofj =
2725		      /* Check if the loop can match the empty string.  */
2726		      (simple || !analyse_first (laststart, b, NULL, 0))
2727		      ? on_failure_jump : on_failure_jump_loop;
2728		    assert (skip_one_char (laststart) <= b);
2729
2730		    if (!zero_times_ok && simple)
2731		      { /* Since simple * loops can be made faster by using
2732		    	   on_failure_keep_string_jump, we turn simple P+
2733		    	   into PP* if P is simple.  */
2734		    	unsigned char *p1, *p2;
2735		    	startoffset = b - laststart;
2736		    	GET_BUFFER_SPACE (startoffset);
2737		    	p1 = b; p2 = laststart;
2738		    	while (p2 < p1)
2739		    	  *b++ = *p2++;
2740		    	zero_times_ok = 1;
2741		      }
2742
2743		    GET_BUFFER_SPACE (6);
2744		    if (!zero_times_ok)
2745		      /* A + loop.  */
2746		      STORE_JUMP (ofj, b, b + 6);
2747		    else
2748		      /* Simple * loops can use on_failure_keep_string_jump
2749			 depending on what follows.  But since we don't know
2750			 that yet, we leave the decision up to
2751			 on_failure_jump_smart.  */
2752		      INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
2753				   laststart + startoffset, b + 6);
2754		    b += 3;
2755		    STORE_JUMP (jump, b, laststart + startoffset);
2756		    b += 3;
2757		  }
2758		else
2759		  {
2760		    /* A simple ? pattern.  */
2761		    assert (zero_times_ok);
2762		    GET_BUFFER_SPACE (3);
2763		    INSERT_JUMP (on_failure_jump, laststart, b + 3);
2764		    b += 3;
2765		  }
2766	      }
2767	    else		/* not greedy */
2768	      { /* I wish the greedy and non-greedy cases could be merged. */
2769
2770		GET_BUFFER_SPACE (7); /* We might use less.  */
2771		if (many_times_ok)
2772		  {
2773		    boolean emptyp = analyse_first (laststart, b, NULL, 0);
2774
2775		    /* The non-greedy multiple match looks like
2776		       a repeat..until: we only need a conditional jump
2777		       at the end of the loop.  */
2778		    if (emptyp) BUF_PUSH (no_op);
2779		    STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2780				: on_failure_jump, b, laststart);
2781		    b += 3;
2782		    if (zero_times_ok)
2783		      {
2784			/* The repeat...until naturally matches one or more.
2785			   To also match zero times, we need to first jump to
2786			   the end of the loop (its conditional jump).  */
2787			INSERT_JUMP (jump, laststart, b);
2788			b += 3;
2789		      }
2790		  }
2791		else
2792		  {
2793		    /* non-greedy a?? */
2794		    INSERT_JUMP (jump, laststart, b + 3);
2795		    b += 3;
2796		    INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2797		    b += 3;
2798		  }
2799	      }
2800	  }
2801	  pending_exact = 0;
2802	  break;
2803
2804
2805	case '.':
2806	  laststart = b;
2807	  BUF_PUSH (anychar);
2808	  break;
2809
2810
2811	case '[':
2812	  {
2813	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2814
2815	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2816
2817	    /* Ensure that we have enough space to push a charset: the
2818	       opcode, the length count, and the bitset; 34 bytes in all.  */
2819	    GET_BUFFER_SPACE (34);
2820
2821	    laststart = b;
2822
2823	    /* We test `*p == '^' twice, instead of using an if
2824	       statement, so we only need one BUF_PUSH.  */
2825	    BUF_PUSH (*p == '^' ? charset_not : charset);
2826	    if (*p == '^')
2827	      p++;
2828
2829	    /* Remember the first position in the bracket expression.  */
2830	    p1 = p;
2831
2832	    /* Push the number of bytes in the bitmap.  */
2833	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2834
2835	    /* Clear the whole map.  */
2836	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2837
2838	    /* charset_not matches newline according to a syntax bit.  */
2839	    if ((re_opcode_t) b[-2] == charset_not
2840		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2841	      SET_LIST_BIT ('\n');
2842
2843	    /* Read in characters and ranges, setting map bits.  */
2844	    for (;;)
2845	      {
2846		boolean escaped_char = false;
2847		const unsigned char *p2 = p;
2848
2849		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2850
2851		/* Don't translate yet.  The range TRANSLATE(X..Y) cannot
2852		   always be determined from TRANSLATE(X) and TRANSLATE(Y)
2853		   So the translation is done later in a loop.  Example:
2854		   (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
2855		PATFETCH (c);
2856
2857		/* \ might escape characters inside [...] and [^...].  */
2858		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2859		  {
2860		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2861
2862		    PATFETCH (c);
2863		    escaped_char = true;
2864		  }
2865		else
2866		  {
2867		    /* Could be the end of the bracket expression.  If it's
2868		       not (i.e., when the bracket expression is `[]' so
2869		       far), the ']' character bit gets set way below.  */
2870		    if (c == ']' && p2 != p1)
2871		      break;
2872		  }
2873
2874		/* What should we do for the character which is
2875		   greater than 0x7F, but not BASE_LEADING_CODE_P?
2876		   XXX */
2877
2878		/* See if we're at the beginning of a possible character
2879		   class.  */
2880
2881		if (!escaped_char &&
2882		    syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2883		  {
2884		    /* Leave room for the null.  */
2885		    unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
2886		    const unsigned char *class_beg;
2887
2888		    PATFETCH (c);
2889		    c1 = 0;
2890		    class_beg = p;
2891
2892		    /* If pattern is `[[:'.  */
2893		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2894
2895		    for (;;)
2896		      {
2897		        PATFETCH (c);
2898		        if ((c == ':' && *p == ']') || p == pend)
2899		          break;
2900			if (c1 < CHAR_CLASS_MAX_LENGTH)
2901			  str[c1++] = c;
2902			else
2903			  /* This is in any case an invalid class name.  */
2904			  str[0] = '\0';
2905		      }
2906		    str[c1] = '\0';
2907
2908		    /* If isn't a word bracketed by `[:' and `:]':
2909		       undo the ending character, the letters, and
2910		       leave the leading `:' and `[' (but set bits for
2911		       them).  */
2912		    if (c == ':' && *p == ']')
2913		      {
2914			re_wchar_t ch;
2915			re_wctype_t cc;
2916
2917			cc = re_wctype (str);
2918
2919			if (cc == 0)
2920			  FREE_STACK_RETURN (REG_ECTYPE);
2921
2922                        /* Throw away the ] at the end of the character
2923                           class.  */
2924                        PATFETCH (c);
2925
2926                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2927
2928			/* Most character classes in a multibyte match
2929			   just set a flag.  Exceptions are is_blank,
2930			   is_digit, is_cntrl, and is_xdigit, since
2931			   they can only match ASCII characters.  We
2932			   don't need to handle them for multibyte.
2933			   They are distinguished by a negative wctype.  */
2934
2935			if (multibyte)
2936			  SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
2937							 re_wctype_to_bit (cc));
2938
2939                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2940			  {
2941			    int translated = TRANSLATE (ch);
2942			    if (translated < (1 << BYTEWIDTH)
2943				&& re_iswctype (btowc (ch), cc))
2944			      SET_LIST_BIT (translated);
2945			  }
2946
2947			/* In most cases the matching rule for char classes
2948			   only uses the syntax table for multibyte chars,
2949			   so that the content of the syntax-table it is not
2950			   hardcoded in the range_table.  SPACE and WORD are
2951			   the two exceptions.  */
2952			if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2953			  bufp->used_syntax = 1;
2954
2955			/* Repeat the loop. */
2956			continue;
2957		      }
2958		    else
2959		      {
2960			/* Go back to right after the "[:".  */
2961			p = class_beg;
2962			SET_LIST_BIT ('[');
2963
2964			/* Because the `:' may starts the range, we
2965			   can't simply set bit and repeat the loop.
2966			   Instead, just set it to C and handle below.  */
2967			c = ':';
2968		      }
2969		  }
2970
2971		if (p < pend && p[0] == '-' && p[1] != ']')
2972		  {
2973
2974		    /* Discard the `-'. */
2975		    PATFETCH (c1);
2976
2977		    /* Fetch the character which ends the range. */
2978		    PATFETCH (c1);
2979
2980		    if (SINGLE_BYTE_CHAR_P (c))
2981		      {
2982			if (! SINGLE_BYTE_CHAR_P (c1))
2983			  {
2984			    /* Handle a range starting with a
2985			       character of less than 256, and ending
2986			       with a character of not less than 256.
2987			       Split that into two ranges, the low one
2988			       ending at 0377, and the high one
2989			       starting at the smallest character in
2990			       the charset of C1 and ending at C1.  */
2991			    int charset = CHAR_CHARSET (c1);
2992			    re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
2993
2994			    SET_RANGE_TABLE_WORK_AREA (range_table_work,
2995						       c2, c1);
2996			    c1 = 0377;
2997			  }
2998		      }
2999		    else if (!SAME_CHARSET_P (c, c1))
3000		      FREE_STACK_RETURN (REG_ERANGEX);
3001		  }
3002		else
3003		  /* Range from C to C. */
3004		  c1 = c;
3005
3006		/* Set the range ... */
3007		if (SINGLE_BYTE_CHAR_P (c))
3008		  /* ... into bitmap.  */
3009		  {
3010		    re_wchar_t this_char;
3011		    re_wchar_t range_start = c, range_end = c1;
3012
3013		    /* If the start is after the end, the range is empty.  */
3014		    if (range_start > range_end)
3015		      {
3016			if (syntax & RE_NO_EMPTY_RANGES)
3017			  FREE_STACK_RETURN (REG_ERANGE);
3018			/* Else, repeat the loop.  */
3019		      }
3020		    else
3021		      {
3022			for (this_char = range_start; this_char <= range_end;
3023			     this_char++)
3024			  {
3025			    int translated = TRANSLATE (this_char);
3026			    if (translated < (1 << BYTEWIDTH))
3027			      SET_LIST_BIT (translated);
3028			    else
3029			      SET_RANGE_TABLE_WORK_AREA
3030				(range_table_work, translated, translated);
3031			  }
3032		      }
3033		  }
3034		else
3035		  /* ... into range table.  */
3036		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
3037	      }
3038
3039	    /* Discard any (non)matching list bytes that are all 0 at the
3040	       end of the map.  Decrease the map-length byte too.  */
3041	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3042	      b[-1]--;
3043	    b += b[-1];
3044
3045	    /* Build real range table from work area.  */
3046	    if (RANGE_TABLE_WORK_USED (range_table_work)
3047		|| RANGE_TABLE_WORK_BITS (range_table_work))
3048	      {
3049		int i;
3050		int used = RANGE_TABLE_WORK_USED (range_table_work);
3051
3052		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
3053		   bytes for flags, two for COUNT, and three bytes for
3054		   each character. */
3055		GET_BUFFER_SPACE (4 + used * 3);
3056
3057		/* Indicate the existence of range table.  */
3058		laststart[1] |= 0x80;
3059
3060		/* Store the character class flag bits into the range table.
3061		   If not in emacs, these flag bits are always 0.  */
3062		*b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
3063		*b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
3064
3065		STORE_NUMBER_AND_INCR (b, used / 2);
3066		for (i = 0; i < used; i++)
3067		  STORE_CHARACTER_AND_INCR
3068		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
3069	      }
3070	  }
3071	  break;
3072
3073
3074	case '(':
3075	  if (syntax & RE_NO_BK_PARENS)
3076	    goto handle_open;
3077	  else
3078	    goto normal_char;
3079
3080
3081	case ')':
3082	  if (syntax & RE_NO_BK_PARENS)
3083	    goto handle_close;
3084	  else
3085	    goto normal_char;
3086
3087
3088	case '\n':
3089	  if (syntax & RE_NEWLINE_ALT)
3090	    goto handle_alt;
3091	  else
3092	    goto normal_char;
3093
3094
3095	case '|':
3096	  if (syntax & RE_NO_BK_VBAR)
3097	    goto handle_alt;
3098	  else
3099	    goto normal_char;
3100
3101
3102	case '{':
3103	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3104	     goto handle_interval;
3105	   else
3106	     goto normal_char;
3107
3108
3109	case '\\':
3110	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3111
3112	  /* Do not translate the character after the \, so that we can
3113	     distinguish, e.g., \B from \b, even if we normally would
3114	     translate, e.g., B to b.  */
3115	  PATFETCH (c);
3116
3117	  switch (c)
3118	    {
3119	    case '(':
3120	      if (syntax & RE_NO_BK_PARENS)
3121		goto normal_backslash;
3122
3123	    handle_open:
3124	      {
3125		int shy = 0;
3126		if (p+1 < pend)
3127		  {
3128		    /* Look for a special (?...) construct */
3129		    if ((syntax & RE_SHY_GROUPS) && *p == '?')
3130		      {
3131			PATFETCH (c); /* Gobble up the '?'.  */
3132			PATFETCH (c);
3133			switch (c)
3134			  {
3135			  case ':': shy = 1; break;
3136			  default:
3137			    /* Only (?:...) is supported right now. */
3138			    FREE_STACK_RETURN (REG_BADPAT);
3139			  }
3140		      }
3141		  }
3142
3143		if (!shy)
3144		  {
3145		    bufp->re_nsub++;
3146		    regnum++;
3147		  }
3148
3149		if (COMPILE_STACK_FULL)
3150		  {
3151		    RETALLOC (compile_stack.stack, compile_stack.size << 1,
3152			      compile_stack_elt_t);
3153		    if (compile_stack.stack == NULL) return REG_ESPACE;
3154
3155		    compile_stack.size <<= 1;
3156		  }
3157
3158		/* These are the values to restore when we hit end of this
3159		   group.  They are all relative offsets, so that if the
3160		   whole pattern moves because of realloc, they will still
3161		   be valid.  */
3162		COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3163		COMPILE_STACK_TOP.fixup_alt_jump
3164		  = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3165		COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
3166		COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
3167
3168		/* Do not push a
3169		   start_memory for groups beyond the last one we can
3170		   represent in the compiled pattern.  */
3171		if (regnum <= MAX_REGNUM && !shy)
3172		  BUF_PUSH_2 (start_memory, regnum);
3173
3174		compile_stack.avail++;
3175
3176		fixup_alt_jump = 0;
3177		laststart = 0;
3178		begalt = b;
3179		/* If we've reached MAX_REGNUM groups, then this open
3180		   won't actually generate any code, so we'll have to
3181		   clear pending_exact explicitly.  */
3182		pending_exact = 0;
3183		break;
3184	      }
3185
3186	    case ')':
3187	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3188
3189	      if (COMPILE_STACK_EMPTY)
3190		{
3191		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3192		    goto normal_backslash;
3193		  else
3194		    FREE_STACK_RETURN (REG_ERPAREN);
3195		}
3196
3197	    handle_close:
3198	      FIXUP_ALT_JUMP ();
3199
3200	      /* See similar code for backslashed left paren above.  */
3201	      if (COMPILE_STACK_EMPTY)
3202		{
3203		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3204		    goto normal_char;
3205		  else
3206		    FREE_STACK_RETURN (REG_ERPAREN);
3207		}
3208
3209	      /* Since we just checked for an empty stack above, this
3210		 ``can't happen''.  */
3211	      assert (compile_stack.avail != 0);
3212	      {
3213		/* We don't just want to restore into `regnum', because
3214		   later groups should continue to be numbered higher,
3215		   as in `(ab)c(de)' -- the second group is #2.  */
3216		regnum_t this_group_regnum;
3217
3218		compile_stack.avail--;
3219		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3220		fixup_alt_jump
3221		  = COMPILE_STACK_TOP.fixup_alt_jump
3222		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
3223		    : 0;
3224		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
3225		this_group_regnum = COMPILE_STACK_TOP.regnum;
3226		/* If we've reached MAX_REGNUM groups, then this open
3227		   won't actually generate any code, so we'll have to
3228		   clear pending_exact explicitly.  */
3229		pending_exact = 0;
3230
3231		/* We're at the end of the group, so now we know how many
3232		   groups were inside this one.  */
3233		if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
3234		  BUF_PUSH_2 (stop_memory, this_group_regnum);
3235	      }
3236	      break;
3237
3238
3239	    case '|':					/* `\|'.  */
3240	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3241		goto normal_backslash;
3242	    handle_alt:
3243	      if (syntax & RE_LIMITED_OPS)
3244		goto normal_char;
3245
3246	      /* Insert before the previous alternative a jump which
3247		 jumps to this alternative if the former fails.  */
3248	      GET_BUFFER_SPACE (3);
3249	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
3250	      pending_exact = 0;
3251	      b += 3;
3252
3253	      /* The alternative before this one has a jump after it
3254		 which gets executed if it gets matched.  Adjust that
3255		 jump so it will jump to this alternative's analogous
3256		 jump (put in below, which in turn will jump to the next
3257		 (if any) alternative's such jump, etc.).  The last such
3258		 jump jumps to the correct final destination.  A picture:
3259			  _____ _____
3260			  |   | |   |
3261			  |   v |   v
3262			 a | b	 | c
3263
3264		 If we are at `b', then fixup_alt_jump right now points to a
3265		 three-byte space after `a'.  We'll put in the jump, set
3266		 fixup_alt_jump to right after `b', and leave behind three
3267		 bytes which we'll fill in when we get to after `c'.  */
3268
3269	      FIXUP_ALT_JUMP ();
3270
3271	      /* Mark and leave space for a jump after this alternative,
3272		 to be filled in later either by next alternative or
3273		 when know we're at the end of a series of alternatives.  */
3274	      fixup_alt_jump = b;
3275	      GET_BUFFER_SPACE (3);
3276	      b += 3;
3277
3278	      laststart = 0;
3279	      begalt = b;
3280	      break;
3281
3282
3283	    case '{':
3284	      /* If \{ is a literal.  */
3285	      if (!(syntax & RE_INTERVALS)
3286		     /* If we're at `\{' and it's not the open-interval
3287			operator.  */
3288		  || (syntax & RE_NO_BK_BRACES))
3289		goto normal_backslash;
3290
3291	    handle_interval:
3292	      {
3293		/* If got here, then the syntax allows intervals.  */
3294
3295		/* At least (most) this many matches must be made.  */
3296		int lower_bound = 0, upper_bound = -1;
3297
3298		beg_interval = p;
3299
3300		GET_UNSIGNED_NUMBER (lower_bound);
3301
3302		if (c == ',')
3303		  GET_UNSIGNED_NUMBER (upper_bound);
3304		else
3305		  /* Interval such as `{1}' => match exactly once. */
3306		  upper_bound = lower_bound;
3307
3308		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
3309		    || (upper_bound >= 0 && lower_bound > upper_bound))
3310		  FREE_STACK_RETURN (REG_BADBR);
3311
3312		if (!(syntax & RE_NO_BK_BRACES))
3313		  {
3314		    if (c != '\\')
3315		      FREE_STACK_RETURN (REG_BADBR);
3316		    if (p == pend)
3317		      FREE_STACK_RETURN (REG_EESCAPE);
3318		    PATFETCH (c);
3319		  }
3320
3321		if (c != '}')
3322		  FREE_STACK_RETURN (REG_BADBR);
3323
3324		/* We just parsed a valid interval.  */
3325
3326		/* If it's invalid to have no preceding re.  */
3327		if (!laststart)
3328		  {
3329		    if (syntax & RE_CONTEXT_INVALID_OPS)
3330		      FREE_STACK_RETURN (REG_BADRPT);
3331		    else if (syntax & RE_CONTEXT_INDEP_OPS)
3332		      laststart = b;
3333		    else
3334		      goto unfetch_interval;
3335		  }
3336
3337		if (upper_bound == 0)
3338		  /* If the upper bound is zero, just drop the sub pattern
3339		     altogether.  */
3340		  b = laststart;
3341		else if (lower_bound == 1 && upper_bound == 1)
3342		  /* Just match it once: nothing to do here.  */
3343		  ;
3344
3345		/* Otherwise, we have a nontrivial interval.  When
3346		   we're all done, the pattern will look like:
3347		   set_number_at <jump count> <upper bound>
3348		   set_number_at <succeed_n count> <lower bound>
3349		   succeed_n <after jump addr> <succeed_n count>
3350		   <body of loop>
3351		   jump_n <succeed_n addr> <jump count>
3352		   (The upper bound and `jump_n' are omitted if
3353		   `upper_bound' is 1, though.)  */
3354		else
3355		  { /* If the upper bound is > 1, we need to insert
3356		       more at the end of the loop.  */
3357		    unsigned int nbytes = (upper_bound < 0 ? 3
3358					   : upper_bound > 1 ? 5 : 0);
3359		    unsigned int startoffset = 0;
3360
3361		    GET_BUFFER_SPACE (20); /* We might use less.  */
3362
3363		    if (lower_bound == 0)
3364		      {
3365			/* A succeed_n that starts with 0 is really a
3366			   a simple on_failure_jump_loop.  */
3367			INSERT_JUMP (on_failure_jump_loop, laststart,
3368				     b + 3 + nbytes);
3369			b += 3;
3370		      }
3371		    else
3372		      {
3373			/* Initialize lower bound of the `succeed_n', even
3374			   though it will be set during matching by its
3375			   attendant `set_number_at' (inserted next),
3376			   because `re_compile_fastmap' needs to know.
3377			   Jump to the `jump_n' we might insert below.  */
3378			INSERT_JUMP2 (succeed_n, laststart,
3379				      b + 5 + nbytes,
3380				      lower_bound);
3381			b += 5;
3382
3383			/* Code to initialize the lower bound.  Insert
3384			   before the `succeed_n'.  The `5' is the last two
3385			   bytes of this `set_number_at', plus 3 bytes of
3386			   the following `succeed_n'.  */
3387			insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3388			b += 5;
3389			startoffset += 5;
3390		      }
3391
3392		    if (upper_bound < 0)
3393		      {
3394			/* A negative upper bound stands for infinity,
3395			   in which case it degenerates to a plain jump.  */
3396			STORE_JUMP (jump, b, laststart + startoffset);
3397			b += 3;
3398		      }
3399		    else if (upper_bound > 1)
3400		      { /* More than one repetition is allowed, so
3401			   append a backward jump to the `succeed_n'
3402			   that starts this interval.
3403
3404			   When we've reached this during matching,
3405			   we'll have matched the interval once, so
3406			   jump back only `upper_bound - 1' times.  */
3407			STORE_JUMP2 (jump_n, b, laststart + startoffset,
3408				     upper_bound - 1);
3409			b += 5;
3410
3411			/* The location we want to set is the second
3412			   parameter of the `jump_n'; that is `b-2' as
3413			   an absolute address.  `laststart' will be
3414			   the `set_number_at' we're about to insert;
3415			   `laststart+3' the number to set, the source
3416			   for the relative address.  But we are
3417			   inserting into the middle of the pattern --
3418			   so everything is getting moved up by 5.
3419			   Conclusion: (b - 2) - (laststart + 3) + 5,
3420			   i.e., b - laststart.
3421
3422			   We insert this at the beginning of the loop
3423			   so that if we fail during matching, we'll
3424			   reinitialize the bounds.  */
3425			insert_op2 (set_number_at, laststart, b - laststart,
3426				    upper_bound - 1, b);
3427			b += 5;
3428		      }
3429		  }
3430		pending_exact = 0;
3431		beg_interval = NULL;
3432	      }
3433	      break;
3434
3435	    unfetch_interval:
3436	      /* If an invalid interval, match the characters as literals.  */
3437	       assert (beg_interval);
3438	       p = beg_interval;
3439	       beg_interval = NULL;
3440
3441	       /* normal_char and normal_backslash need `c'.  */
3442	       c = '{';
3443
3444	       if (!(syntax & RE_NO_BK_BRACES))
3445		 {
3446		   assert (p > pattern && p[-1] == '\\');
3447		   goto normal_backslash;
3448		 }
3449	       else
3450		 goto normal_char;
3451
3452#ifdef emacs
3453	    /* There is no way to specify the before_dot and after_dot
3454	       operators.  rms says this is ok.  --karl  */
3455	    case '=':
3456	      BUF_PUSH (at_dot);
3457	      break;
3458
3459	    case 's':
3460	      laststart = b;
3461	      PATFETCH (c);
3462	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3463	      break;
3464
3465	    case 'S':
3466	      laststart = b;
3467	      PATFETCH (c);
3468	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3469	      break;
3470
3471	    case 'c':
3472	      laststart = b;
3473	      PATFETCH (c);
3474	      BUF_PUSH_2 (categoryspec, c);
3475	      break;
3476
3477	    case 'C':
3478	      laststart = b;
3479	      PATFETCH (c);
3480	      BUF_PUSH_2 (notcategoryspec, c);
3481	      break;
3482#endif /* emacs */
3483
3484
3485	    case 'w':
3486	      if (syntax & RE_NO_GNU_OPS)
3487		goto normal_char;
3488	      laststart = b;
3489	      BUF_PUSH_2 (syntaxspec, Sword);
3490	      break;
3491
3492
3493	    case 'W':
3494	      if (syntax & RE_NO_GNU_OPS)
3495		goto normal_char;
3496	      laststart = b;
3497	      BUF_PUSH_2 (notsyntaxspec, Sword);
3498	      break;
3499
3500
3501	    case '<':
3502	      if (syntax & RE_NO_GNU_OPS)
3503		goto normal_char;
3504	      BUF_PUSH (wordbeg);
3505	      break;
3506
3507	    case '>':
3508	      if (syntax & RE_NO_GNU_OPS)
3509		goto normal_char;
3510	      BUF_PUSH (wordend);
3511	      break;
3512
3513	    case '_':
3514	      if (syntax & RE_NO_GNU_OPS)
3515		goto normal_char;
3516              laststart = b;
3517              PATFETCH (c);
3518              if (c == '<')
3519                BUF_PUSH (symbeg);
3520              else if (c == '>')
3521                BUF_PUSH (symend);
3522              else
3523                FREE_STACK_RETURN (REG_BADPAT);
3524              break;
3525
3526	    case 'b':
3527	      if (syntax & RE_NO_GNU_OPS)
3528		goto normal_char;
3529	      BUF_PUSH (wordbound);
3530	      break;
3531
3532	    case 'B':
3533	      if (syntax & RE_NO_GNU_OPS)
3534		goto normal_char;
3535	      BUF_PUSH (notwordbound);
3536	      break;
3537
3538	    case '`':
3539	      if (syntax & RE_NO_GNU_OPS)
3540		goto normal_char;
3541	      BUF_PUSH (begbuf);
3542	      break;
3543
3544	    case '\'':
3545	      if (syntax & RE_NO_GNU_OPS)
3546		goto normal_char;
3547	      BUF_PUSH (endbuf);
3548	      break;
3549
3550	    case '1': case '2': case '3': case '4': case '5':
3551	    case '6': case '7': case '8': case '9':
3552	      {
3553		regnum_t reg;
3554
3555		if (syntax & RE_NO_BK_REFS)
3556		  goto normal_backslash;
3557
3558		reg = c - '0';
3559
3560		/* Can't back reference to a subexpression before its end.  */
3561		if (reg > regnum || group_in_compile_stack (compile_stack, reg))
3562		  FREE_STACK_RETURN (REG_ESUBREG);
3563
3564		laststart = b;
3565		BUF_PUSH_2 (duplicate, reg);
3566	      }
3567	      break;
3568
3569
3570	    case '+':
3571	    case '?':
3572	      if (syntax & RE_BK_PLUS_QM)
3573		goto handle_plus;
3574	      else
3575		goto normal_backslash;
3576
3577	    default:
3578	    normal_backslash:
3579	      /* You might think it would be useful for \ to mean
3580		 not to translate; but if we don't translate it
3581		 it will never match anything.  */
3582	      goto normal_char;
3583	    }
3584	  break;
3585
3586
3587	default:
3588	/* Expects the character in `c'.  */
3589	normal_char:
3590	  /* If no exactn currently being built.  */
3591	  if (!pending_exact
3592
3593	      /* If last exactn not at current position.  */
3594	      || pending_exact + *pending_exact + 1 != b
3595
3596	      /* We have only one byte following the exactn for the count.  */
3597	      || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3598
3599	      /* If followed by a repetition operator.  */
3600	      || (p != pend && (*p == '*' || *p == '^'))
3601	      || ((syntax & RE_BK_PLUS_QM)
3602		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3603		  : p != pend && (*p == '+' || *p == '?'))
3604	      || ((syntax & RE_INTERVALS)
3605		  && ((syntax & RE_NO_BK_BRACES)
3606		      ? p != pend && *p == '{'
3607		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
3608	    {
3609	      /* Start building a new exactn.  */
3610
3611	      laststart = b;
3612
3613	      BUF_PUSH_2 (exactn, 0);
3614	      pending_exact = b - 1;
3615	    }
3616
3617	  GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3618	  {
3619	    int len;
3620
3621	    c = TRANSLATE (c);
3622	    if (multibyte)
3623	      len = CHAR_STRING (c, b);
3624	    else
3625	      *b = c, len = 1;
3626	    b += len;
3627	    (*pending_exact) += len;
3628	  }
3629
3630	  break;
3631	} /* switch (c) */
3632    } /* while p != pend */
3633
3634
3635  /* Through the pattern now.  */
3636
3637  FIXUP_ALT_JUMP ();
3638
3639  if (!COMPILE_STACK_EMPTY)
3640    FREE_STACK_RETURN (REG_EPAREN);
3641
3642  /* If we don't want backtracking, force success
3643     the first time we reach the end of the compiled pattern.  */
3644  if (syntax & RE_NO_POSIX_BACKTRACKING)
3645    BUF_PUSH (succeed);
3646
3647  /* We have succeeded; set the length of the buffer.  */
3648  bufp->used = b - bufp->buffer;
3649
3650#ifdef DEBUG
3651  if (debug > 0)
3652    {
3653      re_compile_fastmap (bufp);
3654      DEBUG_PRINT1 ("\nCompiled pattern: \n");
3655      print_compiled_pattern (bufp);
3656    }
3657  debug--;
3658#endif /* DEBUG */
3659
3660#ifndef MATCH_MAY_ALLOCATE
3661  /* Initialize the failure stack to the largest possible stack.  This
3662     isn't necessary unless we're trying to avoid calling alloca in
3663     the search and match routines.  */
3664  {
3665    int num_regs = bufp->re_nsub + 1;
3666
3667    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
3668      {
3669	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3670
3671	if (! fail_stack.stack)
3672	  fail_stack.stack
3673	    = (fail_stack_elt_t *) malloc (fail_stack.size
3674					   * sizeof (fail_stack_elt_t));
3675	else
3676	  fail_stack.stack
3677	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
3678					    (fail_stack.size
3679					     * sizeof (fail_stack_elt_t)));
3680      }
3681
3682    regex_grow_registers (num_regs);
3683  }
3684#endif /* not MATCH_MAY_ALLOCATE */
3685
3686  FREE_STACK_RETURN (REG_NOERROR);
3687} /* regex_compile */
3688
3689/* Subroutines for `regex_compile'.  */
3690
3691/* Store OP at LOC followed by two-byte integer parameter ARG.  */
3692
3693static void
3694store_op1 (op, loc, arg)
3695    re_opcode_t op;
3696    unsigned char *loc;
3697    int arg;
3698{
3699  *loc = (unsigned char) op;
3700  STORE_NUMBER (loc + 1, arg);
3701}
3702
3703
3704/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3705
3706static void
3707store_op2 (op, loc, arg1, arg2)
3708    re_opcode_t op;
3709    unsigned char *loc;
3710    int arg1, arg2;
3711{
3712  *loc = (unsigned char) op;
3713  STORE_NUMBER (loc + 1, arg1);
3714  STORE_NUMBER (loc + 3, arg2);
3715}
3716
3717
3718/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3719   for OP followed by two-byte integer parameter ARG.  */
3720
3721static void
3722insert_op1 (op, loc, arg, end)
3723    re_opcode_t op;
3724    unsigned char *loc;
3725    int arg;
3726    unsigned char *end;
3727{
3728  register unsigned char *pfrom = end;
3729  register unsigned char *pto = end + 3;
3730
3731  while (pfrom != loc)
3732    *--pto = *--pfrom;
3733
3734  store_op1 (op, loc, arg);
3735}
3736
3737
3738/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3739
3740static void
3741insert_op2 (op, loc, arg1, arg2, end)
3742    re_opcode_t op;
3743    unsigned char *loc;
3744    int arg1, arg2;
3745    unsigned char *end;
3746{
3747  register unsigned char *pfrom = end;
3748  register unsigned char *pto = end + 5;
3749
3750  while (pfrom != loc)
3751    *--pto = *--pfrom;
3752
3753  store_op2 (op, loc, arg1, arg2);
3754}
3755
3756
3757/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3758   after an alternative or a begin-subexpression.  We assume there is at
3759   least one character before the ^.  */
3760
3761static boolean
3762at_begline_loc_p (pattern, p, syntax)
3763    re_char *pattern, *p;
3764    reg_syntax_t syntax;
3765{
3766  re_char *prev = p - 2;
3767  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3768
3769  return
3770       /* After a subexpression?  */
3771       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3772       /* After an alternative?	 */
3773    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
3774       /* After a shy subexpression?  */
3775    || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
3776	&& prev[-1] == '?' && prev[-2] == '('
3777	&& (syntax & RE_NO_BK_PARENS
3778	    || (prev - 3 >= pattern && prev[-3] == '\\')));
3779}
3780
3781
3782/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3783   at least one character after the $, i.e., `P < PEND'.  */
3784
3785static boolean
3786at_endline_loc_p (p, pend, syntax)
3787    re_char *p, *pend;
3788    reg_syntax_t syntax;
3789{
3790  re_char *next = p;
3791  boolean next_backslash = *next == '\\';
3792  re_char *next_next = p + 1 < pend ? p + 1 : 0;
3793
3794  return
3795       /* Before a subexpression?  */
3796       (syntax & RE_NO_BK_PARENS ? *next == ')'
3797	: next_backslash && next_next && *next_next == ')')
3798       /* Before an alternative?  */
3799    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3800	: next_backslash && next_next && *next_next == '|');
3801}
3802
3803
3804/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3805   false if it's not.  */
3806
3807static boolean
3808group_in_compile_stack (compile_stack, regnum)
3809    compile_stack_type compile_stack;
3810    regnum_t regnum;
3811{
3812  int this_element;
3813
3814  for (this_element = compile_stack.avail - 1;
3815       this_element >= 0;
3816       this_element--)
3817    if (compile_stack.stack[this_element].regnum == regnum)
3818      return true;
3819
3820  return false;
3821}
3822
3823/* analyse_first.
3824   If fastmap is non-NULL, go through the pattern and fill fastmap
3825   with all the possible leading chars.  If fastmap is NULL, don't
3826   bother filling it up (obviously) and only return whether the
3827   pattern could potentially match the empty string.
3828
3829   Return 1  if p..pend might match the empty string.
3830   Return 0  if p..pend matches at least one char.
3831   Return -1 if fastmap was not updated accurately.  */
3832
3833static int
3834analyse_first (p, pend, fastmap, multibyte)
3835     re_char *p, *pend;
3836     char *fastmap;
3837     const int multibyte;
3838{
3839  int j, k;
3840  boolean not;
3841
3842  /* If all elements for base leading-codes in fastmap is set, this
3843     flag is set true.  */
3844  boolean match_any_multibyte_characters = false;
3845
3846  assert (p);
3847
3848  /* The loop below works as follows:
3849     - It has a working-list kept in the PATTERN_STACK and which basically
3850       starts by only containing a pointer to the first operation.
3851     - If the opcode we're looking at is a match against some set of
3852       chars, then we add those chars to the fastmap and go on to the
3853       next work element from the worklist (done via `break').
3854     - If the opcode is a control operator on the other hand, we either
3855       ignore it (if it's meaningless at this point, such as `start_memory')
3856       or execute it (if it's a jump).  If the jump has several destinations
3857       (i.e. `on_failure_jump'), then we push the other destination onto the
3858       worklist.
3859     We guarantee termination by ignoring backward jumps (more or less),
3860     so that `p' is monotonically increasing.  More to the point, we
3861     never set `p' (or push) anything `<= p1'.  */
3862
3863  while (p < pend)
3864    {
3865      /* `p1' is used as a marker of how far back a `on_failure_jump'
3866	 can go without being ignored.  It is normally equal to `p'
3867	 (which prevents any backward `on_failure_jump') except right
3868	 after a plain `jump', to allow patterns such as:
3869	    0: jump 10
3870	    3..9: <body>
3871	    10: on_failure_jump 3
3872	 as used for the *? operator.  */
3873      re_char *p1 = p;
3874
3875      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3876	{
3877	case succeed:
3878	  return 1;
3879	  continue;
3880
3881	case duplicate:
3882	  /* If the first character has to match a backreference, that means
3883	     that the group was empty (since it already matched).  Since this
3884	     is the only case that interests us here, we can assume that the
3885	     backreference must match the empty string.  */
3886	  p++;
3887	  continue;
3888
3889
3890      /* Following are the cases which match a character.  These end
3891	 with `break'.  */
3892
3893	case exactn:
3894	  if (fastmap)
3895	    {
3896	      int c = RE_STRING_CHAR (p + 1, pend - p);
3897	      /* When fast-scanning, the fastmap can be indexed either with
3898		 a char (smaller than 256) or with the first byte of
3899		 a char's byte sequence.  So we have to conservatively add
3900		 both to the table.  */
3901	      if (SINGLE_BYTE_CHAR_P (c))
3902		fastmap[c] = 1;
3903	      fastmap[p[1]] = 1;
3904	    }
3905	  break;
3906
3907
3908	case anychar:
3909	  /* We could put all the chars except for \n (and maybe \0)
3910	     but we don't bother since it is generally not worth it.  */
3911	  if (!fastmap) break;
3912	  return -1;
3913
3914
3915	case charset_not:
3916	  /* Chars beyond end of bitmap are possible matches.
3917	     All the single-byte codes can occur in multibyte buffers.
3918	     So any that are not listed in the charset
3919	     are possible matches, even in multibyte buffers.  */
3920	  if (!fastmap) break;
3921	  /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
3922	     because it will automatically be set when needed by virtue of
3923	     being larger than the highest char of its charset (0xbf) but
3924	     smaller than (1<<BYTEWIDTH).  */
3925	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3926	       j < (1 << BYTEWIDTH); j++)
3927	    fastmap[j] = 1;
3928	  /* Fallthrough */
3929	case charset:
3930	  if (!fastmap) break;
3931	  not = (re_opcode_t) *(p - 1) == charset_not;
3932	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3933	       j >= 0; j--)
3934	    if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3935	      {
3936		fastmap[j] = 1;
3937#ifdef emacs
3938		if (j >= 0x80 && j < 0xa0)
3939		  fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
3940#endif
3941	      }
3942
3943	  if ((not && multibyte)
3944	      /* Any character set can possibly contain a character
3945		 which doesn't match the specified set of characters.  */
3946	      || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3947		  && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3948	    /* If we can match a character class, we can match
3949	       any character set.  */
3950	    {
3951	    set_fastmap_for_multibyte_characters:
3952	      if (match_any_multibyte_characters == false)
3953		{
3954		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
3955		    if (BASE_LEADING_CODE_P (j))
3956		      fastmap[j] = 1;
3957		  match_any_multibyte_characters = true;
3958		}
3959	    }
3960
3961	  else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3962		   && match_any_multibyte_characters == false)
3963	    {
3964	      /* Set fastmap[I] 1 where I is a base leading code of each
3965		 multibyte character in the range table. */
3966	      int c, count;
3967
3968	      /* Make P points the range table.  `+ 2' is to skip flag
3969		 bits for a character class.  */
3970	      p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3971
3972	      /* Extract the number of ranges in range table into COUNT.  */
3973	      EXTRACT_NUMBER_AND_INCR (count, p);
3974	      for (; count > 0; count--, p += 2 * 3) /* XXX */
3975		{
3976		  /* Extract the start of each range.  */
3977		  EXTRACT_CHARACTER (c, p);
3978		  j = CHAR_CHARSET (c);
3979		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
3980		}
3981	    }
3982	  break;
3983
3984	case syntaxspec:
3985	case notsyntaxspec:
3986	  if (!fastmap) break;
3987#ifndef emacs
3988	  not = (re_opcode_t)p[-1] == notsyntaxspec;
3989	  k = *p++;
3990	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3991	    if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
3992	      fastmap[j] = 1;
3993	  break;
3994#else  /* emacs */
3995	  /* This match depends on text properties.  These end with
3996	     aborting optimizations.  */
3997	  return -1;
3998
3999	case categoryspec:
4000	case notcategoryspec:
4001	  if (!fastmap) break;
4002	  not = (re_opcode_t)p[-1] == notcategoryspec;
4003	  k = *p++;
4004	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4005	    if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
4006	      fastmap[j] = 1;
4007
4008	  if (multibyte)
4009	    /* Any character set can possibly contain a character
4010	       whose category is K (or not).  */
4011	    goto set_fastmap_for_multibyte_characters;
4012	  break;
4013
4014      /* All cases after this match the empty string.  These end with
4015	 `continue'.  */
4016
4017	case before_dot:
4018	case at_dot:
4019	case after_dot:
4020#endif /* !emacs */
4021	case no_op:
4022	case begline:
4023	case endline:
4024	case begbuf:
4025	case endbuf:
4026	case wordbound:
4027	case notwordbound:
4028	case wordbeg:
4029	case wordend:
4030	case symbeg:
4031	case symend:
4032	  continue;
4033
4034
4035	case jump:
4036	  EXTRACT_NUMBER_AND_INCR (j, p);
4037	  if (j < 0)
4038	    /* Backward jumps can only go back to code that we've already
4039	       visited.  `re_compile' should make sure this is true.  */
4040	    break;
4041	  p += j;
4042	  switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4043	    {
4044	    case on_failure_jump:
4045	    case on_failure_keep_string_jump:
4046	    case on_failure_jump_loop:
4047	    case on_failure_jump_nastyloop:
4048	    case on_failure_jump_smart:
4049	      p++;
4050	      break;
4051	    default:
4052	      continue;
4053	    };
4054	  /* Keep `p1' to allow the `on_failure_jump' we are jumping to
4055	     to jump back to "just after here".  */
4056	  /* Fallthrough */
4057
4058	case on_failure_jump:
4059	case on_failure_keep_string_jump:
4060	case on_failure_jump_nastyloop:
4061	case on_failure_jump_loop:
4062	case on_failure_jump_smart:
4063	  EXTRACT_NUMBER_AND_INCR (j, p);
4064	  if (p + j <= p1)
4065	    ; /* Backward jump to be ignored.  */
4066	  else
4067	    { /* We have to look down both arms.
4068		 We first go down the "straight" path so as to minimize
4069		 stack usage when going through alternatives.  */
4070	      int r = analyse_first (p, pend, fastmap, multibyte);
4071	      if (r) return r;
4072	      p += j;
4073	    }
4074	  continue;
4075
4076
4077	case jump_n:
4078	  /* This code simply does not properly handle forward jump_n.  */
4079	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
4080	  p += 4;
4081	  /* jump_n can either jump or fall through.  The (backward) jump
4082	     case has already been handled, so we only need to look at the
4083	     fallthrough case.  */
4084	  continue;
4085
4086	case succeed_n:
4087	  /* If N == 0, it should be an on_failure_jump_loop instead.  */
4088	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
4089	  p += 4;
4090	  /* We only care about one iteration of the loop, so we don't
4091	     need to consider the case where this behaves like an
4092	     on_failure_jump.  */
4093	  continue;
4094
4095
4096	case set_number_at:
4097	  p += 4;
4098	  continue;
4099
4100
4101	case start_memory:
4102	case stop_memory:
4103	  p += 1;
4104	  continue;
4105
4106
4107	default:
4108	  abort (); /* We have listed all the cases.  */
4109	} /* switch *p++ */
4110
4111      /* Getting here means we have found the possible starting
4112	 characters for one path of the pattern -- and that the empty
4113	 string does not match.  We need not follow this path further.  */
4114      return 0;
4115    } /* while p */
4116
4117  /* We reached the end without matching anything.  */
4118  return 1;
4119
4120} /* analyse_first */
4121
4122/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4123   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4124   characters can start a string that matches the pattern.  This fastmap
4125   is used by re_search to skip quickly over impossible starting points.
4126
4127   Character codes above (1 << BYTEWIDTH) are not represented in the
4128   fastmap, but the leading codes are represented.  Thus, the fastmap
4129   indicates which character sets could start a match.
4130
4131   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4132   area as BUFP->fastmap.
4133
4134   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4135   the pattern buffer.
4136
4137   Returns 0 if we succeed, -2 if an internal error.   */
4138
4139int
4140re_compile_fastmap (bufp)
4141     struct re_pattern_buffer *bufp;
4142{
4143  char *fastmap = bufp->fastmap;
4144  int analysis;
4145
4146  assert (fastmap && bufp->buffer);
4147
4148  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4149  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4150
4151  analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
4152			    fastmap, RE_MULTIBYTE_P (bufp));
4153  bufp->can_be_null = (analysis != 0);
4154  return 0;
4155} /* re_compile_fastmap */
4156
4157/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4158   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4159   this memory for recording register information.  STARTS and ENDS
4160   must be allocated using the malloc library routine, and must each
4161   be at least NUM_REGS * sizeof (regoff_t) bytes long.
4162
4163   If NUM_REGS == 0, then subsequent matches should allocate their own
4164   register data.
4165
4166   Unless this function is called, the first search or match using
4167   PATTERN_BUFFER will allocate its own register data, without
4168   freeing the old data.  */
4169
4170void
4171re_set_registers (bufp, regs, num_regs, starts, ends)
4172    struct re_pattern_buffer *bufp;
4173    struct re_registers *regs;
4174    unsigned num_regs;
4175    regoff_t *starts, *ends;
4176{
4177  if (num_regs)
4178    {
4179      bufp->regs_allocated = REGS_REALLOCATE;
4180      regs->num_regs = num_regs;
4181      regs->start = starts;
4182      regs->end = ends;
4183    }
4184  else
4185    {
4186      bufp->regs_allocated = REGS_UNALLOCATED;
4187      regs->num_regs = 0;
4188      regs->start = regs->end = (regoff_t *) 0;
4189    }
4190}
4191WEAK_ALIAS (__re_set_registers, re_set_registers)
4192
4193/* Searching routines.  */
4194
4195/* Like re_search_2, below, but only one string is specified, and
4196   doesn't let you say where to stop matching. */
4197
4198int
4199re_search (bufp, string, size, startpos, range, regs)
4200     struct re_pattern_buffer *bufp;
4201     const char *string;
4202     int size, startpos, range;
4203     struct re_registers *regs;
4204{
4205  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
4206		      regs, size);
4207}
4208WEAK_ALIAS (__re_search, re_search)
4209
4210/* Head address of virtual concatenation of string.  */
4211#define HEAD_ADDR_VSTRING(P)		\
4212  (((P) >= size1 ? string2 : string1))
4213
4214/* End address of virtual concatenation of string.  */
4215#define STOP_ADDR_VSTRING(P)				\
4216  (((P) >= size1 ? string2 + size2 : string1 + size1))
4217
4218/* Address of POS in the concatenation of virtual string. */
4219#define POS_ADDR_VSTRING(POS)					\
4220  (((POS) >= size1 ? string2 - size1 : string1) + (POS))
4221
4222/* Using the compiled pattern in BUFP->buffer, first tries to match the
4223   virtual concatenation of STRING1 and STRING2, starting first at index
4224   STARTPOS, then at STARTPOS + 1, and so on.
4225
4226   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4227
4228   RANGE is how far to scan while trying to match.  RANGE = 0 means try
4229   only at STARTPOS; in general, the last start tried is STARTPOS +
4230   RANGE.
4231
4232   In REGS, return the indices of the virtual concatenation of STRING1
4233   and STRING2 that matched the entire BUFP->buffer and its contained
4234   subexpressions.
4235
4236   Do not consider matching one past the index STOP in the virtual
4237   concatenation of STRING1 and STRING2.
4238
4239   We return either the position in the strings at which the match was
4240   found, -1 if no match, or -2 if error (such as failure
4241   stack overflow).  */
4242
4243int
4244re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
4245     struct re_pattern_buffer *bufp;
4246     const char *str1, *str2;
4247     int size1, size2;
4248     int startpos;
4249     int range;
4250     struct re_registers *regs;
4251     int stop;
4252{
4253  int val;
4254  re_char *string1 = (re_char*) str1;
4255  re_char *string2 = (re_char*) str2;
4256  register char *fastmap = bufp->fastmap;
4257  register RE_TRANSLATE_TYPE translate = bufp->translate;
4258  int total_size = size1 + size2;
4259  int endpos = startpos + range;
4260  boolean anchored_start;
4261
4262  /* Nonzero if we have to concern multibyte character.  */
4263  const boolean multibyte = RE_MULTIBYTE_P (bufp);
4264
4265  /* Check for out-of-range STARTPOS.  */
4266  if (startpos < 0 || startpos > total_size)
4267    return -1;
4268
4269  /* Fix up RANGE if it might eventually take us outside
4270     the virtual concatenation of STRING1 and STRING2.
4271     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
4272  if (endpos < 0)
4273    range = 0 - startpos;
4274  else if (endpos > total_size)
4275    range = total_size - startpos;
4276
4277  /* If the search isn't to be a backwards one, don't waste time in a
4278     search for a pattern anchored at beginning of buffer.  */
4279  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
4280    {
4281      if (startpos > 0)
4282	return -1;
4283      else
4284	range = 0;
4285    }
4286
4287#ifdef emacs
4288  /* In a forward search for something that starts with \=.
4289     don't keep searching past point.  */
4290  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4291    {
4292      range = PT_BYTE - BEGV_BYTE - startpos;
4293      if (range < 0)
4294	return -1;
4295    }
4296#endif /* emacs */
4297
4298  /* Update the fastmap now if not correct already.  */
4299  if (fastmap && !bufp->fastmap_accurate)
4300    re_compile_fastmap (bufp);
4301
4302  /* See whether the pattern is anchored.  */
4303  anchored_start = (bufp->buffer[0] == begline);
4304
4305#ifdef emacs
4306  gl_state.object = re_match_object;
4307  {
4308    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
4309
4310    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4311  }
4312#endif
4313
4314  /* Loop through the string, looking for a place to start matching.  */
4315  for (;;)
4316    {
4317      /* If the pattern is anchored,
4318	 skip quickly past places we cannot match.
4319	 We don't bother to treat startpos == 0 specially
4320	 because that case doesn't repeat.  */
4321      if (anchored_start && startpos > 0)
4322	{
4323	  if (! ((startpos <= size1 ? string1[startpos - 1]
4324		  : string2[startpos - size1 - 1])
4325		 == '\n'))
4326	    goto advance;
4327	}
4328
4329      /* If a fastmap is supplied, skip quickly over characters that
4330	 cannot be the start of a match.  If the pattern can match the
4331	 null string, however, we don't need to skip characters; we want
4332	 the first null string.  */
4333      if (fastmap && startpos < total_size && !bufp->can_be_null)
4334	{
4335	  register re_char *d;
4336	  register re_wchar_t buf_ch;
4337
4338	  d = POS_ADDR_VSTRING (startpos);
4339
4340	  if (range > 0)	/* Searching forwards.  */
4341	    {
4342	      register int lim = 0;
4343	      int irange = range;
4344
4345	      if (startpos < size1 && startpos + range >= size1)
4346		lim = range - (size1 - startpos);
4347
4348	      /* Written out as an if-else to avoid testing `translate'
4349		 inside the loop.  */
4350	      if (RE_TRANSLATE_P (translate))
4351		{
4352		  if (multibyte)
4353		    while (range > lim)
4354		      {
4355			int buf_charlen;
4356
4357			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
4358							 buf_charlen);
4359
4360			buf_ch = RE_TRANSLATE (translate, buf_ch);
4361			if (buf_ch >= 0400
4362			    || fastmap[buf_ch])
4363			  break;
4364
4365			range -= buf_charlen;
4366			d += buf_charlen;
4367		      }
4368		  else
4369		    {
4370		      /* Convert *d to integer to shut up GCC's
4371			 whining about comparison that is always
4372			 true.  */
4373		      int di = *d;
4374
4375		      while (range > lim
4376			     && !fastmap[RE_TRANSLATE (translate, di)])
4377			{
4378			  di = *(++d);
4379			  range--;
4380			}
4381		    }
4382		}
4383	      else
4384		do
4385		  {
4386		    re_char *d_start = d;
4387		    while (range > lim && !fastmap[*d])
4388		      {
4389			d++;
4390			range--;
4391		      }
4392#ifdef emacs
4393		    if (multibyte && range > lim)
4394		      {
4395			/* Check that we are at the beginning of a char.  */
4396			int at_boundary;
4397			AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
4398			if (at_boundary)
4399			  break;
4400			else
4401			  { /* We have matched an internal byte of a char
4402			       rather than the leading byte, so it's a false
4403			       positive: we should keep scanning.  */
4404			    d++; range--;
4405			  }
4406		      }
4407		    else
4408#endif
4409		      break;
4410		  } while (1);
4411
4412	      startpos += irange - range;
4413	    }
4414	  else				/* Searching backwards.  */
4415	    {
4416	      int room = (startpos >= size1
4417			  ? size2 + size1 - startpos
4418			  : size1 - startpos);
4419	      buf_ch = RE_STRING_CHAR (d, room);
4420	      buf_ch = TRANSLATE (buf_ch);
4421
4422	      if (! (buf_ch >= 0400
4423		     || fastmap[buf_ch]))
4424		goto advance;
4425	    }
4426	}
4427
4428      /* If can't match the null string, and that's all we have left, fail.  */
4429      if (range >= 0 && startpos == total_size && fastmap
4430	  && !bufp->can_be_null)
4431	return -1;
4432
4433      val = re_match_2_internal (bufp, string1, size1, string2, size2,
4434				 startpos, regs, stop);
4435#ifndef REGEX_MALLOC
4436# ifdef C_ALLOCA
4437      alloca (0);
4438# endif
4439#endif
4440
4441      if (val >= 0)
4442	return startpos;
4443
4444      if (val == -2)
4445	return -2;
4446
4447    advance:
4448      if (!range)
4449	break;
4450      else if (range > 0)
4451	{
4452	  /* Update STARTPOS to the next character boundary.  */
4453	  if (multibyte)
4454	    {
4455	      re_char *p = POS_ADDR_VSTRING (startpos);
4456	      re_char *pend = STOP_ADDR_VSTRING (startpos);
4457	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
4458
4459	      range -= len;
4460	      if (range < 0)
4461		break;
4462	      startpos += len;
4463	    }
4464	  else
4465	    {
4466	      range--;
4467	      startpos++;
4468	    }
4469	}
4470      else
4471	{
4472	  range++;
4473	  startpos--;
4474
4475	  /* Update STARTPOS to the previous character boundary.  */
4476	  if (multibyte)
4477	    {
4478	      re_char *p = POS_ADDR_VSTRING (startpos) + 1;
4479	      re_char *p0 = p;
4480	      re_char *phead = HEAD_ADDR_VSTRING (startpos);
4481
4482	      /* Find the head of multibyte form.  */
4483	      PREV_CHAR_BOUNDARY (p, phead);
4484	      range += p0 - 1 - p;
4485	      if (range > 0)
4486		break;
4487
4488	      startpos -= p0 - 1 - p;
4489	    }
4490	}
4491    }
4492  return -1;
4493} /* re_search_2 */
4494WEAK_ALIAS (__re_search_2, re_search_2)
4495
4496/* Declarations and macros for re_match_2.  */
4497
4498static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4499				    register int len,
4500				    RE_TRANSLATE_TYPE translate,
4501				    const int multibyte));
4502
4503/* This converts PTR, a pointer into one of the search strings `string1'
4504   and `string2' into an offset from the beginning of that string.  */
4505#define POINTER_TO_OFFSET(ptr)			\
4506  (FIRST_STRING_P (ptr)				\
4507   ? ((regoff_t) ((ptr) - string1))		\
4508   : ((regoff_t) ((ptr) - string2 + size1)))
4509
4510/* Call before fetching a character with *d.  This switches over to
4511   string2 if necessary.
4512   Check re_match_2_internal for a discussion of why end_match_2 might
4513   not be within string2 (but be equal to end_match_1 instead).  */
4514#define PREFETCH()							\
4515  while (d == dend)							\
4516    {									\
4517      /* End of string2 => fail.  */					\
4518      if (dend == end_match_2)						\
4519	goto fail;							\
4520      /* End of string1 => advance to string2.  */			\
4521      d = string2;							\
4522      dend = end_match_2;						\
4523    }
4524
4525/* Call before fetching a char with *d if you already checked other limits.
4526   This is meant for use in lookahead operations like wordend, etc..
4527   where we might need to look at parts of the string that might be
4528   outside of the LIMITs (i.e past `stop').  */
4529#define PREFETCH_NOLIMIT()						\
4530  if (d == end1)							\
4531     {									\
4532       d = string2;							\
4533       dend = end_match_2;						\
4534     }									\
4535
4536/* Test if at very beginning or at very end of the virtual concatenation
4537   of `string1' and `string2'.  If only one string, it's `string2'.  */
4538#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4539#define AT_STRINGS_END(d) ((d) == end2)
4540
4541
4542/* Test if D points to a character which is word-constituent.  We have
4543   two special cases to check for: if past the end of string1, look at
4544   the first character in string2; and if before the beginning of
4545   string2, look at the last character in string1.  */
4546#define WORDCHAR_P(d)							\
4547  (SYNTAX ((d) == end1 ? *string2					\
4548	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
4549   == Sword)
4550
4551/* Disabled due to a compiler bug -- see comment at case wordbound */
4552
4553/* The comment at case wordbound is following one, but we don't use
4554   AT_WORD_BOUNDARY anymore to support multibyte form.
4555
4556   The DEC Alpha C compiler 3.x generates incorrect code for the
4557   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4558   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4559   macro and introducing temporary variables works around the bug.  */
4560
4561#if 0
4562/* Test if the character before D and the one at D differ with respect
4563   to being word-constituent.  */
4564#define AT_WORD_BOUNDARY(d)						\
4565  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
4566   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4567#endif
4568
4569/* Free everything we malloc.  */
4570#ifdef MATCH_MAY_ALLOCATE
4571# define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4572# define FREE_VARIABLES()						\
4573  do {									\
4574    REGEX_FREE_STACK (fail_stack.stack);				\
4575    FREE_VAR (regstart);						\
4576    FREE_VAR (regend);							\
4577    FREE_VAR (best_regstart);						\
4578    FREE_VAR (best_regend);						\
4579  } while (0)
4580#else
4581# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4582#endif /* not MATCH_MAY_ALLOCATE */
4583
4584
4585/* Optimization routines.  */
4586
4587/* If the operation is a match against one or more chars,
4588   return a pointer to the next operation, else return NULL.  */
4589static re_char *
4590skip_one_char (p)
4591     re_char *p;
4592{
4593  switch (SWITCH_ENUM_CAST (*p++))
4594    {
4595    case anychar:
4596      break;
4597
4598    case exactn:
4599      p += *p + 1;
4600      break;
4601
4602    case charset_not:
4603    case charset:
4604      if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4605	{
4606	  int mcnt;
4607	  p = CHARSET_RANGE_TABLE (p - 1);
4608	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4609	  p = CHARSET_RANGE_TABLE_END (p, mcnt);
4610	}
4611      else
4612	p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4613      break;
4614
4615    case syntaxspec:
4616    case notsyntaxspec:
4617#ifdef emacs
4618    case categoryspec:
4619    case notcategoryspec:
4620#endif /* emacs */
4621      p++;
4622      break;
4623
4624    default:
4625      p = NULL;
4626    }
4627  return p;
4628}
4629
4630
4631/* Jump over non-matching operations.  */
4632static re_char *
4633skip_noops (p, pend)
4634     re_char *p, *pend;
4635{
4636  int mcnt;
4637  while (p < pend)
4638    {
4639      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4640	{
4641	case start_memory:
4642	case stop_memory:
4643	  p += 2; break;
4644	case no_op:
4645	  p += 1; break;
4646	case jump:
4647	  p += 1;
4648	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4649	  p += mcnt;
4650	  break;
4651	default:
4652	  return p;
4653	}
4654    }
4655  assert (p == pend);
4656  return p;
4657}
4658
4659/* Non-zero if "p1 matches something" implies "p2 fails".  */
4660static int
4661mutually_exclusive_p (bufp, p1, p2)
4662     struct re_pattern_buffer *bufp;
4663     re_char *p1, *p2;
4664{
4665  re_opcode_t op2;
4666  const boolean multibyte = RE_MULTIBYTE_P (bufp);
4667  unsigned char *pend = bufp->buffer + bufp->used;
4668
4669  assert (p1 >= bufp->buffer && p1 < pend
4670	  && p2 >= bufp->buffer && p2 <= pend);
4671
4672  /* Skip over open/close-group commands.
4673     If what follows this loop is a ...+ construct,
4674     look at what begins its body, since we will have to
4675     match at least one of that.  */
4676  p2 = skip_noops (p2, pend);
4677  /* The same skip can be done for p1, except that this function
4678     is only used in the case where p1 is a simple match operator.  */
4679  /* p1 = skip_noops (p1, pend); */
4680
4681  assert (p1 >= bufp->buffer && p1 < pend
4682	  && p2 >= bufp->buffer && p2 <= pend);
4683
4684  op2 = p2 == pend ? succeed : *p2;
4685
4686  switch (SWITCH_ENUM_CAST (op2))
4687    {
4688    case succeed:
4689    case endbuf:
4690      /* If we're at the end of the pattern, we can change.  */
4691      if (skip_one_char (p1))
4692	{
4693	  DEBUG_PRINT1 ("  End of pattern: fast loop.\n");
4694	  return 1;
4695	}
4696      break;
4697
4698    case endline:
4699    case exactn:
4700      {
4701	register re_wchar_t c
4702	  = (re_opcode_t) *p2 == endline ? '\n'
4703	  : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
4704
4705	if ((re_opcode_t) *p1 == exactn)
4706	  {
4707	    if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2))
4708	      {
4709		DEBUG_PRINT3 ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
4710		return 1;
4711	      }
4712	  }
4713
4714	else if ((re_opcode_t) *p1 == charset
4715		 || (re_opcode_t) *p1 == charset_not)
4716	  {
4717	    int not = (re_opcode_t) *p1 == charset_not;
4718
4719	    /* Test if C is listed in charset (or charset_not)
4720	       at `p1'.  */
4721	    if (SINGLE_BYTE_CHAR_P (c))
4722	      {
4723		if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4724		    && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4725		  not = !not;
4726	      }
4727	    else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4728	      CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4729
4730	    /* `not' is equal to 1 if c would match, which means
4731	       that we can't change to pop_failure_jump.  */
4732	    if (!not)
4733	      {
4734		DEBUG_PRINT1 ("	 No match => fast loop.\n");
4735		return 1;
4736	      }
4737	  }
4738	else if ((re_opcode_t) *p1 == anychar
4739		 && c == '\n')
4740	  {
4741	    DEBUG_PRINT1 ("   . != \\n => fast loop.\n");
4742	    return 1;
4743	  }
4744      }
4745      break;
4746
4747    case charset:
4748      {
4749	if ((re_opcode_t) *p1 == exactn)
4750	  /* Reuse the code above.  */
4751	  return mutually_exclusive_p (bufp, p2, p1);
4752
4753      /* It is hard to list up all the character in charset
4754	 P2 if it includes multibyte character.  Give up in
4755	 such case.  */
4756      else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4757	{
4758	  /* Now, we are sure that P2 has no range table.
4759	     So, for the size of bitmap in P2, `p2[1]' is
4760	     enough.  But P1 may have range table, so the
4761	     size of bitmap table of P1 is extracted by
4762	     using macro `CHARSET_BITMAP_SIZE'.
4763
4764	     Since we know that all the character listed in
4765	     P2 is ASCII, it is enough to test only bitmap
4766	     table of P1.  */
4767
4768	  if ((re_opcode_t) *p1 == charset)
4769	    {
4770	      int idx;
4771	      /* We win if the charset inside the loop
4772		 has no overlap with the one after the loop.  */
4773	      for (idx = 0;
4774		   (idx < (int) p2[1]
4775		    && idx < CHARSET_BITMAP_SIZE (p1));
4776		   idx++)
4777		if ((p2[2 + idx] & p1[2 + idx]) != 0)
4778		  break;
4779
4780	      if (idx == p2[1]
4781		  || idx == CHARSET_BITMAP_SIZE (p1))
4782		{
4783		  DEBUG_PRINT1 ("	 No match => fast loop.\n");
4784		  return 1;
4785		}
4786	    }
4787	  else if ((re_opcode_t) *p1 == charset_not)
4788	    {
4789	      int idx;
4790	      /* We win if the charset_not inside the loop lists
4791		 every character listed in the charset after.  */
4792	      for (idx = 0; idx < (int) p2[1]; idx++)
4793		if (! (p2[2 + idx] == 0
4794		       || (idx < CHARSET_BITMAP_SIZE (p1)
4795			   && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4796		  break;
4797
4798		if (idx == p2[1])
4799		  {
4800		    DEBUG_PRINT1 ("	 No match => fast loop.\n");
4801		    return 1;
4802		  }
4803	      }
4804	  }
4805      }
4806      break;
4807
4808    case charset_not:
4809      switch (SWITCH_ENUM_CAST (*p1))
4810	{
4811	case exactn:
4812	case charset:
4813	  /* Reuse the code above.  */
4814	  return mutually_exclusive_p (bufp, p2, p1);
4815	case charset_not:
4816	  /* When we have two charset_not, it's very unlikely that
4817	     they don't overlap.  The union of the two sets of excluded
4818	     chars should cover all possible chars, which, as a matter of
4819	     fact, is virtually impossible in multibyte buffers.  */
4820	  break;
4821	}
4822      break;
4823
4824    case wordend:
4825      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
4826    case symend:
4827      return ((re_opcode_t) *p1 == syntaxspec
4828              && (p1[1] == Ssymbol || p1[1] == Sword));
4829    case notsyntaxspec:
4830      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
4831
4832    case wordbeg:
4833      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
4834    case symbeg:
4835      return ((re_opcode_t) *p1 == notsyntaxspec
4836              && (p1[1] == Ssymbol || p1[1] == Sword));
4837    case syntaxspec:
4838      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
4839
4840    case wordbound:
4841      return (((re_opcode_t) *p1 == notsyntaxspec
4842	       || (re_opcode_t) *p1 == syntaxspec)
4843	      && p1[1] == Sword);
4844
4845#ifdef emacs
4846    case categoryspec:
4847      return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4848    case notcategoryspec:
4849      return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4850#endif /* emacs */
4851
4852    default:
4853      ;
4854    }
4855
4856  /* Safe default.  */
4857  return 0;
4858}
4859
4860
4861/* Matching routines.  */
4862
4863#ifndef emacs	/* Emacs never uses this.  */
4864/* re_match is like re_match_2 except it takes only a single string.  */
4865
4866int
4867re_match (bufp, string, size, pos, regs)
4868     struct re_pattern_buffer *bufp;
4869     const char *string;
4870     int size, pos;
4871     struct re_registers *regs;
4872{
4873  int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
4874				    pos, regs, size);
4875# if defined C_ALLOCA && !defined REGEX_MALLOC
4876  alloca (0);
4877# endif
4878  return result;
4879}
4880WEAK_ALIAS (__re_match, re_match)
4881#endif /* not emacs */
4882
4883#ifdef emacs
4884/* In Emacs, this is the string or buffer in which we
4885   are matching.  It is used for looking up syntax properties.  */
4886Lisp_Object re_match_object;
4887#endif
4888
4889/* re_match_2 matches the compiled pattern in BUFP against the
4890   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4891   and SIZE2, respectively).  We start matching at POS, and stop
4892   matching at STOP.
4893
4894   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4895   store offsets for the substring each group matched in REGS.  See the
4896   documentation for exactly how many groups we fill.
4897
4898   We return -1 if no match, -2 if an internal error (such as the
4899   failure stack overflowing).  Otherwise, we return the length of the
4900   matched substring.  */
4901
4902int
4903re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4904     struct re_pattern_buffer *bufp;
4905     const char *string1, *string2;
4906     int size1, size2;
4907     int pos;
4908     struct re_registers *regs;
4909     int stop;
4910{
4911  int result;
4912
4913#ifdef emacs
4914  int charpos;
4915  gl_state.object = re_match_object;
4916  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
4917  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4918#endif
4919
4920  result = re_match_2_internal (bufp, (re_char*) string1, size1,
4921				(re_char*) string2, size2,
4922				pos, regs, stop);
4923#if defined C_ALLOCA && !defined REGEX_MALLOC
4924  alloca (0);
4925#endif
4926  return result;
4927}
4928WEAK_ALIAS (__re_match_2, re_match_2)
4929
4930/* This is a separate function so that we can force an alloca cleanup
4931   afterwards.  */
4932static int
4933re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4934     struct re_pattern_buffer *bufp;
4935     re_char *string1, *string2;
4936     int size1, size2;
4937     int pos;
4938     struct re_registers *regs;
4939     int stop;
4940{
4941  /* General temporaries.  */
4942  int mcnt;
4943  size_t reg;
4944  boolean not;
4945
4946  /* Just past the end of the corresponding string.  */
4947  re_char *end1, *end2;
4948
4949  /* Pointers into string1 and string2, just past the last characters in
4950     each to consider matching.  */
4951  re_char *end_match_1, *end_match_2;
4952
4953  /* Where we are in the data, and the end of the current string.  */
4954  re_char *d, *dend;
4955
4956  /* Used sometimes to remember where we were before starting matching
4957     an operator so that we can go back in case of failure.  This "atomic"
4958     behavior of matching opcodes is indispensable to the correctness
4959     of the on_failure_keep_string_jump optimization.  */
4960  re_char *dfail;
4961
4962  /* Where we are in the pattern, and the end of the pattern.  */
4963  re_char *p = bufp->buffer;
4964  re_char *pend = p + bufp->used;
4965
4966  /* We use this to map every character in the string.  */
4967  RE_TRANSLATE_TYPE translate = bufp->translate;
4968
4969  /* Nonzero if we have to concern multibyte character.  */
4970  const boolean multibyte = RE_MULTIBYTE_P (bufp);
4971
4972  /* Failure point stack.  Each place that can handle a failure further
4973     down the line pushes a failure point on this stack.  It consists of
4974     regstart, and regend for all registers corresponding to
4975     the subexpressions we're currently inside, plus the number of such
4976     registers, and, finally, two char *'s.  The first char * is where
4977     to resume scanning the pattern; the second one is where to resume
4978     scanning the strings.  */
4979#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
4980  fail_stack_type fail_stack;
4981#endif
4982#ifdef DEBUG
4983  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4984#endif
4985
4986#if defined REL_ALLOC && defined REGEX_MALLOC
4987  /* This holds the pointer to the failure stack, when
4988     it is allocated relocatably.  */
4989  fail_stack_elt_t *failure_stack_ptr;
4990#endif
4991
4992  /* We fill all the registers internally, independent of what we
4993     return, for use in backreferences.  The number here includes
4994     an element for register zero.  */
4995  size_t num_regs = bufp->re_nsub + 1;
4996
4997  /* Information on the contents of registers. These are pointers into
4998     the input strings; they record just what was matched (on this
4999     attempt) by a subexpression part of the pattern, that is, the
5000     regnum-th regstart pointer points to where in the pattern we began
5001     matching and the regnum-th regend points to right after where we
5002     stopped matching the regnum-th subexpression.  (The zeroth register
5003     keeps track of what the whole pattern matches.)  */
5004#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5005  re_char **regstart, **regend;
5006#endif
5007
5008  /* The following record the register info as found in the above
5009     variables when we find a match better than any we've seen before.
5010     This happens as we backtrack through the failure points, which in
5011     turn happens only if we have not yet matched the entire string. */
5012  unsigned best_regs_set = false;
5013#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5014  re_char **best_regstart, **best_regend;
5015#endif
5016
5017  /* Logically, this is `best_regend[0]'.  But we don't want to have to
5018     allocate space for that if we're not allocating space for anything
5019     else (see below).  Also, we never need info about register 0 for
5020     any of the other register vectors, and it seems rather a kludge to
5021     treat `best_regend' differently than the rest.  So we keep track of
5022     the end of the best match so far in a separate variable.  We
5023     initialize this to NULL so that when we backtrack the first time
5024     and need to test it, it's not garbage.  */
5025  re_char *match_end = NULL;
5026
5027#ifdef DEBUG
5028  /* Counts the total number of registers pushed.  */
5029  unsigned num_regs_pushed = 0;
5030#endif
5031
5032  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5033
5034  INIT_FAIL_STACK ();
5035
5036#ifdef MATCH_MAY_ALLOCATE
5037  /* Do not bother to initialize all the register variables if there are
5038     no groups in the pattern, as it takes a fair amount of time.  If
5039     there are groups, we include space for register 0 (the whole
5040     pattern), even though we never use it, since it simplifies the
5041     array indexing.  We should fix this.  */
5042  if (bufp->re_nsub)
5043    {
5044      regstart = REGEX_TALLOC (num_regs, re_char *);
5045      regend = REGEX_TALLOC (num_regs, re_char *);
5046      best_regstart = REGEX_TALLOC (num_regs, re_char *);
5047      best_regend = REGEX_TALLOC (num_regs, re_char *);
5048
5049      if (!(regstart && regend && best_regstart && best_regend))
5050	{
5051	  FREE_VARIABLES ();
5052	  return -2;
5053	}
5054    }
5055  else
5056    {
5057      /* We must initialize all our variables to NULL, so that
5058	 `FREE_VARIABLES' doesn't try to free them.  */
5059      regstart = regend = best_regstart = best_regend = NULL;
5060    }
5061#endif /* MATCH_MAY_ALLOCATE */
5062
5063  /* The starting position is bogus.  */
5064  if (pos < 0 || pos > size1 + size2)
5065    {
5066      FREE_VARIABLES ();
5067      return -1;
5068    }
5069
5070  /* Initialize subexpression text positions to -1 to mark ones that no
5071     start_memory/stop_memory has been seen for. Also initialize the
5072     register information struct.  */
5073  for (reg = 1; reg < num_regs; reg++)
5074    regstart[reg] = regend[reg] = NULL;
5075
5076  /* We move `string1' into `string2' if the latter's empty -- but not if
5077     `string1' is null.  */
5078  if (size2 == 0 && string1 != NULL)
5079    {
5080      string2 = string1;
5081      size2 = size1;
5082      string1 = 0;
5083      size1 = 0;
5084    }
5085  end1 = string1 + size1;
5086  end2 = string2 + size2;
5087
5088  /* `p' scans through the pattern as `d' scans through the data.
5089     `dend' is the end of the input string that `d' points within.  `d'
5090     is advanced into the following input string whenever necessary, but
5091     this happens before fetching; therefore, at the beginning of the
5092     loop, `d' can be pointing at the end of a string, but it cannot
5093     equal `string2'.  */
5094  if (pos >= size1)
5095    {
5096      /* Only match within string2.  */
5097      d = string2 + pos - size1;
5098      dend = end_match_2 = string2 + stop - size1;
5099      end_match_1 = end1;	/* Just to give it a value.  */
5100    }
5101  else
5102    {
5103      if (stop < size1)
5104	{
5105	  /* Only match within string1.  */
5106	  end_match_1 = string1 + stop;
5107	  /* BEWARE!
5108	     When we reach end_match_1, PREFETCH normally switches to string2.
5109	     But in the present case, this means that just doing a PREFETCH
5110	     makes us jump from `stop' to `gap' within the string.
5111	     What we really want here is for the search to stop as
5112	     soon as we hit end_match_1.  That's why we set end_match_2
5113	     to end_match_1 (since PREFETCH fails as soon as we hit
5114	     end_match_2).  */
5115	  end_match_2 = end_match_1;
5116	}
5117      else
5118	{ /* It's important to use this code when stop == size so that
5119	     moving `d' from end1 to string2 will not prevent the d == dend
5120	     check from catching the end of string.  */
5121	  end_match_1 = end1;
5122	  end_match_2 = string2 + stop - size1;
5123	}
5124      d = string1 + pos;
5125      dend = end_match_1;
5126    }
5127
5128  DEBUG_PRINT1 ("The compiled pattern is: ");
5129  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5130  DEBUG_PRINT1 ("The string to match is: `");
5131  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5132  DEBUG_PRINT1 ("'\n");
5133
5134  /* This loops over pattern commands.  It exits by returning from the
5135     function if the match is complete, or it drops through if the match
5136     fails at this starting point in the input data.  */
5137  for (;;)
5138    {
5139      DEBUG_PRINT2 ("\n%p: ", p);
5140
5141      if (p == pend)
5142	{ /* End of pattern means we might have succeeded.  */
5143	  DEBUG_PRINT1 ("end of pattern ... ");
5144
5145	  /* If we haven't matched the entire string, and we want the
5146	     longest match, try backtracking.  */
5147	  if (d != end_match_2)
5148	    {
5149	      /* 1 if this match ends in the same string (string1 or string2)
5150		 as the best previous match.  */
5151	      boolean same_str_p = (FIRST_STRING_P (match_end)
5152				    == FIRST_STRING_P (d));
5153	      /* 1 if this match is the best seen so far.  */
5154	      boolean best_match_p;
5155
5156	      /* AIX compiler got confused when this was combined
5157		 with the previous declaration.  */
5158	      if (same_str_p)
5159		best_match_p = d > match_end;
5160	      else
5161		best_match_p = !FIRST_STRING_P (d);
5162
5163	      DEBUG_PRINT1 ("backtracking.\n");
5164
5165	      if (!FAIL_STACK_EMPTY ())
5166		{ /* More failure points to try.  */
5167
5168		  /* If exceeds best match so far, save it.  */
5169		  if (!best_regs_set || best_match_p)
5170		    {
5171		      best_regs_set = true;
5172		      match_end = d;
5173
5174		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5175
5176		      for (reg = 1; reg < num_regs; reg++)
5177			{
5178			  best_regstart[reg] = regstart[reg];
5179			  best_regend[reg] = regend[reg];
5180			}
5181		    }
5182		  goto fail;
5183		}
5184
5185	      /* If no failure points, don't restore garbage.  And if
5186		 last match is real best match, don't restore second
5187		 best one. */
5188	      else if (best_regs_set && !best_match_p)
5189		{
5190		restore_best_regs:
5191		  /* Restore best match.  It may happen that `dend ==
5192		     end_match_1' while the restored d is in string2.
5193		     For example, the pattern `x.*y.*z' against the
5194		     strings `x-' and `y-z-', if the two strings are
5195		     not consecutive in memory.  */
5196		  DEBUG_PRINT1 ("Restoring best registers.\n");
5197
5198		  d = match_end;
5199		  dend = ((d >= string1 && d <= end1)
5200			   ? end_match_1 : end_match_2);
5201
5202		  for (reg = 1; reg < num_regs; reg++)
5203		    {
5204		      regstart[reg] = best_regstart[reg];
5205		      regend[reg] = best_regend[reg];
5206		    }
5207		}
5208	    } /* d != end_match_2 */
5209
5210	succeed_label:
5211	  DEBUG_PRINT1 ("Accepting match.\n");
5212
5213	  /* If caller wants register contents data back, do it.  */
5214	  if (regs && !bufp->no_sub)
5215	    {
5216	      /* Have the register data arrays been allocated?	*/
5217	      if (bufp->regs_allocated == REGS_UNALLOCATED)
5218		{ /* No.  So allocate them with malloc.  We need one
5219		     extra element beyond `num_regs' for the `-1' marker
5220		     GNU code uses.  */
5221		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5222		  regs->start = TALLOC (regs->num_regs, regoff_t);
5223		  regs->end = TALLOC (regs->num_regs, regoff_t);
5224		  if (regs->start == NULL || regs->end == NULL)
5225		    {
5226		      FREE_VARIABLES ();
5227		      return -2;
5228		    }
5229		  bufp->regs_allocated = REGS_REALLOCATE;
5230		}
5231	      else if (bufp->regs_allocated == REGS_REALLOCATE)
5232		{ /* Yes.  If we need more elements than were already
5233		     allocated, reallocate them.  If we need fewer, just
5234		     leave it alone.  */
5235		  if (regs->num_regs < num_regs + 1)
5236		    {
5237		      regs->num_regs = num_regs + 1;
5238		      RETALLOC (regs->start, regs->num_regs, regoff_t);
5239		      RETALLOC (regs->end, regs->num_regs, regoff_t);
5240		      if (regs->start == NULL || regs->end == NULL)
5241			{
5242			  FREE_VARIABLES ();
5243			  return -2;
5244			}
5245		    }
5246		}
5247	      else
5248		{
5249		  /* These braces fend off a "empty body in an else-statement"
5250		     warning under GCC when assert expands to nothing.  */
5251		  assert (bufp->regs_allocated == REGS_FIXED);
5252		}
5253
5254	      /* Convert the pointer data in `regstart' and `regend' to
5255		 indices.  Register zero has to be set differently,
5256		 since we haven't kept track of any info for it.  */
5257	      if (regs->num_regs > 0)
5258		{
5259		  regs->start[0] = pos;
5260		  regs->end[0] = POINTER_TO_OFFSET (d);
5261		}
5262
5263	      /* Go through the first `min (num_regs, regs->num_regs)'
5264		 registers, since that is all we initialized.  */
5265	      for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
5266		{
5267		  if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
5268		    regs->start[reg] = regs->end[reg] = -1;
5269		  else
5270		    {
5271		      regs->start[reg]
5272			= (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
5273		      regs->end[reg]
5274			= (regoff_t) POINTER_TO_OFFSET (regend[reg]);
5275		    }
5276		}
5277
5278	      /* If the regs structure we return has more elements than
5279		 were in the pattern, set the extra elements to -1.  If
5280		 we (re)allocated the registers, this is the case,
5281		 because we always allocate enough to have at least one
5282		 -1 at the end.  */
5283	      for (reg = num_regs; reg < regs->num_regs; reg++)
5284		regs->start[reg] = regs->end[reg] = -1;
5285	    } /* regs && !bufp->no_sub */
5286
5287	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
5288			nfailure_points_pushed, nfailure_points_popped,
5289			nfailure_points_pushed - nfailure_points_popped);
5290	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
5291
5292	  mcnt = POINTER_TO_OFFSET (d) - pos;
5293
5294	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
5295
5296	  FREE_VARIABLES ();
5297	  return mcnt;
5298	}
5299
5300      /* Otherwise match next pattern command.  */
5301      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
5302	{
5303	/* Ignore these.  Used to ignore the n of succeed_n's which
5304	   currently have n == 0.  */
5305	case no_op:
5306	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
5307	  break;
5308
5309	case succeed:
5310	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
5311	  goto succeed_label;
5312
5313	/* Match the next n pattern characters exactly.  The following
5314	   byte in the pattern defines n, and the n bytes after that
5315	   are the characters to match.  */
5316	case exactn:
5317	  mcnt = *p++;
5318	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
5319
5320	  /* Remember the start point to rollback upon failure.  */
5321	  dfail = d;
5322
5323	  /* This is written out as an if-else so we don't waste time
5324	     testing `translate' inside the loop.  */
5325	  if (RE_TRANSLATE_P (translate))
5326	    {
5327	      if (multibyte)
5328		do
5329		  {
5330		    int pat_charlen, buf_charlen;
5331		    unsigned int pat_ch, buf_ch;
5332
5333		    PREFETCH ();
5334		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
5335		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
5336
5337		    if (RE_TRANSLATE (translate, buf_ch)
5338			!= pat_ch)
5339		      {
5340			d = dfail;
5341			goto fail;
5342		      }
5343
5344		    p += pat_charlen;
5345		    d += buf_charlen;
5346		    mcnt -= pat_charlen;
5347		  }
5348		while (mcnt > 0);
5349	      else
5350		do
5351		  {
5352		    /* Avoid compiler whining about comparison being
5353		       always true.  */
5354		    int di;
5355
5356		    PREFETCH ();
5357		    di = *d;
5358		    if (RE_TRANSLATE (translate, di) != *p++)
5359		      {
5360			d = dfail;
5361			goto fail;
5362		      }
5363		    d++;
5364		  }
5365		while (--mcnt);
5366	    }
5367	  else
5368	    {
5369	      do
5370		{
5371		  PREFETCH ();
5372		  if (*d++ != *p++)
5373		    {
5374		      d = dfail;
5375		      goto fail;
5376		    }
5377		}
5378	      while (--mcnt);
5379	    }
5380	  break;
5381
5382
5383	/* Match any character except possibly a newline or a null.  */
5384	case anychar:
5385	  {
5386	    int buf_charlen;
5387	    re_wchar_t buf_ch;
5388
5389	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
5390
5391	    PREFETCH ();
5392	    buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
5393	    buf_ch = TRANSLATE (buf_ch);
5394
5395	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
5396		 && buf_ch == '\n')
5397		|| ((bufp->syntax & RE_DOT_NOT_NULL)
5398		    && buf_ch == '\000'))
5399	      goto fail;
5400
5401	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
5402	    d += buf_charlen;
5403	  }
5404	  break;
5405
5406
5407	case charset:
5408	case charset_not:
5409	  {
5410	    register unsigned int c;
5411	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
5412	    int len;
5413
5414	    /* Start of actual range_table, or end of bitmap if there is no
5415	       range table.  */
5416	    re_char *range_table;
5417
5418	    /* Nonzero if there is a range table.  */
5419	    int range_table_exists;
5420
5421	    /* Number of ranges of range table.  This is not included
5422	       in the initial byte-length of the command.  */
5423	    int count = 0;
5424
5425	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
5426
5427	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
5428
5429	    if (range_table_exists)
5430	      {
5431		range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
5432		EXTRACT_NUMBER_AND_INCR (count, range_table);
5433	      }
5434
5435	    PREFETCH ();
5436	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5437	    c = TRANSLATE (c); /* The character to match.  */
5438
5439	    if (SINGLE_BYTE_CHAR_P (c))
5440	      {			/* Lookup bitmap.  */
5441		/* Cast to `unsigned' instead of `unsigned char' in
5442		   case the bit list is a full 32 bytes long.  */
5443		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
5444		    && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5445		  not = !not;
5446	      }
5447#ifdef emacs
5448	    else if (range_table_exists)
5449	      {
5450		int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
5451
5452		if (  (class_bits & BIT_LOWER && ISLOWER (c))
5453		    | (class_bits & BIT_MULTIBYTE)
5454		    | (class_bits & BIT_PUNCT && ISPUNCT (c))
5455		    | (class_bits & BIT_SPACE && ISSPACE (c))
5456		    | (class_bits & BIT_UPPER && ISUPPER (c))
5457		    | (class_bits & BIT_WORD  && ISWORD (c)))
5458		  not = !not;
5459		else
5460		  CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
5461	      }
5462#endif /* emacs */
5463
5464	    if (range_table_exists)
5465	      p = CHARSET_RANGE_TABLE_END (range_table, count);
5466	    else
5467	      p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
5468
5469	    if (!not) goto fail;
5470
5471	    d += len;
5472	    break;
5473	  }
5474
5475
5476	/* The beginning of a group is represented by start_memory.
5477	   The argument is the register number.  The text
5478	   matched within the group is recorded (in the internal
5479	   registers data structure) under the register number.  */
5480	case start_memory:
5481	  DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
5482
5483	  /* In case we need to undo this operation (via backtracking).  */
5484	  PUSH_FAILURE_REG ((unsigned int)*p);
5485
5486	  regstart[*p] = d;
5487	  regend[*p] = NULL;	/* probably unnecessary.  -sm  */
5488	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
5489
5490	  /* Move past the register number and inner group count.  */
5491	  p += 1;
5492	  break;
5493
5494
5495	/* The stop_memory opcode represents the end of a group.  Its
5496	   argument is the same as start_memory's: the register number.  */
5497	case stop_memory:
5498	  DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
5499
5500	  assert (!REG_UNSET (regstart[*p]));
5501	  /* Strictly speaking, there should be code such as:
5502
5503		assert (REG_UNSET (regend[*p]));
5504		PUSH_FAILURE_REGSTOP ((unsigned int)*p);
5505
5506	     But the only info to be pushed is regend[*p] and it is known to
5507	     be UNSET, so there really isn't anything to push.
5508	     Not pushing anything, on the other hand deprives us from the
5509	     guarantee that regend[*p] is UNSET since undoing this operation
5510	     will not reset its value properly.  This is not important since
5511	     the value will only be read on the next start_memory or at
5512	     the very end and both events can only happen if this stop_memory
5513	     is *not* undone.  */
5514
5515	  regend[*p] = d;
5516	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
5517
5518	  /* Move past the register number and the inner group count.  */
5519	  p += 1;
5520	  break;
5521
5522
5523	/* \<digit> has been turned into a `duplicate' command which is
5524	   followed by the numeric value of <digit> as the register number.  */
5525	case duplicate:
5526	  {
5527	    register re_char *d2, *dend2;
5528	    int regno = *p++;	/* Get which register to match against.  */
5529	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
5530
5531	    /* Can't back reference a group which we've never matched.  */
5532	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5533	      goto fail;
5534
5535	    /* Where in input to try to start matching.  */
5536	    d2 = regstart[regno];
5537
5538	    /* Remember the start point to rollback upon failure.  */
5539	    dfail = d;
5540
5541	    /* Where to stop matching; if both the place to start and
5542	       the place to stop matching are in the same string, then
5543	       set to the place to stop, otherwise, for now have to use
5544	       the end of the first string.  */
5545
5546	    dend2 = ((FIRST_STRING_P (regstart[regno])
5547		      == FIRST_STRING_P (regend[regno]))
5548		     ? regend[regno] : end_match_1);
5549	    for (;;)
5550	      {
5551		/* If necessary, advance to next segment in register
5552		   contents.  */
5553		while (d2 == dend2)
5554		  {
5555		    if (dend2 == end_match_2) break;
5556		    if (dend2 == regend[regno]) break;
5557
5558		    /* End of string1 => advance to string2. */
5559		    d2 = string2;
5560		    dend2 = regend[regno];
5561		  }
5562		/* At end of register contents => success */
5563		if (d2 == dend2) break;
5564
5565		/* If necessary, advance to next segment in data.  */
5566		PREFETCH ();
5567
5568		/* How many characters left in this segment to match.  */
5569		mcnt = dend - d;
5570
5571		/* Want how many consecutive characters we can match in
5572		   one shot, so, if necessary, adjust the count.  */
5573		if (mcnt > dend2 - d2)
5574		  mcnt = dend2 - d2;
5575
5576		/* Compare that many; failure if mismatch, else move
5577		   past them.  */
5578		if (RE_TRANSLATE_P (translate)
5579		    ? bcmp_translate (d, d2, mcnt, translate, multibyte)
5580		    : memcmp (d, d2, mcnt))
5581		  {
5582		    d = dfail;
5583		    goto fail;
5584		  }
5585		d += mcnt, d2 += mcnt;
5586	      }
5587	  }
5588	  break;
5589
5590
5591	/* begline matches the empty string at the beginning of the string
5592	   (unless `not_bol' is set in `bufp'), and after newlines.  */
5593	case begline:
5594	  DEBUG_PRINT1 ("EXECUTING begline.\n");
5595
5596	  if (AT_STRINGS_BEG (d))
5597	    {
5598	      if (!bufp->not_bol) break;
5599	    }
5600	  else
5601	    {
5602	      unsigned char c;
5603	      GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
5604	      if (c == '\n')
5605		break;
5606	    }
5607	  /* In all other cases, we fail.  */
5608	  goto fail;
5609
5610
5611	/* endline is the dual of begline.  */
5612	case endline:
5613	  DEBUG_PRINT1 ("EXECUTING endline.\n");
5614
5615	  if (AT_STRINGS_END (d))
5616	    {
5617	      if (!bufp->not_eol) break;
5618	    }
5619	  else
5620	    {
5621	      PREFETCH_NOLIMIT ();
5622	      if (*d == '\n')
5623		break;
5624	    }
5625	  goto fail;
5626
5627
5628	/* Match at the very beginning of the data.  */
5629	case begbuf:
5630	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5631	  if (AT_STRINGS_BEG (d))
5632	    break;
5633	  goto fail;
5634
5635
5636	/* Match at the very end of the data.  */
5637	case endbuf:
5638	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
5639	  if (AT_STRINGS_END (d))
5640	    break;
5641	  goto fail;
5642
5643
5644	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
5645	   pushes NULL as the value for the string on the stack.  Then
5646	   `POP_FAILURE_POINT' will keep the current value for the
5647	   string, instead of restoring it.  To see why, consider
5648	   matching `foo\nbar' against `.*\n'.  The .* matches the foo;
5649	   then the . fails against the \n.  But the next thing we want
5650	   to do is match the \n against the \n; if we restored the
5651	   string value, we would be back at the foo.
5652
5653	   Because this is used only in specific cases, we don't need to
5654	   check all the things that `on_failure_jump' does, to make
5655	   sure the right things get saved on the stack.  Hence we don't
5656	   share its code.  The only reason to push anything on the
5657	   stack at all is that otherwise we would have to change
5658	   `anychar's code to do something besides goto fail in this
5659	   case; that seems worse than this.  */
5660	case on_failure_keep_string_jump:
5661	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5662	  DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5663			mcnt, p + mcnt);
5664
5665	  PUSH_FAILURE_POINT (p - 3, NULL);
5666	  break;
5667
5668	  /* A nasty loop is introduced by the non-greedy *? and +?.
5669	     With such loops, the stack only ever contains one failure point
5670	     at a time, so that a plain on_failure_jump_loop kind of
5671	     cycle detection cannot work.  Worse yet, such a detection
5672	     can not only fail to detect a cycle, but it can also wrongly
5673	     detect a cycle (between different instantiations of the same
5674	     loop).
5675	     So the method used for those nasty loops is a little different:
5676	     We use a special cycle-detection-stack-frame which is pushed
5677	     when the on_failure_jump_nastyloop failure-point is *popped*.
5678	     This special frame thus marks the beginning of one iteration
5679	     through the loop and we can hence easily check right here
5680	     whether something matched between the beginning and the end of
5681	     the loop.  */
5682	case on_failure_jump_nastyloop:
5683	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5684	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5685			mcnt, p + mcnt);
5686
5687	  assert ((re_opcode_t)p[-4] == no_op);
5688	  {
5689	    int cycle = 0;
5690	    CHECK_INFINITE_LOOP (p - 4, d);
5691	    if (!cycle)
5692	      /* If there's a cycle, just continue without pushing
5693		 this failure point.  The failure point is the "try again"
5694		 option, which shouldn't be tried.
5695		 We want (x?)*?y\1z to match both xxyz and xxyxz.  */
5696	      PUSH_FAILURE_POINT (p - 3, d);
5697	  }
5698	  break;
5699
5700	  /* Simple loop detecting on_failure_jump:  just check on the
5701	     failure stack if the same spot was already hit earlier.  */
5702	case on_failure_jump_loop:
5703	on_failure:
5704	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5705	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5706			mcnt, p + mcnt);
5707	  {
5708	    int cycle = 0;
5709	    CHECK_INFINITE_LOOP (p - 3, d);
5710	    if (cycle)
5711	      /* If there's a cycle, get out of the loop, as if the matching
5712		 had failed.  We used to just `goto fail' here, but that was
5713		 aborting the search a bit too early: we want to keep the
5714		 empty-loop-match and keep matching after the loop.
5715		 We want (x?)*y\1z to match both xxyz and xxyxz.  */
5716	      p += mcnt;
5717	    else
5718	      PUSH_FAILURE_POINT (p - 3, d);
5719	  }
5720	  break;
5721
5722
5723	/* Uses of on_failure_jump:
5724
5725	   Each alternative starts with an on_failure_jump that points
5726	   to the beginning of the next alternative.  Each alternative
5727	   except the last ends with a jump that in effect jumps past
5728	   the rest of the alternatives.  (They really jump to the
5729	   ending jump of the following alternative, because tensioning
5730	   these jumps is a hassle.)
5731
5732	   Repeats start with an on_failure_jump that points past both
5733	   the repetition text and either the following jump or
5734	   pop_failure_jump back to this on_failure_jump.  */
5735	case on_failure_jump:
5736	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5737	  DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
5738			mcnt, p + mcnt);
5739
5740	  PUSH_FAILURE_POINT (p -3, d);
5741	  break;
5742
5743	/* This operation is used for greedy *.
5744	   Compare the beginning of the repeat with what in the
5745	   pattern follows its end. If we can establish that there
5746	   is nothing that they would both match, i.e., that we
5747	   would have to backtrack because of (as in, e.g., `a*a')
5748	   then we can use a non-backtracking loop based on
5749	   on_failure_keep_string_jump instead of on_failure_jump.  */
5750	case on_failure_jump_smart:
5751	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5752	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5753			mcnt, p + mcnt);
5754	  {
5755	    re_char *p1 = p; /* Next operation.  */
5756	    /* Here, we discard `const', making re_match non-reentrant.  */
5757	    unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest.  */
5758	    unsigned char *p3 = (unsigned char*) p - 3; /* opcode location.  */
5759
5760	    p -= 3;		/* Reset so that we will re-execute the
5761				   instruction once it's been changed. */
5762
5763	    EXTRACT_NUMBER (mcnt, p2 - 2);
5764
5765	    /* Ensure this is a indeed the trivial kind of loop
5766	       we are expecting.  */
5767	    assert (skip_one_char (p1) == p2 - 3);
5768	    assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
5769	    DEBUG_STATEMENT (debug += 2);
5770	    if (mutually_exclusive_p (bufp, p1, p2))
5771	      {
5772		/* Use a fast `on_failure_keep_string_jump' loop.  */
5773		DEBUG_PRINT1 ("  smart exclusive => fast loop.\n");
5774		*p3 = (unsigned char) on_failure_keep_string_jump;
5775		STORE_NUMBER (p2 - 2, mcnt + 3);
5776	      }
5777	    else
5778	      {
5779		/* Default to a safe `on_failure_jump' loop.  */
5780		DEBUG_PRINT1 ("  smart default => slow loop.\n");
5781		*p3 = (unsigned char) on_failure_jump;
5782	      }
5783	    DEBUG_STATEMENT (debug -= 2);
5784	  }
5785	  break;
5786
5787	/* Unconditionally jump (without popping any failure points).  */
5788	case jump:
5789	unconditional_jump:
5790	  IMMEDIATE_QUIT_CHECK;
5791	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
5792	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5793	  p += mcnt;				/* Do the jump.  */
5794	  DEBUG_PRINT2 ("(to %p).\n", p);
5795	  break;
5796
5797
5798	/* Have to succeed matching what follows at least n times.
5799	   After that, handle like `on_failure_jump'.  */
5800	case succeed_n:
5801	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
5802	  EXTRACT_NUMBER (mcnt, p + 2);
5803	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5804
5805	  /* Originally, mcnt is how many times we HAVE to succeed.  */
5806	  if (mcnt != 0)
5807	    {
5808	      /* Here, we discard `const', making re_match non-reentrant.  */
5809	      unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
5810	      mcnt--;
5811	      p += 4;
5812	      PUSH_NUMBER (p2, mcnt);
5813	    }
5814	  else
5815	    /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
5816	    goto on_failure;
5817	  break;
5818
5819	case jump_n:
5820	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
5821	  EXTRACT_NUMBER (mcnt, p + 2);
5822	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5823
5824	  /* Originally, this is how many times we CAN jump.  */
5825	  if (mcnt != 0)
5826	    {
5827	       /* Here, we discard `const', making re_match non-reentrant.  */
5828	      unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
5829	      mcnt--;
5830	      PUSH_NUMBER (p2, mcnt);
5831	      goto unconditional_jump;
5832	    }
5833	  /* If don't have to jump any more, skip over the rest of command.  */
5834	  else
5835	    p += 4;
5836	  break;
5837
5838	case set_number_at:
5839	  {
5840	    unsigned char *p2;	/* Location of the counter.  */
5841	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5842
5843	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5844	    /* Here, we discard `const', making re_match non-reentrant.  */
5845	    p2 = (unsigned char*) p + mcnt;
5846	    /* Signedness doesn't matter since we only copy MCNT's bits .  */
5847	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5848	    DEBUG_PRINT3 ("  Setting %p to %d.\n", p2, mcnt);
5849	    PUSH_NUMBER (p2, mcnt);
5850	    break;
5851	  }
5852
5853	case wordbound:
5854	case notwordbound:
5855	  not = (re_opcode_t) *(p - 1) == notwordbound;
5856	  DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
5857
5858	  /* We SUCCEED (or FAIL) in one of the following cases: */
5859
5860	  /* Case 1: D is at the beginning or the end of string.  */
5861	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5862	    not = !not;
5863	  else
5864	    {
5865	      /* C1 is the character before D, S1 is the syntax of C1, C2
5866		 is the character at D, and S2 is the syntax of C2.  */
5867	      re_wchar_t c1, c2;
5868	      int s1, s2;
5869#ifdef emacs
5870	      int offset = PTR_TO_OFFSET (d - 1);
5871	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5872	      UPDATE_SYNTAX_TABLE (charpos);
5873#endif
5874	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5875	      s1 = SYNTAX (c1);
5876#ifdef emacs
5877	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5878#endif
5879	      PREFETCH_NOLIMIT ();
5880	      c2 = RE_STRING_CHAR (d, dend - d);
5881	      s2 = SYNTAX (c2);
5882
5883	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
5884		  ((s1 == Sword) != (s2 == Sword))
5885		  /* Case 3: Both of S1 and S2 are Sword, and macro
5886		     WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
5887		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5888		not = !not;
5889	    }
5890	  if (not)
5891	    break;
5892	  else
5893	    goto fail;
5894
5895	case wordbeg:
5896	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5897
5898	  /* We FAIL in one of the following cases: */
5899
5900	  /* Case 1: D is at the end of string.  */
5901	  if (AT_STRINGS_END (d))
5902	    goto fail;
5903	  else
5904	    {
5905	      /* C1 is the character before D, S1 is the syntax of C1, C2
5906		 is the character at D, and S2 is the syntax of C2.  */
5907	      re_wchar_t c1, c2;
5908	      int s1, s2;
5909#ifdef emacs
5910	      int offset = PTR_TO_OFFSET (d);
5911	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5912	      UPDATE_SYNTAX_TABLE (charpos);
5913#endif
5914	      PREFETCH ();
5915	      c2 = RE_STRING_CHAR (d, dend - d);
5916	      s2 = SYNTAX (c2);
5917
5918	      /* Case 2: S2 is not Sword. */
5919	      if (s2 != Sword)
5920		goto fail;
5921
5922	      /* Case 3: D is not at the beginning of string ... */
5923	      if (!AT_STRINGS_BEG (d))
5924		{
5925		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5926#ifdef emacs
5927		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5928#endif
5929		  s1 = SYNTAX (c1);
5930
5931		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5932		     returns 0.  */
5933		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5934		    goto fail;
5935		}
5936	    }
5937	  break;
5938
5939	case wordend:
5940	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
5941
5942	  /* We FAIL in one of the following cases: */
5943
5944	  /* Case 1: D is at the beginning of string.  */
5945	  if (AT_STRINGS_BEG (d))
5946	    goto fail;
5947	  else
5948	    {
5949	      /* C1 is the character before D, S1 is the syntax of C1, C2
5950		 is the character at D, and S2 is the syntax of C2.  */
5951	      re_wchar_t c1, c2;
5952	      int s1, s2;
5953#ifdef emacs
5954	      int offset = PTR_TO_OFFSET (d) - 1;
5955	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5956	      UPDATE_SYNTAX_TABLE (charpos);
5957#endif
5958	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5959	      s1 = SYNTAX (c1);
5960
5961	      /* Case 2: S1 is not Sword.  */
5962	      if (s1 != Sword)
5963		goto fail;
5964
5965	      /* Case 3: D is not at the end of string ... */
5966	      if (!AT_STRINGS_END (d))
5967		{
5968		  PREFETCH_NOLIMIT ();
5969		  c2 = RE_STRING_CHAR (d, dend - d);
5970#ifdef emacs
5971		  UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5972#endif
5973		  s2 = SYNTAX (c2);
5974
5975		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5976		     returns 0.  */
5977		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5978	  goto fail;
5979		}
5980	    }
5981	  break;
5982
5983	case symbeg:
5984	  DEBUG_PRINT1 ("EXECUTING symbeg.\n");
5985
5986	  /* We FAIL in one of the following cases: */
5987
5988	  /* Case 1: D is at the end of string.  */
5989	  if (AT_STRINGS_END (d))
5990	    goto fail;
5991	  else
5992	    {
5993	      /* C1 is the character before D, S1 is the syntax of C1, C2
5994		 is the character at D, and S2 is the syntax of C2.  */
5995	      re_wchar_t c1, c2;
5996	      int s1, s2;
5997#ifdef emacs
5998	      int offset = PTR_TO_OFFSET (d);
5999	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6000	      UPDATE_SYNTAX_TABLE (charpos);
6001#endif
6002	      PREFETCH ();
6003	      c2 = RE_STRING_CHAR (d, dend - d);
6004	      s2 = SYNTAX (c2);
6005
6006	      /* Case 2: S2 is neither Sword nor Ssymbol. */
6007	      if (s2 != Sword && s2 != Ssymbol)
6008		goto fail;
6009
6010	      /* Case 3: D is not at the beginning of string ... */
6011	      if (!AT_STRINGS_BEG (d))
6012		{
6013		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6014#ifdef emacs
6015		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
6016#endif
6017		  s1 = SYNTAX (c1);
6018
6019		  /* ... and S1 is Sword or Ssymbol.  */
6020		  if (s1 == Sword || s1 == Ssymbol)
6021		    goto fail;
6022		}
6023	    }
6024	  break;
6025
6026	case symend:
6027	  DEBUG_PRINT1 ("EXECUTING symend.\n");
6028
6029	  /* We FAIL in one of the following cases: */
6030
6031	  /* Case 1: D is at the beginning of string.  */
6032	  if (AT_STRINGS_BEG (d))
6033	    goto fail;
6034	  else
6035	    {
6036	      /* C1 is the character before D, S1 is the syntax of C1, C2
6037		 is the character at D, and S2 is the syntax of C2.  */
6038	      re_wchar_t c1, c2;
6039	      int s1, s2;
6040#ifdef emacs
6041	      int offset = PTR_TO_OFFSET (d) - 1;
6042	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6043	      UPDATE_SYNTAX_TABLE (charpos);
6044#endif
6045	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6046	      s1 = SYNTAX (c1);
6047
6048	      /* Case 2: S1 is neither Ssymbol nor Sword.  */
6049	      if (s1 != Sword && s1 != Ssymbol)
6050		goto fail;
6051
6052	      /* Case 3: D is not at the end of string ... */
6053	      if (!AT_STRINGS_END (d))
6054		{
6055		  PREFETCH_NOLIMIT ();
6056		  c2 = RE_STRING_CHAR (d, dend - d);
6057#ifdef emacs
6058		  UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6059#endif
6060		  s2 = SYNTAX (c2);
6061
6062		  /* ... and S2 is Sword or Ssymbol.  */
6063		  if (s2 == Sword || s2 == Ssymbol)
6064                    goto fail;
6065		}
6066	    }
6067	  break;
6068
6069	case syntaxspec:
6070	case notsyntaxspec:
6071	  not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6072	  mcnt = *p++;
6073	  DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
6074	  PREFETCH ();
6075#ifdef emacs
6076	  {
6077	    int offset = PTR_TO_OFFSET (d);
6078	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6079	    UPDATE_SYNTAX_TABLE (pos1);
6080	  }
6081#endif
6082	  {
6083	    int len;
6084	    re_wchar_t c;
6085
6086	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
6087
6088	    if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
6089	      goto fail;
6090	    d += len;
6091	  }
6092	  break;
6093
6094#ifdef emacs
6095	case before_dot:
6096	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
6097	  if (PTR_BYTE_POS (d) >= PT_BYTE)
6098	    goto fail;
6099	  break;
6100
6101	case at_dot:
6102	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
6103	  if (PTR_BYTE_POS (d) != PT_BYTE)
6104	    goto fail;
6105	  break;
6106
6107	case after_dot:
6108	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
6109	  if (PTR_BYTE_POS (d) <= PT_BYTE)
6110	    goto fail;
6111	  break;
6112
6113	case categoryspec:
6114	case notcategoryspec:
6115	  not = (re_opcode_t) *(p - 1) == notcategoryspec;
6116	  mcnt = *p++;
6117	  DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
6118	  PREFETCH ();
6119	  {
6120	    int len;
6121	    re_wchar_t c;
6122
6123	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
6124
6125	    if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
6126	      goto fail;
6127	    d += len;
6128	  }
6129	  break;
6130
6131#endif /* emacs */
6132
6133	default:
6134	  abort ();
6135	}
6136      continue;  /* Successfully executed one pattern command; keep going.  */
6137
6138
6139    /* We goto here if a matching operation fails. */
6140    fail:
6141      IMMEDIATE_QUIT_CHECK;
6142      if (!FAIL_STACK_EMPTY ())
6143	{
6144	  re_char *str, *pat;
6145	  /* A restart point is known.  Restore to that state.  */
6146	  DEBUG_PRINT1 ("\nFAIL:\n");
6147	  POP_FAILURE_POINT (str, pat);
6148	  switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
6149	    {
6150	    case on_failure_keep_string_jump:
6151	      assert (str == NULL);
6152	      goto continue_failure_jump;
6153
6154	    case on_failure_jump_nastyloop:
6155	      assert ((re_opcode_t)pat[-2] == no_op);
6156	      PUSH_FAILURE_POINT (pat - 2, str);
6157	      /* Fallthrough */
6158
6159	    case on_failure_jump_loop:
6160	    case on_failure_jump:
6161	    case succeed_n:
6162	      d = str;
6163	    continue_failure_jump:
6164	      EXTRACT_NUMBER_AND_INCR (mcnt, pat);
6165	      p = pat + mcnt;
6166	      break;
6167
6168	    case no_op:
6169	      /* A special frame used for nastyloops. */
6170	      goto fail;
6171
6172	    default:
6173	      abort();
6174	    }
6175
6176	  assert (p >= bufp->buffer && p <= pend);
6177
6178	  if (d >= string1 && d <= end1)
6179	    dend = end_match_1;
6180	}
6181      else
6182	break;   /* Matching at this starting point really fails.  */
6183    } /* for (;;) */
6184
6185  if (best_regs_set)
6186    goto restore_best_regs;
6187
6188  FREE_VARIABLES ();
6189
6190  return -1;         			/* Failure to match.  */
6191} /* re_match_2 */
6192
6193/* Subroutine definitions for re_match_2.  */
6194
6195/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6196   bytes; nonzero otherwise.  */
6197
6198static int
6199bcmp_translate (s1, s2, len, translate, multibyte)
6200     re_char *s1, *s2;
6201     register int len;
6202     RE_TRANSLATE_TYPE translate;
6203     const int multibyte;
6204{
6205  register re_char *p1 = s1, *p2 = s2;
6206  re_char *p1_end = s1 + len;
6207  re_char *p2_end = s2 + len;
6208
6209  /* FIXME: Checking both p1 and p2 presumes that the two strings might have
6210     different lengths, but relying on a single `len' would break this. -sm  */
6211  while (p1 < p1_end && p2 < p2_end)
6212    {
6213      int p1_charlen, p2_charlen;
6214      re_wchar_t p1_ch, p2_ch;
6215
6216      p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
6217      p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
6218
6219      if (RE_TRANSLATE (translate, p1_ch)
6220	  != RE_TRANSLATE (translate, p2_ch))
6221	return 1;
6222
6223      p1 += p1_charlen, p2 += p2_charlen;
6224    }
6225
6226  if (p1 != p1_end || p2 != p2_end)
6227    return 1;
6228
6229  return 0;
6230}
6231
6232/* Entry points for GNU code.  */
6233
6234/* re_compile_pattern is the GNU regular expression compiler: it
6235   compiles PATTERN (of length SIZE) and puts the result in BUFP.
6236   Returns 0 if the pattern was valid, otherwise an error string.
6237
6238   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6239   are set in BUFP on entry.
6240
6241   We call regex_compile to do the actual compilation.  */
6242
6243const char *
6244re_compile_pattern (pattern, length, bufp)
6245     const char *pattern;
6246     size_t length;
6247     struct re_pattern_buffer *bufp;
6248{
6249  reg_errcode_t ret;
6250
6251#ifdef emacs
6252  gl_state.current_syntax_table = current_buffer->syntax_table;
6253#endif
6254
6255  /* GNU code is written to assume at least RE_NREGS registers will be set
6256     (and at least one extra will be -1).  */
6257  bufp->regs_allocated = REGS_UNALLOCATED;
6258
6259  /* And GNU code determines whether or not to get register information
6260     by passing null for the REGS argument to re_match, etc., not by
6261     setting no_sub.  */
6262  bufp->no_sub = 0;
6263
6264  ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
6265
6266  if (!ret)
6267    return NULL;
6268  return gettext (re_error_msgid[(int) ret]);
6269}
6270WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
6271
6272/* Entry points compatible with 4.2 BSD regex library.  We don't define
6273   them unless specifically requested.  */
6274
6275#if defined _REGEX_RE_COMP || defined _LIBC
6276
6277/* BSD has one and only one pattern buffer.  */
6278static struct re_pattern_buffer re_comp_buf;
6279
6280char *
6281# ifdef _LIBC
6282/* Make these definitions weak in libc, so POSIX programs can redefine
6283   these names if they don't use our functions, and still use
6284   regcomp/regexec below without link errors.  */
6285weak_function
6286# endif
6287re_comp (s)
6288    const char *s;
6289{
6290  reg_errcode_t ret;
6291
6292  if (!s)
6293    {
6294      if (!re_comp_buf.buffer)
6295	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6296	return (char *) gettext ("No previous regular expression");
6297      return 0;
6298    }
6299
6300  if (!re_comp_buf.buffer)
6301    {
6302      re_comp_buf.buffer = (unsigned char *) malloc (200);
6303      if (re_comp_buf.buffer == NULL)
6304	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6305	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6306      re_comp_buf.allocated = 200;
6307
6308      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6309      if (re_comp_buf.fastmap == NULL)
6310	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6311	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6312    }
6313
6314  /* Since `re_exec' always passes NULL for the `regs' argument, we
6315     don't need to initialize the pattern buffer fields which affect it.  */
6316
6317  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6318
6319  if (!ret)
6320    return NULL;
6321
6322  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6323  return (char *) gettext (re_error_msgid[(int) ret]);
6324}
6325
6326
6327int
6328# ifdef _LIBC
6329weak_function
6330# endif
6331re_exec (s)
6332    const char *s;
6333{
6334  const int len = strlen (s);
6335  return
6336    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6337}
6338#endif /* _REGEX_RE_COMP */
6339
6340/* POSIX.2 functions.  Don't define these for Emacs.  */
6341
6342#ifndef emacs
6343
6344/* regcomp takes a regular expression as a string and compiles it.
6345
6346   PREG is a regex_t *.  We do not expect any fields to be initialized,
6347   since POSIX says we shouldn't.  Thus, we set
6348
6349     `buffer' to the compiled pattern;
6350     `used' to the length of the compiled pattern;
6351     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6352       REG_EXTENDED bit in CFLAGS is set; otherwise, to
6353       RE_SYNTAX_POSIX_BASIC;
6354     `fastmap' to an allocated space for the fastmap;
6355     `fastmap_accurate' to zero;
6356     `re_nsub' to the number of subexpressions in PATTERN.
6357
6358   PATTERN is the address of the pattern string.
6359
6360   CFLAGS is a series of bits which affect compilation.
6361
6362     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6363     use POSIX basic syntax.
6364
6365     If REG_NEWLINE is set, then . and [^...] don't match newline.
6366     Also, regexec will try a match beginning after every newline.
6367
6368     If REG_ICASE is set, then we considers upper- and lowercase
6369     versions of letters to be equivalent when matching.
6370
6371     If REG_NOSUB is set, then when PREG is passed to regexec, that
6372     routine will report only success or failure, and nothing about the
6373     registers.
6374
6375   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6376   the return codes and their meanings.)  */
6377
6378int
6379regcomp (preg, pattern, cflags)
6380    regex_t *__restrict preg;
6381    const char *__restrict pattern;
6382    int cflags;
6383{
6384  reg_errcode_t ret;
6385  reg_syntax_t syntax
6386    = (cflags & REG_EXTENDED) ?
6387      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6388
6389  /* regex_compile will allocate the space for the compiled pattern.  */
6390  preg->buffer = 0;
6391  preg->allocated = 0;
6392  preg->used = 0;
6393
6394  /* Try to allocate space for the fastmap.  */
6395  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
6396
6397  if (cflags & REG_ICASE)
6398    {
6399      unsigned i;
6400
6401      preg->translate
6402	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6403				      * sizeof (*(RE_TRANSLATE_TYPE)0));
6404      if (preg->translate == NULL)
6405	return (int) REG_ESPACE;
6406
6407      /* Map uppercase characters to corresponding lowercase ones.  */
6408      for (i = 0; i < CHAR_SET_SIZE; i++)
6409	preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
6410    }
6411  else
6412    preg->translate = NULL;
6413
6414  /* If REG_NEWLINE is set, newlines are treated differently.  */
6415  if (cflags & REG_NEWLINE)
6416    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6417      syntax &= ~RE_DOT_NEWLINE;
6418      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6419    }
6420  else
6421    syntax |= RE_NO_NEWLINE_ANCHOR;
6422
6423  preg->no_sub = !!(cflags & REG_NOSUB);
6424
6425  /* POSIX says a null character in the pattern terminates it, so we
6426     can use strlen here in compiling the pattern.  */
6427  ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
6428
6429  /* POSIX doesn't distinguish between an unmatched open-group and an
6430     unmatched close-group: both are REG_EPAREN.  */
6431  if (ret == REG_ERPAREN)
6432    ret = REG_EPAREN;
6433
6434  if (ret == REG_NOERROR && preg->fastmap)
6435    { /* Compute the fastmap now, since regexec cannot modify the pattern
6436	 buffer.  */
6437      re_compile_fastmap (preg);
6438      if (preg->can_be_null)
6439	{ /* The fastmap can't be used anyway.  */
6440	  free (preg->fastmap);
6441	  preg->fastmap = NULL;
6442	}
6443    }
6444  return (int) ret;
6445}
6446WEAK_ALIAS (__regcomp, regcomp)
6447
6448
6449/* regexec searches for a given pattern, specified by PREG, in the
6450   string STRING.
6451
6452   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6453   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6454   least NMATCH elements, and we set them to the offsets of the
6455   corresponding matched substrings.
6456
6457   EFLAGS specifies `execution flags' which affect matching: if
6458   REG_NOTBOL is set, then ^ does not match at the beginning of the
6459   string; if REG_NOTEOL is set, then $ does not match at the end.
6460
6461   We return 0 if we find a match and REG_NOMATCH if not.  */
6462
6463int
6464regexec (preg, string, nmatch, pmatch, eflags)
6465    const regex_t *__restrict preg;
6466    const char *__restrict string;
6467    size_t nmatch;
6468    regmatch_t pmatch[__restrict_arr];
6469    int eflags;
6470{
6471  int ret;
6472  struct re_registers regs;
6473  regex_t private_preg;
6474  int len = strlen (string);
6475  boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
6476
6477  private_preg = *preg;
6478
6479  private_preg.not_bol = !!(eflags & REG_NOTBOL);
6480  private_preg.not_eol = !!(eflags & REG_NOTEOL);
6481
6482  /* The user has told us exactly how many registers to return
6483     information about, via `nmatch'.  We have to pass that on to the
6484     matching routines.  */
6485  private_preg.regs_allocated = REGS_FIXED;
6486
6487  if (want_reg_info)
6488    {
6489      regs.num_regs = nmatch;
6490      regs.start = TALLOC (nmatch * 2, regoff_t);
6491      if (regs.start == NULL)
6492	return (int) REG_NOMATCH;
6493      regs.end = regs.start + nmatch;
6494    }
6495
6496  /* Instead of using not_eol to implement REG_NOTEOL, we could simply
6497     pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
6498     was a little bit longer but still only matching the real part.
6499     This works because the `endline' will check for a '\n' and will find a
6500     '\0', correctly deciding that this is not the end of a line.
6501     But it doesn't work out so nicely for REG_NOTBOL, since we don't have
6502     a convenient '\0' there.  For all we know, the string could be preceded
6503     by '\n' which would throw things off.  */
6504
6505  /* Perform the searching operation.  */
6506  ret = re_search (&private_preg, string, len,
6507		   /* start: */ 0, /* range: */ len,
6508		   want_reg_info ? &regs : (struct re_registers *) 0);
6509
6510  /* Copy the register information to the POSIX structure.  */
6511  if (want_reg_info)
6512    {
6513      if (ret >= 0)
6514	{
6515	  unsigned r;
6516
6517	  for (r = 0; r < nmatch; r++)
6518	    {
6519	      pmatch[r].rm_so = regs.start[r];
6520	      pmatch[r].rm_eo = regs.end[r];
6521	    }
6522	}
6523
6524      /* If we needed the temporary register info, free the space now.  */
6525      free (regs.start);
6526    }
6527
6528  /* We want zero return to mean success, unlike `re_search'.  */
6529  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6530}
6531WEAK_ALIAS (__regexec, regexec)
6532
6533
6534/* Returns a message corresponding to an error code, ERR_CODE, returned
6535   from either regcomp or regexec.   We don't use PREG here.
6536
6537   ERR_CODE was previously called ERRCODE, but that name causes an
6538   error with msvc8 compiler.  */
6539
6540size_t
6541regerror (err_code, preg, errbuf, errbuf_size)
6542    int err_code;
6543    const regex_t *preg;
6544    char *errbuf;
6545    size_t errbuf_size;
6546{
6547  const char *msg;
6548  size_t msg_size;
6549
6550  if (err_code < 0
6551      || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6552    /* Only error codes returned by the rest of the code should be passed
6553       to this routine.  If we are given anything else, or if other regex
6554       code generates an invalid error code, then the program has a bug.
6555       Dump core so we can fix it.  */
6556    abort ();
6557
6558  msg = gettext (re_error_msgid[err_code]);
6559
6560  msg_size = strlen (msg) + 1; /* Includes the null.  */
6561
6562  if (errbuf_size != 0)
6563    {
6564      if (msg_size > errbuf_size)
6565	{
6566	  strncpy (errbuf, msg, errbuf_size - 1);
6567	  errbuf[errbuf_size - 1] = 0;
6568	}
6569      else
6570	strcpy (errbuf, msg);
6571    }
6572
6573  return msg_size;
6574}
6575WEAK_ALIAS (__regerror, regerror)
6576
6577
6578/* Free dynamically allocated space used by PREG.  */
6579
6580void
6581regfree (preg)
6582    regex_t *preg;
6583{
6584  if (preg->buffer != NULL)
6585    free (preg->buffer);
6586  preg->buffer = NULL;
6587
6588  preg->allocated = 0;
6589  preg->used = 0;
6590
6591  if (preg->fastmap != NULL)
6592    free (preg->fastmap);
6593  preg->fastmap = NULL;
6594  preg->fastmap_accurate = 0;
6595
6596  if (preg->translate != NULL)
6597    free (preg->translate);
6598  preg->translate = NULL;
6599}
6600WEAK_ALIAS (__regfree, regfree)
6601
6602#endif /* not emacs  */
6603
6604/* arch-tag: 4ffd68ba-2a9e-435b-a21a-018990f9eeb2
6605   (do not change this comment) */
6606