1/* Extended regular expression matching and search library, version
2   0.12.  (Implements POSIX draft P10003.2/D11.2, except for
3   internationalization features.)
4
5   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20   USA.	 */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24  #pragma alloca
25#endif
26
27#undef	_GNU_SOURCE
28#define _GNU_SOURCE
29
30#ifdef emacs
31/* Converts the pointer to the char to BEG-based offset from the start.	 */
32#define PTR_TO_OFFSET(d)						\
33	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
34			  ? (d) - string1 : (d) - (string2 - size1))
35#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
36#else
37#define PTR_TO_OFFSET(d) 0
38#endif
39
40#ifdef HAVE_CONFIG_H
41#include <config.h>
42#endif
43
44/* We need this for `regex.h', and perhaps for the Emacs include files.	 */
45#include <sys/types.h>
46
47/* This is for other GNU distributions with internationalized messages.	 */
48#if HAVE_LIBINTL_H || defined (_LIBC)
49# include <libintl.h>
50#else
51# define gettext(msgid) (msgid)
52#endif
53
54#ifndef gettext_noop
55/* This define is so xgettext can find the internationalizable
56   strings.  */
57#define gettext_noop(String) String
58#endif
59
60/* The `emacs' switch turns on certain matching commands
61   that make sense only in Emacs. */
62#ifdef emacs
63
64#include "lisp.h"
65#include "buffer.h"
66
67/* Make syntax table lookup grant data in gl_state.  */
68#define SYNTAX_ENTRY_VIA_PROPERTY
69
70#include "syntax.h"
71#include "charset.h"
72#include "category.h"
73
74#define malloc xmalloc
75#define realloc xrealloc
76#define free xfree
77
78#else  /* not emacs */
79
80/* If we are not linking with Emacs proper,
81   we can't use the relocating allocator
82   even if config.h says that we can.  */
83#undef REL_ALLOC
84
85#if defined (STDC_HEADERS) || defined (_LIBC)
86#include <stdlib.h>
87#else
88char *malloc ();
89char *realloc ();
90#endif
91
92/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
93   If nothing else has been done, use the method below.	 */
94#ifdef INHIBIT_STRING_HEADER
95#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
96#if !defined (bzero) && !defined (bcopy)
97#undef INHIBIT_STRING_HEADER
98#endif
99#endif
100#endif
101
102/* This is the normal way of making sure we have a bcopy and a bzero.
103   This is used in most programs--a few other programs avoid this
104   by defining INHIBIT_STRING_HEADER.  */
105#ifndef INHIBIT_STRING_HEADER
106#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
107#include <string.h>
108#ifndef bcmp
109#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
110#endif
111#ifndef bcopy
112#define bcopy(s, d, n)	memcpy ((d), (s), (n))
113#endif
114#ifndef bzero
115#define bzero(s, n)	memset ((s), 0, (n))
116#endif
117#else
118#include <strings.h>
119#endif
120#endif
121
122/* Define the syntax stuff for \<, \>, etc.  */
123
124/* This must be nonzero for the wordchar and notwordchar pattern
125   commands in re_match_2.  */
126#ifndef Sword
127#define Sword 1
128#endif
129
130#ifdef SWITCH_ENUM_BUG
131#define SWITCH_ENUM_CAST(x) ((int)(x))
132#else
133#define SWITCH_ENUM_CAST(x) (x)
134#endif
135
136#ifdef SYNTAX_TABLE
137
138extern char *re_syntax_table;
139
140#else /* not SYNTAX_TABLE */
141
142/* How many characters in the character set.  */
143#define CHAR_SET_SIZE 256
144
145static char re_syntax_table[CHAR_SET_SIZE];
146
147static void
148init_syntax_once ()
149{
150   register int c;
151   static int done = 0;
152
153   if (done)
154     return;
155
156   bzero (re_syntax_table, sizeof re_syntax_table);
157
158   for (c = 'a'; c <= 'z'; c++)
159     re_syntax_table[c] = Sword;
160
161   for (c = 'A'; c <= 'Z'; c++)
162     re_syntax_table[c] = Sword;
163
164   for (c = '0'; c <= '9'; c++)
165     re_syntax_table[c] = Sword;
166
167   re_syntax_table['_'] = Sword;
168
169   done = 1;
170}
171
172#endif /* not SYNTAX_TABLE */
173
174#define SYNTAX(c) re_syntax_table[c]
175
176/* Dummy macros for non-Emacs environments.  */
177#define BASE_LEADING_CODE_P(c) (0)
178#define WORD_BOUNDARY_P(c1, c2) (0)
179#define CHAR_HEAD_P(p) (1)
180#define SINGLE_BYTE_CHAR_P(c) (1)
181#define SAME_CHARSET_P(c1, c2) (1)
182#define MULTIBYTE_FORM_LENGTH(p, s) (1)
183#define STRING_CHAR(p, s) (*(p))
184#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
185#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
186  (c = ((p) == (end1) ? *(str2) : *(p)))
187#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
188  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
189#endif /* not emacs */
190
191/* Get the interface, including the syntax bits.  */
192#include "regex.h"
193
194/* isalpha etc. are used for the character classes.  */
195#include <ctype.h>
196
197/* Jim Meyering writes:
198
199   "... Some ctype macros are valid only for character codes that
200   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
201   using /bin/cc or gcc but without giving an ansi option).  So, all
202   ctype uses should be through macros like ISPRINT...	If
203   STDC_HEADERS is defined, then autoconf has verified that the ctype
204   macros don't need to be guarded with references to isascii. ...
205   Defining isascii to 1 should let any compiler worth its salt
206   eliminate the && through constant folding."	*/
207
208#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
209#define ISASCII(c) 1
210#else
211#define ISASCII(c) isascii(c)
212#endif
213
214#ifdef isblank
215#define ISBLANK(c) (ISASCII (c) && isblank (c))
216#else
217#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
218#endif
219#ifdef isgraph
220#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
221#else
222#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
223#endif
224
225#define ISPRINT(c) (ISASCII (c) && isprint (c))
226#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
227#define ISALNUM(c) (ISASCII (c) && isalnum (c))
228#define ISALPHA(c) (ISASCII (c) && isalpha (c))
229#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
230#define ISLOWER(c) (ISASCII (c) && islower (c))
231#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
232#define ISSPACE(c) (ISASCII (c) && isspace (c))
233#define ISUPPER(c) (ISASCII (c) && isupper (c))
234#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
235
236#ifndef NULL
237#define NULL (void *)0
238#endif
239
240/* We remove any previous definition of `SIGN_EXTEND_CHAR',
241   since ours (we hope) works properly with all combinations of
242   machines, compilers, `char' and `unsigned char' argument types.
243   (Per Bothner suggested the basic approach.)	*/
244#undef SIGN_EXTEND_CHAR
245#if __STDC__
246#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
247#else  /* not __STDC__ */
248/* As in Harbison and Steele.  */
249#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
250#endif
251
252/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
253   use `alloca' instead of `malloc'.  This is because using malloc in
254   re_search* or re_match* could cause memory leaks when C-g is used in
255   Emacs; also, malloc is slower and causes storage fragmentation.  On
256   the other hand, malloc is more portable, and easier to debug.
257
258   Because we sometimes use alloca, some routines have to be macros,
259   not functions -- `alloca'-allocated space disappears at the end of the
260   function it is called in.  */
261
262#ifdef REGEX_MALLOC
263
264#define REGEX_ALLOCATE malloc
265#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
266#define REGEX_FREE free
267
268#else /* not REGEX_MALLOC  */
269
270/* Emacs already defines alloca, sometimes.  */
271#ifndef alloca
272
273/* Make alloca work the best possible way.  */
274#ifdef __GNUC__
275#define alloca __builtin_alloca
276#else /* not __GNUC__ */
277#if HAVE_ALLOCA_H
278#include <alloca.h>
279#else /* not __GNUC__ or HAVE_ALLOCA_H */
280#if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
281#ifndef _AIX /* Already did AIX, up at the top.	 */
282char *alloca ();
283#endif /* not _AIX */
284#endif
285#endif /* not HAVE_ALLOCA_H */
286#endif /* not __GNUC__ */
287
288#endif /* not alloca */
289
290#define REGEX_ALLOCATE alloca
291
292/* Assumes a `char *destination' variable.  */
293#define REGEX_REALLOCATE(source, osize, nsize)				\
294  (destination = (char *) alloca (nsize),				\
295   bcopy (source, destination, osize),					\
296   destination)
297
298/* No need to do anything to free, after alloca.  */
299#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
300
301#endif /* not REGEX_MALLOC */
302
303/* Define how to allocate the failure stack.  */
304
305#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
306
307#define REGEX_ALLOCATE_STACK(size)				\
308  r_alloc (&failure_stack_ptr, (size))
309#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
310  r_re_alloc (&failure_stack_ptr, (nsize))
311#define REGEX_FREE_STACK(ptr)					\
312  r_alloc_free (&failure_stack_ptr)
313
314#else /* not using relocating allocator */
315
316#ifdef REGEX_MALLOC
317
318#define REGEX_ALLOCATE_STACK malloc
319#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
320#define REGEX_FREE_STACK free
321
322#else /* not REGEX_MALLOC */
323
324#define REGEX_ALLOCATE_STACK alloca
325
326#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
327   REGEX_REALLOCATE (source, osize, nsize)
328/* No need to explicitly free anything.	 */
329#define REGEX_FREE_STACK(arg)
330
331#endif /* not REGEX_MALLOC */
332#endif /* not using relocating allocator */
333
334
335/* True if `size1' is non-NULL and PTR is pointing anywhere inside
336   `string1' or just past its end.  This works if PTR is NULL, which is
337   a good thing.  */
338#define FIRST_STRING_P(ptr)					\
339  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
340
341/* (Re)Allocate N items of type T using malloc, or fail.  */
342#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
343#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
344#define RETALLOC_IF(addr, n, t) \
345  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
346#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
347
348#define BYTEWIDTH 8 /* In bits.	 */
349
350#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
351
352#undef MAX
353#undef MIN
354#define MAX(a, b) ((a) > (b) ? (a) : (b))
355#define MIN(a, b) ((a) < (b) ? (a) : (b))
356
357typedef char boolean;
358#define false 0
359#define true 1
360
361static int re_match_2_internal ();
362
363/* These are the command codes that appear in compiled regular
364   expressions.	 Some opcodes are followed by argument bytes.  A
365   command code can specify any interpretation whatsoever for its
366   arguments.  Zero bytes may appear in the compiled regular expression.  */
367
368typedef enum
369{
370  no_op = 0,
371
372  /* Succeed right away--no more backtracking.	*/
373  succeed,
374
375	/* Followed by one byte giving n, then by n literal bytes.  */
376  exactn,
377
378	/* Matches any (more or less) character.  */
379  anychar,
380
381	/* Matches any one char belonging to specified set.  First
382	   following byte is number of bitmap bytes.  Then come bytes
383	   for a bitmap saying which chars are in.  Bits in each byte
384	   are ordered low-bit-first.  A character is in the set if its
385	   bit is 1.  A character too large to have a bit in the map is
386	   automatically not in the set.  */
387  charset,
388
389	/* Same parameters as charset, but match any character that is
390	   not one of those specified.	*/
391  charset_not,
392
393	/* Start remembering the text that is matched, for storing in a
394	   register.  Followed by one byte with the register number, in
395	   the range 0 to one less than the pattern buffer's re_nsub
396	   field.  Then followed by one byte with the number of groups
397	   inner to this one.  (This last has to be part of the
398	   start_memory only because we need it in the on_failure_jump
399	   of re_match_2.)  */
400  start_memory,
401
402	/* Stop remembering the text that is matched and store it in a
403	   memory register.  Followed by one byte with the register
404	   number, in the range 0 to one less than `re_nsub' in the
405	   pattern buffer, and one byte with the number of inner groups,
406	   just like `start_memory'.  (We need the number of inner
407	   groups here because we don't have any easy way of finding the
408	   corresponding start_memory when we're at a stop_memory.)  */
409  stop_memory,
410
411	/* Match a duplicate of something remembered. Followed by one
412	   byte containing the register number.	 */
413  duplicate,
414
415	/* Fail unless at beginning of line.  */
416  begline,
417
418	/* Fail unless at end of line.	*/
419  endline,
420
421	/* Succeeds if at beginning of buffer (if emacs) or at beginning
422	   of string to be matched (if not).  */
423  begbuf,
424
425	/* Analogously, for end of buffer/string.  */
426  endbuf,
427
428	/* Followed by two byte relative address to which to jump.  */
429  jump,
430
431	/* Same as jump, but marks the end of an alternative.  */
432  jump_past_alt,
433
434	/* Followed by two-byte relative address of place to resume at
435	   in case of failure.	*/
436  on_failure_jump,
437
438	/* Like on_failure_jump, but pushes a placeholder instead of the
439	   current string position when executed.  */
440  on_failure_keep_string_jump,
441
442	/* Throw away latest failure point and then jump to following
443	   two-byte relative address.  */
444  pop_failure_jump,
445
446	/* Change to pop_failure_jump if know won't have to backtrack to
447	   match; otherwise change to jump.  This is used to jump
448	   back to the beginning of a repeat.  If what follows this jump
449	   clearly won't match what the repeat does, such that we can be
450	   sure that there is no use backtracking out of repetitions
451	   already matched, then we change it to a pop_failure_jump.
452	   Followed by two-byte address.  */
453  maybe_pop_jump,
454
455	/* Jump to following two-byte address, and push a dummy failure
456	   point. This failure point will be thrown away if an attempt
457	   is made to use it for a failure.  A `+' construct makes this
458	   before the first repeat.  Also used as an intermediary kind
459	   of jump when compiling an alternative.  */
460  dummy_failure_jump,
461
462	/* Push a dummy failure point and continue.  Used at the end of
463	   alternatives.  */
464  push_dummy_failure,
465
466	/* Followed by two-byte relative address and two-byte number n.
467	   After matching N times, jump to the address upon failure.  */
468  succeed_n,
469
470	/* Followed by two-byte relative address, and two-byte number n.
471	   Jump to the address N times, then fail.  */
472  jump_n,
473
474	/* Set the following two-byte relative address to the
475	   subsequent two-byte number.	The address *includes* the two
476	   bytes of number.  */
477  set_number_at,
478
479  wordchar,	/* Matches any word-constituent character.  */
480  notwordchar,	/* Matches any char that is not a word-constituent.  */
481
482  wordbeg,	/* Succeeds if at word beginning.  */
483  wordend,	/* Succeeds if at word end.  */
484
485  wordbound,	/* Succeeds if at a word boundary.  */
486  notwordbound	/* Succeeds if not at a word boundary.	*/
487
488#ifdef emacs
489  ,before_dot,	/* Succeeds if before point.  */
490  at_dot,	/* Succeeds if at point.  */
491  after_dot,	/* Succeeds if after point.  */
492
493	/* Matches any character whose syntax is specified.  Followed by
494	   a byte which contains a syntax code, e.g., Sword.  */
495  syntaxspec,
496
497	/* Matches any character whose syntax is not that specified.  */
498  notsyntaxspec,
499
500  /* Matches any character whose category-set contains the specified
501     category.	The operator is followed by a byte which contains a
502     category code (mnemonic ASCII character).	*/
503  categoryspec,
504
505  /* Matches any character whose category-set does not contain the
506     specified category.  The operator is followed by a byte which
507     contains the category code (mnemonic ASCII character).  */
508  notcategoryspec
509#endif /* emacs */
510} re_opcode_t;
511
512/* Common operations on the compiled pattern.  */
513
514/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
515
516#define STORE_NUMBER(destination, number)				\
517  do {									\
518    (destination)[0] = (number) & 0377;					\
519    (destination)[1] = (number) >> 8;					\
520  } while (0)
521
522/* Same as STORE_NUMBER, except increment DESTINATION to
523   the byte after where the number is stored.  Therefore, DESTINATION
524   must be an lvalue.  */
525
526#define STORE_NUMBER_AND_INCR(destination, number)			\
527  do {									\
528    STORE_NUMBER (destination, number);					\
529    (destination) += 2;							\
530  } while (0)
531
532/* Put into DESTINATION a number stored in two contiguous bytes starting
533   at SOURCE.  */
534
535#define EXTRACT_NUMBER(destination, source)				\
536  do {									\
537    (destination) = *(source) & 0377;					\
538    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
539  } while (0)
540
541#ifdef DEBUG
542static void
543extract_number (dest, source)
544    int *dest;
545    unsigned char *source;
546{
547  int temp = SIGN_EXTEND_CHAR (*(source + 1));
548  *dest = *source & 0377;
549  *dest += temp << 8;
550}
551
552#ifndef EXTRACT_MACROS /* To debug the macros.	*/
553#undef EXTRACT_NUMBER
554#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
555#endif /* not EXTRACT_MACROS */
556
557#endif /* DEBUG */
558
559/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
560   SOURCE must be an lvalue.  */
561
562#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
563  do {									\
564    EXTRACT_NUMBER (destination, source);				\
565    (source) += 2;							\
566  } while (0)
567
568#ifdef DEBUG
569static void
570extract_number_and_incr (destination, source)
571    int *destination;
572    unsigned char **source;
573{
574  extract_number (destination, *source);
575  *source += 2;
576}
577
578#ifndef EXTRACT_MACROS
579#undef EXTRACT_NUMBER_AND_INCR
580#define EXTRACT_NUMBER_AND_INCR(dest, src) \
581  extract_number_and_incr (&dest, &src)
582#endif /* not EXTRACT_MACROS */
583
584#endif /* DEBUG */
585
586/* Store a multibyte character in three contiguous bytes starting
587   DESTINATION, and increment DESTINATION to the byte after where the
588   character is stored.	 Therefore, DESTINATION must be an lvalue.  */
589
590#define STORE_CHARACTER_AND_INCR(destination, character)	\
591  do {								\
592    (destination)[0] = (character) & 0377;			\
593    (destination)[1] = ((character) >> 8) & 0377;		\
594    (destination)[2] = (character) >> 16;			\
595    (destination) += 3;						\
596  } while (0)
597
598/* Put into DESTINATION a character stored in three contiguous bytes
599   starting at SOURCE.	*/
600
601#define EXTRACT_CHARACTER(destination, source)	\
602  do {						\
603    (destination) = ((source)[0]		\
604		     | ((source)[1] << 8)	\
605		     | ((source)[2] << 16));	\
606  } while (0)
607
608
609/* Macros for charset. */
610
611/* Size of bitmap of charset P in bytes.  P is a start of charset,
612   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
613#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
614
615/* Nonzero if charset P has range table.  */
616#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
617
618/* Return the address of range table of charset P.  But not the start
619   of table itself, but the before where the number of ranges is
620   stored.  `2 +' means to skip re_opcode_t and size of bitmap.	 */
621#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
622
623/* Test if C is listed in the bitmap of charset P.  */
624#define CHARSET_LOOKUP_BITMAP(p, c)				\
625  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
626   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
627
628/* Return the address of end of RANGE_TABLE.  COUNT is number of
629   ranges (which is a pair of (start, end)) in the RANGE_TABLE.	 `* 2'
630   is start of range and end of range.	`* 3' is size of each start
631   and end.  */
632#define CHARSET_RANGE_TABLE_END(range_table, count)	\
633  ((range_table) + (count) * 2 * 3)
634
635/* Test if C is in RANGE_TABLE.	 A flag NOT is negated if C is in.
636   COUNT is number of ranges in RANGE_TABLE.  */
637#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
638  do									\
639    {									\
640      int range_start, range_end;					\
641      unsigned char *p;							\
642      unsigned char *range_table_end					\
643	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
644									\
645      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
646	{								\
647	  EXTRACT_CHARACTER (range_start, p);				\
648	  EXTRACT_CHARACTER (range_end, p + 3);				\
649									\
650	  if (range_start <= (c) && (c) <= range_end)			\
651	    {								\
652	      (not) = !(not);						\
653	      break;							\
654	    }								\
655	}								\
656    }									\
657  while (0)
658
659/* Test if C is in range table of CHARSET.  The flag NOT is negated if
660   C is listed in it.  */
661#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
662  do									\
663    {									\
664      /* Number of ranges in range table. */				\
665      int count;							\
666      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
667									\
668      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
669      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
670    }									\
671  while (0)
672
673/* If DEBUG is defined, Regex prints many voluminous messages about what
674   it is doing (if the variable `debug' is nonzero).  If linked with the
675   main program in `iregex.c', you can enter patterns and strings
676   interactively.  And if linked with the main program in `main.c' and
677   the other test files, you can run the already-written tests.	 */
678
679#ifdef DEBUG
680
681/* We use standard I/O for debugging.  */
682#include <stdio.h>
683
684/* It is useful to test things that ``must'' be true when debugging.  */
685#include <assert.h>
686
687static int debug = 0;
688
689#define DEBUG_STATEMENT(e) e
690#define DEBUG_PRINT1(x) if (debug) printf (x)
691#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
692#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
693#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
694#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
695  if (debug) print_partial_compiled_pattern (s, e)
696#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
697  if (debug) print_double_string (w, s1, sz1, s2, sz2)
698
699
700/* Print the fastmap in human-readable form.  */
701
702void
703print_fastmap (fastmap)
704    char *fastmap;
705{
706  unsigned was_a_range = 0;
707  unsigned i = 0;
708
709  while (i < (1 << BYTEWIDTH))
710    {
711      if (fastmap[i++])
712	{
713	  was_a_range = 0;
714	  putchar (i - 1);
715	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
716	    {
717	      was_a_range = 1;
718	      i++;
719	    }
720	  if (was_a_range)
721	    {
722	      printf ("-");
723	      putchar (i - 1);
724	    }
725	}
726    }
727  putchar ('\n');
728}
729
730
731/* Print a compiled pattern string in human-readable form, starting at
732   the START pointer into it and ending just before the pointer END.  */
733
734void
735print_partial_compiled_pattern (start, end)
736    unsigned char *start;
737    unsigned char *end;
738{
739  int mcnt, mcnt2;
740  unsigned char *p = start;
741  unsigned char *pend = end;
742
743  if (start == NULL)
744    {
745      printf ("(null)\n");
746      return;
747    }
748
749  /* Loop over pattern commands.  */
750  while (p < pend)
751    {
752      printf ("%d:\t", p - start);
753
754      switch ((re_opcode_t) *p++)
755	{
756	case no_op:
757	  printf ("/no_op");
758	  break;
759
760	case exactn:
761	  mcnt = *p++;
762	  printf ("/exactn/%d", mcnt);
763	  do
764	    {
765	      putchar ('/');
766	      putchar (*p++);
767	    }
768	  while (--mcnt);
769	  break;
770
771	case start_memory:
772	  mcnt = *p++;
773	  printf ("/start_memory/%d/%d", mcnt, *p++);
774	  break;
775
776	case stop_memory:
777	  mcnt = *p++;
778	  printf ("/stop_memory/%d/%d", mcnt, *p++);
779	  break;
780
781	case duplicate:
782	  printf ("/duplicate/%d", *p++);
783	  break;
784
785	case anychar:
786	  printf ("/anychar");
787	  break;
788
789	case charset:
790	case charset_not:
791	  {
792	    register int c, last = -100;
793	    register int in_range = 0;
794
795	    printf ("/charset [%s",
796		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
797
798	    assert (p + *p < pend);
799
800	    for (c = 0; c < 256; c++)
801	      if (c / 8 < *p
802		  && (p[1 + (c/8)] & (1 << (c % 8))))
803		{
804		  /* Are we starting a range?  */
805		  if (last + 1 == c && ! in_range)
806		    {
807		      putchar ('-');
808		      in_range = 1;
809		    }
810		  /* Have we broken a range?  */
811		  else if (last + 1 != c && in_range)
812	      {
813		      putchar (last);
814		      in_range = 0;
815		    }
816
817		  if (! in_range)
818		    putchar (c);
819
820		  last = c;
821	      }
822
823	    if (in_range)
824	      putchar (last);
825
826	    putchar (']');
827
828	    p += 1 + *p;
829	  }
830	  break;
831
832	case begline:
833	  printf ("/begline");
834	  break;
835
836	case endline:
837	  printf ("/endline");
838	  break;
839
840	case on_failure_jump:
841	  extract_number_and_incr (&mcnt, &p);
842	  printf ("/on_failure_jump to %d", p + mcnt - start);
843	  break;
844
845	case on_failure_keep_string_jump:
846	  extract_number_and_incr (&mcnt, &p);
847	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
848	  break;
849
850	case dummy_failure_jump:
851	  extract_number_and_incr (&mcnt, &p);
852	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
853	  break;
854
855	case push_dummy_failure:
856	  printf ("/push_dummy_failure");
857	  break;
858
859	case maybe_pop_jump:
860	  extract_number_and_incr (&mcnt, &p);
861	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
862	  break;
863
864	case pop_failure_jump:
865	  extract_number_and_incr (&mcnt, &p);
866	  printf ("/pop_failure_jump to %d", p + mcnt - start);
867	  break;
868
869	case jump_past_alt:
870	  extract_number_and_incr (&mcnt, &p);
871	  printf ("/jump_past_alt to %d", p + mcnt - start);
872	  break;
873
874	case jump:
875	  extract_number_and_incr (&mcnt, &p);
876	  printf ("/jump to %d", p + mcnt - start);
877	  break;
878
879	case succeed_n:
880	  extract_number_and_incr (&mcnt, &p);
881	  extract_number_and_incr (&mcnt2, &p);
882	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
883	  break;
884
885	case jump_n:
886	  extract_number_and_incr (&mcnt, &p);
887	  extract_number_and_incr (&mcnt2, &p);
888	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
889	  break;
890
891	case set_number_at:
892	  extract_number_and_incr (&mcnt, &p);
893	  extract_number_and_incr (&mcnt2, &p);
894	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
895	  break;
896
897	case wordbound:
898	  printf ("/wordbound");
899	  break;
900
901	case notwordbound:
902	  printf ("/notwordbound");
903	  break;
904
905	case wordbeg:
906	  printf ("/wordbeg");
907	  break;
908
909	case wordend:
910	  printf ("/wordend");
911
912#ifdef emacs
913	case before_dot:
914	  printf ("/before_dot");
915	  break;
916
917	case at_dot:
918	  printf ("/at_dot");
919	  break;
920
921	case after_dot:
922	  printf ("/after_dot");
923	  break;
924
925	case syntaxspec:
926	  printf ("/syntaxspec");
927	  mcnt = *p++;
928	  printf ("/%d", mcnt);
929	  break;
930
931	case notsyntaxspec:
932	  printf ("/notsyntaxspec");
933	  mcnt = *p++;
934	  printf ("/%d", mcnt);
935	  break;
936#endif /* emacs */
937
938	case wordchar:
939	  printf ("/wordchar");
940	  break;
941
942	case notwordchar:
943	  printf ("/notwordchar");
944	  break;
945
946	case begbuf:
947	  printf ("/begbuf");
948	  break;
949
950	case endbuf:
951	  printf ("/endbuf");
952	  break;
953
954	default:
955	  printf ("?%d", *(p-1));
956	}
957
958      putchar ('\n');
959    }
960
961  printf ("%d:\tend of pattern.\n", p - start);
962}
963
964
965void
966print_compiled_pattern (bufp)
967    struct re_pattern_buffer *bufp;
968{
969  unsigned char *buffer = bufp->buffer;
970
971  print_partial_compiled_pattern (buffer, buffer + bufp->used);
972  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
973
974  if (bufp->fastmap_accurate && bufp->fastmap)
975    {
976      printf ("fastmap: ");
977      print_fastmap (bufp->fastmap);
978    }
979
980  printf ("re_nsub: %d\t", bufp->re_nsub);
981  printf ("regs_alloc: %d\t", bufp->regs_allocated);
982  printf ("can_be_null: %d\t", bufp->can_be_null);
983  printf ("newline_anchor: %d\n", bufp->newline_anchor);
984  printf ("no_sub: %d\t", bufp->no_sub);
985  printf ("not_bol: %d\t", bufp->not_bol);
986  printf ("not_eol: %d\t", bufp->not_eol);
987  printf ("syntax: %d\n", bufp->syntax);
988  /* Perhaps we should print the translate table?  */
989}
990
991
992void
993print_double_string (where, string1, size1, string2, size2)
994    const char *where;
995    const char *string1;
996    const char *string2;
997    int size1;
998    int size2;
999{
1000  unsigned this_char;
1001
1002  if (where == NULL)
1003    printf ("(null)");
1004  else
1005    {
1006      if (FIRST_STRING_P (where))
1007	{
1008	  for (this_char = where - string1; this_char < size1; this_char++)
1009	    putchar (string1[this_char]);
1010
1011	  where = string2;
1012	}
1013
1014      for (this_char = where - string2; this_char < size2; this_char++)
1015	putchar (string2[this_char]);
1016    }
1017}
1018
1019#else /* not DEBUG */
1020
1021#undef assert
1022#define assert(e)
1023
1024#define DEBUG_STATEMENT(e)
1025#define DEBUG_PRINT1(x)
1026#define DEBUG_PRINT2(x1, x2)
1027#define DEBUG_PRINT3(x1, x2, x3)
1028#define DEBUG_PRINT4(x1, x2, x3, x4)
1029#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1030#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1031
1032#endif /* not DEBUG */
1033
1034/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1035   also be assigned to arbitrarily: each pattern buffer stores its own
1036   syntax, so it can be changed between regex compilations.  */
1037/* This has no initializer because initialized variables in Emacs
1038   become read-only after dumping.  */
1039reg_syntax_t re_syntax_options;
1040
1041
1042/* Specify the precise syntax of regexps for compilation.  This provides
1043   for compatibility for various utilities which historically have
1044   different, incompatible syntaxes.
1045
1046   The argument SYNTAX is a bit mask comprised of the various bits
1047   defined in regex.h.	We return the old syntax.  */
1048
1049reg_syntax_t
1050re_set_syntax (syntax)
1051    reg_syntax_t syntax;
1052{
1053  reg_syntax_t ret = re_syntax_options;
1054
1055  re_syntax_options = syntax;
1056  return ret;
1057}
1058
1059/* This table gives an error message for each of the error codes listed
1060   in regex.h.	Obviously the order here has to be same as there.
1061   POSIX doesn't require that we do anything for REG_NOERROR,
1062   but why not be nice?	 */
1063
1064static const char *re_error_msgid[] =
1065  {
1066    gettext_noop ("Success"),	/* REG_NOERROR */
1067    gettext_noop ("No match"),	/* REG_NOMATCH */
1068    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1069    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1070    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1071    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1072    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1073    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1074    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1075    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1076    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1077    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1078    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1079    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1080    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1081    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1082    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1083  };
1084
1085/* Avoiding alloca during matching, to placate r_alloc.	 */
1086
1087/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1088   searching and matching functions should not call alloca.  On some
1089   systems, alloca is implemented in terms of malloc, and if we're
1090   using the relocating allocator routines, then malloc could cause a
1091   relocation, which might (if the strings being searched are in the
1092   ralloc heap) shift the data out from underneath the regexp
1093   routines.
1094
1095   Here's another reason to avoid allocation: Emacs
1096   processes input from X in a signal handler; processing X input may
1097   call malloc; if input arrives while a matching routine is calling
1098   malloc, then we're scrod.  But Emacs can't just block input while
1099   calling matching routines; then we don't notice interrupts when
1100   they come in.  So, Emacs blocks input around all regexp calls
1101   except the matching calls, which it leaves unprotected, in the
1102   faith that they will not malloc.  */
1103
1104/* Normally, this is fine.  */
1105#define MATCH_MAY_ALLOCATE
1106
1107/* When using GNU C, we are not REALLY using the C alloca, no matter
1108   what config.h may say.  So don't take precautions for it.  */
1109#ifdef __GNUC__
1110#undef C_ALLOCA
1111#endif
1112
1113/* The match routines may not allocate if (1) they would do it with malloc
1114   and (2) it's not safe for them to use malloc.
1115   Note that if REL_ALLOC is defined, matching would not use malloc for the
1116   failure stack, but we would still use it for the register vectors;
1117   so REL_ALLOC should not affect this.	 */
1118#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1119#undef MATCH_MAY_ALLOCATE
1120#endif
1121
1122
1123/* Failure stack declarations and macros; both re_compile_fastmap and
1124   re_match_2 use a failure stack.  These have to be macros because of
1125   REGEX_ALLOCATE_STACK.  */
1126
1127
1128/* Approximate number of failure points for which to initially allocate space
1129   when matching.  If this number is exceeded, we allocate more
1130   space, so it is not a hard limit.  */
1131#ifndef INIT_FAILURE_ALLOC
1132#define INIT_FAILURE_ALLOC 20
1133#endif
1134
1135/* Roughly the maximum number of failure points on the stack.  Would be
1136   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1137   This is a variable only so users of regex can assign to it; we never
1138   change it ourselves.	 */
1139#if defined (MATCH_MAY_ALLOCATE)
1140/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
1141   whose default stack limit is 2mb.  In order for a larger
1142   value to work reliably, you have to try to make it accord
1143   with the process stack limit.  */
1144int re_max_failures = 40000;
1145#else
1146int re_max_failures = 4000;
1147#endif
1148
1149union fail_stack_elt
1150{
1151  unsigned char *pointer;
1152  int integer;
1153};
1154
1155typedef union fail_stack_elt fail_stack_elt_t;
1156
1157typedef struct
1158{
1159  fail_stack_elt_t *stack;
1160  unsigned size;
1161  unsigned avail;			/* Offset of next open position.  */
1162} fail_stack_type;
1163
1164#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1165#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1166#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1167
1168
1169/* Define macros to initialize and free the failure stack.
1170   Do `return -2' if the alloc fails.  */
1171
1172#ifdef MATCH_MAY_ALLOCATE
1173#define INIT_FAIL_STACK()						\
1174  do {									\
1175    fail_stack.stack = (fail_stack_elt_t *)				\
1176      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
1177			    * sizeof (fail_stack_elt_t));		\
1178									\
1179    if (fail_stack.stack == NULL)					\
1180      return -2;							\
1181									\
1182    fail_stack.size = INIT_FAILURE_ALLOC;				\
1183    fail_stack.avail = 0;						\
1184  } while (0)
1185
1186#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1187#else
1188#define INIT_FAIL_STACK()						\
1189  do {									\
1190    fail_stack.avail = 0;						\
1191  } while (0)
1192
1193#define RESET_FAIL_STACK()
1194#endif
1195
1196
1197/* Double the size of FAIL_STACK, up to a limit
1198   which allows approximately `re_max_failures' items.
1199
1200   Return 1 if succeeds, and 0 if either ran out of memory
1201   allocating space for it or it was already too large.
1202
1203   REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
1204
1205/* Factor to increase the failure stack size by
1206   when we increase it.
1207   This used to be 2, but 2 was too wasteful
1208   because the old discarded stacks added up to as much space
1209   were as ultimate, maximum-size stack.  */
1210#define FAIL_STACK_GROWTH_FACTOR 4
1211
1212#define GROW_FAIL_STACK(fail_stack)					\
1213  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
1214    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
1215   ? 0									\
1216   : ((fail_stack).stack						\
1217      = (fail_stack_elt_t *)						\
1218	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
1219	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
1220	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
1221	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1222		* FAIL_STACK_GROWTH_FACTOR))),				\
1223									\
1224      (fail_stack).stack == NULL					\
1225      ? 0								\
1226      : ((fail_stack).size						\
1227	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
1228		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1229		  * FAIL_STACK_GROWTH_FACTOR))				\
1230	    / sizeof (fail_stack_elt_t)),				\
1231	 1)))
1232
1233
1234/* Push pointer POINTER on FAIL_STACK.
1235   Return 1 if was able to do so and 0 if ran out of memory allocating
1236   space to do so.  */
1237#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1238  ((FAIL_STACK_FULL ()							\
1239    && !GROW_FAIL_STACK (FAIL_STACK))					\
1240   ? 0									\
1241   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1242      1))
1243
1244/* Push a pointer value onto the failure stack.
1245   Assumes the variable `fail_stack'.  Probably should only
1246   be called from within `PUSH_FAILURE_POINT'.	*/
1247#define PUSH_FAILURE_POINTER(item)					\
1248  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1249
1250/* This pushes an integer-valued item onto the failure stack.
1251   Assumes the variable `fail_stack'.  Probably should only
1252   be called from within `PUSH_FAILURE_POINT'.	*/
1253#define PUSH_FAILURE_INT(item)					\
1254  fail_stack.stack[fail_stack.avail++].integer = (item)
1255
1256/* Push a fail_stack_elt_t value onto the failure stack.
1257   Assumes the variable `fail_stack'.  Probably should only
1258   be called from within `PUSH_FAILURE_POINT'.	*/
1259#define PUSH_FAILURE_ELT(item)					\
1260  fail_stack.stack[fail_stack.avail++] =  (item)
1261
1262/* These three POP... operations complement the three PUSH... operations.
1263   All assume that `fail_stack' is nonempty.  */
1264#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1265#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1266#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1267
1268/* Used to omit pushing failure point id's when we're not debugging.  */
1269#ifdef DEBUG
1270#define DEBUG_PUSH PUSH_FAILURE_INT
1271#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1272#else
1273#define DEBUG_PUSH(item)
1274#define DEBUG_POP(item_addr)
1275#endif
1276
1277
1278/* Push the information about the state we will need
1279   if we ever fail back to it.
1280
1281   Requires variables fail_stack, regstart, regend, reg_info, and
1282   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
1283   declared.
1284
1285   Does `return FAILURE_CODE' if runs out of memory.  */
1286
1287#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1288  do {									\
1289    char *destination;							\
1290    /* Must be int, so when we don't save any registers, the arithmetic	\
1291       of 0 + -1 isn't done as unsigned.  */				\
1292    int this_reg;							\
1293									\
1294    DEBUG_STATEMENT (failure_id++);					\
1295    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1296    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1297    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1298    DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
1299									\
1300    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
1301    DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
1302									\
1303    /* Ensure we have enough space allocated for what we will push.  */	\
1304    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1305      {									\
1306	if (!GROW_FAIL_STACK (fail_stack))				\
1307	  return failure_code;						\
1308									\
1309	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1310		       (fail_stack).size);				\
1311	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1312      }									\
1313									\
1314    /* Push the info, starting with the registers.  */			\
1315    DEBUG_PRINT1 ("\n");						\
1316									\
1317    if (1)								\
1318      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1319	   this_reg++)							\
1320	{								\
1321	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
1322	  DEBUG_STATEMENT (num_regs_pushed++);				\
1323									\
1324	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
1325	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1326									\
1327	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
1328	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1329									\
1330	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
1331	  DEBUG_PRINT2 (" match_null=%d",				\
1332			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1333	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1334	  DEBUG_PRINT2 (" matched_something=%d",			\
1335			MATCHED_SOMETHING (reg_info[this_reg]));	\
1336	  DEBUG_PRINT2 (" ever_matched=%d",				\
1337			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1338	  DEBUG_PRINT1 ("\n");						\
1339	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1340	}								\
1341									\
1342    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1343    PUSH_FAILURE_INT (lowest_active_reg);				\
1344									\
1345    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1346    PUSH_FAILURE_INT (highest_active_reg);				\
1347									\
1348    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
1349    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1350    PUSH_FAILURE_POINTER (pattern_place);				\
1351									\
1352    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
1353    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
1354				 size2);				\
1355    DEBUG_PRINT1 ("'\n");						\
1356    PUSH_FAILURE_POINTER (string_place);				\
1357									\
1358    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1359    DEBUG_PUSH (failure_id);						\
1360  } while (0)
1361
1362/* This is the number of items that are pushed and popped on the stack
1363   for each register.  */
1364#define NUM_REG_ITEMS  3
1365
1366/* Individual items aside from the registers.  */
1367#ifdef DEBUG
1368#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1369#else
1370#define NUM_NONREG_ITEMS 4
1371#endif
1372
1373/* Estimate the size of data pushed by a typical failure stack entry.
1374   An estimate is all we need, because all we use this for
1375   is to choose a limit for how big to make the failure stack.  */
1376
1377#define TYPICAL_FAILURE_SIZE 20
1378
1379/* This is how many items we actually use for a failure point.
1380   It depends on the regexp.  */
1381#define NUM_FAILURE_ITEMS				\
1382  (((0							\
1383     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1384    * NUM_REG_ITEMS)					\
1385   + NUM_NONREG_ITEMS)
1386
1387/* How many items can still be added to the stack without overflowing it.  */
1388#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1389
1390
1391/* Pops what PUSH_FAIL_STACK pushes.
1392
1393   We restore into the parameters, all of which should be lvalues:
1394     STR -- the saved data position.
1395     PAT -- the saved pattern position.
1396     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1397     REGSTART, REGEND -- arrays of string positions.
1398     REG_INFO -- array of information about each subexpression.
1399
1400   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1401   `pend', `string1', `size1', `string2', and `size2'.	*/
1402
1403#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1404{									\
1405  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
1406  int this_reg;								\
1407  const unsigned char *string_temp;					\
1408									\
1409  assert (!FAIL_STACK_EMPTY ());					\
1410									\
1411  /* Remove failure points and point to how many regs pushed.  */	\
1412  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1413  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1414  DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
1415									\
1416  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1417									\
1418  DEBUG_POP (&failure_id);						\
1419  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1420									\
1421  /* If the saved string location is NULL, it came from an		\
1422     on_failure_keep_string_jump opcode, and we want to throw away the	\
1423     saved NULL, thus retaining our current position in the string.  */	\
1424  string_temp = POP_FAILURE_POINTER ();					\
1425  if (string_temp != NULL)						\
1426    str = (const char *) string_temp;					\
1427									\
1428  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
1429  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1430  DEBUG_PRINT1 ("'\n");							\
1431									\
1432  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1433  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
1434  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1435									\
1436  /* Restore register info.  */						\
1437  high_reg = (unsigned) POP_FAILURE_INT ();				\
1438  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
1439									\
1440  low_reg = (unsigned) POP_FAILURE_INT ();				\
1441  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
1442									\
1443  if (1)								\
1444    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1445      {									\
1446	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
1447									\
1448	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1449	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
1450									\
1451	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1452	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
1453									\
1454	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1455	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
1456      }									\
1457  else									\
1458    {									\
1459      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1460	{								\
1461	  reg_info[this_reg].word.integer = 0;				\
1462	  regend[this_reg] = 0;						\
1463	  regstart[this_reg] = 0;					\
1464	}								\
1465      highest_active_reg = high_reg;					\
1466    }									\
1467									\
1468  set_regs_matched_done = 0;						\
1469  DEBUG_STATEMENT (nfailure_points_popped++);				\
1470} /* POP_FAILURE_POINT */
1471
1472
1473
1474/* Structure for per-register (a.k.a. per-group) information.
1475   Other register information, such as the
1476   starting and ending positions (which are addresses), and the list of
1477   inner groups (which is a bits list) are maintained in separate
1478   variables.
1479
1480   We are making a (strictly speaking) nonportable assumption here: that
1481   the compiler will pack our bit fields into something that fits into
1482   the type of `word', i.e., is something that fits into one item on the
1483   failure stack.  */
1484
1485typedef union
1486{
1487  fail_stack_elt_t word;
1488  struct
1489  {
1490      /* This field is one if this group can match the empty string,
1491	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1492#define MATCH_NULL_UNSET_VALUE 3
1493    unsigned match_null_string_p : 2;
1494    unsigned is_active : 1;
1495    unsigned matched_something : 1;
1496    unsigned ever_matched_something : 1;
1497  } bits;
1498} register_info_type;
1499
1500#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1501#define IS_ACTIVE(R)  ((R).bits.is_active)
1502#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1503#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1504
1505
1506/* Call this when have matched a real character; it sets `matched' flags
1507   for the subexpressions which we are currently inside.  Also records
1508   that those subexprs have matched.  */
1509#define SET_REGS_MATCHED()						\
1510  do									\
1511    {									\
1512      if (!set_regs_matched_done)					\
1513	{								\
1514	  unsigned r;							\
1515	  set_regs_matched_done = 1;					\
1516	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1517	    {								\
1518	      MATCHED_SOMETHING (reg_info[r])				\
1519		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1520		= 1;							\
1521	    }								\
1522	}								\
1523    }									\
1524  while (0)
1525
1526/* Registers are set to a sentinel when they haven't yet matched.  */
1527static char reg_unset_dummy;
1528#define REG_UNSET_VALUE (&reg_unset_dummy)
1529#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1530
1531/* Subroutine declarations and macros for regex_compile.  */
1532
1533static void store_op1 (), store_op2 ();
1534static void insert_op1 (), insert_op2 ();
1535static boolean at_begline_loc_p (), at_endline_loc_p ();
1536static boolean group_in_compile_stack ();
1537static reg_errcode_t compile_range ();
1538
1539/* Fetch the next character in the uncompiled pattern---translating it
1540   if necessary.  Also cast from a signed character in the constant
1541   string passed to us by the user to an unsigned char that we can use
1542   as an array index (in, e.g., `translate').  */
1543#ifndef PATFETCH
1544#define PATFETCH(c)							\
1545  do {if (p == pend) return REG_EEND;					\
1546    c = (unsigned char) *p++;						\
1547    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
1548  } while (0)
1549#endif
1550
1551/* Fetch the next character in the uncompiled pattern, with no
1552   translation.	 */
1553#define PATFETCH_RAW(c)							\
1554  do {if (p == pend) return REG_EEND;					\
1555    c = (unsigned char) *p++;						\
1556  } while (0)
1557
1558/* Go backwards one character in the pattern.  */
1559#define PATUNFETCH p--
1560
1561
1562/* If `translate' is non-null, return translate[D], else just D.  We
1563   cast the subscript to translate because some data is declared as
1564   `char *', to avoid warnings when a string constant is passed.  But
1565   when we use a character as a subscript we must make it unsigned.  */
1566#ifndef TRANSLATE
1567#define TRANSLATE(d) \
1568  (RE_TRANSLATE_P (translate) \
1569   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
1570#endif
1571
1572
1573/* Macros for outputting the compiled pattern into `buffer'.  */
1574
1575/* If the buffer isn't allocated when it comes in, use this.  */
1576#define INIT_BUF_SIZE  32
1577
1578/* Make sure we have at least N more bytes of space in buffer.	*/
1579#define GET_BUFFER_SPACE(n)						\
1580    while (b - bufp->buffer + (n) > bufp->allocated)			\
1581      EXTEND_BUFFER ()
1582
1583/* Make sure we have one more byte of buffer space and then add C to it.  */
1584#define BUF_PUSH(c)							\
1585  do {									\
1586    GET_BUFFER_SPACE (1);						\
1587    *b++ = (unsigned char) (c);						\
1588  } while (0)
1589
1590
1591/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1592#define BUF_PUSH_2(c1, c2)						\
1593  do {									\
1594    GET_BUFFER_SPACE (2);						\
1595    *b++ = (unsigned char) (c1);					\
1596    *b++ = (unsigned char) (c2);					\
1597  } while (0)
1598
1599
1600/* As with BUF_PUSH_2, except for three bytes.	*/
1601#define BUF_PUSH_3(c1, c2, c3)						\
1602  do {									\
1603    GET_BUFFER_SPACE (3);						\
1604    *b++ = (unsigned char) (c1);					\
1605    *b++ = (unsigned char) (c2);					\
1606    *b++ = (unsigned char) (c3);					\
1607  } while (0)
1608
1609
1610/* Store a jump with opcode OP at LOC to location TO.  We store a
1611   relative address offset by the three bytes the jump itself occupies.	 */
1612#define STORE_JUMP(op, loc, to) \
1613  store_op1 (op, loc, (to) - (loc) - 3)
1614
1615/* Likewise, for a two-argument jump.  */
1616#define STORE_JUMP2(op, loc, to, arg) \
1617  store_op2 (op, loc, (to) - (loc) - 3, arg)
1618
1619/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.	 */
1620#define INSERT_JUMP(op, loc, to) \
1621  insert_op1 (op, loc, (to) - (loc) - 3, b)
1622
1623/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1624#define INSERT_JUMP2(op, loc, to, arg) \
1625  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1626
1627
1628/* This is not an arbitrary limit: the arguments which represent offsets
1629   into the pattern are two bytes long.	 So if 2^16 bytes turns out to
1630   be too small, many things would have to change.  */
1631#define MAX_BUF_SIZE (1L << 16)
1632
1633
1634/* Extend the buffer by twice its current size via realloc and
1635   reset the pointers that pointed into the old block to point to the
1636   correct places in the new one.  If extending the buffer results in it
1637   being larger than MAX_BUF_SIZE, then flag memory exhausted.	*/
1638#define EXTEND_BUFFER()							\
1639  do {									\
1640    unsigned char *old_buffer = bufp->buffer;				\
1641    if (bufp->allocated == MAX_BUF_SIZE)				\
1642      return REG_ESIZE;							\
1643    bufp->allocated <<= 1;						\
1644    if (bufp->allocated > MAX_BUF_SIZE)					\
1645      bufp->allocated = MAX_BUF_SIZE;					\
1646    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1647    if (bufp->buffer == NULL)						\
1648      return REG_ESPACE;						\
1649    /* If the buffer moved, move all the pointers into it.  */		\
1650    if (old_buffer != bufp->buffer)					\
1651      {									\
1652	b = (b - old_buffer) + bufp->buffer;				\
1653	begalt = (begalt - old_buffer) + bufp->buffer;			\
1654	if (fixup_alt_jump)						\
1655	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1656	if (laststart)							\
1657	  laststart = (laststart - old_buffer) + bufp->buffer;		\
1658	if (pending_exact)						\
1659	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1660      }									\
1661  } while (0)
1662
1663
1664/* Since we have one byte reserved for the register number argument to
1665   {start,stop}_memory, the maximum number of groups we can report
1666   things about is what fits in that byte.  */
1667#define MAX_REGNUM 255
1668
1669/* But patterns can have more than `MAX_REGNUM' registers.  We just
1670   ignore the excess.  */
1671typedef unsigned regnum_t;
1672
1673
1674/* Macros for the compile stack.  */
1675
1676/* Since offsets can go either forwards or backwards, this type needs to
1677   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.	 */
1678typedef int pattern_offset_t;
1679
1680typedef struct
1681{
1682  pattern_offset_t begalt_offset;
1683  pattern_offset_t fixup_alt_jump;
1684  pattern_offset_t inner_group_offset;
1685  pattern_offset_t laststart_offset;
1686  regnum_t regnum;
1687} compile_stack_elt_t;
1688
1689
1690typedef struct
1691{
1692  compile_stack_elt_t *stack;
1693  unsigned size;
1694  unsigned avail;			/* Offset of next open position.  */
1695} compile_stack_type;
1696
1697
1698#define INIT_COMPILE_STACK_SIZE 32
1699
1700#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1701#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1702
1703/* The next available element.	*/
1704#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1705
1706
1707/* Structure to manage work area for range table.  */
1708struct range_table_work_area
1709{
1710  int *table;			/* actual work area.  */
1711  int allocated;		/* allocated size for work area in bytes.  */
1712  int used;			/* actually used size in words.	 */
1713};
1714
1715/* Make sure that WORK_AREA can hold more N multibyte characters.  */
1716#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
1717  do {									  \
1718    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
1719      {									  \
1720	(work_area).allocated += 16 * sizeof (int);			  \
1721	if ((work_area).table)						  \
1722	  (work_area).table						  \
1723	    = (int *) realloc ((work_area).table, (work_area).allocated); \
1724	else								  \
1725	  (work_area).table						  \
1726	    = (int *) malloc ((work_area).allocated);			  \
1727	if ((work_area).table == 0)					  \
1728	  FREE_STACK_RETURN (REG_ESPACE);				  \
1729      }									  \
1730  } while (0)
1731
1732/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
1733#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
1734  do {									\
1735    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
1736    (work_area).table[(work_area).used++] = (range_start);		\
1737    (work_area).table[(work_area).used++] = (range_end);		\
1738  } while (0)
1739
1740/* Free allocated memory for WORK_AREA.	 */
1741#define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
1742  do {						\
1743    if ((work_area).table)			\
1744      free ((work_area).table);			\
1745  } while (0)
1746
1747#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
1748#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1749#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1750
1751
1752/* Set the bit for character C in a list.  */
1753#define SET_LIST_BIT(c)				      \
1754  (b[((unsigned char) (c)) / BYTEWIDTH]		      \
1755   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1756
1757
1758/* Get the next unsigned number in the uncompiled pattern.  */
1759#define GET_UNSIGNED_NUMBER(num)					\
1760  { if (p != pend)							\
1761     {									\
1762       PATFETCH (c);							\
1763       while (ISDIGIT (c))						\
1764	 {								\
1765	   if (num < 0)							\
1766	      num = 0;							\
1767	   num = num * 10 + c - '0';					\
1768	   if (p == pend)						\
1769	      break;							\
1770	   PATFETCH (c);						\
1771	 }								\
1772       }								\
1773    }
1774
1775#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1776
1777#define IS_CHAR_CLASS(string)						\
1778   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1779    || STREQ (string, "lower") || STREQ (string, "digit")		\
1780    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1781    || STREQ (string, "space") || STREQ (string, "print")		\
1782    || STREQ (string, "punct") || STREQ (string, "graph")		\
1783    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1784
1785#ifndef MATCH_MAY_ALLOCATE
1786
1787/* If we cannot allocate large objects within re_match_2_internal,
1788   we make the fail stack and register vectors global.
1789   The fail stack, we grow to the maximum size when a regexp
1790   is compiled.
1791   The register vectors, we adjust in size each time we
1792   compile a regexp, according to the number of registers it needs.  */
1793
1794static fail_stack_type fail_stack;
1795
1796/* Size with which the following vectors are currently allocated.
1797   That is so we can make them bigger as needed,
1798   but never make them smaller.	 */
1799static int regs_allocated_size;
1800
1801static const char **	 regstart, **	  regend;
1802static const char ** old_regstart, ** old_regend;
1803static const char **best_regstart, **best_regend;
1804static register_info_type *reg_info;
1805static const char **reg_dummy;
1806static register_info_type *reg_info_dummy;
1807
1808/* Make the register vectors big enough for NUM_REGS registers,
1809   but don't make them smaller.	 */
1810
1811static
1812regex_grow_registers (num_regs)
1813     int num_regs;
1814{
1815  if (num_regs > regs_allocated_size)
1816    {
1817      RETALLOC_IF (regstart,	 num_regs, const char *);
1818      RETALLOC_IF (regend,	 num_regs, const char *);
1819      RETALLOC_IF (old_regstart, num_regs, const char *);
1820      RETALLOC_IF (old_regend,	 num_regs, const char *);
1821      RETALLOC_IF (best_regstart, num_regs, const char *);
1822      RETALLOC_IF (best_regend,	 num_regs, const char *);
1823      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1824      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1825      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1826
1827      regs_allocated_size = num_regs;
1828    }
1829}
1830
1831#endif /* not MATCH_MAY_ALLOCATE */
1832
1833/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1834   Returns one of error codes defined in `regex.h', or zero for success.
1835
1836   Assumes the `allocated' (and perhaps `buffer') and `translate'
1837   fields are set in BUFP on entry.
1838
1839   If it succeeds, results are put in BUFP (if it returns an error, the
1840   contents of BUFP are undefined):
1841     `buffer' is the compiled pattern;
1842     `syntax' is set to SYNTAX;
1843     `used' is set to the length of the compiled pattern;
1844     `fastmap_accurate' is zero;
1845     `re_nsub' is the number of subexpressions in PATTERN;
1846     `not_bol' and `not_eol' are zero;
1847
1848   The `fastmap' and `newline_anchor' fields are neither
1849   examined nor set.  */
1850
1851/* Return, freeing storage we allocated.  */
1852#define FREE_STACK_RETURN(value)		\
1853  do {							\
1854    FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
1855    free (compile_stack.stack);				\
1856    return value;					\
1857  } while (0)
1858
1859static reg_errcode_t
1860regex_compile (pattern, size, syntax, bufp)
1861     const char *pattern;
1862     int size;
1863     reg_syntax_t syntax;
1864     struct re_pattern_buffer *bufp;
1865{
1866  /* We fetch characters from PATTERN here.  Even though PATTERN is
1867     `char *' (i.e., signed), we declare these variables as unsigned, so
1868     they can be reliably used as array indices.  */
1869  register unsigned int c, c1;
1870
1871  /* A random temporary spot in PATTERN.  */
1872  const char *p1;
1873
1874  /* Points to the end of the buffer, where we should append.  */
1875  register unsigned char *b;
1876
1877  /* Keeps track of unclosed groups.  */
1878  compile_stack_type compile_stack;
1879
1880  /* Points to the current (ending) position in the pattern.  */
1881#ifdef AIX
1882  /* `const' makes AIX compiler fail.  */
1883  char *p = pattern;
1884#else
1885  const char *p = pattern;
1886#endif
1887  const char *pend = pattern + size;
1888
1889  /* How to translate the characters in the pattern.  */
1890  RE_TRANSLATE_TYPE translate = bufp->translate;
1891
1892  /* Address of the count-byte of the most recently inserted `exactn'
1893     command.  This makes it possible to tell if a new exact-match
1894     character can be added to that command or if the character requires
1895     a new `exactn' command.  */
1896  unsigned char *pending_exact = 0;
1897
1898  /* Address of start of the most recently finished expression.
1899     This tells, e.g., postfix * where to find the start of its
1900     operand.  Reset at the beginning of groups and alternatives.  */
1901  unsigned char *laststart = 0;
1902
1903  /* Address of beginning of regexp, or inside of last group.  */
1904  unsigned char *begalt;
1905
1906  /* Place in the uncompiled pattern (i.e., the {) to
1907     which to go back if the interval is invalid.  */
1908  const char *beg_interval;
1909
1910  /* Address of the place where a forward jump should go to the end of
1911     the containing expression.	 Each alternative of an `or' -- except the
1912     last -- ends with a forward jump of this sort.  */
1913  unsigned char *fixup_alt_jump = 0;
1914
1915  /* Counts open-groups as they are encountered.  Remembered for the
1916     matching close-group on the compile stack, so the same register
1917     number is put in the stop_memory as the start_memory.  */
1918  regnum_t regnum = 0;
1919
1920  /* Work area for range table of charset.  */
1921  struct range_table_work_area range_table_work;
1922
1923#ifdef DEBUG
1924  DEBUG_PRINT1 ("\nCompiling pattern: ");
1925  if (debug)
1926    {
1927      unsigned debug_count;
1928
1929      for (debug_count = 0; debug_count < size; debug_count++)
1930	putchar (pattern[debug_count]);
1931      putchar ('\n');
1932    }
1933#endif /* DEBUG */
1934
1935  /* Initialize the compile stack.  */
1936  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1937  if (compile_stack.stack == NULL)
1938    return REG_ESPACE;
1939
1940  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1941  compile_stack.avail = 0;
1942
1943  range_table_work.table = 0;
1944  range_table_work.allocated = 0;
1945
1946  /* Initialize the pattern buffer.  */
1947  bufp->syntax = syntax;
1948  bufp->fastmap_accurate = 0;
1949  bufp->not_bol = bufp->not_eol = 0;
1950
1951  /* Set `used' to zero, so that if we return an error, the pattern
1952     printer (for debugging) will think there's no pattern.  We reset it
1953     at the end.  */
1954  bufp->used = 0;
1955
1956  /* Always count groups, whether or not bufp->no_sub is set.  */
1957  bufp->re_nsub = 0;
1958
1959#ifdef emacs
1960  /* bufp->multibyte is set before regex_compile is called, so don't alter
1961     it. */
1962#else  /* not emacs */
1963  /* Nothing is recognized as a multibyte character.  */
1964  bufp->multibyte = 0;
1965#endif
1966
1967#if !defined (emacs) && !defined (SYNTAX_TABLE)
1968  /* Initialize the syntax table.  */
1969   init_syntax_once ();
1970#endif
1971
1972  if (bufp->allocated == 0)
1973    {
1974      if (bufp->buffer)
1975	{ /* If zero allocated, but buffer is non-null, try to realloc
1976	     enough space.  This loses if buffer's address is bogus, but
1977	     that is the user's responsibility.	 */
1978	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1979	}
1980      else
1981	{ /* Caller did not allocate a buffer.	Do it for them.	 */
1982	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1983	}
1984      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1985
1986      bufp->allocated = INIT_BUF_SIZE;
1987    }
1988
1989  begalt = b = bufp->buffer;
1990
1991  /* Loop through the uncompiled pattern until we're at the end.  */
1992  while (p != pend)
1993    {
1994      PATFETCH (c);
1995
1996      switch (c)
1997	{
1998	case '^':
1999	  {
2000	    if (   /* If at start of pattern, it's an operator.	 */
2001		   p == pattern + 1
2002		   /* If context independent, it's an operator.	 */
2003		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2004		   /* Otherwise, depends on what's come before.	 */
2005		|| at_begline_loc_p (pattern, p, syntax))
2006	      BUF_PUSH (begline);
2007	    else
2008	      goto normal_char;
2009	  }
2010	  break;
2011
2012
2013	case '$':
2014	  {
2015	    if (   /* If at end of pattern, it's an operator.  */
2016		   p == pend
2017		   /* If context independent, it's an operator.	 */
2018		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2019		   /* Otherwise, depends on what's next.  */
2020		|| at_endline_loc_p (p, pend, syntax))
2021	       BUF_PUSH (endline);
2022	     else
2023	       goto normal_char;
2024	   }
2025	   break;
2026
2027
2028	case '+':
2029	case '?':
2030	  if ((syntax & RE_BK_PLUS_QM)
2031	      || (syntax & RE_LIMITED_OPS))
2032	    goto normal_char;
2033	handle_plus:
2034	case '*':
2035	  /* If there is no previous pattern... */
2036	  if (!laststart)
2037	    {
2038	      if (syntax & RE_CONTEXT_INVALID_OPS)
2039		FREE_STACK_RETURN (REG_BADRPT);
2040	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2041		goto normal_char;
2042	    }
2043
2044	  {
2045	    /* Are we optimizing this jump?  */
2046	    boolean keep_string_p = false;
2047
2048	    /* 1 means zero (many) matches is allowed.	*/
2049	    char zero_times_ok = 0, many_times_ok = 0;
2050
2051	    /* If there is a sequence of repetition chars, collapse it
2052	       down to just one (the right one).  We can't combine
2053	       interval operators with these because of, e.g., `a{2}*',
2054	       which should only match an even number of `a's.	*/
2055
2056	    for (;;)
2057	      {
2058		zero_times_ok |= c != '+';
2059		many_times_ok |= c != '?';
2060
2061		if (p == pend)
2062		  break;
2063
2064		PATFETCH (c);
2065
2066		if (c == '*'
2067		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2068		  ;
2069
2070		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
2071		  {
2072		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2073
2074		    PATFETCH (c1);
2075		    if (!(c1 == '+' || c1 == '?'))
2076		      {
2077			PATUNFETCH;
2078			PATUNFETCH;
2079			break;
2080		      }
2081
2082		    c = c1;
2083		  }
2084		else
2085		  {
2086		    PATUNFETCH;
2087		    break;
2088		  }
2089
2090		/* If we get here, we found another repeat character.  */
2091	       }
2092
2093	    /* Star, etc. applied to an empty pattern is equivalent
2094	       to an empty pattern.  */
2095	    if (!laststart)
2096	      break;
2097
2098	    /* Now we know whether or not zero matches is allowed
2099	       and also whether or not two or more matches is allowed.	*/
2100	    if (many_times_ok)
2101	      { /* More than one repetition is allowed, so put in at the
2102		   end a backward relative jump from `b' to before the next
2103		   jump we're going to put in below (which jumps from
2104		   laststart to after this jump).
2105
2106		   But if we are at the `*' in the exact sequence `.*\n',
2107		   insert an unconditional jump backwards to the .,
2108		   instead of the beginning of the loop.  This way we only
2109		   push a failure point once, instead of every time
2110		   through the loop.  */
2111		assert (p - 1 > pattern);
2112
2113		/* Allocate the space for the jump.  */
2114		GET_BUFFER_SPACE (3);
2115
2116		/* We know we are not at the first character of the pattern,
2117		   because laststart was nonzero.  And we've already
2118		   incremented `p', by the way, to be the character after
2119		   the `*'.  Do we have to do something analogous here
2120		   for null bytes, because of RE_DOT_NOT_NULL?	*/
2121		if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
2122		    && zero_times_ok
2123		    && p < pend
2124		    && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
2125		    && !(syntax & RE_DOT_NEWLINE))
2126		  { /* We have .*\n.  */
2127		    STORE_JUMP (jump, b, laststart);
2128		    keep_string_p = true;
2129		  }
2130		else
2131		  /* Anything else.  */
2132		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2133
2134		/* We've added more stuff to the buffer.  */
2135		b += 3;
2136	      }
2137
2138	    /* On failure, jump from laststart to b + 3, which will be the
2139	       end of the buffer after this jump is inserted.  */
2140	    GET_BUFFER_SPACE (3);
2141	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2142				       : on_failure_jump,
2143			 laststart, b + 3);
2144	    pending_exact = 0;
2145	    b += 3;
2146
2147	    if (!zero_times_ok)
2148	      {
2149		/* At least one repetition is required, so insert a
2150		   `dummy_failure_jump' before the initial
2151		   `on_failure_jump' instruction of the loop. This
2152		   effects a skip over that instruction the first time
2153		   we hit that loop.  */
2154		GET_BUFFER_SPACE (3);
2155		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2156		b += 3;
2157	      }
2158	    }
2159	  break;
2160
2161
2162	case '.':
2163	  laststart = b;
2164	  BUF_PUSH (anychar);
2165	  break;
2166
2167
2168	case '[':
2169	  {
2170	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2171
2172	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2173
2174	    /* Ensure that we have enough space to push a charset: the
2175	       opcode, the length count, and the bitset; 34 bytes in all.  */
2176	    GET_BUFFER_SPACE (34);
2177
2178	    laststart = b;
2179
2180	    /* We test `*p == '^' twice, instead of using an if
2181	       statement, so we only need one BUF_PUSH.	 */
2182	    BUF_PUSH (*p == '^' ? charset_not : charset);
2183	    if (*p == '^')
2184	      p++;
2185
2186	    /* Remember the first position in the bracket expression.  */
2187	    p1 = p;
2188
2189	    /* Push the number of bytes in the bitmap.	*/
2190	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2191
2192	    /* Clear the whole map.  */
2193	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2194
2195	    /* charset_not matches newline according to a syntax bit.  */
2196	    if ((re_opcode_t) b[-2] == charset_not
2197		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2198	      SET_LIST_BIT ('\n');
2199
2200	    /* Read in characters and ranges, setting map bits.	 */
2201	    for (;;)
2202	      {
2203		int len;
2204		boolean escaped_char = false;
2205
2206		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2207
2208		PATFETCH (c);
2209
2210		/* \ might escape characters inside [...] and [^...].  */
2211		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2212		  {
2213		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2214
2215		    PATFETCH (c);
2216		    escaped_char = true;
2217		  }
2218		else
2219		  {
2220		    /* Could be the end of the bracket expression.	If it's
2221		       not (i.e., when the bracket expression is `[]' so
2222		       far), the ']' character bit gets set way below.  */
2223		    if (c == ']' && p != p1 + 1)
2224		      break;
2225		  }
2226
2227		/* If C indicates start of multibyte char, get the
2228		   actual character code in C, and set the pattern
2229		   pointer P to the next character boundary.  */
2230		if (bufp->multibyte && BASE_LEADING_CODE_P (c))
2231		  {
2232		    PATUNFETCH;
2233		    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2234		    p += len;
2235		  }
2236		/* What should we do for the character which is
2237		   greater than 0x7F, but not BASE_LEADING_CODE_P?
2238		   XXX */
2239
2240		/* See if we're at the beginning of a possible character
2241		   class.  */
2242
2243		else if (!escaped_char &&
2244			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2245		  {
2246		    /* Leave room for the null.	 */
2247		    char str[CHAR_CLASS_MAX_LENGTH + 1];
2248
2249		    PATFETCH (c);
2250		    c1 = 0;
2251
2252		    /* If pattern is `[[:'.  */
2253		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2254
2255		    for (;;)
2256		      {
2257			PATFETCH (c);
2258			if (c == ':' || c == ']' || p == pend
2259			    || c1 == CHAR_CLASS_MAX_LENGTH)
2260			  break;
2261			str[c1++] = c;
2262		      }
2263		    str[c1] = '\0';
2264
2265		    /* If isn't a word bracketed by `[:' and `:]':
2266		       undo the ending character, the letters, and
2267		       leave the leading `:' and `[' (but set bits for
2268		       them).  */
2269		    if (c == ':' && *p == ']')
2270		      {
2271			int ch;
2272			boolean is_alnum = STREQ (str, "alnum");
2273			boolean is_alpha = STREQ (str, "alpha");
2274			boolean is_blank = STREQ (str, "blank");
2275			boolean is_cntrl = STREQ (str, "cntrl");
2276			boolean is_digit = STREQ (str, "digit");
2277			boolean is_graph = STREQ (str, "graph");
2278			boolean is_lower = STREQ (str, "lower");
2279			boolean is_print = STREQ (str, "print");
2280			boolean is_punct = STREQ (str, "punct");
2281			boolean is_space = STREQ (str, "space");
2282			boolean is_upper = STREQ (str, "upper");
2283			boolean is_xdigit = STREQ (str, "xdigit");
2284
2285			if (!IS_CHAR_CLASS (str))
2286			  FREE_STACK_RETURN (REG_ECTYPE);
2287
2288			/* Throw away the ] at the end of the character
2289			   class.  */
2290			PATFETCH (c);
2291
2292			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2293
2294			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2295			  {
2296			    int translated = TRANSLATE (ch);
2297			    /* This was split into 3 if's to
2298			       avoid an arbitrary limit in some compiler.  */
2299			    if (   (is_alnum  && ISALNUM (ch))
2300				|| (is_alpha  && ISALPHA (ch))
2301				|| (is_blank  && ISBLANK (ch))
2302				|| (is_cntrl  && ISCNTRL (ch)))
2303			      SET_LIST_BIT (translated);
2304			    if (   (is_digit  && ISDIGIT (ch))
2305				|| (is_graph  && ISGRAPH (ch))
2306				|| (is_lower  && ISLOWER (ch))
2307				|| (is_print  && ISPRINT (ch)))
2308			      SET_LIST_BIT (translated);
2309			    if (   (is_punct  && ISPUNCT (ch))
2310				|| (is_space  && ISSPACE (ch))
2311				|| (is_upper  && ISUPPER (ch))
2312				|| (is_xdigit && ISXDIGIT (ch)))
2313			      SET_LIST_BIT (translated);
2314			  }
2315
2316			/* Repeat the loop. */
2317			continue;
2318		      }
2319		    else
2320		      {
2321			c1++;
2322			while (c1--)
2323			  PATUNFETCH;
2324			SET_LIST_BIT ('[');
2325
2326			/* Because the `:' may starts the range, we
2327			   can't simply set bit and repeat the loop.
2328			   Instead, just set it to C and handle below.	*/
2329			c = ':';
2330		      }
2331		  }
2332
2333		if (p < pend && p[0] == '-' && p[1] != ']')
2334		  {
2335
2336		    /* Discard the `-'. */
2337		    PATFETCH (c1);
2338
2339		    /* Fetch the character which ends the range. */
2340		    PATFETCH (c1);
2341		    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
2342		      {
2343			PATUNFETCH;
2344			c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2345			p += len;
2346		      }
2347
2348		    if (SINGLE_BYTE_CHAR_P (c)
2349			&& ! SINGLE_BYTE_CHAR_P (c1))
2350		      {
2351			/* Handle a range such as \177-\377 in multibyte mode.
2352			   Split that into two ranges,,
2353			   the low one ending at 0237, and the high one
2354			   starting at ...040.  */
2355			int c1_base = (c1 & ~0177) | 040;
2356			SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2357			c1 = 0237;
2358		      }
2359		    else if (!SAME_CHARSET_P (c, c1))
2360		      FREE_STACK_RETURN (REG_ERANGE);
2361		  }
2362		else
2363		  /* Range from C to C. */
2364		  c1 = c;
2365
2366		/* Set the range ... */
2367		if (SINGLE_BYTE_CHAR_P (c))
2368		  /* ... into bitmap.  */
2369		  {
2370		    unsigned this_char;
2371		    int range_start = c, range_end = c1;
2372
2373		    /* If the start is after the end, the range is empty.  */
2374		    if (range_start > range_end)
2375		      {
2376			if (syntax & RE_NO_EMPTY_RANGES)
2377			  FREE_STACK_RETURN (REG_ERANGE);
2378			/* Else, repeat the loop.  */
2379		      }
2380		    else
2381		      {
2382			for (this_char = range_start; this_char <= range_end;
2383			     this_char++)
2384			  SET_LIST_BIT (TRANSLATE (this_char));
2385		      }
2386		  }
2387		else
2388		  /* ... into range table.  */
2389		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2390	      }
2391
2392	    /* Discard any (non)matching list bytes that are all 0 at the
2393	       end of the map.	Decrease the map-length byte too.  */
2394	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2395	      b[-1]--;
2396	    b += b[-1];
2397
2398	    /* Build real range table from work area. */
2399	    if (RANGE_TABLE_WORK_USED (range_table_work))
2400	      {
2401		int i;
2402		int used = RANGE_TABLE_WORK_USED (range_table_work);
2403
2404		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
2405		   bytes for COUNT and three bytes for each character.	*/
2406		GET_BUFFER_SPACE (2 + used * 3);
2407
2408		/* Indicate the existence of range table.  */
2409		laststart[1] |= 0x80;
2410
2411		STORE_NUMBER_AND_INCR (b, used / 2);
2412		for (i = 0; i < used; i++)
2413		  STORE_CHARACTER_AND_INCR
2414		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2415	      }
2416	  }
2417	  break;
2418
2419
2420	case '(':
2421	  if (syntax & RE_NO_BK_PARENS)
2422	    goto handle_open;
2423	  else
2424	    goto normal_char;
2425
2426
2427	case ')':
2428	  if (syntax & RE_NO_BK_PARENS)
2429	    goto handle_close;
2430	  else
2431	    goto normal_char;
2432
2433
2434	case '\n':
2435	  if (syntax & RE_NEWLINE_ALT)
2436	    goto handle_alt;
2437	  else
2438	    goto normal_char;
2439
2440
2441	case '|':
2442	  if (syntax & RE_NO_BK_VBAR)
2443	    goto handle_alt;
2444	  else
2445	    goto normal_char;
2446
2447
2448	case '{':
2449	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2450	     goto handle_interval;
2451	   else
2452	     goto normal_char;
2453
2454
2455	case '\\':
2456	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2457
2458	  /* Do not translate the character after the \, so that we can
2459	     distinguish, e.g., \B from \b, even if we normally would
2460	     translate, e.g., B to b.  */
2461	  PATFETCH_RAW (c);
2462
2463	  switch (c)
2464	    {
2465	    case '(':
2466	      if (syntax & RE_NO_BK_PARENS)
2467		goto normal_backslash;
2468
2469	    handle_open:
2470	      bufp->re_nsub++;
2471	      regnum++;
2472
2473	      if (COMPILE_STACK_FULL)
2474		{
2475		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2476			    compile_stack_elt_t);
2477		  if (compile_stack.stack == NULL) return REG_ESPACE;
2478
2479		  compile_stack.size <<= 1;
2480		}
2481
2482	      /* These are the values to restore when we hit end of this
2483		 group.	 They are all relative offsets, so that if the
2484		 whole pattern moves because of realloc, they will still
2485		 be valid.  */
2486	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2487	      COMPILE_STACK_TOP.fixup_alt_jump
2488		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2489	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2490	      COMPILE_STACK_TOP.regnum = regnum;
2491
2492	      /* We will eventually replace the 0 with the number of
2493		 groups inner to this one.  But do not push a
2494		 start_memory for groups beyond the last one we can
2495		 represent in the compiled pattern.  */
2496	      if (regnum <= MAX_REGNUM)
2497		{
2498		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2499		  BUF_PUSH_3 (start_memory, regnum, 0);
2500		}
2501
2502	      compile_stack.avail++;
2503
2504	      fixup_alt_jump = 0;
2505	      laststart = 0;
2506	      begalt = b;
2507	      /* If we've reached MAX_REGNUM groups, then this open
2508		 won't actually generate any code, so we'll have to
2509		 clear pending_exact explicitly.  */
2510	      pending_exact = 0;
2511	      break;
2512
2513
2514	    case ')':
2515	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2516
2517	      if (COMPILE_STACK_EMPTY) {
2518		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2519		  goto normal_backslash;
2520		else
2521		  FREE_STACK_RETURN (REG_ERPAREN);
2522	      }
2523
2524	    handle_close:
2525	      if (fixup_alt_jump)
2526		{ /* Push a dummy failure point at the end of the
2527		     alternative for a possible future
2528		     `pop_failure_jump' to pop.	 See comments at
2529		     `push_dummy_failure' in `re_match_2'.  */
2530		  BUF_PUSH (push_dummy_failure);
2531
2532		  /* We allocated space for this jump when we assigned
2533		     to `fixup_alt_jump', in the `handle_alt' case below.  */
2534		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2535		}
2536
2537	      /* See similar code for backslashed left paren above.  */
2538	      if (COMPILE_STACK_EMPTY) {
2539		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2540		  goto normal_char;
2541		else
2542		  FREE_STACK_RETURN (REG_ERPAREN);
2543	      }
2544
2545	      /* Since we just checked for an empty stack above, this
2546		 ``can't happen''.  */
2547	      assert (compile_stack.avail != 0);
2548	      {
2549		/* We don't just want to restore into `regnum', because
2550		   later groups should continue to be numbered higher,
2551		   as in `(ab)c(de)' -- the second group is #2.	 */
2552		regnum_t this_group_regnum;
2553
2554		compile_stack.avail--;
2555		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2556		fixup_alt_jump
2557		  = COMPILE_STACK_TOP.fixup_alt_jump
2558		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2559		    : 0;
2560		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2561		this_group_regnum = COMPILE_STACK_TOP.regnum;
2562		/* If we've reached MAX_REGNUM groups, then this open
2563		   won't actually generate any code, so we'll have to
2564		   clear pending_exact explicitly.  */
2565		pending_exact = 0;
2566
2567		/* We're at the end of the group, so now we know how many
2568		   groups were inside this one.	 */
2569		if (this_group_regnum <= MAX_REGNUM)
2570		  {
2571		    unsigned char *inner_group_loc
2572		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2573
2574		    *inner_group_loc = regnum - this_group_regnum;
2575		    BUF_PUSH_3 (stop_memory, this_group_regnum,
2576				regnum - this_group_regnum);
2577		  }
2578	      }
2579	      break;
2580
2581
2582	    case '|':					/* `\|'.  */
2583	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2584		goto normal_backslash;
2585	    handle_alt:
2586	      if (syntax & RE_LIMITED_OPS)
2587		goto normal_char;
2588
2589	      /* Insert before the previous alternative a jump which
2590		 jumps to this alternative if the former fails.	 */
2591	      GET_BUFFER_SPACE (3);
2592	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
2593	      pending_exact = 0;
2594	      b += 3;
2595
2596	      /* The alternative before this one has a jump after it
2597		 which gets executed if it gets matched.  Adjust that
2598		 jump so it will jump to this alternative's analogous
2599		 jump (put in below, which in turn will jump to the next
2600		 (if any) alternative's such jump, etc.).  The last such
2601		 jump jumps to the correct final destination.  A picture:
2602			  _____ _____
2603			  |   | |   |
2604			  |   v |   v
2605			 a | b	 | c
2606
2607		 If we are at `b', then fixup_alt_jump right now points to a
2608		 three-byte space after `a'.  We'll put in the jump, set
2609		 fixup_alt_jump to right after `b', and leave behind three
2610		 bytes which we'll fill in when we get to after `c'.  */
2611
2612	      if (fixup_alt_jump)
2613		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2614
2615	      /* Mark and leave space for a jump after this alternative,
2616		 to be filled in later either by next alternative or
2617		 when know we're at the end of a series of alternatives.  */
2618	      fixup_alt_jump = b;
2619	      GET_BUFFER_SPACE (3);
2620	      b += 3;
2621
2622	      laststart = 0;
2623	      begalt = b;
2624	      break;
2625
2626
2627	    case '{':
2628	      /* If \{ is a literal.  */
2629	      if (!(syntax & RE_INTERVALS)
2630		     /* If we're at `\{' and it's not the open-interval
2631			operator.  */
2632		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2633		  || (p - 2 == pattern	&&  p == pend))
2634		goto normal_backslash;
2635
2636	    handle_interval:
2637	      {
2638		/* If got here, then the syntax allows intervals.  */
2639
2640		/* At least (most) this many matches must be made.  */
2641		int lower_bound = -1, upper_bound = -1;
2642
2643		beg_interval = p - 1;
2644
2645		if (p == pend)
2646		  {
2647		    if (syntax & RE_NO_BK_BRACES)
2648		      goto unfetch_interval;
2649		    else
2650		      FREE_STACK_RETURN (REG_EBRACE);
2651		  }
2652
2653		GET_UNSIGNED_NUMBER (lower_bound);
2654
2655		if (c == ',')
2656		  {
2657		    GET_UNSIGNED_NUMBER (upper_bound);
2658		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2659		  }
2660		else
2661		  /* Interval such as `{1}' => match exactly once. */
2662		  upper_bound = lower_bound;
2663
2664		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2665		    || lower_bound > upper_bound)
2666		  {
2667		    if (syntax & RE_NO_BK_BRACES)
2668		      goto unfetch_interval;
2669		    else
2670		      FREE_STACK_RETURN (REG_BADBR);
2671		  }
2672
2673		if (!(syntax & RE_NO_BK_BRACES))
2674		  {
2675		    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2676
2677		    PATFETCH (c);
2678		  }
2679
2680		if (c != '}')
2681		  {
2682		    if (syntax & RE_NO_BK_BRACES)
2683		      goto unfetch_interval;
2684		    else
2685		      FREE_STACK_RETURN (REG_BADBR);
2686		  }
2687
2688		/* We just parsed a valid interval.  */
2689
2690		/* If it's invalid to have no preceding re.  */
2691		if (!laststart)
2692		  {
2693		    if (syntax & RE_CONTEXT_INVALID_OPS)
2694		      FREE_STACK_RETURN (REG_BADRPT);
2695		    else if (syntax & RE_CONTEXT_INDEP_OPS)
2696		      laststart = b;
2697		    else
2698		      goto unfetch_interval;
2699		  }
2700
2701		/* If the upper bound is zero, don't want to succeed at
2702		   all; jump from `laststart' to `b + 3', which will be
2703		   the end of the buffer after we insert the jump.  */
2704		 if (upper_bound == 0)
2705		   {
2706		     GET_BUFFER_SPACE (3);
2707		     INSERT_JUMP (jump, laststart, b + 3);
2708		     b += 3;
2709		   }
2710
2711		 /* Otherwise, we have a nontrivial interval.  When
2712		    we're all done, the pattern will look like:
2713		      set_number_at <jump count> <upper bound>
2714		      set_number_at <succeed_n count> <lower bound>
2715		      succeed_n <after jump addr> <succeed_n count>
2716		      <body of loop>
2717		      jump_n <succeed_n addr> <jump count>
2718		    (The upper bound and `jump_n' are omitted if
2719		    `upper_bound' is 1, though.)  */
2720		 else
2721		   { /* If the upper bound is > 1, we need to insert
2722			more at the end of the loop.  */
2723		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2724
2725		     GET_BUFFER_SPACE (nbytes);
2726
2727		     /* Initialize lower bound of the `succeed_n', even
2728			though it will be set during matching by its
2729			attendant `set_number_at' (inserted next),
2730			because `re_compile_fastmap' needs to know.
2731			Jump to the `jump_n' we might insert below.  */
2732		     INSERT_JUMP2 (succeed_n, laststart,
2733				   b + 5 + (upper_bound > 1) * 5,
2734				   lower_bound);
2735		     b += 5;
2736
2737		     /* Code to initialize the lower bound.  Insert
2738			before the `succeed_n'.	 The `5' is the last two
2739			bytes of this `set_number_at', plus 3 bytes of
2740			the following `succeed_n'.  */
2741		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2742		     b += 5;
2743
2744		     if (upper_bound > 1)
2745		       { /* More than one repetition is allowed, so
2746			    append a backward jump to the `succeed_n'
2747			    that starts this interval.
2748
2749			    When we've reached this during matching,
2750			    we'll have matched the interval once, so
2751			    jump back only `upper_bound - 1' times.  */
2752			 STORE_JUMP2 (jump_n, b, laststart + 5,
2753				      upper_bound - 1);
2754			 b += 5;
2755
2756			 /* The location we want to set is the second
2757			    parameter of the `jump_n'; that is `b-2' as
2758			    an absolute address.  `laststart' will be
2759			    the `set_number_at' we're about to insert;
2760			    `laststart+3' the number to set, the source
2761			    for the relative address.  But we are
2762			    inserting into the middle of the pattern --
2763			    so everything is getting moved up by 5.
2764			    Conclusion: (b - 2) - (laststart + 3) + 5,
2765			    i.e., b - laststart.
2766
2767			    We insert this at the beginning of the loop
2768			    so that if we fail during matching, we'll
2769			    reinitialize the bounds.  */
2770			 insert_op2 (set_number_at, laststart, b - laststart,
2771				     upper_bound - 1, b);
2772			 b += 5;
2773		       }
2774		   }
2775		pending_exact = 0;
2776		beg_interval = NULL;
2777	      }
2778	      break;
2779
2780	    unfetch_interval:
2781	      /* If an invalid interval, match the characters as literals.  */
2782	       assert (beg_interval);
2783	       p = beg_interval;
2784	       beg_interval = NULL;
2785
2786	       /* normal_char and normal_backslash need `c'.  */
2787	       PATFETCH (c);
2788
2789	       if (!(syntax & RE_NO_BK_BRACES))
2790		 {
2791		   if (p > pattern  &&	p[-1] == '\\')
2792		     goto normal_backslash;
2793		 }
2794	       goto normal_char;
2795
2796#ifdef emacs
2797	    /* There is no way to specify the before_dot and after_dot
2798	       operators.  rms says this is ok.	 --karl	 */
2799	    case '=':
2800	      BUF_PUSH (at_dot);
2801	      break;
2802
2803	    case 's':
2804	      laststart = b;
2805	      PATFETCH (c);
2806	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2807	      break;
2808
2809	    case 'S':
2810	      laststart = b;
2811	      PATFETCH (c);
2812	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2813	      break;
2814
2815	    case 'c':
2816	      laststart = b;
2817	      PATFETCH_RAW (c);
2818	      BUF_PUSH_2 (categoryspec, c);
2819	      break;
2820
2821	    case 'C':
2822	      laststart = b;
2823	      PATFETCH_RAW (c);
2824	      BUF_PUSH_2 (notcategoryspec, c);
2825	      break;
2826#endif /* emacs */
2827
2828
2829	    case 'w':
2830	      laststart = b;
2831	      BUF_PUSH (wordchar);
2832	      break;
2833
2834
2835	    case 'W':
2836	      laststart = b;
2837	      BUF_PUSH (notwordchar);
2838	      break;
2839
2840
2841	    case '<':
2842	      BUF_PUSH (wordbeg);
2843	      break;
2844
2845	    case '>':
2846	      BUF_PUSH (wordend);
2847	      break;
2848
2849	    case 'b':
2850	      BUF_PUSH (wordbound);
2851	      break;
2852
2853	    case 'B':
2854	      BUF_PUSH (notwordbound);
2855	      break;
2856
2857	    case '`':
2858	      BUF_PUSH (begbuf);
2859	      break;
2860
2861	    case '\'':
2862	      BUF_PUSH (endbuf);
2863	      break;
2864
2865	    case '1': case '2': case '3': case '4': case '5':
2866	    case '6': case '7': case '8': case '9':
2867	      if (syntax & RE_NO_BK_REFS)
2868		goto normal_char;
2869
2870	      c1 = c - '0';
2871
2872	      if (c1 > regnum)
2873		FREE_STACK_RETURN (REG_ESUBREG);
2874
2875	      /* Can't back reference to a subexpression if inside of it.  */
2876	      if (group_in_compile_stack (compile_stack, c1))
2877		goto normal_char;
2878
2879	      laststart = b;
2880	      BUF_PUSH_2 (duplicate, c1);
2881	      break;
2882
2883
2884	    case '+':
2885	    case '?':
2886	      if (syntax & RE_BK_PLUS_QM)
2887		goto handle_plus;
2888	      else
2889		goto normal_backslash;
2890
2891	    default:
2892	    normal_backslash:
2893	      /* You might think it would be useful for \ to mean
2894		 not to translate; but if we don't translate it
2895		 it will never match anything.	*/
2896	      c = TRANSLATE (c);
2897	      goto normal_char;
2898	    }
2899	  break;
2900
2901
2902	default:
2903	/* Expects the character in `c'.  */
2904	normal_char:
2905	  p1 = p - 1;		/* P1 points the head of C.  */
2906#ifdef emacs
2907	  if (bufp->multibyte)
2908	    {
2909	      c = STRING_CHAR (p1, pend - p1);
2910	      c = TRANSLATE (c);
2911	      /* Set P to the next character boundary.  */
2912	      p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
2913	    }
2914#endif
2915	      /* If no exactn currently being built.  */
2916	  if (!pending_exact
2917
2918	      /* If last exactn not at current position.  */
2919	      || pending_exact + *pending_exact + 1 != b
2920
2921	      /* We have only one byte following the exactn for the count.  */
2922	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
2923
2924	      /* If followed by a repetition operator.	*/
2925	      || (p != pend && (*p == '*' || *p == '^'))
2926	      || ((syntax & RE_BK_PLUS_QM)
2927		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
2928		  : p != pend && (*p == '+' || *p == '?'))
2929	      || ((syntax & RE_INTERVALS)
2930		  && ((syntax & RE_NO_BK_BRACES)
2931		      ? p != pend && *p == '{'
2932		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
2933	    {
2934	      /* Start building a new exactn.  */
2935
2936	      laststart = b;
2937
2938	      BUF_PUSH_2 (exactn, 0);
2939	      pending_exact = b - 1;
2940	    }
2941
2942#ifdef emacs
2943	  if (! SINGLE_BYTE_CHAR_P (c))
2944	    {
2945	      unsigned char work[4], *str;
2946	      int i = CHAR_STRING (c, work, str);
2947	      int j;
2948	      for (j = 0; j < i; j++)
2949		{
2950		  BUF_PUSH (str[j]);
2951		  (*pending_exact)++;
2952		}
2953	    }
2954	  else
2955#endif
2956	    {
2957	      BUF_PUSH (c);
2958	      (*pending_exact)++;
2959	    }
2960	  break;
2961	} /* switch (c) */
2962    } /* while p != pend */
2963
2964
2965  /* Through the pattern now.  */
2966
2967  if (fixup_alt_jump)
2968    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2969
2970  if (!COMPILE_STACK_EMPTY)
2971    FREE_STACK_RETURN (REG_EPAREN);
2972
2973  /* If we don't want backtracking, force success
2974     the first time we reach the end of the compiled pattern.  */
2975  if (syntax & RE_NO_POSIX_BACKTRACKING)
2976    BUF_PUSH (succeed);
2977
2978  free (compile_stack.stack);
2979
2980  /* We have succeeded; set the length of the buffer.  */
2981  bufp->used = b - bufp->buffer;
2982
2983#ifdef DEBUG
2984  if (debug)
2985    {
2986      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2987      print_compiled_pattern (bufp);
2988    }
2989#endif /* DEBUG */
2990
2991#ifndef MATCH_MAY_ALLOCATE
2992  /* Initialize the failure stack to the largest possible stack.  This
2993     isn't necessary unless we're trying to avoid calling alloca in
2994     the search and match routines.  */
2995  {
2996    int num_regs = bufp->re_nsub + 1;
2997
2998    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
2999      {
3000	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3001
3002#ifdef emacs
3003	if (! fail_stack.stack)
3004	  fail_stack.stack
3005	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
3006					    * sizeof (fail_stack_elt_t));
3007	else
3008	  fail_stack.stack
3009	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3010					     (fail_stack.size
3011					      * sizeof (fail_stack_elt_t)));
3012#else /* not emacs */
3013	if (! fail_stack.stack)
3014	  fail_stack.stack
3015	    = (fail_stack_elt_t *) malloc (fail_stack.size
3016					   * sizeof (fail_stack_elt_t));
3017	else
3018	  fail_stack.stack
3019	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
3020					    (fail_stack.size
3021					     * sizeof (fail_stack_elt_t)));
3022#endif /* not emacs */
3023      }
3024
3025    regex_grow_registers (num_regs);
3026  }
3027#endif /* not MATCH_MAY_ALLOCATE */
3028
3029  return REG_NOERROR;
3030} /* regex_compile */
3031
3032/* Subroutines for `regex_compile'.  */
3033
3034/* Store OP at LOC followed by two-byte integer parameter ARG.	*/
3035
3036static void
3037store_op1 (op, loc, arg)
3038    re_opcode_t op;
3039    unsigned char *loc;
3040    int arg;
3041{
3042  *loc = (unsigned char) op;
3043  STORE_NUMBER (loc + 1, arg);
3044}
3045
3046
3047/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3048
3049static void
3050store_op2 (op, loc, arg1, arg2)
3051    re_opcode_t op;
3052    unsigned char *loc;
3053    int arg1, arg2;
3054{
3055  *loc = (unsigned char) op;
3056  STORE_NUMBER (loc + 1, arg1);
3057  STORE_NUMBER (loc + 3, arg2);
3058}
3059
3060
3061/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3062   for OP followed by two-byte integer parameter ARG.  */
3063
3064static void
3065insert_op1 (op, loc, arg, end)
3066    re_opcode_t op;
3067    unsigned char *loc;
3068    int arg;
3069    unsigned char *end;
3070{
3071  register unsigned char *pfrom = end;
3072  register unsigned char *pto = end + 3;
3073
3074  while (pfrom != loc)
3075    *--pto = *--pfrom;
3076
3077  store_op1 (op, loc, arg);
3078}
3079
3080
3081/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3082
3083static void
3084insert_op2 (op, loc, arg1, arg2, end)
3085    re_opcode_t op;
3086    unsigned char *loc;
3087    int arg1, arg2;
3088    unsigned char *end;
3089{
3090  register unsigned char *pfrom = end;
3091  register unsigned char *pto = end + 5;
3092
3093  while (pfrom != loc)
3094    *--pto = *--pfrom;
3095
3096  store_op2 (op, loc, arg1, arg2);
3097}
3098
3099
3100/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3101   after an alternative or a begin-subexpression.  We assume there is at
3102   least one character before the ^.  */
3103
3104static boolean
3105at_begline_loc_p (pattern, p, syntax)
3106    const char *pattern, *p;
3107    reg_syntax_t syntax;
3108{
3109  const char *prev = p - 2;
3110  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3111
3112  return
3113       /* After a subexpression?  */
3114       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3115       /* After an alternative?	 */
3116    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3117}
3118
3119
3120/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3121   at least one character after the $, i.e., `P < PEND'.  */
3122
3123static boolean
3124at_endline_loc_p (p, pend, syntax)
3125    const char *p, *pend;
3126    int syntax;
3127{
3128  const char *next = p;
3129  boolean next_backslash = *next == '\\';
3130  const char *next_next = p + 1 < pend ? p + 1 : 0;
3131
3132  return
3133       /* Before a subexpression?  */
3134       (syntax & RE_NO_BK_PARENS ? *next == ')'
3135	: next_backslash && next_next && *next_next == ')')
3136       /* Before an alternative?  */
3137    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3138	: next_backslash && next_next && *next_next == '|');
3139}
3140
3141
3142/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3143   false if it's not.  */
3144
3145static boolean
3146group_in_compile_stack (compile_stack, regnum)
3147    compile_stack_type compile_stack;
3148    regnum_t regnum;
3149{
3150  int this_element;
3151
3152  for (this_element = compile_stack.avail - 1;
3153       this_element >= 0;
3154       this_element--)
3155    if (compile_stack.stack[this_element].regnum == regnum)
3156      return true;
3157
3158  return false;
3159}
3160
3161/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3162   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3163   characters can start a string that matches the pattern.  This fastmap
3164   is used by re_search to skip quickly over impossible starting points.
3165
3166   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3167   area as BUFP->fastmap.
3168
3169   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3170   the pattern buffer.
3171
3172   Returns 0 if we succeed, -2 if an internal error.   */
3173
3174int
3175re_compile_fastmap (bufp)
3176     struct re_pattern_buffer *bufp;
3177{
3178  int i, j, k;
3179#ifdef MATCH_MAY_ALLOCATE
3180  fail_stack_type fail_stack;
3181#endif
3182#ifndef REGEX_MALLOC
3183  char *destination;
3184#endif
3185  /* We don't push any register information onto the failure stack.  */
3186  unsigned num_regs = 0;
3187
3188  register char *fastmap = bufp->fastmap;
3189  unsigned char *pattern = bufp->buffer;
3190  unsigned long size = bufp->used;
3191  unsigned char *p = pattern;
3192  register unsigned char *pend = pattern + size;
3193
3194  /* This holds the pointer to the failure stack, when
3195     it is allocated relocatably.  */
3196  fail_stack_elt_t *failure_stack_ptr;
3197
3198  /* Assume that each path through the pattern can be null until
3199     proven otherwise.	We set this false at the bottom of switch
3200     statement, to which we get only if a particular path doesn't
3201     match the empty string.  */
3202  boolean path_can_be_null = true;
3203
3204  /* We aren't doing a `succeed_n' to begin with.  */
3205  boolean succeed_n_p = false;
3206
3207  /* If all elements for base leading-codes in fastmap is set, this
3208     flag is set true.	*/
3209  boolean match_any_multibyte_characters = false;
3210
3211  /* Maximum code of simple (single byte) character. */
3212  int simple_char_max;
3213
3214  assert (fastmap != NULL && p != NULL);
3215
3216  INIT_FAIL_STACK ();
3217  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
3218  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3219  bufp->can_be_null = 0;
3220
3221  while (1)
3222    {
3223      if (p == pend || *p == succeed)
3224	{
3225	  /* We have reached the (effective) end of pattern.  */
3226	  if (!FAIL_STACK_EMPTY ())
3227	    {
3228	      bufp->can_be_null |= path_can_be_null;
3229
3230	      /* Reset for next path.  */
3231	      path_can_be_null = true;
3232
3233	      p = fail_stack.stack[--fail_stack.avail].pointer;
3234
3235	      continue;
3236	    }
3237	  else
3238	    break;
3239	}
3240
3241      /* We should never be about to go beyond the end of the pattern.	*/
3242      assert (p < pend);
3243
3244      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3245	{
3246
3247	/* I guess the idea here is to simply not bother with a fastmap
3248	   if a backreference is used, since it's too hard to figure out
3249	   the fastmap for the corresponding group.  Setting
3250	   `can_be_null' stops `re_search_2' from using the fastmap, so
3251	   that is all we do.  */
3252	case duplicate:
3253	  bufp->can_be_null = 1;
3254	  goto done;
3255
3256
3257      /* Following are the cases which match a character.  These end
3258	 with `break'.	*/
3259
3260	case exactn:
3261	  fastmap[p[1]] = 1;
3262	  break;
3263
3264
3265#ifndef emacs
3266	case charset:
3267	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3268	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3269	      fastmap[j] = 1;
3270	  break;
3271
3272
3273	case charset_not:
3274	  /* Chars beyond end of map must be allowed.  */
3275	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3276	    fastmap[j] = 1;
3277
3278	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3279	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3280	      fastmap[j] = 1;
3281	  break;
3282
3283
3284	case wordchar:
3285	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3286	    if (SYNTAX (j) == Sword)
3287	      fastmap[j] = 1;
3288	  break;
3289
3290
3291	case notwordchar:
3292	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3293	    if (SYNTAX (j) != Sword)
3294	      fastmap[j] = 1;
3295	  break;
3296#else  /* emacs */
3297	case charset:
3298	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3299	       j >= 0; j--)
3300	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3301	      fastmap[j] = 1;
3302
3303	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3304	      && match_any_multibyte_characters == false)
3305	    {
3306	      /* Set fastmap[I] 1 where I is a base leading code of each
3307		 multibyte character in the range table. */
3308	      int c, count;
3309
3310	      /* Make P points the range table. */
3311	      p += CHARSET_BITMAP_SIZE (&p[-2]);
3312
3313	      /* Extract the number of ranges in range table into
3314		 COUNT.	 */
3315	      EXTRACT_NUMBER_AND_INCR (count, p);
3316	      for (; count > 0; count--, p += 2 * 3) /* XXX */
3317		{
3318		  /* Extract the start of each range.  */
3319		  EXTRACT_CHARACTER (c, p);
3320		  j = CHAR_CHARSET (c);
3321		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
3322		}
3323	    }
3324	  break;
3325
3326
3327	case charset_not:
3328	  /* Chars beyond end of bitmap are possible matches.
3329	     All the single-byte codes can occur in multibyte buffers.
3330	     So any that are not listed in the charset
3331	     are possible matches, even in multibyte buffers.  */
3332	  simple_char_max = (1 << BYTEWIDTH);
3333	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3334	       j < simple_char_max; j++)
3335	    fastmap[j] = 1;
3336
3337	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3338	       j >= 0; j--)
3339	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3340	      fastmap[j] = 1;
3341
3342	  if (bufp->multibyte)
3343	    /* Any character set can possibly contain a character
3344	       which doesn't match the specified set of characters.  */
3345	    {
3346	    set_fastmap_for_multibyte_characters:
3347	      if (match_any_multibyte_characters == false)
3348		{
3349		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
3350		    if (BASE_LEADING_CODE_P (j))
3351		      fastmap[j] = 1;
3352		  match_any_multibyte_characters = true;
3353		}
3354	    }
3355	  break;
3356
3357
3358	case wordchar:
3359	  /* All the single-byte codes can occur in multibyte buffers,
3360	     and they may have word syntax.  So do consider them.  */
3361	  simple_char_max = (1 << BYTEWIDTH);
3362	  for (j = 0; j < simple_char_max; j++)
3363	    if (SYNTAX (j) == Sword)
3364	      fastmap[j] = 1;
3365
3366	  if (bufp->multibyte)
3367	    /* Any character set can possibly contain a character
3368	       whose syntax is `Sword'.	 */
3369	    goto set_fastmap_for_multibyte_characters;
3370	  break;
3371
3372
3373	case notwordchar:
3374	  /* All the single-byte codes can occur in multibyte buffers,
3375	     and they may not have word syntax.  So do consider them.  */
3376	  simple_char_max = (1 << BYTEWIDTH);
3377	  for (j = 0; j < simple_char_max; j++)
3378	    if (SYNTAX (j) != Sword)
3379	      fastmap[j] = 1;
3380
3381	  if (bufp->multibyte)
3382	    /* Any character set can possibly contain a character
3383	       whose syntax is not `Sword'.  */
3384	    goto set_fastmap_for_multibyte_characters;
3385	  break;
3386#endif
3387
3388	case anychar:
3389	  {
3390	    int fastmap_newline = fastmap['\n'];
3391
3392	    /* `.' matches anything, except perhaps newline.
3393	       Even in a multibyte buffer, it should match any
3394	       conceivable byte value for the fastmap.  */
3395	    if (bufp->multibyte)
3396	      match_any_multibyte_characters = true;
3397
3398	    simple_char_max = (1 << BYTEWIDTH);
3399	    for (j = 0; j < simple_char_max; j++)
3400	      fastmap[j] = 1;
3401
3402	    /* ... except perhaps newline.  */
3403	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3404	      fastmap['\n'] = fastmap_newline;
3405
3406	    /* Return if we have already set `can_be_null'; if we have,
3407	       then the fastmap is irrelevant.	Something's wrong here.	 */
3408	    else if (bufp->can_be_null)
3409	      goto done;
3410
3411	    /* Otherwise, have to check alternative paths.  */
3412	    break;
3413	  }
3414
3415#ifdef emacs
3416	case wordbound:
3417	case notwordbound:
3418	case wordbeg:
3419	case wordend:
3420	case notsyntaxspec:
3421	case syntaxspec:
3422	  /* This match depends on text properties.  These end with
3423	     aborting optimizations.  */
3424	  bufp->can_be_null = 1;
3425	  goto done;
3426#if 0
3427	  k = *p++;
3428	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3429	  for (j = 0; j < simple_char_max; j++)
3430	    if (SYNTAX (j) == (enum syntaxcode) k)
3431	      fastmap[j] = 1;
3432
3433	  if (bufp->multibyte)
3434	    /* Any character set can possibly contain a character
3435	       whose syntax is K.  */
3436	    goto set_fastmap_for_multibyte_characters;
3437	  break;
3438
3439	case notsyntaxspec:
3440	  k = *p++;
3441	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3442	  for (j = 0; j < simple_char_max; j++)
3443	    if (SYNTAX (j) != (enum syntaxcode) k)
3444	      fastmap[j] = 1;
3445
3446	  if (bufp->multibyte)
3447	    /* Any character set can possibly contain a character
3448	       whose syntax is not K.  */
3449	    goto set_fastmap_for_multibyte_characters;
3450	  break;
3451#endif
3452
3453
3454	case categoryspec:
3455	  k = *p++;
3456	  simple_char_max = (1 << BYTEWIDTH);
3457	  for (j = 0; j < simple_char_max; j++)
3458	    if (CHAR_HAS_CATEGORY (j, k))
3459	      fastmap[j] = 1;
3460
3461	  if (bufp->multibyte)
3462	    /* Any character set can possibly contain a character
3463	       whose category is K.  */
3464	    goto set_fastmap_for_multibyte_characters;
3465	  break;
3466
3467
3468	case notcategoryspec:
3469	  k = *p++;
3470	  simple_char_max = (1 << BYTEWIDTH);
3471	  for (j = 0; j < simple_char_max; j++)
3472	    if (!CHAR_HAS_CATEGORY (j, k))
3473	      fastmap[j] = 1;
3474
3475	  if (bufp->multibyte)
3476	    /* Any character set can possibly contain a character
3477	       whose category is not K.	 */
3478	    goto set_fastmap_for_multibyte_characters;
3479	  break;
3480
3481      /* All cases after this match the empty string.  These end with
3482	 `continue'.  */
3483
3484
3485	case before_dot:
3486	case at_dot:
3487	case after_dot:
3488	  continue;
3489#endif /* emacs */
3490
3491
3492	case no_op:
3493	case begline:
3494	case endline:
3495	case begbuf:
3496	case endbuf:
3497#ifndef emacs
3498	case wordbound:
3499	case notwordbound:
3500	case wordbeg:
3501	case wordend:
3502#endif
3503	case push_dummy_failure:
3504	  continue;
3505
3506
3507	case jump_n:
3508	case pop_failure_jump:
3509	case maybe_pop_jump:
3510	case jump:
3511	case jump_past_alt:
3512	case dummy_failure_jump:
3513	  EXTRACT_NUMBER_AND_INCR (j, p);
3514	  p += j;
3515	  if (j > 0)
3516	    continue;
3517
3518	  /* Jump backward implies we just went through the body of a
3519	     loop and matched nothing.	Opcode jumped to should be
3520	     `on_failure_jump' or `succeed_n'.	Just treat it like an
3521	     ordinary jump.  For a * loop, it has pushed its failure
3522	     point already; if so, discard that as redundant.  */
3523	  if ((re_opcode_t) *p != on_failure_jump
3524	      && (re_opcode_t) *p != succeed_n)
3525	    continue;
3526
3527	  p++;
3528	  EXTRACT_NUMBER_AND_INCR (j, p);
3529	  p += j;
3530
3531	  /* If what's on the stack is where we are now, pop it.  */
3532	  if (!FAIL_STACK_EMPTY ()
3533	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3534	    fail_stack.avail--;
3535
3536	  continue;
3537
3538
3539	case on_failure_jump:
3540	case on_failure_keep_string_jump:
3541	handle_on_failure_jump:
3542	  EXTRACT_NUMBER_AND_INCR (j, p);
3543
3544	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3545	     end of the pattern.  We don't want to push such a point,
3546	     since when we restore it above, entering the switch will
3547	     increment `p' past the end of the pattern.	 We don't need
3548	     to push such a point since we obviously won't find any more
3549	     fastmap entries beyond `pend'.  Such a pattern can match
3550	     the null string, though.  */
3551	  if (p + j < pend)
3552	    {
3553	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
3554		{
3555		  RESET_FAIL_STACK ();
3556		  return -2;
3557		}
3558	    }
3559	  else
3560	    bufp->can_be_null = 1;
3561
3562	  if (succeed_n_p)
3563	    {
3564	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
3565	      succeed_n_p = false;
3566	    }
3567
3568	  continue;
3569
3570
3571	case succeed_n:
3572	  /* Get to the number of times to succeed.  */
3573	  p += 2;
3574
3575	  /* Increment p past the n for when k != 0.  */
3576	  EXTRACT_NUMBER_AND_INCR (k, p);
3577	  if (k == 0)
3578	    {
3579	      p -= 4;
3580	      succeed_n_p = true;  /* Spaghetti code alert.  */
3581	      goto handle_on_failure_jump;
3582	    }
3583	  continue;
3584
3585
3586	case set_number_at:
3587	  p += 4;
3588	  continue;
3589
3590
3591	case start_memory:
3592	case stop_memory:
3593	  p += 2;
3594	  continue;
3595
3596
3597	default:
3598	  abort (); /* We have listed all the cases.  */
3599	} /* switch *p++ */
3600
3601      /* Getting here means we have found the possible starting
3602	 characters for one path of the pattern -- and that the empty
3603	 string does not match.	 We need not follow this path further.
3604	 Instead, look at the next alternative (remembered on the
3605	 stack), or quit if no more.  The test at the top of the loop
3606	 does these things.  */
3607      path_can_be_null = false;
3608      p = pend;
3609    } /* while p */
3610
3611  /* Set `can_be_null' for the last path (also the first path, if the
3612     pattern is empty).	 */
3613  bufp->can_be_null |= path_can_be_null;
3614
3615 done:
3616  RESET_FAIL_STACK ();
3617  return 0;
3618} /* re_compile_fastmap */
3619
3620/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3621   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3622   this memory for recording register information.  STARTS and ENDS
3623   must be allocated using the malloc library routine, and must each
3624   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3625
3626   If NUM_REGS == 0, then subsequent matches should allocate their own
3627   register data.
3628
3629   Unless this function is called, the first search or match using
3630   PATTERN_BUFFER will allocate its own register data, without
3631   freeing the old data.  */
3632
3633void
3634re_set_registers (bufp, regs, num_regs, starts, ends)
3635    struct re_pattern_buffer *bufp;
3636    struct re_registers *regs;
3637    unsigned num_regs;
3638    regoff_t *starts, *ends;
3639{
3640  if (num_regs)
3641    {
3642      bufp->regs_allocated = REGS_REALLOCATE;
3643      regs->num_regs = num_regs;
3644      regs->start = starts;
3645      regs->end = ends;
3646    }
3647  else
3648    {
3649      bufp->regs_allocated = REGS_UNALLOCATED;
3650      regs->num_regs = 0;
3651      regs->start = regs->end = (regoff_t *) 0;
3652    }
3653}
3654
3655/* Searching routines.	*/
3656
3657/* Like re_search_2, below, but only one string is specified, and
3658   doesn't let you say where to stop matching. */
3659
3660int
3661re_search (bufp, string, size, startpos, range, regs)
3662     struct re_pattern_buffer *bufp;
3663     const char *string;
3664     int size, startpos, range;
3665     struct re_registers *regs;
3666{
3667  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3668		      regs, size);
3669}
3670
3671/* End address of virtual concatenation of string.  */
3672#define STOP_ADDR_VSTRING(P)				\
3673  (((P) >= size1 ? string2 + size2 : string1 + size1))
3674
3675/* Address of POS in the concatenation of virtual string. */
3676#define POS_ADDR_VSTRING(POS)					\
3677  (((POS) >= size1 ? string2 - size1 : string1) + (POS))
3678
3679/* Using the compiled pattern in BUFP->buffer, first tries to match the
3680   virtual concatenation of STRING1 and STRING2, starting first at index
3681   STARTPOS, then at STARTPOS + 1, and so on.
3682
3683   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3684
3685   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3686   only at STARTPOS; in general, the last start tried is STARTPOS +
3687   RANGE.
3688
3689   In REGS, return the indices of the virtual concatenation of STRING1
3690   and STRING2 that matched the entire BUFP->buffer and its contained
3691   subexpressions.
3692
3693   Do not consider matching one past the index STOP in the virtual
3694   concatenation of STRING1 and STRING2.
3695
3696   We return either the position in the strings at which the match was
3697   found, -1 if no match, or -2 if error (such as failure
3698   stack overflow).  */
3699
3700int
3701re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3702     struct re_pattern_buffer *bufp;
3703     const char *string1, *string2;
3704     int size1, size2;
3705     int startpos;
3706     int range;
3707     struct re_registers *regs;
3708     int stop;
3709{
3710  int val;
3711  register char *fastmap = bufp->fastmap;
3712  register RE_TRANSLATE_TYPE translate = bufp->translate;
3713  int total_size = size1 + size2;
3714  int endpos = startpos + range;
3715  int anchored_start = 0;
3716
3717  /* Nonzero if we have to concern multibyte character.	 */
3718  int multibyte = bufp->multibyte;
3719
3720  /* Check for out-of-range STARTPOS.  */
3721  if (startpos < 0 || startpos > total_size)
3722    return -1;
3723
3724  /* Fix up RANGE if it might eventually take us outside
3725     the virtual concatenation of STRING1 and STRING2.
3726     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3727  if (endpos < 0)
3728    range = 0 - startpos;
3729  else if (endpos > total_size)
3730    range = total_size - startpos;
3731
3732  /* If the search isn't to be a backwards one, don't waste time in a
3733     search for a pattern anchored at beginning of buffer.  */
3734  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3735    {
3736      if (startpos > 0)
3737	return -1;
3738      else
3739	range = 0;
3740    }
3741
3742#ifdef emacs
3743  /* In a forward search for something that starts with \=.
3744     don't keep searching past point.  */
3745  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3746    {
3747      range = PT_BYTE - BEGV_BYTE - startpos;
3748      if (range < 0)
3749	return -1;
3750    }
3751#endif /* emacs */
3752
3753  /* Update the fastmap now if not correct already.  */
3754  if (fastmap && !bufp->fastmap_accurate)
3755    if (re_compile_fastmap (bufp) == -2)
3756      return -2;
3757
3758  /* See whether the pattern is anchored.  */
3759  if (bufp->buffer[0] == begline)
3760    anchored_start = 1;
3761
3762#ifdef emacs
3763  gl_state.object = re_match_object;
3764  {
3765    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
3766    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
3767
3768    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3769  }
3770#endif
3771
3772  /* Loop through the string, looking for a place to start matching.  */
3773  for (;;)
3774    {
3775      /* If the pattern is anchored,
3776	 skip quickly past places we cannot match.
3777	 We don't bother to treat startpos == 0 specially
3778	 because that case doesn't repeat.  */
3779      if (anchored_start && startpos > 0)
3780	{
3781	  if (! (bufp->newline_anchor
3782		 && ((startpos <= size1 ? string1[startpos - 1]
3783		      : string2[startpos - size1 - 1])
3784		     == '\n')))
3785	    goto advance;
3786	}
3787
3788      /* If a fastmap is supplied, skip quickly over characters that
3789	 cannot be the start of a match.  If the pattern can match the
3790	 null string, however, we don't need to skip characters; we want
3791	 the first null string.	 */
3792      if (fastmap && startpos < total_size && !bufp->can_be_null)
3793	{
3794	  register const char *d;
3795	  register unsigned int buf_ch;
3796
3797	  d = POS_ADDR_VSTRING (startpos);
3798
3799	  if (range > 0)	/* Searching forwards.	*/
3800	    {
3801	      register int lim = 0;
3802	      int irange = range;
3803
3804	      if (startpos < size1 && startpos + range >= size1)
3805		lim = range - (size1 - startpos);
3806
3807	      /* Written out as an if-else to avoid testing `translate'
3808		 inside the loop.  */
3809	      if (RE_TRANSLATE_P (translate))
3810		{
3811		  if (multibyte)
3812		    while (range > lim)
3813		      {
3814			int buf_charlen;
3815
3816			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
3817							 buf_charlen);
3818
3819			buf_ch = RE_TRANSLATE (translate, buf_ch);
3820			if (buf_ch >= 0400
3821			    || fastmap[buf_ch])
3822			  break;
3823
3824			range -= buf_charlen;
3825			d += buf_charlen;
3826		      }
3827		  else
3828		    while (range > lim
3829			   && !fastmap[(unsigned char)
3830				       RE_TRANSLATE (translate, (unsigned char) *d)])
3831		      {
3832			d++;
3833			range--;
3834		      }
3835		}
3836	      else
3837		while (range > lim && !fastmap[(unsigned char) *d])
3838		  {
3839		    d++;
3840		    range--;
3841		  }
3842
3843	      startpos += irange - range;
3844	    }
3845	  else				/* Searching backwards.	 */
3846	    {
3847	      int room = (size1 == 0 || startpos >= size1
3848			  ? size2 + size1 - startpos
3849			  : size1 - startpos);
3850
3851	      buf_ch = STRING_CHAR (d, room);
3852	      if (RE_TRANSLATE_P (translate))
3853		buf_ch = RE_TRANSLATE (translate, buf_ch);
3854
3855	      if (! (buf_ch >= 0400
3856		     || fastmap[buf_ch]))
3857		goto advance;
3858	    }
3859	}
3860
3861      /* If can't match the null string, and that's all we have left, fail.  */
3862      if (range >= 0 && startpos == total_size && fastmap
3863	  && !bufp->can_be_null)
3864	return -1;
3865
3866      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3867				 startpos, regs, stop);
3868#ifndef REGEX_MALLOC
3869#ifdef C_ALLOCA
3870      alloca (0);
3871#endif
3872#endif
3873
3874      if (val >= 0)
3875	return startpos;
3876
3877      if (val == -2)
3878	return -2;
3879
3880    advance:
3881      if (!range)
3882	break;
3883      else if (range > 0)
3884	{
3885	  /* Update STARTPOS to the next character boundary.  */
3886	  if (multibyte)
3887	    {
3888	      const unsigned char *p
3889		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
3890	      const unsigned char *pend
3891		= (const unsigned char *) STOP_ADDR_VSTRING (startpos);
3892	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
3893
3894	      range -= len;
3895	      if (range < 0)
3896		break;
3897	      startpos += len;
3898	    }
3899	  else
3900	    {
3901	      range--;
3902	      startpos++;
3903	    }
3904	}
3905      else
3906	{
3907	  range++;
3908	  startpos--;
3909
3910	  /* Update STARTPOS to the previous character boundary.  */
3911	  if (multibyte)
3912	    {
3913	      const unsigned char *p
3914		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
3915	      int len = 0;
3916
3917	      /* Find the head of multibyte form.  */
3918	      while (!CHAR_HEAD_P (*p))
3919		p--, len++;
3920
3921	      /* Adjust it. */
3922#if 0				/* XXX */
3923	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
3924		;
3925	      else
3926#endif
3927		{
3928		  range += len;
3929		  if (range > 0)
3930		    break;
3931
3932		  startpos -= len;
3933		}
3934	    }
3935	}
3936    }
3937  return -1;
3938} /* re_search_2 */
3939
3940/* Declarations and macros for re_match_2.  */
3941
3942static int bcmp_translate ();
3943static boolean alt_match_null_string_p (),
3944	       common_op_match_null_string_p (),
3945	       group_match_null_string_p ();
3946
3947/* This converts PTR, a pointer into one of the search strings `string1'
3948   and `string2' into an offset from the beginning of that string.  */
3949#define POINTER_TO_OFFSET(ptr)			\
3950  (FIRST_STRING_P (ptr)				\
3951   ? ((regoff_t) ((ptr) - string1))		\
3952   : ((regoff_t) ((ptr) - string2 + size1)))
3953
3954/* Macros for dealing with the split strings in re_match_2.  */
3955
3956#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3957
3958/* Call before fetching a character with *d.  This switches over to
3959   string2 if necessary.  */
3960#define PREFETCH()							\
3961  while (d == dend)							\
3962    {									\
3963      /* End of string2 => fail.  */					\
3964      if (dend == end_match_2)						\
3965	goto fail;							\
3966      /* End of string1 => advance to string2.	*/			\
3967      d = string2;							\
3968      dend = end_match_2;						\
3969    }
3970
3971
3972/* Test if at very beginning or at very end of the virtual concatenation
3973   of `string1' and `string2'.	If only one string, it's `string2'.  */
3974#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3975#define AT_STRINGS_END(d) ((d) == end2)
3976
3977
3978/* Test if D points to a character which is word-constituent.  We have
3979   two special cases to check for: if past the end of string1, look at
3980   the first character in string2; and if before the beginning of
3981   string2, look at the last character in string1.  */
3982#define WORDCHAR_P(d)							\
3983  (SYNTAX ((d) == end1 ? *string2					\
3984	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3985   == Sword)
3986
3987/* Disabled due to a compiler bug -- see comment at case wordbound */
3988
3989/* The comment at case wordbound is following one, but we don't use
3990   AT_WORD_BOUNDARY anymore to support multibyte form.
3991
3992   The DEC Alpha C compiler 3.x generates incorrect code for the
3993   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
3994   AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
3995   macro and introducing temporary variables works around the bug.  */
3996
3997#if 0
3998/* Test if the character before D and the one at D differ with respect
3999   to being word-constituent.  */
4000#define AT_WORD_BOUNDARY(d)						\
4001  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
4002   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4003#endif
4004
4005/* Free everything we malloc.  */
4006#ifdef MATCH_MAY_ALLOCATE
4007#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4008#define FREE_VARIABLES()						\
4009  do {									\
4010    REGEX_FREE_STACK (fail_stack.stack);				\
4011    FREE_VAR (regstart);						\
4012    FREE_VAR (regend);							\
4013    FREE_VAR (old_regstart);						\
4014    FREE_VAR (old_regend);						\
4015    FREE_VAR (best_regstart);						\
4016    FREE_VAR (best_regend);						\
4017    FREE_VAR (reg_info);						\
4018    FREE_VAR (reg_dummy);						\
4019    FREE_VAR (reg_info_dummy);						\
4020  } while (0)
4021#else
4022#define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4023#endif /* not MATCH_MAY_ALLOCATE */
4024
4025/* These values must meet several constraints.	They must not be valid
4026   register values; since we have a limit of 255 registers (because
4027   we use only one byte in the pattern for the register number), we can
4028   use numbers larger than 255.	 They must differ by 1, because of
4029   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4030   be larger than the value for the highest register, so we do not try
4031   to actually save any registers when none are active.	 */
4032#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4033#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4034
4035/* Matching routines.  */
4036
4037#ifndef emacs	/* Emacs never uses this.  */
4038/* re_match is like re_match_2 except it takes only a single string.  */
4039
4040int
4041re_match (bufp, string, size, pos, regs)
4042     struct re_pattern_buffer *bufp;
4043     const char *string;
4044     int size, pos;
4045     struct re_registers *regs;
4046{
4047  int result = re_match_2_internal (bufp, NULL, 0, string, size,
4048				    pos, regs, size);
4049#ifndef REGEX_MALLOC	/* CVS */
4050#ifdef C_ALLOCA		/* CVS */
4051  alloca (0);
4052#endif			/* CVS */
4053#endif			/* CVS */
4054  return result;
4055}
4056#endif /* not emacs */
4057
4058#ifdef emacs
4059/* In Emacs, this is the string or buffer in which we
4060   are matching.  It is used for looking up syntax properties.	*/
4061Lisp_Object re_match_object;
4062#endif
4063
4064/* re_match_2 matches the compiled pattern in BUFP against the
4065   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4066   and SIZE2, respectively).  We start matching at POS, and stop
4067   matching at STOP.
4068
4069   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4070   store offsets for the substring each group matched in REGS.	See the
4071   documentation for exactly how many groups we fill.
4072
4073   We return -1 if no match, -2 if an internal error (such as the
4074   failure stack overflowing).	Otherwise, we return the length of the
4075   matched substring.  */
4076
4077int
4078re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4079     struct re_pattern_buffer *bufp;
4080     const char *string1, *string2;
4081     int size1, size2;
4082     int pos;
4083     struct re_registers *regs;
4084     int stop;
4085{
4086  int result;
4087
4088#ifdef emacs
4089  int charpos;
4090  int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
4091  gl_state.object = re_match_object;
4092  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
4093  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4094#endif
4095
4096  result = re_match_2_internal (bufp, string1, size1, string2, size2,
4097				pos, regs, stop);
4098#ifndef REGEX_MALLOC	/* CVS */
4099#ifdef C_ALLOCA		/* CVS */
4100  alloca (0);
4101#endif			/* CVS */
4102#endif			/* CVS */
4103  return result;
4104}
4105
4106/* This is a separate function so that we can force an alloca cleanup
4107   afterwards.	*/
4108static int
4109re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4110     struct re_pattern_buffer *bufp;
4111     const char *string1, *string2;
4112     int size1, size2;
4113     int pos;
4114     struct re_registers *regs;
4115     int stop;
4116{
4117  /* General temporaries.  */
4118  int mcnt;
4119  unsigned char *p1;
4120
4121  /* Just past the end of the corresponding string.  */
4122  const char *end1, *end2;
4123
4124  /* Pointers into string1 and string2, just past the last characters in
4125     each to consider matching.	 */
4126  const char *end_match_1, *end_match_2;
4127
4128  /* Where we are in the data, and the end of the current string.  */
4129  const char *d, *dend;
4130
4131  /* Where we are in the pattern, and the end of the pattern.  */
4132  unsigned char *p = bufp->buffer;
4133  register unsigned char *pend = p + bufp->used;
4134
4135  /* Mark the opcode just after a start_memory, so we can test for an
4136     empty subpattern when we get to the stop_memory.  */
4137  unsigned char *just_past_start_mem = 0;
4138
4139  /* We use this to map every character in the string.	*/
4140  RE_TRANSLATE_TYPE translate = bufp->translate;
4141
4142  /* Nonzero if we have to concern multibyte character.	 */
4143  int multibyte = bufp->multibyte;
4144
4145  /* Failure point stack.  Each place that can handle a failure further
4146     down the line pushes a failure point on this stack.  It consists of
4147     restart, regend, and reg_info for all registers corresponding to
4148     the subexpressions we're currently inside, plus the number of such
4149     registers, and, finally, two char *'s.  The first char * is where
4150     to resume scanning the pattern; the second one is where to resume
4151     scanning the strings.  If the latter is zero, the failure point is
4152     a ``dummy''; if a failure happens and the failure point is a dummy,
4153     it gets discarded and the next next one is tried.	*/
4154#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
4155  fail_stack_type fail_stack;
4156#endif
4157#ifdef DEBUG
4158  static unsigned failure_id = 0;
4159  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4160#endif
4161
4162  /* This holds the pointer to the failure stack, when
4163     it is allocated relocatably.  */
4164  fail_stack_elt_t *failure_stack_ptr;
4165
4166  /* We fill all the registers internally, independent of what we
4167     return, for use in backreferences.	 The number here includes
4168     an element for register zero.  */
4169  unsigned num_regs = bufp->re_nsub + 1;
4170
4171  /* The currently active registers.  */
4172  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4173  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4174
4175  /* Information on the contents of registers. These are pointers into
4176     the input strings; they record just what was matched (on this
4177     attempt) by a subexpression part of the pattern, that is, the
4178     regnum-th regstart pointer points to where in the pattern we began
4179     matching and the regnum-th regend points to right after where we
4180     stopped matching the regnum-th subexpression.  (The zeroth register
4181     keeps track of what the whole pattern matches.)  */
4182#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4183  const char **regstart, **regend;
4184#endif
4185
4186  /* If a group that's operated upon by a repetition operator fails to
4187     match anything, then the register for its start will need to be
4188     restored because it will have been set to wherever in the string we
4189     are when we last see its open-group operator.  Similarly for a
4190     register's end.  */
4191#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4192  const char **old_regstart, **old_regend;
4193#endif
4194
4195  /* The is_active field of reg_info helps us keep track of which (possibly
4196     nested) subexpressions we are currently in. The matched_something
4197     field of reg_info[reg_num] helps us tell whether or not we have
4198     matched any of the pattern so far this time through the reg_num-th
4199     subexpression.  These two fields get reset each time through any
4200     loop their register is in.	 */
4201#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
4202  register_info_type *reg_info;
4203#endif
4204
4205  /* The following record the register info as found in the above
4206     variables when we find a match better than any we've seen before.
4207     This happens as we backtrack through the failure points, which in
4208     turn happens only if we have not yet matched the entire string. */
4209  unsigned best_regs_set = false;
4210#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4211  const char **best_regstart, **best_regend;
4212#endif
4213
4214  /* Logically, this is `best_regend[0]'.  But we don't want to have to
4215     allocate space for that if we're not allocating space for anything
4216     else (see below).	Also, we never need info about register 0 for
4217     any of the other register vectors, and it seems rather a kludge to
4218     treat `best_regend' differently than the rest.  So we keep track of
4219     the end of the best match so far in a separate variable.  We
4220     initialize this to NULL so that when we backtrack the first time
4221     and need to test it, it's not garbage.  */
4222  const char *match_end = NULL;
4223
4224  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4225  int set_regs_matched_done = 0;
4226
4227  /* Used when we pop values we don't care about.  */
4228#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4229  const char **reg_dummy;
4230  register_info_type *reg_info_dummy;
4231#endif
4232
4233#ifdef DEBUG
4234  /* Counts the total number of registers pushed.  */
4235  unsigned num_regs_pushed = 0;
4236#endif
4237
4238  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4239
4240  INIT_FAIL_STACK ();
4241
4242#ifdef MATCH_MAY_ALLOCATE
4243  /* Do not bother to initialize all the register variables if there are
4244     no groups in the pattern, as it takes a fair amount of time.  If
4245     there are groups, we include space for register 0 (the whole
4246     pattern), even though we never use it, since it simplifies the
4247     array indexing.  We should fix this.  */
4248  if (bufp->re_nsub)
4249    {
4250      regstart = REGEX_TALLOC (num_regs, const char *);
4251      regend = REGEX_TALLOC (num_regs, const char *);
4252      old_regstart = REGEX_TALLOC (num_regs, const char *);
4253      old_regend = REGEX_TALLOC (num_regs, const char *);
4254      best_regstart = REGEX_TALLOC (num_regs, const char *);
4255      best_regend = REGEX_TALLOC (num_regs, const char *);
4256      reg_info = REGEX_TALLOC (num_regs, register_info_type);
4257      reg_dummy = REGEX_TALLOC (num_regs, const char *);
4258      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4259
4260      if (!(regstart && regend && old_regstart && old_regend && reg_info
4261	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4262	{
4263	  FREE_VARIABLES ();
4264	  return -2;
4265	}
4266    }
4267  else
4268    {
4269      /* We must initialize all our variables to NULL, so that
4270	 `FREE_VARIABLES' doesn't try to free them.  */
4271      regstart = regend = old_regstart = old_regend = best_regstart
4272	= best_regend = reg_dummy = NULL;
4273      reg_info = reg_info_dummy = (register_info_type *) NULL;
4274    }
4275#endif /* MATCH_MAY_ALLOCATE */
4276
4277  /* The starting position is bogus.  */
4278  if (pos < 0 || pos > size1 + size2)
4279    {
4280      FREE_VARIABLES ();
4281      return -1;
4282    }
4283
4284  /* Initialize subexpression text positions to -1 to mark ones that no
4285     start_memory/stop_memory has been seen for. Also initialize the
4286     register information struct.  */
4287  for (mcnt = 1; mcnt < num_regs; mcnt++)
4288    {
4289      regstart[mcnt] = regend[mcnt]
4290	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4291
4292      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4293      IS_ACTIVE (reg_info[mcnt]) = 0;
4294      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4295      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4296    }
4297
4298  /* We move `string1' into `string2' if the latter's empty -- but not if
4299     `string1' is null.	 */
4300  if (size2 == 0 && string1 != NULL)
4301    {
4302      string2 = string1;
4303      size2 = size1;
4304      string1 = 0;
4305      size1 = 0;
4306    }
4307  end1 = string1 + size1;
4308  end2 = string2 + size2;
4309
4310  /* Compute where to stop matching, within the two strings.  */
4311  if (stop <= size1)
4312    {
4313      end_match_1 = string1 + stop;
4314      end_match_2 = string2;
4315    }
4316  else
4317    {
4318      end_match_1 = end1;
4319      end_match_2 = string2 + stop - size1;
4320    }
4321
4322  /* `p' scans through the pattern as `d' scans through the data.
4323     `dend' is the end of the input string that `d' points within.  `d'
4324     is advanced into the following input string whenever necessary, but
4325     this happens before fetching; therefore, at the beginning of the
4326     loop, `d' can be pointing at the end of a string, but it cannot
4327     equal `string2'.  */
4328  if (size1 > 0 && pos <= size1)
4329    {
4330      d = string1 + pos;
4331      dend = end_match_1;
4332    }
4333  else
4334    {
4335      d = string2 + pos - size1;
4336      dend = end_match_2;
4337    }
4338
4339  DEBUG_PRINT1 ("The compiled pattern is: ");
4340  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4341  DEBUG_PRINT1 ("The string to match is: `");
4342  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4343  DEBUG_PRINT1 ("'\n");
4344
4345  /* This loops over pattern commands.	It exits by returning from the
4346     function if the match is complete, or it drops through if the match
4347     fails at this starting point in the input data.  */
4348  for (;;)
4349    {
4350      DEBUG_PRINT2 ("\n0x%x: ", p);
4351
4352      if (p == pend)
4353	{ /* End of pattern means we might have succeeded.  */
4354	  DEBUG_PRINT1 ("end of pattern ... ");
4355
4356	  /* If we haven't matched the entire string, and we want the
4357	     longest match, try backtracking.  */
4358	  if (d != end_match_2)
4359	    {
4360	      /* 1 if this match ends in the same string (string1 or string2)
4361		 as the best previous match.  */
4362	      boolean same_str_p = (FIRST_STRING_P (match_end)
4363				    == MATCHING_IN_FIRST_STRING);
4364	      /* 1 if this match is the best seen so far.  */
4365	      boolean best_match_p;
4366
4367	      /* AIX compiler got confused when this was combined
4368		 with the previous declaration.	 */
4369	      if (same_str_p)
4370		best_match_p = d > match_end;
4371	      else
4372		best_match_p = !MATCHING_IN_FIRST_STRING;
4373
4374	      DEBUG_PRINT1 ("backtracking.\n");
4375
4376	      if (!FAIL_STACK_EMPTY ())
4377		{ /* More failure points to try.  */
4378
4379		  /* If exceeds best match so far, save it.  */
4380		  if (!best_regs_set || best_match_p)
4381		    {
4382		      best_regs_set = true;
4383		      match_end = d;
4384
4385		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4386
4387		      for (mcnt = 1; mcnt < num_regs; mcnt++)
4388			{
4389			  best_regstart[mcnt] = regstart[mcnt];
4390			  best_regend[mcnt] = regend[mcnt];
4391			}
4392		    }
4393		  goto fail;
4394		}
4395
4396	      /* If no failure points, don't restore garbage.  And if
4397		 last match is real best match, don't restore second
4398		 best one. */
4399	      else if (best_regs_set && !best_match_p)
4400		{
4401		restore_best_regs:
4402		  /* Restore best match.  It may happen that `dend ==
4403		     end_match_1' while the restored d is in string2.
4404		     For example, the pattern `x.*y.*z' against the
4405		     strings `x-' and `y-z-', if the two strings are
4406		     not consecutive in memory.	 */
4407		  DEBUG_PRINT1 ("Restoring best registers.\n");
4408
4409		  d = match_end;
4410		  dend = ((d >= string1 && d <= end1)
4411			   ? end_match_1 : end_match_2);
4412
4413		  for (mcnt = 1; mcnt < num_regs; mcnt++)
4414		    {
4415		      regstart[mcnt] = best_regstart[mcnt];
4416		      regend[mcnt] = best_regend[mcnt];
4417		    }
4418		}
4419	    } /* d != end_match_2 */
4420
4421	succeed_label:
4422	  DEBUG_PRINT1 ("Accepting match.\n");
4423
4424	  /* If caller wants register contents data back, do it.  */
4425	  if (regs && !bufp->no_sub)
4426	    {
4427	      /* Have the register data arrays been allocated?	*/
4428	      if (bufp->regs_allocated == REGS_UNALLOCATED)
4429		{ /* No.  So allocate them with malloc.	 We need one
4430		     extra element beyond `num_regs' for the `-1' marker
4431		     GNU code uses.  */
4432		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4433		  regs->start = TALLOC (regs->num_regs, regoff_t);
4434		  regs->end = TALLOC (regs->num_regs, regoff_t);
4435		  if (regs->start == NULL || regs->end == NULL)
4436		    {
4437		      FREE_VARIABLES ();
4438		      return -2;
4439		    }
4440		  bufp->regs_allocated = REGS_REALLOCATE;
4441		}
4442	      else if (bufp->regs_allocated == REGS_REALLOCATE)
4443		{ /* Yes.  If we need more elements than were already
4444		     allocated, reallocate them.  If we need fewer, just
4445		     leave it alone.  */
4446		  if (regs->num_regs < num_regs + 1)
4447		    {
4448		      regs->num_regs = num_regs + 1;
4449		      RETALLOC (regs->start, regs->num_regs, regoff_t);
4450		      RETALLOC (regs->end, regs->num_regs, regoff_t);
4451		      if (regs->start == NULL || regs->end == NULL)
4452			{
4453			  FREE_VARIABLES ();
4454			  return -2;
4455			}
4456		    }
4457		}
4458	      else
4459		{
4460		  /* These braces fend off a "empty body in an else-statement"
4461		     warning under GCC when assert expands to nothing.	*/
4462		  assert (bufp->regs_allocated == REGS_FIXED);
4463		}
4464
4465	      /* Convert the pointer data in `regstart' and `regend' to
4466		 indices.  Register zero has to be set differently,
4467		 since we haven't kept track of any info for it.  */
4468	      if (regs->num_regs > 0)
4469		{
4470		  regs->start[0] = pos;
4471		  regs->end[0] = (MATCHING_IN_FIRST_STRING
4472				  ? ((regoff_t) (d - string1))
4473				  : ((regoff_t) (d - string2 + size1)));
4474		}
4475
4476	      /* Go through the first `min (num_regs, regs->num_regs)'
4477		 registers, since that is all we initialized.  */
4478	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
4479		{
4480		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4481		    regs->start[mcnt] = regs->end[mcnt] = -1;
4482		  else
4483		    {
4484		      regs->start[mcnt]
4485			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4486		      regs->end[mcnt]
4487			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4488		    }
4489		}
4490
4491	      /* If the regs structure we return has more elements than
4492		 were in the pattern, set the extra elements to -1.  If
4493		 we (re)allocated the registers, this is the case,
4494		 because we always allocate enough to have at least one
4495		 -1 at the end.	 */
4496	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
4497		regs->start[mcnt] = regs->end[mcnt] = -1;
4498	    } /* regs && !bufp->no_sub */
4499
4500	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4501			nfailure_points_pushed, nfailure_points_popped,
4502			nfailure_points_pushed - nfailure_points_popped);
4503	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4504
4505	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4506			    ? string1
4507			    : string2 - size1);
4508
4509	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4510
4511	  FREE_VARIABLES ();
4512	  return mcnt;
4513	}
4514
4515      /* Otherwise match next pattern command.	*/
4516      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4517	{
4518	/* Ignore these.  Used to ignore the n of succeed_n's which
4519	   currently have n == 0.  */
4520	case no_op:
4521	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
4522	  break;
4523
4524	case succeed:
4525	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
4526	  goto succeed_label;
4527
4528	/* Match the next n pattern characters exactly.	 The following
4529	   byte in the pattern defines n, and the n bytes after that
4530	   are the characters to match.	 */
4531	case exactn:
4532	  mcnt = *p++;
4533	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4534
4535	  /* This is written out as an if-else so we don't waste time
4536	     testing `translate' inside the loop.  */
4537	  if (RE_TRANSLATE_P (translate))
4538	    {
4539#ifdef emacs
4540	      if (multibyte)
4541		do
4542		  {
4543		    int pat_charlen, buf_charlen;
4544		    unsigned int pat_ch, buf_ch;
4545
4546		    PREFETCH ();
4547		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
4548		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4549
4550		    if (RE_TRANSLATE (translate, buf_ch)
4551			!= pat_ch)
4552		      goto fail;
4553
4554		    p += pat_charlen;
4555		    d += buf_charlen;
4556		    mcnt -= pat_charlen;
4557		  }
4558		while (mcnt > 0);
4559	      else
4560#endif /* not emacs */
4561		do
4562		  {
4563		    PREFETCH ();
4564		    if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
4565			!= (unsigned char) *p++)
4566		      goto fail;
4567		    d++;
4568		  }
4569		while (--mcnt);
4570	    }
4571	  else
4572	    {
4573	      do
4574		{
4575		  PREFETCH ();
4576		  if (*d++ != (char) *p++) goto fail;
4577		}
4578	      while (--mcnt);
4579	    }
4580	  SET_REGS_MATCHED ();
4581	  break;
4582
4583
4584	/* Match any character except possibly a newline or a null.  */
4585	case anychar:
4586	  {
4587	    int buf_charlen;
4588	    unsigned int buf_ch;
4589
4590	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
4591
4592	    PREFETCH ();
4593
4594#ifdef emacs
4595	    if (multibyte)
4596	      buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4597	    else
4598#endif /* not emacs */
4599	      {
4600		buf_ch = (unsigned char) *d;
4601		buf_charlen = 1;
4602	      }
4603
4604	    buf_ch = TRANSLATE (buf_ch);
4605
4606	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
4607		 && buf_ch == '\n')
4608		|| ((bufp->syntax & RE_DOT_NOT_NULL)
4609		    && buf_ch == '\000'))
4610	      goto fail;
4611
4612	    SET_REGS_MATCHED ();
4613	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4614	    d += buf_charlen;
4615	  }
4616	  break;
4617
4618
4619	case charset:
4620	case charset_not:
4621	  {
4622	    register unsigned int c;
4623	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4624	    int len;
4625
4626	    /* Start of actual range_table, or end of bitmap if there is no
4627	       range table.  */
4628	    unsigned char *range_table;
4629
4630	    /* Nonzero if there is range table.	 */
4631	    int range_table_exists;
4632
4633	    /* Number of ranges of range table.	 Not in bytes.	*/
4634	    int count;
4635
4636	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4637
4638	    PREFETCH ();
4639	    c = (unsigned char) *d;
4640
4641	    range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
4642	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
4643	    if (range_table_exists)
4644	      EXTRACT_NUMBER_AND_INCR (count, range_table);
4645	    else
4646	      count = 0;
4647
4648	    if (multibyte && BASE_LEADING_CODE_P (c))
4649	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
4650
4651	    if (SINGLE_BYTE_CHAR_P (c))
4652	      {			/* Lookup bitmap.  */
4653		c = TRANSLATE (c); /* The character to match.  */
4654		len = 1;
4655
4656		/* Cast to `unsigned' instead of `unsigned char' in
4657		   case the bit list is a full 32 bytes long.  */
4658		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
4659		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4660	      not = !not;
4661	      }
4662	    else if (range_table_exists)
4663	      CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
4664
4665	    p = CHARSET_RANGE_TABLE_END (range_table, count);
4666
4667	    if (!not) goto fail;
4668
4669	    SET_REGS_MATCHED ();
4670	    d += len;
4671	    break;
4672	  }
4673
4674
4675	/* The beginning of a group is represented by start_memory.
4676	   The arguments are the register number in the next byte, and the
4677	   number of groups inner to this one in the next.  The text
4678	   matched within the group is recorded (in the internal
4679	   registers data structure) under the register number.	 */
4680	case start_memory:
4681	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4682
4683	  /* Find out if this group can match the empty string.	 */
4684	  p1 = p;		/* To send to group_match_null_string_p.  */
4685
4686	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4687	    REG_MATCH_NULL_STRING_P (reg_info[*p])
4688	      = group_match_null_string_p (&p1, pend, reg_info);
4689
4690	  /* Save the position in the string where we were the last time
4691	     we were at this open-group operator in case the group is
4692	     operated upon by a repetition operator, e.g., with `(a*)*b'
4693	     against `ab'; then we want to ignore where we are now in
4694	     the string in case this attempt to match fails.  */
4695	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4696			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4697			     : regstart[*p];
4698	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4699			 POINTER_TO_OFFSET (old_regstart[*p]));
4700
4701	  regstart[*p] = d;
4702	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4703
4704	  IS_ACTIVE (reg_info[*p]) = 1;
4705	  MATCHED_SOMETHING (reg_info[*p]) = 0;
4706
4707	  /* Clear this whenever we change the register activity status.  */
4708	  set_regs_matched_done = 0;
4709
4710	  /* This is the new highest active register.  */
4711	  highest_active_reg = *p;
4712
4713	  /* If nothing was active before, this is the new lowest active
4714	     register.	*/
4715	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4716	    lowest_active_reg = *p;
4717
4718	  /* Move past the register number and inner group count.  */
4719	  p += 2;
4720	  just_past_start_mem = p;
4721
4722	  break;
4723
4724
4725	/* The stop_memory opcode represents the end of a group.  Its
4726	   arguments are the same as start_memory's: the register
4727	   number, and the number of inner groups.  */
4728	case stop_memory:
4729	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4730
4731	  /* We need to save the string position the last time we were at
4732	     this close-group operator in case the group is operated
4733	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4734	     against `aba'; then we want to ignore where we are now in
4735	     the string in case this attempt to match fails.  */
4736	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4737			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
4738			   : regend[*p];
4739	  DEBUG_PRINT2 ("      old_regend: %d\n",
4740			 POINTER_TO_OFFSET (old_regend[*p]));
4741
4742	  regend[*p] = d;
4743	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4744
4745	  /* This register isn't active anymore.  */
4746	  IS_ACTIVE (reg_info[*p]) = 0;
4747
4748	  /* Clear this whenever we change the register activity status.  */
4749	  set_regs_matched_done = 0;
4750
4751	  /* If this was the only register active, nothing is active
4752	     anymore.  */
4753	  if (lowest_active_reg == highest_active_reg)
4754	    {
4755	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4756	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4757	    }
4758	  else
4759	    { /* We must scan for the new highest active register, since
4760		 it isn't necessarily one less than now: consider
4761		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4762		 new highest active register is 1.  */
4763	      unsigned char r = *p - 1;
4764	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
4765		r--;
4766
4767	      /* If we end up at register zero, that means that we saved
4768		 the registers as the result of an `on_failure_jump', not
4769		 a `start_memory', and we jumped to past the innermost
4770		 `stop_memory'.	 For example, in ((.)*) we save
4771		 registers 1 and 2 as a result of the *, but when we pop
4772		 back to the second ), we are at the stop_memory 1.
4773		 Thus, nothing is active.  */
4774	      if (r == 0)
4775		{
4776		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4777		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4778		}
4779	      else
4780		highest_active_reg = r;
4781	    }
4782
4783	  /* If just failed to match something this time around with a
4784	     group that's operated on by a repetition operator, try to
4785	     force exit from the ``loop'', and restore the register
4786	     information for this group that we had before trying this
4787	     last match.  */
4788	  if ((!MATCHED_SOMETHING (reg_info[*p])
4789	       || just_past_start_mem == p - 1)
4790	      && (p + 2) < pend)
4791	    {
4792	      boolean is_a_jump_n = false;
4793
4794	      p1 = p + 2;
4795	      mcnt = 0;
4796	      switch ((re_opcode_t) *p1++)
4797		{
4798		  case jump_n:
4799		    is_a_jump_n = true;
4800		  case pop_failure_jump:
4801		  case maybe_pop_jump:
4802		  case jump:
4803		  case dummy_failure_jump:
4804		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4805		    if (is_a_jump_n)
4806		      p1 += 2;
4807		    break;
4808
4809		  default:
4810		    /* do nothing */ ;
4811		}
4812	      p1 += mcnt;
4813
4814	      /* If the next operation is a jump backwards in the pattern
4815		 to an on_failure_jump right before the start_memory
4816		 corresponding to this stop_memory, exit from the loop
4817		 by forcing a failure after pushing on the stack the
4818		 on_failure_jump's jump in the pattern, and d.	*/
4819	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4820		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4821		{
4822		  /* If this group ever matched anything, then restore
4823		     what its registers were before trying this last
4824		     failed match, e.g., with `(a*)*b' against `ab' for
4825		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4826		     against `aba' for regend[3].
4827
4828		     Also restore the registers for inner groups for,
4829		     e.g., `((a*)(b*))*' against `aba' (register 3 would
4830		     otherwise get trashed).  */
4831
4832		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4833		    {
4834		      unsigned r;
4835
4836		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4837
4838		      /* Restore this and inner groups' (if any) registers.  */
4839		      for (r = *p; r < *p + *(p + 1); r++)
4840			{
4841			  regstart[r] = old_regstart[r];
4842
4843			  /* xx why this test?	*/
4844			  if (old_regend[r] >= regstart[r])
4845			    regend[r] = old_regend[r];
4846			}
4847		    }
4848		  p1++;
4849		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4850		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4851
4852		  goto fail;
4853		}
4854	    }
4855
4856	  /* Move past the register number and the inner group count.  */
4857	  p += 2;
4858	  break;
4859
4860
4861	/* \<digit> has been turned into a `duplicate' command which is
4862	   followed by the numeric value of <digit> as the register number.  */
4863	case duplicate:
4864	  {
4865	    register const char *d2, *dend2;
4866	    int regno = *p++;	/* Get which register to match against.	 */
4867	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4868
4869	    /* Can't back reference a group which we've never matched.	*/
4870	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4871	      goto fail;
4872
4873	    /* Where in input to try to start matching.	 */
4874	    d2 = regstart[regno];
4875
4876	    /* Where to stop matching; if both the place to start and
4877	       the place to stop matching are in the same string, then
4878	       set to the place to stop, otherwise, for now have to use
4879	       the end of the first string.  */
4880
4881	    dend2 = ((FIRST_STRING_P (regstart[regno])
4882		      == FIRST_STRING_P (regend[regno]))
4883		     ? regend[regno] : end_match_1);
4884	    for (;;)
4885	      {
4886		/* If necessary, advance to next segment in register
4887		   contents.  */
4888		while (d2 == dend2)
4889		  {
4890		    if (dend2 == end_match_2) break;
4891		    if (dend2 == regend[regno]) break;
4892
4893		    /* End of string1 => advance to string2. */
4894		    d2 = string2;
4895		    dend2 = regend[regno];
4896		  }
4897		/* At end of register contents => success */
4898		if (d2 == dend2) break;
4899
4900		/* If necessary, advance to next segment in data.  */
4901		PREFETCH ();
4902
4903		/* How many characters left in this segment to match.  */
4904		mcnt = dend - d;
4905
4906		/* Want how many consecutive characters we can match in
4907		   one shot, so, if necessary, adjust the count.  */
4908		if (mcnt > dend2 - d2)
4909		  mcnt = dend2 - d2;
4910
4911		/* Compare that many; failure if mismatch, else move
4912		   past them.  */
4913		if (RE_TRANSLATE_P (translate)
4914		    ? bcmp_translate (d, d2, mcnt, translate)
4915		    : bcmp (d, d2, mcnt))
4916		  goto fail;
4917		d += mcnt, d2 += mcnt;
4918
4919		/* Do this because we've match some characters.	 */
4920		SET_REGS_MATCHED ();
4921	      }
4922	  }
4923	  break;
4924
4925
4926	/* begline matches the empty string at the beginning of the string
4927	   (unless `not_bol' is set in `bufp'), and, if
4928	   `newline_anchor' is set, after newlines.  */
4929	case begline:
4930	  DEBUG_PRINT1 ("EXECUTING begline.\n");
4931
4932	  if (AT_STRINGS_BEG (d))
4933	    {
4934	      if (!bufp->not_bol) break;
4935	    }
4936	  else if (d[-1] == '\n' && bufp->newline_anchor)
4937	    {
4938	      break;
4939	    }
4940	  /* In all other cases, we fail.  */
4941	  goto fail;
4942
4943
4944	/* endline is the dual of begline.  */
4945	case endline:
4946	  DEBUG_PRINT1 ("EXECUTING endline.\n");
4947
4948	  if (AT_STRINGS_END (d))
4949	    {
4950	      if (!bufp->not_eol) break;
4951	    }
4952
4953	  /* We have to ``prefetch'' the next character.  */
4954	  else if ((d == end1 ? *string2 : *d) == '\n'
4955		   && bufp->newline_anchor)
4956	    {
4957	      break;
4958	    }
4959	  goto fail;
4960
4961
4962	/* Match at the very beginning of the data.  */
4963	case begbuf:
4964	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4965	  if (AT_STRINGS_BEG (d))
4966	    break;
4967	  goto fail;
4968
4969
4970	/* Match at the very end of the data.  */
4971	case endbuf:
4972	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4973	  if (AT_STRINGS_END (d))
4974	    break;
4975	  goto fail;
4976
4977
4978	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4979	   pushes NULL as the value for the string on the stack.  Then
4980	   `pop_failure_point' will keep the current value for the
4981	   string, instead of restoring it.  To see why, consider
4982	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
4983	   then the . fails against the \n.  But the next thing we want
4984	   to do is match the \n against the \n; if we restored the
4985	   string value, we would be back at the foo.
4986
4987	   Because this is used only in specific cases, we don't need to
4988	   check all the things that `on_failure_jump' does, to make
4989	   sure the right things get saved on the stack.  Hence we don't
4990	   share its code.  The only reason to push anything on the
4991	   stack at all is that otherwise we would have to change
4992	   `anychar's code to do something besides goto fail in this
4993	   case; that seems worse than this.  */
4994	case on_failure_keep_string_jump:
4995	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4996
4997	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4998	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4999
5000	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
5001	  break;
5002
5003
5004	/* Uses of on_failure_jump:
5005
5006	   Each alternative starts with an on_failure_jump that points
5007	   to the beginning of the next alternative.  Each alternative
5008	   except the last ends with a jump that in effect jumps past
5009	   the rest of the alternatives.  (They really jump to the
5010	   ending jump of the following alternative, because tensioning
5011	   these jumps is a hassle.)
5012
5013	   Repeats start with an on_failure_jump that points past both
5014	   the repetition text and either the following jump or
5015	   pop_failure_jump back to this on_failure_jump.  */
5016	case on_failure_jump:
5017	on_failure:
5018	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5019
5020#if defined (WINDOWSNT) && defined (emacs)
5021	  QUIT;
5022#endif
5023
5024	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5025	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
5026
5027	  /* If this on_failure_jump comes right before a group (i.e.,
5028	     the original * applied to a group), save the information
5029	     for that group and all inner ones, so that if we fail back
5030	     to this point, the group's information will be correct.
5031	     For example, in \(a*\)*\1, we need the preceding group,
5032	     and in \(zz\(a*\)b*\)\2, we need the inner group.	*/
5033
5034	  /* We can't use `p' to check ahead because we push
5035	     a failure point to `p + mcnt' after we do this.  */
5036	  p1 = p;
5037
5038	  /* We need to skip no_op's before we look for the
5039	     start_memory in case this on_failure_jump is happening as
5040	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5041	     against aba.  */
5042	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
5043	    p1++;
5044
5045	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5046	    {
5047	      /* We have a new highest active register now.  This will
5048		 get reset at the start_memory we are about to get to,
5049		 but we will have saved all the registers relevant to
5050		 this repetition op, as described above.  */
5051	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
5052	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5053		lowest_active_reg = *(p1 + 1);
5054	    }
5055
5056	  DEBUG_PRINT1 (":\n");
5057	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
5058	  break;
5059
5060
5061	/* A smart repeat ends with `maybe_pop_jump'.
5062	   We change it to either `pop_failure_jump' or `jump'.	 */
5063	case maybe_pop_jump:
5064#if defined (WINDOWSNT) && defined (emacs)
5065	  QUIT;
5066#endif
5067	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5068	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5069	  {
5070	    register unsigned char *p2 = p;
5071
5072	    /* Compare the beginning of the repeat with what in the
5073	       pattern follows its end. If we can establish that there
5074	       is nothing that they would both match, i.e., that we
5075	       would have to backtrack because of (as in, e.g., `a*a')
5076	       then we can change to pop_failure_jump, because we'll
5077	       never have to backtrack.
5078
5079	       This is not true in the case of alternatives: in
5080	       `(a|ab)*' we do need to backtrack to the `ab' alternative
5081	       (e.g., if the string was `ab').	But instead of trying to
5082	       detect that here, the alternative has put on a dummy
5083	       failure point which is what we will end up popping.  */
5084
5085	    /* Skip over open/close-group commands.
5086	       If what follows this loop is a ...+ construct,
5087	       look at what begins its body, since we will have to
5088	       match at least one of that.  */
5089	    while (1)
5090	      {
5091		if (p2 + 2 < pend
5092		    && ((re_opcode_t) *p2 == stop_memory
5093			|| (re_opcode_t) *p2 == start_memory))
5094		  p2 += 3;
5095		else if (p2 + 6 < pend
5096			 && (re_opcode_t) *p2 == dummy_failure_jump)
5097		  p2 += 6;
5098		else
5099		  break;
5100	      }
5101
5102	    p1 = p + mcnt;
5103	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5104	       to the `maybe_finalize_jump' of this case.  Examine what
5105	       follows.	 */
5106
5107	    /* If we're at the end of the pattern, we can change.  */
5108	    if (p2 == pend)
5109	      {
5110		/* Consider what happens when matching ":\(.*\)"
5111		   against ":/".  I don't really understand this code
5112		   yet.	 */
5113		p[-3] = (unsigned char) pop_failure_jump;
5114		DEBUG_PRINT1
5115		  ("  End of pattern: change to `pop_failure_jump'.\n");
5116	      }
5117
5118	    else if ((re_opcode_t) *p2 == exactn
5119		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5120	      {
5121		register unsigned int c
5122		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
5123
5124		if ((re_opcode_t) p1[3] == exactn)
5125		  {
5126		    if (!(multibyte /* && (c != '\n') */
5127			  && BASE_LEADING_CODE_P (c))
5128			? c != p1[5]
5129			: (STRING_CHAR (&p2[2], pend - &p2[2])
5130			   != STRING_CHAR (&p1[5], pend - &p1[5])))
5131		  {
5132		    p[-3] = (unsigned char) pop_failure_jump;
5133		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
5134				  c, p1[5]);
5135		  }
5136		  }
5137
5138		else if ((re_opcode_t) p1[3] == charset
5139			 || (re_opcode_t) p1[3] == charset_not)
5140		  {
5141		    int not = (re_opcode_t) p1[3] == charset_not;
5142
5143		    if (multibyte /* && (c != '\n') */
5144			&& BASE_LEADING_CODE_P (c))
5145		      c = STRING_CHAR (&p2[2], pend - &p2[2]);
5146
5147		    /* Test if C is listed in charset (or charset_not)
5148		       at `&p1[3]'.  */
5149		    if (SINGLE_BYTE_CHAR_P (c))
5150		      {
5151			if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
5152			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5153		      not = !not;
5154		      }
5155		    else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
5156		      CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
5157
5158		    /* `not' is equal to 1 if c would match, which means
5159			that we can't change to pop_failure_jump.  */
5160		    if (!not)
5161		      {
5162			p[-3] = (unsigned char) pop_failure_jump;
5163			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5164		      }
5165		  }
5166	      }
5167	    else if ((re_opcode_t) *p2 == charset)
5168	      {
5169		if ((re_opcode_t) p1[3] == exactn)
5170		  {
5171		    register unsigned int c = p1[5];
5172		    int not = 0;
5173
5174		    if (multibyte && BASE_LEADING_CODE_P (c))
5175		      c = STRING_CHAR (&p1[5], pend - &p1[5]);
5176
5177		    /* Test if C is listed in charset at `p2'.	*/
5178		    if (SINGLE_BYTE_CHAR_P (c))
5179		      {
5180			if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
5181			    && (p2[2 + c / BYTEWIDTH]
5182				& (1 << (c % BYTEWIDTH))))
5183			  not = !not;
5184		      }
5185		    else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
5186		      CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
5187
5188		    if (!not)
5189		  {
5190		    p[-3] = (unsigned char) pop_failure_jump;
5191			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5192		      }
5193		  }
5194
5195		/* It is hard to list up all the character in charset
5196		   P2 if it includes multibyte character.  Give up in
5197		   such case.  */
5198		else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
5199		  {
5200		    /* Now, we are sure that P2 has no range table.
5201		       So, for the size of bitmap in P2, `p2[1]' is
5202		       enough.	But P1 may have range table, so the
5203		       size of bitmap table of P1 is extracted by
5204		       using macro `CHARSET_BITMAP_SIZE'.
5205
5206		       Since we know that all the character listed in
5207		       P2 is ASCII, it is enough to test only bitmap
5208		       table of P1.  */
5209
5210		    if ((re_opcode_t) p1[3] == charset_not)
5211		  {
5212		    int idx;
5213			/* We win if the charset_not inside the loop lists
5214			   every character listed in the charset after.	 */
5215		    for (idx = 0; idx < (int) p2[1]; idx++)
5216		      if (! (p2[2 + idx] == 0
5217				 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
5218				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5219			break;
5220
5221		    if (idx == p2[1])
5222		      {
5223			p[-3] = (unsigned char) pop_failure_jump;
5224			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5225		      }
5226		  }
5227		else if ((re_opcode_t) p1[3] == charset)
5228		  {
5229		    int idx;
5230		    /* We win if the charset inside the loop
5231		       has no overlap with the one after the loop.  */
5232		    for (idx = 0;
5233			     (idx < (int) p2[1]
5234			      && idx < CHARSET_BITMAP_SIZE (&p1[3]));
5235			 idx++)
5236		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
5237			break;
5238
5239			if (idx == p2[1]
5240			    || idx == CHARSET_BITMAP_SIZE (&p1[3]))
5241		      {
5242			p[-3] = (unsigned char) pop_failure_jump;
5243			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5244		      }
5245		  }
5246	      }
5247	  }
5248	  }
5249	  p -= 2;		/* Point at relative address again.  */
5250	  if ((re_opcode_t) p[-1] != pop_failure_jump)
5251	    {
5252	      p[-1] = (unsigned char) jump;
5253	      DEBUG_PRINT1 ("  Match => jump.\n");
5254	      goto unconditional_jump;
5255	    }
5256	/* Note fall through.  */
5257
5258
5259	/* The end of a simple repeat has a pop_failure_jump back to
5260	   its matching on_failure_jump, where the latter will push a
5261	   failure point.  The pop_failure_jump takes off failure
5262	   points put on by this pop_failure_jump's matching
5263	   on_failure_jump; we got through the pattern to here from the
5264	   matching on_failure_jump, so didn't fail.  */
5265	case pop_failure_jump:
5266	  {
5267	    /* We need to pass separate storage for the lowest and
5268	       highest registers, even though we don't care about the
5269	       actual values.  Otherwise, we will restore only one
5270	       register from the stack, since lowest will == highest in
5271	       `pop_failure_point'.  */
5272	    unsigned dummy_low_reg, dummy_high_reg;
5273	    unsigned char *pdummy;
5274	    const char *sdummy;
5275
5276	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5277	    POP_FAILURE_POINT (sdummy, pdummy,
5278			       dummy_low_reg, dummy_high_reg,
5279			       reg_dummy, reg_dummy, reg_info_dummy);
5280	  }
5281	  /* Note fall through.	 */
5282
5283
5284	/* Unconditionally jump (without popping any failure points).  */
5285	case jump:
5286	unconditional_jump:
5287#if defined (WINDOWSNT) && defined (emacs)
5288	  QUIT;
5289#endif
5290	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
5291	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5292	  p += mcnt;				/* Do the jump.	 */
5293	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
5294	  break;
5295
5296
5297	/* We need this opcode so we can detect where alternatives end
5298	   in `group_match_null_string_p' et al.  */
5299	case jump_past_alt:
5300	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5301	  goto unconditional_jump;
5302
5303
5304	/* Normally, the on_failure_jump pushes a failure point, which
5305	   then gets popped at pop_failure_jump.  We will end up at
5306	   pop_failure_jump, also, and with a pattern of, say, `a+', we
5307	   are skipping over the on_failure_jump, so we have to push
5308	   something meaningless for pop_failure_jump to pop.  */
5309	case dummy_failure_jump:
5310	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5311	  /* It doesn't matter what we push for the string here.  What
5312	     the code at `fail' tests is the value for the pattern.  */
5313	  PUSH_FAILURE_POINT (0, 0, -2);
5314	  goto unconditional_jump;
5315
5316
5317	/* At the end of an alternative, we need to push a dummy failure
5318	   point in case we are followed by a `pop_failure_jump', because
5319	   we don't want the failure point for the alternative to be
5320	   popped.  For example, matching `(a|ab)*' against `aab'
5321	   requires that we match the `ab' alternative.	 */
5322	case push_dummy_failure:
5323	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5324	  /* See comments just above at `dummy_failure_jump' about the
5325	     two zeroes.  */
5326	  PUSH_FAILURE_POINT (0, 0, -2);
5327	  break;
5328
5329	/* Have to succeed matching what follows at least n times.
5330	   After that, handle like `on_failure_jump'.  */
5331	case succeed_n:
5332	  EXTRACT_NUMBER (mcnt, p + 2);
5333	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5334
5335	  assert (mcnt >= 0);
5336	  /* Originally, this is how many times we HAVE to succeed.  */
5337	  if (mcnt > 0)
5338	    {
5339	       mcnt--;
5340	       p += 2;
5341	       STORE_NUMBER_AND_INCR (p, mcnt);
5342	       DEBUG_PRINT3 ("	Setting 0x%x to %d.\n", p, mcnt);
5343	    }
5344	  else if (mcnt == 0)
5345	    {
5346	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
5347	      p[2] = (unsigned char) no_op;
5348	      p[3] = (unsigned char) no_op;
5349	      goto on_failure;
5350	    }
5351	  break;
5352
5353	case jump_n:
5354	  EXTRACT_NUMBER (mcnt, p + 2);
5355	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5356
5357	  /* Originally, this is how many times we CAN jump.  */
5358	  if (mcnt)
5359	    {
5360	       mcnt--;
5361	       STORE_NUMBER (p + 2, mcnt);
5362	       goto unconditional_jump;
5363	    }
5364	  /* If don't have to jump any more, skip over the rest of command.  */
5365	  else
5366	    p += 4;
5367	  break;
5368
5369	case set_number_at:
5370	  {
5371	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5372
5373	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5374	    p1 = p + mcnt;
5375	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5376	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
5377	    STORE_NUMBER (p1, mcnt);
5378	    break;
5379	  }
5380
5381	case wordbound:
5382	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5383
5384	  /* We SUCCEED in one of the following cases: */
5385
5386	  /* Case 1: D is at the beginning or the end of string.  */
5387	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5388	    break;
5389	  else
5390	    {
5391	      /* C1 is the character before D, S1 is the syntax of C1, C2
5392		 is the character at D, and S2 is the syntax of C2.  */
5393	      int c1, c2, s1, s2;
5394	      int pos1 = PTR_TO_OFFSET (d - 1);
5395	      int charpos;
5396
5397	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5398	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5399#ifdef emacs
5400	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5401	      UPDATE_SYNTAX_TABLE (charpos);
5402#endif
5403	      s1 = SYNTAX (c1);
5404#ifdef emacs
5405	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5406#endif
5407	      s2 = SYNTAX (c2);
5408
5409	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
5410		  ((s1 == Sword) != (s2 == Sword))
5411		  /* Case 3: Both of S1 and S2 are Sword, and macro
5412		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
5413		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5414	    break;
5415	}
5416	  goto fail;
5417
5418      case notwordbound:
5419	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5420
5421	  /* We FAIL in one of the following cases: */
5422
5423	  /* Case 1: D is at the beginning or the end of string.  */
5424	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5425	    goto fail;
5426	  else
5427	    {
5428	      /* C1 is the character before D, S1 is the syntax of C1, C2
5429		 is the character at D, and S2 is the syntax of C2.  */
5430	      int c1, c2, s1, s2;
5431	      int pos1 = PTR_TO_OFFSET (d - 1);
5432	      int charpos;
5433
5434	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5435	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5436#ifdef emacs
5437	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5438	      UPDATE_SYNTAX_TABLE (charpos);
5439#endif
5440	      s1 = SYNTAX (c1);
5441#ifdef emacs
5442	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5443#endif
5444	      s2 = SYNTAX (c2);
5445
5446	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
5447		  ((s1 == Sword) != (s2 == Sword))
5448		  /* Case 3: Both of S1 and S2 are Sword, and macro
5449		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
5450		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5451	    goto fail;
5452	}
5453	  break;
5454
5455	case wordbeg:
5456	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5457
5458	  /* We FAIL in one of the following cases: */
5459
5460	  /* Case 1: D is at the end of string.	 */
5461	  if (AT_STRINGS_END (d))
5462	  goto fail;
5463	  else
5464	    {
5465	      /* C1 is the character before D, S1 is the syntax of C1, C2
5466		 is the character at D, and S2 is the syntax of C2.  */
5467	      int c1, c2, s1, s2;
5468	      int pos1 = PTR_TO_OFFSET (d);
5469	      int charpos;
5470
5471	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5472#ifdef emacs
5473	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5474	      UPDATE_SYNTAX_TABLE (charpos);
5475#endif
5476	      s2 = SYNTAX (c2);
5477
5478	      /* Case 2: S2 is not Sword. */
5479	      if (s2 != Sword)
5480		goto fail;
5481
5482	      /* Case 3: D is not at the beginning of string ... */
5483	      if (!AT_STRINGS_BEG (d))
5484		{
5485		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5486#ifdef emacs
5487		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5488#endif
5489		  s1 = SYNTAX (c1);
5490
5491		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5492		     returns 0.	 */
5493		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5494		    goto fail;
5495		}
5496	    }
5497	  break;
5498
5499	case wordend:
5500	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
5501
5502	  /* We FAIL in one of the following cases: */
5503
5504	  /* Case 1: D is at the beginning of string.  */
5505	  if (AT_STRINGS_BEG (d))
5506	    goto fail;
5507	  else
5508	    {
5509	      /* C1 is the character before D, S1 is the syntax of C1, C2
5510		 is the character at D, and S2 is the syntax of C2.  */
5511	      int c1, c2, s1, s2;
5512	      int pos1 = PTR_TO_OFFSET (d);
5513	      int charpos;
5514
5515	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5516#ifdef emacs
5517	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
5518	      UPDATE_SYNTAX_TABLE (charpos);
5519#endif
5520	      s1 = SYNTAX (c1);
5521
5522	      /* Case 2: S1 is not Sword.  */
5523	      if (s1 != Sword)
5524		goto fail;
5525
5526	      /* Case 3: D is not at the end of string ... */
5527	      if (!AT_STRINGS_END (d))
5528		{
5529		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5530#ifdef emacs
5531		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5532#endif
5533		  s2 = SYNTAX (c2);
5534
5535		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5536		     returns 0.	 */
5537		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5538	  goto fail;
5539		}
5540	    }
5541	  break;
5542
5543#ifdef emacs
5544	case before_dot:
5545	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5546	  if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
5547	    goto fail;
5548	  break;
5549
5550	case at_dot:
5551	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5552	  if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
5553	    goto fail;
5554	  break;
5555
5556	case after_dot:
5557	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5558	  if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
5559	    goto fail;
5560	  break;
5561
5562	case syntaxspec:
5563	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5564	  mcnt = *p++;
5565	  goto matchsyntax;
5566
5567	case wordchar:
5568	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5569	  mcnt = (int) Sword;
5570	matchsyntax:
5571	  PREFETCH ();
5572#ifdef emacs
5573	  {
5574	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5575	    UPDATE_SYNTAX_TABLE (pos1);
5576	  }
5577#endif
5578	  {
5579	    int c, len;
5580
5581	    if (multibyte)
5582	      /* we must concern about multibyte form, ... */
5583	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5584	    else
5585	      /* everything should be handled as ASCII, even though it
5586		 looks like multibyte form.  */
5587	      c = *d, len = 1;
5588
5589	    if (SYNTAX (c) != (enum syntaxcode) mcnt)
5590	    goto fail;
5591	    d += len;
5592	  }
5593	  SET_REGS_MATCHED ();
5594	  break;
5595
5596	case notsyntaxspec:
5597	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5598	  mcnt = *p++;
5599	  goto matchnotsyntax;
5600
5601	case notwordchar:
5602	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5603	  mcnt = (int) Sword;
5604	matchnotsyntax:
5605	  PREFETCH ();
5606#ifdef emacs
5607	  {
5608	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5609	    UPDATE_SYNTAX_TABLE (pos1);
5610	  }
5611#endif
5612	  {
5613	    int c, len;
5614
5615	    if (multibyte)
5616	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5617	    else
5618	      c = *d, len = 1;
5619
5620	    if (SYNTAX (c) == (enum syntaxcode) mcnt)
5621	    goto fail;
5622	    d += len;
5623	  }
5624	  SET_REGS_MATCHED ();
5625	  break;
5626
5627	case categoryspec:
5628	  DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
5629	  mcnt = *p++;
5630	  PREFETCH ();
5631	  {
5632	    int c, len;
5633
5634	    if (multibyte)
5635	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5636	    else
5637	      c = *d, len = 1;
5638
5639	    if (!CHAR_HAS_CATEGORY (c, mcnt))
5640	      goto fail;
5641	    d += len;
5642	  }
5643	  SET_REGS_MATCHED ();
5644	  break;
5645
5646	case notcategoryspec:
5647	  DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
5648	  mcnt = *p++;
5649	  PREFETCH ();
5650	  {
5651	    int c, len;
5652
5653	    if (multibyte)
5654	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5655	    else
5656	      c = *d, len = 1;
5657
5658	    if (CHAR_HAS_CATEGORY (c, mcnt))
5659	      goto fail;
5660	    d += len;
5661	  }
5662	  SET_REGS_MATCHED ();
5663          break;
5664
5665#else /* not emacs */
5666	case wordchar:
5667          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5668	  PREFETCH ();
5669          if (!WORDCHAR_P (d))
5670            goto fail;
5671	  SET_REGS_MATCHED ();
5672          d++;
5673	  break;
5674
5675	case notwordchar:
5676          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5677	  PREFETCH ();
5678	  if (WORDCHAR_P (d))
5679            goto fail;
5680          SET_REGS_MATCHED ();
5681          d++;
5682	  break;
5683#endif /* not emacs */
5684
5685        default:
5686          abort ();
5687	}
5688      continue;  /* Successfully executed one pattern command; keep going.  */
5689
5690
5691    /* We goto here if a matching operation fails. */
5692    fail:
5693#if defined (WINDOWSNT) && defined (emacs)
5694      QUIT;
5695#endif
5696      if (!FAIL_STACK_EMPTY ())
5697	{ /* A restart point is known.  Restore to that state.  */
5698          DEBUG_PRINT1 ("\nFAIL:\n");
5699          POP_FAILURE_POINT (d, p,
5700                             lowest_active_reg, highest_active_reg,
5701                             regstart, regend, reg_info);
5702
5703          /* If this failure point is a dummy, try the next one.  */
5704          if (!p)
5705	    goto fail;
5706
5707          /* If we failed to the end of the pattern, don't examine *p.  */
5708	  assert (p <= pend);
5709          if (p < pend)
5710            {
5711              boolean is_a_jump_n = false;
5712
5713              /* If failed to a backwards jump that's part of a repetition
5714                 loop, need to pop this failure point and use the next one.  */
5715              switch ((re_opcode_t) *p)
5716                {
5717                case jump_n:
5718                  is_a_jump_n = true;
5719                case maybe_pop_jump:
5720                case pop_failure_jump:
5721                case jump:
5722                  p1 = p + 1;
5723                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5724                  p1 += mcnt;
5725
5726                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5727                      || (!is_a_jump_n
5728                          && (re_opcode_t) *p1 == on_failure_jump))
5729                    goto fail;
5730                  break;
5731                default:
5732                  /* do nothing */ ;
5733                }
5734            }
5735
5736          if (d >= string1 && d <= end1)
5737	    dend = end_match_1;
5738        }
5739      else
5740        break;   /* Matching at this starting point really fails.  */
5741    } /* for (;;) */
5742
5743  if (best_regs_set)
5744    goto restore_best_regs;
5745
5746  FREE_VARIABLES ();
5747
5748  return -1;         			/* Failure to match.  */
5749} /* re_match_2 */
5750
5751/* Subroutine definitions for re_match_2.  */
5752
5753
5754/* We are passed P pointing to a register number after a start_memory.
5755
5756   Return true if the pattern up to the corresponding stop_memory can
5757   match the empty string, and false otherwise.
5758
5759   If we find the matching stop_memory, sets P to point to one past its number.
5760   Otherwise, sets P to an undefined byte less than or equal to END.
5761
5762   We don't handle duplicates properly (yet).  */
5763
5764static boolean
5765group_match_null_string_p (p, end, reg_info)
5766    unsigned char **p, *end;
5767    register_info_type *reg_info;
5768{
5769  int mcnt;
5770  /* Point to after the args to the start_memory.  */
5771  unsigned char *p1 = *p + 2;
5772
5773  while (p1 < end)
5774    {
5775      /* Skip over opcodes that can match nothing, and return true or
5776	 false, as appropriate, when we get to one that can't, or to the
5777         matching stop_memory.  */
5778
5779      switch ((re_opcode_t) *p1)
5780        {
5781        /* Could be either a loop or a series of alternatives.  */
5782        case on_failure_jump:
5783          p1++;
5784          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5785
5786          /* If the next operation is not a jump backwards in the
5787	     pattern.  */
5788
5789	  if (mcnt >= 0)
5790	    {
5791              /* Go through the on_failure_jumps of the alternatives,
5792                 seeing if any of the alternatives cannot match nothing.
5793                 The last alternative starts with only a jump,
5794                 whereas the rest start with on_failure_jump and end
5795                 with a jump, e.g., here is the pattern for `a|b|c':
5796
5797                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5798                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5799                 /exactn/1/c
5800
5801                 So, we have to first go through the first (n-1)
5802                 alternatives and then deal with the last one separately.  */
5803
5804
5805              /* Deal with the first (n-1) alternatives, which start
5806                 with an on_failure_jump (see above) that jumps to right
5807                 past a jump_past_alt.  */
5808
5809              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5810                {
5811                  /* `mcnt' holds how many bytes long the alternative
5812                     is, including the ending `jump_past_alt' and
5813                     its number.  */
5814
5815                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5816				                      reg_info))
5817                    return false;
5818
5819                  /* Move to right after this alternative, including the
5820		     jump_past_alt.  */
5821                  p1 += mcnt;
5822
5823                  /* Break if it's the beginning of an n-th alternative
5824                     that doesn't begin with an on_failure_jump.  */
5825                  if ((re_opcode_t) *p1 != on_failure_jump)
5826                    break;
5827
5828		  /* Still have to check that it's not an n-th
5829		     alternative that starts with an on_failure_jump.  */
5830		  p1++;
5831                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5832                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5833                    {
5834		      /* Get to the beginning of the n-th alternative.  */
5835                      p1 -= 3;
5836                      break;
5837                    }
5838                }
5839
5840              /* Deal with the last alternative: go back and get number
5841                 of the `jump_past_alt' just before it.  `mcnt' contains
5842                 the length of the alternative.  */
5843              EXTRACT_NUMBER (mcnt, p1 - 2);
5844
5845              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5846                return false;
5847
5848              p1 += mcnt;	/* Get past the n-th alternative.  */
5849            } /* if mcnt > 0 */
5850          break;
5851
5852
5853        case stop_memory:
5854	  assert (p1[1] == **p);
5855          *p = p1 + 2;
5856          return true;
5857
5858
5859        default:
5860          if (!common_op_match_null_string_p (&p1, end, reg_info))
5861            return false;
5862        }
5863    } /* while p1 < end */
5864
5865  return false;
5866} /* group_match_null_string_p */
5867
5868
5869/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5870   It expects P to be the first byte of a single alternative and END one
5871   byte past the last. The alternative can contain groups.  */
5872
5873static boolean
5874alt_match_null_string_p (p, end, reg_info)
5875    unsigned char *p, *end;
5876    register_info_type *reg_info;
5877{
5878  int mcnt;
5879  unsigned char *p1 = p;
5880
5881  while (p1 < end)
5882    {
5883      /* Skip over opcodes that can match nothing, and break when we get
5884         to one that can't.  */
5885
5886      switch ((re_opcode_t) *p1)
5887        {
5888	/* It's a loop.  */
5889        case on_failure_jump:
5890          p1++;
5891          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5892          p1 += mcnt;
5893          break;
5894
5895	default:
5896          if (!common_op_match_null_string_p (&p1, end, reg_info))
5897            return false;
5898        }
5899    }  /* while p1 < end */
5900
5901  return true;
5902} /* alt_match_null_string_p */
5903
5904
5905/* Deals with the ops common to group_match_null_string_p and
5906   alt_match_null_string_p.
5907
5908   Sets P to one after the op and its arguments, if any.  */
5909
5910static boolean
5911common_op_match_null_string_p (p, end, reg_info)
5912    unsigned char **p, *end;
5913    register_info_type *reg_info;
5914{
5915  int mcnt;
5916  boolean ret;
5917  int reg_no;
5918  unsigned char *p1 = *p;
5919
5920  switch ((re_opcode_t) *p1++)
5921    {
5922    case no_op:
5923    case begline:
5924    case endline:
5925    case begbuf:
5926    case endbuf:
5927    case wordbeg:
5928    case wordend:
5929    case wordbound:
5930    case notwordbound:
5931#ifdef emacs
5932    case before_dot:
5933    case at_dot:
5934    case after_dot:
5935#endif
5936      break;
5937
5938    case start_memory:
5939      reg_no = *p1;
5940      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5941      ret = group_match_null_string_p (&p1, end, reg_info);
5942
5943      /* Have to set this here in case we're checking a group which
5944         contains a group and a back reference to it.  */
5945
5946      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5947        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5948
5949      if (!ret)
5950        return false;
5951      break;
5952
5953    /* If this is an optimized succeed_n for zero times, make the jump.  */
5954    case jump:
5955      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5956      if (mcnt >= 0)
5957        p1 += mcnt;
5958      else
5959        return false;
5960      break;
5961
5962    case succeed_n:
5963      /* Get to the number of times to succeed.  */
5964      p1 += 2;
5965      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5966
5967      if (mcnt == 0)
5968        {
5969          p1 -= 4;
5970          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5971          p1 += mcnt;
5972        }
5973      else
5974        return false;
5975      break;
5976
5977    case duplicate:
5978      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5979        return false;
5980      break;
5981
5982    case set_number_at:
5983      p1 += 4;
5984
5985    default:
5986      /* All other opcodes mean we cannot match the empty string.  */
5987      return false;
5988  }
5989
5990  *p = p1;
5991  return true;
5992} /* common_op_match_null_string_p */
5993
5994
5995/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5996   bytes; nonzero otherwise.  */
5997
5998static int
5999bcmp_translate (s1, s2, len, translate)
6000     unsigned char *s1, *s2;
6001     register int len;
6002     RE_TRANSLATE_TYPE translate;
6003{
6004  register unsigned char *p1 = s1, *p2 = s2;
6005  unsigned char *p1_end = s1 + len;
6006  unsigned char *p2_end = s2 + len;
6007
6008  while (p1 != p1_end && p2 != p2_end)
6009    {
6010      int p1_charlen, p2_charlen;
6011      int p1_ch, p2_ch;
6012
6013      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
6014      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
6015
6016      if (RE_TRANSLATE (translate, p1_ch)
6017	  != RE_TRANSLATE (translate, p2_ch))
6018	return 1;
6019
6020      p1 += p1_charlen, p2 += p2_charlen;
6021    }
6022
6023  if (p1 != p1_end || p2 != p2_end)
6024    return 1;
6025
6026  return 0;
6027}
6028
6029/* Entry points for GNU code.  */
6030
6031/* re_compile_pattern is the GNU regular expression compiler: it
6032   compiles PATTERN (of length SIZE) and puts the result in BUFP.
6033   Returns 0 if the pattern was valid, otherwise an error string.
6034
6035   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6036   are set in BUFP on entry.
6037
6038   We call regex_compile to do the actual compilation.  */
6039
6040const char *
6041re_compile_pattern (pattern, length, bufp)
6042     const char *pattern;
6043     int length;
6044     struct re_pattern_buffer *bufp;
6045{
6046  reg_errcode_t ret;
6047
6048  /* GNU code is written to assume at least RE_NREGS registers will be set
6049     (and at least one extra will be -1).  */
6050  bufp->regs_allocated = REGS_UNALLOCATED;
6051
6052  /* And GNU code determines whether or not to get register information
6053     by passing null for the REGS argument to re_match, etc., not by
6054     setting no_sub.  */
6055  bufp->no_sub = 0;
6056
6057  /* Match anchors at newline.  */
6058  bufp->newline_anchor = 1;
6059
6060  ret = regex_compile (pattern, length, re_syntax_options, bufp);
6061
6062  if (!ret)
6063    return NULL;
6064  return gettext (re_error_msgid[(int) ret]);
6065}
6066
6067/* Entry points compatible with 4.2 BSD regex library.  We don't define
6068   them unless specifically requested.  */
6069
6070#if defined (_REGEX_RE_COMP) || defined (_LIBC)
6071
6072/* BSD has one and only one pattern buffer.  */
6073static struct re_pattern_buffer re_comp_buf;
6074
6075char *
6076#ifdef _LIBC
6077/* Make these definitions weak in libc, so POSIX programs can redefine
6078   these names if they don't use our functions, and still use
6079   regcomp/regexec below without link errors.  */
6080weak_function
6081#endif
6082re_comp (s)
6083    const char *s;
6084{
6085  reg_errcode_t ret;
6086
6087  if (!s)
6088    {
6089      if (!re_comp_buf.buffer)
6090	return gettext ("No previous regular expression");
6091      return 0;
6092    }
6093
6094  if (!re_comp_buf.buffer)
6095    {
6096      re_comp_buf.buffer = (unsigned char *) malloc (200);
6097      if (re_comp_buf.buffer == NULL)
6098        /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6099        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6100      re_comp_buf.allocated = 200;
6101
6102      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6103      if (re_comp_buf.fastmap == NULL)
6104	/* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6105	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6106    }
6107
6108  /* Since `re_exec' always passes NULL for the `regs' argument, we
6109     don't need to initialize the pattern buffer fields which affect it.  */
6110
6111  /* Match anchors at newlines.  */
6112  re_comp_buf.newline_anchor = 1;
6113
6114  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6115
6116  if (!ret)
6117    return NULL;
6118
6119  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6120  return (char *) gettext (re_error_msgid[(int) ret]);
6121}
6122
6123
6124int
6125#ifdef _LIBC
6126weak_function
6127#endif
6128re_exec (s)
6129    const char *s;
6130{
6131  const int len = strlen (s);
6132  return
6133    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6134}
6135#endif /* _REGEX_RE_COMP */
6136
6137/* POSIX.2 functions.  Don't define these for Emacs.  */
6138
6139#ifndef emacs
6140
6141/* regcomp takes a regular expression as a string and compiles it.
6142
6143   PREG is a regex_t *.  We do not expect any fields to be initialized,
6144   since POSIX says we shouldn't.  Thus, we set
6145
6146     `buffer' to the compiled pattern;
6147     `used' to the length of the compiled pattern;
6148     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6149       REG_EXTENDED bit in CFLAGS is set; otherwise, to
6150       RE_SYNTAX_POSIX_BASIC;
6151     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6152     `fastmap' and `fastmap_accurate' to zero;
6153     `re_nsub' to the number of subexpressions in PATTERN.
6154
6155   PATTERN is the address of the pattern string.
6156
6157   CFLAGS is a series of bits which affect compilation.
6158
6159     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6160     use POSIX basic syntax.
6161
6162     If REG_NEWLINE is set, then . and [^...] don't match newline.
6163     Also, regexec will try a match beginning after every newline.
6164
6165     If REG_ICASE is set, then we considers upper- and lowercase
6166     versions of letters to be equivalent when matching.
6167
6168     If REG_NOSUB is set, then when PREG is passed to regexec, that
6169     routine will report only success or failure, and nothing about the
6170     registers.
6171
6172   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6173   the return codes and their meanings.)  */
6174
6175int
6176regcomp (preg, pattern, cflags)
6177    regex_t *preg;
6178    const char *pattern;
6179    int cflags;
6180{
6181  reg_errcode_t ret;
6182  unsigned syntax
6183    = (cflags & REG_EXTENDED) ?
6184      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6185
6186  /* regex_compile will allocate the space for the compiled pattern.  */
6187  preg->buffer = 0;
6188  preg->allocated = 0;
6189  preg->used = 0;
6190
6191  /* Don't bother to use a fastmap when searching.  This simplifies the
6192     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6193     characters after newlines into the fastmap.  This way, we just try
6194     every character.  */
6195  preg->fastmap = 0;
6196
6197  if (cflags & REG_ICASE)
6198    {
6199      unsigned i;
6200
6201      preg->translate
6202	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6203				      * sizeof (*(RE_TRANSLATE_TYPE)0));
6204      if (preg->translate == NULL)
6205        return (int) REG_ESPACE;
6206
6207      /* Map uppercase characters to corresponding lowercase ones.  */
6208      for (i = 0; i < CHAR_SET_SIZE; i++)
6209        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
6210    }
6211  else
6212    preg->translate = NULL;
6213
6214  /* If REG_NEWLINE is set, newlines are treated differently.  */
6215  if (cflags & REG_NEWLINE)
6216    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6217      syntax &= ~RE_DOT_NEWLINE;
6218      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6219      /* It also changes the matching behavior.  */
6220      preg->newline_anchor = 1;
6221    }
6222  else
6223    preg->newline_anchor = 0;
6224
6225  preg->no_sub = !!(cflags & REG_NOSUB);
6226
6227  /* POSIX says a null character in the pattern terminates it, so we
6228     can use strlen here in compiling the pattern.  */
6229  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
6230
6231  /* POSIX doesn't distinguish between an unmatched open-group and an
6232     unmatched close-group: both are REG_EPAREN.  */
6233  if (ret == REG_ERPAREN) ret = REG_EPAREN;
6234
6235  return (int) ret;
6236}
6237
6238
6239/* regexec searches for a given pattern, specified by PREG, in the
6240   string STRING.
6241
6242   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6243   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6244   least NMATCH elements, and we set them to the offsets of the
6245   corresponding matched substrings.
6246
6247   EFLAGS specifies `execution flags' which affect matching: if
6248   REG_NOTBOL is set, then ^ does not match at the beginning of the
6249   string; if REG_NOTEOL is set, then $ does not match at the end.
6250
6251   We return 0 if we find a match and REG_NOMATCH if not.  */
6252
6253int
6254regexec (preg, string, nmatch, pmatch, eflags)
6255    const regex_t *preg;
6256    const char *string;
6257    size_t nmatch;
6258    regmatch_t pmatch[];
6259    int eflags;
6260{
6261  int ret;
6262  struct re_registers regs;
6263  regex_t private_preg;
6264  int len = strlen (string);
6265  boolean want_reg_info = !preg->no_sub && nmatch > 0;
6266
6267  private_preg = *preg;
6268
6269  private_preg.not_bol = !!(eflags & REG_NOTBOL);
6270  private_preg.not_eol = !!(eflags & REG_NOTEOL);
6271
6272  /* The user has told us exactly how many registers to return
6273     information about, via `nmatch'.  We have to pass that on to the
6274     matching routines.  */
6275  private_preg.regs_allocated = REGS_FIXED;
6276
6277  if (want_reg_info)
6278    {
6279      regs.num_regs = nmatch;
6280      regs.start = TALLOC (nmatch, regoff_t);
6281      regs.end = TALLOC (nmatch, regoff_t);
6282      if (regs.start == NULL || regs.end == NULL)
6283        return (int) REG_NOMATCH;
6284    }
6285
6286  /* Perform the searching operation.  */
6287  ret = re_search (&private_preg, string, len,
6288                   /* start: */ 0, /* range: */ len,
6289                   want_reg_info ? &regs : (struct re_registers *) 0);
6290
6291  /* Copy the register information to the POSIX structure.  */
6292  if (want_reg_info)
6293    {
6294      if (ret >= 0)
6295        {
6296          unsigned r;
6297
6298          for (r = 0; r < nmatch; r++)
6299            {
6300              pmatch[r].rm_so = regs.start[r];
6301              pmatch[r].rm_eo = regs.end[r];
6302            }
6303        }
6304
6305      /* If we needed the temporary register info, free the space now.  */
6306      free (regs.start);
6307      free (regs.end);
6308    }
6309
6310  /* We want zero return to mean success, unlike `re_search'.  */
6311  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6312}
6313
6314
6315/* Returns a message corresponding to an error code, ERRCODE, returned
6316   from either regcomp or regexec.   We don't use PREG here.  */
6317
6318size_t
6319regerror (errcode, preg, errbuf, errbuf_size)
6320    int errcode;
6321    const regex_t *preg;
6322    char *errbuf;
6323    size_t errbuf_size;
6324{
6325  const char *msg;
6326  size_t msg_size;
6327
6328  if (errcode < 0
6329      || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6330    /* Only error codes returned by the rest of the code should be passed
6331       to this routine.  If we are given anything else, or if other regex
6332       code generates an invalid error code, then the program has a bug.
6333       Dump core so we can fix it.  */
6334    abort ();
6335
6336  msg = gettext (re_error_msgid[errcode]);
6337
6338  msg_size = strlen (msg) + 1; /* Includes the null.  */
6339
6340  if (errbuf_size != 0)
6341    {
6342      if (msg_size > errbuf_size)
6343        {
6344          strncpy (errbuf, msg, errbuf_size - 1);
6345          errbuf[errbuf_size - 1] = 0;
6346        }
6347      else
6348        strcpy (errbuf, msg);
6349    }
6350
6351  return msg_size;
6352}
6353
6354
6355/* Free dynamically allocated space used by PREG.  */
6356
6357void
6358regfree (preg)
6359    regex_t *preg;
6360{
6361  if (preg->buffer != NULL)
6362    free (preg->buffer);
6363  preg->buffer = NULL;
6364
6365  preg->allocated = 0;
6366  preg->used = 0;
6367
6368  if (preg->fastmap != NULL)
6369    free (preg->fastmap);
6370  preg->fastmap = NULL;
6371  preg->fastmap_accurate = 0;
6372
6373  if (preg->translate != NULL)
6374    free (preg->translate);
6375  preg->translate = NULL;
6376}
6377
6378#endif /* not emacs  */
6379