1/*
2 * regex.c -- POSIX-conformant regular expression function set for the lsof
3 *	      library
4 *
5 * This file is used when the UNIX dialect does not have a POSIX-conformant
6 * regular expression function set.  In that case USE_LIB_REGEX is defined.
7 *
8 * V. Abell <abe@purdue.edu>
9 * Purdue University Computing Center
10 */
11
12
13/*
14 * Copyright 2000 Purdue Research Foundation, West Lafayette, Indiana
15 * 47907.  All rights reserved.
16 *
17 * Written by Victor A. Abell
18 *
19 * This software is not subject to any license of the American Telephone
20 * and Telegraph Company or the Regents of the University of California.
21 *
22 * This software has been adapted from snprintf.c in sendmail 8.9.3.  It
23 * is subject to the sendmail copyright statements listed below, and the
24 * sendmail licensing terms stated in the sendmail LICENSE file comment
25 * section of this file.
26 *
27 * Permission is granted to anyone to use this software for any purpose on
28 * any computer system, and to alter it and redistribute it freely, subject
29 * to the following restrictions:
30 *
31 * 1. Neither the authors nor Purdue University are responsible for any
32 *    consequences of the use of this software.
33 *
34 * 2. The origin of this software must not be misrepresented, either by
35 *    explicit claim or by omission.  Credit to the authors and Purdue
36 *    University must appear in documentation and sources.
37 *
38 * 3. Altered versions must be plainly marked as such, and must not be
39 *    misrepresented as being the original software.
40 *
41 * 4. This notice may not be removed or altered.
42 */
43
44
45#include "../machine.h"
46
47#ifdef	USE_LIB_REGEX
48/*
49 * This file comes from GLIBC 2.2.  It is used when the UNIX dialect does not
50 * have a POSIX-conformant regular expression function set.  In that case
51 * USE_LIB_REGEX is defined.
52 */
53
54/* Extended regular expression matching and search library,
55   version 0.12.
56   (Implements POSIX draft P1003.2/D11.2, except for some of the
57   internationalization features.)
58   Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc.
59
60   The GNU C Library is free software; you can redistribute it and/or
61   modify it under the terms of the GNU Library General Public License as
62   published by the Free Software Foundation; either version 2 of the
63   License, or (at your option) any later version.
64
65   The GNU C Library is distributed in the hope that it will be useful,
66   but WITHOUT ANY WARRANTY; without even the implied warranty of
67   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
68   Library General Public License for more details.
69
70   You should have received a copy of the GNU Library General Public
71   License along with the GNU C Library; see the file COPYING.LIB.  If not,
72   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
73   Boston, MA 02111-1307, USA.  */
74
75/* AIX requires this to be the first thing in the file. */
76#if defined _AIX && !defined REGEX_MALLOC
77  #pragma alloca
78#endif
79
80#undef	_GNU_SOURCE
81#define _GNU_SOURCE
82
83#ifdef HAVE_CONFIG_H
84# include <config.h>
85#endif
86
87#ifndef PARAMS
88# if defined __GNUC__ || (defined __STDC__ && __STDC__)
89#  define PARAMS(args) args
90# else
91#  define PARAMS(args) ()
92# endif  /* GCC.  */
93#endif  /* Not PARAMS.  */
94
95#if defined STDC_HEADERS && !defined emacs
96# include <stddef.h>
97#else
98/* We need this for `regex.h', and perhaps for the Emacs include files.  */
99# include <sys/types.h>
100#endif
101
102#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
103
104/* For platform which support the ISO C amendement 1 functionality we
105   support user defined character classes.  */
106#if defined _LIBC || WIDE_CHAR_SUPPORT
107/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
108# include <wchar.h>
109# include <wctype.h>
110#endif
111
112#ifdef _LIBC
113/* We have to keep the namespace clean.  */
114# define regfree(preg) __regfree (preg)
115# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
116# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
117# define regerror(errcode, preg, errbuf, errbuf_size) \
118	__regerror(errcode, preg, errbuf, errbuf_size)
119# define re_set_registers(bu, re, nu, st, en) \
120	__re_set_registers (bu, re, nu, st, en)
121# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
122	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
123# define re_match(bufp, string, size, pos, regs) \
124	__re_match (bufp, string, size, pos, regs)
125# define re_search(bufp, string, size, startpos, range, regs) \
126	__re_search (bufp, string, size, startpos, range, regs)
127# define re_compile_pattern(pattern, length, bufp) \
128	__re_compile_pattern (pattern, length, bufp)
129# define re_set_syntax(syntax) __re_set_syntax (syntax)
130# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
131	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
132# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
133
134# define btowc __btowc
135
136/* We are also using some library internals.  */
137# include <locale/localeinfo.h>
138# include <locale/elem-hash.h>
139# include <langinfo.h>
140#endif
141
142/* This is for other GNU distributions with internationalized messages.  */
143#if HAVE_LIBINTL_H || defined _LIBC
144# include <libintl.h>
145# ifdef _LIBC
146#  undef gettext
147#  define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
148# endif
149#else
150# define gettext(msgid) (msgid)
151#endif
152
153#ifndef gettext_noop
154/* This define is so xgettext can find the internationalizable
155   strings.  */
156# define gettext_noop(String) String
157#endif
158
159/* The `emacs' switch turns on certain matching commands
160   that make sense only in Emacs. */
161#ifdef emacs
162
163# include "lisp.h"
164# include "buffer.h"
165# include "syntax.h"
166
167#else  /* not emacs */
168
169/* If we are not linking with Emacs proper,
170   we can't use the relocating allocator
171   even if config.h says that we can.  */
172# undef REL_ALLOC
173
174# if defined STDC_HEADERS || defined _LIBC
175#  include <stdlib.h>
176# else
177char *malloc ();
178char *realloc ();
179# endif
180
181/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
182   If nothing else has been done, use the method below.  */
183# ifdef INHIBIT_STRING_HEADER
184#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
185#   if !defined bzero && !defined bcopy
186#    undef INHIBIT_STRING_HEADER
187#   endif
188#  endif
189# endif
190
191/* This is the normal way of making sure we have a bcopy and a bzero.
192   This is used in most programs--a few other programs avoid this
193   by defining INHIBIT_STRING_HEADER.  */
194# ifndef INHIBIT_STRING_HEADER
195#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
196#   include <string.h>
197#   ifndef bzero
198#    ifndef _LIBC
199#     define bzero(s, n)	(memset (s, '\0', n), (s))
200#    else
201#     define bzero(s, n)	__bzero (s, n)
202#    endif
203#   endif
204#  else
205#   include <strings.h>
206#   ifndef memcmp
207#    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
208#   endif
209#   ifndef memcpy
210#    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
211#   endif
212#  endif
213# endif
214
215/* Define the syntax stuff for \<, \>, etc.  */
216
217/* This must be nonzero for the wordchar and notwordchar pattern
218   commands in re_match_2.  */
219# ifndef Sword
220#  define Sword 1
221# endif
222
223# ifdef SWITCH_ENUM_BUG
224#  define SWITCH_ENUM_CAST(x) ((int)(x))
225# else
226#  define SWITCH_ENUM_CAST(x) (x)
227# endif
228
229#endif /* not emacs */
230
231#if defined _LIBC || HAVE_LIMITS_H
232# include <limits.h>
233#endif
234
235#ifndef MB_LEN_MAX
236# define MB_LEN_MAX 1
237#endif
238
239/* Get the interface, including the syntax bits.  */
240/* Disabled by V. Abell on January 29, 2001: #include <regex.h> */
241#include "../regex.h"
242
243/* isalpha etc. are used for the character classes.  */
244#include <ctype.h>
245
246/* Jim Meyering writes:
247
248   "... Some ctype macros are valid only for character codes that
249   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
250   using /bin/cc or gcc but without giving an ansi option).  So, all
251   ctype uses should be through macros like ISPRINT...  If
252   STDC_HEADERS is defined, then autoconf has verified that the ctype
253   macros don't need to be guarded with references to isascii. ...
254   Defining isascii to 1 should let any compiler worth its salt
255   eliminate the && through constant folding."
256   Solaris defines some of these symbols so we must undefine them first.  */
257
258#undef ISASCII
259#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
260# define ISASCII(c) 1
261#else
262# define ISASCII(c) isascii(c)
263#endif
264
265#ifdef isblank
266# define ISBLANK(c) (ISASCII (c) && isblank (c))
267#else
268# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
269#endif
270#ifdef isgraph
271# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
272#else
273# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
274#endif
275
276#undef ISPRINT
277#define ISPRINT(c) (ISASCII (c) && isprint (c))
278#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
279#define ISALNUM(c) (ISASCII (c) && isalnum (c))
280#define ISALPHA(c) (ISASCII (c) && isalpha (c))
281#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
282#define ISLOWER(c) (ISASCII (c) && islower (c))
283#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
284#define ISSPACE(c) (ISASCII (c) && isspace (c))
285#define ISUPPER(c) (ISASCII (c) && isupper (c))
286#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
287
288#ifdef _tolower
289# define TOLOWER(c) _tolower(c)
290#else
291# define TOLOWER(c) tolower(c)
292#endif
293
294#ifndef NULL
295# define NULL (void *)0
296#endif
297
298/* We remove any previous definition of `SIGN_EXTEND_CHAR',
299   since ours (we hope) works properly with all combinations of
300   machines, compilers, `char' and `unsigned char' argument types.
301   (Per Bothner suggested the basic approach.)  */
302#undef SIGN_EXTEND_CHAR
303#if __STDC__
304# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
305#else  /* not __STDC__ */
306/* As in Harbison and Steele.  */
307# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
308#endif
309
310#ifndef emacs
311/* How many characters in the character set.  */
312# define CHAR_SET_SIZE 256
313
314# ifdef SYNTAX_TABLE
315
316extern char *re_syntax_table;
317
318# else /* not SYNTAX_TABLE */
319
320static char re_syntax_table[CHAR_SET_SIZE];
321
322static void
323init_syntax_once ()
324{
325   register int c;
326   static int done = 0;
327
328   if (done)
329     return;
330   bzero (re_syntax_table, sizeof re_syntax_table);
331
332   for (c = 0; c < CHAR_SET_SIZE; ++c)
333     if (ISALNUM (c))
334	re_syntax_table[c] = Sword;
335
336   re_syntax_table['_'] = Sword;
337
338   done = 1;
339}
340
341# endif /* not SYNTAX_TABLE */
342
343# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
344
345#endif /* emacs */
346
347/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
348   use `alloca' instead of `malloc'.  This is because using malloc in
349   re_search* or re_match* could cause memory leaks when C-g is used in
350   Emacs; also, malloc is slower and causes storage fragmentation.  On
351   the other hand, malloc is more portable, and easier to debug.
352
353   Because we sometimes use alloca, some routines have to be macros,
354   not functions -- `alloca'-allocated space disappears at the end of the
355   function it is called in.  */
356
357#ifdef REGEX_MALLOC
358
359# define REGEX_ALLOCATE malloc
360# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
361# define REGEX_FREE free
362
363#else /* not REGEX_MALLOC  */
364
365/* Emacs already defines alloca, sometimes.  */
366# ifndef alloca
367
368/* Make alloca work the best possible way.  */
369#  ifdef __GNUC__
370#   define alloca __builtin_alloca
371#  else /* not __GNUC__ */
372#   if HAVE_ALLOCA_H
373#    include <alloca.h>
374#   endif /* HAVE_ALLOCA_H */
375#  endif /* not __GNUC__ */
376
377# endif /* not alloca */
378
379# define REGEX_ALLOCATE alloca
380
381/* Assumes a `char *destination' variable.  */
382# define REGEX_REALLOCATE(source, osize, nsize)				\
383  (destination = (char *) alloca (nsize),				\
384   memcpy (destination, source, osize))
385
386/* No need to do anything to free, after alloca.  */
387# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
388
389#endif /* not REGEX_MALLOC */
390
391/* Define how to allocate the failure stack.  */
392
393#if defined REL_ALLOC && defined REGEX_MALLOC
394
395# define REGEX_ALLOCATE_STACK(size)				\
396  r_alloc (&failure_stack_ptr, (size))
397# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
398  r_re_alloc (&failure_stack_ptr, (nsize))
399# define REGEX_FREE_STACK(ptr)					\
400  r_alloc_free (&failure_stack_ptr)
401
402#else /* not using relocating allocator */
403
404# ifdef REGEX_MALLOC
405
406#  define REGEX_ALLOCATE_STACK malloc
407#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
408#  define REGEX_FREE_STACK free
409
410# else /* not REGEX_MALLOC */
411
412#  define REGEX_ALLOCATE_STACK alloca
413
414#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
415   REGEX_REALLOCATE (source, osize, nsize)
416/* No need to explicitly free anything.  */
417#  define REGEX_FREE_STACK(arg)
418
419# endif /* not REGEX_MALLOC */
420#endif /* not using relocating allocator */
421
422
423/* True if `size1' is non-NULL and PTR is pointing anywhere inside
424   `string1' or just past its end.  This works if PTR is NULL, which is
425   a good thing.  */
426#define FIRST_STRING_P(ptr) 					\
427  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
428
429/* (Re)Allocate N items of type T using malloc, or fail.  */
430#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
431#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
432#define RETALLOC_IF(addr, n, t) \
433  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
434#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
435
436#define BYTEWIDTH 8 /* In bits.  */
437
438#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
439
440#undef MAX
441#undef MIN
442#define MAX(a, b) ((a) > (b) ? (a) : (b))
443#define MIN(a, b) ((a) < (b) ? (a) : (b))
444
445typedef char boolean;
446#define false 0
447#define true 1
448
449static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
450					const char *string1, int size1,
451					const char *string2, int size2,
452					int pos,
453					struct re_registers *regs,
454					int stop));
455
456/* These are the command codes that appear in compiled regular
457   expressions.  Some opcodes are followed by argument bytes.  A
458   command code can specify any interpretation whatsoever for its
459   arguments.  Zero bytes may appear in the compiled regular expression.  */
460
461typedef enum
462{
463  no_op = 0,
464
465  /* Succeed right away--no more backtracking.  */
466  succeed,
467
468        /* Followed by one byte giving n, then by n literal bytes.  */
469  exactn,
470
471        /* Matches any (more or less) character.  */
472  anychar,
473
474        /* Matches any one char belonging to specified set.  First
475           following byte is number of bitmap bytes.  Then come bytes
476           for a bitmap saying which chars are in.  Bits in each byte
477           are ordered low-bit-first.  A character is in the set if its
478           bit is 1.  A character too large to have a bit in the map is
479           automatically not in the set.  */
480  charset,
481
482        /* Same parameters as charset, but match any character that is
483           not one of those specified.  */
484  charset_not,
485
486        /* Start remembering the text that is matched, for storing in a
487           register.  Followed by one byte with the register number, in
488           the range 0 to one less than the pattern buffer's re_nsub
489           field.  Then followed by one byte with the number of groups
490           inner to this one.  (This last has to be part of the
491           start_memory only because we need it in the on_failure_jump
492           of re_match_2.)  */
493  start_memory,
494
495        /* Stop remembering the text that is matched and store it in a
496           memory register.  Followed by one byte with the register
497           number, in the range 0 to one less than `re_nsub' in the
498           pattern buffer, and one byte with the number of inner groups,
499           just like `start_memory'.  (We need the number of inner
500           groups here because we don't have any easy way of finding the
501           corresponding start_memory when we're at a stop_memory.)  */
502  stop_memory,
503
504        /* Match a duplicate of something remembered. Followed by one
505           byte containing the register number.  */
506  duplicate,
507
508        /* Fail unless at beginning of line.  */
509  begline,
510
511        /* Fail unless at end of line.  */
512  endline,
513
514        /* Succeeds if at beginning of buffer (if emacs) or at beginning
515           of string to be matched (if not).  */
516  begbuf,
517
518        /* Analogously, for end of buffer/string.  */
519  endbuf,
520
521        /* Followed by two byte relative address to which to jump.  */
522  jump,
523
524	/* Same as jump, but marks the end of an alternative.  */
525  jump_past_alt,
526
527        /* Followed by two-byte relative address of place to resume at
528           in case of failure.  */
529  on_failure_jump,
530
531        /* Like on_failure_jump, but pushes a placeholder instead of the
532           current string position when executed.  */
533  on_failure_keep_string_jump,
534
535        /* Throw away latest failure point and then jump to following
536           two-byte relative address.  */
537  pop_failure_jump,
538
539        /* Change to pop_failure_jump if know won't have to backtrack to
540           match; otherwise change to jump.  This is used to jump
541           back to the beginning of a repeat.  If what follows this jump
542           clearly won't match what the repeat does, such that we can be
543           sure that there is no use backtracking out of repetitions
544           already matched, then we change it to a pop_failure_jump.
545           Followed by two-byte address.  */
546  maybe_pop_jump,
547
548        /* Jump to following two-byte address, and push a dummy failure
549           point. This failure point will be thrown away if an attempt
550           is made to use it for a failure.  A `+' construct makes this
551           before the first repeat.  Also used as an intermediary kind
552           of jump when compiling an alternative.  */
553  dummy_failure_jump,
554
555	/* Push a dummy failure point and continue.  Used at the end of
556	   alternatives.  */
557  push_dummy_failure,
558
559        /* Followed by two-byte relative address and two-byte number n.
560           After matching N times, jump to the address upon failure.  */
561  succeed_n,
562
563        /* Followed by two-byte relative address, and two-byte number n.
564           Jump to the address N times, then fail.  */
565  jump_n,
566
567        /* Set the following two-byte relative address to the
568           subsequent two-byte number.  The address *includes* the two
569           bytes of number.  */
570  set_number_at,
571
572  wordchar,	/* Matches any word-constituent character.  */
573  notwordchar,	/* Matches any char that is not a word-constituent.  */
574
575  wordbeg,	/* Succeeds if at word beginning.  */
576  wordend,	/* Succeeds if at word end.  */
577
578  wordbound,	/* Succeeds if at a word boundary.  */
579  notwordbound	/* Succeeds if not at a word boundary.  */
580
581#ifdef emacs
582  ,before_dot,	/* Succeeds if before point.  */
583  at_dot,	/* Succeeds if at point.  */
584  after_dot,	/* Succeeds if after point.  */
585
586	/* Matches any character whose syntax is specified.  Followed by
587           a byte which contains a syntax code, e.g., Sword.  */
588  syntaxspec,
589
590	/* Matches any character whose syntax is not that specified.  */
591  notsyntaxspec
592#endif /* emacs */
593} re_opcode_t;
594
595/* Common operations on the compiled pattern.  */
596
597/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
598
599#define STORE_NUMBER(destination, number)				\
600  do {									\
601    (destination)[0] = (number) & 0377;					\
602    (destination)[1] = (number) >> 8;					\
603  } while (0)
604
605/* Same as STORE_NUMBER, except increment DESTINATION to
606   the byte after where the number is stored.  Therefore, DESTINATION
607   must be an lvalue.  */
608
609#define STORE_NUMBER_AND_INCR(destination, number)			\
610  do {									\
611    STORE_NUMBER (destination, number);					\
612    (destination) += 2;							\
613  } while (0)
614
615/* Put into DESTINATION a number stored in two contiguous bytes starting
616   at SOURCE.  */
617
618#define EXTRACT_NUMBER(destination, source)				\
619  do {									\
620    (destination) = *(source) & 0377;					\
621    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
622  } while (0)
623
624#ifdef DEBUG
625static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
626static void
627extract_number (dest, source)
628    int *dest;
629    unsigned char *source;
630{
631  int temp = SIGN_EXTEND_CHAR (*(source + 1));
632  *dest = *source & 0377;
633  *dest += temp << 8;
634}
635
636# ifndef EXTRACT_MACROS /* To debug the macros.  */
637#  undef EXTRACT_NUMBER
638#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
639# endif /* not EXTRACT_MACROS */
640
641#endif /* DEBUG */
642
643/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
644   SOURCE must be an lvalue.  */
645
646#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
647  do {									\
648    EXTRACT_NUMBER (destination, source);				\
649    (source) += 2; 							\
650  } while (0)
651
652#ifdef DEBUG
653static void extract_number_and_incr _RE_ARGS ((int *destination,
654					       unsigned char **source));
655static void
656extract_number_and_incr (destination, source)
657    int *destination;
658    unsigned char **source;
659{
660  extract_number (destination, *source);
661  *source += 2;
662}
663
664# ifndef EXTRACT_MACROS
665#  undef EXTRACT_NUMBER_AND_INCR
666#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
667  extract_number_and_incr (&dest, &src)
668# endif /* not EXTRACT_MACROS */
669
670#endif /* DEBUG */
671
672/* If DEBUG is defined, Regex prints many voluminous messages about what
673   it is doing (if the variable `debug' is nonzero).  If linked with the
674   main program in `iregex.c', you can enter patterns and strings
675   interactively.  And if linked with the main program in `main.c' and
676   the other test files, you can run the already-written tests.  */
677
678#ifdef DEBUG
679
680/* We use standard I/O for debugging.  */
681# include <stdio.h>
682
683/* It is useful to test things that ``must'' be true when debugging.  */
684# include <assert.h>
685
686static int debug;
687
688# define DEBUG_STATEMENT(e) e
689# define DEBUG_PRINT1(x) if (debug) printf (x)
690# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
691# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
692# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
693# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
694  if (debug) print_partial_compiled_pattern (s, e)
695# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
696  if (debug) print_double_string (w, s1, sz1, s2, sz2)
697
698
699/* Print the fastmap in human-readable form.  */
700
701void
702print_fastmap (fastmap)
703    char *fastmap;
704{
705  unsigned was_a_range = 0;
706  unsigned i = 0;
707
708  while (i < (1 << BYTEWIDTH))
709    {
710      if (fastmap[i++])
711	{
712	  was_a_range = 0;
713          putchar (i - 1);
714          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
715            {
716              was_a_range = 1;
717              i++;
718            }
719	  if (was_a_range)
720            {
721              printf ("-");
722              putchar (i - 1);
723            }
724        }
725    }
726  putchar ('\n');
727}
728
729
730/* Print a compiled pattern string in human-readable form, starting at
731   the START pointer into it and ending just before the pointer END.  */
732
733void
734print_partial_compiled_pattern (start, end)
735    unsigned char *start;
736    unsigned char *end;
737{
738  int mcnt, mcnt2;
739  unsigned char *p1;
740  unsigned char *p = start;
741  unsigned char *pend = end;
742
743  if (start == NULL)
744    {
745      printf ("(null)\n");
746      return;
747    }
748
749  /* Loop over pattern commands.  */
750  while (p < pend)
751    {
752#ifdef _LIBC
753      printf ("%t:\t", p - start);
754#else
755      printf ("%ld:\t", (long int) (p - start));
756#endif
757
758      switch ((re_opcode_t) *p++)
759	{
760        case no_op:
761          printf ("/no_op");
762          break;
763
764	case exactn:
765	  mcnt = *p++;
766          printf ("/exactn/%d", mcnt);
767          do
768	    {
769              putchar ('/');
770	      putchar (*p++);
771            }
772          while (--mcnt);
773          break;
774
775	case start_memory:
776          mcnt = *p++;
777          printf ("/start_memory/%d/%d", mcnt, *p++);
778          break;
779
780	case stop_memory:
781          mcnt = *p++;
782	  printf ("/stop_memory/%d/%d", mcnt, *p++);
783          break;
784
785	case duplicate:
786	  printf ("/duplicate/%d", *p++);
787	  break;
788
789	case anychar:
790	  printf ("/anychar");
791	  break;
792
793	case charset:
794        case charset_not:
795          {
796            register int c, last = -100;
797	    register int in_range = 0;
798
799	    printf ("/charset [%s",
800	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
801
802            assert (p + *p < pend);
803
804            for (c = 0; c < 256; c++)
805	      if (c / 8 < *p
806		  && (p[1 + (c/8)] & (1 << (c % 8))))
807		{
808		  /* Are we starting a range?  */
809		  if (last + 1 == c && ! in_range)
810		    {
811		      putchar ('-');
812		      in_range = 1;
813		    }
814		  /* Have we broken a range?  */
815		  else if (last + 1 != c && in_range)
816              {
817		      putchar (last);
818		      in_range = 0;
819		    }
820
821		  if (! in_range)
822		    putchar (c);
823
824		  last = c;
825              }
826
827	    if (in_range)
828	      putchar (last);
829
830	    putchar (']');
831
832	    p += 1 + *p;
833	  }
834	  break;
835
836	case begline:
837	  printf ("/begline");
838          break;
839
840	case endline:
841          printf ("/endline");
842          break;
843
844	case on_failure_jump:
845          extract_number_and_incr (&mcnt, &p);
846#ifdef _LIBC
847  	  printf ("/on_failure_jump to %t", p + mcnt - start);
848#else
849  	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
850#endif
851          break;
852
853	case on_failure_keep_string_jump:
854          extract_number_and_incr (&mcnt, &p);
855#ifdef _LIBC
856  	  printf ("/on_failure_keep_string_jump to %t", p + mcnt - start);
857#else
858  	  printf ("/on_failure_keep_string_jump to %ld",
859		  (long int) (p + mcnt - start));
860#endif
861          break;
862
863	case dummy_failure_jump:
864          extract_number_and_incr (&mcnt, &p);
865#ifdef _LIBC
866  	  printf ("/dummy_failure_jump to %t", p + mcnt - start);
867#else
868  	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
869#endif
870          break;
871
872	case push_dummy_failure:
873          printf ("/push_dummy_failure");
874          break;
875
876        case maybe_pop_jump:
877          extract_number_and_incr (&mcnt, &p);
878#ifdef _LIBC
879  	  printf ("/maybe_pop_jump to %t", p + mcnt - start);
880#else
881  	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
882#endif
883	  break;
884
885        case pop_failure_jump:
886	  extract_number_and_incr (&mcnt, &p);
887#ifdef _LIBC
888  	  printf ("/pop_failure_jump to %t", p + mcnt - start);
889#else
890  	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
891#endif
892	  break;
893
894        case jump_past_alt:
895	  extract_number_and_incr (&mcnt, &p);
896#ifdef _LIBC
897  	  printf ("/jump_past_alt to %t", p + mcnt - start);
898#else
899  	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
900#endif
901	  break;
902
903        case jump:
904	  extract_number_and_incr (&mcnt, &p);
905#ifdef _LIBC
906  	  printf ("/jump to %t", p + mcnt - start);
907#else
908  	  printf ("/jump to %ld", (long int) (p + mcnt - start));
909#endif
910	  break;
911
912        case succeed_n:
913          extract_number_and_incr (&mcnt, &p);
914	  p1 = p + mcnt;
915          extract_number_and_incr (&mcnt2, &p);
916#ifdef _LIBC
917	  printf ("/succeed_n to %t, %d times", p1 - start, mcnt2);
918#else
919	  printf ("/succeed_n to %ld, %d times",
920		  (long int) (p1 - start), mcnt2);
921#endif
922          break;
923
924        case jump_n:
925          extract_number_and_incr (&mcnt, &p);
926	  p1 = p + mcnt;
927          extract_number_and_incr (&mcnt2, &p);
928	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
929          break;
930
931        case set_number_at:
932          extract_number_and_incr (&mcnt, &p);
933	  p1 = p + mcnt;
934          extract_number_and_incr (&mcnt2, &p);
935#ifdef _LIBC
936	  printf ("/set_number_at location %t to %d", p1 - start, mcnt2);
937#else
938	  printf ("/set_number_at location %ld to %d",
939		  (long int) (p1 - start), mcnt2);
940#endif
941          break;
942
943        case wordbound:
944	  printf ("/wordbound");
945	  break;
946
947	case notwordbound:
948	  printf ("/notwordbound");
949          break;
950
951	case wordbeg:
952	  printf ("/wordbeg");
953	  break;
954
955	case wordend:
956	  printf ("/wordend");
957
958# ifdef emacs
959	case before_dot:
960	  printf ("/before_dot");
961          break;
962
963	case at_dot:
964	  printf ("/at_dot");
965          break;
966
967	case after_dot:
968	  printf ("/after_dot");
969          break;
970
971	case syntaxspec:
972          printf ("/syntaxspec");
973	  mcnt = *p++;
974	  printf ("/%d", mcnt);
975          break;
976
977	case notsyntaxspec:
978          printf ("/notsyntaxspec");
979	  mcnt = *p++;
980	  printf ("/%d", mcnt);
981	  break;
982# endif /* emacs */
983
984	case wordchar:
985	  printf ("/wordchar");
986          break;
987
988	case notwordchar:
989	  printf ("/notwordchar");
990          break;
991
992	case begbuf:
993	  printf ("/begbuf");
994          break;
995
996	case endbuf:
997	  printf ("/endbuf");
998          break;
999
1000        default:
1001          printf ("?%d", *(p-1));
1002	}
1003
1004      putchar ('\n');
1005    }
1006
1007#ifdef _LIBC
1008  printf ("%t:\tend of pattern.\n", p - start);
1009#else
1010  printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1011#endif
1012}
1013
1014
1015void
1016print_compiled_pattern (bufp)
1017    struct re_pattern_buffer *bufp;
1018{
1019  unsigned char *buffer = bufp->buffer;
1020
1021  print_partial_compiled_pattern (buffer, buffer + bufp->used);
1022  printf ("%ld bytes used/%ld bytes allocated.\n",
1023	  bufp->used, bufp->allocated);
1024
1025  if (bufp->fastmap_accurate && bufp->fastmap)
1026    {
1027      printf ("fastmap: ");
1028      print_fastmap (bufp->fastmap);
1029    }
1030
1031#ifdef _LIBC
1032  printf ("re_nsub: %Zd\t", bufp->re_nsub);
1033#else
1034  printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1035#endif
1036  printf ("regs_alloc: %d\t", bufp->regs_allocated);
1037  printf ("can_be_null: %d\t", bufp->can_be_null);
1038  printf ("newline_anchor: %d\n", bufp->newline_anchor);
1039  printf ("no_sub: %d\t", bufp->no_sub);
1040  printf ("not_bol: %d\t", bufp->not_bol);
1041  printf ("not_eol: %d\t", bufp->not_eol);
1042  printf ("syntax: %lx\n", bufp->syntax);
1043  /* Perhaps we should print the translate table?  */
1044}
1045
1046
1047void
1048print_double_string (where, string1, size1, string2, size2)
1049    const char *where;
1050    const char *string1;
1051    const char *string2;
1052    int size1;
1053    int size2;
1054{
1055  int this_char;
1056
1057  if (where == NULL)
1058    printf ("(null)");
1059  else
1060    {
1061      if (FIRST_STRING_P (where))
1062        {
1063          for (this_char = where - string1; this_char < size1; this_char++)
1064            putchar (string1[this_char]);
1065
1066          where = string2;
1067        }
1068
1069      for (this_char = where - string2; this_char < size2; this_char++)
1070        putchar (string2[this_char]);
1071    }
1072}
1073
1074void
1075printchar (c)
1076     int c;
1077{
1078  putc (c, stderr);
1079}
1080
1081#else /* not DEBUG */
1082
1083# undef assert
1084# define assert(e)
1085
1086# define DEBUG_STATEMENT(e)
1087# define DEBUG_PRINT1(x)
1088# define DEBUG_PRINT2(x1, x2)
1089# define DEBUG_PRINT3(x1, x2, x3)
1090# define DEBUG_PRINT4(x1, x2, x3, x4)
1091# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1092# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1093
1094#endif /* not DEBUG */
1095
1096/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1097   also be assigned to arbitrarily: each pattern buffer stores its own
1098   syntax, so it can be changed between regex compilations.  */
1099/* This has no initializer because initialized variables in Emacs
1100   become read-only after dumping.  */
1101reg_syntax_t re_syntax_options;
1102
1103
1104/* Specify the precise syntax of regexps for compilation.  This provides
1105   for compatibility for various utilities which historically have
1106   different, incompatible syntaxes.
1107
1108   The argument SYNTAX is a bit mask comprised of the various bits
1109   defined in regex.h.  We return the old syntax.  */
1110
1111reg_syntax_t
1112re_set_syntax (syntax)
1113    reg_syntax_t syntax;
1114{
1115  reg_syntax_t ret = re_syntax_options;
1116
1117  re_syntax_options = syntax;
1118#ifdef DEBUG
1119  if (syntax & RE_DEBUG)
1120    debug = 1;
1121  else if (debug) /* was on but now is not */
1122    debug = 0;
1123#endif /* DEBUG */
1124  return ret;
1125}
1126#ifdef _LIBC
1127weak_alias (__re_set_syntax, re_set_syntax)
1128#endif
1129
1130/* This table gives an error message for each of the error codes listed
1131   in regex.h.  Obviously the order here has to be same as there.
1132   POSIX doesn't require that we do anything for REG_NOERROR,
1133   but why not be nice?  */
1134
1135static const char re_error_msgid[] =
1136  {
1137#define REG_NOERROR_IDX	0
1138    gettext_noop ("Success")	/* REG_NOERROR */
1139    "\0"
1140#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1141    gettext_noop ("No match")	/* REG_NOMATCH */
1142    "\0"
1143#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
1144    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1145    "\0"
1146#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1147    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
1148    "\0"
1149#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1150    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1151    "\0"
1152#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
1153    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1154    "\0"
1155#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
1156    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1157    "\0"
1158#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
1159    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
1160    "\0"
1161#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1162    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1163    "\0"
1164#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1165    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1166    "\0"
1167#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
1168    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1169    "\0"
1170#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1171    gettext_noop ("Invalid range end")	/* REG_ERANGE */
1172    "\0"
1173#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
1174    gettext_noop ("Memory exhausted") /* REG_ESPACE */
1175    "\0"
1176#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
1177    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1178    "\0"
1179#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1180    gettext_noop ("Premature end of regular expression") /* REG_EEND */
1181    "\0"
1182#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
1183    gettext_noop ("Regular expression too big") /* REG_ESIZE */
1184    "\0"
1185#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
1186    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
1187  };
1188
1189static const size_t re_error_msgid_idx[] =
1190  {
1191    REG_NOERROR_IDX,
1192    REG_NOMATCH_IDX,
1193    REG_BADPAT_IDX,
1194    REG_ECOLLATE_IDX,
1195    REG_ECTYPE_IDX,
1196    REG_EESCAPE_IDX,
1197    REG_ESUBREG_IDX,
1198    REG_EBRACK_IDX,
1199    REG_EPAREN_IDX,
1200    REG_EBRACE_IDX,
1201    REG_BADBR_IDX,
1202    REG_ERANGE_IDX,
1203    REG_ESPACE_IDX,
1204    REG_BADRPT_IDX,
1205    REG_EEND_IDX,
1206    REG_ESIZE_IDX,
1207    REG_ERPAREN_IDX
1208  };
1209
1210/* Avoiding alloca during matching, to placate r_alloc.  */
1211
1212/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1213   searching and matching functions should not call alloca.  On some
1214   systems, alloca is implemented in terms of malloc, and if we're
1215   using the relocating allocator routines, then malloc could cause a
1216   relocation, which might (if the strings being searched are in the
1217   ralloc heap) shift the data out from underneath the regexp
1218   routines.
1219
1220   Here's another reason to avoid allocation: Emacs
1221   processes input from X in a signal handler; processing X input may
1222   call malloc; if input arrives while a matching routine is calling
1223   malloc, then we're scrod.  But Emacs can't just block input while
1224   calling matching routines; then we don't notice interrupts when
1225   they come in.  So, Emacs blocks input around all regexp calls
1226   except the matching calls, which it leaves unprotected, in the
1227   faith that they will not malloc.  */
1228
1229/* Normally, this is fine.  */
1230#define MATCH_MAY_ALLOCATE
1231
1232/* When using GNU C, we are not REALLY using the C alloca, no matter
1233   what config.h may say.  So don't take precautions for it.  */
1234#ifdef __GNUC__
1235# undef C_ALLOCA
1236#endif
1237
1238/* The match routines may not allocate if (1) they would do it with malloc
1239   and (2) it's not safe for them to use malloc.
1240   Note that if REL_ALLOC is defined, matching would not use malloc for the
1241   failure stack, but we would still use it for the register vectors;
1242   so REL_ALLOC should not affect this.  */
1243#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1244# undef MATCH_MAY_ALLOCATE
1245#endif
1246
1247
1248/* Failure stack declarations and macros; both re_compile_fastmap and
1249   re_match_2 use a failure stack.  These have to be macros because of
1250   REGEX_ALLOCATE_STACK.  */
1251
1252
1253/* Number of failure points for which to initially allocate space
1254   when matching.  If this number is exceeded, we allocate more
1255   space, so it is not a hard limit.  */
1256#ifndef INIT_FAILURE_ALLOC
1257# define INIT_FAILURE_ALLOC 5
1258#endif
1259
1260/* Roughly the maximum number of failure points on the stack.  Would be
1261   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1262   This is a variable only so users of regex can assign to it; we never
1263   change it ourselves.  */
1264
1265#ifdef INT_IS_16BIT
1266
1267# if defined MATCH_MAY_ALLOCATE
1268/* 4400 was enough to cause a crash on Alpha OSF/1,
1269   whose default stack limit is 2mb.  */
1270long int re_max_failures = 4000;
1271# else
1272long int re_max_failures = 2000;
1273# endif
1274
1275union fail_stack_elt
1276{
1277  unsigned char *pointer;
1278  long int integer;
1279};
1280
1281typedef union fail_stack_elt fail_stack_elt_t;
1282
1283typedef struct
1284{
1285  fail_stack_elt_t *stack;
1286  unsigned long int size;
1287  unsigned long int avail;		/* Offset of next open position.  */
1288} fail_stack_type;
1289
1290#else /* not INT_IS_16BIT */
1291
1292# if defined MATCH_MAY_ALLOCATE
1293/* 4400 was enough to cause a crash on Alpha OSF/1,
1294   whose default stack limit is 2mb.  */
1295int re_max_failures = 4000;
1296# else
1297int re_max_failures = 2000;
1298# endif
1299
1300union fail_stack_elt
1301{
1302  unsigned char *pointer;
1303  int integer;
1304};
1305
1306typedef union fail_stack_elt fail_stack_elt_t;
1307
1308typedef struct
1309{
1310  fail_stack_elt_t *stack;
1311  unsigned size;
1312  unsigned avail;			/* Offset of next open position.  */
1313} fail_stack_type;
1314
1315#endif /* INT_IS_16BIT */
1316
1317#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1318#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1319#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1320
1321
1322/* Define macros to initialize and free the failure stack.
1323   Do `return -2' if the alloc fails.  */
1324
1325#ifdef MATCH_MAY_ALLOCATE
1326# define INIT_FAIL_STACK()						\
1327  do {									\
1328    fail_stack.stack = (fail_stack_elt_t *)				\
1329      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1330									\
1331    if (fail_stack.stack == NULL)					\
1332      return -2;							\
1333									\
1334    fail_stack.size = INIT_FAILURE_ALLOC;				\
1335    fail_stack.avail = 0;						\
1336  } while (0)
1337
1338# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1339#else
1340# define INIT_FAIL_STACK()						\
1341  do {									\
1342    fail_stack.avail = 0;						\
1343  } while (0)
1344
1345# define RESET_FAIL_STACK()
1346#endif
1347
1348
1349/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1350
1351   Return 1 if succeeds, and 0 if either ran out of memory
1352   allocating space for it or it was already too large.
1353
1354   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1355
1356#define DOUBLE_FAIL_STACK(fail_stack)					\
1357  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1358   ? 0									\
1359   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1360        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1361          (fail_stack).size * sizeof (fail_stack_elt_t),		\
1362          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1363									\
1364      (fail_stack).stack == NULL					\
1365      ? 0								\
1366      : ((fail_stack).size <<= 1, 					\
1367         1)))
1368
1369
1370/* Push pointer POINTER on FAIL_STACK.
1371   Return 1 if was able to do so and 0 if ran out of memory allocating
1372   space to do so.  */
1373#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1374  ((FAIL_STACK_FULL ()							\
1375    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1376   ? 0									\
1377   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1378      1))
1379
1380/* Push a pointer value onto the failure stack.
1381   Assumes the variable `fail_stack'.  Probably should only
1382   be called from within `PUSH_FAILURE_POINT'.  */
1383#define PUSH_FAILURE_POINTER(item)					\
1384  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1385
1386/* This pushes an integer-valued item onto the failure stack.
1387   Assumes the variable `fail_stack'.  Probably should only
1388   be called from within `PUSH_FAILURE_POINT'.  */
1389#define PUSH_FAILURE_INT(item)					\
1390  fail_stack.stack[fail_stack.avail++].integer = (item)
1391
1392/* Push a fail_stack_elt_t value onto the failure stack.
1393   Assumes the variable `fail_stack'.  Probably should only
1394   be called from within `PUSH_FAILURE_POINT'.  */
1395#define PUSH_FAILURE_ELT(item)					\
1396  fail_stack.stack[fail_stack.avail++] =  (item)
1397
1398/* These three POP... operations complement the three PUSH... operations.
1399   All assume that `fail_stack' is nonempty.  */
1400#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1401#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1402#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1403
1404/* Used to omit pushing failure point id's when we're not debugging.  */
1405#ifdef DEBUG
1406# define DEBUG_PUSH PUSH_FAILURE_INT
1407# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1408#else
1409# define DEBUG_PUSH(item)
1410# define DEBUG_POP(item_addr)
1411#endif
1412
1413
1414/* Push the information about the state we will need
1415   if we ever fail back to it.
1416
1417   Requires variables fail_stack, regstart, regend, reg_info, and
1418   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1419   be declared.
1420
1421   Does `return FAILURE_CODE' if runs out of memory.  */
1422
1423#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1424  do {									\
1425    char *destination;							\
1426    /* Must be int, so when we don't save any registers, the arithmetic	\
1427       of 0 + -1 isn't done as unsigned.  */				\
1428    /* Can't be int, since there is not a shred of a guarantee that int	\
1429       is wide enough to hold a value of something to which pointer can	\
1430       be assigned */							\
1431    active_reg_t this_reg;						\
1432    									\
1433    DEBUG_STATEMENT (failure_id++);					\
1434    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1435    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1436    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1437    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1438									\
1439    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1440    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1441									\
1442    /* Ensure we have enough space allocated for what we will push.  */	\
1443    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1444      {									\
1445        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1446          return failure_code;						\
1447									\
1448        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1449		       (fail_stack).size);				\
1450        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1451      }									\
1452									\
1453    /* Push the info, starting with the registers.  */			\
1454    DEBUG_PRINT1 ("\n");						\
1455									\
1456    if (1)								\
1457      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1458	   this_reg++)							\
1459	{								\
1460	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1461	  DEBUG_STATEMENT (num_regs_pushed++);				\
1462									\
1463	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1464	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1465									\
1466	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1467	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1468									\
1469	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1470			reg_info[this_reg].word.pointer);		\
1471	  DEBUG_PRINT2 (" match_null=%d",				\
1472			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1473	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1474	  DEBUG_PRINT2 (" matched_something=%d",			\
1475			MATCHED_SOMETHING (reg_info[this_reg]));	\
1476	  DEBUG_PRINT2 (" ever_matched=%d",				\
1477			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1478	  DEBUG_PRINT1 ("\n");						\
1479	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1480	}								\
1481									\
1482    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1483    PUSH_FAILURE_INT (lowest_active_reg);				\
1484									\
1485    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1486    PUSH_FAILURE_INT (highest_active_reg);				\
1487									\
1488    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1489    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1490    PUSH_FAILURE_POINTER (pattern_place);				\
1491									\
1492    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1493    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1494				 size2);				\
1495    DEBUG_PRINT1 ("'\n");						\
1496    PUSH_FAILURE_POINTER (string_place);				\
1497									\
1498    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1499    DEBUG_PUSH (failure_id);						\
1500  } while (0)
1501
1502/* This is the number of items that are pushed and popped on the stack
1503   for each register.  */
1504#define NUM_REG_ITEMS  3
1505
1506/* Individual items aside from the registers.  */
1507#ifdef DEBUG
1508# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1509#else
1510# define NUM_NONREG_ITEMS 4
1511#endif
1512
1513/* We push at most this many items on the stack.  */
1514/* We used to use (num_regs - 1), which is the number of registers
1515   this regexp will save; but that was changed to 5
1516   to avoid stack overflow for a regexp with lots of parens.  */
1517#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1518
1519/* We actually push this many items.  */
1520#define NUM_FAILURE_ITEMS				\
1521  (((0							\
1522     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1523    * NUM_REG_ITEMS)					\
1524   + NUM_NONREG_ITEMS)
1525
1526/* How many items can still be added to the stack without overflowing it.  */
1527#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1528
1529
1530/* Pops what PUSH_FAIL_STACK pushes.
1531
1532   We restore into the parameters, all of which should be lvalues:
1533     STR -- the saved data position.
1534     PAT -- the saved pattern position.
1535     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1536     REGSTART, REGEND -- arrays of string positions.
1537     REG_INFO -- array of information about each subexpression.
1538
1539   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1540   `pend', `string1', `size1', `string2', and `size2'.  */
1541
1542#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1543{									\
1544  DEBUG_STATEMENT (unsigned failure_id;)				\
1545  active_reg_t this_reg;						\
1546  const unsigned char *string_temp;					\
1547									\
1548  assert (!FAIL_STACK_EMPTY ());					\
1549									\
1550  /* Remove failure points and point to how many regs pushed.  */	\
1551  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1552  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1553  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1554									\
1555  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1556									\
1557  DEBUG_POP (&failure_id);						\
1558  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1559									\
1560  /* If the saved string location is NULL, it came from an		\
1561     on_failure_keep_string_jump opcode, and we want to throw away the	\
1562     saved NULL, thus retaining our current position in the string.  */	\
1563  string_temp = POP_FAILURE_POINTER ();					\
1564  if (string_temp != NULL)						\
1565    str = (const char *) string_temp;					\
1566									\
1567  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1568  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1569  DEBUG_PRINT1 ("'\n");							\
1570									\
1571  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1572  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1573  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1574									\
1575  /* Restore register info.  */						\
1576  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1577  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1578									\
1579  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1580  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1581									\
1582  if (1)								\
1583    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1584      {									\
1585	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1586									\
1587	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1588	DEBUG_PRINT2 ("      info: %p\n",				\
1589		      reg_info[this_reg].word.pointer);			\
1590									\
1591	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1592	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1593									\
1594	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1595	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1596      }									\
1597  else									\
1598    {									\
1599      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1600	{								\
1601	  reg_info[this_reg].word.integer = 0;				\
1602	  regend[this_reg] = 0;						\
1603	  regstart[this_reg] = 0;					\
1604	}								\
1605      highest_active_reg = high_reg;					\
1606    }									\
1607									\
1608  set_regs_matched_done = 0;						\
1609  DEBUG_STATEMENT (nfailure_points_popped++);				\
1610} /* POP_FAILURE_POINT */
1611
1612
1613
1614/* Structure for per-register (a.k.a. per-group) information.
1615   Other register information, such as the
1616   starting and ending positions (which are addresses), and the list of
1617   inner groups (which is a bits list) are maintained in separate
1618   variables.
1619
1620   We are making a (strictly speaking) nonportable assumption here: that
1621   the compiler will pack our bit fields into something that fits into
1622   the type of `word', i.e., is something that fits into one item on the
1623   failure stack.  */
1624
1625
1626/* Declarations and macros for re_match_2.  */
1627
1628typedef union
1629{
1630  fail_stack_elt_t word;
1631  struct
1632  {
1633      /* This field is one if this group can match the empty string,
1634         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1635#define MATCH_NULL_UNSET_VALUE 3
1636    unsigned match_null_string_p : 2;
1637    unsigned is_active : 1;
1638    unsigned matched_something : 1;
1639    unsigned ever_matched_something : 1;
1640  } bits;
1641} register_info_type;
1642
1643#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1644#define IS_ACTIVE(R)  ((R).bits.is_active)
1645#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1646#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1647
1648
1649/* Call this when have matched a real character; it sets `matched' flags
1650   for the subexpressions which we are currently inside.  Also records
1651   that those subexprs have matched.  */
1652#define SET_REGS_MATCHED()						\
1653  do									\
1654    {									\
1655      if (!set_regs_matched_done)					\
1656	{								\
1657	  active_reg_t r;						\
1658	  set_regs_matched_done = 1;					\
1659	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1660	    {								\
1661	      MATCHED_SOMETHING (reg_info[r])				\
1662		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1663		= 1;							\
1664	    }								\
1665	}								\
1666    }									\
1667  while (0)
1668
1669/* Registers are set to a sentinel when they haven't yet matched.  */
1670static char reg_unset_dummy;
1671#define REG_UNSET_VALUE (&reg_unset_dummy)
1672#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1673
1674/* Subroutine declarations and macros for regex_compile.  */
1675
1676static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1677					      reg_syntax_t syntax,
1678					      struct re_pattern_buffer *bufp));
1679static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1680static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1681				 int arg1, int arg2));
1682static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1683				  int arg, unsigned char *end));
1684static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1685				  int arg1, int arg2, unsigned char *end));
1686static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1687					   reg_syntax_t syntax));
1688static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1689					   reg_syntax_t syntax));
1690static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
1691					      const char **p_ptr,
1692					      const char *pend,
1693					      char *translate,
1694					      reg_syntax_t syntax,
1695					      unsigned char *b));
1696
1697/* Fetch the next character in the uncompiled pattern---translating it
1698   if necessary.  Also cast from a signed character in the constant
1699   string passed to us by the user to an unsigned char that we can use
1700   as an array index (in, e.g., `translate').  */
1701#ifndef PATFETCH
1702# define PATFETCH(c)							\
1703  do {if (p == pend) return REG_EEND;					\
1704    c = (unsigned char) *p++;						\
1705    if (translate) c = (unsigned char) translate[c];			\
1706  } while (0)
1707#endif
1708
1709/* Fetch the next character in the uncompiled pattern, with no
1710   translation.  */
1711#define PATFETCH_RAW(c)							\
1712  do {if (p == pend) return REG_EEND;					\
1713    c = (unsigned char) *p++; 						\
1714  } while (0)
1715
1716/* Go backwards one character in the pattern.  */
1717#define PATUNFETCH p--
1718
1719
1720/* If `translate' is non-null, return translate[D], else just D.  We
1721   cast the subscript to translate because some data is declared as
1722   `char *', to avoid warnings when a string constant is passed.  But
1723   when we use a character as a subscript we must make it unsigned.  */
1724#ifndef TRANSLATE
1725# define TRANSLATE(d) \
1726  (translate ? (char) translate[(unsigned char) (d)] : (d))
1727#endif
1728
1729
1730/* Macros for outputting the compiled pattern into `buffer'.  */
1731
1732/* If the buffer isn't allocated when it comes in, use this.  */
1733#define INIT_BUF_SIZE  32
1734
1735/* Make sure we have at least N more bytes of space in buffer.  */
1736#define GET_BUFFER_SPACE(n)						\
1737    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1738      EXTEND_BUFFER ()
1739
1740/* Make sure we have one more byte of buffer space and then add C to it.  */
1741#define BUF_PUSH(c)							\
1742  do {									\
1743    GET_BUFFER_SPACE (1);						\
1744    *b++ = (unsigned char) (c);						\
1745  } while (0)
1746
1747
1748/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1749#define BUF_PUSH_2(c1, c2)						\
1750  do {									\
1751    GET_BUFFER_SPACE (2);						\
1752    *b++ = (unsigned char) (c1);					\
1753    *b++ = (unsigned char) (c2);					\
1754  } while (0)
1755
1756
1757/* As with BUF_PUSH_2, except for three bytes.  */
1758#define BUF_PUSH_3(c1, c2, c3)						\
1759  do {									\
1760    GET_BUFFER_SPACE (3);						\
1761    *b++ = (unsigned char) (c1);					\
1762    *b++ = (unsigned char) (c2);					\
1763    *b++ = (unsigned char) (c3);					\
1764  } while (0)
1765
1766
1767/* Store a jump with opcode OP at LOC to location TO.  We store a
1768   relative address offset by the three bytes the jump itself occupies.  */
1769#define STORE_JUMP(op, loc, to) \
1770  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1771
1772/* Likewise, for a two-argument jump.  */
1773#define STORE_JUMP2(op, loc, to, arg) \
1774  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1775
1776/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1777#define INSERT_JUMP(op, loc, to) \
1778  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1779
1780/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1781#define INSERT_JUMP2(op, loc, to, arg) \
1782  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1783
1784
1785/* This is not an arbitrary limit: the arguments which represent offsets
1786   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1787   be too small, many things would have to change.  */
1788/* Any other compiler which, like MSC, has allocation limit below 2^16
1789   bytes will have to use approach similar to what was done below for
1790   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1791   reallocating to 0 bytes.  Such thing is not going to work too well.
1792   You have been warned!!  */
1793#if defined _MSC_VER  && !defined WIN32
1794/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1795   The REALLOC define eliminates a flurry of conversion warnings,
1796   but is not required. */
1797# define MAX_BUF_SIZE  65500L
1798# define REALLOC(p,s) realloc ((p), (size_t) (s))
1799#else
1800# define MAX_BUF_SIZE (1L << 16)
1801# define REALLOC(p,s) realloc ((p), (s))
1802#endif
1803
1804/* Extend the buffer by twice its current size via realloc and
1805   reset the pointers that pointed into the old block to point to the
1806   correct places in the new one.  If extending the buffer results in it
1807   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1808#if __BOUNDED_POINTERS__
1809# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1810# define MOVE_BUFFER_POINTER(P) \
1811  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
1812# define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
1813  else						\
1814    {						\
1815      SET_HIGH_BOUND (b);			\
1816      SET_HIGH_BOUND (begalt);			\
1817      if (fixup_alt_jump)			\
1818	SET_HIGH_BOUND (fixup_alt_jump);	\
1819      if (laststart)				\
1820	SET_HIGH_BOUND (laststart);		\
1821      if (pending_exact)			\
1822	SET_HIGH_BOUND (pending_exact);		\
1823    }
1824#else
1825# define MOVE_BUFFER_POINTER(P) (P) += incr
1826# define ELSE_EXTEND_BUFFER_HIGH_BOUND
1827#endif
1828#define EXTEND_BUFFER()							\
1829  do {									\
1830    unsigned char *old_buffer = bufp->buffer;				\
1831    if (bufp->allocated == MAX_BUF_SIZE)				\
1832      return REG_ESIZE;							\
1833    bufp->allocated <<= 1;						\
1834    if (bufp->allocated > MAX_BUF_SIZE)					\
1835      bufp->allocated = MAX_BUF_SIZE;					\
1836    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1837    if (bufp->buffer == NULL)						\
1838      return REG_ESPACE;						\
1839    /* If the buffer moved, move all the pointers into it.  */		\
1840    if (old_buffer != bufp->buffer)					\
1841      {									\
1842	int incr = bufp->buffer - old_buffer;				\
1843	MOVE_BUFFER_POINTER (b);					\
1844	MOVE_BUFFER_POINTER (begalt);					\
1845	if (fixup_alt_jump)						\
1846	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
1847	if (laststart)							\
1848	  MOVE_BUFFER_POINTER (laststart);				\
1849	if (pending_exact)						\
1850	  MOVE_BUFFER_POINTER (pending_exact);				\
1851      }									\
1852    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
1853  } while (0)
1854
1855
1856/* Since we have one byte reserved for the register number argument to
1857   {start,stop}_memory, the maximum number of groups we can report
1858   things about is what fits in that byte.  */
1859#define MAX_REGNUM 255
1860
1861/* But patterns can have more than `MAX_REGNUM' registers.  We just
1862   ignore the excess.  */
1863typedef unsigned regnum_t;
1864
1865
1866/* Macros for the compile stack.  */
1867
1868/* Since offsets can go either forwards or backwards, this type needs to
1869   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1870/* int may be not enough when sizeof(int) == 2.  */
1871typedef long pattern_offset_t;
1872
1873typedef struct
1874{
1875  pattern_offset_t begalt_offset;
1876  pattern_offset_t fixup_alt_jump;
1877  pattern_offset_t inner_group_offset;
1878  pattern_offset_t laststart_offset;
1879  regnum_t regnum;
1880} compile_stack_elt_t;
1881
1882
1883typedef struct
1884{
1885  compile_stack_elt_t *stack;
1886  unsigned size;
1887  unsigned avail;			/* Offset of next open position.  */
1888} compile_stack_type;
1889
1890
1891#define INIT_COMPILE_STACK_SIZE 32
1892
1893#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1894#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1895
1896/* The next available element.  */
1897#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1898
1899
1900/* Set the bit for character C in a list.  */
1901#define SET_LIST_BIT(c)                               \
1902  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1903   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1904
1905
1906/* Get the next unsigned number in the uncompiled pattern.  */
1907#define GET_UNSIGNED_NUMBER(num) 					\
1908  { if (p != pend)							\
1909     {									\
1910       PATFETCH (c); 							\
1911       while ('0' <= c && c <= '9')					\
1912         { 								\
1913           if (num < 0)							\
1914              num = 0;							\
1915           num = num * 10 + c - '0'; 					\
1916           if (p == pend) 						\
1917              break; 							\
1918           PATFETCH (c);						\
1919         } 								\
1920       } 								\
1921    }
1922
1923#if defined _LIBC || WIDE_CHAR_SUPPORT
1924/* The GNU C library provides support for user-defined character classes
1925   and the functions from ISO C amendement 1.  */
1926# ifdef CHARCLASS_NAME_MAX
1927#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1928# else
1929/* This shouldn't happen but some implementation might still have this
1930   problem.  Use a reasonable default value.  */
1931#  define CHAR_CLASS_MAX_LENGTH 256
1932# endif
1933
1934# ifdef _LIBC
1935#  define IS_CHAR_CLASS(string) __wctype (string)
1936# else
1937#  define IS_CHAR_CLASS(string) wctype (string)
1938# endif
1939#else
1940# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1941
1942# define IS_CHAR_CLASS(string)						\
1943   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1944    || STREQ (string, "lower") || STREQ (string, "digit")		\
1945    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1946    || STREQ (string, "space") || STREQ (string, "print")		\
1947    || STREQ (string, "punct") || STREQ (string, "graph")		\
1948    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1949#endif
1950
1951#ifndef MATCH_MAY_ALLOCATE
1952
1953/* If we cannot allocate large objects within re_match_2_internal,
1954   we make the fail stack and register vectors global.
1955   The fail stack, we grow to the maximum size when a regexp
1956   is compiled.
1957   The register vectors, we adjust in size each time we
1958   compile a regexp, according to the number of registers it needs.  */
1959
1960static fail_stack_type fail_stack;
1961
1962/* Size with which the following vectors are currently allocated.
1963   That is so we can make them bigger as needed,
1964   but never make them smaller.  */
1965static int regs_allocated_size;
1966
1967static const char **     regstart, **     regend;
1968static const char ** old_regstart, ** old_regend;
1969static const char **best_regstart, **best_regend;
1970static register_info_type *reg_info;
1971static const char **reg_dummy;
1972static register_info_type *reg_info_dummy;
1973
1974/* Make the register vectors big enough for NUM_REGS registers,
1975   but don't make them smaller.  */
1976
1977static
1978regex_grow_registers (num_regs)
1979     int num_regs;
1980{
1981  if (num_regs > regs_allocated_size)
1982    {
1983      RETALLOC_IF (regstart,	 num_regs, const char *);
1984      RETALLOC_IF (regend,	 num_regs, const char *);
1985      RETALLOC_IF (old_regstart, num_regs, const char *);
1986      RETALLOC_IF (old_regend,	 num_regs, const char *);
1987      RETALLOC_IF (best_regstart, num_regs, const char *);
1988      RETALLOC_IF (best_regend,	 num_regs, const char *);
1989      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1990      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1991      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1992
1993      regs_allocated_size = num_regs;
1994    }
1995}
1996
1997#endif /* not MATCH_MAY_ALLOCATE */
1998
1999static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2000						 compile_stack,
2001						 regnum_t regnum));
2002
2003/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2004   Returns one of error codes defined in `regex.h', or zero for success.
2005
2006   Assumes the `allocated' (and perhaps `buffer') and `translate'
2007   fields are set in BUFP on entry.
2008
2009   If it succeeds, results are put in BUFP (if it returns an error, the
2010   contents of BUFP are undefined):
2011     `buffer' is the compiled pattern;
2012     `syntax' is set to SYNTAX;
2013     `used' is set to the length of the compiled pattern;
2014     `fastmap_accurate' is zero;
2015     `re_nsub' is the number of subexpressions in PATTERN;
2016     `not_bol' and `not_eol' are zero;
2017
2018   The `fastmap' and `newline_anchor' fields are neither
2019   examined nor set.  */
2020
2021/* Return, freeing storage we allocated.  */
2022#define FREE_STACK_RETURN(value)		\
2023  return (free (compile_stack.stack), value)
2024
2025static reg_errcode_t
2026regex_compile (pattern, size, syntax, bufp)
2027     const char *pattern;
2028     size_t size;
2029     reg_syntax_t syntax;
2030     struct re_pattern_buffer *bufp;
2031{
2032  /* We fetch characters from PATTERN here.  Even though PATTERN is
2033     `char *' (i.e., signed), we declare these variables as unsigned, so
2034     they can be reliably used as array indices.  */
2035  register unsigned char c, c1;
2036
2037  /* A random temporary spot in PATTERN.  */
2038  const char *p1;
2039
2040  /* Points to the end of the buffer, where we should append.  */
2041  register unsigned char *b;
2042
2043  /* Keeps track of unclosed groups.  */
2044  compile_stack_type compile_stack;
2045
2046  /* Points to the current (ending) position in the pattern.  */
2047  const char *p = pattern;
2048  const char *pend = pattern + size;
2049
2050  /* How to translate the characters in the pattern.  */
2051  RE_TRANSLATE_TYPE translate = bufp->translate;
2052
2053  /* Address of the count-byte of the most recently inserted `exactn'
2054     command.  This makes it possible to tell if a new exact-match
2055     character can be added to that command or if the character requires
2056     a new `exactn' command.  */
2057  unsigned char *pending_exact = 0;
2058
2059  /* Address of start of the most recently finished expression.
2060     This tells, e.g., postfix * where to find the start of its
2061     operand.  Reset at the beginning of groups and alternatives.  */
2062  unsigned char *laststart = 0;
2063
2064  /* Address of beginning of regexp, or inside of last group.  */
2065  unsigned char *begalt;
2066
2067  /* Place in the uncompiled pattern (i.e., the {) to
2068     which to go back if the interval is invalid.  */
2069  const char *beg_interval;
2070
2071  /* Address of the place where a forward jump should go to the end of
2072     the containing expression.  Each alternative of an `or' -- except the
2073     last -- ends with a forward jump of this sort.  */
2074  unsigned char *fixup_alt_jump = 0;
2075
2076  /* Counts open-groups as they are encountered.  Remembered for the
2077     matching close-group on the compile stack, so the same register
2078     number is put in the stop_memory as the start_memory.  */
2079  regnum_t regnum = 0;
2080
2081#ifdef DEBUG
2082  DEBUG_PRINT1 ("\nCompiling pattern: ");
2083  if (debug)
2084    {
2085      unsigned debug_count;
2086
2087      for (debug_count = 0; debug_count < size; debug_count++)
2088        putchar (pattern[debug_count]);
2089      putchar ('\n');
2090    }
2091#endif /* DEBUG */
2092
2093  /* Initialize the compile stack.  */
2094  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2095  if (compile_stack.stack == NULL)
2096    return REG_ESPACE;
2097
2098  compile_stack.size = INIT_COMPILE_STACK_SIZE;
2099  compile_stack.avail = 0;
2100
2101  /* Initialize the pattern buffer.  */
2102  bufp->syntax = syntax;
2103  bufp->fastmap_accurate = 0;
2104  bufp->not_bol = bufp->not_eol = 0;
2105
2106  /* Set `used' to zero, so that if we return an error, the pattern
2107     printer (for debugging) will think there's no pattern.  We reset it
2108     at the end.  */
2109  bufp->used = 0;
2110
2111  /* Always count groups, whether or not bufp->no_sub is set.  */
2112  bufp->re_nsub = 0;
2113
2114#if !defined emacs && !defined SYNTAX_TABLE
2115  /* Initialize the syntax table.  */
2116   init_syntax_once ();
2117#endif
2118
2119  if (bufp->allocated == 0)
2120    {
2121      if (bufp->buffer)
2122	{ /* If zero allocated, but buffer is non-null, try to realloc
2123             enough space.  This loses if buffer's address is bogus, but
2124             that is the user's responsibility.  */
2125          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2126        }
2127      else
2128        { /* Caller did not allocate a buffer.  Do it for them.  */
2129          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2130        }
2131      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2132
2133      bufp->allocated = INIT_BUF_SIZE;
2134    }
2135
2136  begalt = b = bufp->buffer;
2137
2138  /* Loop through the uncompiled pattern until we're at the end.  */
2139  while (p != pend)
2140    {
2141      PATFETCH (c);
2142
2143      switch (c)
2144        {
2145        case '^':
2146          {
2147            if (   /* If at start of pattern, it's an operator.  */
2148                   p == pattern + 1
2149                   /* If context independent, it's an operator.  */
2150                || syntax & RE_CONTEXT_INDEP_ANCHORS
2151                   /* Otherwise, depends on what's come before.  */
2152                || at_begline_loc_p (pattern, p, syntax))
2153              BUF_PUSH (begline);
2154            else
2155              goto normal_char;
2156          }
2157          break;
2158
2159
2160        case '$':
2161          {
2162            if (   /* If at end of pattern, it's an operator.  */
2163                   p == pend
2164                   /* If context independent, it's an operator.  */
2165                || syntax & RE_CONTEXT_INDEP_ANCHORS
2166                   /* Otherwise, depends on what's next.  */
2167                || at_endline_loc_p (p, pend, syntax))
2168               BUF_PUSH (endline);
2169             else
2170               goto normal_char;
2171           }
2172           break;
2173
2174
2175	case '+':
2176        case '?':
2177          if ((syntax & RE_BK_PLUS_QM)
2178              || (syntax & RE_LIMITED_OPS))
2179            goto normal_char;
2180        handle_plus:
2181        case '*':
2182          /* If there is no previous pattern... */
2183          if (!laststart)
2184            {
2185              if (syntax & RE_CONTEXT_INVALID_OPS)
2186                FREE_STACK_RETURN (REG_BADRPT);
2187              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2188                goto normal_char;
2189            }
2190
2191          {
2192            /* Are we optimizing this jump?  */
2193            boolean keep_string_p = false;
2194
2195            /* 1 means zero (many) matches is allowed.  */
2196            char zero_times_ok = 0, many_times_ok = 0;
2197
2198            /* If there is a sequence of repetition chars, collapse it
2199               down to just one (the right one).  We can't combine
2200               interval operators with these because of, e.g., `a{2}*',
2201               which should only match an even number of `a's.  */
2202
2203            for (;;)
2204              {
2205                zero_times_ok |= c != '+';
2206                many_times_ok |= c != '?';
2207
2208                if (p == pend)
2209                  break;
2210
2211                PATFETCH (c);
2212
2213                if (c == '*'
2214                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2215                  ;
2216
2217                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2218                  {
2219                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2220
2221                    PATFETCH (c1);
2222                    if (!(c1 == '+' || c1 == '?'))
2223                      {
2224                        PATUNFETCH;
2225                        PATUNFETCH;
2226                        break;
2227                      }
2228
2229                    c = c1;
2230                  }
2231                else
2232                  {
2233                    PATUNFETCH;
2234                    break;
2235                  }
2236
2237                /* If we get here, we found another repeat character.  */
2238               }
2239
2240            /* Star, etc. applied to an empty pattern is equivalent
2241               to an empty pattern.  */
2242            if (!laststart)
2243              break;
2244
2245            /* Now we know whether or not zero matches is allowed
2246               and also whether or not two or more matches is allowed.  */
2247            if (many_times_ok)
2248              { /* More than one repetition is allowed, so put in at the
2249                   end a backward relative jump from `b' to before the next
2250                   jump we're going to put in below (which jumps from
2251                   laststart to after this jump).
2252
2253                   But if we are at the `*' in the exact sequence `.*\n',
2254                   insert an unconditional jump backwards to the .,
2255                   instead of the beginning of the loop.  This way we only
2256                   push a failure point once, instead of every time
2257                   through the loop.  */
2258                assert (p - 1 > pattern);
2259
2260                /* Allocate the space for the jump.  */
2261                GET_BUFFER_SPACE (3);
2262
2263                /* We know we are not at the first character of the pattern,
2264                   because laststart was nonzero.  And we've already
2265                   incremented `p', by the way, to be the character after
2266                   the `*'.  Do we have to do something analogous here
2267                   for null bytes, because of RE_DOT_NOT_NULL?  */
2268                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2269		    && zero_times_ok
2270                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2271                    && !(syntax & RE_DOT_NEWLINE))
2272                  { /* We have .*\n.  */
2273                    STORE_JUMP (jump, b, laststart);
2274                    keep_string_p = true;
2275                  }
2276                else
2277                  /* Anything else.  */
2278                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2279
2280                /* We've added more stuff to the buffer.  */
2281                b += 3;
2282              }
2283
2284            /* On failure, jump from laststart to b + 3, which will be the
2285               end of the buffer after this jump is inserted.  */
2286            GET_BUFFER_SPACE (3);
2287            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2288                                       : on_failure_jump,
2289                         laststart, b + 3);
2290            pending_exact = 0;
2291            b += 3;
2292
2293            if (!zero_times_ok)
2294              {
2295                /* At least one repetition is required, so insert a
2296                   `dummy_failure_jump' before the initial
2297                   `on_failure_jump' instruction of the loop. This
2298                   effects a skip over that instruction the first time
2299                   we hit that loop.  */
2300                GET_BUFFER_SPACE (3);
2301                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2302                b += 3;
2303              }
2304            }
2305	  break;
2306
2307
2308	case '.':
2309          laststart = b;
2310          BUF_PUSH (anychar);
2311          break;
2312
2313
2314        case '[':
2315          {
2316            boolean had_char_class = false;
2317	    unsigned int range_start = 0xffffffff;
2318
2319            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2320
2321            /* Ensure that we have enough space to push a charset: the
2322               opcode, the length count, and the bitset; 34 bytes in all.  */
2323	    GET_BUFFER_SPACE (34);
2324
2325            laststart = b;
2326
2327            /* We test `*p == '^' twice, instead of using an if
2328               statement, so we only need one BUF_PUSH.  */
2329            BUF_PUSH (*p == '^' ? charset_not : charset);
2330            if (*p == '^')
2331              p++;
2332
2333            /* Remember the first position in the bracket expression.  */
2334            p1 = p;
2335
2336            /* Push the number of bytes in the bitmap.  */
2337            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2338
2339            /* Clear the whole map.  */
2340            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2341
2342            /* charset_not matches newline according to a syntax bit.  */
2343            if ((re_opcode_t) b[-2] == charset_not
2344                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2345              SET_LIST_BIT ('\n');
2346
2347            /* Read in characters and ranges, setting map bits.  */
2348            for (;;)
2349              {
2350                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2351
2352                PATFETCH (c);
2353
2354                /* \ might escape characters inside [...] and [^...].  */
2355                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2356                  {
2357                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2358
2359                    PATFETCH (c1);
2360                    SET_LIST_BIT (c1);
2361		    range_start = c1;
2362                    continue;
2363                  }
2364
2365                /* Could be the end of the bracket expression.  If it's
2366                   not (i.e., when the bracket expression is `[]' so
2367                   far), the ']' character bit gets set way below.  */
2368                if (c == ']' && p != p1 + 1)
2369                  break;
2370
2371                /* Look ahead to see if it's a range when the last thing
2372                   was a character class.  */
2373                if (had_char_class && c == '-' && *p != ']')
2374                  FREE_STACK_RETURN (REG_ERANGE);
2375
2376                /* Look ahead to see if it's a range when the last thing
2377                   was a character: if this is a hyphen not at the
2378                   beginning or the end of a list, then it's the range
2379                   operator.  */
2380                if (c == '-'
2381                    && !(p - 2 >= pattern && p[-2] == '[')
2382                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2383                    && *p != ']')
2384                  {
2385                    reg_errcode_t ret
2386                      = compile_range (range_start, &p, pend, translate,
2387				       syntax, b);
2388                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2389		    range_start = 0xffffffff;
2390                  }
2391
2392                else if (p[0] == '-' && p[1] != ']')
2393                  { /* This handles ranges made up of characters only.  */
2394                    reg_errcode_t ret;
2395
2396		    /* Move past the `-'.  */
2397                    PATFETCH (c1);
2398
2399                    ret = compile_range (c, &p, pend, translate, syntax, b);
2400                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2401		    range_start = 0xffffffff;
2402                  }
2403
2404                /* See if we're at the beginning of a possible character
2405                   class.  */
2406
2407                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2408                  { /* Leave room for the null.  */
2409                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2410
2411                    PATFETCH (c);
2412                    c1 = 0;
2413
2414                    /* If pattern is `[[:'.  */
2415                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2416
2417                    for (;;)
2418                      {
2419                        PATFETCH (c);
2420                        if ((c == ':' && *p == ']') || p == pend)
2421                          break;
2422			if (c1 < CHAR_CLASS_MAX_LENGTH)
2423			  str[c1++] = c;
2424			else
2425			  /* This is in any case an invalid class name.  */
2426			  str[0] = '\0';
2427                      }
2428                    str[c1] = '\0';
2429
2430                    /* If isn't a word bracketed by `[:' and `:]':
2431                       undo the ending character, the letters, and leave
2432                       the leading `:' and `[' (but set bits for them).  */
2433                    if (c == ':' && *p == ']')
2434                      {
2435#if defined _LIBC || WIDE_CHAR_SUPPORT
2436                        boolean is_lower = STREQ (str, "lower");
2437                        boolean is_upper = STREQ (str, "upper");
2438			wctype_t wt;
2439                        int ch;
2440
2441			wt = IS_CHAR_CLASS (str);
2442			if (wt == 0)
2443			  FREE_STACK_RETURN (REG_ECTYPE);
2444
2445                        /* Throw away the ] at the end of the character
2446                           class.  */
2447                        PATFETCH (c);
2448
2449                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2450
2451                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2452			  {
2453# ifdef _LIBC
2454			    if (__iswctype (__btowc (ch), wt))
2455			      SET_LIST_BIT (ch);
2456# else
2457			    if (iswctype (btowc (ch), wt))
2458			      SET_LIST_BIT (ch);
2459# endif
2460
2461			    if (translate && (is_upper || is_lower)
2462				&& (ISUPPER (ch) || ISLOWER (ch)))
2463			      SET_LIST_BIT (ch);
2464			  }
2465
2466                        had_char_class = true;
2467#else
2468                        int ch;
2469                        boolean is_alnum = STREQ (str, "alnum");
2470                        boolean is_alpha = STREQ (str, "alpha");
2471                        boolean is_blank = STREQ (str, "blank");
2472                        boolean is_cntrl = STREQ (str, "cntrl");
2473                        boolean is_digit = STREQ (str, "digit");
2474                        boolean is_graph = STREQ (str, "graph");
2475                        boolean is_lower = STREQ (str, "lower");
2476                        boolean is_print = STREQ (str, "print");
2477                        boolean is_punct = STREQ (str, "punct");
2478                        boolean is_space = STREQ (str, "space");
2479                        boolean is_upper = STREQ (str, "upper");
2480                        boolean is_xdigit = STREQ (str, "xdigit");
2481
2482                        if (!IS_CHAR_CLASS (str))
2483			  FREE_STACK_RETURN (REG_ECTYPE);
2484
2485                        /* Throw away the ] at the end of the character
2486                           class.  */
2487                        PATFETCH (c);
2488
2489                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2490
2491                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2492                          {
2493			    /* This was split into 3 if's to
2494			       avoid an arbitrary limit in some compiler.  */
2495                            if (   (is_alnum  && ISALNUM (ch))
2496                                || (is_alpha  && ISALPHA (ch))
2497                                || (is_blank  && ISBLANK (ch))
2498                                || (is_cntrl  && ISCNTRL (ch)))
2499			      SET_LIST_BIT (ch);
2500			    if (   (is_digit  && ISDIGIT (ch))
2501                                || (is_graph  && ISGRAPH (ch))
2502                                || (is_lower  && ISLOWER (ch))
2503                                || (is_print  && ISPRINT (ch)))
2504			      SET_LIST_BIT (ch);
2505			    if (   (is_punct  && ISPUNCT (ch))
2506                                || (is_space  && ISSPACE (ch))
2507                                || (is_upper  && ISUPPER (ch))
2508                                || (is_xdigit && ISXDIGIT (ch)))
2509			      SET_LIST_BIT (ch);
2510			    if (   translate && (is_upper || is_lower)
2511				&& (ISUPPER (ch) || ISLOWER (ch)))
2512			      SET_LIST_BIT (ch);
2513                          }
2514                        had_char_class = true;
2515#endif	/* libc || wctype.h */
2516                      }
2517                    else
2518                      {
2519                        c1++;
2520                        while (c1--)
2521                          PATUNFETCH;
2522                        SET_LIST_BIT ('[');
2523                        SET_LIST_BIT (':');
2524			range_start = ':';
2525                        had_char_class = false;
2526                      }
2527                  }
2528                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
2529		  {
2530		    unsigned char str[MB_LEN_MAX + 1];
2531#ifdef _LIBC
2532		    uint32_t nrules =
2533		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2534#endif
2535
2536		    PATFETCH (c);
2537		    c1 = 0;
2538
2539		    /* If pattern is `[[='.  */
2540		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2541
2542		    for (;;)
2543		      {
2544			PATFETCH (c);
2545			if ((c == '=' && *p == ']') || p == pend)
2546			  break;
2547			if (c1 < MB_LEN_MAX)
2548			  str[c1++] = c;
2549			else
2550			  /* This is in any case an invalid class name.  */
2551			  str[0] = '\0';
2552                      }
2553		    str[c1] = '\0';
2554
2555		    if (c == '=' && *p == ']' && str[0] != '\0')
2556		      {
2557			/* If we have no collation data we use the default
2558			   collation in which each character is in a class
2559			   by itself.  It also means that ASCII is the
2560			   character set and therefore we cannot have character
2561			   with more than one byte in the multibyte
2562			   representation.  */
2563#ifdef _LIBC
2564			if (nrules == 0)
2565#endif
2566			  {
2567			    if (c1 != 1)
2568			      FREE_STACK_RETURN (REG_ECOLLATE);
2569
2570			    /* Throw away the ] at the end of the equivalence
2571			       class.  */
2572			    PATFETCH (c);
2573
2574			    /* Set the bit for the character.  */
2575			    SET_LIST_BIT (str[0]);
2576			  }
2577#ifdef _LIBC
2578			else
2579			  {
2580			    /* Try to match the byte sequence in `str' against
2581			       those known to the collate implementation.
2582			       First find out whether the bytes in `str' are
2583			       actually from exactly one character.  */
2584			    const int32_t *table;
2585			    const unsigned char *weights;
2586			    const unsigned char *extra;
2587			    const int32_t *indirect;
2588			    int32_t idx;
2589			    const unsigned char *cp = str;
2590			    int ch;
2591
2592			    /* This #include defines a local function!  */
2593# include <locale/weight.h>
2594
2595			    table = (const int32_t *)
2596			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2597			    weights = (const unsigned char *)
2598			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
2599			    extra = (const unsigned char *)
2600			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
2601			    indirect = (const int32_t *)
2602			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
2603
2604			    idx = findidx (&cp);
2605			    if (idx == 0 || cp < str + c1)
2606			      /* This is no valid character.  */
2607			      FREE_STACK_RETURN (REG_ECOLLATE);
2608
2609			    /* Throw away the ] at the end of the equivalence
2610			       class.  */
2611			    PATFETCH (c);
2612
2613			    /* Now we have to go throught the whole table
2614			       and find all characters which have the same
2615			       first level weight.
2616
2617			       XXX Note that this is not entirely correct.
2618			       we would have to match multibyte sequences
2619			       but this is not possible with the current
2620			       implementation.  */
2621			    for (ch = 1; ch < 256; ++ch)
2622			      /* XXX This test would have to be changed if we
2623				 would allow matching multibyte sequences.  */
2624			      if (table[ch] > 0)
2625				{
2626				  int32_t idx2 = table[ch];
2627				  size_t len = weights[idx2];
2628
2629				  /* Test whether the lenghts match.  */
2630				  if (weights[idx] == len)
2631				    {
2632				      /* They do.  New compare the bytes of
2633					 the weight.  */
2634				      size_t cnt = 0;
2635
2636				      while (cnt < len
2637					     && (weights[idx + 1 + cnt]
2638						 == weights[idx2 + 1 + cnt]))
2639					++len;
2640
2641				      if (cnt == len)
2642					/* They match.  Mark the character as
2643					   acceptable.  */
2644					SET_LIST_BIT (ch);
2645				    }
2646				}
2647			  }
2648#endif
2649			had_char_class = true;
2650		      }
2651                    else
2652                      {
2653                        c1++;
2654                        while (c1--)
2655                          PATUNFETCH;
2656                        SET_LIST_BIT ('[');
2657                        SET_LIST_BIT ('=');
2658			range_start = '=';
2659                        had_char_class = false;
2660                      }
2661		  }
2662                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
2663		  {
2664		    unsigned char str[128];	/* Should be large enough.  */
2665#ifdef _LIBC
2666		    uint32_t nrules =
2667		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2668#endif
2669
2670		    PATFETCH (c);
2671		    c1 = 0;
2672
2673		    /* If pattern is `[[='.  */
2674		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2675
2676		    for (;;)
2677		      {
2678			PATFETCH (c);
2679			if ((c == '.' && *p == ']') || p == pend)
2680			  break;
2681			if (c1 < sizeof (str))
2682			  str[c1++] = c;
2683			else
2684			  /* This is in any case an invalid class name.  */
2685			  str[0] = '\0';
2686                      }
2687		    str[c1] = '\0';
2688
2689		    if (c == '.' && *p == ']' && str[0] != '\0')
2690		      {
2691			/* If we have no collation data we use the default
2692			   collation in which each character is the name
2693			   for its own class which contains only the one
2694			   character.  It also means that ASCII is the
2695			   character set and therefore we cannot have character
2696			   with more than one byte in the multibyte
2697			   representation.  */
2698#ifdef _LIBC
2699			if (nrules == 0)
2700#endif
2701			  {
2702			    if (c1 != 1)
2703			      FREE_STACK_RETURN (REG_ECOLLATE);
2704
2705			    /* Throw away the ] at the end of the equivalence
2706			       class.  */
2707			    PATFETCH (c);
2708
2709			    /* Set the bit for the character.  */
2710			    SET_LIST_BIT (str[0]);
2711			    range_start = ((const unsigned char *) str)[0];
2712			  }
2713#ifdef _LIBC
2714			else
2715			  {
2716			    /* Try to match the byte sequence in `str' against
2717			       those known to the collate implementation.
2718			       First find out whether the bytes in `str' are
2719			       actually from exactly one character.  */
2720			    int32_t table_size;
2721			    const int32_t *symb_table;
2722			    const unsigned char *extra;
2723			    int32_t idx;
2724			    int32_t elem;
2725			    int32_t second;
2726			    int32_t hash;
2727
2728			    table_size =
2729			      _NL_CURRENT_WORD (LC_COLLATE,
2730						_NL_COLLATE_SYMB_HASH_SIZEMB);
2731			    symb_table = (const int32_t *)
2732			      _NL_CURRENT (LC_COLLATE,
2733					   _NL_COLLATE_SYMB_TABLEMB);
2734			    extra = (const unsigned char *)
2735			      _NL_CURRENT (LC_COLLATE,
2736					   _NL_COLLATE_SYMB_EXTRAMB);
2737
2738			    /* Locate the character in the hashing table.  */
2739			    hash = elem_hash (str, c1);
2740
2741			    idx = 0;
2742			    elem = hash % table_size;
2743			    second = hash % (table_size - 2);
2744			    while (symb_table[2 * elem] != 0)
2745			      {
2746				/* First compare the hashing value.  */
2747				if (symb_table[2 * elem] == hash
2748				    && c1 == extra[symb_table[2 * elem + 1]]
2749				    && memcmp (str,
2750					       &extra[symb_table[2 * elem + 1]
2751						     + 1],
2752					       c1) == 0)
2753				  {
2754				    /* Yep, this is the entry.  */
2755				    idx = symb_table[2 * elem + 1];
2756				    idx += 1 + extra[idx];
2757				    break;
2758				  }
2759
2760				/* Next entry.  */
2761				elem += second;
2762			      }
2763
2764			    if (symb_table[2 * elem] == 0)
2765			      /* This is no valid character.  */
2766			      FREE_STACK_RETURN (REG_ECOLLATE);
2767
2768			    /* Throw away the ] at the end of the equivalence
2769			       class.  */
2770			    PATFETCH (c);
2771
2772			    /* Now add the multibyte character(s) we found
2773			       to the accept list.
2774
2775			       XXX Note that this is not entirely correct.
2776			       we would have to match multibyte sequences
2777			       but this is not possible with the current
2778			       implementation.  Also, we have to match
2779			       collating symbols, which expand to more than
2780			       one file, as a whole and not allow the
2781			       individual bytes.  */
2782			    c1 = extra[idx++];
2783			    if (c1 == 1)
2784			      range_start = extra[idx];
2785			    while (c1-- > 0)
2786			      {
2787				SET_LIST_BIT (extra[idx]);
2788				++idx;
2789			      }
2790			  }
2791#endif
2792			had_char_class = false;
2793		      }
2794                    else
2795                      {
2796                        c1++;
2797                        while (c1--)
2798                          PATUNFETCH;
2799                        SET_LIST_BIT ('[');
2800                        SET_LIST_BIT ('.');
2801			range_start = '.';
2802                        had_char_class = false;
2803                      }
2804		  }
2805                else
2806                  {
2807                    had_char_class = false;
2808                    SET_LIST_BIT (c);
2809		    range_start = c;
2810                  }
2811              }
2812
2813            /* Discard any (non)matching list bytes that are all 0 at the
2814               end of the map.  Decrease the map-length byte too.  */
2815            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2816              b[-1]--;
2817            b += b[-1];
2818          }
2819          break;
2820
2821
2822	case '(':
2823          if (syntax & RE_NO_BK_PARENS)
2824            goto handle_open;
2825          else
2826            goto normal_char;
2827
2828
2829        case ')':
2830          if (syntax & RE_NO_BK_PARENS)
2831            goto handle_close;
2832          else
2833            goto normal_char;
2834
2835
2836        case '\n':
2837          if (syntax & RE_NEWLINE_ALT)
2838            goto handle_alt;
2839          else
2840            goto normal_char;
2841
2842
2843	case '|':
2844          if (syntax & RE_NO_BK_VBAR)
2845            goto handle_alt;
2846          else
2847            goto normal_char;
2848
2849
2850        case '{':
2851           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2852             goto handle_interval;
2853           else
2854             goto normal_char;
2855
2856
2857        case '\\':
2858          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2859
2860          /* Do not translate the character after the \, so that we can
2861             distinguish, e.g., \B from \b, even if we normally would
2862             translate, e.g., B to b.  */
2863          PATFETCH_RAW (c);
2864
2865          switch (c)
2866            {
2867            case '(':
2868              if (syntax & RE_NO_BK_PARENS)
2869                goto normal_backslash;
2870
2871            handle_open:
2872              bufp->re_nsub++;
2873              regnum++;
2874
2875              if (COMPILE_STACK_FULL)
2876                {
2877                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2878                            compile_stack_elt_t);
2879                  if (compile_stack.stack == NULL) return REG_ESPACE;
2880
2881                  compile_stack.size <<= 1;
2882                }
2883
2884              /* These are the values to restore when we hit end of this
2885                 group.  They are all relative offsets, so that if the
2886                 whole pattern moves because of realloc, they will still
2887                 be valid.  */
2888              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2889              COMPILE_STACK_TOP.fixup_alt_jump
2890                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2891              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2892              COMPILE_STACK_TOP.regnum = regnum;
2893
2894              /* We will eventually replace the 0 with the number of
2895                 groups inner to this one.  But do not push a
2896                 start_memory for groups beyond the last one we can
2897                 represent in the compiled pattern.  */
2898              if (regnum <= MAX_REGNUM)
2899                {
2900                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2901                  BUF_PUSH_3 (start_memory, regnum, 0);
2902                }
2903
2904              compile_stack.avail++;
2905
2906              fixup_alt_jump = 0;
2907              laststart = 0;
2908              begalt = b;
2909	      /* If we've reached MAX_REGNUM groups, then this open
2910		 won't actually generate any code, so we'll have to
2911		 clear pending_exact explicitly.  */
2912	      pending_exact = 0;
2913              break;
2914
2915
2916            case ')':
2917              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2918
2919              if (COMPILE_STACK_EMPTY)
2920		{
2921		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2922		    goto normal_backslash;
2923		  else
2924		    FREE_STACK_RETURN (REG_ERPAREN);
2925		}
2926
2927            handle_close:
2928              if (fixup_alt_jump)
2929                { /* Push a dummy failure point at the end of the
2930                     alternative for a possible future
2931                     `pop_failure_jump' to pop.  See comments at
2932                     `push_dummy_failure' in `re_match_2'.  */
2933                  BUF_PUSH (push_dummy_failure);
2934
2935                  /* We allocated space for this jump when we assigned
2936                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2937                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2938                }
2939
2940              /* See similar code for backslashed left paren above.  */
2941              if (COMPILE_STACK_EMPTY)
2942		{
2943		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2944		    goto normal_char;
2945		  else
2946		    FREE_STACK_RETURN (REG_ERPAREN);
2947		}
2948
2949              /* Since we just checked for an empty stack above, this
2950                 ``can't happen''.  */
2951              assert (compile_stack.avail != 0);
2952              {
2953                /* We don't just want to restore into `regnum', because
2954                   later groups should continue to be numbered higher,
2955                   as in `(ab)c(de)' -- the second group is #2.  */
2956                regnum_t this_group_regnum;
2957
2958                compile_stack.avail--;
2959                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2960                fixup_alt_jump
2961                  = COMPILE_STACK_TOP.fixup_alt_jump
2962                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2963                    : 0;
2964                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2965                this_group_regnum = COMPILE_STACK_TOP.regnum;
2966		/* If we've reached MAX_REGNUM groups, then this open
2967		   won't actually generate any code, so we'll have to
2968		   clear pending_exact explicitly.  */
2969		pending_exact = 0;
2970
2971                /* We're at the end of the group, so now we know how many
2972                   groups were inside this one.  */
2973                if (this_group_regnum <= MAX_REGNUM)
2974                  {
2975                    unsigned char *inner_group_loc
2976                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2977
2978                    *inner_group_loc = regnum - this_group_regnum;
2979                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2980                                regnum - this_group_regnum);
2981                  }
2982              }
2983              break;
2984
2985
2986            case '|':					/* `\|'.  */
2987              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2988                goto normal_backslash;
2989            handle_alt:
2990              if (syntax & RE_LIMITED_OPS)
2991                goto normal_char;
2992
2993              /* Insert before the previous alternative a jump which
2994                 jumps to this alternative if the former fails.  */
2995              GET_BUFFER_SPACE (3);
2996              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2997              pending_exact = 0;
2998              b += 3;
2999
3000              /* The alternative before this one has a jump after it
3001                 which gets executed if it gets matched.  Adjust that
3002                 jump so it will jump to this alternative's analogous
3003                 jump (put in below, which in turn will jump to the next
3004                 (if any) alternative's such jump, etc.).  The last such
3005                 jump jumps to the correct final destination.  A picture:
3006                          _____ _____
3007                          |   | |   |
3008                          |   v |   v
3009                         a | b   | c
3010
3011                 If we are at `b', then fixup_alt_jump right now points to a
3012                 three-byte space after `a'.  We'll put in the jump, set
3013                 fixup_alt_jump to right after `b', and leave behind three
3014                 bytes which we'll fill in when we get to after `c'.  */
3015
3016              if (fixup_alt_jump)
3017                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3018
3019              /* Mark and leave space for a jump after this alternative,
3020                 to be filled in later either by next alternative or
3021                 when know we're at the end of a series of alternatives.  */
3022              fixup_alt_jump = b;
3023              GET_BUFFER_SPACE (3);
3024              b += 3;
3025
3026              laststart = 0;
3027              begalt = b;
3028              break;
3029
3030
3031            case '{':
3032              /* If \{ is a literal.  */
3033              if (!(syntax & RE_INTERVALS)
3034                     /* If we're at `\{' and it's not the open-interval
3035                        operator.  */
3036		  || (syntax & RE_NO_BK_BRACES))
3037                goto normal_backslash;
3038
3039            handle_interval:
3040              {
3041                /* If got here, then the syntax allows intervals.  */
3042
3043                /* At least (most) this many matches must be made.  */
3044                int lower_bound = -1, upper_bound = -1;
3045
3046                beg_interval = p - 1;
3047
3048                if (p == pend)
3049                  {
3050                    if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
3051                      goto unfetch_interval;
3052                    else
3053                      FREE_STACK_RETURN (REG_EBRACE);
3054                  }
3055
3056                GET_UNSIGNED_NUMBER (lower_bound);
3057
3058                if (c == ',')
3059                  {
3060                    GET_UNSIGNED_NUMBER (upper_bound);
3061		    if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
3062			|| ((syntax & RE_NO_BK_BRACES) && c != '}'))
3063		      FREE_STACK_RETURN (REG_BADBR);
3064
3065		    if (upper_bound < 0)
3066		      upper_bound = RE_DUP_MAX;
3067                  }
3068                else
3069                  /* Interval such as `{1}' => match exactly once. */
3070                  upper_bound = lower_bound;
3071
3072                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
3073                    || lower_bound > upper_bound)
3074                  {
3075                    if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
3076                      goto unfetch_interval;
3077                    else
3078                      FREE_STACK_RETURN (REG_BADBR);
3079                  }
3080
3081                if (!(syntax & RE_NO_BK_BRACES))
3082                  {
3083                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
3084
3085                    PATFETCH (c);
3086                  }
3087
3088                if (c != '}')
3089                  {
3090                    if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
3091                      goto unfetch_interval;
3092                    else
3093                      FREE_STACK_RETURN (REG_BADBR);
3094                  }
3095
3096                /* We just parsed a valid interval.  */
3097
3098                /* If it's invalid to have no preceding re.  */
3099                if (!laststart)
3100                  {
3101                    if (syntax & RE_CONTEXT_INVALID_OPS)
3102                      FREE_STACK_RETURN (REG_BADRPT);
3103                    else if (syntax & RE_CONTEXT_INDEP_OPS)
3104                      laststart = b;
3105                    else
3106                      goto unfetch_interval;
3107                  }
3108
3109                /* If the upper bound is zero, don't want to succeed at
3110                   all; jump from `laststart' to `b + 3', which will be
3111                   the end of the buffer after we insert the jump.  */
3112                 if (upper_bound == 0)
3113                   {
3114                     GET_BUFFER_SPACE (3);
3115                     INSERT_JUMP (jump, laststart, b + 3);
3116                     b += 3;
3117                   }
3118
3119                 /* Otherwise, we have a nontrivial interval.  When
3120                    we're all done, the pattern will look like:
3121                      set_number_at <jump count> <upper bound>
3122                      set_number_at <succeed_n count> <lower bound>
3123                      succeed_n <after jump addr> <succeed_n count>
3124                      <body of loop>
3125                      jump_n <succeed_n addr> <jump count>
3126                    (The upper bound and `jump_n' are omitted if
3127                    `upper_bound' is 1, though.)  */
3128                 else
3129                   { /* If the upper bound is > 1, we need to insert
3130                        more at the end of the loop.  */
3131                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
3132
3133                     GET_BUFFER_SPACE (nbytes);
3134
3135                     /* Initialize lower bound of the `succeed_n', even
3136                        though it will be set during matching by its
3137                        attendant `set_number_at' (inserted next),
3138                        because `re_compile_fastmap' needs to know.
3139                        Jump to the `jump_n' we might insert below.  */
3140                     INSERT_JUMP2 (succeed_n, laststart,
3141                                   b + 5 + (upper_bound > 1) * 5,
3142                                   lower_bound);
3143                     b += 5;
3144
3145                     /* Code to initialize the lower bound.  Insert
3146                        before the `succeed_n'.  The `5' is the last two
3147                        bytes of this `set_number_at', plus 3 bytes of
3148                        the following `succeed_n'.  */
3149                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3150                     b += 5;
3151
3152                     if (upper_bound > 1)
3153                       { /* More than one repetition is allowed, so
3154                            append a backward jump to the `succeed_n'
3155                            that starts this interval.
3156
3157                            When we've reached this during matching,
3158                            we'll have matched the interval once, so
3159                            jump back only `upper_bound - 1' times.  */
3160                         STORE_JUMP2 (jump_n, b, laststart + 5,
3161                                      upper_bound - 1);
3162                         b += 5;
3163
3164                         /* The location we want to set is the second
3165                            parameter of the `jump_n'; that is `b-2' as
3166                            an absolute address.  `laststart' will be
3167                            the `set_number_at' we're about to insert;
3168                            `laststart+3' the number to set, the source
3169                            for the relative address.  But we are
3170                            inserting into the middle of the pattern --
3171                            so everything is getting moved up by 5.
3172                            Conclusion: (b - 2) - (laststart + 3) + 5,
3173                            i.e., b - laststart.
3174
3175                            We insert this at the beginning of the loop
3176                            so that if we fail during matching, we'll
3177                            reinitialize the bounds.  */
3178                         insert_op2 (set_number_at, laststart, b - laststart,
3179                                     upper_bound - 1, b);
3180                         b += 5;
3181                       }
3182                   }
3183                pending_exact = 0;
3184                beg_interval = NULL;
3185              }
3186              break;
3187
3188            unfetch_interval:
3189              /* If an invalid interval, match the characters as literals.  */
3190               assert (beg_interval);
3191               p = beg_interval;
3192               beg_interval = NULL;
3193
3194               /* normal_char and normal_backslash need `c'.  */
3195               PATFETCH (c);
3196
3197               if (!(syntax & RE_NO_BK_BRACES))
3198                 {
3199                   if (p > pattern  &&  p[-1] == '\\')
3200                     goto normal_backslash;
3201                 }
3202               goto normal_char;
3203
3204#ifdef emacs
3205            /* There is no way to specify the before_dot and after_dot
3206               operators.  rms says this is ok.  --karl  */
3207            case '=':
3208              BUF_PUSH (at_dot);
3209              break;
3210
3211            case 's':
3212              laststart = b;
3213              PATFETCH (c);
3214              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3215              break;
3216
3217            case 'S':
3218              laststart = b;
3219              PATFETCH (c);
3220              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3221              break;
3222#endif /* emacs */
3223
3224
3225            case 'w':
3226	      if (syntax & RE_NO_GNU_OPS)
3227		goto normal_char;
3228              laststart = b;
3229              BUF_PUSH (wordchar);
3230              break;
3231
3232
3233            case 'W':
3234	      if (syntax & RE_NO_GNU_OPS)
3235		goto normal_char;
3236              laststart = b;
3237              BUF_PUSH (notwordchar);
3238              break;
3239
3240
3241            case '<':
3242	      if (syntax & RE_NO_GNU_OPS)
3243		goto normal_char;
3244              BUF_PUSH (wordbeg);
3245              break;
3246
3247            case '>':
3248	      if (syntax & RE_NO_GNU_OPS)
3249		goto normal_char;
3250              BUF_PUSH (wordend);
3251              break;
3252
3253            case 'b':
3254	      if (syntax & RE_NO_GNU_OPS)
3255		goto normal_char;
3256              BUF_PUSH (wordbound);
3257              break;
3258
3259            case 'B':
3260	      if (syntax & RE_NO_GNU_OPS)
3261		goto normal_char;
3262              BUF_PUSH (notwordbound);
3263              break;
3264
3265            case '`':
3266	      if (syntax & RE_NO_GNU_OPS)
3267		goto normal_char;
3268              BUF_PUSH (begbuf);
3269              break;
3270
3271            case '\'':
3272	      if (syntax & RE_NO_GNU_OPS)
3273		goto normal_char;
3274              BUF_PUSH (endbuf);
3275              break;
3276
3277            case '1': case '2': case '3': case '4': case '5':
3278            case '6': case '7': case '8': case '9':
3279              if (syntax & RE_NO_BK_REFS)
3280                goto normal_char;
3281
3282              c1 = c - '0';
3283
3284              if (c1 > regnum)
3285                FREE_STACK_RETURN (REG_ESUBREG);
3286
3287              /* Can't back reference to a subexpression if inside of it.  */
3288              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
3289                goto normal_char;
3290
3291              laststart = b;
3292              BUF_PUSH_2 (duplicate, c1);
3293              break;
3294
3295
3296            case '+':
3297            case '?':
3298              if (syntax & RE_BK_PLUS_QM)
3299                goto handle_plus;
3300              else
3301                goto normal_backslash;
3302
3303            default:
3304            normal_backslash:
3305              /* You might think it would be useful for \ to mean
3306                 not to translate; but if we don't translate it
3307                 it will never match anything.  */
3308              c = TRANSLATE (c);
3309              goto normal_char;
3310            }
3311          break;
3312
3313
3314	default:
3315        /* Expects the character in `c'.  */
3316	normal_char:
3317	      /* If no exactn currently being built.  */
3318          if (!pending_exact
3319
3320              /* If last exactn not at current position.  */
3321              || pending_exact + *pending_exact + 1 != b
3322
3323              /* We have only one byte following the exactn for the count.  */
3324	      || *pending_exact == (1 << BYTEWIDTH) - 1
3325
3326              /* If followed by a repetition operator.  */
3327              || *p == '*' || *p == '^'
3328	      || ((syntax & RE_BK_PLUS_QM)
3329		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
3330		  : (*p == '+' || *p == '?'))
3331	      || ((syntax & RE_INTERVALS)
3332                  && ((syntax & RE_NO_BK_BRACES)
3333		      ? *p == '{'
3334                      : (p[0] == '\\' && p[1] == '{'))))
3335	    {
3336	      /* Start building a new exactn.  */
3337
3338              laststart = b;
3339
3340	      BUF_PUSH_2 (exactn, 0);
3341	      pending_exact = b - 1;
3342            }
3343
3344	  BUF_PUSH (c);
3345          (*pending_exact)++;
3346	  break;
3347        } /* switch (c) */
3348    } /* while p != pend */
3349
3350
3351  /* Through the pattern now.  */
3352
3353  if (fixup_alt_jump)
3354    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3355
3356  if (!COMPILE_STACK_EMPTY)
3357    FREE_STACK_RETURN (REG_EPAREN);
3358
3359  /* If we don't want backtracking, force success
3360     the first time we reach the end of the compiled pattern.  */
3361  if (syntax & RE_NO_POSIX_BACKTRACKING)
3362    BUF_PUSH (succeed);
3363
3364  free (compile_stack.stack);
3365
3366  /* We have succeeded; set the length of the buffer.  */
3367  bufp->used = b - bufp->buffer;
3368
3369#ifdef DEBUG
3370  if (debug)
3371    {
3372      DEBUG_PRINT1 ("\nCompiled pattern: \n");
3373      print_compiled_pattern (bufp);
3374    }
3375#endif /* DEBUG */
3376
3377#ifndef MATCH_MAY_ALLOCATE
3378  /* Initialize the failure stack to the largest possible stack.  This
3379     isn't necessary unless we're trying to avoid calling alloca in
3380     the search and match routines.  */
3381  {
3382    int num_regs = bufp->re_nsub + 1;
3383
3384    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3385       is strictly greater than re_max_failures, the largest possible stack
3386       is 2 * re_max_failures failure points.  */
3387    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
3388      {
3389	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
3390
3391# ifdef emacs
3392	if (! fail_stack.stack)
3393	  fail_stack.stack
3394	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
3395					    * sizeof (fail_stack_elt_t));
3396	else
3397	  fail_stack.stack
3398	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3399					     (fail_stack.size
3400					      * sizeof (fail_stack_elt_t)));
3401# else /* not emacs */
3402	if (! fail_stack.stack)
3403	  fail_stack.stack
3404	    = (fail_stack_elt_t *) malloc (fail_stack.size
3405					   * sizeof (fail_stack_elt_t));
3406	else
3407	  fail_stack.stack
3408	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
3409					    (fail_stack.size
3410					     * sizeof (fail_stack_elt_t)));
3411# endif /* not emacs */
3412      }
3413
3414    regex_grow_registers (num_regs);
3415  }
3416#endif /* not MATCH_MAY_ALLOCATE */
3417
3418  return REG_NOERROR;
3419} /* regex_compile */
3420
3421/* Subroutines for `regex_compile'.  */
3422
3423/* Store OP at LOC followed by two-byte integer parameter ARG.  */
3424
3425static void
3426store_op1 (op, loc, arg)
3427    re_opcode_t op;
3428    unsigned char *loc;
3429    int arg;
3430{
3431  *loc = (unsigned char) op;
3432  STORE_NUMBER (loc + 1, arg);
3433}
3434
3435
3436/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3437
3438static void
3439store_op2 (op, loc, arg1, arg2)
3440    re_opcode_t op;
3441    unsigned char *loc;
3442    int arg1, arg2;
3443{
3444  *loc = (unsigned char) op;
3445  STORE_NUMBER (loc + 1, arg1);
3446  STORE_NUMBER (loc + 3, arg2);
3447}
3448
3449
3450/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3451   for OP followed by two-byte integer parameter ARG.  */
3452
3453static void
3454insert_op1 (op, loc, arg, end)
3455    re_opcode_t op;
3456    unsigned char *loc;
3457    int arg;
3458    unsigned char *end;
3459{
3460  register unsigned char *pfrom = end;
3461  register unsigned char *pto = end + 3;
3462
3463  while (pfrom != loc)
3464    *--pto = *--pfrom;
3465
3466  store_op1 (op, loc, arg);
3467}
3468
3469
3470/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3471
3472static void
3473insert_op2 (op, loc, arg1, arg2, end)
3474    re_opcode_t op;
3475    unsigned char *loc;
3476    int arg1, arg2;
3477    unsigned char *end;
3478{
3479  register unsigned char *pfrom = end;
3480  register unsigned char *pto = end + 5;
3481
3482  while (pfrom != loc)
3483    *--pto = *--pfrom;
3484
3485  store_op2 (op, loc, arg1, arg2);
3486}
3487
3488
3489/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3490   after an alternative or a begin-subexpression.  We assume there is at
3491   least one character before the ^.  */
3492
3493static boolean
3494at_begline_loc_p (pattern, p, syntax)
3495    const char *pattern, *p;
3496    reg_syntax_t syntax;
3497{
3498  const char *prev = p - 2;
3499  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3500
3501  return
3502       /* After a subexpression?  */
3503       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3504       /* After an alternative?  */
3505    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3506}
3507
3508
3509/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3510   at least one character after the $, i.e., `P < PEND'.  */
3511
3512static boolean
3513at_endline_loc_p (p, pend, syntax)
3514    const char *p, *pend;
3515    reg_syntax_t syntax;
3516{
3517  const char *next = p;
3518  boolean next_backslash = *next == '\\';
3519  const char *next_next = p + 1 < pend ? p + 1 : 0;
3520
3521  return
3522       /* Before a subexpression?  */
3523       (syntax & RE_NO_BK_PARENS ? *next == ')'
3524        : next_backslash && next_next && *next_next == ')')
3525       /* Before an alternative?  */
3526    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3527        : next_backslash && next_next && *next_next == '|');
3528}
3529
3530
3531/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3532   false if it's not.  */
3533
3534static boolean
3535group_in_compile_stack (compile_stack, regnum)
3536    compile_stack_type compile_stack;
3537    regnum_t regnum;
3538{
3539  int this_element;
3540
3541  for (this_element = compile_stack.avail - 1;
3542       this_element >= 0;
3543       this_element--)
3544    if (compile_stack.stack[this_element].regnum == regnum)
3545      return true;
3546
3547  return false;
3548}
3549
3550
3551/* Read the ending character of a range (in a bracket expression) from the
3552   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3553   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3554   Then we set the translation of all bits between the starting and
3555   ending characters (inclusive) in the compiled pattern B.
3556
3557   Return an error code.
3558
3559   We use these short variable names so we can use the same macros as
3560   `regex_compile' itself.  */
3561
3562static reg_errcode_t
3563compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
3564     unsigned int range_start_char;
3565     const char **p_ptr, *pend;
3566     RE_TRANSLATE_TYPE translate;
3567     reg_syntax_t syntax;
3568     unsigned char *b;
3569{
3570  unsigned this_char;
3571  const char *p = *p_ptr;
3572  reg_errcode_t ret;
3573#if _LIBC
3574  const unsigned char *collseq;
3575  unsigned int start_colseq;
3576  unsigned int end_colseq;
3577#else
3578  unsigned end_char;
3579#endif
3580
3581  if (p == pend)
3582    return REG_ERANGE;
3583
3584  /* Have to increment the pointer into the pattern string, so the
3585     caller isn't still at the ending character.  */
3586  (*p_ptr)++;
3587
3588  /* Report an error if the range is empty and the syntax prohibits this.  */
3589  ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3590
3591#if _LIBC
3592  collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3593						 _NL_COLLATE_COLLSEQMB);
3594
3595  start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
3596  end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
3597  for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
3598    {
3599      unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
3600
3601      if (start_colseq <= this_colseq && this_colseq <= end_colseq)
3602	{
3603	  SET_LIST_BIT (TRANSLATE (this_char));
3604	  ret = REG_NOERROR;
3605	}
3606    }
3607#else
3608  /* Here we see why `this_char' has to be larger than an `unsigned
3609     char' -- we would otherwise go into an infinite loop, since all
3610     characters <= 0xff.  */
3611  range_start_char = TRANSLATE (range_start_char);
3612  end_char = TRANSLATE (p[0]);
3613  for (this_char = range_start_char; this_char <= end_char; ++this_char)
3614    {
3615      SET_LIST_BIT (TRANSLATE (this_char));
3616      ret = REG_NOERROR;
3617    }
3618#endif
3619
3620  return ret;
3621}
3622
3623/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3624   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3625   characters can start a string that matches the pattern.  This fastmap
3626   is used by re_search to skip quickly over impossible starting points.
3627
3628   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3629   area as BUFP->fastmap.
3630
3631   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3632   the pattern buffer.
3633
3634   Returns 0 if we succeed, -2 if an internal error.   */
3635
3636int
3637re_compile_fastmap (bufp)
3638     struct re_pattern_buffer *bufp;
3639{
3640  int j, k;
3641#ifdef MATCH_MAY_ALLOCATE
3642  fail_stack_type fail_stack;
3643#endif
3644#ifndef REGEX_MALLOC
3645  char *destination;
3646#endif
3647
3648  register char *fastmap = bufp->fastmap;
3649  unsigned char *pattern = bufp->buffer;
3650  unsigned char *p = pattern;
3651  register unsigned char *pend = pattern + bufp->used;
3652
3653#ifdef REL_ALLOC
3654  /* This holds the pointer to the failure stack, when
3655     it is allocated relocatably.  */
3656  fail_stack_elt_t *failure_stack_ptr;
3657#endif
3658
3659  /* Assume that each path through the pattern can be null until
3660     proven otherwise.  We set this false at the bottom of switch
3661     statement, to which we get only if a particular path doesn't
3662     match the empty string.  */
3663  boolean path_can_be_null = true;
3664
3665  /* We aren't doing a `succeed_n' to begin with.  */
3666  boolean succeed_n_p = false;
3667
3668  assert (fastmap != NULL && p != NULL);
3669
3670  INIT_FAIL_STACK ();
3671  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3672  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3673  bufp->can_be_null = 0;
3674
3675  while (1)
3676    {
3677      if (p == pend || *p == succeed)
3678	{
3679	  /* We have reached the (effective) end of pattern.  */
3680	  if (!FAIL_STACK_EMPTY ())
3681	    {
3682	      bufp->can_be_null |= path_can_be_null;
3683
3684	      /* Reset for next path.  */
3685	      path_can_be_null = true;
3686
3687	      p = fail_stack.stack[--fail_stack.avail].pointer;
3688
3689	      continue;
3690	    }
3691	  else
3692	    break;
3693	}
3694
3695      /* We should never be about to go beyond the end of the pattern.  */
3696      assert (p < pend);
3697
3698      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3699	{
3700
3701        /* I guess the idea here is to simply not bother with a fastmap
3702           if a backreference is used, since it's too hard to figure out
3703           the fastmap for the corresponding group.  Setting
3704           `can_be_null' stops `re_search_2' from using the fastmap, so
3705           that is all we do.  */
3706	case duplicate:
3707	  bufp->can_be_null = 1;
3708          goto done;
3709
3710
3711      /* Following are the cases which match a character.  These end
3712         with `break'.  */
3713
3714	case exactn:
3715          fastmap[p[1]] = 1;
3716	  break;
3717
3718
3719        case charset:
3720          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3721	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3722              fastmap[j] = 1;
3723	  break;
3724
3725
3726	case charset_not:
3727	  /* Chars beyond end of map must be allowed.  */
3728	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3729            fastmap[j] = 1;
3730
3731	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3732	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3733              fastmap[j] = 1;
3734          break;
3735
3736
3737	case wordchar:
3738	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3739	    if (SYNTAX (j) == Sword)
3740	      fastmap[j] = 1;
3741	  break;
3742
3743
3744	case notwordchar:
3745	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3746	    if (SYNTAX (j) != Sword)
3747	      fastmap[j] = 1;
3748	  break;
3749
3750
3751        case anychar:
3752	  {
3753	    int fastmap_newline = fastmap['\n'];
3754
3755	    /* `.' matches anything ...  */
3756	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3757	      fastmap[j] = 1;
3758
3759	    /* ... except perhaps newline.  */
3760	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3761	      fastmap['\n'] = fastmap_newline;
3762
3763	    /* Return if we have already set `can_be_null'; if we have,
3764	       then the fastmap is irrelevant.  Something's wrong here.  */
3765	    else if (bufp->can_be_null)
3766	      goto done;
3767
3768	    /* Otherwise, have to check alternative paths.  */
3769	    break;
3770	  }
3771
3772#ifdef emacs
3773        case syntaxspec:
3774	  k = *p++;
3775	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3776	    if (SYNTAX (j) == (enum syntaxcode) k)
3777	      fastmap[j] = 1;
3778	  break;
3779
3780
3781	case notsyntaxspec:
3782	  k = *p++;
3783	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3784	    if (SYNTAX (j) != (enum syntaxcode) k)
3785	      fastmap[j] = 1;
3786	  break;
3787
3788
3789      /* All cases after this match the empty string.  These end with
3790         `continue'.  */
3791
3792
3793	case before_dot:
3794	case at_dot:
3795	case after_dot:
3796          continue;
3797#endif /* emacs */
3798
3799
3800        case no_op:
3801        case begline:
3802        case endline:
3803	case begbuf:
3804	case endbuf:
3805	case wordbound:
3806	case notwordbound:
3807	case wordbeg:
3808	case wordend:
3809        case push_dummy_failure:
3810          continue;
3811
3812
3813	case jump_n:
3814        case pop_failure_jump:
3815	case maybe_pop_jump:
3816	case jump:
3817        case jump_past_alt:
3818	case dummy_failure_jump:
3819          EXTRACT_NUMBER_AND_INCR (j, p);
3820	  p += j;
3821	  if (j > 0)
3822	    continue;
3823
3824          /* Jump backward implies we just went through the body of a
3825             loop and matched nothing.  Opcode jumped to should be
3826             `on_failure_jump' or `succeed_n'.  Just treat it like an
3827             ordinary jump.  For a * loop, it has pushed its failure
3828             point already; if so, discard that as redundant.  */
3829          if ((re_opcode_t) *p != on_failure_jump
3830	      && (re_opcode_t) *p != succeed_n)
3831	    continue;
3832
3833          p++;
3834          EXTRACT_NUMBER_AND_INCR (j, p);
3835          p += j;
3836
3837          /* If what's on the stack is where we are now, pop it.  */
3838          if (!FAIL_STACK_EMPTY ()
3839	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3840            fail_stack.avail--;
3841
3842          continue;
3843
3844
3845        case on_failure_jump:
3846        case on_failure_keep_string_jump:
3847	handle_on_failure_jump:
3848          EXTRACT_NUMBER_AND_INCR (j, p);
3849
3850          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3851             end of the pattern.  We don't want to push such a point,
3852             since when we restore it above, entering the switch will
3853             increment `p' past the end of the pattern.  We don't need
3854             to push such a point since we obviously won't find any more
3855             fastmap entries beyond `pend'.  Such a pattern can match
3856             the null string, though.  */
3857          if (p + j < pend)
3858            {
3859              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3860		{
3861		  RESET_FAIL_STACK ();
3862		  return -2;
3863		}
3864            }
3865          else
3866            bufp->can_be_null = 1;
3867
3868          if (succeed_n_p)
3869            {
3870              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3871              succeed_n_p = false;
3872	    }
3873
3874          continue;
3875
3876
3877	case succeed_n:
3878          /* Get to the number of times to succeed.  */
3879          p += 2;
3880
3881          /* Increment p past the n for when k != 0.  */
3882          EXTRACT_NUMBER_AND_INCR (k, p);
3883          if (k == 0)
3884	    {
3885              p -= 4;
3886  	      succeed_n_p = true;  /* Spaghetti code alert.  */
3887              goto handle_on_failure_jump;
3888            }
3889          continue;
3890
3891
3892	case set_number_at:
3893          p += 4;
3894          continue;
3895
3896
3897	case start_memory:
3898        case stop_memory:
3899	  p += 2;
3900	  continue;
3901
3902
3903	default:
3904          abort (); /* We have listed all the cases.  */
3905        } /* switch *p++ */
3906
3907      /* Getting here means we have found the possible starting
3908         characters for one path of the pattern -- and that the empty
3909         string does not match.  We need not follow this path further.
3910         Instead, look at the next alternative (remembered on the
3911         stack), or quit if no more.  The test at the top of the loop
3912         does these things.  */
3913      path_can_be_null = false;
3914      p = pend;
3915    } /* while p */
3916
3917  /* Set `can_be_null' for the last path (also the first path, if the
3918     pattern is empty).  */
3919  bufp->can_be_null |= path_can_be_null;
3920
3921 done:
3922  RESET_FAIL_STACK ();
3923  return 0;
3924} /* re_compile_fastmap */
3925#ifdef _LIBC
3926weak_alias (__re_compile_fastmap, re_compile_fastmap)
3927#endif
3928
3929/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3930   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3931   this memory for recording register information.  STARTS and ENDS
3932   must be allocated using the malloc library routine, and must each
3933   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3934
3935   If NUM_REGS == 0, then subsequent matches should allocate their own
3936   register data.
3937
3938   Unless this function is called, the first search or match using
3939   PATTERN_BUFFER will allocate its own register data, without
3940   freeing the old data.  */
3941
3942void
3943re_set_registers (bufp, regs, num_regs, starts, ends)
3944    struct re_pattern_buffer *bufp;
3945    struct re_registers *regs;
3946    unsigned num_regs;
3947    regoff_t *starts, *ends;
3948{
3949  if (num_regs)
3950    {
3951      bufp->regs_allocated = REGS_REALLOCATE;
3952      regs->num_regs = num_regs;
3953      regs->start = starts;
3954      regs->end = ends;
3955    }
3956  else
3957    {
3958      bufp->regs_allocated = REGS_UNALLOCATED;
3959      regs->num_regs = 0;
3960      regs->start = regs->end = (regoff_t *) 0;
3961    }
3962}
3963#ifdef _LIBC
3964weak_alias (__re_set_registers, re_set_registers)
3965#endif
3966
3967/* Searching routines.  */
3968
3969/* Like re_search_2, below, but only one string is specified, and
3970   doesn't let you say where to stop matching. */
3971
3972int
3973re_search (bufp, string, size, startpos, range, regs)
3974     struct re_pattern_buffer *bufp;
3975     const char *string;
3976     int size, startpos, range;
3977     struct re_registers *regs;
3978{
3979  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3980		      regs, size);
3981}
3982#ifdef _LIBC
3983weak_alias (__re_search, re_search)
3984#endif
3985
3986
3987/* Using the compiled pattern in BUFP->buffer, first tries to match the
3988   virtual concatenation of STRING1 and STRING2, starting first at index
3989   STARTPOS, then at STARTPOS + 1, and so on.
3990
3991   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3992
3993   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3994   only at STARTPOS; in general, the last start tried is STARTPOS +
3995   RANGE.
3996
3997   In REGS, return the indices of the virtual concatenation of STRING1
3998   and STRING2 that matched the entire BUFP->buffer and its contained
3999   subexpressions.
4000
4001   Do not consider matching one past the index STOP in the virtual
4002   concatenation of STRING1 and STRING2.
4003
4004   We return either the position in the strings at which the match was
4005   found, -1 if no match, or -2 if error (such as failure
4006   stack overflow).  */
4007
4008int
4009re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
4010     struct re_pattern_buffer *bufp;
4011     const char *string1, *string2;
4012     int size1, size2;
4013     int startpos;
4014     int range;
4015     struct re_registers *regs;
4016     int stop;
4017{
4018  int val;
4019  register char *fastmap = bufp->fastmap;
4020  register RE_TRANSLATE_TYPE translate = bufp->translate;
4021  int total_size = size1 + size2;
4022  int endpos = startpos + range;
4023
4024  /* Check for out-of-range STARTPOS.  */
4025  if (startpos < 0 || startpos > total_size)
4026    return -1;
4027
4028  /* Fix up RANGE if it might eventually take us outside
4029     the virtual concatenation of STRING1 and STRING2.
4030     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
4031  if (endpos < 0)
4032    range = 0 - startpos;
4033  else if (endpos > total_size)
4034    range = total_size - startpos;
4035
4036  /* If the search isn't to be a backwards one, don't waste time in a
4037     search for a pattern that must be anchored.  */
4038  if (bufp->used > 0 && range > 0
4039      && ((re_opcode_t) bufp->buffer[0] == begbuf
4040	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
4041	  || ((re_opcode_t) bufp->buffer[0] == begline
4042	      && !bufp->newline_anchor)))
4043    {
4044      if (startpos > 0)
4045	return -1;
4046      else
4047	range = 1;
4048    }
4049
4050#ifdef emacs
4051  /* In a forward search for something that starts with \=.
4052     don't keep searching past point.  */
4053  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4054    {
4055      range = PT - startpos;
4056      if (range <= 0)
4057	return -1;
4058    }
4059#endif /* emacs */
4060
4061  /* Update the fastmap now if not correct already.  */
4062  if (fastmap && !bufp->fastmap_accurate)
4063    if (re_compile_fastmap (bufp) == -2)
4064      return -2;
4065
4066  /* Loop through the string, looking for a place to start matching.  */
4067  for (;;)
4068    {
4069      /* If a fastmap is supplied, skip quickly over characters that
4070         cannot be the start of a match.  If the pattern can match the
4071         null string, however, we don't need to skip characters; we want
4072         the first null string.  */
4073      if (fastmap && startpos < total_size && !bufp->can_be_null)
4074	{
4075	  if (range > 0)	/* Searching forwards.  */
4076	    {
4077	      register const char *d;
4078	      register int lim = 0;
4079	      int irange = range;
4080
4081              if (startpos < size1 && startpos + range >= size1)
4082                lim = range - (size1 - startpos);
4083
4084	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
4085
4086              /* Written out as an if-else to avoid testing `translate'
4087                 inside the loop.  */
4088	      if (translate)
4089                while (range > lim
4090                       && !fastmap[(unsigned char)
4091				   translate[(unsigned char) *d++]])
4092                  range--;
4093	      else
4094                while (range > lim && !fastmap[(unsigned char) *d++])
4095                  range--;
4096
4097	      startpos += irange - range;
4098	    }
4099	  else				/* Searching backwards.  */
4100	    {
4101	      register char c = (size1 == 0 || startpos >= size1
4102                                 ? string2[startpos - size1]
4103                                 : string1[startpos]);
4104
4105	      if (!fastmap[(unsigned char) TRANSLATE (c)])
4106		goto advance;
4107	    }
4108	}
4109
4110      /* If can't match the null string, and that's all we have left, fail.  */
4111      if (range >= 0 && startpos == total_size && fastmap
4112          && !bufp->can_be_null)
4113	return -1;
4114
4115      val = re_match_2_internal (bufp, string1, size1, string2, size2,
4116				 startpos, regs, stop);
4117#ifndef REGEX_MALLOC
4118# ifdef C_ALLOCA
4119      alloca (0);
4120# endif
4121#endif
4122
4123      if (val >= 0)
4124	return startpos;
4125
4126      if (val == -2)
4127	return -2;
4128
4129    advance:
4130      if (!range)
4131        break;
4132      else if (range > 0)
4133        {
4134          range--;
4135          startpos++;
4136        }
4137      else
4138        {
4139          range++;
4140          startpos--;
4141        }
4142    }
4143  return -1;
4144} /* re_search_2 */
4145#ifdef _LIBC
4146weak_alias (__re_search_2, re_search_2)
4147#endif
4148
4149/* This converts PTR, a pointer into one of the search strings `string1'
4150   and `string2' into an offset from the beginning of that string.  */
4151#define POINTER_TO_OFFSET(ptr)			\
4152  (FIRST_STRING_P (ptr)				\
4153   ? ((regoff_t) ((ptr) - string1))		\
4154   : ((regoff_t) ((ptr) - string2 + size1)))
4155
4156/* Macros for dealing with the split strings in re_match_2.  */
4157
4158#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4159
4160/* Call before fetching a character with *d.  This switches over to
4161   string2 if necessary.  */
4162#define PREFETCH()							\
4163  while (d == dend)						    	\
4164    {									\
4165      /* End of string2 => fail.  */					\
4166      if (dend == end_match_2) 						\
4167        goto fail;							\
4168      /* End of string1 => advance to string2.  */ 			\
4169      d = string2;						        \
4170      dend = end_match_2;						\
4171    }
4172
4173
4174/* Test if at very beginning or at very end of the virtual concatenation
4175   of `string1' and `string2'.  If only one string, it's `string2'.  */
4176#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4177#define AT_STRINGS_END(d) ((d) == end2)
4178
4179
4180/* Test if D points to a character which is word-constituent.  We have
4181   two special cases to check for: if past the end of string1, look at
4182   the first character in string2; and if before the beginning of
4183   string2, look at the last character in string1.  */
4184#define WORDCHAR_P(d)							\
4185  (SYNTAX ((d) == end1 ? *string2					\
4186           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
4187   == Sword)
4188
4189/* Disabled due to a compiler bug -- see comment at case wordbound */
4190#if 0
4191/* Test if the character before D and the one at D differ with respect
4192   to being word-constituent.  */
4193#define AT_WORD_BOUNDARY(d)						\
4194  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
4195   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4196#endif
4197
4198/* Free everything we malloc.  */
4199#ifdef MATCH_MAY_ALLOCATE
4200# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4201# define FREE_VARIABLES()						\
4202  do {									\
4203    REGEX_FREE_STACK (fail_stack.stack);				\
4204    FREE_VAR (regstart);						\
4205    FREE_VAR (regend);							\
4206    FREE_VAR (old_regstart);						\
4207    FREE_VAR (old_regend);						\
4208    FREE_VAR (best_regstart);						\
4209    FREE_VAR (best_regend);						\
4210    FREE_VAR (reg_info);						\
4211    FREE_VAR (reg_dummy);						\
4212    FREE_VAR (reg_info_dummy);						\
4213  } while (0)
4214#else
4215# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
4216#endif /* not MATCH_MAY_ALLOCATE */
4217
4218/* These values must meet several constraints.  They must not be valid
4219   register values; since we have a limit of 255 registers (because
4220   we use only one byte in the pattern for the register number), we can
4221   use numbers larger than 255.  They must differ by 1, because of
4222   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4223   be larger than the value for the highest register, so we do not try
4224   to actually save any registers when none are active.  */
4225#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4226#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4227
4228/* Matching routines.  */
4229
4230#ifndef emacs   /* Emacs never uses this.  */
4231/* re_match is like re_match_2 except it takes only a single string.  */
4232
4233int
4234re_match (bufp, string, size, pos, regs)
4235     struct re_pattern_buffer *bufp;
4236     const char *string;
4237     int size, pos;
4238     struct re_registers *regs;
4239{
4240  int result = re_match_2_internal (bufp, NULL, 0, string, size,
4241				    pos, regs, size);
4242# ifndef REGEX_MALLOC
4243#  ifdef C_ALLOCA
4244  alloca (0);
4245#  endif
4246# endif
4247  return result;
4248}
4249# ifdef _LIBC
4250weak_alias (__re_match, re_match)
4251# endif
4252#endif /* not emacs */
4253
4254static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
4255						    unsigned char *end,
4256						register_info_type *reg_info));
4257static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
4258						  unsigned char *end,
4259						register_info_type *reg_info));
4260static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
4261							unsigned char *end,
4262						register_info_type *reg_info));
4263static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
4264				     int len, char *translate));
4265
4266/* re_match_2 matches the compiled pattern in BUFP against the
4267   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4268   and SIZE2, respectively).  We start matching at POS, and stop
4269   matching at STOP.
4270
4271   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4272   store offsets for the substring each group matched in REGS.  See the
4273   documentation for exactly how many groups we fill.
4274
4275   We return -1 if no match, -2 if an internal error (such as the
4276   failure stack overflowing).  Otherwise, we return the length of the
4277   matched substring.  */
4278
4279int
4280re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4281     struct re_pattern_buffer *bufp;
4282     const char *string1, *string2;
4283     int size1, size2;
4284     int pos;
4285     struct re_registers *regs;
4286     int stop;
4287{
4288  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
4289				    pos, regs, stop);
4290#ifndef REGEX_MALLOC
4291# ifdef C_ALLOCA
4292  alloca (0);
4293# endif
4294#endif
4295  return result;
4296}
4297#ifdef _LIBC
4298weak_alias (__re_match_2, re_match_2)
4299#endif
4300
4301/* This is a separate function so that we can force an alloca cleanup
4302   afterwards.  */
4303static int
4304re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4305     struct re_pattern_buffer *bufp;
4306     const char *string1, *string2;
4307     int size1, size2;
4308     int pos;
4309     struct re_registers *regs;
4310     int stop;
4311{
4312  /* General temporaries.  */
4313  int mcnt;
4314  unsigned char *p1;
4315
4316  /* Just past the end of the corresponding string.  */
4317  const char *end1, *end2;
4318
4319  /* Pointers into string1 and string2, just past the last characters in
4320     each to consider matching.  */
4321  const char *end_match_1, *end_match_2;
4322
4323  /* Where we are in the data, and the end of the current string.  */
4324  const char *d, *dend;
4325
4326  /* Where we are in the pattern, and the end of the pattern.  */
4327  unsigned char *p = bufp->buffer;
4328  register unsigned char *pend = p + bufp->used;
4329
4330  /* Mark the opcode just after a start_memory, so we can test for an
4331     empty subpattern when we get to the stop_memory.  */
4332  unsigned char *just_past_start_mem = 0;
4333
4334  /* We use this to map every character in the string.  */
4335  RE_TRANSLATE_TYPE translate = bufp->translate;
4336
4337  /* Failure point stack.  Each place that can handle a failure further
4338     down the line pushes a failure point on this stack.  It consists of
4339     restart, regend, and reg_info for all registers corresponding to
4340     the subexpressions we're currently inside, plus the number of such
4341     registers, and, finally, two char *'s.  The first char * is where
4342     to resume scanning the pattern; the second one is where to resume
4343     scanning the strings.  If the latter is zero, the failure point is
4344     a ``dummy''; if a failure happens and the failure point is a dummy,
4345     it gets discarded and the next next one is tried.  */
4346#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
4347  fail_stack_type fail_stack;
4348#endif
4349#ifdef DEBUG
4350  static unsigned failure_id;
4351  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4352#endif
4353
4354#ifdef REL_ALLOC
4355  /* This holds the pointer to the failure stack, when
4356     it is allocated relocatably.  */
4357  fail_stack_elt_t *failure_stack_ptr;
4358#endif
4359
4360  /* We fill all the registers internally, independent of what we
4361     return, for use in backreferences.  The number here includes
4362     an element for register zero.  */
4363  size_t num_regs = bufp->re_nsub + 1;
4364
4365  /* The currently active registers.  */
4366  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4367  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4368
4369  /* Information on the contents of registers. These are pointers into
4370     the input strings; they record just what was matched (on this
4371     attempt) by a subexpression part of the pattern, that is, the
4372     regnum-th regstart pointer points to where in the pattern we began
4373     matching and the regnum-th regend points to right after where we
4374     stopped matching the regnum-th subexpression.  (The zeroth register
4375     keeps track of what the whole pattern matches.)  */
4376#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4377  const char **regstart, **regend;
4378#endif
4379
4380  /* If a group that's operated upon by a repetition operator fails to
4381     match anything, then the register for its start will need to be
4382     restored because it will have been set to wherever in the string we
4383     are when we last see its open-group operator.  Similarly for a
4384     register's end.  */
4385#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4386  const char **old_regstart, **old_regend;
4387#endif
4388
4389  /* The is_active field of reg_info helps us keep track of which (possibly
4390     nested) subexpressions we are currently in. The matched_something
4391     field of reg_info[reg_num] helps us tell whether or not we have
4392     matched any of the pattern so far this time through the reg_num-th
4393     subexpression.  These two fields get reset each time through any
4394     loop their register is in.  */
4395#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
4396  register_info_type *reg_info;
4397#endif
4398
4399  /* The following record the register info as found in the above
4400     variables when we find a match better than any we've seen before.
4401     This happens as we backtrack through the failure points, which in
4402     turn happens only if we have not yet matched the entire string. */
4403  unsigned best_regs_set = false;
4404#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4405  const char **best_regstart, **best_regend;
4406#endif
4407
4408  /* Logically, this is `best_regend[0]'.  But we don't want to have to
4409     allocate space for that if we're not allocating space for anything
4410     else (see below).  Also, we never need info about register 0 for
4411     any of the other register vectors, and it seems rather a kludge to
4412     treat `best_regend' differently than the rest.  So we keep track of
4413     the end of the best match so far in a separate variable.  We
4414     initialize this to NULL so that when we backtrack the first time
4415     and need to test it, it's not garbage.  */
4416  const char *match_end = NULL;
4417
4418  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4419  int set_regs_matched_done = 0;
4420
4421  /* Used when we pop values we don't care about.  */
4422#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4423  const char **reg_dummy;
4424  register_info_type *reg_info_dummy;
4425#endif
4426
4427#ifdef DEBUG
4428  /* Counts the total number of registers pushed.  */
4429  unsigned num_regs_pushed = 0;
4430#endif
4431
4432  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4433
4434  INIT_FAIL_STACK ();
4435
4436#ifdef MATCH_MAY_ALLOCATE
4437  /* Do not bother to initialize all the register variables if there are
4438     no groups in the pattern, as it takes a fair amount of time.  If
4439     there are groups, we include space for register 0 (the whole
4440     pattern), even though we never use it, since it simplifies the
4441     array indexing.  We should fix this.  */
4442  if (bufp->re_nsub)
4443    {
4444      regstart = REGEX_TALLOC (num_regs, const char *);
4445      regend = REGEX_TALLOC (num_regs, const char *);
4446      old_regstart = REGEX_TALLOC (num_regs, const char *);
4447      old_regend = REGEX_TALLOC (num_regs, const char *);
4448      best_regstart = REGEX_TALLOC (num_regs, const char *);
4449      best_regend = REGEX_TALLOC (num_regs, const char *);
4450      reg_info = REGEX_TALLOC (num_regs, register_info_type);
4451      reg_dummy = REGEX_TALLOC (num_regs, const char *);
4452      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4453
4454      if (!(regstart && regend && old_regstart && old_regend && reg_info
4455            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4456        {
4457          FREE_VARIABLES ();
4458          return -2;
4459        }
4460    }
4461  else
4462    {
4463      /* We must initialize all our variables to NULL, so that
4464         `FREE_VARIABLES' doesn't try to free them.  */
4465      regstart = regend = old_regstart = old_regend = best_regstart
4466        = best_regend = reg_dummy = NULL;
4467      reg_info = reg_info_dummy = (register_info_type *) NULL;
4468    }
4469#endif /* MATCH_MAY_ALLOCATE */
4470
4471  /* The starting position is bogus.  */
4472  if (pos < 0 || pos > size1 + size2)
4473    {
4474      FREE_VARIABLES ();
4475      return -1;
4476    }
4477
4478  /* Initialize subexpression text positions to -1 to mark ones that no
4479     start_memory/stop_memory has been seen for. Also initialize the
4480     register information struct.  */
4481  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4482    {
4483      regstart[mcnt] = regend[mcnt]
4484        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4485
4486      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4487      IS_ACTIVE (reg_info[mcnt]) = 0;
4488      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4489      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4490    }
4491
4492  /* We move `string1' into `string2' if the latter's empty -- but not if
4493     `string1' is null.  */
4494  if (size2 == 0 && string1 != NULL)
4495    {
4496      string2 = string1;
4497      size2 = size1;
4498      string1 = 0;
4499      size1 = 0;
4500    }
4501  end1 = string1 + size1;
4502  end2 = string2 + size2;
4503
4504  /* Compute where to stop matching, within the two strings.  */
4505  if (stop <= size1)
4506    {
4507      end_match_1 = string1 + stop;
4508      end_match_2 = string2;
4509    }
4510  else
4511    {
4512      end_match_1 = end1;
4513      end_match_2 = string2 + stop - size1;
4514    }
4515
4516  /* `p' scans through the pattern as `d' scans through the data.
4517     `dend' is the end of the input string that `d' points within.  `d'
4518     is advanced into the following input string whenever necessary, but
4519     this happens before fetching; therefore, at the beginning of the
4520     loop, `d' can be pointing at the end of a string, but it cannot
4521     equal `string2'.  */
4522  if (size1 > 0 && pos <= size1)
4523    {
4524      d = string1 + pos;
4525      dend = end_match_1;
4526    }
4527  else
4528    {
4529      d = string2 + pos - size1;
4530      dend = end_match_2;
4531    }
4532
4533  DEBUG_PRINT1 ("The compiled pattern is:\n");
4534  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4535  DEBUG_PRINT1 ("The string to match is: `");
4536  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4537  DEBUG_PRINT1 ("'\n");
4538
4539  /* This loops over pattern commands.  It exits by returning from the
4540     function if the match is complete, or it drops through if the match
4541     fails at this starting point in the input data.  */
4542  for (;;)
4543    {
4544#ifdef _LIBC
4545      DEBUG_PRINT2 ("\n%p: ", p);
4546#else
4547      DEBUG_PRINT2 ("\n0x%x: ", p);
4548#endif
4549
4550      if (p == pend)
4551	{ /* End of pattern means we might have succeeded.  */
4552          DEBUG_PRINT1 ("end of pattern ... ");
4553
4554	  /* If we haven't matched the entire string, and we want the
4555             longest match, try backtracking.  */
4556          if (d != end_match_2)
4557	    {
4558	      /* 1 if this match ends in the same string (string1 or string2)
4559		 as the best previous match.  */
4560	      boolean same_str_p = (FIRST_STRING_P (match_end)
4561				    == MATCHING_IN_FIRST_STRING);
4562	      /* 1 if this match is the best seen so far.  */
4563	      boolean best_match_p;
4564
4565	      /* AIX compiler got confused when this was combined
4566		 with the previous declaration.  */
4567	      if (same_str_p)
4568		best_match_p = d > match_end;
4569	      else
4570		best_match_p = !MATCHING_IN_FIRST_STRING;
4571
4572              DEBUG_PRINT1 ("backtracking.\n");
4573
4574              if (!FAIL_STACK_EMPTY ())
4575                { /* More failure points to try.  */
4576
4577                  /* If exceeds best match so far, save it.  */
4578                  if (!best_regs_set || best_match_p)
4579                    {
4580                      best_regs_set = true;
4581                      match_end = d;
4582
4583                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4584
4585                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4586                        {
4587                          best_regstart[mcnt] = regstart[mcnt];
4588                          best_regend[mcnt] = regend[mcnt];
4589                        }
4590                    }
4591                  goto fail;
4592                }
4593
4594              /* If no failure points, don't restore garbage.  And if
4595                 last match is real best match, don't restore second
4596                 best one. */
4597              else if (best_regs_set && !best_match_p)
4598                {
4599  	        restore_best_regs:
4600                  /* Restore best match.  It may happen that `dend ==
4601                     end_match_1' while the restored d is in string2.
4602                     For example, the pattern `x.*y.*z' against the
4603                     strings `x-' and `y-z-', if the two strings are
4604                     not consecutive in memory.  */
4605                  DEBUG_PRINT1 ("Restoring best registers.\n");
4606
4607                  d = match_end;
4608                  dend = ((d >= string1 && d <= end1)
4609		           ? end_match_1 : end_match_2);
4610
4611		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4612		    {
4613		      regstart[mcnt] = best_regstart[mcnt];
4614		      regend[mcnt] = best_regend[mcnt];
4615		    }
4616                }
4617            } /* d != end_match_2 */
4618
4619	succeed_label:
4620          DEBUG_PRINT1 ("Accepting match.\n");
4621
4622          /* If caller wants register contents data back, do it.  */
4623          if (regs && !bufp->no_sub)
4624	    {
4625              /* Have the register data arrays been allocated?  */
4626              if (bufp->regs_allocated == REGS_UNALLOCATED)
4627                { /* No.  So allocate them with malloc.  We need one
4628                     extra element beyond `num_regs' for the `-1' marker
4629                     GNU code uses.  */
4630                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4631                  regs->start = TALLOC (regs->num_regs, regoff_t);
4632                  regs->end = TALLOC (regs->num_regs, regoff_t);
4633                  if (regs->start == NULL || regs->end == NULL)
4634		    {
4635		      FREE_VARIABLES ();
4636		      return -2;
4637		    }
4638                  bufp->regs_allocated = REGS_REALLOCATE;
4639                }
4640              else if (bufp->regs_allocated == REGS_REALLOCATE)
4641                { /* Yes.  If we need more elements than were already
4642                     allocated, reallocate them.  If we need fewer, just
4643                     leave it alone.  */
4644                  if (regs->num_regs < num_regs + 1)
4645                    {
4646                      regs->num_regs = num_regs + 1;
4647                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4648                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4649                      if (regs->start == NULL || regs->end == NULL)
4650			{
4651			  FREE_VARIABLES ();
4652			  return -2;
4653			}
4654                    }
4655                }
4656              else
4657		{
4658		  /* These braces fend off a "empty body in an else-statement"
4659		     warning under GCC when assert expands to nothing.  */
4660		  assert (bufp->regs_allocated == REGS_FIXED);
4661		}
4662
4663              /* Convert the pointer data in `regstart' and `regend' to
4664                 indices.  Register zero has to be set differently,
4665                 since we haven't kept track of any info for it.  */
4666              if (regs->num_regs > 0)
4667                {
4668                  regs->start[0] = pos;
4669                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4670				  ? ((regoff_t) (d - string1))
4671			          : ((regoff_t) (d - string2 + size1)));
4672                }
4673
4674              /* Go through the first `min (num_regs, regs->num_regs)'
4675                 registers, since that is all we initialized.  */
4676	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4677		   mcnt++)
4678		{
4679                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4680                    regs->start[mcnt] = regs->end[mcnt] = -1;
4681                  else
4682                    {
4683		      regs->start[mcnt]
4684			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4685                      regs->end[mcnt]
4686			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4687                    }
4688		}
4689
4690              /* If the regs structure we return has more elements than
4691                 were in the pattern, set the extra elements to -1.  If
4692                 we (re)allocated the registers, this is the case,
4693                 because we always allocate enough to have at least one
4694                 -1 at the end.  */
4695              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4696                regs->start[mcnt] = regs->end[mcnt] = -1;
4697	    } /* regs && !bufp->no_sub */
4698
4699          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4700                        nfailure_points_pushed, nfailure_points_popped,
4701                        nfailure_points_pushed - nfailure_points_popped);
4702          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4703
4704          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4705			    ? string1
4706			    : string2 - size1);
4707
4708          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4709
4710          FREE_VARIABLES ();
4711          return mcnt;
4712        }
4713
4714      /* Otherwise match next pattern command.  */
4715      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4716	{
4717        /* Ignore these.  Used to ignore the n of succeed_n's which
4718           currently have n == 0.  */
4719        case no_op:
4720          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4721          break;
4722
4723	case succeed:
4724          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4725	  goto succeed_label;
4726
4727        /* Match the next n pattern characters exactly.  The following
4728           byte in the pattern defines n, and the n bytes after that
4729           are the characters to match.  */
4730	case exactn:
4731	  mcnt = *p++;
4732          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4733
4734          /* This is written out as an if-else so we don't waste time
4735             testing `translate' inside the loop.  */
4736          if (translate)
4737	    {
4738	      do
4739		{
4740		  PREFETCH ();
4741		  if ((unsigned char) translate[(unsigned char) *d++]
4742		      != (unsigned char) *p++)
4743                    goto fail;
4744		}
4745	      while (--mcnt);
4746	    }
4747	  else
4748	    {
4749	      do
4750		{
4751		  PREFETCH ();
4752		  if (*d++ != (char) *p++) goto fail;
4753		}
4754	      while (--mcnt);
4755	    }
4756	  SET_REGS_MATCHED ();
4757          break;
4758
4759
4760        /* Match any character except possibly a newline or a null.  */
4761	case anychar:
4762          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4763
4764          PREFETCH ();
4765
4766          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4767              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4768	    goto fail;
4769
4770          SET_REGS_MATCHED ();
4771          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4772          d++;
4773	  break;
4774
4775
4776	case charset:
4777	case charset_not:
4778	  {
4779	    register unsigned char c;
4780	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4781
4782            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4783
4784	    PREFETCH ();
4785	    c = TRANSLATE (*d); /* The character to match.  */
4786
4787            /* Cast to `unsigned' instead of `unsigned char' in case the
4788               bit list is a full 32 bytes long.  */
4789	    if (c < (unsigned) (*p * BYTEWIDTH)
4790		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4791	      not = !not;
4792
4793	    p += 1 + *p;
4794
4795	    if (!not) goto fail;
4796
4797	    SET_REGS_MATCHED ();
4798            d++;
4799	    break;
4800	  }
4801
4802
4803        /* The beginning of a group is represented by start_memory.
4804           The arguments are the register number in the next byte, and the
4805           number of groups inner to this one in the next.  The text
4806           matched within the group is recorded (in the internal
4807           registers data structure) under the register number.  */
4808        case start_memory:
4809	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4810
4811          /* Find out if this group can match the empty string.  */
4812	  p1 = p;		/* To send to group_match_null_string_p.  */
4813
4814          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4815            REG_MATCH_NULL_STRING_P (reg_info[*p])
4816              = group_match_null_string_p (&p1, pend, reg_info);
4817
4818          /* Save the position in the string where we were the last time
4819             we were at this open-group operator in case the group is
4820             operated upon by a repetition operator, e.g., with `(a*)*b'
4821             against `ab'; then we want to ignore where we are now in
4822             the string in case this attempt to match fails.  */
4823          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4824                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4825                             : regstart[*p];
4826	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4827			 POINTER_TO_OFFSET (old_regstart[*p]));
4828
4829          regstart[*p] = d;
4830	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4831
4832          IS_ACTIVE (reg_info[*p]) = 1;
4833          MATCHED_SOMETHING (reg_info[*p]) = 0;
4834
4835	  /* Clear this whenever we change the register activity status.  */
4836	  set_regs_matched_done = 0;
4837
4838          /* This is the new highest active register.  */
4839          highest_active_reg = *p;
4840
4841          /* If nothing was active before, this is the new lowest active
4842             register.  */
4843          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4844            lowest_active_reg = *p;
4845
4846          /* Move past the register number and inner group count.  */
4847          p += 2;
4848	  just_past_start_mem = p;
4849
4850          break;
4851
4852
4853        /* The stop_memory opcode represents the end of a group.  Its
4854           arguments are the same as start_memory's: the register
4855           number, and the number of inner groups.  */
4856	case stop_memory:
4857	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4858
4859          /* We need to save the string position the last time we were at
4860             this close-group operator in case the group is operated
4861             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4862             against `aba'; then we want to ignore where we are now in
4863             the string in case this attempt to match fails.  */
4864          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4865                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4866			   : regend[*p];
4867	  DEBUG_PRINT2 ("      old_regend: %d\n",
4868			 POINTER_TO_OFFSET (old_regend[*p]));
4869
4870          regend[*p] = d;
4871	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4872
4873          /* This register isn't active anymore.  */
4874          IS_ACTIVE (reg_info[*p]) = 0;
4875
4876	  /* Clear this whenever we change the register activity status.  */
4877	  set_regs_matched_done = 0;
4878
4879          /* If this was the only register active, nothing is active
4880             anymore.  */
4881          if (lowest_active_reg == highest_active_reg)
4882            {
4883              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4884              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4885            }
4886          else
4887            { /* We must scan for the new highest active register, since
4888                 it isn't necessarily one less than now: consider
4889                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4890                 new highest active register is 1.  */
4891              unsigned char r = *p - 1;
4892              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4893                r--;
4894
4895              /* If we end up at register zero, that means that we saved
4896                 the registers as the result of an `on_failure_jump', not
4897                 a `start_memory', and we jumped to past the innermost
4898                 `stop_memory'.  For example, in ((.)*) we save
4899                 registers 1 and 2 as a result of the *, but when we pop
4900                 back to the second ), we are at the stop_memory 1.
4901                 Thus, nothing is active.  */
4902	      if (r == 0)
4903                {
4904                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4905                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4906                }
4907              else
4908                highest_active_reg = r;
4909            }
4910
4911          /* If just failed to match something this time around with a
4912             group that's operated on by a repetition operator, try to
4913             force exit from the ``loop'', and restore the register
4914             information for this group that we had before trying this
4915             last match.  */
4916          if ((!MATCHED_SOMETHING (reg_info[*p])
4917               || just_past_start_mem == p - 1)
4918	      && (p + 2) < pend)
4919            {
4920              boolean is_a_jump_n = false;
4921
4922              p1 = p + 2;
4923              mcnt = 0;
4924              switch ((re_opcode_t) *p1++)
4925                {
4926                  case jump_n:
4927		    is_a_jump_n = true;
4928                  case pop_failure_jump:
4929		  case maybe_pop_jump:
4930		  case jump:
4931		  case dummy_failure_jump:
4932                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4933		    if (is_a_jump_n)
4934		      p1 += 2;
4935                    break;
4936
4937                  default:
4938                    /* do nothing */ ;
4939                }
4940	      p1 += mcnt;
4941
4942              /* If the next operation is a jump backwards in the pattern
4943	         to an on_failure_jump right before the start_memory
4944                 corresponding to this stop_memory, exit from the loop
4945                 by forcing a failure after pushing on the stack the
4946                 on_failure_jump's jump in the pattern, and d.  */
4947              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4948                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4949		{
4950                  /* If this group ever matched anything, then restore
4951                     what its registers were before trying this last
4952                     failed match, e.g., with `(a*)*b' against `ab' for
4953                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4954                     against `aba' for regend[3].
4955
4956                     Also restore the registers for inner groups for,
4957                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4958                     otherwise get trashed).  */
4959
4960                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4961		    {
4962		      unsigned r;
4963
4964                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4965
4966		      /* Restore this and inner groups' (if any) registers.  */
4967                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4968			   r++)
4969                        {
4970                          regstart[r] = old_regstart[r];
4971
4972                          /* xx why this test?  */
4973                          if (old_regend[r] >= regstart[r])
4974                            regend[r] = old_regend[r];
4975                        }
4976                    }
4977		  p1++;
4978                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4979                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4980
4981                  goto fail;
4982                }
4983            }
4984
4985          /* Move past the register number and the inner group count.  */
4986          p += 2;
4987          break;
4988
4989
4990	/* \<digit> has been turned into a `duplicate' command which is
4991           followed by the numeric value of <digit> as the register number.  */
4992        case duplicate:
4993	  {
4994	    register const char *d2, *dend2;
4995	    int regno = *p++;   /* Get which register to match against.  */
4996	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4997
4998	    /* Can't back reference a group which we've never matched.  */
4999            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5000              goto fail;
5001
5002            /* Where in input to try to start matching.  */
5003            d2 = regstart[regno];
5004
5005            /* Where to stop matching; if both the place to start and
5006               the place to stop matching are in the same string, then
5007               set to the place to stop, otherwise, for now have to use
5008               the end of the first string.  */
5009
5010            dend2 = ((FIRST_STRING_P (regstart[regno])
5011		      == FIRST_STRING_P (regend[regno]))
5012		     ? regend[regno] : end_match_1);
5013	    for (;;)
5014	      {
5015		/* If necessary, advance to next segment in register
5016                   contents.  */
5017		while (d2 == dend2)
5018		  {
5019		    if (dend2 == end_match_2) break;
5020		    if (dend2 == regend[regno]) break;
5021
5022                    /* End of string1 => advance to string2. */
5023                    d2 = string2;
5024                    dend2 = regend[regno];
5025		  }
5026		/* At end of register contents => success */
5027		if (d2 == dend2) break;
5028
5029		/* If necessary, advance to next segment in data.  */
5030		PREFETCH ();
5031
5032		/* How many characters left in this segment to match.  */
5033		mcnt = dend - d;
5034
5035		/* Want how many consecutive characters we can match in
5036                   one shot, so, if necessary, adjust the count.  */
5037                if (mcnt > dend2 - d2)
5038		  mcnt = dend2 - d2;
5039
5040		/* Compare that many; failure if mismatch, else move
5041                   past them.  */
5042		if (translate
5043                    ? bcmp_translate (d, d2, mcnt, translate)
5044                    : memcmp (d, d2, mcnt))
5045		  goto fail;
5046		d += mcnt, d2 += mcnt;
5047
5048		/* Do this because we've match some characters.  */
5049		SET_REGS_MATCHED ();
5050	      }
5051	  }
5052	  break;
5053
5054
5055        /* begline matches the empty string at the beginning of the string
5056           (unless `not_bol' is set in `bufp'), and, if
5057           `newline_anchor' is set, after newlines.  */
5058	case begline:
5059          DEBUG_PRINT1 ("EXECUTING begline.\n");
5060
5061          if (AT_STRINGS_BEG (d))
5062            {
5063              if (!bufp->not_bol) break;
5064            }
5065          else if (d[-1] == '\n' && bufp->newline_anchor)
5066            {
5067              break;
5068            }
5069          /* In all other cases, we fail.  */
5070          goto fail;
5071
5072
5073        /* endline is the dual of begline.  */
5074	case endline:
5075          DEBUG_PRINT1 ("EXECUTING endline.\n");
5076
5077          if (AT_STRINGS_END (d))
5078            {
5079              if (!bufp->not_eol) break;
5080            }
5081
5082          /* We have to ``prefetch'' the next character.  */
5083          else if ((d == end1 ? *string2 : *d) == '\n'
5084                   && bufp->newline_anchor)
5085            {
5086              break;
5087            }
5088          goto fail;
5089
5090
5091	/* Match at the very beginning of the data.  */
5092        case begbuf:
5093          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5094          if (AT_STRINGS_BEG (d))
5095            break;
5096          goto fail;
5097
5098
5099	/* Match at the very end of the data.  */
5100        case endbuf:
5101          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
5102	  if (AT_STRINGS_END (d))
5103	    break;
5104          goto fail;
5105
5106
5107        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
5108           pushes NULL as the value for the string on the stack.  Then
5109           `pop_failure_point' will keep the current value for the
5110           string, instead of restoring it.  To see why, consider
5111           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
5112           then the . fails against the \n.  But the next thing we want
5113           to do is match the \n against the \n; if we restored the
5114           string value, we would be back at the foo.
5115
5116           Because this is used only in specific cases, we don't need to
5117           check all the things that `on_failure_jump' does, to make
5118           sure the right things get saved on the stack.  Hence we don't
5119           share its code.  The only reason to push anything on the
5120           stack at all is that otherwise we would have to change
5121           `anychar's code to do something besides goto fail in this
5122           case; that seems worse than this.  */
5123        case on_failure_keep_string_jump:
5124          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
5125
5126          EXTRACT_NUMBER_AND_INCR (mcnt, p);
5127#ifdef _LIBC
5128          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
5129#else
5130          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
5131#endif
5132
5133          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
5134          break;
5135
5136
5137	/* Uses of on_failure_jump:
5138
5139           Each alternative starts with an on_failure_jump that points
5140           to the beginning of the next alternative.  Each alternative
5141           except the last ends with a jump that in effect jumps past
5142           the rest of the alternatives.  (They really jump to the
5143           ending jump of the following alternative, because tensioning
5144           these jumps is a hassle.)
5145
5146           Repeats start with an on_failure_jump that points past both
5147           the repetition text and either the following jump or
5148           pop_failure_jump back to this on_failure_jump.  */
5149	case on_failure_jump:
5150        on_failure:
5151          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5152
5153          EXTRACT_NUMBER_AND_INCR (mcnt, p);
5154#ifdef _LIBC
5155          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
5156#else
5157          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
5158#endif
5159
5160          /* If this on_failure_jump comes right before a group (i.e.,
5161             the original * applied to a group), save the information
5162             for that group and all inner ones, so that if we fail back
5163             to this point, the group's information will be correct.
5164             For example, in \(a*\)*\1, we need the preceding group,
5165             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
5166
5167          /* We can't use `p' to check ahead because we push
5168             a failure point to `p + mcnt' after we do this.  */
5169          p1 = p;
5170
5171          /* We need to skip no_op's before we look for the
5172             start_memory in case this on_failure_jump is happening as
5173             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5174             against aba.  */
5175          while (p1 < pend && (re_opcode_t) *p1 == no_op)
5176            p1++;
5177
5178          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5179            {
5180              /* We have a new highest active register now.  This will
5181                 get reset at the start_memory we are about to get to,
5182                 but we will have saved all the registers relevant to
5183                 this repetition op, as described above.  */
5184              highest_active_reg = *(p1 + 1) + *(p1 + 2);
5185              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5186                lowest_active_reg = *(p1 + 1);
5187            }
5188
5189          DEBUG_PRINT1 (":\n");
5190          PUSH_FAILURE_POINT (p + mcnt, d, -2);
5191          break;
5192
5193
5194        /* A smart repeat ends with `maybe_pop_jump'.
5195	   We change it to either `pop_failure_jump' or `jump'.  */
5196        case maybe_pop_jump:
5197          EXTRACT_NUMBER_AND_INCR (mcnt, p);
5198          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5199          {
5200	    register unsigned char *p2 = p;
5201
5202            /* Compare the beginning of the repeat with what in the
5203               pattern follows its end. If we can establish that there
5204               is nothing that they would both match, i.e., that we
5205               would have to backtrack because of (as in, e.g., `a*a')
5206               then we can change to pop_failure_jump, because we'll
5207               never have to backtrack.
5208
5209               This is not true in the case of alternatives: in
5210               `(a|ab)*' we do need to backtrack to the `ab' alternative
5211               (e.g., if the string was `ab').  But instead of trying to
5212               detect that here, the alternative has put on a dummy
5213               failure point which is what we will end up popping.  */
5214
5215	    /* Skip over open/close-group commands.
5216	       If what follows this loop is a ...+ construct,
5217	       look at what begins its body, since we will have to
5218	       match at least one of that.  */
5219	    while (1)
5220	      {
5221		if (p2 + 2 < pend
5222		    && ((re_opcode_t) *p2 == stop_memory
5223			|| (re_opcode_t) *p2 == start_memory))
5224		  p2 += 3;
5225		else if (p2 + 6 < pend
5226			 && (re_opcode_t) *p2 == dummy_failure_jump)
5227		  p2 += 6;
5228		else
5229		  break;
5230	      }
5231
5232	    p1 = p + mcnt;
5233	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5234	       to the `maybe_finalize_jump' of this case.  Examine what
5235	       follows.  */
5236
5237            /* If we're at the end of the pattern, we can change.  */
5238            if (p2 == pend)
5239	      {
5240		/* Consider what happens when matching ":\(.*\)"
5241		   against ":/".  I don't really understand this code
5242		   yet.  */
5243  	        p[-3] = (unsigned char) pop_failure_jump;
5244                DEBUG_PRINT1
5245                  ("  End of pattern: change to `pop_failure_jump'.\n");
5246              }
5247
5248            else if ((re_opcode_t) *p2 == exactn
5249		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5250	      {
5251		register unsigned char c
5252                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
5253
5254                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
5255                  {
5256  		    p[-3] = (unsigned char) pop_failure_jump;
5257                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
5258                                  c, p1[5]);
5259                  }
5260
5261		else if ((re_opcode_t) p1[3] == charset
5262			 || (re_opcode_t) p1[3] == charset_not)
5263		  {
5264		    int not = (re_opcode_t) p1[3] == charset_not;
5265
5266		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
5267			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5268		      not = !not;
5269
5270                    /* `not' is equal to 1 if c would match, which means
5271                        that we can't change to pop_failure_jump.  */
5272		    if (!not)
5273                      {
5274  		        p[-3] = (unsigned char) pop_failure_jump;
5275                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5276                      }
5277		  }
5278	      }
5279            else if ((re_opcode_t) *p2 == charset)
5280	      {
5281		/* We win if the first character of the loop is not part
5282                   of the charset.  */
5283                if ((re_opcode_t) p1[3] == exactn
5284 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
5285 			  && (p2[2 + p1[5] / BYTEWIDTH]
5286 			      & (1 << (p1[5] % BYTEWIDTH)))))
5287		  {
5288		    p[-3] = (unsigned char) pop_failure_jump;
5289		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5290                  }
5291
5292		else if ((re_opcode_t) p1[3] == charset_not)
5293		  {
5294		    int idx;
5295		    /* We win if the charset_not inside the loop
5296		       lists every character listed in the charset after.  */
5297		    for (idx = 0; idx < (int) p2[1]; idx++)
5298		      if (! (p2[2 + idx] == 0
5299			     || (idx < (int) p1[4]
5300				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5301			break;
5302
5303		    if (idx == p2[1])
5304                      {
5305  		        p[-3] = (unsigned char) pop_failure_jump;
5306                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5307                      }
5308		  }
5309		else if ((re_opcode_t) p1[3] == charset)
5310		  {
5311		    int idx;
5312		    /* We win if the charset inside the loop
5313		       has no overlap with the one after the loop.  */
5314		    for (idx = 0;
5315			 idx < (int) p2[1] && idx < (int) p1[4];
5316			 idx++)
5317		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
5318			break;
5319
5320		    if (idx == p2[1] || idx == p1[4])
5321                      {
5322  		        p[-3] = (unsigned char) pop_failure_jump;
5323                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5324                      }
5325		  }
5326	      }
5327	  }
5328	  p -= 2;		/* Point at relative address again.  */
5329	  if ((re_opcode_t) p[-1] != pop_failure_jump)
5330	    {
5331	      p[-1] = (unsigned char) jump;
5332              DEBUG_PRINT1 ("  Match => jump.\n");
5333	      goto unconditional_jump;
5334	    }
5335        /* Note fall through.  */
5336
5337
5338	/* The end of a simple repeat has a pop_failure_jump back to
5339           its matching on_failure_jump, where the latter will push a
5340           failure point.  The pop_failure_jump takes off failure
5341           points put on by this pop_failure_jump's matching
5342           on_failure_jump; we got through the pattern to here from the
5343           matching on_failure_jump, so didn't fail.  */
5344        case pop_failure_jump:
5345          {
5346            /* We need to pass separate storage for the lowest and
5347               highest registers, even though we don't care about the
5348               actual values.  Otherwise, we will restore only one
5349               register from the stack, since lowest will == highest in
5350               `pop_failure_point'.  */
5351            active_reg_t dummy_low_reg, dummy_high_reg;
5352            unsigned char *pdummy;
5353            const char *sdummy;
5354
5355            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5356            POP_FAILURE_POINT (sdummy, pdummy,
5357                               dummy_low_reg, dummy_high_reg,
5358                               reg_dummy, reg_dummy, reg_info_dummy);
5359          }
5360	  /* Note fall through.  */
5361
5362	unconditional_jump:
5363#ifdef _LIBC
5364	  DEBUG_PRINT2 ("\n%p: ", p);
5365#else
5366	  DEBUG_PRINT2 ("\n0x%x: ", p);
5367#endif
5368          /* Note fall through.  */
5369
5370        /* Unconditionally jump (without popping any failure points).  */
5371        case jump:
5372	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
5373          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5374	  p += mcnt;				/* Do the jump.  */
5375#ifdef _LIBC
5376          DEBUG_PRINT2 ("(to %p).\n", p);
5377#else
5378          DEBUG_PRINT2 ("(to 0x%x).\n", p);
5379#endif
5380	  break;
5381
5382
5383        /* We need this opcode so we can detect where alternatives end
5384           in `group_match_null_string_p' et al.  */
5385        case jump_past_alt:
5386          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5387          goto unconditional_jump;
5388
5389
5390        /* Normally, the on_failure_jump pushes a failure point, which
5391           then gets popped at pop_failure_jump.  We will end up at
5392           pop_failure_jump, also, and with a pattern of, say, `a+', we
5393           are skipping over the on_failure_jump, so we have to push
5394           something meaningless for pop_failure_jump to pop.  */
5395        case dummy_failure_jump:
5396          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5397          /* It doesn't matter what we push for the string here.  What
5398             the code at `fail' tests is the value for the pattern.  */
5399          PUSH_FAILURE_POINT (NULL, NULL, -2);
5400          goto unconditional_jump;
5401
5402
5403        /* At the end of an alternative, we need to push a dummy failure
5404           point in case we are followed by a `pop_failure_jump', because
5405           we don't want the failure point for the alternative to be
5406           popped.  For example, matching `(a|ab)*' against `aab'
5407           requires that we match the `ab' alternative.  */
5408        case push_dummy_failure:
5409          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5410          /* See comments just above at `dummy_failure_jump' about the
5411             two zeroes.  */
5412          PUSH_FAILURE_POINT (NULL, NULL, -2);
5413          break;
5414
5415        /* Have to succeed matching what follows at least n times.
5416           After that, handle like `on_failure_jump'.  */
5417        case succeed_n:
5418          EXTRACT_NUMBER (mcnt, p + 2);
5419          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5420
5421          assert (mcnt >= 0);
5422          /* Originally, this is how many times we HAVE to succeed.  */
5423          if (mcnt > 0)
5424            {
5425               mcnt--;
5426	       p += 2;
5427               STORE_NUMBER_AND_INCR (p, mcnt);
5428#ifdef _LIBC
5429               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
5430#else
5431               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
5432#endif
5433            }
5434	  else if (mcnt == 0)
5435            {
5436#ifdef _LIBC
5437              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
5438#else
5439              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
5440#endif
5441	      p[2] = (unsigned char) no_op;
5442              p[3] = (unsigned char) no_op;
5443              goto on_failure;
5444            }
5445          break;
5446
5447        case jump_n:
5448          EXTRACT_NUMBER (mcnt, p + 2);
5449          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5450
5451          /* Originally, this is how many times we CAN jump.  */
5452          if (mcnt)
5453            {
5454               mcnt--;
5455               STORE_NUMBER (p + 2, mcnt);
5456#ifdef _LIBC
5457               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
5458#else
5459               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
5460#endif
5461	       goto unconditional_jump;
5462            }
5463          /* If don't have to jump any more, skip over the rest of command.  */
5464	  else
5465	    p += 4;
5466          break;
5467
5468	case set_number_at:
5469	  {
5470            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5471
5472            EXTRACT_NUMBER_AND_INCR (mcnt, p);
5473            p1 = p + mcnt;
5474            EXTRACT_NUMBER_AND_INCR (mcnt, p);
5475#ifdef _LIBC
5476            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
5477#else
5478            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
5479#endif
5480	    STORE_NUMBER (p1, mcnt);
5481            break;
5482          }
5483
5484#if 0
5485	/* The DEC Alpha C compiler 3.x generates incorrect code for the
5486	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
5487	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
5488	   macro and introducing temporary variables works around the bug.  */
5489
5490	case wordbound:
5491	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5492	  if (AT_WORD_BOUNDARY (d))
5493	    break;
5494	  goto fail;
5495
5496	case notwordbound:
5497	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5498	  if (AT_WORD_BOUNDARY (d))
5499	    goto fail;
5500	  break;
5501#else
5502	case wordbound:
5503	{
5504	  boolean prevchar, thischar;
5505
5506	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5507	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5508	    break;
5509
5510	  prevchar = WORDCHAR_P (d - 1);
5511	  thischar = WORDCHAR_P (d);
5512	  if (prevchar != thischar)
5513	    break;
5514	  goto fail;
5515	}
5516
5517      case notwordbound:
5518	{
5519	  boolean prevchar, thischar;
5520
5521	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5522	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5523	    goto fail;
5524
5525	  prevchar = WORDCHAR_P (d - 1);
5526	  thischar = WORDCHAR_P (d);
5527	  if (prevchar != thischar)
5528	    goto fail;
5529	  break;
5530	}
5531#endif
5532
5533	case wordbeg:
5534          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5535	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5536	    break;
5537          goto fail;
5538
5539	case wordend:
5540          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5541	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5542              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5543	    break;
5544          goto fail;
5545
5546#ifdef emacs
5547  	case before_dot:
5548          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5549 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5550  	    goto fail;
5551  	  break;
5552
5553  	case at_dot:
5554          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5555 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5556  	    goto fail;
5557  	  break;
5558
5559  	case after_dot:
5560          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5561          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5562  	    goto fail;
5563  	  break;
5564
5565	case syntaxspec:
5566          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5567	  mcnt = *p++;
5568	  goto matchsyntax;
5569
5570        case wordchar:
5571          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5572	  mcnt = (int) Sword;
5573        matchsyntax:
5574	  PREFETCH ();
5575	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5576	  d++;
5577	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5578	    goto fail;
5579          SET_REGS_MATCHED ();
5580	  break;
5581
5582	case notsyntaxspec:
5583          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5584	  mcnt = *p++;
5585	  goto matchnotsyntax;
5586
5587        case notwordchar:
5588          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5589	  mcnt = (int) Sword;
5590        matchnotsyntax:
5591	  PREFETCH ();
5592	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5593	  d++;
5594	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5595	    goto fail;
5596	  SET_REGS_MATCHED ();
5597          break;
5598
5599#else /* not emacs */
5600	case wordchar:
5601          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5602	  PREFETCH ();
5603          if (!WORDCHAR_P (d))
5604            goto fail;
5605	  SET_REGS_MATCHED ();
5606          d++;
5607	  break;
5608
5609	case notwordchar:
5610          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5611	  PREFETCH ();
5612	  if (WORDCHAR_P (d))
5613            goto fail;
5614          SET_REGS_MATCHED ();
5615          d++;
5616	  break;
5617#endif /* not emacs */
5618
5619        default:
5620          abort ();
5621	}
5622      continue;  /* Successfully executed one pattern command; keep going.  */
5623
5624
5625    /* We goto here if a matching operation fails. */
5626    fail:
5627      if (!FAIL_STACK_EMPTY ())
5628	{ /* A restart point is known.  Restore to that state.  */
5629          DEBUG_PRINT1 ("\nFAIL:\n");
5630          POP_FAILURE_POINT (d, p,
5631                             lowest_active_reg, highest_active_reg,
5632                             regstart, regend, reg_info);
5633
5634          /* If this failure point is a dummy, try the next one.  */
5635          if (!p)
5636	    goto fail;
5637
5638          /* If we failed to the end of the pattern, don't examine *p.  */
5639	  assert (p <= pend);
5640          if (p < pend)
5641            {
5642              boolean is_a_jump_n = false;
5643
5644              /* If failed to a backwards jump that's part of a repetition
5645                 loop, need to pop this failure point and use the next one.  */
5646              switch ((re_opcode_t) *p)
5647                {
5648                case jump_n:
5649                  is_a_jump_n = true;
5650                case maybe_pop_jump:
5651                case pop_failure_jump:
5652                case jump:
5653                  p1 = p + 1;
5654                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5655                  p1 += mcnt;
5656
5657                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5658                      || (!is_a_jump_n
5659                          && (re_opcode_t) *p1 == on_failure_jump))
5660                    goto fail;
5661                  break;
5662                default:
5663                  /* do nothing */ ;
5664                }
5665            }
5666
5667          if (d >= string1 && d <= end1)
5668	    dend = end_match_1;
5669        }
5670      else
5671        break;   /* Matching at this starting point really fails.  */
5672    } /* for (;;) */
5673
5674  if (best_regs_set)
5675    goto restore_best_regs;
5676
5677  FREE_VARIABLES ();
5678
5679  return -1;         			/* Failure to match.  */
5680} /* re_match_2 */
5681
5682/* Subroutine definitions for re_match_2.  */
5683
5684
5685/* We are passed P pointing to a register number after a start_memory.
5686
5687   Return true if the pattern up to the corresponding stop_memory can
5688   match the empty string, and false otherwise.
5689
5690   If we find the matching stop_memory, sets P to point to one past its number.
5691   Otherwise, sets P to an undefined byte less than or equal to END.
5692
5693   We don't handle duplicates properly (yet).  */
5694
5695static boolean
5696group_match_null_string_p (p, end, reg_info)
5697    unsigned char **p, *end;
5698    register_info_type *reg_info;
5699{
5700  int mcnt;
5701  /* Point to after the args to the start_memory.  */
5702  unsigned char *p1 = *p + 2;
5703
5704  while (p1 < end)
5705    {
5706      /* Skip over opcodes that can match nothing, and return true or
5707	 false, as appropriate, when we get to one that can't, or to the
5708         matching stop_memory.  */
5709
5710      switch ((re_opcode_t) *p1)
5711        {
5712        /* Could be either a loop or a series of alternatives.  */
5713        case on_failure_jump:
5714          p1++;
5715          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5716
5717          /* If the next operation is not a jump backwards in the
5718	     pattern.  */
5719
5720	  if (mcnt >= 0)
5721	    {
5722              /* Go through the on_failure_jumps of the alternatives,
5723                 seeing if any of the alternatives cannot match nothing.
5724                 The last alternative starts with only a jump,
5725                 whereas the rest start with on_failure_jump and end
5726                 with a jump, e.g., here is the pattern for `a|b|c':
5727
5728                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5729                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5730                 /exactn/1/c
5731
5732                 So, we have to first go through the first (n-1)
5733                 alternatives and then deal with the last one separately.  */
5734
5735
5736              /* Deal with the first (n-1) alternatives, which start
5737                 with an on_failure_jump (see above) that jumps to right
5738                 past a jump_past_alt.  */
5739
5740              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5741                {
5742                  /* `mcnt' holds how many bytes long the alternative
5743                     is, including the ending `jump_past_alt' and
5744                     its number.  */
5745
5746                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5747				                      reg_info))
5748                    return false;
5749
5750                  /* Move to right after this alternative, including the
5751		     jump_past_alt.  */
5752                  p1 += mcnt;
5753
5754                  /* Break if it's the beginning of an n-th alternative
5755                     that doesn't begin with an on_failure_jump.  */
5756                  if ((re_opcode_t) *p1 != on_failure_jump)
5757                    break;
5758
5759		  /* Still have to check that it's not an n-th
5760		     alternative that starts with an on_failure_jump.  */
5761		  p1++;
5762                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5763                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5764                    {
5765		      /* Get to the beginning of the n-th alternative.  */
5766                      p1 -= 3;
5767                      break;
5768                    }
5769                }
5770
5771              /* Deal with the last alternative: go back and get number
5772                 of the `jump_past_alt' just before it.  `mcnt' contains
5773                 the length of the alternative.  */
5774              EXTRACT_NUMBER (mcnt, p1 - 2);
5775
5776              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5777                return false;
5778
5779              p1 += mcnt;	/* Get past the n-th alternative.  */
5780            } /* if mcnt > 0 */
5781          break;
5782
5783
5784        case stop_memory:
5785	  assert (p1[1] == **p);
5786          *p = p1 + 2;
5787          return true;
5788
5789
5790        default:
5791          if (!common_op_match_null_string_p (&p1, end, reg_info))
5792            return false;
5793        }
5794    } /* while p1 < end */
5795
5796  return false;
5797} /* group_match_null_string_p */
5798
5799
5800/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5801   It expects P to be the first byte of a single alternative and END one
5802   byte past the last. The alternative can contain groups.  */
5803
5804static boolean
5805alt_match_null_string_p (p, end, reg_info)
5806    unsigned char *p, *end;
5807    register_info_type *reg_info;
5808{
5809  int mcnt;
5810  unsigned char *p1 = p;
5811
5812  while (p1 < end)
5813    {
5814      /* Skip over opcodes that can match nothing, and break when we get
5815         to one that can't.  */
5816
5817      switch ((re_opcode_t) *p1)
5818        {
5819	/* It's a loop.  */
5820        case on_failure_jump:
5821          p1++;
5822          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5823          p1 += mcnt;
5824          break;
5825
5826	default:
5827          if (!common_op_match_null_string_p (&p1, end, reg_info))
5828            return false;
5829        }
5830    }  /* while p1 < end */
5831
5832  return true;
5833} /* alt_match_null_string_p */
5834
5835
5836/* Deals with the ops common to group_match_null_string_p and
5837   alt_match_null_string_p.
5838
5839   Sets P to one after the op and its arguments, if any.  */
5840
5841static boolean
5842common_op_match_null_string_p (p, end, reg_info)
5843    unsigned char **p, *end;
5844    register_info_type *reg_info;
5845{
5846  int mcnt;
5847  boolean ret;
5848  int reg_no;
5849  unsigned char *p1 = *p;
5850
5851  switch ((re_opcode_t) *p1++)
5852    {
5853    case no_op:
5854    case begline:
5855    case endline:
5856    case begbuf:
5857    case endbuf:
5858    case wordbeg:
5859    case wordend:
5860    case wordbound:
5861    case notwordbound:
5862#ifdef emacs
5863    case before_dot:
5864    case at_dot:
5865    case after_dot:
5866#endif
5867      break;
5868
5869    case start_memory:
5870      reg_no = *p1;
5871      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5872      ret = group_match_null_string_p (&p1, end, reg_info);
5873
5874      /* Have to set this here in case we're checking a group which
5875         contains a group and a back reference to it.  */
5876
5877      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5878        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5879
5880      if (!ret)
5881        return false;
5882      break;
5883
5884    /* If this is an optimized succeed_n for zero times, make the jump.  */
5885    case jump:
5886      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5887      if (mcnt >= 0)
5888        p1 += mcnt;
5889      else
5890        return false;
5891      break;
5892
5893    case succeed_n:
5894      /* Get to the number of times to succeed.  */
5895      p1 += 2;
5896      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5897
5898      if (mcnt == 0)
5899        {
5900          p1 -= 4;
5901          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5902          p1 += mcnt;
5903        }
5904      else
5905        return false;
5906      break;
5907
5908    case duplicate:
5909      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5910        return false;
5911      break;
5912
5913    case set_number_at:
5914      p1 += 4;
5915
5916    default:
5917      /* All other opcodes mean we cannot match the empty string.  */
5918      return false;
5919  }
5920
5921  *p = p1;
5922  return true;
5923} /* common_op_match_null_string_p */
5924
5925
5926/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5927   bytes; nonzero otherwise.  */
5928
5929static int
5930bcmp_translate (s1, s2, len, translate)
5931     const char *s1, *s2;
5932     register int len;
5933     RE_TRANSLATE_TYPE translate;
5934{
5935  register const unsigned char *p1 = (const unsigned char *) s1;
5936  register const unsigned char *p2 = (const unsigned char *) s2;
5937  while (len)
5938    {
5939      if (translate[*p1++] != translate[*p2++]) return 1;
5940      len--;
5941    }
5942  return 0;
5943}
5944
5945/* Entry points for GNU code.  */
5946
5947/* re_compile_pattern is the GNU regular expression compiler: it
5948   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5949   Returns 0 if the pattern was valid, otherwise an error string.
5950
5951   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5952   are set in BUFP on entry.
5953
5954   We call regex_compile to do the actual compilation.  */
5955
5956const char *
5957re_compile_pattern (pattern, length, bufp)
5958     const char *pattern;
5959     size_t length;
5960     struct re_pattern_buffer *bufp;
5961{
5962  reg_errcode_t ret;
5963
5964  /* GNU code is written to assume at least RE_NREGS registers will be set
5965     (and at least one extra will be -1).  */
5966  bufp->regs_allocated = REGS_UNALLOCATED;
5967
5968  /* And GNU code determines whether or not to get register information
5969     by passing null for the REGS argument to re_match, etc., not by
5970     setting no_sub.  */
5971  bufp->no_sub = 0;
5972
5973  /* Match anchors at newline.  */
5974  bufp->newline_anchor = 1;
5975
5976  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5977
5978  if (!ret)
5979    return NULL;
5980  return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
5981}
5982#ifdef _LIBC
5983weak_alias (__re_compile_pattern, re_compile_pattern)
5984#endif
5985
5986/* Entry points compatible with 4.2 BSD regex library.  We don't define
5987   them unless specifically requested.  */
5988
5989#if defined _REGEX_RE_COMP || defined _LIBC
5990
5991/* BSD has one and only one pattern buffer.  */
5992static struct re_pattern_buffer re_comp_buf;
5993
5994char *
5995#ifdef _LIBC
5996/* Make these definitions weak in libc, so POSIX programs can redefine
5997   these names if they don't use our functions, and still use
5998   regcomp/regexec below without link errors.  */
5999weak_function
6000#endif
6001re_comp (s)
6002    const char *s;
6003{
6004  reg_errcode_t ret;
6005
6006  if (!s)
6007    {
6008      if (!re_comp_buf.buffer)
6009	return gettext ("No previous regular expression");
6010      return 0;
6011    }
6012
6013  if (!re_comp_buf.buffer)
6014    {
6015      re_comp_buf.buffer = (unsigned char *) malloc (200);
6016      if (re_comp_buf.buffer == NULL)
6017        return (char *) gettext (re_error_msgid
6018				 + re_error_msgid_idx[(int) REG_ESPACE]);
6019      re_comp_buf.allocated = 200;
6020
6021      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6022      if (re_comp_buf.fastmap == NULL)
6023	return (char *) gettext (re_error_msgid
6024				 + re_error_msgid_idx[(int) REG_ESPACE]);
6025    }
6026
6027  /* Since `re_exec' always passes NULL for the `regs' argument, we
6028     don't need to initialize the pattern buffer fields which affect it.  */
6029
6030  /* Match anchors at newlines.  */
6031  re_comp_buf.newline_anchor = 1;
6032
6033  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6034
6035  if (!ret)
6036    return NULL;
6037
6038  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6039  return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
6040}
6041
6042
6043int
6044#ifdef _LIBC
6045weak_function
6046#endif
6047re_exec (s)
6048    const char *s;
6049{
6050  const int len = strlen (s);
6051  return
6052    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6053}
6054
6055#endif /* _REGEX_RE_COMP */
6056
6057/* POSIX.2 functions.  Don't define these for Emacs.  */
6058
6059#ifndef emacs
6060
6061/* regcomp takes a regular expression as a string and compiles it.
6062
6063   PREG is a regex_t *.  We do not expect any fields to be initialized,
6064   since POSIX says we shouldn't.  Thus, we set
6065
6066     `buffer' to the compiled pattern;
6067     `used' to the length of the compiled pattern;
6068     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6069       REG_EXTENDED bit in CFLAGS is set; otherwise, to
6070       RE_SYNTAX_POSIX_BASIC;
6071     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6072     `fastmap' to an allocated space for the fastmap;
6073     `fastmap_accurate' to zero;
6074     `re_nsub' to the number of subexpressions in PATTERN.
6075
6076   PATTERN is the address of the pattern string.
6077
6078   CFLAGS is a series of bits which affect compilation.
6079
6080     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6081     use POSIX basic syntax.
6082
6083     If REG_NEWLINE is set, then . and [^...] don't match newline.
6084     Also, regexec will try a match beginning after every newline.
6085
6086     If REG_ICASE is set, then we considers upper- and lowercase
6087     versions of letters to be equivalent when matching.
6088
6089     If REG_NOSUB is set, then when PREG is passed to regexec, that
6090     routine will report only success or failure, and nothing about the
6091     registers.
6092
6093   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6094   the return codes and their meanings.)  */
6095
6096int
6097regcomp (preg, pattern, cflags)
6098    regex_t *preg;
6099    const char *pattern;
6100    int cflags;
6101{
6102  reg_errcode_t ret;
6103  reg_syntax_t syntax
6104    = (cflags & REG_EXTENDED) ?
6105      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6106
6107  /* regex_compile will allocate the space for the compiled pattern.  */
6108  preg->buffer = 0;
6109  preg->allocated = 0;
6110  preg->used = 0;
6111
6112  /* Try to allocate space for the fastmap.  */
6113  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
6114
6115  if (cflags & REG_ICASE)
6116    {
6117      unsigned i;
6118
6119      preg->translate
6120	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6121				      * sizeof (*(RE_TRANSLATE_TYPE)0));
6122      if (preg->translate == NULL)
6123        return (int) REG_ESPACE;
6124
6125      /* Map uppercase characters to corresponding lowercase ones.  */
6126      for (i = 0; i < CHAR_SET_SIZE; i++)
6127        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
6128    }
6129  else
6130    preg->translate = NULL;
6131
6132  /* If REG_NEWLINE is set, newlines are treated differently.  */
6133  if (cflags & REG_NEWLINE)
6134    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6135      syntax &= ~RE_DOT_NEWLINE;
6136      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6137      /* It also changes the matching behavior.  */
6138      preg->newline_anchor = 1;
6139    }
6140  else
6141    preg->newline_anchor = 0;
6142
6143  preg->no_sub = !!(cflags & REG_NOSUB);
6144
6145  /* POSIX says a null character in the pattern terminates it, so we
6146     can use strlen here in compiling the pattern.  */
6147  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
6148
6149  /* POSIX doesn't distinguish between an unmatched open-group and an
6150     unmatched close-group: both are REG_EPAREN.  */
6151  if (ret == REG_ERPAREN) ret = REG_EPAREN;
6152
6153  if (ret == REG_NOERROR && preg->fastmap)
6154    {
6155      /* Compute the fastmap now, since regexec cannot modify the pattern
6156	 buffer.  */
6157      if (re_compile_fastmap (preg) == -2)
6158	{
6159	  /* Some error occurred while computing the fastmap, just forget
6160	     about it.  */
6161	  free (preg->fastmap);
6162	  preg->fastmap = NULL;
6163	}
6164    }
6165
6166  return (int) ret;
6167}
6168#ifdef _LIBC
6169weak_alias (__regcomp, regcomp)
6170#endif
6171
6172
6173/* regexec searches for a given pattern, specified by PREG, in the
6174   string STRING.
6175
6176   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6177   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6178   least NMATCH elements, and we set them to the offsets of the
6179   corresponding matched substrings.
6180
6181   EFLAGS specifies `execution flags' which affect matching: if
6182   REG_NOTBOL is set, then ^ does not match at the beginning of the
6183   string; if REG_NOTEOL is set, then $ does not match at the end.
6184
6185   We return 0 if we find a match and REG_NOMATCH if not.  */
6186
6187int
6188regexec (preg, string, nmatch, pmatch, eflags)
6189    const regex_t *preg;
6190    const char *string;
6191    size_t nmatch;
6192    regmatch_t pmatch[];
6193    int eflags;
6194{
6195  int ret;
6196  struct re_registers regs;
6197  regex_t private_preg;
6198  int len = strlen (string);
6199  boolean want_reg_info = !preg->no_sub && nmatch > 0;
6200
6201  private_preg = *preg;
6202
6203  private_preg.not_bol = !!(eflags & REG_NOTBOL);
6204  private_preg.not_eol = !!(eflags & REG_NOTEOL);
6205
6206  /* The user has told us exactly how many registers to return
6207     information about, via `nmatch'.  We have to pass that on to the
6208     matching routines.  */
6209  private_preg.regs_allocated = REGS_FIXED;
6210
6211  if (want_reg_info)
6212    {
6213      regs.num_regs = nmatch;
6214      regs.start = TALLOC (nmatch * 2, regoff_t);
6215      if (regs.start == NULL)
6216        return (int) REG_NOMATCH;
6217      regs.end = regs.start + nmatch;
6218    }
6219
6220  /* Perform the searching operation.  */
6221  ret = re_search (&private_preg, string, len,
6222                   /* start: */ 0, /* range: */ len,
6223                   want_reg_info ? &regs : (struct re_registers *) 0);
6224
6225  /* Copy the register information to the POSIX structure.  */
6226  if (want_reg_info)
6227    {
6228      if (ret >= 0)
6229        {
6230          unsigned r;
6231
6232          for (r = 0; r < nmatch; r++)
6233            {
6234              pmatch[r].rm_so = regs.start[r];
6235              pmatch[r].rm_eo = regs.end[r];
6236            }
6237        }
6238
6239      /* If we needed the temporary register info, free the space now.  */
6240      free (regs.start);
6241    }
6242
6243  /* We want zero return to mean success, unlike `re_search'.  */
6244  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6245}
6246#ifdef _LIBC
6247weak_alias (__regexec, regexec)
6248#endif
6249
6250
6251/* Returns a message corresponding to an error code, ERRCODE, returned
6252   from either regcomp or regexec.   We don't use PREG here.  */
6253
6254size_t
6255regerror (errcode, preg, errbuf, errbuf_size)
6256    int errcode;
6257    const regex_t *preg;
6258    char *errbuf;
6259    size_t errbuf_size;
6260{
6261  const char *msg;
6262  size_t msg_size;
6263
6264  if (errcode < 0
6265      || errcode >= (int) (sizeof (re_error_msgid_idx)
6266			   / sizeof (re_error_msgid_idx[0])))
6267    /* Only error codes returned by the rest of the code should be passed
6268       to this routine.  If we are given anything else, or if other regex
6269       code generates an invalid error code, then the program has a bug.
6270       Dump core so we can fix it.  */
6271    abort ();
6272
6273  msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
6274
6275  msg_size = strlen (msg) + 1; /* Includes the null.  */
6276
6277  if (errbuf_size != 0)
6278    {
6279      if (msg_size > errbuf_size)
6280        {
6281#if defined HAVE_MEMPCPY || defined _LIBC
6282	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
6283#else
6284          memcpy (errbuf, msg, errbuf_size - 1);
6285          errbuf[errbuf_size - 1] = 0;
6286#endif
6287        }
6288      else
6289        memcpy (errbuf, msg, msg_size);
6290    }
6291
6292  return msg_size;
6293}
6294#ifdef _LIBC
6295weak_alias (__regerror, regerror)
6296#endif
6297
6298
6299/* Free dynamically allocated space used by PREG.  */
6300
6301void
6302regfree (preg)
6303    regex_t *preg;
6304{
6305  if (preg->buffer != NULL)
6306    free (preg->buffer);
6307  preg->buffer = NULL;
6308
6309  preg->allocated = 0;
6310  preg->used = 0;
6311
6312  if (preg->fastmap != NULL)
6313    free (preg->fastmap);
6314  preg->fastmap = NULL;
6315  preg->fastmap_accurate = 0;
6316
6317  if (preg->translate != NULL)
6318    free (preg->translate);
6319  preg->translate = NULL;
6320}
6321#ifdef _LIBC
6322weak_alias (__regfree, regfree)
6323#endif
6324
6325#endif /* not emacs  */
6326#else	/* !defined(USE_LIB_REGEX) */
6327char regex_d1[] = "d"; char *regex_d2 = regex_d1;
6328#endif	/* defined(USE_LIB_REGEX) */
6329