1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P1003.2/D11.2, except for some of the
4   internationalization features.)
5
6   Copyright (C) 1993-2022 Free Software Foundation, Inc.
7   This file is part of the GNU C Library.
8
9   The GNU C Library is free software; you can redistribute it and/or
10   modify it under the terms of the GNU Lesser General Public
11   License as published by the Free Software Foundation; either
12   version 2.1 of the License, or (at your option) any later version.
13
14   The GNU C Library is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17   Lesser General Public License for more details.
18
19   You should have received a copy of the GNU Lesser General Public
20   License along with the GNU C Library; if not, write to the Free
21   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22   02110-1301 USA.  */
23
/* This file has been modified for usage in libiberty.  It includes "xregex.h"
   instead of <regex.h>.  The "xregex.h" header file renames all external
   routines with an "x" prefix so they do not collide with the native regex
   routines or with other components' regex routines.  */
28/* AIX requires this to be the first thing in the file. */
29#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
30  #pragma alloca
31#endif
32
33#if __GNUC__ >= 12
34#  pragma GCC diagnostic ignored "-Wuse-after-free"
35#endif
36
37#undef	_GNU_SOURCE
38#define _GNU_SOURCE
39
40#ifndef INSIDE_RECURSION
41# ifdef HAVE_CONFIG_H
42#  include <config.h>
43# endif
44#endif
45
46#include <ansidecl.h>
47
48#ifndef INSIDE_RECURSION
49
50# if defined STDC_HEADERS && !defined emacs
51#  include <stddef.h>
52#  define PTR_INT_TYPE ptrdiff_t
53# else
54/* We need this for `regex.h', and perhaps for the Emacs include files.  */
55#  include <sys/types.h>
56#  define PTR_INT_TYPE long
57# endif
58
59# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
60
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
63# if defined _LIBC || WIDE_CHAR_SUPPORT
64/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
65#  include <wchar.h>
66#  include <wctype.h>
67# endif
68
69# ifdef _LIBC
70/* We have to keep the namespace clean.  */
71#  define regfree(preg) __regfree (preg)
72#  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
73#  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
74#  define regerror(errcode, preg, errbuf, errbuf_size) \
75	__regerror(errcode, preg, errbuf, errbuf_size)
76#  define re_set_registers(bu, re, nu, st, en) \
77	__re_set_registers (bu, re, nu, st, en)
78#  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
79	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
80#  define re_match(bufp, string, size, pos, regs) \
81	__re_match (bufp, string, size, pos, regs)
82#  define re_search(bufp, string, size, startpos, range, regs) \
83	__re_search (bufp, string, size, startpos, range, regs)
84#  define re_compile_pattern(pattern, length, bufp) \
85	__re_compile_pattern (pattern, length, bufp)
86#  define re_set_syntax(syntax) __re_set_syntax (syntax)
87#  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
88	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
89#  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
90
91#  define btowc __btowc
92
93/* We are also using some library internals.  */
94#  include <locale/localeinfo.h>
95#  include <locale/elem-hash.h>
96#  include <langinfo.h>
97#  include <locale/coll-lookup.h>
98# endif
99
100/* This is for other GNU distributions with internationalized messages.  */
101# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
102#  include <libintl.h>
103#  ifdef _LIBC
104#   undef gettext
105#   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
106#  endif
107# else
108#  define gettext(msgid) (msgid)
109# endif
110
111# ifndef gettext_noop
112/* This define is so xgettext can find the internationalizable
113   strings.  */
114#  define gettext_noop(String) String
115# endif
116
117/* The `emacs' switch turns on certain matching commands
118   that make sense only in Emacs. */
119# ifdef emacs
120
121#  include "lisp.h"
122#  include "buffer.h"
123#  include "syntax.h"
124
125# else  /* not emacs */
126
127/* If we are not linking with Emacs proper,
128   we can't use the relocating allocator
129   even if config.h says that we can.  */
130#  undef REL_ALLOC
131
132#  if defined STDC_HEADERS || defined _LIBC
133#   include <stdlib.h>
134#  else
135char *malloc ();
136char *realloc ();
137#  endif
138
139/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
140   If nothing else has been done, use the method below.  */
141#  ifdef INHIBIT_STRING_HEADER
142#   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
143#    if !defined bzero && !defined bcopy
144#     undef INHIBIT_STRING_HEADER
145#    endif
146#   endif
147#  endif
148
149/* This is the normal way of making sure we have a bcopy and a bzero.
150   This is used in most programs--a few other programs avoid this
151   by defining INHIBIT_STRING_HEADER.  */
152#  ifndef INHIBIT_STRING_HEADER
153#   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
154#    include <string.h>
155#    ifndef bzero
156#     ifndef _LIBC
157#      define bzero(s, n)	((void) memset (s, '\0', n))
158#     else
159#      define bzero(s, n)	__bzero (s, n)
160#     endif
161#    endif
162#   else
163#    include <strings.h>
164#    ifndef memcmp
165#     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
166#    endif
167#    ifndef memcpy
168#     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
169#    endif
170#   endif
171#  endif
172
173/* Define the syntax stuff for \<, \>, etc.  */
174
175/* This must be nonzero for the wordchar and notwordchar pattern
176   commands in re_match_2.  */
177#  ifndef Sword
178#   define Sword 1
179#  endif
180
181#  ifdef SWITCH_ENUM_BUG
182#   define SWITCH_ENUM_CAST(x) ((int)(x))
183#  else
184#   define SWITCH_ENUM_CAST(x) (x)
185#  endif
186
187# endif /* not emacs */
188
189# if defined _LIBC || HAVE_LIMITS_H
190#  include <limits.h>
191# endif
192
193# ifndef MB_LEN_MAX
194#  define MB_LEN_MAX 1
195# endif
196
197/* Get the interface, including the syntax bits.  */
198# include "xregex.h"  /* change for libiberty */
199
200/* isalpha etc. are used for the character classes.  */
201# include <ctype.h>
202
203/* Jim Meyering writes:
204
205   "... Some ctype macros are valid only for character codes that
206   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
207   using /bin/cc or gcc but without giving an ansi option).  So, all
208   ctype uses should be through macros like ISPRINT...  If
209   STDC_HEADERS is defined, then autoconf has verified that the ctype
210   macros don't need to be guarded with references to isascii. ...
211   Defining isascii to 1 should let any compiler worth its salt
212   eliminate the && through constant folding."
213   Solaris defines some of these symbols so we must undefine them first.  */
214
215# undef ISASCII
216# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
217#  define ISASCII(c) 1
218# else
219#  define ISASCII(c) isascii(c)
220# endif
221
222# ifdef isblank
223#  define ISBLANK(c) (ISASCII (c) && isblank (c))
224# else
225#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
226# endif
227# ifdef isgraph
228#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
229# else
230#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
231# endif
232
233# undef ISPRINT
234# define ISPRINT(c) (ISASCII (c) && isprint (c))
235# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
236# define ISALNUM(c) (ISASCII (c) && isalnum (c))
237# define ISALPHA(c) (ISASCII (c) && isalpha (c))
238# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
239# define ISLOWER(c) (ISASCII (c) && islower (c))
240# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
241# define ISSPACE(c) (ISASCII (c) && isspace (c))
242# define ISUPPER(c) (ISASCII (c) && isupper (c))
243# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
244
245# ifdef _tolower
246#  define TOLOWER(c) _tolower(c)
247# else
248#  define TOLOWER(c) tolower(c)
249# endif
250
251# ifndef NULL
252#  define NULL (void *)0
253# endif
254
255/* We remove any previous definition of `SIGN_EXTEND_CHAR',
256   since ours (we hope) works properly with all combinations of
257   machines, compilers, `char' and `unsigned char' argument types.
258   (Per Bothner suggested the basic approach.)  */
259# undef SIGN_EXTEND_CHAR
260# if __STDC__
261#  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
262# else  /* not __STDC__ */
263/* As in Harbison and Steele.  */
264#  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
265# endif
266
267# ifndef emacs
268/* How many characters in the character set.  */
269#  define CHAR_SET_SIZE 256
270
271#  ifdef SYNTAX_TABLE
272
273extern char *re_syntax_table;
274
275#  else /* not SYNTAX_TABLE */
276
277static char re_syntax_table[CHAR_SET_SIZE];
278
279static void init_syntax_once (void);
280
281static void
282init_syntax_once (void)
283{
284   register int c;
285   static int done = 0;
286
287   if (done)
288     return;
289   bzero (re_syntax_table, sizeof re_syntax_table);
290
291   for (c = 0; c < CHAR_SET_SIZE; ++c)
292     if (ISALNUM (c))
293	re_syntax_table[c] = Sword;
294
295   re_syntax_table['_'] = Sword;
296
297   done = 1;
298}
299
300#  endif /* not SYNTAX_TABLE */
301
302#  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
303
304# endif /* emacs */
305
306/* Integer type for pointers.  */
307# if !defined _LIBC && !defined HAVE_UINTPTR_T
308typedef unsigned long int uintptr_t;
309# endif
310
311/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
312   use `alloca' instead of `malloc'.  This is because using malloc in
313   re_search* or re_match* could cause memory leaks when C-g is used in
314   Emacs; also, malloc is slower and causes storage fragmentation.  On
315   the other hand, malloc is more portable, and easier to debug.
316
317   Because we sometimes use alloca, some routines have to be macros,
318   not functions -- `alloca'-allocated space disappears at the end of the
319   function it is called in.  */
320
321# ifdef REGEX_MALLOC
322
323#  define REGEX_ALLOCATE malloc
324#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
325#  define REGEX_FREE free
326
327# else /* not REGEX_MALLOC  */
328
329/* Emacs already defines alloca, sometimes.  */
330#  ifndef alloca
331
332/* Make alloca work the best possible way.  */
333#   ifdef __GNUC__
334#    define alloca __builtin_alloca
335#   else /* not __GNUC__ */
336#    if HAVE_ALLOCA_H
337#     include <alloca.h>
338#    endif /* HAVE_ALLOCA_H */
339#   endif /* not __GNUC__ */
340
341#  endif /* not alloca */
342
343#  define REGEX_ALLOCATE alloca
344
345/* Assumes a `char *destination' variable.  */
346#  define REGEX_REALLOCATE(source, osize, nsize)			\
347  (destination = (char *) alloca (nsize),				\
348   memcpy (destination, source, osize))
349
350/* No need to do anything to free, after alloca.  */
351#  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
352
353# endif /* not REGEX_MALLOC */
354
355/* Define how to allocate the failure stack.  */
356
357# if defined REL_ALLOC && defined REGEX_MALLOC
358
359#  define REGEX_ALLOCATE_STACK(size)				\
360  r_alloc (&failure_stack_ptr, (size))
361#  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
362  r_re_alloc (&failure_stack_ptr, (nsize))
363#  define REGEX_FREE_STACK(ptr)					\
364  r_alloc_free (&failure_stack_ptr)
365
366# else /* not using relocating allocator */
367
368#  ifdef REGEX_MALLOC
369
370#   define REGEX_ALLOCATE_STACK malloc
371#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
372#   define REGEX_FREE_STACK free
373
374#  else /* not REGEX_MALLOC */
375
376#   define REGEX_ALLOCATE_STACK alloca
377
378#   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
379   REGEX_REALLOCATE (source, osize, nsize)
380/* No need to explicitly free anything.  */
381#   define REGEX_FREE_STACK(arg)
382
383#  endif /* not REGEX_MALLOC */
384# endif /* not using relocating allocator */
385
386
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
390# define FIRST_STRING_P(ptr) 					\
391  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
392
393/* (Re)Allocate N items of type T using malloc, or fail.  */
394# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
395# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
396# define RETALLOC_IF(addr, n, t) \
397  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
398# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
399
400# define BYTEWIDTH 8 /* In bits.  */
401
402# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
403
404# undef MAX
405# undef MIN
406# define MAX(a, b) ((a) > (b) ? (a) : (b))
407# define MIN(a, b) ((a) < (b) ? (a) : (b))
408
409typedef char boolean;
410# define false 0
411# define true 1
412
413static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
414                                         reg_syntax_t syntax,
415                                         struct re_pattern_buffer *bufp);
416
417static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
418                                     const char *string1, int size1,
419                                     const char *string2, int size2,
420                                     int pos,
421                                     struct re_registers *regs,
422                                     int stop);
423static int byte_re_search_2 (struct re_pattern_buffer *bufp,
424                             const char *string1, int size1,
425                             const char *string2, int size2,
426                             int startpos, int range,
427                             struct re_registers *regs, int stop);
428static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
429
430#ifdef MBS_SUPPORT
431static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
432                                        reg_syntax_t syntax,
433                                        struct re_pattern_buffer *bufp);
434
435
436static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
437                                    const char *cstring1, int csize1,
438                                    const char *cstring2, int csize2,
439                                    int pos,
440                                    struct re_registers *regs,
441                                    int stop,
442                                    wchar_t *string1, int size1,
443                                    wchar_t *string2, int size2,
444                                    int *mbs_offset1, int *mbs_offset2);
445static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
446                            const char *string1, int size1,
447                            const char *string2, int size2,
448                            int startpos, int range,
449                            struct re_registers *regs, int stop);
450static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
451#endif
452
/* Command codes of the compiled pattern.  A compiled regular expression
   is a sequence of these opcodes; some are followed by argument bytes,
   and each opcode is free to interpret its arguments however it likes.
   The compiled pattern may contain zero bytes.  */

typedef enum
{
  no_op = 0,

  /* Succeed immediately; no further backtracking is done.  */
  succeed,

  /* Arguments: one byte holding a count N, then N literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Like exactn, except that the data is binary.  */
  exactn_bin,
# endif

  /* Match any (more or less) character.  */
  anychar,

  /* Match one character that is a member of the given set.  The first
     argument byte is the number of bitmap bytes; the bitmap itself
     follows.  Within each bitmap byte the bits are ordered
     low-bit-first, and a character belongs to the set when its bit is
     1.  A character too large to have a bit in the map is never in
     the set.

     With MBS_SUPPORT, the elements that follow are the length of the
     character classes, of the collating symbols, of the equivalence
     classes, of the character ranges and of the characters; after
     those come the character class elements, collating symbol
     elements, equivalence class elements, range elements and
     character elements.  See the regex_compile function.  */
  charset,

  /* Takes the same arguments as charset, but matches any character
     that is NOT one of those specified.  */
  charset_not,

  /* Begin remembering the matched text so it can be stored in a
     register.  Arguments: one byte with the register number (from 0
     to one less than the pattern buffer's re_nsub field), then one
     byte with the number of groups nested inside this one.  The
     nested-group count is part of start_memory only because the
     on_failure_jump handling of re_match_2 needs it.  */
  start_memory,

  /* Finish remembering the matched text and store it in a memory
     register.  Arguments: one byte with the register number (from 0
     to one less than `re_nsub' in the pattern buffer) and one byte
     with the number of inner groups, exactly as for `start_memory'.
     The inner-group count is repeated here because there is no easy
     way to find the corresponding start_memory from a stop_memory.  */
  stop_memory,

  /* Match a duplicate of text remembered earlier.  Argument: one byte
     containing the register number.  */
  duplicate,

  /* Fail unless positioned at the beginning of a line.  */
  begline,

  /* Fail unless positioned at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (when built for emacs) or
     at the beginning of the string being matched (otherwise).  */
  begbuf,

  /* The same as begbuf, but for the end of the buffer/string.  */
  endbuf,

  /* Argument: a two-byte relative address to jump to.  */
  jump,

  /* Like jump, but additionally marks the end of an alternative.  */
  jump_past_alt,

  /* Argument: the two-byte relative address at which to resume in
     case of failure.
     With MBS_SUPPORT the size of the address is 1.  */
  on_failure_jump,

  /* Like on_failure_jump, except that a placeholder is pushed in
     place of the current string position when it is executed.  */
  on_failure_keep_string_jump,

  /* Discard the most recent failure point, then jump to the two-byte
     relative address that follows.
     With MBS_SUPPORT the size of the address is 1.  */
  pop_failure_jump,

  /* Used to jump back to the beginning of a repeat.  It is changed
     to pop_failure_jump if we know we won't have to backtrack to
     match, and to jump otherwise.  That is: when what follows this
     jump clearly cannot match what the repeat matches, so that
     backtracking out of repetitions already matched is pointless,
     it becomes a pop_failure_jump.  Followed by a two-byte address.
     With MBS_SUPPORT the size of the address is 1.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy
     failure point, which is thrown away if an attempt is made to use
     it for a failure.  A `+' construct emits this before the first
     repeat; it also serves as an intermediate kind of jump while an
     alternative is being compiled.
     With MBS_SUPPORT the size of the address is 1.  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Arguments: a two-byte relative address and a two-byte number N.
     After matching N times, jump to the address upon failure.
     With MBS_SUPPORT the size of the address is 1.  */
  succeed_n,

  /* Arguments: a two-byte relative address and a two-byte number N.
     Jump to the address N times, then fail.
     With MBS_SUPPORT the size of the address is 1.  */
  jump_n,

  /* Set the location named by the following two-byte relative address
     to the two-byte number that comes after it.  The address
     *includes* the two bytes of the number.
     With MBS_SUPPORT the size of the address is 1.  */
  set_number_at,

  /* Matches any word-constituent character.  */
  wordchar,

  /* Matches any character that is not a word constituent.  */
  notwordchar,

  /* Succeeds if at the beginning of a word.  */
  wordbeg,

  /* Succeeds if at the end of a word.  */
  wordend,

  /* Succeeds if at a word boundary.  */
  wordbound,

  /* Succeeds if not at a word boundary.  */
  notwordbound

# ifdef emacs
  /* Succeeds if before point.  */
  ,before_dot,

  /* Succeeds if at point.  */
  at_dot,

  /* Succeeds if after point.  */
  after_dot,

  /* Matches any character whose syntax is the one specified.  Followed
     by a byte containing a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not the one specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
610#endif /* not INSIDE_RECURSION */
611
612
613#ifdef BYTE
614# define CHAR_T char
615# define UCHAR_T unsigned char
616# define COMPILED_BUFFER_VAR bufp->buffer
617# define OFFSET_ADDRESS_SIZE 2
618# define PREFIX(name) byte_##name
619# define ARG_PREFIX(name) name
620# define PUT_CHAR(c) putchar (c)
621#else
622# ifdef WCHAR
623#  define CHAR_T wchar_t
624#  define UCHAR_T wchar_t
625#  define COMPILED_BUFFER_VAR wc_buffer
626#  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
627#  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
628#  define PREFIX(name) wcs_##name
629#  define ARG_PREFIX(name) c##name
/* Print the wide character C with printf's "%C" conversion.
   Should we use wide stream??
   The expansion deliberately carries no trailing semicolon: callers
   write `PUT_CHAR (x);' themselves, so the macro expands to exactly one
   statement (as the BYTE variant does) and stays safe as the body of an
   unbraced `if'/`else'.  */
#  define PUT_CHAR(c) printf ("%C", c)
632#  define TRUE 1
633#  define FALSE 0
634# else
635#  ifdef MBS_SUPPORT
636#   define WCHAR
637#   define INSIDE_RECURSION
638#   include "regex.c"
639#   undef INSIDE_RECURSION
640#  endif
641#  define BYTE
642#  define INSIDE_RECURSION
643#  include "regex.c"
644#  undef INSIDE_RECURSION
645# endif
646#endif
647
648#ifdef INSIDE_RECURSION
649/* Common operations on the compiled pattern.  */
650
651/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
652/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
653
654# ifdef WCHAR
655#  define STORE_NUMBER(destination, number)				\
656  do {									\
657    *(destination) = (UCHAR_T)(number);				\
658  } while (0)
659# else /* BYTE */
660#  define STORE_NUMBER(destination, number)				\
661  do {									\
662    (destination)[0] = (number) & 0377;					\
663    (destination)[1] = (number) >> 8;					\
664  } while (0)
665# endif /* WCHAR */
666
667/* Same as STORE_NUMBER, except increment DESTINATION to
668   the byte after where the number is stored.  Therefore, DESTINATION
669   must be an lvalue.  */
670/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
671
672# define STORE_NUMBER_AND_INCR(destination, number)			\
673  do {									\
674    STORE_NUMBER (destination, number);					\
675    (destination) += OFFSET_ADDRESS_SIZE;				\
676  } while (0)
677
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  */
/* ifdef MBS_SUPPORT, the number is stored in 1 element.  */
681
682# ifdef WCHAR
683#  define EXTRACT_NUMBER(destination, source)				\
684  do {									\
685    (destination) = *(source);						\
686  } while (0)
687# else /* BYTE */
688#  define EXTRACT_NUMBER(destination, source)				\
689  do {									\
690    (destination) = *(source) & 0377;					\
691    (destination) += ((unsigned) SIGN_EXTEND_CHAR (*((source) + 1))) << 8; \
692  } while (0)
693# endif
694
695# ifdef DEBUG
696static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
697static void
698PREFIX(extract_number) (int *dest, UCHAR_T *source)
699{
700#  ifdef WCHAR
701  *dest = *source;
702#  else /* BYTE */
703  int temp = SIGN_EXTEND_CHAR (*(source + 1));
704  *dest = *source & 0377;
705  *dest += temp << 8;
706#  endif
707}
708
709#  ifndef EXTRACT_MACROS /* To debug the macros.  */
710#   undef EXTRACT_NUMBER
711#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
712#  endif /* not EXTRACT_MACROS */
713
714# endif /* DEBUG */
715
716/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
717   SOURCE must be an lvalue.  */
718
719# define EXTRACT_NUMBER_AND_INCR(destination, source)			\
720  do {									\
721    EXTRACT_NUMBER (destination, source);				\
722    (source) += OFFSET_ADDRESS_SIZE; 					\
723  } while (0)
724
725# ifdef DEBUG
726static void PREFIX(extract_number_and_incr) (int *destination,
727                                             UCHAR_T **source);
728static void
729PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
730{
731  PREFIX(extract_number) (destination, *source);
732  *source += OFFSET_ADDRESS_SIZE;
733}
734
735#  ifndef EXTRACT_MACROS
736#   undef EXTRACT_NUMBER_AND_INCR
737#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
738  PREFIX(extract_number_and_incr) (&dest, &src)
739#  endif /* not EXTRACT_MACROS */
740
741# endif /* DEBUG */
742
743
744
745/* If DEBUG is defined, Regex prints many voluminous messages about what
746   it is doing (if the variable `debug' is nonzero).  If linked with the
747   main program in `iregex.c', you can enter patterns and strings
748   interactively.  And if linked with the main program in `main.c' and
749   the other test files, you can run the already-written tests.  */
750
751# ifdef DEBUG
752
753#  ifndef DEFINED_ONCE
754
755/* We use standard I/O for debugging.  */
756#   include <stdio.h>
757
758/* It is useful to test things that ``must'' be true when debugging.  */
759#   include <assert.h>
760
761static int debug;
762
763#   define DEBUG_STATEMENT(e) e
764#   define DEBUG_PRINT1(x) if (debug) printf (x)
765#   define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
766#   define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
767#   define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
768#  endif /* not DEFINED_ONCE */
769
770#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
771  if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
772#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
773  if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
774
775
776/* Print the fastmap in human-readable form.  */
777
778#  ifndef DEFINED_ONCE
779void
780print_fastmap (char *fastmap)
781{
782  unsigned was_a_range = 0;
783  unsigned i = 0;
784
785  while (i < (1 << BYTEWIDTH))
786    {
787      if (fastmap[i++])
788	{
789	  was_a_range = 0;
790          putchar (i - 1);
791          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
792            {
793              was_a_range = 1;
794              i++;
795            }
796	  if (was_a_range)
797            {
798              printf ("-");
799              putchar (i - 1);
800            }
801        }
802    }
803  putchar ('\n');
804}
805#  endif /* not DEFINED_ONCE */
806
807
808/* Print a compiled pattern string in human-readable form, starting at
809   the START pointer into it and ending just before the pointer END.  */
810
811void
812PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
813{
814  int mcnt, mcnt2;
815  UCHAR_T *p1;
816  UCHAR_T *p = start;
817  UCHAR_T *pend = end;
818
819  if (start == NULL)
820    {
821      printf ("(null)\n");
822      return;
823    }
824
825  /* Loop over pattern commands.  */
826  while (p < pend)
827    {
828#  ifdef _LIBC
829      printf ("%td:\t", p - start);
830#  else
831      printf ("%ld:\t", (long int) (p - start));
832#  endif
833
834      switch ((re_opcode_t) *p++)
835	{
836        case no_op:
837          printf ("/no_op");
838          break;
839
840	case exactn:
841	  mcnt = *p++;
842          printf ("/exactn/%d", mcnt);
843          do
844	    {
845              putchar ('/');
846	      PUT_CHAR (*p++);
847            }
848          while (--mcnt);
849          break;
850
851#  ifdef MBS_SUPPORT
852	case exactn_bin:
853	  mcnt = *p++;
854	  printf ("/exactn_bin/%d", mcnt);
855          do
856	    {
857	      printf("/%lx", (long int) *p++);
858            }
859          while (--mcnt);
860          break;
861#  endif /* MBS_SUPPORT */
862
863	case start_memory:
864          mcnt = *p++;
865          printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
866          break;
867
868	case stop_memory:
869          mcnt = *p++;
870	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
871          break;
872
873	case duplicate:
874	  printf ("/duplicate/%ld", (long int) *p++);
875	  break;
876
877	case anychar:
878	  printf ("/anychar");
879	  break;
880
881	case charset:
882        case charset_not:
883          {
884#  ifdef WCHAR
885	    int i, length;
886	    wchar_t *workp = p;
887	    printf ("/charset [%s",
888	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
889	    p += 5;
890	    length = *workp++; /* the length of char_classes */
891	    for (i=0 ; i<length ; i++)
892	      printf("[:%lx:]", (long int) *p++);
893	    length = *workp++; /* the length of collating_symbol */
894	    for (i=0 ; i<length ;)
895	      {
896		printf("[.");
897		while(*p != 0)
898		  PUT_CHAR((i++,*p++));
899		i++,p++;
900		printf(".]");
901	      }
902	    length = *workp++; /* the length of equivalence_class */
903	    for (i=0 ; i<length ;)
904	      {
905		printf("[=");
906		while(*p != 0)
907		  PUT_CHAR((i++,*p++));
908		i++,p++;
909		printf("=]");
910	      }
911	    length = *workp++; /* the length of char_range */
912	    for (i=0 ; i<length ; i++)
913	      {
914		wchar_t range_start = *p++;
915		wchar_t range_end = *p++;
916		printf("%C-%C", range_start, range_end);
917	      }
918	    length = *workp++; /* the length of char */
919	    for (i=0 ; i<length ; i++)
920	      printf("%C", *p++);
921	    putchar (']');
922#  else
923            register int c, last = -100;
924	    register int in_range = 0;
925
926	    printf ("/charset [%s",
927	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
928
929            assert (p + *p < pend);
930
931            for (c = 0; c < 256; c++)
932	      if (c / 8 < *p
933		  && (p[1 + (c/8)] & (1 << (c % 8))))
934		{
935		  /* Are we starting a range?  */
936		  if (last + 1 == c && ! in_range)
937		    {
938		      putchar ('-');
939		      in_range = 1;
940		    }
941		  /* Have we broken a range?  */
942		  else if (last + 1 != c && in_range)
943              {
944		      putchar (last);
945		      in_range = 0;
946		    }
947
948		  if (! in_range)
949		    putchar (c);
950
951		  last = c;
952              }
953
954	    if (in_range)
955	      putchar (last);
956
957	    putchar (']');
958
959	    p += 1 + *p;
960#  endif /* WCHAR */
961	  }
962	  break;
963
964	case begline:
965	  printf ("/begline");
966          break;
967
968	case endline:
969          printf ("/endline");
970          break;
971
972	case on_failure_jump:
973          PREFIX(extract_number_and_incr) (&mcnt, &p);
974#  ifdef _LIBC
975  	  printf ("/on_failure_jump to %td", p + mcnt - start);
976#  else
977  	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
978#  endif
979          break;
980
981	case on_failure_keep_string_jump:
982          PREFIX(extract_number_and_incr) (&mcnt, &p);
983#  ifdef _LIBC
984  	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
985#  else
986  	  printf ("/on_failure_keep_string_jump to %ld",
987		  (long int) (p + mcnt - start));
988#  endif
989          break;
990
991	case dummy_failure_jump:
992          PREFIX(extract_number_and_incr) (&mcnt, &p);
993#  ifdef _LIBC
994  	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
995#  else
996  	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
997#  endif
998          break;
999
1000	case push_dummy_failure:
1001          printf ("/push_dummy_failure");
1002          break;
1003
1004        case maybe_pop_jump:
1005          PREFIX(extract_number_and_incr) (&mcnt, &p);
1006#  ifdef _LIBC
1007  	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
1008#  else
1009  	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1010#  endif
1011	  break;
1012
1013        case pop_failure_jump:
1014	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1015#  ifdef _LIBC
1016  	  printf ("/pop_failure_jump to %td", p + mcnt - start);
1017#  else
1018  	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1019#  endif
1020	  break;
1021
1022        case jump_past_alt:
1023	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1024#  ifdef _LIBC
1025  	  printf ("/jump_past_alt to %td", p + mcnt - start);
1026#  else
1027  	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1028#  endif
1029	  break;
1030
1031        case jump:
1032	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1033#  ifdef _LIBC
1034  	  printf ("/jump to %td", p + mcnt - start);
1035#  else
1036  	  printf ("/jump to %ld", (long int) (p + mcnt - start));
1037#  endif
1038	  break;
1039
1040        case succeed_n:
1041          PREFIX(extract_number_and_incr) (&mcnt, &p);
1042	  p1 = p + mcnt;
1043          PREFIX(extract_number_and_incr) (&mcnt2, &p);
1044#  ifdef _LIBC
1045	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1046#  else
1047	  printf ("/succeed_n to %ld, %d times",
1048		  (long int) (p1 - start), mcnt2);
1049#  endif
1050          break;
1051
1052        case jump_n:
1053          PREFIX(extract_number_and_incr) (&mcnt, &p);
1054	  p1 = p + mcnt;
1055          PREFIX(extract_number_and_incr) (&mcnt2, &p);
1056	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1057          break;
1058
1059        case set_number_at:
1060          PREFIX(extract_number_and_incr) (&mcnt, &p);
1061	  p1 = p + mcnt;
1062          PREFIX(extract_number_and_incr) (&mcnt2, &p);
1063#  ifdef _LIBC
1064	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1065#  else
1066	  printf ("/set_number_at location %ld to %d",
1067		  (long int) (p1 - start), mcnt2);
1068#  endif
1069          break;
1070
1071        case wordbound:
1072	  printf ("/wordbound");
1073	  break;
1074
1075	case notwordbound:
1076	  printf ("/notwordbound");
1077          break;
1078
1079	case wordbeg:
1080	  printf ("/wordbeg");
1081	  break;
1082
1083	case wordend:
1084	  printf ("/wordend");
1085	  break;
1086
1087#  ifdef emacs
1088	case before_dot:
1089	  printf ("/before_dot");
1090          break;
1091
1092	case at_dot:
1093	  printf ("/at_dot");
1094          break;
1095
1096	case after_dot:
1097	  printf ("/after_dot");
1098          break;
1099
1100	case syntaxspec:
1101          printf ("/syntaxspec");
1102	  mcnt = *p++;
1103	  printf ("/%d", mcnt);
1104          break;
1105
1106	case notsyntaxspec:
1107          printf ("/notsyntaxspec");
1108	  mcnt = *p++;
1109	  printf ("/%d", mcnt);
1110	  break;
1111#  endif /* emacs */
1112
1113	case wordchar:
1114	  printf ("/wordchar");
1115          break;
1116
1117	case notwordchar:
1118	  printf ("/notwordchar");
1119          break;
1120
1121	case begbuf:
1122	  printf ("/begbuf");
1123          break;
1124
1125	case endbuf:
1126	  printf ("/endbuf");
1127          break;
1128
1129        default:
1130          printf ("?%ld", (long int) *(p-1));
1131	}
1132
1133      putchar ('\n');
1134    }
1135
1136#  ifdef _LIBC
1137  printf ("%td:\tend of pattern.\n", p - start);
1138#  else
1139  printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1140#  endif
1141}
1142
1143
1144void
1145PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
1146{
1147  UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
1148
1149  PREFIX(print_partial_compiled_pattern) (buffer, buffer
1150				  + bufp->used / sizeof(UCHAR_T));
1151  printf ("%ld bytes used/%ld bytes allocated.\n",
1152	  bufp->used, bufp->allocated);
1153
1154  if (bufp->fastmap_accurate && bufp->fastmap)
1155    {
1156      printf ("fastmap: ");
1157      print_fastmap (bufp->fastmap);
1158    }
1159
1160#  ifdef _LIBC
1161  printf ("re_nsub: %Zd\t", bufp->re_nsub);
1162#  else
1163  printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1164#  endif
1165  printf ("regs_alloc: %d\t", bufp->regs_allocated);
1166  printf ("can_be_null: %d\t", bufp->can_be_null);
1167  printf ("newline_anchor: %d\n", bufp->newline_anchor);
1168  printf ("no_sub: %d\t", bufp->no_sub);
1169  printf ("not_bol: %d\t", bufp->not_bol);
1170  printf ("not_eol: %d\t", bufp->not_eol);
1171  printf ("syntax: %lx\n", bufp->syntax);
1172  /* Perhaps we should print the translate table?  */
1173}
1174
1175
1176void
1177PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
1178                             int size1, const CHAR_T *string2, int size2)
1179{
1180  int this_char;
1181
1182  if (where == NULL)
1183    printf ("(null)");
1184  else
1185    {
1186      int cnt;
1187
1188      if (FIRST_STRING_P (where))
1189        {
1190          for (this_char = where - string1; this_char < size1; this_char++)
1191	    PUT_CHAR (string1[this_char]);
1192
1193          where = string2;
1194        }
1195
1196      cnt = 0;
1197      for (this_char = where - string2; this_char < size2; this_char++)
1198	{
1199	  PUT_CHAR (string2[this_char]);
1200	  if (++cnt > 100)
1201	    {
1202	      fputs ("...", stdout);
1203	      break;
1204	    }
1205	}
1206    }
1207}
1208
1209#  ifndef DEFINED_ONCE
/* Write the single character C to the standard error stream.  */
void
printchar (int c)
{
  (void) fputc (c, stderr);
}
1215#  endif
1216
1217# else /* not DEBUG */
1218
/* Non-debugging build: `assert' and all the DEBUG_* tracing macros
   expand to nothing, so every debugging call written with them
   disappears from the compiled code.  */
#  ifndef DEFINED_ONCE
/* Defined only on the first pass (guarded by DEFINED_ONCE).  */
#   undef assert
#   define assert(e)

#   define DEBUG_STATEMENT(e)
#   define DEBUG_PRINT1(x)
#   define DEBUG_PRINT2(x1, x2)
#   define DEBUG_PRINT3(x1, x2, x3)
#   define DEBUG_PRINT4(x1, x2, x3, x4)
#  endif /* not DEFINED_ONCE */
/* These two sit outside the DEFINED_ONCE guard, so they are defined on
   every pass through this section.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1231
1232# endif /* not DEBUG */
1233
1234
1235
1236# ifdef WCHAR
1237/* This  convert a multibyte string to a wide character string.
1238   And write their correspondances to offset_buffer(see below)
1239   and write whether each wchar_t is binary data to is_binary.
1240   This assume invalid multibyte sequences as binary data.
1241   We assume offset_buffer and is_binary is already allocated
1242   enough space.  */
1243
1244static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
1245				  size_t len, int *offset_buffer,
1246				  char *is_binary);
1247static size_t
1248convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
1249                    int *offset_buffer, char *is_binary)
1250     /* It hold correspondances between src(char string) and
1251	dest(wchar_t string) for optimization.
1252	e.g. src  = "xxxyzz"
1253             dest = {'X', 'Y', 'Z'}
1254	      (each "xxx", "y" and "zz" represent one multibyte character
1255	       corresponding to 'X', 'Y' and 'Z'.)
1256	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1257	  	        = {0, 3, 4, 6}
1258     */
1259{
1260  wchar_t *pdest = dest;
1261  const unsigned char *psrc = src;
1262  size_t wc_count = 0;
1263
1264  mbstate_t mbs;
1265  int i, consumed;
1266  size_t mb_remain = len;
1267  size_t mb_count = 0;
1268
1269  /* Initialize the conversion state.  */
1270  memset (&mbs, 0, sizeof (mbstate_t));
1271
1272  offset_buffer[0] = 0;
1273  for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1274	 psrc += consumed)
1275    {
1276#ifdef _LIBC
1277      consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
1278#else
1279      consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1280#endif
1281
1282      if (consumed <= 0)
1283	/* failed to convert. maybe src contains binary data.
1284	   So we consume 1 byte manualy.  */
1285	{
1286	  *pdest = *psrc;
1287	  consumed = 1;
1288	  is_binary[wc_count] = TRUE;
1289	}
1290      else
1291	is_binary[wc_count] = FALSE;
1292      /* In sjis encoding, we use yen sign as escape character in
1293	 place of reverse solidus. So we convert 0x5c(yen sign in
1294	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1295	 solidus in UCS2).  */
1296      if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1297	*pdest = (wchar_t) *psrc;
1298
1299      offset_buffer[wc_count + 1] = mb_count += consumed;
1300    }
1301
1302  /* Fill remain of the buffer with sentinel.  */
1303  for (i = wc_count + 1 ; i <= len ; i++)
1304    offset_buffer[i] = mb_count + 1;
1305
1306  return wc_count;
1307}
1308
1309# endif /* WCHAR */
1310
1311#else /* not INSIDE_RECURSION */
1312
/* The regexp syntax currently in effect, as a mask of syntax bits.
   Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
   also be assigned to arbitrarily: each pattern buffer stores its own
   syntax, so it can be changed between regex compilations.  */
/* This has no initializer because initialized variables in Emacs
   become read-only after dumping.  */
reg_syntax_t re_syntax_options;
1319
1320
1321/* Specify the precise syntax of regexps for compilation.  This provides
1322   for compatibility for various utilities which historically have
1323   different, incompatible syntaxes.
1324
1325   The argument SYNTAX is a bit mask comprised of the various bits
1326   defined in regex.h.  We return the old syntax.  */
1327
1328reg_syntax_t
1329re_set_syntax (reg_syntax_t syntax)
1330{
1331  reg_syntax_t ret = re_syntax_options;
1332
1333  re_syntax_options = syntax;
1334# ifdef DEBUG
1335  if (syntax & RE_DEBUG)
1336    debug = 1;
1337  else if (debug) /* was on but now is not */
1338    debug = 0;
1339# endif /* DEBUG */
1340  return ret;
1341}
1342# ifdef _LIBC
1343weak_alias (__re_set_syntax, re_set_syntax)
1344# endif
1345
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there: the
   trailing comment on each entry names the code it belongs to.
   The strings are wrapped in gettext_noop, which only marks them for
   translation.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?  */

static const char *re_error_msgid[] =
  {
    gettext_noop ("Success"),	/* REG_NOERROR */
    gettext_noop ("No match"),	/* REG_NOMATCH */
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
  };
1371
1372#endif /* INSIDE_RECURSION */
1373
1374#ifndef DEFINED_ONCE
1375/* Avoiding alloca during matching, to placate r_alloc.  */
1376
1377/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1378   searching and matching functions should not call alloca.  On some
1379   systems, alloca is implemented in terms of malloc, and if we're
1380   using the relocating allocator routines, then malloc could cause a
1381   relocation, which might (if the strings being searched are in the
1382   ralloc heap) shift the data out from underneath the regexp
1383   routines.
1384
1385   Here's another reason to avoid allocation: Emacs
1386   processes input from X in a signal handler; processing X input may
1387   call malloc; if input arrives while a matching routine is calling
1388   malloc, then we're scrod.  But Emacs can't just block input while
1389   calling matching routines; then we don't notice interrupts when
1390   they come in.  So, Emacs blocks input around all regexp calls
1391   except the matching calls, which it leaves unprotected, in the
1392   faith that they will not malloc.  */
1393
/* Normally, this is fine: by default the matching routines are allowed
   to allocate.  The definition is withdrawn again below for the one
   configuration where that is unsafe.  */
# define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
# ifdef __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */
# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
#  undef MATCH_MAY_ALLOCATE
# endif
1411#endif /* not DEFINED_ONCE */
1412
1413#ifdef INSIDE_RECURSION
1414/* Failure stack declarations and macros; both re_compile_fastmap and
1415   re_match_2 use a failure stack.  These have to be macros because of
1416   REGEX_ALLOCATE_STACK.  */
1417
1418
/* Number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  The #ifndef lets a definition
   made earlier (e.g. on the compiler command line) take precedence.  */
# ifndef INIT_FAILURE_ALLOC
#  define INIT_FAILURE_ALLOC 5
# endif
1425
/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.

   The two branches below are identical except for the integer types:
   with INT_IS_16BIT the limit, the integer member of a stack element
   and the stack size/avail counters use `long' types instead of
   `int'/`unsigned'.  */

# ifdef INT_IS_16BIT

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
long int re_max_failures = 4000;
#   else
long int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: either a pointer or an integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  long int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself: STACK points to SIZE slots, of which the
   first AVAIL are in use.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned long int size;
  unsigned long int avail;		/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# else /* not INT_IS_16BIT */

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
int re_max_failures = 4000;
#   else
int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: either a pointer or an integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself: STACK points to SIZE slots, of which the
   first AVAIL are in use.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# endif /* INT_IS_16BIT */
1486
/* Predicates on the failure stack.  FAIL_STACK_EMPTY and FAIL_STACK_FULL
   take no argument and inspect the variable named `fail_stack' at the
   point of use; FAIL_STACK_PTR_EMPTY does the same through a pointer
   variable named `fail_stack_ptr'.  */
# ifndef DEFINED_ONCE
#  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
# endif
1492
1493
/* Define macros to initialize and free the failure stack.
   Both operate on the variable named `fail_stack'.

   When the matcher may allocate, INIT_FAIL_STACK obtains room for
   INIT_FAILURE_ALLOC elements with REGEX_ALLOCATE_STACK and marks the
   stack empty; it does `return -2' from the enclosing function if the
   alloc fails.  RESET_FAIL_STACK hands the storage to REGEX_FREE_STACK.

   Otherwise INIT_FAIL_STACK only resets `avail' (no storage is obtained
   here) and RESET_FAIL_STACK expands to nothing.  */

# ifdef MATCH_MAY_ALLOCATE
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
									\
    if (fail_stack.stack == NULL)				\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;			\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
# else
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()
# endif
1519
1520
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if it succeeds, and 0 if we either ran out of memory
   allocating space for it or it was already too large.

   The limit is only approximate because it is tested before doubling:
   any stack whose size does not exceed
   re_max_failures * MAX_FAILURE_ITEMS is doubled once more.

   The result of REGEX_REALLOCATE_STACK is stored straight into
   (fail_stack).stack before it is tested, so after a failed
   reallocation the `stack' member is NULL and `size' is unchanged.
   NOTE(review): whether the old storage is then still reachable depends
   on how REGEX_REALLOCATE_STACK is implemented -- confirm for
   malloc-based builds.

   REGEX_REALLOCATE_STACK requires `destination' be declared.   */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
1540
1541
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The stack is grown with DOUBLE_FAIL_STACK when it is full.  Fullness
   is tested on the FAIL_STACK argument itself, so the macro works on
   whichever stack it is handed, not only on a variable that happens to
   be called `fail_stack'.  FAIL_STACK is evaluated several times and
   must therefore be free of side effects.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
1551
/* Primitive push and pop operations on the failure stack.  All of them
   work on the variable named `fail_stack' and perform no bounds
   checking: the caller must have made room before pushing and must
   know the stack is nonempty before popping.  Every expansion is fully
   parenthesized so that it behaves as a single operand wherever it is
   used inside a larger expression.  */

/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_POINTER(item)					\
  (fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item))

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_INT(item)					\
  (fail_stack.stack[fail_stack.avail++].integer = (item))

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_ELT(item)					\
  (fail_stack.stack[fail_stack.avail++] = (item))

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  */
# define POP_FAILURE_POINTER() (fail_stack.stack[--fail_stack.avail].pointer)
# define POP_FAILURE_INT() (fail_stack.stack[--fail_stack.avail].integer)
# define POP_FAILURE_ELT() (fail_stack.stack[--fail_stack.avail])
1575
/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores the
   popped integer through ITEM_ADDR; without DEBUG both expand to
   nothing, so the failure stack carries one item less per failure
   point.  */
# ifdef DEBUG
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# endif
1584
1585
/* Push the information about the state we will need
   if we ever fail back to it.

   PATTERN_PLACE and STRING_PLACE are the pattern and string positions
   saved with the failure point; FAILURE_CODE is the value returned from
   the enclosing function when the stack cannot be grown.

   The stack is first doubled until NUM_FAILURE_ITEMS slots are free.
   Items are then pushed in this order: for every register from
   lowest_active_reg to highest_active_reg its regstart, regend and
   reg_info word; then lowest_active_reg, highest_active_reg,
   PATTERN_PLACE, STRING_PLACE and -- through DEBUG_PUSH, i.e. in DEBUG
   builds only -- the failure id.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   be declared; this macro declares it itself.

   Does `return FAILURE_CODE' if runs out of memory.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    /* Can't be int, since there is not a shred of a guarantee that int	\
       is wide enough to hold a value of something to which pointer can	\
       be assigned */							\
    active_reg_t this_reg;						\
    									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
									\
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
          return failure_code;						\
									\
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
		       (fail_stack).size);				\
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    /* The constant condition makes the register loop unconditional.  */ \
    if (1)								\
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
			reg_info[this_reg].word.pointer);		\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_POINTER (pattern_place);				\
									\
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
				 size2);				\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_POINTER (string_place);				\
									\
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
1673
/* Sizes, in failure stack slots, of what PUSH_FAILURE_POINT pushes.  */
# ifndef DEFINED_ONCE
/* This is the number of items that are pushed and popped on the stack
   for each register (its start, its end and its info word).  */
#  define NUM_REG_ITEMS  3

/* Individual items aside from the registers: the lowest and highest
   active register, the pattern position and the string position, plus
   the failure point id when debugging.  */
#  ifdef DEBUG
#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#  else
#   define NUM_NONREG_ITEMS 4
#  endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  The `0 ? 0 :' has a constant false
   condition, so the count always covers every register from
   lowest_active_reg to highest_active_reg.  */
#  define NUM_FAILURE_ITEMS				\
  (((0							\
     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
    * NUM_REG_ITEMS)					\
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
1702
1703
/* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   STR is left untouched when the saved string position is NULL.
   After the pop `set_regs_matched_done' is reset to 0.

   The expansion is a bare brace block that declares its own locals; it
   is not wrapped in do ... while (0).

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const UCHAR_T *string_temp;						\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const CHAR_T *) string_temp;					\
									\
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
									\
  /* The condition is constant, so only the first branch ever runs and	\
     the `else' part below is dead code.  */				\
  if (1)								\
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
      {									\
	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
									\
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
	DEBUG_PRINT2 ("      info: %p\n",				\
		      reg_info[this_reg].word.pointer);			\
									\
	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
									\
	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
      }									\
  else									\
    {									\
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
	{								\
	  reg_info[this_reg].word.integer = 0;				\
	  regend[this_reg] = 0;						\
	  regstart[this_reg] = 0;					\
	}								\
      highest_active_reg = high_reg;					\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
1784
/* Structure for per-register (a.k.a. per-group) information.
   Other register information, such as the
   starting and ending positions (which are addresses), and the list of
   inner groups (which is a bits list) are maintained in separate
   variables.

   We are making a (strictly speaking) nonportable assumption here: that
   the compiler will pack our bit fields into something that fits into
   the type of `word', i.e., is something that fits into one item on the
   failure stack.  */


/* Declarations and macros for re_match_2.  */

/* The union overlays the flag bits with a failure stack element, so
   that all flags of a register can be saved and restored as the single
   item `word'.  */
typedef union
{
  PREFIX(fail_stack_elt_t) word;
  struct
  {
      /* This field is one if this group can match the empty string,
         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
# define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    /* The remaining one-bit flags are read and written through the
       IS_ACTIVE, MATCHED_SOMETHING and EVER_MATCHED_SOMETHING macros.  */
    unsigned is_active : 1;
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} PREFIX(register_info_type);
1813
# ifndef DEFINED_ONCE
/* Accessors for the flag bits of a register_info_type R.  Each expands
   to the bit-field itself, so it can be used as an lvalue.  */
#  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#  define IS_ACTIVE(R)  ((R).bits.is_active)
#  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)


/* Call this when have matched a real character; it sets `matched' flags
   for the subexpressions which we are currently inside.  Also records
   that those subexprs have matched.

   Uses the variables `set_regs_matched_done', `lowest_active_reg',
   `highest_active_reg' and `reg_info'.  The work is skipped while
   `set_regs_matched_done' is nonzero, and the macro sets that flag
   itself once it has marked the registers.  */
#  define SET_REGS_MATCHED()						\
  do									\
    {									\
      if (!set_regs_matched_done)					\
	{								\
	  active_reg_t r;						\
	  set_regs_matched_done = 1;					\
	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
	    {								\
	      MATCHED_SOMETHING (reg_info[r])				\
		= EVER_MATCHED_SOMETHING (reg_info[r])			\
		= 1;							\
	    }								\
	}								\
    }									\
  while (0)
# endif /* not DEFINED_ONCE */
1841
/* Registers are set to a sentinel when they haven't yet matched.
   The sentinel is the address of a dummy object that exists only to
   provide a unique pointer value; REG_UNSET (e) tests whether E is that
   sentinel.  */
static CHAR_T PREFIX(reg_unset_dummy);
# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1846
/* Subroutine declarations and macros for regex_compile.
   These are forward declarations only; the definitions are not part of
   this section.  */
static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
                               int arg1, int arg2);
static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
                                int arg, UCHAR_T *end);
static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
                                int arg1, int arg2, UCHAR_T *end);
static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
                                         const CHAR_T *p,
                                         reg_syntax_t syntax);
static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
                                         const CHAR_T *pend,
                                         reg_syntax_t syntax);
/* The range compiler is declared under a different name and signature
   in the wide character (WCHAR) and the byte builds.  */
# ifdef WCHAR
static reg_errcode_t wcs_compile_range (CHAR_T range_start,
                                        const CHAR_T **p_ptr,
                                        const CHAR_T *pend,
                                        char *translate,
                                        reg_syntax_t syntax,
                                        UCHAR_T *b,
                                        CHAR_T *char_set);
static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
# else /* BYTE */
static reg_errcode_t byte_compile_range (unsigned int range_start,
                                         const char **p_ptr,
                                         const char *pend,
                                         char *translate,
                                         reg_syntax_t syntax,
                                         unsigned char *b);
# endif /* WCHAR */
1878
/* Fetch the next pattern character into CH and advance `p', returning
   REG_EEND from the enclosing function if the pattern is exhausted.
   The character is converted from the (possibly signed) character the
   user passed in to an unsigned value, so that it can be used as an
   array index, and is then mapped through `translate' when that table
   is non-null.  */
/* In the wide-character build only characters <= 0xff are translated,
   because a 4GB table cannot be allocated for encodings such as UCS4
   whose character set needs 4 bytes.  */
# ifndef PATFETCH
#  ifdef WCHAR
#   define PATFETCH(ch)							\
  do									\
    {									\
      if (pend == p)							\
	return REG_EEND;						\
      ch = (UCHAR_T) *p;						\
      p++;								\
      if (translate && (ch <= 0xff))					\
	ch = (UCHAR_T) translate[ch];					\
    }									\
  while (0)
#  else /* BYTE */
#   define PATFETCH(ch)							\
  do									\
    {									\
      if (pend == p)							\
	return REG_EEND;						\
      ch = (unsigned char) *p;						\
      p++;								\
      if (translate)							\
	ch = (unsigned char) translate[ch];				\
    }									\
  while (0)
#  endif /* WCHAR */
# endif
1901
/* Like PATFETCH, but without translation: store the next pattern
   character in CH and advance `p', or return REG_EEND from the
   enclosing function when `p' has reached `pend'.  */
# define PATFETCH_RAW(ch)						\
  do									\
    {									\
      if (pend == p)							\
	return REG_EEND;						\
      ch = (UCHAR_T) *p;						\
      p++;								\
    }									\
  while (0)

/* Step back over the character most recently fetched.  */
# define PATUNFETCH p--
1911
1912
/* Yield translate[D] when `translate' is non-null, and D itself
   otherwise.  Some of the data is declared `char *' so that string
   constants can be passed without warnings, which is why the subscript
   is cast: a character used as an index must be made unsigned.  */
/* In the wide-character build only characters <= 0xff are translated,
   because a 4GB table cannot be allocated for encodings such as UCS4
   whose character set needs 4 bytes.  */

# ifndef TRANSLATE
#  ifdef WCHAR
#   define TRANSLATE(d) \
  (!(translate && ((UCHAR_T) (d)) <= 0xff) \
   ? (d) : (char) translate[(unsigned char) (d)])
#  else /* BYTE */
#   define TRANSLATE(d) \
  (!translate ? (char) (d) : (char) translate[(unsigned char) (d)])
#  endif /* WCHAR */
# endif
1931
1932
/* Macros for outputting the compiled pattern into `buffer'.  */

/* Size in bytes given to the buffer when the caller did not supply
   one.  */
# define INIT_BUF_SIZE  (sizeof (UCHAR_T) * 32)

/* Keep calling EXTEND_BUFFER until there is room for at least N more
   elements past `b': N bytes in the BYTE build, N CHAR_T units in the
   WCHAR build.  */
# ifdef WCHAR
#  define GET_BUFFER_SPACE(n)						\
    while (bufp->allocated						\
	   < ((unsigned long) b - (unsigned long) COMPILED_BUFFER_VAR	\
	      + (n) * sizeof (CHAR_T)))					\
      EXTEND_BUFFER ()
# else /* BYTE */
#  define GET_BUFFER_SPACE(n)						\
    while (bufp->allocated						\
	   < (unsigned long) (b - bufp->buffer + (n)))			\
      EXTEND_BUFFER ()
# endif /* WCHAR */
1949
/* Reserve room for one more element, then append VAL at `b'.  */
# define BUF_PUSH(val)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b = (UCHAR_T) (val);						\
    b++;								\
  } while (0)


/* Reserve room for two more elements, then append V1 followed by V2.  */
# define BUF_PUSH_2(v1, v2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b = (UCHAR_T) (v1);						\
    b++;								\
    *b = (UCHAR_T) (v2);						\
    b++;								\
  } while (0)


/* The same as BUF_PUSH_2, but for three elements.  */
# define BUF_PUSH_3(v1, v2, v3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b = (UCHAR_T) (v1);						\
    b++;								\
    *b = (UCHAR_T) (v2);						\
    b++;								\
    *b = (UCHAR_T) (v3);						\
    b++;								\
  } while (0)
1975
/* Operand common to all the jump macros below: the distance from FROM
   to TARGET, less the 1 + OFFSET_ADDRESS_SIZE elements that the jump
   instruction itself occupies.  */
# define JUMP_DISPLACEMENT(from, target) \
  ((int) ((target) - (from) - (1 + OFFSET_ADDRESS_SIZE)))

/* Store a jump with opcode OP at LOC, aimed at location TO.  */
# define STORE_JUMP(op, loc, to) \
  PREFIX(store_op1) (op, loc, JUMP_DISPLACEMENT (loc, to))

/* Likewise, for a jump that carries a second argument ARG.  */
# define STORE_JUMP2(op, loc, to, arg) \
  PREFIX(store_op2) (op, loc, JUMP_DISPLACEMENT (loc, to), arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP(op, loc, to) \
  PREFIX(insert_op1) (op, loc, JUMP_DISPLACEMENT (loc, to), b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP2(op, loc, to, arg) \
  PREFIX(insert_op2) (op, loc, JUMP_DISPLACEMENT (loc, to), arg, b)
1993
/* MAX_BUF_SIZE is not an arbitrary limit: the arguments that represent
   offsets into the pattern are two bytes long.  If 2^16 bytes turned
   out to be too small, many things would have to change.  */
/* Any other compiler which, like MSC, has an allocation limit below
   2^16 bytes must take an approach similar to the one used for MSC
   below and lower MAX_BUF_SIZE a little.  Otherwise a reallocation to
   0 bytes may be attempted, which is not going to work well.  */
# ifndef DEFINED_ONCE
#  if !defined _MSC_VER || defined WIN32
#   define MAX_BUF_SIZE (1L << 16)
#   define REALLOC(p,s) realloc ((p), (s))
#  else
/* The 16-bit versions of Microsoft C limit malloc to approximately
   65512 bytes.  The cast in REALLOC silences a flurry of conversion
   warnings but is not otherwise required.  */
#   define MAX_BUF_SIZE  65500L
#   define REALLOC(p,s) realloc ((p), (size_t) (s))
#  endif

/* Helpers for EXTEND_BUFFER, which doubles the size of the buffer with
   realloc and then repoints everything that pointed into the old
   block; growing beyond MAX_BUF_SIZE is reported as an error there.
   MOVE_BUFFER_POINTER (PTR) shifts PTR by the local `incr' of the
   expansion site.  ELSE_EXTEND_BUFFER_HIGH_BOUND supplies the `else'
   branch taken when the block did not move; it is empty unless
   bounded pointers are in use.  */
#  if !__BOUNDED_POINTERS__
#   define MOVE_BUFFER_POINTER(PTR) (PTR) += incr
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND
#  else
#   define SET_HIGH_BOUND(PTR) \
  (__ptrhigh (PTR) = __ptrlow (PTR) + bufp->allocated)
#   define MOVE_BUFFER_POINTER(PTR) \
  (__ptrlow (PTR) += incr, SET_HIGH_BOUND (PTR), __ptrvalue (PTR) += incr)
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
  else						\
    {						\
      SET_HIGH_BOUND (b);			\
      SET_HIGH_BOUND (begalt);			\
      if (fixup_alt_jump)			\
	SET_HIGH_BOUND (fixup_alt_jump);	\
      if (laststart)				\
	SET_HIGH_BOUND (laststart);		\
      if (pending_exact)			\
	SET_HIGH_BOUND (pending_exact);		\
    }
#  endif
# endif /* not DEFINED_ONCE */
2039
/* Grow the compiled-pattern buffer: double `bufp->allocated', clamp it
   to MAX_BUF_SIZE, reallocate, and, if the block moved, shift `b',
   `begalt' and the non-null ones among `fixup_alt_jump', `laststart'
   and `pending_exact' by the same distance.  The enclosing function
   returns REG_ESIZE when the buffer cannot grow any further and
   REG_ESPACE when the reallocation fails.
   NOTE(review): both error paths use a plain `return', and the buffer
   pointer has already been overwritten by the time the allocation
   result is tested.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *prev_base = COMPILED_BUFFER_VAR;				\
    int n_wchars;							\
    if (MAX_BUF_SIZE < bufp->allocated + sizeof (UCHAR_T))		\
      return REG_ESIZE;							\
    bufp->allocated = bufp->allocated << 1;				\
    if (MAX_BUF_SIZE < bufp->allocated)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    /* Number of characters that fit in the new buffer.  */		\
    n_wchars = bufp->allocated / sizeof (UCHAR_T);			\
    if (!n_wchars)							\
      n_wchars = 1;							\
    /* Round the byte count down to a whole number of elements.  */	\
    bufp->allocated = n_wchars * sizeof (UCHAR_T);			\
    RETALLOC (COMPILED_BUFFER_VAR, n_wchars, UCHAR_T);			\
    bufp->buffer = (char *) COMPILED_BUFFER_VAR;			\
    if (NULL == COMPILED_BUFFER_VAR)					\
      return REG_ESPACE;						\
    /* A moved block drags every pointer into it along.  */		\
    if (COMPILED_BUFFER_VAR != prev_base)				\
      {									\
	PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - prev_base;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *prev_base = COMPILED_BUFFER_VAR;				\
    if (MAX_BUF_SIZE == bufp->allocated)				\
      return REG_ESIZE;							\
    bufp->allocated = bufp->allocated << 1;				\
    if (MAX_BUF_SIZE < bufp->allocated)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
					bufp->allocated);		\
    if (NULL == COMPILED_BUFFER_VAR)					\
      return REG_ESPACE;						\
    /* A moved block drags every pointer into it along.  */		\
    if (COMPILED_BUFFER_VAR != prev_base)				\
      {									\
	PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - prev_base;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
2103
# ifndef DEFINED_ONCE
/* The register number argument of {start,stop}_memory occupies a
   single byte, so the number of groups we can report on is limited to
   what fits in that byte.  */
#  define MAX_REGNUM 255

/* A pattern may still contain more than `MAX_REGNUM' registers; the
   excess ones are simply ignored.  */
typedef unsigned int regnum_t;


/* Types and macros for the compile stack.  */

/* Offsets may point forwards or backwards, so this type has to cover
   -(MAX_BUF_SIZE - 1) through MAX_BUF_SIZE - 1.  A plain int may be
   too narrow when sizeof (int) == 2.  */
typedef long int pattern_offset_t;

/* One entry of the compile stack.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack itself: STACK points at SIZE entries, of which the
   first AVAIL are in use, so AVAIL is the offset of the next open
   position.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;
  unsigned avail;
} compile_stack_type;


/* Number of entries the compile stack starts out with.  */
#  define INIT_COMPILE_STACK_SIZE 32

/* The macros below operate on a variable named `compile_stack' at the
   point of use.  */
#  define COMPILE_STACK_EMPTY  (0 == compile_stack.avail)
#  define COMPILE_STACK_FULL  (compile_stack.size == compile_stack.avail)

/* The next available element, as an lvalue.  */
#  define COMPILE_STACK_TOP (*(compile_stack.stack + compile_stack.avail))

# endif /* not DEFINED_ONCE */
2149
/* Set the bit for character C in the bit list that `b' points to.
   C is reduced to an unsigned char; the byte at index C / BYTEWIDTH
   receives bit C % BYTEWIDTH.  Both uses of the argument are fully
   parenthesized so that a compound expression such as `x >> 8' is
   converted as a whole rather than having the cast bind to its first
   operand only.  Note that C is evaluated twice.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* not DEFINED_ONCE */
2156
/* Read a run of decimal digits from the uncompiled pattern and
   accumulate it into VALUE.  Characters are fetched with PATFETCH into
   `c' until a non-digit is seen or `p' reaches `pend'.  A negative
   VALUE is reset to 0 before the first digit is added, so VALUE keeps
   its entry value when no digit is read.  Once VALUE exceeds
   RE_DUP_MAX the remaining digits are still consumed but no longer
   added.  */
# define GET_UNSIGNED_NUMBER(value) \
  {									\
    for (; p != pend;)							\
      {									\
	PATFETCH (c);							\
	if (!(c >= '0' && c <= '9'))					\
	  break;							\
	if (value > RE_DUP_MAX)						\
	  continue;							\
	if (value < 0)							\
	  value = 0;							\
	value = value * 10 + c - '0';					\
      }									\
  }
2173
# ifndef DEFINED_ONCE
#  if !(defined _LIBC || WIDE_CHAR_SUPPORT)
/* Without wide-character support only the fixed set of class names
   listed below is recognized.  */
#   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */

#   define IS_CHAR_CLASS(name)						\
   (STREQ (name, "alpha")						\
    || STREQ (name, "upper")						\
    || STREQ (name, "lower")						\
    || STREQ (name, "digit")						\
    || STREQ (name, "alnum")						\
    || STREQ (name, "xdigit")						\
    || STREQ (name, "space")						\
    || STREQ (name, "print")						\
    || STREQ (name, "punct")						\
    || STREQ (name, "graph")						\
    || STREQ (name, "cntrl")						\
    || STREQ (name, "blank"))
#  else
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#   ifndef CHARCLASS_NAME_MAX
/* This shouldn't happen, but some implementations might still have
   this problem.  Use a reasonable default value.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   else
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   endif

#   ifndef _LIBC
#    define IS_CHAR_CLASS(name) wctype (name)
#   else
#    define IS_CHAR_CLASS(name) __wctype (name)
#   endif
#  endif
# endif /* not DEFINED_ONCE */
2203
2204# ifndef MATCH_MAY_ALLOCATE
2205
2206/* If we cannot allocate large objects within re_match_2_internal,
2207   we make the fail stack and register vectors global.
2208   The fail stack, we grow to the maximum size when a regexp
2209   is compiled.
2210   The register vectors, we adjust in size each time we
2211   compile a regexp, according to the number of registers it needs.  */
2212
2213static PREFIX(fail_stack_type) fail_stack;
2214
2215/* Size with which the following vectors are currently allocated.
2216   That is so we can make them bigger as needed,
2217   but never make them smaller.  */
2218#  ifdef DEFINED_ONCE
2219static int regs_allocated_size;
2220
2221static const char **     regstart, **     regend;
2222static const char ** old_regstart, ** old_regend;
2223static const char **best_regstart, **best_regend;
2224static const char **reg_dummy;
2225#  endif /* DEFINED_ONCE */
2226
2227static PREFIX(register_info_type) *PREFIX(reg_info);
2228static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2229
2230/* Make the register vectors big enough for NUM_REGS registers,
2231   but don't make them smaller.  */
2232
2233static void
2234PREFIX(regex_grow_registers) (int num_regs)
2235{
2236  if (num_regs > regs_allocated_size)
2237    {
2238      RETALLOC_IF (regstart,	 num_regs, const char *);
2239      RETALLOC_IF (regend,	 num_regs, const char *);
2240      RETALLOC_IF (old_regstart, num_regs, const char *);
2241      RETALLOC_IF (old_regend,	 num_regs, const char *);
2242      RETALLOC_IF (best_regstart, num_regs, const char *);
2243      RETALLOC_IF (best_regend,	 num_regs, const char *);
2244      RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2245      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2246      RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2247
2248      regs_allocated_size = num_regs;
2249    }
2250}
2251
2252# endif /* not MATCH_MAY_ALLOCATE */
2253
2254# ifndef DEFINED_ONCE
2255static boolean group_in_compile_stack (compile_stack_type compile_stack,
2256                                       regnum_t regnum);
2257# endif /* not DEFINED_ONCE */
2258
2259/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2260   Returns one of error codes defined in `regex.h', or zero for success.
2261
2262   Assumes the `allocated' (and perhaps `buffer') and `translate'
2263   fields are set in BUFP on entry.
2264
2265   If it succeeds, results are put in BUFP (if it returns an error, the
2266   contents of BUFP are undefined):
2267     `buffer' is the compiled pattern;
2268     `syntax' is set to SYNTAX;
2269     `used' is set to the length of the compiled pattern;
2270     `fastmap_accurate' is zero;
2271     `re_nsub' is the number of subexpressions in PATTERN;
2272     `not_bol' and `not_eol' are zero;
2273
2274   The `fastmap' and `newline_anchor' fields are neither
2275   examined nor set.  */
2276
/* Free the storage regex_compile allocated for itself and return VALUE
   from the enclosing function.  The compile stack is always released;
   the wide-character build also releases `pattern', `mbs_offset' and
   `is_binary'.  VALUE is evaluated after the frees.  */
# ifdef WCHAR
#  define FREE_STACK_RETURN(value)		\
  do						\
    {						\
      free (pattern);				\
      free (mbs_offset);			\
      free (is_binary);				\
      free (compile_stack.stack);		\
      return (value);				\
    }						\
  while (0)
# else
#  define FREE_STACK_RETURN(value)		\
  do						\
    {						\
      free (compile_stack.stack);		\
      return (value);				\
    }						\
  while (0)
# endif /* WCHAR */
2285
2286static reg_errcode_t
2287PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
2288                       size_t ARG_PREFIX(size), reg_syntax_t syntax,
2289                       struct re_pattern_buffer *bufp)
2290{
2291  /* We fetch characters from PATTERN here.  Even though PATTERN is
2292     `char *' (i.e., signed), we declare these variables as unsigned, so
2293     they can be reliably used as array indices.  */
2294  register UCHAR_T c, c1;
2295
2296#ifdef WCHAR
2297  /* A temporary space to keep wchar_t pattern and compiled pattern.  */
2298  CHAR_T *pattern, *COMPILED_BUFFER_VAR;
2299  size_t size;
2300  /* offset buffer for optimization. See convert_mbs_to_wc.  */
2301  int *mbs_offset = NULL;
2302  /* It hold whether each wchar_t is binary data or not.  */
2303  char *is_binary = NULL;
2304  /* A flag whether exactn is handling binary data or not.  */
2305  char is_exactn_bin = FALSE;
2306#endif /* WCHAR */
2307
2308  /* A random temporary spot in PATTERN.  */
2309  const CHAR_T *p1;
2310
2311  /* Points to the end of the buffer, where we should append.  */
2312  register UCHAR_T *b;
2313
2314  /* Keeps track of unclosed groups.  */
2315  compile_stack_type compile_stack;
2316
2317  /* Points to the current (ending) position in the pattern.  */
2318#ifdef WCHAR
2319  const CHAR_T *p;
2320  const CHAR_T *pend;
2321#else /* BYTE */
2322  const CHAR_T *p = pattern;
2323  const CHAR_T *pend = pattern + size;
2324#endif /* WCHAR */
2325
2326  /* How to translate the characters in the pattern.  */
2327  RE_TRANSLATE_TYPE translate = bufp->translate;
2328
2329  /* Address of the count-byte of the most recently inserted `exactn'
2330     command.  This makes it possible to tell if a new exact-match
2331     character can be added to that command or if the character requires
2332     a new `exactn' command.  */
2333  UCHAR_T *pending_exact = 0;
2334
2335  /* Address of start of the most recently finished expression.
2336     This tells, e.g., postfix * where to find the start of its
2337     operand.  Reset at the beginning of groups and alternatives.  */
2338  UCHAR_T *laststart = 0;
2339
2340  /* Address of beginning of regexp, or inside of last group.  */
2341  UCHAR_T *begalt;
2342
2343  /* Address of the place where a forward jump should go to the end of
2344     the containing expression.  Each alternative of an `or' -- except the
2345     last -- ends with a forward jump of this sort.  */
2346  UCHAR_T *fixup_alt_jump = 0;
2347
2348  /* Counts open-groups as they are encountered.  Remembered for the
2349     matching close-group on the compile stack, so the same register
2350     number is put in the stop_memory as the start_memory.  */
2351  regnum_t regnum = 0;
2352
2353#ifdef WCHAR
2354  /* Initialize the wchar_t PATTERN and offset_buffer.  */
2355  p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2356  mbs_offset = TALLOC(csize + 1, int);
2357  is_binary = TALLOC(csize + 1, char);
2358  if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2359    {
2360      free(pattern);
2361      free(mbs_offset);
2362      free(is_binary);
2363      return REG_ESPACE;
2364    }
2365  pattern[csize] = L'\0';	/* sentinel */
2366  size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2367  pend = p + size;
2368  if (size < 0)
2369    {
2370      free(pattern);
2371      free(mbs_offset);
2372      free(is_binary);
2373      return REG_BADPAT;
2374    }
2375#endif
2376
2377#ifdef DEBUG
2378  DEBUG_PRINT1 ("\nCompiling pattern: ");
2379  if (debug)
2380    {
2381      unsigned debug_count;
2382
2383      for (debug_count = 0; debug_count < size; debug_count++)
2384        PUT_CHAR (pattern[debug_count]);
2385      putchar ('\n');
2386    }
2387#endif /* DEBUG */
2388
2389  /* Initialize the compile stack.  */
2390  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2391  if (compile_stack.stack == NULL)
2392    {
2393#ifdef WCHAR
2394      free(pattern);
2395      free(mbs_offset);
2396      free(is_binary);
2397#endif
2398      return REG_ESPACE;
2399    }
2400
2401  compile_stack.size = INIT_COMPILE_STACK_SIZE;
2402  compile_stack.avail = 0;
2403
2404  /* Initialize the pattern buffer.  */
2405  bufp->syntax = syntax;
2406  bufp->fastmap_accurate = 0;
2407  bufp->not_bol = bufp->not_eol = 0;
2408
2409  /* Set `used' to zero, so that if we return an error, the pattern
2410     printer (for debugging) will think there's no pattern.  We reset it
2411     at the end.  */
2412  bufp->used = 0;
2413
2414  /* Always count groups, whether or not bufp->no_sub is set.  */
2415  bufp->re_nsub = 0;
2416
2417#if !defined emacs && !defined SYNTAX_TABLE
2418  /* Initialize the syntax table.  */
2419   init_syntax_once ();
2420#endif
2421
2422  if (bufp->allocated == 0)
2423    {
2424      if (bufp->buffer)
2425	{ /* If zero allocated, but buffer is non-null, try to realloc
2426             enough space.  This loses if buffer's address is bogus, but
2427             that is the user's responsibility.  */
2428#ifdef WCHAR
2429	  /* Free bufp->buffer and allocate an array for wchar_t pattern
2430	     buffer.  */
2431          free(bufp->buffer);
2432          COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2433					UCHAR_T);
2434#else
2435          RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2436#endif /* WCHAR */
2437        }
2438      else
2439        { /* Caller did not allocate a buffer.  Do it for them.  */
2440          COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2441					UCHAR_T);
2442        }
2443
2444      if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2445#ifdef WCHAR
2446      bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2447#endif /* WCHAR */
2448      bufp->allocated = INIT_BUF_SIZE;
2449    }
2450#ifdef WCHAR
2451  else
2452    COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2453#endif
2454
2455  begalt = b = COMPILED_BUFFER_VAR;
2456
2457  /* Loop through the uncompiled pattern until we're at the end.  */
2458  while (p != pend)
2459    {
2460      PATFETCH (c);
2461
2462      switch (c)
2463        {
2464        case '^':
2465          {
2466            if (   /* If at start of pattern, it's an operator.  */
2467                   p == pattern + 1
2468                   /* If context independent, it's an operator.  */
2469                || syntax & RE_CONTEXT_INDEP_ANCHORS
2470                   /* Otherwise, depends on what's come before.  */
2471                || PREFIX(at_begline_loc_p) (pattern, p, syntax))
2472              BUF_PUSH (begline);
2473            else
2474              goto normal_char;
2475          }
2476          break;
2477
2478
2479        case '$':
2480          {
2481            if (   /* If at end of pattern, it's an operator.  */
2482                   p == pend
2483                   /* If context independent, it's an operator.  */
2484                || syntax & RE_CONTEXT_INDEP_ANCHORS
2485                   /* Otherwise, depends on what's next.  */
2486                || PREFIX(at_endline_loc_p) (p, pend, syntax))
2487               BUF_PUSH (endline);
2488             else
2489               goto normal_char;
2490           }
2491           break;
2492
2493
2494	case '+':
2495        case '?':
2496          if ((syntax & RE_BK_PLUS_QM)
2497              || (syntax & RE_LIMITED_OPS))
2498            goto normal_char;
2499	  /* Fall through.  */
2500        handle_plus:
2501        case '*':
2502          /* If there is no previous pattern... */
2503          if (!laststart)
2504            {
2505              if (syntax & RE_CONTEXT_INVALID_OPS)
2506                FREE_STACK_RETURN (REG_BADRPT);
2507              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2508                goto normal_char;
2509            }
2510
2511          {
2512            /* Are we optimizing this jump?  */
2513            boolean keep_string_p = false;
2514
2515            /* 1 means zero (many) matches is allowed.  */
2516            char zero_times_ok = 0, many_times_ok = 0;
2517
2518            /* If there is a sequence of repetition chars, collapse it
2519               down to just one (the right one).  We can't combine
2520               interval operators with these because of, e.g., `a{2}*',
2521               which should only match an even number of `a's.  */
2522
2523            for (;;)
2524              {
2525                zero_times_ok |= c != '+';
2526                many_times_ok |= c != '?';
2527
2528                if (p == pend)
2529                  break;
2530
2531                PATFETCH (c);
2532
2533                if (c == '*'
2534                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2535                  ;
2536
2537                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2538                  {
2539                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2540
2541                    PATFETCH (c1);
2542                    if (!(c1 == '+' || c1 == '?'))
2543                      {
2544                        PATUNFETCH;
2545                        PATUNFETCH;
2546                        break;
2547                      }
2548
2549                    c = c1;
2550                  }
2551                else
2552                  {
2553                    PATUNFETCH;
2554                    break;
2555                  }
2556
2557                /* If we get here, we found another repeat character.  */
2558               }
2559
2560            /* Star, etc. applied to an empty pattern is equivalent
2561               to an empty pattern.  */
2562            if (!laststart)
2563              break;
2564
2565            /* Now we know whether or not zero matches is allowed
2566               and also whether or not two or more matches is allowed.  */
2567            if (many_times_ok)
2568              { /* More than one repetition is allowed, so put in at the
2569                   end a backward relative jump from `b' to before the next
2570                   jump we're going to put in below (which jumps from
2571                   laststart to after this jump).
2572
2573                   But if we are at the `*' in the exact sequence `.*\n',
2574                   insert an unconditional jump backwards to the .,
2575                   instead of the beginning of the loop.  This way we only
2576                   push a failure point once, instead of every time
2577                   through the loop.  */
2578                assert (p - 1 > pattern);
2579
2580                /* Allocate the space for the jump.  */
2581                GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2582
2583                /* We know we are not at the first character of the pattern,
2584                   because laststart was nonzero.  And we've already
2585                   incremented `p', by the way, to be the character after
2586                   the `*'.  Do we have to do something analogous here
2587                   for null bytes, because of RE_DOT_NOT_NULL?  */
2588                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2589		    && zero_times_ok
2590                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2591                    && !(syntax & RE_DOT_NEWLINE))
2592                  { /* We have .*\n.  */
2593                    STORE_JUMP (jump, b, laststart);
2594                    keep_string_p = true;
2595                  }
2596                else
2597                  /* Anything else.  */
2598                  STORE_JUMP (maybe_pop_jump, b, laststart -
2599			      (1 + OFFSET_ADDRESS_SIZE));
2600
2601                /* We've added more stuff to the buffer.  */
2602                b += 1 + OFFSET_ADDRESS_SIZE;
2603              }
2604
2605            /* On failure, jump from laststart to b + 3, which will be the
2606               end of the buffer after this jump is inserted.  */
2607	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2608	       'b + 3'.  */
2609            GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2610            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2611                                       : on_failure_jump,
2612                         laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2613            pending_exact = 0;
2614            b += 1 + OFFSET_ADDRESS_SIZE;
2615
2616            if (!zero_times_ok)
2617              {
2618                /* At least one repetition is required, so insert a
2619                   `dummy_failure_jump' before the initial
2620                   `on_failure_jump' instruction of the loop. This
2621                   effects a skip over that instruction the first time
2622                   we hit that loop.  */
2623                GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2624                INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2625			     2 + 2 * OFFSET_ADDRESS_SIZE);
2626                b += 1 + OFFSET_ADDRESS_SIZE;
2627              }
2628            }
2629	  break;
2630
2631
2632	case '.':
2633          laststart = b;
2634          BUF_PUSH (anychar);
2635          break;
2636
2637
2638        case '[':
2639          {
2640            boolean had_char_class = false;
2641#ifdef WCHAR
2642	    CHAR_T range_start = 0xffffffff;
2643#else
2644	    unsigned int range_start = 0xffffffff;
2645#endif
2646            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2647
2648#ifdef WCHAR
2649	    /* We assume a charset(_not) structure as a wchar_t array.
2650	       charset[0] = (re_opcode_t) charset(_not)
2651               charset[1] = l (= length of char_classes)
2652               charset[2] = m (= length of collating_symbols)
2653               charset[3] = n (= length of equivalence_classes)
2654	       charset[4] = o (= length of char_ranges)
2655	       charset[5] = p (= length of chars)
2656
2657               charset[6] = char_class (wctype_t)
2658               charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2659                         ...
2660               charset[l+5]  = char_class (wctype_t)
2661
2662               charset[l+6]  = collating_symbol (wchar_t)
2663                            ...
2664               charset[l+m+5]  = collating_symbol (wchar_t)
2665					ifdef _LIBC we use the index if
2666					_NL_COLLATE_SYMB_EXTRAMB instead of
2667					wchar_t string.
2668
2669               charset[l+m+6]  = equivalence_classes (wchar_t)
2670                              ...
2671               charset[l+m+n+5]  = equivalence_classes (wchar_t)
2672					ifdef _LIBC we use the index in
2673					_NL_COLLATE_WEIGHT instead of
2674					wchar_t string.
2675
2676	       charset[l+m+n+6] = range_start
2677	       charset[l+m+n+7] = range_end
2678	                       ...
2679	       charset[l+m+n+2o+4] = range_start
2680	       charset[l+m+n+2o+5] = range_end
2681					ifdef _LIBC we use the value looked up
2682					in _NL_COLLATE_COLLSEQ instead of
2683					wchar_t character.
2684
2685	       charset[l+m+n+2o+6] = char
2686	                          ...
2687	       charset[l+m+n+2o+p+5] = char
2688
2689	     */
2690
2691	    /* We need at least 6 spaces: the opcode, the length of
2692               char_classes, the length of collating_symbols, the length of
2693               equivalence_classes, the length of char_ranges, the length of
2694               chars.  */
2695	    GET_BUFFER_SPACE (6);
2696
2697	    /* Save b as laststart.  We also use laststart as the pointer
2698	       to the first element of the charset here.
2699	       In other words, laststart[i] indicates charset[i].  */
2700            laststart = b;
2701
2702            /* We test `*p == '^' twice, instead of using an if
2703               statement, so we only need one BUF_PUSH.  */
2704            BUF_PUSH (*p == '^' ? charset_not : charset);
2705            if (*p == '^')
2706              p++;
2707
2708            /* Push the length of char_classes, the length of
2709               collating_symbols, the length of equivalence_classes, the
2710               length of char_ranges and the length of chars.  */
2711            BUF_PUSH_3 (0, 0, 0);
2712            BUF_PUSH_2 (0, 0);
2713
2714            /* Remember the first position in the bracket expression.  */
2715            p1 = p;
2716
2717            /* charset_not matches newline according to a syntax bit.  */
2718            if ((re_opcode_t) b[-6] == charset_not
2719                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2720	      {
2721		BUF_PUSH('\n');
2722		laststart[5]++; /* Update the length of characters  */
2723	      }
2724
2725            /* Read in characters and ranges, setting map bits.  */
2726            for (;;)
2727              {
2728                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2729
2730                PATFETCH (c);
2731
2732                /* \ might escape characters inside [...] and [^...].  */
2733                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2734                  {
2735                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2736
2737                    PATFETCH (c1);
2738		    BUF_PUSH(c1);
2739		    laststart[5]++; /* Update the length of chars  */
2740		    range_start = c1;
2741                    continue;
2742                  }
2743
2744                /* Could be the end of the bracket expression.  If it's
2745                   not (i.e., when the bracket expression is `[]' so
2746                   far), the ']' character bit gets set way below.  */
2747                if (c == ']' && p != p1 + 1)
2748                  break;
2749
2750                /* Look ahead to see if it's a range when the last thing
2751                   was a character class.  */
2752                if (had_char_class && c == '-' && *p != ']')
2753                  FREE_STACK_RETURN (REG_ERANGE);
2754
2755                /* Look ahead to see if it's a range when the last thing
2756                   was a character: if this is a hyphen not at the
2757                   beginning or the end of a list, then it's the range
2758                   operator.  */
2759                if (c == '-'
2760                    && !(p - 2 >= pattern && p[-2] == '[')
2761                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2762                    && *p != ']')
2763                  {
2764                    reg_errcode_t ret;
2765		    /* Allocate the space for range_start and range_end.  */
2766		    GET_BUFFER_SPACE (2);
2767		    /* Update the pointer to indicate end of buffer.  */
2768                    b += 2;
2769                    ret = wcs_compile_range (range_start, &p, pend, translate,
2770                                         syntax, b, laststart);
2771                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2772                    range_start = 0xffffffff;
2773                  }
2774                else if (p[0] == '-' && p[1] != ']')
2775                  { /* This handles ranges made up of characters only.  */
2776                    reg_errcode_t ret;
2777
2778		    /* Move past the `-'.  */
2779                    PATFETCH (c1);
2780		    /* Allocate the space for range_start and range_end.  */
2781		    GET_BUFFER_SPACE (2);
2782		    /* Update the pointer to indicate end of buffer.  */
2783                    b += 2;
2784                    ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2785                                         laststart);
2786                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2787		    range_start = 0xffffffff;
2788                  }
2789
2790                /* See if we're at the beginning of a possible character
2791                   class.  */
2792                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2793                  { /* Leave room for the null.  */
2794                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2795
2796                    PATFETCH (c);
2797                    c1 = 0;
2798
2799                    /* If pattern is `[[:'.  */
2800                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2801
2802                    for (;;)
2803                      {
2804                        PATFETCH (c);
2805                        if ((c == ':' && *p == ']') || p == pend)
2806                          break;
2807			if (c1 < CHAR_CLASS_MAX_LENGTH)
2808			  str[c1++] = c;
2809			else
2810			  /* This is in any case an invalid class name.  */
2811			  str[0] = '\0';
2812                      }
2813                    str[c1] = '\0';
2814
2815                    /* If isn't a word bracketed by `[:' and `:]':
2816                       undo the ending character, the letters, and leave
2817                       the leading `:' and `[' (but store them as character).  */
2818                    if (c == ':' && *p == ']')
2819                      {
2820			wctype_t wt;
2821			uintptr_t alignedp;
2822
2823			/* Query the character class as wctype_t.  */
2824			wt = IS_CHAR_CLASS (str);
2825			if (wt == 0)
2826			  FREE_STACK_RETURN (REG_ECTYPE);
2827
2828                        /* Throw away the ] at the end of the character
2829                           class.  */
2830                        PATFETCH (c);
2831
2832                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2833
2834			/* Allocate the space for character class.  */
2835                        GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2836			/* Update the pointer to indicate end of buffer.  */
2837                        b += CHAR_CLASS_SIZE;
2838			/* Move data which follow character classes
2839			    not to violate the data.  */
2840                        insert_space(CHAR_CLASS_SIZE,
2841				     laststart + 6 + laststart[1],
2842				     b - 1);
2843			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2844				    + __alignof__(wctype_t) - 1)
2845			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2846			/* Store the character class.  */
2847                        *((wctype_t*)alignedp) = wt;
2848                        /* Update length of char_classes */
2849                        laststart[1] += CHAR_CLASS_SIZE;
2850
2851                        had_char_class = true;
2852                      }
2853                    else
2854                      {
2855                        c1++;
2856                        while (c1--)
2857                          PATUNFETCH;
2858                        BUF_PUSH ('[');
2859                        BUF_PUSH (':');
2860                        laststart[5] += 2; /* Update the length of characters  */
2861			range_start = ':';
2862                        had_char_class = false;
2863                      }
2864                  }
2865                else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2866							  || *p == '.'))
2867		  {
2868		    CHAR_T str[128];	/* Should be large enough.  */
2869		    CHAR_T delim = *p; /* '=' or '.'  */
2870# ifdef _LIBC
2871		    uint32_t nrules =
2872		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2873# endif
2874		    PATFETCH (c);
2875		    c1 = 0;
2876
2877		    /* If pattern is `[[=' or '[[.'.  */
2878		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2879
2880		    for (;;)
2881		      {
2882			PATFETCH (c);
2883			if ((c == delim && *p == ']') || p == pend)
2884			  break;
2885			if (c1 < sizeof (str) - 1)
2886			  str[c1++] = c;
2887			else
2888			  /* This is in any case an invalid class name.  */
2889			  str[0] = '\0';
2890                      }
2891		    str[c1] = '\0';
2892
2893		    if (c == delim && *p == ']' && str[0] != '\0')
2894		      {
2895                        unsigned int i, offset;
2896			/* If we have no collation data we use the default
2897			   collation in which each character is in a class
2898			   by itself.  It also means that ASCII is the
2899			   character set and therefore we cannot have character
2900			   with more than one byte in the multibyte
2901			   representation.  */
2902
2903                        /* If not defined _LIBC, we push the name and
2904			   `\0' for the sake of matching performance.  */
2905			int datasize = c1 + 1;
2906
2907# ifdef _LIBC
2908			int32_t idx = 0;
2909			if (nrules == 0)
2910# endif
2911			  {
2912			    if (c1 != 1)
2913			      FREE_STACK_RETURN (REG_ECOLLATE);
2914			  }
2915# ifdef _LIBC
2916			else
2917			  {
2918			    const int32_t *table;
2919			    const int32_t *weights;
2920			    const int32_t *extra;
2921			    const int32_t *indirect;
2922			    wint_t *cp;
2923
2924			    /* This #include defines a local function!  */
2925#  include <locale/weightwc.h>
2926
2927			    if(delim == '=')
2928			      {
2929				/* We push the index for equivalence class.  */
2930				cp = (wint_t*)str;
2931
2932				table = (const int32_t *)
2933				  _NL_CURRENT (LC_COLLATE,
2934					       _NL_COLLATE_TABLEWC);
2935				weights = (const int32_t *)
2936				  _NL_CURRENT (LC_COLLATE,
2937					       _NL_COLLATE_WEIGHTWC);
2938				extra = (const int32_t *)
2939				  _NL_CURRENT (LC_COLLATE,
2940					       _NL_COLLATE_EXTRAWC);
2941				indirect = (const int32_t *)
2942				  _NL_CURRENT (LC_COLLATE,
2943					       _NL_COLLATE_INDIRECTWC);
2944
2945				idx = findidx ((const wint_t**)&cp);
2946				if (idx == 0 || cp < (wint_t*) str + c1)
2947				  /* This is no valid character.  */
2948				  FREE_STACK_RETURN (REG_ECOLLATE);
2949
2950				str[0] = (wchar_t)idx;
2951			      }
2952			    else /* delim == '.' */
2953			      {
2954				/* We push collation sequence value
2955				   for collating symbol.  */
2956				int32_t table_size;
2957				const int32_t *symb_table;
2958				const unsigned char *extra;
2959				int32_t idx;
2960				int32_t elem;
2961				int32_t second;
2962				int32_t hash;
2963				char char_str[c1];
2964
2965				/* We have to convert the name to a single-byte
2966				   string.  This is possible since the names
2967				   consist of ASCII characters and the internal
2968				   representation is UCS4.  */
2969				for (i = 0; i < c1; ++i)
2970				  char_str[i] = str[i];
2971
2972				table_size =
2973				  _NL_CURRENT_WORD (LC_COLLATE,
2974						    _NL_COLLATE_SYMB_HASH_SIZEMB);
2975				symb_table = (const int32_t *)
2976				  _NL_CURRENT (LC_COLLATE,
2977					       _NL_COLLATE_SYMB_TABLEMB);
2978				extra = (const unsigned char *)
2979				  _NL_CURRENT (LC_COLLATE,
2980					       _NL_COLLATE_SYMB_EXTRAMB);
2981
2982				/* Locate the character in the hashing table.  */
2983				hash = elem_hash (char_str, c1);
2984
2985				idx = 0;
2986				elem = hash % table_size;
2987				second = hash % (table_size - 2);
2988				while (symb_table[2 * elem] != 0)
2989				  {
2990				    /* First compare the hashing value.  */
2991				    if (symb_table[2 * elem] == hash
2992					&& c1 == extra[symb_table[2 * elem + 1]]
2993					&& memcmp (char_str,
2994						   &extra[symb_table[2 * elem + 1]
2995							 + 1], c1) == 0)
2996				      {
2997					/* Yep, this is the entry.  */
2998					idx = symb_table[2 * elem + 1];
2999					idx += 1 + extra[idx];
3000					break;
3001				      }
3002
3003				    /* Next entry.  */
3004				    elem += second;
3005				  }
3006
3007				if (symb_table[2 * elem] != 0)
3008				  {
3009				    /* Compute the index of the byte sequence
3010				       in the table.  */
3011				    idx += 1 + extra[idx];
3012				    /* Adjust for the alignment.  */
3013				    idx = (idx + 3) & ~3;
3014
3015				    str[0] = (wchar_t) idx + 4;
3016				  }
3017				else if (symb_table[2 * elem] == 0 && c1 == 1)
3018				  {
3019				    /* No valid character.  Match it as a
3020				       single byte character.  */
3021				    had_char_class = false;
3022				    BUF_PUSH(str[0]);
3023				    /* Update the length of characters  */
3024				    laststart[5]++;
3025				    range_start = str[0];
3026
3027				    /* Throw away the ] at the end of the
3028				       collating symbol.  */
3029				    PATFETCH (c);
3030				    /* Go on to the next bracket element.  */
3031				    continue;
3032				  }
3033				else
3034				  FREE_STACK_RETURN (REG_ECOLLATE);
3035			      }
3036			    datasize = 1;
3037			  }
3038# endif
3039                        /* Throw away the ] at the end of the equivalence
3040                           class (or collating symbol).  */
3041                        PATFETCH (c);
3042
3043			/* Allocate the space for the equivalence class
3044			   (or collating symbol) (and '\0' if needed).  */
3045                        GET_BUFFER_SPACE(datasize);
3046			/* Update the pointer to indicate end of buffer.  */
3047                        b += datasize;
3048
3049			if (delim == '=')
3050			  { /* equivalence class  */
3051			    /* Calculate the offset of char_ranges,
3052			       which is next to equivalence_classes.  */
3053			    offset = laststart[1] + laststart[2]
3054			      + laststart[3] +6;
3055			    /* Insert space.  */
3056			    insert_space(datasize, laststart + offset, b - 1);
3057
3058			    /* Write the equivalence_class and \0.  */
3059			    for (i = 0 ; i < datasize ; i++)
3060			      laststart[offset + i] = str[i];
3061
3062			    /* Update the length of equivalence_classes.  */
3063			    laststart[3] += datasize;
3064			    had_char_class = true;
3065			  }
3066			else /* delim == '.' */
3067			  { /* collating symbol  */
3068			    /* Calculate the offset of the equivalence_classes,
3069			       which is next to collating_symbols.  */
3070			    offset = laststart[1] + laststart[2] + 6;
3071			    /* Insert space and write the collating_symbol
3072			       and \0.  */
3073			    insert_space(datasize, laststart + offset, b-1);
3074			    for (i = 0 ; i < datasize ; i++)
3075			      laststart[offset + i] = str[i];
3076
3077			    /* In re_match_2_internal if range_start < -1, we
3078			       assume -range_start is the offset of the
3079			       collating symbol which is specified as
3080			       the character of the range start.  So we assign
3081			       -(laststart[1] + laststart[2] + 6) to
3082			       range_start.  */
3083			    range_start = -(laststart[1] + laststart[2] + 6);
3084			    /* Update the length of collating_symbol.  */
3085			    laststart[2] += datasize;
3086			    had_char_class = false;
3087			  }
3088		      }
3089                    else
3090                      {
3091                        c1++;
3092                        while (c1--)
3093                          PATUNFETCH;
3094                        BUF_PUSH ('[');
3095                        BUF_PUSH (delim);
3096                        laststart[5] += 2; /* Update the length of characters  */
3097			range_start = delim;
3098                        had_char_class = false;
3099                      }
3100		  }
3101                else
3102                  {
3103                    had_char_class = false;
3104		    BUF_PUSH(c);
3105		    laststart[5]++;  /* Update the length of characters  */
3106		    range_start = c;
3107                  }
3108	      }
3109
3110#else /* BYTE */
3111            /* Ensure that we have enough space to push a charset: the
3112               opcode, the length count, and the bitset; 34 bytes in all.  */
3113	    GET_BUFFER_SPACE (34);
3114
3115            laststart = b;
3116
3117            /* We test `*p == '^' twice, instead of using an if
3118               statement, so we only need one BUF_PUSH.  */
3119            BUF_PUSH (*p == '^' ? charset_not : charset);
3120            if (*p == '^')
3121              p++;
3122
3123            /* Remember the first position in the bracket expression.  */
3124            p1 = p;
3125
3126            /* Push the number of bytes in the bitmap.  */
3127            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3128
3129            /* Clear the whole map.  */
3130            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3131
3132            /* charset_not matches newline according to a syntax bit.  */
3133            if ((re_opcode_t) b[-2] == charset_not
3134                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3135              SET_LIST_BIT ('\n');
3136
3137            /* Read in characters and ranges, setting map bits.  */
3138            for (;;)
3139              {
3140                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3141
3142                PATFETCH (c);
3143
3144                /* \ might escape characters inside [...] and [^...].  */
3145                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3146                  {
3147                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3148
3149                    PATFETCH (c1);
3150                    SET_LIST_BIT (c1);
3151		    range_start = c1;
3152                    continue;
3153                  }
3154
3155                /* Could be the end of the bracket expression.  If it's
3156                   not (i.e., when the bracket expression is `[]' so
3157                   far), the ']' character bit gets set way below.  */
3158                if (c == ']' && p != p1 + 1)
3159                  break;
3160
3161                /* Look ahead to see if it's a range when the last thing
3162                   was a character class.  */
3163                if (had_char_class && c == '-' && *p != ']')
3164                  FREE_STACK_RETURN (REG_ERANGE);
3165
3166                /* Look ahead to see if it's a range when the last thing
3167                   was a character: if this is a hyphen not at the
3168                   beginning or the end of a list, then it's the range
3169                   operator.  */
3170                if (c == '-'
3171                    && !(p - 2 >= pattern && p[-2] == '[')
3172                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3173                    && *p != ']')
3174                  {
3175                    reg_errcode_t ret
3176                      = byte_compile_range (range_start, &p, pend, translate,
3177					    syntax, b);
3178                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3179		    range_start = 0xffffffff;
3180                  }
3181
3182                else if (p[0] == '-' && p[1] != ']')
3183                  { /* This handles ranges made up of characters only.  */
3184                    reg_errcode_t ret;
3185
3186		    /* Move past the `-'.  */
3187                    PATFETCH (c1);
3188
3189                    ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3190                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3191		    range_start = 0xffffffff;
3192                  }
3193
3194                /* See if we're at the beginning of a possible character
3195                   class.  */
3196
3197                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3198                  { /* Leave room for the null.  */
3199                    char str[CHAR_CLASS_MAX_LENGTH + 1];
3200
3201                    PATFETCH (c);
3202                    c1 = 0;
3203
3204                    /* If pattern is `[[:'.  */
3205                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3206
3207                    for (;;)
3208                      {
3209                        PATFETCH (c);
3210                        if ((c == ':' && *p == ']') || p == pend)
3211                          break;
3212			if (c1 < CHAR_CLASS_MAX_LENGTH)
3213			  str[c1++] = c;
3214			else
3215			  /* This is in any case an invalid class name.  */
3216			  str[0] = '\0';
3217                      }
3218                    str[c1] = '\0';
3219
3220                    /* If isn't a word bracketed by `[:' and `:]':
3221                       undo the ending character, the letters, and leave
3222                       the leading `:' and `[' (but set bits for them).  */
3223                    if (c == ':' && *p == ']')
3224                      {
3225# if defined _LIBC || WIDE_CHAR_SUPPORT
3226                        boolean is_lower = STREQ (str, "lower");
3227                        boolean is_upper = STREQ (str, "upper");
3228			wctype_t wt;
3229                        int ch;
3230
3231			wt = IS_CHAR_CLASS (str);
3232			if (wt == 0)
3233			  FREE_STACK_RETURN (REG_ECTYPE);
3234
3235                        /* Throw away the ] at the end of the character
3236                           class.  */
3237                        PATFETCH (c);
3238
3239                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3240
3241                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3242			  {
3243#  ifdef _LIBC
3244			    if (__iswctype (__btowc (ch), wt))
3245			      SET_LIST_BIT (ch);
3246#  else
3247			    if (iswctype (btowc (ch), wt))
3248			      SET_LIST_BIT (ch);
3249#  endif
3250
3251			    if (translate && (is_upper || is_lower)
3252				&& (ISUPPER (ch) || ISLOWER (ch)))
3253			      SET_LIST_BIT (ch);
3254			  }
3255
3256                        had_char_class = true;
3257# else
3258                        int ch;
3259                        boolean is_alnum = STREQ (str, "alnum");
3260                        boolean is_alpha = STREQ (str, "alpha");
3261                        boolean is_blank = STREQ (str, "blank");
3262                        boolean is_cntrl = STREQ (str, "cntrl");
3263                        boolean is_digit = STREQ (str, "digit");
3264                        boolean is_graph = STREQ (str, "graph");
3265                        boolean is_lower = STREQ (str, "lower");
3266                        boolean is_print = STREQ (str, "print");
3267                        boolean is_punct = STREQ (str, "punct");
3268                        boolean is_space = STREQ (str, "space");
3269                        boolean is_upper = STREQ (str, "upper");
3270                        boolean is_xdigit = STREQ (str, "xdigit");
3271
3272                        if (!IS_CHAR_CLASS (str))
3273			  FREE_STACK_RETURN (REG_ECTYPE);
3274
3275                        /* Throw away the ] at the end of the character
3276                           class.  */
3277                        PATFETCH (c);
3278
3279                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3280
3281                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3282                          {
3283			    /* This was split into 3 if's to
3284			       avoid an arbitrary limit in some compiler.  */
3285                            if (   (is_alnum  && ISALNUM (ch))
3286                                || (is_alpha  && ISALPHA (ch))
3287                                || (is_blank  && ISBLANK (ch))
3288                                || (is_cntrl  && ISCNTRL (ch)))
3289			      SET_LIST_BIT (ch);
3290			    if (   (is_digit  && ISDIGIT (ch))
3291                                || (is_graph  && ISGRAPH (ch))
3292                                || (is_lower  && ISLOWER (ch))
3293                                || (is_print  && ISPRINT (ch)))
3294			      SET_LIST_BIT (ch);
3295			    if (   (is_punct  && ISPUNCT (ch))
3296                                || (is_space  && ISSPACE (ch))
3297                                || (is_upper  && ISUPPER (ch))
3298                                || (is_xdigit && ISXDIGIT (ch)))
3299			      SET_LIST_BIT (ch);
3300			    if (   translate && (is_upper || is_lower)
3301				&& (ISUPPER (ch) || ISLOWER (ch)))
3302			      SET_LIST_BIT (ch);
3303                          }
3304                        had_char_class = true;
3305# endif	/* libc || wctype.h */
3306                      }
3307                    else
3308                      {
3309                        c1++;
3310                        while (c1--)
3311                          PATUNFETCH;
3312                        SET_LIST_BIT ('[');
3313                        SET_LIST_BIT (':');
3314			range_start = ':';
3315                        had_char_class = false;
3316                      }
3317                  }
3318                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3319		  {
3320		    unsigned char str[MB_LEN_MAX + 1];
3321# ifdef _LIBC
3322		    uint32_t nrules =
3323		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3324# endif
3325
3326		    PATFETCH (c);
3327		    c1 = 0;
3328
3329		    /* If pattern is `[[='.  */
3330		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3331
3332		    for (;;)
3333		      {
3334			PATFETCH (c);
3335			if ((c == '=' && *p == ']') || p == pend)
3336			  break;
3337			if (c1 < MB_LEN_MAX)
3338			  str[c1++] = c;
3339			else
3340			  /* This is in any case an invalid class name.  */
3341			  str[0] = '\0';
3342                      }
3343		    str[c1] = '\0';
3344
3345		    if (c == '=' && *p == ']' && str[0] != '\0')
3346		      {
3347			/* If we have no collation data we use the default
3348			   collation in which each character is in a class
3349			   by itself.  It also means that ASCII is the
3350			   character set and therefore we cannot have character
3351			   with more than one byte in the multibyte
3352			   representation.  */
3353# ifdef _LIBC
3354			if (nrules == 0)
3355# endif
3356			  {
3357			    if (c1 != 1)
3358			      FREE_STACK_RETURN (REG_ECOLLATE);
3359
3360			    /* Throw away the ] at the end of the equivalence
3361			       class.  */
3362			    PATFETCH (c);
3363
3364			    /* Set the bit for the character.  */
3365			    SET_LIST_BIT (str[0]);
3366			  }
3367# ifdef _LIBC
3368			else
3369			  {
3370			    /* Try to match the byte sequence in `str' against
3371			       those known to the collate implementation.
3372			       First find out whether the bytes in `str' are
3373			       actually from exactly one character.  */
3374			    const int32_t *table;
3375			    const unsigned char *weights;
3376			    const unsigned char *extra;
3377			    const int32_t *indirect;
3378			    int32_t idx;
3379			    const unsigned char *cp = str;
3380			    int ch;
3381
3382			    /* This #include defines a local function!  */
3383#  include <locale/weight.h>
3384
3385			    table = (const int32_t *)
3386			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3387			    weights = (const unsigned char *)
3388			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3389			    extra = (const unsigned char *)
3390			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3391			    indirect = (const int32_t *)
3392			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3393
3394			    idx = findidx (&cp);
3395			    if (idx == 0 || cp < str + c1)
3396			      /* This is no valid character.  */
3397			      FREE_STACK_RETURN (REG_ECOLLATE);
3398
3399			    /* Throw away the ] at the end of the equivalence
3400			       class.  */
3401			    PATFETCH (c);
3402
3403			    /* Now we have to go through the whole table
3404			       and find all characters which have the same
3405			       first level weight.
3406
3407			       XXX Note that this is not entirely correct.
3408			       we would have to match multibyte sequences
3409			       but this is not possible with the current
3410			       implementation.  */
3411			    for (ch = 1; ch < 256; ++ch)
3412			      /* XXX This test would have to be changed if we
3413				 would allow matching multibyte sequences.  */
3414			      if (table[ch] > 0)
3415				{
3416				  int32_t idx2 = table[ch];
3417				  size_t len = weights[idx2];
3418
3419				  /* Test whether the lengths match.  */
3420				  if (weights[idx] == len)
3421				    {
3422				      /* They do.  Now compare the bytes of
3423					 the weight.  */
3424				      size_t cnt = 0;
3425
3426				      while (cnt < len
3427					     && (weights[idx + 1 + cnt]
3428						 == weights[idx2 + 1 + cnt]))
3429					++cnt;
3430
3431				      if (cnt == len)
3432					/* They match.  Mark the character as
3433					   acceptable.  */
3434					SET_LIST_BIT (ch);
3435				    }
3436				}
3437			  }
3438# endif
3439			had_char_class = true;
3440		      }
3441                    else
3442                      {
3443                        c1++;
3444                        while (c1--)
3445                          PATUNFETCH;
3446                        SET_LIST_BIT ('[');
3447                        SET_LIST_BIT ('=');
3448			range_start = '=';
3449                        had_char_class = false;
3450                      }
3451		  }
3452                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3453		  {
3454		    unsigned char str[128];	/* Should be large enough.  */
3455# ifdef _LIBC
3456		    uint32_t nrules =
3457		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3458# endif
3459
3460		    PATFETCH (c);
3461		    c1 = 0;
3462
3463		    /* If pattern is `[[.'.  */
3464		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3465
3466		    for (;;)
3467		      {
3468			PATFETCH (c);
3469			if ((c == '.' && *p == ']') || p == pend)
3470			  break;
3471			if (c1 < sizeof (str))
3472			  str[c1++] = c;
3473			else
3474			  /* This is in any case an invalid class name.  */
3475			  str[0] = '\0';
3476                      }
3477		    str[c1] = '\0';
3478
3479		    if (c == '.' && *p == ']' && str[0] != '\0')
3480		      {
3481			/* If we have no collation data we use the default
3482			   collation in which each character is the name
3483			   for its own class which contains only the one
3484			   character.  It also means that ASCII is the
3485			   character set and therefore we cannot have character
3486			   with more than one byte in the multibyte
3487			   representation.  */
3488# ifdef _LIBC
3489			if (nrules == 0)
3490# endif
3491			  {
3492			    if (c1 != 1)
3493			      FREE_STACK_RETURN (REG_ECOLLATE);
3494
3495			    /* Throw away the ] at the end of the collating
3496			       symbol.  */
3497			    PATFETCH (c);
3498
3499			    /* Set the bit for the character.  */
3500			    SET_LIST_BIT (str[0]);
3501			    range_start = ((const unsigned char *) str)[0];
3502			  }
3503# ifdef _LIBC
3504			else
3505			  {
3506			    /* Try to match the byte sequence in `str' against
3507			       those known to the collate implementation.
3508			       First find out whether the bytes in `str' are
3509			       actually from exactly one character.  */
3510			    int32_t table_size;
3511			    const int32_t *symb_table;
3512			    const unsigned char *extra;
3513			    int32_t idx;
3514			    int32_t elem;
3515			    int32_t second;
3516			    int32_t hash;
3517
3518			    table_size =
3519			      _NL_CURRENT_WORD (LC_COLLATE,
3520						_NL_COLLATE_SYMB_HASH_SIZEMB);
3521			    symb_table = (const int32_t *)
3522			      _NL_CURRENT (LC_COLLATE,
3523					   _NL_COLLATE_SYMB_TABLEMB);
3524			    extra = (const unsigned char *)
3525			      _NL_CURRENT (LC_COLLATE,
3526					   _NL_COLLATE_SYMB_EXTRAMB);
3527
3528			    /* Locate the character in the hashing table.  */
3529			    hash = elem_hash (str, c1);
3530
3531			    idx = 0;
3532			    elem = hash % table_size;
3533			    second = hash % (table_size - 2);
3534			    while (symb_table[2 * elem] != 0)
3535			      {
3536				/* First compare the hashing value.  */
3537				if (symb_table[2 * elem] == hash
3538				    && c1 == extra[symb_table[2 * elem + 1]]
3539				    && memcmp (str,
3540					       &extra[symb_table[2 * elem + 1]
3541						     + 1],
3542					       c1) == 0)
3543				  {
3544				    /* Yep, this is the entry.  */
3545				    idx = symb_table[2 * elem + 1];
3546				    idx += 1 + extra[idx];
3547				    break;
3548				  }
3549
3550				/* Next entry.  */
3551				elem += second;
3552			      }
3553
3554			    if (symb_table[2 * elem] == 0)
3555			      /* This is no valid character.  */
3556			      FREE_STACK_RETURN (REG_ECOLLATE);
3557
3558			    /* Throw away the ] at the end of the equivalence
3559			       class.  */
3560			    PATFETCH (c);
3561
3562			    /* Now add the multibyte character(s) we found
3563			       to the accept list.
3564
3565			       XXX Note that this is not entirely correct.
3566			       we would have to match multibyte sequences
3567			       but this is not possible with the current
3568			       implementation.  Also, we have to match
3569			       collating symbols, which expand to more than
3570			       one file, as a whole and not allow the
3571			       individual bytes.  */
3572			    c1 = extra[idx++];
3573			    if (c1 == 1)
3574			      range_start = extra[idx];
3575			    while (c1-- > 0)
3576			      {
3577				SET_LIST_BIT (extra[idx]);
3578				++idx;
3579			      }
3580			  }
3581# endif
3582			had_char_class = false;
3583		      }
3584                    else
3585                      {
3586                        c1++;
3587                        while (c1--)
3588                          PATUNFETCH;
3589                        SET_LIST_BIT ('[');
3590                        SET_LIST_BIT ('.');
3591			range_start = '.';
3592                        had_char_class = false;
3593                      }
3594		  }
3595                else
3596                  {
3597                    had_char_class = false;
3598                    SET_LIST_BIT (c);
3599		    range_start = c;
3600                  }
3601              }
3602
3603            /* Discard any (non)matching list bytes that are all 0 at the
3604               end of the map.  Decrease the map-length byte too.  */
3605            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3606              b[-1]--;
3607            b += b[-1];
3608#endif /* WCHAR */
3609          }
3610          break;
3611
3612
3613	case '(':
3614          if (syntax & RE_NO_BK_PARENS)
3615            goto handle_open;
3616          else
3617            goto normal_char;
3618
3619
3620        case ')':
3621          if (syntax & RE_NO_BK_PARENS)
3622            goto handle_close;
3623          else
3624            goto normal_char;
3625
3626
3627        case '\n':
3628          if (syntax & RE_NEWLINE_ALT)
3629            goto handle_alt;
3630          else
3631            goto normal_char;
3632
3633
3634	case '|':
3635          if (syntax & RE_NO_BK_VBAR)
3636            goto handle_alt;
3637          else
3638            goto normal_char;
3639
3640
3641        case '{':
3642           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3643             goto handle_interval;
3644           else
3645             goto normal_char;
3646
3647
3648        case '\\':
3649          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3650
3651          /* Do not translate the character after the \, so that we can
3652             distinguish, e.g., \B from \b, even if we normally would
3653             translate, e.g., B to b.  */
3654          PATFETCH_RAW (c);
3655
3656          switch (c)
3657            {
3658            case '(':
3659              if (syntax & RE_NO_BK_PARENS)
3660                goto normal_backslash;
3661
3662            handle_open:
3663              bufp->re_nsub++;
3664              regnum++;
3665
3666              if (COMPILE_STACK_FULL)
3667                {
3668                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
3669                            compile_stack_elt_t);
3670                  if (compile_stack.stack == NULL) return REG_ESPACE;
3671
3672                  compile_stack.size <<= 1;
3673                }
3674
3675              /* These are the values to restore when we hit end of this
3676                 group.  They are all relative offsets, so that if the
3677                 whole pattern moves because of realloc, they will still
3678                 be valid.  */
3679              COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3680              COMPILE_STACK_TOP.fixup_alt_jump
3681                = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3682              COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3683              COMPILE_STACK_TOP.regnum = regnum;
3684
3685              /* We will eventually replace the 0 with the number of
3686                 groups inner to this one.  But do not push a
3687                 start_memory for groups beyond the last one we can
3688                 represent in the compiled pattern.  */
3689              if (regnum <= MAX_REGNUM)
3690                {
3691                  COMPILE_STACK_TOP.inner_group_offset = b
3692		    - COMPILED_BUFFER_VAR + 2;
3693                  BUF_PUSH_3 (start_memory, regnum, 0);
3694                }
3695
3696              compile_stack.avail++;
3697
3698              fixup_alt_jump = 0;
3699              laststart = 0;
3700              begalt = b;
3701	      /* If we've reached MAX_REGNUM groups, then this open
3702		 won't actually generate any code, so we'll have to
3703		 clear pending_exact explicitly.  */
3704	      pending_exact = 0;
3705              break;
3706
3707
3708            case ')':
3709              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3710
3711              if (COMPILE_STACK_EMPTY)
3712		{
3713		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3714		    goto normal_backslash;
3715		  else
3716		    FREE_STACK_RETURN (REG_ERPAREN);
3717		}
3718
3719            handle_close:
3720              if (fixup_alt_jump)
3721                { /* Push a dummy failure point at the end of the
3722                     alternative for a possible future
3723                     `pop_failure_jump' to pop.  See comments at
3724                     `push_dummy_failure' in `re_match_2'.  */
3725                  BUF_PUSH (push_dummy_failure);
3726
3727                  /* We allocated space for this jump when we assigned
3728                     to `fixup_alt_jump', in the `handle_alt' case below.  */
3729                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3730                }
3731
3732              /* See similar code for backslashed left paren above.  */
3733              if (COMPILE_STACK_EMPTY)
3734		{
3735		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3736		    goto normal_char;
3737		  else
3738		    FREE_STACK_RETURN (REG_ERPAREN);
3739		}
3740
3741              /* Since we just checked for an empty stack above, this
3742                 ``can't happen''.  */
3743              assert (compile_stack.avail != 0);
3744              {
3745                /* We don't just want to restore into `regnum', because
3746                   later groups should continue to be numbered higher,
3747                   as in `(ab)c(de)' -- the second group is #2.  */
3748                regnum_t this_group_regnum;
3749
3750                compile_stack.avail--;
3751                begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3752                fixup_alt_jump
3753                  = COMPILE_STACK_TOP.fixup_alt_jump
3754                    ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3755                    : 0;
3756                laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3757                this_group_regnum = COMPILE_STACK_TOP.regnum;
3758		/* If we've reached MAX_REGNUM groups, then this open
3759		   won't actually generate any code, so we'll have to
3760		   clear pending_exact explicitly.  */
3761		pending_exact = 0;
3762
3763                /* We're at the end of the group, so now we know how many
3764                   groups were inside this one.  */
3765                if (this_group_regnum <= MAX_REGNUM)
3766                  {
3767		    UCHAR_T *inner_group_loc
3768                      = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3769
3770                    *inner_group_loc = regnum - this_group_regnum;
3771                    BUF_PUSH_3 (stop_memory, this_group_regnum,
3772                                regnum - this_group_regnum);
3773                  }
3774              }
3775              break;
3776
3777
3778            case '|':					/* `\|'.  */
3779              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3780                goto normal_backslash;
3781            handle_alt:
3782              if (syntax & RE_LIMITED_OPS)
3783                goto normal_char;
3784
3785              /* Insert before the previous alternative a jump which
3786                 jumps to this alternative if the former fails.  */
3787              GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3788              INSERT_JUMP (on_failure_jump, begalt,
3789			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3790              pending_exact = 0;
3791              b += 1 + OFFSET_ADDRESS_SIZE;
3792
3793              /* The alternative before this one has a jump after it
3794                 which gets executed if it gets matched.  Adjust that
3795                 jump so it will jump to this alternative's analogous
3796                 jump (put in below, which in turn will jump to the next
3797                 (if any) alternative's such jump, etc.).  The last such
3798                 jump jumps to the correct final destination.  A picture:
3799                          _____ _____
3800                          |   | |   |
3801                          |   v |   v
3802                         a | b   | c
3803
3804                 If we are at `b', then fixup_alt_jump right now points to a
3805                 three-byte space after `a'.  We'll put in the jump, set
3806                 fixup_alt_jump to right after `b', and leave behind three
3807                 bytes which we'll fill in when we get to after `c'.  */
3808
3809              if (fixup_alt_jump)
3810                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3811
3812              /* Mark and leave space for a jump after this alternative,
3813                 to be filled in later either by next alternative or
3814                 when know we're at the end of a series of alternatives.  */
3815              fixup_alt_jump = b;
3816              GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3817              b += 1 + OFFSET_ADDRESS_SIZE;
3818
3819              laststart = 0;
3820              begalt = b;
3821              break;
3822
3823
3824            case '{':
3825              /* If \{ is a literal.  */
3826              if (!(syntax & RE_INTERVALS)
3827                     /* If we're at `\{' and it's not the open-interval
3828                        operator.  */
3829		  || (syntax & RE_NO_BK_BRACES))
3830                goto normal_backslash;
3831
3832            handle_interval:
3833              {
3834                /* If got here, then the syntax allows intervals.  */
3835
3836                /* At least (most) this many matches must be made.  */
3837                int lower_bound = -1, upper_bound = -1;
3838
3839		/* Place in the uncompiled pattern (i.e., just after
3840		   the '{') to go back to if the interval is invalid.  */
3841		const CHAR_T *beg_interval = p;
3842
3843                if (p == pend)
3844		  goto invalid_interval;
3845
3846                GET_UNSIGNED_NUMBER (lower_bound);
3847
3848                if (c == ',')
3849                  {
3850                    GET_UNSIGNED_NUMBER (upper_bound);
3851		    if (upper_bound < 0)
3852		      upper_bound = RE_DUP_MAX;
3853                  }
3854                else
3855                  /* Interval such as `{1}' => match exactly once. */
3856                  upper_bound = lower_bound;
3857
3858                if (! (0 <= lower_bound && lower_bound <= upper_bound))
3859		  goto invalid_interval;
3860
3861                if (!(syntax & RE_NO_BK_BRACES))
3862                  {
3863		    if (c != '\\' || p == pend)
3864		      goto invalid_interval;
3865                    PATFETCH (c);
3866                  }
3867
3868                if (c != '}')
3869		  goto invalid_interval;
3870
3871                /* If it's invalid to have no preceding re.  */
3872                if (!laststart)
3873                  {
3874		    if (syntax & RE_CONTEXT_INVALID_OPS
3875			&& !(syntax & RE_INVALID_INTERVAL_ORD))
3876                      FREE_STACK_RETURN (REG_BADRPT);
3877                    else if (syntax & RE_CONTEXT_INDEP_OPS)
3878                      laststart = b;
3879                    else
3880                      goto unfetch_interval;
3881                  }
3882
3883                /* We just parsed a valid interval.  */
3884
3885                if (RE_DUP_MAX < upper_bound)
3886		  FREE_STACK_RETURN (REG_BADBR);
3887
3888                /* If the upper bound is zero, don't want to succeed at
3889                   all; jump from `laststart' to `b + 3', which will be
3890		   the end of the buffer after we insert the jump.  */
3891		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3892		   instead of 'b + 3'.  */
3893                 if (upper_bound == 0)
3894                   {
3895                     GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3896                     INSERT_JUMP (jump, laststart, b + 1
3897				  + OFFSET_ADDRESS_SIZE);
3898                     b += 1 + OFFSET_ADDRESS_SIZE;
3899                   }
3900
3901                 /* Otherwise, we have a nontrivial interval.  When
3902                    we're all done, the pattern will look like:
3903                      set_number_at <jump count> <upper bound>
3904                      set_number_at <succeed_n count> <lower bound>
3905                      succeed_n <after jump addr> <succeed_n count>
3906                      <body of loop>
3907                      jump_n <succeed_n addr> <jump count>
3908                    (The upper bound and `jump_n' are omitted if
3909                    `upper_bound' is 1, though.)  */
3910                 else
3911                   { /* If the upper bound is > 1, we need to insert
3912                        more at the end of the loop.  */
3913                     unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3914		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3915
3916                     GET_BUFFER_SPACE (nbytes);
3917
3918                     /* Initialize lower bound of the `succeed_n', even
3919                        though it will be set during matching by its
3920                        attendant `set_number_at' (inserted next),
3921                        because `re_compile_fastmap' needs to know.
3922                        Jump to the `jump_n' we might insert below.  */
3923                     INSERT_JUMP2 (succeed_n, laststart,
3924                                   b + 1 + 2 * OFFSET_ADDRESS_SIZE
3925				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3926				   , lower_bound);
3927                     b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3928
3929                     /* Code to initialize the lower bound.  Insert
3930                        before the `succeed_n'.  The `5' is the last two
3931                        bytes of this `set_number_at', plus 3 bytes of
3932                        the following `succeed_n'.  */
3933		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
3934			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
3935			of the following `succeed_n'.  */
3936                     PREFIX(insert_op2) (set_number_at, laststart, 1
3937				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
3938                     b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3939
3940                     if (upper_bound > 1)
3941                       { /* More than one repetition is allowed, so
3942                            append a backward jump to the `succeed_n'
3943                            that starts this interval.
3944
3945                            When we've reached this during matching,
3946                            we'll have matched the interval once, so
3947                            jump back only `upper_bound - 1' times.  */
3948                         STORE_JUMP2 (jump_n, b, laststart
3949				      + 2 * OFFSET_ADDRESS_SIZE + 1,
3950                                      upper_bound - 1);
3951                         b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3952
3953                         /* The location we want to set is the second
3954                            parameter of the `jump_n'; that is `b-2' as
3955                            an absolute address.  `laststart' will be
3956                            the `set_number_at' we're about to insert;
3957                            `laststart+3' the number to set, the source
3958                            for the relative address.  But we are
3959                            inserting into the middle of the pattern --
3960                            so everything is getting moved up by 5.
3961                            Conclusion: (b - 2) - (laststart + 3) + 5,
3962                            i.e., b - laststart.
3963
3964                            We insert this at the beginning of the loop
3965                            so that if we fail during matching, we'll
3966                            reinitialize the bounds.  */
3967                         PREFIX(insert_op2) (set_number_at, laststart,
3968					     b - laststart,
3969					     upper_bound - 1, b);
3970                         b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3971                       }
3972                   }
3973                pending_exact = 0;
3974		break;
3975
3976	      invalid_interval:
3977		if (!(syntax & RE_INVALID_INTERVAL_ORD))
3978		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
3979	      unfetch_interval:
3980		/* Match the characters as literals.  */
3981		p = beg_interval;
3982		c = '{';
3983		if (syntax & RE_NO_BK_BRACES)
3984		  goto normal_char;
3985		else
3986		  goto normal_backslash;
3987	      }
3988
3989#ifdef emacs
3990            /* There is no way to specify the before_dot and after_dot
3991               operators.  rms says this is ok.  --karl  */
3992            case '=':
3993              BUF_PUSH (at_dot);
3994              break;
3995
3996            case 's':
3997              laststart = b;
3998              PATFETCH (c);
3999              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
4000              break;
4001
4002            case 'S':
4003              laststart = b;
4004              PATFETCH (c);
4005              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4006              break;
4007#endif /* emacs */
4008
4009
4010            case 'w':
4011	      if (syntax & RE_NO_GNU_OPS)
4012		goto normal_char;
4013              laststart = b;
4014              BUF_PUSH (wordchar);
4015              break;
4016
4017
4018            case 'W':
4019	      if (syntax & RE_NO_GNU_OPS)
4020		goto normal_char;
4021              laststart = b;
4022              BUF_PUSH (notwordchar);
4023              break;
4024
4025
4026            case '<':
4027	      if (syntax & RE_NO_GNU_OPS)
4028		goto normal_char;
4029              BUF_PUSH (wordbeg);
4030              break;
4031
4032            case '>':
4033	      if (syntax & RE_NO_GNU_OPS)
4034		goto normal_char;
4035              BUF_PUSH (wordend);
4036              break;
4037
4038            case 'b':
4039	      if (syntax & RE_NO_GNU_OPS)
4040		goto normal_char;
4041              BUF_PUSH (wordbound);
4042              break;
4043
4044            case 'B':
4045	      if (syntax & RE_NO_GNU_OPS)
4046		goto normal_char;
4047              BUF_PUSH (notwordbound);
4048              break;
4049
4050            case '`':
4051	      if (syntax & RE_NO_GNU_OPS)
4052		goto normal_char;
4053              BUF_PUSH (begbuf);
4054              break;
4055
4056            case '\'':
4057	      if (syntax & RE_NO_GNU_OPS)
4058		goto normal_char;
4059              BUF_PUSH (endbuf);
4060              break;
4061
4062            case '1': case '2': case '3': case '4': case '5':
4063            case '6': case '7': case '8': case '9':
4064              if (syntax & RE_NO_BK_REFS)
4065                goto normal_char;
4066
4067              c1 = c - '0';
4068
4069              if (c1 > regnum)
4070                FREE_STACK_RETURN (REG_ESUBREG);
4071
4072              /* Can't back reference to a subexpression if inside of it.  */
4073              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4074                goto normal_char;
4075
4076              laststart = b;
4077              BUF_PUSH_2 (duplicate, c1);
4078              break;
4079
4080
4081            case '+':
4082            case '?':
4083              if (syntax & RE_BK_PLUS_QM)
4084                goto handle_plus;
4085              else
4086                goto normal_backslash;
4087
4088            default:
4089            normal_backslash:
4090              /* You might think it would be useful for \ to mean
4091                 not to translate; but if we don't translate it
4092                 it will never match anything.  */
4093              c = TRANSLATE (c);
4094              goto normal_char;
4095            }
4096          break;
4097
4098
4099	default:
4100        /* Expects the character in `c'.  */
4101	normal_char:
4102	      /* If no exactn currently being built.  */
4103          if (!pending_exact
4104#ifdef WCHAR
4105	      /* If last exactn handle binary(or character) and
4106		 new exactn handle character(or binary).  */
4107	      || is_exactn_bin != is_binary[p - 1 - pattern]
4108#endif /* WCHAR */
4109
4110              /* If last exactn not at current position.  */
4111              || pending_exact + *pending_exact + 1 != b
4112
4113              /* We have only one byte following the exactn for the count.  */
4114	      || *pending_exact == (1 << BYTEWIDTH) - 1
4115
4116              /* If followed by a repetition operator.  */
4117              || *p == '*' || *p == '^'
4118	      || ((syntax & RE_BK_PLUS_QM)
4119		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4120		  : (*p == '+' || *p == '?'))
4121	      || ((syntax & RE_INTERVALS)
4122                  && ((syntax & RE_NO_BK_BRACES)
4123		      ? *p == '{'
4124                      : (p[0] == '\\' && p[1] == '{'))))
4125	    {
4126	      /* Start building a new exactn.  */
4127
4128              laststart = b;
4129
4130#ifdef WCHAR
4131	      /* Is this exactn binary data or character? */
4132	      is_exactn_bin = is_binary[p - 1 - pattern];
4133	      if (is_exactn_bin)
4134		  BUF_PUSH_2 (exactn_bin, 0);
4135	      else
4136		  BUF_PUSH_2 (exactn, 0);
4137#else
4138	      BUF_PUSH_2 (exactn, 0);
4139#endif /* WCHAR */
4140	      pending_exact = b - 1;
4141            }
4142
4143	  BUF_PUSH (c);
4144          (*pending_exact)++;
4145	  break;
4146        } /* switch (c) */
4147    } /* while p != pend */
4148
4149
4150  /* Through the pattern now.  */
4151
4152  if (fixup_alt_jump)
4153    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4154
4155  if (!COMPILE_STACK_EMPTY)
4156    FREE_STACK_RETURN (REG_EPAREN);
4157
4158  /* If we don't want backtracking, force success
4159     the first time we reach the end of the compiled pattern.  */
4160  if (syntax & RE_NO_POSIX_BACKTRACKING)
4161    BUF_PUSH (succeed);
4162
4163#ifdef WCHAR
4164  free (pattern);
4165  free (mbs_offset);
4166  free (is_binary);
4167#endif
4168  free (compile_stack.stack);
4169
4170  /* We have succeeded; set the length of the buffer.  */
4171#ifdef WCHAR
4172  bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4173#else
4174  bufp->used = b - bufp->buffer;
4175#endif
4176
4177#ifdef DEBUG
4178  if (debug)
4179    {
4180      DEBUG_PRINT1 ("\nCompiled pattern: \n");
4181      PREFIX(print_compiled_pattern) (bufp);
4182    }
4183#endif /* DEBUG */
4184
4185#ifndef MATCH_MAY_ALLOCATE
4186  /* Initialize the failure stack to the largest possible stack.  This
4187     isn't necessary unless we're trying to avoid calling alloca in
4188     the search and match routines.  */
4189  {
4190    int num_regs = bufp->re_nsub + 1;
4191
4192    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4193       is strictly greater than re_max_failures, the largest possible stack
4194       is 2 * re_max_failures failure points.  */
4195    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4196      {
4197	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4198
4199# ifdef emacs
4200	if (! fail_stack.stack)
4201	  fail_stack.stack
4202	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4203				    * sizeof (PREFIX(fail_stack_elt_t)));
4204	else
4205	  fail_stack.stack
4206	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4207				     (fail_stack.size
4208				      * sizeof (PREFIX(fail_stack_elt_t))));
4209# else /* not emacs */
4210	if (! fail_stack.stack)
4211	  fail_stack.stack
4212	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4213				   * sizeof (PREFIX(fail_stack_elt_t)));
4214	else
4215	  fail_stack.stack
4216	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4217					    (fail_stack.size
4218				     * sizeof (PREFIX(fail_stack_elt_t))));
4219# endif /* not emacs */
4220      }
4221
4222   PREFIX(regex_grow_registers) (num_regs);
4223  }
4224#endif /* not MATCH_MAY_ALLOCATE */
4225
4226  return REG_NOERROR;
4227} /* regex_compile */
4228
4229/* Subroutines for `regex_compile'.  */
4230
4231/* Store OP at LOC followed by two-byte integer parameter ARG.  */
4232/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4233
4234static void
4235PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
4236{
4237  *loc = (UCHAR_T) op;
4238  STORE_NUMBER (loc + 1, arg);
4239}
4240
4241
4242/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
4243/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4244
4245static void
4246PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
4247{
4248  *loc = (UCHAR_T) op;
4249  STORE_NUMBER (loc + 1, arg1);
4250  STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4251}
4252
4253
4254/* Copy the bytes from LOC to END to open up three bytes of space at LOC
4255   for OP followed by two-byte integer parameter ARG.  */
4256/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4257
4258static void
4259PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
4260{
4261  register UCHAR_T *pfrom = end;
4262  register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4263
4264  while (pfrom != loc)
4265    *--pto = *--pfrom;
4266
4267  PREFIX(store_op1) (op, loc, arg);
4268}
4269
4270
4271/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
4272/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4273
4274static void
4275PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
4276                    int arg2, UCHAR_T *end)
4277{
4278  register UCHAR_T *pfrom = end;
4279  register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4280
4281  while (pfrom != loc)
4282    *--pto = *--pfrom;
4283
4284  PREFIX(store_op2) (op, loc, arg1, arg2);
4285}
4286
4287
4288/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
4289   after an alternative or a begin-subexpression.  We assume there is at
4290   least one character before the ^.  */
4291
4292static boolean
4293PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
4294                          reg_syntax_t syntax)
4295{
4296  const CHAR_T *prev = p - 2;
4297  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4298
4299  return
4300       /* After a subexpression?  */
4301       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4302       /* After an alternative?  */
4303    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4304}
4305
4306
4307/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
4308   at least one character after the $, i.e., `P < PEND'.  */
4309
4310static boolean
4311PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
4312                          reg_syntax_t syntax)
4313{
4314  const CHAR_T *next = p;
4315  boolean next_backslash = *next == '\\';
4316  const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4317
4318  return
4319       /* Before a subexpression?  */
4320       (syntax & RE_NO_BK_PARENS ? *next == ')'
4321        : next_backslash && next_next && *next_next == ')')
4322       /* Before an alternative?  */
4323    || (syntax & RE_NO_BK_VBAR ? *next == '|'
4324        : next_backslash && next_next && *next_next == '|');
4325}
4326
4327#else /* not INSIDE_RECURSION */
4328
4329/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4330   false if it's not.  */
4331
4332static boolean
4333group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
4334{
4335  int this_element;
4336
4337  for (this_element = compile_stack.avail - 1;
4338       this_element >= 0;
4339       this_element--)
4340    if (compile_stack.stack[this_element].regnum == regnum)
4341      return true;
4342
4343  return false;
4344}
4345#endif /* not INSIDE_RECURSION */
4346
4347#ifdef INSIDE_RECURSION
4348
4349#ifdef WCHAR
4350/* This insert space, which size is "num", into the pattern at "loc".
4351   "end" must point the end of the allocated buffer.  */
4352static void
4353insert_space (int num, CHAR_T *loc, CHAR_T *end)
4354{
4355  register CHAR_T *pto = end;
4356  register CHAR_T *pfrom = end - num;
4357
4358  while (pfrom >= loc)
4359    *pto-- = *pfrom--;
4360}
4361#endif /* WCHAR */
4362
4363#ifdef WCHAR
4364static reg_errcode_t
4365wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
4366                   const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
4367                   reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
4368{
4369  const CHAR_T *p = *p_ptr;
4370  CHAR_T range_start, range_end;
4371  reg_errcode_t ret;
4372# ifdef _LIBC
4373  uint32_t nrules;
4374  uint32_t start_val, end_val;
4375# endif
4376  if (p == pend)
4377    return REG_ERANGE;
4378
4379# ifdef _LIBC
4380  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4381  if (nrules != 0)
4382    {
4383      const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4384						       _NL_COLLATE_COLLSEQWC);
4385      const unsigned char *extra = (const unsigned char *)
4386	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4387
4388      if (range_start_char < -1)
4389	{
4390	  /* range_start is a collating symbol.  */
4391	  int32_t *wextra;
4392	  /* Retreive the index and get collation sequence value.  */
4393	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
4394	  start_val = wextra[1 + *wextra];
4395	}
4396      else
4397	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4398
4399      end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4400
4401      /* Report an error if the range is empty and the syntax prohibits
4402	 this.  */
4403      ret = ((syntax & RE_NO_EMPTY_RANGES)
4404	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4405
4406      /* Insert space to the end of the char_ranges.  */
4407      insert_space(2, b - char_set[5] - 2, b - 1);
4408      *(b - char_set[5] - 2) = (wchar_t)start_val;
4409      *(b - char_set[5] - 1) = (wchar_t)end_val;
4410      char_set[4]++; /* ranges_index */
4411    }
4412  else
4413# endif
4414    {
4415      range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4416	range_start_char;
4417      range_end = TRANSLATE (p[0]);
4418      /* Report an error if the range is empty and the syntax prohibits
4419	 this.  */
4420      ret = ((syntax & RE_NO_EMPTY_RANGES)
4421	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4422
4423      /* Insert space to the end of the char_ranges.  */
4424      insert_space(2, b - char_set[5] - 2, b - 1);
4425      *(b - char_set[5] - 2) = range_start;
4426      *(b - char_set[5] - 1) = range_end;
4427      char_set[4]++; /* ranges_index */
4428    }
4429  /* Have to increment the pointer into the pattern string, so the
4430     caller isn't still at the ending character.  */
4431  (*p_ptr)++;
4432
4433  return ret;
4434}
4435#else /* BYTE */
4436/* Read the ending character of a range (in a bracket expression) from the
4437   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
4438   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
4439   Then we set the translation of all bits between the starting and
4440   ending characters (inclusive) in the compiled pattern B.
4441
4442   Return an error code.
4443
4444   We use these short variable names so we can use the same macros as
4445   `regex_compile' itself.  */
4446
4447static reg_errcode_t
4448byte_compile_range (unsigned int range_start_char, const char **p_ptr,
4449                    const char *pend, RE_TRANSLATE_TYPE translate,
4450                    reg_syntax_t syntax, unsigned char *b)
4451{
4452  unsigned this_char;
4453  const char *p = *p_ptr;
4454  reg_errcode_t ret;
4455# if _LIBC
4456  const unsigned char *collseq;
4457  unsigned int start_colseq;
4458  unsigned int end_colseq;
4459# else
4460  unsigned end_char;
4461# endif
4462
4463  if (p == pend)
4464    return REG_ERANGE;
4465
4466  /* Have to increment the pointer into the pattern string, so the
4467     caller isn't still at the ending character.  */
4468  (*p_ptr)++;
4469
4470  /* Report an error if the range is empty and the syntax prohibits this.  */
4471  ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4472
4473# if _LIBC
4474  collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4475						 _NL_COLLATE_COLLSEQMB);
4476
4477  start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4478  end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4479  for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4480    {
4481      unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4482
4483      if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4484	{
4485	  SET_LIST_BIT (TRANSLATE (this_char));
4486	  ret = REG_NOERROR;
4487	}
4488    }
4489# else
4490  /* Here we see why `this_char' has to be larger than an `unsigned
4491     char' -- we would otherwise go into an infinite loop, since all
4492     characters <= 0xff.  */
4493  range_start_char = TRANSLATE (range_start_char);
4494  /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4495     and some compilers cast it to int implicitly, so following for_loop
4496     may fall to (almost) infinite loop.
4497     e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4498     To avoid this, we cast p[0] to unsigned int and truncate it.  */
4499  end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4500
4501  for (this_char = range_start_char; this_char <= end_char; ++this_char)
4502    {
4503      SET_LIST_BIT (TRANSLATE (this_char));
4504      ret = REG_NOERROR;
4505    }
4506# endif
4507
4508  return ret;
4509}
4510#endif /* WCHAR */
4511
4512/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4513   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4514   characters can start a string that matches the pattern.  This fastmap
4515   is used by re_search to skip quickly over impossible starting points.
4516
4517   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4518   area as BUFP->fastmap.
4519
4520   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4521   the pattern buffer.
4522
4523   Returns 0 on success, -2 on an internal error.   */
4524
4525#ifdef WCHAR
4526/* local function for re_compile_fastmap.
4527   truncate wchar_t character to char.  */
4528static unsigned char truncate_wchar (CHAR_T c);
4529
4530static unsigned char
4531truncate_wchar (CHAR_T c)
4532{
4533  unsigned char buf[MB_CUR_MAX];
4534  mbstate_t state;
4535  int retval;
4536  memset (&state, '\0', sizeof (state));
4537# ifdef _LIBC
4538  retval = __wcrtomb (buf, c, &state);
4539# else
4540  retval = wcrtomb (buf, c, &state);
4541# endif
4542  return retval > 0 ? buf[0] : (unsigned char) c;
4543}
4544#endif /* WCHAR */
4545
/* Fastmap builder shared by the byte and wide-character matchers; the
   public re_compile_fastmap dispatches to the byte_ and wcs_ instances
   of this function.

   Walks the compiled pattern in BUFP->buffer and sets fastmap[c] to 1
   for each byte value C with which a match can begin.  Alternatives are
   explored with the failure stack: jump-on-failure targets are pushed
   and each one is examined once the current path has been resolved.

   On return BUFP->fastmap_accurate is 1 and BUFP->can_be_null tells
   whether some path can match the empty string (it is also forced to 1
   for constructs the fastmap does not describe).  Returns 0 normally,
   or -2 if PUSH_PATTERN_OP fails.  */
4546static int
4547PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
4548{
4549  int j, k;
4550#ifdef MATCH_MAY_ALLOCATE
4551  PREFIX(fail_stack_type) fail_stack;
4552#endif
4553#ifndef REGEX_MALLOC
4554  char *destination;
4555#endif
4556
4557  register char *fastmap = bufp->fastmap;
4558
4559#ifdef WCHAR
4560  /* We need to cast pattern to (wchar_t*), because we casted this compiled
4561     pattern to (char*) in regex_compile.  */
4562  UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
4563  register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
4564#else /* BYTE */
4565  UCHAR_T *pattern = bufp->buffer;
4566  register UCHAR_T *pend = pattern + bufp->used;
4567#endif /* WCHAR */
4568  UCHAR_T *p = pattern;
4569
4570#ifdef REL_ALLOC
4571  /* This holds the pointer to the failure stack, when
4572     it is allocated relocatably.  */
4573  fail_stack_elt_t *failure_stack_ptr;
4574#endif
4575
4576  /* Assume that each path through the pattern can be null until
4577     proven otherwise.  We set this false at the bottom of switch
4578     statement, to which we get only if a particular path doesn't
4579     match the empty string.  */
4580  boolean path_can_be_null = true;
4581
4582  /* We aren't doing a `succeed_n' to begin with.  */
4583  boolean succeed_n_p = false;
4584
4585  assert (fastmap != NULL && p != NULL);
4586
4587  INIT_FAIL_STACK ();
4588  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4589  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4590  bufp->can_be_null = 0;
4591
  /* Each iteration handles one opcode of the current path.  Cases that
     match a character `break' out of the switch, which ends the path;
     cases that match the empty string `continue' with the next opcode.  */
4592  while (1)
4593    {
4594      if (p == pend || *p == (UCHAR_T) succeed)
4595	{
4596	  /* We have reached the (effective) end of pattern.  */
4597	  if (!FAIL_STACK_EMPTY ())
4598	    {
4599	      bufp->can_be_null |= path_can_be_null;
4600
4601	      /* Reset for next path.  */
4602	      path_can_be_null = true;
4603
	      /* Pop the most recently saved alternative and resume
		 scanning from there.  */
4604	      p = fail_stack.stack[--fail_stack.avail].pointer;
4605
4606	      continue;
4607	    }
4608	  else
4609	    break;
4610	}
4611
4612      /* We should never be about to go beyond the end of the pattern.  */
4613      assert (p < pend);
4614
4615      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4616	{
4617
4618        /* I guess the idea here is to simply not bother with a fastmap
4619           if a backreference is used, since it's too hard to figure out
4620           the fastmap for the corresponding group.  Setting
4621           `can_be_null' stops `re_search_2' from using the fastmap, so
4622           that is all we do.  */
4623	case duplicate:
4624	  bufp->can_be_null = 1;
4625          goto done;
4626
4627
4628      /* Following are the cases which match a character.  These end
4629         with `break'.  */
4630
	/* P now points just past the opcode.  p[1] is used as the first
	   character of the literal; p[0] is presumably its length --
	   TODO confirm against the compiler.  */
4631#ifdef WCHAR
4632	case exactn:
4633          fastmap[truncate_wchar(p[1])] = 1;
4634	  break;
4635#else /* BYTE */
4636	case exactn:
4637          fastmap[p[1]] = 1;
4638	  break;
4639#endif /* WCHAR */
4640#ifdef MBS_SUPPORT
4641	case exactn_bin:
4642	  fastmap[p[1]] = 1;
4643	  break;
4644#endif
4645
4646#ifdef WCHAR
4647        /* It is hard to distinguish fastmap from (multi byte) characters
4648           which depends on current locale.  */
4649        case charset:
4650	case charset_not:
4651	case wordchar:
4652	case notwordchar:
4653          bufp->can_be_null = 1;
4654          goto done;
4655#else /* BYTE */
	/* *p is the length of the bit list in bytes, each byte covering
	   BYTEWIDTH characters; mark every character whose bit is set.  */
4656        case charset:
4657          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4658	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4659              fastmap[j] = 1;
4660	  break;
4661
4662
4663	case charset_not:
4664	  /* Chars beyond end of map must be allowed.  */
4665	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4666            fastmap[j] = 1;
4667
	  /* Within the map, mark the characters whose bit is clear.  */
4668	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4669	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4670              fastmap[j] = 1;
4671          break;
4672
4673
4674	case wordchar:
4675	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4676	    if (SYNTAX (j) == Sword)
4677	      fastmap[j] = 1;
4678	  break;
4679
4680
4681	case notwordchar:
4682	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4683	    if (SYNTAX (j) != Sword)
4684	      fastmap[j] = 1;
4685	  break;
4686#endif /* WCHAR */
4687
4688        case anychar:
4689	  {
	    /* Remember the newline entry so it can be put back below.  */
4690	    int fastmap_newline = fastmap['\n'];
4691
4692	    /* `.' matches anything ...  */
4693	    for (j = 0; j < (1 << BYTEWIDTH); j++)
4694	      fastmap[j] = 1;
4695
4696	    /* ... except perhaps newline.  */
4697	    if (!(bufp->syntax & RE_DOT_NEWLINE))
4698	      fastmap['\n'] = fastmap_newline;
4699
4700	    /* Return if we have already set `can_be_null'; if we have,
4701	       then the fastmap is irrelevant.  Something's wrong here.  */
4702	    else if (bufp->can_be_null)
4703	      goto done;
4704
4705	    /* Otherwise, have to check alternative paths.  */
4706	    break;
4707	  }
4708
4709#ifdef emacs
4710        case syntaxspec:
4711	  k = *p++;
4712	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4713	    if (SYNTAX (j) == (enum syntaxcode) k)
4714	      fastmap[j] = 1;
4715	  break;
4716
4717
4718	case notsyntaxspec:
4719	  k = *p++;
4720	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4721	    if (SYNTAX (j) != (enum syntaxcode) k)
4722	      fastmap[j] = 1;
4723	  break;
4724
4725
4726      /* All cases after this match the empty string.  These end with
4727         `continue'.  */
4728
4729
4730	case before_dot:
4731	case at_dot:
4732	case after_dot:
4733          continue;
4734#endif /* emacs */
4735
4736
4737        case no_op:
4738        case begline:
4739        case endline:
4740	case begbuf:
4741	case endbuf:
4742	case wordbound:
4743	case notwordbound:
4744	case wordbeg:
4745	case wordend:
4746        case push_dummy_failure:
4747          continue;
4748
4749
4750	case jump_n:
4751        case pop_failure_jump:
4752	case maybe_pop_jump:
4753	case jump:
4754        case jump_past_alt:
4755	case dummy_failure_jump:
	  /* Read the jump offset into J and follow it.  */
4756          EXTRACT_NUMBER_AND_INCR (j, p);
4757	  p += j;
4758	  if (j > 0)
4759	    continue;
4760
4761          /* Jump backward implies we just went through the body of a
4762             loop and matched nothing.  Opcode jumped to should be
4763             `on_failure_jump' or `succeed_n'.  Just treat it like an
4764             ordinary jump.  For a * loop, it has pushed its failure
4765             point already; if so, discard that as redundant.  */
4766          if ((re_opcode_t) *p != on_failure_jump
4767	      && (re_opcode_t) *p != succeed_n)
4768	    continue;
4769
4770          p++;
4771          EXTRACT_NUMBER_AND_INCR (j, p);
4772          p += j;
4773
4774          /* If what's on the stack is where we are now, pop it.  */
4775          if (!FAIL_STACK_EMPTY ()
4776	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4777            fail_stack.avail--;
4778
4779          continue;
4780
4781
4782        case on_failure_jump:
4783        case on_failure_keep_string_jump:
4784	handle_on_failure_jump:
4785          EXTRACT_NUMBER_AND_INCR (j, p);
4786
4787          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4788             end of the pattern.  We don't want to push such a point,
4789             since when we restore it above, entering the switch will
4790             increment `p' past the end of the pattern.  We don't need
4791             to push such a point since we obviously won't find any more
4792             fastmap entries beyond `pend'.  Such a pattern can match
4793             the null string, though.  */
4794          if (p + j < pend)
4795            {
4796              if (!PUSH_PATTERN_OP (p + j, fail_stack))
4797		{
		  /* The alternative could not be saved: give up with
		     the internal-error code.  */
4798		  RESET_FAIL_STACK ();
4799		  return -2;
4800		}
4801            }
4802          else
4803            bufp->can_be_null = 1;
4804
	  /* We arrived here from `succeed_n' below, which rewound P to
	     the jump offset; step over the count as well.  */
4805          if (succeed_n_p)
4806            {
4807              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
4808              succeed_n_p = false;
4809	    }
4810
4811          continue;
4812
4813
4814	case succeed_n:
4815          /* Get to the number of times to succeed.  */
4816          p += OFFSET_ADDRESS_SIZE;
4817
4818          /* Increment p past the n for when k != 0.  */
4819          EXTRACT_NUMBER_AND_INCR (k, p);
4820          if (k == 0)
4821	    {
	      /* A count of zero is handled like `on_failure_jump': back
		 up over both numbers and reuse that case.  */
4822              p -= 2 * OFFSET_ADDRESS_SIZE;
4823  	      succeed_n_p = true;  /* Spaghetti code alert.  */
4824              goto handle_on_failure_jump;
4825            }
4826          continue;
4827
4828
4829	case set_number_at:
	  /* Step over the two numbers that follow the opcode.  */
4830          p += 2 * OFFSET_ADDRESS_SIZE;
4831          continue;
4832
4833
4834	case start_memory:
4835        case stop_memory:
	  /* Step over the two elements that follow the opcode.  */
4836	  p += 2;
4837	  continue;
4838
4839
4840	default:
4841          abort (); /* We have listed all the cases.  */
4842        } /* switch *p++ */
4843
4844      /* Getting here means we have found the possible starting
4845         characters for one path of the pattern -- and that the empty
4846         string does not match.  We need not follow this path further.
4847         Instead, look at the next alternative (remembered on the
4848         stack), or quit if no more.  The test at the top of the loop
4849         does these things.  */
4850      path_can_be_null = false;
4851      p = pend;
4852    } /* while p */
4853
4854  /* Set `can_be_null' for the last path (also the first path, if the
4855     pattern is empty).  */
4856  bufp->can_be_null |= path_can_be_null;
4857
4858 done:
4859  RESET_FAIL_STACK ();
4860  return 0;
4861}
4862
4863#else /* not INSIDE_RECURSION */
4864
/* Public entry point: build the fastmap for BUFP using the fastmap
   compiler that matches the character width of the current locale, and
   return that compiler's result.  */
int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  /* When the locale's characters may take more than one byte, the
     wide-character build of the compiler is used.  */
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif
  return byte_re_compile_fastmap (bufp);
} /* re_compile_fastmap */
4875#ifdef _LIBC
4876weak_alias (__re_compile_fastmap, re_compile_fastmap)
4877#endif
4878
4879
4880/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4881   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4882   this memory for recording register information.  STARTS and ENDS
4883   must be allocated using the malloc library routine, and must each
4884   be at least NUM_REGS * sizeof (regoff_t) bytes long.
4885
4886   If NUM_REGS == 0, then subsequent matches should allocate their own
4887   register data.
4888
4889   Unless this function is called, the first search or match using
4890   PATTERN_BUFFER will allocate its own register data, without
4891   freeing the old data.  */
4892
4893void
4894re_set_registers (struct re_pattern_buffer *bufp,
4895                  struct re_registers *regs, unsigned num_regs,
4896                  regoff_t *starts, regoff_t *ends)
4897{
4898  if (num_regs)
4899    {
4900      bufp->regs_allocated = REGS_REALLOCATE;
4901      regs->num_regs = num_regs;
4902      regs->start = starts;
4903      regs->end = ends;
4904    }
4905  else
4906    {
4907      bufp->regs_allocated = REGS_UNALLOCATED;
4908      regs->num_regs = 0;
4909      regs->start = regs->end = (regoff_t *) 0;
4910    }
4911}
4912#ifdef _LIBC
4913weak_alias (__re_set_registers, re_set_registers)
4914#endif
4915
4916/* Searching routines.  */
4917
/* Single-string form of re_search_2 (below): STRING, of length SIZE, is
   passed as the second string with an empty first string, and matching
   may run to the end of STRING -- there is no way to say where to stop.
   Returns whatever re_search_2 returns.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  const char *const no_first_string = NULL;
  const int stop = size;

  return re_search_2 (bufp, no_first_string, 0, string, size,
		      startpos, range, regs, stop);
}
4928#ifdef _LIBC
4929weak_alias (__re_search, re_search)
4930#endif
4931
4932
/* Search the virtual concatenation of STRING1 (of length SIZE1) and
   STRING2 (of length SIZE2) for the compiled pattern in BUFP->buffer.
   A match is tried first at index STARTPOS, then at STARTPOS + 1, and
   so on.

   RANGE is how far to scan while trying to match: RANGE = 0 means try
   only at STARTPOS, and in general the last start tried is STARTPOS +
   RANGE.

   STOP is an index in the virtual concatenation; matching one past it
   is not considered.

   In REGS, the indices in the virtual concatenation that matched the
   entire BUFP->buffer and its contained subexpressions are returned.

   The return value is the position in the strings at which the match
   was found, -1 if there is no match, or -2 on error (such as failure
   stack overflow).  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  /* When the locale's characters may take more than one byte, the
     wide-character build of the search routine is used.  */
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			    range, regs, stop);
# endif
  return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			   range, regs, stop);
} /* re_search_2 */
4968#ifdef _LIBC
4969weak_alias (__re_search_2, re_search_2)
4970#endif
4971
4972#endif /* not INSIDE_RECURSION */
4973
4974#ifdef INSIDE_RECURSION
4975
/* Release VAR and reset it to NULL.  With MATCH_MAY_ALLOCATE the memory
   is released through REGEX_FREE, and only when VAR is non-null;
   otherwise plain `free' is used.

   Both variants are wrapped in do { ... } while (0) so that the whole
   expansion is one statement.  Without the wrapper, using the macro as
   the unbraced body of an `if' or `else' would leave the `var = NULL'
   assignment outside the branch.  VAR is evaluated more than once and
   must be a modifiable lvalue.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      REGEX_FREE (var);							\
    var = NULL;								\
  } while (0)
#else
# define FREE_VAR(var)							\
  do {									\
    free (var);								\
    var = NULL;								\
  } while (0)
#endif
4981
4982#ifdef WCHAR
/* Size threshold used by PREFIX(re_search_2): for a string whose size
   exceeds this value the wide-character work buffers are obtained with
   TALLOC and released with `free'; otherwise they are obtained with
   REGEX_TALLOC and released with FREE_VAR.  */
4983# define MAX_ALLOCA_SIZE	2000
4984
/* Release the wcs_string1/mbs_offset1 and wcs_string2/mbs_offset2
   buffers of the enclosing function.  For each string the same size
   test as at allocation time (size1 or size2 against MAX_ALLOCA_SIZE)
   selects between `free' and FREE_VAR.  The macro names the locals
   size1, size2, wcs_string1, wcs_string2, mbs_offset1 and mbs_offset2
   directly, so all of them must be in scope where it is used.  */
4985# define FREE_WCS_BUFFERS() \
4986  do {									      \
4987    if (size1 > MAX_ALLOCA_SIZE)					      \
4988      {									      \
4989	free (wcs_string1);						      \
4990	free (mbs_offset1);						      \
4991      }									      \
4992    else								      \
4993      {									      \
4994	FREE_VAR (wcs_string1);						      \
4995	FREE_VAR (mbs_offset1);						      \
4996      }									      \
4997    if (size2 > MAX_ALLOCA_SIZE) 					      \
4998      {									      \
4999	free (wcs_string2);						      \
5000	free (mbs_offset2);						      \
5001      }									      \
5002    else								      \
5003      {									      \
5004	FREE_VAR (wcs_string2);						      \
5005	FREE_VAR (mbs_offset2);						      \
5006      }									      \
5007  } while (0)
5008
5009#endif
5010
5011
/* Search engine shared by the byte and wide-character matchers; the
   public re_search_2 dispatches to the byte_ and wcs_ instances of this
   function.

   Tries a match at STARTPOS and then at successive positions, moving
   forwards when RANGE is positive and backwards when it is negative,
   until a match is found or RANGE is used up.  When BUFP has a fastmap
   and the pattern cannot match the empty string, positions whose
   character is not in the fastmap are skipped without calling the
   matcher.

   Returns the position of the match, -1 if there is none (or STARTPOS
   is out of range), or -2 on an internal error: fastmap compilation
   failed, a wide-character buffer could not be allocated, or the
   matcher itself returned -2.  */
5012static int
5013PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
5014                     int size1, const char *string2, int size2,
5015                     int startpos, int range,
5016                     struct re_registers *regs, int stop)
5017{
5018  int val;
5019  register char *fastmap = bufp->fastmap;
5020  register RE_TRANSLATE_TYPE translate = bufp->translate;
5021  int total_size = size1 + size2;
5022  int endpos = startpos + range;
5023#ifdef WCHAR
5024  /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5025  wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
5026  /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5027  int wcs_size1 = 0, wcs_size2 = 0;
5028  /* offset buffer for optimization. See convert_mbs_to_wcs.  */
5029  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5030  /* They hold whether each wchar_t is binary data or not.  */
5031  char *is_binary = NULL;
5032#endif /* WCHAR */
5033
5034  /* Check for out-of-range STARTPOS.  */
5035  if (startpos < 0 || startpos > total_size)
5036    return -1;
5037
5038  /* Fix up RANGE if it might eventually take us outside
5039     the virtual concatenation of STRING1 and STRING2.
5040     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
5041  if (endpos < 0)
5042    range = 0 - startpos;
5043  else if (endpos > total_size)
5044    range = total_size - startpos;
5045
5046  /* If the search isn't to be a backwards one, don't waste time in a
5047     search for a pattern that must be anchored.  */
5048  if (bufp->used > 0 && range > 0
5049      && ((re_opcode_t) bufp->buffer[0] == begbuf
5050	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
5051	  || ((re_opcode_t) bufp->buffer[0] == begline
5052	      && !bufp->newline_anchor)))
5053    {
5054      if (startpos > 0)
5055	return -1;
5056      else
5057	range = 1;
5058    }
5059
5060#ifdef emacs
5061  /* In a forward search for something that starts with \=.
5062     don't keep searching past point.  */
5063  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5064    {
5065      range = PT - startpos;
5066      if (range <= 0)
5067	return -1;
5068    }
5069#endif /* emacs */
5070
5071  /* Update the fastmap now if not correct already.  */
5072  if (fastmap && !bufp->fastmap_accurate)
5073    if (re_compile_fastmap (bufp) == -2)
5074      return -2;
5075
5076#ifdef WCHAR
5077  /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5078     fill them with converted string.  */
5079  if (size1 != 0)
5080    {
      /* Strings larger than MAX_ALLOCA_SIZE use TALLOC (released with
	 free); smaller ones use REGEX_TALLOC (released with FREE_VAR).  */
5081      if (size1 > MAX_ALLOCA_SIZE)
5082	{
5083	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5084	  mbs_offset1 = TALLOC (size1 + 1, int);
5085	  is_binary = TALLOC (size1 + 1, char);
5086	}
5087      else
5088	{
5089	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5090	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5091	  is_binary = REGEX_TALLOC (size1 + 1, char);
5092	}
5093      if (!wcs_string1 || !mbs_offset1 || !is_binary)
5094	{
	  /* An allocation failed: release whatever was obtained and
	     report an internal error.  */
5095	  if (size1 > MAX_ALLOCA_SIZE)
5096	    {
5097	      free (wcs_string1);
5098	      free (mbs_offset1);
5099	      free (is_binary);
5100	    }
5101	  else
5102	    {
5103	      FREE_VAR (wcs_string1);
5104	      FREE_VAR (mbs_offset1);
5105	      FREE_VAR (is_binary);
5106	    }
5107	  return -2;
5108	}
5109      wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5110				     mbs_offset1, is_binary);
5111      wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
      /* IS_BINARY is not used in this function after the conversion.  */
5112      if (size1 > MAX_ALLOCA_SIZE)
5113	free (is_binary);
5114      else
5115	FREE_VAR (is_binary);
5116    }
5117  if (size2 != 0)
5118    {
5119      if (size2 > MAX_ALLOCA_SIZE)
5120	{
5121	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5122	  mbs_offset2 = TALLOC (size2 + 1, int);
5123	  is_binary = TALLOC (size2 + 1, char);
5124	}
5125      else
5126	{
5127	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5128	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5129	  is_binary = REGEX_TALLOC (size2 + 1, char);
5130	}
5131      if (!wcs_string2 || !mbs_offset2 || !is_binary)
5132	{
	  /* Release the buffers of both strings, then IS_BINARY.  */
5133	  FREE_WCS_BUFFERS ();
5134	  if (size2 > MAX_ALLOCA_SIZE)
5135	    free (is_binary);
5136	  else
5137	    FREE_VAR (is_binary);
5138	  return -2;
5139	}
5140      wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5141				     mbs_offset2, is_binary);
5142      wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
5143      if (size2 > MAX_ALLOCA_SIZE)
5144	free (is_binary);
5145      else
5146	FREE_VAR (is_binary);
5147    }
5148#endif /* WCHAR */
5149
5150
5151  /* Loop through the string, looking for a place to start matching.  */
5152  for (;;)
5153    {
5154      /* If a fastmap is supplied, skip quickly over characters that
5155         cannot be the start of a match.  If the pattern can match the
5156         null string, however, we don't need to skip characters; we want
5157         the first null string.  */
5158      if (fastmap && startpos < total_size && !bufp->can_be_null)
5159	{
5160	  if (range > 0)	/* Searching forwards.  */
5161	    {
5162	      register const char *d;
5163	      register int lim = 0;
5164	      int irange = range;
5165
	      /* If the scan starts in string1 but RANGE reaches into
		 string2, LIM is the part of RANGE lying beyond string1,
		 so the loops below stop at the end of string1.  */
5166              if (startpos < size1 && startpos + range >= size1)
5167                lim = range - (size1 - startpos);
5168
5169	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5170
5171              /* Written out as an if-else to avoid testing `translate'
5172                 inside the loop.  */
5173	      if (translate)
5174                while (range > lim
5175                       && !fastmap[(unsigned char)
5176				   translate[(unsigned char) *d++]])
5177                  range--;
5178	      else
5179                while (range > lim && !fastmap[(unsigned char) *d++])
5180                  range--;
5181
	      /* Advance STARTPOS by the number of characters skipped.  */
5182	      startpos += irange - range;
5183	    }
5184	  else				/* Searching backwards.  */
5185	    {
5186	      register CHAR_T c = (size1 == 0 || startpos >= size1
5187				      ? string2[startpos - size1]
5188				      : string1[startpos]);
5189
5190	      if (!fastmap[(unsigned char) TRANSLATE (c)])
5191		goto advance;
5192	    }
5193	}
5194
5195      /* If can't match the null string, and that's all we have left, fail.  */
5196      if (range >= 0 && startpos == total_size && fastmap
5197          && !bufp->can_be_null)
5198       {
5199#ifdef WCHAR
5200         FREE_WCS_BUFFERS ();
5201#endif
5202         return -1;
5203       }
5204
      /* Attempt a match at STARTPOS.  */
5205#ifdef WCHAR
5206      val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5207				     size2, startpos, regs, stop,
5208				     wcs_string1, wcs_size1,
5209				     wcs_string2, wcs_size2,
5210				     mbs_offset1, mbs_offset2);
5211#else /* BYTE */
5212      val = byte_re_match_2_internal (bufp, string1, size1, string2,
5213				      size2, startpos, regs, stop);
5214#endif /* BYTE */
5215
5216#ifndef REGEX_MALLOC
5217# ifdef C_ALLOCA
5218      alloca (0);
5219# endif
5220#endif
5221
      /* A non-negative result is a match: report where it started.  */
5222      if (val >= 0)
5223	{
5224#ifdef WCHAR
5225	  FREE_WCS_BUFFERS ();
5226#endif
5227	  return startpos;
5228	}
5229
      /* -2 from the matcher is passed on to the caller as is.  */
5230      if (val == -2)
5231	{
5232#ifdef WCHAR
5233	  FREE_WCS_BUFFERS ();
5234#endif
5235	  return -2;
5236	}
5237
      /* Move one position in the search direction, or stop once RANGE
	 is used up.  */
5238    advance:
5239      if (!range)
5240        break;
5241      else if (range > 0)
5242        {
5243          range--;
5244          startpos++;
5245        }
5246      else
5247        {
5248          range++;
5249          startpos--;
5250        }
5251    }
5252#ifdef WCHAR
5253  FREE_WCS_BUFFERS ();
5254#endif
5255  return -1;
5256}
5257
/* Both variants of POINTER_TO_OFFSET expand in terms of names taken
   from the function that uses them: FIRST_STRING_P, string1 and string2,
   plus mbs_offset1, mbs_offset2 and csize1 (WCHAR) or size1 (BYTE).
   PTR is evaluated more than once.  */
5258#ifdef WCHAR
5259/* This converts PTR, a pointer into one of the search wchar_t strings
5260   `string1' and `string2' into an multibyte string offset from the
5261   beginning of that string. We use mbs_offset to optimize.
5262   See convert_mbs_to_wcs.
   When the offset table for the string holding PTR is NULL, the offset
   within that string is taken to be 0 (csize1 is still added for the
   second string).  */
5263# define POINTER_TO_OFFSET(ptr)						\
5264  (FIRST_STRING_P (ptr)							\
5265   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
5266   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
5267		 + csize1)))
5268#else /* BYTE */
5269/* This converts PTR, a pointer into one of the search strings `string1'
5270   and `string2' into an offset from the beginning of that string.
   For a pointer into `string2', size1 is added to the offset.  */
5271# define POINTER_TO_OFFSET(ptr)			\
5272  (FIRST_STRING_P (ptr)				\
5273   ? ((regoff_t) ((ptr) - string1))		\
5274   : ((regoff_t) ((ptr) - string2 + size1)))
5275#endif /* WCHAR */
5276
5277/* Macros for dealing with the split strings in re_match_2.  */
5278
/* True while the current scan limit `dend' is `end_match_1'.  */
5279#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
5280
5281/* Call before fetching a character with *d.  This switches over to
5282   string2 if necessary.
   Uses the names d, dend, end_match_2 and string2 from the enclosing
   function, and jumps to that function's `fail' label when the end of
   string2 has been reached.  The expansion is a `while' statement
   rather than an expression.  */
5283#define PREFETCH()							\
5284  while (d == dend)						    	\
5285    {									\
5286      /* End of string2 => fail.  */					\
5287      if (dend == end_match_2) 						\
5288        goto fail;							\
5289      /* End of string1 => advance to string2.  */ 			\
5290      d = string2;						        \
5291      dend = end_match_2;						\
5292    }
5293
5294/* Test if at very beginning or at very end of the virtual concatenation
5295   of `string1' and `string2'.  If only one string, it's `string2'.
   NOTE(review): because of the `|| !size2' term, AT_STRINGS_BEG is true
   for every D whenever size2 is 0 -- confirm that this is intended.  */
5296#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
5297#define AT_STRINGS_END(d) ((d) == end2)
5298
5299
5300/* Test if D points to a character which is word-constituent.  We have
5301   two special cases to check for: if past the end of string1, look at
5302   the first character in string2; and if before the beginning of
5303   string2, look at the last character in string1.
   Both variants use end1 and string2 from the enclosing function and
   evaluate D several times, so D must have no side effects.  */
5304#ifdef WCHAR
5305/* Use internationalized API instead of SYNTAX.
   A character counts as word-constituent when iswalnum accepts it or
   when it is L'_'.  */
5306# define WORDCHAR_P(d)							\
5307  (iswalnum ((wint_t)((d) == end1 ? *string2				\
5308           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
5309   || ((d) == end1 ? *string2						\
5310       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
5311#else /* BYTE */
/* A character counts as word-constituent when its SYNTAX class is
   Sword.  */
5312# define WORDCHAR_P(d)							\
5313  (SYNTAX ((d) == end1 ? *string2					\
5314           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
5315   == Sword)
5316#endif /* WCHAR */
5317
5318/* Disabled due to a compiler bug -- see comment at case wordbound */
5319#if 0
5320/* Test if the character before D and the one at D differ with respect
5321   to being word-constituent.
   The very beginning and the very end of the strings also count as
   boundaries.  */
5322#define AT_WORD_BOUNDARY(d)						\
5323  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
5324   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
5325#endif
5326
5327/* Free everything we malloc.  */
/* FREE_VARIABLES has four variants, selected by MATCH_MAY_ALLOCATE and
   WCHAR.  Each one names locals of the function that uses it directly:
   with MATCH_MAY_ALLOCATE, fail_stack and the register arrays (regstart,
   regend, old_regstart, old_regend, best_regstart, best_regend,
   reg_info, reg_dummy, reg_info_dummy); with WCHAR, additionally
   string1, string2, mbs_offset1 and mbs_offset2, which are released only
   when cant_free_wcs_buf is zero.  Without either macro defined it
   expands to a no-op expression.  */
5328#ifdef MATCH_MAY_ALLOCATE
5329# ifdef WCHAR
5330#  define FREE_VARIABLES()						\
5331  do {									\
5332    REGEX_FREE_STACK (fail_stack.stack);				\
5333    FREE_VAR (regstart);						\
5334    FREE_VAR (regend);							\
5335    FREE_VAR (old_regstart);						\
5336    FREE_VAR (old_regend);						\
5337    FREE_VAR (best_regstart);						\
5338    FREE_VAR (best_regend);						\
5339    FREE_VAR (reg_info);						\
5340    FREE_VAR (reg_dummy);						\
5341    FREE_VAR (reg_info_dummy);						\
5342    if (!cant_free_wcs_buf)						\
5343      {									\
5344        FREE_VAR (string1);						\
5345        FREE_VAR (string2);						\
5346        FREE_VAR (mbs_offset1);						\
5347        FREE_VAR (mbs_offset2);						\
5348      }									\
5349  } while (0)
5350# else /* BYTE */
5351#  define FREE_VARIABLES()						\
5352  do {									\
5353    REGEX_FREE_STACK (fail_stack.stack);				\
5354    FREE_VAR (regstart);						\
5355    FREE_VAR (regend);							\
5356    FREE_VAR (old_regstart);						\
5357    FREE_VAR (old_regend);						\
5358    FREE_VAR (best_regstart);						\
5359    FREE_VAR (best_regend);						\
5360    FREE_VAR (reg_info);						\
5361    FREE_VAR (reg_dummy);						\
5362    FREE_VAR (reg_info_dummy);						\
5363  } while (0)
5364# endif /* WCHAR */
5365#else
5366# ifdef WCHAR
5367#  define FREE_VARIABLES()						\
5368  do {									\
5369    if (!cant_free_wcs_buf)						\
5370      {									\
5371        FREE_VAR (string1);						\
5372        FREE_VAR (string2);						\
5373        FREE_VAR (mbs_offset1);						\
5374        FREE_VAR (mbs_offset2);						\
5375      }									\
5376  } while (0)
5377# else /* BYTE */
5378#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
5379# endif /* WCHAR */
5380#endif /* not MATCH_MAY_ALLOCATE */
5381
5382/* These values must meet several constraints.  They must not be valid
5383   register values; since we have a limit of 255 registers (because
5384   we use only one byte in the pattern for the register number), we can
5385   use numbers larger than 255.  They must differ by 1, because of
5386   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
5387   be larger than the value for the highest register, so we do not try
5388   to actually save any registers when none are active.
   As defined below the two values are (1 << BYTEWIDTH) and one more
   than that, i.e. 256 and 257 when BYTEWIDTH is 8.  */
5389#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
5390#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
5391
5392#else /* not INSIDE_RECURSION */
5393/* Matching routines.  */
5394
5395#ifndef emacs   /* Emacs never uses this.  */
/* Single-string front end to the matcher.  STRING (of length SIZE) is
   handed to the internal routine as the second string, with an empty
   first string and SIZE as the stop position.  Matching starts at
   POS, and REGS is passed through untouched.  The return value is
   whatever the internal matcher returns.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int match_len;

# ifdef MBS_SUPPORT
  /* When MB_CUR_MAX is not 1, use the wide-character matcher.  The
     NULL/0 trailing arguments mean no pre-converted buffers are
     supplied, so it has to set them up itself.  */
  if (MB_CUR_MAX != 1)
    match_len = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                         pos, regs, size,
                                         NULL, 0, NULL, 0, NULL, NULL);
  else
    match_len = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                          pos, regs, size);
# else
  match_len = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                        pos, regs, size);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* The internal matcher has returned; force an alloca cleanup.  */
  alloca (0);
# endif

  return match_len;
}
5419# ifdef _LIBC
5420weak_alias (__re_match, re_match)
5421# endif
5422#endif /* not emacs */
5423
5424#endif /* not INSIDE_RECURSION */
5425
5426#ifdef INSIDE_RECURSION
5427static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
5428                                                  UCHAR_T *end,
5429					PREFIX(register_info_type) *reg_info);
5430static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
5431                                                UCHAR_T *end,
5432					PREFIX(register_info_type) *reg_info);
5433static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
5434                                                      UCHAR_T *end,
5435					PREFIX(register_info_type) *reg_info);
5436static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
5437                                   int len, char *translate);
5438#else /* not INSIDE_RECURSION */
5439
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if there is no match, -2 on an internal error (such as
   the failure stack overflowing).  Otherwise, we return the length of
   the matched substring.  */
5452
int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int match_len;

#ifdef MBS_SUPPORT
  /* When MB_CUR_MAX is not 1, use the wide-character matcher.  The
     NULL/0 trailing arguments mean no pre-converted buffers are
     supplied, so it has to set them up itself.  */
  if (MB_CUR_MAX != 1)
    match_len = wcs_re_match_2_internal (bufp, string1, size1,
                                         string2, size2,
                                         pos, regs, stop,
                                         NULL, 0, NULL, 0, NULL, NULL);
  else
    match_len = byte_re_match_2_internal (bufp, string1, size1,
                                          string2, size2,
                                          pos, regs, stop);
#else
  match_len = byte_re_match_2_internal (bufp, string1, size1,
                                        string2, size2,
                                        pos, regs, stop);
#endif

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* The internal matcher has returned; force an alloca cleanup.  */
  alloca (0);
#endif

  return match_len;
}
5476#ifdef _LIBC
5477weak_alias (__re_match_2, re_match_2)
5478#endif
5479
5480#endif /* not INSIDE_RECURSION */
5481
5482#ifdef INSIDE_RECURSION
5483
5484#ifdef WCHAR
static int count_mbs_length (int *, int);

/* Translate a length in the multibyte string into a count of wchar_t
   characters.

   OFFSET_BUFFER is the offset table that accompanies a converted
   string (see convert_mbs_to_wcs).  The search below assumes its
   entries never decrease and that offset_buffer[i] >= i; entries
   0 .. LENGTH must be readable, because offset_buffer[LENGTH] is
   inspected first.  LENGTH is measured from the start of the
   multibyte string.

   Returns the index I such that offset_buffer[I] == LENGTH, that is,
   how many wchar_t characters the first LENGTH bytes occupy.  Returns
   0 if OFFSET_BUFFER is NULL, and -1 if LENGTH is negative or no
   entry is equal to LENGTH.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Binary search strictly between the two end points.  Index LENGTH
     was rejected just above, and because offset_buffer[i] >= i no
     index beyond LENGTH can hold the answer.  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Deliberately not (lower + upper) / 2: that sum overflows int
         once LENGTH exceeds INT_MAX / 2.  */
      int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] > length)
        upper = middle;
      else if (offset_buffer[middle] < length)
        lower = middle;
      else
        return middle;
    }

  /* LENGTH does not coincide with any recorded offset.  */
  return -1;
}
5528#endif /* WCHAR */
5529
5530/* This is a separate function so that we can force an alloca cleanup
5531   afterwards.  */
5532#ifdef WCHAR
5533static int
5534wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
5535                         const char *cstring1, int csize1,
5536                         const char *cstring2, int csize2,
5537                         int pos,
5538			 struct re_registers *regs,
5539                         int stop,
     /* string1 == string2 == NULL means string1/2, size1/2 and
	mbs_offset1/2 need setting up in this function.  */
     /* We need wchar_t* buffers corresponding to cstring1, cstring2.  */
5543                         wchar_t *string1, int size1,
5544                         wchar_t *string2, int size2,
     /* offset buffer for optimization.  See convert_mbs_to_wcs.  */
5546			 int *mbs_offset1, int *mbs_offset2)
5547#else /* BYTE */
5548static int
5549byte_re_match_2_internal (struct re_pattern_buffer *bufp,
5550                          const char *string1, int size1,
5551                          const char *string2, int size2,
5552                          int pos,
5553			  struct re_registers *regs, int stop)
5554#endif /* BYTE */
5555{
5556  /* General temporaries.  */
5557  int mcnt;
5558  UCHAR_T *p1;
5559#ifdef WCHAR
5560  /* They hold whether each wchar_t is binary data or not.  */
5561  char *is_binary = NULL;
5562  /* If true, we can't free string1/2, mbs_offset1/2.  */
5563  int cant_free_wcs_buf = 1;
5564#endif /* WCHAR */
5565
5566  /* Just past the end of the corresponding string.  */
5567  const CHAR_T *end1, *end2;
5568
5569  /* Pointers into string1 and string2, just past the last characters in
5570     each to consider matching.  */
5571  const CHAR_T *end_match_1, *end_match_2;
5572
5573  /* Where we are in the data, and the end of the current string.  */
5574  const CHAR_T *d, *dend;
5575
5576  /* Where we are in the pattern, and the end of the pattern.  */
5577#ifdef WCHAR
5578  UCHAR_T *pattern, *p;
5579  register UCHAR_T *pend;
5580#else /* BYTE */
5581  UCHAR_T *p = bufp->buffer;
5582  register UCHAR_T *pend = p + bufp->used;
5583#endif /* WCHAR */
5584
5585  /* Mark the opcode just after a start_memory, so we can test for an
5586     empty subpattern when we get to the stop_memory.  */
5587  UCHAR_T *just_past_start_mem = 0;
5588
5589  /* We use this to map every character in the string.  */
5590  RE_TRANSLATE_TYPE translate = bufp->translate;
5591
  /* Failure point stack.  Each place that can handle a failure further
     down the line pushes a failure point on this stack.  It consists of
     regstart, regend, and reg_info for all registers corresponding to
     the subexpressions we're currently inside, plus the number of such
     registers, and, finally, two char *'s.  The first char * is where
     to resume scanning the pattern; the second one is where to resume
     scanning the strings.  If the latter is zero, the failure point is
     a ``dummy''; if a failure happens and the failure point is a dummy,
     it gets discarded and the next one is tried.  */
5601#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5602  PREFIX(fail_stack_type) fail_stack;
5603#endif
5604#ifdef DEBUG
5605  static unsigned failure_id;
5606  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5607#endif
5608
5609#ifdef REL_ALLOC
5610  /* This holds the pointer to the failure stack, when
5611     it is allocated relocatably.  */
5612  fail_stack_elt_t *failure_stack_ptr;
5613#endif
5614
5615  /* We fill all the registers internally, independent of what we
5616     return, for use in backreferences.  The number here includes
5617     an element for register zero.  */
5618  size_t num_regs = bufp->re_nsub + 1;
5619
5620  /* The currently active registers.  */
5621  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5622  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5623
5624  /* Information on the contents of registers. These are pointers into
5625     the input strings; they record just what was matched (on this
5626     attempt) by a subexpression part of the pattern, that is, the
5627     regnum-th regstart pointer points to where in the pattern we began
5628     matching and the regnum-th regend points to right after where we
5629     stopped matching the regnum-th subexpression.  (The zeroth register
5630     keeps track of what the whole pattern matches.)  */
5631#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5632  const CHAR_T **regstart, **regend;
5633#endif
5634
5635  /* If a group that's operated upon by a repetition operator fails to
5636     match anything, then the register for its start will need to be
5637     restored because it will have been set to wherever in the string we
5638     are when we last see its open-group operator.  Similarly for a
5639     register's end.  */
5640#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5641  const CHAR_T **old_regstart, **old_regend;
5642#endif
5643
5644  /* The is_active field of reg_info helps us keep track of which (possibly
5645     nested) subexpressions we are currently in. The matched_something
5646     field of reg_info[reg_num] helps us tell whether or not we have
5647     matched any of the pattern so far this time through the reg_num-th
5648     subexpression.  These two fields get reset each time through any
5649     loop their register is in.  */
5650#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5651  PREFIX(register_info_type) *reg_info;
5652#endif
5653
5654  /* The following record the register info as found in the above
5655     variables when we find a match better than any we've seen before.
5656     This happens as we backtrack through the failure points, which in
5657     turn happens only if we have not yet matched the entire string. */
5658  unsigned best_regs_set = false;
5659#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5660  const CHAR_T **best_regstart, **best_regend;
5661#endif
5662
5663  /* Logically, this is `best_regend[0]'.  But we don't want to have to
5664     allocate space for that if we're not allocating space for anything
5665     else (see below).  Also, we never need info about register 0 for
5666     any of the other register vectors, and it seems rather a kludge to
5667     treat `best_regend' differently than the rest.  So we keep track of
5668     the end of the best match so far in a separate variable.  We
5669     initialize this to NULL so that when we backtrack the first time
5670     and need to test it, it's not garbage.  */
5671  const CHAR_T *match_end = NULL;
5672
5673  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
5674  int set_regs_matched_done = 0;
5675
5676  /* Used when we pop values we don't care about.  */
5677#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5678  const CHAR_T **reg_dummy;
5679  PREFIX(register_info_type) *reg_info_dummy;
5680#endif
5681
5682#ifdef DEBUG
5683  /* Counts the total number of registers pushed.  */
5684  unsigned num_regs_pushed = 0;
5685#endif
5686
5687  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5688
5689  INIT_FAIL_STACK ();
5690
5691#ifdef MATCH_MAY_ALLOCATE
5692  /* Do not bother to initialize all the register variables if there are
5693     no groups in the pattern, as it takes a fair amount of time.  If
5694     there are groups, we include space for register 0 (the whole
5695     pattern), even though we never use it, since it simplifies the
5696     array indexing.  We should fix this.  */
5697  if (bufp->re_nsub)
5698    {
5699      regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5700      regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5701      old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5702      old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5703      best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5704      best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5705      reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5706      reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5707      reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5708
5709      if (!(regstart && regend && old_regstart && old_regend && reg_info
5710            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5711        {
5712          FREE_VARIABLES ();
5713          return -2;
5714        }
5715    }
5716  else
5717    {
5718      /* We must initialize all our variables to NULL, so that
5719         `FREE_VARIABLES' doesn't try to free them.  */
5720      regstart = regend = old_regstart = old_regend = best_regstart
5721        = best_regend = reg_dummy = NULL;
5722      reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5723    }
5724#endif /* MATCH_MAY_ALLOCATE */
5725
5726  /* The starting position is bogus.  */
5727#ifdef WCHAR
5728  if (pos < 0 || pos > csize1 + csize2)
5729#else /* BYTE */
5730  if (pos < 0 || pos > size1 + size2)
5731#endif
5732    {
5733      FREE_VARIABLES ();
5734      return -1;
5735    }
5736
5737#ifdef WCHAR
5738  /* Allocate wchar_t array for string1 and string2 and
5739     fill them with converted string.  */
5740  if (string1 == NULL && string2 == NULL)
5741    {
      /* We need to set up the buffers here.  */
5743
5744      /* We must free wcs buffers in this function.  */
5745      cant_free_wcs_buf = 0;
5746
5747      if (csize1 != 0)
5748	{
5749	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5750	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5751	  is_binary = REGEX_TALLOC (csize1 + 1, char);
5752	  if (!string1 || !mbs_offset1 || !is_binary)
5753	    {
5754	      FREE_VAR (string1);
5755	      FREE_VAR (mbs_offset1);
5756	      FREE_VAR (is_binary);
5757	      return -2;
5758	    }
5759	}
5760      if (csize2 != 0)
5761	{
5762	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5763	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5764	  is_binary = REGEX_TALLOC (csize2 + 1, char);
5765	  if (!string2 || !mbs_offset2 || !is_binary)
5766	    {
5767	      FREE_VAR (string1);
5768	      FREE_VAR (mbs_offset1);
5769	      FREE_VAR (string2);
5770	      FREE_VAR (mbs_offset2);
5771	      FREE_VAR (is_binary);
5772	      return -2;
5773	    }
5774	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5775				     mbs_offset2, is_binary);
5776	  string2[size2] = L'\0'; /* for a sentinel  */
5777	  FREE_VAR (is_binary);
5778	}
5779    }
5780
5781  /* We need to cast pattern to (wchar_t*), because we casted this compiled
5782     pattern to (char*) in regex_compile.  */
5783  p = pattern = (CHAR_T*)bufp->buffer;
5784  pend = (CHAR_T*)(bufp->buffer + bufp->used);
5785
5786#endif /* WCHAR */
5787
5788  /* Initialize subexpression text positions to -1 to mark ones that no
5789     start_memory/stop_memory has been seen for. Also initialize the
5790     register information struct.  */
5791  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5792    {
5793      regstart[mcnt] = regend[mcnt]
5794        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
5795
5796      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
5797      IS_ACTIVE (reg_info[mcnt]) = 0;
5798      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5799      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5800    }
5801
5802  /* We move `string1' into `string2' if the latter's empty -- but not if
5803     `string1' is null.  */
5804  if (size2 == 0 && string1 != NULL)
5805    {
5806      string2 = string1;
5807      size2 = size1;
5808      string1 = 0;
5809      size1 = 0;
5810#ifdef WCHAR
5811      mbs_offset2 = mbs_offset1;
5812      csize2 = csize1;
5813      mbs_offset1 = NULL;
5814      csize1 = 0;
5815#endif
5816    }
5817  end1 = string1 + size1;
5818  end2 = string2 + size2;
5819
5820  /* Compute where to stop matching, within the two strings.  */
5821#ifdef WCHAR
5822  if (stop <= csize1)
5823    {
5824      mcnt = count_mbs_length(mbs_offset1, stop);
5825      end_match_1 = string1 + mcnt;
5826      end_match_2 = string2;
5827    }
5828  else
5829    {
5830      if (stop > csize1 + csize2)
5831	stop = csize1 + csize2;
5832      end_match_1 = end1;
5833      mcnt = count_mbs_length(mbs_offset2, stop-csize1);
5834      end_match_2 = string2 + mcnt;
5835    }
5836  if (mcnt < 0)
5837    { /* count_mbs_length return error.  */
5838      FREE_VARIABLES ();
5839      return -1;
5840    }
5841#else
5842  if (stop <= size1)
5843    {
5844      end_match_1 = string1 + stop;
5845      end_match_2 = string2;
5846    }
5847  else
5848    {
5849      end_match_1 = end1;
5850      end_match_2 = string2 + stop - size1;
5851    }
5852#endif /* WCHAR */
5853
5854  /* `p' scans through the pattern as `d' scans through the data.
5855     `dend' is the end of the input string that `d' points within.  `d'
5856     is advanced into the following input string whenever necessary, but
5857     this happens before fetching; therefore, at the beginning of the
5858     loop, `d' can be pointing at the end of a string, but it cannot
5859     equal `string2'.  */
5860#ifdef WCHAR
5861  if (size1 > 0 && pos <= csize1)
5862    {
5863      mcnt = count_mbs_length(mbs_offset1, pos);
5864      d = string1 + mcnt;
5865      dend = end_match_1;
5866    }
5867  else
5868    {
5869      mcnt = count_mbs_length(mbs_offset2, pos-csize1);
5870      d = string2 + mcnt;
5871      dend = end_match_2;
5872    }
5873
5874  if (mcnt < 0)
5875    { /* count_mbs_length return error.  */
5876      FREE_VARIABLES ();
5877      return -1;
5878    }
5879#else
5880  if (size1 > 0 && pos <= size1)
5881    {
5882      d = string1 + pos;
5883      dend = end_match_1;
5884    }
5885  else
5886    {
5887      d = string2 + pos - size1;
5888      dend = end_match_2;
5889    }
5890#endif /* WCHAR */
5891
5892  DEBUG_PRINT1 ("The compiled pattern is:\n");
5893  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5894  DEBUG_PRINT1 ("The string to match is: `");
5895  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5896  DEBUG_PRINT1 ("'\n");
5897
5898  /* This loops over pattern commands.  It exits by returning from the
5899     function if the match is complete, or it drops through if the match
5900     fails at this starting point in the input data.  */
5901  for (;;)
5902    {
5903#ifdef _LIBC
5904      DEBUG_PRINT2 ("\n%p: ", p);
5905#else
5906      DEBUG_PRINT2 ("\n0x%x: ", p);
5907#endif
5908
5909      if (p == pend)
5910	{ /* End of pattern means we might have succeeded.  */
5911          DEBUG_PRINT1 ("end of pattern ... ");
5912
5913	  /* If we haven't matched the entire string, and we want the
5914             longest match, try backtracking.  */
5915          if (d != end_match_2)
5916	    {
5917	      /* 1 if this match ends in the same string (string1 or string2)
5918		 as the best previous match.  */
5919	      boolean same_str_p;
5920
5921	      /* 1 if this match is the best seen so far.  */
5922	      boolean best_match_p;
5923
5924              same_str_p = (FIRST_STRING_P (match_end)
5925                            == MATCHING_IN_FIRST_STRING);
5926
5927	      /* AIX compiler got confused when this was combined
5928		 with the previous declaration.  */
5929	      if (same_str_p)
5930		best_match_p = d > match_end;
5931	      else
5932		best_match_p = !MATCHING_IN_FIRST_STRING;
5933
5934              DEBUG_PRINT1 ("backtracking.\n");
5935
5936              if (!FAIL_STACK_EMPTY ())
5937                { /* More failure points to try.  */
5938
5939                  /* If exceeds best match so far, save it.  */
5940                  if (!best_regs_set || best_match_p)
5941                    {
5942                      best_regs_set = true;
5943                      match_end = d;
5944
5945                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5946
5947                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5948                        {
5949                          best_regstart[mcnt] = regstart[mcnt];
5950                          best_regend[mcnt] = regend[mcnt];
5951                        }
5952                    }
5953                  goto fail;
5954                }
5955
5956              /* If no failure points, don't restore garbage.  And if
5957                 last match is real best match, don't restore second
5958                 best one. */
5959              else if (best_regs_set && !best_match_p)
5960                {
5961  	        restore_best_regs:
5962                  /* Restore best match.  It may happen that `dend ==
5963                     end_match_1' while the restored d is in string2.
5964                     For example, the pattern `x.*y.*z' against the
5965                     strings `x-' and `y-z-', if the two strings are
5966                     not consecutive in memory.  */
5967                  DEBUG_PRINT1 ("Restoring best registers.\n");
5968
5969                  d = match_end;
5970                  dend = ((d >= string1 && d <= end1)
5971		           ? end_match_1 : end_match_2);
5972
5973		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5974		    {
5975		      regstart[mcnt] = best_regstart[mcnt];
5976		      regend[mcnt] = best_regend[mcnt];
5977		    }
5978                }
5979            } /* d != end_match_2 */
5980
5981	succeed_label:
5982          DEBUG_PRINT1 ("Accepting match.\n");
5983          /* If caller wants register contents data back, do it.  */
5984          if (regs && !bufp->no_sub)
5985	    {
5986	      /* Have the register data arrays been allocated?  */
5987              if (bufp->regs_allocated == REGS_UNALLOCATED)
5988                { /* No.  So allocate them with malloc.  We need one
5989                     extra element beyond `num_regs' for the `-1' marker
5990                     GNU code uses.  */
5991                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5992                  regs->start = TALLOC (regs->num_regs, regoff_t);
5993                  regs->end = TALLOC (regs->num_regs, regoff_t);
5994                  if (regs->start == NULL || regs->end == NULL)
5995		    {
5996		      FREE_VARIABLES ();
5997		      return -2;
5998		    }
5999                  bufp->regs_allocated = REGS_REALLOCATE;
6000                }
6001              else if (bufp->regs_allocated == REGS_REALLOCATE)
6002                { /* Yes.  If we need more elements than were already
6003                     allocated, reallocate them.  If we need fewer, just
6004                     leave it alone.  */
6005                  if (regs->num_regs < num_regs + 1)
6006                    {
6007                      regs->num_regs = num_regs + 1;
6008                      RETALLOC (regs->start, regs->num_regs, regoff_t);
6009                      RETALLOC (regs->end, regs->num_regs, regoff_t);
6010                      if (regs->start == NULL || regs->end == NULL)
6011			{
6012			  FREE_VARIABLES ();
6013			  return -2;
6014			}
6015                    }
6016                }
6017              else
6018		{
6019		  /* These braces fend off a "empty body in an else-statement"
6020		     warning under GCC when assert expands to nothing.  */
6021		  assert (bufp->regs_allocated == REGS_FIXED);
6022		}
6023
6024              /* Convert the pointer data in `regstart' and `regend' to
6025                 indices.  Register zero has to be set differently,
6026                 since we haven't kept track of any info for it.  */
6027              if (regs->num_regs > 0)
6028                {
6029                  regs->start[0] = pos;
6030#ifdef WCHAR
6031		  if (MATCHING_IN_FIRST_STRING)
6032		    regs->end[0] = mbs_offset1 != NULL ?
6033					mbs_offset1[d-string1] : 0;
6034		  else
6035		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
6036					     mbs_offset2[d-string2] : 0);
6037#else
6038                  regs->end[0] = (MATCHING_IN_FIRST_STRING
6039				  ? ((regoff_t) (d - string1))
6040			          : ((regoff_t) (d - string2 + size1)));
6041#endif /* WCHAR */
6042                }
6043
6044              /* Go through the first `min (num_regs, regs->num_regs)'
6045                 registers, since that is all we initialized.  */
6046	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6047		   mcnt++)
6048		{
6049                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
6050                    regs->start[mcnt] = regs->end[mcnt] = -1;
6051                  else
6052                    {
6053		      regs->start[mcnt]
6054			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6055                      regs->end[mcnt]
6056			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6057                    }
6058		}
6059
6060              /* If the regs structure we return has more elements than
6061                 were in the pattern, set the extra elements to -1.  If
6062                 we (re)allocated the registers, this is the case,
6063                 because we always allocate enough to have at least one
6064                 -1 at the end.  */
6065              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6066                regs->start[mcnt] = regs->end[mcnt] = -1;
6067	    } /* regs && !bufp->no_sub */
6068
6069          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6070                        nfailure_points_pushed, nfailure_points_popped,
6071                        nfailure_points_pushed - nfailure_points_popped);
6072          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6073
6074#ifdef WCHAR
6075	  if (MATCHING_IN_FIRST_STRING)
6076	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6077	  else
6078	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6079			csize1;
6080          mcnt -= pos;
6081#else
6082          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6083			    ? string1
6084			    : string2 - size1);
6085#endif /* WCHAR */
6086
6087          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6088
6089          FREE_VARIABLES ();
6090          return mcnt;
6091        }
6092
6093      /* Otherwise match next pattern command.  */
6094      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6095	{
6096        /* Ignore these.  Used to ignore the n of succeed_n's which
6097           currently have n == 0.  */
6098        case no_op:
6099          DEBUG_PRINT1 ("EXECUTING no_op.\n");
6100          break;
6101
6102	case succeed:
6103          DEBUG_PRINT1 ("EXECUTING succeed.\n");
6104	  goto succeed_label;
6105
6106        /* Match the next n pattern characters exactly.  The following
6107           byte in the pattern defines n, and the n bytes after that
6108           are the characters to match.  */
6109	case exactn:
6110#ifdef MBS_SUPPORT
6111	case exactn_bin:
6112#endif
6113	  mcnt = *p++;
6114          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6115
6116          /* This is written out as an if-else so we don't waste time
6117             testing `translate' inside the loop.  */
6118          if (translate)
6119	    {
6120	      do
6121		{
6122		  PREFETCH ();
6123#ifdef WCHAR
6124		  if (*d <= 0xff)
6125		    {
6126		      if ((UCHAR_T) translate[(unsigned char) *d++]
6127			  != (UCHAR_T) *p++)
6128			goto fail;
6129		    }
6130		  else
6131		    {
6132		      if (*d++ != (CHAR_T) *p++)
6133			goto fail;
6134		    }
6135#else
6136		  if ((UCHAR_T) translate[(unsigned char) *d++]
6137		      != (UCHAR_T) *p++)
6138                    goto fail;
6139#endif /* WCHAR */
6140		}
6141	      while (--mcnt);
6142	    }
6143	  else
6144	    {
6145	      do
6146		{
6147		  PREFETCH ();
6148		  if (*d++ != (CHAR_T) *p++) goto fail;
6149		}
6150	      while (--mcnt);
6151	    }
6152	  SET_REGS_MATCHED ();
6153          break;
6154
6155
6156        /* Match any character except possibly a newline or a null.  */
6157	case anychar:
6158          DEBUG_PRINT1 ("EXECUTING anychar.\n");
6159
6160          PREFETCH ();
6161
6162          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6163              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6164	    goto fail;
6165
6166          SET_REGS_MATCHED ();
6167          DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
6168          d++;
6169	  break;
6170
6171
6172	case charset:
6173	case charset_not:
6174	  {
6175	    register UCHAR_T c;
6176#ifdef WCHAR
6177	    unsigned int i, char_class_length, coll_symbol_length,
6178              equiv_class_length, ranges_length, chars_length, length;
6179	    CHAR_T *workp, *workp2, *charset_top;
6180#define WORK_BUFFER_SIZE 128
6181            CHAR_T str_buf[WORK_BUFFER_SIZE];
6182# ifdef _LIBC
6183	    uint32_t nrules;
6184# endif /* _LIBC */
6185#endif /* WCHAR */
6186	    boolean negate = (re_opcode_t) *(p - 1) == charset_not;
6187
6188            DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
6189	    PREFETCH ();
6190	    c = TRANSLATE (*d); /* The character to match.  */
6191#ifdef WCHAR
6192# ifdef _LIBC
6193	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6194# endif /* _LIBC */
6195	    charset_top = p - 1;
6196	    char_class_length = *p++;
6197	    coll_symbol_length = *p++;
6198	    equiv_class_length = *p++;
6199	    ranges_length = *p++;
6200	    chars_length = *p++;
6201	    /* p points charset[6], so the address of the next instruction
6202	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
6203	       where l=length of char_classes, m=length of collating_symbol,
6204	       n=equivalence_class, o=length of char_range,
6205	       p'=length of character.  */
6206	    workp = p;
6207	    /* Update p to indicate the next instruction.  */
6208	    p += char_class_length + coll_symbol_length+ equiv_class_length +
6209              2*ranges_length + chars_length;
6210
6211            /* match with char_class?  */
6212	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6213	      {
6214		wctype_t wctype;
6215		uintptr_t alignedp = ((uintptr_t)workp
6216				      + __alignof__(wctype_t) - 1)
6217		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
6218		wctype = *((wctype_t*)alignedp);
6219		workp += CHAR_CLASS_SIZE;
6220# ifdef _LIBC
6221		if (__iswctype((wint_t)c, wctype))
6222		  goto char_set_matched;
6223# else
6224		if (iswctype((wint_t)c, wctype))
6225		  goto char_set_matched;
6226# endif
6227	      }
6228
6229            /* match with collating_symbol?  */
6230# ifdef _LIBC
6231	    if (nrules != 0)
6232	      {
6233		const unsigned char *extra = (const unsigned char *)
6234		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6235
6236		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6237		     workp++)
6238		  {
6239		    int32_t *wextra;
6240		    wextra = (int32_t*)(extra + *workp++);
6241		    for (i = 0; i < *wextra; ++i)
6242		      if (TRANSLATE(d[i]) != wextra[1 + i])
6243			break;
6244
6245		    if (i == *wextra)
6246		      {
6247			/* Update d, however d will be incremented at
6248			   char_set_matched:, we decrement d here.  */
6249			d += i - 1;
6250			goto char_set_matched;
6251		      }
6252		  }
6253	      }
6254	    else /* (nrules == 0) */
6255# endif
6256	      /* If we can't look up collation data, we use wcscoll
6257		 instead.  */
6258	      {
6259		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6260		  {
6261		    const CHAR_T *backup_d = d, *backup_dend = dend;
6262# ifdef _LIBC
6263		    length = __wcslen (workp);
6264# else
6265		    length = wcslen (workp);
6266# endif
6267
6268		    /* If wcscoll(the collating symbol, whole string) > 0,
6269		       no substring of the string can ever match the
6270		       collating symbol.  */
6271# ifdef _LIBC
6272		    if (__wcscoll (workp, d) > 0)
6273# else
6274		    if (wcscoll (workp, d) > 0)
6275# endif
6276		      {
6277			workp += length + 1;
6278			continue;
6279		      }
6280
6281		    /* First, we compare the collating symbol with
6282		       the first character of the string.
6283		       If it doesn't match, we add the next character to
6284		       the compare buffer in turn.  */
6285		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6286		      {
6287			int match;
6288			if (d == dend)
6289			  {
6290			    if (dend == end_match_2)
6291			      break;
6292			    d = string2;
6293			    dend = end_match_2;
6294			  }
6295
6296			/* add next character to the compare buffer.  */
6297			str_buf[i] = TRANSLATE(*d);
6298			str_buf[i+1] = '\0';
6299
6300# ifdef _LIBC
6301			match = __wcscoll (workp, str_buf);
6302# else
6303			match = wcscoll (workp, str_buf);
6304# endif
6305			if (match == 0)
6306			  goto char_set_matched;
6307
6308			if (match < 0)
6309			  /* (str_buf > workp) implies (str_buf + X > workp),
6310			     because for all X (str_buf + X > str_buf).
6311			     So we don't need to continue this loop.  */
6312			  break;
6313
6314			/* Otherwise (str_buf < workp),
6315			   (str_buf+next_character) may equal (workp).
6316			   So we continue this loop.  */
6317		      }
6318		    /* not matched */
6319		    d = backup_d;
6320		    dend = backup_dend;
6321		    workp += length + 1;
6322		  }
6323              }
6324            /* match with equivalence_class?  */
6325# ifdef _LIBC
6326	    if (nrules != 0)
6327	      {
6328                const CHAR_T *backup_d = d, *backup_dend = dend;
6329		/* Try to match the equivalence class against
6330		   those known to the collate implementation.  */
6331		const int32_t *table;
6332		const int32_t *weights;
6333		const int32_t *extra;
6334		const int32_t *indirect;
6335		int32_t idx, idx2;
6336		wint_t *cp;
6337		size_t len;
6338
6339		/* This #include defines a local function!  */
6340#  include <locale/weightwc.h>
6341
6342		table = (const int32_t *)
6343		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6344		weights = (const wint_t *)
6345		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6346		extra = (const wint_t *)
6347		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6348		indirect = (const int32_t *)
6349		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6350
6351		/* Write 1 collating element to str_buf, and
6352		   get its index.  */
6353		idx2 = 0;
6354
6355		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6356		  {
6357		    cp = (wint_t*)str_buf;
6358		    if (d == dend)
6359		      {
6360			if (dend == end_match_2)
6361			  break;
6362			d = string2;
6363			dend = end_match_2;
6364		      }
6365		    str_buf[i] = TRANSLATE(*(d+i));
6366		    str_buf[i+1] = '\0'; /* sentinel */
6367		    idx2 = findidx ((const wint_t**)&cp);
6368		  }
6369
6370		/* Update d, however d will be incremented at
6371		   char_set_matched:, we decrement d here.  */
6372		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6373		if (d >= dend)
6374		  {
6375		    if (dend == end_match_2)
6376			d = dend;
6377		    else
6378		      {
6379			d = string2;
6380			dend = end_match_2;
6381		      }
6382		  }
6383
6384		len = weights[idx2];
6385
6386		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6387		     workp++)
6388		  {
6389		    idx = (int32_t)*workp;
6390		    /* We already checked idx != 0 in regex_compile. */
6391
6392		    if (idx2 != 0 && len == weights[idx])
6393		      {
6394			int cnt = 0;
6395			while (cnt < len && (weights[idx + 1 + cnt]
6396					     == weights[idx2 + 1 + cnt]))
6397			  ++cnt;
6398
6399			if (cnt == len)
6400			  goto char_set_matched;
6401		      }
6402		  }
6403		/* not matched */
6404                d = backup_d;
6405                dend = backup_dend;
6406	      }
6407	    else /* (nrules == 0) */
6408# endif
6409	      /* No collation tables to consult: walk the NUL-terminated
6410		 equivalence-class strings in [workp, workp2) and compare each with wcscoll.  */
6411	      {
6412		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6413		  {
6414		    const CHAR_T *backup_d = d, *backup_dend = dend;
6415# ifdef _LIBC
6416		    length = __wcslen (workp);
6417# else
6418		    length = wcslen (workp);
6419# endif
6420
6421		    /* If wcscoll(this equivalence class, whole string) > 0,
6422		       no prefix of the string can equal this class, so move on
6423		       to the next class.  (workp must still end up at workp2.)  */
6424# ifdef _LIBC
6425		    if (__wcscoll (workp, d) > 0)
6426# else
6427		    if (wcscoll (workp, d) > 0)
6428# endif
6429		      {
6430			workp += length + 1;
6431			continue;	/* Not `break': the range loop that follows starts reading at workp.  */
6432		      }
6433
6434		    /* First, we compare the equivalence class with
6435		       the first character of the string.
6436		       If it doesn't match, we add the next character to
6437		       the compare buffer in turn.  */
6438		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6439		      {
6440			int match;
6441			if (d == dend)
6442			  {
6443			    if (dend == end_match_2)
6444			      break;
6445			    d = string2;
6446			    dend = end_match_2;
6447			  }
6448
6449			/* add next character to the compare buffer.  */
6450			str_buf[i] = TRANSLATE(*d);
6451			str_buf[i+1] = '\0';
6452
6453# ifdef _LIBC
6454			match = __wcscoll (workp, str_buf);
6455# else
6456			match = wcscoll (workp, str_buf);
6457# endif
6458
6459			if (match == 0)
6460			  goto char_set_matched;
6461
6462			if (match < 0)
6463			/* (str_buf > workp) implies (str_buf + X > workp),
6464			   because for all X (str_buf + X > str_buf).
6465			   So we don't need to continue this loop.  */
6466			  break;
6467
6468			/* Otherwise (str_buf < workp),
6469			   (str_buf+next_character) may equal (workp).
6470			   So we continue this loop.  */
6471		      }
6472		    /* not matched: undo the lookahead and step to the next class.  */
6473		    d = backup_d;
6474		    dend = backup_dend;
6475		    workp += length + 1;
6476		  }
6477	      }
6478
6479            /* match with char_range?  */
6480# ifdef _LIBC
6481	    if (nrules != 0)
6482	      {
6483		uint32_t collseqval;
6484		const char *collseq = (const char *)
6485		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6486
6487		collseqval = collseq_table_lookup (collseq, c);
6488
6489		for (; workp < p - chars_length ;)
6490		  {
6491		    uint32_t start_val, end_val;
6492
6493		    /* We already compute the collation sequence value
6494		       of the characters (or collating symbols).  */
6495		    start_val = (uint32_t) *workp++; /* range_start */
6496		    end_val = (uint32_t) *workp++; /* range_end */
6497
6498		    if (start_val <= collseqval && collseqval <= end_val)
6499		      goto char_set_matched;
6500		  }
6501	      }
6502	    else
6503# endif
6504	      {
6505		/* We set range_start_char at str_buf[0], range_end_char
6506		   at str_buf[4], and compared char at str_buf[2].  */
6507		str_buf[1] = 0;
6508		str_buf[2] = c;
6509		str_buf[3] = 0;
6510		str_buf[5] = 0;
6511		for (; workp < p - chars_length ;)
6512		  {
6513		    wchar_t *range_start_char, *range_end_char;
6514
6515		    /* match if (range_start_char <= c <= range_end_char).  */
6516
6517		    /* If range_start(or end) < 0, we assume -range_start(end)
6518		       is the offset of the collating symbol which is specified
6519		       as the character of the range start(end).  */
6520
6521		    /* range_start */
6522		    if (*workp < 0)
6523		      range_start_char = charset_top - (*workp++);
6524		    else
6525		      {
6526			str_buf[0] = *workp++;
6527			range_start_char = str_buf;
6528		      }
6529
6530		    /* range_end */
6531		    if (*workp < 0)
6532		      range_end_char = charset_top - (*workp++);
6533		    else
6534		      {
6535			str_buf[4] = *workp++;
6536			range_end_char = str_buf + 4;
6537		      }
6538
6539# ifdef _LIBC
6540		    if (__wcscoll (range_start_char, str_buf+2) <= 0
6541			&& __wcscoll (str_buf+2, range_end_char) <= 0)
6542# else
6543		    if (wcscoll (range_start_char, str_buf+2) <= 0
6544			&& wcscoll (str_buf+2, range_end_char) <= 0)
6545# endif
6546		      goto char_set_matched;
6547		  }
6548	      }
6549
6550            /* match with char?  */
6551	    for (; workp < p ; workp++)
6552	      if (c == *workp)
6553		goto char_set_matched;
6554
6555	    negate = !negate;
6556
6557	  char_set_matched:
6558	    if (negate) goto fail;
6559#else
6560            /* Cast to `unsigned' instead of `unsigned char' in case the
6561               bit list is a full 32 bytes long.  */
6562	    if (c < (unsigned) (*p * BYTEWIDTH)
6563		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6564	      negate = !negate;
6565
6566	    p += 1 + *p;
6567
6568	    if (!negate) goto fail;
6569#undef WORK_BUFFER_SIZE
6570#endif /* WCHAR */
6571	    SET_REGS_MATCHED ();
6572            d++;
6573	    break;
6574	  }
6575
6576
6577        /* The beginning of a group is represented by start_memory.
6578           Its two operands are the register number (*p) and the
6579           number of groups nested inside this one (p[1]).  The text
6580           matched within the group is recorded (in the internal
6581           registers data structure) under the register number.  */
6582        case start_memory:
6583	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6584			(long int) *p, (long int) p[1]);
6585
6586          /* Find out if this group can match the empty string (computed only while the cached value is still unset).  */
6587	  p1 = p;		/* Scratch copy handed by address to group_match_null_string_p.  */
6588
6589          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6590            REG_MATCH_NULL_STRING_P (reg_info[*p])
6591              = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6592
6593          /* Save the position in the string where we were the last time
6594             we were at this open-group operator in case the group is
6595             operated upon by a repetition operator, e.g., with `(a*)*b'
6596             against `ab'; then we want to ignore where we are now in
6597             the string in case this attempt to match fails.  If the group can match null and regstart is still unset, d is saved instead.  */
6598          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6599                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6600                             : regstart[*p];
6601	  DEBUG_PRINT2 ("  old_regstart: %d\n",
6602			 POINTER_TO_OFFSET (old_regstart[*p]));
6603
6604          regstart[*p] = d;
6605	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6606
6607          IS_ACTIVE (reg_info[*p]) = 1;
6608          MATCHED_SOMETHING (reg_info[*p]) = 0;
6609
6610	  /* Clear this whenever we change the register activity status.  */
6611	  set_regs_matched_done = 0;
6612
6613          /* This is the new highest active register.  */
6614          highest_active_reg = *p;
6615
6616          /* If nothing was active before, this is the new lowest active
6617             register.  */
6618          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6619            lowest_active_reg = *p;
6620
6621          /* Move past the register number and inner group count.  */
6622          p += 2;
6623	  just_past_start_mem = p;	/* stop_memory compares this against its own p - 1.  */
6624
6625          break;
6626
6627
6628        /* The stop_memory opcode represents the end of a group.  Its
6629           arguments are the same as start_memory's: the register
6630           number (*p), and the number of inner groups (p[1]).  */
6631	case stop_memory:
6632	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6633			(long int) *p, (long int) p[1]);
6634
6635          /* We need to save the string position the last time we were at
6636             this close-group operator in case the group is operated
6637             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6638             against `aba'; then we want to ignore where we are now in
6639             the string in case this attempt to match fails.  */
6640          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6641                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
6642			   : regend[*p];
6643	  DEBUG_PRINT2 ("      old_regend: %d\n",
6644			 POINTER_TO_OFFSET (old_regend[*p]));
6645
6646          regend[*p] = d;
6647	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6648
6649          /* This register isn't active anymore.  */
6650          IS_ACTIVE (reg_info[*p]) = 0;
6651
6652	  /* Clear this whenever we change the register activity status.  */
6653	  set_regs_matched_done = 0;
6654
6655          /* If this was the only register active, nothing is active
6656             anymore.  */
6657          if (lowest_active_reg == highest_active_reg)
6658            {
6659              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6660              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6661            }
6662          else
6663            { /* We must scan for the new highest active register, since
6664                 it isn't necessarily one less than now: consider
6665                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
6666                 new highest active register is 1.  */
6667              UCHAR_T r = *p - 1;
6668              while (r > 0 && !IS_ACTIVE (reg_info[r]))
6669                r--;
6670
6671              /* If we end up at register zero, that means that we saved
6672                 the registers as the result of an `on_failure_jump', not
6673                 a `start_memory', and we jumped to past the innermost
6674                 `stop_memory'.  For example, in ((.)*) we save
6675                 registers 1 and 2 as a result of the *, but when we pop
6676                 back to the second ), we are at the stop_memory 1.
6677                 Thus, nothing is active.  */
6678	      if (r == 0)
6679                {
6680                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6681                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6682                }
6683              else
6684                highest_active_reg = r;
6685            }
6686
6687          /* If we just failed to match something this time around with a
6688             group that's operated on by a repetition operator (the group
6689             matched nothing, or this stop_memory directly follows the
6690             start_memory operands), try to force exit from the ``loop'', and
6691             restore the register information we had before this last match.  */
6692          if ((!MATCHED_SOMETHING (reg_info[*p])
6693               || just_past_start_mem == p - 1)
6694	      && (p + 2) < pend)
6695            {
6696              boolean is_a_jump_n = false;
6697
6698              p1 = p + 2;
6699              mcnt = 0;
6700              switch ((re_opcode_t) *p1++)
6701                {
6702                  case jump_n:
6703		    is_a_jump_n = true;
6704		    /* Fall through.  */
6705                  case pop_failure_jump:
6706		  case maybe_pop_jump:
6707		  case jump:
6708		  case dummy_failure_jump:
6709                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6710		    if (is_a_jump_n)
6711		      p1 += OFFSET_ADDRESS_SIZE;	/* jump_n: skip one more operand-sized field (presumably its count).  */
6712                    break;
6713
6714                  default:
6715                    /* do nothing */ ;
6716                }
6717	      p1 += mcnt;	/* Follow the jump; mcnt is still 0 when the next opcode is not a jump.  */
6718
6719              /* If the next operation is a jump backwards in the pattern
6720	         to an on_failure_jump right before the start_memory
6721                 corresponding to this stop_memory, exit from the loop
6722                 by forcing a failure after pushing on the stack the
6723                 on_failure_jump's jump in the pattern, and d.  */
6724              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6725                  && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6726		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6727		{
6728                  /* If this group ever matched anything, then restore
6729                     what its registers were before trying this last
6730                     failed match, e.g., with `(a*)*b' against `ab' for
6731                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
6732                     against `aba' for regend[3].
6733
6734                     Also restore the registers for inner groups for,
6735                     e.g., `((a*)(b*))*' against `aba' (register 3 would
6736                     otherwise get trashed).  */
6737
6738                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6739		    {
6740		      unsigned r;
6741
6742                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6743
6744		      /* Restore this and inner groups' (if any) registers.  */
6745                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6746			   r++)
6747                        {
6748                          regstart[r] = old_regstart[r];
6749
6750                          /* xx why this test?  (As written: regend is restored only when the saved end is not before the restored start.)  */
6751                          if (old_regend[r] >= regstart[r])
6752                            regend[r] = old_regend[r];
6753                        }
6754                    }
6755		  p1++;	/* Step over the on_failure_jump opcode to its offset operand.  */
6756                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6757                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6758
6759                  goto fail;
6760                }
6761            }
6762
6763          /* Move past the register number and the inner group count.  */
6764          p += 2;
6765          break;
6766
6767
6768	/* \<digit> has been turned into a `duplicate' command which is
6769           followed by the numeric value of <digit> as the register number.
           The text recorded for that register must reappear at d.  */
6770        case duplicate:
6771	  {
6772	    register const CHAR_T *d2, *dend2;
6773	    int regno = *p++;   /* Get which register to match against.  */
6774	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6775
6776	    /* Can't back reference a group which we've never matched.  */
6777            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6778              goto fail;
6779
6780            /* Where in input to try to start matching.  */
6781            d2 = regstart[regno];
6782
6783            /* Where to stop matching; if both the place to start and
6784               the place to stop matching are in the same string, then
6785               set to the place to stop, otherwise, for now have to use
6786               the end of the first string.  */
6787
6788            dend2 = ((FIRST_STRING_P (regstart[regno])
6789		      == FIRST_STRING_P (regend[regno]))
6790		     ? regend[regno] : end_match_1);
6791	    for (;;)
6792	      {
6793		/* If necessary, advance to next segment in register
6794                   contents.  */
6795		while (d2 == dend2)
6796		  {
6797		    if (dend2 == end_match_2) break;
6798		    if (dend2 == regend[regno]) break;
6799
6800                    /* End of string1 => advance to string2. */
6801                    d2 = string2;
6802                    dend2 = regend[regno];
6803		  }
6804		/* At end of register contents => success */
6805		if (d2 == dend2) break;
6806
6807		/* If necessary, advance to next segment in data.  */
6808		PREFETCH ();
6809
6810		/* How many characters left in this segment to match.  */
6811		mcnt = dend - d;
6812
6813		/* Want how many consecutive characters we can match in
6814                   one shot, so, if necessary, adjust the count.  */
6815                if (mcnt > dend2 - d2)
6816		  mcnt = dend2 - d2;
6817
6818		/* Compare that many; failure if mismatch, else move
6819                   past them.  A nonzero result from either comparison means mismatch.  */
6820		if (translate
6821                    ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6822                    : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
6823		  goto fail;
6824		d += mcnt, d2 += mcnt;
6825
6826		/* Do this because we've matched some characters.  */
6827		SET_REGS_MATCHED ();
6828	      }
6829	  }
6830	  break;
6831
6832
6833        /* begline matches the empty string at the beginning of the string
6834           (unless `not_bol' is set in `bufp'), and, if
6835           `newline_anchor' is set, after newlines.  It never advances d.  */
6836	case begline:
6837          DEBUG_PRINT1 ("EXECUTING begline.\n");
6838
6839          if (AT_STRINGS_BEG (d))
6840            {
6841              if (!bufp->not_bol) break;
6842            }
6843          else if (d[-1] == '\n' && bufp->newline_anchor)
6844            {
6845              break;
6846            }
6847          /* In all other cases (including at the beginning with not_bol set), we fail.  */
6848          goto fail;
6849
6850
6851        /* endline is the dual of begline: it succeeds at the end of the strings unless `not_eol' is set, or, if `newline_anchor' is set, just before a newline.  */
6852	case endline:
6853          DEBUG_PRINT1 ("EXECUTING endline.\n");
6854
6855          if (AT_STRINGS_END (d))
6856            {
6857              if (!bufp->not_eol) break;
6858            }
6859
6860          /* We have to ``prefetch'' the next character: peek without advancing d, reading the first character of string2 when d is at end1.  */
6861          else if ((d == end1 ? *string2 : *d) == '\n'
6862                   && bufp->newline_anchor)
6863            {
6864              break;
6865            }
6866          goto fail;
6867
6868
6869	/* Match at the very beginning of the data: succeed only when AT_STRINGS_BEG (d) holds; d is not advanced.  */
6870        case begbuf:
6871          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
6872          if (AT_STRINGS_BEG (d))
6873            break;
6874          goto fail;
6875
6876
6877	/* Match at the very end of the data: succeed only when AT_STRINGS_END (d) holds; d is not advanced.  */
6878        case endbuf:
6879          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
6880	  if (AT_STRINGS_END (d))
6881	    break;
6882          goto fail;
6883
6884
6885        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
6886           pushes NULL as the value for the string on the stack.  Then
6887           `pop_failure_point' will keep the current value for the
6888           string, instead of restoring it.  To see why, consider
6889           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
6890           then the . fails against the \n.  But the next thing we want
6891           to do is match the \n against the \n; if we restored the
6892           string value, we would be back at the foo.
6893
6894           Because this is used only in specific cases, we don't need to
6895           check all the things that `on_failure_jump' does, to make
6896           sure the right things get saved on the stack.  Hence we don't
6897           share its code.  The only reason to push anything on the
6898           stack at all is that otherwise we would have to change
6899           `anychar's code to do something besides goto fail in this
6900           case; that seems worse than this.  */
6901        case on_failure_keep_string_jump:
6902          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
6903
6904          EXTRACT_NUMBER_AND_INCR (mcnt, p);
6905#ifdef _LIBC
6906          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
6907#else
6908          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
6909#endif
6910
6911          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);	/* NULL string: see the comment above this case.  */
6912          break;
6913
6914
6915	/* Uses of on_failure_jump:
6916
6917           Each alternative starts with an on_failure_jump that points
6918           to the beginning of the next alternative.  Each alternative
6919           except the last ends with a jump that in effect jumps past
6920           the rest of the alternatives.  (They really jump to the
6921           ending jump of the following alternative, because tensioning
6922           these jumps is a hassle.)
6923
6924           Repeats start with an on_failure_jump that points past both
6925           the repetition text and either the following jump or
6926           pop_failure_jump back to this on_failure_jump.  */
6927	case on_failure_jump:
6928        on_failure:
6929          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
6930
6931          EXTRACT_NUMBER_AND_INCR (mcnt, p);
6932#ifdef _LIBC
6933          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
6934#else
6935          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
6936#endif
6937
6938          /* If this on_failure_jump comes right before a group (i.e.,
6939             the original * applied to a group), save the information
6940             for that group and all inner ones, so that if we fail back
6941             to this point, the group's information will be correct.
6942             For example, in \(a*\)*\1, we need the preceding group,
6943             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
6944
6945          /* We can't use `p' to check ahead because we push
6946             a failure point to `p + mcnt' after we do this.  */
6947          p1 = p;
6948
6949          /* We need to skip no_op's before we look for the
6950             start_memory in case this on_failure_jump is happening as
6951             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
6952             against aba.  */
6953          while (p1 < pend && (re_opcode_t) *p1 == no_op)
6954            p1++;
6955
6956          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
6957            {
6958              /* We have a new highest active register now (the group's
6959                 register number plus its inner group count).  This will
6960                 get reset at the start_memory we are about to get to,
6961                 but we will have saved all the registers relevant to this repetition op, as described above.  */
6962              highest_active_reg = *(p1 + 1) + *(p1 + 2);
6963              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6964                lowest_active_reg = *(p1 + 1);
6965            }
6966
6967          DEBUG_PRINT1 (":\n");
6968          PUSH_FAILURE_POINT (p + mcnt, d, -2);
6969          break;
6970
6971
6972        /* A smart repeat ends with `maybe_pop_jump'.
6973	   We change it to either `pop_failure_jump' or `jump' by overwriting the opcode in the pattern.  */
6974        case maybe_pop_jump:
6975          EXTRACT_NUMBER_AND_INCR (mcnt, p);
6976          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
6977          {
6978	    register UCHAR_T *p2 = p;
6979
6980            /* Compare the beginning of the repeat with what in the
6981               pattern follows its end. If we can establish that there
6982               is nothing that they would both match, i.e., that we
6983               would have to backtrack because of (as in, e.g., `a*a')
6984               then we can change to pop_failure_jump, because we'll
6985               never have to backtrack.
6986
6987               This is not true in the case of alternatives: in
6988               `(a|ab)*' we do need to backtrack to the `ab' alternative
6989               (e.g., if the string was `ab').  But instead of trying to
6990               detect that here, the alternative has put on a dummy
6991               failure point which is what we will end up popping.  */
6992
6993	    /* Skip over open/close-group commands.
6994	       If what follows this loop is a ...+ construct,
6995	       look at what begins its body, since we will have to
6996	       match at least one of that.  */
6997	    while (1)
6998	      {
6999		if (p2 + 2 < pend
7000		    && ((re_opcode_t) *p2 == stop_memory
7001			|| (re_opcode_t) *p2 == start_memory))
7002		  p2 += 3;
7003		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
7004			 && (re_opcode_t) *p2 == dummy_failure_jump)
7005		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
7006		else
7007		  break;
7008	      }
7009
7010	    p1 = p + mcnt;
7011	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7012	       to the `maybe_pop_jump' of this case.  Examine what
7013	       follows.  */
7014
7015            /* If we're at the end of the pattern, we can change.  */
7016            if (p2 == pend)
7017	      {
7018		/* Consider what happens when matching ":\(.*\)"
7019		   against ":/".  I don't really understand this code
7020		   yet.  */
7021  	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7022		  pop_failure_jump;
7023                DEBUG_PRINT1
7024                  ("  End of pattern: change to `pop_failure_jump'.\n");
7025              }
7026
7027            else if ((re_opcode_t) *p2 == exactn
7028#ifdef MBS_SUPPORT
7029		     || (re_opcode_t) *p2 == exactn_bin
7030#endif
7031		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7032	      {
7033		register UCHAR_T c
7034                  = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7035
7036                if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7037#ifdef MBS_SUPPORT
7038		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7039#endif
7040		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7041                  {
7042  		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7043		      pop_failure_jump;
7044#ifdef WCHAR
7045		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
7046				    (wint_t) c,
7047				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7048#else
7049		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
7050				    (char) c,
7051				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
7052#endif
7053                  }
7054
7055#ifndef WCHAR
7056		else if ((re_opcode_t) p1[3] == charset
7057			 || (re_opcode_t) p1[3] == charset_not)
7058		  {
7059		    int negate = (re_opcode_t) p1[3] == charset_not;
7060
7061		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
7062			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7063		      negate = !negate;
7064
7065                    /* `negate' is equal to 1 if c would match, which means
7066                        that we can't change to pop_failure_jump.  */
7067		    if (!negate)
7068                      {
7069  		        p[-3] = (unsigned char) pop_failure_jump;
7070                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7071                      }
7072		  }
7073#endif /* not WCHAR */
7074	      }
7075#ifndef WCHAR
7076            else if ((re_opcode_t) *p2 == charset)
7077	      {
7078		/* We win if the first character of the loop is not part
7079                   of the charset.  */
7080                if ((re_opcode_t) p1[3] == exactn
7081 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7082 			  && (p2[2 + p1[5] / BYTEWIDTH]
7083 			      & (1 << (p1[5] % BYTEWIDTH)))))
7084		  {
7085		    p[-3] = (unsigned char) pop_failure_jump;
7086		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7087                  }
7088
7089		else if ((re_opcode_t) p1[3] == charset_not)
7090		  {
7091		    int idx;
7092		    /* We win if the charset_not inside the loop
7093		       lists every character listed in the charset after.  */
7094		    for (idx = 0; idx < (int) p2[1]; idx++)
7095		      if (! (p2[2 + idx] == 0
7096			     || (idx < (int) p1[4]
7097				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7098			break;
7099
7100		    if (idx == p2[1])
7101                      {
7102  		        p[-3] = (unsigned char) pop_failure_jump;
7103                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7104                      }
7105		  }
7106		else if ((re_opcode_t) p1[3] == charset)
7107		  {
7108		    int idx;
7109		    /* We win if the charset inside the loop
7110		       has no overlap with the one after the loop.  */
7111		    for (idx = 0;
7112			 idx < (int) p2[1] && idx < (int) p1[4];
7113			 idx++)
7114		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
7115			break;
7116
7117		    if (idx == p2[1] || idx == p1[4])
7118                      {
7119  		        p[-3] = (unsigned char) pop_failure_jump;
7120                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7121                      }
7122		  }
7123	      }
7124#endif /* not WCHAR */
7125	  }
7126	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
7127	  if ((re_opcode_t) p[-1] != pop_failure_jump)
7128	    {
7129	      p[-1] = (UCHAR_T) jump;	/* Nothing above chose pop_failure_jump, so make it a plain jump.  */
7130              DEBUG_PRINT1 ("  Match => jump.\n");
7131	      goto unconditional_jump;
7132	    }
7133        /* Fall through: the opcode is now pop_failure_jump.  */
7134
7135
7136	/* The end of a simple repeat has a pop_failure_jump back to
7137           its matching on_failure_jump, where the latter will push a
7138           failure point.  The pop_failure_jump takes off failure
7139           points put on by this pop_failure_jump's matching
7140           on_failure_jump; we got through the pattern to here from the
7141           matching on_failure_jump, so didn't fail.  */
7142        case pop_failure_jump:
7143          {
7144            /* We need to pass separate storage for the lowest and
7145               highest registers, even though we don't care about the
7146               actual values.  Otherwise, we will restore only one
7147               register from the stack, since lowest will == highest in
7148               `pop_failure_point'.  */
7149            active_reg_t dummy_low_reg, dummy_high_reg;
7150            UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL;
7151            const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL;
7152
7153            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7154            POP_FAILURE_POINT (sdummy, pdummy,
7155                               dummy_low_reg, dummy_high_reg,
7156                               reg_dummy, reg_dummy, reg_info_dummy);
7157          }
7158	  /* Fall through: the popped values went into dummies, so p and d are untouched for the jump below.  */
7159
7160	unconditional_jump:
7161#ifdef _LIBC
7162	  DEBUG_PRINT2 ("\n%p: ", p);
7163#else
7164	  DEBUG_PRINT2 ("\n0x%x: ", p);
7165#endif
7166          /* Note fall through.  */
7167
7168        /* Unconditionally jump (without popping any failure points).  */
7169        case jump:
7170	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
7171          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7172	  p += mcnt;				/* Do the jump.  */
7173#ifdef _LIBC
7174          DEBUG_PRINT2 ("(to %p).\n", p);
7175#else
7176          DEBUG_PRINT2 ("(to 0x%x).\n", p);
7177#endif
7178	  break;
7179
7180
7181        /* We need this opcode so we can detect where alternatives end
7182           in `group_match_null_string_p' et al.  */
7183        case jump_past_alt:
7184          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7185          goto unconditional_jump;
7186
7187
7188        /* Normally, the on_failure_jump pushes a failure point, which
7189           then gets popped at pop_failure_jump.  We will end up at
7190           pop_failure_jump, also, and with a pattern of, say, `a+', we
7191           are skipping over the on_failure_jump, so we have to push
7192           something meaningless for pop_failure_jump to pop.  */
7193        case dummy_failure_jump:
7194          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7195          /* It doesn't matter what we push for the string here.  What
7196             the code at `fail' tests is the value for the pattern.  */
7197          PUSH_FAILURE_POINT (NULL, NULL, -2);
7198          goto unconditional_jump;
7199
7200
7201        /* At the end of an alternative, we need to push a dummy failure
7202           point in case we are followed by a `pop_failure_jump', because
7203           we don't want the failure point for the alternative to be
7204           popped.  For example, matching `(a|ab)*' against `aab'
7205           requires that we match the `ab' alternative.  */
7206        case push_dummy_failure:
7207          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7208          /* See comments just above at `dummy_failure_jump' about the
7209             two zeroes.  */
7210          PUSH_FAILURE_POINT (NULL, NULL, -2);
7211          break;
7212
7213        /* Have to succeed matching what follows at least n times.
7214           After that, handle like `on_failure_jump'.  */
7215        case succeed_n:
7216          EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7217          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7218
7219          assert (mcnt >= 0);
7220          /* Originally, this is how many times we HAVE to succeed.  */
7221          if (mcnt > 0)
7222            {
7223               mcnt--;
7224	       p += OFFSET_ADDRESS_SIZE;
7225               STORE_NUMBER_AND_INCR (p, mcnt);
7226#ifdef _LIBC
7227               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7228			     , mcnt);
7229#else
7230               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7231			     , mcnt);
7232#endif
7233            }
7234	  else if (mcnt == 0)
7235            {
7236#ifdef _LIBC
7237              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
7238			    p + OFFSET_ADDRESS_SIZE);
7239#else
7240              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
7241			    p + OFFSET_ADDRESS_SIZE);
7242#endif /* _LIBC */
7243
7244#ifdef WCHAR
7245	      p[1] = (UCHAR_T) no_op;
7246#else
7247	      p[2] = (UCHAR_T) no_op;
7248              p[3] = (UCHAR_T) no_op;
7249#endif /* WCHAR */
7250              goto on_failure;
7251            }
7252          break;
7253
7254        case jump_n:
7255          EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7256          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7257
7258          /* Originally, this is how many times we CAN jump.  */
7259          if (mcnt)
7260            {
7261               mcnt--;
7262               STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7263
7264#ifdef _LIBC
7265               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7266			     mcnt);
7267#else
7268               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7269			     mcnt);
7270#endif /* _LIBC */
7271	       goto unconditional_jump;
7272            }
7273          /* If don't have to jump any more, skip over the rest of command.  */
7274	  else
7275	    p += 2 * OFFSET_ADDRESS_SIZE;
7276          break;
7277
7278	case set_number_at:
7279	  {
7280            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7281
7282            EXTRACT_NUMBER_AND_INCR (mcnt, p);
7283            p1 = p + mcnt;
7284            EXTRACT_NUMBER_AND_INCR (mcnt, p);
7285#ifdef _LIBC
7286            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
7287#else
7288            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
7289#endif
7290	    STORE_NUMBER (p1, mcnt);
7291            break;
7292          }
7293
7294#if 0
7295	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7296	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
7297	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
7298	   macro and introducing temporary variables works around the bug.  */
7299
7300	case wordbound:
7301	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7302	  if (AT_WORD_BOUNDARY (d))
7303	    break;
7304	  goto fail;
7305
7306	case notwordbound:
7307	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7308	  if (AT_WORD_BOUNDARY (d))
7309	    goto fail;
7310	  break;
7311#else
7312	case wordbound:
7313	{
7314	  boolean prevchar, thischar;
7315
7316	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7317	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7318	    break;
7319
7320	  prevchar = WORDCHAR_P (d - 1);
7321	  thischar = WORDCHAR_P (d);
7322	  if (prevchar != thischar)
7323	    break;
7324	  goto fail;
7325	}
7326
7327      case notwordbound:
7328	{
7329	  boolean prevchar, thischar;
7330
7331	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7332	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7333	    goto fail;
7334
7335	  prevchar = WORDCHAR_P (d - 1);
7336	  thischar = WORDCHAR_P (d);
7337	  if (prevchar != thischar)
7338	    goto fail;
7339	  break;
7340	}
7341#endif
7342
7343	case wordbeg:
7344          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7345	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7346	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7347	    break;
7348          goto fail;
7349
7350	case wordend:
7351          DEBUG_PRINT1 ("EXECUTING wordend.\n");
7352	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7353              && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
7354	    break;
7355          goto fail;
7356
7357#ifdef emacs
7358  	case before_dot:
7359          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7360 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7361  	    goto fail;
7362  	  break;
7363
7364  	case at_dot:
7365          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7366 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
7367  	    goto fail;
7368  	  break;
7369
7370  	case after_dot:
7371          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7372          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7373  	    goto fail;
7374  	  break;
7375
7376	case syntaxspec:
7377          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7378	  mcnt = *p++;
7379	  goto matchsyntax;
7380
7381        case wordchar:
7382          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7383	  mcnt = (int) Sword;
7384        matchsyntax:
7385	  PREFETCH ();
7386	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7387	  d++;
7388	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7389	    goto fail;
7390          SET_REGS_MATCHED ();
7391	  break;
7392
7393	case notsyntaxspec:
7394          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7395	  mcnt = *p++;
7396	  goto matchnotsyntax;
7397
7398        case notwordchar:
7399          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7400	  mcnt = (int) Sword;
7401        matchnotsyntax:
7402	  PREFETCH ();
7403	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7404	  d++;
7405	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7406	    goto fail;
7407	  SET_REGS_MATCHED ();
7408          break;
7409
7410#else /* not emacs */
7411	case wordchar:
7412          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7413	  PREFETCH ();
7414          if (!WORDCHAR_P (d))
7415            goto fail;
7416	  SET_REGS_MATCHED ();
7417          d++;
7418	  break;
7419
7420	case notwordchar:
7421          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7422	  PREFETCH ();
7423	  if (WORDCHAR_P (d))
7424            goto fail;
7425          SET_REGS_MATCHED ();
7426          d++;
7427	  break;
7428#endif /* not emacs */
7429
7430        default:
7431          abort ();
7432	}
7433      continue;  /* Successfully executed one pattern command; keep going.  */
7434
7435
7436    /* We goto here if a matching operation fails. */
7437    fail:
7438      if (!FAIL_STACK_EMPTY ())
7439	{ /* A restart point is known.  Restore to that state.  */
7440          DEBUG_PRINT1 ("\nFAIL:\n");
7441          POP_FAILURE_POINT (d, p,
7442                             lowest_active_reg, highest_active_reg,
7443                             regstart, regend, reg_info);
7444
7445          /* If this failure point is a dummy, try the next one.  */
7446          if (!p)
7447	    goto fail;
7448
7449          /* If we failed to the end of the pattern, don't examine *p.  */
7450	  assert (p <= pend);
7451          if (p < pend)
7452            {
7453              boolean is_a_jump_n = false;
7454
7455              /* If failed to a backwards jump that's part of a repetition
7456                 loop, need to pop this failure point and use the next one.  */
7457              switch ((re_opcode_t) *p)
7458                {
7459                case jump_n:
7460                  is_a_jump_n = true;
7461		  /* Fall through.  */
7462                case maybe_pop_jump:
7463                case pop_failure_jump:
7464                case jump:
7465                  p1 = p + 1;
7466                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7467                  p1 += mcnt;
7468
7469                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7470                      || (!is_a_jump_n
7471                          && (re_opcode_t) *p1 == on_failure_jump))
7472                    goto fail;
7473                  break;
7474                default:
7475                  /* do nothing */ ;
7476                }
7477            }
7478
7479          if (d >= string1 && d <= end1)
7480	    dend = end_match_1;
7481        }
7482      else
7483        break;   /* Matching at this starting point really fails.  */
7484    } /* for (;;) */
7485
7486  if (best_regs_set)
7487    goto restore_best_regs;
7488
7489  FREE_VARIABLES ();
7490
7491  return -1;         			/* Failure to match.  */
7492} /* re_match_2 */
7493
7494/* Subroutine definitions for re_match_2.  */
7495
7496
7497/* We are passed P pointing to a register number after a start_memory.
7498
7499   Return true if the pattern up to the corresponding stop_memory can
7500   match the empty string, and false otherwise.
7501
7502   If we find the matching stop_memory, sets P to point to one past its number.
7503   Otherwise, sets P to an undefined byte less than or equal to END.
7504
7505   We don't handle duplicates properly (yet).  */
7506
7507static boolean
7508PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7509                                   PREFIX(register_info_type) *reg_info)
7510{
7511  int mcnt;
7512  /* Point to after the args to the start_memory.  */
7513  UCHAR_T *p1 = *p + 2;
7514
7515  while (p1 < end)
7516    {
7517      /* Skip over opcodes that can match nothing, and return true or
7518	 false, as appropriate, when we get to one that can't, or to the
7519         matching stop_memory.  */
7520
7521      switch ((re_opcode_t) *p1)
7522        {
7523        /* Could be either a loop or a series of alternatives.  */
7524        case on_failure_jump:
7525          p1++;
7526          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7527
7528          /* If the next operation is not a jump backwards in the
7529	     pattern.  */
7530
7531	  if (mcnt >= 0)
7532	    {
7533              /* Go through the on_failure_jumps of the alternatives,
7534                 seeing if any of the alternatives cannot match nothing.
7535                 The last alternative starts with only a jump,
7536                 whereas the rest start with on_failure_jump and end
7537                 with a jump, e.g., here is the pattern for `a|b|c':
7538
7539                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7540                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7541                 /exactn/1/c
7542
7543                 So, we have to first go through the first (n-1)
7544                 alternatives and then deal with the last one separately.  */
7545
7546
7547              /* Deal with the first (n-1) alternatives, which start
7548                 with an on_failure_jump (see above) that jumps to right
7549                 past a jump_past_alt.  */
7550
7551              while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7552		     jump_past_alt)
7553                {
7554                  /* `mcnt' holds how many bytes long the alternative
7555                     is, including the ending `jump_past_alt' and
7556                     its number.  */
7557
7558                  if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7559						(1 + OFFSET_ADDRESS_SIZE),
7560						reg_info))
7561                    return false;
7562
7563                  /* Move to right after this alternative, including the
7564		     jump_past_alt.  */
7565                  p1 += mcnt;
7566
7567                  /* Break if it's the beginning of an n-th alternative
7568                     that doesn't begin with an on_failure_jump.  */
7569                  if ((re_opcode_t) *p1 != on_failure_jump)
7570                    break;
7571
7572		  /* Still have to check that it's not an n-th
7573		     alternative that starts with an on_failure_jump.  */
7574		  p1++;
7575                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7576                  if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7577		      jump_past_alt)
7578                    {
7579		      /* Get to the beginning of the n-th alternative.  */
7580                      p1 -= 1 + OFFSET_ADDRESS_SIZE;
7581                      break;
7582                    }
7583                }
7584
7585              /* Deal with the last alternative: go back and get number
7586                 of the `jump_past_alt' just before it.  `mcnt' contains
7587                 the length of the alternative.  */
7588              EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7589
7590              if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7591                return false;
7592
7593              p1 += mcnt;	/* Get past the n-th alternative.  */
7594            } /* if mcnt > 0 */
7595          break;
7596
7597
7598        case stop_memory:
7599	  assert (p1[1] == **p);
7600          *p = p1 + 2;
7601          return true;
7602
7603
7604        default:
7605          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7606            return false;
7607        }
7608    } /* while p1 < end */
7609
7610  return false;
7611} /* group_match_null_string_p */
7612
7613
7614/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7615   It expects P to be the first byte of a single alternative and END one
7616   byte past the last. The alternative can contain groups.  */
7617
7618static boolean
7619PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
7620                                 PREFIX(register_info_type) *reg_info)
7621{
7622  int mcnt;
7623  UCHAR_T *p1 = p;
7624
7625  while (p1 < end)
7626    {
7627      /* Skip over opcodes that can match nothing, and break when we get
7628         to one that can't.  */
7629
7630      switch ((re_opcode_t) *p1)
7631        {
7632	/* It's a loop.  */
7633        case on_failure_jump:
7634          p1++;
7635          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7636          p1 += mcnt;
7637          break;
7638
7639	default:
7640          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7641            return false;
7642        }
7643    }  /* while p1 < end */
7644
7645  return true;
7646} /* alt_match_null_string_p */
7647
7648
7649/* Deals with the ops common to group_match_null_string_p and
7650   alt_match_null_string_p.
7651
7652   Sets P to one after the op and its arguments, if any.  */
7653
7654static boolean
7655PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7656                                       PREFIX(register_info_type) *reg_info)
7657{
7658  int mcnt;
7659  boolean ret;
7660  int reg_no;
7661  UCHAR_T *p1 = *p;
7662
7663  switch ((re_opcode_t) *p1++)
7664    {
7665    case no_op:
7666    case begline:
7667    case endline:
7668    case begbuf:
7669    case endbuf:
7670    case wordbeg:
7671    case wordend:
7672    case wordbound:
7673    case notwordbound:
7674#ifdef emacs
7675    case before_dot:
7676    case at_dot:
7677    case after_dot:
7678#endif
7679      break;
7680
7681    case start_memory:
7682      reg_no = *p1;
7683      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7684      ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7685
7686      /* Have to set this here in case we're checking a group which
7687         contains a group and a back reference to it.  */
7688
7689      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7690        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7691
7692      if (!ret)
7693        return false;
7694      break;
7695
7696    /* If this is an optimized succeed_n for zero times, make the jump.  */
7697    case jump:
7698      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7699      if (mcnt >= 0)
7700        p1 += mcnt;
7701      else
7702        return false;
7703      break;
7704
7705    case succeed_n:
7706      /* Get to the number of times to succeed.  */
7707      p1 += OFFSET_ADDRESS_SIZE;
7708      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7709
7710      if (mcnt == 0)
7711        {
7712          p1 -= 2 * OFFSET_ADDRESS_SIZE;
7713          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7714          p1 += mcnt;
7715        }
7716      else
7717        return false;
7718      break;
7719
7720    case duplicate:
7721      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7722        return false;
7723      break;
7724
7725    case set_number_at:
7726      p1 += 2 * OFFSET_ADDRESS_SIZE;
7727      return false;
7728
7729    default:
7730      /* All other opcodes mean we cannot match the empty string.  */
7731      return false;
7732  }
7733
7734  *p = p1;
7735  return true;
7736} /* common_op_match_null_string_p */
7737
7738
7739/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7740   bytes; nonzero otherwise.  */
7741
7742static int
7743PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
7744                        RE_TRANSLATE_TYPE translate)
7745{
7746  register const UCHAR_T *p1 = (const UCHAR_T *) s1;
7747  register const UCHAR_T *p2 = (const UCHAR_T *) s2;
7748  while (len)
7749    {
7750#ifdef WCHAR
7751      if (((*p1<=0xff)?translate[*p1++]:*p1++)
7752	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
7753	return 1;
7754#else /* BYTE */
7755      if (translate[*p1++] != translate[*p2++]) return 1;
7756#endif /* WCHAR */
7757      len--;
7758    }
7759  return 0;
7760}
7761
7762
7763#else /* not INSIDE_RECURSION */
7764
7765/* Entry points for GNU code.  */
7766
7767/* re_compile_pattern is the GNU regular expression compiler: it
7768   compiles PATTERN (of length SIZE) and puts the result in BUFP.
7769   Returns 0 if the pattern was valid, otherwise an error string.
7770
7771   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7772   are set in BUFP on entry.
7773
7774   We call regex_compile to do the actual compilation.  */
7775
7776const char *
7777re_compile_pattern (const char *pattern, size_t length,
7778                    struct re_pattern_buffer *bufp)
7779{
7780  reg_errcode_t ret;
7781
7782  /* GNU code is written to assume at least RE_NREGS registers will be set
7783     (and at least one extra will be -1).  */
7784  bufp->regs_allocated = REGS_UNALLOCATED;
7785
7786  /* And GNU code determines whether or not to get register information
7787     by passing null for the REGS argument to re_match, etc., not by
7788     setting no_sub.  */
7789  bufp->no_sub = 0;
7790
7791  /* Match anchors at newline.  */
7792  bufp->newline_anchor = 1;
7793
7794# ifdef MBS_SUPPORT
7795  if (MB_CUR_MAX != 1)
7796    ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
7797  else
7798# endif
7799    ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
7800
7801  if (!ret)
7802    return NULL;
7803  return gettext (re_error_msgid[(int) ret]);
7804}
7805#ifdef _LIBC
7806weak_alias (__re_compile_pattern, re_compile_pattern)
7807#endif
7808
7809/* Entry points compatible with 4.2 BSD regex library.  We don't define
7810   them unless specifically requested.  */
7811
7812#if defined _REGEX_RE_COMP || defined _LIBC
7813
7814/* BSD has one and only one pattern buffer.  */
7815static struct re_pattern_buffer re_comp_buf;
7816
7817char *
7818#ifdef _LIBC
7819/* Make these definitions weak in libc, so POSIX programs can redefine
7820   these names if they don't use our functions, and still use
7821   regcomp/regexec below without link errors.  */
7822weak_function
7823#endif
7824re_comp (const char *s)
7825{
7826  reg_errcode_t ret;
7827
7828  if (!s)
7829    {
7830      if (!re_comp_buf.buffer)
7831	return (char *) gettext ("No previous regular expression");
7832      return 0;
7833    }
7834
7835  if (!re_comp_buf.buffer)
7836    {
7837      re_comp_buf.buffer = (unsigned char *) malloc (200);
7838      if (re_comp_buf.buffer == NULL)
7839        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
7840      re_comp_buf.allocated = 200;
7841
7842      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
7843      if (re_comp_buf.fastmap == NULL)
7844	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
7845    }
7846
7847  /* Since `re_exec' always passes NULL for the `regs' argument, we
7848     don't need to initialize the pattern buffer fields which affect it.  */
7849
7850  /* Match anchors at newlines.  */
7851  re_comp_buf.newline_anchor = 1;
7852
7853# ifdef MBS_SUPPORT
7854  if (MB_CUR_MAX != 1)
7855    ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7856  else
7857# endif
7858    ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7859
7860  if (!ret)
7861    return NULL;
7862
7863  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
7864  return (char *) gettext (re_error_msgid[(int) ret]);
7865}
7866
7867
7868int
7869#ifdef _LIBC
7870weak_function
7871#endif
7872re_exec (const char *s)
7873{
7874  const int len = strlen (s);
7875  return
7876    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
7877}
7878
7879#endif /* _REGEX_RE_COMP */
7880
7881/* POSIX.2 functions.  Don't define these for Emacs.  */
7882
7883#ifndef emacs
7884
7885/* regcomp takes a regular expression as a string and compiles it.
7886
7887   PREG is a regex_t *.  We do not expect any fields to be initialized,
7888   since POSIX says we shouldn't.  Thus, we set
7889
7890     `buffer' to the compiled pattern;
7891     `used' to the length of the compiled pattern;
7892     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7893       REG_EXTENDED bit in CFLAGS is set; otherwise, to
7894       RE_SYNTAX_POSIX_BASIC;
7895     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7896     `fastmap' to an allocated space for the fastmap;
7897     `fastmap_accurate' to zero;
7898     `re_nsub' to the number of subexpressions in PATTERN.
7899
7900   PATTERN is the address of the pattern string.
7901
7902   CFLAGS is a series of bits which affect compilation.
7903
7904     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7905     use POSIX basic syntax.
7906
7907     If REG_NEWLINE is set, then . and [^...] don't match newline.
7908     Also, regexec will try a match beginning after every newline.
7909
7910     If REG_ICASE is set, then we considers upper- and lowercase
7911     versions of letters to be equivalent when matching.
7912
7913     If REG_NOSUB is set, then when PREG is passed to regexec, that
7914     routine will report only success or failure, and nothing about the
7915     registers.
7916
7917   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
7918   the return codes and their meanings.)  */
7919
7920int
7921regcomp (regex_t *preg, const char *pattern, int cflags)
7922{
7923  reg_errcode_t ret;
7924  reg_syntax_t syntax
7925    = (cflags & REG_EXTENDED) ?
7926      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7927
7928  /* regex_compile will allocate the space for the compiled pattern.  */
7929  preg->buffer = 0;
7930  preg->allocated = 0;
7931  preg->used = 0;
7932
7933  /* Try to allocate space for the fastmap.  */
7934  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
7935
7936  if (cflags & REG_ICASE)
7937    {
7938      int i;
7939
7940      preg->translate
7941	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
7942				      * sizeof (*(RE_TRANSLATE_TYPE)0));
7943      if (preg->translate == NULL)
7944        return (int) REG_ESPACE;
7945
7946      /* Map uppercase characters to corresponding lowercase ones.  */
7947      for (i = 0; i < CHAR_SET_SIZE; i++)
7948        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
7949    }
7950  else
7951    preg->translate = NULL;
7952
7953  /* If REG_NEWLINE is set, newlines are treated differently.  */
7954  if (cflags & REG_NEWLINE)
7955    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
7956      syntax &= ~RE_DOT_NEWLINE;
7957      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
7958      /* It also changes the matching behavior.  */
7959      preg->newline_anchor = 1;
7960    }
7961  else
7962    preg->newline_anchor = 0;
7963
7964  preg->no_sub = !!(cflags & REG_NOSUB);
7965
7966  /* POSIX says a null character in the pattern terminates it, so we
7967     can use strlen here in compiling the pattern.  */
7968# ifdef MBS_SUPPORT
7969  if (MB_CUR_MAX != 1)
7970    ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
7971  else
7972# endif
7973    ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
7974
7975  /* POSIX doesn't distinguish between an unmatched open-group and an
7976     unmatched close-group: both are REG_EPAREN.  */
7977  if (ret == REG_ERPAREN) ret = REG_EPAREN;
7978
7979  if (ret == REG_NOERROR && preg->fastmap)
7980    {
7981      /* Compute the fastmap now, since regexec cannot modify the pattern
7982	 buffer.  */
7983      if (re_compile_fastmap (preg) == -2)
7984	{
7985	  /* Some error occurred while computing the fastmap, just forget
7986	     about it.  */
7987	  free (preg->fastmap);
7988	  preg->fastmap = NULL;
7989	}
7990    }
7991
7992  return (int) ret;
7993}
7994#ifdef _LIBC
7995weak_alias (__regcomp, regcomp)
7996#endif
7997
7998
7999/* regexec searches for a given pattern, specified by PREG, in the
8000   string STRING.
8001
8002   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8003   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
8004   least NMATCH elements, and we set them to the offsets of the
8005   corresponding matched substrings.
8006
8007   EFLAGS specifies `execution flags' which affect matching: if
8008   REG_NOTBOL is set, then ^ does not match at the beginning of the
8009   string; if REG_NOTEOL is set, then $ does not match at the end.
8010
8011   We return 0 if we find a match and REG_NOMATCH if not.  */
8012
8013int
8014regexec (const regex_t *preg, const char *string, size_t nmatch,
8015         regmatch_t pmatch[], int eflags)
8016{
8017  int ret;
8018  struct re_registers regs;
8019  regex_t private_preg;
8020  int len = strlen (string);
8021  boolean want_reg_info = !preg->no_sub && nmatch > 0;
8022
8023  private_preg = *preg;
8024
8025  private_preg.not_bol = !!(eflags & REG_NOTBOL);
8026  private_preg.not_eol = !!(eflags & REG_NOTEOL);
8027
8028  /* The user has told us exactly how many registers to return
8029     information about, via `nmatch'.  We have to pass that on to the
8030     matching routines.  */
8031  private_preg.regs_allocated = REGS_FIXED;
8032
8033  if (want_reg_info)
8034    {
8035      regs.num_regs = nmatch;
8036      regs.start = TALLOC (nmatch * 2, regoff_t);
8037      if (regs.start == NULL)
8038        return (int) REG_NOMATCH;
8039      regs.end = regs.start + nmatch;
8040    }
8041
8042  /* Perform the searching operation.  */
8043  ret = re_search (&private_preg, string, len,
8044                   /* start: */ 0, /* range: */ len,
8045                   want_reg_info ? &regs : (struct re_registers *) 0);
8046
8047  /* Copy the register information to the POSIX structure.  */
8048  if (want_reg_info)
8049    {
8050      if (ret >= 0)
8051        {
8052          unsigned r;
8053
8054          for (r = 0; r < nmatch; r++)
8055            {
8056              pmatch[r].rm_so = regs.start[r];
8057              pmatch[r].rm_eo = regs.end[r];
8058            }
8059        }
8060
8061      /* If we needed the temporary register info, free the space now.  */
8062      free (regs.start);
8063    }
8064
8065  /* We want zero return to mean success, unlike `re_search'.  */
8066  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8067}
8068#ifdef _LIBC
8069weak_alias (__regexec, regexec)
8070#endif
8071
8072
8073/* Returns a message corresponding to an error code, ERRCODE, returned
8074   from either regcomp or regexec.   We don't use PREG here.  */
8075
8076size_t
8077regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
8078          char *errbuf, size_t errbuf_size)
8079{
8080  const char *msg;
8081  size_t msg_size;
8082
8083  if (errcode < 0
8084      || errcode >= (int) (sizeof (re_error_msgid)
8085			   / sizeof (re_error_msgid[0])))
8086    /* Only error codes returned by the rest of the code should be passed
8087       to this routine.  If we are given anything else, or if other regex
8088       code generates an invalid error code, then the program has a bug.
8089       Dump core so we can fix it.  */
8090    abort ();
8091
8092  msg = gettext (re_error_msgid[errcode]);
8093
8094  msg_size = strlen (msg) + 1; /* Includes the null.  */
8095
8096  if (errbuf_size != 0)
8097    {
8098      if (msg_size > errbuf_size)
8099        {
8100#if defined HAVE_MEMPCPY || defined _LIBC
8101	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8102#else
8103          (void) memcpy (errbuf, msg, errbuf_size - 1);
8104          errbuf[errbuf_size - 1] = 0;
8105#endif
8106        }
8107      else
8108        (void) memcpy (errbuf, msg, msg_size);
8109    }
8110
8111  return msg_size;
8112}
8113#ifdef _LIBC
8114weak_alias (__regerror, regerror)
8115#endif
8116
8117
8118/* Free dynamically allocated space used by PREG.  */
8119
8120void
8121regfree (regex_t *preg)
8122{
8123  free (preg->buffer);
8124  preg->buffer = NULL;
8125
8126  preg->allocated = 0;
8127  preg->used = 0;
8128
8129  free (preg->fastmap);
8130  preg->fastmap = NULL;
8131  preg->fastmap_accurate = 0;
8132
8133  free (preg->translate);
8134  preg->translate = NULL;
8135}
8136#ifdef _LIBC
8137weak_alias (__regfree, regfree)
8138#endif
8139
8140#endif /* not emacs  */
8141
8142#endif /* not INSIDE_RECURSION */
8143
8144
/* Macro cleanup.  Every helper macro named below is undefined here,
   outside the `not INSIDE_RECURSION' section that closes just above.
   NOTE(review): presumably this lets a further pass over this file
   define them afresh (e.g. for a different CHAR_T) -- confirm against
   the place where the file is re-included.  */

/* Compiled-pattern number encoding.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

/* Debugging output.  */
#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

/* Failure stack.  */
#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

/* Pattern fetching.  */
#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

/* Compile-time buffer handling.  */
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

/* Matcher helpers.  */
# undef POINTER_TO_OFFSET
/* This entry used to read MATCHING_IN_FRST_STRING (missing `I'), which
   made the #undef a silent no-op.
   NOTE(review): the definition is not in this part of the file; confirm
   it is spelled MATCHING_IN_FIRST_STRING.  */
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-type parameterization.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Record that this point of the file has been reached at least once.  */
# define DEFINED_ONCE
8212