1169695Skan/* Extended regular expression matching and search library,
2169695Skan   version 0.12.
3169695Skan   (Implements POSIX draft P1003.2/D11.2, except for some of the
4169695Skan   internationalization features.)
5169695Skan
6169695Skan   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
7169695Skan   2002, 2005 Free Software Foundation, Inc.
8169695Skan   This file is part of the GNU C Library.
9169695Skan
10169695Skan   The GNU C Library is free software; you can redistribute it and/or
11169695Skan   modify it under the terms of the GNU Lesser General Public
12169695Skan   License as published by the Free Software Foundation; either
13169695Skan   version 2.1 of the License, or (at your option) any later version.
14169695Skan
15169695Skan   The GNU C Library is distributed in the hope that it will be useful,
16169695Skan   but WITHOUT ANY WARRANTY; without even the implied warranty of
17169695Skan   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18169695Skan   Lesser General Public License for more details.
19169695Skan
20169695Skan   You should have received a copy of the GNU Lesser General Public
21169695Skan   License along with the GNU C Library; if not, write to the Free
22169695Skan   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23169695Skan   02110-1301 USA.  */
24169695Skan
/* This file has been modified for usage in libiberty.  It includes "xregex.h"
   instead of <regex.h>.  The "xregex.h" header file renames all external
   routines with an "x" prefix so they do not collide with the native regex
   routines or with other components' regex routines. */
29169695Skan/* AIX requires this to be the first thing in the file. */
30169695Skan#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
31169695Skan  #pragma alloca
32169695Skan#endif
33169695Skan
34169695Skan#undef	_GNU_SOURCE
35169695Skan#define _GNU_SOURCE
36169695Skan
37169695Skan#ifndef INSIDE_RECURSION
38169695Skan# ifdef HAVE_CONFIG_H
39169695Skan#  include <config.h>
40169695Skan# endif
41169695Skan#endif
42169695Skan
43169695Skan#include <ansidecl.h>
44169695Skan
45169695Skan#ifndef INSIDE_RECURSION
46169695Skan
47169695Skan# if defined STDC_HEADERS && !defined emacs
48169695Skan#  include <stddef.h>
49169695Skan# else
50169695Skan/* We need this for `regex.h', and perhaps for the Emacs include files.  */
51169695Skan#  include <sys/types.h>
52169695Skan# endif
53169695Skan
54169695Skan# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
55169695Skan
/* For platforms which support the ISO C Amendment 1 functionality we
   support user defined character classes.  */
58169695Skan# if defined _LIBC || WIDE_CHAR_SUPPORT
59169695Skan/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
60169695Skan#  include <wchar.h>
61169695Skan#  include <wctype.h>
62169695Skan# endif
63169695Skan
64169695Skan# ifdef _LIBC
65169695Skan/* We have to keep the namespace clean.  */
66169695Skan#  define regfree(preg) __regfree (preg)
67169695Skan#  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
68169695Skan#  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
69169695Skan#  define regerror(errcode, preg, errbuf, errbuf_size) \
70169695Skan	__regerror(errcode, preg, errbuf, errbuf_size)
71169695Skan#  define re_set_registers(bu, re, nu, st, en) \
72169695Skan	__re_set_registers (bu, re, nu, st, en)
73169695Skan#  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
74169695Skan	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
75169695Skan#  define re_match(bufp, string, size, pos, regs) \
76169695Skan	__re_match (bufp, string, size, pos, regs)
77169695Skan#  define re_search(bufp, string, size, startpos, range, regs) \
78169695Skan	__re_search (bufp, string, size, startpos, range, regs)
79169695Skan#  define re_compile_pattern(pattern, length, bufp) \
80169695Skan	__re_compile_pattern (pattern, length, bufp)
81169695Skan#  define re_set_syntax(syntax) __re_set_syntax (syntax)
82169695Skan#  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
83169695Skan	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
84169695Skan#  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
85169695Skan
86169695Skan#  define btowc __btowc
87169695Skan
88169695Skan/* We are also using some library internals.  */
89169695Skan#  include <locale/localeinfo.h>
90169695Skan#  include <locale/elem-hash.h>
91169695Skan#  include <langinfo.h>
92169695Skan#  include <locale/coll-lookup.h>
93169695Skan# endif
94169695Skan
95169695Skan/* This is for other GNU distributions with internationalized messages.  */
96169695Skan# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
97169695Skan#  include <libintl.h>
98169695Skan#  ifdef _LIBC
99169695Skan#   undef gettext
100169695Skan#   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
101169695Skan#  endif
102169695Skan# else
103169695Skan#  define gettext(msgid) (msgid)
104169695Skan# endif
105169695Skan
106169695Skan# ifndef gettext_noop
107169695Skan/* This define is so xgettext can find the internationalizable
108169695Skan   strings.  */
109169695Skan#  define gettext_noop(String) String
110169695Skan# endif
111169695Skan
112169695Skan/* The `emacs' switch turns on certain matching commands
113169695Skan   that make sense only in Emacs. */
114169695Skan# ifdef emacs
115169695Skan
116169695Skan#  include "lisp.h"
117169695Skan#  include "buffer.h"
118169695Skan#  include "syntax.h"
119169695Skan
120169695Skan# else  /* not emacs */
121169695Skan
122169695Skan/* If we are not linking with Emacs proper,
123169695Skan   we can't use the relocating allocator
124169695Skan   even if config.h says that we can.  */
125169695Skan#  undef REL_ALLOC
126169695Skan
127169695Skan#  if defined STDC_HEADERS || defined _LIBC
128169695Skan#   include <stdlib.h>
129169695Skan#  else
130169695Skanchar *malloc ();
131169695Skanchar *realloc ();
132169695Skan#  endif
133169695Skan
134169695Skan/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
135169695Skan   If nothing else has been done, use the method below.  */
136169695Skan#  ifdef INHIBIT_STRING_HEADER
137169695Skan#   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
138169695Skan#    if !defined bzero && !defined bcopy
139169695Skan#     undef INHIBIT_STRING_HEADER
140169695Skan#    endif
141169695Skan#   endif
142169695Skan#  endif
143169695Skan
144169695Skan/* This is the normal way of making sure we have a bcopy and a bzero.
145169695Skan   This is used in most programs--a few other programs avoid this
146169695Skan   by defining INHIBIT_STRING_HEADER.  */
147169695Skan#  ifndef INHIBIT_STRING_HEADER
148169695Skan#   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
149169695Skan#    include <string.h>
150169695Skan#    ifndef bzero
151169695Skan#     ifndef _LIBC
152222203Sbenl#      define bzero(s, n)	((void) (memset (s, '\0', n), (s)))
153169695Skan#     else
154169695Skan#      define bzero(s, n)	__bzero (s, n)
155169695Skan#     endif
156169695Skan#    endif
157169695Skan#   else
158169695Skan#    include <strings.h>
159169695Skan#    ifndef memcmp
160169695Skan#     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
161169695Skan#    endif
162169695Skan#    ifndef memcpy
163169695Skan#     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
164169695Skan#    endif
165169695Skan#   endif
166169695Skan#  endif
167169695Skan
168169695Skan/* Define the syntax stuff for \<, \>, etc.  */
169169695Skan
170169695Skan/* This must be nonzero for the wordchar and notwordchar pattern
171169695Skan   commands in re_match_2.  */
172169695Skan#  ifndef Sword
173169695Skan#   define Sword 1
174169695Skan#  endif
175169695Skan
176169695Skan#  ifdef SWITCH_ENUM_BUG
177169695Skan#   define SWITCH_ENUM_CAST(x) ((int)(x))
178169695Skan#  else
179169695Skan#   define SWITCH_ENUM_CAST(x) (x)
180169695Skan#  endif
181169695Skan
182169695Skan# endif /* not emacs */
183169695Skan
184169695Skan# if defined _LIBC || HAVE_LIMITS_H
185169695Skan#  include <limits.h>
186169695Skan# endif
187169695Skan
188169695Skan# ifndef MB_LEN_MAX
189169695Skan#  define MB_LEN_MAX 1
190169695Skan# endif
191169695Skan
192169695Skan/* Get the interface, including the syntax bits.  */
193169695Skan# include "xregex.h"  /* change for libiberty */
194169695Skan
195169695Skan/* isalpha etc. are used for the character classes.  */
196169695Skan# include <ctype.h>
197169695Skan
198169695Skan/* Jim Meyering writes:
199169695Skan
200169695Skan   "... Some ctype macros are valid only for character codes that
201169695Skan   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
202169695Skan   using /bin/cc or gcc but without giving an ansi option).  So, all
203169695Skan   ctype uses should be through macros like ISPRINT...  If
204169695Skan   STDC_HEADERS is defined, then autoconf has verified that the ctype
205169695Skan   macros don't need to be guarded with references to isascii. ...
206169695Skan   Defining isascii to 1 should let any compiler worth its salt
207169695Skan   eliminate the && through constant folding."
208169695Skan   Solaris defines some of these symbols so we must undefine them first.  */
209169695Skan
210169695Skan# undef ISASCII
211169695Skan# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
212169695Skan#  define ISASCII(c) 1
213169695Skan# else
214169695Skan#  define ISASCII(c) isascii(c)
215169695Skan# endif
216169695Skan
217169695Skan# ifdef isblank
218169695Skan#  define ISBLANK(c) (ISASCII (c) && isblank (c))
219169695Skan# else
220169695Skan#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
221169695Skan# endif
222169695Skan# ifdef isgraph
223169695Skan#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
224169695Skan# else
225169695Skan#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
226169695Skan# endif
227169695Skan
228169695Skan# undef ISPRINT
229169695Skan# define ISPRINT(c) (ISASCII (c) && isprint (c))
230169695Skan# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
231169695Skan# define ISALNUM(c) (ISASCII (c) && isalnum (c))
232169695Skan# define ISALPHA(c) (ISASCII (c) && isalpha (c))
233169695Skan# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
234169695Skan# define ISLOWER(c) (ISASCII (c) && islower (c))
235169695Skan# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
236169695Skan# define ISSPACE(c) (ISASCII (c) && isspace (c))
237169695Skan# define ISUPPER(c) (ISASCII (c) && isupper (c))
238169695Skan# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
239169695Skan
240169695Skan# ifdef _tolower
241169695Skan#  define TOLOWER(c) _tolower(c)
242169695Skan# else
243169695Skan#  define TOLOWER(c) tolower(c)
244169695Skan# endif
245169695Skan
# ifndef NULL
/* Fallback null pointer constant for environments whose headers do not
   supply one.  The replacement list is fully parenthesized so that it
   expands safely next to unary and postfix operators (for example
   `sizeof NULL').  */
#  define NULL ((void *) 0)
# endif
249169695Skan
250169695Skan/* We remove any previous definition of `SIGN_EXTEND_CHAR',
251169695Skan   since ours (we hope) works properly with all combinations of
252169695Skan   machines, compilers, `char' and `unsigned char' argument types.
253169695Skan   (Per Bothner suggested the basic approach.)  */
254169695Skan# undef SIGN_EXTEND_CHAR
255169695Skan# if __STDC__
256169695Skan#  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
257169695Skan# else  /* not __STDC__ */
258169695Skan/* As in Harbison and Steele.  */
259169695Skan#  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
260169695Skan# endif
261169695Skan
262169695Skan# ifndef emacs
263169695Skan/* How many characters in the character set.  */
264169695Skan#  define CHAR_SET_SIZE 256
265169695Skan
266169695Skan#  ifdef SYNTAX_TABLE
267169695Skan
268169695Skanextern char *re_syntax_table;
269169695Skan
270169695Skan#  else /* not SYNTAX_TABLE */
271169695Skan
272169695Skanstatic char re_syntax_table[CHAR_SET_SIZE];
273169695Skan
274169695Skanstatic void init_syntax_once (void);
275169695Skan
276169695Skanstatic void
277169695Skaninit_syntax_once (void)
278169695Skan{
279169695Skan   register int c;
280169695Skan   static int done = 0;
281169695Skan
282169695Skan   if (done)
283169695Skan     return;
284169695Skan   bzero (re_syntax_table, sizeof re_syntax_table);
285169695Skan
286169695Skan   for (c = 0; c < CHAR_SET_SIZE; ++c)
287169695Skan     if (ISALNUM (c))
288169695Skan	re_syntax_table[c] = Sword;
289169695Skan
290169695Skan   re_syntax_table['_'] = Sword;
291169695Skan
292169695Skan   done = 1;
293169695Skan}
294169695Skan
295169695Skan#  endif /* not SYNTAX_TABLE */
296169695Skan
297169695Skan#  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
298169695Skan
299169695Skan# endif /* emacs */
300169695Skan
301169695Skan/* Integer type for pointers.  */
302169695Skan# if !defined _LIBC && !defined HAVE_UINTPTR_T
303169695Skantypedef unsigned long int uintptr_t;
304169695Skan# endif
305169695Skan
306169695Skan/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
307169695Skan   use `alloca' instead of `malloc'.  This is because using malloc in
308169695Skan   re_search* or re_match* could cause memory leaks when C-g is used in
309169695Skan   Emacs; also, malloc is slower and causes storage fragmentation.  On
310169695Skan   the other hand, malloc is more portable, and easier to debug.
311169695Skan
312169695Skan   Because we sometimes use alloca, some routines have to be macros,
313169695Skan   not functions -- `alloca'-allocated space disappears at the end of the
314169695Skan   function it is called in.  */
315169695Skan
316169695Skan# ifdef REGEX_MALLOC
317169695Skan
318169695Skan#  define REGEX_ALLOCATE malloc
319169695Skan#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
320169695Skan#  define REGEX_FREE free
321169695Skan
322169695Skan# else /* not REGEX_MALLOC  */
323169695Skan
324169695Skan/* Emacs already defines alloca, sometimes.  */
325169695Skan#  ifndef alloca
326169695Skan
327169695Skan/* Make alloca work the best possible way.  */
328169695Skan#   ifdef __GNUC__
329169695Skan#    define alloca __builtin_alloca
330169695Skan#   else /* not __GNUC__ */
331169695Skan#    if HAVE_ALLOCA_H
332169695Skan#     include <alloca.h>
333169695Skan#    endif /* HAVE_ALLOCA_H */
334169695Skan#   endif /* not __GNUC__ */
335169695Skan
336169695Skan#  endif /* not alloca */
337169695Skan
338169695Skan#  define REGEX_ALLOCATE alloca
339169695Skan
340169695Skan/* Assumes a `char *destination' variable.  */
341169695Skan#  define REGEX_REALLOCATE(source, osize, nsize)			\
342169695Skan  (destination = (char *) alloca (nsize),				\
343169695Skan   memcpy (destination, source, osize))
344169695Skan
345169695Skan/* No need to do anything to free, after alloca.  */
346169695Skan#  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
347169695Skan
348169695Skan# endif /* not REGEX_MALLOC */
349169695Skan
350169695Skan/* Define how to allocate the failure stack.  */
351169695Skan
352169695Skan# if defined REL_ALLOC && defined REGEX_MALLOC
353169695Skan
354169695Skan#  define REGEX_ALLOCATE_STACK(size)				\
355169695Skan  r_alloc (&failure_stack_ptr, (size))
356169695Skan#  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
357169695Skan  r_re_alloc (&failure_stack_ptr, (nsize))
358169695Skan#  define REGEX_FREE_STACK(ptr)					\
359169695Skan  r_alloc_free (&failure_stack_ptr)
360169695Skan
361169695Skan# else /* not using relocating allocator */
362169695Skan
363169695Skan#  ifdef REGEX_MALLOC
364169695Skan
365169695Skan#   define REGEX_ALLOCATE_STACK malloc
366169695Skan#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
367169695Skan#   define REGEX_FREE_STACK free
368169695Skan
369169695Skan#  else /* not REGEX_MALLOC */
370169695Skan
371169695Skan#   define REGEX_ALLOCATE_STACK alloca
372169695Skan
373169695Skan#   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
374169695Skan   REGEX_REALLOCATE (source, osize, nsize)
375169695Skan/* No need to explicitly free anything.  */
376169695Skan#   define REGEX_FREE_STACK(arg)
377169695Skan
378169695Skan#  endif /* not REGEX_MALLOC */
379169695Skan# endif /* not using relocating allocator */
380169695Skan
381169695Skan
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  */
385169695Skan# define FIRST_STRING_P(ptr) 					\
386169695Skan  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
387169695Skan
388169695Skan/* (Re)Allocate N items of type T using malloc, or fail.  */
389169695Skan# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
390169695Skan# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
391169695Skan# define RETALLOC_IF(addr, n, t) \
392169695Skan  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
393169695Skan# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
394169695Skan
395169695Skan# define BYTEWIDTH 8 /* In bits.  */
396169695Skan
397169695Skan# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
398169695Skan
399169695Skan# undef MAX
400169695Skan# undef MIN
401169695Skan# define MAX(a, b) ((a) > (b) ? (a) : (b))
402169695Skan# define MIN(a, b) ((a) < (b) ? (a) : (b))
403169695Skan
404169695Skantypedef char boolean;
405169695Skan# define false 0
406169695Skan# define true 1
407169695Skan
408169695Skanstatic reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
409169695Skan                                         reg_syntax_t syntax,
410169695Skan                                         struct re_pattern_buffer *bufp);
411169695Skan
412169695Skanstatic int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
413169695Skan                                     const char *string1, int size1,
414169695Skan                                     const char *string2, int size2,
415169695Skan                                     int pos,
416169695Skan                                     struct re_registers *regs,
417169695Skan                                     int stop);
418169695Skanstatic int byte_re_search_2 (struct re_pattern_buffer *bufp,
419169695Skan                             const char *string1, int size1,
420169695Skan                             const char *string2, int size2,
421169695Skan                             int startpos, int range,
422169695Skan                             struct re_registers *regs, int stop);
423169695Skanstatic int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
424169695Skan
425169695Skan#ifdef MBS_SUPPORT
426169695Skanstatic reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
427169695Skan                                        reg_syntax_t syntax,
428169695Skan                                        struct re_pattern_buffer *bufp);
429169695Skan
430169695Skan
431169695Skanstatic int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
432169695Skan                                    const char *cstring1, int csize1,
433169695Skan                                    const char *cstring2, int csize2,
434169695Skan                                    int pos,
435169695Skan                                    struct re_registers *regs,
436169695Skan                                    int stop,
437169695Skan                                    wchar_t *string1, int size1,
438169695Skan                                    wchar_t *string2, int size2,
439169695Skan                                    int *mbs_offset1, int *mbs_offset2);
440169695Skanstatic int wcs_re_search_2 (struct re_pattern_buffer *bufp,
441169695Skan                            const char *string1, int size1,
442169695Skan                            const char *string2, int size2,
443169695Skan                            int startpos, int range,
444169695Skan                            struct re_registers *regs, int stop);
445169695Skanstatic int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
446169695Skan#endif
447169695Skan
448169695Skan/* These are the command codes that appear in compiled regular
449169695Skan   expressions.  Some opcodes are followed by argument bytes.  A
450169695Skan   command code can specify any interpretation whatsoever for its
451169695Skan   arguments.  Zero bytes may appear in the compiled regular expression.  */
452169695Skan
typedef enum
{
  no_op = 0,

  /* Succeed immediately; no further backtracking is done.  */
  succeed,

  /* Operand: one byte N, then N literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Like exactn, except that the operand holds binary data.  */
  exactn_bin,
# endif

  /* Match any character (more or less).  */
  anychar,

  /* Match one character that belongs to a set.  The first operand
     byte is the number of bitmap bytes; the bitmap itself follows.
     Within each bitmap byte the bits are ordered low-bit-first, and a
     character is a member when its bit is 1.  A character too large
     to have a bit in the map is automatically not a member.

     With MBS_SUPPORT the following elements are, in order, the length
     of the character classes, of the collating symbols, of the
     equivalence classes, of the character ranges and of the
     characters; after those come the character class elements, the
     collating symbol elements, the equivalence class elements, the
     range elements and the character elements.  See the regex_compile
     function.  */
  charset,

  /* Takes the same operands as charset, but matches any character
     that is NOT one of those specified.  */
  charset_not,

  /* Begin remembering matched text so that it can be stored in a
     register.  Operands: one byte with the register number (0 up to
     one less than the pattern buffer's re_nsub field), then one byte
     with the number of groups nested inside this one.  (The latter is
     kept in start_memory only because the on_failure_jump case of
     re_match_2 needs it.)  */
  start_memory,

  /* Stop remembering matched text and store it in a memory register.
     Operands are the same as for `start_memory': one byte with the
     register number (0 up to one less than `re_nsub' in the pattern
     buffer) and one byte with the number of inner groups.  (The inner
     group count is repeated here because there is no easy way to find
     the corresponding start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a copy of previously remembered text.  Operand: one byte
     holding the register number.  */
  duplicate,

  /* Fail unless at the beginning of a line.  */
  begline,

  /* Fail unless at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (if emacs) or at the
     beginning of the string to be matched (if not).  */
  begbuf,

  /* The same, for the end of the buffer/string.  */
  endbuf,

  /* Operand: two-byte relative address to jump to.  */
  jump,

  /* Like jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Operand: two-byte relative address at which to resume in case of
     failure.  (With MBS_SUPPORT the size of the address is 1.)  */
  on_failure_jump,

  /* Like on_failure_jump, except that a placeholder is pushed instead
     of the current string position when it is executed.  */
  on_failure_keep_string_jump,

  /* Discard the latest failure point, then jump to the two-byte
     relative address that follows.  (With MBS_SUPPORT the size of the
     address is 1.)  */
  pop_failure_jump,

  /* Becomes pop_failure_jump if it is known that no backtracking will
     be needed to match, and jump otherwise.  It is used to jump back
     to the beginning of a repeat: when what follows this jump clearly
     cannot match what the repeat does, so that backtracking out of
     repetitions already matched is of no use, it is changed to a
     pop_failure_jump.  Operand: two-byte address.  (With MBS_SUPPORT
     the size of the address is 1.)  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy
     failure point, which is thrown away if an attempt is made to use
     it for a failure.  A `+' construct makes this before the first
     repeat.  It also serves as an intermediary kind of jump when
     compiling an alternative.  (With MBS_SUPPORT the size of the
     address is 1.)  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Operands: two-byte relative address and two-byte number N.  After
     matching N times, jump to the address upon failure.  (With
     MBS_SUPPORT the size of the address is 1.)  */
  succeed_n,

  /* Operands: two-byte relative address and two-byte number N.  Jump
     to the address N times, then fail.  (With MBS_SUPPORT the size of
     the address is 1.)  */
  jump_n,

  /* Set the location given by the following two-byte relative address
     to the two-byte number that comes after it.  The address
     *includes* the two bytes of the number.  (With MBS_SUPPORT the
     size of the address is 1.)  */
  set_number_at,

  /* Match any word-constituent character.  */
  wordchar,

  /* Match any character that is not a word constituent.  */
  notwordchar,

  /* Succeed if at the beginning of a word.  */
  wordbeg,

  /* Succeed if at the end of a word.  */
  wordend,

  /* Succeed if at a word boundary.  */
  wordbound,

  /* Succeed if not at a word boundary.  */
  notwordbound

# ifdef emacs
  ,
  /* Succeed if before point.  */
  before_dot,

  /* Succeed if at point.  */
  at_dot,

  /* Succeed if after point.  */
  after_dot,

  /* Match any character whose syntax is the one specified.  Operand:
     one byte containing a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Match any character whose syntax is not the one specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
605169695Skan#endif /* not INSIDE_RECURSION */
606169695Skan
607169695Skan
608169695Skan#ifdef BYTE
609169695Skan# define CHAR_T char
610169695Skan# define UCHAR_T unsigned char
611169695Skan# define COMPILED_BUFFER_VAR bufp->buffer
612169695Skan# define OFFSET_ADDRESS_SIZE 2
613169695Skan# define PREFIX(name) byte_##name
614169695Skan# define ARG_PREFIX(name) name
615169695Skan# define PUT_CHAR(c) putchar (c)
616169695Skan#else
617169695Skan# ifdef WCHAR
618169695Skan#  define CHAR_T wchar_t
619169695Skan#  define UCHAR_T wchar_t
620169695Skan#  define COMPILED_BUFFER_VAR wc_buffer
621169695Skan#  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
622169695Skan#  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
623169695Skan#  define PREFIX(name) wcs_##name
624169695Skan#  define ARG_PREFIX(name) c##name
625169695Skan/* Should we use wide stream??  */
626169695Skan#  define PUT_CHAR(c) printf ("%C", c);
627169695Skan#  define TRUE 1
628169695Skan#  define FALSE 0
629169695Skan# else
630169695Skan#  ifdef MBS_SUPPORT
631169695Skan#   define WCHAR
632169695Skan#   define INSIDE_RECURSION
633169695Skan#   include "regex.c"
634169695Skan#   undef INSIDE_RECURSION
635169695Skan#  endif
636169695Skan#  define BYTE
637169695Skan#  define INSIDE_RECURSION
638169695Skan#  include "regex.c"
639169695Skan#  undef INSIDE_RECURSION
640169695Skan# endif
641169695Skan#endif
642169695Skan
643169695Skan#ifdef INSIDE_RECURSION
644169695Skan/* Common operations on the compiled pattern.  */
645169695Skan
646169695Skan/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
647169695Skan/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
648169695Skan
# ifdef WCHAR
/* Wide-character build: NUMBER occupies a single element.  */
#  define STORE_NUMBER(destination, number)				\
  do									\
    {									\
      (destination)[0] = (UCHAR_T) (number);				\
    }									\
  while (0)
# else /* BYTE */
/* Byte build: the low-order eight bits go first, followed by the
   next eight bits.  */
#  define STORE_NUMBER(destination, number)				\
  do									\
    {									\
      *(destination) = (number) & 0xff;					\
      *((destination) + 1) = (number) >> 8;				\
    }									\
  while (0)
# endif /* WCHAR */
661169695Skan
662169695Skan/* Same as STORE_NUMBER, except increment DESTINATION to
663169695Skan   the byte after where the number is stored.  Therefore, DESTINATION
664169695Skan   must be an lvalue.  */
665169695Skan/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
666169695Skan
667169695Skan# define STORE_NUMBER_AND_INCR(destination, number)			\
668169695Skan  do {									\
669169695Skan    STORE_NUMBER (destination, number);					\
670169695Skan    (destination) += OFFSET_ADDRESS_SIZE;				\
671169695Skan  } while (0)
672169695Skan
673169695Skan/* Put into DESTINATION a number stored in two contiguous bytes starting
674169695Skan   at SOURCE.  */
675169695Skan/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
676169695Skan
# ifdef WCHAR
/* Wide-character build: the number is held in a single element.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* Byte build: SOURCE[0] is the low-order byte and SOURCE[1] is the
   sign-extended high-order byte.  The high byte is scaled by
   multiplication rather than by `<< 8' because it may be negative,
   and left-shifting a negative value is undefined in ISO C.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
  } while (0)
# endif
689169695Skan
690169695Skan# ifdef DEBUG
691169695Skanstatic void PREFIX(extract_number) (int *dest, UCHAR_T *source);
692169695Skanstatic void
693169695SkanPREFIX(extract_number) (int *dest, UCHAR_T *source)
694169695Skan{
695169695Skan#  ifdef WCHAR
696169695Skan  *dest = *source;
697169695Skan#  else /* BYTE */
698169695Skan  int temp = SIGN_EXTEND_CHAR (*(source + 1));
699169695Skan  *dest = *source & 0377;
700169695Skan  *dest += temp << 8;
701169695Skan#  endif
702169695Skan}
703169695Skan
704169695Skan#  ifndef EXTRACT_MACROS /* To debug the macros.  */
705169695Skan#   undef EXTRACT_NUMBER
706169695Skan#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
707169695Skan#  endif /* not EXTRACT_MACROS */
708169695Skan
709169695Skan# endif /* DEBUG */
710169695Skan
/* EXTRACT_NUMBER_AND_INCR reads a number exactly as EXTRACT_NUMBER
   does and then moves SOURCE forward by OFFSET_ADDRESS_SIZE elements,
   leaving it just past the number.  SOURCE must therefore be a
   modifiable lvalue.  */

# define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += OFFSET_ADDRESS_SIZE;					\
  } while (0)

# ifdef DEBUG
/* Function form of the macro above: read the number at *SOURCE into
   *DESTINATION, then advance *SOURCE past it.  */
static void PREFIX(extract_number_and_incr) (int *destination,
                                             UCHAR_T **source);
static void
PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
{
  UCHAR_T *cursor = *source;

  PREFIX(extract_number) (destination, cursor);
  *source = cursor + OFFSET_ADDRESS_SIZE;
}

/* Unless EXTRACT_MACROS is defined, route the macro through the
   function so it can be stepped into.  */
#  ifndef EXTRACT_MACROS
#   undef EXTRACT_NUMBER_AND_INCR
#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
  PREFIX(extract_number_and_incr) (&dest, &src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
737169695Skan
738169695Skan
739169695Skan
740169695Skan/* If DEBUG is defined, Regex prints many voluminous messages about what
741169695Skan   it is doing (if the variable `debug' is nonzero).  If linked with the
742169695Skan   main program in `iregex.c', you can enter patterns and strings
743169695Skan   interactively.  And if linked with the main program in `main.c' and
744169695Skan   the other test files, you can run the already-written tests.  */
745169695Skan
746169695Skan# ifdef DEBUG
747169695Skan
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* Nonzero while tracing output is enabled; the DEBUG_PRINT* macros
   below print only when it is set.  */
static int debug;

/* DEBUG_STATEMENT keeps its argument in the program verbatim.  The
   DEBUG_PRINTn macros forward their n arguments to printf when `debug'
   is nonzero.  Each is wrapped in `do { } while (0)' so that it is a
   single statement: a bare `if (debug) ...' expansion would capture
   the `else' of an enclosing `if'.  */
#   define DEBUG_STATEMENT(e) e
#   define DEBUG_PRINT1(x) do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3)					\
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4)					\
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */

/* Dump the compiled pattern between S and E, or the subject strings
   from position W on, when `debug' is nonzero.  The P argument of
   DEBUG_PRINT_COMPILED_PATTERN is accepted but not used.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
  do { if (debug) PREFIX(print_partial_compiled_pattern) (s, e); } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
  do {									\
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2);	\
  } while (0)
769169695Skan
770169695Skan
771169695Skan/* Print the fastmap in human-readable form.  */
772169695Skan
773169695Skan#  ifndef DEFINED_ONCE
774169695Skanvoid
775169695Skanprint_fastmap (char *fastmap)
776169695Skan{
777169695Skan  unsigned was_a_range = 0;
778169695Skan  unsigned i = 0;
779169695Skan
780169695Skan  while (i < (1 << BYTEWIDTH))
781169695Skan    {
782169695Skan      if (fastmap[i++])
783169695Skan	{
784169695Skan	  was_a_range = 0;
785169695Skan          putchar (i - 1);
786169695Skan          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
787169695Skan            {
788169695Skan              was_a_range = 1;
789169695Skan              i++;
790169695Skan            }
791169695Skan	  if (was_a_range)
792169695Skan            {
793169695Skan              printf ("-");
794169695Skan              putchar (i - 1);
795169695Skan            }
796169695Skan        }
797169695Skan    }
798169695Skan  putchar ('\n');
799169695Skan}
800169695Skan#  endif /* not DEFINED_ONCE */
801169695Skan
802169695Skan
803169695Skan/* Print a compiled pattern string in human-readable form, starting at
804169695Skan   the START pointer into it and ending just before the pointer END.  */
805169695Skan
806169695Skanvoid
807169695SkanPREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
808169695Skan{
809169695Skan  int mcnt, mcnt2;
810169695Skan  UCHAR_T *p1;
811169695Skan  UCHAR_T *p = start;
812169695Skan  UCHAR_T *pend = end;
813169695Skan
814169695Skan  if (start == NULL)
815169695Skan    {
816169695Skan      printf ("(null)\n");
817169695Skan      return;
818169695Skan    }
819169695Skan
820169695Skan  /* Loop over pattern commands.  */
821169695Skan  while (p < pend)
822169695Skan    {
823169695Skan#  ifdef _LIBC
824169695Skan      printf ("%td:\t", p - start);
825169695Skan#  else
826169695Skan      printf ("%ld:\t", (long int) (p - start));
827169695Skan#  endif
828169695Skan
829169695Skan      switch ((re_opcode_t) *p++)
830169695Skan	{
831169695Skan        case no_op:
832169695Skan          printf ("/no_op");
833169695Skan          break;
834169695Skan
835169695Skan	case exactn:
836169695Skan	  mcnt = *p++;
837169695Skan          printf ("/exactn/%d", mcnt);
838169695Skan          do
839169695Skan	    {
840169695Skan              putchar ('/');
841169695Skan	      PUT_CHAR (*p++);
842169695Skan            }
843169695Skan          while (--mcnt);
844169695Skan          break;
845169695Skan
846169695Skan#  ifdef MBS_SUPPORT
847169695Skan	case exactn_bin:
848169695Skan	  mcnt = *p++;
849169695Skan	  printf ("/exactn_bin/%d", mcnt);
850169695Skan          do
851169695Skan	    {
852169695Skan	      printf("/%lx", (long int) *p++);
853169695Skan            }
854169695Skan          while (--mcnt);
855169695Skan          break;
856169695Skan#  endif /* MBS_SUPPORT */
857169695Skan
858169695Skan	case start_memory:
859169695Skan          mcnt = *p++;
860169695Skan          printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
861169695Skan          break;
862169695Skan
863169695Skan	case stop_memory:
864169695Skan          mcnt = *p++;
865169695Skan	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
866169695Skan          break;
867169695Skan
868169695Skan	case duplicate:
869169695Skan	  printf ("/duplicate/%ld", (long int) *p++);
870169695Skan	  break;
871169695Skan
872169695Skan	case anychar:
873169695Skan	  printf ("/anychar");
874169695Skan	  break;
875169695Skan
876169695Skan	case charset:
877169695Skan        case charset_not:
878169695Skan          {
879169695Skan#  ifdef WCHAR
880169695Skan	    int i, length;
881169695Skan	    wchar_t *workp = p;
882169695Skan	    printf ("/charset [%s",
883169695Skan	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
884169695Skan	    p += 5;
885169695Skan	    length = *workp++; /* the length of char_classes */
886169695Skan	    for (i=0 ; i<length ; i++)
887169695Skan	      printf("[:%lx:]", (long int) *p++);
888169695Skan	    length = *workp++; /* the length of collating_symbol */
889169695Skan	    for (i=0 ; i<length ;)
890169695Skan	      {
891169695Skan		printf("[.");
892169695Skan		while(*p != 0)
893169695Skan		  PUT_CHAR((i++,*p++));
894169695Skan		i++,p++;
895169695Skan		printf(".]");
896169695Skan	      }
897169695Skan	    length = *workp++; /* the length of equivalence_class */
898169695Skan	    for (i=0 ; i<length ;)
899169695Skan	      {
900169695Skan		printf("[=");
901169695Skan		while(*p != 0)
902169695Skan		  PUT_CHAR((i++,*p++));
903169695Skan		i++,p++;
904169695Skan		printf("=]");
905169695Skan	      }
906169695Skan	    length = *workp++; /* the length of char_range */
907169695Skan	    for (i=0 ; i<length ; i++)
908169695Skan	      {
909169695Skan		wchar_t range_start = *p++;
910169695Skan		wchar_t range_end = *p++;
911169695Skan		printf("%C-%C", range_start, range_end);
912169695Skan	      }
913169695Skan	    length = *workp++; /* the length of char */
914169695Skan	    for (i=0 ; i<length ; i++)
915169695Skan	      printf("%C", *p++);
916169695Skan	    putchar (']');
917169695Skan#  else
918169695Skan            register int c, last = -100;
919169695Skan	    register int in_range = 0;
920169695Skan
921169695Skan	    printf ("/charset [%s",
922169695Skan	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
923169695Skan
924169695Skan            assert (p + *p < pend);
925169695Skan
926169695Skan            for (c = 0; c < 256; c++)
927169695Skan	      if (c / 8 < *p
928169695Skan		  && (p[1 + (c/8)] & (1 << (c % 8))))
929169695Skan		{
930169695Skan		  /* Are we starting a range?  */
931169695Skan		  if (last + 1 == c && ! in_range)
932169695Skan		    {
933169695Skan		      putchar ('-');
934169695Skan		      in_range = 1;
935169695Skan		    }
936169695Skan		  /* Have we broken a range?  */
937169695Skan		  else if (last + 1 != c && in_range)
938169695Skan              {
939169695Skan		      putchar (last);
940169695Skan		      in_range = 0;
941169695Skan		    }
942169695Skan
943169695Skan		  if (! in_range)
944169695Skan		    putchar (c);
945169695Skan
946169695Skan		  last = c;
947169695Skan              }
948169695Skan
949169695Skan	    if (in_range)
950169695Skan	      putchar (last);
951169695Skan
952169695Skan	    putchar (']');
953169695Skan
954169695Skan	    p += 1 + *p;
955169695Skan#  endif /* WCHAR */
956169695Skan	  }
957169695Skan	  break;
958169695Skan
959169695Skan	case begline:
960169695Skan	  printf ("/begline");
961169695Skan          break;
962169695Skan
963169695Skan	case endline:
964169695Skan          printf ("/endline");
965169695Skan          break;
966169695Skan
967169695Skan	case on_failure_jump:
968169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
969169695Skan#  ifdef _LIBC
970169695Skan  	  printf ("/on_failure_jump to %td", p + mcnt - start);
971169695Skan#  else
972169695Skan  	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
973169695Skan#  endif
974169695Skan          break;
975169695Skan
976169695Skan	case on_failure_keep_string_jump:
977169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
978169695Skan#  ifdef _LIBC
979169695Skan  	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
980169695Skan#  else
981169695Skan  	  printf ("/on_failure_keep_string_jump to %ld",
982169695Skan		  (long int) (p + mcnt - start));
983169695Skan#  endif
984169695Skan          break;
985169695Skan
986169695Skan	case dummy_failure_jump:
987169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
988169695Skan#  ifdef _LIBC
989169695Skan  	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
990169695Skan#  else
991169695Skan  	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
992169695Skan#  endif
993169695Skan          break;
994169695Skan
995169695Skan	case push_dummy_failure:
996169695Skan          printf ("/push_dummy_failure");
997169695Skan          break;
998169695Skan
999169695Skan        case maybe_pop_jump:
1000169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
1001169695Skan#  ifdef _LIBC
1002169695Skan  	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
1003169695Skan#  else
1004169695Skan  	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1005169695Skan#  endif
1006169695Skan	  break;
1007169695Skan
1008169695Skan        case pop_failure_jump:
1009169695Skan	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1010169695Skan#  ifdef _LIBC
1011169695Skan  	  printf ("/pop_failure_jump to %td", p + mcnt - start);
1012169695Skan#  else
1013169695Skan  	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1014169695Skan#  endif
1015169695Skan	  break;
1016169695Skan
1017169695Skan        case jump_past_alt:
1018169695Skan	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1019169695Skan#  ifdef _LIBC
1020169695Skan  	  printf ("/jump_past_alt to %td", p + mcnt - start);
1021169695Skan#  else
1022169695Skan  	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1023169695Skan#  endif
1024169695Skan	  break;
1025169695Skan
1026169695Skan        case jump:
1027169695Skan	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1028169695Skan#  ifdef _LIBC
1029169695Skan  	  printf ("/jump to %td", p + mcnt - start);
1030169695Skan#  else
1031169695Skan  	  printf ("/jump to %ld", (long int) (p + mcnt - start));
1032169695Skan#  endif
1033169695Skan	  break;
1034169695Skan
1035169695Skan        case succeed_n:
1036169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
1037169695Skan	  p1 = p + mcnt;
1038169695Skan          PREFIX(extract_number_and_incr) (&mcnt2, &p);
1039169695Skan#  ifdef _LIBC
1040169695Skan	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1041169695Skan#  else
1042169695Skan	  printf ("/succeed_n to %ld, %d times",
1043169695Skan		  (long int) (p1 - start), mcnt2);
1044169695Skan#  endif
1045169695Skan          break;
1046169695Skan
1047169695Skan        case jump_n:
1048169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
1049169695Skan	  p1 = p + mcnt;
1050169695Skan          PREFIX(extract_number_and_incr) (&mcnt2, &p);
1051169695Skan	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1052169695Skan          break;
1053169695Skan
1054169695Skan        case set_number_at:
1055169695Skan          PREFIX(extract_number_and_incr) (&mcnt, &p);
1056169695Skan	  p1 = p + mcnt;
1057169695Skan          PREFIX(extract_number_and_incr) (&mcnt2, &p);
1058169695Skan#  ifdef _LIBC
1059169695Skan	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1060169695Skan#  else
1061169695Skan	  printf ("/set_number_at location %ld to %d",
1062169695Skan		  (long int) (p1 - start), mcnt2);
1063169695Skan#  endif
1064169695Skan          break;
1065169695Skan
1066169695Skan        case wordbound:
1067169695Skan	  printf ("/wordbound");
1068169695Skan	  break;
1069169695Skan
1070169695Skan	case notwordbound:
1071169695Skan	  printf ("/notwordbound");
1072169695Skan          break;
1073169695Skan
1074169695Skan	case wordbeg:
1075169695Skan	  printf ("/wordbeg");
1076169695Skan	  break;
1077169695Skan
1078169695Skan	case wordend:
1079169695Skan	  printf ("/wordend");
1080169695Skan	  break;
1081169695Skan
1082169695Skan#  ifdef emacs
1083169695Skan	case before_dot:
1084169695Skan	  printf ("/before_dot");
1085169695Skan          break;
1086169695Skan
1087169695Skan	case at_dot:
1088169695Skan	  printf ("/at_dot");
1089169695Skan          break;
1090169695Skan
1091169695Skan	case after_dot:
1092169695Skan	  printf ("/after_dot");
1093169695Skan          break;
1094169695Skan
1095169695Skan	case syntaxspec:
1096169695Skan          printf ("/syntaxspec");
1097169695Skan	  mcnt = *p++;
1098169695Skan	  printf ("/%d", mcnt);
1099169695Skan          break;
1100169695Skan
1101169695Skan	case notsyntaxspec:
1102169695Skan          printf ("/notsyntaxspec");
1103169695Skan	  mcnt = *p++;
1104169695Skan	  printf ("/%d", mcnt);
1105169695Skan	  break;
1106169695Skan#  endif /* emacs */
1107169695Skan
1108169695Skan	case wordchar:
1109169695Skan	  printf ("/wordchar");
1110169695Skan          break;
1111169695Skan
1112169695Skan	case notwordchar:
1113169695Skan	  printf ("/notwordchar");
1114169695Skan          break;
1115169695Skan
1116169695Skan	case begbuf:
1117169695Skan	  printf ("/begbuf");
1118169695Skan          break;
1119169695Skan
1120169695Skan	case endbuf:
1121169695Skan	  printf ("/endbuf");
1122169695Skan          break;
1123169695Skan
1124169695Skan        default:
1125169695Skan          printf ("?%ld", (long int) *(p-1));
1126169695Skan	}
1127169695Skan
1128169695Skan      putchar ('\n');
1129169695Skan    }
1130169695Skan
1131169695Skan#  ifdef _LIBC
1132169695Skan  printf ("%td:\tend of pattern.\n", p - start);
1133169695Skan#  else
1134169695Skan  printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1135169695Skan#  endif
1136169695Skan}
1137169695Skan
1138169695Skan
1139169695Skanvoid
1140169695SkanPREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
1141169695Skan{
1142169695Skan  UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
1143169695Skan
1144169695Skan  PREFIX(print_partial_compiled_pattern) (buffer, buffer
1145169695Skan				  + bufp->used / sizeof(UCHAR_T));
1146169695Skan  printf ("%ld bytes used/%ld bytes allocated.\n",
1147169695Skan	  bufp->used, bufp->allocated);
1148169695Skan
1149169695Skan  if (bufp->fastmap_accurate && bufp->fastmap)
1150169695Skan    {
1151169695Skan      printf ("fastmap: ");
1152169695Skan      print_fastmap (bufp->fastmap);
1153169695Skan    }
1154169695Skan
1155169695Skan#  ifdef _LIBC
1156169695Skan  printf ("re_nsub: %Zd\t", bufp->re_nsub);
1157169695Skan#  else
1158169695Skan  printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1159169695Skan#  endif
1160169695Skan  printf ("regs_alloc: %d\t", bufp->regs_allocated);
1161169695Skan  printf ("can_be_null: %d\t", bufp->can_be_null);
1162169695Skan  printf ("newline_anchor: %d\n", bufp->newline_anchor);
1163169695Skan  printf ("no_sub: %d\t", bufp->no_sub);
1164169695Skan  printf ("not_bol: %d\t", bufp->not_bol);
1165169695Skan  printf ("not_eol: %d\t", bufp->not_eol);
1166169695Skan  printf ("syntax: %lx\n", bufp->syntax);
1167169695Skan  /* Perhaps we should print the translate table?  */
1168169695Skan}
1169169695Skan
1170169695Skan
1171169695Skanvoid
1172169695SkanPREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
1173169695Skan                             int size1, const CHAR_T *string2, int size2)
1174169695Skan{
1175169695Skan  int this_char;
1176169695Skan
1177169695Skan  if (where == NULL)
1178169695Skan    printf ("(null)");
1179169695Skan  else
1180169695Skan    {
1181169695Skan      int cnt;
1182169695Skan
1183169695Skan      if (FIRST_STRING_P (where))
1184169695Skan        {
1185169695Skan          for (this_char = where - string1; this_char < size1; this_char++)
1186169695Skan	    PUT_CHAR (string1[this_char]);
1187169695Skan
1188169695Skan          where = string2;
1189169695Skan        }
1190169695Skan
1191169695Skan      cnt = 0;
1192169695Skan      for (this_char = where - string2; this_char < size2; this_char++)
1193169695Skan	{
1194169695Skan	  PUT_CHAR (string2[this_char]);
1195169695Skan	  if (++cnt > 100)
1196169695Skan	    {
1197169695Skan	      fputs ("...", stdout);
1198169695Skan	      break;
1199169695Skan	    }
1200169695Skan	}
1201169695Skan    }
1202169695Skan}
1203169695Skan
#  ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (int c)
{
  fputc (c, stderr);
}
#  endif
1211169695Skan
1212169695Skan# else /* not DEBUG */
1213169695Skan
/* Non-DEBUG build: every tracing hook below expands to nothing, and
   `assert' is redefined so that it discards its argument.  The macros
   that do not depend on PREFIX are defined only once (guarded by
   DEFINED_ONCE); the last two are defined unconditionally here.  */
1214169695Skan#  ifndef DEFINED_ONCE
1215169695Skan#   undef assert
1216169695Skan#   define assert(e)
1217169695Skan
1218169695Skan#   define DEBUG_STATEMENT(e)
1219169695Skan#   define DEBUG_PRINT1(x)
1220169695Skan#   define DEBUG_PRINT2(x1, x2)
1221169695Skan#   define DEBUG_PRINT3(x1, x2, x3)
1222169695Skan#   define DEBUG_PRINT4(x1, x2, x3, x4)
1223169695Skan#  endif /* not DEFINED_ONCE */
1224169695Skan#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1225169695Skan#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1226169695Skan
1227169695Skan# endif /* not DEBUG */
1228169695Skan
1229169695Skan
1230169695Skan
1231169695Skan# ifdef WCHAR
1232169695Skan/* This  convert a multibyte string to a wide character string.
1233169695Skan   And write their correspondances to offset_buffer(see below)
1234169695Skan   and write whether each wchar_t is binary data to is_binary.
1235169695Skan   This assume invalid multibyte sequences as binary data.
1236169695Skan   We assume offset_buffer and is_binary is already allocated
1237169695Skan   enough space.  */
1238169695Skan
1239169695Skanstatic size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
1240169695Skan				  size_t len, int *offset_buffer,
1241169695Skan				  char *is_binary);
1242169695Skanstatic size_t
1243169695Skanconvert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
1244169695Skan                    int *offset_buffer, char *is_binary)
1245169695Skan     /* It hold correspondances between src(char string) and
1246169695Skan	dest(wchar_t string) for optimization.
1247169695Skan	e.g. src  = "xxxyzz"
1248169695Skan             dest = {'X', 'Y', 'Z'}
1249169695Skan	      (each "xxx", "y" and "zz" represent one multibyte character
1250169695Skan	       corresponding to 'X', 'Y' and 'Z'.)
1251169695Skan	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1252169695Skan	  	        = {0, 3, 4, 6}
1253169695Skan     */
1254169695Skan{
1255169695Skan  wchar_t *pdest = dest;
1256169695Skan  const unsigned char *psrc = src;
1257169695Skan  size_t wc_count = 0;
1258169695Skan
1259169695Skan  mbstate_t mbs;
1260169695Skan  int i, consumed;
1261169695Skan  size_t mb_remain = len;
1262169695Skan  size_t mb_count = 0;
1263169695Skan
1264169695Skan  /* Initialize the conversion state.  */
1265169695Skan  memset (&mbs, 0, sizeof (mbstate_t));
1266169695Skan
1267169695Skan  offset_buffer[0] = 0;
1268169695Skan  for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1269169695Skan	 psrc += consumed)
1270169695Skan    {
1271169695Skan#ifdef _LIBC
1272169695Skan      consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
1273169695Skan#else
1274169695Skan      consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1275169695Skan#endif
1276169695Skan
1277169695Skan      if (consumed <= 0)
1278169695Skan	/* failed to convert. maybe src contains binary data.
1279169695Skan	   So we consume 1 byte manualy.  */
1280169695Skan	{
1281169695Skan	  *pdest = *psrc;
1282169695Skan	  consumed = 1;
1283169695Skan	  is_binary[wc_count] = TRUE;
1284169695Skan	}
1285169695Skan      else
1286169695Skan	is_binary[wc_count] = FALSE;
1287169695Skan      /* In sjis encoding, we use yen sign as escape character in
1288169695Skan	 place of reverse solidus. So we convert 0x5c(yen sign in
1289169695Skan	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1290169695Skan	 solidus in UCS2).  */
1291169695Skan      if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1292169695Skan	*pdest = (wchar_t) *psrc;
1293169695Skan
1294169695Skan      offset_buffer[wc_count + 1] = mb_count += consumed;
1295169695Skan    }
1296169695Skan
1297169695Skan  /* Fill remain of the buffer with sentinel.  */
1298169695Skan  for (i = wc_count + 1 ; i <= len ; i++)
1299169695Skan    offset_buffer[i] = mb_count + 1;
1300169695Skan
1301169695Skan  return wc_count;
1302169695Skan}
1303169695Skan
1304169695Skan# endif /* WCHAR */
1305169695Skan
1306169695Skan#else /* not INSIDE_RECURSION */
1307169695Skan
1308169695Skan/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1309169695Skan   also be assigned to arbitrarily: each pattern buffer stores its own
1310169695Skan   syntax, so it can be changed between regex compilations.  */
1311169695Skan/* This has no initializer because initialized variables in Emacs
1312169695Skan   become read-only after dumping.  */
1313169695Skanreg_syntax_t re_syntax_options;
1314169695Skan
1315169695Skan
1316169695Skan/* Specify the precise syntax of regexps for compilation.  This provides
1317169695Skan   for compatibility for various utilities which historically have
1318169695Skan   different, incompatible syntaxes.
1319169695Skan
1320169695Skan   The argument SYNTAX is a bit mask comprised of the various bits
1321169695Skan   defined in regex.h.  We return the old syntax.  */
1322169695Skan
1323169695Skanreg_syntax_t
1324169695Skanre_set_syntax (reg_syntax_t syntax)
1325169695Skan{
1326169695Skan  reg_syntax_t ret = re_syntax_options;
1327169695Skan
1328169695Skan  re_syntax_options = syntax;
1329169695Skan# ifdef DEBUG
1330169695Skan  if (syntax & RE_DEBUG)
1331169695Skan    debug = 1;
1332169695Skan  else if (debug) /* was on but now is not */
1333169695Skan    debug = 0;
1334169695Skan# endif /* DEBUG */
1335169695Skan  return ret;
1336169695Skan}
1337169695Skan# ifdef _LIBC
1338169695Skanweak_alias (__re_set_syntax, re_set_syntax)
1339169695Skan# endif
1340169695Skan
1341169695Skan/* This table gives an error message for each of the error codes listed
   in regex.h.  The table is positional, so the order of the entries
   here has to be the same as the order of the codes there; the comment
   after each entry names the code it belongs to.  When adding a code,
   add its message at the matching position.
   Every string is wrapped in gettext_noop -- presumably this only marks
   it for translation and the lookup happens where the message is used;
   confirm against the gettext_noop definition.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?  */
1345169695Skan
1346169695Skanstatic const char *re_error_msgid[] =
1347169695Skan  {
1348169695Skan    gettext_noop ("Success"),	/* REG_NOERROR */
1349169695Skan    gettext_noop ("No match"),	/* REG_NOMATCH */
1350169695Skan    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1351169695Skan    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1352169695Skan    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1353169695Skan    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1354169695Skan    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1355169695Skan    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1356169695Skan    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1357169695Skan    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1358169695Skan    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1359169695Skan    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1360169695Skan    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1361169695Skan    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1362169695Skan    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1363169695Skan    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1364169695Skan    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
1365169695Skan  };
1366169695Skan
1367169695Skan#endif /* INSIDE_RECURSION */
1368169695Skan
1369169695Skan#ifndef DEFINED_ONCE
1370169695Skan/* Avoiding alloca during matching, to placate r_alloc.  */
/* Summary of this section: MATCH_MAY_ALLOCATE starts out defined and
   is undefined again only when `emacs' is defined together with
   C_ALLOCA or REGEX_MALLOC.  Under GNU C, C_ALLOCA is undefined first,
   so there only REGEX_MALLOC can have that effect.  The whole section
   is processed once, guarded by DEFINED_ONCE.  */
1371169695Skan
1372169695Skan/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   searching and matching functions should not call alloca.  On some
   systems, alloca is implemented in terms of malloc, and if we're
   using the relocating allocator routines, then malloc could cause a
   relocation, which might (if the strings being searched are in the
   ralloc heap) shift the data out from underneath the regexp
   routines.

   Here's another reason to avoid allocation: Emacs
   processes input from X in a signal handler; processing X input may
   call malloc; if input arrives while a matching routine is calling
   malloc, then we're scrod.  But Emacs can't just block input while
   calling matching routines; then we don't notice interrupts when
   they come in.  So, Emacs blocks input around all regexp calls
   except the matching calls, which it leaves unprotected, in the
   faith that they will not malloc.  */
1388169695Skan
1389169695Skan/* Normally, this is fine.  */
1390169695Skan# define MATCH_MAY_ALLOCATE
1391169695Skan
1392169695Skan/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
1394169695Skan# ifdef __GNUC__
1395169695Skan#  undef C_ALLOCA
1396169695Skan# endif
1397169695Skan
1398169695Skan/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */
1403169695Skan# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1404169695Skan#  undef MATCH_MAY_ALLOCATE
1405169695Skan# endif
1406169695Skan#endif /* not DEFINED_ONCE */
1407169695Skan
1408169695Skan#ifdef INSIDE_RECURSION
1409169695Skan/* Failure stack declarations and macros; both re_compile_fastmap and
1410169695Skan   re_match_2 use a failure stack.  These have to be macros because of
1411169695Skan   REGEX_ALLOCATE_STACK.  */
1412169695Skan
1413169695Skan
1414169695Skan/* Number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  The value can be overridden by
   defining INIT_FAILURE_ALLOC before this point.  */
1417169695Skan# ifndef INIT_FAILURE_ALLOC
1418169695Skan#  define INIT_FAILURE_ALLOC 5
1419169695Skan# endif
1420169695Skan
1421169695Skan/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.

   The two branches below declare the same things and differ only in
   integer width: with INT_IS_16BIT, re_max_failures and the integer
   member of a stack element are `long int' and the stack's size and
   avail fields are `unsigned long int'; otherwise they are `int' and
   `unsigned'.  In both, re_max_failures starts at 4000 when
   MATCH_MAY_ALLOCATE is defined and at 2000 when it is not.  */
1425169695Skan
1426169695Skan# ifdef INT_IS_16BIT
1427169695Skan
1428169695Skan#  ifndef DEFINED_ONCE
1429169695Skan#   if defined MATCH_MAY_ALLOCATE
1430169695Skan/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
1432169695Skanlong int re_max_failures = 4000;
1433169695Skan#   else
1434169695Skanlong int re_max_failures = 2000;
1435169695Skan#   endif
1436169695Skan#  endif
1437169695Skan
/* One slot of the failure stack: either a pointer or an integer.  */
1438169695Skanunion PREFIX(fail_stack_elt)
1439169695Skan{
1440169695Skan  UCHAR_T *pointer;
1441169695Skan  long int integer;
1442169695Skan};
1443169695Skan
1444169695Skantypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1445169695Skan
/* The failure stack itself: a block of slots, its capacity in slots
   (size), and the index of the next free slot (avail).  */
1446169695Skantypedef struct
1447169695Skan{
1448169695Skan  PREFIX(fail_stack_elt_t) *stack;
1449169695Skan  unsigned long int size;
1450169695Skan  unsigned long int avail;		/* Offset of next open position.  */
1451169695Skan} PREFIX(fail_stack_type);
1452169695Skan
1453169695Skan# else /* not INT_IS_16BIT */
1454169695Skan
1455169695Skan#  ifndef DEFINED_ONCE
1456169695Skan#   if defined MATCH_MAY_ALLOCATE
1457169695Skan/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
1459169695Skanint re_max_failures = 4000;
1460169695Skan#   else
1461169695Skanint re_max_failures = 2000;
1462169695Skan#   endif
1463169695Skan#  endif
1464169695Skan
/* One slot of the failure stack: either a pointer or an integer.  */
1465169695Skanunion PREFIX(fail_stack_elt)
1466169695Skan{
1467169695Skan  UCHAR_T *pointer;
1468169695Skan  int integer;
1469169695Skan};
1470169695Skan
1471169695Skantypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1472169695Skan
/* The failure stack itself: a block of slots, its capacity in slots
   (size), and the index of the next free slot (avail).  */
1473169695Skantypedef struct
1474169695Skan{
1475169695Skan  PREFIX(fail_stack_elt_t) *stack;
1476169695Skan  unsigned size;
1477169695Skan  unsigned avail;			/* Offset of next open position.  */
1478169695Skan} PREFIX(fail_stack_type);
1479169695Skan
1480169695Skan# endif /* INT_IS_16BIT */
1481169695Skan
/* Predicates on the failure stack.  They do not take the stack as an
   argument: FAIL_STACK_EMPTY and FAIL_STACK_FULL refer to a variable
   named `fail_stack', and FAIL_STACK_PTR_EMPTY to a pointer named
   `fail_stack_ptr', which must be in scope where the macro is used.  */
1482169695Skan# ifndef DEFINED_ONCE
1483169695Skan#  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1484169695Skan#  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1485169695Skan#  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1486169695Skan# endif
1487169695Skan
1488169695Skan
1489169695Skan/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both macros operate on a variable named `fail_stack' in the scope
   where they are expanded.  When MATCH_MAY_ALLOCATE is defined,
   INIT_FAIL_STACK allocates room for INIT_FAILURE_ALLOC elements with
   REGEX_ALLOCATE_STACK and, because the failure path is a `return -2'
   statement, it can only be used inside a function that may return
   that value; RESET_FAIL_STACK hands the block to REGEX_FREE_STACK.
   When MATCH_MAY_ALLOCATE is not defined, INIT_FAIL_STACK only resets
   `avail' to 0 (neither `stack' nor `size' is touched) and
   RESET_FAIL_STACK expands to nothing.  */
1491169695Skan
1492169695Skan# ifdef MATCH_MAY_ALLOCATE
1493169695Skan#  define INIT_FAIL_STACK()						\
1494169695Skan  do {									\
1495169695Skan    fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
1496169695Skan      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
1497169695Skan									\
1498169695Skan    if (fail_stack.stack == NULL)				\
1499169695Skan      return -2;							\
1500169695Skan									\
1501169695Skan    fail_stack.size = INIT_FAILURE_ALLOC;			\
1502169695Skan    fail_stack.avail = 0;					\
1503169695Skan  } while (0)
1504169695Skan
1505169695Skan#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1506169695Skan# else
1507169695Skan#  define INIT_FAIL_STACK()						\
1508169695Skan  do {									\
1509169695Skan    fail_stack.avail = 0;					\
1510169695Skan  } while (0)
1511169695Skan
1512169695Skan#  define RESET_FAIL_STACK()
1513169695Skan# endif
1514169695Skan
1515169695Skan
1516169695Skan/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1517169695Skan
1518169695Skan   Return 1 if succeeds, and 0 if either ran out of memory
1519169695Skan   allocating space for it or it was already too large.
1520169695Skan
1521169695Skan   REGEX_REALLOCATE_STACK requires `destination' be declared.
   
   The limit is `re_max_failures * MAX_FAILURE_ITEMS' slots and the test
   is a strict `>', so a stack whose size is at or just below the limit is
   still doubled once; hence "approximately".  The macro is a single
   expression and evaluates its argument several times.

   NOTE(review): the value returned by REGEX_REALLOCATE_STACK is stored
   straight into (fail_stack).stack, so when it is NULL the previous
   pointer is overwritten.  Whether that loses memory depends on how
   REGEX_REALLOCATE_STACK is defined, which is not visible here.  */
1522169695Skan
1523169695Skan# define DOUBLE_FAIL_STACK(fail_stack)					\
1524169695Skan  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1525169695Skan   ? 0									\
1526169695Skan   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
1527169695Skan        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1528169695Skan          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
1529169695Skan          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
1530169695Skan									\
1531169695Skan      (fail_stack).stack == NULL					\
1532169695Skan      ? 0								\
1533169695Skan      : ((fail_stack).size <<= 1, 					\
1534169695Skan         1)))
1535169695Skan
1536169695Skan
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The fullness test is made on the FAIL_STACK argument itself, so the
   macro behaves the same whether or not the stack being pushed to is the
   variable named `fail_stack'.  When the stack is full it is grown with
   DOUBLE_FAIL_STACK first.  FAIL_STACK is evaluated more than once, so it
   must not have side effects.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
1546169695Skan
/* Raw push and pop primitives for the variable `fail_stack'.  None of
   them checks capacity or emptiness: a push writes slot `avail' and then
   increments it, a pop decrements `avail' and then reads that slot.  */
1547169695Skan/* Push a pointer value onto the failure stack.
1548169695Skan   Assumes the variable `fail_stack'.  Probably should only
1549169695Skan   be called from within `PUSH_FAILURE_POINT'.  */
1550169695Skan# define PUSH_FAILURE_POINTER(item)					\
1551169695Skan  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
1552169695Skan
1553169695Skan/* This pushes an integer-valued item onto the failure stack.
1554169695Skan   Assumes the variable `fail_stack'.  Probably should only
1555169695Skan   be called from within `PUSH_FAILURE_POINT'.  */
1556169695Skan# define PUSH_FAILURE_INT(item)					\
1557169695Skan  fail_stack.stack[fail_stack.avail++].integer = (item)
1558169695Skan
1559169695Skan/* Push a fail_stack_elt_t value onto the failure stack.
1560169695Skan   Assumes the variable `fail_stack'.  Probably should only
1561169695Skan   be called from within `PUSH_FAILURE_POINT'.  */
1562169695Skan# define PUSH_FAILURE_ELT(item)					\
1563169695Skan  fail_stack.stack[fail_stack.avail++] =  (item)
1564169695Skan
1565169695Skan/* These three POP... operations complement the three PUSH... operations.
1566169695Skan   All assume that `fail_stack' is nonempty.  */
1567169695Skan# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1568169695Skan# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1569169695Skan# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1570169695Skan
1571169695Skan/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores
   the popped integer through ITEM_ADDR; without it both expand to
   nothing, so no stack slot is used for the id.  */
1572169695Skan# ifdef DEBUG
1573169695Skan#  define DEBUG_PUSH PUSH_FAILURE_INT
1574169695Skan#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1575169695Skan# else
1576169695Skan#  define DEBUG_PUSH(item)
1577169695Skan#  define DEBUG_POP(item_addr)
1578169695Skan# endif
1579169695Skan
1580169695Skan
1581169695Skan/* Push the information about the state we will need
1582169695Skan   if we ever fail back to it.
1583169695Skan
1584169695Skan   Requires variables fail_stack, regstart, regend, reg_info, and
1585169695Skan   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1586169695Skan   be declared.
1587169695Skan
1588169695Skan   Does `return FAILURE_CODE' if runs out of memory.
   
   The macro also reads lowest_active_reg and highest_active_reg from the
   expanding scope.  It first calls DOUBLE_FAIL_STACK until
   REMAINING_AVAIL_SLOTS is at least NUM_FAILURE_ITEMS, then pushes, in
   this order:
     - for each register from lowest_active_reg up to highest_active_reg:
       its regstart, its regend and its reg_info word;
     - lowest_active_reg, then highest_active_reg;
     - PATTERN_PLACE, then STRING_PLACE;
     - the failure id, through DEBUG_PUSH (nothing unless DEBUG is defined).
   The individual pushes are unchecked; they rely on the space test made
   at the top.  */
1589169695Skan
1590169695Skan# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1591169695Skan  do {									\
1592169695Skan    char *destination;							\
1593169695Skan    /* Must be int, so when we don't save any registers, the arithmetic	\
1594169695Skan       of 0 + -1 isn't done as unsigned.  */				\
1595169695Skan    /* Can't be int, since there is not a shred of a guarantee that int	\
1596169695Skan       is wide enough to hold a value of something to which pointer can	\
1597169695Skan       be assigned */							\
1598169695Skan    active_reg_t this_reg;						\
1599169695Skan    									\
1600169695Skan    DEBUG_STATEMENT (failure_id++);					\
1601169695Skan    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1602169695Skan    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1603169695Skan    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1604169695Skan    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1605169695Skan									\
1606169695Skan    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1607169695Skan    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1608169695Skan									\
1609169695Skan    /* Ensure we have enough space allocated for what we will push.  */	\
1610169695Skan    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1611169695Skan      {									\
1612169695Skan        if (!DOUBLE_FAIL_STACK (fail_stack))				\
1613169695Skan          return failure_code;						\
1614169695Skan									\
1615169695Skan        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1616169695Skan		       (fail_stack).size);				\
1617169695Skan        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1618169695Skan      }									\
1619169695Skan									\
1620169695Skan    /* Push the info, starting with the registers.  */			\
1621169695Skan    DEBUG_PRINT1 ("\n");						\
1622169695Skan									\
1623169695Skan    if (1)								\
1624169695Skan      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1625169695Skan	   this_reg++)							\
1626169695Skan	{								\
1627169695Skan	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1628169695Skan	  DEBUG_STATEMENT (num_regs_pushed++);				\
1629169695Skan									\
1630169695Skan	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1631169695Skan	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1632169695Skan									\
1633169695Skan	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1634169695Skan	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1635169695Skan									\
1636169695Skan	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1637169695Skan			reg_info[this_reg].word.pointer);		\
1638169695Skan	  DEBUG_PRINT2 (" match_null=%d",				\
1639169695Skan			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1640169695Skan	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1641169695Skan	  DEBUG_PRINT2 (" matched_something=%d",			\
1642169695Skan			MATCHED_SOMETHING (reg_info[this_reg]));	\
1643169695Skan	  DEBUG_PRINT2 (" ever_matched=%d",				\
1644169695Skan			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1645169695Skan	  DEBUG_PRINT1 ("\n");						\
1646169695Skan	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1647169695Skan	}								\
1648169695Skan									\
1649169695Skan    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1650169695Skan    PUSH_FAILURE_INT (lowest_active_reg);				\
1651169695Skan									\
1652169695Skan    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1653169695Skan    PUSH_FAILURE_INT (highest_active_reg);				\
1654169695Skan									\
1655169695Skan    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1656169695Skan    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1657169695Skan    PUSH_FAILURE_POINTER (pattern_place);				\
1658169695Skan									\
1659169695Skan    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1660169695Skan    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1661169695Skan				 size2);				\
1662169695Skan    DEBUG_PRINT1 ("'\n");						\
1663169695Skan    PUSH_FAILURE_POINTER (string_place);				\
1664169695Skan									\
1665169695Skan    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1666169695Skan    DEBUG_PUSH (failure_id);						\
1667169695Skan  } while (0)
1668169695Skan
1669169695Skan# ifndef DEFINED_ONCE
1670169695Skan/* This is the number of items that are pushed and popped on the stack
1671169695Skan   for each register.
   PUSH_FAILURE_POINT pushes a register's regstart, regend and reg_info
   word, hence three.  */
1672169695Skan#  define NUM_REG_ITEMS  3
1673169695Skan
1674169695Skan/* Individual items aside from the registers.
   PUSH_FAILURE_POINT pushes the lowest and highest active register
   numbers, the pattern position and the string position.  */
1675169695Skan#  ifdef DEBUG
1676169695Skan#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1677169695Skan#  else
1678169695Skan#   define NUM_NONREG_ITEMS 4
1679169695Skan#  endif
1680169695Skan
1681169695Skan/* We push at most this many items on the stack.  */
1682169695Skan/* We used to use (num_regs - 1), which is the number of registers
1683169695Skan   this regexp will save; but that was changed to 5
1684169695Skan   to avoid stack overflow for a regexp with lots of parens.  */
1685169695Skan#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1686169695Skan
1687169695Skan/* We actually push this many items.
   The `0 ? 0 : ...' conditional always selects its second arm, so the
   value is the number of registers in
   [lowest_active_reg, highest_active_reg] times NUM_REG_ITEMS, plus
   NUM_NONREG_ITEMS.  Both register variables are read from the expanding
   scope.  */
1688169695Skan#  define NUM_FAILURE_ITEMS				\
1689169695Skan  (((0							\
1690169695Skan     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1691169695Skan    * NUM_REG_ITEMS)					\
1692169695Skan   + NUM_NONREG_ITEMS)
1693169695Skan
1694169695Skan/* How many items can still be added to the stack without overflowing it.  */
1695169695Skan#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1696169695Skan# endif /* not DEFINED_ONCE */
1697169695Skan
1698169695Skan
1699169695Skan/* Pops what PUSH_FAILURE_POINT pushes.
1700169695Skan
1701169695Skan   We restore into the parameters, all of which should be lvalues:
1702169695Skan     STR -- the saved data position.
1703169695Skan     PAT -- the saved pattern position.
1704169695Skan     LOW_REG, HIGH_REG -- the lowest and highest active registers.
1705169695Skan     REGSTART, REGEND -- arrays of string positions.
1706169695Skan     REG_INFO -- array of information about each subexpression.
1707169695Skan
1708169695Skan   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1709169695Skan   `pend', `string1', `size1', `string2', and `size2'.
   
   Items come off in the reverse of the push order: the failure id (DEBUG
   only), the string position, the pattern position, HIGH_REG, LOW_REG,
   and then, for each register from HIGH_REG down to LOW_REG, its
   reg_info word, regend and regstart.  STR is left unchanged when the
   saved string position is NULL.  On the way out the variable
   `set_regs_matched_done' is reset to 0.

   The macro expands to a brace-enclosed block rather than a
   `do { } while (0)' statement.  The `else' arm that follows `if (1)' is
   never executed.  */
1710169695Skan# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1711169695Skan{									\
1712169695Skan  DEBUG_STATEMENT (unsigned failure_id;)				\
1713169695Skan  active_reg_t this_reg;						\
1714169695Skan  const UCHAR_T *string_temp;						\
1715169695Skan									\
1716169695Skan  assert (!FAIL_STACK_EMPTY ());					\
1717169695Skan									\
1718169695Skan  /* Remove failure points and point to how many regs pushed.  */	\
1719169695Skan  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1720169695Skan  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1721169695Skan  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1722169695Skan									\
1723169695Skan  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1724169695Skan									\
1725169695Skan  DEBUG_POP (&failure_id);						\
1726169695Skan  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1727169695Skan									\
1728169695Skan  /* If the saved string location is NULL, it came from an		\
1729169695Skan     on_failure_keep_string_jump opcode, and we want to throw away the	\
1730169695Skan     saved NULL, thus retaining our current position in the string.  */	\
1731169695Skan  string_temp = POP_FAILURE_POINTER ();					\
1732169695Skan  if (string_temp != NULL)						\
1733169695Skan    str = (const CHAR_T *) string_temp;					\
1734169695Skan									\
1735169695Skan  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1736169695Skan  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1737169695Skan  DEBUG_PRINT1 ("'\n");							\
1738169695Skan									\
1739169695Skan  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
1740169695Skan  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1741169695Skan  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1742169695Skan									\
1743169695Skan  /* Restore register info.  */						\
1744169695Skan  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1745169695Skan  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1746169695Skan									\
1747169695Skan  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1748169695Skan  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1749169695Skan									\
1750169695Skan  if (1)								\
1751169695Skan    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1752169695Skan      {									\
1753169695Skan	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1754169695Skan									\
1755169695Skan	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1756169695Skan	DEBUG_PRINT2 ("      info: %p\n",				\
1757169695Skan		      reg_info[this_reg].word.pointer);			\
1758169695Skan									\
1759169695Skan	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
1760169695Skan	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1761169695Skan									\
1762169695Skan	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
1763169695Skan	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1764169695Skan      }									\
1765169695Skan  else									\
1766169695Skan    {									\
1767169695Skan      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1768169695Skan	{								\
1769169695Skan	  reg_info[this_reg].word.integer = 0;				\
1770169695Skan	  regend[this_reg] = 0;						\
1771169695Skan	  regstart[this_reg] = 0;					\
1772169695Skan	}								\
1773169695Skan      highest_active_reg = high_reg;					\
1774169695Skan    }									\
1775169695Skan									\
1776169695Skan  set_regs_matched_done = 0;						\
1777169695Skan  DEBUG_STATEMENT (nfailure_points_popped++);				\
1778169695Skan} /* POP_FAILURE_POINT */
1779169695Skan
1780169695Skan/* Structure for per-register (a.k.a. per-group) information.
1781169695Skan   Other register information, such as the
1782169695Skan   starting and ending positions (which are addresses), and the list of
1783169695Skan   inner groups (which is a bits list) are maintained in separate
1784169695Skan   variables.
1785169695Skan
1786169695Skan   We are making a (strictly speaking) nonportable assumption here: that
1787169695Skan   the compiler will pack our bit fields into something that fits into
1788169695Skan   the type of `word', i.e., is something that fits into one item on the
1789169695Skan   failure stack.  */
1790169695Skan
1791169695Skan
1792169695Skan/* Declarations and macros for re_match_2.  */
1793169695Skan
/* Per-register flags, overlaid on one failure-stack slot (`word') so the
   whole set can be pushed and popped as a single element.  */
1794169695Skantypedef union
1795169695Skan{
1796169695Skan  PREFIX(fail_stack_elt_t) word;
1797169695Skan  struct
1798169695Skan  {
1799169695Skan      /* This field is one if this group can match the empty string,
1800169695Skan         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1801169695Skan# define MATCH_NULL_UNSET_VALUE 3
1802169695Skan    unsigned match_null_string_p : 2;
1803169695Skan    unsigned is_active : 1;
1804169695Skan    unsigned matched_something : 1;
1805169695Skan    unsigned ever_matched_something : 1;
1806169695Skan  } bits;
1807169695Skan} PREFIX(register_info_type);
1808169695Skan
1809169695Skan# ifndef DEFINED_ONCE
/* Accessors for the bit-fields of a register_info_type value R.  Each
   expands to the bit-field itself, so it can be assigned to as well as
   read (SET_REGS_MATCHED below assigns through two of them).  */
1810169695Skan#  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1811169695Skan#  define IS_ACTIVE(R)  ((R).bits.is_active)
1812169695Skan#  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1813169695Skan#  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1814169695Skan
1815169695Skan
1816169695Skan/* Call this when have matched a real character; it sets `matched' flags
1817169695Skan   for the subexpressions which we are currently inside.  Also records
1818169695Skan   that those subexprs have matched.
   The work is done only while `set_regs_matched_done' is zero; the macro
   then sets that variable to 1 and marks every register from
   lowest_active_reg through highest_active_reg.  All of these variables,
   and `reg_info', come from the expanding scope.  */
1819169695Skan#  define SET_REGS_MATCHED()						\
1820169695Skan  do									\
1821169695Skan    {									\
1822169695Skan      if (!set_regs_matched_done)					\
1823169695Skan	{								\
1824169695Skan	  active_reg_t r;						\
1825169695Skan	  set_regs_matched_done = 1;					\
1826169695Skan	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1827169695Skan	    {								\
1828169695Skan	      MATCHED_SOMETHING (reg_info[r])				\
1829169695Skan		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1830169695Skan		= 1;							\
1831169695Skan	    }								\
1832169695Skan	}								\
1833169695Skan    }									\
1834169695Skan  while (0)
1835169695Skan# endif /* not DEFINED_ONCE */
1836169695Skan
1837169695Skan/* Registers are set to a sentinel when they haven't yet matched.
   The sentinel is the address of the file-local dummy object below;
   REG_UNSET (E) is true when E equals that address.  */
1838169695Skanstatic CHAR_T PREFIX(reg_unset_dummy);
1839169695Skan# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1840169695Skan# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1841169695Skan
1842169695Skan/* Subroutine declarations and macros for regex_compile.  */
1843169695Skanstatic void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
1844169695Skanstatic void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
1845169695Skan                               int arg1, int arg2);
1846169695Skanstatic void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
1847169695Skan                                int arg, UCHAR_T *end);
1848169695Skanstatic void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
1849169695Skan                                int arg1, int arg2, UCHAR_T *end);
1850169695Skanstatic boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
1851169695Skan                                         const CHAR_T *p,
1852169695Skan                                         reg_syntax_t syntax);
1853169695Skanstatic boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
1854169695Skan                                         const CHAR_T *pend,
1855169695Skan                                         reg_syntax_t syntax);
1856169695Skan# ifdef WCHAR
1857169695Skanstatic reg_errcode_t wcs_compile_range (CHAR_T range_start,
1858169695Skan                                        const CHAR_T **p_ptr,
1859169695Skan                                        const CHAR_T *pend,
1860169695Skan                                        char *translate,
1861169695Skan                                        reg_syntax_t syntax,
1862169695Skan                                        UCHAR_T *b,
1863169695Skan                                        CHAR_T *char_set);
1864169695Skanstatic void insert_space (int num, CHAR_T *loc, CHAR_T *end);
1865169695Skan# else /* BYTE */
1866169695Skanstatic reg_errcode_t byte_compile_range (unsigned int range_start,
1867169695Skan                                         const char **p_ptr,
1868169695Skan                                         const char *pend,
1869169695Skan                                         char *translate,
1870169695Skan                                         reg_syntax_t syntax,
1871169695Skan                                         unsigned char *b);
1872169695Skan# endif /* WCHAR */
1873169695Skan
1874169695Skan/* Fetch the next character in the uncompiled pattern---translating it
1875169695Skan   if necessary.  Also cast from a signed character in the constant
1876169695Skan   string passed to us by the user to an unsigned char that we can use
1877169695Skan   as an array index (in, e.g., `translate').  */
1878169695Skan/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
1879169695Skan   because it is impossible to allocate 4GB array for some encodings
1880169695Skan   which have 4 byte character_set like UCS4.  */
/* The macro reads `p', `pend' and `translate' from the expanding scope,
   advances `p', and executes `return REG_EEND' in the enclosing function
   when `p' has already reached `pend'.  The `ifndef PATFETCH' guard lets
   a definition made earlier take precedence.  */
1881169695Skan# ifndef PATFETCH
1882169695Skan#  ifdef WCHAR
1883169695Skan#   define PATFETCH(c)							\
1884169695Skan  do {if (p == pend) return REG_EEND;					\
1885169695Skan    c = (UCHAR_T) *p++;							\
1886169695Skan    if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
1887169695Skan  } while (0)
1888169695Skan#  else /* BYTE */
1889169695Skan#   define PATFETCH(c)							\
1890169695Skan  do {if (p == pend) return REG_EEND;					\
1891169695Skan    c = (unsigned char) *p++;						\
1892169695Skan    if (translate) c = (unsigned char) translate[c];			\
1893169695Skan  } while (0)
1894169695Skan#  endif /* WCHAR */
1895169695Skan# endif
1896169695Skan
1897169695Skan/* Fetch the next character in the uncompiled pattern, with no
1898169695Skan   translation.
   Like PATFETCH, this reads `p' and `pend' from the expanding scope and
   executes `return REG_EEND' when the pattern is exhausted.  */
1899169695Skan# define PATFETCH_RAW(c)						\
1900169695Skan  do {if (p == pend) return REG_EEND;					\
1901169695Skan    c = (UCHAR_T) *p++; 	       					\
1902169695Skan  } while (0)
1903169695Skan
1904169695Skan/* Go backwards one character in the pattern.
   No check is made that `p' is past the start of the pattern.  */
1905169695Skan# define PATUNFETCH p--
1906169695Skan
1907169695Skan
1908169695Skan/* If `translate' is non-null, return translate[D], else just D.  We
1909169695Skan   cast the subscript to translate because some data is declared as
1910169695Skan   `char *', to avoid warnings when a string constant is passed.  But
1911169695Skan   when we use a character as a subscript we must make it unsigned.  */
1912169695Skan/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
1913169695Skan   because it is impossible to allocate 4GB array for some encodings
1914169695Skan   which have 4 byte character_set like UCS4.  */
/* `translate' is read from the expanding scope.  A translated value is
   cast to `char'; in the BYTE build an untranslated D is cast to `char'
   as well, while the WCHAR build yields an untranslated D unchanged.
   The `ifndef TRANSLATE' guard lets a definition made earlier take
   precedence.  The `else' below pairs with the `ifdef WCHAR' although it
   is indented one column less.  */
1915169695Skan
1916169695Skan# ifndef TRANSLATE
1917169695Skan#  ifdef WCHAR
1918169695Skan#   define TRANSLATE(d) \
1919169695Skan  ((translate && ((UCHAR_T) (d)) <= 0xff) \
1920169695Skan   ? (char) translate[(unsigned char) (d)] : (d))
1921169695Skan# else /* BYTE */
1922169695Skan#   define TRANSLATE(d) \
1923169695Skan  (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
1924169695Skan#  endif /* WCHAR */
1925169695Skan# endif
1926169695Skan
1927169695Skan
1928169695Skan/* Macros for outputting the compiled pattern into `buffer'.  */
1929169695Skan
1930169695Skan/* If the buffer isn't allocated when it comes in, use this.  */
1931169695Skan# define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
1932169695Skan
1933169695Skan/* Make sure we have room for at least N more elements in buffer (N bytes in the BYTE build, N * sizeof(CHAR_T) bytes in the WCHAR build).  */
1934169695Skan# ifdef WCHAR
1935169695Skan#  define GET_BUFFER_SPACE(n)						\
1936169695Skan    while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
1937169695Skan            + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
1938169695Skan      EXTEND_BUFFER ()
1939169695Skan# else /* BYTE */
1940169695Skan#  define GET_BUFFER_SPACE(n)						\
1941169695Skan    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1942169695Skan      EXTEND_BUFFER ()
1943169695Skan# endif /* WCHAR */
1944169695Skan
1945169695Skan/* Make sure we have one more byte of buffer space and then add C to it.  */
1946169695Skan# define BUF_PUSH(c)							\
1947169695Skan  do {									\
1948169695Skan    GET_BUFFER_SPACE (1);						\
1949169695Skan    *b++ = (UCHAR_T) (c);						\
1950169695Skan  } while (0)
1951169695Skan
1952169695Skan
1953169695Skan/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1954169695Skan# define BUF_PUSH_2(c1, c2)						\
1955169695Skan  do {									\
1956169695Skan    GET_BUFFER_SPACE (2);						\
1957169695Skan    *b++ = (UCHAR_T) (c1);						\
1958169695Skan    *b++ = (UCHAR_T) (c2);						\
1959169695Skan  } while (0)
1960169695Skan
1961169695Skan
1962169695Skan/* As with BUF_PUSH_2, except for three bytes.  */
1963169695Skan# define BUF_PUSH_3(c1, c2, c3)						\
1964169695Skan  do {									\
1965169695Skan    GET_BUFFER_SPACE (3);						\
1966169695Skan    *b++ = (UCHAR_T) (c1);						\
1967169695Skan    *b++ = (UCHAR_T) (c2);						\
1968169695Skan    *b++ = (UCHAR_T) (c3);						\
1969169695Skan  } while (0)
1970169695Skan
1971169695Skan/* Store a jump with opcode OP at LOC to location TO.  We store a
1972169695Skan   relative address offset by the 1 + OFFSET_ADDRESS_SIZE elements the jump itself occupies.  */
1973169695Skan# define STORE_JUMP(op, loc, to) \
1974169695Skan PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
1975169695Skan
1976169695Skan/* Likewise, for a two-argument jump.  */
1977169695Skan# define STORE_JUMP2(op, loc, to, arg) \
1978169695Skan  PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
1979169695Skan
1980169695Skan/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1981169695Skan# define INSERT_JUMP(op, loc, to) \
1982169695Skan  PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
1983169695Skan
1984169695Skan/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1985169695Skan# define INSERT_JUMP2(op, loc, to, arg) \
1986169695Skan  PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
1987169695Skan	      arg, b)
1988169695Skan
1989169695Skan/* This is not an arbitrary limit: the arguments which represent offsets
1990169695Skan   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1991169695Skan   be too small, many things would have to change.  */
1992169695Skan/* Any other compiler which, like MSC, has allocation limit below 2^16
1993169695Skan   bytes will have to use approach similar to what was done below for
1994169695Skan   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1995169695Skan   reallocating to 0 bytes.  Such thing is not going to work too well.
1996169695Skan   You have been warned!!  */
1997169695Skan# ifndef DEFINED_ONCE
1998169695Skan#  if defined _MSC_VER  && !defined WIN32
1999169695Skan/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
2000169695Skan   The REALLOC define eliminates a flurry of conversion warnings,
2001169695Skan   but is not required. */
2002169695Skan#   define MAX_BUF_SIZE  65500L
2003169695Skan#   define REALLOC(p,s) realloc ((p), (size_t) (s))
2004169695Skan#  else
2005169695Skan#   define MAX_BUF_SIZE (1L << 16)
2006169695Skan#   define REALLOC(p,s) realloc ((p), (s))
2007169695Skan#  endif
2008169695Skan
2009169695Skan/* Double the buffer's allocated size via realloc, clamping it to MAX_BUF_SIZE, and
2010169695Skan   reset the pointers that pointed into the old block to point to the
2011169695Skan   correct places in the new one.  If the buffer has already reached
2012169695Skan   MAX_BUF_SIZE and so cannot grow, return REG_ESIZE; if the reallocation fails, return REG_ESPACE.  */
2013169695Skan#  if __BOUNDED_POINTERS__
2014169695Skan#   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
2015169695Skan#   define MOVE_BUFFER_POINTER(P) \
2016169695Skan  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
2017169695Skan#   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
2018169695Skan  else						\
2019169695Skan    {						\
2020169695Skan      SET_HIGH_BOUND (b);			\
2021169695Skan      SET_HIGH_BOUND (begalt);			\
2022169695Skan      if (fixup_alt_jump)			\
2023169695Skan	SET_HIGH_BOUND (fixup_alt_jump);	\
2024169695Skan      if (laststart)				\
2025169695Skan	SET_HIGH_BOUND (laststart);		\
2026169695Skan      if (pending_exact)			\
2027169695Skan	SET_HIGH_BOUND (pending_exact);		\
2028169695Skan    }
2029169695Skan#  else
2030169695Skan#   define MOVE_BUFFER_POINTER(P) (P) += incr
2031169695Skan#   define ELSE_EXTEND_BUFFER_HIGH_BOUND
2032169695Skan#  endif
2033169695Skan# endif /* not DEFINED_ONCE */
2034169695Skan
/* EXTEND_BUFFER doubles the allocated size of the compiled-pattern
   buffer, clamping it to MAX_BUF_SIZE, and reallocates the buffer.

   It executes `return REG_ESIZE' in the enclosing function when the
   buffer cannot grow any further, and `return REG_ESPACE' when the
   reallocation yields NULL.  If the block moved, the pointers `b',
   `begalt', and (when non-null) `fixup_alt_jump', `laststart' and
   `pending_exact' are shifted by the same distance through
   MOVE_BUFFER_POINTER.  All of these, and `bufp', are variables of the
   expanding scope.

   The distance is kept in a `long', the type this file already uses for
   pattern offsets: an `int' can silently truncate the difference between
   two pointers where pointers are wider than `int', which would leave
   every adjusted pointer pointing at garbage.

   NOTE(review): the reallocation result is stored straight into the
   buffer variable, so on failure the previous block is no longer
   reachable from here; confirm that callers do not need it.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    int wchar_count;							\
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    /* How many characters the new buffer can have?  */			\
    wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
    if (wchar_count == 0) wchar_count = 1;				\
    /* Truncate the buffer to CHAR_T align.  */				\
    bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
    bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
    if (COMPILED_BUFFER_VAR == NULL)					\
      return REG_ESPACE;						\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	/* Wide enough that the distance is not truncated.  */		\
	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
						bufp->allocated);	\
    if (COMPILED_BUFFER_VAR == NULL)					\
      return REG_ESPACE;						\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	/* Wide enough that the distance is not truncated.  */		\
	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
2098169695Skan
2099169695Skan# ifndef DEFINED_ONCE
2100169695Skan/* Since we have one byte reserved for the register number argument to
2101169695Skan   {start,stop}_memory, the maximum number of groups we can report
2102169695Skan   things about is what fits in that byte.  */
2103169695Skan#  define MAX_REGNUM 255
2104169695Skan
2105169695Skan/* But patterns can have more than `MAX_REGNUM' registers.  We just
2106169695Skan   ignore the excess.  */
2107169695Skantypedef unsigned regnum_t;
2108169695Skan
2109169695Skan
2110169695Skan/* Macros for the compile stack.  */
2111169695Skan
2112169695Skan/* Since offsets can go either forwards or backwards, this type needs to
2113169695Skan   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
2114169695Skan/* int may be not enough when sizeof(int) == 2.  */
2115169695Skantypedef long pattern_offset_t;
2116169695Skan
/* One frame of the compile stack.  The four position members are kept
   as pattern_offset_t offsets, not as pointers; presumably they are
   offsets into the compiled buffer so that they survive a buffer
   reallocation, and the member names mirror the like-named locals of
   regex_compile -- confirm against the push/pop code there.  */
2117169695Skantypedef struct
2118169695Skan{
2119169695Skan  pattern_offset_t begalt_offset;
2120169695Skan  pattern_offset_t fixup_alt_jump;
2121169695Skan  pattern_offset_t inner_group_offset;
2122169695Skan  pattern_offset_t laststart_offset;
2123169695Skan  regnum_t regnum;
2124169695Skan} compile_stack_elt_t;
2125169695Skan
2126169695Skan
/* The compile stack: a heap-allocated array of frames (`stack'), the
   number of frames it has room for (`size'), and the number currently
   in use (`avail').  */
2127169695Skantypedef struct
2128169695Skan{
2129169695Skan  compile_stack_elt_t *stack;
2130169695Skan  unsigned size;
2131169695Skan  unsigned avail;			/* Offset of next open position.  */
2132169695Skan} compile_stack_type;
2133169695Skan
2134169695Skan
/* Number of frames allocated for a freshly created compile stack.  */
2135169695Skan#  define INIT_COMPILE_STACK_SIZE 32
2136169695Skan
/* The macros below refer to a variable literally named `compile_stack',
   so they can only be used where such a variable is in scope.  */
2137169695Skan#  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
2138169695Skan#  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
2139169695Skan
2140169695Skan/* The next available element.  */
2141169695Skan#  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
2142169695Skan
2143169695Skan# endif /* not DEFINED_ONCE */
2144169695Skan
/* Set the bit for character C in a list.

   The list is the bitmap addressed by the variable `b' at the point of
   use: byte C / BYTEWIDTH, bit C % BYTEWIDTH, with C first reduced to
   an unsigned char.  C is expanded twice, so it must not have side
   effects; both expansions are parenthesized so that an expression
   argument (e.g. `ch - 8') is converted as a whole rather than having
   the cast bind to its first operand only.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* DEFINED_ONCE */
2151169695Skan
2152169695Skan/* Get the next unsigned number in the uncompiled pattern.

   Uses the variables `p', `pend' and `c' of the enclosing function.
   Characters are fetched with PATFETCH (which presumably advances `p')
   until `p' reaches `pend' or a non-digit is fetched; that non-digit
   is left in `c'.  When the first digit arrives a negative NUM is
   reset to 0, so NUM keeps its incoming value if no digit is seen.
   Once NUM has grown beyond RE_DUP_MAX further digits are still
   fetched but no longer accumulated, which bounds the value.

   NOTE(review): this expands to a bare brace block, not
   do { } while (0), and NUM is expanded several times without
   parentheses -- pass a plain lvalue and do not use the macro as the
   body of an `if' that has an `else'.  */
2153169695Skan# define GET_UNSIGNED_NUMBER(num) \
2154169695Skan  {									\
2155169695Skan    while (p != pend)							\
2156169695Skan      {									\
2157169695Skan	PATFETCH (c);							\
2158169695Skan	if (c < '0' || c > '9')						\
2159169695Skan	  break;							\
2160169695Skan	if (num <= RE_DUP_MAX)						\
2161169695Skan	  {								\
2162169695Skan	    if (num < 0)						\
2163169695Skan	      num = 0;							\
2164169695Skan	    num = num * 10 + c - '0';					\
2165169695Skan	  }								\
2166169695Skan      }									\
2167169695Skan  }
2168169695Skan
2169169695Skan# ifndef DEFINED_ONCE
2170169695Skan#  if defined _LIBC || WIDE_CHAR_SUPPORT
2171169695Skan/* The GNU C library provides support for user-defined character classes
2172169695Skan   and the functions from ISO C Amendment 1.  */
2173169695Skan#   ifdef CHARCLASS_NAME_MAX
2174169695Skan#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
2175169695Skan#   else
2176169695Skan/* This shouldn't happen but some implementation might still have this
2177169695Skan   problem.  Use a reasonable default value.  */
2178169695Skan#    define CHAR_CLASS_MAX_LENGTH 256
2179169695Skan#   endif
2180169695Skan
/* With wide-character support a class name is valid when the wctype
   lookup returns non-zero for it.  */
2181169695Skan#   ifdef _LIBC
2182169695Skan#    define IS_CHAR_CLASS(string) __wctype (string)
2183169695Skan#   else
2184169695Skan#    define IS_CHAR_CLASS(string) wctype (string)
2185169695Skan#   endif
2186169695Skan#  else
2187169695Skan#   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
2188169695Skan
/* Without wide-character support only the twelve fixed class names
   listed here are accepted.  */
2189169695Skan#   define IS_CHAR_CLASS(string)					\
2190169695Skan   (STREQ (string, "alpha") || STREQ (string, "upper")			\
2191169695Skan    || STREQ (string, "lower") || STREQ (string, "digit")		\
2192169695Skan    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
2193169695Skan    || STREQ (string, "space") || STREQ (string, "print")		\
2194169695Skan    || STREQ (string, "punct") || STREQ (string, "graph")		\
2195169695Skan    || STREQ (string, "cntrl") || STREQ (string, "blank"))
2196169695Skan#  endif
2197169695Skan# endif /* DEFINED_ONCE */
2198169695Skan
2199169695Skan# ifndef MATCH_MAY_ALLOCATE
2200169695Skan
2201169695Skan/* If we cannot allocate large objects within re_match_2_internal,
2202169695Skan   we make the fail stack and register vectors global.
2203169695Skan   The fail stack, we grow to the maximum size when a regexp
2204169695Skan   is compiled.
2205169695Skan   The register vectors, we adjust in size each time we
2206169695Skan   compile a regexp, according to the number of registers it needs.  */
2207169695Skan
2208169695Skanstatic PREFIX(fail_stack_type) fail_stack;
2209169695Skan
2210169695Skan/* Size with which the following vectors are currently allocated.
2211169695Skan   That is so we can make them bigger as needed,
2212169695Skan   but never make them smaller.  */
/* These are not PREFIX()-ed, so they are declared only in the
   inclusion that has DEFINED_ONCE set; the PREFIX()-ed reg_info
   vectors below are declared in every inclusion.  */
2213169695Skan#  ifdef DEFINED_ONCE
2214169695Skanstatic int regs_allocated_size;
2215169695Skan
2216169695Skanstatic const char **     regstart, **     regend;
2217169695Skanstatic const char ** old_regstart, ** old_regend;
2218169695Skanstatic const char **best_regstart, **best_regend;
2219169695Skanstatic const char **reg_dummy;
2220169695Skan#  endif /* DEFINED_ONCE */
2221169695Skan
2222169695Skanstatic PREFIX(register_info_type) *PREFIX(reg_info);
2223169695Skanstatic PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2224169695Skan
2225169695Skan/* Make the register vectors big enough for NUM_REGS registers,
2226169695Skan   but don't make them smaller.

   Does nothing when NUM_REGS is not larger than regs_allocated_size.
   Otherwise every vector is resized to NUM_REGS elements through
   RETALLOC_IF and regs_allocated_size is set to NUM_REGS.

   NOTE(review): the outcome of the individual RETALLOC_IF calls is not
   checked and there is no way to report failure (the function returns
   void).  If RETALLOC_IF can leave a vector null on allocation
   failure, regs_allocated_size is still raised -- confirm against the
   macro's definition and the users of these vectors.  */
2227169695Skan
2228169695Skanstatic void
2229169695SkanPREFIX(regex_grow_registers) (int num_regs)
2230169695Skan{
2231169695Skan  if (num_regs > regs_allocated_size)
2232169695Skan    {
2233169695Skan      RETALLOC_IF (regstart,	 num_regs, const char *);
2234169695Skan      RETALLOC_IF (regend,	 num_regs, const char *);
2235169695Skan      RETALLOC_IF (old_regstart, num_regs, const char *);
2236169695Skan      RETALLOC_IF (old_regend,	 num_regs, const char *);
2237169695Skan      RETALLOC_IF (best_regstart, num_regs, const char *);
2238169695Skan      RETALLOC_IF (best_regend,	 num_regs, const char *);
2239169695Skan      RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2240169695Skan      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2241169695Skan      RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2242169695Skan
2243169695Skan      regs_allocated_size = num_regs;
2244169695Skan    }
2245169695Skan}
2246169695Skan
2247169695Skan# endif /* not MATCH_MAY_ALLOCATE */
2248169695Skan
2249169695Skan# ifndef DEFINED_ONCE
/* Forward declaration; being static, the function is defined elsewhere
   in this translation unit.  Note that the compile stack descriptor is
   passed by value (the struct itself, not a pointer to it).
   Presumably reports whether group REGNUM is recorded in one of the
   stack's frames -- confirm against the definition.  */
2250169695Skanstatic boolean group_in_compile_stack (compile_stack_type compile_stack,
2251169695Skan                                       regnum_t regnum);
2252169695Skan# endif /* not DEFINED_ONCE */
2253169695Skan
2254169695Skan/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2255169695Skan   Returns one of the error codes defined in `regex.h', or zero for success.
2256169695Skan
2257169695Skan   Assumes the `allocated' (and perhaps `buffer') and `translate'
2258169695Skan   fields are set in BUFP on entry.
2259169695Skan
2260169695Skan   If it succeeds, results are put in BUFP (if it returns an error, the
2261169695Skan   contents of BUFP are undefined):
2262169695Skan     `buffer' is the compiled pattern;
2263169695Skan     `syntax' is set to SYNTAX;
2264169695Skan     `used' is set to the length of the compiled pattern;
2265169695Skan     `fastmap_accurate' is zero;
2266169695Skan     `re_nsub' is the number of subexpressions in PATTERN;
2267169695Skan     `not_bol' and `not_eol' are zero;
2268169695Skan
2269169695Skan   The `fastmap' and `newline_anchor' fields are neither
2270169695Skan   examined nor set.  */
2271169695Skan
2272169695Skan/* Return, freeing storage we allocated.

   Expands to a `return' statement for the enclosing function: the
   frees are evaluated first as comma-expression operands, then VALUE
   is returned.  Both variants free compile_stack.stack; the WCHAR
   variant additionally frees the locals `pattern', `mbs_offset' and
   `is_binary'.  All of these names must be in scope where the macro is
   used.  */
2273169695Skan# ifdef WCHAR
2274169695Skan#  define FREE_STACK_RETURN(value)		\
2275169695Skan  return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
2276169695Skan# else
2277169695Skan#  define FREE_STACK_RETURN(value)		\
2278169695Skan  return (free (compile_stack.stack), value)
2279169695Skan# endif /* WCHAR */
2280169695Skan
2281169695Skanstatic reg_errcode_t
2282169695SkanPREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
2283169695Skan                       size_t ARG_PREFIX(size), reg_syntax_t syntax,
2284169695Skan                       struct re_pattern_buffer *bufp)
2285169695Skan{
2286169695Skan  /* We fetch characters from PATTERN here.  Even though PATTERN is
2287169695Skan     `char *' (i.e., signed), we declare these variables as unsigned, so
2288169695Skan     they can be reliably used as array indices.  */
2289169695Skan  register UCHAR_T c, c1;
2290169695Skan
2291169695Skan#ifdef WCHAR
2292169695Skan  /* A temporary space to keep wchar_t pattern and compiled pattern.  */
2293169695Skan  CHAR_T *pattern, *COMPILED_BUFFER_VAR;
2294169695Skan  size_t size;
2295169695Skan  /* offset buffer for optimization. See convert_mbs_to_wc.  */
2296169695Skan  int *mbs_offset = NULL;
2297169695Skan  /* It hold whether each wchar_t is binary data or not.  */
2298169695Skan  char *is_binary = NULL;
2299169695Skan  /* A flag whether exactn is handling binary data or not.  */
2300169695Skan  char is_exactn_bin = FALSE;
2301169695Skan#endif /* WCHAR */
2302169695Skan
2303169695Skan  /* A random temporary spot in PATTERN.  */
2304169695Skan  const CHAR_T *p1;
2305169695Skan
2306169695Skan  /* Points to the end of the buffer, where we should append.  */
2307169695Skan  register UCHAR_T *b;
2308169695Skan
2309169695Skan  /* Keeps track of unclosed groups.  */
2310169695Skan  compile_stack_type compile_stack;
2311169695Skan
2312169695Skan  /* Points to the current (ending) position in the pattern.  */
2313169695Skan#ifdef WCHAR
2314169695Skan  const CHAR_T *p;
2315169695Skan  const CHAR_T *pend;
2316169695Skan#else /* BYTE */
2317169695Skan  const CHAR_T *p = pattern;
2318169695Skan  const CHAR_T *pend = pattern + size;
2319169695Skan#endif /* WCHAR */
2320169695Skan
2321169695Skan  /* How to translate the characters in the pattern.  */
2322169695Skan  RE_TRANSLATE_TYPE translate = bufp->translate;
2323169695Skan
2324169695Skan  /* Address of the count-byte of the most recently inserted `exactn'
2325169695Skan     command.  This makes it possible to tell if a new exact-match
2326169695Skan     character can be added to that command or if the character requires
2327169695Skan     a new `exactn' command.  */
2328169695Skan  UCHAR_T *pending_exact = 0;
2329169695Skan
2330169695Skan  /* Address of start of the most recently finished expression.
2331169695Skan     This tells, e.g., postfix * where to find the start of its
2332169695Skan     operand.  Reset at the beginning of groups and alternatives.  */
2333169695Skan  UCHAR_T *laststart = 0;
2334169695Skan
2335169695Skan  /* Address of beginning of regexp, or inside of last group.  */
2336169695Skan  UCHAR_T *begalt;
2337169695Skan
2338169695Skan  /* Address of the place where a forward jump should go to the end of
2339169695Skan     the containing expression.  Each alternative of an `or' -- except the
2340169695Skan     last -- ends with a forward jump of this sort.  */
2341169695Skan  UCHAR_T *fixup_alt_jump = 0;
2342169695Skan
2343169695Skan  /* Counts open-groups as they are encountered.  Remembered for the
2344169695Skan     matching close-group on the compile stack, so the same register
2345169695Skan     number is put in the stop_memory as the start_memory.  */
2346169695Skan  regnum_t regnum = 0;
2347169695Skan
2348169695Skan#ifdef WCHAR
2349169695Skan  /* Initialize the wchar_t PATTERN and offset_buffer.  */
2350169695Skan  p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2351169695Skan  mbs_offset = TALLOC(csize + 1, int);
2352169695Skan  is_binary = TALLOC(csize + 1, char);
2353169695Skan  if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2354169695Skan    {
2355169695Skan      free(pattern);
2356169695Skan      free(mbs_offset);
2357169695Skan      free(is_binary);
2358169695Skan      return REG_ESPACE;
2359169695Skan    }
2360169695Skan  pattern[csize] = L'\0';	/* sentinel */
2361169695Skan  size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2362169695Skan  pend = p + size;
2363169695Skan  if (size < 0)
2364169695Skan    {
2365169695Skan      free(pattern);
2366169695Skan      free(mbs_offset);
2367169695Skan      free(is_binary);
2368169695Skan      return REG_BADPAT;
2369169695Skan    }
2370169695Skan#endif
2371169695Skan
2372169695Skan#ifdef DEBUG
2373169695Skan  DEBUG_PRINT1 ("\nCompiling pattern: ");
2374169695Skan  if (debug)
2375169695Skan    {
2376169695Skan      unsigned debug_count;
2377169695Skan
2378169695Skan      for (debug_count = 0; debug_count < size; debug_count++)
2379169695Skan        PUT_CHAR (pattern[debug_count]);
2380169695Skan      putchar ('\n');
2381169695Skan    }
2382169695Skan#endif /* DEBUG */
2383169695Skan
2384169695Skan  /* Initialize the compile stack.  */
2385169695Skan  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2386169695Skan  if (compile_stack.stack == NULL)
2387169695Skan    {
2388169695Skan#ifdef WCHAR
2389169695Skan      free(pattern);
2390169695Skan      free(mbs_offset);
2391169695Skan      free(is_binary);
2392169695Skan#endif
2393169695Skan      return REG_ESPACE;
2394169695Skan    }
2395169695Skan
2396169695Skan  compile_stack.size = INIT_COMPILE_STACK_SIZE;
2397169695Skan  compile_stack.avail = 0;
2398169695Skan
2399169695Skan  /* Initialize the pattern buffer.  */
2400169695Skan  bufp->syntax = syntax;
2401169695Skan  bufp->fastmap_accurate = 0;
2402169695Skan  bufp->not_bol = bufp->not_eol = 0;
2403169695Skan
2404169695Skan  /* Set `used' to zero, so that if we return an error, the pattern
2405169695Skan     printer (for debugging) will think there's no pattern.  We reset it
2406169695Skan     at the end.  */
2407169695Skan  bufp->used = 0;
2408169695Skan
2409169695Skan  /* Always count groups, whether or not bufp->no_sub is set.  */
2410169695Skan  bufp->re_nsub = 0;
2411169695Skan
2412169695Skan#if !defined emacs && !defined SYNTAX_TABLE
2413169695Skan  /* Initialize the syntax table.  */
2414169695Skan   init_syntax_once ();
2415169695Skan#endif
2416169695Skan
2417169695Skan  if (bufp->allocated == 0)
2418169695Skan    {
2419169695Skan      if (bufp->buffer)
2420169695Skan	{ /* If zero allocated, but buffer is non-null, try to realloc
2421169695Skan             enough space.  This loses if buffer's address is bogus, but
2422169695Skan             that is the user's responsibility.  */
2423169695Skan#ifdef WCHAR
2424169695Skan	  /* Free bufp->buffer and allocate an array for wchar_t pattern
2425169695Skan	     buffer.  */
2426169695Skan          free(bufp->buffer);
2427169695Skan          COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2428169695Skan					UCHAR_T);
2429169695Skan#else
2430169695Skan          RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2431169695Skan#endif /* WCHAR */
2432169695Skan        }
2433169695Skan      else
2434169695Skan        { /* Caller did not allocate a buffer.  Do it for them.  */
2435169695Skan          COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2436169695Skan					UCHAR_T);
2437169695Skan        }
2438169695Skan
2439169695Skan      if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2440169695Skan#ifdef WCHAR
2441169695Skan      bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2442169695Skan#endif /* WCHAR */
2443169695Skan      bufp->allocated = INIT_BUF_SIZE;
2444169695Skan    }
2445169695Skan#ifdef WCHAR
2446169695Skan  else
2447169695Skan    COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2448169695Skan#endif
2449169695Skan
2450169695Skan  begalt = b = COMPILED_BUFFER_VAR;
2451169695Skan
2452169695Skan  /* Loop through the uncompiled pattern until we're at the end.  */
2453169695Skan  while (p != pend)
2454169695Skan    {
2455169695Skan      PATFETCH (c);
2456169695Skan
2457169695Skan      switch (c)
2458169695Skan        {
2459169695Skan        case '^':
2460169695Skan          {
2461169695Skan            if (   /* If at start of pattern, it's an operator.  */
2462169695Skan                   p == pattern + 1
2463169695Skan                   /* If context independent, it's an operator.  */
2464169695Skan                || syntax & RE_CONTEXT_INDEP_ANCHORS
2465169695Skan                   /* Otherwise, depends on what's come before.  */
2466169695Skan                || PREFIX(at_begline_loc_p) (pattern, p, syntax))
2467169695Skan              BUF_PUSH (begline);
2468169695Skan            else
2469169695Skan              goto normal_char;
2470169695Skan          }
2471169695Skan          break;
2472169695Skan
2473169695Skan
2474169695Skan        case '$':
2475169695Skan          {
2476169695Skan            if (   /* If at end of pattern, it's an operator.  */
2477169695Skan                   p == pend
2478169695Skan                   /* If context independent, it's an operator.  */
2479169695Skan                || syntax & RE_CONTEXT_INDEP_ANCHORS
2480169695Skan                   /* Otherwise, depends on what's next.  */
2481169695Skan                || PREFIX(at_endline_loc_p) (p, pend, syntax))
2482169695Skan               BUF_PUSH (endline);
2483169695Skan             else
2484169695Skan               goto normal_char;
2485169695Skan           }
2486169695Skan           break;
2487169695Skan
2488169695Skan
2489169695Skan	case '+':
2490169695Skan        case '?':
2491169695Skan          if ((syntax & RE_BK_PLUS_QM)
2492169695Skan              || (syntax & RE_LIMITED_OPS))
2493169695Skan            goto normal_char;
2494169695Skan        handle_plus:
2495169695Skan        case '*':
2496169695Skan          /* If there is no previous pattern... */
2497169695Skan          if (!laststart)
2498169695Skan            {
2499169695Skan              if (syntax & RE_CONTEXT_INVALID_OPS)
2500169695Skan                FREE_STACK_RETURN (REG_BADRPT);
2501169695Skan              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2502169695Skan                goto normal_char;
2503169695Skan            }
2504169695Skan
2505169695Skan          {
2506169695Skan            /* Are we optimizing this jump?  */
2507169695Skan            boolean keep_string_p = false;
2508169695Skan
2509169695Skan            /* 1 means zero (many) matches is allowed.  */
2510169695Skan            char zero_times_ok = 0, many_times_ok = 0;
2511169695Skan
2512169695Skan            /* If there is a sequence of repetition chars, collapse it
2513169695Skan               down to just one (the right one).  We can't combine
2514169695Skan               interval operators with these because of, e.g., `a{2}*',
2515169695Skan               which should only match an even number of `a's.  */
2516169695Skan
2517169695Skan            for (;;)
2518169695Skan              {
2519169695Skan                zero_times_ok |= c != '+';
2520169695Skan                many_times_ok |= c != '?';
2521169695Skan
2522169695Skan                if (p == pend)
2523169695Skan                  break;
2524169695Skan
2525169695Skan                PATFETCH (c);
2526169695Skan
2527169695Skan                if (c == '*'
2528169695Skan                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2529169695Skan                  ;
2530169695Skan
2531169695Skan                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2532169695Skan                  {
2533169695Skan                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2534169695Skan
2535169695Skan                    PATFETCH (c1);
2536169695Skan                    if (!(c1 == '+' || c1 == '?'))
2537169695Skan                      {
2538169695Skan                        PATUNFETCH;
2539169695Skan                        PATUNFETCH;
2540169695Skan                        break;
2541169695Skan                      }
2542169695Skan
2543169695Skan                    c = c1;
2544169695Skan                  }
2545169695Skan                else
2546169695Skan                  {
2547169695Skan                    PATUNFETCH;
2548169695Skan                    break;
2549169695Skan                  }
2550169695Skan
2551169695Skan                /* If we get here, we found another repeat character.  */
2552169695Skan               }
2553169695Skan
2554169695Skan            /* Star, etc. applied to an empty pattern is equivalent
2555169695Skan               to an empty pattern.  */
2556169695Skan            if (!laststart)
2557169695Skan              break;
2558169695Skan
2559169695Skan            /* Now we know whether or not zero matches is allowed
2560169695Skan               and also whether or not two or more matches is allowed.  */
2561169695Skan            if (many_times_ok)
2562169695Skan              { /* More than one repetition is allowed, so put in at the
2563169695Skan                   end a backward relative jump from `b' to before the next
2564169695Skan                   jump we're going to put in below (which jumps from
2565169695Skan                   laststart to after this jump).
2566169695Skan
2567169695Skan                   But if we are at the `*' in the exact sequence `.*\n',
2568169695Skan                   insert an unconditional jump backwards to the .,
2569169695Skan                   instead of the beginning of the loop.  This way we only
2570169695Skan                   push a failure point once, instead of every time
2571169695Skan                   through the loop.  */
2572169695Skan                assert (p - 1 > pattern);
2573169695Skan
2574169695Skan                /* Allocate the space for the jump.  */
2575169695Skan                GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2576169695Skan
2577169695Skan                /* We know we are not at the first character of the pattern,
2578169695Skan                   because laststart was nonzero.  And we've already
2579169695Skan                   incremented `p', by the way, to be the character after
2580169695Skan                   the `*'.  Do we have to do something analogous here
2581169695Skan                   for null bytes, because of RE_DOT_NOT_NULL?  */
2582169695Skan                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2583169695Skan		    && zero_times_ok
2584169695Skan                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2585169695Skan                    && !(syntax & RE_DOT_NEWLINE))
2586169695Skan                  { /* We have .*\n.  */
2587169695Skan                    STORE_JUMP (jump, b, laststart);
2588169695Skan                    keep_string_p = true;
2589169695Skan                  }
2590169695Skan                else
2591169695Skan                  /* Anything else.  */
2592169695Skan                  STORE_JUMP (maybe_pop_jump, b, laststart -
2593169695Skan			      (1 + OFFSET_ADDRESS_SIZE));
2594169695Skan
2595169695Skan                /* We've added more stuff to the buffer.  */
2596169695Skan                b += 1 + OFFSET_ADDRESS_SIZE;
2597169695Skan              }
2598169695Skan
2599169695Skan            /* On failure, jump from laststart to b + 3, which will be the
2600169695Skan               end of the buffer after this jump is inserted.  */
2601169695Skan	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2602169695Skan	       'b + 3'.  */
2603169695Skan            GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2604169695Skan            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2605169695Skan                                       : on_failure_jump,
2606169695Skan                         laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2607169695Skan            pending_exact = 0;
2608169695Skan            b += 1 + OFFSET_ADDRESS_SIZE;
2609169695Skan
2610169695Skan            if (!zero_times_ok)
2611169695Skan              {
2612169695Skan                /* At least one repetition is required, so insert a
2613169695Skan                   `dummy_failure_jump' before the initial
2614169695Skan                   `on_failure_jump' instruction of the loop. This
2615169695Skan                   effects a skip over that instruction the first time
2616169695Skan                   we hit that loop.  */
2617169695Skan                GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2618169695Skan                INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2619169695Skan			     2 + 2 * OFFSET_ADDRESS_SIZE);
2620169695Skan                b += 1 + OFFSET_ADDRESS_SIZE;
2621169695Skan              }
2622169695Skan            }
2623169695Skan	  break;
2624169695Skan
2625169695Skan
2626169695Skan	case '.':
2627169695Skan          laststart = b;
2628169695Skan          BUF_PUSH (anychar);
2629169695Skan          break;
2630169695Skan
2631169695Skan
2632169695Skan        case '[':
2633169695Skan          {
2634169695Skan            boolean had_char_class = false;
2635169695Skan#ifdef WCHAR
2636169695Skan	    CHAR_T range_start = 0xffffffff;
2637169695Skan#else
2638169695Skan	    unsigned int range_start = 0xffffffff;
2639169695Skan#endif
2640169695Skan            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2641169695Skan
2642169695Skan#ifdef WCHAR
2643169695Skan	    /* We assume a charset(_not) structure as a wchar_t array.
2644169695Skan	       charset[0] = (re_opcode_t) charset(_not)
2645169695Skan               charset[1] = l (= length of char_classes)
2646169695Skan               charset[2] = m (= length of collating_symbols)
2647169695Skan               charset[3] = n (= length of equivalence_classes)
2648169695Skan	       charset[4] = o (= length of char_ranges)
2649169695Skan	       charset[5] = p (= length of chars)
2650169695Skan
2651169695Skan               charset[6] = char_class (wctype_t)
2652169695Skan               charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2653169695Skan                         ...
2654169695Skan               charset[l+5]  = char_class (wctype_t)
2655169695Skan
2656169695Skan               charset[l+6]  = collating_symbol (wchar_t)
2657169695Skan                            ...
2658169695Skan               charset[l+m+5]  = collating_symbol (wchar_t)
2659169695Skan					ifdef _LIBC we use the index if
2660169695Skan					_NL_COLLATE_SYMB_EXTRAMB instead of
2661169695Skan					wchar_t string.
2662169695Skan
2663169695Skan               charset[l+m+6]  = equivalence_classes (wchar_t)
2664169695Skan                              ...
2665169695Skan               charset[l+m+n+5]  = equivalence_classes (wchar_t)
2666169695Skan					ifdef _LIBC we use the index in
2667169695Skan					_NL_COLLATE_WEIGHT instead of
2668169695Skan					wchar_t string.
2669169695Skan
2670169695Skan	       charset[l+m+n+6] = range_start
2671169695Skan	       charset[l+m+n+7] = range_end
2672169695Skan	                       ...
2673169695Skan	       charset[l+m+n+2o+4] = range_start
2674169695Skan	       charset[l+m+n+2o+5] = range_end
2675169695Skan					ifdef _LIBC we use the value looked up
2676169695Skan					in _NL_COLLATE_COLLSEQ instead of
2677169695Skan					wchar_t character.
2678169695Skan
2679169695Skan	       charset[l+m+n+2o+6] = char
2680169695Skan	                          ...
2681169695Skan	       charset[l+m+n+2o+p+5] = char
2682169695Skan
2683169695Skan	     */
2684169695Skan
2685169695Skan	    /* We need at least 6 spaces: the opcode, the length of
2686169695Skan               char_classes, the length of collating_symbols, the length of
2687169695Skan               equivalence_classes, the length of char_ranges, the length of
2688169695Skan               chars.  */
2689169695Skan	    GET_BUFFER_SPACE (6);
2690169695Skan
2691169695Skan	    /* Save b as laststart. And We use laststart as the pointer
2692169695Skan	       to the first element of the charset here.
2693169695Skan	       In other words, laststart[i] indicates charset[i].  */
2694169695Skan            laststart = b;
2695169695Skan
2696169695Skan            /* We test `*p == '^' twice, instead of using an if
2697169695Skan               statement, so we only need one BUF_PUSH.  */
2698169695Skan            BUF_PUSH (*p == '^' ? charset_not : charset);
2699169695Skan            if (*p == '^')
2700169695Skan              p++;
2701169695Skan
2702169695Skan            /* Push the length of char_classes, the length of
2703169695Skan               collating_symbols, the length of equivalence_classes, the
2704169695Skan               length of char_ranges and the length of chars.  */
2705169695Skan            BUF_PUSH_3 (0, 0, 0);
2706169695Skan            BUF_PUSH_2 (0, 0);
2707169695Skan
2708169695Skan            /* Remember the first position in the bracket expression.  */
2709169695Skan            p1 = p;
2710169695Skan
2711169695Skan            /* charset_not matches newline according to a syntax bit.  */
2712169695Skan            if ((re_opcode_t) b[-6] == charset_not
2713169695Skan                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2714169695Skan	      {
2715169695Skan		BUF_PUSH('\n');
2716169695Skan		laststart[5]++; /* Update the length of characters  */
2717169695Skan	      }
2718169695Skan
2719169695Skan            /* Read in characters and ranges, setting map bits.  */
2720169695Skan            for (;;)
2721169695Skan              {
2722169695Skan                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2723169695Skan
2724169695Skan                PATFETCH (c);
2725169695Skan
2726169695Skan                /* \ might escape characters inside [...] and [^...].  */
2727169695Skan                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2728169695Skan                  {
2729169695Skan                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2730169695Skan
2731169695Skan                    PATFETCH (c1);
2732169695Skan		    BUF_PUSH(c1);
2733169695Skan		    laststart[5]++; /* Update the length of chars  */
2734169695Skan		    range_start = c1;
2735169695Skan                    continue;
2736169695Skan                  }
2737169695Skan
2738169695Skan                /* Could be the end of the bracket expression.  If it's
2739169695Skan                   not (i.e., when the bracket expression is `[]' so
2740169695Skan                   far), the ']' character bit gets set way below.  */
2741169695Skan                if (c == ']' && p != p1 + 1)
2742169695Skan                  break;
2743169695Skan
2744169695Skan                /* Look ahead to see if it's a range when the last thing
2745169695Skan                   was a character class.  */
2746169695Skan                if (had_char_class && c == '-' && *p != ']')
2747169695Skan                  FREE_STACK_RETURN (REG_ERANGE);
2748169695Skan
2749169695Skan                /* Look ahead to see if it's a range when the last thing
2750169695Skan                   was a character: if this is a hyphen not at the
2751169695Skan                   beginning or the end of a list, then it's the range
2752169695Skan                   operator.  */
2753169695Skan                if (c == '-'
2754169695Skan                    && !(p - 2 >= pattern && p[-2] == '[')
2755169695Skan                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2756169695Skan                    && *p != ']')
2757169695Skan                  {
2758169695Skan                    reg_errcode_t ret;
2759169695Skan		    /* Allocate the space for range_start and range_end.  */
2760169695Skan		    GET_BUFFER_SPACE (2);
2761169695Skan		    /* Update the pointer to indicate end of buffer.  */
2762169695Skan                    b += 2;
2763169695Skan                    ret = wcs_compile_range (range_start, &p, pend, translate,
2764169695Skan                                         syntax, b, laststart);
2765169695Skan                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2766169695Skan                    range_start = 0xffffffff;
2767169695Skan                  }
2768169695Skan                else if (p[0] == '-' && p[1] != ']')
2769169695Skan                  { /* This handles ranges made up of characters only.  */
2770169695Skan                    reg_errcode_t ret;
2771169695Skan
2772169695Skan		    /* Move past the `-'.  */
2773169695Skan                    PATFETCH (c1);
2774169695Skan		    /* Allocate the space for range_start and range_end.  */
2775169695Skan		    GET_BUFFER_SPACE (2);
2776169695Skan		    /* Update the pointer to indicate end of buffer.  */
2777169695Skan                    b += 2;
2778169695Skan                    ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2779169695Skan                                         laststart);
2780169695Skan                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2781169695Skan		    range_start = 0xffffffff;
2782169695Skan                  }
2783169695Skan
2784169695Skan                /* See if we're at the beginning of a possible character
2785169695Skan                   class.  */
2786169695Skan                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2787169695Skan                  { /* Leave room for the null.  */
2788169695Skan                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2789169695Skan
2790169695Skan                    PATFETCH (c);
2791169695Skan                    c1 = 0;
2792169695Skan
2793169695Skan                    /* If pattern is `[[:'.  */
2794169695Skan                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2795169695Skan
2796169695Skan                    for (;;)
2797169695Skan                      {
2798169695Skan                        PATFETCH (c);
2799169695Skan                        if ((c == ':' && *p == ']') || p == pend)
2800169695Skan                          break;
2801169695Skan			if (c1 < CHAR_CLASS_MAX_LENGTH)
2802169695Skan			  str[c1++] = c;
2803169695Skan			else
2804169695Skan			  /* This is in any case an invalid class name.  */
2805169695Skan			  str[0] = '\0';
2806169695Skan                      }
2807169695Skan                    str[c1] = '\0';
2808169695Skan
2809169695Skan                    /* If it isn't a word bracketed by `[:' and `:]':
2810169695Skan                       undo the ending character, the letters, and leave
2811169695Skan                       the leading `:' and `[' (but store them as character).  */
2812169695Skan                    if (c == ':' && *p == ']')
2813169695Skan                      {
2814169695Skan			wctype_t wt;
2815169695Skan			uintptr_t alignedp;
2816169695Skan
2817169695Skan			/* Query the character class as wctype_t.  */
2818169695Skan			wt = IS_CHAR_CLASS (str);
2819169695Skan			if (wt == 0)
2820169695Skan			  FREE_STACK_RETURN (REG_ECTYPE);
2821169695Skan
2822169695Skan                        /* Throw away the ] at the end of the character
2823169695Skan                           class.  */
2824169695Skan                        PATFETCH (c);
2825169695Skan
2826169695Skan                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2827169695Skan
2828169695Skan			/* Allocate the space for character class.  */
2829169695Skan                        GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2830169695Skan			/* Update the pointer to indicate end of buffer.  */
2831169695Skan                        b += CHAR_CLASS_SIZE;
2832169695Skan			/* Move data which follow character classes
2833169695Skan			    not to violate the data.  */
2834169695Skan                        insert_space(CHAR_CLASS_SIZE,
2835169695Skan				     laststart + 6 + laststart[1],
2836169695Skan				     b - 1);
2837169695Skan			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2838169695Skan				    + __alignof__(wctype_t) - 1)
2839169695Skan			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2840169695Skan			/* Store the character class.  */
2841169695Skan                        *((wctype_t*)alignedp) = wt;
2842169695Skan                        /* Update length of char_classes */
2843169695Skan                        laststart[1] += CHAR_CLASS_SIZE;
2844169695Skan
2845169695Skan                        had_char_class = true;
2846169695Skan                      }
2847169695Skan                    else
2848169695Skan                      {
2849169695Skan                        c1++;
2850169695Skan                        while (c1--)
2851169695Skan                          PATUNFETCH;
2852169695Skan                        BUF_PUSH ('[');
2853169695Skan                        BUF_PUSH (':');
2854169695Skan                        laststart[5] += 2; /* Update the length of characters  */
2855169695Skan			range_start = ':';
2856169695Skan                        had_char_class = false;
2857169695Skan                      }
2858169695Skan                  }
2859169695Skan                else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2860169695Skan							  || *p == '.'))
2861169695Skan		  {
2862169695Skan		    CHAR_T str[128];	/* Should be large enough.  */
2863169695Skan		    CHAR_T delim = *p; /* '=' or '.'  */
2864169695Skan# ifdef _LIBC
2865169695Skan		    uint32_t nrules =
2866169695Skan		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2867169695Skan# endif
2868169695Skan		    PATFETCH (c);
2869169695Skan		    c1 = 0;
2870169695Skan
2871169695Skan		    /* If pattern is `[[=' or '[[.'.  */
2872169695Skan		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2873169695Skan
2874169695Skan		    for (;;)
2875169695Skan		      {
2876169695Skan			PATFETCH (c);
2877169695Skan			if ((c == delim && *p == ']') || p == pend)
2878169695Skan			  break;
2879169695Skan			if (c1 < sizeof (str) - 1)
2880169695Skan			  str[c1++] = c;
2881169695Skan			else
2882169695Skan			  /* This is in any case an invalid class name.  */
2883169695Skan			  str[0] = '\0';
2884169695Skan                      }
2885169695Skan		    str[c1] = '\0';
2886169695Skan
2887169695Skan		    if (c == delim && *p == ']' && str[0] != '\0')
2888169695Skan		      {
2889169695Skan                        unsigned int i, offset;
2890169695Skan			/* If we have no collation data we use the default
2891169695Skan			   collation in which each character is in a class
2892169695Skan			   by itself.  It also means that ASCII is the
2893169695Skan			   character set and therefore we cannot have character
2894169695Skan			   with more than one byte in the multibyte
2895169695Skan			   representation.  */
2896169695Skan
2897169695Skan                        /* If not defined _LIBC, we push the name and
2898169695Skan			   `\0' for the sake of matching performance.  */
2899169695Skan			int datasize = c1 + 1;
2900169695Skan
2901169695Skan# ifdef _LIBC
2902169695Skan			int32_t idx = 0;
2903169695Skan			if (nrules == 0)
2904169695Skan# endif
2905169695Skan			  {
2906169695Skan			    if (c1 != 1)
2907169695Skan			      FREE_STACK_RETURN (REG_ECOLLATE);
2908169695Skan			  }
2909169695Skan# ifdef _LIBC
2910169695Skan			else
2911169695Skan			  {
2912169695Skan			    const int32_t *table;
2913169695Skan			    const int32_t *weights;
2914169695Skan			    const int32_t *extra;
2915169695Skan			    const int32_t *indirect;
2916169695Skan			    wint_t *cp;
2917169695Skan
2918169695Skan			    /* This #include defines a local function!  */
2919169695Skan#  include <locale/weightwc.h>
2920169695Skan
2921169695Skan			    if(delim == '=')
2922169695Skan			      {
2923169695Skan				/* We push the index for equivalence class.  */
2924169695Skan				cp = (wint_t*)str;
2925169695Skan
2926169695Skan				table = (const int32_t *)
2927169695Skan				  _NL_CURRENT (LC_COLLATE,
2928169695Skan					       _NL_COLLATE_TABLEWC);
2929169695Skan				weights = (const int32_t *)
2930169695Skan				  _NL_CURRENT (LC_COLLATE,
2931169695Skan					       _NL_COLLATE_WEIGHTWC);
2932169695Skan				extra = (const int32_t *)
2933169695Skan				  _NL_CURRENT (LC_COLLATE,
2934169695Skan					       _NL_COLLATE_EXTRAWC);
2935169695Skan				indirect = (const int32_t *)
2936169695Skan				  _NL_CURRENT (LC_COLLATE,
2937169695Skan					       _NL_COLLATE_INDIRECTWC);
2938169695Skan
2939169695Skan				idx = findidx ((const wint_t**)&cp);
2940169695Skan				if (idx == 0 || cp < (wint_t*) str + c1)
2941169695Skan				  /* This is no valid character.  */
2942169695Skan				  FREE_STACK_RETURN (REG_ECOLLATE);
2943169695Skan
2944169695Skan				str[0] = (wchar_t)idx;
2945169695Skan			      }
2946169695Skan			    else /* delim == '.' */
2947169695Skan			      {
2948169695Skan				/* We push collation sequence value
2949169695Skan				   for collating symbol.  */
2950169695Skan				int32_t table_size;
2951169695Skan				const int32_t *symb_table;
2952169695Skan				const unsigned char *extra;
2953169695Skan				int32_t idx;
2954169695Skan				int32_t elem;
2955169695Skan				int32_t second;
2956169695Skan				int32_t hash;
2957169695Skan				char char_str[c1];
2958169695Skan
2959169695Skan				/* We have to convert the name to a single-byte
2960169695Skan				   string.  This is possible since the names
2961169695Skan				   consist of ASCII characters and the internal
2962169695Skan				   representation is UCS4.  */
2963169695Skan				for (i = 0; i < c1; ++i)
2964169695Skan				  char_str[i] = str[i];
2965169695Skan
2966169695Skan				table_size =
2967169695Skan				  _NL_CURRENT_WORD (LC_COLLATE,
2968169695Skan						    _NL_COLLATE_SYMB_HASH_SIZEMB);
2969169695Skan				symb_table = (const int32_t *)
2970169695Skan				  _NL_CURRENT (LC_COLLATE,
2971169695Skan					       _NL_COLLATE_SYMB_TABLEMB);
2972169695Skan				extra = (const unsigned char *)
2973169695Skan				  _NL_CURRENT (LC_COLLATE,
2974169695Skan					       _NL_COLLATE_SYMB_EXTRAMB);
2975169695Skan
2976169695Skan				/* Locate the character in the hashing table.  */
2977169695Skan				hash = elem_hash (char_str, c1);
2978169695Skan
2979169695Skan				idx = 0;
2980169695Skan				elem = hash % table_size;
2981169695Skan				second = hash % (table_size - 2);
2982169695Skan				while (symb_table[2 * elem] != 0)
2983169695Skan				  {
2984169695Skan				    /* First compare the hashing value.  */
2985169695Skan				    if (symb_table[2 * elem] == hash
2986169695Skan					&& c1 == extra[symb_table[2 * elem + 1]]
2987169695Skan					&& memcmp (char_str,
2988169695Skan						   &extra[symb_table[2 * elem + 1]
2989169695Skan							 + 1], c1) == 0)
2990169695Skan				      {
2991169695Skan					/* Yep, this is the entry.  */
2992169695Skan					idx = symb_table[2 * elem + 1];
2993169695Skan					idx += 1 + extra[idx];
2994169695Skan					break;
2995169695Skan				      }
2996169695Skan
2997169695Skan				    /* Next entry.  */
2998169695Skan				    elem += second;
2999169695Skan				  }
3000169695Skan
3001169695Skan				if (symb_table[2 * elem] != 0)
3002169695Skan				  {
3003169695Skan				    /* Compute the index of the byte sequence
3004169695Skan				       in the table.  */
3005169695Skan				    idx += 1 + extra[idx];
3006169695Skan				    /* Adjust for the alignment.  */
3007169695Skan				    idx = (idx + 3) & ~3;
3008169695Skan
3009169695Skan				    str[0] = (wchar_t) idx + 4;
3010169695Skan				  }
3011169695Skan				else if (symb_table[2 * elem] == 0 && c1 == 1)
3012169695Skan				  {
3013169695Skan				    /* No valid character.  Match it as a
3014169695Skan				       single byte character.  */
3015169695Skan				    had_char_class = false;
3016169695Skan				    BUF_PUSH(str[0]);
3017169695Skan				    /* Update the length of characters  */
3018169695Skan				    laststart[5]++;
3019169695Skan				    range_start = str[0];
3020169695Skan
3021169695Skan				    /* Throw away the ] at the end of the
3022169695Skan				       collating symbol.  */
3023169695Skan				    PATFETCH (c);
3024169695Skan				    /* exit from the switch block.  */
3025169695Skan				    continue;
3026169695Skan				  }
3027169695Skan				else
3028169695Skan				  FREE_STACK_RETURN (REG_ECOLLATE);
3029169695Skan			      }
3030169695Skan			    datasize = 1;
3031169695Skan			  }
3032169695Skan# endif
3033169695Skan                        /* Throw away the ] at the end of the equivalence
3034169695Skan                           class (or collating symbol).  */
3035169695Skan                        PATFETCH (c);
3036169695Skan
3037169695Skan			/* Allocate the space for the equivalence class
3038169695Skan			   (or collating symbol) (and '\0' if needed).  */
3039169695Skan                        GET_BUFFER_SPACE(datasize);
3040169695Skan			/* Update the pointer to indicate end of buffer.  */
3041169695Skan                        b += datasize;
3042169695Skan
3043169695Skan			if (delim == '=')
3044169695Skan			  { /* equivalence class  */
3045169695Skan			    /* Calculate the offset of char_ranges,
3046169695Skan			       which is next to equivalence_classes.  */
3047169695Skan			    offset = laststart[1] + laststart[2]
3048169695Skan			      + laststart[3] +6;
3049169695Skan			    /* Insert space.  */
3050169695Skan			    insert_space(datasize, laststart + offset, b - 1);
3051169695Skan
3052169695Skan			    /* Write the equivalence_class and \0.  */
3053169695Skan			    for (i = 0 ; i < datasize ; i++)
3054169695Skan			      laststart[offset + i] = str[i];
3055169695Skan
3056169695Skan			    /* Update the length of equivalence_classes.  */
3057169695Skan			    laststart[3] += datasize;
3058169695Skan			    had_char_class = true;
3059169695Skan			  }
3060169695Skan			else /* delim == '.' */
3061169695Skan			  { /* collating symbol  */
3062169695Skan			    /* Calculate the offset of the equivalence_classes,
3063169695Skan			       which is next to collating_symbols.  */
3064169695Skan			    offset = laststart[1] + laststart[2] + 6;
3065169695Skan			    /* Insert space and write the collating_symbol
3066169695Skan			       and \0.  */
3067169695Skan			    insert_space(datasize, laststart + offset, b-1);
3068169695Skan			    for (i = 0 ; i < datasize ; i++)
3069169695Skan			      laststart[offset + i] = str[i];
3070169695Skan
3071169695Skan			    /* In re_match_2_internal if range_start < -1, we
3072169695Skan			       assume -range_start is the offset of the
3073169695Skan			       collating symbol which is specified as
3074169695Skan			       the character of the range start.  So we assign
3075169695Skan			       -(laststart[1] + laststart[2] + 6) to
3076169695Skan			       range_start.  */
3077169695Skan			    range_start = -(laststart[1] + laststart[2] + 6);
3078169695Skan			    /* Update the length of collating_symbol.  */
3079169695Skan			    laststart[2] += datasize;
3080169695Skan			    had_char_class = false;
3081169695Skan			  }
3082169695Skan		      }
3083169695Skan                    else
3084169695Skan                      {
3085169695Skan                        c1++;
3086169695Skan                        while (c1--)
3087169695Skan                          PATUNFETCH;
3088169695Skan                        BUF_PUSH ('[');
3089169695Skan                        BUF_PUSH (delim);
3090169695Skan                        laststart[5] += 2; /* Update the length of characters  */
3091169695Skan			range_start = delim;
3092169695Skan                        had_char_class = false;
3093169695Skan                      }
3094169695Skan		  }
3095169695Skan                else
3096169695Skan                  {
3097169695Skan                    had_char_class = false;
3098169695Skan		    BUF_PUSH(c);
3099169695Skan		    laststart[5]++;  /* Update the length of characters  */
3100169695Skan		    range_start = c;
3101169695Skan                  }
3102169695Skan	      }
3103169695Skan
3104169695Skan#else /* BYTE */
3105169695Skan            /* Ensure that we have enough space to push a charset: the
3106169695Skan               opcode, the length count, and the bitset; 34 bytes in all.  */
3107169695Skan	    GET_BUFFER_SPACE (34);
3108169695Skan
3109169695Skan            laststart = b;
3110169695Skan
3111169695Skan            /* We test `*p == '^' twice, instead of using an if
3112169695Skan               statement, so we only need one BUF_PUSH.  */
3113169695Skan            BUF_PUSH (*p == '^' ? charset_not : charset);
3114169695Skan            if (*p == '^')
3115169695Skan              p++;
3116169695Skan
3117169695Skan            /* Remember the first position in the bracket expression.  */
3118169695Skan            p1 = p;
3119169695Skan
3120169695Skan            /* Push the number of bytes in the bitmap.  */
3121169695Skan            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3122169695Skan
3123169695Skan            /* Clear the whole map.  */
3124169695Skan            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3125169695Skan
3126169695Skan            /* charset_not matches newline according to a syntax bit.  */
3127169695Skan            if ((re_opcode_t) b[-2] == charset_not
3128169695Skan                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3129169695Skan              SET_LIST_BIT ('\n');
3130169695Skan
3131169695Skan            /* Read in characters and ranges, setting map bits.  */
3132169695Skan            for (;;)
3133169695Skan              {
3134169695Skan                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3135169695Skan
3136169695Skan                PATFETCH (c);
3137169695Skan
3138169695Skan                /* \ might escape characters inside [...] and [^...].  */
3139169695Skan                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3140169695Skan                  {
3141169695Skan                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3142169695Skan
3143169695Skan                    PATFETCH (c1);
3144169695Skan                    SET_LIST_BIT (c1);
3145169695Skan		    range_start = c1;
3146169695Skan                    continue;
3147169695Skan                  }
3148169695Skan
3149169695Skan                /* Could be the end of the bracket expression.  If it's
3150169695Skan                   not (i.e., when the bracket expression is `[]' so
3151169695Skan                   far), the ']' character bit gets set way below.  */
3152169695Skan                if (c == ']' && p != p1 + 1)
3153169695Skan                  break;
3154169695Skan
3155169695Skan                /* Look ahead to see if it's a range when the last thing
3156169695Skan                   was a character class.  */
3157169695Skan                if (had_char_class && c == '-' && *p != ']')
3158169695Skan                  FREE_STACK_RETURN (REG_ERANGE);
3159169695Skan
3160169695Skan                /* Look ahead to see if it's a range when the last thing
3161169695Skan                   was a character: if this is a hyphen not at the
3162169695Skan                   beginning or the end of a list, then it's the range
3163169695Skan                   operator.  */
3164169695Skan                if (c == '-'
3165169695Skan                    && !(p - 2 >= pattern && p[-2] == '[')
3166169695Skan                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3167169695Skan                    && *p != ']')
3168169695Skan                  {
3169169695Skan                    reg_errcode_t ret
3170169695Skan                      = byte_compile_range (range_start, &p, pend, translate,
3171169695Skan					    syntax, b);
3172169695Skan                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3173169695Skan		    range_start = 0xffffffff;
3174169695Skan                  }
3175169695Skan
3176169695Skan                else if (p[0] == '-' && p[1] != ']')
3177169695Skan                  { /* This handles ranges made up of characters only.  */
3178169695Skan                    reg_errcode_t ret;
3179169695Skan
3180169695Skan		    /* Move past the `-'.  */
3181169695Skan                    PATFETCH (c1);
3182169695Skan
3183169695Skan                    ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3184169695Skan                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3185169695Skan		    range_start = 0xffffffff;
3186169695Skan                  }
3187169695Skan
3188169695Skan                /* See if we're at the beginning of a possible character
3189169695Skan                   class.  */
3190169695Skan
3191169695Skan                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3192169695Skan                  { /* Leave room for the null.  */
3193169695Skan                    char str[CHAR_CLASS_MAX_LENGTH + 1];
3194169695Skan
3195169695Skan                    PATFETCH (c);
3196169695Skan                    c1 = 0;
3197169695Skan
3198169695Skan                    /* If pattern is `[[:'.  */
3199169695Skan                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3200169695Skan
3201169695Skan                    for (;;)
3202169695Skan                      {
3203169695Skan                        PATFETCH (c);
3204169695Skan                        if ((c == ':' && *p == ']') || p == pend)
3205169695Skan                          break;
3206169695Skan			if (c1 < CHAR_CLASS_MAX_LENGTH)
3207169695Skan			  str[c1++] = c;
3208169695Skan			else
3209169695Skan			  /* This is in any case an invalid class name.  */
3210169695Skan			  str[0] = '\0';
3211169695Skan                      }
3212169695Skan                    str[c1] = '\0';
3213169695Skan
3214169695Skan                    /* If it isn't a word bracketed by `[:' and `:]':
3215169695Skan                       undo the ending character, the letters, and leave
3216169695Skan                       the leading `:' and `[' (but set bits for them).  */
3217169695Skan                    if (c == ':' && *p == ']')
3218169695Skan                      {
3219169695Skan# if defined _LIBC || WIDE_CHAR_SUPPORT
3220169695Skan                        boolean is_lower = STREQ (str, "lower");
3221169695Skan                        boolean is_upper = STREQ (str, "upper");
3222169695Skan			wctype_t wt;
3223169695Skan                        int ch;
3224169695Skan
3225169695Skan			wt = IS_CHAR_CLASS (str);
3226169695Skan			if (wt == 0)
3227169695Skan			  FREE_STACK_RETURN (REG_ECTYPE);
3228169695Skan
3229169695Skan                        /* Throw away the ] at the end of the character
3230169695Skan                           class.  */
3231169695Skan                        PATFETCH (c);
3232169695Skan
3233169695Skan                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3234169695Skan
3235169695Skan                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3236169695Skan			  {
3237169695Skan#  ifdef _LIBC
3238169695Skan			    if (__iswctype (__btowc (ch), wt))
3239169695Skan			      SET_LIST_BIT (ch);
3240169695Skan#  else
3241169695Skan			    if (iswctype (btowc (ch), wt))
3242169695Skan			      SET_LIST_BIT (ch);
3243169695Skan#  endif
3244169695Skan
3245169695Skan			    if (translate && (is_upper || is_lower)
3246169695Skan				&& (ISUPPER (ch) || ISLOWER (ch)))
3247169695Skan			      SET_LIST_BIT (ch);
3248169695Skan			  }
3249169695Skan
3250169695Skan                        had_char_class = true;
3251169695Skan# else
3252169695Skan                        int ch;
3253169695Skan                        boolean is_alnum = STREQ (str, "alnum");
3254169695Skan                        boolean is_alpha = STREQ (str, "alpha");
3255169695Skan                        boolean is_blank = STREQ (str, "blank");
3256169695Skan                        boolean is_cntrl = STREQ (str, "cntrl");
3257169695Skan                        boolean is_digit = STREQ (str, "digit");
3258169695Skan                        boolean is_graph = STREQ (str, "graph");
3259169695Skan                        boolean is_lower = STREQ (str, "lower");
3260169695Skan                        boolean is_print = STREQ (str, "print");
3261169695Skan                        boolean is_punct = STREQ (str, "punct");
3262169695Skan                        boolean is_space = STREQ (str, "space");
3263169695Skan                        boolean is_upper = STREQ (str, "upper");
3264169695Skan                        boolean is_xdigit = STREQ (str, "xdigit");
3265169695Skan
3266169695Skan                        if (!IS_CHAR_CLASS (str))
3267169695Skan			  FREE_STACK_RETURN (REG_ECTYPE);
3268169695Skan
3269169695Skan                        /* Throw away the ] at the end of the character
3270169695Skan                           class.  */
3271169695Skan                        PATFETCH (c);
3272169695Skan
3273169695Skan                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3274169695Skan
3275169695Skan                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3276169695Skan                          {
3277169695Skan			    /* This was split into 3 if's to
3278169695Skan			       avoid an arbitrary limit in some compiler.  */
3279169695Skan                            if (   (is_alnum  && ISALNUM (ch))
3280169695Skan                                || (is_alpha  && ISALPHA (ch))
3281169695Skan                                || (is_blank  && ISBLANK (ch))
3282169695Skan                                || (is_cntrl  && ISCNTRL (ch)))
3283169695Skan			      SET_LIST_BIT (ch);
3284169695Skan			    if (   (is_digit  && ISDIGIT (ch))
3285169695Skan                                || (is_graph  && ISGRAPH (ch))
3286169695Skan                                || (is_lower  && ISLOWER (ch))
3287169695Skan                                || (is_print  && ISPRINT (ch)))
3288169695Skan			      SET_LIST_BIT (ch);
3289169695Skan			    if (   (is_punct  && ISPUNCT (ch))
3290169695Skan                                || (is_space  && ISSPACE (ch))
3291169695Skan                                || (is_upper  && ISUPPER (ch))
3292169695Skan                                || (is_xdigit && ISXDIGIT (ch)))
3293169695Skan			      SET_LIST_BIT (ch);
3294169695Skan			    if (   translate && (is_upper || is_lower)
3295169695Skan				&& (ISUPPER (ch) || ISLOWER (ch)))
3296169695Skan			      SET_LIST_BIT (ch);
3297169695Skan                          }
3298169695Skan                        had_char_class = true;
3299169695Skan# endif	/* libc || wctype.h */
3300169695Skan                      }
3301169695Skan                    else
3302169695Skan                      {
3303169695Skan                        c1++;
3304169695Skan                        while (c1--)
3305169695Skan                          PATUNFETCH;
3306169695Skan                        SET_LIST_BIT ('[');
3307169695Skan                        SET_LIST_BIT (':');
3308169695Skan			range_start = ':';
3309169695Skan                        had_char_class = false;
3310169695Skan                      }
3311169695Skan                  }
3312169695Skan                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3313169695Skan		  {
3314169695Skan		    unsigned char str[MB_LEN_MAX + 1];
3315169695Skan# ifdef _LIBC
3316169695Skan		    uint32_t nrules =
3317169695Skan		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3318169695Skan# endif
3319169695Skan
3320169695Skan		    PATFETCH (c);
3321169695Skan		    c1 = 0;
3322169695Skan
3323169695Skan		    /* If pattern is `[[='.  */
3324169695Skan		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3325169695Skan
3326169695Skan		    for (;;)
3327169695Skan		      {
3328169695Skan			PATFETCH (c);
3329169695Skan			if ((c == '=' && *p == ']') || p == pend)
3330169695Skan			  break;
3331169695Skan			if (c1 < MB_LEN_MAX)
3332169695Skan			  str[c1++] = c;
3333169695Skan			else
3334169695Skan			  /* This is in any case an invalid class name.  */
3335169695Skan			  str[0] = '\0';
3336169695Skan                      }
3337169695Skan		    str[c1] = '\0';
3338169695Skan
3339169695Skan		    if (c == '=' && *p == ']' && str[0] != '\0')
3340169695Skan		      {
3341169695Skan			/* If we have no collation data we use the default
3342169695Skan			   collation in which each character is in a class
3343169695Skan			   by itself.  It also means that ASCII is the
3344169695Skan			   character set and therefore we cannot have character
3345169695Skan			   with more than one byte in the multibyte
3346169695Skan			   representation.  */
3347169695Skan# ifdef _LIBC
3348169695Skan			if (nrules == 0)
3349169695Skan# endif
3350169695Skan			  {
3351169695Skan			    if (c1 != 1)
3352169695Skan			      FREE_STACK_RETURN (REG_ECOLLATE);
3353169695Skan
3354169695Skan			    /* Throw away the ] at the end of the equivalence
3355169695Skan			       class.  */
3356169695Skan			    PATFETCH (c);
3357169695Skan
3358169695Skan			    /* Set the bit for the character.  */
3359169695Skan			    SET_LIST_BIT (str[0]);
3360169695Skan			  }
3361169695Skan# ifdef _LIBC
3362169695Skan			else
3363169695Skan			  {
3364169695Skan			    /* Try to match the byte sequence in `str' against
3365169695Skan			       those known to the collate implementation.
3366169695Skan			       First find out whether the bytes in `str' are
3367169695Skan			       actually from exactly one character.  */
3368169695Skan			    const int32_t *table;
3369169695Skan			    const unsigned char *weights;
3370169695Skan			    const unsigned char *extra;
3371169695Skan			    const int32_t *indirect;
3372169695Skan			    int32_t idx;
3373169695Skan			    const unsigned char *cp = str;
3374169695Skan			    int ch;
3375169695Skan
3376169695Skan			    /* This #include defines a local function!  */
3377169695Skan#  include <locale/weight.h>
3378169695Skan
3379169695Skan			    table = (const int32_t *)
3380169695Skan			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3381169695Skan			    weights = (const unsigned char *)
3382169695Skan			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3383169695Skan			    extra = (const unsigned char *)
3384169695Skan			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3385169695Skan			    indirect = (const int32_t *)
3386169695Skan			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3387169695Skan
3388169695Skan			    idx = findidx (&cp);
3389169695Skan			    if (idx == 0 || cp < str + c1)
3390169695Skan			      /* This is no valid character.  */
3391169695Skan			      FREE_STACK_RETURN (REG_ECOLLATE);
3392169695Skan
3393169695Skan			    /* Throw away the ] at the end of the equivalence
3394169695Skan			       class.  */
3395169695Skan			    PATFETCH (c);
3396169695Skan
3397169695Skan			    /* Now we have to go through the whole table
3398169695Skan			       and find all characters which have the same
3399169695Skan			       first level weight.
3400169695Skan
3401169695Skan			       XXX Note that this is not entirely correct.
3402169695Skan			       we would have to match multibyte sequences
3403169695Skan			       but this is not possible with the current
3404169695Skan			       implementation.  */
3405169695Skan			    for (ch = 1; ch < 256; ++ch)
3406169695Skan			      /* XXX This test would have to be changed if we
3407169695Skan				 would allow matching multibyte sequences.  */
3408169695Skan			      if (table[ch] > 0)
3409169695Skan				{
3410169695Skan				  int32_t idx2 = table[ch];
3411169695Skan				  size_t len = weights[idx2];
3412169695Skan
3413169695Skan				  /* Test whether the lengths match.  */
3414169695Skan				  if (weights[idx] == len)
3415169695Skan				    {
3416169695Skan				      /* They do.  Now compare the bytes of
3417169695Skan					 the weight.  */
3418169695Skan				      size_t cnt = 0;
3419169695Skan
3420169695Skan				      while (cnt < len
3421169695Skan					     && (weights[idx + 1 + cnt]
3422169695Skan						 == weights[idx2 + 1 + cnt]))
3423169695Skan					++cnt;
3424169695Skan
3425169695Skan				      if (cnt == len)
3426169695Skan					/* They match.  Mark the character as
3427169695Skan					   acceptable.  */
3428169695Skan					SET_LIST_BIT (ch);
3429169695Skan				    }
3430169695Skan				}
3431169695Skan			  }
3432169695Skan# endif
3433169695Skan			had_char_class = true;
3434169695Skan		      }
3435169695Skan                    else
3436169695Skan                      {
3437169695Skan                        c1++;
3438169695Skan                        while (c1--)
3439169695Skan                          PATUNFETCH;
3440169695Skan                        SET_LIST_BIT ('[');
3441169695Skan                        SET_LIST_BIT ('=');
3442169695Skan			range_start = '=';
3443169695Skan                        had_char_class = false;
3444169695Skan                      }
3445169695Skan		  }
3446169695Skan                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3447169695Skan		  {
3448169695Skan		    unsigned char str[128];	/* Should be large enough.  */
3449169695Skan# ifdef _LIBC
3450169695Skan		    uint32_t nrules =
3451169695Skan		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3452169695Skan# endif
3453169695Skan
3454169695Skan		    PATFETCH (c);
3455169695Skan		    c1 = 0;
3456169695Skan
3457169695Skan		    /* If pattern is `[[.'.  */
3458169695Skan		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3459169695Skan
3460169695Skan		    for (;;)
3461169695Skan		      {
3462169695Skan			PATFETCH (c);
3463169695Skan			if ((c == '.' && *p == ']') || p == pend)
3464169695Skan			  break;
3465169695Skan			if (c1 < sizeof (str))
3466169695Skan			  str[c1++] = c;
3467169695Skan			else
3468169695Skan			  /* This is in any case an invalid class name.  */
3469169695Skan			  str[0] = '\0';
3470169695Skan                      }
3471169695Skan		    str[c1] = '\0';
3472169695Skan
3473169695Skan		    if (c == '.' && *p == ']' && str[0] != '\0')
3474169695Skan		      {
3475169695Skan			/* If we have no collation data we use the default
3476169695Skan			   collation in which each character is the name
3477169695Skan			   for its own class which contains only the one
3478169695Skan			   character.  It also means that ASCII is the
3479169695Skan			   character set and therefore we cannot have character
3480169695Skan			   with more than one byte in the multibyte
3481169695Skan			   representation.  */
3482169695Skan# ifdef _LIBC
3483169695Skan			if (nrules == 0)
3484169695Skan# endif
3485169695Skan			  {
3486169695Skan			    if (c1 != 1)
3487169695Skan			      FREE_STACK_RETURN (REG_ECOLLATE);
3488169695Skan
3489169695Skan			    /* Throw away the ] at the end of the collating
3490169695Skan			       symbol.  */
3491169695Skan			    PATFETCH (c);
3492169695Skan
3493169695Skan			    /* Set the bit for the character.  */
3494169695Skan			    SET_LIST_BIT (str[0]);
3495169695Skan			    range_start = ((const unsigned char *) str)[0];
3496169695Skan			  }
3497169695Skan# ifdef _LIBC
3498169695Skan			else
3499169695Skan			  {
3500169695Skan			    /* Try to match the byte sequence in `str' against
3501169695Skan			       those known to the collate implementation.
3502169695Skan			       First find out whether the bytes in `str' are
3503169695Skan			       actually from exactly one character.  */
3504169695Skan			    int32_t table_size;
3505169695Skan			    const int32_t *symb_table;
3506169695Skan			    const unsigned char *extra;
3507169695Skan			    int32_t idx;
3508169695Skan			    int32_t elem;
3509169695Skan			    int32_t second;
3510169695Skan			    int32_t hash;
3511169695Skan
3512169695Skan			    table_size =
3513169695Skan			      _NL_CURRENT_WORD (LC_COLLATE,
3514169695Skan						_NL_COLLATE_SYMB_HASH_SIZEMB);
3515169695Skan			    symb_table = (const int32_t *)
3516169695Skan			      _NL_CURRENT (LC_COLLATE,
3517169695Skan					   _NL_COLLATE_SYMB_TABLEMB);
3518169695Skan			    extra = (const unsigned char *)
3519169695Skan			      _NL_CURRENT (LC_COLLATE,
3520169695Skan					   _NL_COLLATE_SYMB_EXTRAMB);
3521169695Skan
3522169695Skan			    /* Locate the character in the hashing table.  */
3523169695Skan			    hash = elem_hash (str, c1);
3524169695Skan
3525169695Skan			    idx = 0;
3526169695Skan			    elem = hash % table_size;
3527169695Skan			    second = hash % (table_size - 2);
3528169695Skan			    while (symb_table[2 * elem] != 0)
3529169695Skan			      {
3530169695Skan				/* First compare the hashing value.  */
3531169695Skan				if (symb_table[2 * elem] == hash
3532169695Skan				    && c1 == extra[symb_table[2 * elem + 1]]
3533169695Skan				    && memcmp (str,
3534169695Skan					       &extra[symb_table[2 * elem + 1]
3535169695Skan						     + 1],
3536169695Skan					       c1) == 0)
3537169695Skan				  {
3538169695Skan				    /* Yep, this is the entry.  */
3539169695Skan				    idx = symb_table[2 * elem + 1];
3540169695Skan				    idx += 1 + extra[idx];
3541169695Skan				    break;
3542169695Skan				  }
3543169695Skan
3544169695Skan				/* Next entry.  */
3545169695Skan				elem += second;
3546169695Skan			      }
3547169695Skan
3548169695Skan			    if (symb_table[2 * elem] == 0)
3549169695Skan			      /* This is no valid character.  */
3550169695Skan			      FREE_STACK_RETURN (REG_ECOLLATE);
3551169695Skan
3552169695Skan			    /* Throw away the ] at the end of the collating
3553169695Skan			       symbol.  */
3554169695Skan			    PATFETCH (c);
3555169695Skan
3556169695Skan			    /* Now add the multibyte character(s) we found
3557169695Skan			       to the accept list.
3558169695Skan
3559169695Skan			       XXX Note that this is not entirely correct.
3560169695Skan			       we would have to match multibyte sequences
3561169695Skan			       but this is not possible with the current
3562169695Skan			       implementation.  Also, we have to match
3563169695Skan			       collating symbols, which expand to more than
3564169695Skan			       one byte, as a whole and not allow the
3565169695Skan			       individual bytes.  */
3566169695Skan			    c1 = extra[idx++];
3567169695Skan			    if (c1 == 1)
3568169695Skan			      range_start = extra[idx];
3569169695Skan			    while (c1-- > 0)
3570169695Skan			      {
3571169695Skan				SET_LIST_BIT (extra[idx]);
3572169695Skan				++idx;
3573169695Skan			      }
3574169695Skan			  }
3575169695Skan# endif
3576169695Skan			had_char_class = false;
3577169695Skan		      }
3578169695Skan                    else
3579169695Skan                      {
3580169695Skan                        c1++;
3581169695Skan                        while (c1--)
3582169695Skan                          PATUNFETCH;
3583169695Skan                        SET_LIST_BIT ('[');
3584169695Skan                        SET_LIST_BIT ('.');
3585169695Skan			range_start = '.';
3586169695Skan                        had_char_class = false;
3587169695Skan                      }
3588169695Skan		  }
3589169695Skan                else
3590169695Skan                  {
3591169695Skan                    had_char_class = false;
3592169695Skan                    SET_LIST_BIT (c);
3593169695Skan		    range_start = c;
3594169695Skan                  }
3595169695Skan              }
3596169695Skan
3597169695Skan            /* Discard any (non)matching list bytes that are all 0 at the
3598169695Skan               end of the map.  Decrease the map-length byte too.  */
3599169695Skan            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3600169695Skan              b[-1]--;
3601169695Skan            b += b[-1];
3602169695Skan#endif /* WCHAR */
3603169695Skan          }
3604169695Skan          break;
3605169695Skan
3606169695Skan
3607169695Skan	case '(':
3608169695Skan          if (syntax & RE_NO_BK_PARENS)
3609169695Skan            goto handle_open;
3610169695Skan          else
3611169695Skan            goto normal_char;
3612169695Skan
3613169695Skan
3614169695Skan        case ')':
3615169695Skan          if (syntax & RE_NO_BK_PARENS)
3616169695Skan            goto handle_close;
3617169695Skan          else
3618169695Skan            goto normal_char;
3619169695Skan
3620169695Skan
3621169695Skan        case '\n':
3622169695Skan          if (syntax & RE_NEWLINE_ALT)
3623169695Skan            goto handle_alt;
3624169695Skan          else
3625169695Skan            goto normal_char;
3626169695Skan
3627169695Skan
3628169695Skan	case '|':
3629169695Skan          if (syntax & RE_NO_BK_VBAR)
3630169695Skan            goto handle_alt;
3631169695Skan          else
3632169695Skan            goto normal_char;
3633169695Skan
3634169695Skan
3635169695Skan        case '{':
3636169695Skan           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3637169695Skan             goto handle_interval;
3638169695Skan           else
3639169695Skan             goto normal_char;
3640169695Skan
3641169695Skan
3642169695Skan        case '\\':
3643169695Skan          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3644169695Skan
3645169695Skan          /* Do not translate the character after the \, so that we can
3646169695Skan             distinguish, e.g., \B from \b, even if we normally would
3647169695Skan             translate, e.g., B to b.  */
3648169695Skan          PATFETCH_RAW (c);
3649169695Skan
3650169695Skan          switch (c)
3651169695Skan            {
3652169695Skan            case '(':
3653169695Skan              if (syntax & RE_NO_BK_PARENS)
3654169695Skan                goto normal_backslash;
3655169695Skan
3656169695Skan            handle_open:
3657169695Skan              bufp->re_nsub++;
3658169695Skan              regnum++;
3659169695Skan
3660169695Skan              if (COMPILE_STACK_FULL)
3661169695Skan                {
3662169695Skan                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
3663169695Skan                            compile_stack_elt_t);
3664169695Skan                  if (compile_stack.stack == NULL) return REG_ESPACE;
3665169695Skan
3666169695Skan                  compile_stack.size <<= 1;
3667169695Skan                }
3668169695Skan
3669169695Skan              /* These are the values to restore when we hit end of this
3670169695Skan                 group.  They are all relative offsets, so that if the
3671169695Skan                 whole pattern moves because of realloc, they will still
3672169695Skan                 be valid.  */
3673169695Skan              COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3674169695Skan              COMPILE_STACK_TOP.fixup_alt_jump
3675169695Skan                = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3676169695Skan              COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3677169695Skan              COMPILE_STACK_TOP.regnum = regnum;
3678169695Skan
3679169695Skan              /* We will eventually replace the 0 with the number of
3680169695Skan                 groups inner to this one.  But do not push a
3681169695Skan                 start_memory for groups beyond the last one we can
3682169695Skan                 represent in the compiled pattern.  */
3683169695Skan              if (regnum <= MAX_REGNUM)
3684169695Skan                {
3685169695Skan                  COMPILE_STACK_TOP.inner_group_offset = b
3686169695Skan		    - COMPILED_BUFFER_VAR + 2;
3687169695Skan                  BUF_PUSH_3 (start_memory, regnum, 0);
3688169695Skan                }
3689169695Skan
3690169695Skan              compile_stack.avail++;
3691169695Skan
3692169695Skan              fixup_alt_jump = 0;
3693169695Skan              laststart = 0;
3694169695Skan              begalt = b;
3695169695Skan	      /* If we've reached MAX_REGNUM groups, then this open
3696169695Skan		 won't actually generate any code, so we'll have to
3697169695Skan		 clear pending_exact explicitly.  */
3698169695Skan	      pending_exact = 0;
3699169695Skan              break;
3700169695Skan
3701169695Skan
3702169695Skan            case ')':
3703169695Skan              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3704169695Skan
3705169695Skan              if (COMPILE_STACK_EMPTY)
3706169695Skan		{
3707169695Skan		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3708169695Skan		    goto normal_backslash;
3709169695Skan		  else
3710169695Skan		    FREE_STACK_RETURN (REG_ERPAREN);
3711169695Skan		}
3712169695Skan
3713169695Skan            handle_close:
3714169695Skan              if (fixup_alt_jump)
3715169695Skan                { /* Push a dummy failure point at the end of the
3716169695Skan                     alternative for a possible future
3717169695Skan                     `pop_failure_jump' to pop.  See comments at
3718169695Skan                     `push_dummy_failure' in `re_match_2'.  */
3719169695Skan                  BUF_PUSH (push_dummy_failure);
3720169695Skan
3721169695Skan                  /* We allocated space for this jump when we assigned
3722169695Skan                     to `fixup_alt_jump', in the `handle_alt' case below.  */
3723169695Skan                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3724169695Skan                }
3725169695Skan
3726169695Skan              /* See similar code for backslashed right paren above.  */
3727169695Skan              if (COMPILE_STACK_EMPTY)
3728169695Skan		{
3729169695Skan		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3730169695Skan		    goto normal_char;
3731169695Skan		  else
3732169695Skan		    FREE_STACK_RETURN (REG_ERPAREN);
3733169695Skan		}
3734169695Skan
3735169695Skan              /* Since we just checked for an empty stack above, this
3736169695Skan                 ``can't happen''.  */
3737169695Skan              assert (compile_stack.avail != 0);
3738169695Skan              {
3739169695Skan                /* We don't just want to restore into `regnum', because
3740169695Skan                   later groups should continue to be numbered higher,
3741169695Skan                   as in `(ab)c(de)' -- the second group is #2.  */
3742169695Skan                regnum_t this_group_regnum;
3743169695Skan
3744169695Skan                compile_stack.avail--;
3745169695Skan                begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3746169695Skan                fixup_alt_jump
3747169695Skan                  = COMPILE_STACK_TOP.fixup_alt_jump
3748169695Skan                    ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3749169695Skan                    : 0;
3750169695Skan                laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3751169695Skan                this_group_regnum = COMPILE_STACK_TOP.regnum;
3752169695Skan		/* If we've reached MAX_REGNUM groups, then this close
3753169695Skan		   won't actually generate any code, so we'll have to
3754169695Skan		   clear pending_exact explicitly.  */
3755169695Skan		pending_exact = 0;
3756169695Skan
3757169695Skan                /* We're at the end of the group, so now we know how many
3758169695Skan                   groups were inside this one.  */
3759169695Skan                if (this_group_regnum <= MAX_REGNUM)
3760169695Skan                  {
3761169695Skan		    UCHAR_T *inner_group_loc
3762169695Skan                      = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3763169695Skan
3764169695Skan                    *inner_group_loc = regnum - this_group_regnum;
3765169695Skan                    BUF_PUSH_3 (stop_memory, this_group_regnum,
3766169695Skan                                regnum - this_group_regnum);
3767169695Skan                  }
3768169695Skan              }
3769169695Skan              break;
3770169695Skan
3771169695Skan
3772169695Skan            case '|':					/* `\|'.  */
3773169695Skan              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3774169695Skan                goto normal_backslash;
3775169695Skan            handle_alt:
3776169695Skan              if (syntax & RE_LIMITED_OPS)
3777169695Skan                goto normal_char;
3778169695Skan
3779169695Skan              /* Insert before the previous alternative a jump which
3780169695Skan                 jumps to this alternative if the former fails.  */
3781169695Skan              GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3782169695Skan              INSERT_JUMP (on_failure_jump, begalt,
3783169695Skan			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3784169695Skan              pending_exact = 0;
3785169695Skan              b += 1 + OFFSET_ADDRESS_SIZE;
3786169695Skan
3787169695Skan              /* The alternative before this one has a jump after it
3788169695Skan                 which gets executed if it gets matched.  Adjust that
3789169695Skan                 jump so it will jump to this alternative's analogous
3790169695Skan                 jump (put in below, which in turn will jump to the next
3791169695Skan                 (if any) alternative's such jump, etc.).  The last such
3792169695Skan                 jump jumps to the correct final destination.  A picture:
3793169695Skan                          _____ _____
3794169695Skan                          |   | |   |
3795169695Skan                          |   v |   v
3796169695Skan                         a | b   | c
3797169695Skan
3798169695Skan                 If we are at `b', then fixup_alt_jump right now points to a
3799169695Skan                 three-byte space after `a'.  We'll put in the jump, set
3800169695Skan                 fixup_alt_jump to right after `b', and leave behind three
3801169695Skan                 bytes which we'll fill in when we get to after `c'.  */
3802169695Skan
3803169695Skan              if (fixup_alt_jump)
3804169695Skan                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3805169695Skan
3806169695Skan              /* Mark and leave space for a jump after this alternative,
3807169695Skan                 to be filled in later either by next alternative or
3808169695Skan                 when we know we're at the end of a series of alternatives.  */
3809169695Skan              fixup_alt_jump = b;
3810169695Skan              GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3811169695Skan              b += 1 + OFFSET_ADDRESS_SIZE;
3812169695Skan
3813169695Skan              laststart = 0;
3814169695Skan              begalt = b;
3815169695Skan              break;
3816169695Skan
3817169695Skan
3818169695Skan            case '{':
3819169695Skan              /* If \{ is a literal.  */
3820169695Skan              if (!(syntax & RE_INTERVALS)
3821169695Skan                     /* If we're at `\{' and it's not the open-interval
3822169695Skan                        operator.  */
3823169695Skan		  || (syntax & RE_NO_BK_BRACES))
3824169695Skan                goto normal_backslash;
3825169695Skan
3826169695Skan            handle_interval:
3827169695Skan              {
3828169695Skan                /* If got here, then the syntax allows intervals.  */
3829169695Skan
3830169695Skan                /* At least (most) this many matches must be made.  */
3831169695Skan                int lower_bound = -1, upper_bound = -1;
3832169695Skan
3833169695Skan		/* Place in the uncompiled pattern (i.e., just after
3834169695Skan		   the '{') to go back to if the interval is invalid.  */
3835169695Skan		const CHAR_T *beg_interval = p;
3836169695Skan
3837169695Skan                if (p == pend)
3838169695Skan		  goto invalid_interval;
3839169695Skan
3840169695Skan                GET_UNSIGNED_NUMBER (lower_bound);
3841169695Skan
3842169695Skan                if (c == ',')
3843169695Skan                  {
3844169695Skan                    GET_UNSIGNED_NUMBER (upper_bound);
3845169695Skan		    if (upper_bound < 0)
3846169695Skan		      upper_bound = RE_DUP_MAX;
3847169695Skan                  }
3848169695Skan                else
3849169695Skan                  /* Interval such as `{1}' => match exactly once. */
3850169695Skan                  upper_bound = lower_bound;
3851169695Skan
3852169695Skan                if (! (0 <= lower_bound && lower_bound <= upper_bound))
3853169695Skan		  goto invalid_interval;
3854169695Skan
3855169695Skan                if (!(syntax & RE_NO_BK_BRACES))
3856169695Skan                  {
3857169695Skan		    if (c != '\\' || p == pend)
3858169695Skan		      goto invalid_interval;
3859169695Skan                    PATFETCH (c);
3860169695Skan                  }
3861169695Skan
3862169695Skan                if (c != '}')
3863169695Skan		  goto invalid_interval;
3864169695Skan
3865169695Skan                /* If it's invalid to have no preceding re.  */
3866169695Skan                if (!laststart)
3867169695Skan                  {
3868169695Skan		    if (syntax & RE_CONTEXT_INVALID_OPS
3869169695Skan			&& !(syntax & RE_INVALID_INTERVAL_ORD))
3870169695Skan                      FREE_STACK_RETURN (REG_BADRPT);
3871169695Skan                    else if (syntax & RE_CONTEXT_INDEP_OPS)
3872169695Skan                      laststart = b;
3873169695Skan                    else
3874169695Skan                      goto unfetch_interval;
3875169695Skan                  }
3876169695Skan
3877169695Skan                /* We just parsed a valid interval.  */
3878169695Skan
3879169695Skan                if (RE_DUP_MAX < upper_bound)
3880169695Skan		  FREE_STACK_RETURN (REG_BADBR);
3881169695Skan
3882169695Skan                /* If the upper bound is zero, don't want to succeed at
3883169695Skan                   all; jump from `laststart' to `b + 3', which will be
3884169695Skan		   the end of the buffer after we insert the jump.  */
3885169695Skan		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3886169695Skan		   instead of 'b + 3'.  */
3887169695Skan                 if (upper_bound == 0)
3888169695Skan                   {
3889169695Skan                     GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3890169695Skan                     INSERT_JUMP (jump, laststart, b + 1
3891169695Skan				  + OFFSET_ADDRESS_SIZE);
3892169695Skan                     b += 1 + OFFSET_ADDRESS_SIZE;
3893169695Skan                   }
3894169695Skan
3895169695Skan                 /* Otherwise, we have a nontrivial interval.  When
3896169695Skan                    we're all done, the pattern will look like:
3897169695Skan                      set_number_at <jump count> <upper bound>
3898169695Skan                      set_number_at <succeed_n count> <lower bound>
3899169695Skan                      succeed_n <after jump addr> <succeed_n count>
3900169695Skan                      <body of loop>
3901169695Skan                      jump_n <succeed_n addr> <jump count>
3902169695Skan                    (The upper bound and `jump_n' are omitted if
3903169695Skan                    `upper_bound' is 1, though.)  */
3904169695Skan                 else
3905169695Skan                   { /* If the upper bound is > 1, we need to insert
3906169695Skan                        more at the end of the loop.  */
3907169695Skan                     unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3908169695Skan		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3909169695Skan
3910169695Skan                     GET_BUFFER_SPACE (nbytes);
3911169695Skan
3912169695Skan                     /* Initialize lower bound of the `succeed_n', even
3913169695Skan                        though it will be set during matching by its
3914169695Skan                        attendant `set_number_at' (inserted next),
3915169695Skan                        because `re_compile_fastmap' needs to know.
3916169695Skan                        Jump to the `jump_n' we might insert below.  */
3917169695Skan                     INSERT_JUMP2 (succeed_n, laststart,
3918169695Skan                                   b + 1 + 2 * OFFSET_ADDRESS_SIZE
3919169695Skan				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3920169695Skan				   , lower_bound);
3921169695Skan                     b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3922169695Skan
3923169695Skan                     /* Code to initialize the lower bound.  Insert
3924169695Skan                        before the `succeed_n'.  The `5' is the last two
3925169695Skan                        bytes of this `set_number_at', plus 3 bytes of
3926169695Skan                        the following `succeed_n'.  */
3927169695Skan		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
3928169695Skan			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
3929169695Skan			of the following `succeed_n'.  */
3930169695Skan                     PREFIX(insert_op2) (set_number_at, laststart, 1
3931169695Skan				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
3932169695Skan                     b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3933169695Skan
3934169695Skan                     if (upper_bound > 1)
3935169695Skan                       { /* More than one repetition is allowed, so
3936169695Skan                            append a backward jump to the `succeed_n'
3937169695Skan                            that starts this interval.
3938169695Skan
3939169695Skan                            When we've reached this during matching,
3940169695Skan                            we'll have matched the interval once, so
3941169695Skan                            jump back only `upper_bound - 1' times.  */
3942169695Skan                         STORE_JUMP2 (jump_n, b, laststart
3943169695Skan				      + 2 * OFFSET_ADDRESS_SIZE + 1,
3944169695Skan                                      upper_bound - 1);
3945169695Skan                         b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3946169695Skan
3947169695Skan                         /* The location we want to set is the second
3948169695Skan                            parameter of the `jump_n'; that is `b-2' as
3949169695Skan                            an absolute address.  `laststart' will be
3950169695Skan                            the `set_number_at' we're about to insert;
3951169695Skan                            `laststart+3' the number to set, the source
3952169695Skan                            for the relative address.  But we are
3953169695Skan                            inserting into the middle of the pattern --
3954169695Skan                            so everything is getting moved up by 5.
3955169695Skan                            Conclusion: (b - 2) - (laststart + 3) + 5,
3956169695Skan                            i.e., b - laststart.
3957169695Skan
3958169695Skan                            We insert this at the beginning of the loop
3959169695Skan                            so that if we fail during matching, we'll
3960169695Skan                            reinitialize the bounds.  */
3961169695Skan                         PREFIX(insert_op2) (set_number_at, laststart,
3962169695Skan					     b - laststart,
3963169695Skan					     upper_bound - 1, b);
3964169695Skan                         b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3965169695Skan                       }
3966169695Skan                   }
3967169695Skan                pending_exact = 0;
3968169695Skan		break;
3969169695Skan
3970169695Skan	      invalid_interval:
3971169695Skan		if (!(syntax & RE_INVALID_INTERVAL_ORD))
3972169695Skan		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
3973169695Skan	      unfetch_interval:
3974169695Skan		/* Match the characters as literals.  */
3975169695Skan		p = beg_interval;
3976169695Skan		c = '{';
3977169695Skan		if (syntax & RE_NO_BK_BRACES)
3978169695Skan		  goto normal_char;
3979169695Skan		else
3980169695Skan		  goto normal_backslash;
3981169695Skan	      }
3982169695Skan
3983169695Skan#ifdef emacs
3984169695Skan            /* There is no way to specify the before_dot and after_dot
3985169695Skan               operators.  rms says this is ok.  --karl  */
3986169695Skan            case '=':
3987169695Skan              BUF_PUSH (at_dot);
3988169695Skan              break;
3989169695Skan
3990169695Skan            case 's':
3991169695Skan              laststart = b;
3992169695Skan              PATFETCH (c);
3993169695Skan              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3994169695Skan              break;
3995169695Skan
3996169695Skan            case 'S':
3997169695Skan              laststart = b;
3998169695Skan              PATFETCH (c);
3999169695Skan              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4000169695Skan              break;
4001169695Skan#endif /* emacs */
4002169695Skan
4003169695Skan
4004169695Skan            case 'w':
4005169695Skan	      if (syntax & RE_NO_GNU_OPS)
4006169695Skan		goto normal_char;
4007169695Skan              laststart = b;
4008169695Skan              BUF_PUSH (wordchar);
4009169695Skan              break;
4010169695Skan
4011169695Skan
4012169695Skan            case 'W':
4013169695Skan	      if (syntax & RE_NO_GNU_OPS)
4014169695Skan		goto normal_char;
4015169695Skan              laststart = b;
4016169695Skan              BUF_PUSH (notwordchar);
4017169695Skan              break;
4018169695Skan
4019169695Skan
4020169695Skan            case '<':
4021169695Skan	      if (syntax & RE_NO_GNU_OPS)
4022169695Skan		goto normal_char;
4023169695Skan              BUF_PUSH (wordbeg);
4024169695Skan              break;
4025169695Skan
4026169695Skan            case '>':
4027169695Skan	      if (syntax & RE_NO_GNU_OPS)
4028169695Skan		goto normal_char;
4029169695Skan              BUF_PUSH (wordend);
4030169695Skan              break;
4031169695Skan
4032169695Skan            case 'b':
4033169695Skan	      if (syntax & RE_NO_GNU_OPS)
4034169695Skan		goto normal_char;
4035169695Skan              BUF_PUSH (wordbound);
4036169695Skan              break;
4037169695Skan
4038169695Skan            case 'B':
4039169695Skan	      if (syntax & RE_NO_GNU_OPS)
4040169695Skan		goto normal_char;
4041169695Skan              BUF_PUSH (notwordbound);
4042169695Skan              break;
4043169695Skan
4044169695Skan            case '`':
4045169695Skan	      if (syntax & RE_NO_GNU_OPS)
4046169695Skan		goto normal_char;
4047169695Skan              BUF_PUSH (begbuf);
4048169695Skan              break;
4049169695Skan
4050169695Skan            case '\'':
4051169695Skan	      if (syntax & RE_NO_GNU_OPS)
4052169695Skan		goto normal_char;
4053169695Skan              BUF_PUSH (endbuf);
4054169695Skan              break;
4055169695Skan
4056169695Skan            case '1': case '2': case '3': case '4': case '5':
4057169695Skan            case '6': case '7': case '8': case '9':
4058169695Skan              if (syntax & RE_NO_BK_REFS)
4059169695Skan                goto normal_char;
4060169695Skan
4061169695Skan              c1 = c - '0';
4062169695Skan
4063169695Skan              if (c1 > regnum)
4064169695Skan                FREE_STACK_RETURN (REG_ESUBREG);
4065169695Skan
4066169695Skan              /* Can't back reference to a subexpression if inside of it.  */
4067169695Skan              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4068169695Skan                goto normal_char;
4069169695Skan
4070169695Skan              laststart = b;
4071169695Skan              BUF_PUSH_2 (duplicate, c1);
4072169695Skan              break;
4073169695Skan
4074169695Skan
4075169695Skan            case '+':
4076169695Skan            case '?':
4077169695Skan              if (syntax & RE_BK_PLUS_QM)
4078169695Skan                goto handle_plus;
4079169695Skan              else
4080169695Skan                goto normal_backslash;
4081169695Skan
4082169695Skan            default:
4083169695Skan            normal_backslash:
4084169695Skan              /* You might think it would be useful for \ to mean
4085169695Skan                 not to translate; but if we don't translate it
4086169695Skan                 it will never match anything.  */
4087169695Skan              c = TRANSLATE (c);
4088169695Skan              goto normal_char;
4089169695Skan            }
4090169695Skan          break;
4091169695Skan
4092169695Skan
4093169695Skan	default:
4094169695Skan        /* Expects the character in `c'.  */
4095169695Skan	normal_char:
4096169695Skan	      /* If no exactn currently being built.  */
4097169695Skan          if (!pending_exact
4098169695Skan#ifdef WCHAR
4099169695Skan	      /* If last exactn handle binary(or character) and
4100169695Skan		 new exactn handle character(or binary).  */
4101169695Skan	      || is_exactn_bin != is_binary[p - 1 - pattern]
4102169695Skan#endif /* WCHAR */
4103169695Skan
4104169695Skan              /* If last exactn not at current position.  */
4105169695Skan              || pending_exact + *pending_exact + 1 != b
4106169695Skan
4107169695Skan              /* We have only one byte following the exactn for the count.  */
4108169695Skan	      || *pending_exact == (1 << BYTEWIDTH) - 1
4109169695Skan
4110169695Skan              /* If followed by a repetition operator.  */
4111169695Skan              || *p == '*' || *p == '^'
4112169695Skan	      || ((syntax & RE_BK_PLUS_QM)
4113169695Skan		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4114169695Skan		  : (*p == '+' || *p == '?'))
4115169695Skan	      || ((syntax & RE_INTERVALS)
4116169695Skan                  && ((syntax & RE_NO_BK_BRACES)
4117169695Skan		      ? *p == '{'
4118169695Skan                      : (p[0] == '\\' && p[1] == '{'))))
4119169695Skan	    {
4120169695Skan	      /* Start building a new exactn.  */
4121169695Skan
4122169695Skan              laststart = b;
4123169695Skan
4124169695Skan#ifdef WCHAR
4125169695Skan	      /* Is this exactn binary data or character? */
4126169695Skan	      is_exactn_bin = is_binary[p - 1 - pattern];
4127169695Skan	      if (is_exactn_bin)
4128169695Skan		  BUF_PUSH_2 (exactn_bin, 0);
4129169695Skan	      else
4130169695Skan		  BUF_PUSH_2 (exactn, 0);
4131169695Skan#else
4132169695Skan	      BUF_PUSH_2 (exactn, 0);
4133169695Skan#endif /* WCHAR */
4134169695Skan	      pending_exact = b - 1;
4135169695Skan            }
4136169695Skan
4137169695Skan	  BUF_PUSH (c);
4138169695Skan          (*pending_exact)++;
4139169695Skan	  break;
4140169695Skan        } /* switch (c) */
4141169695Skan    } /* while p != pend */
4142169695Skan
4143169695Skan
4144169695Skan  /* Through the pattern now.  */
4145169695Skan
4146169695Skan  if (fixup_alt_jump)
4147169695Skan    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4148169695Skan
4149169695Skan  if (!COMPILE_STACK_EMPTY)
4150169695Skan    FREE_STACK_RETURN (REG_EPAREN);
4151169695Skan
4152169695Skan  /* If we don't want backtracking, force success
4153169695Skan     the first time we reach the end of the compiled pattern.  */
4154169695Skan  if (syntax & RE_NO_POSIX_BACKTRACKING)
4155169695Skan    BUF_PUSH (succeed);
4156169695Skan
4157169695Skan#ifdef WCHAR
4158169695Skan  free (pattern);
4159169695Skan  free (mbs_offset);
4160169695Skan  free (is_binary);
4161169695Skan#endif
4162169695Skan  free (compile_stack.stack);
4163169695Skan
4164169695Skan  /* We have succeeded; set the length of the buffer.  */
4165169695Skan#ifdef WCHAR
4166169695Skan  bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4167169695Skan#else
4168169695Skan  bufp->used = b - bufp->buffer;
4169169695Skan#endif
4170169695Skan
4171169695Skan#ifdef DEBUG
4172169695Skan  if (debug)
4173169695Skan    {
4174169695Skan      DEBUG_PRINT1 ("\nCompiled pattern: \n");
4175169695Skan      PREFIX(print_compiled_pattern) (bufp);
4176169695Skan    }
4177169695Skan#endif /* DEBUG */
4178169695Skan
4179169695Skan#ifndef MATCH_MAY_ALLOCATE
4180169695Skan  /* Initialize the failure stack to the largest possible stack.  This
4181169695Skan     isn't necessary unless we're trying to avoid calling alloca in
4182169695Skan     the search and match routines.  */
4183169695Skan  {
4184169695Skan    int num_regs = bufp->re_nsub + 1;
4185169695Skan
4186169695Skan    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4187169695Skan       is strictly greater than re_max_failures, the largest possible stack
4188169695Skan       is 2 * re_max_failures failure points.  */
4189169695Skan    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4190169695Skan      {
4191169695Skan	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4192169695Skan
4193169695Skan# ifdef emacs
4194169695Skan	if (! fail_stack.stack)
4195169695Skan	  fail_stack.stack
4196169695Skan	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4197169695Skan				    * sizeof (PREFIX(fail_stack_elt_t)));
4198169695Skan	else
4199169695Skan	  fail_stack.stack
4200169695Skan	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4201169695Skan				     (fail_stack.size
4202169695Skan				      * sizeof (PREFIX(fail_stack_elt_t))));
4203169695Skan# else /* not emacs */
4204169695Skan	if (! fail_stack.stack)
4205169695Skan	  fail_stack.stack
4206169695Skan	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4207169695Skan				   * sizeof (PREFIX(fail_stack_elt_t)));
4208169695Skan	else
4209169695Skan	  fail_stack.stack
4210169695Skan	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4211169695Skan					    (fail_stack.size
4212169695Skan				     * sizeof (PREFIX(fail_stack_elt_t))));
4213169695Skan# endif /* not emacs */
4214169695Skan      }
4215169695Skan
4216169695Skan   PREFIX(regex_grow_registers) (num_regs);
4217169695Skan  }
4218169695Skan#endif /* not MATCH_MAY_ALLOCATE */
4219169695Skan
4220169695Skan  return REG_NOERROR;
4221169695Skan} /* regex_compile */
4222169695Skan
4223169695Skan/* Subroutines for `regex_compile'.  */
4224169695Skan
4225169695Skan/* Store OP at LOC followed by two-byte integer parameter ARG.  */
4226169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4227169695Skan
4228169695Skanstatic void
4229169695SkanPREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
4230169695Skan{
4231169695Skan  *loc = (UCHAR_T) op;
4232169695Skan  STORE_NUMBER (loc + 1, arg);
4233169695Skan}
4234169695Skan
4235169695Skan
4236169695Skan/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
4237169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4238169695Skan
4239169695Skanstatic void
4240169695SkanPREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
4241169695Skan{
4242169695Skan  *loc = (UCHAR_T) op;
4243169695Skan  STORE_NUMBER (loc + 1, arg1);
4244169695Skan  STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4245169695Skan}
4246169695Skan
4247169695Skan
4248169695Skan/* Copy the bytes from LOC to END to open up three bytes of space at LOC
4249169695Skan   for OP followed by two-byte integer parameter ARG.  */
4250169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4251169695Skan
4252169695Skanstatic void
4253169695SkanPREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
4254169695Skan{
4255169695Skan  register UCHAR_T *pfrom = end;
4256169695Skan  register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4257169695Skan
4258169695Skan  while (pfrom != loc)
4259169695Skan    *--pto = *--pfrom;
4260169695Skan
4261169695Skan  PREFIX(store_op1) (op, loc, arg);
4262169695Skan}
4263169695Skan
4264169695Skan
4265169695Skan/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
4266169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4267169695Skan
4268169695Skanstatic void
4269169695SkanPREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
4270169695Skan                    int arg2, UCHAR_T *end)
4271169695Skan{
4272169695Skan  register UCHAR_T *pfrom = end;
4273169695Skan  register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4274169695Skan
4275169695Skan  while (pfrom != loc)
4276169695Skan    *--pto = *--pfrom;
4277169695Skan
4278169695Skan  PREFIX(store_op2) (op, loc, arg1, arg2);
4279169695Skan}
4280169695Skan
4281169695Skan
4282169695Skan/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
4283169695Skan   after an alternative or a begin-subexpression.  We assume there is at
4284169695Skan   least one character before the ^.  */
4285169695Skan
4286169695Skanstatic boolean
4287169695SkanPREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
4288169695Skan                          reg_syntax_t syntax)
4289169695Skan{
4290169695Skan  const CHAR_T *prev = p - 2;
4291169695Skan  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4292169695Skan
4293169695Skan  return
4294169695Skan       /* After a subexpression?  */
4295169695Skan       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4296169695Skan       /* After an alternative?  */
4297169695Skan    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4298169695Skan}
4299169695Skan
4300169695Skan
4301169695Skan/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
4302169695Skan   at least one character after the $, i.e., `P < PEND'.  */
4303169695Skan
4304169695Skanstatic boolean
4305169695SkanPREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
4306169695Skan                          reg_syntax_t syntax)
4307169695Skan{
4308169695Skan  const CHAR_T *next = p;
4309169695Skan  boolean next_backslash = *next == '\\';
4310169695Skan  const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4311169695Skan
4312169695Skan  return
4313169695Skan       /* Before a subexpression?  */
4314169695Skan       (syntax & RE_NO_BK_PARENS ? *next == ')'
4315169695Skan        : next_backslash && next_next && *next_next == ')')
4316169695Skan       /* Before an alternative?  */
4317169695Skan    || (syntax & RE_NO_BK_VBAR ? *next == '|'
4318169695Skan        : next_backslash && next_next && *next_next == '|');
4319169695Skan}
4320169695Skan
4321169695Skan#else /* not INSIDE_RECURSION */
4322169695Skan
4323169695Skan/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4324169695Skan   false if it's not.  */
4325169695Skan
4326169695Skanstatic boolean
4327169695Skangroup_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
4328169695Skan{
4329169695Skan  int this_element;
4330169695Skan
4331169695Skan  for (this_element = compile_stack.avail - 1;
4332169695Skan       this_element >= 0;
4333169695Skan       this_element--)
4334169695Skan    if (compile_stack.stack[this_element].regnum == regnum)
4335169695Skan      return true;
4336169695Skan
4337169695Skan  return false;
4338169695Skan}
4339169695Skan#endif /* not INSIDE_RECURSION */
4340169695Skan
4341169695Skan#ifdef INSIDE_RECURSION
4342169695Skan
4343169695Skan#ifdef WCHAR
4344169695Skan/* This insert space, which size is "num", into the pattern at "loc".
4345169695Skan   "end" must point the end of the allocated buffer.  */
4346169695Skanstatic void
4347169695Skaninsert_space (int num, CHAR_T *loc, CHAR_T *end)
4348169695Skan{
4349169695Skan  register CHAR_T *pto = end;
4350169695Skan  register CHAR_T *pfrom = end - num;
4351169695Skan
4352169695Skan  while (pfrom >= loc)
4353169695Skan    *pto-- = *pfrom--;
4354169695Skan}
4355169695Skan#endif /* WCHAR */
4356169695Skan
4357169695Skan#ifdef WCHAR
4358169695Skanstatic reg_errcode_t
4359169695Skanwcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
4360169695Skan                   const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
4361169695Skan                   reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
4362169695Skan{
4363169695Skan  const CHAR_T *p = *p_ptr;
4364169695Skan  CHAR_T range_start, range_end;
4365169695Skan  reg_errcode_t ret;
4366169695Skan# ifdef _LIBC
4367169695Skan  uint32_t nrules;
4368169695Skan  uint32_t start_val, end_val;
4369169695Skan# endif
4370169695Skan  if (p == pend)
4371169695Skan    return REG_ERANGE;
4372169695Skan
4373169695Skan# ifdef _LIBC
4374169695Skan  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4375169695Skan  if (nrules != 0)
4376169695Skan    {
4377169695Skan      const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4378169695Skan						       _NL_COLLATE_COLLSEQWC);
4379169695Skan      const unsigned char *extra = (const unsigned char *)
4380169695Skan	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4381169695Skan
4382169695Skan      if (range_start_char < -1)
4383169695Skan	{
4384169695Skan	  /* range_start is a collating symbol.  */
4385169695Skan	  int32_t *wextra;
4386169695Skan	  /* Retreive the index and get collation sequence value.  */
4387169695Skan	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
4388169695Skan	  start_val = wextra[1 + *wextra];
4389169695Skan	}
4390169695Skan      else
4391169695Skan	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4392169695Skan
4393169695Skan      end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4394169695Skan
4395169695Skan      /* Report an error if the range is empty and the syntax prohibits
4396169695Skan	 this.  */
4397169695Skan      ret = ((syntax & RE_NO_EMPTY_RANGES)
4398169695Skan	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4399169695Skan
4400169695Skan      /* Insert space to the end of the char_ranges.  */
4401169695Skan      insert_space(2, b - char_set[5] - 2, b - 1);
4402169695Skan      *(b - char_set[5] - 2) = (wchar_t)start_val;
4403169695Skan      *(b - char_set[5] - 1) = (wchar_t)end_val;
4404169695Skan      char_set[4]++; /* ranges_index */
4405169695Skan    }
4406169695Skan  else
4407169695Skan# endif
4408169695Skan    {
4409169695Skan      range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4410169695Skan	range_start_char;
4411169695Skan      range_end = TRANSLATE (p[0]);
4412169695Skan      /* Report an error if the range is empty and the syntax prohibits
4413169695Skan	 this.  */
4414169695Skan      ret = ((syntax & RE_NO_EMPTY_RANGES)
4415169695Skan	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4416169695Skan
4417169695Skan      /* Insert space to the end of the char_ranges.  */
4418169695Skan      insert_space(2, b - char_set[5] - 2, b - 1);
4419169695Skan      *(b - char_set[5] - 2) = range_start;
4420169695Skan      *(b - char_set[5] - 1) = range_end;
4421169695Skan      char_set[4]++; /* ranges_index */
4422169695Skan    }
4423169695Skan  /* Have to increment the pointer into the pattern string, so the
4424169695Skan     caller isn't still at the ending character.  */
4425169695Skan  (*p_ptr)++;
4426169695Skan
4427169695Skan  return ret;
4428169695Skan}
4429169695Skan#else /* BYTE */
4430169695Skan/* Read the ending character of a range (in a bracket expression) from the
4431169695Skan   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
4432169695Skan   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
4433169695Skan   Then we set the translation of all bits between the starting and
4434169695Skan   ending characters (inclusive) in the compiled pattern B.
4435169695Skan
4436169695Skan   Return an error code.
4437169695Skan
4438169695Skan   We use these short variable names so we can use the same macros as
4439169695Skan   `regex_compile' itself.  */
4440169695Skan
4441169695Skanstatic reg_errcode_t
4442169695Skanbyte_compile_range (unsigned int range_start_char, const char **p_ptr,
4443169695Skan                    const char *pend, RE_TRANSLATE_TYPE translate,
4444169695Skan                    reg_syntax_t syntax, unsigned char *b)
4445169695Skan{
4446169695Skan  unsigned this_char;
4447169695Skan  const char *p = *p_ptr;
4448169695Skan  reg_errcode_t ret;
4449169695Skan# if _LIBC
4450169695Skan  const unsigned char *collseq;
4451169695Skan  unsigned int start_colseq;
4452169695Skan  unsigned int end_colseq;
4453169695Skan# else
4454169695Skan  unsigned end_char;
4455169695Skan# endif
4456169695Skan
4457169695Skan  if (p == pend)
4458169695Skan    return REG_ERANGE;
4459169695Skan
4460169695Skan  /* Have to increment the pointer into the pattern string, so the
4461169695Skan     caller isn't still at the ending character.  */
4462169695Skan  (*p_ptr)++;
4463169695Skan
4464169695Skan  /* Report an error if the range is empty and the syntax prohibits this.  */
4465169695Skan  ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4466169695Skan
4467169695Skan# if _LIBC
4468169695Skan  collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4469169695Skan						 _NL_COLLATE_COLLSEQMB);
4470169695Skan
4471169695Skan  start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4472169695Skan  end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4473169695Skan  for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4474169695Skan    {
4475169695Skan      unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4476169695Skan
4477169695Skan      if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4478169695Skan	{
4479169695Skan	  SET_LIST_BIT (TRANSLATE (this_char));
4480169695Skan	  ret = REG_NOERROR;
4481169695Skan	}
4482169695Skan    }
4483169695Skan# else
4484169695Skan  /* Here we see why `this_char' has to be larger than an `unsigned
4485169695Skan     char' -- we would otherwise go into an infinite loop, since all
4486169695Skan     characters <= 0xff.  */
4487169695Skan  range_start_char = TRANSLATE (range_start_char);
4488169695Skan  /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4489169695Skan     and some compilers cast it to int implicitly, so following for_loop
4490169695Skan     may fall to (almost) infinite loop.
4491169695Skan     e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4492169695Skan     To avoid this, we cast p[0] to unsigned int and truncate it.  */
4493169695Skan  end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4494169695Skan
4495169695Skan  for (this_char = range_start_char; this_char <= end_char; ++this_char)
4496169695Skan    {
4497169695Skan      SET_LIST_BIT (TRANSLATE (this_char));
4498169695Skan      ret = REG_NOERROR;
4499169695Skan    }
4500169695Skan# endif
4501169695Skan
4502169695Skan  return ret;
4503169695Skan}
4504169695Skan#endif /* WCHAR */
4505169695Skan
4506169695Skan/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4507169695Skan   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4508169695Skan   characters can start a string that matches the pattern.  This fastmap
4509169695Skan   is used by re_search to skip quickly over impossible starting points.
4510169695Skan
4511169695Skan   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4512169695Skan   area as BUFP->fastmap.
4513169695Skan
4514169695Skan   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4515169695Skan   the pattern buffer.
4516169695Skan
4517169695Skan   Returns 0 if we succeed, -2 if an internal error.   */
4518169695Skan
4519169695Skan#ifdef WCHAR
4520169695Skan/* local function for re_compile_fastmap.
4521169695Skan   truncate wchar_t character to char.  */
4522169695Skanstatic unsigned char truncate_wchar (CHAR_T c);
4523169695Skan
4524169695Skanstatic unsigned char
4525169695Skantruncate_wchar (CHAR_T c)
4526169695Skan{
4527169695Skan  unsigned char buf[MB_CUR_MAX];
4528169695Skan  mbstate_t state;
4529169695Skan  int retval;
4530169695Skan  memset (&state, '\0', sizeof (state));
4531169695Skan# ifdef _LIBC
4532169695Skan  retval = __wcrtomb (buf, c, &state);
4533169695Skan# else
4534169695Skan  retval = wcrtomb (buf, c, &state);
4535169695Skan# endif
4536169695Skan  return retval > 0 ? buf[0] : (unsigned char) c;
4537169695Skan}
4538169695Skan#endif /* WCHAR */
4539169695Skan
/* Worker that fills in BUFP->fastmap by walking the compiled pattern in
   BUFP->buffer one opcode at a time.  Alternative paths through the
   pattern are remembered on a failure stack and explored in turn.
   Sets BUFP->fastmap_accurate and BUFP->can_be_null.  Returns 0
   normally, or -2 when PUSH_PATTERN_OP fails to record a path.  */
4540169695Skanstatic int
4541169695SkanPREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
4542169695Skan{
/* J and K are scratch integers for operands read from the pattern.  */
4543169695Skan  int j, k;
4544169695Skan#ifdef MATCH_MAY_ALLOCATE
4545169695Skan  PREFIX(fail_stack_type) fail_stack;
4546169695Skan#endif
4547169695Skan#ifndef REGEX_MALLOC
4548169695Skan  char *destination;
4549169695Skan#endif
4550169695Skan
4551169695Skan  register char *fastmap = bufp->fastmap;
4552169695Skan
4553169695Skan#ifdef WCHAR
4554169695Skan  /* We need to cast pattern to (wchar_t*), because we casted this compiled
4555169695Skan     pattern to (char*) in regex_compile.  */
4556169695Skan  UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
4557169695Skan  register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
4558169695Skan#else /* BYTE */
4559169695Skan  UCHAR_T *pattern = bufp->buffer;
4560169695Skan  register UCHAR_T *pend = pattern + bufp->used;
4561169695Skan#endif /* WCHAR */
4562169695Skan  UCHAR_T *p = pattern;
4563169695Skan
4564169695Skan#ifdef REL_ALLOC
4565169695Skan  /* This holds the pointer to the failure stack, when
4566169695Skan     it is allocated relocatably.  */
4567169695Skan  fail_stack_elt_t *failure_stack_ptr;
4568169695Skan#endif
4569169695Skan
4570169695Skan  /* Assume that each path through the pattern can be null until
4571169695Skan     proven otherwise.  We set this false at the bottom of switch
4572169695Skan     statement, to which we get only if a particular path doesn't
4573169695Skan     match the empty string.  */
4574169695Skan  boolean path_can_be_null = true;
4575169695Skan
4576169695Skan  /* We aren't doing a `succeed_n' to begin with.  */
4577169695Skan  boolean succeed_n_p = false;
4578169695Skan
4579169695Skan  assert (fastmap != NULL && p != NULL);
4580169695Skan
4581169695Skan  INIT_FAIL_STACK ();
4582169695Skan  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4583169695Skan  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4584169695Skan  bufp->can_be_null = 0;
4585169695Skan
4586169695Skan  while (1)
4587169695Skan    {
4588169695Skan      if (p == pend || *p == (UCHAR_T) succeed)
4589169695Skan	{
4590169695Skan	  /* We have reached the (effective) end of pattern.  */
4591169695Skan	  if (!FAIL_STACK_EMPTY ())
4592169695Skan	    {
4593169695Skan	      bufp->can_be_null |= path_can_be_null;
4594169695Skan
4595169695Skan	      /* Reset for next path.  */
4596169695Skan	      path_can_be_null = true;
4597169695Skan
	      /* Resume at the most recently remembered alternative.  */
4598169695Skan	      p = fail_stack.stack[--fail_stack.avail].pointer;
4599169695Skan
4600169695Skan	      continue;
4601169695Skan	    }
4602169695Skan	  else
4603169695Skan	    break;
4604169695Skan	}
4605169695Skan
4606169695Skan      /* We should never be about to go beyond the end of the pattern.  */
4607169695Skan      assert (p < pend);
4608169695Skan
      /* Dispatch on the opcode; P is left pointing at its first operand.  */
4609169695Skan      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4610169695Skan	{
4611169695Skan
4612169695Skan        /* I guess the idea here is to simply not bother with a fastmap
4613169695Skan           if a backreference is used, since it's too hard to figure out
4614169695Skan           the fastmap for the corresponding group.  Setting
4615169695Skan           `can_be_null' stops `re_search_2' from using the fastmap, so
4616169695Skan           that is all we do.  */
4617169695Skan	case duplicate:
4618169695Skan	  bufp->can_be_null = 1;
4619169695Skan          goto done;
4620169695Skan
4621169695Skan
4622169695Skan      /* Following are the cases which match a character.  These end
4623169695Skan         with `break'.  */
4624169695Skan
	/* regex_compile emits exactn, then a count, then the literal
	   characters, so p[1] is the first literal of the run.  */
4625169695Skan#ifdef WCHAR
4626169695Skan	case exactn:
4627169695Skan          fastmap[truncate_wchar(p[1])] = 1;
4628169695Skan	  break;
4629169695Skan#else /* BYTE */
4630169695Skan	case exactn:
4631169695Skan          fastmap[p[1]] = 1;
4632169695Skan	  break;
4633169695Skan#endif /* WCHAR */
4634169695Skan#ifdef MBS_SUPPORT
4635169695Skan	case exactn_bin:
4636169695Skan	  fastmap[p[1]] = 1;
4637169695Skan	  break;
4638169695Skan#endif
4639169695Skan
4640169695Skan#ifdef WCHAR
4641169695Skan        /* It is hard to distinguish fastmap from (multi byte) characters
4642169695Skan           which depends on current locale.  */
4643169695Skan        case charset:
4644169695Skan	case charset_not:
4645169695Skan	case wordchar:
4646169695Skan	case notwordchar:
4647169695Skan          bufp->can_be_null = 1;
4648169695Skan          goto done;
4649169695Skan#else /* BYTE */
	/* *p is the number of bitmap bytes that follow; each set bit
	   marks a character that can start a match.  */
4650169695Skan        case charset:
4651169695Skan          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4652169695Skan	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4653169695Skan              fastmap[j] = 1;
4654169695Skan	  break;
4655169695Skan
4656169695Skan
4657169695Skan	case charset_not:
4658169695Skan	  /* Chars beyond end of map must be allowed.  */
4659169695Skan	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4660169695Skan            fastmap[j] = 1;
4661169695Skan
	  /* Within the bitmap, every clear bit is a possible start.  */
4662169695Skan	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4663169695Skan	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4664169695Skan              fastmap[j] = 1;
4665169695Skan          break;
4666169695Skan
4667169695Skan
4668169695Skan	case wordchar:
4669169695Skan	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4670169695Skan	    if (SYNTAX (j) == Sword)
4671169695Skan	      fastmap[j] = 1;
4672169695Skan	  break;
4673169695Skan
4674169695Skan
4675169695Skan	case notwordchar:
4676169695Skan	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4677169695Skan	    if (SYNTAX (j) != Sword)
4678169695Skan	      fastmap[j] = 1;
4679169695Skan	  break;
4680169695Skan#endif /* WCHAR */
4681169695Skan
4682169695Skan        case anychar:
4683169695Skan	  {
	    /* Saved so the newline entry can be restored below.  */
4684169695Skan	    int fastmap_newline = fastmap['\n'];
4685169695Skan
4686169695Skan	    /* `.' matches anything ...  */
4687169695Skan	    for (j = 0; j < (1 << BYTEWIDTH); j++)
4688169695Skan	      fastmap[j] = 1;
4689169695Skan
4690169695Skan	    /* ... except perhaps newline.  */
4691169695Skan	    if (!(bufp->syntax & RE_DOT_NEWLINE))
4692169695Skan	      fastmap['\n'] = fastmap_newline;
4693169695Skan
4694169695Skan	    /* Return if we have already set `can_be_null'; if we have,
4695169695Skan	       then the fastmap is irrelevant.  Something's wrong here.  */
4696169695Skan	    else if (bufp->can_be_null)
4697169695Skan	      goto done;
4698169695Skan
4699169695Skan	    /* Otherwise, have to check alternative paths.  */
4700169695Skan	    break;
4701169695Skan	  }
4702169695Skan
4703169695Skan#ifdef emacs
4704169695Skan        case syntaxspec:
4705169695Skan	  k = *p++;
4706169695Skan	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4707169695Skan	    if (SYNTAX (j) == (enum syntaxcode) k)
4708169695Skan	      fastmap[j] = 1;
4709169695Skan	  break;
4710169695Skan
4711169695Skan
4712169695Skan	case notsyntaxspec:
4713169695Skan	  k = *p++;
4714169695Skan	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4715169695Skan	    if (SYNTAX (j) != (enum syntaxcode) k)
4716169695Skan	      fastmap[j] = 1;
4717169695Skan	  break;
4718169695Skan
4719169695Skan
4720169695Skan      /* All cases after this match the empty string.  These end with
4721169695Skan         `continue'.  */
4722169695Skan
4723169695Skan
4724169695Skan	case before_dot:
4725169695Skan	case at_dot:
4726169695Skan	case after_dot:
4727169695Skan          continue;
4728169695Skan#endif /* emacs */
4729169695Skan
4730169695Skan
	/* Opcodes with no operands that consume no input: step over.  */
4731169695Skan        case no_op:
4732169695Skan        case begline:
4733169695Skan        case endline:
4734169695Skan	case begbuf:
4735169695Skan	case endbuf:
4736169695Skan	case wordbound:
4737169695Skan	case notwordbound:
4738169695Skan	case wordbeg:
4739169695Skan	case wordend:
4740169695Skan        case push_dummy_failure:
4741169695Skan          continue;
4742169695Skan
4743169695Skan
4744169695Skan	case jump_n:
4745169695Skan        case pop_failure_jump:
4746169695Skan	case maybe_pop_jump:
4747169695Skan	case jump:
4748169695Skan        case jump_past_alt:
4749169695Skan	case dummy_failure_jump:
	  /* J is a displacement relative to the position just past the
	     operand; follow it.  */
4750169695Skan          EXTRACT_NUMBER_AND_INCR (j, p);
4751169695Skan	  p += j;
4752169695Skan	  if (j > 0)
4753169695Skan	    continue;
4754169695Skan
4755169695Skan          /* Jump backward implies we just went through the body of a
4756169695Skan             loop and matched nothing.  Opcode jumped to should be
4757169695Skan             `on_failure_jump' or `succeed_n'.  Just treat it like an
4758169695Skan             ordinary jump.  For a * loop, it has pushed its failure
4759169695Skan             point already; if so, discard that as redundant.  */
4760169695Skan          if ((re_opcode_t) *p != on_failure_jump
4761169695Skan	      && (re_opcode_t) *p != succeed_n)
4762169695Skan	    continue;
4763169695Skan
4764169695Skan          p++;
4765169695Skan          EXTRACT_NUMBER_AND_INCR (j, p);
4766169695Skan          p += j;
4767169695Skan
4768169695Skan          /* If what's on the stack is where we are now, pop it.  */
4769169695Skan          if (!FAIL_STACK_EMPTY ()
4770169695Skan	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4771169695Skan            fail_stack.avail--;
4772169695Skan
4773169695Skan          continue;
4774169695Skan
4775169695Skan
4776169695Skan        case on_failure_jump:
4777169695Skan        case on_failure_keep_string_jump:
4778169695Skan	handle_on_failure_jump:
4779169695Skan          EXTRACT_NUMBER_AND_INCR (j, p);
4780169695Skan
4781169695Skan          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4782169695Skan             end of the pattern.  We don't want to push such a point,
4783169695Skan             since when we restore it above, entering the switch will
4784169695Skan             increment `p' past the end of the pattern.  We don't need
4785169695Skan             to push such a point since we obviously won't find any more
4786169695Skan             fastmap entries beyond `pend'.  Such a pattern can match
4787169695Skan             the null string, though.  */
4788169695Skan          if (p + j < pend)
4789169695Skan            {
4790169695Skan              if (!PUSH_PATTERN_OP (p + j, fail_stack))
4791169695Skan		{
		  /* The alternative could not be recorded; give up.  */
4792169695Skan		  RESET_FAIL_STACK ();
4793169695Skan		  return -2;
4794169695Skan		}
4795169695Skan            }
4796169695Skan          else
4797169695Skan            bufp->can_be_null = 1;
4798169695Skan
	  /* Arrived here from `succeed_n' with a zero count: its extra
	     count operand still has to be stepped over.  */
4799169695Skan          if (succeed_n_p)
4800169695Skan            {
4801169695Skan              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
4802169695Skan              succeed_n_p = false;
4803169695Skan	    }
4804169695Skan
4805169695Skan          continue;
4806169695Skan
4807169695Skan
4808169695Skan	case succeed_n:
4809169695Skan          /* Get to the number of times to succeed.  */
4810169695Skan          p += OFFSET_ADDRESS_SIZE;
4811169695Skan
4812169695Skan          /* Increment p past the n for when k != 0.  */
4813169695Skan          EXTRACT_NUMBER_AND_INCR (k, p);
4814169695Skan          if (k == 0)
4815169695Skan	    {
	      /* Rewind to the first operand and let the on_failure_jump
		 code above process it.  */
4816169695Skan              p -= 2 * OFFSET_ADDRESS_SIZE;
4817169695Skan  	      succeed_n_p = true;  /* Spaghetti code alert.  */
4818169695Skan              goto handle_on_failure_jump;
4819169695Skan            }
4820169695Skan          continue;
4821169695Skan
4822169695Skan
4823169695Skan	case set_number_at:
	  /* Step over both operands.  */
4824169695Skan          p += 2 * OFFSET_ADDRESS_SIZE;
4825169695Skan          continue;
4826169695Skan
4827169695Skan
4828169695Skan	case start_memory:
4829169695Skan        case stop_memory:
	  /* Step over the two operand elements.  */
4830169695Skan	  p += 2;
4831169695Skan	  continue;
4832169695Skan
4833169695Skan
4834169695Skan	default:
4835169695Skan          abort (); /* We have listed all the cases.  */
4836169695Skan        } /* switch *p++ */
4837169695Skan
4838169695Skan      /* Getting here means we have found the possible starting
4839169695Skan         characters for one path of the pattern -- and that the empty
4840169695Skan         string does not match.  We need not follow this path further.
4841169695Skan         Instead, look at the next alternative (remembered on the
4842169695Skan         stack), or quit if no more.  The test at the top of the loop
4843169695Skan         does these things.  */
4844169695Skan      path_can_be_null = false;
4845169695Skan      p = pend;
4846169695Skan    } /* while p */
4847169695Skan
4848169695Skan  /* Set `can_be_null' for the last path (also the first path, if the
4849169695Skan     pattern is empty).  */
4850169695Skan  bufp->can_be_null |= path_can_be_null;
4851169695Skan
/* Common exit: run RESET_FAIL_STACK, then report success.  */
4852169695Skan done:
4853169695Skan  RESET_FAIL_STACK ();
4854169695Skan  return 0;
4855169695Skan}
4856169695Skan
4857169695Skan#else /* not INSIDE_RECURSION */
4858169695Skan
/* Public entry point: compute the fastmap for BUFP by delegating to the
   wide-character worker when multibyte support is compiled in and the
   current locale is multibyte, and to the byte worker otherwise.
   Returns whatever the selected worker returns.  */
int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap(bufp);
# endif
  return byte_re_compile_fastmap(bufp);
} /* re_compile_fastmap */
4869169695Skan#ifdef _LIBC
4870169695Skanweak_alias (__re_compile_fastmap, re_compile_fastmap)
4871169695Skan#endif
4872169695Skan
4873169695Skan
4874169695Skan/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4875169695Skan   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4876169695Skan   this memory for recording register information.  STARTS and ENDS
4877169695Skan   must be allocated using the malloc library routine, and must each
4878169695Skan   be at least NUM_REGS * sizeof (regoff_t) bytes long.
4879169695Skan
4880169695Skan   If NUM_REGS == 0, then subsequent matches should allocate their own
4881169695Skan   register data.
4882169695Skan
4883169695Skan   Unless this function is called, the first search or match using
4884169695Skan   PATTERN_BUFFER will allocate its own register data, without
4885169695Skan   freeing the old data.  */
4886169695Skan
4887169695Skanvoid
4888169695Skanre_set_registers (struct re_pattern_buffer *bufp,
4889169695Skan                  struct re_registers *regs, unsigned num_regs,
4890169695Skan                  regoff_t *starts, regoff_t *ends)
4891169695Skan{
4892169695Skan  if (num_regs)
4893169695Skan    {
4894169695Skan      bufp->regs_allocated = REGS_REALLOCATE;
4895169695Skan      regs->num_regs = num_regs;
4896169695Skan      regs->start = starts;
4897169695Skan      regs->end = ends;
4898169695Skan    }
4899169695Skan  else
4900169695Skan    {
4901169695Skan      bufp->regs_allocated = REGS_UNALLOCATED;
4902169695Skan      regs->num_regs = 0;
4903169695Skan      regs->start = regs->end = (regoff_t *) 0;
4904169695Skan    }
4905169695Skan}
4906169695Skan#ifdef _LIBC
4907169695Skanweak_alias (__re_set_registers, re_set_registers)
4908169695Skan#endif
4909169695Skan
4910169695Skan/* Searching routines.  */
4911169695Skan
/* Single-string convenience wrapper around re_search_2.

   STRING (SIZE bytes) is passed as the second string with an empty
   first string, and the stop index is fixed at SIZE, so the caller
   cannot say where to stop matching.  STARTPOS, RANGE and REGS are
   forwarded unchanged and the return value is that of re_search_2.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  const int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size,
                      startpos, range, regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
4925169695Skan
4926169695Skan
/* Search for the compiled pattern in BUFP->buffer within the virtual
   concatenation of STRING1 and STRING2, whose lengths are SIZE1 and
   SIZE2.

   Matching is first attempted at index STARTPOS, then at
   STARTPOS + 1, and so on.  RANGE says how far to scan: RANGE = 0
   means try only at STARTPOS, and in general the last start tried is
   STARTPOS + RANGE.

   In REGS, return the indices of the virtual concatenation of STRING1
   and STRING2 that matched the entire BUFP->buffer and its contained
   subexpressions.

   Matching is not considered one past the index STOP in the virtual
   concatenation.

   The result is the position in the strings at which the match was
   found, -1 if there was no match, or -2 on error (such as failure
   stack overflow).

   The function itself only selects an engine: wcs_re_search_2 when
   MBS_SUPPORT is compiled in and MB_CUR_MAX != 1, byte_re_search_2
   otherwise.  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2,
                            startpos, range, regs, stop);
# endif

  return byte_re_search_2 (bufp, string1, size1, string2, size2,
                           startpos, range, regs, stop);
} /* re_search_2 */
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
4965169695Skan
4966169695Skan#endif /* not INSIDE_RECURSION */
4967169695Skan
4968169695Skan#ifdef INSIDE_RECURSION
4969169695Skan
/* FREE_VAR (var): release VAR if it is non-null, then reset it to NULL.

   The release goes through REGEX_FREE when MATCH_MAY_ALLOCATE is
   defined and through free otherwise.

   The expansion is wrapped in do { ... } while (0) so that it is a
   single statement.  The earlier two-statement form
   (`if (var) ...; var = NULL') could not be used as the body of an
   `if' that has an `else', and as the body of a bare `else' only its
   first half was governed by the condition.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do									\
    {									\
      if (var)								\
        REGEX_FREE (var);						\
      (var) = NULL;							\
    }									\
  while (0)
#else
# define FREE_VAR(var)							\
  do									\
    {									\
      if (var)								\
        free (var);							\
      (var) = NULL;							\
    }									\
  while (0)
#endif
4975169695Skan
#ifdef WCHAR
/* Size threshold used by the search routine below: a string whose
   byte size exceeds it gets its conversion buffers from TALLOC, a
   smaller one gets them from REGEX_TALLOC.  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-character copies and offset tables of both search
   strings.  The macro reads the caller's locals size1, size2,
   wcs_string1/2 and mbs_offset1/2.  For each string the release
   routine mirrors the allocation choice described above: free for
   sizes over MAX_ALLOCA_SIZE, FREE_VAR otherwise.  */
# define FREE_WCS_BUFFERS()						      \
  do									      \
    {									      \
      if (size1 <= MAX_ALLOCA_SIZE)					      \
        {								      \
          FREE_VAR (wcs_string1);					      \
          FREE_VAR (mbs_offset1);					      \
        }								      \
      else								      \
        {								      \
          free (wcs_string1);						      \
          free (mbs_offset1);						      \
        }								      \
      if (size2 <= MAX_ALLOCA_SIZE)					      \
        {								      \
          FREE_VAR (wcs_string2);					      \
          FREE_VAR (mbs_offset2);					      \
        }								      \
      else								      \
        {								      \
          free (wcs_string2);						      \
          free (mbs_offset2);						      \
        }								      \
    }									      \
  while (0)

#endif
5004169695Skan
5005169695Skan
5006169695Skanstatic int
5007169695SkanPREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
5008169695Skan                     int size1, const char *string2, int size2,
5009169695Skan                     int startpos, int range,
5010169695Skan                     struct re_registers *regs, int stop)
5011169695Skan{
5012169695Skan  int val;
5013169695Skan  register char *fastmap = bufp->fastmap;
5014169695Skan  register RE_TRANSLATE_TYPE translate = bufp->translate;
5015169695Skan  int total_size = size1 + size2;
5016169695Skan  int endpos = startpos + range;
5017169695Skan#ifdef WCHAR
5018169695Skan  /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5019169695Skan  wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
5020169695Skan  /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5021169695Skan  int wcs_size1 = 0, wcs_size2 = 0;
5022169695Skan  /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
5023169695Skan  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5024169695Skan  /* They hold whether each wchar_t is binary data or not.  */
5025169695Skan  char *is_binary = NULL;
5026169695Skan#endif /* WCHAR */
5027169695Skan
5028169695Skan  /* Check for out-of-range STARTPOS.  */
5029169695Skan  if (startpos < 0 || startpos > total_size)
5030169695Skan    return -1;
5031169695Skan
5032169695Skan  /* Fix up RANGE if it might eventually take us outside
5033169695Skan     the virtual concatenation of STRING1 and STRING2.
5034169695Skan     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
5035169695Skan  if (endpos < 0)
5036169695Skan    range = 0 - startpos;
5037169695Skan  else if (endpos > total_size)
5038169695Skan    range = total_size - startpos;
5039169695Skan
5040169695Skan  /* If the search isn't to be a backwards one, don't waste time in a
5041169695Skan     search for a pattern that must be anchored.  */
5042169695Skan  if (bufp->used > 0 && range > 0
5043169695Skan      && ((re_opcode_t) bufp->buffer[0] == begbuf
5044169695Skan	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
5045169695Skan	  || ((re_opcode_t) bufp->buffer[0] == begline
5046169695Skan	      && !bufp->newline_anchor)))
5047169695Skan    {
5048169695Skan      if (startpos > 0)
5049169695Skan	return -1;
5050169695Skan      else
5051169695Skan	range = 1;
5052169695Skan    }
5053169695Skan
5054169695Skan#ifdef emacs
5055169695Skan  /* In a forward search for something that starts with \=.
5056169695Skan     don't keep searching past point.  */
5057169695Skan  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5058169695Skan    {
5059169695Skan      range = PT - startpos;
5060169695Skan      if (range <= 0)
5061169695Skan	return -1;
5062169695Skan    }
5063169695Skan#endif /* emacs */
5064169695Skan
5065169695Skan  /* Update the fastmap now if not correct already.  */
5066169695Skan  if (fastmap && !bufp->fastmap_accurate)
5067169695Skan    if (re_compile_fastmap (bufp) == -2)
5068169695Skan      return -2;
5069169695Skan
5070169695Skan#ifdef WCHAR
5071169695Skan  /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5072169695Skan     fill them with converted string.  */
5073169695Skan  if (size1 != 0)
5074169695Skan    {
5075169695Skan      if (size1 > MAX_ALLOCA_SIZE)
5076169695Skan	{
5077169695Skan	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5078169695Skan	  mbs_offset1 = TALLOC (size1 + 1, int);
5079169695Skan	  is_binary = TALLOC (size1 + 1, char);
5080169695Skan	}
5081169695Skan      else
5082169695Skan	{
5083169695Skan	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5084169695Skan	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5085169695Skan	  is_binary = REGEX_TALLOC (size1 + 1, char);
5086169695Skan	}
5087169695Skan      if (!wcs_string1 || !mbs_offset1 || !is_binary)
5088169695Skan	{
5089169695Skan	  if (size1 > MAX_ALLOCA_SIZE)
5090169695Skan	    {
5091169695Skan	      free (wcs_string1);
5092169695Skan	      free (mbs_offset1);
5093169695Skan	      free (is_binary);
5094169695Skan	    }
5095169695Skan	  else
5096169695Skan	    {
5097169695Skan	      FREE_VAR (wcs_string1);
5098169695Skan	      FREE_VAR (mbs_offset1);
5099169695Skan	      FREE_VAR (is_binary);
5100169695Skan	    }
5101169695Skan	  return -2;
5102169695Skan	}
5103169695Skan      wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5104169695Skan				     mbs_offset1, is_binary);
5105169695Skan      wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
5106169695Skan      if (size1 > MAX_ALLOCA_SIZE)
5107169695Skan	free (is_binary);
5108169695Skan      else
5109169695Skan	FREE_VAR (is_binary);
5110169695Skan    }
5111169695Skan  if (size2 != 0)
5112169695Skan    {
5113169695Skan      if (size2 > MAX_ALLOCA_SIZE)
5114169695Skan	{
5115169695Skan	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5116169695Skan	  mbs_offset2 = TALLOC (size2 + 1, int);
5117169695Skan	  is_binary = TALLOC (size2 + 1, char);
5118169695Skan	}
5119169695Skan      else
5120169695Skan	{
5121169695Skan	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5122169695Skan	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5123169695Skan	  is_binary = REGEX_TALLOC (size2 + 1, char);
5124169695Skan	}
5125169695Skan      if (!wcs_string2 || !mbs_offset2 || !is_binary)
5126169695Skan	{
5127169695Skan	  FREE_WCS_BUFFERS ();
5128169695Skan	  if (size2 > MAX_ALLOCA_SIZE)
5129169695Skan	    free (is_binary);
5130169695Skan	  else
5131169695Skan	    FREE_VAR (is_binary);
5132169695Skan	  return -2;
5133169695Skan	}
5134169695Skan      wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5135169695Skan				     mbs_offset2, is_binary);
5136169695Skan      wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
5137169695Skan      if (size2 > MAX_ALLOCA_SIZE)
5138169695Skan	free (is_binary);
5139169695Skan      else
5140169695Skan	FREE_VAR (is_binary);
5141169695Skan    }
5142169695Skan#endif /* WCHAR */
5143169695Skan
5144169695Skan
5145169695Skan  /* Loop through the string, looking for a place to start matching.  */
5146169695Skan  for (;;)
5147169695Skan    {
5148169695Skan      /* If a fastmap is supplied, skip quickly over characters that
5149169695Skan         cannot be the start of a match.  If the pattern can match the
5150169695Skan         null string, however, we don't need to skip characters; we want
5151169695Skan         the first null string.  */
5152169695Skan      if (fastmap && startpos < total_size && !bufp->can_be_null)
5153169695Skan	{
5154169695Skan	  if (range > 0)	/* Searching forwards.  */
5155169695Skan	    {
5156169695Skan	      register const char *d;
5157169695Skan	      register int lim = 0;
5158169695Skan	      int irange = range;
5159169695Skan
5160169695Skan              if (startpos < size1 && startpos + range >= size1)
5161169695Skan                lim = range - (size1 - startpos);
5162169695Skan
5163169695Skan	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5164169695Skan
5165169695Skan              /* Written out as an if-else to avoid testing `translate'
5166169695Skan                 inside the loop.  */
5167169695Skan	      if (translate)
5168169695Skan                while (range > lim
5169169695Skan                       && !fastmap[(unsigned char)
5170169695Skan				   translate[(unsigned char) *d++]])
5171169695Skan                  range--;
5172169695Skan	      else
5173169695Skan                while (range > lim && !fastmap[(unsigned char) *d++])
5174169695Skan                  range--;
5175169695Skan
5176169695Skan	      startpos += irange - range;
5177169695Skan	    }
5178169695Skan	  else				/* Searching backwards.  */
5179169695Skan	    {
5180169695Skan	      register CHAR_T c = (size1 == 0 || startpos >= size1
5181169695Skan				      ? string2[startpos - size1]
5182169695Skan				      : string1[startpos]);
5183169695Skan
5184169695Skan	      if (!fastmap[(unsigned char) TRANSLATE (c)])
5185169695Skan		goto advance;
5186169695Skan	    }
5187169695Skan	}
5188169695Skan
5189169695Skan      /* If can't match the null string, and that's all we have left, fail.  */
5190169695Skan      if (range >= 0 && startpos == total_size && fastmap
5191169695Skan          && !bufp->can_be_null)
5192169695Skan       {
5193169695Skan#ifdef WCHAR
5194169695Skan         FREE_WCS_BUFFERS ();
5195169695Skan#endif
5196169695Skan         return -1;
5197169695Skan       }
5198169695Skan
5199169695Skan#ifdef WCHAR
5200169695Skan      val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5201169695Skan				     size2, startpos, regs, stop,
5202169695Skan				     wcs_string1, wcs_size1,
5203169695Skan				     wcs_string2, wcs_size2,
5204169695Skan				     mbs_offset1, mbs_offset2);
5205169695Skan#else /* BYTE */
5206169695Skan      val = byte_re_match_2_internal (bufp, string1, size1, string2,
5207169695Skan				      size2, startpos, regs, stop);
5208169695Skan#endif /* BYTE */
5209169695Skan
5210169695Skan#ifndef REGEX_MALLOC
5211169695Skan# ifdef C_ALLOCA
5212169695Skan      alloca (0);
5213169695Skan# endif
5214169695Skan#endif
5215169695Skan
5216169695Skan      if (val >= 0)
5217169695Skan	{
5218169695Skan#ifdef WCHAR
5219169695Skan	  FREE_WCS_BUFFERS ();
5220169695Skan#endif
5221169695Skan	  return startpos;
5222169695Skan	}
5223169695Skan
5224169695Skan      if (val == -2)
5225169695Skan	{
5226169695Skan#ifdef WCHAR
5227169695Skan	  FREE_WCS_BUFFERS ();
5228169695Skan#endif
5229169695Skan	  return -2;
5230169695Skan	}
5231169695Skan
5232169695Skan    advance:
5233169695Skan      if (!range)
5234169695Skan        break;
5235169695Skan      else if (range > 0)
5236169695Skan        {
5237169695Skan          range--;
5238169695Skan          startpos++;
5239169695Skan        }
5240169695Skan      else
5241169695Skan        {
5242169695Skan          range++;
5243169695Skan          startpos--;
5244169695Skan        }
5245169695Skan    }
5246169695Skan#ifdef WCHAR
5247169695Skan  FREE_WCS_BUFFERS ();
5248169695Skan#endif
5249169695Skan  return -1;
5250169695Skan}
5251169695Skan
#ifdef WCHAR
/* Convert PTR, a pointer into one of the wchar_t search strings
   `string1' and `string2', into a multibyte string offset.  The
   offset is looked up in mbs_offset1 or mbs_offset2 (see
   convert_mbs_to_wcs); a missing table counts as offset 0.  For a
   pointer into the second string, csize1 is added.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t)								\
   (FIRST_STRING_P (ptr)						\
    ? (mbs_offset1 != NULL ? mbs_offset1[(ptr) - string1] : 0)		\
    : ((mbs_offset2 != NULL ? mbs_offset2[(ptr) - string2] : 0)	\
       + csize1)))
#else /* BYTE */
/* Convert PTR, a pointer into one of the search strings `string1' and
   `string2', into an offset.  For a pointer into the second string,
   size1 is added to its distance from `string2'.  */
# define POINTER_TO_OFFSET(ptr)						\
  ((regoff_t)								\
   (FIRST_STRING_P (ptr)						\
    ? (ptr) - string1							\
    : (ptr) - string2 + size1))
#endif /* WCHAR */
5270169695Skan
/* Macros for dealing with the split strings in re_match_2.  They all
   read (and PREFETCH also writes) locals of the function that uses
   them.  */

/* True while the current data segment is the one ending at
   end_match_1.  */
#define MATCHING_IN_FIRST_STRING  (end_match_1 == dend)

/* Call before fetching a character with *d.  If d has reached the end
   of the current segment, this either jumps to `fail' (the segment
   was the one ending at end_match_2) or moves d to the start of
   string2 and makes end_match_2 the new segment end.  */
#define PREFETCH()							\
  while (d == dend)							\
    {									\
      if (dend == end_match_2)						\
        goto fail;							\
      d = string2;							\
      dend = end_match_2;						\
    }

/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   AT_STRINGS_BEG is also true whenever size2 is zero.  */
#define AT_STRINGS_BEG(d)						\
  ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d)						\
  ((d) == end2)
5292169695Skan
5293169695Skan
/* WORDCHAR_P (d): is the character D points at word-constituent?

   Two positions are redirected before the character is read: when D
   equals end1 (just past string1) the first character of string2 is
   used, and when D equals string2 - 1 (just before string2) the last
   character of string1 is used.  */
#ifdef WCHAR
/* Wide-character build: use the internationalized API instead of
   SYNTAX.  A character counts as a word character when iswalnum
   accepts it or when it is an underscore.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t) ((d) == end1 ? *string2				\
                       : (d) == string2 - 1 ? *(end1 - 1)		\
                       : *(d))) != 0					\
   || ((d) == end1 ? *string2						\
       : (d) == string2 - 1 ? *(end1 - 1)				\
       : *(d)) == L'_')
#else /* BYTE */
/* Byte build: a character counts as a word character when SYNTAX
   classifies it as Sword.  */
# define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
           : (d) == string2 - 1 ? *(end1 - 1)				\
           : *(d))							\
   == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
5320169695Skan
/* FREE_VARIABLES (): free everything the matcher allocated.

   Two independent groups of storage are involved, each present only
   in some builds:

     - with MATCH_MAY_ALLOCATE, the failure stack and the register
       arrays (regstart, regend, old_*, best_*, reg_info and the two
       dummies);
     - with WCHAR, string1, string2 and the two mbs_offset tables,
       which are released only when cant_free_wcs_buf is zero.

   When neither applies the macro expands to a no-op expression.  */
#ifdef WCHAR
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# endif /* not MATCH_MAY_ALLOCATE */
#else /* BYTE */
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* not MATCH_MAY_ALLOCATE */
#endif /* WCHAR */
5375169695Skan
/* Sentinels meaning "no register is active".  They are chosen under
   several constraints:

     - They must not be valid register values.  Register numbers fit
       in one byte of the pattern, which limits us to 255 registers,
       so any number larger than 255 will do.
     - They must differ by 1, because of NUM_FAILURE_ITEMS above.
     - The value for the lowest register must be larger than the value
       for the highest register, so we do not try to actually save any
       registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG	(1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG	(NO_HIGHEST_ACTIVE_REG + 1)
5385169695Skan
5386169695Skan#else /* not INSIDE_RECURSION */
5387169695Skan/* Matching routines.  */
5388169695Skan
5389169695Skan#ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.

   STRING (SIZE bytes) is given to the internal matcher as the second
   string, with a null, zero-length first string, and SIZE doubles as
   the stop index.  POS and REGS are forwarded unchanged and the
   matcher's result is returned.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int result;

# ifdef MBS_SUPPORT
  /* The wide-character matcher is used whenever the locale's
     MB_CUR_MAX is not 1.  It is given no pre-converted buffers.  */
  result = (MB_CUR_MAX != 1
            ? wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                       pos, regs, size,
                                       NULL, 0, NULL, 0, NULL, NULL)
            : byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                        pos, regs, size));
# else
  result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                     pos, regs, size);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* NOTE(review): presumably forces the alloca cleanup after the
     matcher has returned -- confirm against the C_ALLOCA
     implementation.  */
  alloca (0);
# endif
  return result;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
5416169695Skan#endif /* not emacs */
5417169695Skan
5418169695Skan#endif /* not INSIDE_RECURSION */
5419169695Skan
5420169695Skan#ifdef INSIDE_RECURSION
5421169695Skanstatic boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
5422169695Skan                                                  UCHAR_T *end,
5423169695Skan					PREFIX(register_info_type) *reg_info);
5424169695Skanstatic boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
5425169695Skan                                                UCHAR_T *end,
5426169695Skan					PREFIX(register_info_type) *reg_info);
5427169695Skanstatic boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
5428169695Skan                                                      UCHAR_T *end,
5429169695Skan					PREFIX(register_info_type) *reg_info);
5430169695Skanstatic int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
5431169695Skan                                   int len, char *translate);
5432169695Skan#else /* not INSIDE_RECURSION */
5433169695Skan
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2, of length SIZE1 and
   SIZE2 respectively.  Matching starts at POS and stops at STOP.

   Offsets for the substring each group matched may be stored in REGS.
   NOTE(review): whether they are stored depends on REGS being
   non-null and on BUFP's `no_sub' field; the test itself lives in the
   internal matcher -- confirm the exact condition there.  See the
   documentation for exactly how many groups are filled.

   Returns -1 if there is no match and -2 on an internal error (such
   as the failure stack overflowing).  Otherwise returns the length of
   the matched substring.  */

int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int result;

# ifdef MBS_SUPPORT
  /* The wide-character matcher is used whenever the locale's
     MB_CUR_MAX is not 1.  It is given no pre-converted buffers.  */
  result = (MB_CUR_MAX != 1
            ? wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
                                       pos, regs, stop,
                                       NULL, 0, NULL, 0, NULL, NULL)
            : byte_re_match_2_internal (bufp, string1, size1, string2, size2,
                                        pos, regs, stop));
# else
  result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
                                     pos, regs, stop);
# endif

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* NOTE(review): presumably forces the alloca cleanup after the
     matcher has returned -- confirm against the C_ALLOCA
     implementation.  */
  alloca (0);
#endif
  return result;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
5473169695Skan
5474169695Skan#endif /* not INSIDE_RECURSION */
5475169695Skan
5476169695Skan#ifdef INSIDE_RECURSION
5477169695Skan
5478169695Skan#ifdef WCHAR
static int count_mbs_length (int *, int);

/* Look up LENGTH, a length in the multibyte string that OFFSET_BUFFER
   was built for, and return the index at which OFFSET_BUFFER holds
   exactly that value -- the number of wchar_t characters the
   substring from 0 to LENGTH occupies.  See convert_mbs_to_wcs for
   how the table is built.

   Returns -1 when LENGTH is negative or when no entry equals LENGTH,
   and 0 when OFFSET_BUFFER is null.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lo, hi, mid;

  /* Reject an invalid size.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* Without multibyte characters offset_buffer[i] == i, so the answer
     is LENGTH itself.  Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Binary search over [0, LENGTH].  LENGTH is a valid upper bound
     because offset_buffer[i] >= i for all i.  The search ends without
     a result once the midpoint coincides with one of the bounds.  */
  lo = 0;
  hi = length;
  for (mid = (lo + hi) / 2; mid != lo && mid != hi; mid = (lo + hi) / 2)
    {
      int offset = offset_buffer[mid];

      if (offset == length)
        return mid;
      if (offset > length)
        hi = mid;
      else
        lo = mid;
    }

  return -1;
}
5522169695Skan#endif /* WCHAR */
5523169695Skan
5524169695Skan/* This is a separate function so that we can force an alloca cleanup
5525169695Skan   afterwards.  */
5526169695Skan#ifdef WCHAR
5527169695Skanstatic int
5528169695Skanwcs_re_match_2_internal (struct re_pattern_buffer *bufp,
5529169695Skan                         const char *cstring1, int csize1,
5530169695Skan                         const char *cstring2, int csize2,
5531169695Skan                         int pos,
5532169695Skan			 struct re_registers *regs,
5533169695Skan                         int stop,
5534169695Skan     /* string1 == string2 == NULL means string1/2, size1/2 and
5535169695Skan	mbs_offset1/2 need seting up in this function.  */
5536169695Skan     /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5537169695Skan                         wchar_t *string1, int size1,
5538169695Skan                         wchar_t *string2, int size2,
5539169695Skan     /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
5540169695Skan			 int *mbs_offset1, int *mbs_offset2)
5541169695Skan#else /* BYTE */
5542169695Skanstatic int
5543169695Skanbyte_re_match_2_internal (struct re_pattern_buffer *bufp,
5544169695Skan                          const char *string1, int size1,
5545169695Skan                          const char *string2, int size2,
5546169695Skan                          int pos,
5547169695Skan			  struct re_registers *regs, int stop)
5548169695Skan#endif /* BYTE */
5549169695Skan{
5550169695Skan  /* General temporaries.  */
5551169695Skan  int mcnt;
5552169695Skan  UCHAR_T *p1;
5553169695Skan#ifdef WCHAR
5554169695Skan  /* They hold whether each wchar_t is binary data or not.  */
5555169695Skan  char *is_binary = NULL;
5556169695Skan  /* If true, we can't free string1/2, mbs_offset1/2.  */
5557169695Skan  int cant_free_wcs_buf = 1;
5558169695Skan#endif /* WCHAR */
5559169695Skan
5560169695Skan  /* Just past the end of the corresponding string.  */
5561169695Skan  const CHAR_T *end1, *end2;
5562169695Skan
5563169695Skan  /* Pointers into string1 and string2, just past the last characters in
5564169695Skan     each to consider matching.  */
5565169695Skan  const CHAR_T *end_match_1, *end_match_2;
5566169695Skan
5567169695Skan  /* Where we are in the data, and the end of the current string.  */
5568169695Skan  const CHAR_T *d, *dend;
5569169695Skan
5570169695Skan  /* Where we are in the pattern, and the end of the pattern.  */
5571169695Skan#ifdef WCHAR
5572169695Skan  UCHAR_T *pattern, *p;
5573169695Skan  register UCHAR_T *pend;
5574169695Skan#else /* BYTE */
5575169695Skan  UCHAR_T *p = bufp->buffer;
5576169695Skan  register UCHAR_T *pend = p + bufp->used;
5577169695Skan#endif /* WCHAR */
5578169695Skan
5579169695Skan  /* Mark the opcode just after a start_memory, so we can test for an
5580169695Skan     empty subpattern when we get to the stop_memory.  */
5581169695Skan  UCHAR_T *just_past_start_mem = 0;
5582169695Skan
5583169695Skan  /* We use this to map every character in the string.  */
5584169695Skan  RE_TRANSLATE_TYPE translate = bufp->translate;
5585169695Skan
5586169695Skan  /* Failure point stack.  Each place that can handle a failure further
5587169695Skan     down the line pushes a failure point on this stack.  It consists of
5588169695Skan     restart, regend, and reg_info for all registers corresponding to
5589169695Skan     the subexpressions we're currently inside, plus the number of such
5590169695Skan     registers, and, finally, two char *'s.  The first char * is where
5591169695Skan     to resume scanning the pattern; the second one is where to resume
5592169695Skan     scanning the strings.  If the latter is zero, the failure point is
5593169695Skan     a ``dummy''; if a failure happens and the failure point is a dummy,
5594169695Skan     it gets discarded and the next next one is tried.  */
5595169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5596169695Skan  PREFIX(fail_stack_type) fail_stack;
5597169695Skan#endif
5598169695Skan#ifdef DEBUG
5599169695Skan  static unsigned failure_id;
5600169695Skan  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5601169695Skan#endif
5602169695Skan
5603169695Skan#ifdef REL_ALLOC
5604169695Skan  /* This holds the pointer to the failure stack, when
5605169695Skan     it is allocated relocatably.  */
5606169695Skan  fail_stack_elt_t *failure_stack_ptr;
5607169695Skan#endif
5608169695Skan
5609169695Skan  /* We fill all the registers internally, independent of what we
5610169695Skan     return, for use in backreferences.  The number here includes
5611169695Skan     an element for register zero.  */
5612169695Skan  size_t num_regs = bufp->re_nsub + 1;
5613169695Skan
5614169695Skan  /* The currently active registers.  */
5615169695Skan  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5616169695Skan  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5617169695Skan
5618169695Skan  /* Information on the contents of registers. These are pointers into
5619169695Skan     the input strings; they record just what was matched (on this
5620169695Skan     attempt) by a subexpression part of the pattern, that is, the
5621169695Skan     regnum-th regstart pointer points to where in the input we began
5622169695Skan     matching and the regnum-th regend points to right after where we
5623169695Skan     stopped matching the regnum-th subexpression.  (The zeroth register
5624169695Skan     keeps track of what the whole pattern matches.)  */
5625169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5626169695Skan  const CHAR_T **regstart, **regend;
5627169695Skan#endif
5628169695Skan
5629169695Skan  /* If a group that's operated upon by a repetition operator fails to
5630169695Skan     match anything, then the register for its start will need to be
5631169695Skan     restored because it will have been set to wherever in the string we
5632169695Skan     are when we last see its open-group operator.  Similarly for a
5633169695Skan     register's end.  */
5634169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5635169695Skan  const CHAR_T **old_regstart, **old_regend;
5636169695Skan#endif
5637169695Skan
5638169695Skan  /* The is_active field of reg_info helps us keep track of which (possibly
5639169695Skan     nested) subexpressions we are currently in. The matched_something
5640169695Skan     field of reg_info[reg_num] helps us tell whether or not we have
5641169695Skan     matched any of the pattern so far this time through the reg_num-th
5642169695Skan     subexpression.  These two fields get reset each time through any
5643169695Skan     loop their register is in.  */
5644169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5645169695Skan  PREFIX(register_info_type) *reg_info;
5646169695Skan#endif
5647169695Skan
5648169695Skan  /* The following record the register info as found in the above
5649169695Skan     variables when we find a match better than any we've seen before.
5650169695Skan     This happens as we backtrack through the failure points, which in
5651169695Skan     turn happens only if we have not yet matched the entire string. */
5652169695Skan  unsigned best_regs_set = false;
5653169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5654169695Skan  const CHAR_T **best_regstart, **best_regend;
5655169695Skan#endif
5656169695Skan
5657169695Skan  /* Logically, this is `best_regend[0]'.  But we don't want to have to
5658169695Skan     allocate space for that if we're not allocating space for anything
5659169695Skan     else (see below).  Also, we never need info about register 0 for
5660169695Skan     any of the other register vectors, and it seems rather a kludge to
5661169695Skan     treat `best_regend' differently than the rest.  So we keep track of
5662169695Skan     the end of the best match so far in a separate variable.  We
5663169695Skan     initialize this to NULL so that when we backtrack the first time
5664169695Skan     and need to test it, it's not garbage.  */
5665169695Skan  const CHAR_T *match_end = NULL;
5666169695Skan
5667169695Skan  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
5668169695Skan  int set_regs_matched_done = 0;
5669169695Skan
5670169695Skan  /* Used when we pop values we don't care about.  */
5671169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5672169695Skan  const CHAR_T **reg_dummy;
5673169695Skan  PREFIX(register_info_type) *reg_info_dummy;
5674169695Skan#endif
5675169695Skan
5676169695Skan#ifdef DEBUG
5677169695Skan  /* Counts the total number of registers pushed.  */
5678169695Skan  unsigned num_regs_pushed = 0;
5679169695Skan#endif
5680169695Skan
5681169695Skan  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5682169695Skan
5683169695Skan  INIT_FAIL_STACK ();
5684169695Skan
5685169695Skan#ifdef MATCH_MAY_ALLOCATE
5686169695Skan  /* Do not bother to initialize all the register variables if there are
5687169695Skan     no groups in the pattern, as it takes a fair amount of time.  If
5688169695Skan     there are groups, we include space for register 0 (the whole
5689169695Skan     pattern), even though we never use it, since it simplifies the
5690169695Skan     array indexing.  We should fix this.  */
5691169695Skan  if (bufp->re_nsub)
5692169695Skan    {
5693169695Skan      regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5694169695Skan      regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5695169695Skan      old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5696169695Skan      old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5697169695Skan      best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5698169695Skan      best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5699169695Skan      reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5700169695Skan      reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5701169695Skan      reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5702169695Skan
5703169695Skan      if (!(regstart && regend && old_regstart && old_regend && reg_info
5704169695Skan            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5705169695Skan        {
5706169695Skan          FREE_VARIABLES ();
5707169695Skan          return -2;
5708169695Skan        }
5709169695Skan    }
5710169695Skan  else
5711169695Skan    {
5712169695Skan      /* We must initialize all our variables to NULL, so that
5713169695Skan         `FREE_VARIABLES' doesn't try to free them.  */
5714169695Skan      regstart = regend = old_regstart = old_regend = best_regstart
5715169695Skan        = best_regend = reg_dummy = NULL;
5716169695Skan      reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5717169695Skan    }
5718169695Skan#endif /* MATCH_MAY_ALLOCATE */
5719169695Skan
5720169695Skan  /* The starting position is bogus.  */
5721169695Skan#ifdef WCHAR
5722169695Skan  if (pos < 0 || pos > csize1 + csize2)
5723169695Skan#else /* BYTE */
5724169695Skan  if (pos < 0 || pos > size1 + size2)
5725169695Skan#endif
5726169695Skan    {
5727169695Skan      FREE_VARIABLES ();
5728169695Skan      return -1;
5729169695Skan    }
5730169695Skan
5731169695Skan#ifdef WCHAR
5732169695Skan  /* Allocate wchar_t array for string1 and string2 and
5733169695Skan     fill them with converted string.  */
5734169695Skan  if (string1 == NULL && string2 == NULL)
5735169695Skan    {
5736169695Skan      /* We need to set up the buffers here.  */
5737169695Skan
5738169695Skan      /* We must free wcs buffers in this function.  */
5739169695Skan      cant_free_wcs_buf = 0;
5740169695Skan
5741169695Skan      if (csize1 != 0)
5742169695Skan	{
5743169695Skan	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5744169695Skan	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5745169695Skan	  is_binary = REGEX_TALLOC (csize1 + 1, char);
5746169695Skan	  if (!string1 || !mbs_offset1 || !is_binary)
5747169695Skan	    {
5748169695Skan	      FREE_VAR (string1);
5749169695Skan	      FREE_VAR (mbs_offset1);
5750169695Skan	      FREE_VAR (is_binary);
5751169695Skan	      return -2;
5752169695Skan	    }
5753169695Skan	}
5754169695Skan      if (csize2 != 0)
5755169695Skan	{
5756169695Skan	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5757169695Skan	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5758169695Skan	  is_binary = REGEX_TALLOC (csize2 + 1, char);
5759169695Skan	  if (!string2 || !mbs_offset2 || !is_binary)
5760169695Skan	    {
5761169695Skan	      FREE_VAR (string1);
5762169695Skan	      FREE_VAR (mbs_offset1);
5763169695Skan	      FREE_VAR (string2);
5764169695Skan	      FREE_VAR (mbs_offset2);
5765169695Skan	      FREE_VAR (is_binary);
5766169695Skan	      return -2;
5767169695Skan	    }
5768169695Skan	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5769169695Skan				     mbs_offset2, is_binary);
5770169695Skan	  string2[size2] = L'\0'; /* for a sentinel  */
5771169695Skan	  FREE_VAR (is_binary);
5772169695Skan	}
5773169695Skan    }
5774169695Skan
5775169695Skan  /* We need to cast pattern to (wchar_t*), because we cast this compiled
5776169695Skan     pattern to (char*) in regex_compile.  */
5777169695Skan  p = pattern = (CHAR_T*)bufp->buffer;
5778169695Skan  pend = (CHAR_T*)(bufp->buffer + bufp->used);
5779169695Skan
5780169695Skan#endif /* WCHAR */
5781169695Skan
5782169695Skan  /* Initialize subexpression text positions to -1 to mark ones that no
5783169695Skan     start_memory/stop_memory has been seen for. Also initialize the
5784169695Skan     register information struct.  */
5785169695Skan  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5786169695Skan    {
5787169695Skan      regstart[mcnt] = regend[mcnt]
5788169695Skan        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
5789169695Skan
5790169695Skan      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
5791169695Skan      IS_ACTIVE (reg_info[mcnt]) = 0;
5792169695Skan      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5793169695Skan      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5794169695Skan    }
5795169695Skan
5796169695Skan  /* We move `string1' into `string2' if the latter's empty -- but not if
5797169695Skan     `string1' is null.  */
5798169695Skan  if (size2 == 0 && string1 != NULL)
5799169695Skan    {
5800169695Skan      string2 = string1;
5801169695Skan      size2 = size1;
5802169695Skan      string1 = 0;
5803169695Skan      size1 = 0;
5804169695Skan#ifdef WCHAR
5805169695Skan      mbs_offset2 = mbs_offset1;
5806169695Skan      csize2 = csize1;
5807169695Skan      mbs_offset1 = NULL;
5808169695Skan      csize1 = 0;
5809169695Skan#endif
5810169695Skan    }
5811169695Skan  end1 = string1 + size1;
5812169695Skan  end2 = string2 + size2;
5813169695Skan
5814169695Skan  /* Compute where to stop matching, within the two strings.  */
5815169695Skan#ifdef WCHAR
5816169695Skan  if (stop <= csize1)
5817169695Skan    {
5818169695Skan      mcnt = count_mbs_length(mbs_offset1, stop);
5819169695Skan      end_match_1 = string1 + mcnt;
5820169695Skan      end_match_2 = string2;
5821169695Skan    }
5822169695Skan  else
5823169695Skan    {
5824169695Skan      if (stop > csize1 + csize2)
5825169695Skan	stop = csize1 + csize2;
5826169695Skan      end_match_1 = end1;
5827169695Skan      mcnt = count_mbs_length(mbs_offset2, stop-csize1);
5828169695Skan      end_match_2 = string2 + mcnt;
5829169695Skan    }
5830169695Skan  if (mcnt < 0)
5831169695Skan    { /* count_mbs_length returned an error.  */
5832169695Skan      FREE_VARIABLES ();
5833169695Skan      return -1;
5834169695Skan    }
5835169695Skan#else
5836169695Skan  if (stop <= size1)
5837169695Skan    {
5838169695Skan      end_match_1 = string1 + stop;
5839169695Skan      end_match_2 = string2;
5840169695Skan    }
5841169695Skan  else
5842169695Skan    {
5843169695Skan      end_match_1 = end1;
5844169695Skan      end_match_2 = string2 + stop - size1;
5845169695Skan    }
5846169695Skan#endif /* WCHAR */
5847169695Skan
5848169695Skan  /* `p' scans through the pattern as `d' scans through the data.
5849169695Skan     `dend' is the end of the input string that `d' points within.  `d'
5850169695Skan     is advanced into the following input string whenever necessary, but
5851169695Skan     this happens before fetching; therefore, at the beginning of the
5852169695Skan     loop, `d' can be pointing at the end of a string, but it cannot
5853169695Skan     equal `string2'.  */
5854169695Skan#ifdef WCHAR
5855169695Skan  if (size1 > 0 && pos <= csize1)
5856169695Skan    {
5857169695Skan      mcnt = count_mbs_length(mbs_offset1, pos);
5858169695Skan      d = string1 + mcnt;
5859169695Skan      dend = end_match_1;
5860169695Skan    }
5861169695Skan  else
5862169695Skan    {
5863169695Skan      mcnt = count_mbs_length(mbs_offset2, pos-csize1);
5864169695Skan      d = string2 + mcnt;
5865169695Skan      dend = end_match_2;
5866169695Skan    }
5867169695Skan
5868169695Skan  if (mcnt < 0)
5869169695Skan    { /* count_mbs_length returned an error.  */
5870169695Skan      FREE_VARIABLES ();
5871169695Skan      return -1;
5872169695Skan    }
5873169695Skan#else
5874169695Skan  if (size1 > 0 && pos <= size1)
5875169695Skan    {
5876169695Skan      d = string1 + pos;
5877169695Skan      dend = end_match_1;
5878169695Skan    }
5879169695Skan  else
5880169695Skan    {
5881169695Skan      d = string2 + pos - size1;
5882169695Skan      dend = end_match_2;
5883169695Skan    }
5884169695Skan#endif /* WCHAR */
5885169695Skan
5886169695Skan  DEBUG_PRINT1 ("The compiled pattern is:\n");
5887169695Skan  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5888169695Skan  DEBUG_PRINT1 ("The string to match is: `");
5889169695Skan  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5890169695Skan  DEBUG_PRINT1 ("'\n");
5891169695Skan
5892169695Skan  /* This loops over pattern commands.  It exits by returning from the
5893169695Skan     function if the match is complete, or it drops through if the match
5894169695Skan     fails at this starting point in the input data.  */
5895169695Skan  for (;;)
5896169695Skan    {
5897169695Skan#ifdef _LIBC
5898169695Skan      DEBUG_PRINT2 ("\n%p: ", p);
5899169695Skan#else
5900169695Skan      DEBUG_PRINT2 ("\n0x%x: ", p);
5901169695Skan#endif
5902169695Skan
5903169695Skan      if (p == pend)
5904169695Skan	{ /* End of pattern means we might have succeeded.  */
5905169695Skan          DEBUG_PRINT1 ("end of pattern ... ");
5906169695Skan
5907169695Skan	  /* If we haven't matched the entire string, and we want the
5908169695Skan             longest match, try backtracking.  */
5909169695Skan          if (d != end_match_2)
5910169695Skan	    {
5911169695Skan	      /* 1 if this match ends in the same string (string1 or string2)
5912169695Skan		 as the best previous match.  */
5913169695Skan	      boolean same_str_p = (FIRST_STRING_P (match_end)
5914169695Skan				    == MATCHING_IN_FIRST_STRING);
5915169695Skan	      /* 1 if this match is the best seen so far.  */
5916169695Skan	      boolean best_match_p;
5917169695Skan
5918169695Skan	      /* AIX compiler got confused when this was combined
5919169695Skan		 with the previous declaration.  */
5920169695Skan	      if (same_str_p)
5921169695Skan		best_match_p = d > match_end;
5922169695Skan	      else
5923169695Skan		best_match_p = !MATCHING_IN_FIRST_STRING;
5924169695Skan
5925169695Skan              DEBUG_PRINT1 ("backtracking.\n");
5926169695Skan
5927169695Skan              if (!FAIL_STACK_EMPTY ())
5928169695Skan                { /* More failure points to try.  */
5929169695Skan
5930169695Skan                  /* If exceeds best match so far, save it.  */
5931169695Skan                  if (!best_regs_set || best_match_p)
5932169695Skan                    {
5933169695Skan                      best_regs_set = true;
5934169695Skan                      match_end = d;
5935169695Skan
5936169695Skan                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5937169695Skan
5938169695Skan                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5939169695Skan                        {
5940169695Skan                          best_regstart[mcnt] = regstart[mcnt];
5941169695Skan                          best_regend[mcnt] = regend[mcnt];
5942169695Skan                        }
5943169695Skan                    }
5944169695Skan                  goto fail;
5945169695Skan                }
5946169695Skan
5947169695Skan              /* If no failure points, don't restore garbage.  And if
5948169695Skan                 last match is real best match, don't restore second
5949169695Skan                 best one. */
5950169695Skan              else if (best_regs_set && !best_match_p)
5951169695Skan                {
5952169695Skan  	        restore_best_regs:
5953169695Skan                  /* Restore best match.  It may happen that `dend ==
5954169695Skan                     end_match_1' while the restored d is in string2.
5955169695Skan                     For example, the pattern `x.*y.*z' against the
5956169695Skan                     strings `x-' and `y-z-', if the two strings are
5957169695Skan                     not consecutive in memory.  */
5958169695Skan                  DEBUG_PRINT1 ("Restoring best registers.\n");
5959169695Skan
5960169695Skan                  d = match_end;
5961169695Skan                  dend = ((d >= string1 && d <= end1)
5962169695Skan		           ? end_match_1 : end_match_2);
5963169695Skan
5964169695Skan		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5965169695Skan		    {
5966169695Skan		      regstart[mcnt] = best_regstart[mcnt];
5967169695Skan		      regend[mcnt] = best_regend[mcnt];
5968169695Skan		    }
5969169695Skan                }
5970169695Skan            } /* d != end_match_2 */
5971169695Skan
5972169695Skan	succeed_label:
5973169695Skan          DEBUG_PRINT1 ("Accepting match.\n");
5974169695Skan          /* If caller wants register contents data back, do it.  */
5975169695Skan          if (regs && !bufp->no_sub)
5976169695Skan	    {
5977169695Skan	      /* Have the register data arrays been allocated?  */
5978169695Skan              if (bufp->regs_allocated == REGS_UNALLOCATED)
5979169695Skan                { /* No.  So allocate them with malloc.  We need one
5980169695Skan                     extra element beyond `num_regs' for the `-1' marker
5981169695Skan                     GNU code uses.  */
5982169695Skan                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5983169695Skan                  regs->start = TALLOC (regs->num_regs, regoff_t);
5984169695Skan                  regs->end = TALLOC (regs->num_regs, regoff_t);
5985169695Skan                  if (regs->start == NULL || regs->end == NULL)
5986169695Skan		    {
5987169695Skan		      FREE_VARIABLES ();
5988169695Skan		      return -2;
5989169695Skan		    }
5990169695Skan                  bufp->regs_allocated = REGS_REALLOCATE;
5991169695Skan                }
5992169695Skan              else if (bufp->regs_allocated == REGS_REALLOCATE)
5993169695Skan                { /* Yes.  If we need more elements than were already
5994169695Skan                     allocated, reallocate them.  If we need fewer, just
5995169695Skan                     leave it alone.  */
5996169695Skan                  if (regs->num_regs < num_regs + 1)
5997169695Skan                    {
5998169695Skan                      regs->num_regs = num_regs + 1;
5999169695Skan                      RETALLOC (regs->start, regs->num_regs, regoff_t);
6000169695Skan                      RETALLOC (regs->end, regs->num_regs, regoff_t);
6001169695Skan                      if (regs->start == NULL || regs->end == NULL)
6002169695Skan			{
6003169695Skan			  FREE_VARIABLES ();
6004169695Skan			  return -2;
6005169695Skan			}
6006169695Skan                    }
6007169695Skan                }
6008169695Skan              else
6009169695Skan		{
6010169695Skan		  /* These braces fend off a "empty body in an else-statement"
6011169695Skan		     warning under GCC when assert expands to nothing.  */
6012169695Skan		  assert (bufp->regs_allocated == REGS_FIXED);
6013169695Skan		}
6014169695Skan
6015169695Skan              /* Convert the pointer data in `regstart' and `regend' to
6016169695Skan                 indices.  Register zero has to be set differently,
6017169695Skan                 since we haven't kept track of any info for it.  */
6018169695Skan              if (regs->num_regs > 0)
6019169695Skan                {
6020169695Skan                  regs->start[0] = pos;
6021169695Skan#ifdef WCHAR
6022169695Skan		  if (MATCHING_IN_FIRST_STRING)
6023169695Skan		    regs->end[0] = mbs_offset1 != NULL ?
6024169695Skan					mbs_offset1[d-string1] : 0;
6025169695Skan		  else
6026169695Skan		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
6027169695Skan					     mbs_offset2[d-string2] : 0);
6028169695Skan#else
6029169695Skan                  regs->end[0] = (MATCHING_IN_FIRST_STRING
6030169695Skan				  ? ((regoff_t) (d - string1))
6031169695Skan			          : ((regoff_t) (d - string2 + size1)));
6032169695Skan#endif /* WCHAR */
6033169695Skan                }
6034169695Skan
6035169695Skan              /* Go through the first `min (num_regs, regs->num_regs)'
6036169695Skan                 registers, since that is all we initialized.  */
6037169695Skan	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6038169695Skan		   mcnt++)
6039169695Skan		{
6040169695Skan                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
6041169695Skan                    regs->start[mcnt] = regs->end[mcnt] = -1;
6042169695Skan                  else
6043169695Skan                    {
6044169695Skan		      regs->start[mcnt]
6045169695Skan			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6046169695Skan                      regs->end[mcnt]
6047169695Skan			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6048169695Skan                    }
6049169695Skan		}
6050169695Skan
6051169695Skan              /* If the regs structure we return has more elements than
6052169695Skan                 were in the pattern, set the extra elements to -1.  If
6053169695Skan                 we (re)allocated the registers, this is the case,
6054169695Skan                 because we always allocate enough to have at least one
6055169695Skan                 -1 at the end.  */
6056169695Skan              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6057169695Skan                regs->start[mcnt] = regs->end[mcnt] = -1;
6058169695Skan	    } /* regs && !bufp->no_sub */
6059169695Skan
6060169695Skan          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6061169695Skan                        nfailure_points_pushed, nfailure_points_popped,
6062169695Skan                        nfailure_points_pushed - nfailure_points_popped);
6063169695Skan          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6064169695Skan
6065169695Skan#ifdef WCHAR
6066169695Skan	  if (MATCHING_IN_FIRST_STRING)
6067169695Skan	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6068169695Skan	  else
6069169695Skan	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6070169695Skan			csize1;
6071169695Skan          mcnt -= pos;
6072169695Skan#else
6073169695Skan          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6074169695Skan			    ? string1
6075169695Skan			    : string2 - size1);
6076169695Skan#endif /* WCHAR */
6077169695Skan
6078169695Skan          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6079169695Skan
6080169695Skan          FREE_VARIABLES ();
6081169695Skan          return mcnt;
6082169695Skan        }
6083169695Skan
6084169695Skan      /* Otherwise match next pattern command.  */
6085169695Skan      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6086169695Skan	{
6087169695Skan        /* Ignore these.  Used to ignore the n of succeed_n's which
6088169695Skan           currently have n == 0.  */
6089169695Skan        case no_op:
6090169695Skan          DEBUG_PRINT1 ("EXECUTING no_op.\n");
6091169695Skan          break;
6092169695Skan
6093169695Skan	case succeed:
6094169695Skan          DEBUG_PRINT1 ("EXECUTING succeed.\n");
6095169695Skan	  goto succeed_label;
6096169695Skan
6097169695Skan        /* Match the next n pattern characters exactly.  The following
6098169695Skan           byte in the pattern defines n, and the n bytes after that
6099169695Skan           are the characters to match.  */
6100169695Skan	case exactn:
6101169695Skan#ifdef MBS_SUPPORT
6102169695Skan	case exactn_bin:
6103169695Skan#endif
6104169695Skan	  mcnt = *p++;
6105169695Skan          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6106169695Skan
6107169695Skan          /* This is written out as an if-else so we don't waste time
6108169695Skan             testing `translate' inside the loop.  */
6109169695Skan          if (translate)
6110169695Skan	    {
6111169695Skan	      do
6112169695Skan		{
6113169695Skan		  PREFETCH ();
6114169695Skan#ifdef WCHAR
6115169695Skan		  if (*d <= 0xff)
6116169695Skan		    {
6117169695Skan		      if ((UCHAR_T) translate[(unsigned char) *d++]
6118169695Skan			  != (UCHAR_T) *p++)
6119169695Skan			goto fail;
6120169695Skan		    }
6121169695Skan		  else
6122169695Skan		    {
6123169695Skan		      if (*d++ != (CHAR_T) *p++)
6124169695Skan			goto fail;
6125169695Skan		    }
6126169695Skan#else
6127169695Skan		  if ((UCHAR_T) translate[(unsigned char) *d++]
6128169695Skan		      != (UCHAR_T) *p++)
6129169695Skan                    goto fail;
6130169695Skan#endif /* WCHAR */
6131169695Skan		}
6132169695Skan	      while (--mcnt);
6133169695Skan	    }
6134169695Skan	  else
6135169695Skan	    {
6136169695Skan	      do
6137169695Skan		{
6138169695Skan		  PREFETCH ();
6139169695Skan		  if (*d++ != (CHAR_T) *p++) goto fail;
6140169695Skan		}
6141169695Skan	      while (--mcnt);
6142169695Skan	    }
6143169695Skan	  SET_REGS_MATCHED ();
6144169695Skan          break;
6145169695Skan
6146169695Skan
6147169695Skan        /* Match any character except possibly a newline or a null.  */
6148169695Skan	case anychar:
6149169695Skan          DEBUG_PRINT1 ("EXECUTING anychar.\n");
6150169695Skan
6151169695Skan          PREFETCH ();
6152169695Skan
6153169695Skan          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6154169695Skan              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6155169695Skan	    goto fail;
6156169695Skan
6157169695Skan          SET_REGS_MATCHED ();
6158169695Skan          DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
6159169695Skan          d++;
6160169695Skan	  break;
6161169695Skan
6162169695Skan
6163169695Skan	case charset:
6164169695Skan	case charset_not:
6165169695Skan	  {
6166169695Skan	    register UCHAR_T c;
6167169695Skan#ifdef WCHAR
6168169695Skan	    unsigned int i, char_class_length, coll_symbol_length,
6169169695Skan              equiv_class_length, ranges_length, chars_length, length;
6170169695Skan	    CHAR_T *workp, *workp2, *charset_top;
6171169695Skan#define WORK_BUFFER_SIZE 128
6172169695Skan            CHAR_T str_buf[WORK_BUFFER_SIZE];
6173169695Skan# ifdef _LIBC
6174169695Skan	    uint32_t nrules;
6175169695Skan# endif /* _LIBC */
6176169695Skan#endif /* WCHAR */
6177169695Skan	    boolean negate = (re_opcode_t) *(p - 1) == charset_not;
6178169695Skan
6179169695Skan            DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
6180169695Skan	    PREFETCH ();
6181169695Skan	    c = TRANSLATE (*d); /* The character to match.  */
6182169695Skan#ifdef WCHAR
6183169695Skan# ifdef _LIBC
6184169695Skan	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6185169695Skan# endif /* _LIBC */
6186169695Skan	    charset_top = p - 1;
6187169695Skan	    char_class_length = *p++;
6188169695Skan	    coll_symbol_length = *p++;
6189169695Skan	    equiv_class_length = *p++;
6190169695Skan	    ranges_length = *p++;
6191169695Skan	    chars_length = *p++;
6192169695Skan	    /* p points charset[6], so the address of the next instruction
6193169695Skan	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
6194169695Skan	       where l=length of char_classes, m=length of collating_symbol,
6195169695Skan	       n=equivalence_class, o=length of char_range,
6196169695Skan	       p'=length of character.  */
6197169695Skan	    workp = p;
6198169695Skan	    /* Update p to indicate the next instruction.  */
6199169695Skan	    p += char_class_length + coll_symbol_length+ equiv_class_length +
6200169695Skan              2*ranges_length + chars_length;
6201169695Skan
6202169695Skan            /* match with char_class?  */
6203169695Skan	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6204169695Skan	      {
6205169695Skan		wctype_t wctype;
6206169695Skan		uintptr_t alignedp = ((uintptr_t)workp
6207169695Skan				      + __alignof__(wctype_t) - 1)
6208169695Skan		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
6209169695Skan		wctype = *((wctype_t*)alignedp);
6210169695Skan		workp += CHAR_CLASS_SIZE;
6211169695Skan# ifdef _LIBC
6212169695Skan		if (__iswctype((wint_t)c, wctype))
6213169695Skan		  goto char_set_matched;
6214169695Skan# else
6215169695Skan		if (iswctype((wint_t)c, wctype))
6216169695Skan		  goto char_set_matched;
6217169695Skan# endif
6218169695Skan	      }
6219169695Skan
6220169695Skan            /* match with collating_symbol?  */
6221169695Skan# ifdef _LIBC
6222169695Skan	    if (nrules != 0)
6223169695Skan	      {
6224169695Skan		const unsigned char *extra = (const unsigned char *)
6225169695Skan		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6226169695Skan
6227169695Skan		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6228169695Skan		     workp++)
6229169695Skan		  {
6230169695Skan		    int32_t *wextra;
6231169695Skan		    wextra = (int32_t*)(extra + *workp++);
6232169695Skan		    for (i = 0; i < *wextra; ++i)
6233169695Skan		      if (TRANSLATE(d[i]) != wextra[1 + i])
6234169695Skan			break;
6235169695Skan
6236169695Skan		    if (i == *wextra)
6237169695Skan		      {
6238169695Skan			/* Update d, however d will be incremented at
6239169695Skan			   char_set_matched:, we decrement d here.  */
6240169695Skan			d += i - 1;
6241169695Skan			goto char_set_matched;
6242169695Skan		      }
6243169695Skan		  }
6244169695Skan	      }
6245169695Skan	    else /* (nrules == 0) */
6246169695Skan# endif
6247169695Skan	      /* If we can't look up collation data, we use wcscoll
6248169695Skan		 instead.  */
6249169695Skan	      {
6250169695Skan		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6251169695Skan		  {
6252169695Skan		    const CHAR_T *backup_d = d, *backup_dend = dend;
6253169695Skan# ifdef _LIBC
6254169695Skan		    length = __wcslen (workp);
6255169695Skan# else
6256169695Skan		    length = wcslen (workp);
6257169695Skan# endif
6258169695Skan
6259169695Skan		    /* If wcscoll(the collating symbol, whole string) > 0,
6260169695Skan		       no substring of the string can match the
6261169695Skan		       collating symbol.  */
6262169695Skan# ifdef _LIBC
6263169695Skan		    if (__wcscoll (workp, d) > 0)
6264169695Skan# else
6265169695Skan		    if (wcscoll (workp, d) > 0)
6266169695Skan# endif
6267169695Skan		      {
6268169695Skan			workp += length + 1;
6269169695Skan			continue;
6270169695Skan		      }
6271169695Skan
6272169695Skan		    /* First, we compare the collating symbol with
6273169695Skan		       the first character of the string.
6274169695Skan		       If it doesn't match, we add the next character to
6275169695Skan		       the compare buffer in turn.  */
6276169695Skan		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6277169695Skan		      {
6278169695Skan			int match;
6279169695Skan			if (d == dend)
6280169695Skan			  {
6281169695Skan			    if (dend == end_match_2)
6282169695Skan			      break;
6283169695Skan			    d = string2;
6284169695Skan			    dend = end_match_2;
6285169695Skan			  }
6286169695Skan
6287169695Skan			/* add next character to the compare buffer.  */
6288169695Skan			str_buf[i] = TRANSLATE(*d);
6289169695Skan			str_buf[i+1] = '\0';
6290169695Skan
6291169695Skan# ifdef _LIBC
6292169695Skan			match = __wcscoll (workp, str_buf);
6293169695Skan# else
6294169695Skan			match = wcscoll (workp, str_buf);
6295169695Skan# endif
6296169695Skan			if (match == 0)
6297169695Skan			  goto char_set_matched;
6298169695Skan
6299169695Skan			if (match < 0)
6300169695Skan			  /* (str_buf > workp) implies (str_buf + X > workp),
6301169695Skan			     because for all X (str_buf + X > str_buf).
6302169695Skan			     So we don't need to continue this loop.  */
6303169695Skan			  break;
6304169695Skan
6305169695Skan			/* Otherwise (str_buf < workp),
6306169695Skan			   (str_buf + next_character) may equal (workp).
6307169695Skan			   So we continue this loop.  */
6308169695Skan		      }
6309169695Skan		    /* not matched */
6310169695Skan		    d = backup_d;
6311169695Skan		    dend = backup_dend;
6312169695Skan		    workp += length + 1;
6313169695Skan		  }
6314169695Skan              }
6315169695Skan            /* match with equivalence_class?  */
6316169695Skan# ifdef _LIBC
6317169695Skan	    if (nrules != 0)
6318169695Skan	      {
6319169695Skan                const CHAR_T *backup_d = d, *backup_dend = dend;
6320169695Skan		/* Try to match the equivalence class against
6321169695Skan		   those known to the collate implementation.  */
6322169695Skan		const int32_t *table;
6323169695Skan		const int32_t *weights;
6324169695Skan		const int32_t *extra;
6325169695Skan		const int32_t *indirect;
6326169695Skan		int32_t idx, idx2;
6327169695Skan		wint_t *cp;
6328169695Skan		size_t len;
6329169695Skan
6330169695Skan		/* This #include defines a local function!  */
6331169695Skan#  include <locale/weightwc.h>
6332169695Skan
6333169695Skan		table = (const int32_t *)
6334169695Skan		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6335169695Skan		weights = (const wint_t *)
6336169695Skan		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6337169695Skan		extra = (const wint_t *)
6338169695Skan		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6339169695Skan		indirect = (const int32_t *)
6340169695Skan		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6341169695Skan
6342169695Skan		/* Write 1 collating element to str_buf, and
6343169695Skan		   get its index.  */
6344169695Skan		idx2 = 0;
6345169695Skan
6346169695Skan		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6347169695Skan		  {
6348169695Skan		    cp = (wint_t*)str_buf;
6349169695Skan		    if (d == dend)
6350169695Skan		      {
6351169695Skan			if (dend == end_match_2)
6352169695Skan			  break;
6353169695Skan			d = string2;
6354169695Skan			dend = end_match_2;
6355169695Skan		      }
6356169695Skan		    str_buf[i] = TRANSLATE(*(d+i));
6357169695Skan		    str_buf[i+1] = '\0'; /* sentinel */
6358169695Skan		    idx2 = findidx ((const wint_t**)&cp);
6359169695Skan		  }
6360169695Skan
6361169695Skan		/* Update d.  Since d will be incremented after
6362169695Skan		   char_set_matched:, we decrement it by one here.  */
6363169695Skan		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6364169695Skan		if (d >= dend)
6365169695Skan		  {
6366169695Skan		    if (dend == end_match_2)
6367169695Skan			d = dend;
6368169695Skan		    else
6369169695Skan		      {
6370169695Skan			d = string2;
6371169695Skan			dend = end_match_2;
6372169695Skan		      }
6373169695Skan		  }
6374169695Skan
6375169695Skan		len = weights[idx2];
6376169695Skan
6377169695Skan		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6378169695Skan		     workp++)
6379169695Skan		  {
6380169695Skan		    idx = (int32_t)*workp;
6381169695Skan		    /* We already checked idx != 0 in regex_compile. */
6382169695Skan
6383169695Skan		    if (idx2 != 0 && len == weights[idx])
6384169695Skan		      {
6385169695Skan			int cnt = 0;
6386169695Skan			while (cnt < len && (weights[idx + 1 + cnt]
6387169695Skan					     == weights[idx2 + 1 + cnt]))
6388169695Skan			  ++cnt;
6389169695Skan
6390169695Skan			if (cnt == len)
6391169695Skan			  goto char_set_matched;
6392169695Skan		      }
6393169695Skan		  }
6394169695Skan		/* not matched */
6395169695Skan                d = backup_d;
6396169695Skan                dend = backup_dend;
6397169695Skan	      }
6398169695Skan	    else /* (nrules == 0) */
6399169695Skan# endif
6400169695Skan	      /* If we can't look up collation data, we use wcscoll
6401169695Skan		 instead.  */
6402169695Skan	      {
6403169695Skan		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6404169695Skan		  {
6405169695Skan		    const CHAR_T *backup_d = d, *backup_dend = dend;
6406169695Skan# ifdef _LIBC
6407169695Skan		    length = __wcslen (workp);
6408169695Skan# else
6409169695Skan		    length = wcslen (workp);
6410169695Skan# endif
6411169695Skan
6412169695Skan		    /* If wcscoll(the equivalence class, whole string) > 0,
6413169695Skan		       no substring of the string can ever match the
6414169695Skan		       equivalence class.  */
6415169695Skan# ifdef _LIBC
6416169695Skan		    if (__wcscoll (workp, d) > 0)
6417169695Skan# else
6418169695Skan		    if (wcscoll (workp, d) > 0)
6419169695Skan# endif
6420169695Skan		      {
6421169695Skan			workp += length + 1;
6422169695Skan			break;
6423169695Skan		      }
6424169695Skan
6425169695Skan		    /* First, we compare the equivalence class with
6426169695Skan		       the first character of the string.
6427169695Skan		       If it doesn't match, we add the next character to
6428169695Skan		       the compare buffer in turn.  */
6429169695Skan		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6430169695Skan		      {
6431169695Skan			int match;
6432169695Skan			if (d == dend)
6433169695Skan			  {
6434169695Skan			    if (dend == end_match_2)
6435169695Skan			      break;
6436169695Skan			    d = string2;
6437169695Skan			    dend = end_match_2;
6438169695Skan			  }
6439169695Skan
6440169695Skan			/* add next character to the compare buffer.  */
6441169695Skan			str_buf[i] = TRANSLATE(*d);
6442169695Skan			str_buf[i+1] = '\0';
6443169695Skan
6444169695Skan# ifdef _LIBC
6445169695Skan			match = __wcscoll (workp, str_buf);
6446169695Skan# else
6447169695Skan			match = wcscoll (workp, str_buf);
6448169695Skan# endif
6449169695Skan
6450169695Skan			if (match == 0)
6451169695Skan			  goto char_set_matched;
6452169695Skan
6453169695Skan			if (match < 0)
6454169695Skan			/* (str_buf > workp) implies (str_buf + X > workp),
6455169695Skan			   because for all X (str_buf + X > str_buf).
6456169695Skan			   So we don't need to continue this loop.  */
6457169695Skan			  break;
6458169695Skan
6459169695Skan			/* Otherwise (str_buf < workp),
6460169695Skan			   (str_buf + next_character) may equal (workp).
6461169695Skan			   So we continue this loop.  */
6462169695Skan		      }
6463169695Skan		    /* not matched */
6464169695Skan		    d = backup_d;
6465169695Skan		    dend = backup_dend;
6466169695Skan		    workp += length + 1;
6467169695Skan		  }
6468169695Skan	      }
6469169695Skan
6470169695Skan            /* match with char_range?  */
6471169695Skan# ifdef _LIBC
6472169695Skan	    if (nrules != 0)
6473169695Skan	      {
6474169695Skan		uint32_t collseqval;
6475169695Skan		const char *collseq = (const char *)
6476169695Skan		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6477169695Skan
6478169695Skan		collseqval = collseq_table_lookup (collseq, c);
6479169695Skan
6480169695Skan		for (; workp < p - chars_length ;)
6481169695Skan		  {
6482169695Skan		    uint32_t start_val, end_val;
6483169695Skan
6484169695Skan		    /* We have already computed the collation sequence value
6485169695Skan		       of the characters (or collating symbols).  */
6486169695Skan		    start_val = (uint32_t) *workp++; /* range_start */
6487169695Skan		    end_val = (uint32_t) *workp++; /* range_end */
6488169695Skan
6489169695Skan		    if (start_val <= collseqval && collseqval <= end_val)
6490169695Skan		      goto char_set_matched;
6491169695Skan		  }
6492169695Skan	      }
6493169695Skan	    else
6494169695Skan# endif
6495169695Skan	      {
6496169695Skan		/* We set range_start_char at str_buf[0], range_end_char
6497169695Skan		   at str_buf[4], and compared char at str_buf[2].  */
6498169695Skan		str_buf[1] = 0;
6499169695Skan		str_buf[2] = c;
6500169695Skan		str_buf[3] = 0;
6501169695Skan		str_buf[5] = 0;
6502169695Skan		for (; workp < p - chars_length ;)
6503169695Skan		  {
6504169695Skan		    wchar_t *range_start_char, *range_end_char;
6505169695Skan
6506169695Skan		    /* match if (range_start_char <= c <= range_end_char).  */
6507169695Skan
6508169695Skan		    /* If range_start(or end) < 0, we assume -range_start(end)
6509169695Skan		       is the offset of the collating symbol which is specified
6510169695Skan		       as the character of the range start(end).  */
6511169695Skan
6512169695Skan		    /* range_start */
6513169695Skan		    if (*workp < 0)
6514169695Skan		      range_start_char = charset_top - (*workp++);
6515169695Skan		    else
6516169695Skan		      {
6517169695Skan			str_buf[0] = *workp++;
6518169695Skan			range_start_char = str_buf;
6519169695Skan		      }
6520169695Skan
6521169695Skan		    /* range_end */
6522169695Skan		    if (*workp < 0)
6523169695Skan		      range_end_char = charset_top - (*workp++);
6524169695Skan		    else
6525169695Skan		      {
6526169695Skan			str_buf[4] = *workp++;
6527169695Skan			range_end_char = str_buf + 4;
6528169695Skan		      }
6529169695Skan
6530169695Skan# ifdef _LIBC
6531169695Skan		    if (__wcscoll (range_start_char, str_buf+2) <= 0
6532169695Skan			&& __wcscoll (str_buf+2, range_end_char) <= 0)
6533169695Skan# else
6534169695Skan		    if (wcscoll (range_start_char, str_buf+2) <= 0
6535169695Skan			&& wcscoll (str_buf+2, range_end_char) <= 0)
6536169695Skan# endif
6537169695Skan		      goto char_set_matched;
6538169695Skan		  }
6539169695Skan	      }
6540169695Skan
6541169695Skan            /* match with char?  */
6542169695Skan	    for (; workp < p ; workp++)
6543169695Skan	      if (c == *workp)
6544169695Skan		goto char_set_matched;
6545169695Skan
6546169695Skan	    negate = !negate;
6547169695Skan
6548169695Skan	  char_set_matched:
6549169695Skan	    if (negate) goto fail;
6550169695Skan#else
6551169695Skan            /* Cast to `unsigned' instead of `unsigned char' in case the
6552169695Skan               bit list is a full 32 bytes long.  */
6553169695Skan	    if (c < (unsigned) (*p * BYTEWIDTH)
6554169695Skan		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6555169695Skan	      negate = !negate;
6556169695Skan
6557169695Skan	    p += 1 + *p;
6558169695Skan
6559169695Skan	    if (!negate) goto fail;
6560169695Skan#undef WORK_BUFFER_SIZE
6561169695Skan#endif /* WCHAR */
6562169695Skan	    SET_REGS_MATCHED ();
6563169695Skan            d++;
6564169695Skan	    break;
6565169695Skan	  }
6566169695Skan
6567169695Skan
6568169695Skan        /* The beginning of a group is represented by start_memory.
6569169695Skan           The arguments are the register number in the next byte, and the
6570169695Skan           number of groups inner to this one in the next.  The text
6571169695Skan           matched within the group is recorded (in the internal
6572169695Skan           registers data structure) under the register number.  */
6573169695Skan        case start_memory:
6574169695Skan	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6575169695Skan			(long int) *p, (long int) p[1]);
6576169695Skan
6577169695Skan          /* Find out if this group can match the empty string.  */
6578169695Skan	  p1 = p;		/* To send to group_match_null_string_p.  */
6579169695Skan
6580169695Skan          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6581169695Skan            REG_MATCH_NULL_STRING_P (reg_info[*p])
6582169695Skan              = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6583169695Skan
6584169695Skan          /* Save the position in the string where we were the last time
6585169695Skan             we were at this open-group operator in case the group is
6586169695Skan             operated upon by a repetition operator, e.g., with `(a*)*b'
6587169695Skan             against `ab'; then we want to ignore where we are now in
6588169695Skan             the string in case this attempt to match fails.  */
6589169695Skan          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6590169695Skan                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6591169695Skan                             : regstart[*p];
6592169695Skan	  DEBUG_PRINT2 ("  old_regstart: %d\n",
6593169695Skan			 POINTER_TO_OFFSET (old_regstart[*p]));
6594169695Skan
6595169695Skan          regstart[*p] = d;
6596169695Skan	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6597169695Skan
6598169695Skan          IS_ACTIVE (reg_info[*p]) = 1;
6599169695Skan          MATCHED_SOMETHING (reg_info[*p]) = 0;
6600169695Skan
6601169695Skan	  /* Clear this whenever we change the register activity status.  */
6602169695Skan	  set_regs_matched_done = 0;
6603169695Skan
6604169695Skan          /* This is the new highest active register.  */
6605169695Skan          highest_active_reg = *p;
6606169695Skan
6607169695Skan          /* If nothing was active before, this is the new lowest active
6608169695Skan             register.  */
6609169695Skan          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6610169695Skan            lowest_active_reg = *p;
6611169695Skan
6612169695Skan          /* Move past the register number and inner group count.  */
6613169695Skan          p += 2;
6614169695Skan	  just_past_start_mem = p;
6615169695Skan
6616169695Skan          break;
6617169695Skan
6618169695Skan
6619169695Skan        /* The stop_memory opcode represents the end of a group.  Its
6620169695Skan           arguments are the same as start_memory's: the register
6621169695Skan           number, and the number of inner groups.  */
6622169695Skan	case stop_memory:
6623169695Skan	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6624169695Skan			(long int) *p, (long int) p[1]);
6625169695Skan
6626169695Skan          /* We need to save the string position the last time we were at
6627169695Skan             this close-group operator in case the group is operated
6628169695Skan             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6629169695Skan             against `aba'; then we want to ignore where we are now in
6630169695Skan             the string in case this attempt to match fails.  */
6631169695Skan          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6632169695Skan                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
6633169695Skan			   : regend[*p];
6634169695Skan	  DEBUG_PRINT2 ("      old_regend: %d\n",
6635169695Skan			 POINTER_TO_OFFSET (old_regend[*p]));
6636169695Skan
6637169695Skan          regend[*p] = d;
6638169695Skan	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6639169695Skan
6640169695Skan          /* This register isn't active anymore.  */
6641169695Skan          IS_ACTIVE (reg_info[*p]) = 0;
6642169695Skan
6643169695Skan	  /* Clear this whenever we change the register activity status.  */
6644169695Skan	  set_regs_matched_done = 0;
6645169695Skan
6646169695Skan          /* If this was the only register active, nothing is active
6647169695Skan             anymore.  */
6648169695Skan          if (lowest_active_reg == highest_active_reg)
6649169695Skan            {
6650169695Skan              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6651169695Skan              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6652169695Skan            }
6653169695Skan          else
6654169695Skan            { /* We must scan for the new highest active register, since
6655169695Skan                 it isn't necessarily one less than now: consider
6656169695Skan                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
6657169695Skan                 new highest active register is 1.  */
6658169695Skan              UCHAR_T r = *p - 1;
6659169695Skan              while (r > 0 && !IS_ACTIVE (reg_info[r]))
6660169695Skan                r--;
6661169695Skan
6662169695Skan              /* If we end up at register zero, that means that we saved
6663169695Skan                 the registers as the result of an `on_failure_jump', not
6664169695Skan                 a `start_memory', and we jumped to past the innermost
6665169695Skan                 `stop_memory'.  For example, in ((.)*) we save
6666169695Skan                 registers 1 and 2 as a result of the *, but when we pop
6667169695Skan                 back to the second ), we are at the stop_memory 1.
6668169695Skan                 Thus, nothing is active.  */
6669169695Skan	      if (r == 0)
6670169695Skan                {
6671169695Skan                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6672169695Skan                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6673169695Skan                }
6674169695Skan              else
6675169695Skan                highest_active_reg = r;
6676169695Skan            }
6677169695Skan
6678169695Skan          /* If just failed to match something this time around with a
6679169695Skan             group that's operated on by a repetition operator, try to
6680169695Skan             force exit from the ``loop'', and restore the register
6681169695Skan             information for this group that we had before trying this
6682169695Skan             last match.  */
6683169695Skan          if ((!MATCHED_SOMETHING (reg_info[*p])
6684169695Skan               || just_past_start_mem == p - 1)
6685169695Skan	      && (p + 2) < pend)
6686169695Skan            {
6687169695Skan              boolean is_a_jump_n = false;
6688169695Skan
6689169695Skan              p1 = p + 2;
6690169695Skan              mcnt = 0;
6691169695Skan              switch ((re_opcode_t) *p1++)
6692169695Skan                {
6693169695Skan                  case jump_n:
6694169695Skan		    is_a_jump_n = true;
6695169695Skan                  case pop_failure_jump:
6696169695Skan		  case maybe_pop_jump:
6697169695Skan		  case jump:
6698169695Skan		  case dummy_failure_jump:
6699169695Skan                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6700169695Skan		    if (is_a_jump_n)
6701169695Skan		      p1 += OFFSET_ADDRESS_SIZE;
6702169695Skan                    break;
6703169695Skan
6704169695Skan                  default:
6705169695Skan                    /* do nothing */ ;
6706169695Skan                }
6707169695Skan	      p1 += mcnt;
6708169695Skan
6709169695Skan              /* If the next operation is a jump backwards in the pattern
6710169695Skan	         to an on_failure_jump right before the start_memory
6711169695Skan                 corresponding to this stop_memory, exit from the loop
6712169695Skan                 by forcing a failure after pushing on the stack the
6713169695Skan                 on_failure_jump's jump in the pattern, and d.  */
6714169695Skan              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6715169695Skan                  && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6716169695Skan		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6717169695Skan		{
6718169695Skan                  /* If this group ever matched anything, then restore
6719169695Skan                     what its registers were before trying this last
6720169695Skan                     failed match, e.g., with `(a*)*b' against `ab' for
6721169695Skan                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
6722169695Skan                     against `aba' for regend[3].
6723169695Skan
6724169695Skan                     Also restore the registers for inner groups for,
6725169695Skan                     e.g., `((a*)(b*))*' against `aba' (register 3 would
6726169695Skan                     otherwise get trashed).  */
6727169695Skan
6728169695Skan                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6729169695Skan		    {
6730169695Skan		      unsigned r;
6731169695Skan
6732169695Skan                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6733169695Skan
6734169695Skan		      /* Restore this and inner groups' (if any) registers.  */
6735169695Skan                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6736169695Skan			   r++)
6737169695Skan                        {
6738169695Skan                          regstart[r] = old_regstart[r];
6739169695Skan
6740169695Skan                          /* xx why this test?  */
6741169695Skan                          if (old_regend[r] >= regstart[r])
6742169695Skan                            regend[r] = old_regend[r];
6743169695Skan                        }
6744169695Skan                    }
6745169695Skan		  p1++;
6746169695Skan                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6747169695Skan                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6748169695Skan
6749169695Skan                  goto fail;
6750169695Skan                }
6751169695Skan            }
6752169695Skan
6753169695Skan          /* Move past the register number and the inner group count.  */
6754169695Skan          p += 2;
6755169695Skan          break;
6756169695Skan
6757169695Skan
6758169695Skan	/* \<digit> has been turned into a `duplicate' command which is
6759169695Skan           followed by the numeric value of <digit> as the register number.  */
6760169695Skan        case duplicate:
6761169695Skan	  {
6762169695Skan	    register const CHAR_T *d2, *dend2;
6763169695Skan	    int regno = *p++;   /* Get which register to match against.  */
6764169695Skan	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6765169695Skan
6766169695Skan	    /* Can't back reference a group which we've never matched.  */
6767169695Skan            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6768169695Skan              goto fail;
6769169695Skan
6770169695Skan            /* Where in input to try to start matching.  */
6771169695Skan            d2 = regstart[regno];
6772169695Skan
6773169695Skan            /* Where to stop matching; if both the place to start and
6774169695Skan               the place to stop matching are in the same string, then
6775169695Skan               set to the place to stop, otherwise, for now have to use
6776169695Skan               the end of the first string.  */
6777169695Skan
6778169695Skan            dend2 = ((FIRST_STRING_P (regstart[regno])
6779169695Skan		      == FIRST_STRING_P (regend[regno]))
6780169695Skan		     ? regend[regno] : end_match_1);
6781169695Skan	    for (;;)
6782169695Skan	      {
6783169695Skan		/* If necessary, advance to next segment in register
6784169695Skan                   contents.  */
6785169695Skan		while (d2 == dend2)
6786169695Skan		  {
6787169695Skan		    if (dend2 == end_match_2) break;
6788169695Skan		    if (dend2 == regend[regno]) break;
6789169695Skan
6790169695Skan                    /* End of string1 => advance to string2. */
6791169695Skan                    d2 = string2;
6792169695Skan                    dend2 = regend[regno];
6793169695Skan		  }
6794169695Skan		/* At end of register contents => success */
6795169695Skan		if (d2 == dend2) break;
6796169695Skan
6797169695Skan		/* If necessary, advance to next segment in data.  */
6798169695Skan		PREFETCH ();
6799169695Skan
6800169695Skan		/* How many characters left in this segment to match.  */
6801169695Skan		mcnt = dend - d;
6802169695Skan
6803169695Skan		/* Want how many consecutive characters we can match in
6804169695Skan                   one shot, so, if necessary, adjust the count.  */
6805169695Skan                if (mcnt > dend2 - d2)
6806169695Skan		  mcnt = dend2 - d2;
6807169695Skan
6808169695Skan		/* Compare that many; failure if mismatch, else move
6809169695Skan                   past them.  */
6810169695Skan		if (translate
6811169695Skan                    ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6812169695Skan                    : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
6813169695Skan		  goto fail;
6814169695Skan		d += mcnt, d2 += mcnt;
6815169695Skan
6816169695Skan		/* Do this because we've matched some characters.  */
6817169695Skan		SET_REGS_MATCHED ();
6818169695Skan	      }
6819169695Skan	  }
6820169695Skan	  break;
6821169695Skan
6822169695Skan
6823169695Skan        /* begline matches the empty string at the beginning of the string
6824169695Skan           (unless `not_bol' is set in `bufp'), and, if
6825169695Skan           `newline_anchor' is set, after newlines.  */
6826169695Skan	case begline:
6827169695Skan          DEBUG_PRINT1 ("EXECUTING begline.\n");
6828169695Skan
6829169695Skan          if (AT_STRINGS_BEG (d))
6830169695Skan            {
6831169695Skan              if (!bufp->not_bol) break;
6832169695Skan            }
6833169695Skan          else if (d[-1] == '\n' && bufp->newline_anchor)
6834169695Skan            {
6835169695Skan              break;
6836169695Skan            }
6837169695Skan          /* In all other cases, we fail.  */
6838169695Skan          goto fail;
6839169695Skan
6840169695Skan
6841169695Skan        /* endline is the dual of begline.  */
6842169695Skan	case endline:
6843169695Skan          DEBUG_PRINT1 ("EXECUTING endline.\n");
6844169695Skan
6845169695Skan          if (AT_STRINGS_END (d))
6846169695Skan            {
6847169695Skan              if (!bufp->not_eol) break;
6848169695Skan            }
6849169695Skan
6850169695Skan          /* We have to ``prefetch'' the next character.  */
6851169695Skan          else if ((d == end1 ? *string2 : *d) == '\n'
6852169695Skan                   && bufp->newline_anchor)
6853169695Skan            {
6854169695Skan              break;
6855169695Skan            }
6856169695Skan          goto fail;
6857169695Skan
6858169695Skan
6859169695Skan	/* Match at the very beginning of the data.  */
6860169695Skan        case begbuf:
6861169695Skan          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
6862169695Skan          if (AT_STRINGS_BEG (d))
6863169695Skan            break;
6864169695Skan          goto fail;
6865169695Skan
6866169695Skan
6867169695Skan	/* Match at the very end of the data.  */
6868169695Skan        case endbuf:
6869169695Skan          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
6870169695Skan	  if (AT_STRINGS_END (d))
6871169695Skan	    break;
6872169695Skan          goto fail;
6873169695Skan
6874169695Skan
6875169695Skan        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
6876169695Skan           pushes NULL as the value for the string on the stack.  Then
6877169695Skan           `pop_failure_point' will keep the current value for the
6878169695Skan           string, instead of restoring it.  To see why, consider
6879169695Skan           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
6880169695Skan           then the . fails against the \n.  But the next thing we want
6881169695Skan           to do is match the \n against the \n; if we restored the
6882169695Skan           string value, we would be back at the foo.
6883169695Skan
6884169695Skan           Because this is used only in specific cases, we don't need to
6885169695Skan           check all the things that `on_failure_jump' does, to make
6886169695Skan           sure the right things get saved on the stack.  Hence we don't
6887169695Skan           share its code.  The only reason to push anything on the
6888169695Skan           stack at all is that otherwise we would have to change
6889169695Skan           `anychar's code to do something besides goto fail in this
6890169695Skan           case; that seems worse than this.  */
6891169695Skan        case on_failure_keep_string_jump:
6892169695Skan          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
6893169695Skan
6894169695Skan          EXTRACT_NUMBER_AND_INCR (mcnt, p);
6895169695Skan#ifdef _LIBC
6896169695Skan          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
6897169695Skan#else
6898169695Skan          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
6899169695Skan#endif
6900169695Skan
6901169695Skan          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
6902169695Skan          break;
6903169695Skan
6904169695Skan
6905169695Skan	/* Uses of on_failure_jump:
6906169695Skan
6907169695Skan           Each alternative starts with an on_failure_jump that points
6908169695Skan           to the beginning of the next alternative.  Each alternative
6909169695Skan           except the last ends with a jump that in effect jumps past
6910169695Skan           the rest of the alternatives.  (They really jump to the
6911169695Skan           ending jump of the following alternative, because tensioning
6912169695Skan           these jumps is a hassle.)
6913169695Skan
6914169695Skan           Repeats start with an on_failure_jump that points past both
6915169695Skan           the repetition text and either the following jump or
6916169695Skan           pop_failure_jump back to this on_failure_jump.  */
6917169695Skan	case on_failure_jump:
6918169695Skan        on_failure:
6919169695Skan          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
6920169695Skan
6921169695Skan          EXTRACT_NUMBER_AND_INCR (mcnt, p);
6922169695Skan#ifdef _LIBC
6923169695Skan          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
6924169695Skan#else
6925169695Skan          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
6926169695Skan#endif
6927169695Skan
6928169695Skan          /* If this on_failure_jump comes right before a group (i.e.,
6929169695Skan             the original * applied to a group), save the information
6930169695Skan             for that group and all inner ones, so that if we fail back
6931169695Skan             to this point, the group's information will be correct.
6932169695Skan             For example, in \(a*\)*\1, we need the preceding group,
6933169695Skan             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
6934169695Skan
6935169695Skan          /* We can't use `p' to check ahead because we push
6936169695Skan             a failure point to `p + mcnt' after we do this.  */
6937169695Skan          p1 = p;
6938169695Skan
6939169695Skan          /* We need to skip no_op's before we look for the
6940169695Skan             start_memory in case this on_failure_jump is happening as
6941169695Skan             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
6942169695Skan             against aba.  */
6943169695Skan          while (p1 < pend && (re_opcode_t) *p1 == no_op)
6944169695Skan            p1++;
6945169695Skan
6946169695Skan          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
6947169695Skan            {
6948169695Skan              /* We have a new highest active register now.  This will
6949169695Skan                 get reset at the start_memory we are about to get to,
6950169695Skan                 but we will have saved all the registers relevant to
6951169695Skan                 this repetition op, as described above.  */
6952169695Skan              highest_active_reg = *(p1 + 1) + *(p1 + 2);
6953169695Skan              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6954169695Skan                lowest_active_reg = *(p1 + 1);
6955169695Skan            }
6956169695Skan
6957169695Skan          DEBUG_PRINT1 (":\n");
6958169695Skan          PUSH_FAILURE_POINT (p + mcnt, d, -2);
6959169695Skan          break;
6960169695Skan
6961169695Skan
6962169695Skan        /* A smart repeat ends with `maybe_pop_jump'.
6963169695Skan	   We change it to either `pop_failure_jump' or `jump'.  */
6964169695Skan        case maybe_pop_jump:
6965169695Skan          EXTRACT_NUMBER_AND_INCR (mcnt, p);
6966169695Skan          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
6967169695Skan          {
6968169695Skan	    register UCHAR_T *p2 = p;
6969169695Skan
6970169695Skan            /* Compare the beginning of the repeat with what in the
6971169695Skan               pattern follows its end. If we can establish that there
6972169695Skan               is nothing that they would both match, i.e., nothing
6973169695Skan               that would force us to backtrack (as in, e.g., `a*a'),
6974169695Skan               then we can change to pop_failure_jump, because we'll
6975169695Skan               never have to backtrack.
6976169695Skan
6977169695Skan               This is not true in the case of alternatives: in
6978169695Skan               `(a|ab)*' we do need to backtrack to the `ab' alternative
6979169695Skan               (e.g., if the string was `ab').  But instead of trying to
6980169695Skan               detect that here, the alternative has put on a dummy
6981169695Skan               failure point which is what we will end up popping.  */
6982169695Skan
6983169695Skan	    /* Skip over open/close-group commands.
6984169695Skan	       If what follows this loop is a ...+ construct,
6985169695Skan	       look at what begins its body, since we will have to
6986169695Skan	       match at least one of that.  */
6987169695Skan	    while (1)
6988169695Skan	      {
6989169695Skan		if (p2 + 2 < pend
6990169695Skan		    && ((re_opcode_t) *p2 == stop_memory
6991169695Skan			|| (re_opcode_t) *p2 == start_memory))
6992169695Skan		  p2 += 3;
6993169695Skan		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
6994169695Skan			 && (re_opcode_t) *p2 == dummy_failure_jump)
6995169695Skan		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
6996169695Skan		else
6997169695Skan		  break;
6998169695Skan	      }
6999169695Skan
7000169695Skan	    p1 = p + mcnt;
7001169695Skan	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7002169695Skan	       to the `maybe_finalize_jump' of this case.  Examine what
7003169695Skan	       follows.  */
7004169695Skan
7005169695Skan            /* If we're at the end of the pattern, we can change.  */
7006169695Skan            if (p2 == pend)
7007169695Skan	      {
7008169695Skan		/* Consider what happens when matching ":\(.*\)"
7009169695Skan		   against ":/".  I don't really understand this code
7010169695Skan		   yet.  */
7011169695Skan  	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7012169695Skan		  pop_failure_jump;
7013169695Skan                DEBUG_PRINT1
7014169695Skan                  ("  End of pattern: change to `pop_failure_jump'.\n");
7015169695Skan              }
7016169695Skan
7017169695Skan            else if ((re_opcode_t) *p2 == exactn
7018169695Skan#ifdef MBS_SUPPORT
7019169695Skan		     || (re_opcode_t) *p2 == exactn_bin
7020169695Skan#endif
7021169695Skan		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7022169695Skan	      {
7023169695Skan		register UCHAR_T c
7024169695Skan                  = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7025169695Skan
7026169695Skan                if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7027169695Skan#ifdef MBS_SUPPORT
7028169695Skan		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7029169695Skan#endif
7030169695Skan		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7031169695Skan                  {
7032169695Skan  		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7033169695Skan		      pop_failure_jump;
7034169695Skan#ifdef WCHAR
7035169695Skan		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
7036169695Skan				    (wint_t) c,
7037169695Skan				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7038169695Skan#else
7039169695Skan		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
7040169695Skan				    (char) c,
7041169695Skan				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
7042169695Skan#endif
7043169695Skan                  }
7044169695Skan
7045169695Skan#ifndef WCHAR
7046169695Skan		else if ((re_opcode_t) p1[3] == charset
7047169695Skan			 || (re_opcode_t) p1[3] == charset_not)
7048169695Skan		  {
7049169695Skan		    int negate = (re_opcode_t) p1[3] == charset_not;
7050169695Skan
7051169695Skan		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
7052169695Skan			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7053169695Skan		      negate = !negate;
7054169695Skan
7055169695Skan                    /* `negate' is equal to 1 if c would match, which means
7056169695Skan                        that we can't change to pop_failure_jump.  */
7057169695Skan		    if (!negate)
7058169695Skan                      {
7059169695Skan  		        p[-3] = (unsigned char) pop_failure_jump;
7060169695Skan                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7061169695Skan                      }
7062169695Skan		  }
7063169695Skan#endif /* not WCHAR */
7064169695Skan	      }
7065169695Skan#ifndef WCHAR
7066169695Skan            else if ((re_opcode_t) *p2 == charset)
7067169695Skan	      {
7068169695Skan		/* We win if the first character of the loop is not part
7069169695Skan                   of the charset.  */
7070169695Skan                if ((re_opcode_t) p1[3] == exactn
7071169695Skan 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7072169695Skan 			  && (p2[2 + p1[5] / BYTEWIDTH]
7073169695Skan 			      & (1 << (p1[5] % BYTEWIDTH)))))
7074169695Skan		  {
7075169695Skan		    p[-3] = (unsigned char) pop_failure_jump;
7076169695Skan		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7077169695Skan                  }
7078169695Skan
7079169695Skan		else if ((re_opcode_t) p1[3] == charset_not)
7080169695Skan		  {
7081169695Skan		    int idx;
7082169695Skan		    /* We win if the charset_not inside the loop
7083169695Skan		       lists every character listed in the charset after.  */
7084169695Skan		    for (idx = 0; idx < (int) p2[1]; idx++)
7085169695Skan		      if (! (p2[2 + idx] == 0
7086169695Skan			     || (idx < (int) p1[4]
7087169695Skan				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7088169695Skan			break;
7089169695Skan
7090169695Skan		    if (idx == p2[1])
7091169695Skan                      {
7092169695Skan  		        p[-3] = (unsigned char) pop_failure_jump;
7093169695Skan                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7094169695Skan                      }
7095169695Skan		  }
7096169695Skan		else if ((re_opcode_t) p1[3] == charset)
7097169695Skan		  {
7098169695Skan		    int idx;
7099169695Skan		    /* We win if the charset inside the loop
7100169695Skan		       has no overlap with the one after the loop.  */
7101169695Skan		    for (idx = 0;
7102169695Skan			 idx < (int) p2[1] && idx < (int) p1[4];
7103169695Skan			 idx++)
7104169695Skan		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
7105169695Skan			break;
7106169695Skan
7107169695Skan		    if (idx == p2[1] || idx == p1[4])
7108169695Skan                      {
7109169695Skan  		        p[-3] = (unsigned char) pop_failure_jump;
7110169695Skan                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7111169695Skan                      }
7112169695Skan		  }
7113169695Skan	      }
7114169695Skan#endif /* not WCHAR */
7115169695Skan	  }
7116169695Skan	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
7117169695Skan	  if ((re_opcode_t) p[-1] != pop_failure_jump)
7118169695Skan	    {
7119169695Skan	      p[-1] = (UCHAR_T) jump;
7120169695Skan              DEBUG_PRINT1 ("  Match => jump.\n");
7121169695Skan	      goto unconditional_jump;
7122169695Skan	    }
7123169695Skan        /* Note fall through.  */
7124169695Skan
7125169695Skan
7126169695Skan	/* The end of a simple repeat has a pop_failure_jump back to
7127169695Skan           its matching on_failure_jump, where the latter will push a
7128169695Skan           failure point.  The pop_failure_jump takes off failure
7129169695Skan           points put on by this pop_failure_jump's matching
7130169695Skan           on_failure_jump; we got through the pattern to here from the
7131169695Skan           matching on_failure_jump, so didn't fail.  */
7132169695Skan        case pop_failure_jump:
7133169695Skan          {
7134169695Skan            /* We need to pass separate storage for the lowest and
7135169695Skan               highest registers, even though we don't care about the
7136169695Skan               actual values.  Otherwise, we will restore only one
7137169695Skan               register from the stack, since lowest will == highest in
7138169695Skan               `pop_failure_point'.  */
7139169695Skan            active_reg_t dummy_low_reg, dummy_high_reg;
7140169695Skan            UCHAR_T *pdummy = NULL;
7141169695Skan            const CHAR_T *sdummy = NULL;
7142169695Skan
7143169695Skan            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7144169695Skan            POP_FAILURE_POINT (sdummy, pdummy,
7145169695Skan                               dummy_low_reg, dummy_high_reg,
7146169695Skan                               reg_dummy, reg_dummy, reg_info_dummy);
7147169695Skan          }
7148169695Skan	  /* Note fall through.  */
7149169695Skan
7150169695Skan	unconditional_jump:
7151169695Skan#ifdef _LIBC
7152169695Skan	  DEBUG_PRINT2 ("\n%p: ", p);
7153169695Skan#else
7154169695Skan	  DEBUG_PRINT2 ("\n0x%x: ", p);
7155169695Skan#endif
7156169695Skan          /* Note fall through.  */
7157169695Skan
7158169695Skan        /* Unconditionally jump (without popping any failure points).  */
7159169695Skan        case jump:
7160169695Skan	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
7161169695Skan          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7162169695Skan	  p += mcnt;				/* Do the jump.  */
7163169695Skan#ifdef _LIBC
7164169695Skan          DEBUG_PRINT2 ("(to %p).\n", p);
7165169695Skan#else
7166169695Skan          DEBUG_PRINT2 ("(to 0x%x).\n", p);
7167169695Skan#endif
7168169695Skan	  break;
7169169695Skan
7170169695Skan
7171169695Skan        /* We need this opcode so we can detect where alternatives end
7172169695Skan           in `group_match_null_string_p' et al.  */
7173169695Skan        case jump_past_alt:
7174169695Skan          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7175169695Skan          goto unconditional_jump;
7176169695Skan
7177169695Skan
7178169695Skan        /* Normally, the on_failure_jump pushes a failure point, which
7179169695Skan           then gets popped at pop_failure_jump.  We will end up at
7180169695Skan           pop_failure_jump, also, and with a pattern of, say, `a+', we
7181169695Skan           are skipping over the on_failure_jump, so we have to push
7182169695Skan           something meaningless for pop_failure_jump to pop.  */
7183169695Skan        case dummy_failure_jump:
7184169695Skan          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7185169695Skan          /* It doesn't matter what we push for the string here.  What
7186169695Skan             the code at `fail' tests is the value for the pattern.  */
7187169695Skan          PUSH_FAILURE_POINT (NULL, NULL, -2);
7188169695Skan          goto unconditional_jump;
7189169695Skan
7190169695Skan
7191169695Skan        /* At the end of an alternative, we need to push a dummy failure
7192169695Skan           point in case we are followed by a `pop_failure_jump', because
7193169695Skan           we don't want the failure point for the alternative to be
7194169695Skan           popped.  For example, matching `(a|ab)*' against `aab'
7195169695Skan           requires that we match the `ab' alternative.  */
7196169695Skan        case push_dummy_failure:
7197169695Skan          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7198169695Skan          /* See comments just above at `dummy_failure_jump' about the
7199169695Skan             two zeroes.  */
7200169695Skan          PUSH_FAILURE_POINT (NULL, NULL, -2);
7201169695Skan          break;
7202169695Skan
7203169695Skan        /* Have to succeed matching what follows at least n times.
7204169695Skan           After that, handle like `on_failure_jump'.  */
7205169695Skan        case succeed_n:
7206169695Skan          EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7207169695Skan          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7208169695Skan
7209169695Skan          assert (mcnt >= 0);
7210169695Skan          /* Originally, this is how many times we HAVE to succeed.  */
7211169695Skan          if (mcnt > 0)
7212169695Skan            {
7213169695Skan               mcnt--;
7214169695Skan	       p += OFFSET_ADDRESS_SIZE;
7215169695Skan               STORE_NUMBER_AND_INCR (p, mcnt);
7216169695Skan#ifdef _LIBC
7217169695Skan               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7218169695Skan			     , mcnt);
7219169695Skan#else
7220169695Skan               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7221169695Skan			     , mcnt);
7222169695Skan#endif
7223169695Skan            }
7224169695Skan	  else if (mcnt == 0)
7225169695Skan            {
7226169695Skan#ifdef _LIBC
7227169695Skan              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
7228169695Skan			    p + OFFSET_ADDRESS_SIZE);
7229169695Skan#else
7230169695Skan              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
7231169695Skan			    p + OFFSET_ADDRESS_SIZE);
7232169695Skan#endif /* _LIBC */
7233169695Skan
7234169695Skan#ifdef WCHAR
7235169695Skan	      p[1] = (UCHAR_T) no_op;
7236169695Skan#else
7237169695Skan	      p[2] = (UCHAR_T) no_op;
7238169695Skan              p[3] = (UCHAR_T) no_op;
7239169695Skan#endif /* WCHAR */
7240169695Skan              goto on_failure;
7241169695Skan            }
7242169695Skan          break;
7243169695Skan
7244169695Skan        case jump_n:
7245169695Skan          EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7246169695Skan          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7247169695Skan
7248169695Skan          /* Originally, this is how many times we CAN jump.  */
7249169695Skan          if (mcnt)
7250169695Skan            {
7251169695Skan               mcnt--;
7252169695Skan               STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7253169695Skan
7254169695Skan#ifdef _LIBC
7255169695Skan               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7256169695Skan			     mcnt);
7257169695Skan#else
7258169695Skan               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7259169695Skan			     mcnt);
7260169695Skan#endif /* _LIBC */
7261169695Skan	       goto unconditional_jump;
7262169695Skan            }
7263169695Skan          /* If don't have to jump any more, skip over the rest of command.  */
7264169695Skan	  else
7265169695Skan	    p += 2 * OFFSET_ADDRESS_SIZE;
7266169695Skan          break;
7267169695Skan
7268169695Skan	case set_number_at:
7269169695Skan	  {
7270169695Skan            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7271169695Skan
7272169695Skan            EXTRACT_NUMBER_AND_INCR (mcnt, p);
7273169695Skan            p1 = p + mcnt;
7274169695Skan            EXTRACT_NUMBER_AND_INCR (mcnt, p);
7275169695Skan#ifdef _LIBC
7276169695Skan            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
7277169695Skan#else
7278169695Skan            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
7279169695Skan#endif
7280169695Skan	    STORE_NUMBER (p1, mcnt);
7281169695Skan            break;
7282169695Skan          }
7283169695Skan
7284169695Skan#if 0
7285169695Skan	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7286169695Skan	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
7287169695Skan	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
7288169695Skan	   macro and introducing temporary variables works around the bug.  */
7289169695Skan
7290169695Skan	case wordbound:
7291169695Skan	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7292169695Skan	  if (AT_WORD_BOUNDARY (d))
7293169695Skan	    break;
7294169695Skan	  goto fail;
7295169695Skan
7296169695Skan	case notwordbound:
7297169695Skan	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7298169695Skan	  if (AT_WORD_BOUNDARY (d))
7299169695Skan	    goto fail;
7300169695Skan	  break;
7301169695Skan#else
7302169695Skan	case wordbound:
7303169695Skan	{
7304169695Skan	  boolean prevchar, thischar;
7305169695Skan
7306169695Skan	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7307169695Skan	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7308169695Skan	    break;
7309169695Skan
7310169695Skan	  prevchar = WORDCHAR_P (d - 1);
7311169695Skan	  thischar = WORDCHAR_P (d);
7312169695Skan	  if (prevchar != thischar)
7313169695Skan	    break;
7314169695Skan	  goto fail;
7315169695Skan	}
7316169695Skan
7317169695Skan      case notwordbound:
7318169695Skan	{
7319169695Skan	  boolean prevchar, thischar;
7320169695Skan
7321169695Skan	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7322169695Skan	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7323169695Skan	    goto fail;
7324169695Skan
7325169695Skan	  prevchar = WORDCHAR_P (d - 1);
7326169695Skan	  thischar = WORDCHAR_P (d);
7327169695Skan	  if (prevchar != thischar)
7328169695Skan	    goto fail;
7329169695Skan	  break;
7330169695Skan	}
7331169695Skan#endif
7332169695Skan
7333169695Skan	case wordbeg:
7334169695Skan          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7335169695Skan	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7336169695Skan	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7337169695Skan	    break;
7338169695Skan          goto fail;
7339169695Skan
7340169695Skan	case wordend:
7341169695Skan          DEBUG_PRINT1 ("EXECUTING wordend.\n");
7342169695Skan	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7343169695Skan              && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
7344169695Skan	    break;
7345169695Skan          goto fail;
7346169695Skan
7347169695Skan#ifdef emacs
7348169695Skan  	case before_dot:
7349169695Skan          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7350169695Skan 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7351169695Skan  	    goto fail;
7352169695Skan  	  break;
7353169695Skan
7354169695Skan  	case at_dot:
7355169695Skan          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7356169695Skan 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
7357169695Skan  	    goto fail;
7358169695Skan  	  break;
7359169695Skan
7360169695Skan  	case after_dot:
7361169695Skan          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7362169695Skan          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7363169695Skan  	    goto fail;
7364169695Skan  	  break;
7365169695Skan
7366169695Skan	case syntaxspec:
7367169695Skan          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7368169695Skan	  mcnt = *p++;
7369169695Skan	  goto matchsyntax;
7370169695Skan
7371169695Skan        case wordchar:
7372169695Skan          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7373169695Skan	  mcnt = (int) Sword;
7374169695Skan        matchsyntax:
7375169695Skan	  PREFETCH ();
7376169695Skan	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7377169695Skan	  d++;
7378169695Skan	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7379169695Skan	    goto fail;
7380169695Skan          SET_REGS_MATCHED ();
7381169695Skan	  break;
7382169695Skan
7383169695Skan	case notsyntaxspec:
7384169695Skan          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7385169695Skan	  mcnt = *p++;
7386169695Skan	  goto matchnotsyntax;
7387169695Skan
7388169695Skan        case notwordchar:
7389169695Skan          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7390169695Skan	  mcnt = (int) Sword;
7391169695Skan        matchnotsyntax:
7392169695Skan	  PREFETCH ();
7393169695Skan	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7394169695Skan	  d++;
7395169695Skan	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7396169695Skan	    goto fail;
7397169695Skan	  SET_REGS_MATCHED ();
7398169695Skan          break;
7399169695Skan
7400169695Skan#else /* not emacs */
7401169695Skan	case wordchar:
7402169695Skan          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7403169695Skan	  PREFETCH ();
7404169695Skan          if (!WORDCHAR_P (d))
7405169695Skan            goto fail;
7406169695Skan	  SET_REGS_MATCHED ();
7407169695Skan          d++;
7408169695Skan	  break;
7409169695Skan
7410169695Skan	case notwordchar:
7411169695Skan          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7412169695Skan	  PREFETCH ();
7413169695Skan	  if (WORDCHAR_P (d))
7414169695Skan            goto fail;
7415169695Skan          SET_REGS_MATCHED ();
7416169695Skan          d++;
7417169695Skan	  break;
7418169695Skan#endif /* not emacs */
7419169695Skan
7420169695Skan        default:
7421169695Skan          abort ();
7422169695Skan	}
7423169695Skan      continue;  /* Successfully executed one pattern command; keep going.  */
7424169695Skan
7425169695Skan
7426169695Skan    /* We goto here if a matching operation fails. */
7427169695Skan    fail:
7428169695Skan      if (!FAIL_STACK_EMPTY ())
7429169695Skan	{ /* A restart point is known.  Restore to that state.  */
7430169695Skan          DEBUG_PRINT1 ("\nFAIL:\n");
7431169695Skan          POP_FAILURE_POINT (d, p,
7432169695Skan                             lowest_active_reg, highest_active_reg,
7433169695Skan                             regstart, regend, reg_info);
7434169695Skan
7435169695Skan          /* If this failure point is a dummy, try the next one.  */
7436169695Skan          if (!p)
7437169695Skan	    goto fail;
7438169695Skan
7439169695Skan          /* If we failed to the end of the pattern, don't examine *p.  */
7440169695Skan	  assert (p <= pend);
7441169695Skan          if (p < pend)
7442169695Skan            {
7443169695Skan              boolean is_a_jump_n = false;
7444169695Skan
7445169695Skan              /* If failed to a backwards jump that's part of a repetition
7446169695Skan                 loop, need to pop this failure point and use the next one.  */
7447169695Skan              switch ((re_opcode_t) *p)
7448169695Skan                {
7449169695Skan                case jump_n:
7450169695Skan                  is_a_jump_n = true;
7451169695Skan                case maybe_pop_jump:
7452169695Skan                case pop_failure_jump:
7453169695Skan                case jump:
7454169695Skan                  p1 = p + 1;
7455169695Skan                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7456169695Skan                  p1 += mcnt;
7457169695Skan
7458169695Skan                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7459169695Skan                      || (!is_a_jump_n
7460169695Skan                          && (re_opcode_t) *p1 == on_failure_jump))
7461169695Skan                    goto fail;
7462169695Skan                  break;
7463169695Skan                default:
7464169695Skan                  /* do nothing */ ;
7465169695Skan                }
7466169695Skan            }
7467169695Skan
7468169695Skan          if (d >= string1 && d <= end1)
7469169695Skan	    dend = end_match_1;
7470169695Skan        }
7471169695Skan      else
7472169695Skan        break;   /* Matching at this starting point really fails.  */
7473169695Skan    } /* for (;;) */
7474169695Skan
7475169695Skan  if (best_regs_set)
7476169695Skan    goto restore_best_regs;
7477169695Skan
7478169695Skan  FREE_VARIABLES ();
7479169695Skan
7480169695Skan  return -1;         			/* Failure to match.  */
7481169695Skan} /* re_match_2 */
7482169695Skan
7483169695Skan/* Subroutine definitions for re_match_2.  */
7484169695Skan
7485169695Skan
7486169695Skan/* We are passed P pointing to a register number after a start_memory.
7487169695Skan
7488169695Skan   Return true if the pattern up to the corresponding stop_memory can
7489169695Skan   match the empty string, and false otherwise.
7490169695Skan
7491169695Skan   If we find the matching stop_memory, sets P to point to one past its number.
7492169695Skan   Otherwise, sets P to an undefined byte less than or equal to END.
7493169695Skan
7494169695Skan   We don't handle duplicates properly (yet).  */
7495169695Skan
7496169695Skanstatic boolean
7497169695SkanPREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7498169695Skan                                   PREFIX(register_info_type) *reg_info)
7499169695Skan{
7500169695Skan  int mcnt;
7501169695Skan  /* Point to after the args to the start_memory.  */
7502169695Skan  UCHAR_T *p1 = *p + 2;
7503169695Skan
7504169695Skan  while (p1 < end)
7505169695Skan    {
7506169695Skan      /* Skip over opcodes that can match nothing, and return true or
7507169695Skan	 false, as appropriate, when we get to one that can't, or to the
7508169695Skan         matching stop_memory.  */
7509169695Skan
7510169695Skan      switch ((re_opcode_t) *p1)
7511169695Skan        {
7512169695Skan        /* Could be either a loop or a series of alternatives.  */
7513169695Skan        case on_failure_jump:
7514169695Skan          p1++;
7515169695Skan          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7516169695Skan
7517169695Skan          /* If the next operation is not a jump backwards in the
7518169695Skan	     pattern.  */
7519169695Skan
7520169695Skan	  if (mcnt >= 0)
7521169695Skan	    {
7522169695Skan              /* Go through the on_failure_jumps of the alternatives,
7523169695Skan                 seeing if any of the alternatives cannot match nothing.
7524169695Skan                 The last alternative starts with only a jump,
7525169695Skan                 whereas the rest start with on_failure_jump and end
7526169695Skan                 with a jump, e.g., here is the pattern for `a|b|c':
7527169695Skan
7528169695Skan                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7529169695Skan                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7530169695Skan                 /exactn/1/c
7531169695Skan
7532169695Skan                 So, we have to first go through the first (n-1)
7533169695Skan                 alternatives and then deal with the last one separately.  */
7534169695Skan
7535169695Skan
7536169695Skan              /* Deal with the first (n-1) alternatives, which start
7537169695Skan                 with an on_failure_jump (see above) that jumps to right
7538169695Skan                 past a jump_past_alt.  */
7539169695Skan
7540169695Skan              while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7541169695Skan		     jump_past_alt)
7542169695Skan                {
7543169695Skan                  /* `mcnt' holds how many bytes long the alternative
7544169695Skan                     is, including the ending `jump_past_alt' and
7545169695Skan                     its number.  */
7546169695Skan
7547169695Skan                  if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7548169695Skan						(1 + OFFSET_ADDRESS_SIZE),
7549169695Skan						reg_info))
7550169695Skan                    return false;
7551169695Skan
7552169695Skan                  /* Move to right after this alternative, including the
7553169695Skan		     jump_past_alt.  */
7554169695Skan                  p1 += mcnt;
7555169695Skan
7556169695Skan                  /* Break if it's the beginning of an n-th alternative
7557169695Skan                     that doesn't begin with an on_failure_jump.  */
7558169695Skan                  if ((re_opcode_t) *p1 != on_failure_jump)
7559169695Skan                    break;
7560169695Skan
7561169695Skan		  /* Still have to check that it's not an n-th
7562169695Skan		     alternative that starts with an on_failure_jump.  */
7563169695Skan		  p1++;
7564169695Skan                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7565169695Skan                  if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7566169695Skan		      jump_past_alt)
7567169695Skan                    {
7568169695Skan		      /* Get to the beginning of the n-th alternative.  */
7569169695Skan                      p1 -= 1 + OFFSET_ADDRESS_SIZE;
7570169695Skan                      break;
7571169695Skan                    }
7572169695Skan                }
7573169695Skan
7574169695Skan              /* Deal with the last alternative: go back and get number
7575169695Skan                 of the `jump_past_alt' just before it.  `mcnt' contains
7576169695Skan                 the length of the alternative.  */
7577169695Skan              EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7578169695Skan
7579169695Skan              if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7580169695Skan                return false;
7581169695Skan
7582169695Skan              p1 += mcnt;	/* Get past the n-th alternative.  */
7583169695Skan            } /* if mcnt > 0 */
7584169695Skan          break;
7585169695Skan
7586169695Skan
7587169695Skan        case stop_memory:
7588169695Skan	  assert (p1[1] == **p);
7589169695Skan          *p = p1 + 2;
7590169695Skan          return true;
7591169695Skan
7592169695Skan
7593169695Skan        default:
7594169695Skan          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7595169695Skan            return false;
7596169695Skan        }
7597169695Skan    } /* while p1 < end */
7598169695Skan
7599169695Skan  return false;
7600169695Skan} /* group_match_null_string_p */
7601169695Skan
7602169695Skan
7603169695Skan/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7604169695Skan   It expects P to be the first byte of a single alternative and END one
7605169695Skan   byte past the last. The alternative can contain groups.  */
7606169695Skan
7607169695Skanstatic boolean
7608169695SkanPREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
7609169695Skan                                 PREFIX(register_info_type) *reg_info)
7610169695Skan{
7611169695Skan  int mcnt;
7612169695Skan  UCHAR_T *p1 = p;
7613169695Skan
7614169695Skan  while (p1 < end)
7615169695Skan    {
7616169695Skan      /* Skip over opcodes that can match nothing, and break when we get
7617169695Skan         to one that can't.  */
7618169695Skan
7619169695Skan      switch ((re_opcode_t) *p1)
7620169695Skan        {
7621169695Skan	/* It's a loop.  */
7622169695Skan        case on_failure_jump:
7623169695Skan          p1++;
7624169695Skan          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7625169695Skan          p1 += mcnt;
7626169695Skan          break;
7627169695Skan
7628169695Skan	default:
7629169695Skan          if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7630169695Skan            return false;
7631169695Skan        }
7632169695Skan    }  /* while p1 < end */
7633169695Skan
7634169695Skan  return true;
7635169695Skan} /* alt_match_null_string_p */
7636169695Skan
7637169695Skan
7638169695Skan/* Deals with the ops common to group_match_null_string_p and
7639169695Skan   alt_match_null_string_p.
7640169695Skan
7641169695Skan   Sets P to one after the op and its arguments, if any.  */
7642169695Skan
7643169695Skanstatic boolean
7644169695SkanPREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7645169695Skan                                       PREFIX(register_info_type) *reg_info)
7646169695Skan{
7647169695Skan  int mcnt;
7648169695Skan  boolean ret;
7649169695Skan  int reg_no;
7650169695Skan  UCHAR_T *p1 = *p;
7651169695Skan
7652169695Skan  switch ((re_opcode_t) *p1++)
7653169695Skan    {
7654169695Skan    case no_op:
7655169695Skan    case begline:
7656169695Skan    case endline:
7657169695Skan    case begbuf:
7658169695Skan    case endbuf:
7659169695Skan    case wordbeg:
7660169695Skan    case wordend:
7661169695Skan    case wordbound:
7662169695Skan    case notwordbound:
7663169695Skan#ifdef emacs
7664169695Skan    case before_dot:
7665169695Skan    case at_dot:
7666169695Skan    case after_dot:
7667169695Skan#endif
7668169695Skan      break;
7669169695Skan
7670169695Skan    case start_memory:
7671169695Skan      reg_no = *p1;
7672169695Skan      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7673169695Skan      ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7674169695Skan
7675169695Skan      /* Have to set this here in case we're checking a group which
7676169695Skan         contains a group and a back reference to it.  */
7677169695Skan
7678169695Skan      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7679169695Skan        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7680169695Skan
7681169695Skan      if (!ret)
7682169695Skan        return false;
7683169695Skan      break;
7684169695Skan
7685169695Skan    /* If this is an optimized succeed_n for zero times, make the jump.  */
7686169695Skan    case jump:
7687169695Skan      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7688169695Skan      if (mcnt >= 0)
7689169695Skan        p1 += mcnt;
7690169695Skan      else
7691169695Skan        return false;
7692169695Skan      break;
7693169695Skan
7694169695Skan    case succeed_n:
7695169695Skan      /* Get to the number of times to succeed.  */
7696169695Skan      p1 += OFFSET_ADDRESS_SIZE;
7697169695Skan      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7698169695Skan
7699169695Skan      if (mcnt == 0)
7700169695Skan        {
7701169695Skan          p1 -= 2 * OFFSET_ADDRESS_SIZE;
7702169695Skan          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7703169695Skan          p1 += mcnt;
7704169695Skan        }
7705169695Skan      else
7706169695Skan        return false;
7707169695Skan      break;
7708169695Skan
7709169695Skan    case duplicate:
7710169695Skan      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7711169695Skan        return false;
7712169695Skan      break;
7713169695Skan
7714169695Skan    case set_number_at:
7715169695Skan      p1 += 2 * OFFSET_ADDRESS_SIZE;
7716169695Skan
7717169695Skan    default:
7718169695Skan      /* All other opcodes mean we cannot match the empty string.  */
7719169695Skan      return false;
7720169695Skan  }
7721169695Skan
7722169695Skan  *p = p1;
7723169695Skan  return true;
7724169695Skan} /* common_op_match_null_string_p */
7725169695Skan
7726169695Skan
7727169695Skan/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7728169695Skan   bytes; nonzero otherwise.  */
7729169695Skan
7730169695Skanstatic int
7731169695SkanPREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
7732169695Skan                        RE_TRANSLATE_TYPE translate)
7733169695Skan{
7734169695Skan  register const UCHAR_T *p1 = (const UCHAR_T *) s1;
7735169695Skan  register const UCHAR_T *p2 = (const UCHAR_T *) s2;
7736169695Skan  while (len)
7737169695Skan    {
7738169695Skan#ifdef WCHAR
7739169695Skan      if (((*p1<=0xff)?translate[*p1++]:*p1++)
7740169695Skan	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
7741169695Skan	return 1;
7742169695Skan#else /* BYTE */
7743169695Skan      if (translate[*p1++] != translate[*p2++]) return 1;
7744169695Skan#endif /* WCHAR */
7745169695Skan      len--;
7746169695Skan    }
7747169695Skan  return 0;
7748169695Skan}
7749169695Skan
7750169695Skan
7751169695Skan#else /* not INSIDE_RECURSION */
7752169695Skan
7753169695Skan/* Entry points for GNU code.  */
7754169695Skan
7755169695Skan/* re_compile_pattern is the GNU regular expression compiler: it
7756169695Skan   compiles PATTERN (of length SIZE) and puts the result in BUFP.
7757169695Skan   Returns 0 if the pattern was valid, otherwise an error string.
7758169695Skan
7759169695Skan   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7760169695Skan   are set in BUFP on entry.
7761169695Skan
7762169695Skan   We call regex_compile to do the actual compilation.  */
7763169695Skan
7764169695Skanconst char *
7765169695Skanre_compile_pattern (const char *pattern, size_t length,
7766169695Skan                    struct re_pattern_buffer *bufp)
7767169695Skan{
7768169695Skan  reg_errcode_t ret;
7769169695Skan
7770169695Skan  /* GNU code is written to assume at least RE_NREGS registers will be set
7771169695Skan     (and at least one extra will be -1).  */
7772169695Skan  bufp->regs_allocated = REGS_UNALLOCATED;
7773169695Skan
7774169695Skan  /* And GNU code determines whether or not to get register information
7775169695Skan     by passing null for the REGS argument to re_match, etc., not by
7776169695Skan     setting no_sub.  */
7777169695Skan  bufp->no_sub = 0;
7778169695Skan
7779169695Skan  /* Match anchors at newline.  */
7780169695Skan  bufp->newline_anchor = 1;
7781169695Skan
7782169695Skan# ifdef MBS_SUPPORT
7783169695Skan  if (MB_CUR_MAX != 1)
7784169695Skan    ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
7785169695Skan  else
7786169695Skan# endif
7787169695Skan    ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
7788169695Skan
7789169695Skan  if (!ret)
7790169695Skan    return NULL;
7791169695Skan  return gettext (re_error_msgid[(int) ret]);
7792169695Skan}
7793169695Skan#ifdef _LIBC
7794169695Skanweak_alias (__re_compile_pattern, re_compile_pattern)
7795169695Skan#endif
7796169695Skan
7797169695Skan/* Entry points compatible with 4.2 BSD regex library.  We don't define
7798169695Skan   them unless specifically requested.  */
7799169695Skan
7800169695Skan#if defined _REGEX_RE_COMP || defined _LIBC
7801169695Skan
7802169695Skan/* BSD has one and only one pattern buffer.  */
7803169695Skanstatic struct re_pattern_buffer re_comp_buf;
7804169695Skan
7805169695Skanchar *
7806169695Skan#ifdef _LIBC
7807169695Skan/* Make these definitions weak in libc, so POSIX programs can redefine
7808169695Skan   these names if they don't use our functions, and still use
7809169695Skan   regcomp/regexec below without link errors.  */
7810169695Skanweak_function
7811169695Skan#endif
7812169695Skanre_comp (const char *s)
7813169695Skan{
7814169695Skan  reg_errcode_t ret;
7815169695Skan
7816169695Skan  if (!s)
7817169695Skan    {
7818169695Skan      if (!re_comp_buf.buffer)
7819169695Skan	return (char *) gettext ("No previous regular expression");
7820169695Skan      return 0;
7821169695Skan    }
7822169695Skan
7823169695Skan  if (!re_comp_buf.buffer)
7824169695Skan    {
7825169695Skan      re_comp_buf.buffer = (unsigned char *) malloc (200);
7826169695Skan      if (re_comp_buf.buffer == NULL)
7827169695Skan        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
7828169695Skan      re_comp_buf.allocated = 200;
7829169695Skan
7830169695Skan      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
7831169695Skan      if (re_comp_buf.fastmap == NULL)
7832169695Skan	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
7833169695Skan    }
7834169695Skan
7835169695Skan  /* Since `re_exec' always passes NULL for the `regs' argument, we
7836169695Skan     don't need to initialize the pattern buffer fields which affect it.  */
7837169695Skan
7838169695Skan  /* Match anchors at newlines.  */
7839169695Skan  re_comp_buf.newline_anchor = 1;
7840169695Skan
7841169695Skan# ifdef MBS_SUPPORT
7842169695Skan  if (MB_CUR_MAX != 1)
7843169695Skan    ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7844169695Skan  else
7845169695Skan# endif
7846169695Skan    ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7847169695Skan
7848169695Skan  if (!ret)
7849169695Skan    return NULL;
7850169695Skan
7851169695Skan  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
7852169695Skan  return (char *) gettext (re_error_msgid[(int) ret]);
7853169695Skan}
7854169695Skan
7855169695Skan
7856169695Skanint
7857169695Skan#ifdef _LIBC
7858169695Skanweak_function
7859169695Skan#endif
7860169695Skanre_exec (const char *s)
7861169695Skan{
7862169695Skan  const int len = strlen (s);
7863169695Skan  return
7864169695Skan    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
7865169695Skan}
7866169695Skan
7867169695Skan#endif /* _REGEX_RE_COMP */
7868169695Skan
7869169695Skan/* POSIX.2 functions.  Don't define these for Emacs.  */
7870169695Skan
7871169695Skan#ifndef emacs
7872169695Skan
7873169695Skan/* regcomp takes a regular expression as a string and compiles it.
7874169695Skan
7875169695Skan   PREG is a regex_t *.  We do not expect any fields to be initialized,
7876169695Skan   since POSIX says we shouldn't.  Thus, we set
7877169695Skan
7878169695Skan     `buffer' to the compiled pattern;
7879169695Skan     `used' to the length of the compiled pattern;
7880169695Skan     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7881169695Skan       REG_EXTENDED bit in CFLAGS is set; otherwise, to
7882169695Skan       RE_SYNTAX_POSIX_BASIC;
7883169695Skan     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7884169695Skan     `fastmap' to an allocated space for the fastmap;
7885169695Skan     `fastmap_accurate' to zero;
7886169695Skan     `re_nsub' to the number of subexpressions in PATTERN.
7887169695Skan
7888169695Skan   PATTERN is the address of the pattern string.
7889169695Skan
7890169695Skan   CFLAGS is a series of bits which affect compilation.
7891169695Skan
7892169695Skan     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7893169695Skan     use POSIX basic syntax.
7894169695Skan
7895169695Skan     If REG_NEWLINE is set, then . and [^...] don't match newline.
7896169695Skan     Also, regexec will try a match beginning after every newline.
7897169695Skan
7898169695Skan     If REG_ICASE is set, then we considers upper- and lowercase
7899169695Skan     versions of letters to be equivalent when matching.
7900169695Skan
7901169695Skan     If REG_NOSUB is set, then when PREG is passed to regexec, that
7902169695Skan     routine will report only success or failure, and nothing about the
7903169695Skan     registers.
7904169695Skan
7905169695Skan   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
7906169695Skan   the return codes and their meanings.)  */
7907169695Skan
7908169695Skanint
7909169695Skanregcomp (regex_t *preg, const char *pattern, int cflags)
7910169695Skan{
7911169695Skan  reg_errcode_t ret;
7912169695Skan  reg_syntax_t syntax
7913169695Skan    = (cflags & REG_EXTENDED) ?
7914169695Skan      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7915169695Skan
7916169695Skan  /* regex_compile will allocate the space for the compiled pattern.  */
7917169695Skan  preg->buffer = 0;
7918169695Skan  preg->allocated = 0;
7919169695Skan  preg->used = 0;
7920169695Skan
7921169695Skan  /* Try to allocate space for the fastmap.  */
7922169695Skan  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
7923169695Skan
7924169695Skan  if (cflags & REG_ICASE)
7925169695Skan    {
7926169695Skan      int i;
7927169695Skan
7928169695Skan      preg->translate
7929169695Skan	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
7930169695Skan				      * sizeof (*(RE_TRANSLATE_TYPE)0));
7931169695Skan      if (preg->translate == NULL)
7932169695Skan        return (int) REG_ESPACE;
7933169695Skan
7934169695Skan      /* Map uppercase characters to corresponding lowercase ones.  */
7935169695Skan      for (i = 0; i < CHAR_SET_SIZE; i++)
7936169695Skan        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
7937169695Skan    }
7938169695Skan  else
7939169695Skan    preg->translate = NULL;
7940169695Skan
7941169695Skan  /* If REG_NEWLINE is set, newlines are treated differently.  */
7942169695Skan  if (cflags & REG_NEWLINE)
7943169695Skan    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
7944169695Skan      syntax &= ~RE_DOT_NEWLINE;
7945169695Skan      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
7946169695Skan      /* It also changes the matching behavior.  */
7947169695Skan      preg->newline_anchor = 1;
7948169695Skan    }
7949169695Skan  else
7950169695Skan    preg->newline_anchor = 0;
7951169695Skan
7952169695Skan  preg->no_sub = !!(cflags & REG_NOSUB);
7953169695Skan
7954169695Skan  /* POSIX says a null character in the pattern terminates it, so we
7955169695Skan     can use strlen here in compiling the pattern.  */
7956169695Skan# ifdef MBS_SUPPORT
7957169695Skan  if (MB_CUR_MAX != 1)
7958169695Skan    ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
7959169695Skan  else
7960169695Skan# endif
7961169695Skan    ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
7962169695Skan
7963169695Skan  /* POSIX doesn't distinguish between an unmatched open-group and an
7964169695Skan     unmatched close-group: both are REG_EPAREN.  */
7965169695Skan  if (ret == REG_ERPAREN) ret = REG_EPAREN;
7966169695Skan
7967169695Skan  if (ret == REG_NOERROR && preg->fastmap)
7968169695Skan    {
7969169695Skan      /* Compute the fastmap now, since regexec cannot modify the pattern
7970169695Skan	 buffer.  */
7971169695Skan      if (re_compile_fastmap (preg) == -2)
7972169695Skan	{
7973169695Skan	  /* Some error occurred while computing the fastmap, just forget
7974169695Skan	     about it.  */
7975169695Skan	  free (preg->fastmap);
7976169695Skan	  preg->fastmap = NULL;
7977169695Skan	}
7978169695Skan    }
7979169695Skan
7980169695Skan  return (int) ret;
7981169695Skan}
7982169695Skan#ifdef _LIBC
7983169695Skanweak_alias (__regcomp, regcomp)
7984169695Skan#endif
7985169695Skan
7986169695Skan
7987169695Skan/* regexec searches for a given pattern, specified by PREG, in the
7988169695Skan   string STRING.
7989169695Skan
7990169695Skan   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
7991169695Skan   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
7992169695Skan   least NMATCH elements, and we set them to the offsets of the
7993169695Skan   corresponding matched substrings.
7994169695Skan
7995169695Skan   EFLAGS specifies `execution flags' which affect matching: if
7996169695Skan   REG_NOTBOL is set, then ^ does not match at the beginning of the
7997169695Skan   string; if REG_NOTEOL is set, then $ does not match at the end.
7998169695Skan
7999169695Skan   We return 0 if we find a match and REG_NOMATCH if not.  */
8000169695Skan
8001169695Skanint
8002169695Skanregexec (const regex_t *preg, const char *string, size_t nmatch,
8003169695Skan         regmatch_t pmatch[], int eflags)
8004169695Skan{
8005169695Skan  int ret;
8006169695Skan  struct re_registers regs;
8007169695Skan  regex_t private_preg;
8008169695Skan  int len = strlen (string);
8009169695Skan  boolean want_reg_info = !preg->no_sub && nmatch > 0;
8010169695Skan
8011169695Skan  private_preg = *preg;
8012169695Skan
8013169695Skan  private_preg.not_bol = !!(eflags & REG_NOTBOL);
8014169695Skan  private_preg.not_eol = !!(eflags & REG_NOTEOL);
8015169695Skan
8016169695Skan  /* The user has told us exactly how many registers to return
8017169695Skan     information about, via `nmatch'.  We have to pass that on to the
8018169695Skan     matching routines.  */
8019169695Skan  private_preg.regs_allocated = REGS_FIXED;
8020169695Skan
8021169695Skan  if (want_reg_info)
8022169695Skan    {
8023169695Skan      regs.num_regs = nmatch;
8024169695Skan      regs.start = TALLOC (nmatch * 2, regoff_t);
8025169695Skan      if (regs.start == NULL)
8026169695Skan        return (int) REG_NOMATCH;
8027169695Skan      regs.end = regs.start + nmatch;
8028169695Skan    }
8029169695Skan
8030169695Skan  /* Perform the searching operation.  */
8031169695Skan  ret = re_search (&private_preg, string, len,
8032169695Skan                   /* start: */ 0, /* range: */ len,
8033169695Skan                   want_reg_info ? &regs : (struct re_registers *) 0);
8034169695Skan
8035169695Skan  /* Copy the register information to the POSIX structure.  */
8036169695Skan  if (want_reg_info)
8037169695Skan    {
8038169695Skan      if (ret >= 0)
8039169695Skan        {
8040169695Skan          unsigned r;
8041169695Skan
8042169695Skan          for (r = 0; r < nmatch; r++)
8043169695Skan            {
8044169695Skan              pmatch[r].rm_so = regs.start[r];
8045169695Skan              pmatch[r].rm_eo = regs.end[r];
8046169695Skan            }
8047169695Skan        }
8048169695Skan
8049169695Skan      /* If we needed the temporary register info, free the space now.  */
8050169695Skan      free (regs.start);
8051169695Skan    }
8052169695Skan
8053169695Skan  /* We want zero return to mean success, unlike `re_search'.  */
8054169695Skan  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8055169695Skan}
8056169695Skan#ifdef _LIBC
8057169695Skanweak_alias (__regexec, regexec)
8058169695Skan#endif
8059169695Skan
8060169695Skan
8061169695Skan/* Returns a message corresponding to an error code, ERRCODE, returned
8062169695Skan   from either regcomp or regexec.   We don't use PREG here.  */
8063169695Skan
8064169695Skansize_t
8065169695Skanregerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
8066169695Skan          char *errbuf, size_t errbuf_size)
8067169695Skan{
8068169695Skan  const char *msg;
8069169695Skan  size_t msg_size;
8070169695Skan
8071169695Skan  if (errcode < 0
8072169695Skan      || errcode >= (int) (sizeof (re_error_msgid)
8073169695Skan			   / sizeof (re_error_msgid[0])))
8074169695Skan    /* Only error codes returned by the rest of the code should be passed
8075169695Skan       to this routine.  If we are given anything else, or if other regex
8076169695Skan       code generates an invalid error code, then the program has a bug.
8077169695Skan       Dump core so we can fix it.  */
8078169695Skan    abort ();
8079169695Skan
8080169695Skan  msg = gettext (re_error_msgid[errcode]);
8081169695Skan
8082169695Skan  msg_size = strlen (msg) + 1; /* Includes the null.  */
8083169695Skan
8084169695Skan  if (errbuf_size != 0)
8085169695Skan    {
8086169695Skan      if (msg_size > errbuf_size)
8087169695Skan        {
8088169695Skan#if defined HAVE_MEMPCPY || defined _LIBC
8089169695Skan	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8090169695Skan#else
8091169695Skan          memcpy (errbuf, msg, errbuf_size - 1);
8092169695Skan          errbuf[errbuf_size - 1] = 0;
8093169695Skan#endif
8094169695Skan        }
8095169695Skan      else
8096169695Skan        memcpy (errbuf, msg, msg_size);
8097169695Skan    }
8098169695Skan
8099169695Skan  return msg_size;
8100169695Skan}
8101169695Skan#ifdef _LIBC
8102169695Skanweak_alias (__regerror, regerror)
8103169695Skan#endif
8104169695Skan
8105169695Skan
8106169695Skan/* Free dynamically allocated space used by PREG.  */
8107169695Skan
8108169695Skanvoid
8109169695Skanregfree (regex_t *preg)
8110169695Skan{
8111169695Skan  if (preg->buffer != NULL)
8112169695Skan    free (preg->buffer);
8113169695Skan  preg->buffer = NULL;
8114169695Skan
8115169695Skan  preg->allocated = 0;
8116169695Skan  preg->used = 0;
8117169695Skan
8118169695Skan  if (preg->fastmap != NULL)
8119169695Skan    free (preg->fastmap);
8120169695Skan  preg->fastmap = NULL;
8121169695Skan  preg->fastmap_accurate = 0;
8122169695Skan
8123169695Skan  if (preg->translate != NULL)
8124169695Skan    free (preg->translate);
8125169695Skan  preg->translate = NULL;
8126169695Skan}
8127169695Skan#ifdef _LIBC
8128169695Skanweak_alias (__regfree, regfree)
8129169695Skan#endif
8130169695Skan
8131169695Skan#endif /* not emacs  */
8132169695Skan
8133169695Skan#endif /* not INSIDE_RECURSION */
8134169695Skan
8135169695Skan
8136169695Skan#undef STORE_NUMBER
8137169695Skan#undef STORE_NUMBER_AND_INCR
8138169695Skan#undef EXTRACT_NUMBER
8139169695Skan#undef EXTRACT_NUMBER_AND_INCR
8140169695Skan
8141169695Skan#undef DEBUG_PRINT_COMPILED_PATTERN
8142169695Skan#undef DEBUG_PRINT_DOUBLE_STRING
8143169695Skan
8144169695Skan#undef INIT_FAIL_STACK
8145169695Skan#undef RESET_FAIL_STACK
8146169695Skan#undef DOUBLE_FAIL_STACK
8147169695Skan#undef PUSH_PATTERN_OP
8148169695Skan#undef PUSH_FAILURE_POINTER
8149169695Skan#undef PUSH_FAILURE_INT
8150169695Skan#undef PUSH_FAILURE_ELT
8151169695Skan#undef POP_FAILURE_POINTER
8152169695Skan#undef POP_FAILURE_INT
8153169695Skan#undef POP_FAILURE_ELT
8154169695Skan#undef DEBUG_PUSH
8155169695Skan#undef DEBUG_POP
8156169695Skan#undef PUSH_FAILURE_POINT
8157169695Skan#undef POP_FAILURE_POINT
8158169695Skan
8159169695Skan#undef REG_UNSET_VALUE
8160169695Skan#undef REG_UNSET
8161169695Skan
8162169695Skan#undef PATFETCH
8163169695Skan#undef PATFETCH_RAW
8164169695Skan#undef PATUNFETCH
8165169695Skan#undef TRANSLATE
8166169695Skan
8167169695Skan#undef INIT_BUF_SIZE
8168169695Skan#undef GET_BUFFER_SPACE
8169169695Skan#undef BUF_PUSH
8170169695Skan#undef BUF_PUSH_2
8171169695Skan#undef BUF_PUSH_3
8172169695Skan#undef STORE_JUMP
8173169695Skan#undef STORE_JUMP2
8174169695Skan#undef INSERT_JUMP
8175169695Skan#undef INSERT_JUMP2
8176169695Skan#undef EXTEND_BUFFER
8177169695Skan#undef GET_UNSIGNED_NUMBER
8178169695Skan#undef FREE_STACK_RETURN
8179169695Skan
8180169695Skan# undef POINTER_TO_OFFSET
8181169695Skan# undef MATCHING_IN_FRST_STRING
8182169695Skan# undef PREFETCH
8183169695Skan# undef AT_STRINGS_BEG
8184169695Skan# undef AT_STRINGS_END
8185169695Skan# undef WORDCHAR_P
8186169695Skan# undef FREE_VAR
8187169695Skan# undef FREE_VARIABLES
8188169695Skan# undef NO_HIGHEST_ACTIVE_REG
8189169695Skan# undef NO_LOWEST_ACTIVE_REG
8190169695Skan
8191169695Skan# undef CHAR_T
8192169695Skan# undef UCHAR_T
8193169695Skan# undef COMPILED_BUFFER_VAR
8194169695Skan# undef OFFSET_ADDRESS_SIZE
8195169695Skan# undef CHAR_CLASS_SIZE
8196169695Skan# undef PREFIX
8197169695Skan# undef ARG_PREFIX
8198169695Skan# undef PUT_CHAR
8199169695Skan# undef BYTE
8200169695Skan# undef WCHAR
8201169695Skan
8202169695Skan# define DEFINED_ONCE
8203