/* Extended regular expression matching and search library,
   version 0.12.
   (Implements POSIX draft P1003.2/D11.2, except for some of the
   internationalization features.)

   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301 USA.  */

/* This file has been modified for usage in libiberty.  It includes "xregex.h"
   instead of <regex.h>.  The "xregex.h" header file renames all external
   routines with an "x" prefix so they do not collide with the native regex
   routines or with other components regex routines. */
/* AIX requires this to be the first thing in the file.
*/ 30130561Sobrien#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC 3189857Sobrien #pragma alloca 3289857Sobrien#endif 3389857Sobrien 3489857Sobrien#undef _GNU_SOURCE 3589857Sobrien#define _GNU_SOURCE 3689857Sobrien 37218822Sdim#ifndef INSIDE_RECURSION 38218822Sdim# ifdef HAVE_CONFIG_H 39218822Sdim# include <config.h> 40218822Sdim# endif 4189857Sobrien#endif 4289857Sobrien 43130561Sobrien#include <ansidecl.h> 44130561Sobrien 4589857Sobrien#ifndef INSIDE_RECURSION 4689857Sobrien 4789857Sobrien# if defined STDC_HEADERS && !defined emacs 4889857Sobrien# include <stddef.h> 4989857Sobrien# else 5089857Sobrien/* We need this for `regex.h', and perhaps for the Emacs include files. */ 5189857Sobrien# include <sys/types.h> 5289857Sobrien# endif 5389857Sobrien 5489857Sobrien# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) 5589857Sobrien 5689857Sobrien/* For platform which support the ISO C amendement 1 functionality we 5789857Sobrien support user defined character classes. */ 5889857Sobrien# if defined _LIBC || WIDE_CHAR_SUPPORT 5989857Sobrien/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 6089857Sobrien# include <wchar.h> 6189857Sobrien# include <wctype.h> 6289857Sobrien# endif 6389857Sobrien 6489857Sobrien# ifdef _LIBC 6589857Sobrien/* We have to keep the namespace clean. 
*/ 6689857Sobrien# define regfree(preg) __regfree (preg) 6789857Sobrien# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 6889857Sobrien# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 6989857Sobrien# define regerror(errcode, preg, errbuf, errbuf_size) \ 7089857Sobrien __regerror(errcode, preg, errbuf, errbuf_size) 7189857Sobrien# define re_set_registers(bu, re, nu, st, en) \ 7289857Sobrien __re_set_registers (bu, re, nu, st, en) 7389857Sobrien# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 7489857Sobrien __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 7589857Sobrien# define re_match(bufp, string, size, pos, regs) \ 7689857Sobrien __re_match (bufp, string, size, pos, regs) 7789857Sobrien# define re_search(bufp, string, size, startpos, range, regs) \ 7889857Sobrien __re_search (bufp, string, size, startpos, range, regs) 7989857Sobrien# define re_compile_pattern(pattern, length, bufp) \ 8089857Sobrien __re_compile_pattern (pattern, length, bufp) 8189857Sobrien# define re_set_syntax(syntax) __re_set_syntax (syntax) 8289857Sobrien# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ 8389857Sobrien __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) 8489857Sobrien# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) 8589857Sobrien 8689857Sobrien# define btowc __btowc 8789857Sobrien 8889857Sobrien/* We are also using some library internals. */ 8989857Sobrien# include <locale/localeinfo.h> 9089857Sobrien# include <locale/elem-hash.h> 9189857Sobrien# include <langinfo.h> 9289857Sobrien# include <locale/coll-lookup.h> 9389857Sobrien# endif 9489857Sobrien 9589857Sobrien/* This is for other GNU distributions with internationalized messages. 
*/ 9689857Sobrien# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC 9789857Sobrien# include <libintl.h> 9889857Sobrien# ifdef _LIBC 9989857Sobrien# undef gettext 10089857Sobrien# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) 10189857Sobrien# endif 10289857Sobrien# else 10389857Sobrien# define gettext(msgid) (msgid) 10489857Sobrien# endif 10589857Sobrien 10689857Sobrien# ifndef gettext_noop 10789857Sobrien/* This define is so xgettext can find the internationalizable 10889857Sobrien strings. */ 10989857Sobrien# define gettext_noop(String) String 11089857Sobrien# endif 11189857Sobrien 11289857Sobrien/* The `emacs' switch turns on certain matching commands 11389857Sobrien that make sense only in Emacs. */ 11489857Sobrien# ifdef emacs 11589857Sobrien 11689857Sobrien# include "lisp.h" 11789857Sobrien# include "buffer.h" 11889857Sobrien# include "syntax.h" 11989857Sobrien 12089857Sobrien# else /* not emacs */ 12189857Sobrien 12289857Sobrien/* If we are not linking with Emacs proper, 12389857Sobrien we can't use the relocating allocator 12489857Sobrien even if config.h says that we can. */ 12589857Sobrien# undef REL_ALLOC 12689857Sobrien 12789857Sobrien# if defined STDC_HEADERS || defined _LIBC 12889857Sobrien# include <stdlib.h> 12989857Sobrien# else 13089857Sobrienchar *malloc (); 13189857Sobrienchar *realloc (); 13289857Sobrien# endif 13389857Sobrien 13489857Sobrien/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 13589857Sobrien If nothing else has been done, use the method below. */ 13689857Sobrien# ifdef INHIBIT_STRING_HEADER 13789857Sobrien# if !(defined HAVE_BZERO && defined HAVE_BCOPY) 13889857Sobrien# if !defined bzero && !defined bcopy 13989857Sobrien# undef INHIBIT_STRING_HEADER 14089857Sobrien# endif 14189857Sobrien# endif 14289857Sobrien# endif 14389857Sobrien 14489857Sobrien/* This is the normal way of making sure we have a bcopy and a bzero. 
14589857Sobrien This is used in most programs--a few other programs avoid this 14689857Sobrien by defining INHIBIT_STRING_HEADER. */ 14789857Sobrien# ifndef INHIBIT_STRING_HEADER 14889857Sobrien# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 14989857Sobrien# include <string.h> 15089857Sobrien# ifndef bzero 15189857Sobrien# ifndef _LIBC 15289857Sobrien# define bzero(s, n) (memset (s, '\0', n), (s)) 15389857Sobrien# else 15489857Sobrien# define bzero(s, n) __bzero (s, n) 15589857Sobrien# endif 15689857Sobrien# endif 15789857Sobrien# else 15889857Sobrien# include <strings.h> 15989857Sobrien# ifndef memcmp 16089857Sobrien# define memcmp(s1, s2, n) bcmp (s1, s2, n) 16189857Sobrien# endif 16289857Sobrien# ifndef memcpy 16389857Sobrien# define memcpy(d, s, n) (bcopy (s, d, n), (d)) 16489857Sobrien# endif 16589857Sobrien# endif 16689857Sobrien# endif 16789857Sobrien 16889857Sobrien/* Define the syntax stuff for \<, \>, etc. */ 16989857Sobrien 17089857Sobrien/* This must be nonzero for the wordchar and notwordchar pattern 17189857Sobrien commands in re_match_2. */ 17289857Sobrien# ifndef Sword 17389857Sobrien# define Sword 1 17489857Sobrien# endif 17589857Sobrien 17689857Sobrien# ifdef SWITCH_ENUM_BUG 17789857Sobrien# define SWITCH_ENUM_CAST(x) ((int)(x)) 17889857Sobrien# else 17989857Sobrien# define SWITCH_ENUM_CAST(x) (x) 18089857Sobrien# endif 18189857Sobrien 18289857Sobrien# endif /* not emacs */ 18389857Sobrien 18489857Sobrien# if defined _LIBC || HAVE_LIMITS_H 18589857Sobrien# include <limits.h> 18689857Sobrien# endif 18789857Sobrien 18889857Sobrien# ifndef MB_LEN_MAX 18989857Sobrien# define MB_LEN_MAX 1 19089857Sobrien# endif 19189857Sobrien 19289857Sobrien/* Get the interface, including the syntax bits. */ 19389857Sobrien# include "xregex.h" /* change for libiberty */ 19489857Sobrien 19589857Sobrien/* isalpha etc. are used for the character classes. 
*/ 19689857Sobrien# include <ctype.h> 19789857Sobrien 19889857Sobrien/* Jim Meyering writes: 19989857Sobrien 20089857Sobrien "... Some ctype macros are valid only for character codes that 20189857Sobrien isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 20289857Sobrien using /bin/cc or gcc but without giving an ansi option). So, all 20389857Sobrien ctype uses should be through macros like ISPRINT... If 20489857Sobrien STDC_HEADERS is defined, then autoconf has verified that the ctype 20589857Sobrien macros don't need to be guarded with references to isascii. ... 20689857Sobrien Defining isascii to 1 should let any compiler worth its salt 20789857Sobrien eliminate the && through constant folding." 20889857Sobrien Solaris defines some of these symbols so we must undefine them first. */ 20989857Sobrien 21089857Sobrien# undef ISASCII 21189857Sobrien# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 21289857Sobrien# define ISASCII(c) 1 21389857Sobrien# else 21489857Sobrien# define ISASCII(c) isascii(c) 21589857Sobrien# endif 21689857Sobrien 21789857Sobrien# ifdef isblank 21889857Sobrien# define ISBLANK(c) (ISASCII (c) && isblank (c)) 21989857Sobrien# else 22089857Sobrien# define ISBLANK(c) ((c) == ' ' || (c) == '\t') 22189857Sobrien# endif 22289857Sobrien# ifdef isgraph 22389857Sobrien# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 22489857Sobrien# else 22589857Sobrien# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 22689857Sobrien# endif 22789857Sobrien 22889857Sobrien# undef ISPRINT 22989857Sobrien# define ISPRINT(c) (ISASCII (c) && isprint (c)) 23089857Sobrien# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 23189857Sobrien# define ISALNUM(c) (ISASCII (c) && isalnum (c)) 23289857Sobrien# define ISALPHA(c) (ISASCII (c) && isalpha (c)) 23389857Sobrien# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 23489857Sobrien# define ISLOWER(c) (ISASCII (c) && islower (c)) 23589857Sobrien# define ISPUNCT(c) (ISASCII (c) && 
ispunct (c)) 23689857Sobrien# define ISSPACE(c) (ISASCII (c) && isspace (c)) 23789857Sobrien# define ISUPPER(c) (ISASCII (c) && isupper (c)) 23889857Sobrien# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 23989857Sobrien 24089857Sobrien# ifdef _tolower 24189857Sobrien# define TOLOWER(c) _tolower(c) 24289857Sobrien# else 24389857Sobrien# define TOLOWER(c) tolower(c) 24489857Sobrien# endif 24589857Sobrien 24689857Sobrien# ifndef NULL 24789857Sobrien# define NULL (void *)0 24889857Sobrien# endif 24989857Sobrien 25089857Sobrien/* We remove any previous definition of `SIGN_EXTEND_CHAR', 25189857Sobrien since ours (we hope) works properly with all combinations of 25289857Sobrien machines, compilers, `char' and `unsigned char' argument types. 25389857Sobrien (Per Bothner suggested the basic approach.) */ 25489857Sobrien# undef SIGN_EXTEND_CHAR 25589857Sobrien# if __STDC__ 25689857Sobrien# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 25789857Sobrien# else /* not __STDC__ */ 25889857Sobrien/* As in Harbison and Steele. */ 25989857Sobrien# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 26089857Sobrien# endif 26189857Sobrien 26289857Sobrien# ifndef emacs 26389857Sobrien/* How many characters in the character set. 
*/ 26489857Sobrien# define CHAR_SET_SIZE 256 26589857Sobrien 26689857Sobrien# ifdef SYNTAX_TABLE 26789857Sobrien 26889857Sobrienextern char *re_syntax_table; 26989857Sobrien 27089857Sobrien# else /* not SYNTAX_TABLE */ 27189857Sobrien 27289857Sobrienstatic char re_syntax_table[CHAR_SET_SIZE]; 27389857Sobrien 274218822Sdimstatic void init_syntax_once (void); 27589857Sobrien 27689857Sobrienstatic void 277218822Sdiminit_syntax_once (void) 27889857Sobrien{ 27989857Sobrien register int c; 28089857Sobrien static int done = 0; 28189857Sobrien 28289857Sobrien if (done) 28389857Sobrien return; 28489857Sobrien bzero (re_syntax_table, sizeof re_syntax_table); 28589857Sobrien 28689857Sobrien for (c = 0; c < CHAR_SET_SIZE; ++c) 28789857Sobrien if (ISALNUM (c)) 28889857Sobrien re_syntax_table[c] = Sword; 28989857Sobrien 29089857Sobrien re_syntax_table['_'] = Sword; 29189857Sobrien 29289857Sobrien done = 1; 29389857Sobrien} 29489857Sobrien 29589857Sobrien# endif /* not SYNTAX_TABLE */ 29689857Sobrien 29789857Sobrien# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] 29889857Sobrien 29989857Sobrien# endif /* emacs */ 30089857Sobrien 30189857Sobrien/* Integer type for pointers. */ 30289857Sobrien# if !defined _LIBC && !defined HAVE_UINTPTR_T 30389857Sobrientypedef unsigned long int uintptr_t; 30489857Sobrien# endif 30589857Sobrien 30689857Sobrien/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 30789857Sobrien use `alloca' instead of `malloc'. This is because using malloc in 30889857Sobrien re_search* or re_match* could cause memory leaks when C-g is used in 30989857Sobrien Emacs; also, malloc is slower and causes storage fragmentation. On 31089857Sobrien the other hand, malloc is more portable, and easier to debug. 31189857Sobrien 31289857Sobrien Because we sometimes use alloca, some routines have to be macros, 31389857Sobrien not functions -- `alloca'-allocated space disappears at the end of the 31489857Sobrien function it is called in. 
*/ 31589857Sobrien 31689857Sobrien# ifdef REGEX_MALLOC 31789857Sobrien 31889857Sobrien# define REGEX_ALLOCATE malloc 31989857Sobrien# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 32089857Sobrien# define REGEX_FREE free 32189857Sobrien 32289857Sobrien# else /* not REGEX_MALLOC */ 32389857Sobrien 32489857Sobrien/* Emacs already defines alloca, sometimes. */ 32589857Sobrien# ifndef alloca 32689857Sobrien 32789857Sobrien/* Make alloca work the best possible way. */ 32889857Sobrien# ifdef __GNUC__ 32989857Sobrien# define alloca __builtin_alloca 33089857Sobrien# else /* not __GNUC__ */ 33189857Sobrien# if HAVE_ALLOCA_H 33289857Sobrien# include <alloca.h> 33389857Sobrien# endif /* HAVE_ALLOCA_H */ 33489857Sobrien# endif /* not __GNUC__ */ 33589857Sobrien 33689857Sobrien# endif /* not alloca */ 33789857Sobrien 33889857Sobrien# define REGEX_ALLOCATE alloca 33989857Sobrien 34089857Sobrien/* Assumes a `char *destination' variable. */ 34189857Sobrien# define REGEX_REALLOCATE(source, osize, nsize) \ 34289857Sobrien (destination = (char *) alloca (nsize), \ 34389857Sobrien memcpy (destination, source, osize)) 34489857Sobrien 34589857Sobrien/* No need to do anything to free, after alloca. */ 34689857Sobrien# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 34789857Sobrien 34889857Sobrien# endif /* not REGEX_MALLOC */ 34989857Sobrien 35089857Sobrien/* Define how to allocate the failure stack. 
*/ 35189857Sobrien 35289857Sobrien# if defined REL_ALLOC && defined REGEX_MALLOC 35389857Sobrien 35489857Sobrien# define REGEX_ALLOCATE_STACK(size) \ 35589857Sobrien r_alloc (&failure_stack_ptr, (size)) 35689857Sobrien# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 35789857Sobrien r_re_alloc (&failure_stack_ptr, (nsize)) 35889857Sobrien# define REGEX_FREE_STACK(ptr) \ 35989857Sobrien r_alloc_free (&failure_stack_ptr) 36089857Sobrien 36189857Sobrien# else /* not using relocating allocator */ 36289857Sobrien 36389857Sobrien# ifdef REGEX_MALLOC 36489857Sobrien 36589857Sobrien# define REGEX_ALLOCATE_STACK malloc 36689857Sobrien# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 36789857Sobrien# define REGEX_FREE_STACK free 36889857Sobrien 36989857Sobrien# else /* not REGEX_MALLOC */ 37089857Sobrien 37189857Sobrien# define REGEX_ALLOCATE_STACK alloca 37289857Sobrien 37389857Sobrien# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 37489857Sobrien REGEX_REALLOCATE (source, osize, nsize) 37589857Sobrien/* No need to explicitly free anything. */ 37689857Sobrien# define REGEX_FREE_STACK(arg) 37789857Sobrien 37889857Sobrien# endif /* not REGEX_MALLOC */ 37989857Sobrien# endif /* not using relocating allocator */ 38089857Sobrien 38189857Sobrien 38289857Sobrien/* True if `size1' is non-NULL and PTR is pointing anywhere inside 38389857Sobrien `string1' or just past its end. This works if PTR is NULL, which is 38489857Sobrien a good thing. */ 38589857Sobrien# define FIRST_STRING_P(ptr) \ 38689857Sobrien (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 38789857Sobrien 38889857Sobrien/* (Re)Allocate N items of type T using malloc, or fail. 
*/ 38989857Sobrien# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 39089857Sobrien# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 39189857Sobrien# define RETALLOC_IF(addr, n, t) \ 39289857Sobrien if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 39389857Sobrien# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 39489857Sobrien 39589857Sobrien# define BYTEWIDTH 8 /* In bits. */ 39689857Sobrien 39789857Sobrien# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 39889857Sobrien 39989857Sobrien# undef MAX 40089857Sobrien# undef MIN 40189857Sobrien# define MAX(a, b) ((a) > (b) ? (a) : (b)) 40289857Sobrien# define MIN(a, b) ((a) < (b) ? (a) : (b)) 40389857Sobrien 40489857Sobrientypedef char boolean; 40589857Sobrien# define false 0 40689857Sobrien# define true 1 40789857Sobrien 408218822Sdimstatic reg_errcode_t byte_regex_compile (const char *pattern, size_t size, 409218822Sdim reg_syntax_t syntax, 410218822Sdim struct re_pattern_buffer *bufp); 41189857Sobrien 412218822Sdimstatic int byte_re_match_2_internal (struct re_pattern_buffer *bufp, 413218822Sdim const char *string1, int size1, 414218822Sdim const char *string2, int size2, 415218822Sdim int pos, 416218822Sdim struct re_registers *regs, 417218822Sdim int stop); 418218822Sdimstatic int byte_re_search_2 (struct re_pattern_buffer *bufp, 419218822Sdim const char *string1, int size1, 420218822Sdim const char *string2, int size2, 421218822Sdim int startpos, int range, 422218822Sdim struct re_registers *regs, int stop); 423218822Sdimstatic int byte_re_compile_fastmap (struct re_pattern_buffer *bufp); 42489857Sobrien 42589857Sobrien#ifdef MBS_SUPPORT 426218822Sdimstatic reg_errcode_t wcs_regex_compile (const char *pattern, size_t size, 427218822Sdim reg_syntax_t syntax, 428218822Sdim struct re_pattern_buffer *bufp); 42989857Sobrien 43089857Sobrien 431218822Sdimstatic int wcs_re_match_2_internal (struct re_pattern_buffer *bufp, 432218822Sdim const char 
*cstring1, int csize1, 433218822Sdim const char *cstring2, int csize2, 434218822Sdim int pos, 435218822Sdim struct re_registers *regs, 436218822Sdim int stop, 437218822Sdim wchar_t *string1, int size1, 438218822Sdim wchar_t *string2, int size2, 439218822Sdim int *mbs_offset1, int *mbs_offset2); 440218822Sdimstatic int wcs_re_search_2 (struct re_pattern_buffer *bufp, 441218822Sdim const char *string1, int size1, 442218822Sdim const char *string2, int size2, 443218822Sdim int startpos, int range, 444218822Sdim struct re_registers *regs, int stop); 445218822Sdimstatic int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp); 44689857Sobrien#endif 44789857Sobrien 44889857Sobrien/* These are the command codes that appear in compiled regular 44989857Sobrien expressions. Some opcodes are followed by argument bytes. A 45089857Sobrien command code can specify any interpretation whatsoever for its 45189857Sobrien arguments. Zero bytes may appear in the compiled regular expression. */ 45289857Sobrien 45389857Sobrientypedef enum 45489857Sobrien{ 45589857Sobrien no_op = 0, 45689857Sobrien 45789857Sobrien /* Succeed right away--no more backtracking. */ 45889857Sobrien succeed, 45989857Sobrien 46089857Sobrien /* Followed by one byte giving n, then by n literal bytes. */ 46189857Sobrien exactn, 46289857Sobrien 46389857Sobrien# ifdef MBS_SUPPORT 46489857Sobrien /* Same as exactn, but contains binary data. */ 46589857Sobrien exactn_bin, 46689857Sobrien# endif 46789857Sobrien 46889857Sobrien /* Matches any (more or less) character. */ 46989857Sobrien anychar, 47089857Sobrien 47189857Sobrien /* Matches any one char belonging to specified set. First 47289857Sobrien following byte is number of bitmap bytes. Then come bytes 47389857Sobrien for a bitmap saying which chars are in. Bits in each byte 47489857Sobrien are ordered low-bit-first. A character is in the set if its 47589857Sobrien bit is 1. 
A character too large to have a bit in the map is 47689857Sobrien automatically not in the set. */ 47789857Sobrien /* ifdef MBS_SUPPORT, following element is length of character 47889857Sobrien classes, length of collating symbols, length of equivalence 47989857Sobrien classes, length of character ranges, and length of characters. 48089857Sobrien Next, character class element, collating symbols elements, 48189857Sobrien equivalence class elements, range elements, and character 48289857Sobrien elements follow. 48389857Sobrien See regex_compile function. */ 48489857Sobrien charset, 48589857Sobrien 48689857Sobrien /* Same parameters as charset, but match any character that is 48789857Sobrien not one of those specified. */ 48889857Sobrien charset_not, 48989857Sobrien 49089857Sobrien /* Start remembering the text that is matched, for storing in a 49189857Sobrien register. Followed by one byte with the register number, in 49289857Sobrien the range 0 to one less than the pattern buffer's re_nsub 49389857Sobrien field. Then followed by one byte with the number of groups 49489857Sobrien inner to this one. (This last has to be part of the 49589857Sobrien start_memory only because we need it in the on_failure_jump 49689857Sobrien of re_match_2.) */ 49789857Sobrien start_memory, 49889857Sobrien 49989857Sobrien /* Stop remembering the text that is matched and store it in a 50089857Sobrien memory register. Followed by one byte with the register 50189857Sobrien number, in the range 0 to one less than `re_nsub' in the 50289857Sobrien pattern buffer, and one byte with the number of inner groups, 50389857Sobrien just like `start_memory'. (We need the number of inner 50489857Sobrien groups here because we don't have any easy way of finding the 50589857Sobrien corresponding start_memory when we're at a stop_memory.) */ 50689857Sobrien stop_memory, 50789857Sobrien 50889857Sobrien /* Match a duplicate of something remembered. 
Followed by one 50989857Sobrien byte containing the register number. */ 51089857Sobrien duplicate, 51189857Sobrien 51289857Sobrien /* Fail unless at beginning of line. */ 51389857Sobrien begline, 51489857Sobrien 51589857Sobrien /* Fail unless at end of line. */ 51689857Sobrien endline, 51789857Sobrien 51889857Sobrien /* Succeeds if at beginning of buffer (if emacs) or at beginning 51989857Sobrien of string to be matched (if not). */ 52089857Sobrien begbuf, 52189857Sobrien 52289857Sobrien /* Analogously, for end of buffer/string. */ 52389857Sobrien endbuf, 52489857Sobrien 52589857Sobrien /* Followed by two byte relative address to which to jump. */ 52689857Sobrien jump, 52789857Sobrien 52889857Sobrien /* Same as jump, but marks the end of an alternative. */ 52989857Sobrien jump_past_alt, 53089857Sobrien 53189857Sobrien /* Followed by two-byte relative address of place to resume at 53289857Sobrien in case of failure. */ 53389857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 53489857Sobrien on_failure_jump, 53589857Sobrien 53689857Sobrien /* Like on_failure_jump, but pushes a placeholder instead of the 53789857Sobrien current string position when executed. */ 53889857Sobrien on_failure_keep_string_jump, 53989857Sobrien 54089857Sobrien /* Throw away latest failure point and then jump to following 54189857Sobrien two-byte relative address. */ 54289857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 54389857Sobrien pop_failure_jump, 54489857Sobrien 54589857Sobrien /* Change to pop_failure_jump if know won't have to backtrack to 54689857Sobrien match; otherwise change to jump. This is used to jump 54789857Sobrien back to the beginning of a repeat. If what follows this jump 54889857Sobrien clearly won't match what the repeat does, such that we can be 54989857Sobrien sure that there is no use backtracking out of repetitions 55089857Sobrien already matched, then we change it to a pop_failure_jump. 55189857Sobrien Followed by two-byte address. 
*/ 55289857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 55389857Sobrien maybe_pop_jump, 55489857Sobrien 55589857Sobrien /* Jump to following two-byte address, and push a dummy failure 55689857Sobrien point. This failure point will be thrown away if an attempt 55789857Sobrien is made to use it for a failure. A `+' construct makes this 55889857Sobrien before the first repeat. Also used as an intermediary kind 55989857Sobrien of jump when compiling an alternative. */ 56089857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 56189857Sobrien dummy_failure_jump, 56289857Sobrien 56389857Sobrien /* Push a dummy failure point and continue. Used at the end of 56489857Sobrien alternatives. */ 56589857Sobrien push_dummy_failure, 56689857Sobrien 56789857Sobrien /* Followed by two-byte relative address and two-byte number n. 56889857Sobrien After matching N times, jump to the address upon failure. */ 56989857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 57089857Sobrien succeed_n, 57189857Sobrien 57289857Sobrien /* Followed by two-byte relative address, and two-byte number n. 57389857Sobrien Jump to the address N times, then fail. */ 57489857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 57589857Sobrien jump_n, 57689857Sobrien 57789857Sobrien /* Set the following two-byte relative address to the 57889857Sobrien subsequent two-byte number. The address *includes* the two 57989857Sobrien bytes of number. */ 58089857Sobrien /* ifdef MBS_SUPPORT, the size of address is 1. */ 58189857Sobrien set_number_at, 58289857Sobrien 58389857Sobrien wordchar, /* Matches any word-constituent character. */ 58489857Sobrien notwordchar, /* Matches any char that is not a word-constituent. */ 58589857Sobrien 58689857Sobrien wordbeg, /* Succeeds if at word beginning. */ 58789857Sobrien wordend, /* Succeeds if at word end. */ 58889857Sobrien 58989857Sobrien wordbound, /* Succeeds if at a word boundary. 
*/ 59089857Sobrien notwordbound /* Succeeds if not at a word boundary. */ 59189857Sobrien 59289857Sobrien# ifdef emacs 59389857Sobrien ,before_dot, /* Succeeds if before point. */ 59489857Sobrien at_dot, /* Succeeds if at point. */ 59589857Sobrien after_dot, /* Succeeds if after point. */ 59689857Sobrien 59789857Sobrien /* Matches any character whose syntax is specified. Followed by 59889857Sobrien a byte which contains a syntax code, e.g., Sword. */ 59989857Sobrien syntaxspec, 60089857Sobrien 60189857Sobrien /* Matches any character whose syntax is not that specified. */ 60289857Sobrien notsyntaxspec 60389857Sobrien# endif /* emacs */ 60489857Sobrien} re_opcode_t; 60589857Sobrien#endif /* not INSIDE_RECURSION */ 60689857Sobrien 60789857Sobrien 60889857Sobrien#ifdef BYTE 60989857Sobrien# define CHAR_T char 61089857Sobrien# define UCHAR_T unsigned char 61189857Sobrien# define COMPILED_BUFFER_VAR bufp->buffer 61289857Sobrien# define OFFSET_ADDRESS_SIZE 2 613218822Sdim# define PREFIX(name) byte_##name 61489857Sobrien# define ARG_PREFIX(name) name 61589857Sobrien# define PUT_CHAR(c) putchar (c) 61689857Sobrien#else 61789857Sobrien# ifdef WCHAR 61889857Sobrien# define CHAR_T wchar_t 61989857Sobrien# define UCHAR_T wchar_t 62089857Sobrien# define COMPILED_BUFFER_VAR wc_buffer 62189857Sobrien# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ 62289857Sobrien# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) 623218822Sdim# define PREFIX(name) wcs_##name 624218822Sdim# define ARG_PREFIX(name) c##name 62589857Sobrien/* Should we use wide stream?? 
*/ 62689857Sobrien# define PUT_CHAR(c) printf ("%C", c); 62789857Sobrien# define TRUE 1 62889857Sobrien# define FALSE 0 62989857Sobrien# else 63089857Sobrien# ifdef MBS_SUPPORT 63189857Sobrien# define WCHAR 63289857Sobrien# define INSIDE_RECURSION 63389857Sobrien# include "regex.c" 63489857Sobrien# undef INSIDE_RECURSION 63589857Sobrien# endif 63689857Sobrien# define BYTE 63789857Sobrien# define INSIDE_RECURSION 63889857Sobrien# include "regex.c" 63989857Sobrien# undef INSIDE_RECURSION 64089857Sobrien# endif 64189857Sobrien#endif 64289857Sobrien 64389857Sobrien#ifdef INSIDE_RECURSION 64489857Sobrien/* Common operations on the compiled pattern. */ 64589857Sobrien 64689857Sobrien/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 64789857Sobrien/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 64889857Sobrien 64989857Sobrien# ifdef WCHAR 65089857Sobrien# define STORE_NUMBER(destination, number) \ 65189857Sobrien do { \ 65289857Sobrien *(destination) = (UCHAR_T)(number); \ 65389857Sobrien } while (0) 65489857Sobrien# else /* BYTE */ 65589857Sobrien# define STORE_NUMBER(destination, number) \ 65689857Sobrien do { \ 65789857Sobrien (destination)[0] = (number) & 0377; \ 65889857Sobrien (destination)[1] = (number) >> 8; \ 65989857Sobrien } while (0) 66089857Sobrien# endif /* WCHAR */ 66189857Sobrien 66289857Sobrien/* Same as STORE_NUMBER, except increment DESTINATION to 66389857Sobrien the byte after where the number is stored. Therefore, DESTINATION 66489857Sobrien must be an lvalue. */ 66589857Sobrien/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 66689857Sobrien 66789857Sobrien# define STORE_NUMBER_AND_INCR(destination, number) \ 66889857Sobrien do { \ 66989857Sobrien STORE_NUMBER (destination, number); \ 67089857Sobrien (destination) += OFFSET_ADDRESS_SIZE; \ 67189857Sobrien } while (0) 67289857Sobrien 67389857Sobrien/* Put into DESTINATION a number stored in two contiguous bytes starting 67489857Sobrien at SOURCE. 
*/ 67589857Sobrien/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 67689857Sobrien 67789857Sobrien# ifdef WCHAR 67889857Sobrien# define EXTRACT_NUMBER(destination, source) \ 67989857Sobrien do { \ 68089857Sobrien (destination) = *(source); \ 68189857Sobrien } while (0) 68289857Sobrien# else /* BYTE */ 68389857Sobrien# define EXTRACT_NUMBER(destination, source) \ 68489857Sobrien do { \ 68589857Sobrien (destination) = *(source) & 0377; \ 68689857Sobrien (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 68789857Sobrien } while (0) 68889857Sobrien# endif 68989857Sobrien 69089857Sobrien# ifdef DEBUG 691218822Sdimstatic void PREFIX(extract_number) (int *dest, UCHAR_T *source); 69289857Sobrienstatic void 693218822SdimPREFIX(extract_number) (int *dest, UCHAR_T *source) 69489857Sobrien{ 69589857Sobrien# ifdef WCHAR 69689857Sobrien *dest = *source; 69789857Sobrien# else /* BYTE */ 69889857Sobrien int temp = SIGN_EXTEND_CHAR (*(source + 1)); 69989857Sobrien *dest = *source & 0377; 70089857Sobrien *dest += temp << 8; 70189857Sobrien# endif 70289857Sobrien} 70389857Sobrien 70489857Sobrien# ifndef EXTRACT_MACROS /* To debug the macros. */ 70589857Sobrien# undef EXTRACT_NUMBER 70689857Sobrien# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) 70789857Sobrien# endif /* not EXTRACT_MACROS */ 70889857Sobrien 70989857Sobrien# endif /* DEBUG */ 71089857Sobrien 71189857Sobrien/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 71289857Sobrien SOURCE must be an lvalue. 
*/ 71389857Sobrien 71489857Sobrien# define EXTRACT_NUMBER_AND_INCR(destination, source) \ 71589857Sobrien do { \ 71689857Sobrien EXTRACT_NUMBER (destination, source); \ 71789857Sobrien (source) += OFFSET_ADDRESS_SIZE; \ 71889857Sobrien } while (0) 71989857Sobrien 72089857Sobrien# ifdef DEBUG 721218822Sdimstatic void PREFIX(extract_number_and_incr) (int *destination, 722218822Sdim UCHAR_T **source); 72389857Sobrienstatic void 724218822SdimPREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source) 72589857Sobrien{ 72689857Sobrien PREFIX(extract_number) (destination, *source); 72789857Sobrien *source += OFFSET_ADDRESS_SIZE; 72889857Sobrien} 72989857Sobrien 73089857Sobrien# ifndef EXTRACT_MACROS 73189857Sobrien# undef EXTRACT_NUMBER_AND_INCR 73289857Sobrien# define EXTRACT_NUMBER_AND_INCR(dest, src) \ 73389857Sobrien PREFIX(extract_number_and_incr) (&dest, &src) 73489857Sobrien# endif /* not EXTRACT_MACROS */ 73589857Sobrien 73689857Sobrien# endif /* DEBUG */ 73789857Sobrien 73889857Sobrien 73989857Sobrien 74089857Sobrien/* If DEBUG is defined, Regex prints many voluminous messages about what 74189857Sobrien it is doing (if the variable `debug' is nonzero). If linked with the 74289857Sobrien main program in `iregex.c', you can enter patterns and strings 74389857Sobrien interactively. And if linked with the main program in `main.c' and 74489857Sobrien the other test files, you can run the already-written tests. */ 74589857Sobrien 74689857Sobrien# ifdef DEBUG 74789857Sobrien 74889857Sobrien# ifndef DEFINED_ONCE 74989857Sobrien 75089857Sobrien/* We use standard I/O for debugging. */ 75189857Sobrien# include <stdio.h> 75289857Sobrien 75389857Sobrien/* It is useful to test things that ``must'' be true when debugging. 
*/ 75489857Sobrien# include <assert.h> 75589857Sobrien 75689857Sobrienstatic int debug; 75789857Sobrien 75889857Sobrien# define DEBUG_STATEMENT(e) e 75989857Sobrien# define DEBUG_PRINT1(x) if (debug) printf (x) 76089857Sobrien# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 76189857Sobrien# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 76289857Sobrien# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 76389857Sobrien# endif /* not DEFINED_ONCE */ 76489857Sobrien 76589857Sobrien# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 76689857Sobrien if (debug) PREFIX(print_partial_compiled_pattern) (s, e) 76789857Sobrien# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 76889857Sobrien if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) 76989857Sobrien 77089857Sobrien 77189857Sobrien/* Print the fastmap in human-readable form. */ 77289857Sobrien 77389857Sobrien# ifndef DEFINED_ONCE 77489857Sobrienvoid 775218822Sdimprint_fastmap (char *fastmap) 77689857Sobrien{ 77789857Sobrien unsigned was_a_range = 0; 77889857Sobrien unsigned i = 0; 77989857Sobrien 78089857Sobrien while (i < (1 << BYTEWIDTH)) 78189857Sobrien { 78289857Sobrien if (fastmap[i++]) 78389857Sobrien { 78489857Sobrien was_a_range = 0; 78589857Sobrien putchar (i - 1); 78689857Sobrien while (i < (1 << BYTEWIDTH) && fastmap[i]) 78789857Sobrien { 78889857Sobrien was_a_range = 1; 78989857Sobrien i++; 79089857Sobrien } 79189857Sobrien if (was_a_range) 79289857Sobrien { 79389857Sobrien printf ("-"); 79489857Sobrien putchar (i - 1); 79589857Sobrien } 79689857Sobrien } 79789857Sobrien } 79889857Sobrien putchar ('\n'); 79989857Sobrien} 80089857Sobrien# endif /* not DEFINED_ONCE */ 80189857Sobrien 80289857Sobrien 80389857Sobrien/* Print a compiled pattern string in human-readable form, starting at 80489857Sobrien the START pointer into it and ending just before the pointer END. 
*/ 80589857Sobrien 80689857Sobrienvoid 807218822SdimPREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end) 80889857Sobrien{ 80989857Sobrien int mcnt, mcnt2; 81089857Sobrien UCHAR_T *p1; 81189857Sobrien UCHAR_T *p = start; 81289857Sobrien UCHAR_T *pend = end; 81389857Sobrien 81489857Sobrien if (start == NULL) 81589857Sobrien { 81689857Sobrien printf ("(null)\n"); 81789857Sobrien return; 81889857Sobrien } 81989857Sobrien 82089857Sobrien /* Loop over pattern commands. */ 82189857Sobrien while (p < pend) 82289857Sobrien { 82389857Sobrien# ifdef _LIBC 82489857Sobrien printf ("%td:\t", p - start); 82589857Sobrien# else 82689857Sobrien printf ("%ld:\t", (long int) (p - start)); 82789857Sobrien# endif 82889857Sobrien 82989857Sobrien switch ((re_opcode_t) *p++) 83089857Sobrien { 83189857Sobrien case no_op: 83289857Sobrien printf ("/no_op"); 83389857Sobrien break; 83489857Sobrien 83589857Sobrien case exactn: 83689857Sobrien mcnt = *p++; 83789857Sobrien printf ("/exactn/%d", mcnt); 83889857Sobrien do 83989857Sobrien { 84089857Sobrien putchar ('/'); 84189857Sobrien PUT_CHAR (*p++); 84289857Sobrien } 84389857Sobrien while (--mcnt); 84489857Sobrien break; 84589857Sobrien 84689857Sobrien# ifdef MBS_SUPPORT 84789857Sobrien case exactn_bin: 84889857Sobrien mcnt = *p++; 84989857Sobrien printf ("/exactn_bin/%d", mcnt); 85089857Sobrien do 85189857Sobrien { 85289857Sobrien printf("/%lx", (long int) *p++); 85389857Sobrien } 85489857Sobrien while (--mcnt); 85589857Sobrien break; 85689857Sobrien# endif /* MBS_SUPPORT */ 85789857Sobrien 85889857Sobrien case start_memory: 85989857Sobrien mcnt = *p++; 86089857Sobrien printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); 86189857Sobrien break; 86289857Sobrien 86389857Sobrien case stop_memory: 86489857Sobrien mcnt = *p++; 86589857Sobrien printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); 86689857Sobrien break; 86789857Sobrien 86889857Sobrien case duplicate: 86989857Sobrien printf ("/duplicate/%ld", (long int) *p++); 
87089857Sobrien break; 87189857Sobrien 87289857Sobrien case anychar: 87389857Sobrien printf ("/anychar"); 87489857Sobrien break; 87589857Sobrien 87689857Sobrien case charset: 87789857Sobrien case charset_not: 87889857Sobrien { 87989857Sobrien# ifdef WCHAR 88089857Sobrien int i, length; 88189857Sobrien wchar_t *workp = p; 88289857Sobrien printf ("/charset [%s", 88389857Sobrien (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); 88489857Sobrien p += 5; 88589857Sobrien length = *workp++; /* the length of char_classes */ 88689857Sobrien for (i=0 ; i<length ; i++) 88789857Sobrien printf("[:%lx:]", (long int) *p++); 88889857Sobrien length = *workp++; /* the length of collating_symbol */ 88989857Sobrien for (i=0 ; i<length ;) 89089857Sobrien { 89189857Sobrien printf("[."); 89289857Sobrien while(*p != 0) 89389857Sobrien PUT_CHAR((i++,*p++)); 89489857Sobrien i++,p++; 89589857Sobrien printf(".]"); 89689857Sobrien } 89789857Sobrien length = *workp++; /* the length of equivalence_class */ 89889857Sobrien for (i=0 ; i<length ;) 89989857Sobrien { 90089857Sobrien printf("[="); 90189857Sobrien while(*p != 0) 90289857Sobrien PUT_CHAR((i++,*p++)); 90389857Sobrien i++,p++; 90489857Sobrien printf("=]"); 90589857Sobrien } 90689857Sobrien length = *workp++; /* the length of char_range */ 90789857Sobrien for (i=0 ; i<length ; i++) 90889857Sobrien { 90989857Sobrien wchar_t range_start = *p++; 91089857Sobrien wchar_t range_end = *p++; 91189857Sobrien printf("%C-%C", range_start, range_end); 91289857Sobrien } 91389857Sobrien length = *workp++; /* the length of char */ 91489857Sobrien for (i=0 ; i<length ; i++) 91589857Sobrien printf("%C", *p++); 91689857Sobrien putchar (']'); 91789857Sobrien# else 91889857Sobrien register int c, last = -100; 91989857Sobrien register int in_range = 0; 92089857Sobrien 92189857Sobrien printf ("/charset [%s", 92289857Sobrien (re_opcode_t) *(p - 1) == charset_not ? 
"^" : ""); 92389857Sobrien 92489857Sobrien assert (p + *p < pend); 92589857Sobrien 92689857Sobrien for (c = 0; c < 256; c++) 92789857Sobrien if (c / 8 < *p 92889857Sobrien && (p[1 + (c/8)] & (1 << (c % 8)))) 92989857Sobrien { 93089857Sobrien /* Are we starting a range? */ 93189857Sobrien if (last + 1 == c && ! in_range) 93289857Sobrien { 93389857Sobrien putchar ('-'); 93489857Sobrien in_range = 1; 93589857Sobrien } 93689857Sobrien /* Have we broken a range? */ 93789857Sobrien else if (last + 1 != c && in_range) 93889857Sobrien { 93989857Sobrien putchar (last); 94089857Sobrien in_range = 0; 94189857Sobrien } 94289857Sobrien 94389857Sobrien if (! in_range) 94489857Sobrien putchar (c); 94589857Sobrien 94689857Sobrien last = c; 94789857Sobrien } 94889857Sobrien 94989857Sobrien if (in_range) 95089857Sobrien putchar (last); 95189857Sobrien 95289857Sobrien putchar (']'); 95389857Sobrien 95489857Sobrien p += 1 + *p; 95589857Sobrien# endif /* WCHAR */ 95689857Sobrien } 95789857Sobrien break; 95889857Sobrien 95989857Sobrien case begline: 96089857Sobrien printf ("/begline"); 96189857Sobrien break; 96289857Sobrien 96389857Sobrien case endline: 96489857Sobrien printf ("/endline"); 96589857Sobrien break; 96689857Sobrien 96789857Sobrien case on_failure_jump: 96889857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 96989857Sobrien# ifdef _LIBC 97089857Sobrien printf ("/on_failure_jump to %td", p + mcnt - start); 97189857Sobrien# else 97289857Sobrien printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start)); 97389857Sobrien# endif 97489857Sobrien break; 97589857Sobrien 97689857Sobrien case on_failure_keep_string_jump: 97789857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 97889857Sobrien# ifdef _LIBC 97989857Sobrien printf ("/on_failure_keep_string_jump to %td", p + mcnt - start); 98089857Sobrien# else 98189857Sobrien printf ("/on_failure_keep_string_jump to %ld", 98289857Sobrien (long int) (p + mcnt - start)); 98389857Sobrien# endif 98489857Sobrien break; 
98589857Sobrien 98689857Sobrien case dummy_failure_jump: 98789857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 98889857Sobrien# ifdef _LIBC 98989857Sobrien printf ("/dummy_failure_jump to %td", p + mcnt - start); 99089857Sobrien# else 99189857Sobrien printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); 99289857Sobrien# endif 99389857Sobrien break; 99489857Sobrien 99589857Sobrien case push_dummy_failure: 99689857Sobrien printf ("/push_dummy_failure"); 99789857Sobrien break; 99889857Sobrien 99989857Sobrien case maybe_pop_jump: 100089857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 100189857Sobrien# ifdef _LIBC 100289857Sobrien printf ("/maybe_pop_jump to %td", p + mcnt - start); 100389857Sobrien# else 100489857Sobrien printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); 100589857Sobrien# endif 100689857Sobrien break; 100789857Sobrien 100889857Sobrien case pop_failure_jump: 100989857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 101089857Sobrien# ifdef _LIBC 101189857Sobrien printf ("/pop_failure_jump to %td", p + mcnt - start); 101289857Sobrien# else 101389857Sobrien printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); 101489857Sobrien# endif 101589857Sobrien break; 101689857Sobrien 101789857Sobrien case jump_past_alt: 101889857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 101989857Sobrien# ifdef _LIBC 102089857Sobrien printf ("/jump_past_alt to %td", p + mcnt - start); 102189857Sobrien# else 102289857Sobrien printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start)); 102389857Sobrien# endif 102489857Sobrien break; 102589857Sobrien 102689857Sobrien case jump: 102789857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 102889857Sobrien# ifdef _LIBC 102989857Sobrien printf ("/jump to %td", p + mcnt - start); 103089857Sobrien# else 103189857Sobrien printf ("/jump to %ld", (long int) (p + mcnt - start)); 103289857Sobrien# endif 103389857Sobrien break; 103489857Sobrien 103589857Sobrien case 
succeed_n: 103689857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 103789857Sobrien p1 = p + mcnt; 103889857Sobrien PREFIX(extract_number_and_incr) (&mcnt2, &p); 103989857Sobrien# ifdef _LIBC 104089857Sobrien printf ("/succeed_n to %td, %d times", p1 - start, mcnt2); 104189857Sobrien# else 104289857Sobrien printf ("/succeed_n to %ld, %d times", 104389857Sobrien (long int) (p1 - start), mcnt2); 104489857Sobrien# endif 104589857Sobrien break; 104689857Sobrien 104789857Sobrien case jump_n: 104889857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 104989857Sobrien p1 = p + mcnt; 105089857Sobrien PREFIX(extract_number_and_incr) (&mcnt2, &p); 105189857Sobrien printf ("/jump_n to %d, %d times", p1 - start, mcnt2); 105289857Sobrien break; 105389857Sobrien 105489857Sobrien case set_number_at: 105589857Sobrien PREFIX(extract_number_and_incr) (&mcnt, &p); 105689857Sobrien p1 = p + mcnt; 105789857Sobrien PREFIX(extract_number_and_incr) (&mcnt2, &p); 105889857Sobrien# ifdef _LIBC 105989857Sobrien printf ("/set_number_at location %td to %d", p1 - start, mcnt2); 106089857Sobrien# else 106189857Sobrien printf ("/set_number_at location %ld to %d", 106289857Sobrien (long int) (p1 - start), mcnt2); 106389857Sobrien# endif 106489857Sobrien break; 106589857Sobrien 106689857Sobrien case wordbound: 106789857Sobrien printf ("/wordbound"); 106889857Sobrien break; 106989857Sobrien 107089857Sobrien case notwordbound: 107189857Sobrien printf ("/notwordbound"); 107289857Sobrien break; 107389857Sobrien 107489857Sobrien case wordbeg: 107589857Sobrien printf ("/wordbeg"); 107689857Sobrien break; 107789857Sobrien 107889857Sobrien case wordend: 107989857Sobrien printf ("/wordend"); 108089857Sobrien break; 108189857Sobrien 108289857Sobrien# ifdef emacs 108389857Sobrien case before_dot: 108489857Sobrien printf ("/before_dot"); 108589857Sobrien break; 108689857Sobrien 108789857Sobrien case at_dot: 108889857Sobrien printf ("/at_dot"); 108989857Sobrien break; 109089857Sobrien 109189857Sobrien 
case after_dot: 109289857Sobrien printf ("/after_dot"); 109389857Sobrien break; 109489857Sobrien 109589857Sobrien case syntaxspec: 109689857Sobrien printf ("/syntaxspec"); 109789857Sobrien mcnt = *p++; 109889857Sobrien printf ("/%d", mcnt); 109989857Sobrien break; 110089857Sobrien 110189857Sobrien case notsyntaxspec: 110289857Sobrien printf ("/notsyntaxspec"); 110389857Sobrien mcnt = *p++; 110489857Sobrien printf ("/%d", mcnt); 110589857Sobrien break; 110689857Sobrien# endif /* emacs */ 110789857Sobrien 110889857Sobrien case wordchar: 110989857Sobrien printf ("/wordchar"); 111089857Sobrien break; 111189857Sobrien 111289857Sobrien case notwordchar: 111389857Sobrien printf ("/notwordchar"); 111489857Sobrien break; 111589857Sobrien 111689857Sobrien case begbuf: 111789857Sobrien printf ("/begbuf"); 111889857Sobrien break; 111989857Sobrien 112089857Sobrien case endbuf: 112189857Sobrien printf ("/endbuf"); 112289857Sobrien break; 112389857Sobrien 112489857Sobrien default: 112589857Sobrien printf ("?%ld", (long int) *(p-1)); 112689857Sobrien } 112789857Sobrien 112889857Sobrien putchar ('\n'); 112989857Sobrien } 113089857Sobrien 113189857Sobrien# ifdef _LIBC 113289857Sobrien printf ("%td:\tend of pattern.\n", p - start); 113389857Sobrien# else 113489857Sobrien printf ("%ld:\tend of pattern.\n", (long int) (p - start)); 113589857Sobrien# endif 113689857Sobrien} 113789857Sobrien 113889857Sobrien 113989857Sobrienvoid 1140218822SdimPREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp) 114189857Sobrien{ 114289857Sobrien UCHAR_T *buffer = (UCHAR_T*) bufp->buffer; 114389857Sobrien 114489857Sobrien PREFIX(print_partial_compiled_pattern) (buffer, buffer 114589857Sobrien + bufp->used / sizeof(UCHAR_T)); 114689857Sobrien printf ("%ld bytes used/%ld bytes allocated.\n", 114789857Sobrien bufp->used, bufp->allocated); 114889857Sobrien 114989857Sobrien if (bufp->fastmap_accurate && bufp->fastmap) 115089857Sobrien { 115189857Sobrien printf ("fastmap: "); 115289857Sobrien 
print_fastmap (bufp->fastmap); 115389857Sobrien } 115489857Sobrien 115589857Sobrien# ifdef _LIBC 115689857Sobrien printf ("re_nsub: %Zd\t", bufp->re_nsub); 115789857Sobrien# else 115889857Sobrien printf ("re_nsub: %ld\t", (long int) bufp->re_nsub); 115989857Sobrien# endif 116089857Sobrien printf ("regs_alloc: %d\t", bufp->regs_allocated); 116189857Sobrien printf ("can_be_null: %d\t", bufp->can_be_null); 116289857Sobrien printf ("newline_anchor: %d\n", bufp->newline_anchor); 116389857Sobrien printf ("no_sub: %d\t", bufp->no_sub); 116489857Sobrien printf ("not_bol: %d\t", bufp->not_bol); 116589857Sobrien printf ("not_eol: %d\t", bufp->not_eol); 116689857Sobrien printf ("syntax: %lx\n", bufp->syntax); 116789857Sobrien /* Perhaps we should print the translate table? */ 116889857Sobrien} 116989857Sobrien 117089857Sobrien 117189857Sobrienvoid 1172218822SdimPREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1, 1173218822Sdim int size1, const CHAR_T *string2, int size2) 117489857Sobrien{ 117589857Sobrien int this_char; 117689857Sobrien 117789857Sobrien if (where == NULL) 117889857Sobrien printf ("(null)"); 117989857Sobrien else 118089857Sobrien { 118189857Sobrien int cnt; 118289857Sobrien 118389857Sobrien if (FIRST_STRING_P (where)) 118489857Sobrien { 118589857Sobrien for (this_char = where - string1; this_char < size1; this_char++) 118689857Sobrien PUT_CHAR (string1[this_char]); 118789857Sobrien 118889857Sobrien where = string2; 118989857Sobrien } 119089857Sobrien 119189857Sobrien cnt = 0; 119289857Sobrien for (this_char = where - string2; this_char < size2; this_char++) 119389857Sobrien { 119489857Sobrien PUT_CHAR (string2[this_char]); 119589857Sobrien if (++cnt > 100) 119689857Sobrien { 119789857Sobrien fputs ("...", stdout); 119889857Sobrien break; 119989857Sobrien } 120089857Sobrien } 120189857Sobrien } 120289857Sobrien} 120389857Sobrien 120489857Sobrien# ifndef DEFINED_ONCE 120589857Sobrienvoid 1206218822Sdimprintchar (int c) 120789857Sobrien{ 
120889857Sobrien putc (c, stderr); 120989857Sobrien} 121089857Sobrien# endif 121189857Sobrien 121289857Sobrien# else /* not DEBUG */ 121389857Sobrien 121489857Sobrien# ifndef DEFINED_ONCE 121589857Sobrien# undef assert 121689857Sobrien# define assert(e) 121789857Sobrien 121889857Sobrien# define DEBUG_STATEMENT(e) 121989857Sobrien# define DEBUG_PRINT1(x) 122089857Sobrien# define DEBUG_PRINT2(x1, x2) 122189857Sobrien# define DEBUG_PRINT3(x1, x2, x3) 122289857Sobrien# define DEBUG_PRINT4(x1, x2, x3, x4) 122389857Sobrien# endif /* not DEFINED_ONCE */ 122489857Sobrien# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 122589857Sobrien# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 122689857Sobrien 122789857Sobrien# endif /* not DEBUG */ 122889857Sobrien 122989857Sobrien 123089857Sobrien 123189857Sobrien# ifdef WCHAR 123289857Sobrien/* This convert a multibyte string to a wide character string. 123389857Sobrien And write their correspondances to offset_buffer(see below) 123489857Sobrien and write whether each wchar_t is binary data to is_binary. 123589857Sobrien This assume invalid multibyte sequences as binary data. 123689857Sobrien We assume offset_buffer and is_binary is already allocated 123789857Sobrien enough space. */ 123889857Sobrien 123989857Sobrienstatic size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src, 124089857Sobrien size_t len, int *offset_buffer, 124189857Sobrien char *is_binary); 124289857Sobrienstatic size_t 1243218822Sdimconvert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len, 1244218822Sdim int *offset_buffer, char *is_binary) 124589857Sobrien /* It hold correspondances between src(char string) and 124689857Sobrien dest(wchar_t string) for optimization. 124789857Sobrien e.g. src = "xxxyzz" 124889857Sobrien dest = {'X', 'Y', 'Z'} 124989857Sobrien (each "xxx", "y" and "zz" represent one multibyte character 125089857Sobrien corresponding to 'X', 'Y' and 'Z'.) 
125189857Sobrien offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")} 125289857Sobrien = {0, 3, 4, 6} 125389857Sobrien */ 125489857Sobrien{ 125589857Sobrien wchar_t *pdest = dest; 125689857Sobrien const unsigned char *psrc = src; 125789857Sobrien size_t wc_count = 0; 125889857Sobrien 125989857Sobrien mbstate_t mbs; 126089857Sobrien int i, consumed; 126189857Sobrien size_t mb_remain = len; 126289857Sobrien size_t mb_count = 0; 126389857Sobrien 126489857Sobrien /* Initialize the conversion state. */ 126589857Sobrien memset (&mbs, 0, sizeof (mbstate_t)); 126689857Sobrien 126789857Sobrien offset_buffer[0] = 0; 126889857Sobrien for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, 126989857Sobrien psrc += consumed) 127089857Sobrien { 127189857Sobrien#ifdef _LIBC 127289857Sobrien consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs); 127389857Sobrien#else 127489857Sobrien consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); 127589857Sobrien#endif 127689857Sobrien 127789857Sobrien if (consumed <= 0) 127889857Sobrien /* failed to convert. maybe src contains binary data. 127989857Sobrien So we consume 1 byte manualy. */ 128089857Sobrien { 128189857Sobrien *pdest = *psrc; 128289857Sobrien consumed = 1; 128389857Sobrien is_binary[wc_count] = TRUE; 128489857Sobrien } 128589857Sobrien else 128689857Sobrien is_binary[wc_count] = FALSE; 128789857Sobrien /* In sjis encoding, we use yen sign as escape character in 128889857Sobrien place of reverse solidus. So we convert 0x5c(yen sign in 128989857Sobrien sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse 129089857Sobrien solidus in UCS2). */ 129189857Sobrien if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) 129289857Sobrien *pdest = (wchar_t) *psrc; 129389857Sobrien 129489857Sobrien offset_buffer[wc_count + 1] = mb_count += consumed; 129589857Sobrien } 129689857Sobrien 129789857Sobrien /* Fill remain of the buffer with sentinel. 
*/ 129889857Sobrien for (i = wc_count + 1 ; i <= len ; i++) 129989857Sobrien offset_buffer[i] = mb_count + 1; 130089857Sobrien 130189857Sobrien return wc_count; 130289857Sobrien} 130389857Sobrien 130489857Sobrien# endif /* WCHAR */ 130589857Sobrien 130689857Sobrien#else /* not INSIDE_RECURSION */ 130789857Sobrien 130889857Sobrien/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 130989857Sobrien also be assigned to arbitrarily: each pattern buffer stores its own 131089857Sobrien syntax, so it can be changed between regex compilations. */ 131189857Sobrien/* This has no initializer because initialized variables in Emacs 131289857Sobrien become read-only after dumping. */ 131389857Sobrienreg_syntax_t re_syntax_options; 131489857Sobrien 131589857Sobrien 131689857Sobrien/* Specify the precise syntax of regexps for compilation. This provides 131789857Sobrien for compatibility for various utilities which historically have 131889857Sobrien different, incompatible syntaxes. 131989857Sobrien 132089857Sobrien The argument SYNTAX is a bit mask comprised of the various bits 132189857Sobrien defined in regex.h. We return the old syntax. */ 132289857Sobrien 132389857Sobrienreg_syntax_t 1324218822Sdimre_set_syntax (reg_syntax_t syntax) 132589857Sobrien{ 132689857Sobrien reg_syntax_t ret = re_syntax_options; 132789857Sobrien 132889857Sobrien re_syntax_options = syntax; 132989857Sobrien# ifdef DEBUG 133089857Sobrien if (syntax & RE_DEBUG) 133189857Sobrien debug = 1; 133289857Sobrien else if (debug) /* was on but now is not */ 133389857Sobrien debug = 0; 133489857Sobrien# endif /* DEBUG */ 133589857Sobrien return ret; 133689857Sobrien} 133789857Sobrien# ifdef _LIBC 133889857Sobrienweak_alias (__re_set_syntax, re_set_syntax) 133989857Sobrien# endif 134089857Sobrien 134189857Sobrien/* This table gives an error message for each of the error codes listed 134289857Sobrien in regex.h. Obviously the order here has to be same as there. 
134389857Sobrien POSIX doesn't require that we do anything for REG_NOERROR, 134489857Sobrien but why not be nice? */ 134589857Sobrien 1346130561Sobrienstatic const char *re_error_msgid[] = 134789857Sobrien { 1348130561Sobrien gettext_noop ("Success"), /* REG_NOERROR */ 1349130561Sobrien gettext_noop ("No match"), /* REG_NOMATCH */ 1350130561Sobrien gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ 1351130561Sobrien gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ 1352130561Sobrien gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ 1353130561Sobrien gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ 1354130561Sobrien gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ 1355130561Sobrien gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ 1356130561Sobrien gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ 1357130561Sobrien gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ 1358130561Sobrien gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ 1359130561Sobrien gettext_noop ("Invalid range end"), /* REG_ERANGE */ 1360130561Sobrien gettext_noop ("Memory exhausted"), /* REG_ESPACE */ 1361130561Sobrien gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ 1362130561Sobrien gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 1363130561Sobrien gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 136489857Sobrien gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ 136589857Sobrien }; 136689857Sobrien 136789857Sobrien#endif /* INSIDE_RECURSION */ 136889857Sobrien 136989857Sobrien#ifndef DEFINED_ONCE 137089857Sobrien/* Avoiding alloca during matching, to placate r_alloc. */ 137189857Sobrien 137289857Sobrien/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 137389857Sobrien searching and matching functions should not call alloca. 
On some 137489857Sobrien systems, alloca is implemented in terms of malloc, and if we're 137589857Sobrien using the relocating allocator routines, then malloc could cause a 137689857Sobrien relocation, which might (if the strings being searched are in the 137789857Sobrien ralloc heap) shift the data out from underneath the regexp 137889857Sobrien routines. 137989857Sobrien 138089857Sobrien Here's another reason to avoid allocation: Emacs 138189857Sobrien processes input from X in a signal handler; processing X input may 138289857Sobrien call malloc; if input arrives while a matching routine is calling 138389857Sobrien malloc, then we're scrod. But Emacs can't just block input while 138489857Sobrien calling matching routines; then we don't notice interrupts when 138589857Sobrien they come in. So, Emacs blocks input around all regexp calls 138689857Sobrien except the matching calls, which it leaves unprotected, in the 138789857Sobrien faith that they will not malloc. */ 138889857Sobrien 138989857Sobrien/* Normally, this is fine. */ 139089857Sobrien# define MATCH_MAY_ALLOCATE 139189857Sobrien 139289857Sobrien/* When using GNU C, we are not REALLY using the C alloca, no matter 139389857Sobrien what config.h may say. So don't take precautions for it. */ 139489857Sobrien# ifdef __GNUC__ 139589857Sobrien# undef C_ALLOCA 139689857Sobrien# endif 139789857Sobrien 139889857Sobrien/* The match routines may not allocate if (1) they would do it with malloc 139989857Sobrien and (2) it's not safe for them to use malloc. 140089857Sobrien Note that if REL_ALLOC is defined, matching would not use malloc for the 140189857Sobrien failure stack, but we would still use it for the register vectors; 140289857Sobrien so REL_ALLOC should not affect this. 
*/ 140389857Sobrien# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 140489857Sobrien# undef MATCH_MAY_ALLOCATE 140589857Sobrien# endif 140689857Sobrien#endif /* not DEFINED_ONCE */ 140789857Sobrien 140889857Sobrien#ifdef INSIDE_RECURSION 140989857Sobrien/* Failure stack declarations and macros; both re_compile_fastmap and 141089857Sobrien re_match_2 use a failure stack. These have to be macros because of 141189857Sobrien REGEX_ALLOCATE_STACK. */ 141289857Sobrien 141389857Sobrien 141489857Sobrien/* Number of failure points for which to initially allocate space 141589857Sobrien when matching. If this number is exceeded, we allocate more 141689857Sobrien space, so it is not a hard limit. */ 141789857Sobrien# ifndef INIT_FAILURE_ALLOC 141889857Sobrien# define INIT_FAILURE_ALLOC 5 141989857Sobrien# endif 142089857Sobrien 142189857Sobrien/* Roughly the maximum number of failure points on the stack. Would be 142289857Sobrien exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 142389857Sobrien This is a variable only so users of regex can assign to it; we never 142489857Sobrien change it ourselves. */ 142589857Sobrien 142689857Sobrien# ifdef INT_IS_16BIT 142789857Sobrien 142889857Sobrien# ifndef DEFINED_ONCE 142989857Sobrien# if defined MATCH_MAY_ALLOCATE 143089857Sobrien/* 4400 was enough to cause a crash on Alpha OSF/1, 143189857Sobrien whose default stack limit is 2mb. 
*/ 143289857Sobrienlong int re_max_failures = 4000; 143389857Sobrien# else 143489857Sobrienlong int re_max_failures = 2000; 143589857Sobrien# endif 143689857Sobrien# endif 143789857Sobrien 143889857Sobrienunion PREFIX(fail_stack_elt) 143989857Sobrien{ 144089857Sobrien UCHAR_T *pointer; 144189857Sobrien long int integer; 144289857Sobrien}; 144389857Sobrien 144489857Sobrientypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 144589857Sobrien 144689857Sobrientypedef struct 144789857Sobrien{ 144889857Sobrien PREFIX(fail_stack_elt_t) *stack; 144989857Sobrien unsigned long int size; 145089857Sobrien unsigned long int avail; /* Offset of next open position. */ 145189857Sobrien} PREFIX(fail_stack_type); 145289857Sobrien 145389857Sobrien# else /* not INT_IS_16BIT */ 145489857Sobrien 145589857Sobrien# ifndef DEFINED_ONCE 145689857Sobrien# if defined MATCH_MAY_ALLOCATE 145789857Sobrien/* 4400 was enough to cause a crash on Alpha OSF/1, 145889857Sobrien whose default stack limit is 2mb. */ 145989857Sobrienint re_max_failures = 4000; 146089857Sobrien# else 146189857Sobrienint re_max_failures = 2000; 146289857Sobrien# endif 146389857Sobrien# endif 146489857Sobrien 146589857Sobrienunion PREFIX(fail_stack_elt) 146689857Sobrien{ 146789857Sobrien UCHAR_T *pointer; 146889857Sobrien int integer; 146989857Sobrien}; 147089857Sobrien 147189857Sobrientypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 147289857Sobrien 147389857Sobrientypedef struct 147489857Sobrien{ 147589857Sobrien PREFIX(fail_stack_elt_t) *stack; 147689857Sobrien unsigned size; 147789857Sobrien unsigned avail; /* Offset of next open position. 
*/ 147889857Sobrien} PREFIX(fail_stack_type); 147989857Sobrien 148089857Sobrien# endif /* INT_IS_16BIT */ 148189857Sobrien 148289857Sobrien# ifndef DEFINED_ONCE 148389857Sobrien# define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 148489857Sobrien# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 148589857Sobrien# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 148689857Sobrien# endif 148789857Sobrien 148889857Sobrien 148989857Sobrien/* Define macros to initialize and free the failure stack. 149089857Sobrien Do `return -2' if the alloc fails. */ 149189857Sobrien 149289857Sobrien# ifdef MATCH_MAY_ALLOCATE 149389857Sobrien# define INIT_FAIL_STACK() \ 149489857Sobrien do { \ 149589857Sobrien fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \ 149689857Sobrien REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \ 149789857Sobrien \ 149889857Sobrien if (fail_stack.stack == NULL) \ 149989857Sobrien return -2; \ 150089857Sobrien \ 150189857Sobrien fail_stack.size = INIT_FAILURE_ALLOC; \ 150289857Sobrien fail_stack.avail = 0; \ 150389857Sobrien } while (0) 150489857Sobrien 150589857Sobrien# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 150689857Sobrien# else 150789857Sobrien# define INIT_FAIL_STACK() \ 150889857Sobrien do { \ 150989857Sobrien fail_stack.avail = 0; \ 151089857Sobrien } while (0) 151189857Sobrien 151289857Sobrien# define RESET_FAIL_STACK() 151389857Sobrien# endif 151489857Sobrien 151589857Sobrien 151689857Sobrien/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 151789857Sobrien 151889857Sobrien Return 1 if succeeds, and 0 if either ran out of memory 151989857Sobrien allocating space for it or it was already too large. 152089857Sobrien 152189857Sobrien REGEX_REALLOCATE_STACK requires `destination' be declared. 
*/ 152289857Sobrien 152389857Sobrien# define DOUBLE_FAIL_STACK(fail_stack) \ 152489857Sobrien ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ 152589857Sobrien ? 0 \ 152689857Sobrien : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \ 152789857Sobrien REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 152889857Sobrien (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \ 152989857Sobrien ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\ 153089857Sobrien \ 153189857Sobrien (fail_stack).stack == NULL \ 153289857Sobrien ? 0 \ 153389857Sobrien : ((fail_stack).size <<= 1, \ 153489857Sobrien 1))) 153589857Sobrien 153689857Sobrien 153789857Sobrien/* Push pointer POINTER on FAIL_STACK. 153889857Sobrien Return 1 if was able to do so and 0 if ran out of memory allocating 153989857Sobrien space to do so. */ 154089857Sobrien# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 154189857Sobrien ((FAIL_STACK_FULL () \ 154289857Sobrien && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ 154389857Sobrien ? 0 \ 154489857Sobrien : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 154589857Sobrien 1)) 154689857Sobrien 154789857Sobrien/* Push a pointer value onto the failure stack. 154889857Sobrien Assumes the variable `fail_stack'. Probably should only 154989857Sobrien be called from within `PUSH_FAILURE_POINT'. */ 155089857Sobrien# define PUSH_FAILURE_POINTER(item) \ 155189857Sobrien fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item) 155289857Sobrien 155389857Sobrien/* This pushes an integer-valued item onto the failure stack. 155489857Sobrien Assumes the variable `fail_stack'. Probably should only 155589857Sobrien be called from within `PUSH_FAILURE_POINT'. */ 155689857Sobrien# define PUSH_FAILURE_INT(item) \ 155789857Sobrien fail_stack.stack[fail_stack.avail++].integer = (item) 155889857Sobrien 155989857Sobrien/* Push a fail_stack_elt_t value onto the failure stack. 156089857Sobrien Assumes the variable `fail_stack'. 
Probably should only 156189857Sobrien be called from within `PUSH_FAILURE_POINT'. */ 156289857Sobrien# define PUSH_FAILURE_ELT(item) \ 156389857Sobrien fail_stack.stack[fail_stack.avail++] = (item) 156489857Sobrien 156589857Sobrien/* These three POP... operations complement the three PUSH... operations. 156689857Sobrien All assume that `fail_stack' is nonempty. */ 156789857Sobrien# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 156889857Sobrien# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 156989857Sobrien# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 157089857Sobrien 157189857Sobrien/* Used to omit pushing failure point id's when we're not debugging. */ 157289857Sobrien# ifdef DEBUG 157389857Sobrien# define DEBUG_PUSH PUSH_FAILURE_INT 157489857Sobrien# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () 157589857Sobrien# else 157689857Sobrien# define DEBUG_PUSH(item) 157789857Sobrien# define DEBUG_POP(item_addr) 157889857Sobrien# endif 157989857Sobrien 158089857Sobrien 158189857Sobrien/* Push the information about the state we will need 158289857Sobrien if we ever fail back to it. 158389857Sobrien 158489857Sobrien Requires variables fail_stack, regstart, regend, reg_info, and 158589857Sobrien num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' 158689857Sobrien be declared. 158789857Sobrien 158889857Sobrien Does `return FAILURE_CODE' if runs out of memory. */ 158989857Sobrien 159089857Sobrien# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ 159189857Sobrien do { \ 159289857Sobrien char *destination; \ 159389857Sobrien /* Must be int, so when we don't save any registers, the arithmetic \ 159489857Sobrien of 0 + -1 isn't done as unsigned. 
*/ \ 159589857Sobrien /* Can't be int, since there is not a shred of a guarantee that int \ 159689857Sobrien is wide enough to hold a value of something to which pointer can \ 159789857Sobrien be assigned */ \ 159889857Sobrien active_reg_t this_reg; \ 159989857Sobrien \ 160089857Sobrien DEBUG_STATEMENT (failure_id++); \ 160189857Sobrien DEBUG_STATEMENT (nfailure_points_pushed++); \ 160289857Sobrien DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 160389857Sobrien DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 160489857Sobrien DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 160589857Sobrien \ 160689857Sobrien DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ 160789857Sobrien DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 160889857Sobrien \ 160989857Sobrien /* Ensure we have enough space allocated for what we will push. */ \ 161089857Sobrien while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 161189857Sobrien { \ 161289857Sobrien if (!DOUBLE_FAIL_STACK (fail_stack)) \ 161389857Sobrien return failure_code; \ 161489857Sobrien \ 161589857Sobrien DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 161689857Sobrien (fail_stack).size); \ 161789857Sobrien DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 161889857Sobrien } \ 161989857Sobrien \ 162089857Sobrien /* Push the info, starting with the registers. 
*/ \ 162189857Sobrien DEBUG_PRINT1 ("\n"); \ 162289857Sobrien \ 162389857Sobrien if (1) \ 162489857Sobrien for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ 162589857Sobrien this_reg++) \ 162689857Sobrien { \ 162789857Sobrien DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ 162889857Sobrien DEBUG_STATEMENT (num_regs_pushed++); \ 162989857Sobrien \ 163089857Sobrien DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 163189857Sobrien PUSH_FAILURE_POINTER (regstart[this_reg]); \ 163289857Sobrien \ 163389857Sobrien DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 163489857Sobrien PUSH_FAILURE_POINTER (regend[this_reg]); \ 163589857Sobrien \ 163689857Sobrien DEBUG_PRINT2 (" info: %p\n ", \ 163789857Sobrien reg_info[this_reg].word.pointer); \ 163889857Sobrien DEBUG_PRINT2 (" match_null=%d", \ 163989857Sobrien REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ 164089857Sobrien DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ 164189857Sobrien DEBUG_PRINT2 (" matched_something=%d", \ 164289857Sobrien MATCHED_SOMETHING (reg_info[this_reg])); \ 164389857Sobrien DEBUG_PRINT2 (" ever_matched=%d", \ 164489857Sobrien EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ 164589857Sobrien DEBUG_PRINT1 ("\n"); \ 164689857Sobrien PUSH_FAILURE_ELT (reg_info[this_reg].word); \ 164789857Sobrien } \ 164889857Sobrien \ 164989857Sobrien DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ 165089857Sobrien PUSH_FAILURE_INT (lowest_active_reg); \ 165189857Sobrien \ 165289857Sobrien DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ 165389857Sobrien PUSH_FAILURE_INT (highest_active_reg); \ 165489857Sobrien \ 165589857Sobrien DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ 165689857Sobrien DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 165789857Sobrien PUSH_FAILURE_POINTER (pattern_place); \ 165889857Sobrien \ 165989857Sobrien DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ 166089857Sobrien 
DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 166189857Sobrien size2); \ 166289857Sobrien DEBUG_PRINT1 ("'\n"); \ 166389857Sobrien PUSH_FAILURE_POINTER (string_place); \ 166489857Sobrien \ 166589857Sobrien DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 166689857Sobrien DEBUG_PUSH (failure_id); \ 166789857Sobrien } while (0) 166889857Sobrien 166989857Sobrien# ifndef DEFINED_ONCE 167089857Sobrien/* This is the number of items that are pushed and popped on the stack 167189857Sobrien for each register. */ 167289857Sobrien# define NUM_REG_ITEMS 3 167389857Sobrien 167489857Sobrien/* Individual items aside from the registers. */ 167589857Sobrien# ifdef DEBUG 167689857Sobrien# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ 167789857Sobrien# else 167889857Sobrien# define NUM_NONREG_ITEMS 4 167989857Sobrien# endif 168089857Sobrien 168189857Sobrien/* We push at most this many items on the stack. */ 168289857Sobrien/* We used to use (num_regs - 1), which is the number of registers 168389857Sobrien this regexp will save; but that was changed to 5 168489857Sobrien to avoid stack overflow for a regexp with lots of parens. */ 168589857Sobrien# define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) 168689857Sobrien 168789857Sobrien/* We actually push this many items. */ 168889857Sobrien# define NUM_FAILURE_ITEMS \ 168989857Sobrien (((0 \ 169089857Sobrien ? 0 : highest_active_reg - lowest_active_reg + 1) \ 169189857Sobrien * NUM_REG_ITEMS) \ 169289857Sobrien + NUM_NONREG_ITEMS) 169389857Sobrien 169489857Sobrien/* How many items can still be added to the stack without overflowing it. */ 169589857Sobrien# define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) 169689857Sobrien# endif /* not DEFINED_ONCE */ 169789857Sobrien 169889857Sobrien 169989857Sobrien/* Pops what PUSH_FAIL_STACK pushes. 
170089857Sobrien 170189857Sobrien We restore into the parameters, all of which should be lvalues: 170289857Sobrien STR -- the saved data position. 170389857Sobrien PAT -- the saved pattern position. 170489857Sobrien LOW_REG, HIGH_REG -- the highest and lowest active registers. 170589857Sobrien REGSTART, REGEND -- arrays of string positions. 170689857Sobrien REG_INFO -- array of information about each subexpression. 170789857Sobrien 170889857Sobrien Also assumes the variables `fail_stack' and (if debugging), `bufp', 170989857Sobrien `pend', `string1', `size1', `string2', and `size2'. */ 171089857Sobrien# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 171189857Sobrien{ \ 171289857Sobrien DEBUG_STATEMENT (unsigned failure_id;) \ 171389857Sobrien active_reg_t this_reg; \ 171489857Sobrien const UCHAR_T *string_temp; \ 171589857Sobrien \ 171689857Sobrien assert (!FAIL_STACK_EMPTY ()); \ 171789857Sobrien \ 171889857Sobrien /* Remove failure points and point to how many regs pushed. */ \ 171989857Sobrien DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 172089857Sobrien DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 172189857Sobrien DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 172289857Sobrien \ 172389857Sobrien assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 172489857Sobrien \ 172589857Sobrien DEBUG_POP (&failure_id); \ 172689857Sobrien DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 172789857Sobrien \ 172889857Sobrien /* If the saved string location is NULL, it came from an \ 172989857Sobrien on_failure_keep_string_jump opcode, and we want to throw away the \ 173089857Sobrien saved NULL, thus retaining our current position in the string. 
*/ \ 173189857Sobrien string_temp = POP_FAILURE_POINTER (); \ 173289857Sobrien if (string_temp != NULL) \ 173389857Sobrien str = (const CHAR_T *) string_temp; \ 173489857Sobrien \ 173589857Sobrien DEBUG_PRINT2 (" Popping string %p: `", str); \ 173689857Sobrien DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 173789857Sobrien DEBUG_PRINT1 ("'\n"); \ 173889857Sobrien \ 173989857Sobrien pat = (UCHAR_T *) POP_FAILURE_POINTER (); \ 174089857Sobrien DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ 174189857Sobrien DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ 174289857Sobrien \ 174389857Sobrien /* Restore register info. */ \ 174489857Sobrien high_reg = (active_reg_t) POP_FAILURE_INT (); \ 174589857Sobrien DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ 174689857Sobrien \ 174789857Sobrien low_reg = (active_reg_t) POP_FAILURE_INT (); \ 174889857Sobrien DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ 174989857Sobrien \ 175089857Sobrien if (1) \ 175189857Sobrien for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 175289857Sobrien { \ 175389857Sobrien DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ 175489857Sobrien \ 175589857Sobrien reg_info[this_reg].word = POP_FAILURE_ELT (); \ 175689857Sobrien DEBUG_PRINT2 (" info: %p\n", \ 175789857Sobrien reg_info[this_reg].word.pointer); \ 175889857Sobrien \ 175989857Sobrien regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ 176089857Sobrien DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ 176189857Sobrien \ 176289857Sobrien regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ 176389857Sobrien DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ 176489857Sobrien } \ 176589857Sobrien else \ 176689857Sobrien { \ 176789857Sobrien for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 176889857Sobrien { \ 176989857Sobrien reg_info[this_reg].word.integer = 0; \ 177089857Sobrien regend[this_reg] = 0; \ 177189857Sobrien regstart[this_reg] = 0; \ 
177289857Sobrien } \ 177389857Sobrien highest_active_reg = high_reg; \ 177489857Sobrien } \ 177589857Sobrien \ 177689857Sobrien set_regs_matched_done = 0; \ 177789857Sobrien DEBUG_STATEMENT (nfailure_points_popped++); \ 177889857Sobrien} /* POP_FAILURE_POINT */ 177989857Sobrien 178089857Sobrien/* Structure for per-register (a.k.a. per-group) information. 178189857Sobrien Other register information, such as the 178289857Sobrien starting and ending positions (which are addresses), and the list of 178389857Sobrien inner groups (which is a bits list) are maintained in separate 178489857Sobrien variables. 178589857Sobrien 178689857Sobrien We are making a (strictly speaking) nonportable assumption here: that 178789857Sobrien the compiler will pack our bit fields into something that fits into 178889857Sobrien the type of `word', i.e., is something that fits into one item on the 178989857Sobrien failure stack. */ 179089857Sobrien 179189857Sobrien 179289857Sobrien/* Declarations and macros for re_match_2. */ 179389857Sobrien 179489857Sobrientypedef union 179589857Sobrien{ 179689857Sobrien PREFIX(fail_stack_elt_t) word; 179789857Sobrien struct 179889857Sobrien { 179989857Sobrien /* This field is one if this group can match the empty string, 180089857Sobrien zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. 
*/ 180189857Sobrien# define MATCH_NULL_UNSET_VALUE 3 180289857Sobrien unsigned match_null_string_p : 2; 180389857Sobrien unsigned is_active : 1; 180489857Sobrien unsigned matched_something : 1; 180589857Sobrien unsigned ever_matched_something : 1; 180689857Sobrien } bits; 180789857Sobrien} PREFIX(register_info_type); 180889857Sobrien 180989857Sobrien# ifndef DEFINED_ONCE 181089857Sobrien# define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) 181189857Sobrien# define IS_ACTIVE(R) ((R).bits.is_active) 181289857Sobrien# define MATCHED_SOMETHING(R) ((R).bits.matched_something) 181389857Sobrien# define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) 181489857Sobrien 181589857Sobrien 181689857Sobrien/* Call this when have matched a real character; it sets `matched' flags 181789857Sobrien for the subexpressions which we are currently inside. Also records 181889857Sobrien that those subexprs have matched. */ 181989857Sobrien# define SET_REGS_MATCHED() \ 182089857Sobrien do \ 182189857Sobrien { \ 182289857Sobrien if (!set_regs_matched_done) \ 182389857Sobrien { \ 182489857Sobrien active_reg_t r; \ 182589857Sobrien set_regs_matched_done = 1; \ 182689857Sobrien for (r = lowest_active_reg; r <= highest_active_reg; r++) \ 182789857Sobrien { \ 182889857Sobrien MATCHED_SOMETHING (reg_info[r]) \ 182989857Sobrien = EVER_MATCHED_SOMETHING (reg_info[r]) \ 183089857Sobrien = 1; \ 183189857Sobrien } \ 183289857Sobrien } \ 183389857Sobrien } \ 183489857Sobrien while (0) 183589857Sobrien# endif /* not DEFINED_ONCE */ 183689857Sobrien 183789857Sobrien/* Registers are set to a sentinel when they haven't yet matched. */ 183889857Sobrienstatic CHAR_T PREFIX(reg_unset_dummy); 183989857Sobrien# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy)) 184089857Sobrien# define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 184189857Sobrien 184289857Sobrien/* Subroutine declarations and macros for regex_compile. 
*/ 1843218822Sdimstatic void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg); 1844218822Sdimstatic void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, 1845218822Sdim int arg1, int arg2); 1846218822Sdimstatic void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, 1847218822Sdim int arg, UCHAR_T *end); 1848218822Sdimstatic void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, 1849218822Sdim int arg1, int arg2, UCHAR_T *end); 1850218822Sdimstatic boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern, 1851218822Sdim const CHAR_T *p, 1852218822Sdim reg_syntax_t syntax); 1853218822Sdimstatic boolean PREFIX(at_endline_loc_p) (const CHAR_T *p, 1854218822Sdim const CHAR_T *pend, 1855218822Sdim reg_syntax_t syntax); 185689857Sobrien# ifdef WCHAR 1857218822Sdimstatic reg_errcode_t wcs_compile_range (CHAR_T range_start, 1858218822Sdim const CHAR_T **p_ptr, 1859218822Sdim const CHAR_T *pend, 1860218822Sdim char *translate, 1861218822Sdim reg_syntax_t syntax, 1862218822Sdim UCHAR_T *b, 1863218822Sdim CHAR_T *char_set); 1864218822Sdimstatic void insert_space (int num, CHAR_T *loc, CHAR_T *end); 186589857Sobrien# else /* BYTE */ 1866218822Sdimstatic reg_errcode_t byte_compile_range (unsigned int range_start, 1867218822Sdim const char **p_ptr, 1868218822Sdim const char *pend, 1869218822Sdim char *translate, 1870218822Sdim reg_syntax_t syntax, 1871218822Sdim unsigned char *b); 187289857Sobrien# endif /* WCHAR */ 187389857Sobrien 187489857Sobrien/* Fetch the next character in the uncompiled pattern---translating it 187589857Sobrien if necessary. Also cast from a signed character in the constant 187689857Sobrien string passed to us by the user to an unsigned char that we can use 187789857Sobrien as an array index (in, e.g., `translate'). */ 187889857Sobrien/* ifdef MBS_SUPPORT, we translate only if character <= 0xff, 187989857Sobrien because it is impossible to allocate 4GB array for some encodings 188089857Sobrien which have 4 byte character_set like UCS4. 
*/ 188189857Sobrien# ifndef PATFETCH 188289857Sobrien# ifdef WCHAR 188389857Sobrien# define PATFETCH(c) \ 188489857Sobrien do {if (p == pend) return REG_EEND; \ 188589857Sobrien c = (UCHAR_T) *p++; \ 188689857Sobrien if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \ 188789857Sobrien } while (0) 188889857Sobrien# else /* BYTE */ 188989857Sobrien# define PATFETCH(c) \ 189089857Sobrien do {if (p == pend) return REG_EEND; \ 189189857Sobrien c = (unsigned char) *p++; \ 189289857Sobrien if (translate) c = (unsigned char) translate[c]; \ 189389857Sobrien } while (0) 189489857Sobrien# endif /* WCHAR */ 189589857Sobrien# endif 189689857Sobrien 189789857Sobrien/* Fetch the next character in the uncompiled pattern, with no 189889857Sobrien translation. */ 189989857Sobrien# define PATFETCH_RAW(c) \ 190089857Sobrien do {if (p == pend) return REG_EEND; \ 190189857Sobrien c = (UCHAR_T) *p++; \ 190289857Sobrien } while (0) 190389857Sobrien 190489857Sobrien/* Go backwards one character in the pattern. */ 190589857Sobrien# define PATUNFETCH p-- 190689857Sobrien 190789857Sobrien 190889857Sobrien/* If `translate' is non-null, return translate[D], else just D. We 190989857Sobrien cast the subscript to translate because some data is declared as 191089857Sobrien `char *', to avoid warnings when a string constant is passed. But 191189857Sobrien when we use a character as a subscript we must make it unsigned. */ 191289857Sobrien/* ifdef MBS_SUPPORT, we translate only if character <= 0xff, 191389857Sobrien because it is impossible to allocate 4GB array for some encodings 191489857Sobrien which have 4 byte character_set like UCS4. */ 191589857Sobrien 191689857Sobrien# ifndef TRANSLATE 191789857Sobrien# ifdef WCHAR 191889857Sobrien# define TRANSLATE(d) \ 191989857Sobrien ((translate && ((UCHAR_T) (d)) <= 0xff) \ 192089857Sobrien ? (char) translate[(unsigned char) (d)] : (d)) 192189857Sobrien# else /* BYTE */ 192289857Sobrien# define TRANSLATE(d) \ 1923218822Sdim (translate ? 
(char) translate[(unsigned char) (d)] : (char) (d)) 192489857Sobrien# endif /* WCHAR */ 192589857Sobrien# endif 192689857Sobrien 192789857Sobrien 192889857Sobrien/* Macros for outputting the compiled pattern into `buffer'. */ 192989857Sobrien 193089857Sobrien/* If the buffer isn't allocated when it comes in, use this. */ 193189857Sobrien# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T)) 193289857Sobrien 193389857Sobrien/* Make sure we have at least N more bytes of space in buffer. */ 193489857Sobrien# ifdef WCHAR 193589857Sobrien# define GET_BUFFER_SPACE(n) \ 193689857Sobrien while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \ 193789857Sobrien + (n)*sizeof(CHAR_T)) > bufp->allocated) \ 193889857Sobrien EXTEND_BUFFER () 193989857Sobrien# else /* BYTE */ 194089857Sobrien# define GET_BUFFER_SPACE(n) \ 194189857Sobrien while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ 194289857Sobrien EXTEND_BUFFER () 194389857Sobrien# endif /* WCHAR */ 194489857Sobrien 194589857Sobrien/* Make sure we have one more byte of buffer space and then add C to it. */ 194689857Sobrien# define BUF_PUSH(c) \ 194789857Sobrien do { \ 194889857Sobrien GET_BUFFER_SPACE (1); \ 194989857Sobrien *b++ = (UCHAR_T) (c); \ 195089857Sobrien } while (0) 195189857Sobrien 195289857Sobrien 195389857Sobrien/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ 195489857Sobrien# define BUF_PUSH_2(c1, c2) \ 195589857Sobrien do { \ 195689857Sobrien GET_BUFFER_SPACE (2); \ 195789857Sobrien *b++ = (UCHAR_T) (c1); \ 195889857Sobrien *b++ = (UCHAR_T) (c2); \ 195989857Sobrien } while (0) 196089857Sobrien 196189857Sobrien 196289857Sobrien/* As with BUF_PUSH_2, except for three bytes. 
*/ 196389857Sobrien# define BUF_PUSH_3(c1, c2, c3) \ 196489857Sobrien do { \ 196589857Sobrien GET_BUFFER_SPACE (3); \ 196689857Sobrien *b++ = (UCHAR_T) (c1); \ 196789857Sobrien *b++ = (UCHAR_T) (c2); \ 196889857Sobrien *b++ = (UCHAR_T) (c3); \ 196989857Sobrien } while (0) 197089857Sobrien 197189857Sobrien/* Store a jump with opcode OP at LOC to location TO. We store a 197289857Sobrien relative address offset by the three bytes the jump itself occupies. */ 197389857Sobrien# define STORE_JUMP(op, loc, to) \ 197489857Sobrien PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE))) 197589857Sobrien 197689857Sobrien/* Likewise, for a two-argument jump. */ 197789857Sobrien# define STORE_JUMP2(op, loc, to, arg) \ 197889857Sobrien PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg) 197989857Sobrien 198089857Sobrien/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 198189857Sobrien# define INSERT_JUMP(op, loc, to) \ 198289857Sobrien PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b) 198389857Sobrien 198489857Sobrien/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 198589857Sobrien# define INSERT_JUMP2(op, loc, to, arg) \ 198689857Sobrien PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\ 198789857Sobrien arg, b) 198889857Sobrien 198989857Sobrien/* This is not an arbitrary limit: the arguments which represent offsets 199089857Sobrien into the pattern are two bytes long. So if 2^16 bytes turns out to 199189857Sobrien be too small, many things would have to change. */ 199289857Sobrien/* Any other compiler which, like MSC, has allocation limit below 2^16 199389857Sobrien bytes will have to use approach similar to what was done below for 199489857Sobrien MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up 199589857Sobrien reallocating to 0 bytes. Such thing is not going to work too well. 
199689857Sobrien You have been warned!! */ 199789857Sobrien# ifndef DEFINED_ONCE 199889857Sobrien# if defined _MSC_VER && !defined WIN32 199989857Sobrien/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. 200089857Sobrien The REALLOC define eliminates a flurry of conversion warnings, 200189857Sobrien but is not required. */ 200289857Sobrien# define MAX_BUF_SIZE 65500L 200389857Sobrien# define REALLOC(p,s) realloc ((p), (size_t) (s)) 200489857Sobrien# else 200589857Sobrien# define MAX_BUF_SIZE (1L << 16) 200689857Sobrien# define REALLOC(p,s) realloc ((p), (s)) 200789857Sobrien# endif 200889857Sobrien 200989857Sobrien/* Extend the buffer by twice its current size via realloc and 201089857Sobrien reset the pointers that pointed into the old block to point to the 201189857Sobrien correct places in the new one. If extending the buffer results in it 201289857Sobrien being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 201389857Sobrien# if __BOUNDED_POINTERS__ 201489857Sobrien# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) 201589857Sobrien# define MOVE_BUFFER_POINTER(P) \ 201689857Sobrien (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) 201789857Sobrien# define ELSE_EXTEND_BUFFER_HIGH_BOUND \ 201889857Sobrien else \ 201989857Sobrien { \ 202089857Sobrien SET_HIGH_BOUND (b); \ 202189857Sobrien SET_HIGH_BOUND (begalt); \ 202289857Sobrien if (fixup_alt_jump) \ 202389857Sobrien SET_HIGH_BOUND (fixup_alt_jump); \ 202489857Sobrien if (laststart) \ 202589857Sobrien SET_HIGH_BOUND (laststart); \ 202689857Sobrien if (pending_exact) \ 202789857Sobrien SET_HIGH_BOUND (pending_exact); \ 202889857Sobrien } 202989857Sobrien# else 203089857Sobrien# define MOVE_BUFFER_POINTER(P) (P) += incr 203189857Sobrien# define ELSE_EXTEND_BUFFER_HIGH_BOUND 203289857Sobrien# endif 203389857Sobrien# endif /* not DEFINED_ONCE */ 203489857Sobrien 203589857Sobrien# ifdef WCHAR 203689857Sobrien# define EXTEND_BUFFER() \ 203789857Sobrien 
do { \ 203889857Sobrien UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 203989857Sobrien int wchar_count; \ 204089857Sobrien if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \ 204189857Sobrien return REG_ESIZE; \ 204289857Sobrien bufp->allocated <<= 1; \ 204389857Sobrien if (bufp->allocated > MAX_BUF_SIZE) \ 204489857Sobrien bufp->allocated = MAX_BUF_SIZE; \ 204589857Sobrien /* How many characters the new buffer can have? */ \ 204689857Sobrien wchar_count = bufp->allocated / sizeof(UCHAR_T); \ 204789857Sobrien if (wchar_count == 0) wchar_count = 1; \ 204889857Sobrien /* Truncate the buffer to CHAR_T align. */ \ 204989857Sobrien bufp->allocated = wchar_count * sizeof(UCHAR_T); \ 205089857Sobrien RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \ 205189857Sobrien bufp->buffer = (char*)COMPILED_BUFFER_VAR; \ 205289857Sobrien if (COMPILED_BUFFER_VAR == NULL) \ 205389857Sobrien return REG_ESPACE; \ 205489857Sobrien /* If the buffer moved, move all the pointers into it. */ \ 205589857Sobrien if (old_buffer != COMPILED_BUFFER_VAR) \ 205689857Sobrien { \ 205789857Sobrien int incr = COMPILED_BUFFER_VAR - old_buffer; \ 205889857Sobrien MOVE_BUFFER_POINTER (b); \ 205989857Sobrien MOVE_BUFFER_POINTER (begalt); \ 206089857Sobrien if (fixup_alt_jump) \ 206189857Sobrien MOVE_BUFFER_POINTER (fixup_alt_jump); \ 206289857Sobrien if (laststart) \ 206389857Sobrien MOVE_BUFFER_POINTER (laststart); \ 206489857Sobrien if (pending_exact) \ 206589857Sobrien MOVE_BUFFER_POINTER (pending_exact); \ 206689857Sobrien } \ 206789857Sobrien ELSE_EXTEND_BUFFER_HIGH_BOUND \ 206889857Sobrien } while (0) 206989857Sobrien# else /* BYTE */ 207089857Sobrien# define EXTEND_BUFFER() \ 207189857Sobrien do { \ 207289857Sobrien UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 207389857Sobrien if (bufp->allocated == MAX_BUF_SIZE) \ 207489857Sobrien return REG_ESIZE; \ 207589857Sobrien bufp->allocated <<= 1; \ 207689857Sobrien if (bufp->allocated > MAX_BUF_SIZE) \ 207789857Sobrien bufp->allocated = 
MAX_BUF_SIZE; \ 207889857Sobrien bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \ 207989857Sobrien bufp->allocated); \ 208089857Sobrien if (COMPILED_BUFFER_VAR == NULL) \ 208189857Sobrien return REG_ESPACE; \ 208289857Sobrien /* If the buffer moved, move all the pointers into it. */ \ 208389857Sobrien if (old_buffer != COMPILED_BUFFER_VAR) \ 208489857Sobrien { \ 208589857Sobrien int incr = COMPILED_BUFFER_VAR - old_buffer; \ 208689857Sobrien MOVE_BUFFER_POINTER (b); \ 208789857Sobrien MOVE_BUFFER_POINTER (begalt); \ 208889857Sobrien if (fixup_alt_jump) \ 208989857Sobrien MOVE_BUFFER_POINTER (fixup_alt_jump); \ 209089857Sobrien if (laststart) \ 209189857Sobrien MOVE_BUFFER_POINTER (laststart); \ 209289857Sobrien if (pending_exact) \ 209389857Sobrien MOVE_BUFFER_POINTER (pending_exact); \ 209489857Sobrien } \ 209589857Sobrien ELSE_EXTEND_BUFFER_HIGH_BOUND \ 209689857Sobrien } while (0) 209789857Sobrien# endif /* WCHAR */ 209889857Sobrien 209989857Sobrien# ifndef DEFINED_ONCE 210089857Sobrien/* Since we have one byte reserved for the register number argument to 210189857Sobrien {start,stop}_memory, the maximum number of groups we can report 210289857Sobrien things about is what fits in that byte. */ 210389857Sobrien# define MAX_REGNUM 255 210489857Sobrien 210589857Sobrien/* But patterns can have more than `MAX_REGNUM' registers. We just 210689857Sobrien ignore the excess. */ 210789857Sobrientypedef unsigned regnum_t; 210889857Sobrien 210989857Sobrien 211089857Sobrien/* Macros for the compile stack. */ 211189857Sobrien 211289857Sobrien/* Since offsets can go either forwards or backwards, this type needs to 211389857Sobrien be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 211489857Sobrien/* int may be not enough when sizeof(int) == 2. 
*/ 211589857Sobrientypedef long pattern_offset_t; 211689857Sobrien 211789857Sobrientypedef struct 211889857Sobrien{ 211989857Sobrien pattern_offset_t begalt_offset; 212089857Sobrien pattern_offset_t fixup_alt_jump; 212189857Sobrien pattern_offset_t inner_group_offset; 212289857Sobrien pattern_offset_t laststart_offset; 212389857Sobrien regnum_t regnum; 212489857Sobrien} compile_stack_elt_t; 212589857Sobrien 212689857Sobrien 212789857Sobrientypedef struct 212889857Sobrien{ 212989857Sobrien compile_stack_elt_t *stack; 213089857Sobrien unsigned size; 213189857Sobrien unsigned avail; /* Offset of next open position. */ 213289857Sobrien} compile_stack_type; 213389857Sobrien 213489857Sobrien 213589857Sobrien# define INIT_COMPILE_STACK_SIZE 32 213689857Sobrien 213789857Sobrien# define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 213889857Sobrien# define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 213989857Sobrien 214089857Sobrien/* The next available element. */ 214189857Sobrien# define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 214289857Sobrien 214389857Sobrien# endif /* not DEFINED_ONCE */ 214489857Sobrien 214589857Sobrien/* Set the bit for character C in a list. */ 214689857Sobrien# ifndef DEFINED_ONCE 214789857Sobrien# define SET_LIST_BIT(c) \ 214889857Sobrien (b[((unsigned char) (c)) / BYTEWIDTH] \ 214989857Sobrien |= 1 << (((unsigned char) c) % BYTEWIDTH)) 215089857Sobrien# endif /* DEFINED_ONCE */ 215189857Sobrien 215289857Sobrien/* Get the next unsigned number in the uncompiled pattern. 
*/ 215389857Sobrien# define GET_UNSIGNED_NUMBER(num) \ 215489857Sobrien { \ 215589857Sobrien while (p != pend) \ 215689857Sobrien { \ 215789857Sobrien PATFETCH (c); \ 215889857Sobrien if (c < '0' || c > '9') \ 215989857Sobrien break; \ 216089857Sobrien if (num <= RE_DUP_MAX) \ 216189857Sobrien { \ 216289857Sobrien if (num < 0) \ 216389857Sobrien num = 0; \ 216489857Sobrien num = num * 10 + c - '0'; \ 216589857Sobrien } \ 216689857Sobrien } \ 216789857Sobrien } 216889857Sobrien 216989857Sobrien# ifndef DEFINED_ONCE 217089857Sobrien# if defined _LIBC || WIDE_CHAR_SUPPORT 217189857Sobrien/* The GNU C library provides support for user-defined character classes 217289857Sobrien and the functions from ISO C amendement 1. */ 217389857Sobrien# ifdef CHARCLASS_NAME_MAX 217489857Sobrien# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX 217589857Sobrien# else 217689857Sobrien/* This shouldn't happen but some implementation might still have this 217789857Sobrien problem. Use a reasonable default value. */ 217889857Sobrien# define CHAR_CLASS_MAX_LENGTH 256 217989857Sobrien# endif 218089857Sobrien 218189857Sobrien# ifdef _LIBC 218289857Sobrien# define IS_CHAR_CLASS(string) __wctype (string) 218389857Sobrien# else 218489857Sobrien# define IS_CHAR_CLASS(string) wctype (string) 218589857Sobrien# endif 218689857Sobrien# else 218789857Sobrien# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. 
*/ 218889857Sobrien 218989857Sobrien# define IS_CHAR_CLASS(string) \ 219089857Sobrien (STREQ (string, "alpha") || STREQ (string, "upper") \ 219189857Sobrien || STREQ (string, "lower") || STREQ (string, "digit") \ 219289857Sobrien || STREQ (string, "alnum") || STREQ (string, "xdigit") \ 219389857Sobrien || STREQ (string, "space") || STREQ (string, "print") \ 219489857Sobrien || STREQ (string, "punct") || STREQ (string, "graph") \ 219589857Sobrien || STREQ (string, "cntrl") || STREQ (string, "blank")) 219689857Sobrien# endif 219789857Sobrien# endif /* DEFINED_ONCE */ 219889857Sobrien 219989857Sobrien# ifndef MATCH_MAY_ALLOCATE 220089857Sobrien 220189857Sobrien/* If we cannot allocate large objects within re_match_2_internal, 220289857Sobrien we make the fail stack and register vectors global. 220389857Sobrien The fail stack, we grow to the maximum size when a regexp 220489857Sobrien is compiled. 220589857Sobrien The register vectors, we adjust in size each time we 220689857Sobrien compile a regexp, according to the number of registers it needs. */ 220789857Sobrien 220889857Sobrienstatic PREFIX(fail_stack_type) fail_stack; 220989857Sobrien 221089857Sobrien/* Size with which the following vectors are currently allocated. 221189857Sobrien That is so we can make them bigger as needed, 221289857Sobrien but never make them smaller. 
*/ 221389857Sobrien# ifdef DEFINED_ONCE 221489857Sobrienstatic int regs_allocated_size; 221589857Sobrien 221689857Sobrienstatic const char ** regstart, ** regend; 221789857Sobrienstatic const char ** old_regstart, ** old_regend; 221889857Sobrienstatic const char **best_regstart, **best_regend; 221989857Sobrienstatic const char **reg_dummy; 222089857Sobrien# endif /* DEFINED_ONCE */ 222189857Sobrien 222289857Sobrienstatic PREFIX(register_info_type) *PREFIX(reg_info); 222389857Sobrienstatic PREFIX(register_info_type) *PREFIX(reg_info_dummy); 222489857Sobrien 222589857Sobrien/* Make the register vectors big enough for NUM_REGS registers, 222689857Sobrien but don't make them smaller. */ 222789857Sobrien 222889857Sobrienstatic void 2229218822SdimPREFIX(regex_grow_registers) (int num_regs) 223089857Sobrien{ 223189857Sobrien if (num_regs > regs_allocated_size) 223289857Sobrien { 223389857Sobrien RETALLOC_IF (regstart, num_regs, const char *); 223489857Sobrien RETALLOC_IF (regend, num_regs, const char *); 223589857Sobrien RETALLOC_IF (old_regstart, num_regs, const char *); 223689857Sobrien RETALLOC_IF (old_regend, num_regs, const char *); 223789857Sobrien RETALLOC_IF (best_regstart, num_regs, const char *); 223889857Sobrien RETALLOC_IF (best_regend, num_regs, const char *); 223989857Sobrien RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type)); 224089857Sobrien RETALLOC_IF (reg_dummy, num_regs, const char *); 224189857Sobrien RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type)); 224289857Sobrien 224389857Sobrien regs_allocated_size = num_regs; 224489857Sobrien } 224589857Sobrien} 224689857Sobrien 224789857Sobrien# endif /* not MATCH_MAY_ALLOCATE */ 224889857Sobrien 224989857Sobrien# ifndef DEFINED_ONCE 2250218822Sdimstatic boolean group_in_compile_stack (compile_stack_type compile_stack, 2251218822Sdim regnum_t regnum); 225289857Sobrien# endif /* not DEFINED_ONCE */ 225389857Sobrien 225489857Sobrien/* `regex_compile' compiles PATTERN 
(of length SIZE) according to SYNTAX. 225589857Sobrien Returns one of error codes defined in `regex.h', or zero for success. 225689857Sobrien 225789857Sobrien Assumes the `allocated' (and perhaps `buffer') and `translate' 225889857Sobrien fields are set in BUFP on entry. 225989857Sobrien 226089857Sobrien If it succeeds, results are put in BUFP (if it returns an error, the 226189857Sobrien contents of BUFP are undefined): 226289857Sobrien `buffer' is the compiled pattern; 226389857Sobrien `syntax' is set to SYNTAX; 226489857Sobrien `used' is set to the length of the compiled pattern; 226589857Sobrien `fastmap_accurate' is zero; 226689857Sobrien `re_nsub' is the number of subexpressions in PATTERN; 226789857Sobrien `not_bol' and `not_eol' are zero; 226889857Sobrien 226989857Sobrien The `fastmap' and `newline_anchor' fields are neither 227089857Sobrien examined nor set. */ 227189857Sobrien 227289857Sobrien/* Return, freeing storage we allocated. */ 227389857Sobrien# ifdef WCHAR 227489857Sobrien# define FREE_STACK_RETURN(value) \ 227589857Sobrien return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value) 227689857Sobrien# else 227789857Sobrien# define FREE_STACK_RETURN(value) \ 227889857Sobrien return (free (compile_stack.stack), value) 227989857Sobrien# endif /* WCHAR */ 228089857Sobrien 228189857Sobrienstatic reg_errcode_t 2282218822SdimPREFIX(regex_compile) (const char *ARG_PREFIX(pattern), 2283218822Sdim size_t ARG_PREFIX(size), reg_syntax_t syntax, 2284218822Sdim struct re_pattern_buffer *bufp) 228589857Sobrien{ 228689857Sobrien /* We fetch characters from PATTERN here. Even though PATTERN is 228789857Sobrien `char *' (i.e., signed), we declare these variables as unsigned, so 228889857Sobrien they can be reliably used as array indices. */ 228989857Sobrien register UCHAR_T c, c1; 229089857Sobrien 229189857Sobrien#ifdef WCHAR 229289857Sobrien /* A temporary space to keep wchar_t pattern and compiled pattern. 
*/ 229389857Sobrien CHAR_T *pattern, *COMPILED_BUFFER_VAR; 229489857Sobrien size_t size; 229589857Sobrien /* offset buffer for optimization. See convert_mbs_to_wc. */ 229689857Sobrien int *mbs_offset = NULL; 229789857Sobrien /* It hold whether each wchar_t is binary data or not. */ 229889857Sobrien char *is_binary = NULL; 229989857Sobrien /* A flag whether exactn is handling binary data or not. */ 230089857Sobrien char is_exactn_bin = FALSE; 230189857Sobrien#endif /* WCHAR */ 230289857Sobrien 230389857Sobrien /* A random temporary spot in PATTERN. */ 230489857Sobrien const CHAR_T *p1; 230589857Sobrien 230689857Sobrien /* Points to the end of the buffer, where we should append. */ 230789857Sobrien register UCHAR_T *b; 230889857Sobrien 230989857Sobrien /* Keeps track of unclosed groups. */ 231089857Sobrien compile_stack_type compile_stack; 231189857Sobrien 231289857Sobrien /* Points to the current (ending) position in the pattern. */ 231389857Sobrien#ifdef WCHAR 231489857Sobrien const CHAR_T *p; 231589857Sobrien const CHAR_T *pend; 231689857Sobrien#else /* BYTE */ 231789857Sobrien const CHAR_T *p = pattern; 231889857Sobrien const CHAR_T *pend = pattern + size; 231989857Sobrien#endif /* WCHAR */ 232089857Sobrien 232189857Sobrien /* How to translate the characters in the pattern. */ 232289857Sobrien RE_TRANSLATE_TYPE translate = bufp->translate; 232389857Sobrien 232489857Sobrien /* Address of the count-byte of the most recently inserted `exactn' 232589857Sobrien command. This makes it possible to tell if a new exact-match 232689857Sobrien character can be added to that command or if the character requires 232789857Sobrien a new `exactn' command. */ 232889857Sobrien UCHAR_T *pending_exact = 0; 232989857Sobrien 233089857Sobrien /* Address of start of the most recently finished expression. 233189857Sobrien This tells, e.g., postfix * where to find the start of its 233289857Sobrien operand. Reset at the beginning of groups and alternatives. 
*/ 233389857Sobrien UCHAR_T *laststart = 0; 233489857Sobrien 233589857Sobrien /* Address of beginning of regexp, or inside of last group. */ 233689857Sobrien UCHAR_T *begalt; 233789857Sobrien 233889857Sobrien /* Address of the place where a forward jump should go to the end of 233989857Sobrien the containing expression. Each alternative of an `or' -- except the 234089857Sobrien last -- ends with a forward jump of this sort. */ 234189857Sobrien UCHAR_T *fixup_alt_jump = 0; 234289857Sobrien 234389857Sobrien /* Counts open-groups as they are encountered. Remembered for the 234489857Sobrien matching close-group on the compile stack, so the same register 234589857Sobrien number is put in the stop_memory as the start_memory. */ 234689857Sobrien regnum_t regnum = 0; 234789857Sobrien 234889857Sobrien#ifdef WCHAR 234989857Sobrien /* Initialize the wchar_t PATTERN and offset_buffer. */ 235089857Sobrien p = pend = pattern = TALLOC(csize + 1, CHAR_T); 235189857Sobrien mbs_offset = TALLOC(csize + 1, int); 235289857Sobrien is_binary = TALLOC(csize + 1, char); 235389857Sobrien if (pattern == NULL || mbs_offset == NULL || is_binary == NULL) 235489857Sobrien { 235589857Sobrien free(pattern); 235689857Sobrien free(mbs_offset); 235789857Sobrien free(is_binary); 235889857Sobrien return REG_ESPACE; 235989857Sobrien } 236089857Sobrien pattern[csize] = L'\0'; /* sentinel */ 236189857Sobrien size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary); 236289857Sobrien pend = p + size; 236389857Sobrien if (size < 0) 236489857Sobrien { 236589857Sobrien free(pattern); 236689857Sobrien free(mbs_offset); 236789857Sobrien free(is_binary); 236889857Sobrien return REG_BADPAT; 236989857Sobrien } 237089857Sobrien#endif 237189857Sobrien 237289857Sobrien#ifdef DEBUG 237389857Sobrien DEBUG_PRINT1 ("\nCompiling pattern: "); 237489857Sobrien if (debug) 237589857Sobrien { 237689857Sobrien unsigned debug_count; 237789857Sobrien 237889857Sobrien for (debug_count = 0; debug_count < size; 
debug_count++) 237989857Sobrien PUT_CHAR (pattern[debug_count]); 238089857Sobrien putchar ('\n'); 238189857Sobrien } 238289857Sobrien#endif /* DEBUG */ 238389857Sobrien 238489857Sobrien /* Initialize the compile stack. */ 238589857Sobrien compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 238689857Sobrien if (compile_stack.stack == NULL) 238789857Sobrien { 238889857Sobrien#ifdef WCHAR 238989857Sobrien free(pattern); 239089857Sobrien free(mbs_offset); 239189857Sobrien free(is_binary); 239289857Sobrien#endif 239389857Sobrien return REG_ESPACE; 239489857Sobrien } 239589857Sobrien 239689857Sobrien compile_stack.size = INIT_COMPILE_STACK_SIZE; 239789857Sobrien compile_stack.avail = 0; 239889857Sobrien 239989857Sobrien /* Initialize the pattern buffer. */ 240089857Sobrien bufp->syntax = syntax; 240189857Sobrien bufp->fastmap_accurate = 0; 240289857Sobrien bufp->not_bol = bufp->not_eol = 0; 240389857Sobrien 240489857Sobrien /* Set `used' to zero, so that if we return an error, the pattern 240589857Sobrien printer (for debugging) will think there's no pattern. We reset it 240689857Sobrien at the end. */ 240789857Sobrien bufp->used = 0; 240889857Sobrien 240989857Sobrien /* Always count groups, whether or not bufp->no_sub is set. */ 241089857Sobrien bufp->re_nsub = 0; 241189857Sobrien 241289857Sobrien#if !defined emacs && !defined SYNTAX_TABLE 241389857Sobrien /* Initialize the syntax table. */ 241489857Sobrien init_syntax_once (); 241589857Sobrien#endif 241689857Sobrien 241789857Sobrien if (bufp->allocated == 0) 241889857Sobrien { 241989857Sobrien if (bufp->buffer) 242089857Sobrien { /* If zero allocated, but buffer is non-null, try to realloc 242189857Sobrien enough space. This loses if buffer's address is bogus, but 242289857Sobrien that is the user's responsibility. */ 242389857Sobrien#ifdef WCHAR 242489857Sobrien /* Free bufp->buffer and allocate an array for wchar_t pattern 242589857Sobrien buffer. 
*/ 242689857Sobrien free(bufp->buffer); 242789857Sobrien COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T), 242889857Sobrien UCHAR_T); 242989857Sobrien#else 243089857Sobrien RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T); 243189857Sobrien#endif /* WCHAR */ 243289857Sobrien } 243389857Sobrien else 243489857Sobrien { /* Caller did not allocate a buffer. Do it for them. */ 243589857Sobrien COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T), 243689857Sobrien UCHAR_T); 243789857Sobrien } 243889857Sobrien 243989857Sobrien if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE); 244089857Sobrien#ifdef WCHAR 244189857Sobrien bufp->buffer = (char*)COMPILED_BUFFER_VAR; 244289857Sobrien#endif /* WCHAR */ 244389857Sobrien bufp->allocated = INIT_BUF_SIZE; 244489857Sobrien } 244589857Sobrien#ifdef WCHAR 244689857Sobrien else 244789857Sobrien COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer; 244889857Sobrien#endif 244989857Sobrien 245089857Sobrien begalt = b = COMPILED_BUFFER_VAR; 245189857Sobrien 245289857Sobrien /* Loop through the uncompiled pattern until we're at the end. */ 245389857Sobrien while (p != pend) 245489857Sobrien { 245589857Sobrien PATFETCH (c); 245689857Sobrien 245789857Sobrien switch (c) 245889857Sobrien { 245989857Sobrien case '^': 246089857Sobrien { 246189857Sobrien if ( /* If at start of pattern, it's an operator. */ 246289857Sobrien p == pattern + 1 246389857Sobrien /* If context independent, it's an operator. */ 246489857Sobrien || syntax & RE_CONTEXT_INDEP_ANCHORS 246589857Sobrien /* Otherwise, depends on what's come before. */ 246689857Sobrien || PREFIX(at_begline_loc_p) (pattern, p, syntax)) 246789857Sobrien BUF_PUSH (begline); 246889857Sobrien else 246989857Sobrien goto normal_char; 247089857Sobrien } 247189857Sobrien break; 247289857Sobrien 247389857Sobrien 247489857Sobrien case '$': 247589857Sobrien { 247689857Sobrien if ( /* If at end of pattern, it's an operator. 
*/ 247789857Sobrien p == pend 247889857Sobrien /* If context independent, it's an operator. */ 247989857Sobrien || syntax & RE_CONTEXT_INDEP_ANCHORS 248089857Sobrien /* Otherwise, depends on what's next. */ 248189857Sobrien || PREFIX(at_endline_loc_p) (p, pend, syntax)) 248289857Sobrien BUF_PUSH (endline); 248389857Sobrien else 248489857Sobrien goto normal_char; 248589857Sobrien } 248689857Sobrien break; 248789857Sobrien 248889857Sobrien 248989857Sobrien case '+': 249089857Sobrien case '?': 249189857Sobrien if ((syntax & RE_BK_PLUS_QM) 249289857Sobrien || (syntax & RE_LIMITED_OPS)) 249389857Sobrien goto normal_char; 249489857Sobrien handle_plus: 249589857Sobrien case '*': 249689857Sobrien /* If there is no previous pattern... */ 249789857Sobrien if (!laststart) 249889857Sobrien { 249989857Sobrien if (syntax & RE_CONTEXT_INVALID_OPS) 250089857Sobrien FREE_STACK_RETURN (REG_BADRPT); 250189857Sobrien else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 250289857Sobrien goto normal_char; 250389857Sobrien } 250489857Sobrien 250589857Sobrien { 250689857Sobrien /* Are we optimizing this jump? */ 250789857Sobrien boolean keep_string_p = false; 250889857Sobrien 250989857Sobrien /* 1 means zero (many) matches is allowed. */ 251089857Sobrien char zero_times_ok = 0, many_times_ok = 0; 251189857Sobrien 251289857Sobrien /* If there is a sequence of repetition chars, collapse it 251389857Sobrien down to just one (the right one). We can't combine 251489857Sobrien interval operators with these because of, e.g., `a{2}*', 251589857Sobrien which should only match an even number of `a's. 
*/ 251689857Sobrien 251789857Sobrien for (;;) 251889857Sobrien { 251989857Sobrien zero_times_ok |= c != '+'; 252089857Sobrien many_times_ok |= c != '?'; 252189857Sobrien 252289857Sobrien if (p == pend) 252389857Sobrien break; 252489857Sobrien 252589857Sobrien PATFETCH (c); 252689857Sobrien 252789857Sobrien if (c == '*' 252889857Sobrien || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 252989857Sobrien ; 253089857Sobrien 253189857Sobrien else if (syntax & RE_BK_PLUS_QM && c == '\\') 253289857Sobrien { 253389857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 253489857Sobrien 253589857Sobrien PATFETCH (c1); 253689857Sobrien if (!(c1 == '+' || c1 == '?')) 253789857Sobrien { 253889857Sobrien PATUNFETCH; 253989857Sobrien PATUNFETCH; 254089857Sobrien break; 254189857Sobrien } 254289857Sobrien 254389857Sobrien c = c1; 254489857Sobrien } 254589857Sobrien else 254689857Sobrien { 254789857Sobrien PATUNFETCH; 254889857Sobrien break; 254989857Sobrien } 255089857Sobrien 255189857Sobrien /* If we get here, we found another repeat character. */ 255289857Sobrien } 255389857Sobrien 255489857Sobrien /* Star, etc. applied to an empty pattern is equivalent 255589857Sobrien to an empty pattern. */ 255689857Sobrien if (!laststart) 255789857Sobrien break; 255889857Sobrien 255989857Sobrien /* Now we know whether or not zero matches is allowed 256089857Sobrien and also whether or not two or more matches is allowed. */ 256189857Sobrien if (many_times_ok) 256289857Sobrien { /* More than one repetition is allowed, so put in at the 256389857Sobrien end a backward relative jump from `b' to before the next 256489857Sobrien jump we're going to put in below (which jumps from 256589857Sobrien laststart to after this jump). 256689857Sobrien 256789857Sobrien But if we are at the `*' in the exact sequence `.*\n', 256889857Sobrien insert an unconditional jump backwards to the ., 256989857Sobrien instead of the beginning of the loop. 
This way we only 257089857Sobrien push a failure point once, instead of every time 257189857Sobrien through the loop. */ 257289857Sobrien assert (p - 1 > pattern); 257389857Sobrien 257489857Sobrien /* Allocate the space for the jump. */ 257589857Sobrien GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 257689857Sobrien 257789857Sobrien /* We know we are not at the first character of the pattern, 257889857Sobrien because laststart was nonzero. And we've already 257989857Sobrien incremented `p', by the way, to be the character after 258089857Sobrien the `*'. Do we have to do something analogous here 258189857Sobrien for null bytes, because of RE_DOT_NOT_NULL? */ 258289857Sobrien if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 258389857Sobrien && zero_times_ok 258489857Sobrien && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 258589857Sobrien && !(syntax & RE_DOT_NEWLINE)) 258689857Sobrien { /* We have .*\n. */ 258789857Sobrien STORE_JUMP (jump, b, laststart); 258889857Sobrien keep_string_p = true; 258989857Sobrien } 259089857Sobrien else 259189857Sobrien /* Anything else. */ 259289857Sobrien STORE_JUMP (maybe_pop_jump, b, laststart - 259389857Sobrien (1 + OFFSET_ADDRESS_SIZE)); 259489857Sobrien 259589857Sobrien /* We've added more stuff to the buffer. */ 259689857Sobrien b += 1 + OFFSET_ADDRESS_SIZE; 259789857Sobrien } 259889857Sobrien 259989857Sobrien /* On failure, jump from laststart to b + 3, which will be the 260089857Sobrien end of the buffer after this jump is inserted. */ 260189857Sobrien /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of 260289857Sobrien 'b + 3'. */ 260389857Sobrien GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 260489857Sobrien INSERT_JUMP (keep_string_p ? 
on_failure_keep_string_jump 260589857Sobrien : on_failure_jump, 260689857Sobrien laststart, b + 1 + OFFSET_ADDRESS_SIZE); 260789857Sobrien pending_exact = 0; 260889857Sobrien b += 1 + OFFSET_ADDRESS_SIZE; 260989857Sobrien 261089857Sobrien if (!zero_times_ok) 261189857Sobrien { 261289857Sobrien /* At least one repetition is required, so insert a 261389857Sobrien `dummy_failure_jump' before the initial 261489857Sobrien `on_failure_jump' instruction of the loop. This 261589857Sobrien effects a skip over that instruction the first time 261689857Sobrien we hit that loop. */ 261789857Sobrien GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 261889857Sobrien INSERT_JUMP (dummy_failure_jump, laststart, laststart + 261989857Sobrien 2 + 2 * OFFSET_ADDRESS_SIZE); 262089857Sobrien b += 1 + OFFSET_ADDRESS_SIZE; 262189857Sobrien } 262289857Sobrien } 262389857Sobrien break; 262489857Sobrien 262589857Sobrien 262689857Sobrien case '.': 262789857Sobrien laststart = b; 262889857Sobrien BUF_PUSH (anychar); 262989857Sobrien break; 263089857Sobrien 263189857Sobrien 263289857Sobrien case '[': 263389857Sobrien { 263489857Sobrien boolean had_char_class = false; 263589857Sobrien#ifdef WCHAR 263689857Sobrien CHAR_T range_start = 0xffffffff; 263789857Sobrien#else 263889857Sobrien unsigned int range_start = 0xffffffff; 263989857Sobrien#endif 264089857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 264189857Sobrien 264289857Sobrien#ifdef WCHAR 264389857Sobrien /* We assume a charset(_not) structure as a wchar_t array. 
264489857Sobrien charset[0] = (re_opcode_t) charset(_not) 264589857Sobrien charset[1] = l (= length of char_classes) 264689857Sobrien charset[2] = m (= length of collating_symbols) 264789857Sobrien charset[3] = n (= length of equivalence_classes) 264889857Sobrien charset[4] = o (= length of char_ranges) 264989857Sobrien charset[5] = p (= length of chars) 265089857Sobrien 265189857Sobrien charset[6] = char_class (wctype_t) 265289857Sobrien charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t) 265389857Sobrien ... 265489857Sobrien charset[l+5] = char_class (wctype_t) 265589857Sobrien 265689857Sobrien charset[l+6] = collating_symbol (wchar_t) 265789857Sobrien ... 265889857Sobrien charset[l+m+5] = collating_symbol (wchar_t) 265989857Sobrien ifdef _LIBC we use the index if 266089857Sobrien _NL_COLLATE_SYMB_EXTRAMB instead of 266189857Sobrien wchar_t string. 266289857Sobrien 266389857Sobrien charset[l+m+6] = equivalence_classes (wchar_t) 266489857Sobrien ... 266589857Sobrien charset[l+m+n+5] = equivalence_classes (wchar_t) 266689857Sobrien ifdef _LIBC we use the index in 266789857Sobrien _NL_COLLATE_WEIGHT instead of 266889857Sobrien wchar_t string. 266989857Sobrien 267089857Sobrien charset[l+m+n+6] = range_start 267189857Sobrien charset[l+m+n+7] = range_end 267289857Sobrien ... 267389857Sobrien charset[l+m+n+2o+4] = range_start 267489857Sobrien charset[l+m+n+2o+5] = range_end 267589857Sobrien ifdef _LIBC we use the value looked up 267689857Sobrien in _NL_COLLATE_COLLSEQ instead of 267789857Sobrien wchar_t character. 267889857Sobrien 267989857Sobrien charset[l+m+n+2o+6] = char 268089857Sobrien ... 268189857Sobrien charset[l+m+n+2o+p+5] = char 268289857Sobrien 268389857Sobrien */ 268489857Sobrien 268589857Sobrien /* We need at least 6 spaces: the opcode, the length of 268689857Sobrien char_classes, the length of collating_symbols, the length of 268789857Sobrien equivalence_classes, the length of char_ranges, the length of 268889857Sobrien chars. 
*/ 268989857Sobrien GET_BUFFER_SPACE (6); 269089857Sobrien 269189857Sobrien /* Save b as laststart. And We use laststart as the pointer 269289857Sobrien to the first element of the charset here. 269389857Sobrien In other words, laststart[i] indicates charset[i]. */ 269489857Sobrien laststart = b; 269589857Sobrien 269689857Sobrien /* We test `*p == '^' twice, instead of using an if 269789857Sobrien statement, so we only need one BUF_PUSH. */ 269889857Sobrien BUF_PUSH (*p == '^' ? charset_not : charset); 269989857Sobrien if (*p == '^') 270089857Sobrien p++; 270189857Sobrien 270289857Sobrien /* Push the length of char_classes, the length of 270389857Sobrien collating_symbols, the length of equivalence_classes, the 270489857Sobrien length of char_ranges and the length of chars. */ 270589857Sobrien BUF_PUSH_3 (0, 0, 0); 270689857Sobrien BUF_PUSH_2 (0, 0); 270789857Sobrien 270889857Sobrien /* Remember the first position in the bracket expression. */ 270989857Sobrien p1 = p; 271089857Sobrien 271189857Sobrien /* charset_not matches newline according to a syntax bit. */ 271289857Sobrien if ((re_opcode_t) b[-6] == charset_not 271389857Sobrien && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 271489857Sobrien { 271589857Sobrien BUF_PUSH('\n'); 271689857Sobrien laststart[5]++; /* Update the length of characters */ 271789857Sobrien } 271889857Sobrien 271989857Sobrien /* Read in characters and ranges, setting map bits. */ 272089857Sobrien for (;;) 272189857Sobrien { 272289857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 272389857Sobrien 272489857Sobrien PATFETCH (c); 272589857Sobrien 272689857Sobrien /* \ might escape characters inside [...] and [^...]. 
*/ 272789857Sobrien if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 272889857Sobrien { 272989857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 273089857Sobrien 273189857Sobrien PATFETCH (c1); 273289857Sobrien BUF_PUSH(c1); 273389857Sobrien laststart[5]++; /* Update the length of chars */ 273489857Sobrien range_start = c1; 273589857Sobrien continue; 273689857Sobrien } 273789857Sobrien 273889857Sobrien /* Could be the end of the bracket expression. If it's 273989857Sobrien not (i.e., when the bracket expression is `[]' so 274089857Sobrien far), the ']' character bit gets set way below. */ 274189857Sobrien if (c == ']' && p != p1 + 1) 274289857Sobrien break; 274389857Sobrien 274489857Sobrien /* Look ahead to see if it's a range when the last thing 274589857Sobrien was a character class. */ 274689857Sobrien if (had_char_class && c == '-' && *p != ']') 274789857Sobrien FREE_STACK_RETURN (REG_ERANGE); 274889857Sobrien 274989857Sobrien /* Look ahead to see if it's a range when the last thing 275089857Sobrien was a character: if this is a hyphen not at the 275189857Sobrien beginning or the end of a list, then it's the range 275289857Sobrien operator. */ 275389857Sobrien if (c == '-' 275489857Sobrien && !(p - 2 >= pattern && p[-2] == '[') 275589857Sobrien && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 275689857Sobrien && *p != ']') 275789857Sobrien { 275889857Sobrien reg_errcode_t ret; 275989857Sobrien /* Allocate the space for range_start and range_end. */ 276089857Sobrien GET_BUFFER_SPACE (2); 276189857Sobrien /* Update the pointer to indicate end of buffer. 
*/ 276289857Sobrien b += 2; 276389857Sobrien ret = wcs_compile_range (range_start, &p, pend, translate, 276489857Sobrien syntax, b, laststart); 276589857Sobrien if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 276689857Sobrien range_start = 0xffffffff; 276789857Sobrien } 276889857Sobrien else if (p[0] == '-' && p[1] != ']') 276989857Sobrien { /* This handles ranges made up of characters only. */ 277089857Sobrien reg_errcode_t ret; 277189857Sobrien 277289857Sobrien /* Move past the `-'. */ 277389857Sobrien PATFETCH (c1); 277489857Sobrien /* Allocate the space for range_start and range_end. */ 277589857Sobrien GET_BUFFER_SPACE (2); 277689857Sobrien /* Update the pointer to indicate end of buffer. */ 277789857Sobrien b += 2; 277889857Sobrien ret = wcs_compile_range (c, &p, pend, translate, syntax, b, 277989857Sobrien laststart); 278089857Sobrien if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 278189857Sobrien range_start = 0xffffffff; 278289857Sobrien } 278389857Sobrien 278489857Sobrien /* See if we're at the beginning of a possible character 278589857Sobrien class. */ 278689857Sobrien else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 278789857Sobrien { /* Leave room for the null. */ 278889857Sobrien char str[CHAR_CLASS_MAX_LENGTH + 1]; 278989857Sobrien 279089857Sobrien PATFETCH (c); 279189857Sobrien c1 = 0; 279289857Sobrien 279389857Sobrien /* If pattern is `[[:'. */ 279489857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 279589857Sobrien 279689857Sobrien for (;;) 279789857Sobrien { 279889857Sobrien PATFETCH (c); 279989857Sobrien if ((c == ':' && *p == ']') || p == pend) 280089857Sobrien break; 280189857Sobrien if (c1 < CHAR_CLASS_MAX_LENGTH) 280289857Sobrien str[c1++] = c; 280389857Sobrien else 280489857Sobrien /* This is in any case an invalid class name. 
*/ 280589857Sobrien str[0] = '\0'; 280689857Sobrien } 280789857Sobrien str[c1] = '\0'; 280889857Sobrien 280989857Sobrien /* If isn't a word bracketed by `[:' and `:]': 281089857Sobrien undo the ending character, the letters, and leave 281189857Sobrien the leading `:' and `[' (but store them as character). */ 281289857Sobrien if (c == ':' && *p == ']') 281389857Sobrien { 281489857Sobrien wctype_t wt; 281589857Sobrien uintptr_t alignedp; 281689857Sobrien 281789857Sobrien /* Query the character class as wctype_t. */ 281889857Sobrien wt = IS_CHAR_CLASS (str); 281989857Sobrien if (wt == 0) 282089857Sobrien FREE_STACK_RETURN (REG_ECTYPE); 282189857Sobrien 282289857Sobrien /* Throw away the ] at the end of the character 282389857Sobrien class. */ 282489857Sobrien PATFETCH (c); 282589857Sobrien 282689857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 282789857Sobrien 282889857Sobrien /* Allocate the space for character class. */ 282989857Sobrien GET_BUFFER_SPACE(CHAR_CLASS_SIZE); 283089857Sobrien /* Update the pointer to indicate end of buffer. */ 283189857Sobrien b += CHAR_CLASS_SIZE; 283289857Sobrien /* Move data which follow character classes 283389857Sobrien not to violate the data. */ 283489857Sobrien insert_space(CHAR_CLASS_SIZE, 283589857Sobrien laststart + 6 + laststart[1], 283689857Sobrien b - 1); 283789857Sobrien alignedp = ((uintptr_t)(laststart + 6 + laststart[1]) 283889857Sobrien + __alignof__(wctype_t) - 1) 283989857Sobrien & ~(uintptr_t)(__alignof__(wctype_t) - 1); 284089857Sobrien /* Store the character class. 
*/ 284189857Sobrien *((wctype_t*)alignedp) = wt; 284289857Sobrien /* Update length of char_classes */ 284389857Sobrien laststart[1] += CHAR_CLASS_SIZE; 284489857Sobrien 284589857Sobrien had_char_class = true; 284689857Sobrien } 284789857Sobrien else 284889857Sobrien { 284989857Sobrien c1++; 285089857Sobrien while (c1--) 285189857Sobrien PATUNFETCH; 285289857Sobrien BUF_PUSH ('['); 285389857Sobrien BUF_PUSH (':'); 285489857Sobrien laststart[5] += 2; /* Update the length of characters */ 285589857Sobrien range_start = ':'; 285689857Sobrien had_char_class = false; 285789857Sobrien } 285889857Sobrien } 285989857Sobrien else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '=' 286089857Sobrien || *p == '.')) 286189857Sobrien { 286289857Sobrien CHAR_T str[128]; /* Should be large enough. */ 286389857Sobrien CHAR_T delim = *p; /* '=' or '.' */ 286489857Sobrien# ifdef _LIBC 286589857Sobrien uint32_t nrules = 286689857Sobrien _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 286789857Sobrien# endif 286889857Sobrien PATFETCH (c); 286989857Sobrien c1 = 0; 287089857Sobrien 287189857Sobrien /* If pattern is `[[=' or '[[.'. */ 287289857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 287389857Sobrien 287489857Sobrien for (;;) 287589857Sobrien { 287689857Sobrien PATFETCH (c); 287789857Sobrien if ((c == delim && *p == ']') || p == pend) 287889857Sobrien break; 287989857Sobrien if (c1 < sizeof (str) - 1) 288089857Sobrien str[c1++] = c; 288189857Sobrien else 288289857Sobrien /* This is in any case an invalid class name. */ 288389857Sobrien str[0] = '\0'; 288489857Sobrien } 288589857Sobrien str[c1] = '\0'; 288689857Sobrien 288789857Sobrien if (c == delim && *p == ']' && str[0] != '\0') 288889857Sobrien { 288989857Sobrien unsigned int i, offset; 289089857Sobrien /* If we have no collation data we use the default 289189857Sobrien collation in which each character is in a class 289289857Sobrien by itself. 
It also means that ASCII is the 289389857Sobrien character set and therefore we cannot have character 289489857Sobrien with more than one byte in the multibyte 289589857Sobrien representation. */ 289689857Sobrien 289789857Sobrien /* If not defined _LIBC, we push the name and 289889857Sobrien `\0' for the sake of matching performance. */ 289989857Sobrien int datasize = c1 + 1; 290089857Sobrien 290189857Sobrien# ifdef _LIBC 290289857Sobrien int32_t idx = 0; 290389857Sobrien if (nrules == 0) 290489857Sobrien# endif 290589857Sobrien { 290689857Sobrien if (c1 != 1) 290789857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 290889857Sobrien } 290989857Sobrien# ifdef _LIBC 291089857Sobrien else 291189857Sobrien { 291289857Sobrien const int32_t *table; 291389857Sobrien const int32_t *weights; 291489857Sobrien const int32_t *extra; 291589857Sobrien const int32_t *indirect; 291689857Sobrien wint_t *cp; 291789857Sobrien 291889857Sobrien /* This #include defines a local function! */ 291989857Sobrien# include <locale/weightwc.h> 292089857Sobrien 292189857Sobrien if(delim == '=') 292289857Sobrien { 292389857Sobrien /* We push the index for equivalence class. */ 292489857Sobrien cp = (wint_t*)str; 292589857Sobrien 292689857Sobrien table = (const int32_t *) 292789857Sobrien _NL_CURRENT (LC_COLLATE, 292889857Sobrien _NL_COLLATE_TABLEWC); 292989857Sobrien weights = (const int32_t *) 293089857Sobrien _NL_CURRENT (LC_COLLATE, 293189857Sobrien _NL_COLLATE_WEIGHTWC); 293289857Sobrien extra = (const int32_t *) 293389857Sobrien _NL_CURRENT (LC_COLLATE, 293489857Sobrien _NL_COLLATE_EXTRAWC); 293589857Sobrien indirect = (const int32_t *) 293689857Sobrien _NL_CURRENT (LC_COLLATE, 293789857Sobrien _NL_COLLATE_INDIRECTWC); 293889857Sobrien 293989857Sobrien idx = findidx ((const wint_t**)&cp); 294089857Sobrien if (idx == 0 || cp < (wint_t*) str + c1) 294189857Sobrien /* This is no valid character. 
*/ 294289857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 294389857Sobrien 294489857Sobrien str[0] = (wchar_t)idx; 294589857Sobrien } 294689857Sobrien else /* delim == '.' */ 294789857Sobrien { 294889857Sobrien /* We push collation sequence value 294989857Sobrien for collating symbol. */ 295089857Sobrien int32_t table_size; 295189857Sobrien const int32_t *symb_table; 295289857Sobrien const unsigned char *extra; 295389857Sobrien int32_t idx; 295489857Sobrien int32_t elem; 295589857Sobrien int32_t second; 295689857Sobrien int32_t hash; 295789857Sobrien char char_str[c1]; 295889857Sobrien 295989857Sobrien /* We have to convert the name to a single-byte 296089857Sobrien string. This is possible since the names 296189857Sobrien consist of ASCII characters and the internal 296289857Sobrien representation is UCS4. */ 296389857Sobrien for (i = 0; i < c1; ++i) 296489857Sobrien char_str[i] = str[i]; 296589857Sobrien 296689857Sobrien table_size = 296789857Sobrien _NL_CURRENT_WORD (LC_COLLATE, 296889857Sobrien _NL_COLLATE_SYMB_HASH_SIZEMB); 296989857Sobrien symb_table = (const int32_t *) 297089857Sobrien _NL_CURRENT (LC_COLLATE, 297189857Sobrien _NL_COLLATE_SYMB_TABLEMB); 297289857Sobrien extra = (const unsigned char *) 297389857Sobrien _NL_CURRENT (LC_COLLATE, 297489857Sobrien _NL_COLLATE_SYMB_EXTRAMB); 297589857Sobrien 297689857Sobrien /* Locate the character in the hashing table. */ 297789857Sobrien hash = elem_hash (char_str, c1); 297889857Sobrien 297989857Sobrien idx = 0; 298089857Sobrien elem = hash % table_size; 298189857Sobrien second = hash % (table_size - 2); 298289857Sobrien while (symb_table[2 * elem] != 0) 298389857Sobrien { 298489857Sobrien /* First compare the hashing value. */ 298589857Sobrien if (symb_table[2 * elem] == hash 298689857Sobrien && c1 == extra[symb_table[2 * elem + 1]] 298789857Sobrien && memcmp (char_str, 298889857Sobrien &extra[symb_table[2 * elem + 1] 298989857Sobrien + 1], c1) == 0) 299089857Sobrien { 299189857Sobrien /* Yep, this is the entry. 
*/ 299289857Sobrien idx = symb_table[2 * elem + 1]; 299389857Sobrien idx += 1 + extra[idx]; 299489857Sobrien break; 299589857Sobrien } 299689857Sobrien 299789857Sobrien /* Next entry. */ 299889857Sobrien elem += second; 299989857Sobrien } 300089857Sobrien 300189857Sobrien if (symb_table[2 * elem] != 0) 300289857Sobrien { 300389857Sobrien /* Compute the index of the byte sequence 300489857Sobrien in the table. */ 300589857Sobrien idx += 1 + extra[idx]; 300689857Sobrien /* Adjust for the alignment. */ 300789857Sobrien idx = (idx + 3) & ~3; 300889857Sobrien 300989857Sobrien str[0] = (wchar_t) idx + 4; 301089857Sobrien } 301189857Sobrien else if (symb_table[2 * elem] == 0 && c1 == 1) 301289857Sobrien { 301389857Sobrien /* No valid character. Match it as a 301489857Sobrien single byte character. */ 301589857Sobrien had_char_class = false; 301689857Sobrien BUF_PUSH(str[0]); 301789857Sobrien /* Update the length of characters */ 301889857Sobrien laststart[5]++; 301989857Sobrien range_start = str[0]; 302089857Sobrien 302189857Sobrien /* Throw away the ] at the end of the 302289857Sobrien collating symbol. */ 302389857Sobrien PATFETCH (c); 302489857Sobrien /* exit from the switch block. */ 302589857Sobrien continue; 302689857Sobrien } 302789857Sobrien else 302889857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 302989857Sobrien } 303089857Sobrien datasize = 1; 303189857Sobrien } 303289857Sobrien# endif 303389857Sobrien /* Throw away the ] at the end of the equivalence 303489857Sobrien class (or collating symbol). */ 303589857Sobrien PATFETCH (c); 303689857Sobrien 303789857Sobrien /* Allocate the space for the equivalence class 303889857Sobrien (or collating symbol) (and '\0' if needed). */ 303989857Sobrien GET_BUFFER_SPACE(datasize); 304089857Sobrien /* Update the pointer to indicate end of buffer. 
*/ 304189857Sobrien b += datasize; 304289857Sobrien 304389857Sobrien if (delim == '=') 304489857Sobrien { /* equivalence class */ 304589857Sobrien /* Calculate the offset of char_ranges, 304689857Sobrien which is next to equivalence_classes. */ 304789857Sobrien offset = laststart[1] + laststart[2] 304889857Sobrien + laststart[3] +6; 304989857Sobrien /* Insert space. */ 305089857Sobrien insert_space(datasize, laststart + offset, b - 1); 305189857Sobrien 305289857Sobrien /* Write the equivalence_class and \0. */ 305389857Sobrien for (i = 0 ; i < datasize ; i++) 305489857Sobrien laststart[offset + i] = str[i]; 305589857Sobrien 305689857Sobrien /* Update the length of equivalence_classes. */ 305789857Sobrien laststart[3] += datasize; 305889857Sobrien had_char_class = true; 305989857Sobrien } 306089857Sobrien else /* delim == '.' */ 306189857Sobrien { /* collating symbol */ 306289857Sobrien /* Calculate the offset of the equivalence_classes, 306389857Sobrien which is next to collating_symbols. */ 306489857Sobrien offset = laststart[1] + laststart[2] + 6; 306589857Sobrien /* Insert space and write the collationg_symbol 306689857Sobrien and \0. */ 306789857Sobrien insert_space(datasize, laststart + offset, b-1); 306889857Sobrien for (i = 0 ; i < datasize ; i++) 306989857Sobrien laststart[offset + i] = str[i]; 307089857Sobrien 307189857Sobrien /* In re_match_2_internal if range_start < -1, we 307289857Sobrien assume -range_start is the offset of the 307389857Sobrien collating symbol which is specified as 307489857Sobrien the character of the range start. So we assign 307589857Sobrien -(laststart[1] + laststart[2] + 6) to 307689857Sobrien range_start. */ 307789857Sobrien range_start = -(laststart[1] + laststart[2] + 6); 307889857Sobrien /* Update the length of collating_symbol. 
*/ 307989857Sobrien laststart[2] += datasize; 308089857Sobrien had_char_class = false; 308189857Sobrien } 308289857Sobrien } 308389857Sobrien else 308489857Sobrien { 308589857Sobrien c1++; 308689857Sobrien while (c1--) 308789857Sobrien PATUNFETCH; 308889857Sobrien BUF_PUSH ('['); 308989857Sobrien BUF_PUSH (delim); 309089857Sobrien laststart[5] += 2; /* Update the length of characters */ 309189857Sobrien range_start = delim; 309289857Sobrien had_char_class = false; 309389857Sobrien } 309489857Sobrien } 309589857Sobrien else 309689857Sobrien { 309789857Sobrien had_char_class = false; 309889857Sobrien BUF_PUSH(c); 309989857Sobrien laststart[5]++; /* Update the length of characters */ 310089857Sobrien range_start = c; 310189857Sobrien } 310289857Sobrien } 310389857Sobrien 310489857Sobrien#else /* BYTE */ 310589857Sobrien /* Ensure that we have enough space to push a charset: the 310689857Sobrien opcode, the length count, and the bitset; 34 bytes in all. */ 310789857Sobrien GET_BUFFER_SPACE (34); 310889857Sobrien 310989857Sobrien laststart = b; 311089857Sobrien 311189857Sobrien /* We test `*p == '^' twice, instead of using an if 311289857Sobrien statement, so we only need one BUF_PUSH. */ 311389857Sobrien BUF_PUSH (*p == '^' ? charset_not : charset); 311489857Sobrien if (*p == '^') 311589857Sobrien p++; 311689857Sobrien 311789857Sobrien /* Remember the first position in the bracket expression. */ 311889857Sobrien p1 = p; 311989857Sobrien 312089857Sobrien /* Push the number of bytes in the bitmap. */ 312189857Sobrien BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 312289857Sobrien 312389857Sobrien /* Clear the whole map. */ 312489857Sobrien bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 312589857Sobrien 312689857Sobrien /* charset_not matches newline according to a syntax bit. 
*/ 312789857Sobrien if ((re_opcode_t) b[-2] == charset_not 312889857Sobrien && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 312989857Sobrien SET_LIST_BIT ('\n'); 313089857Sobrien 313189857Sobrien /* Read in characters and ranges, setting map bits. */ 313289857Sobrien for (;;) 313389857Sobrien { 313489857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 313589857Sobrien 313689857Sobrien PATFETCH (c); 313789857Sobrien 313889857Sobrien /* \ might escape characters inside [...] and [^...]. */ 313989857Sobrien if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 314089857Sobrien { 314189857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 314289857Sobrien 314389857Sobrien PATFETCH (c1); 314489857Sobrien SET_LIST_BIT (c1); 314589857Sobrien range_start = c1; 314689857Sobrien continue; 314789857Sobrien } 314889857Sobrien 314989857Sobrien /* Could be the end of the bracket expression. If it's 315089857Sobrien not (i.e., when the bracket expression is `[]' so 315189857Sobrien far), the ']' character bit gets set way below. */ 315289857Sobrien if (c == ']' && p != p1 + 1) 315389857Sobrien break; 315489857Sobrien 315589857Sobrien /* Look ahead to see if it's a range when the last thing 315689857Sobrien was a character class. */ 315789857Sobrien if (had_char_class && c == '-' && *p != ']') 315889857Sobrien FREE_STACK_RETURN (REG_ERANGE); 315989857Sobrien 316089857Sobrien /* Look ahead to see if it's a range when the last thing 316189857Sobrien was a character: if this is a hyphen not at the 316289857Sobrien beginning or the end of a list, then it's the range 316389857Sobrien operator. 
*/ 316489857Sobrien if (c == '-' 316589857Sobrien && !(p - 2 >= pattern && p[-2] == '[') 316689857Sobrien && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 316789857Sobrien && *p != ']') 316889857Sobrien { 316989857Sobrien reg_errcode_t ret 317089857Sobrien = byte_compile_range (range_start, &p, pend, translate, 317189857Sobrien syntax, b); 317289857Sobrien if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 317389857Sobrien range_start = 0xffffffff; 317489857Sobrien } 317589857Sobrien 317689857Sobrien else if (p[0] == '-' && p[1] != ']') 317789857Sobrien { /* This handles ranges made up of characters only. */ 317889857Sobrien reg_errcode_t ret; 317989857Sobrien 318089857Sobrien /* Move past the `-'. */ 318189857Sobrien PATFETCH (c1); 318289857Sobrien 318389857Sobrien ret = byte_compile_range (c, &p, pend, translate, syntax, b); 318489857Sobrien if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 318589857Sobrien range_start = 0xffffffff; 318689857Sobrien } 318789857Sobrien 318889857Sobrien /* See if we're at the beginning of a possible character 318989857Sobrien class. */ 319089857Sobrien 319189857Sobrien else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 319289857Sobrien { /* Leave room for the null. */ 319389857Sobrien char str[CHAR_CLASS_MAX_LENGTH + 1]; 319489857Sobrien 319589857Sobrien PATFETCH (c); 319689857Sobrien c1 = 0; 319789857Sobrien 319889857Sobrien /* If pattern is `[[:'. */ 319989857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 320089857Sobrien 320189857Sobrien for (;;) 320289857Sobrien { 320389857Sobrien PATFETCH (c); 320489857Sobrien if ((c == ':' && *p == ']') || p == pend) 320589857Sobrien break; 320689857Sobrien if (c1 < CHAR_CLASS_MAX_LENGTH) 320789857Sobrien str[c1++] = c; 320889857Sobrien else 320989857Sobrien /* This is in any case an invalid class name. 
*/ 321089857Sobrien str[0] = '\0'; 321189857Sobrien } 321289857Sobrien str[c1] = '\0'; 321389857Sobrien 321489857Sobrien /* If isn't a word bracketed by `[:' and `:]': 321589857Sobrien undo the ending character, the letters, and leave 321689857Sobrien the leading `:' and `[' (but set bits for them). */ 321789857Sobrien if (c == ':' && *p == ']') 321889857Sobrien { 321989857Sobrien# if defined _LIBC || WIDE_CHAR_SUPPORT 322089857Sobrien boolean is_lower = STREQ (str, "lower"); 322189857Sobrien boolean is_upper = STREQ (str, "upper"); 322289857Sobrien wctype_t wt; 322389857Sobrien int ch; 322489857Sobrien 322589857Sobrien wt = IS_CHAR_CLASS (str); 322689857Sobrien if (wt == 0) 322789857Sobrien FREE_STACK_RETURN (REG_ECTYPE); 322889857Sobrien 322989857Sobrien /* Throw away the ] at the end of the character 323089857Sobrien class. */ 323189857Sobrien PATFETCH (c); 323289857Sobrien 323389857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 323489857Sobrien 323589857Sobrien for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 323689857Sobrien { 323789857Sobrien# ifdef _LIBC 323889857Sobrien if (__iswctype (__btowc (ch), wt)) 323989857Sobrien SET_LIST_BIT (ch); 324089857Sobrien# else 324189857Sobrien if (iswctype (btowc (ch), wt)) 324289857Sobrien SET_LIST_BIT (ch); 324389857Sobrien# endif 324489857Sobrien 324589857Sobrien if (translate && (is_upper || is_lower) 324689857Sobrien && (ISUPPER (ch) || ISLOWER (ch))) 324789857Sobrien SET_LIST_BIT (ch); 324889857Sobrien } 324989857Sobrien 325089857Sobrien had_char_class = true; 325189857Sobrien# else 325289857Sobrien int ch; 325389857Sobrien boolean is_alnum = STREQ (str, "alnum"); 325489857Sobrien boolean is_alpha = STREQ (str, "alpha"); 325589857Sobrien boolean is_blank = STREQ (str, "blank"); 325689857Sobrien boolean is_cntrl = STREQ (str, "cntrl"); 325789857Sobrien boolean is_digit = STREQ (str, "digit"); 325889857Sobrien boolean is_graph = STREQ (str, "graph"); 325989857Sobrien boolean is_lower = STREQ (str, "lower"); 
326089857Sobrien boolean is_print = STREQ (str, "print"); 326189857Sobrien boolean is_punct = STREQ (str, "punct"); 326289857Sobrien boolean is_space = STREQ (str, "space"); 326389857Sobrien boolean is_upper = STREQ (str, "upper"); 326489857Sobrien boolean is_xdigit = STREQ (str, "xdigit"); 326589857Sobrien 326689857Sobrien if (!IS_CHAR_CLASS (str)) 326789857Sobrien FREE_STACK_RETURN (REG_ECTYPE); 326889857Sobrien 326989857Sobrien /* Throw away the ] at the end of the character 327089857Sobrien class. */ 327189857Sobrien PATFETCH (c); 327289857Sobrien 327389857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 327489857Sobrien 327589857Sobrien for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 327689857Sobrien { 327789857Sobrien /* This was split into 3 if's to 327889857Sobrien avoid an arbitrary limit in some compiler. */ 327989857Sobrien if ( (is_alnum && ISALNUM (ch)) 328089857Sobrien || (is_alpha && ISALPHA (ch)) 328189857Sobrien || (is_blank && ISBLANK (ch)) 328289857Sobrien || (is_cntrl && ISCNTRL (ch))) 328389857Sobrien SET_LIST_BIT (ch); 328489857Sobrien if ( (is_digit && ISDIGIT (ch)) 328589857Sobrien || (is_graph && ISGRAPH (ch)) 328689857Sobrien || (is_lower && ISLOWER (ch)) 328789857Sobrien || (is_print && ISPRINT (ch))) 328889857Sobrien SET_LIST_BIT (ch); 328989857Sobrien if ( (is_punct && ISPUNCT (ch)) 329089857Sobrien || (is_space && ISSPACE (ch)) 329189857Sobrien || (is_upper && ISUPPER (ch)) 329289857Sobrien || (is_xdigit && ISXDIGIT (ch))) 329389857Sobrien SET_LIST_BIT (ch); 329489857Sobrien if ( translate && (is_upper || is_lower) 329589857Sobrien && (ISUPPER (ch) || ISLOWER (ch))) 329689857Sobrien SET_LIST_BIT (ch); 329789857Sobrien } 329889857Sobrien had_char_class = true; 329989857Sobrien# endif /* libc || wctype.h */ 330089857Sobrien } 330189857Sobrien else 330289857Sobrien { 330389857Sobrien c1++; 330489857Sobrien while (c1--) 330589857Sobrien PATUNFETCH; 330689857Sobrien SET_LIST_BIT ('['); 330789857Sobrien SET_LIST_BIT (':'); 330889857Sobrien 
range_start = ':'; 330989857Sobrien had_char_class = false; 331089857Sobrien } 331189857Sobrien } 331289857Sobrien else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=') 331389857Sobrien { 331489857Sobrien unsigned char str[MB_LEN_MAX + 1]; 331589857Sobrien# ifdef _LIBC 331689857Sobrien uint32_t nrules = 331789857Sobrien _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 331889857Sobrien# endif 331989857Sobrien 332089857Sobrien PATFETCH (c); 332189857Sobrien c1 = 0; 332289857Sobrien 332389857Sobrien /* If pattern is `[[='. */ 332489857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 332589857Sobrien 332689857Sobrien for (;;) 332789857Sobrien { 332889857Sobrien PATFETCH (c); 332989857Sobrien if ((c == '=' && *p == ']') || p == pend) 333089857Sobrien break; 333189857Sobrien if (c1 < MB_LEN_MAX) 333289857Sobrien str[c1++] = c; 333389857Sobrien else 333489857Sobrien /* This is in any case an invalid class name. */ 333589857Sobrien str[0] = '\0'; 333689857Sobrien } 333789857Sobrien str[c1] = '\0'; 333889857Sobrien 333989857Sobrien if (c == '=' && *p == ']' && str[0] != '\0') 334089857Sobrien { 334189857Sobrien /* If we have no collation data we use the default 334289857Sobrien collation in which each character is in a class 334389857Sobrien by itself. It also means that ASCII is the 334489857Sobrien character set and therefore we cannot have character 334589857Sobrien with more than one byte in the multibyte 334689857Sobrien representation. */ 334789857Sobrien# ifdef _LIBC 334889857Sobrien if (nrules == 0) 334989857Sobrien# endif 335089857Sobrien { 335189857Sobrien if (c1 != 1) 335289857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 335389857Sobrien 335489857Sobrien /* Throw away the ] at the end of the equivalence 335589857Sobrien class. */ 335689857Sobrien PATFETCH (c); 335789857Sobrien 335889857Sobrien /* Set the bit for the character. 
*/ 335989857Sobrien SET_LIST_BIT (str[0]); 336089857Sobrien } 336189857Sobrien# ifdef _LIBC 336289857Sobrien else 336389857Sobrien { 336489857Sobrien /* Try to match the byte sequence in `str' against 336589857Sobrien those known to the collate implementation. 336689857Sobrien First find out whether the bytes in `str' are 336789857Sobrien actually from exactly one character. */ 336889857Sobrien const int32_t *table; 336989857Sobrien const unsigned char *weights; 337089857Sobrien const unsigned char *extra; 337189857Sobrien const int32_t *indirect; 337289857Sobrien int32_t idx; 337389857Sobrien const unsigned char *cp = str; 337489857Sobrien int ch; 337589857Sobrien 337689857Sobrien /* This #include defines a local function! */ 337789857Sobrien# include <locale/weight.h> 337889857Sobrien 337989857Sobrien table = (const int32_t *) 338089857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 338189857Sobrien weights = (const unsigned char *) 338289857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 338389857Sobrien extra = (const unsigned char *) 338489857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 338589857Sobrien indirect = (const int32_t *) 338689857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 338789857Sobrien 338889857Sobrien idx = findidx (&cp); 338989857Sobrien if (idx == 0 || cp < str + c1) 339089857Sobrien /* This is no valid character. */ 339189857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 339289857Sobrien 339389857Sobrien /* Throw away the ] at the end of the equivalence 339489857Sobrien class. */ 339589857Sobrien PATFETCH (c); 339689857Sobrien 339789857Sobrien /* Now we have to go throught the whole table 339889857Sobrien and find all characters which have the same 339989857Sobrien first level weight. 340089857Sobrien 340189857Sobrien XXX Note that this is not entirely correct. 
340289857Sobrien we would have to match multibyte sequences 340389857Sobrien but this is not possible with the current 340489857Sobrien implementation. */ 340589857Sobrien for (ch = 1; ch < 256; ++ch) 340689857Sobrien /* XXX This test would have to be changed if we 340789857Sobrien would allow matching multibyte sequences. */ 340889857Sobrien if (table[ch] > 0) 340989857Sobrien { 341089857Sobrien int32_t idx2 = table[ch]; 341189857Sobrien size_t len = weights[idx2]; 341289857Sobrien 341389857Sobrien /* Test whether the lenghts match. */ 341489857Sobrien if (weights[idx] == len) 341589857Sobrien { 341689857Sobrien /* They do. New compare the bytes of 341789857Sobrien the weight. */ 341889857Sobrien size_t cnt = 0; 341989857Sobrien 342089857Sobrien while (cnt < len 342189857Sobrien && (weights[idx + 1 + cnt] 342289857Sobrien == weights[idx2 + 1 + cnt])) 342389857Sobrien ++cnt; 342489857Sobrien 342589857Sobrien if (cnt == len) 342689857Sobrien /* They match. Mark the character as 342789857Sobrien acceptable. */ 342889857Sobrien SET_LIST_BIT (ch); 342989857Sobrien } 343089857Sobrien } 343189857Sobrien } 343289857Sobrien# endif 343389857Sobrien had_char_class = true; 343489857Sobrien } 343589857Sobrien else 343689857Sobrien { 343789857Sobrien c1++; 343889857Sobrien while (c1--) 343989857Sobrien PATUNFETCH; 344089857Sobrien SET_LIST_BIT ('['); 344189857Sobrien SET_LIST_BIT ('='); 344289857Sobrien range_start = '='; 344389857Sobrien had_char_class = false; 344489857Sobrien } 344589857Sobrien } 344689857Sobrien else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.') 344789857Sobrien { 344889857Sobrien unsigned char str[128]; /* Should be large enough. */ 344989857Sobrien# ifdef _LIBC 345089857Sobrien uint32_t nrules = 345189857Sobrien _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 345289857Sobrien# endif 345389857Sobrien 345489857Sobrien PATFETCH (c); 345589857Sobrien c1 = 0; 345689857Sobrien 345789857Sobrien /* If pattern is `[[.'. 
*/ 345889857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 345989857Sobrien 346089857Sobrien for (;;) 346189857Sobrien { 346289857Sobrien PATFETCH (c); 346389857Sobrien if ((c == '.' && *p == ']') || p == pend) 346489857Sobrien break; 346589857Sobrien if (c1 < sizeof (str)) 346689857Sobrien str[c1++] = c; 346789857Sobrien else 346889857Sobrien /* This is in any case an invalid class name. */ 346989857Sobrien str[0] = '\0'; 347089857Sobrien } 347189857Sobrien str[c1] = '\0'; 347289857Sobrien 347389857Sobrien if (c == '.' && *p == ']' && str[0] != '\0') 347489857Sobrien { 347589857Sobrien /* If we have no collation data we use the default 347689857Sobrien collation in which each character is the name 347789857Sobrien for its own class which contains only the one 347889857Sobrien character. It also means that ASCII is the 347989857Sobrien character set and therefore we cannot have character 348089857Sobrien with more than one byte in the multibyte 348189857Sobrien representation. */ 348289857Sobrien# ifdef _LIBC 348389857Sobrien if (nrules == 0) 348489857Sobrien# endif 348589857Sobrien { 348689857Sobrien if (c1 != 1) 348789857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 348889857Sobrien 348989857Sobrien /* Throw away the ] at the end of the equivalence 349089857Sobrien class. */ 349189857Sobrien PATFETCH (c); 349289857Sobrien 349389857Sobrien /* Set the bit for the character. */ 349489857Sobrien SET_LIST_BIT (str[0]); 349589857Sobrien range_start = ((const unsigned char *) str)[0]; 349689857Sobrien } 349789857Sobrien# ifdef _LIBC 349889857Sobrien else 349989857Sobrien { 350089857Sobrien /* Try to match the byte sequence in `str' against 350189857Sobrien those known to the collate implementation. 350289857Sobrien First find out whether the bytes in `str' are 350389857Sobrien actually from exactly one character. 
*/ 350489857Sobrien int32_t table_size; 350589857Sobrien const int32_t *symb_table; 350689857Sobrien const unsigned char *extra; 350789857Sobrien int32_t idx; 350889857Sobrien int32_t elem; 350989857Sobrien int32_t second; 351089857Sobrien int32_t hash; 351189857Sobrien 351289857Sobrien table_size = 351389857Sobrien _NL_CURRENT_WORD (LC_COLLATE, 351489857Sobrien _NL_COLLATE_SYMB_HASH_SIZEMB); 351589857Sobrien symb_table = (const int32_t *) 351689857Sobrien _NL_CURRENT (LC_COLLATE, 351789857Sobrien _NL_COLLATE_SYMB_TABLEMB); 351889857Sobrien extra = (const unsigned char *) 351989857Sobrien _NL_CURRENT (LC_COLLATE, 352089857Sobrien _NL_COLLATE_SYMB_EXTRAMB); 352189857Sobrien 352289857Sobrien /* Locate the character in the hashing table. */ 352389857Sobrien hash = elem_hash (str, c1); 352489857Sobrien 352589857Sobrien idx = 0; 352689857Sobrien elem = hash % table_size; 352789857Sobrien second = hash % (table_size - 2); 352889857Sobrien while (symb_table[2 * elem] != 0) 352989857Sobrien { 353089857Sobrien /* First compare the hashing value. */ 353189857Sobrien if (symb_table[2 * elem] == hash 353289857Sobrien && c1 == extra[symb_table[2 * elem + 1]] 353389857Sobrien && memcmp (str, 353489857Sobrien &extra[symb_table[2 * elem + 1] 353589857Sobrien + 1], 353689857Sobrien c1) == 0) 353789857Sobrien { 353889857Sobrien /* Yep, this is the entry. */ 353989857Sobrien idx = symb_table[2 * elem + 1]; 354089857Sobrien idx += 1 + extra[idx]; 354189857Sobrien break; 354289857Sobrien } 354389857Sobrien 354489857Sobrien /* Next entry. */ 354589857Sobrien elem += second; 354689857Sobrien } 354789857Sobrien 354889857Sobrien if (symb_table[2 * elem] == 0) 354989857Sobrien /* This is no valid character. */ 355089857Sobrien FREE_STACK_RETURN (REG_ECOLLATE); 355189857Sobrien 355289857Sobrien /* Throw away the ] at the end of the equivalence 355389857Sobrien class. 
*/ 355489857Sobrien PATFETCH (c); 355589857Sobrien 355689857Sobrien /* Now add the multibyte character(s) we found 355789857Sobrien to the accept list. 355889857Sobrien 355989857Sobrien XXX Note that this is not entirely correct. 356089857Sobrien we would have to match multibyte sequences 356189857Sobrien but this is not possible with the current 356289857Sobrien implementation. Also, we have to match 356389857Sobrien collating symbols, which expand to more than 356489857Sobrien one file, as a whole and not allow the 356589857Sobrien individual bytes. */ 356689857Sobrien c1 = extra[idx++]; 356789857Sobrien if (c1 == 1) 356889857Sobrien range_start = extra[idx]; 356989857Sobrien while (c1-- > 0) 357089857Sobrien { 357189857Sobrien SET_LIST_BIT (extra[idx]); 357289857Sobrien ++idx; 357389857Sobrien } 357489857Sobrien } 357589857Sobrien# endif 357689857Sobrien had_char_class = false; 357789857Sobrien } 357889857Sobrien else 357989857Sobrien { 358089857Sobrien c1++; 358189857Sobrien while (c1--) 358289857Sobrien PATUNFETCH; 358389857Sobrien SET_LIST_BIT ('['); 358489857Sobrien SET_LIST_BIT ('.'); 358589857Sobrien range_start = '.'; 358689857Sobrien had_char_class = false; 358789857Sobrien } 358889857Sobrien } 358989857Sobrien else 359089857Sobrien { 359189857Sobrien had_char_class = false; 359289857Sobrien SET_LIST_BIT (c); 359389857Sobrien range_start = c; 359489857Sobrien } 359589857Sobrien } 359689857Sobrien 359789857Sobrien /* Discard any (non)matching list bytes that are all 0 at the 359889857Sobrien end of the map. Decrease the map-length byte too. 
*/ 359989857Sobrien while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 360089857Sobrien b[-1]--; 360189857Sobrien b += b[-1]; 360289857Sobrien#endif /* WCHAR */ 360389857Sobrien } 360489857Sobrien break; 360589857Sobrien 360689857Sobrien 360789857Sobrien case '(': 360889857Sobrien if (syntax & RE_NO_BK_PARENS) 360989857Sobrien goto handle_open; 361089857Sobrien else 361189857Sobrien goto normal_char; 361289857Sobrien 361389857Sobrien 361489857Sobrien case ')': 361589857Sobrien if (syntax & RE_NO_BK_PARENS) 361689857Sobrien goto handle_close; 361789857Sobrien else 361889857Sobrien goto normal_char; 361989857Sobrien 362089857Sobrien 362189857Sobrien case '\n': 362289857Sobrien if (syntax & RE_NEWLINE_ALT) 362389857Sobrien goto handle_alt; 362489857Sobrien else 362589857Sobrien goto normal_char; 362689857Sobrien 362789857Sobrien 362889857Sobrien case '|': 362989857Sobrien if (syntax & RE_NO_BK_VBAR) 363089857Sobrien goto handle_alt; 363189857Sobrien else 363289857Sobrien goto normal_char; 363389857Sobrien 363489857Sobrien 363589857Sobrien case '{': 363689857Sobrien if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 363789857Sobrien goto handle_interval; 363889857Sobrien else 363989857Sobrien goto normal_char; 364089857Sobrien 364189857Sobrien 364289857Sobrien case '\\': 364389857Sobrien if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 364489857Sobrien 364589857Sobrien /* Do not translate the character after the \, so that we can 364689857Sobrien distinguish, e.g., \B from \b, even if we normally would 364789857Sobrien translate, e.g., B to b. 
*/ 364889857Sobrien PATFETCH_RAW (c); 364989857Sobrien 365089857Sobrien switch (c) 365189857Sobrien { 365289857Sobrien case '(': 365389857Sobrien if (syntax & RE_NO_BK_PARENS) 365489857Sobrien goto normal_backslash; 365589857Sobrien 365689857Sobrien handle_open: 365789857Sobrien bufp->re_nsub++; 365889857Sobrien regnum++; 365989857Sobrien 366089857Sobrien if (COMPILE_STACK_FULL) 366189857Sobrien { 366289857Sobrien RETALLOC (compile_stack.stack, compile_stack.size << 1, 366389857Sobrien compile_stack_elt_t); 366489857Sobrien if (compile_stack.stack == NULL) return REG_ESPACE; 366589857Sobrien 366689857Sobrien compile_stack.size <<= 1; 366789857Sobrien } 366889857Sobrien 366989857Sobrien /* These are the values to restore when we hit end of this 367089857Sobrien group. They are all relative offsets, so that if the 367189857Sobrien whole pattern moves because of realloc, they will still 367289857Sobrien be valid. */ 367389857Sobrien COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR; 367489857Sobrien COMPILE_STACK_TOP.fixup_alt_jump 367589857Sobrien = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0; 367689857Sobrien COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR; 367789857Sobrien COMPILE_STACK_TOP.regnum = regnum; 367889857Sobrien 367989857Sobrien /* We will eventually replace the 0 with the number of 368089857Sobrien groups inner to this one. But do not push a 368189857Sobrien start_memory for groups beyond the last one we can 368289857Sobrien represent in the compiled pattern. 
*/ 368389857Sobrien if (regnum <= MAX_REGNUM) 368489857Sobrien { 368589857Sobrien COMPILE_STACK_TOP.inner_group_offset = b 368689857Sobrien - COMPILED_BUFFER_VAR + 2; 368789857Sobrien BUF_PUSH_3 (start_memory, regnum, 0); 368889857Sobrien } 368989857Sobrien 369089857Sobrien compile_stack.avail++; 369189857Sobrien 369289857Sobrien fixup_alt_jump = 0; 369389857Sobrien laststart = 0; 369489857Sobrien begalt = b; 369589857Sobrien /* If we've reached MAX_REGNUM groups, then this open 369689857Sobrien won't actually generate any code, so we'll have to 369789857Sobrien clear pending_exact explicitly. */ 369889857Sobrien pending_exact = 0; 369989857Sobrien break; 370089857Sobrien 370189857Sobrien 370289857Sobrien case ')': 370389857Sobrien if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 370489857Sobrien 370589857Sobrien if (COMPILE_STACK_EMPTY) 370689857Sobrien { 370789857Sobrien if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 370889857Sobrien goto normal_backslash; 370989857Sobrien else 371089857Sobrien FREE_STACK_RETURN (REG_ERPAREN); 371189857Sobrien } 371289857Sobrien 371389857Sobrien handle_close: 371489857Sobrien if (fixup_alt_jump) 371589857Sobrien { /* Push a dummy failure point at the end of the 371689857Sobrien alternative for a possible future 371789857Sobrien `pop_failure_jump' to pop. See comments at 371889857Sobrien `push_dummy_failure' in `re_match_2'. */ 371989857Sobrien BUF_PUSH (push_dummy_failure); 372089857Sobrien 372189857Sobrien /* We allocated space for this jump when we assigned 372289857Sobrien to `fixup_alt_jump', in the `handle_alt' case below. */ 372389857Sobrien STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 372489857Sobrien } 372589857Sobrien 372689857Sobrien /* See similar code for backslashed left paren above. 
*/ 372789857Sobrien if (COMPILE_STACK_EMPTY) 372889857Sobrien { 372989857Sobrien if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 373089857Sobrien goto normal_char; 373189857Sobrien else 373289857Sobrien FREE_STACK_RETURN (REG_ERPAREN); 373389857Sobrien } 373489857Sobrien 373589857Sobrien /* Since we just checked for an empty stack above, this 373689857Sobrien ``can't happen''. */ 373789857Sobrien assert (compile_stack.avail != 0); 373889857Sobrien { 373989857Sobrien /* We don't just want to restore into `regnum', because 374089857Sobrien later groups should continue to be numbered higher, 374189857Sobrien as in `(ab)c(de)' -- the second group is #2. */ 374289857Sobrien regnum_t this_group_regnum; 374389857Sobrien 374489857Sobrien compile_stack.avail--; 374589857Sobrien begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset; 374689857Sobrien fixup_alt_jump 374789857Sobrien = COMPILE_STACK_TOP.fixup_alt_jump 374889857Sobrien ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1 374989857Sobrien : 0; 375089857Sobrien laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset; 375189857Sobrien this_group_regnum = COMPILE_STACK_TOP.regnum; 375289857Sobrien /* If we've reached MAX_REGNUM groups, then this open 375389857Sobrien won't actually generate any code, so we'll have to 375489857Sobrien clear pending_exact explicitly. */ 375589857Sobrien pending_exact = 0; 375689857Sobrien 375789857Sobrien /* We're at the end of the group, so now we know how many 375889857Sobrien groups were inside this one. 
*/ 375989857Sobrien if (this_group_regnum <= MAX_REGNUM) 376089857Sobrien { 376189857Sobrien UCHAR_T *inner_group_loc 376289857Sobrien = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset; 376389857Sobrien 376489857Sobrien *inner_group_loc = regnum - this_group_regnum; 376589857Sobrien BUF_PUSH_3 (stop_memory, this_group_regnum, 376689857Sobrien regnum - this_group_regnum); 376789857Sobrien } 376889857Sobrien } 376989857Sobrien break; 377089857Sobrien 377189857Sobrien 377289857Sobrien case '|': /* `\|'. */ 377389857Sobrien if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 377489857Sobrien goto normal_backslash; 377589857Sobrien handle_alt: 377689857Sobrien if (syntax & RE_LIMITED_OPS) 377789857Sobrien goto normal_char; 377889857Sobrien 377989857Sobrien /* Insert before the previous alternative a jump which 378089857Sobrien jumps to this alternative if the former fails. */ 378189857Sobrien GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 378289857Sobrien INSERT_JUMP (on_failure_jump, begalt, 378389857Sobrien b + 2 + 2 * OFFSET_ADDRESS_SIZE); 378489857Sobrien pending_exact = 0; 378589857Sobrien b += 1 + OFFSET_ADDRESS_SIZE; 378689857Sobrien 378789857Sobrien /* The alternative before this one has a jump after it 378889857Sobrien which gets executed if it gets matched. Adjust that 378989857Sobrien jump so it will jump to this alternative's analogous 379089857Sobrien jump (put in below, which in turn will jump to the next 379189857Sobrien (if any) alternative's such jump, etc.). The last such 379289857Sobrien jump jumps to the correct final destination. A picture: 379389857Sobrien _____ _____ 379489857Sobrien | | | | 379589857Sobrien | v | v 379689857Sobrien a | b | c 379789857Sobrien 379889857Sobrien If we are at `b', then fixup_alt_jump right now points to a 379989857Sobrien three-byte space after `a'. 
We'll put in the jump, set 380089857Sobrien fixup_alt_jump to right after `b', and leave behind three 380189857Sobrien bytes which we'll fill in when we get to after `c'. */ 380289857Sobrien 380389857Sobrien if (fixup_alt_jump) 380489857Sobrien STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 380589857Sobrien 380689857Sobrien /* Mark and leave space for a jump after this alternative, 380789857Sobrien to be filled in later either by next alternative or 380889857Sobrien when know we're at the end of a series of alternatives. */ 380989857Sobrien fixup_alt_jump = b; 381089857Sobrien GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 381189857Sobrien b += 1 + OFFSET_ADDRESS_SIZE; 381289857Sobrien 381389857Sobrien laststart = 0; 381489857Sobrien begalt = b; 381589857Sobrien break; 381689857Sobrien 381789857Sobrien 381889857Sobrien case '{': 381989857Sobrien /* If \{ is a literal. */ 382089857Sobrien if (!(syntax & RE_INTERVALS) 382189857Sobrien /* If we're at `\{' and it's not the open-interval 382289857Sobrien operator. */ 382389857Sobrien || (syntax & RE_NO_BK_BRACES)) 382489857Sobrien goto normal_backslash; 382589857Sobrien 382689857Sobrien handle_interval: 382789857Sobrien { 382889857Sobrien /* If got here, then the syntax allows intervals. */ 382989857Sobrien 383089857Sobrien /* At least (most) this many matches must be made. */ 383189857Sobrien int lower_bound = -1, upper_bound = -1; 383289857Sobrien 383389857Sobrien /* Place in the uncompiled pattern (i.e., just after 383489857Sobrien the '{') to go back to if the interval is invalid. 
*/ 383589857Sobrien const CHAR_T *beg_interval = p; 383689857Sobrien 383789857Sobrien if (p == pend) 383889857Sobrien goto invalid_interval; 383989857Sobrien 384089857Sobrien GET_UNSIGNED_NUMBER (lower_bound); 384189857Sobrien 384289857Sobrien if (c == ',') 384389857Sobrien { 384489857Sobrien GET_UNSIGNED_NUMBER (upper_bound); 384589857Sobrien if (upper_bound < 0) 384689857Sobrien upper_bound = RE_DUP_MAX; 384789857Sobrien } 384889857Sobrien else 384989857Sobrien /* Interval such as `{1}' => match exactly once. */ 385089857Sobrien upper_bound = lower_bound; 385189857Sobrien 385289857Sobrien if (! (0 <= lower_bound && lower_bound <= upper_bound)) 385389857Sobrien goto invalid_interval; 385489857Sobrien 385589857Sobrien if (!(syntax & RE_NO_BK_BRACES)) 385689857Sobrien { 385789857Sobrien if (c != '\\' || p == pend) 385889857Sobrien goto invalid_interval; 385989857Sobrien PATFETCH (c); 386089857Sobrien } 386189857Sobrien 386289857Sobrien if (c != '}') 386389857Sobrien goto invalid_interval; 386489857Sobrien 386589857Sobrien /* If it's invalid to have no preceding re. */ 386689857Sobrien if (!laststart) 386789857Sobrien { 386889857Sobrien if (syntax & RE_CONTEXT_INVALID_OPS 386989857Sobrien && !(syntax & RE_INVALID_INTERVAL_ORD)) 387089857Sobrien FREE_STACK_RETURN (REG_BADRPT); 387189857Sobrien else if (syntax & RE_CONTEXT_INDEP_OPS) 387289857Sobrien laststart = b; 387389857Sobrien else 387489857Sobrien goto unfetch_interval; 387589857Sobrien } 387689857Sobrien 387789857Sobrien /* We just parsed a valid interval. */ 387889857Sobrien 387989857Sobrien if (RE_DUP_MAX < upper_bound) 388089857Sobrien FREE_STACK_RETURN (REG_BADBR); 388189857Sobrien 388289857Sobrien /* If the upper bound is zero, don't want to succeed at 388389857Sobrien all; jump from `laststart' to `b + 3', which will be 388489857Sobrien the end of the buffer after we insert the jump. */ 388589857Sobrien /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' 388689857Sobrien instead of 'b + 3'. 
*/ 388789857Sobrien if (upper_bound == 0) 388889857Sobrien { 388989857Sobrien GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 389089857Sobrien INSERT_JUMP (jump, laststart, b + 1 389189857Sobrien + OFFSET_ADDRESS_SIZE); 389289857Sobrien b += 1 + OFFSET_ADDRESS_SIZE; 389389857Sobrien } 389489857Sobrien 389589857Sobrien /* Otherwise, we have a nontrivial interval. When 389689857Sobrien we're all done, the pattern will look like: 389789857Sobrien set_number_at <jump count> <upper bound> 389889857Sobrien set_number_at <succeed_n count> <lower bound> 389989857Sobrien succeed_n <after jump addr> <succeed_n count> 390089857Sobrien <body of loop> 390189857Sobrien jump_n <succeed_n addr> <jump count> 390289857Sobrien (The upper bound and `jump_n' are omitted if 390389857Sobrien `upper_bound' is 1, though.) */ 390489857Sobrien else 390589857Sobrien { /* If the upper bound is > 1, we need to insert 390689857Sobrien more at the end of the loop. */ 390789857Sobrien unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE + 390889857Sobrien (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE); 390989857Sobrien 391089857Sobrien GET_BUFFER_SPACE (nbytes); 391189857Sobrien 391289857Sobrien /* Initialize lower bound of the `succeed_n', even 391389857Sobrien though it will be set during matching by its 391489857Sobrien attendant `set_number_at' (inserted next), 391589857Sobrien because `re_compile_fastmap' needs to know. 391689857Sobrien Jump to the `jump_n' we might insert below. */ 391789857Sobrien INSERT_JUMP2 (succeed_n, laststart, 391889857Sobrien b + 1 + 2 * OFFSET_ADDRESS_SIZE 391989857Sobrien + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE) 392089857Sobrien , lower_bound); 392189857Sobrien b += 1 + 2 * OFFSET_ADDRESS_SIZE; 392289857Sobrien 392389857Sobrien /* Code to initialize the lower bound. Insert 392489857Sobrien before the `succeed_n'. The `5' is the last two 392589857Sobrien bytes of this `set_number_at', plus 3 bytes of 392689857Sobrien the following `succeed_n'. 
*/ 392789857Sobrien /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE' 392889857Sobrien is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE' 392989857Sobrien of the following `succeed_n'. */ 393089857Sobrien PREFIX(insert_op2) (set_number_at, laststart, 1 393189857Sobrien + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b); 393289857Sobrien b += 1 + 2 * OFFSET_ADDRESS_SIZE; 393389857Sobrien 393489857Sobrien if (upper_bound > 1) 393589857Sobrien { /* More than one repetition is allowed, so 393689857Sobrien append a backward jump to the `succeed_n' 393789857Sobrien that starts this interval. 393889857Sobrien 393989857Sobrien When we've reached this during matching, 394089857Sobrien we'll have matched the interval once, so 394189857Sobrien jump back only `upper_bound - 1' times. */ 394289857Sobrien STORE_JUMP2 (jump_n, b, laststart 394389857Sobrien + 2 * OFFSET_ADDRESS_SIZE + 1, 394489857Sobrien upper_bound - 1); 394589857Sobrien b += 1 + 2 * OFFSET_ADDRESS_SIZE; 394689857Sobrien 394789857Sobrien /* The location we want to set is the second 394889857Sobrien parameter of the `jump_n'; that is `b-2' as 394989857Sobrien an absolute address. `laststart' will be 395089857Sobrien the `set_number_at' we're about to insert; 395189857Sobrien `laststart+3' the number to set, the source 395289857Sobrien for the relative address. But we are 395389857Sobrien inserting into the middle of the pattern -- 395489857Sobrien so everything is getting moved up by 5. 395589857Sobrien Conclusion: (b - 2) - (laststart + 3) + 5, 395689857Sobrien i.e., b - laststart. 395789857Sobrien 395889857Sobrien We insert this at the beginning of the loop 395989857Sobrien so that if we fail during matching, we'll 396089857Sobrien reinitialize the bounds. 
*/ 396189857Sobrien PREFIX(insert_op2) (set_number_at, laststart, 396289857Sobrien b - laststart, 396389857Sobrien upper_bound - 1, b); 396489857Sobrien b += 1 + 2 * OFFSET_ADDRESS_SIZE; 396589857Sobrien } 396689857Sobrien } 396789857Sobrien pending_exact = 0; 396889857Sobrien break; 396989857Sobrien 397089857Sobrien invalid_interval: 397189857Sobrien if (!(syntax & RE_INVALID_INTERVAL_ORD)) 397289857Sobrien FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR); 397389857Sobrien unfetch_interval: 397489857Sobrien /* Match the characters as literals. */ 397589857Sobrien p = beg_interval; 397689857Sobrien c = '{'; 397789857Sobrien if (syntax & RE_NO_BK_BRACES) 397889857Sobrien goto normal_char; 397989857Sobrien else 398089857Sobrien goto normal_backslash; 398189857Sobrien } 398289857Sobrien 398389857Sobrien#ifdef emacs 398489857Sobrien /* There is no way to specify the before_dot and after_dot 398589857Sobrien operators. rms says this is ok. --karl */ 398689857Sobrien case '=': 398789857Sobrien BUF_PUSH (at_dot); 398889857Sobrien break; 398989857Sobrien 399089857Sobrien case 's': 399189857Sobrien laststart = b; 399289857Sobrien PATFETCH (c); 399389857Sobrien BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 399489857Sobrien break; 399589857Sobrien 399689857Sobrien case 'S': 399789857Sobrien laststart = b; 399889857Sobrien PATFETCH (c); 399989857Sobrien BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 400089857Sobrien break; 400189857Sobrien#endif /* emacs */ 400289857Sobrien 400389857Sobrien 400489857Sobrien case 'w': 400589857Sobrien if (syntax & RE_NO_GNU_OPS) 400689857Sobrien goto normal_char; 400789857Sobrien laststart = b; 400889857Sobrien BUF_PUSH (wordchar); 400989857Sobrien break; 401089857Sobrien 401189857Sobrien 401289857Sobrien case 'W': 401389857Sobrien if (syntax & RE_NO_GNU_OPS) 401489857Sobrien goto normal_char; 401589857Sobrien laststart = b; 401689857Sobrien BUF_PUSH (notwordchar); 401789857Sobrien break; 401889857Sobrien 401989857Sobrien 
402089857Sobrien case '<': 402189857Sobrien if (syntax & RE_NO_GNU_OPS) 402289857Sobrien goto normal_char; 402389857Sobrien BUF_PUSH (wordbeg); 402489857Sobrien break; 402589857Sobrien 402689857Sobrien case '>': 402789857Sobrien if (syntax & RE_NO_GNU_OPS) 402889857Sobrien goto normal_char; 402989857Sobrien BUF_PUSH (wordend); 403089857Sobrien break; 403189857Sobrien 403289857Sobrien case 'b': 403389857Sobrien if (syntax & RE_NO_GNU_OPS) 403489857Sobrien goto normal_char; 403589857Sobrien BUF_PUSH (wordbound); 403689857Sobrien break; 403789857Sobrien 403889857Sobrien case 'B': 403989857Sobrien if (syntax & RE_NO_GNU_OPS) 404089857Sobrien goto normal_char; 404189857Sobrien BUF_PUSH (notwordbound); 404289857Sobrien break; 404389857Sobrien 404489857Sobrien case '`': 404589857Sobrien if (syntax & RE_NO_GNU_OPS) 404689857Sobrien goto normal_char; 404789857Sobrien BUF_PUSH (begbuf); 404889857Sobrien break; 404989857Sobrien 405089857Sobrien case '\'': 405189857Sobrien if (syntax & RE_NO_GNU_OPS) 405289857Sobrien goto normal_char; 405389857Sobrien BUF_PUSH (endbuf); 405489857Sobrien break; 405589857Sobrien 405689857Sobrien case '1': case '2': case '3': case '4': case '5': 405789857Sobrien case '6': case '7': case '8': case '9': 405889857Sobrien if (syntax & RE_NO_BK_REFS) 405989857Sobrien goto normal_char; 406089857Sobrien 406189857Sobrien c1 = c - '0'; 406289857Sobrien 406389857Sobrien if (c1 > regnum) 406489857Sobrien FREE_STACK_RETURN (REG_ESUBREG); 406589857Sobrien 406689857Sobrien /* Can't back reference to a subexpression if inside of it. 
*/ 406789857Sobrien if (group_in_compile_stack (compile_stack, (regnum_t) c1)) 406889857Sobrien goto normal_char; 406989857Sobrien 407089857Sobrien laststart = b; 407189857Sobrien BUF_PUSH_2 (duplicate, c1); 407289857Sobrien break; 407389857Sobrien 407489857Sobrien 407589857Sobrien case '+': 407689857Sobrien case '?': 407789857Sobrien if (syntax & RE_BK_PLUS_QM) 407889857Sobrien goto handle_plus; 407989857Sobrien else 408089857Sobrien goto normal_backslash; 408189857Sobrien 408289857Sobrien default: 408389857Sobrien normal_backslash: 408489857Sobrien /* You might think it would be useful for \ to mean 408589857Sobrien not to translate; but if we don't translate it 408689857Sobrien it will never match anything. */ 408789857Sobrien c = TRANSLATE (c); 408889857Sobrien goto normal_char; 408989857Sobrien } 409089857Sobrien break; 409189857Sobrien 409289857Sobrien 409389857Sobrien default: 409489857Sobrien /* Expects the character in `c'. */ 409589857Sobrien normal_char: 409689857Sobrien /* If no exactn currently being built. */ 409789857Sobrien if (!pending_exact 409889857Sobrien#ifdef WCHAR 409989857Sobrien /* If last exactn handle binary(or character) and 410089857Sobrien new exactn handle character(or binary). */ 410189857Sobrien || is_exactn_bin != is_binary[p - 1 - pattern] 410289857Sobrien#endif /* WCHAR */ 410389857Sobrien 410489857Sobrien /* If last exactn not at current position. */ 410589857Sobrien || pending_exact + *pending_exact + 1 != b 410689857Sobrien 410789857Sobrien /* We have only one byte following the exactn for the count. */ 410889857Sobrien || *pending_exact == (1 << BYTEWIDTH) - 1 410989857Sobrien 411089857Sobrien /* If followed by a repetition operator. */ 411189857Sobrien || *p == '*' || *p == '^' 411289857Sobrien || ((syntax & RE_BK_PLUS_QM) 411389857Sobrien ? 
*p == '\\' && (p[1] == '+' || p[1] == '?') 411489857Sobrien : (*p == '+' || *p == '?')) 411589857Sobrien || ((syntax & RE_INTERVALS) 411689857Sobrien && ((syntax & RE_NO_BK_BRACES) 411789857Sobrien ? *p == '{' 411889857Sobrien : (p[0] == '\\' && p[1] == '{')))) 411989857Sobrien { 412089857Sobrien /* Start building a new exactn. */ 412189857Sobrien 412289857Sobrien laststart = b; 412389857Sobrien 412489857Sobrien#ifdef WCHAR 412589857Sobrien /* Is this exactn binary data or character? */ 412689857Sobrien is_exactn_bin = is_binary[p - 1 - pattern]; 412789857Sobrien if (is_exactn_bin) 412889857Sobrien BUF_PUSH_2 (exactn_bin, 0); 412989857Sobrien else 413089857Sobrien BUF_PUSH_2 (exactn, 0); 413189857Sobrien#else 413289857Sobrien BUF_PUSH_2 (exactn, 0); 413389857Sobrien#endif /* WCHAR */ 413489857Sobrien pending_exact = b - 1; 413589857Sobrien } 413689857Sobrien 413789857Sobrien BUF_PUSH (c); 413889857Sobrien (*pending_exact)++; 413989857Sobrien break; 414089857Sobrien } /* switch (c) */ 414189857Sobrien } /* while p != pend */ 414289857Sobrien 414389857Sobrien 414489857Sobrien /* Through the pattern now. */ 414589857Sobrien 414689857Sobrien if (fixup_alt_jump) 414789857Sobrien STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 414889857Sobrien 414989857Sobrien if (!COMPILE_STACK_EMPTY) 415089857Sobrien FREE_STACK_RETURN (REG_EPAREN); 415189857Sobrien 415289857Sobrien /* If we don't want backtracking, force success 415389857Sobrien the first time we reach the end of the compiled pattern. */ 415489857Sobrien if (syntax & RE_NO_POSIX_BACKTRACKING) 415589857Sobrien BUF_PUSH (succeed); 415689857Sobrien 415789857Sobrien#ifdef WCHAR 415889857Sobrien free (pattern); 415989857Sobrien free (mbs_offset); 416089857Sobrien free (is_binary); 416189857Sobrien#endif 416289857Sobrien free (compile_stack.stack); 416389857Sobrien 416489857Sobrien /* We have succeeded; set the length of the buffer. 
*/ 416589857Sobrien#ifdef WCHAR 416689857Sobrien bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR; 416789857Sobrien#else 416889857Sobrien bufp->used = b - bufp->buffer; 416989857Sobrien#endif 417089857Sobrien 417189857Sobrien#ifdef DEBUG 417289857Sobrien if (debug) 417389857Sobrien { 417489857Sobrien DEBUG_PRINT1 ("\nCompiled pattern: \n"); 417589857Sobrien PREFIX(print_compiled_pattern) (bufp); 417689857Sobrien } 417789857Sobrien#endif /* DEBUG */ 417889857Sobrien 417989857Sobrien#ifndef MATCH_MAY_ALLOCATE 418089857Sobrien /* Initialize the failure stack to the largest possible stack. This 418189857Sobrien isn't necessary unless we're trying to avoid calling alloca in 418289857Sobrien the search and match routines. */ 418389857Sobrien { 418489857Sobrien int num_regs = bufp->re_nsub + 1; 418589857Sobrien 418689857Sobrien /* Since DOUBLE_FAIL_STACK refuses to double only if the current size 418789857Sobrien is strictly greater than re_max_failures, the largest possible stack 418889857Sobrien is 2 * re_max_failures failure points. */ 418989857Sobrien if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) 419089857Sobrien { 419189857Sobrien fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 419289857Sobrien 419389857Sobrien# ifdef emacs 419489857Sobrien if (! fail_stack.stack) 419589857Sobrien fail_stack.stack 419689857Sobrien = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size 419789857Sobrien * sizeof (PREFIX(fail_stack_elt_t))); 419889857Sobrien else 419989857Sobrien fail_stack.stack 420089857Sobrien = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack, 420189857Sobrien (fail_stack.size 420289857Sobrien * sizeof (PREFIX(fail_stack_elt_t)))); 420389857Sobrien# else /* not emacs */ 420489857Sobrien if (! 
fail_stack.stack) 420589857Sobrien fail_stack.stack 420689857Sobrien = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size 420789857Sobrien * sizeof (PREFIX(fail_stack_elt_t))); 420889857Sobrien else 420989857Sobrien fail_stack.stack 421089857Sobrien = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack, 421189857Sobrien (fail_stack.size 421289857Sobrien * sizeof (PREFIX(fail_stack_elt_t)))); 421389857Sobrien# endif /* not emacs */ 421489857Sobrien } 421589857Sobrien 421689857Sobrien PREFIX(regex_grow_registers) (num_regs); 421789857Sobrien } 421889857Sobrien#endif /* not MATCH_MAY_ALLOCATE */ 421989857Sobrien 422089857Sobrien return REG_NOERROR; 422189857Sobrien} /* regex_compile */ 422289857Sobrien 422389857Sobrien/* Subroutines for `regex_compile'. */ 422489857Sobrien 422589857Sobrien/* Store OP at LOC followed by two-byte integer parameter ARG. */ 422689857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t. */ 422789857Sobrien 422889857Sobrienstatic void 4229218822SdimPREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg) 423089857Sobrien{ 423189857Sobrien *loc = (UCHAR_T) op; 423289857Sobrien STORE_NUMBER (loc + 1, arg); 423389857Sobrien} 423489857Sobrien 423589857Sobrien 423689857Sobrien/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ 423789857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t. */ 423889857Sobrien 423989857Sobrienstatic void 4240218822SdimPREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2) 424189857Sobrien{ 424289857Sobrien *loc = (UCHAR_T) op; 424389857Sobrien STORE_NUMBER (loc + 1, arg1); 424489857Sobrien STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2); 424589857Sobrien} 424689857Sobrien 424789857Sobrien 424889857Sobrien/* Copy the bytes from LOC to END to open up three bytes of space at LOC 424989857Sobrien for OP followed by two-byte integer parameter ARG. */ 425089857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t. 
*/ 425189857Sobrien 425289857Sobrienstatic void 4253218822SdimPREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end) 425489857Sobrien{ 425589857Sobrien register UCHAR_T *pfrom = end; 425689857Sobrien register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE; 425789857Sobrien 425889857Sobrien while (pfrom != loc) 425989857Sobrien *--pto = *--pfrom; 426089857Sobrien 426189857Sobrien PREFIX(store_op1) (op, loc, arg); 426289857Sobrien} 426389857Sobrien 426489857Sobrien 426589857Sobrien/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 426689857Sobrien/* ifdef WCHAR, integer parameter is 1 wchar_t. */ 426789857Sobrien 426889857Sobrienstatic void 4269218822SdimPREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, 4270218822Sdim int arg2, UCHAR_T *end) 427189857Sobrien{ 427289857Sobrien register UCHAR_T *pfrom = end; 427389857Sobrien register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE; 427489857Sobrien 427589857Sobrien while (pfrom != loc) 427689857Sobrien *--pto = *--pfrom; 427789857Sobrien 427889857Sobrien PREFIX(store_op2) (op, loc, arg1, arg2); 427989857Sobrien} 428089857Sobrien 428189857Sobrien 428289857Sobrien/* P points to just after a ^ in PATTERN. Return true if that ^ comes 428389857Sobrien after an alternative or a begin-subexpression. We assume there is at 428489857Sobrien least one character before the ^. */ 428589857Sobrien 428689857Sobrienstatic boolean 4287218822SdimPREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p, 4288218822Sdim reg_syntax_t syntax) 428989857Sobrien{ 429089857Sobrien const CHAR_T *prev = p - 2; 429189857Sobrien boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 429289857Sobrien 429389857Sobrien return 429489857Sobrien /* After a subexpression? */ 429589857Sobrien (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 429689857Sobrien /* After an alternative? 
*/ 429789857Sobrien || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 429889857Sobrien} 429989857Sobrien 430089857Sobrien 430189857Sobrien/* The dual of at_begline_loc_p. This one is for $. We assume there is 430289857Sobrien at least one character after the $, i.e., `P < PEND'. */ 430389857Sobrien 430489857Sobrienstatic boolean 4305218822SdimPREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend, 4306218822Sdim reg_syntax_t syntax) 430789857Sobrien{ 430889857Sobrien const CHAR_T *next = p; 430989857Sobrien boolean next_backslash = *next == '\\'; 431089857Sobrien const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0; 431189857Sobrien 431289857Sobrien return 431389857Sobrien /* Before a subexpression? */ 431489857Sobrien (syntax & RE_NO_BK_PARENS ? *next == ')' 431589857Sobrien : next_backslash && next_next && *next_next == ')') 431689857Sobrien /* Before an alternative? */ 431789857Sobrien || (syntax & RE_NO_BK_VBAR ? *next == '|' 431889857Sobrien : next_backslash && next_next && *next_next == '|'); 431989857Sobrien} 432089857Sobrien 432189857Sobrien#else /* not INSIDE_RECURSION */ 432289857Sobrien 432389857Sobrien/* Returns true if REGNUM is in one of COMPILE_STACK's elements and 432489857Sobrien false if it's not. 
*/ 432589857Sobrien 432689857Sobrienstatic boolean 4327218822Sdimgroup_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) 432889857Sobrien{ 432989857Sobrien int this_element; 433089857Sobrien 433189857Sobrien for (this_element = compile_stack.avail - 1; 433289857Sobrien this_element >= 0; 433389857Sobrien this_element--) 433489857Sobrien if (compile_stack.stack[this_element].regnum == regnum) 433589857Sobrien return true; 433689857Sobrien 433789857Sobrien return false; 433889857Sobrien} 433989857Sobrien#endif /* not INSIDE_RECURSION */ 434089857Sobrien 434189857Sobrien#ifdef INSIDE_RECURSION 434289857Sobrien 434389857Sobrien#ifdef WCHAR 434489857Sobrien/* This insert space, which size is "num", into the pattern at "loc". 434589857Sobrien "end" must point the end of the allocated buffer. */ 434689857Sobrienstatic void 4347218822Sdiminsert_space (int num, CHAR_T *loc, CHAR_T *end) 434889857Sobrien{ 434989857Sobrien register CHAR_T *pto = end; 435089857Sobrien register CHAR_T *pfrom = end - num; 435189857Sobrien 435289857Sobrien while (pfrom >= loc) 435389857Sobrien *pto-- = *pfrom--; 435489857Sobrien} 435589857Sobrien#endif /* WCHAR */ 435689857Sobrien 435789857Sobrien#ifdef WCHAR 435889857Sobrienstatic reg_errcode_t 4359218822Sdimwcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr, 4360218822Sdim const CHAR_T *pend, RE_TRANSLATE_TYPE translate, 4361218822Sdim reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set) 436289857Sobrien{ 436389857Sobrien const CHAR_T *p = *p_ptr; 436489857Sobrien CHAR_T range_start, range_end; 436589857Sobrien reg_errcode_t ret; 436689857Sobrien# ifdef _LIBC 436789857Sobrien uint32_t nrules; 436889857Sobrien uint32_t start_val, end_val; 436989857Sobrien# endif 437089857Sobrien if (p == pend) 437189857Sobrien return REG_ERANGE; 437289857Sobrien 437389857Sobrien# ifdef _LIBC 437489857Sobrien nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 437589857Sobrien if (nrules != 0) 437689857Sobrien { 
437789857Sobrien const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE, 437889857Sobrien _NL_COLLATE_COLLSEQWC); 437989857Sobrien const unsigned char *extra = (const unsigned char *) 438089857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 438189857Sobrien 438289857Sobrien if (range_start_char < -1) 438389857Sobrien { 438489857Sobrien /* range_start is a collating symbol. */ 438589857Sobrien int32_t *wextra; 438689857Sobrien /* Retreive the index and get collation sequence value. */ 438789857Sobrien wextra = (int32_t*)(extra + char_set[-range_start_char]); 438889857Sobrien start_val = wextra[1 + *wextra]; 438989857Sobrien } 439089857Sobrien else 439189857Sobrien start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char)); 439289857Sobrien 439389857Sobrien end_val = collseq_table_lookup (collseq, TRANSLATE (p[0])); 439489857Sobrien 439589857Sobrien /* Report an error if the range is empty and the syntax prohibits 439689857Sobrien this. */ 439789857Sobrien ret = ((syntax & RE_NO_EMPTY_RANGES) 439889857Sobrien && (start_val > end_val))? REG_ERANGE : REG_NOERROR; 439989857Sobrien 440089857Sobrien /* Insert space to the end of the char_ranges. */ 440189857Sobrien insert_space(2, b - char_set[5] - 2, b - 1); 440289857Sobrien *(b - char_set[5] - 2) = (wchar_t)start_val; 440389857Sobrien *(b - char_set[5] - 1) = (wchar_t)end_val; 440489857Sobrien char_set[4]++; /* ranges_index */ 440589857Sobrien } 440689857Sobrien else 440789857Sobrien# endif 440889857Sobrien { 440989857Sobrien range_start = (range_start_char >= 0)? TRANSLATE (range_start_char): 441089857Sobrien range_start_char; 441189857Sobrien range_end = TRANSLATE (p[0]); 441289857Sobrien /* Report an error if the range is empty and the syntax prohibits 441389857Sobrien this. */ 441489857Sobrien ret = ((syntax & RE_NO_EMPTY_RANGES) 441589857Sobrien && (range_start > range_end))? REG_ERANGE : REG_NOERROR; 441689857Sobrien 441789857Sobrien /* Insert space to the end of the char_ranges. 
*/ 441889857Sobrien insert_space(2, b - char_set[5] - 2, b - 1); 441989857Sobrien *(b - char_set[5] - 2) = range_start; 442089857Sobrien *(b - char_set[5] - 1) = range_end; 442189857Sobrien char_set[4]++; /* ranges_index */ 442289857Sobrien } 442389857Sobrien /* Have to increment the pointer into the pattern string, so the 442489857Sobrien caller isn't still at the ending character. */ 442589857Sobrien (*p_ptr)++; 442689857Sobrien 442789857Sobrien return ret; 442889857Sobrien} 442989857Sobrien#else /* BYTE */ 443089857Sobrien/* Read the ending character of a range (in a bracket expression) from the 443189857Sobrien uncompiled pattern *P_PTR (which ends at PEND). We assume the 443289857Sobrien starting character is in `P[-2]'. (`P[-1]' is the character `-'.) 443389857Sobrien Then we set the translation of all bits between the starting and 443489857Sobrien ending characters (inclusive) in the compiled pattern B. 443589857Sobrien 443689857Sobrien Return an error code. 443789857Sobrien 443889857Sobrien We use these short variable names so we can use the same macros as 443989857Sobrien `regex_compile' itself. */ 444089857Sobrien 444189857Sobrienstatic reg_errcode_t 4442218822Sdimbyte_compile_range (unsigned int range_start_char, const char **p_ptr, 4443218822Sdim const char *pend, RE_TRANSLATE_TYPE translate, 4444218822Sdim reg_syntax_t syntax, unsigned char *b) 444589857Sobrien{ 444689857Sobrien unsigned this_char; 444789857Sobrien const char *p = *p_ptr; 444889857Sobrien reg_errcode_t ret; 444989857Sobrien# if _LIBC 445089857Sobrien const unsigned char *collseq; 445189857Sobrien unsigned int start_colseq; 445289857Sobrien unsigned int end_colseq; 445389857Sobrien# else 445489857Sobrien unsigned end_char; 445589857Sobrien# endif 445689857Sobrien 445789857Sobrien if (p == pend) 445889857Sobrien return REG_ERANGE; 445989857Sobrien 446089857Sobrien /* Have to increment the pointer into the pattern string, so the 446189857Sobrien caller isn't still at the ending character. 
*/ 446289857Sobrien (*p_ptr)++; 446389857Sobrien 446489857Sobrien /* Report an error if the range is empty and the syntax prohibits this. */ 446589857Sobrien ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 446689857Sobrien 446789857Sobrien# if _LIBC 446889857Sobrien collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 446989857Sobrien _NL_COLLATE_COLLSEQMB); 447089857Sobrien 447189857Sobrien start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)]; 447289857Sobrien end_colseq = collseq[(unsigned char) TRANSLATE (p[0])]; 447389857Sobrien for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) 447489857Sobrien { 447589857Sobrien unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)]; 447689857Sobrien 447789857Sobrien if (start_colseq <= this_colseq && this_colseq <= end_colseq) 447889857Sobrien { 447989857Sobrien SET_LIST_BIT (TRANSLATE (this_char)); 448089857Sobrien ret = REG_NOERROR; 448189857Sobrien } 448289857Sobrien } 448389857Sobrien# else 448489857Sobrien /* Here we see why `this_char' has to be larger than an `unsigned 448589857Sobrien char' -- we would otherwise go into an infinite loop, since all 448689857Sobrien characters <= 0xff. */ 448789857Sobrien range_start_char = TRANSLATE (range_start_char); 448889857Sobrien /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE, 448989857Sobrien and some compilers cast it to int implicitly, so following for_loop 449089857Sobrien may fall to (almost) infinite loop. 449189857Sobrien e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff. 449289857Sobrien To avoid this, we cast p[0] to unsigned int and truncate it. 
*/ 449389857Sobrien end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1)); 449489857Sobrien 449589857Sobrien for (this_char = range_start_char; this_char <= end_char; ++this_char) 449689857Sobrien { 449789857Sobrien SET_LIST_BIT (TRANSLATE (this_char)); 449889857Sobrien ret = REG_NOERROR; 449989857Sobrien } 450089857Sobrien# endif 450189857Sobrien 450289857Sobrien return ret; 450389857Sobrien} 450489857Sobrien#endif /* WCHAR */ 450589857Sobrien 450689857Sobrien/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 450789857Sobrien BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 450889857Sobrien characters can start a string that matches the pattern. This fastmap 450989857Sobrien is used by re_search to skip quickly over impossible starting points. 451089857Sobrien 451189857Sobrien The caller must supply the address of a (1 << BYTEWIDTH)-byte data 451289857Sobrien area as BUFP->fastmap. 451389857Sobrien 451489857Sobrien We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 451589857Sobrien the pattern buffer. 451689857Sobrien 451789857Sobrien Returns 0 if we succeed, -2 if an internal error. */ 451889857Sobrien 451989857Sobrien#ifdef WCHAR 452089857Sobrien/* local function for re_compile_fastmap. 452189857Sobrien truncate wchar_t character to char. */ 452289857Sobrienstatic unsigned char truncate_wchar (CHAR_T c); 452389857Sobrien 452489857Sobrienstatic unsigned char 4525218822Sdimtruncate_wchar (CHAR_T c) 452689857Sobrien{ 452789857Sobrien unsigned char buf[MB_CUR_MAX]; 452889857Sobrien mbstate_t state; 452989857Sobrien int retval; 453089857Sobrien memset (&state, '\0', sizeof (state)); 453189857Sobrien# ifdef _LIBC 453289857Sobrien retval = __wcrtomb (buf, c, &state); 453389857Sobrien# else 453489857Sobrien retval = wcrtomb (buf, c, &state); 453589857Sobrien# endif 453689857Sobrien return retval > 0 ? 
buf[0] : (unsigned char) c; 453789857Sobrien} 453889857Sobrien#endif /* WCHAR */ 453989857Sobrien 454089857Sobrienstatic int 4541218822SdimPREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp) 454289857Sobrien{ 454389857Sobrien int j, k; 454489857Sobrien#ifdef MATCH_MAY_ALLOCATE 454589857Sobrien PREFIX(fail_stack_type) fail_stack; 454689857Sobrien#endif 454789857Sobrien#ifndef REGEX_MALLOC 454889857Sobrien char *destination; 454989857Sobrien#endif 455089857Sobrien 455189857Sobrien register char *fastmap = bufp->fastmap; 455289857Sobrien 455389857Sobrien#ifdef WCHAR 455489857Sobrien /* We need to cast pattern to (wchar_t*), because we casted this compiled 455589857Sobrien pattern to (char*) in regex_compile. */ 455689857Sobrien UCHAR_T *pattern = (UCHAR_T*)bufp->buffer; 455789857Sobrien register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used); 455889857Sobrien#else /* BYTE */ 455989857Sobrien UCHAR_T *pattern = bufp->buffer; 456089857Sobrien register UCHAR_T *pend = pattern + bufp->used; 456189857Sobrien#endif /* WCHAR */ 456289857Sobrien UCHAR_T *p = pattern; 456389857Sobrien 456489857Sobrien#ifdef REL_ALLOC 456589857Sobrien /* This holds the pointer to the failure stack, when 456689857Sobrien it is allocated relocatably. */ 456789857Sobrien fail_stack_elt_t *failure_stack_ptr; 456889857Sobrien#endif 456989857Sobrien 457089857Sobrien /* Assume that each path through the pattern can be null until 457189857Sobrien proven otherwise. We set this false at the bottom of switch 457289857Sobrien statement, to which we get only if a particular path doesn't 457389857Sobrien match the empty string. */ 457489857Sobrien boolean path_can_be_null = true; 457589857Sobrien 457689857Sobrien /* We aren't doing a `succeed_n' to begin with. 
*/ 457789857Sobrien boolean succeed_n_p = false; 457889857Sobrien 457989857Sobrien assert (fastmap != NULL && p != NULL); 458089857Sobrien 458189857Sobrien INIT_FAIL_STACK (); 458289857Sobrien bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 458389857Sobrien bufp->fastmap_accurate = 1; /* It will be when we're done. */ 458489857Sobrien bufp->can_be_null = 0; 458589857Sobrien 458689857Sobrien while (1) 458789857Sobrien { 4588130561Sobrien if (p == pend || *p == (UCHAR_T) succeed) 458989857Sobrien { 459089857Sobrien /* We have reached the (effective) end of pattern. */ 459189857Sobrien if (!FAIL_STACK_EMPTY ()) 459289857Sobrien { 459389857Sobrien bufp->can_be_null |= path_can_be_null; 459489857Sobrien 459589857Sobrien /* Reset for next path. */ 459689857Sobrien path_can_be_null = true; 459789857Sobrien 459889857Sobrien p = fail_stack.stack[--fail_stack.avail].pointer; 459989857Sobrien 460089857Sobrien continue; 460189857Sobrien } 460289857Sobrien else 460389857Sobrien break; 460489857Sobrien } 460589857Sobrien 460689857Sobrien /* We should never be about to go beyond the end of the pattern. */ 460789857Sobrien assert (p < pend); 460889857Sobrien 460989857Sobrien switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 461089857Sobrien { 461189857Sobrien 461289857Sobrien /* I guess the idea here is to simply not bother with a fastmap 461389857Sobrien if a backreference is used, since it's too hard to figure out 461489857Sobrien the fastmap for the corresponding group. Setting 461589857Sobrien `can_be_null' stops `re_search_2' from using the fastmap, so 461689857Sobrien that is all we do. */ 461789857Sobrien case duplicate: 461889857Sobrien bufp->can_be_null = 1; 461989857Sobrien goto done; 462089857Sobrien 462189857Sobrien 462289857Sobrien /* Following are the cases which match a character. These end 462389857Sobrien with `break'. 
*/ 462489857Sobrien 462589857Sobrien#ifdef WCHAR 462689857Sobrien case exactn: 462789857Sobrien fastmap[truncate_wchar(p[1])] = 1; 462889857Sobrien break; 462989857Sobrien#else /* BYTE */ 463089857Sobrien case exactn: 463189857Sobrien fastmap[p[1]] = 1; 463289857Sobrien break; 463389857Sobrien#endif /* WCHAR */ 463489857Sobrien#ifdef MBS_SUPPORT 463589857Sobrien case exactn_bin: 463689857Sobrien fastmap[p[1]] = 1; 463789857Sobrien break; 463889857Sobrien#endif 463989857Sobrien 464089857Sobrien#ifdef WCHAR 464189857Sobrien /* It is hard to distinguish fastmap from (multi byte) characters 464289857Sobrien which depends on current locale. */ 464389857Sobrien case charset: 464489857Sobrien case charset_not: 464589857Sobrien case wordchar: 464689857Sobrien case notwordchar: 464789857Sobrien bufp->can_be_null = 1; 464889857Sobrien goto done; 464989857Sobrien#else /* BYTE */ 465089857Sobrien case charset: 465189857Sobrien for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 465289857Sobrien if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 465389857Sobrien fastmap[j] = 1; 465489857Sobrien break; 465589857Sobrien 465689857Sobrien 465789857Sobrien case charset_not: 465889857Sobrien /* Chars beyond end of map must be allowed. 
*/ 465989857Sobrien for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 466089857Sobrien fastmap[j] = 1; 466189857Sobrien 466289857Sobrien for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 466389857Sobrien if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 466489857Sobrien fastmap[j] = 1; 466589857Sobrien break; 466689857Sobrien 466789857Sobrien 466889857Sobrien case wordchar: 466989857Sobrien for (j = 0; j < (1 << BYTEWIDTH); j++) 467089857Sobrien if (SYNTAX (j) == Sword) 467189857Sobrien fastmap[j] = 1; 467289857Sobrien break; 467389857Sobrien 467489857Sobrien 467589857Sobrien case notwordchar: 467689857Sobrien for (j = 0; j < (1 << BYTEWIDTH); j++) 467789857Sobrien if (SYNTAX (j) != Sword) 467889857Sobrien fastmap[j] = 1; 467989857Sobrien break; 468089857Sobrien#endif /* WCHAR */ 468189857Sobrien 468289857Sobrien case anychar: 468389857Sobrien { 468489857Sobrien int fastmap_newline = fastmap['\n']; 468589857Sobrien 468689857Sobrien /* `.' matches anything ... */ 468789857Sobrien for (j = 0; j < (1 << BYTEWIDTH); j++) 468889857Sobrien fastmap[j] = 1; 468989857Sobrien 469089857Sobrien /* ... except perhaps newline. */ 469189857Sobrien if (!(bufp->syntax & RE_DOT_NEWLINE)) 469289857Sobrien fastmap['\n'] = fastmap_newline; 469389857Sobrien 469489857Sobrien /* Return if we have already set `can_be_null'; if we have, 469589857Sobrien then the fastmap is irrelevant. Something's wrong here. */ 469689857Sobrien else if (bufp->can_be_null) 469789857Sobrien goto done; 469889857Sobrien 469989857Sobrien /* Otherwise, have to check alternative paths. 
*/ 470089857Sobrien break; 470189857Sobrien } 470289857Sobrien 470389857Sobrien#ifdef emacs 470489857Sobrien case syntaxspec: 470589857Sobrien k = *p++; 470689857Sobrien for (j = 0; j < (1 << BYTEWIDTH); j++) 470789857Sobrien if (SYNTAX (j) == (enum syntaxcode) k) 470889857Sobrien fastmap[j] = 1; 470989857Sobrien break; 471089857Sobrien 471189857Sobrien 471289857Sobrien case notsyntaxspec: 471389857Sobrien k = *p++; 471489857Sobrien for (j = 0; j < (1 << BYTEWIDTH); j++) 471589857Sobrien if (SYNTAX (j) != (enum syntaxcode) k) 471689857Sobrien fastmap[j] = 1; 471789857Sobrien break; 471889857Sobrien 471989857Sobrien 472089857Sobrien /* All cases after this match the empty string. These end with 472189857Sobrien `continue'. */ 472289857Sobrien 472389857Sobrien 472489857Sobrien case before_dot: 472589857Sobrien case at_dot: 472689857Sobrien case after_dot: 472789857Sobrien continue; 472889857Sobrien#endif /* emacs */ 472989857Sobrien 473089857Sobrien 473189857Sobrien case no_op: 473289857Sobrien case begline: 473389857Sobrien case endline: 473489857Sobrien case begbuf: 473589857Sobrien case endbuf: 473689857Sobrien case wordbound: 473789857Sobrien case notwordbound: 473889857Sobrien case wordbeg: 473989857Sobrien case wordend: 474089857Sobrien case push_dummy_failure: 474189857Sobrien continue; 474289857Sobrien 474389857Sobrien 474489857Sobrien case jump_n: 474589857Sobrien case pop_failure_jump: 474689857Sobrien case maybe_pop_jump: 474789857Sobrien case jump: 474889857Sobrien case jump_past_alt: 474989857Sobrien case dummy_failure_jump: 475089857Sobrien EXTRACT_NUMBER_AND_INCR (j, p); 475189857Sobrien p += j; 475289857Sobrien if (j > 0) 475389857Sobrien continue; 475489857Sobrien 475589857Sobrien /* Jump backward implies we just went through the body of a 475689857Sobrien loop and matched nothing. Opcode jumped to should be 475789857Sobrien `on_failure_jump' or `succeed_n'. Just treat it like an 475889857Sobrien ordinary jump. 
For a * loop, it has pushed its failure 475989857Sobrien point already; if so, discard that as redundant. */ 476089857Sobrien if ((re_opcode_t) *p != on_failure_jump 476189857Sobrien && (re_opcode_t) *p != succeed_n) 476289857Sobrien continue; 476389857Sobrien 476489857Sobrien p++; 476589857Sobrien EXTRACT_NUMBER_AND_INCR (j, p); 476689857Sobrien p += j; 476789857Sobrien 476889857Sobrien /* If what's on the stack is where we are now, pop it. */ 476989857Sobrien if (!FAIL_STACK_EMPTY () 477089857Sobrien && fail_stack.stack[fail_stack.avail - 1].pointer == p) 477189857Sobrien fail_stack.avail--; 477289857Sobrien 477389857Sobrien continue; 477489857Sobrien 477589857Sobrien 477689857Sobrien case on_failure_jump: 477789857Sobrien case on_failure_keep_string_jump: 477889857Sobrien handle_on_failure_jump: 477989857Sobrien EXTRACT_NUMBER_AND_INCR (j, p); 478089857Sobrien 478189857Sobrien /* For some patterns, e.g., `(a?)?', `p+j' here points to the 478289857Sobrien end of the pattern. We don't want to push such a point, 478389857Sobrien since when we restore it above, entering the switch will 478489857Sobrien increment `p' past the end of the pattern. We don't need 478589857Sobrien to push such a point since we obviously won't find any more 478689857Sobrien fastmap entries beyond `pend'. Such a pattern can match 478789857Sobrien the null string, though. */ 478889857Sobrien if (p + j < pend) 478989857Sobrien { 479089857Sobrien if (!PUSH_PATTERN_OP (p + j, fail_stack)) 479189857Sobrien { 479289857Sobrien RESET_FAIL_STACK (); 479389857Sobrien return -2; 479489857Sobrien } 479589857Sobrien } 479689857Sobrien else 479789857Sobrien bufp->can_be_null = 1; 479889857Sobrien 479989857Sobrien if (succeed_n_p) 480089857Sobrien { 480189857Sobrien EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. 
*/ 480289857Sobrien succeed_n_p = false; 480389857Sobrien } 480489857Sobrien 480589857Sobrien continue; 480689857Sobrien 480789857Sobrien 480889857Sobrien case succeed_n: 480989857Sobrien /* Get to the number of times to succeed. */ 481089857Sobrien p += OFFSET_ADDRESS_SIZE; 481189857Sobrien 481289857Sobrien /* Increment p past the n for when k != 0. */ 481389857Sobrien EXTRACT_NUMBER_AND_INCR (k, p); 481489857Sobrien if (k == 0) 481589857Sobrien { 481689857Sobrien p -= 2 * OFFSET_ADDRESS_SIZE; 481789857Sobrien succeed_n_p = true; /* Spaghetti code alert. */ 481889857Sobrien goto handle_on_failure_jump; 481989857Sobrien } 482089857Sobrien continue; 482189857Sobrien 482289857Sobrien 482389857Sobrien case set_number_at: 482489857Sobrien p += 2 * OFFSET_ADDRESS_SIZE; 482589857Sobrien continue; 482689857Sobrien 482789857Sobrien 482889857Sobrien case start_memory: 482989857Sobrien case stop_memory: 483089857Sobrien p += 2; 483189857Sobrien continue; 483289857Sobrien 483389857Sobrien 483489857Sobrien default: 483589857Sobrien abort (); /* We have listed all the cases. */ 483689857Sobrien } /* switch *p++ */ 483789857Sobrien 483889857Sobrien /* Getting here means we have found the possible starting 483989857Sobrien characters for one path of the pattern -- and that the empty 484089857Sobrien string does not match. We need not follow this path further. 484189857Sobrien Instead, look at the next alternative (remembered on the 484289857Sobrien stack), or quit if no more. The test at the top of the loop 484389857Sobrien does these things. */ 484489857Sobrien path_can_be_null = false; 484589857Sobrien p = pend; 484689857Sobrien } /* while p */ 484789857Sobrien 484889857Sobrien /* Set `can_be_null' for the last path (also the first path, if the 484989857Sobrien pattern is empty). 
*/ 485089857Sobrien bufp->can_be_null |= path_can_be_null; 485189857Sobrien 485289857Sobrien done: 485389857Sobrien RESET_FAIL_STACK (); 485489857Sobrien return 0; 485589857Sobrien} 485689857Sobrien 485789857Sobrien#else /* not INSIDE_RECURSION */ 485889857Sobrien 485989857Sobrienint 4860218822Sdimre_compile_fastmap (struct re_pattern_buffer *bufp) 486189857Sobrien{ 486289857Sobrien# ifdef MBS_SUPPORT 486389857Sobrien if (MB_CUR_MAX != 1) 486489857Sobrien return wcs_re_compile_fastmap(bufp); 486589857Sobrien else 486689857Sobrien# endif 486789857Sobrien return byte_re_compile_fastmap(bufp); 486889857Sobrien} /* re_compile_fastmap */ 486989857Sobrien#ifdef _LIBC 487089857Sobrienweak_alias (__re_compile_fastmap, re_compile_fastmap) 487189857Sobrien#endif 487289857Sobrien 487389857Sobrien 487489857Sobrien/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 487589857Sobrien ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 487689857Sobrien this memory for recording register information. STARTS and ENDS 487789857Sobrien must be allocated using the malloc library routine, and must each 487889857Sobrien be at least NUM_REGS * sizeof (regoff_t) bytes long. 487989857Sobrien 488089857Sobrien If NUM_REGS == 0, then subsequent matches should allocate their own 488189857Sobrien register data. 488289857Sobrien 488389857Sobrien Unless this function is called, the first search or match using 488489857Sobrien PATTERN_BUFFER will allocate its own register data, without 488589857Sobrien freeing the old data. 
*/ 488689857Sobrien 488789857Sobrienvoid 4888218822Sdimre_set_registers (struct re_pattern_buffer *bufp, 4889218822Sdim struct re_registers *regs, unsigned num_regs, 4890218822Sdim regoff_t *starts, regoff_t *ends) 489189857Sobrien{ 489289857Sobrien if (num_regs) 489389857Sobrien { 489489857Sobrien bufp->regs_allocated = REGS_REALLOCATE; 489589857Sobrien regs->num_regs = num_regs; 489689857Sobrien regs->start = starts; 489789857Sobrien regs->end = ends; 489889857Sobrien } 489989857Sobrien else 490089857Sobrien { 490189857Sobrien bufp->regs_allocated = REGS_UNALLOCATED; 490289857Sobrien regs->num_regs = 0; 490389857Sobrien regs->start = regs->end = (regoff_t *) 0; 490489857Sobrien } 490589857Sobrien} 490689857Sobrien#ifdef _LIBC 490789857Sobrienweak_alias (__re_set_registers, re_set_registers) 490889857Sobrien#endif 490989857Sobrien 491089857Sobrien/* Searching routines. */ 491189857Sobrien 491289857Sobrien/* Like re_search_2, below, but only one string is specified, and 491389857Sobrien doesn't let you say where to stop matching. */ 491489857Sobrien 491589857Sobrienint 4916218822Sdimre_search (struct re_pattern_buffer *bufp, const char *string, int size, 4917218822Sdim int startpos, int range, struct re_registers *regs) 491889857Sobrien{ 491989857Sobrien return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 492089857Sobrien regs, size); 492189857Sobrien} 492289857Sobrien#ifdef _LIBC 492389857Sobrienweak_alias (__re_search, re_search) 492489857Sobrien#endif 492589857Sobrien 492689857Sobrien 492789857Sobrien/* Using the compiled pattern in BUFP->buffer, first tries to match the 492889857Sobrien virtual concatenation of STRING1 and STRING2, starting first at index 492989857Sobrien STARTPOS, then at STARTPOS + 1, and so on. 493089857Sobrien 493189857Sobrien STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 493289857Sobrien 493389857Sobrien RANGE is how far to scan while trying to match. 
RANGE = 0 means try 493489857Sobrien only at STARTPOS; in general, the last start tried is STARTPOS + 493589857Sobrien RANGE. 493689857Sobrien 493789857Sobrien In REGS, return the indices of the virtual concatenation of STRING1 493889857Sobrien and STRING2 that matched the entire BUFP->buffer and its contained 493989857Sobrien subexpressions. 494089857Sobrien 494189857Sobrien Do not consider matching one past the index STOP in the virtual 494289857Sobrien concatenation of STRING1 and STRING2. 494389857Sobrien 494489857Sobrien We return either the position in the strings at which the match was 494589857Sobrien found, -1 if no match, or -2 if error (such as failure 494689857Sobrien stack overflow). */ 494789857Sobrien 494889857Sobrienint 4949218822Sdimre_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, 4950218822Sdim const char *string2, int size2, int startpos, int range, 4951218822Sdim struct re_registers *regs, int stop) 495289857Sobrien{ 495389857Sobrien# ifdef MBS_SUPPORT 495489857Sobrien if (MB_CUR_MAX != 1) 495589857Sobrien return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos, 495689857Sobrien range, regs, stop); 495789857Sobrien else 495889857Sobrien# endif 495989857Sobrien return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos, 496089857Sobrien range, regs, stop); 496189857Sobrien} /* re_search_2 */ 496289857Sobrien#ifdef _LIBC 496389857Sobrienweak_alias (__re_search_2, re_search_2) 496489857Sobrien#endif 496589857Sobrien 496689857Sobrien#endif /* not INSIDE_RECURSION */ 496789857Sobrien 496889857Sobrien#ifdef INSIDE_RECURSION 496989857Sobrien 497089857Sobrien#ifdef MATCH_MAY_ALLOCATE 497189857Sobrien# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL 497289857Sobrien#else 497389857Sobrien# define FREE_VAR(var) if (var) free (var); var = NULL 497489857Sobrien#endif 497589857Sobrien 497689857Sobrien#ifdef WCHAR 497789857Sobrien# define MAX_ALLOCA_SIZE 2000 497889857Sobrien 497989857Sobrien# 
define FREE_WCS_BUFFERS() \
  do {                                                                        \
    /* Release the converted copies of string1/string2 and their offset     \
       tables.  Buffers for strings longer than MAX_ALLOCA_SIZE came from    \
       TALLOC and go to free; the others came from REGEX_TALLOC and go       \
       through FREE_VAR.  Uses the locals of PREFIX(re_search_2).  */        \
    if (size1 > MAX_ALLOCA_SIZE)                                              \
      {                                                                       \
        free (wcs_string1);                                                   \
        free (mbs_offset1);                                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        FREE_VAR (wcs_string1);                                               \
        FREE_VAR (mbs_offset1);                                               \
      }                                                                       \
    if (size2 > MAX_ALLOCA_SIZE)                                              \
      {                                                                       \
        free (wcs_string2);                                                   \
        free (mbs_offset2);                                                   \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        FREE_VAR (wcs_string2);                                               \
        FREE_VAR (mbs_offset2);                                               \
      }                                                                       \
  } while (0)

#endif


/* Engine-specific search (byte_ or wcs_, chosen through PREFIX).

   Tries to match BUFP at successive start positions in the virtual
   concatenation of STRING1 (SIZE1 bytes) and STRING2 (SIZE2 bytes),
   beginning at STARTPOS and moving forwards (RANGE > 0) or backwards
   (RANGE < 0) for at most |RANGE| steps.  STOP and REGS are handed to
   the matcher unchanged.

   Returns the start position of the first match found; -1 if STARTPOS
   is out of range or no position matches; -2 if fastmap compilation,
   allocation of the wide-character buffers, or the matcher itself
   reports an error.  */
static int
PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
                     int size1, const char *string2, int size2,
                     int startpos, int range,
                     struct re_registers *regs, int stop)
{
  int val;
  register char *fastmap = bufp->fastmap;
  register RE_TRANSLATE_TYPE translate = bufp->translate;
  int total_size = size1 + size2;
  int endpos = startpos + range;
#ifdef WCHAR
  /* wchar_t buffers corresponding to string1 and string2.  */
  wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
  /* Number of wchar_t elements stored in each of those buffers.  */
  int wcs_size1 = 0, wcs_size2 = 0;
  /* Offset buffers, used as an optimization.  See convert_mbs_to_wcs.  */
  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
  /* Records whether each wchar_t is binary data or not.  */
  char *is_binary = NULL;
#endif /* WCHAR */

  /* Check for out-of-range STARTPOS.  */
  if (startpos < 0 || startpos > total_size)
    return -1;

  /* Fix up RANGE if it might eventually take us outside
     the virtual concatenation of STRING1 and STRING2.
     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
  if (endpos < 0)
    range = 0 - startpos;
  else if (endpos > total_size)
    range = total_size - startpos;

  /* If the search isn't to be a backwards one, don't waste time in a
     search for a pattern that must be anchored.  */
  if (bufp->used > 0 && range > 0
      && ((re_opcode_t) bufp->buffer[0] == begbuf
          /* `begline' is like `begbuf' if it cannot match at newlines.  */
          || ((re_opcode_t) bufp->buffer[0] == begline
              && !bufp->newline_anchor)))
    {
      if (startpos > 0)
        return -1;
      else
        range = 1;
    }

#ifdef emacs
  /* In a forward search for something that starts with \=,
     don't keep searching past point.  */
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
    {
      range = PT - startpos;
      if (range <= 0)
        return -1;
    }
#endif /* emacs */

  /* Update the fastmap now if not correct already.  */
  if (fastmap && !bufp->fastmap_accurate)
    if (re_compile_fastmap (bufp) == -2)
      return -2;

#ifdef WCHAR
  /* Allocate wchar_t arrays for wcs_string1 and wcs_string2 and
     fill them with the converted strings.  Each buffer gets SIZE + 1
     elements; the extra one holds the sentinel stored below.  */
  if (size1 != 0)
    {
      if (size1 > MAX_ALLOCA_SIZE)
        {
          wcs_string1 = TALLOC (size1 + 1, CHAR_T);
          mbs_offset1 = TALLOC (size1 + 1, int);
          is_binary = TALLOC (size1 + 1, char);
        }
      else
        {
          wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
          mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
          is_binary = REGEX_TALLOC (size1 + 1, char);
        }
      if (!wcs_string1 || !mbs_offset1 || !is_binary)
        {
          /* Release with the routine that matches the allocator used
             above.  */
          if (size1 > MAX_ALLOCA_SIZE)
            {
              free (wcs_string1);
              free (mbs_offset1);
              free (is_binary);
            }
          else
            {
              FREE_VAR (wcs_string1);
              FREE_VAR (mbs_offset1);
              FREE_VAR (is_binary);
            }
          return -2;
        }
      wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
                                     mbs_offset1, is_binary);
      wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
      /* is_binary is not used again after the conversion.  */
      if (size1 > MAX_ALLOCA_SIZE)
        free (is_binary);
      else
        FREE_VAR (is_binary);
    }
  if (size2 != 0)
    {
      if (size2 > MAX_ALLOCA_SIZE)
        {
          wcs_string2 = TALLOC (size2 + 1, CHAR_T);
          mbs_offset2 = TALLOC (size2 + 1, int);
          is_binary = TALLOC (size2 + 1, char);
        }
      else
        {
          wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
          mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
          is_binary = REGEX_TALLOC (size2 + 1, char);
        }
      if (!wcs_string2 || !mbs_offset2 || !is_binary)
        {
          /* Also releases the string1 buffers set up above.  */
          FREE_WCS_BUFFERS ();
          if (size2 > MAX_ALLOCA_SIZE)
            free (is_binary);
          else
            FREE_VAR (is_binary);
          return -2;
        }
      wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
                                     mbs_offset2, is_binary);
      wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
      if (size2 > MAX_ALLOCA_SIZE)
        free (is_binary);
      else
        FREE_VAR (is_binary);
    }
#endif /* WCHAR */


  /* Loop through the string, looking for a place to start matching.  */
  for (;;)
    {
      /* If a fastmap is supplied, skip quickly over characters that
         cannot be the start of a match.  If the pattern can match the
         null string, however, we don't need to skip characters; we want
         the first null string.  */
      if (fastmap && startpos < total_size && !bufp->can_be_null)
        {
          if (range > 0)        /* Searching forwards.  */
            {
              register const char *d;
              register int lim = 0;
              int irange = range;

              /* If the scan would cross from string1 into string2, LIM
                 is the value RANGE has at the end of string1; the skip
                 loops below stop there.  */
              if (startpos < size1 && startpos + range >= size1)
                lim = range - (size1 - startpos);

              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;

              /* Written out as an if-else to avoid testing `translate'
                 inside the loop.  */
              if (translate)
                while (range > lim
                       && !fastmap[(unsigned char)
                                   translate[(unsigned char) *d++]])
                  range--;
              else
                while (range > lim && !fastmap[(unsigned char) *d++])
                  range--;

              /* Advance STARTPOS by the number of characters skipped.  */
              startpos += irange - range;
            }
          else                          /* Searching backwards.  */
            {
              register CHAR_T c = (size1 == 0 || startpos >= size1
                                      ? string2[startpos - size1]
                                      : string1[startpos]);

              if (!fastmap[(unsigned char) TRANSLATE (c)])
                goto advance;
            }
        }

      /* If can't match the null string, and that's all we have left, fail.  */
      if (range >= 0 && startpos == total_size && fastmap
          && !bufp->can_be_null)
       {
#ifdef WCHAR
         FREE_WCS_BUFFERS ();
#endif
         return -1;
       }

#ifdef WCHAR
      val = wcs_re_match_2_internal (bufp, string1, size1, string2,
                                     size2, startpos, regs, stop,
                                     wcs_string1, wcs_size1,
                                     wcs_string2, wcs_size2,
                                     mbs_offset1, mbs_offset2);
#else /* BYTE */
      val = byte_re_match_2_internal (bufp, string1, size1, string2,
                                      size2, startpos, regs, stop);
#endif /* BYTE */

#ifndef REGEX_MALLOC
# ifdef C_ALLOCA
      alloca (0);
# endif
#endif

      /* A non-negative VAL means a match at STARTPOS.  */
      if (val >= 0)
        {
#ifdef WCHAR
          FREE_WCS_BUFFERS ();
#endif
          return startpos;
        }

      /* -2 is the matcher's error return; pass it on.  */
      if (val == -2)
        {
#ifdef WCHAR
          FREE_WCS_BUFFERS ();
#endif
          return -2;
        }

    advance:
      /* Step to the next candidate position, or stop once RANGE is
         used up.  */
      if (!range)
        break;
      else if (range > 0)
        {
          range--;
          startpos++;
        }
      else
        {
          range++;
          startpos--;
        }
    }
#ifdef WCHAR
  FREE_WCS_BUFFERS ();
#endif
  return -1;
}

#ifdef WCHAR
/* This converts PTR, a pointer into one of the search wchar_t strings
   `string1' and
`string2' into a multibyte string offset from the
   beginning of that string.  We use mbs_offset to optimize.
   See convert_mbs_to_wcs.  A null offset table yields 0 for the lookup;
   offsets into string2 have csize1 added.  */
# define POINTER_TO_OFFSET(ptr)                                         \
  (FIRST_STRING_P (ptr)                                                 \
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))  \
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)  \
                 + csize1)))
#else /* BYTE */
/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2', into an offset from the beginning of the virtual
   concatenation (offsets into string2 have size1 added).  */
# define POINTER_TO_OFFSET(ptr)                 \
  (FIRST_STRING_P (ptr)                         \
   ? ((regoff_t) ((ptr) - string1))             \
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif /* WCHAR */

/* Macros for dealing with the split strings in re_match_2.  They refer
   to variables of the enclosing function (d, dend, end1, end2,
   end_match_1, end_match_2, string1, string2, size1, size2) and, for
   PREFETCH, to its `fail' label.  */

#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary.  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* End of string2 => fail.  */                                    \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* End of string1 => advance to string2.  */                      \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }

/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)


/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX: a character is
   word-constituent if iswalnum accepts it or it is L'_'.  */
# define WORDCHAR_P(d)                                                  \
  (iswalnum ((wint_t)((d) == end1 ? *string2                            \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0             \
   || ((d) == end1 ? *string2                                           \
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d)                                                  \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif

/* Free everything we malloc.
*/
/* FREE_VARIABLES has four variants.  With MATCH_MAY_ALLOCATE it
   releases the failure stack and the register vectors; in the WCHAR
   build it additionally releases string1/string2 and
   mbs_offset1/mbs_offset2 unless cant_free_wcs_buf is set.  All names
   are variables of the function that uses the macro.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
# endif /* WCHAR */
#else
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */

/* These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)

#else /* not INSIDE_RECURSION */
/* Matching routines.  */

#ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.
*/ 539189857Sobrien 539289857Sobrienint 5393218822Sdimre_match (struct re_pattern_buffer *bufp, const char *string, 5394218822Sdim int size, int pos, struct re_registers *regs) 539589857Sobrien{ 539689857Sobrien int result; 539789857Sobrien# ifdef MBS_SUPPORT 539889857Sobrien if (MB_CUR_MAX != 1) 539989857Sobrien result = wcs_re_match_2_internal (bufp, NULL, 0, string, size, 540089857Sobrien pos, regs, size, 540189857Sobrien NULL, 0, NULL, 0, NULL, NULL); 540289857Sobrien else 540389857Sobrien# endif 540489857Sobrien result = byte_re_match_2_internal (bufp, NULL, 0, string, size, 540589857Sobrien pos, regs, size); 540689857Sobrien# ifndef REGEX_MALLOC 540789857Sobrien# ifdef C_ALLOCA 540889857Sobrien alloca (0); 540989857Sobrien# endif 541089857Sobrien# endif 541189857Sobrien return result; 541289857Sobrien} 541389857Sobrien# ifdef _LIBC 541489857Sobrienweak_alias (__re_match, re_match) 541589857Sobrien# endif 541689857Sobrien#endif /* not emacs */ 541789857Sobrien 541889857Sobrien#endif /* not INSIDE_RECURSION */ 541989857Sobrien 542089857Sobrien#ifdef INSIDE_RECURSION 5421218822Sdimstatic boolean PREFIX(group_match_null_string_p) (UCHAR_T **p, 5422218822Sdim UCHAR_T *end, 5423218822Sdim PREFIX(register_info_type) *reg_info); 5424218822Sdimstatic boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p, 5425218822Sdim UCHAR_T *end, 5426218822Sdim PREFIX(register_info_type) *reg_info); 5427218822Sdimstatic boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p, 5428218822Sdim UCHAR_T *end, 5429218822Sdim PREFIX(register_info_type) *reg_info); 5430218822Sdimstatic int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, 5431218822Sdim int len, char *translate); 543289857Sobrien#else /* not INSIDE_RECURSION */ 543389857Sobrien 543489857Sobrien/* re_match_2 matches the compiled pattern in BUFP against the 543589857Sobrien the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 543689857Sobrien and SIZE2, respectively). 
We start matching at POS, and stop 543789857Sobrien matching at STOP. 543889857Sobrien 543989857Sobrien If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 544089857Sobrien store offsets for the substring each group matched in REGS. See the 544189857Sobrien documentation for exactly how many groups we fill. 544289857Sobrien 544389857Sobrien We return -1 if no match, -2 if an internal error (such as the 544489857Sobrien failure stack overflowing). Otherwise, we return the length of the 544589857Sobrien matched substring. */ 544689857Sobrien 544789857Sobrienint 5448218822Sdimre_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, 5449218822Sdim const char *string2, int size2, int pos, 5450218822Sdim struct re_registers *regs, int stop) 545189857Sobrien{ 545289857Sobrien int result; 545389857Sobrien# ifdef MBS_SUPPORT 545489857Sobrien if (MB_CUR_MAX != 1) 545589857Sobrien result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2, 545689857Sobrien pos, regs, stop, 545789857Sobrien NULL, 0, NULL, 0, NULL, NULL); 545889857Sobrien else 545989857Sobrien# endif 546089857Sobrien result = byte_re_match_2_internal (bufp, string1, size1, string2, size2, 546189857Sobrien pos, regs, stop); 546289857Sobrien 546389857Sobrien#ifndef REGEX_MALLOC 546489857Sobrien# ifdef C_ALLOCA 546589857Sobrien alloca (0); 546689857Sobrien# endif 546789857Sobrien#endif 546889857Sobrien return result; 546989857Sobrien} 547089857Sobrien#ifdef _LIBC 547189857Sobrienweak_alias (__re_match_2, re_match_2) 547289857Sobrien#endif 547389857Sobrien 547489857Sobrien#endif /* not INSIDE_RECURSION */ 547589857Sobrien 547689857Sobrien#ifdef INSIDE_RECURSION 547789857Sobrien 547889857Sobrien#ifdef WCHAR 5479218822Sdimstatic int count_mbs_length (int *, int); 548089857Sobrien 548189857Sobrien/* This check the substring (from 0, to length) of the multibyte string, 548289857Sobrien to which offset_buffer correspond. 
And count how many wchar_t_characters 548389857Sobrien the substring occupy. We use offset_buffer to optimization. 548489857Sobrien See convert_mbs_to_wcs. */ 548589857Sobrien 548689857Sobrienstatic int 5487218822Sdimcount_mbs_length(int *offset_buffer, int length) 548889857Sobrien{ 548989857Sobrien int upper, lower; 549089857Sobrien 549189857Sobrien /* Check whether the size is valid. */ 549289857Sobrien if (length < 0) 549389857Sobrien return -1; 549489857Sobrien 549589857Sobrien if (offset_buffer == NULL) 549689857Sobrien return 0; 549789857Sobrien 549889857Sobrien /* If there are no multibyte character, offset_buffer[i] == i. 549989857Sobrien Optmize for this case. */ 550089857Sobrien if (offset_buffer[length] == length) 550189857Sobrien return length; 550289857Sobrien 550389857Sobrien /* Set up upper with length. (because for all i, offset_buffer[i] >= i) */ 550489857Sobrien upper = length; 550589857Sobrien lower = 0; 550689857Sobrien 550789857Sobrien while (true) 550889857Sobrien { 550989857Sobrien int middle = (lower + upper) / 2; 551089857Sobrien if (middle == lower || middle == upper) 551189857Sobrien break; 551289857Sobrien if (offset_buffer[middle] > length) 551389857Sobrien upper = middle; 551489857Sobrien else if (offset_buffer[middle] < length) 551589857Sobrien lower = middle; 551689857Sobrien else 551789857Sobrien return middle; 551889857Sobrien } 551989857Sobrien 552089857Sobrien return -1; 552189857Sobrien} 552289857Sobrien#endif /* WCHAR */ 552389857Sobrien 552489857Sobrien/* This is a separate function so that we can force an alloca cleanup 552589857Sobrien afterwards. 
*/ 552689857Sobrien#ifdef WCHAR 552789857Sobrienstatic int 5528218822Sdimwcs_re_match_2_internal (struct re_pattern_buffer *bufp, 5529218822Sdim const char *cstring1, int csize1, 5530218822Sdim const char *cstring2, int csize2, 5531218822Sdim int pos, 5532218822Sdim struct re_registers *regs, 5533218822Sdim int stop, 553489857Sobrien /* string1 == string2 == NULL means string1/2, size1/2 and 553589857Sobrien mbs_offset1/2 need seting up in this function. */ 553689857Sobrien /* We need wchar_t* buffers correspond to cstring1, cstring2. */ 5537218822Sdim wchar_t *string1, int size1, 5538218822Sdim wchar_t *string2, int size2, 553989857Sobrien /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ 5540218822Sdim int *mbs_offset1, int *mbs_offset2) 554189857Sobrien#else /* BYTE */ 554289857Sobrienstatic int 5543218822Sdimbyte_re_match_2_internal (struct re_pattern_buffer *bufp, 5544218822Sdim const char *string1, int size1, 5545218822Sdim const char *string2, int size2, 5546218822Sdim int pos, 5547218822Sdim struct re_registers *regs, int stop) 554889857Sobrien#endif /* BYTE */ 554989857Sobrien{ 555089857Sobrien /* General temporaries. */ 555189857Sobrien int mcnt; 555289857Sobrien UCHAR_T *p1; 555389857Sobrien#ifdef WCHAR 555489857Sobrien /* They hold whether each wchar_t is binary data or not. */ 555589857Sobrien char *is_binary = NULL; 555689857Sobrien /* If true, we can't free string1/2, mbs_offset1/2. */ 555789857Sobrien int cant_free_wcs_buf = 1; 555889857Sobrien#endif /* WCHAR */ 555989857Sobrien 556089857Sobrien /* Just past the end of the corresponding string. */ 556189857Sobrien const CHAR_T *end1, *end2; 556289857Sobrien 556389857Sobrien /* Pointers into string1 and string2, just past the last characters in 556489857Sobrien each to consider matching. */ 556589857Sobrien const CHAR_T *end_match_1, *end_match_2; 556689857Sobrien 556789857Sobrien /* Where we are in the data, and the end of the current string. 
*/ 556889857Sobrien const CHAR_T *d, *dend; 556989857Sobrien 557089857Sobrien /* Where we are in the pattern, and the end of the pattern. */ 557189857Sobrien#ifdef WCHAR 557289857Sobrien UCHAR_T *pattern, *p; 557389857Sobrien register UCHAR_T *pend; 557489857Sobrien#else /* BYTE */ 557589857Sobrien UCHAR_T *p = bufp->buffer; 557689857Sobrien register UCHAR_T *pend = p + bufp->used; 557789857Sobrien#endif /* WCHAR */ 557889857Sobrien 557989857Sobrien /* Mark the opcode just after a start_memory, so we can test for an 558089857Sobrien empty subpattern when we get to the stop_memory. */ 558189857Sobrien UCHAR_T *just_past_start_mem = 0; 558289857Sobrien 558389857Sobrien /* We use this to map every character in the string. */ 558489857Sobrien RE_TRANSLATE_TYPE translate = bufp->translate; 558589857Sobrien 558689857Sobrien /* Failure point stack. Each place that can handle a failure further 558789857Sobrien down the line pushes a failure point on this stack. It consists of 558889857Sobrien restart, regend, and reg_info for all registers corresponding to 558989857Sobrien the subexpressions we're currently inside, plus the number of such 559089857Sobrien registers, and, finally, two char *'s. The first char * is where 559189857Sobrien to resume scanning the pattern; the second one is where to resume 559289857Sobrien scanning the strings. If the latter is zero, the failure point is 559389857Sobrien a ``dummy''; if a failure happens and the failure point is a dummy, 559489857Sobrien it gets discarded and the next next one is tried. */ 559589857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. 
*/ 559689857Sobrien PREFIX(fail_stack_type) fail_stack; 559789857Sobrien#endif 559889857Sobrien#ifdef DEBUG 559989857Sobrien static unsigned failure_id; 560089857Sobrien unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 560189857Sobrien#endif 560289857Sobrien 560389857Sobrien#ifdef REL_ALLOC 560489857Sobrien /* This holds the pointer to the failure stack, when 560589857Sobrien it is allocated relocatably. */ 560689857Sobrien fail_stack_elt_t *failure_stack_ptr; 560789857Sobrien#endif 560889857Sobrien 560989857Sobrien /* We fill all the registers internally, independent of what we 561089857Sobrien return, for use in backreferences. The number here includes 561189857Sobrien an element for register zero. */ 561289857Sobrien size_t num_regs = bufp->re_nsub + 1; 561389857Sobrien 561489857Sobrien /* The currently active registers. */ 561589857Sobrien active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; 561689857Sobrien active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; 561789857Sobrien 561889857Sobrien /* Information on the contents of registers. These are pointers into 561989857Sobrien the input strings; they record just what was matched (on this 562089857Sobrien attempt) by a subexpression part of the pattern, that is, the 562189857Sobrien regnum-th regstart pointer points to where in the pattern we began 562289857Sobrien matching and the regnum-th regend points to right after where we 562389857Sobrien stopped matching the regnum-th subexpression. (The zeroth register 562489857Sobrien keeps track of what the whole pattern matches.) */ 562589857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. 
*/ 562689857Sobrien const CHAR_T **regstart, **regend; 562789857Sobrien#endif 562889857Sobrien 562989857Sobrien /* If a group that's operated upon by a repetition operator fails to 563089857Sobrien match anything, then the register for its start will need to be 563189857Sobrien restored because it will have been set to wherever in the string we 563289857Sobrien are when we last see its open-group operator. Similarly for a 563389857Sobrien register's end. */ 563489857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 563589857Sobrien const CHAR_T **old_regstart, **old_regend; 563689857Sobrien#endif 563789857Sobrien 563889857Sobrien /* The is_active field of reg_info helps us keep track of which (possibly 563989857Sobrien nested) subexpressions we are currently in. The matched_something 564089857Sobrien field of reg_info[reg_num] helps us tell whether or not we have 564189857Sobrien matched any of the pattern so far this time through the reg_num-th 564289857Sobrien subexpression. These two fields get reset each time through any 564389857Sobrien loop their register is in. */ 564489857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 564589857Sobrien PREFIX(register_info_type) *reg_info; 564689857Sobrien#endif 564789857Sobrien 564889857Sobrien /* The following record the register info as found in the above 564989857Sobrien variables when we find a match better than any we've seen before. 565089857Sobrien This happens as we backtrack through the failure points, which in 565189857Sobrien turn happens only if we have not yet matched the entire string. */ 565289857Sobrien unsigned best_regs_set = false; 565389857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 565489857Sobrien const CHAR_T **best_regstart, **best_regend; 565589857Sobrien#endif 565689857Sobrien 565789857Sobrien /* Logically, this is `best_regend[0]'. 
But we don't want to have to 565889857Sobrien allocate space for that if we're not allocating space for anything 565989857Sobrien else (see below). Also, we never need info about register 0 for 566089857Sobrien any of the other register vectors, and it seems rather a kludge to 566189857Sobrien treat `best_regend' differently than the rest. So we keep track of 566289857Sobrien the end of the best match so far in a separate variable. We 566389857Sobrien initialize this to NULL so that when we backtrack the first time 566489857Sobrien and need to test it, it's not garbage. */ 566589857Sobrien const CHAR_T *match_end = NULL; 566689857Sobrien 566789857Sobrien /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 566889857Sobrien int set_regs_matched_done = 0; 566989857Sobrien 567089857Sobrien /* Used when we pop values we don't care about. */ 567189857Sobrien#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 567289857Sobrien const CHAR_T **reg_dummy; 567389857Sobrien PREFIX(register_info_type) *reg_info_dummy; 567489857Sobrien#endif 567589857Sobrien 567689857Sobrien#ifdef DEBUG 567789857Sobrien /* Counts the total number of registers pushed. */ 567889857Sobrien unsigned num_regs_pushed = 0; 567989857Sobrien#endif 568089857Sobrien 568189857Sobrien DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 568289857Sobrien 568389857Sobrien INIT_FAIL_STACK (); 568489857Sobrien 568589857Sobrien#ifdef MATCH_MAY_ALLOCATE 568689857Sobrien /* Do not bother to initialize all the register variables if there are 568789857Sobrien no groups in the pattern, as it takes a fair amount of time. If 568889857Sobrien there are groups, we include space for register 0 (the whole 568989857Sobrien pattern), even though we never use it, since it simplifies the 569089857Sobrien array indexing. We should fix this. 
*/ 569189857Sobrien if (bufp->re_nsub) 569289857Sobrien { 569389857Sobrien regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 569489857Sobrien regend = REGEX_TALLOC (num_regs, const CHAR_T *); 569589857Sobrien old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 569689857Sobrien old_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 569789857Sobrien best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 569889857Sobrien best_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 569989857Sobrien reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 570089857Sobrien reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *); 570189857Sobrien reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 570289857Sobrien 570389857Sobrien if (!(regstart && regend && old_regstart && old_regend && reg_info 570489857Sobrien && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 570589857Sobrien { 570689857Sobrien FREE_VARIABLES (); 570789857Sobrien return -2; 570889857Sobrien } 570989857Sobrien } 571089857Sobrien else 571189857Sobrien { 571289857Sobrien /* We must initialize all our variables to NULL, so that 571389857Sobrien `FREE_VARIABLES' doesn't try to free them. */ 571489857Sobrien regstart = regend = old_regstart = old_regend = best_regstart 571589857Sobrien = best_regend = reg_dummy = NULL; 571689857Sobrien reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL; 571789857Sobrien } 571889857Sobrien#endif /* MATCH_MAY_ALLOCATE */ 571989857Sobrien 572089857Sobrien /* The starting position is bogus. 
*/ 572189857Sobrien#ifdef WCHAR 572289857Sobrien if (pos < 0 || pos > csize1 + csize2) 572389857Sobrien#else /* BYTE */ 572489857Sobrien if (pos < 0 || pos > size1 + size2) 572589857Sobrien#endif 572689857Sobrien { 572789857Sobrien FREE_VARIABLES (); 572889857Sobrien return -1; 572989857Sobrien } 573089857Sobrien 573189857Sobrien#ifdef WCHAR 573289857Sobrien /* Allocate wchar_t array for string1 and string2 and 573389857Sobrien fill them with converted string. */ 573489857Sobrien if (string1 == NULL && string2 == NULL) 573589857Sobrien { 573689857Sobrien /* We need seting up buffers here. */ 573789857Sobrien 573889857Sobrien /* We must free wcs buffers in this function. */ 573989857Sobrien cant_free_wcs_buf = 0; 574089857Sobrien 574189857Sobrien if (csize1 != 0) 574289857Sobrien { 574389857Sobrien string1 = REGEX_TALLOC (csize1 + 1, CHAR_T); 574489857Sobrien mbs_offset1 = REGEX_TALLOC (csize1 + 1, int); 574589857Sobrien is_binary = REGEX_TALLOC (csize1 + 1, char); 574689857Sobrien if (!string1 || !mbs_offset1 || !is_binary) 574789857Sobrien { 574889857Sobrien FREE_VAR (string1); 574989857Sobrien FREE_VAR (mbs_offset1); 575089857Sobrien FREE_VAR (is_binary); 575189857Sobrien return -2; 575289857Sobrien } 575389857Sobrien } 575489857Sobrien if (csize2 != 0) 575589857Sobrien { 575689857Sobrien string2 = REGEX_TALLOC (csize2 + 1, CHAR_T); 575789857Sobrien mbs_offset2 = REGEX_TALLOC (csize2 + 1, int); 575889857Sobrien is_binary = REGEX_TALLOC (csize2 + 1, char); 575989857Sobrien if (!string2 || !mbs_offset2 || !is_binary) 576089857Sobrien { 576189857Sobrien FREE_VAR (string1); 576289857Sobrien FREE_VAR (mbs_offset1); 576389857Sobrien FREE_VAR (string2); 576489857Sobrien FREE_VAR (mbs_offset2); 576589857Sobrien FREE_VAR (is_binary); 576689857Sobrien return -2; 576789857Sobrien } 576889857Sobrien size2 = convert_mbs_to_wcs(string2, cstring2, csize2, 576989857Sobrien mbs_offset2, is_binary); 577089857Sobrien string2[size2] = L'\0'; /* for a sentinel */ 577189857Sobrien 
FREE_VAR (is_binary); 577289857Sobrien } 577389857Sobrien } 577489857Sobrien 577589857Sobrien /* We need to cast pattern to (wchar_t*), because we casted this compiled 577689857Sobrien pattern to (char*) in regex_compile. */ 577789857Sobrien p = pattern = (CHAR_T*)bufp->buffer; 577889857Sobrien pend = (CHAR_T*)(bufp->buffer + bufp->used); 577989857Sobrien 578089857Sobrien#endif /* WCHAR */ 578189857Sobrien 578289857Sobrien /* Initialize subexpression text positions to -1 to mark ones that no 578389857Sobrien start_memory/stop_memory has been seen for. Also initialize the 578489857Sobrien register information struct. */ 578589857Sobrien for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 578689857Sobrien { 578789857Sobrien regstart[mcnt] = regend[mcnt] 578889857Sobrien = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 578989857Sobrien 579089857Sobrien REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 579189857Sobrien IS_ACTIVE (reg_info[mcnt]) = 0; 579289857Sobrien MATCHED_SOMETHING (reg_info[mcnt]) = 0; 579389857Sobrien EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 579489857Sobrien } 579589857Sobrien 579689857Sobrien /* We move `string1' into `string2' if the latter's empty -- but not if 579789857Sobrien `string1' is null. */ 579889857Sobrien if (size2 == 0 && string1 != NULL) 579989857Sobrien { 580089857Sobrien string2 = string1; 580189857Sobrien size2 = size1; 580289857Sobrien string1 = 0; 580389857Sobrien size1 = 0; 580489857Sobrien#ifdef WCHAR 580589857Sobrien mbs_offset2 = mbs_offset1; 580689857Sobrien csize2 = csize1; 580789857Sobrien mbs_offset1 = NULL; 580889857Sobrien csize1 = 0; 580989857Sobrien#endif 581089857Sobrien } 581189857Sobrien end1 = string1 + size1; 581289857Sobrien end2 = string2 + size2; 581389857Sobrien 581489857Sobrien /* Compute where to stop matching, within the two strings. 
*/ 581589857Sobrien#ifdef WCHAR 581689857Sobrien if (stop <= csize1) 581789857Sobrien { 581889857Sobrien mcnt = count_mbs_length(mbs_offset1, stop); 581989857Sobrien end_match_1 = string1 + mcnt; 582089857Sobrien end_match_2 = string2; 582189857Sobrien } 582289857Sobrien else 582389857Sobrien { 582489857Sobrien if (stop > csize1 + csize2) 582589857Sobrien stop = csize1 + csize2; 582689857Sobrien end_match_1 = end1; 582789857Sobrien mcnt = count_mbs_length(mbs_offset2, stop-csize1); 582889857Sobrien end_match_2 = string2 + mcnt; 582989857Sobrien } 583089857Sobrien if (mcnt < 0) 583189857Sobrien { /* count_mbs_length return error. */ 583289857Sobrien FREE_VARIABLES (); 583389857Sobrien return -1; 583489857Sobrien } 583589857Sobrien#else 583689857Sobrien if (stop <= size1) 583789857Sobrien { 583889857Sobrien end_match_1 = string1 + stop; 583989857Sobrien end_match_2 = string2; 584089857Sobrien } 584189857Sobrien else 584289857Sobrien { 584389857Sobrien end_match_1 = end1; 584489857Sobrien end_match_2 = string2 + stop - size1; 584589857Sobrien } 584689857Sobrien#endif /* WCHAR */ 584789857Sobrien 584889857Sobrien /* `p' scans through the pattern as `d' scans through the data. 584989857Sobrien `dend' is the end of the input string that `d' points within. `d' 585089857Sobrien is advanced into the following input string whenever necessary, but 585189857Sobrien this happens before fetching; therefore, at the beginning of the 585289857Sobrien loop, `d' can be pointing at the end of a string, but it cannot 585389857Sobrien equal `string2'. 
*/ 585489857Sobrien#ifdef WCHAR 585589857Sobrien if (size1 > 0 && pos <= csize1) 585689857Sobrien { 585789857Sobrien mcnt = count_mbs_length(mbs_offset1, pos); 585889857Sobrien d = string1 + mcnt; 585989857Sobrien dend = end_match_1; 586089857Sobrien } 586189857Sobrien else 586289857Sobrien { 586389857Sobrien mcnt = count_mbs_length(mbs_offset2, pos-csize1); 586489857Sobrien d = string2 + mcnt; 586589857Sobrien dend = end_match_2; 586689857Sobrien } 586789857Sobrien 586889857Sobrien if (mcnt < 0) 586989857Sobrien { /* count_mbs_length return error. */ 587089857Sobrien FREE_VARIABLES (); 587189857Sobrien return -1; 587289857Sobrien } 587389857Sobrien#else 587489857Sobrien if (size1 > 0 && pos <= size1) 587589857Sobrien { 587689857Sobrien d = string1 + pos; 587789857Sobrien dend = end_match_1; 587889857Sobrien } 587989857Sobrien else 588089857Sobrien { 588189857Sobrien d = string2 + pos - size1; 588289857Sobrien dend = end_match_2; 588389857Sobrien } 588489857Sobrien#endif /* WCHAR */ 588589857Sobrien 588689857Sobrien DEBUG_PRINT1 ("The compiled pattern is:\n"); 588789857Sobrien DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 588889857Sobrien DEBUG_PRINT1 ("The string to match is: `"); 588989857Sobrien DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 589089857Sobrien DEBUG_PRINT1 ("'\n"); 589189857Sobrien 589289857Sobrien /* This loops over pattern commands. It exits by returning from the 589389857Sobrien function if the match is complete, or it drops through if the match 589489857Sobrien fails at this starting point in the input data. */ 589589857Sobrien for (;;) 589689857Sobrien { 589789857Sobrien#ifdef _LIBC 589889857Sobrien DEBUG_PRINT2 ("\n%p: ", p); 589989857Sobrien#else 590089857Sobrien DEBUG_PRINT2 ("\n0x%x: ", p); 590189857Sobrien#endif 590289857Sobrien 590389857Sobrien if (p == pend) 590489857Sobrien { /* End of pattern means we might have succeeded. */ 590589857Sobrien DEBUG_PRINT1 ("end of pattern ... 
"); 590689857Sobrien 590789857Sobrien /* If we haven't matched the entire string, and we want the 590889857Sobrien longest match, try backtracking. */ 590989857Sobrien if (d != end_match_2) 591089857Sobrien { 591189857Sobrien /* 1 if this match ends in the same string (string1 or string2) 591289857Sobrien as the best previous match. */ 591389857Sobrien boolean same_str_p = (FIRST_STRING_P (match_end) 591489857Sobrien == MATCHING_IN_FIRST_STRING); 591589857Sobrien /* 1 if this match is the best seen so far. */ 591689857Sobrien boolean best_match_p; 591789857Sobrien 591889857Sobrien /* AIX compiler got confused when this was combined 591989857Sobrien with the previous declaration. */ 592089857Sobrien if (same_str_p) 592189857Sobrien best_match_p = d > match_end; 592289857Sobrien else 592389857Sobrien best_match_p = !MATCHING_IN_FIRST_STRING; 592489857Sobrien 592589857Sobrien DEBUG_PRINT1 ("backtracking.\n"); 592689857Sobrien 592789857Sobrien if (!FAIL_STACK_EMPTY ()) 592889857Sobrien { /* More failure points to try. */ 592989857Sobrien 593089857Sobrien /* If exceeds best match so far, save it. */ 593189857Sobrien if (!best_regs_set || best_match_p) 593289857Sobrien { 593389857Sobrien best_regs_set = true; 593489857Sobrien match_end = d; 593589857Sobrien 593689857Sobrien DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 593789857Sobrien 593889857Sobrien for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 593989857Sobrien { 594089857Sobrien best_regstart[mcnt] = regstart[mcnt]; 594189857Sobrien best_regend[mcnt] = regend[mcnt]; 594289857Sobrien } 594389857Sobrien } 594489857Sobrien goto fail; 594589857Sobrien } 594689857Sobrien 594789857Sobrien /* If no failure points, don't restore garbage. And if 594889857Sobrien last match is real best match, don't restore second 594989857Sobrien best one. */ 595089857Sobrien else if (best_regs_set && !best_match_p) 595189857Sobrien { 595289857Sobrien restore_best_regs: 595389857Sobrien /* Restore best match. 
It may happen that `dend == 595489857Sobrien end_match_1' while the restored d is in string2. 595589857Sobrien For example, the pattern `x.*y.*z' against the 595689857Sobrien strings `x-' and `y-z-', if the two strings are 595789857Sobrien not consecutive in memory. */ 595889857Sobrien DEBUG_PRINT1 ("Restoring best registers.\n"); 595989857Sobrien 596089857Sobrien d = match_end; 596189857Sobrien dend = ((d >= string1 && d <= end1) 596289857Sobrien ? end_match_1 : end_match_2); 596389857Sobrien 596489857Sobrien for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 596589857Sobrien { 596689857Sobrien regstart[mcnt] = best_regstart[mcnt]; 596789857Sobrien regend[mcnt] = best_regend[mcnt]; 596889857Sobrien } 596989857Sobrien } 597089857Sobrien } /* d != end_match_2 */ 597189857Sobrien 597289857Sobrien succeed_label: 597389857Sobrien DEBUG_PRINT1 ("Accepting match.\n"); 597489857Sobrien /* If caller wants register contents data back, do it. */ 597589857Sobrien if (regs && !bufp->no_sub) 597689857Sobrien { 597789857Sobrien /* Have the register data arrays been allocated? */ 597889857Sobrien if (bufp->regs_allocated == REGS_UNALLOCATED) 597989857Sobrien { /* No. So allocate them with malloc. We need one 598089857Sobrien extra element beyond `num_regs' for the `-1' marker 598189857Sobrien GNU code uses. */ 598289857Sobrien regs->num_regs = MAX (RE_NREGS, num_regs + 1); 598389857Sobrien regs->start = TALLOC (regs->num_regs, regoff_t); 598489857Sobrien regs->end = TALLOC (regs->num_regs, regoff_t); 598589857Sobrien if (regs->start == NULL || regs->end == NULL) 598689857Sobrien { 598789857Sobrien FREE_VARIABLES (); 598889857Sobrien return -2; 598989857Sobrien } 599089857Sobrien bufp->regs_allocated = REGS_REALLOCATE; 599189857Sobrien } 599289857Sobrien else if (bufp->regs_allocated == REGS_REALLOCATE) 599389857Sobrien { /* Yes. If we need more elements than were already 599489857Sobrien allocated, reallocate them. If we need fewer, just 599589857Sobrien leave it alone. 
*/ 599689857Sobrien if (regs->num_regs < num_regs + 1) 599789857Sobrien { 599889857Sobrien regs->num_regs = num_regs + 1; 599989857Sobrien RETALLOC (regs->start, regs->num_regs, regoff_t); 600089857Sobrien RETALLOC (regs->end, regs->num_regs, regoff_t); 600189857Sobrien if (regs->start == NULL || regs->end == NULL) 600289857Sobrien { 600389857Sobrien FREE_VARIABLES (); 600489857Sobrien return -2; 600589857Sobrien } 600689857Sobrien } 600789857Sobrien } 600889857Sobrien else 600989857Sobrien { 601089857Sobrien /* These braces fend off a "empty body in an else-statement" 601189857Sobrien warning under GCC when assert expands to nothing. */ 601289857Sobrien assert (bufp->regs_allocated == REGS_FIXED); 601389857Sobrien } 601489857Sobrien 601589857Sobrien /* Convert the pointer data in `regstart' and `regend' to 601689857Sobrien indices. Register zero has to be set differently, 601789857Sobrien since we haven't kept track of any info for it. */ 601889857Sobrien if (regs->num_regs > 0) 601989857Sobrien { 602089857Sobrien regs->start[0] = pos; 602189857Sobrien#ifdef WCHAR 602289857Sobrien if (MATCHING_IN_FIRST_STRING) 602389857Sobrien regs->end[0] = mbs_offset1 != NULL ? 602489857Sobrien mbs_offset1[d-string1] : 0; 602589857Sobrien else 602689857Sobrien regs->end[0] = csize1 + (mbs_offset2 != NULL ? 602789857Sobrien mbs_offset2[d-string2] : 0); 602889857Sobrien#else 602989857Sobrien regs->end[0] = (MATCHING_IN_FIRST_STRING 603089857Sobrien ? ((regoff_t) (d - string1)) 603189857Sobrien : ((regoff_t) (d - string2 + size1))); 603289857Sobrien#endif /* WCHAR */ 603389857Sobrien } 603489857Sobrien 603589857Sobrien /* Go through the first `min (num_regs, regs->num_regs)' 603689857Sobrien registers, since that is all we initialized. 
*/ 603789857Sobrien for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); 603889857Sobrien mcnt++) 603989857Sobrien { 604089857Sobrien if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 604189857Sobrien regs->start[mcnt] = regs->end[mcnt] = -1; 604289857Sobrien else 604389857Sobrien { 604489857Sobrien regs->start[mcnt] 604589857Sobrien = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 604689857Sobrien regs->end[mcnt] 604789857Sobrien = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 604889857Sobrien } 604989857Sobrien } 605089857Sobrien 605189857Sobrien /* If the regs structure we return has more elements than 605289857Sobrien were in the pattern, set the extra elements to -1. If 605389857Sobrien we (re)allocated the registers, this is the case, 605489857Sobrien because we always allocate enough to have at least one 605589857Sobrien -1 at the end. */ 605689857Sobrien for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) 605789857Sobrien regs->start[mcnt] = regs->end[mcnt] = -1; 605889857Sobrien } /* regs && !bufp->no_sub */ 605989857Sobrien 606089857Sobrien DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 606189857Sobrien nfailure_points_pushed, nfailure_points_popped, 606289857Sobrien nfailure_points_pushed - nfailure_points_popped); 606389857Sobrien DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 606489857Sobrien 606589857Sobrien#ifdef WCHAR 606689857Sobrien if (MATCHING_IN_FIRST_STRING) 606789857Sobrien mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0; 606889857Sobrien else 606989857Sobrien mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) + 607089857Sobrien csize1; 607189857Sobrien mcnt -= pos; 607289857Sobrien#else 607389857Sobrien mcnt = d - pos - (MATCHING_IN_FIRST_STRING 607489857Sobrien ? 
string1 607589857Sobrien : string2 - size1); 607689857Sobrien#endif /* WCHAR */ 607789857Sobrien 607889857Sobrien DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 607989857Sobrien 608089857Sobrien FREE_VARIABLES (); 608189857Sobrien return mcnt; 608289857Sobrien } 608389857Sobrien 608489857Sobrien /* Otherwise match next pattern command. */ 608589857Sobrien switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 608689857Sobrien { 608789857Sobrien /* Ignore these. Used to ignore the n of succeed_n's which 608889857Sobrien currently have n == 0. */ 608989857Sobrien case no_op: 609089857Sobrien DEBUG_PRINT1 ("EXECUTING no_op.\n"); 609189857Sobrien break; 609289857Sobrien 609389857Sobrien case succeed: 609489857Sobrien DEBUG_PRINT1 ("EXECUTING succeed.\n"); 609589857Sobrien goto succeed_label; 609689857Sobrien 609789857Sobrien /* Match the next n pattern characters exactly. The following 609889857Sobrien byte in the pattern defines n, and the n bytes after that 609989857Sobrien are the characters to match. */ 610089857Sobrien case exactn: 610189857Sobrien#ifdef MBS_SUPPORT 610289857Sobrien case exactn_bin: 610389857Sobrien#endif 610489857Sobrien mcnt = *p++; 610589857Sobrien DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 610689857Sobrien 610789857Sobrien /* This is written out as an if-else so we don't waste time 610889857Sobrien testing `translate' inside the loop. 
*/ 610989857Sobrien if (translate) 611089857Sobrien { 611189857Sobrien do 611289857Sobrien { 611389857Sobrien PREFETCH (); 611489857Sobrien#ifdef WCHAR 611589857Sobrien if (*d <= 0xff) 611689857Sobrien { 611789857Sobrien if ((UCHAR_T) translate[(unsigned char) *d++] 611889857Sobrien != (UCHAR_T) *p++) 611989857Sobrien goto fail; 612089857Sobrien } 612189857Sobrien else 612289857Sobrien { 612389857Sobrien if (*d++ != (CHAR_T) *p++) 612489857Sobrien goto fail; 612589857Sobrien } 612689857Sobrien#else 612789857Sobrien if ((UCHAR_T) translate[(unsigned char) *d++] 612889857Sobrien != (UCHAR_T) *p++) 612989857Sobrien goto fail; 613089857Sobrien#endif /* WCHAR */ 613189857Sobrien } 613289857Sobrien while (--mcnt); 613389857Sobrien } 613489857Sobrien else 613589857Sobrien { 613689857Sobrien do 613789857Sobrien { 613889857Sobrien PREFETCH (); 613989857Sobrien if (*d++ != (CHAR_T) *p++) goto fail; 614089857Sobrien } 614189857Sobrien while (--mcnt); 614289857Sobrien } 614389857Sobrien SET_REGS_MATCHED (); 614489857Sobrien break; 614589857Sobrien 614689857Sobrien 614789857Sobrien /* Match any character except possibly a newline or a null. 
*/ 614889857Sobrien case anychar: 614989857Sobrien DEBUG_PRINT1 ("EXECUTING anychar.\n"); 615089857Sobrien 615189857Sobrien PREFETCH (); 615289857Sobrien 615389857Sobrien if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 615489857Sobrien || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 615589857Sobrien goto fail; 615689857Sobrien 615789857Sobrien SET_REGS_MATCHED (); 615889857Sobrien DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d); 615989857Sobrien d++; 616089857Sobrien break; 616189857Sobrien 616289857Sobrien 616389857Sobrien case charset: 616489857Sobrien case charset_not: 616589857Sobrien { 616689857Sobrien register UCHAR_T c; 616789857Sobrien#ifdef WCHAR 616889857Sobrien unsigned int i, char_class_length, coll_symbol_length, 616989857Sobrien equiv_class_length, ranges_length, chars_length, length; 617089857Sobrien CHAR_T *workp, *workp2, *charset_top; 617189857Sobrien#define WORK_BUFFER_SIZE 128 617289857Sobrien CHAR_T str_buf[WORK_BUFFER_SIZE]; 617389857Sobrien# ifdef _LIBC 617489857Sobrien uint32_t nrules; 617589857Sobrien# endif /* _LIBC */ 617689857Sobrien#endif /* WCHAR */ 6177218822Sdim boolean negate = (re_opcode_t) *(p - 1) == charset_not; 617889857Sobrien 6179218822Sdim DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : ""); 618089857Sobrien PREFETCH (); 618189857Sobrien c = TRANSLATE (*d); /* The character to match. 
*/ 618289857Sobrien#ifdef WCHAR 618389857Sobrien# ifdef _LIBC 618489857Sobrien nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 618589857Sobrien# endif /* _LIBC */ 618689857Sobrien charset_top = p - 1; 618789857Sobrien char_class_length = *p++; 618889857Sobrien coll_symbol_length = *p++; 618989857Sobrien equiv_class_length = *p++; 619089857Sobrien ranges_length = *p++; 619189857Sobrien chars_length = *p++; 619289857Sobrien /* p points charset[6], so the address of the next instruction 619389857Sobrien (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'], 619489857Sobrien where l=length of char_classes, m=length of collating_symbol, 619589857Sobrien n=equivalence_class, o=length of char_range, 619689857Sobrien p'=length of character. */ 619789857Sobrien workp = p; 619889857Sobrien /* Update p to indicate the next instruction. */ 619989857Sobrien p += char_class_length + coll_symbol_length+ equiv_class_length + 620089857Sobrien 2*ranges_length + chars_length; 620189857Sobrien 620289857Sobrien /* match with char_class? */ 620389857Sobrien for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE) 620489857Sobrien { 620589857Sobrien wctype_t wctype; 620689857Sobrien uintptr_t alignedp = ((uintptr_t)workp 620789857Sobrien + __alignof__(wctype_t) - 1) 620889857Sobrien & ~(uintptr_t)(__alignof__(wctype_t) - 1); 620989857Sobrien wctype = *((wctype_t*)alignedp); 621089857Sobrien workp += CHAR_CLASS_SIZE; 621189857Sobrien# ifdef _LIBC 621289857Sobrien if (__iswctype((wint_t)c, wctype)) 621389857Sobrien goto char_set_matched; 621489857Sobrien# else 621589857Sobrien if (iswctype((wint_t)c, wctype)) 621689857Sobrien goto char_set_matched; 621789857Sobrien# endif 621889857Sobrien } 621989857Sobrien 622089857Sobrien /* match with collating_symbol? 
*/ 622189857Sobrien# ifdef _LIBC 622289857Sobrien if (nrules != 0) 622389857Sobrien { 622489857Sobrien const unsigned char *extra = (const unsigned char *) 622589857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 622689857Sobrien 622789857Sobrien for (workp2 = workp + coll_symbol_length ; workp < workp2 ; 622889857Sobrien workp++) 622989857Sobrien { 623089857Sobrien int32_t *wextra; 623189857Sobrien wextra = (int32_t*)(extra + *workp++); 623289857Sobrien for (i = 0; i < *wextra; ++i) 623389857Sobrien if (TRANSLATE(d[i]) != wextra[1 + i]) 623489857Sobrien break; 623589857Sobrien 623689857Sobrien if (i == *wextra) 623789857Sobrien { 623889857Sobrien /* Update d, however d will be incremented at 623989857Sobrien char_set_matched:, we decrement d here. */ 624089857Sobrien d += i - 1; 624189857Sobrien goto char_set_matched; 624289857Sobrien } 624389857Sobrien } 624489857Sobrien } 624589857Sobrien else /* (nrules == 0) */ 624689857Sobrien# endif 624789857Sobrien /* If we can't look up collation data, we use wcscoll 624889857Sobrien instead. */ 624989857Sobrien { 625089857Sobrien for (workp2 = workp + coll_symbol_length ; workp < workp2 ;) 625189857Sobrien { 625289857Sobrien const CHAR_T *backup_d = d, *backup_dend = dend; 625389857Sobrien# ifdef _LIBC 625489857Sobrien length = __wcslen (workp); 625589857Sobrien# else 625689857Sobrien length = wcslen (workp); 625789857Sobrien# endif 625889857Sobrien 625989857Sobrien /* If wcscoll(the collating symbol, whole string) > 0, 626089857Sobrien any substring of the string never match with the 626189857Sobrien collating symbol. 
*/ 626289857Sobrien# ifdef _LIBC 626389857Sobrien if (__wcscoll (workp, d) > 0) 626489857Sobrien# else 626589857Sobrien if (wcscoll (workp, d) > 0) 626689857Sobrien# endif 626789857Sobrien { 626889857Sobrien workp += length + 1; 626989857Sobrien continue; 627089857Sobrien } 627189857Sobrien 627289857Sobrien /* First, we compare the collating symbol with 627389857Sobrien the first character of the string. 627489857Sobrien If it don't match, we add the next character to 627589857Sobrien the compare buffer in turn. */ 627689857Sobrien for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++) 627789857Sobrien { 627889857Sobrien int match; 627989857Sobrien if (d == dend) 628089857Sobrien { 628189857Sobrien if (dend == end_match_2) 628289857Sobrien break; 628389857Sobrien d = string2; 628489857Sobrien dend = end_match_2; 628589857Sobrien } 628689857Sobrien 628789857Sobrien /* add next character to the compare buffer. */ 628889857Sobrien str_buf[i] = TRANSLATE(*d); 628989857Sobrien str_buf[i+1] = '\0'; 629089857Sobrien 629189857Sobrien# ifdef _LIBC 629289857Sobrien match = __wcscoll (workp, str_buf); 629389857Sobrien# else 629489857Sobrien match = wcscoll (workp, str_buf); 629589857Sobrien# endif 629689857Sobrien if (match == 0) 629789857Sobrien goto char_set_matched; 629889857Sobrien 629989857Sobrien if (match < 0) 630089857Sobrien /* (str_buf > workp) indicate (str_buf + X > workp), 630189857Sobrien because for all X (str_buf + X > str_buf). 630289857Sobrien So we don't need continue this loop. */ 630389857Sobrien break; 630489857Sobrien 630589857Sobrien /* Otherwise(str_buf < workp), 630689857Sobrien (str_buf+next_character) may equals (workp). 630789857Sobrien So we continue this loop. */ 630889857Sobrien } 630989857Sobrien /* not matched */ 631089857Sobrien d = backup_d; 631189857Sobrien dend = backup_dend; 631289857Sobrien workp += length + 1; 631389857Sobrien } 631489857Sobrien } 631589857Sobrien /* match with equivalence_class? 
*/ 631689857Sobrien# ifdef _LIBC 631789857Sobrien if (nrules != 0) 631889857Sobrien { 631989857Sobrien const CHAR_T *backup_d = d, *backup_dend = dend; 632089857Sobrien /* Try to match the equivalence class against 632189857Sobrien those known to the collate implementation. */ 632289857Sobrien const int32_t *table; 632389857Sobrien const int32_t *weights; 632489857Sobrien const int32_t *extra; 632589857Sobrien const int32_t *indirect; 632689857Sobrien int32_t idx, idx2; 632789857Sobrien wint_t *cp; 632889857Sobrien size_t len; 632989857Sobrien 633089857Sobrien /* This #include defines a local function! */ 633189857Sobrien# include <locale/weightwc.h> 633289857Sobrien 633389857Sobrien table = (const int32_t *) 633489857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 633589857Sobrien weights = (const wint_t *) 633689857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 633789857Sobrien extra = (const wint_t *) 633889857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 633989857Sobrien indirect = (const int32_t *) 634089857Sobrien _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 634189857Sobrien 634289857Sobrien /* Write 1 collating element to str_buf, and 634389857Sobrien get its index. */ 634489857Sobrien idx2 = 0; 634589857Sobrien 634689857Sobrien for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++) 634789857Sobrien { 634889857Sobrien cp = (wint_t*)str_buf; 634989857Sobrien if (d == dend) 635089857Sobrien { 635189857Sobrien if (dend == end_match_2) 635289857Sobrien break; 635389857Sobrien d = string2; 635489857Sobrien dend = end_match_2; 635589857Sobrien } 635689857Sobrien str_buf[i] = TRANSLATE(*(d+i)); 635789857Sobrien str_buf[i+1] = '\0'; /* sentinel */ 635889857Sobrien idx2 = findidx ((const wint_t**)&cp); 635989857Sobrien } 636089857Sobrien 636189857Sobrien /* Update d, however d will be incremented at 636289857Sobrien char_set_matched:, we decrement d here. 
*/ 636389857Sobrien d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1); 636489857Sobrien if (d >= dend) 636589857Sobrien { 636689857Sobrien if (dend == end_match_2) 636789857Sobrien d = dend; 636889857Sobrien else 636989857Sobrien { 637089857Sobrien d = string2; 637189857Sobrien dend = end_match_2; 637289857Sobrien } 637389857Sobrien } 637489857Sobrien 637589857Sobrien len = weights[idx2]; 637689857Sobrien 637789857Sobrien for (workp2 = workp + equiv_class_length ; workp < workp2 ; 637889857Sobrien workp++) 637989857Sobrien { 638089857Sobrien idx = (int32_t)*workp; 638189857Sobrien /* We already checked idx != 0 in regex_compile. */ 638289857Sobrien 638389857Sobrien if (idx2 != 0 && len == weights[idx]) 638489857Sobrien { 638589857Sobrien int cnt = 0; 638689857Sobrien while (cnt < len && (weights[idx + 1 + cnt] 638789857Sobrien == weights[idx2 + 1 + cnt])) 638889857Sobrien ++cnt; 638989857Sobrien 639089857Sobrien if (cnt == len) 639189857Sobrien goto char_set_matched; 639289857Sobrien } 639389857Sobrien } 639489857Sobrien /* not matched */ 639589857Sobrien d = backup_d; 639689857Sobrien dend = backup_dend; 639789857Sobrien } 639889857Sobrien else /* (nrules == 0) */ 639989857Sobrien# endif 640089857Sobrien /* If we can't look up collation data, we use wcscoll 640189857Sobrien instead. */ 640289857Sobrien { 640389857Sobrien for (workp2 = workp + equiv_class_length ; workp < workp2 ;) 640489857Sobrien { 640589857Sobrien const CHAR_T *backup_d = d, *backup_dend = dend; 640689857Sobrien# ifdef _LIBC 640789857Sobrien length = __wcslen (workp); 640889857Sobrien# else 640989857Sobrien length = wcslen (workp); 641089857Sobrien# endif 641189857Sobrien 641289857Sobrien /* If wcscoll(the collating symbol, whole string) > 0, 641389857Sobrien any substring of the string never match with the 641489857Sobrien collating symbol. 
*/ 641589857Sobrien# ifdef _LIBC 641689857Sobrien if (__wcscoll (workp, d) > 0) 641789857Sobrien# else 641889857Sobrien if (wcscoll (workp, d) > 0) 641989857Sobrien# endif 642089857Sobrien { 642189857Sobrien workp += length + 1; 642289857Sobrien break; 642389857Sobrien } 642489857Sobrien 642589857Sobrien /* First, we compare the equivalence class with 642689857Sobrien the first character of the string. 642789857Sobrien If it don't match, we add the next character to 642889857Sobrien the compare buffer in turn. */ 642989857Sobrien for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++) 643089857Sobrien { 643189857Sobrien int match; 643289857Sobrien if (d == dend) 643389857Sobrien { 643489857Sobrien if (dend == end_match_2) 643589857Sobrien break; 643689857Sobrien d = string2; 643789857Sobrien dend = end_match_2; 643889857Sobrien } 643989857Sobrien 644089857Sobrien /* add next character to the compare buffer. */ 644189857Sobrien str_buf[i] = TRANSLATE(*d); 644289857Sobrien str_buf[i+1] = '\0'; 644389857Sobrien 644489857Sobrien# ifdef _LIBC 644589857Sobrien match = __wcscoll (workp, str_buf); 644689857Sobrien# else 644789857Sobrien match = wcscoll (workp, str_buf); 644889857Sobrien# endif 644989857Sobrien 645089857Sobrien if (match == 0) 645189857Sobrien goto char_set_matched; 645289857Sobrien 645389857Sobrien if (match < 0) 645489857Sobrien /* (str_buf > workp) indicate (str_buf + X > workp), 645589857Sobrien because for all X (str_buf + X > str_buf). 645689857Sobrien So we don't need continue this loop. */ 645789857Sobrien break; 645889857Sobrien 645989857Sobrien /* Otherwise(str_buf < workp), 646089857Sobrien (str_buf+next_character) may equals (workp). 646189857Sobrien So we continue this loop. */ 646289857Sobrien } 646389857Sobrien /* not matched */ 646489857Sobrien d = backup_d; 646589857Sobrien dend = backup_dend; 646689857Sobrien workp += length + 1; 646789857Sobrien } 646889857Sobrien } 646989857Sobrien 647089857Sobrien /* match with char_range? 
*/ 647189857Sobrien# ifdef _LIBC 647289857Sobrien if (nrules != 0) 647389857Sobrien { 647489857Sobrien uint32_t collseqval; 647589857Sobrien const char *collseq = (const char *) 647689857Sobrien _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 647789857Sobrien 647889857Sobrien collseqval = collseq_table_lookup (collseq, c); 647989857Sobrien 648089857Sobrien for (; workp < p - chars_length ;) 648189857Sobrien { 648289857Sobrien uint32_t start_val, end_val; 648389857Sobrien 648489857Sobrien /* We already compute the collation sequence value 648589857Sobrien of the characters (or collating symbols). */ 648689857Sobrien start_val = (uint32_t) *workp++; /* range_start */ 648789857Sobrien end_val = (uint32_t) *workp++; /* range_end */ 648889857Sobrien 648989857Sobrien if (start_val <= collseqval && collseqval <= end_val) 649089857Sobrien goto char_set_matched; 649189857Sobrien } 649289857Sobrien } 649389857Sobrien else 649489857Sobrien# endif 649589857Sobrien { 649689857Sobrien /* We set range_start_char at str_buf[0], range_end_char 649789857Sobrien at str_buf[4], and compared char at str_buf[2]. */ 649889857Sobrien str_buf[1] = 0; 649989857Sobrien str_buf[2] = c; 650089857Sobrien str_buf[3] = 0; 650189857Sobrien str_buf[5] = 0; 650289857Sobrien for (; workp < p - chars_length ;) 650389857Sobrien { 650489857Sobrien wchar_t *range_start_char, *range_end_char; 650589857Sobrien 650689857Sobrien /* match if (range_start_char <= c <= range_end_char). */ 650789857Sobrien 650889857Sobrien /* If range_start(or end) < 0, we assume -range_start(end) 650989857Sobrien is the offset of the collating symbol which is specified 651089857Sobrien as the character of the range start(end). 
*/ 651189857Sobrien 651289857Sobrien /* range_start */ 651389857Sobrien if (*workp < 0) 651489857Sobrien range_start_char = charset_top - (*workp++); 651589857Sobrien else 651689857Sobrien { 651789857Sobrien str_buf[0] = *workp++; 651889857Sobrien range_start_char = str_buf; 651989857Sobrien } 652089857Sobrien 652189857Sobrien /* range_end */ 652289857Sobrien if (*workp < 0) 652389857Sobrien range_end_char = charset_top - (*workp++); 652489857Sobrien else 652589857Sobrien { 652689857Sobrien str_buf[4] = *workp++; 652789857Sobrien range_end_char = str_buf + 4; 652889857Sobrien } 652989857Sobrien 653089857Sobrien# ifdef _LIBC 653189857Sobrien if (__wcscoll (range_start_char, str_buf+2) <= 0 653289857Sobrien && __wcscoll (str_buf+2, range_end_char) <= 0) 653389857Sobrien# else 653489857Sobrien if (wcscoll (range_start_char, str_buf+2) <= 0 653589857Sobrien && wcscoll (str_buf+2, range_end_char) <= 0) 653689857Sobrien# endif 653789857Sobrien goto char_set_matched; 653889857Sobrien } 653989857Sobrien } 654089857Sobrien 654189857Sobrien /* match with char? */ 654289857Sobrien for (; workp < p ; workp++) 654389857Sobrien if (c == *workp) 654489857Sobrien goto char_set_matched; 654589857Sobrien 6546218822Sdim negate = !negate; 654789857Sobrien 654889857Sobrien char_set_matched: 6549218822Sdim if (negate) goto fail; 655089857Sobrien#else 655189857Sobrien /* Cast to `unsigned' instead of `unsigned char' in case the 655289857Sobrien bit list is a full 32 bytes long. 
*/ 655389857Sobrien if (c < (unsigned) (*p * BYTEWIDTH) 655489857Sobrien && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 6555218822Sdim negate = !negate; 655689857Sobrien 655789857Sobrien p += 1 + *p; 655889857Sobrien 6559218822Sdim if (!negate) goto fail; 656089857Sobrien#undef WORK_BUFFER_SIZE 656189857Sobrien#endif /* WCHAR */ 656289857Sobrien SET_REGS_MATCHED (); 656389857Sobrien d++; 656489857Sobrien break; 656589857Sobrien } 656689857Sobrien 656789857Sobrien 656889857Sobrien /* The beginning of a group is represented by start_memory. 656989857Sobrien The arguments are the register number in the next byte, and the 657089857Sobrien number of groups inner to this one in the next. The text 657189857Sobrien matched within the group is recorded (in the internal 657289857Sobrien registers data structure) under the register number. */ 657389857Sobrien case start_memory: 657489857Sobrien DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n", 657589857Sobrien (long int) *p, (long int) p[1]); 657689857Sobrien 657789857Sobrien /* Find out if this group can match the empty string. */ 657889857Sobrien p1 = p; /* To send to group_match_null_string_p. */ 657989857Sobrien 658089857Sobrien if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 658189857Sobrien REG_MATCH_NULL_STRING_P (reg_info[*p]) 658289857Sobrien = PREFIX(group_match_null_string_p) (&p1, pend, reg_info); 658389857Sobrien 658489857Sobrien /* Save the position in the string where we were the last time 658589857Sobrien we were at this open-group operator in case the group is 658689857Sobrien operated upon by a repetition operator, e.g., with `(a*)*b' 658789857Sobrien against `ab'; then we want to ignore where we are now in 658889857Sobrien the string in case this attempt to match fails. */ 658989857Sobrien old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 659089857Sobrien ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] 659189857Sobrien : regstart[*p]; 659289857Sobrien DEBUG_PRINT2 (" old_regstart: %d\n", 659389857Sobrien POINTER_TO_OFFSET (old_regstart[*p])); 659489857Sobrien 659589857Sobrien regstart[*p] = d; 659689857Sobrien DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 659789857Sobrien 659889857Sobrien IS_ACTIVE (reg_info[*p]) = 1; 659989857Sobrien MATCHED_SOMETHING (reg_info[*p]) = 0; 660089857Sobrien 660189857Sobrien /* Clear this whenever we change the register activity status. */ 660289857Sobrien set_regs_matched_done = 0; 660389857Sobrien 660489857Sobrien /* This is the new highest active register. */ 660589857Sobrien highest_active_reg = *p; 660689857Sobrien 660789857Sobrien /* If nothing was active before, this is the new lowest active 660889857Sobrien register. */ 660989857Sobrien if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 661089857Sobrien lowest_active_reg = *p; 661189857Sobrien 661289857Sobrien /* Move past the register number and inner group count. */ 661389857Sobrien p += 2; 661489857Sobrien just_past_start_mem = p; 661589857Sobrien 661689857Sobrien break; 661789857Sobrien 661889857Sobrien 661989857Sobrien /* The stop_memory opcode represents the end of a group. Its 662089857Sobrien arguments are the same as start_memory's: the register 662189857Sobrien number, and the number of inner groups. */ 662289857Sobrien case stop_memory: 662389857Sobrien DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n", 662489857Sobrien (long int) *p, (long int) p[1]); 662589857Sobrien 662689857Sobrien /* We need to save the string position the last time we were at 662789857Sobrien this close-group operator in case the group is operated 662889857Sobrien upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 662989857Sobrien against `aba'; then we want to ignore where we are now in 663089857Sobrien the string in case this attempt to match fails. */ 663189857Sobrien old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 663289857Sobrien ? 
REG_UNSET (regend[*p]) ? d : regend[*p] 663389857Sobrien : regend[*p]; 663489857Sobrien DEBUG_PRINT2 (" old_regend: %d\n", 663589857Sobrien POINTER_TO_OFFSET (old_regend[*p])); 663689857Sobrien 663789857Sobrien regend[*p] = d; 663889857Sobrien DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 663989857Sobrien 664089857Sobrien /* This register isn't active anymore. */ 664189857Sobrien IS_ACTIVE (reg_info[*p]) = 0; 664289857Sobrien 664389857Sobrien /* Clear this whenever we change the register activity status. */ 664489857Sobrien set_regs_matched_done = 0; 664589857Sobrien 664689857Sobrien /* If this was the only register active, nothing is active 664789857Sobrien anymore. */ 664889857Sobrien if (lowest_active_reg == highest_active_reg) 664989857Sobrien { 665089857Sobrien lowest_active_reg = NO_LOWEST_ACTIVE_REG; 665189857Sobrien highest_active_reg = NO_HIGHEST_ACTIVE_REG; 665289857Sobrien } 665389857Sobrien else 665489857Sobrien { /* We must scan for the new highest active register, since 665589857Sobrien it isn't necessarily one less than now: consider 665689857Sobrien (a(b)c(d(e)f)g). When group 3 ends, after the f), the 665789857Sobrien new highest active register is 1. */ 665889857Sobrien UCHAR_T r = *p - 1; 665989857Sobrien while (r > 0 && !IS_ACTIVE (reg_info[r])) 666089857Sobrien r--; 666189857Sobrien 666289857Sobrien /* If we end up at register zero, that means that we saved 666389857Sobrien the registers as the result of an `on_failure_jump', not 666489857Sobrien a `start_memory', and we jumped to past the innermost 666589857Sobrien `stop_memory'. For example, in ((.)*) we save 666689857Sobrien registers 1 and 2 as a result of the *, but when we pop 666789857Sobrien back to the second ), we are at the stop_memory 1. 666889857Sobrien Thus, nothing is active. 
*/ 666989857Sobrien if (r == 0) 667089857Sobrien { 667189857Sobrien lowest_active_reg = NO_LOWEST_ACTIVE_REG; 667289857Sobrien highest_active_reg = NO_HIGHEST_ACTIVE_REG; 667389857Sobrien } 667489857Sobrien else 667589857Sobrien highest_active_reg = r; 667689857Sobrien } 667789857Sobrien 667889857Sobrien /* If just failed to match something this time around with a 667989857Sobrien group that's operated on by a repetition operator, try to 668089857Sobrien force exit from the ``loop'', and restore the register 668189857Sobrien information for this group that we had before trying this 668289857Sobrien last match. */ 668389857Sobrien if ((!MATCHED_SOMETHING (reg_info[*p]) 668489857Sobrien || just_past_start_mem == p - 1) 668589857Sobrien && (p + 2) < pend) 668689857Sobrien { 668789857Sobrien boolean is_a_jump_n = false; 668889857Sobrien 668989857Sobrien p1 = p + 2; 669089857Sobrien mcnt = 0; 669189857Sobrien switch ((re_opcode_t) *p1++) 669289857Sobrien { 669389857Sobrien case jump_n: 669489857Sobrien is_a_jump_n = true; 669589857Sobrien case pop_failure_jump: 669689857Sobrien case maybe_pop_jump: 669789857Sobrien case jump: 669889857Sobrien case dummy_failure_jump: 669989857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 670089857Sobrien if (is_a_jump_n) 670189857Sobrien p1 += OFFSET_ADDRESS_SIZE; 670289857Sobrien break; 670389857Sobrien 670489857Sobrien default: 670589857Sobrien /* do nothing */ ; 670689857Sobrien } 670789857Sobrien p1 += mcnt; 670889857Sobrien 670989857Sobrien /* If the next operation is a jump backwards in the pattern 671089857Sobrien to an on_failure_jump right before the start_memory 671189857Sobrien corresponding to this stop_memory, exit from the loop 671289857Sobrien by forcing a failure after pushing on the stack the 671389857Sobrien on_failure_jump's jump in the pattern, and d. 
*/ 671489857Sobrien if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 671589857Sobrien && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory 671689857Sobrien && p1[2+OFFSET_ADDRESS_SIZE] == *p) 671789857Sobrien { 671889857Sobrien /* If this group ever matched anything, then restore 671989857Sobrien what its registers were before trying this last 672089857Sobrien failed match, e.g., with `(a*)*b' against `ab' for 672189857Sobrien regstart[1], and, e.g., with `((a*)*(b*)*)*' 672289857Sobrien against `aba' for regend[3]. 672389857Sobrien 672489857Sobrien Also restore the registers for inner groups for, 672589857Sobrien e.g., `((a*)(b*))*' against `aba' (register 3 would 672689857Sobrien otherwise get trashed). */ 672789857Sobrien 672889857Sobrien if (EVER_MATCHED_SOMETHING (reg_info[*p])) 672989857Sobrien { 673089857Sobrien unsigned r; 673189857Sobrien 673289857Sobrien EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 673389857Sobrien 673489857Sobrien /* Restore this and inner groups' (if any) registers. */ 673589857Sobrien for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); 673689857Sobrien r++) 673789857Sobrien { 673889857Sobrien regstart[r] = old_regstart[r]; 673989857Sobrien 674089857Sobrien /* xx why this test? */ 674189857Sobrien if (old_regend[r] >= regstart[r]) 674289857Sobrien regend[r] = old_regend[r]; 674389857Sobrien } 674489857Sobrien } 674589857Sobrien p1++; 674689857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 674789857Sobrien PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 674889857Sobrien 674989857Sobrien goto fail; 675089857Sobrien } 675189857Sobrien } 675289857Sobrien 675389857Sobrien /* Move past the register number and the inner group count. */ 675489857Sobrien p += 2; 675589857Sobrien break; 675689857Sobrien 675789857Sobrien 675889857Sobrien /* \<digit> has been turned into a `duplicate' command which is 675989857Sobrien followed by the numeric value of <digit> as the register number. 
*/ 676089857Sobrien case duplicate: 676189857Sobrien { 676289857Sobrien register const CHAR_T *d2, *dend2; 676389857Sobrien int regno = *p++; /* Get which register to match against. */ 676489857Sobrien DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 676589857Sobrien 676689857Sobrien /* Can't back reference a group which we've never matched. */ 676789857Sobrien if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 676889857Sobrien goto fail; 676989857Sobrien 677089857Sobrien /* Where in input to try to start matching. */ 677189857Sobrien d2 = regstart[regno]; 677289857Sobrien 677389857Sobrien /* Where to stop matching; if both the place to start and 677489857Sobrien the place to stop matching are in the same string, then 677589857Sobrien set to the place to stop, otherwise, for now have to use 677689857Sobrien the end of the first string. */ 677789857Sobrien 677889857Sobrien dend2 = ((FIRST_STRING_P (regstart[regno]) 677989857Sobrien == FIRST_STRING_P (regend[regno])) 678089857Sobrien ? regend[regno] : end_match_1); 678189857Sobrien for (;;) 678289857Sobrien { 678389857Sobrien /* If necessary, advance to next segment in register 678489857Sobrien contents. */ 678589857Sobrien while (d2 == dend2) 678689857Sobrien { 678789857Sobrien if (dend2 == end_match_2) break; 678889857Sobrien if (dend2 == regend[regno]) break; 678989857Sobrien 679089857Sobrien /* End of string1 => advance to string2. */ 679189857Sobrien d2 = string2; 679289857Sobrien dend2 = regend[regno]; 679389857Sobrien } 679489857Sobrien /* At end of register contents => success */ 679589857Sobrien if (d2 == dend2) break; 679689857Sobrien 679789857Sobrien /* If necessary, advance to next segment in data. */ 679889857Sobrien PREFETCH (); 679989857Sobrien 680089857Sobrien /* How many characters left in this segment to match. 
*/ 680189857Sobrien mcnt = dend - d; 680289857Sobrien 680389857Sobrien /* Want how many consecutive characters we can match in 680489857Sobrien one shot, so, if necessary, adjust the count. */ 680589857Sobrien if (mcnt > dend2 - d2) 680689857Sobrien mcnt = dend2 - d2; 680789857Sobrien 680889857Sobrien /* Compare that many; failure if mismatch, else move 680989857Sobrien past them. */ 681089857Sobrien if (translate 681189857Sobrien ? PREFIX(bcmp_translate) (d, d2, mcnt, translate) 681289857Sobrien : memcmp (d, d2, mcnt*sizeof(UCHAR_T))) 681389857Sobrien goto fail; 681489857Sobrien d += mcnt, d2 += mcnt; 681589857Sobrien 681689857Sobrien /* Do this because we've match some characters. */ 681789857Sobrien SET_REGS_MATCHED (); 681889857Sobrien } 681989857Sobrien } 682089857Sobrien break; 682189857Sobrien 682289857Sobrien 682389857Sobrien /* begline matches the empty string at the beginning of the string 682489857Sobrien (unless `not_bol' is set in `bufp'), and, if 682589857Sobrien `newline_anchor' is set, after newlines. */ 682689857Sobrien case begline: 682789857Sobrien DEBUG_PRINT1 ("EXECUTING begline.\n"); 682889857Sobrien 682989857Sobrien if (AT_STRINGS_BEG (d)) 683089857Sobrien { 683189857Sobrien if (!bufp->not_bol) break; 683289857Sobrien } 683389857Sobrien else if (d[-1] == '\n' && bufp->newline_anchor) 683489857Sobrien { 683589857Sobrien break; 683689857Sobrien } 683789857Sobrien /* In all other cases, we fail. */ 683889857Sobrien goto fail; 683989857Sobrien 684089857Sobrien 684189857Sobrien /* endline is the dual of begline. */ 684289857Sobrien case endline: 684389857Sobrien DEBUG_PRINT1 ("EXECUTING endline.\n"); 684489857Sobrien 684589857Sobrien if (AT_STRINGS_END (d)) 684689857Sobrien { 684789857Sobrien if (!bufp->not_eol) break; 684889857Sobrien } 684989857Sobrien 685089857Sobrien /* We have to ``prefetch'' the next character. */ 685189857Sobrien else if ((d == end1 ? 
*string2 : *d) == '\n' 685289857Sobrien && bufp->newline_anchor) 685389857Sobrien { 685489857Sobrien break; 685589857Sobrien } 685689857Sobrien goto fail; 685789857Sobrien 685889857Sobrien 685989857Sobrien /* Match at the very beginning of the data. */ 686089857Sobrien case begbuf: 686189857Sobrien DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 686289857Sobrien if (AT_STRINGS_BEG (d)) 686389857Sobrien break; 686489857Sobrien goto fail; 686589857Sobrien 686689857Sobrien 686789857Sobrien /* Match at the very end of the data. */ 686889857Sobrien case endbuf: 686989857Sobrien DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 687089857Sobrien if (AT_STRINGS_END (d)) 687189857Sobrien break; 687289857Sobrien goto fail; 687389857Sobrien 687489857Sobrien 687589857Sobrien /* on_failure_keep_string_jump is used to optimize `.*\n'. It 687689857Sobrien pushes NULL as the value for the string on the stack. Then 687789857Sobrien `pop_failure_point' will keep the current value for the 687889857Sobrien string, instead of restoring it. To see why, consider 687989857Sobrien matching `foo\nbar' against `.*\n'. The .* matches the foo; 688089857Sobrien then the . fails against the \n. But the next thing we want 688189857Sobrien to do is match the \n against the \n; if we restored the 688289857Sobrien string value, we would be back at the foo. 688389857Sobrien 688489857Sobrien Because this is used only in specific cases, we don't need to 688589857Sobrien check all the things that `on_failure_jump' does, to make 688689857Sobrien sure the right things get saved on the stack. Hence we don't 688789857Sobrien share its code. The only reason to push anything on the 688889857Sobrien stack at all is that otherwise we would have to change 688989857Sobrien `anychar's code to do something besides goto fail in this 689089857Sobrien case; that seems worse than this. 
*/ 689189857Sobrien case on_failure_keep_string_jump: 689289857Sobrien DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 689389857Sobrien 689489857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p); 689589857Sobrien#ifdef _LIBC 689689857Sobrien DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); 689789857Sobrien#else 689889857Sobrien DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 689989857Sobrien#endif 690089857Sobrien 690189857Sobrien PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 690289857Sobrien break; 690389857Sobrien 690489857Sobrien 690589857Sobrien /* Uses of on_failure_jump: 690689857Sobrien 690789857Sobrien Each alternative starts with an on_failure_jump that points 690889857Sobrien to the beginning of the next alternative. Each alternative 690989857Sobrien except the last ends with a jump that in effect jumps past 691089857Sobrien the rest of the alternatives. (They really jump to the 691189857Sobrien ending jump of the following alternative, because tensioning 691289857Sobrien these jumps is a hassle.) 691389857Sobrien 691489857Sobrien Repeats start with an on_failure_jump that points past both 691589857Sobrien the repetition text and either the following jump or 691689857Sobrien pop_failure_jump back to this on_failure_jump. */ 691789857Sobrien case on_failure_jump: 691889857Sobrien on_failure: 691989857Sobrien DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 692089857Sobrien 692189857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p); 692289857Sobrien#ifdef _LIBC 692389857Sobrien DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); 692489857Sobrien#else 692589857Sobrien DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 692689857Sobrien#endif 692789857Sobrien 692889857Sobrien /* If this on_failure_jump comes right before a group (i.e., 692989857Sobrien the original * applied to a group), save the information 693089857Sobrien for that group and all inner ones, so that if we fail back 693189857Sobrien to this point, the group's information will be correct. 
693289857Sobrien For example, in \(a*\)*\1, we need the preceding group, 693389857Sobrien and in \(zz\(a*\)b*\)\2, we need the inner group. */ 693489857Sobrien 693589857Sobrien /* We can't use `p' to check ahead because we push 693689857Sobrien a failure point to `p + mcnt' after we do this. */ 693789857Sobrien p1 = p; 693889857Sobrien 693989857Sobrien /* We need to skip no_op's before we look for the 694089857Sobrien start_memory in case this on_failure_jump is happening as 694189857Sobrien the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 694289857Sobrien against aba. */ 694389857Sobrien while (p1 < pend && (re_opcode_t) *p1 == no_op) 694489857Sobrien p1++; 694589857Sobrien 694689857Sobrien if (p1 < pend && (re_opcode_t) *p1 == start_memory) 694789857Sobrien { 694889857Sobrien /* We have a new highest active register now. This will 694989857Sobrien get reset at the start_memory we are about to get to, 695089857Sobrien but we will have saved all the registers relevant to 695189857Sobrien this repetition op, as described above. */ 695289857Sobrien highest_active_reg = *(p1 + 1) + *(p1 + 2); 695389857Sobrien if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 695489857Sobrien lowest_active_reg = *(p1 + 1); 695589857Sobrien } 695689857Sobrien 695789857Sobrien DEBUG_PRINT1 (":\n"); 695889857Sobrien PUSH_FAILURE_POINT (p + mcnt, d, -2); 695989857Sobrien break; 696089857Sobrien 696189857Sobrien 696289857Sobrien /* A smart repeat ends with `maybe_pop_jump'. 696389857Sobrien We change it to either `pop_failure_jump' or `jump'. */ 696489857Sobrien case maybe_pop_jump: 696589857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p); 696689857Sobrien DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 696789857Sobrien { 696889857Sobrien register UCHAR_T *p2 = p; 696989857Sobrien 697089857Sobrien /* Compare the beginning of the repeat with what in the 697189857Sobrien pattern follows its end. 
If we can establish that there 697289857Sobrien is nothing that they would both match, i.e., that we 697389857Sobrien would have to backtrack because of (as in, e.g., `a*a') 697489857Sobrien then we can change to pop_failure_jump, because we'll 697589857Sobrien never have to backtrack. 697689857Sobrien 697789857Sobrien This is not true in the case of alternatives: in 697889857Sobrien `(a|ab)*' we do need to backtrack to the `ab' alternative 697989857Sobrien (e.g., if the string was `ab'). But instead of trying to 698089857Sobrien detect that here, the alternative has put on a dummy 698189857Sobrien failure point which is what we will end up popping. */ 698289857Sobrien 698389857Sobrien /* Skip over open/close-group commands. 698489857Sobrien If what follows this loop is a ...+ construct, 698589857Sobrien look at what begins its body, since we will have to 698689857Sobrien match at least one of that. */ 698789857Sobrien while (1) 698889857Sobrien { 698989857Sobrien if (p2 + 2 < pend 699089857Sobrien && ((re_opcode_t) *p2 == stop_memory 699189857Sobrien || (re_opcode_t) *p2 == start_memory)) 699289857Sobrien p2 += 3; 699389857Sobrien else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend 699489857Sobrien && (re_opcode_t) *p2 == dummy_failure_jump) 699589857Sobrien p2 += 2 + 2 * OFFSET_ADDRESS_SIZE; 699689857Sobrien else 699789857Sobrien break; 699889857Sobrien } 699989857Sobrien 700089857Sobrien p1 = p + mcnt; 700189857Sobrien /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 700289857Sobrien to the `maybe_finalize_jump' of this case. Examine what 700389857Sobrien follows. */ 700489857Sobrien 700589857Sobrien /* If we're at the end of the pattern, we can change. */ 700689857Sobrien if (p2 == pend) 700789857Sobrien { 700889857Sobrien /* Consider what happens when matching ":\(.*\)" 700989857Sobrien against ":/". I don't really understand this code 701089857Sobrien yet. 
*/ 701189857Sobrien p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 701289857Sobrien pop_failure_jump; 701389857Sobrien DEBUG_PRINT1 701489857Sobrien (" End of pattern: change to `pop_failure_jump'.\n"); 701589857Sobrien } 701689857Sobrien 701789857Sobrien else if ((re_opcode_t) *p2 == exactn 701889857Sobrien#ifdef MBS_SUPPORT 701989857Sobrien || (re_opcode_t) *p2 == exactn_bin 702089857Sobrien#endif 702189857Sobrien || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 702289857Sobrien { 702389857Sobrien register UCHAR_T c 702489857Sobrien = *p2 == (UCHAR_T) endline ? '\n' : p2[2]; 702589857Sobrien 702689857Sobrien if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn 702789857Sobrien#ifdef MBS_SUPPORT 702889857Sobrien || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin 702989857Sobrien#endif 703089857Sobrien ) && p1[3+OFFSET_ADDRESS_SIZE] != c) 703189857Sobrien { 703289857Sobrien p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 703389857Sobrien pop_failure_jump; 703489857Sobrien#ifdef WCHAR 703589857Sobrien DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n", 703689857Sobrien (wint_t) c, 703789857Sobrien (wint_t) p1[3+OFFSET_ADDRESS_SIZE]); 703889857Sobrien#else 703989857Sobrien DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 704089857Sobrien (char) c, 704189857Sobrien (char) p1[3+OFFSET_ADDRESS_SIZE]); 704289857Sobrien#endif 704389857Sobrien } 704489857Sobrien 704589857Sobrien#ifndef WCHAR 704689857Sobrien else if ((re_opcode_t) p1[3] == charset 704789857Sobrien || (re_opcode_t) p1[3] == charset_not) 704889857Sobrien { 7049218822Sdim int negate = (re_opcode_t) p1[3] == charset_not; 705089857Sobrien 705189857Sobrien if (c < (unsigned) (p1[4] * BYTEWIDTH) 705289857Sobrien && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 7053218822Sdim negate = !negate; 705489857Sobrien 7055218822Sdim /* `negate' is equal to 1 if c would match, which means 705689857Sobrien that we can't change to pop_failure_jump. 
*/ 7057218822Sdim if (!negate) 705889857Sobrien { 705989857Sobrien p[-3] = (unsigned char) pop_failure_jump; 706089857Sobrien DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 706189857Sobrien } 706289857Sobrien } 706389857Sobrien#endif /* not WCHAR */ 706489857Sobrien } 706589857Sobrien#ifndef WCHAR 706689857Sobrien else if ((re_opcode_t) *p2 == charset) 706789857Sobrien { 706889857Sobrien /* We win if the first character of the loop is not part 706989857Sobrien of the charset. */ 707089857Sobrien if ((re_opcode_t) p1[3] == exactn 707189857Sobrien && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] 707289857Sobrien && (p2[2 + p1[5] / BYTEWIDTH] 707389857Sobrien & (1 << (p1[5] % BYTEWIDTH))))) 707489857Sobrien { 707589857Sobrien p[-3] = (unsigned char) pop_failure_jump; 707689857Sobrien DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 707789857Sobrien } 707889857Sobrien 707989857Sobrien else if ((re_opcode_t) p1[3] == charset_not) 708089857Sobrien { 708189857Sobrien int idx; 708289857Sobrien /* We win if the charset_not inside the loop 708389857Sobrien lists every character listed in the charset after. */ 708489857Sobrien for (idx = 0; idx < (int) p2[1]; idx++) 708589857Sobrien if (! (p2[2 + idx] == 0 708689857Sobrien || (idx < (int) p1[4] 708789857Sobrien && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 708889857Sobrien break; 708989857Sobrien 709089857Sobrien if (idx == p2[1]) 709189857Sobrien { 709289857Sobrien p[-3] = (unsigned char) pop_failure_jump; 709389857Sobrien DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 709489857Sobrien } 709589857Sobrien } 709689857Sobrien else if ((re_opcode_t) p1[3] == charset) 709789857Sobrien { 709889857Sobrien int idx; 709989857Sobrien /* We win if the charset inside the loop 710089857Sobrien has no overlap with the one after the loop. 
*/ 710189857Sobrien for (idx = 0; 710289857Sobrien idx < (int) p2[1] && idx < (int) p1[4]; 710389857Sobrien idx++) 710489857Sobrien if ((p2[2 + idx] & p1[5 + idx]) != 0) 710589857Sobrien break; 710689857Sobrien 710789857Sobrien if (idx == p2[1] || idx == p1[4]) 710889857Sobrien { 710989857Sobrien p[-3] = (unsigned char) pop_failure_jump; 711089857Sobrien DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 711189857Sobrien } 711289857Sobrien } 711389857Sobrien } 711489857Sobrien#endif /* not WCHAR */ 711589857Sobrien } 711689857Sobrien p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. */ 711789857Sobrien if ((re_opcode_t) p[-1] != pop_failure_jump) 711889857Sobrien { 711989857Sobrien p[-1] = (UCHAR_T) jump; 712089857Sobrien DEBUG_PRINT1 (" Match => jump.\n"); 712189857Sobrien goto unconditional_jump; 712289857Sobrien } 712389857Sobrien /* Note fall through. */ 712489857Sobrien 712589857Sobrien 712689857Sobrien /* The end of a simple repeat has a pop_failure_jump back to 712789857Sobrien its matching on_failure_jump, where the latter will push a 712889857Sobrien failure point. The pop_failure_jump takes off failure 712989857Sobrien points put on by this pop_failure_jump's matching 713089857Sobrien on_failure_jump; we got through the pattern to here from the 713189857Sobrien matching on_failure_jump, so didn't fail. */ 713289857Sobrien case pop_failure_jump: 713389857Sobrien { 713489857Sobrien /* We need to pass separate storage for the lowest and 713589857Sobrien highest registers, even though we don't care about the 713689857Sobrien actual values. Otherwise, we will restore only one 713789857Sobrien register from the stack, since lowest will == highest in 713889857Sobrien `pop_failure_point'. 
*/ 713989857Sobrien active_reg_t dummy_low_reg, dummy_high_reg; 714089857Sobrien UCHAR_T *pdummy = NULL; 714189857Sobrien const CHAR_T *sdummy = NULL; 714289857Sobrien 714389857Sobrien DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 714489857Sobrien POP_FAILURE_POINT (sdummy, pdummy, 714589857Sobrien dummy_low_reg, dummy_high_reg, 714689857Sobrien reg_dummy, reg_dummy, reg_info_dummy); 714789857Sobrien } 714889857Sobrien /* Note fall through. */ 714989857Sobrien 715089857Sobrien unconditional_jump: 715189857Sobrien#ifdef _LIBC 715289857Sobrien DEBUG_PRINT2 ("\n%p: ", p); 715389857Sobrien#else 715489857Sobrien DEBUG_PRINT2 ("\n0x%x: ", p); 715589857Sobrien#endif 715689857Sobrien /* Note fall through. */ 715789857Sobrien 715889857Sobrien /* Unconditionally jump (without popping any failure points). */ 715989857Sobrien case jump: 716089857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 716189857Sobrien DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 716289857Sobrien p += mcnt; /* Do the jump. */ 716389857Sobrien#ifdef _LIBC 716489857Sobrien DEBUG_PRINT2 ("(to %p).\n", p); 716589857Sobrien#else 716689857Sobrien DEBUG_PRINT2 ("(to 0x%x).\n", p); 716789857Sobrien#endif 716889857Sobrien break; 716989857Sobrien 717089857Sobrien 717189857Sobrien /* We need this opcode so we can detect where alternatives end 717289857Sobrien in `group_match_null_string_p' et al. */ 717389857Sobrien case jump_past_alt: 717489857Sobrien DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 717589857Sobrien goto unconditional_jump; 717689857Sobrien 717789857Sobrien 717889857Sobrien /* Normally, the on_failure_jump pushes a failure point, which 717989857Sobrien then gets popped at pop_failure_jump. We will end up at 718089857Sobrien pop_failure_jump, also, and with a pattern of, say, `a+', we 718189857Sobrien are skipping over the on_failure_jump, so we have to push 718289857Sobrien something meaningless for pop_failure_jump to pop. 
*/ 718389857Sobrien case dummy_failure_jump: 718489857Sobrien DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 718589857Sobrien /* It doesn't matter what we push for the string here. What 718689857Sobrien the code at `fail' tests is the value for the pattern. */ 718789857Sobrien PUSH_FAILURE_POINT (NULL, NULL, -2); 718889857Sobrien goto unconditional_jump; 718989857Sobrien 719089857Sobrien 719189857Sobrien /* At the end of an alternative, we need to push a dummy failure 719289857Sobrien point in case we are followed by a `pop_failure_jump', because 719389857Sobrien we don't want the failure point for the alternative to be 719489857Sobrien popped. For example, matching `(a|ab)*' against `aab' 719589857Sobrien requires that we match the `ab' alternative. */ 719689857Sobrien case push_dummy_failure: 719789857Sobrien DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 719889857Sobrien /* See comments just above at `dummy_failure_jump' about the 719989857Sobrien two zeroes. */ 720089857Sobrien PUSH_FAILURE_POINT (NULL, NULL, -2); 720189857Sobrien break; 720289857Sobrien 720389857Sobrien /* Have to succeed matching what follows at least n times. 720489857Sobrien After that, handle like `on_failure_jump'. */ 720589857Sobrien case succeed_n: 720689857Sobrien EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 720789857Sobrien DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 720889857Sobrien 720989857Sobrien assert (mcnt >= 0); 721089857Sobrien /* Originally, this is how many times we HAVE to succeed. 
*/ 721189857Sobrien if (mcnt > 0) 721289857Sobrien { 721389857Sobrien mcnt--; 721489857Sobrien p += OFFSET_ADDRESS_SIZE; 721589857Sobrien STORE_NUMBER_AND_INCR (p, mcnt); 721689857Sobrien#ifdef _LIBC 721789857Sobrien DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE 721889857Sobrien , mcnt); 721989857Sobrien#else 722089857Sobrien DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE 722189857Sobrien , mcnt); 722289857Sobrien#endif 722389857Sobrien } 722489857Sobrien else if (mcnt == 0) 722589857Sobrien { 722689857Sobrien#ifdef _LIBC 722789857Sobrien DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", 722889857Sobrien p + OFFSET_ADDRESS_SIZE); 722989857Sobrien#else 723089857Sobrien DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", 723189857Sobrien p + OFFSET_ADDRESS_SIZE); 723289857Sobrien#endif /* _LIBC */ 723389857Sobrien 723489857Sobrien#ifdef WCHAR 723589857Sobrien p[1] = (UCHAR_T) no_op; 723689857Sobrien#else 723789857Sobrien p[2] = (UCHAR_T) no_op; 723889857Sobrien p[3] = (UCHAR_T) no_op; 723989857Sobrien#endif /* WCHAR */ 724089857Sobrien goto on_failure; 724189857Sobrien } 724289857Sobrien break; 724389857Sobrien 724489857Sobrien case jump_n: 724589857Sobrien EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 724689857Sobrien DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 724789857Sobrien 724889857Sobrien /* Originally, this is how many times we CAN jump. 
*/ 724989857Sobrien if (mcnt) 725089857Sobrien { 725189857Sobrien mcnt--; 725289857Sobrien STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt); 725389857Sobrien 725489857Sobrien#ifdef _LIBC 725589857Sobrien DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE, 725689857Sobrien mcnt); 725789857Sobrien#else 725889857Sobrien DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE, 725989857Sobrien mcnt); 726089857Sobrien#endif /* _LIBC */ 726189857Sobrien goto unconditional_jump; 726289857Sobrien } 726389857Sobrien /* If don't have to jump any more, skip over the rest of command. */ 726489857Sobrien else 726589857Sobrien p += 2 * OFFSET_ADDRESS_SIZE; 726689857Sobrien break; 726789857Sobrien 726889857Sobrien case set_number_at: 726989857Sobrien { 727089857Sobrien DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 727189857Sobrien 727289857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p); 727389857Sobrien p1 = p + mcnt; 727489857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p); 727589857Sobrien#ifdef _LIBC 727689857Sobrien DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 727789857Sobrien#else 727889857Sobrien DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 727989857Sobrien#endif 728089857Sobrien STORE_NUMBER (p1, mcnt); 728189857Sobrien break; 728289857Sobrien } 728389857Sobrien 728489857Sobrien#if 0 728589857Sobrien /* The DEC Alpha C compiler 3.x generates incorrect code for the 728689857Sobrien test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 728789857Sobrien AT_WORD_BOUNDARY, so this code is disabled. Expanding the 728889857Sobrien macro and introducing temporary variables works around the bug. 
*/ 728989857Sobrien 729089857Sobrien case wordbound: 729189857Sobrien DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 729289857Sobrien if (AT_WORD_BOUNDARY (d)) 729389857Sobrien break; 729489857Sobrien goto fail; 729589857Sobrien 729689857Sobrien case notwordbound: 729789857Sobrien DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 729889857Sobrien if (AT_WORD_BOUNDARY (d)) 729989857Sobrien goto fail; 730089857Sobrien break; 730189857Sobrien#else 730289857Sobrien case wordbound: 730389857Sobrien { 730489857Sobrien boolean prevchar, thischar; 730589857Sobrien 730689857Sobrien DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 730789857Sobrien if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 730889857Sobrien break; 730989857Sobrien 731089857Sobrien prevchar = WORDCHAR_P (d - 1); 731189857Sobrien thischar = WORDCHAR_P (d); 731289857Sobrien if (prevchar != thischar) 731389857Sobrien break; 731489857Sobrien goto fail; 731589857Sobrien } 731689857Sobrien 731789857Sobrien case notwordbound: 731889857Sobrien { 731989857Sobrien boolean prevchar, thischar; 732089857Sobrien 732189857Sobrien DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 732289857Sobrien if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 732389857Sobrien goto fail; 732489857Sobrien 732589857Sobrien prevchar = WORDCHAR_P (d - 1); 732689857Sobrien thischar = WORDCHAR_P (d); 732789857Sobrien if (prevchar != thischar) 732889857Sobrien goto fail; 732989857Sobrien break; 733089857Sobrien } 733189857Sobrien#endif 733289857Sobrien 733389857Sobrien case wordbeg: 733489857Sobrien DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 733589857Sobrien if (!AT_STRINGS_END (d) && WORDCHAR_P (d) 733689857Sobrien && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 733789857Sobrien break; 733889857Sobrien goto fail; 733989857Sobrien 734089857Sobrien case wordend: 734189857Sobrien DEBUG_PRINT1 ("EXECUTING wordend.\n"); 734289857Sobrien if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 734389857Sobrien && (AT_STRINGS_END (d) || !WORDCHAR_P (d))) 734489857Sobrien break; 734589857Sobrien 
goto fail; 734689857Sobrien 734789857Sobrien#ifdef emacs 734889857Sobrien case before_dot: 734989857Sobrien DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 735089857Sobrien if (PTR_CHAR_POS ((unsigned char *) d) >= point) 735189857Sobrien goto fail; 735289857Sobrien break; 735389857Sobrien 735489857Sobrien case at_dot: 735589857Sobrien DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 735689857Sobrien if (PTR_CHAR_POS ((unsigned char *) d) != point) 735789857Sobrien goto fail; 735889857Sobrien break; 735989857Sobrien 736089857Sobrien case after_dot: 736189857Sobrien DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 736289857Sobrien if (PTR_CHAR_POS ((unsigned char *) d) <= point) 736389857Sobrien goto fail; 736489857Sobrien break; 736589857Sobrien 736689857Sobrien case syntaxspec: 736789857Sobrien DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 736889857Sobrien mcnt = *p++; 736989857Sobrien goto matchsyntax; 737089857Sobrien 737189857Sobrien case wordchar: 737289857Sobrien DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 737389857Sobrien mcnt = (int) Sword; 737489857Sobrien matchsyntax: 737589857Sobrien PREFETCH (); 737689857Sobrien /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 737789857Sobrien d++; 737889857Sobrien if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 737989857Sobrien goto fail; 738089857Sobrien SET_REGS_MATCHED (); 738189857Sobrien break; 738289857Sobrien 738389857Sobrien case notsyntaxspec: 738489857Sobrien DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 738589857Sobrien mcnt = *p++; 738689857Sobrien goto matchnotsyntax; 738789857Sobrien 738889857Sobrien case notwordchar: 738989857Sobrien DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 739089857Sobrien mcnt = (int) Sword; 739189857Sobrien matchnotsyntax: 739289857Sobrien PREFETCH (); 739389857Sobrien /* Can't use *d++ here; SYNTAX may be an unsafe macro. 
*/ 739489857Sobrien d++; 739589857Sobrien if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 739689857Sobrien goto fail; 739789857Sobrien SET_REGS_MATCHED (); 739889857Sobrien break; 739989857Sobrien 740089857Sobrien#else /* not emacs */ 740189857Sobrien case wordchar: 740289857Sobrien DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 740389857Sobrien PREFETCH (); 740489857Sobrien if (!WORDCHAR_P (d)) 740589857Sobrien goto fail; 740689857Sobrien SET_REGS_MATCHED (); 740789857Sobrien d++; 740889857Sobrien break; 740989857Sobrien 741089857Sobrien case notwordchar: 741189857Sobrien DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 741289857Sobrien PREFETCH (); 741389857Sobrien if (WORDCHAR_P (d)) 741489857Sobrien goto fail; 741589857Sobrien SET_REGS_MATCHED (); 741689857Sobrien d++; 741789857Sobrien break; 741889857Sobrien#endif /* not emacs */ 741989857Sobrien 742089857Sobrien default: 742189857Sobrien abort (); 742289857Sobrien } 742389857Sobrien continue; /* Successfully executed one pattern command; keep going. */ 742489857Sobrien 742589857Sobrien 742689857Sobrien /* We goto here if a matching operation fails. */ 742789857Sobrien fail: 742889857Sobrien if (!FAIL_STACK_EMPTY ()) 742989857Sobrien { /* A restart point is known. Restore to that state. */ 743089857Sobrien DEBUG_PRINT1 ("\nFAIL:\n"); 743189857Sobrien POP_FAILURE_POINT (d, p, 743289857Sobrien lowest_active_reg, highest_active_reg, 743389857Sobrien regstart, regend, reg_info); 743489857Sobrien 743589857Sobrien /* If this failure point is a dummy, try the next one. */ 743689857Sobrien if (!p) 743789857Sobrien goto fail; 743889857Sobrien 743989857Sobrien /* If we failed to the end of the pattern, don't examine *p. 
*/ 744089857Sobrien assert (p <= pend); 744189857Sobrien if (p < pend) 744289857Sobrien { 744389857Sobrien boolean is_a_jump_n = false; 744489857Sobrien 744589857Sobrien /* If failed to a backwards jump that's part of a repetition 744689857Sobrien loop, need to pop this failure point and use the next one. */ 744789857Sobrien switch ((re_opcode_t) *p) 744889857Sobrien { 744989857Sobrien case jump_n: 745089857Sobrien is_a_jump_n = true; 745189857Sobrien case maybe_pop_jump: 745289857Sobrien case pop_failure_jump: 745389857Sobrien case jump: 745489857Sobrien p1 = p + 1; 745589857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 745689857Sobrien p1 += mcnt; 745789857Sobrien 745889857Sobrien if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 745989857Sobrien || (!is_a_jump_n 746089857Sobrien && (re_opcode_t) *p1 == on_failure_jump)) 746189857Sobrien goto fail; 746289857Sobrien break; 746389857Sobrien default: 746489857Sobrien /* do nothing */ ; 746589857Sobrien } 746689857Sobrien } 746789857Sobrien 746889857Sobrien if (d >= string1 && d <= end1) 746989857Sobrien dend = end_match_1; 747089857Sobrien } 747189857Sobrien else 747289857Sobrien break; /* Matching at this starting point really fails. */ 747389857Sobrien } /* for (;;) */ 747489857Sobrien 747589857Sobrien if (best_regs_set) 747689857Sobrien goto restore_best_regs; 747789857Sobrien 747889857Sobrien FREE_VARIABLES (); 747989857Sobrien 748089857Sobrien return -1; /* Failure to match. */ 748189857Sobrien} /* re_match_2 */ 748289857Sobrien 748389857Sobrien/* Subroutine definitions for re_match_2. */ 748489857Sobrien 748589857Sobrien 748689857Sobrien/* We are passed P pointing to a register number after a start_memory. 748789857Sobrien 748889857Sobrien Return true if the pattern up to the corresponding stop_memory can 748989857Sobrien match the empty string, and false otherwise. 749089857Sobrien 749189857Sobrien If we find the matching stop_memory, sets P to point to one past its number. 
749289857Sobrien Otherwise, sets P to an undefined byte less than or equal to END. 749389857Sobrien 749489857Sobrien We don't handle duplicates properly (yet). */ 749589857Sobrien 749689857Sobrienstatic boolean 7497218822SdimPREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end, 7498218822Sdim PREFIX(register_info_type) *reg_info) 749989857Sobrien{ 750089857Sobrien int mcnt; 750189857Sobrien /* Point to after the args to the start_memory. */ 750289857Sobrien UCHAR_T *p1 = *p + 2; 750389857Sobrien 750489857Sobrien while (p1 < end) 750589857Sobrien { 750689857Sobrien /* Skip over opcodes that can match nothing, and return true or 750789857Sobrien false, as appropriate, when we get to one that can't, or to the 750889857Sobrien matching stop_memory. */ 750989857Sobrien 751089857Sobrien switch ((re_opcode_t) *p1) 751189857Sobrien { 751289857Sobrien /* Could be either a loop or a series of alternatives. */ 751389857Sobrien case on_failure_jump: 751489857Sobrien p1++; 751589857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 751689857Sobrien 751789857Sobrien /* If the next operation is not a jump backwards in the 751889857Sobrien pattern. */ 751989857Sobrien 752089857Sobrien if (mcnt >= 0) 752189857Sobrien { 752289857Sobrien /* Go through the on_failure_jumps of the alternatives, 752389857Sobrien seeing if any of the alternatives cannot match nothing. 752489857Sobrien The last alternative starts with only a jump, 752589857Sobrien whereas the rest start with on_failure_jump and end 752689857Sobrien with a jump, e.g., here is the pattern for `a|b|c': 752789857Sobrien 752889857Sobrien /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 752989857Sobrien /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 753089857Sobrien /exactn/1/c 753189857Sobrien 753289857Sobrien So, we have to first go through the first (n-1) 753389857Sobrien alternatives and then deal with the last one separately. 
*/ 753489857Sobrien 753589857Sobrien 753689857Sobrien /* Deal with the first (n-1) alternatives, which start 753789857Sobrien with an on_failure_jump (see above) that jumps to right 753889857Sobrien past a jump_past_alt. */ 753989857Sobrien 754089857Sobrien while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] == 754189857Sobrien jump_past_alt) 754289857Sobrien { 754389857Sobrien /* `mcnt' holds how many bytes long the alternative 754489857Sobrien is, including the ending `jump_past_alt' and 754589857Sobrien its number. */ 754689857Sobrien 754789857Sobrien if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt - 754889857Sobrien (1 + OFFSET_ADDRESS_SIZE), 754989857Sobrien reg_info)) 755089857Sobrien return false; 755189857Sobrien 755289857Sobrien /* Move to right after this alternative, including the 755389857Sobrien jump_past_alt. */ 755489857Sobrien p1 += mcnt; 755589857Sobrien 755689857Sobrien /* Break if it's the beginning of an n-th alternative 755789857Sobrien that doesn't begin with an on_failure_jump. */ 755889857Sobrien if ((re_opcode_t) *p1 != on_failure_jump) 755989857Sobrien break; 756089857Sobrien 756189857Sobrien /* Still have to check that it's not an n-th 756289857Sobrien alternative that starts with an on_failure_jump. */ 756389857Sobrien p1++; 756489857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 756589857Sobrien if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] != 756689857Sobrien jump_past_alt) 756789857Sobrien { 756889857Sobrien /* Get to the beginning of the n-th alternative. */ 756989857Sobrien p1 -= 1 + OFFSET_ADDRESS_SIZE; 757089857Sobrien break; 757189857Sobrien } 757289857Sobrien } 757389857Sobrien 757489857Sobrien /* Deal with the last alternative: go back and get number 757589857Sobrien of the `jump_past_alt' just before it. `mcnt' contains 757689857Sobrien the length of the alternative. 
*/ 757789857Sobrien EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE); 757889857Sobrien 757989857Sobrien if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info)) 758089857Sobrien return false; 758189857Sobrien 758289857Sobrien p1 += mcnt; /* Get past the n-th alternative. */ 758389857Sobrien } /* if mcnt > 0 */ 758489857Sobrien break; 758589857Sobrien 758689857Sobrien 758789857Sobrien case stop_memory: 758889857Sobrien assert (p1[1] == **p); 758989857Sobrien *p = p1 + 2; 759089857Sobrien return true; 759189857Sobrien 759289857Sobrien 759389857Sobrien default: 759489857Sobrien if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info)) 759589857Sobrien return false; 759689857Sobrien } 759789857Sobrien } /* while p1 < end */ 759889857Sobrien 759989857Sobrien return false; 760089857Sobrien} /* group_match_null_string_p */ 760189857Sobrien 760289857Sobrien 760389857Sobrien/* Similar to group_match_null_string_p, but doesn't deal with alternatives: 760489857Sobrien It expects P to be the first byte of a single alternative and END one 760589857Sobrien byte past the last. The alternative can contain groups. */ 760689857Sobrien 760789857Sobrienstatic boolean 7608218822SdimPREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end, 7609218822Sdim PREFIX(register_info_type) *reg_info) 761089857Sobrien{ 761189857Sobrien int mcnt; 761289857Sobrien UCHAR_T *p1 = p; 761389857Sobrien 761489857Sobrien while (p1 < end) 761589857Sobrien { 761689857Sobrien /* Skip over opcodes that can match nothing, and break when we get 761789857Sobrien to one that can't. */ 761889857Sobrien 761989857Sobrien switch ((re_opcode_t) *p1) 762089857Sobrien { 762189857Sobrien /* It's a loop. 
*/ 762289857Sobrien case on_failure_jump: 762389857Sobrien p1++; 762489857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 762589857Sobrien p1 += mcnt; 762689857Sobrien break; 762789857Sobrien 762889857Sobrien default: 762989857Sobrien if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info)) 763089857Sobrien return false; 763189857Sobrien } 763289857Sobrien } /* while p1 < end */ 763389857Sobrien 763489857Sobrien return true; 763589857Sobrien} /* alt_match_null_string_p */ 763689857Sobrien 763789857Sobrien 763889857Sobrien/* Deals with the ops common to group_match_null_string_p and 763989857Sobrien alt_match_null_string_p. 764089857Sobrien 764189857Sobrien Sets P to one after the op and its arguments, if any. */ 764289857Sobrien 764389857Sobrienstatic boolean 7644218822SdimPREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end, 7645218822Sdim PREFIX(register_info_type) *reg_info) 764689857Sobrien{ 764789857Sobrien int mcnt; 764889857Sobrien boolean ret; 764989857Sobrien int reg_no; 765089857Sobrien UCHAR_T *p1 = *p; 765189857Sobrien 765289857Sobrien switch ((re_opcode_t) *p1++) 765389857Sobrien { 765489857Sobrien case no_op: 765589857Sobrien case begline: 765689857Sobrien case endline: 765789857Sobrien case begbuf: 765889857Sobrien case endbuf: 765989857Sobrien case wordbeg: 766089857Sobrien case wordend: 766189857Sobrien case wordbound: 766289857Sobrien case notwordbound: 766389857Sobrien#ifdef emacs 766489857Sobrien case before_dot: 766589857Sobrien case at_dot: 766689857Sobrien case after_dot: 766789857Sobrien#endif 766889857Sobrien break; 766989857Sobrien 767089857Sobrien case start_memory: 767189857Sobrien reg_no = *p1; 767289857Sobrien assert (reg_no > 0 && reg_no <= MAX_REGNUM); 767389857Sobrien ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info); 767489857Sobrien 767589857Sobrien /* Have to set this here in case we're checking a group which 767689857Sobrien contains a group and a back reference to it. 
*/ 767789857Sobrien 767889857Sobrien if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 767989857Sobrien REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 768089857Sobrien 768189857Sobrien if (!ret) 768289857Sobrien return false; 768389857Sobrien break; 768489857Sobrien 768589857Sobrien /* If this is an optimized succeed_n for zero times, make the jump. */ 768689857Sobrien case jump: 768789857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 768889857Sobrien if (mcnt >= 0) 768989857Sobrien p1 += mcnt; 769089857Sobrien else 769189857Sobrien return false; 769289857Sobrien break; 769389857Sobrien 769489857Sobrien case succeed_n: 769589857Sobrien /* Get to the number of times to succeed. */ 769689857Sobrien p1 += OFFSET_ADDRESS_SIZE; 769789857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 769889857Sobrien 769989857Sobrien if (mcnt == 0) 770089857Sobrien { 770189857Sobrien p1 -= 2 * OFFSET_ADDRESS_SIZE; 770289857Sobrien EXTRACT_NUMBER_AND_INCR (mcnt, p1); 770389857Sobrien p1 += mcnt; 770489857Sobrien } 770589857Sobrien else 770689857Sobrien return false; 770789857Sobrien break; 770889857Sobrien 770989857Sobrien case duplicate: 771089857Sobrien if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 771189857Sobrien return false; 771289857Sobrien break; 771389857Sobrien 771489857Sobrien case set_number_at: 771589857Sobrien p1 += 2 * OFFSET_ADDRESS_SIZE; 771689857Sobrien 771789857Sobrien default: 771889857Sobrien /* All other opcodes mean we cannot match the empty string. */ 771989857Sobrien return false; 772089857Sobrien } 772189857Sobrien 772289857Sobrien *p = p1; 772389857Sobrien return true; 772489857Sobrien} /* common_op_match_null_string_p */ 772589857Sobrien 772689857Sobrien 772789857Sobrien/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 772889857Sobrien bytes; nonzero otherwise. 
*/ 772989857Sobrien 773089857Sobrienstatic int 7731218822SdimPREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len, 7732218822Sdim RE_TRANSLATE_TYPE translate) 773389857Sobrien{ 773489857Sobrien register const UCHAR_T *p1 = (const UCHAR_T *) s1; 773589857Sobrien register const UCHAR_T *p2 = (const UCHAR_T *) s2; 773689857Sobrien while (len) 773789857Sobrien { 773889857Sobrien#ifdef WCHAR 773989857Sobrien if (((*p1<=0xff)?translate[*p1++]:*p1++) 774089857Sobrien != ((*p2<=0xff)?translate[*p2++]:*p2++)) 774189857Sobrien return 1; 774289857Sobrien#else /* BYTE */ 774389857Sobrien if (translate[*p1++] != translate[*p2++]) return 1; 774489857Sobrien#endif /* WCHAR */ 774589857Sobrien len--; 774689857Sobrien } 774789857Sobrien return 0; 774889857Sobrien} 774989857Sobrien 775089857Sobrien 775189857Sobrien#else /* not INSIDE_RECURSION */ 775289857Sobrien 775389857Sobrien/* Entry points for GNU code. */ 775489857Sobrien 775589857Sobrien/* re_compile_pattern is the GNU regular expression compiler: it 775689857Sobrien compiles PATTERN (of length SIZE) and puts the result in BUFP. 775789857Sobrien Returns 0 if the pattern was valid, otherwise an error string. 775889857Sobrien 775989857Sobrien Assumes the `allocated' (and perhaps `buffer') and `translate' fields 776089857Sobrien are set in BUFP on entry. 776189857Sobrien 776289857Sobrien We call regex_compile to do the actual compilation. */ 776389857Sobrien 776489857Sobrienconst char * 7765218822Sdimre_compile_pattern (const char *pattern, size_t length, 7766218822Sdim struct re_pattern_buffer *bufp) 776789857Sobrien{ 776889857Sobrien reg_errcode_t ret; 776989857Sobrien 777089857Sobrien /* GNU code is written to assume at least RE_NREGS registers will be set 777189857Sobrien (and at least one extra will be -1). 
*/ 777289857Sobrien bufp->regs_allocated = REGS_UNALLOCATED; 777389857Sobrien 777489857Sobrien /* And GNU code determines whether or not to get register information 777589857Sobrien by passing null for the REGS argument to re_match, etc., not by 777689857Sobrien setting no_sub. */ 777789857Sobrien bufp->no_sub = 0; 777889857Sobrien 777989857Sobrien /* Match anchors at newline. */ 778089857Sobrien bufp->newline_anchor = 1; 778189857Sobrien 778289857Sobrien# ifdef MBS_SUPPORT 778389857Sobrien if (MB_CUR_MAX != 1) 778489857Sobrien ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp); 778589857Sobrien else 778689857Sobrien# endif 778789857Sobrien ret = byte_regex_compile (pattern, length, re_syntax_options, bufp); 778889857Sobrien 778989857Sobrien if (!ret) 779089857Sobrien return NULL; 7791130561Sobrien return gettext (re_error_msgid[(int) ret]); 779289857Sobrien} 779389857Sobrien#ifdef _LIBC 779489857Sobrienweak_alias (__re_compile_pattern, re_compile_pattern) 779589857Sobrien#endif 779689857Sobrien 779789857Sobrien/* Entry points compatible with 4.2 BSD regex library. We don't define 779889857Sobrien them unless specifically requested. */ 779989857Sobrien 780089857Sobrien#if defined _REGEX_RE_COMP || defined _LIBC 780189857Sobrien 780289857Sobrien/* BSD has one and only one pattern buffer. */ 780389857Sobrienstatic struct re_pattern_buffer re_comp_buf; 780489857Sobrien 780589857Sobrienchar * 780689857Sobrien#ifdef _LIBC 780789857Sobrien/* Make these definitions weak in libc, so POSIX programs can redefine 780889857Sobrien these names if they don't use our functions, and still use 780989857Sobrien regcomp/regexec below without link errors. 
*/ 781089857Sobrienweak_function 781189857Sobrien#endif 7812218822Sdimre_comp (const char *s) 781389857Sobrien{ 781489857Sobrien reg_errcode_t ret; 781589857Sobrien 781689857Sobrien if (!s) 781789857Sobrien { 781889857Sobrien if (!re_comp_buf.buffer) 7819218822Sdim return (char *) gettext ("No previous regular expression"); 782089857Sobrien return 0; 782189857Sobrien } 782289857Sobrien 782389857Sobrien if (!re_comp_buf.buffer) 782489857Sobrien { 782589857Sobrien re_comp_buf.buffer = (unsigned char *) malloc (200); 782689857Sobrien if (re_comp_buf.buffer == NULL) 7827130561Sobrien return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 782889857Sobrien re_comp_buf.allocated = 200; 782989857Sobrien 783089857Sobrien re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 783189857Sobrien if (re_comp_buf.fastmap == NULL) 7832130561Sobrien return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 783389857Sobrien } 783489857Sobrien 783589857Sobrien /* Since `re_exec' always passes NULL for the `regs' argument, we 783689857Sobrien don't need to initialize the pattern buffer fields which affect it. */ 783789857Sobrien 783889857Sobrien /* Match anchors at newlines. */ 783989857Sobrien re_comp_buf.newline_anchor = 1; 784089857Sobrien 784189857Sobrien# ifdef MBS_SUPPORT 784289857Sobrien if (MB_CUR_MAX != 1) 784389857Sobrien ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 784489857Sobrien else 784589857Sobrien# endif 784689857Sobrien ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 784789857Sobrien 784889857Sobrien if (!ret) 784989857Sobrien return NULL; 785089857Sobrien 785189857Sobrien /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ 7852130561Sobrien return (char *) gettext (re_error_msgid[(int) ret]); 785389857Sobrien} 785489857Sobrien 785589857Sobrien 785689857Sobrienint 785789857Sobrien#ifdef _LIBC 785889857Sobrienweak_function 785989857Sobrien#endif 7860218822Sdimre_exec (const char *s) 786189857Sobrien{ 786289857Sobrien const int len = strlen (s); 786389857Sobrien return 786489857Sobrien 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); 786589857Sobrien} 786689857Sobrien 786789857Sobrien#endif /* _REGEX_RE_COMP */ 786889857Sobrien 786989857Sobrien/* POSIX.2 functions. Don't define these for Emacs. */ 787089857Sobrien 787189857Sobrien#ifndef emacs 787289857Sobrien 787389857Sobrien/* regcomp takes a regular expression as a string and compiles it. 787489857Sobrien 787589857Sobrien PREG is a regex_t *. We do not expect any fields to be initialized, 787689857Sobrien since POSIX says we shouldn't. Thus, we set 787789857Sobrien 787889857Sobrien `buffer' to the compiled pattern; 787989857Sobrien `used' to the length of the compiled pattern; 788089857Sobrien `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 788189857Sobrien REG_EXTENDED bit in CFLAGS is set; otherwise, to 788289857Sobrien RE_SYNTAX_POSIX_BASIC; 788389857Sobrien `newline_anchor' to REG_NEWLINE being set in CFLAGS; 788489857Sobrien `fastmap' to an allocated space for the fastmap; 788589857Sobrien `fastmap_accurate' to zero; 788689857Sobrien `re_nsub' to the number of subexpressions in PATTERN. 788789857Sobrien 788889857Sobrien PATTERN is the address of the pattern string. 788989857Sobrien 789089857Sobrien CFLAGS is a series of bits which affect compilation. 789189857Sobrien 789289857Sobrien If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 789389857Sobrien use POSIX basic syntax. 789489857Sobrien 789589857Sobrien If REG_NEWLINE is set, then . and [^...] don't match newline. 789689857Sobrien Also, regexec will try a match beginning after every newline. 
789789857Sobrien 789889857Sobrien If REG_ICASE is set, then we considers upper- and lowercase 789989857Sobrien versions of letters to be equivalent when matching. 790089857Sobrien 790189857Sobrien If REG_NOSUB is set, then when PREG is passed to regexec, that 790289857Sobrien routine will report only success or failure, and nothing about the 790389857Sobrien registers. 790489857Sobrien 790589857Sobrien It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 790689857Sobrien the return codes and their meanings.) */ 790789857Sobrien 790889857Sobrienint 7909218822Sdimregcomp (regex_t *preg, const char *pattern, int cflags) 791089857Sobrien{ 791189857Sobrien reg_errcode_t ret; 791289857Sobrien reg_syntax_t syntax 791389857Sobrien = (cflags & REG_EXTENDED) ? 791489857Sobrien RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 791589857Sobrien 791689857Sobrien /* regex_compile will allocate the space for the compiled pattern. */ 791789857Sobrien preg->buffer = 0; 791889857Sobrien preg->allocated = 0; 791989857Sobrien preg->used = 0; 792089857Sobrien 792189857Sobrien /* Try to allocate space for the fastmap. */ 792289857Sobrien preg->fastmap = (char *) malloc (1 << BYTEWIDTH); 792389857Sobrien 792489857Sobrien if (cflags & REG_ICASE) 792589857Sobrien { 7926218822Sdim int i; 792789857Sobrien 792889857Sobrien preg->translate 792989857Sobrien = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 793089857Sobrien * sizeof (*(RE_TRANSLATE_TYPE)0)); 793189857Sobrien if (preg->translate == NULL) 793289857Sobrien return (int) REG_ESPACE; 793389857Sobrien 793489857Sobrien /* Map uppercase characters to corresponding lowercase ones. */ 793589857Sobrien for (i = 0; i < CHAR_SET_SIZE; i++) 7936218822Sdim preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; 793789857Sobrien } 793889857Sobrien else 793989857Sobrien preg->translate = NULL; 794089857Sobrien 794189857Sobrien /* If REG_NEWLINE is set, newlines are treated differently. 
*/ 794289857Sobrien if (cflags & REG_NEWLINE) 794389857Sobrien { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 794489857Sobrien syntax &= ~RE_DOT_NEWLINE; 794589857Sobrien syntax |= RE_HAT_LISTS_NOT_NEWLINE; 794689857Sobrien /* It also changes the matching behavior. */ 794789857Sobrien preg->newline_anchor = 1; 794889857Sobrien } 794989857Sobrien else 795089857Sobrien preg->newline_anchor = 0; 795189857Sobrien 795289857Sobrien preg->no_sub = !!(cflags & REG_NOSUB); 795389857Sobrien 795489857Sobrien /* POSIX says a null character in the pattern terminates it, so we 795589857Sobrien can use strlen here in compiling the pattern. */ 795689857Sobrien# ifdef MBS_SUPPORT 795789857Sobrien if (MB_CUR_MAX != 1) 795889857Sobrien ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg); 795989857Sobrien else 796089857Sobrien# endif 796189857Sobrien ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg); 796289857Sobrien 796389857Sobrien /* POSIX doesn't distinguish between an unmatched open-group and an 796489857Sobrien unmatched close-group: both are REG_EPAREN. */ 796589857Sobrien if (ret == REG_ERPAREN) ret = REG_EPAREN; 796689857Sobrien 796789857Sobrien if (ret == REG_NOERROR && preg->fastmap) 796889857Sobrien { 796989857Sobrien /* Compute the fastmap now, since regexec cannot modify the pattern 797089857Sobrien buffer. */ 797189857Sobrien if (re_compile_fastmap (preg) == -2) 797289857Sobrien { 797389857Sobrien /* Some error occurred while computing the fastmap, just forget 797489857Sobrien about it. */ 797589857Sobrien free (preg->fastmap); 797689857Sobrien preg->fastmap = NULL; 797789857Sobrien } 797889857Sobrien } 797989857Sobrien 798089857Sobrien return (int) ret; 798189857Sobrien} 798289857Sobrien#ifdef _LIBC 798389857Sobrienweak_alias (__regcomp, regcomp) 798489857Sobrien#endif 798589857Sobrien 798689857Sobrien 798789857Sobrien/* regexec searches for a given pattern, specified by PREG, in the 798889857Sobrien string STRING. 
798989857Sobrien 799089857Sobrien If NMATCH is zero or REG_NOSUB was set in the cflags argument to 799189857Sobrien `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 799289857Sobrien least NMATCH elements, and we set them to the offsets of the 799389857Sobrien corresponding matched substrings. 799489857Sobrien 799589857Sobrien EFLAGS specifies `execution flags' which affect matching: if 799689857Sobrien REG_NOTBOL is set, then ^ does not match at the beginning of the 799789857Sobrien string; if REG_NOTEOL is set, then $ does not match at the end. 799889857Sobrien 799989857Sobrien We return 0 if we find a match and REG_NOMATCH if not. */ 800089857Sobrien 800189857Sobrienint 8002218822Sdimregexec (const regex_t *preg, const char *string, size_t nmatch, 8003218822Sdim regmatch_t pmatch[], int eflags) 800489857Sobrien{ 800589857Sobrien int ret; 800689857Sobrien struct re_registers regs; 800789857Sobrien regex_t private_preg; 800889857Sobrien int len = strlen (string); 800989857Sobrien boolean want_reg_info = !preg->no_sub && nmatch > 0; 801089857Sobrien 801189857Sobrien private_preg = *preg; 801289857Sobrien 801389857Sobrien private_preg.not_bol = !!(eflags & REG_NOTBOL); 801489857Sobrien private_preg.not_eol = !!(eflags & REG_NOTEOL); 801589857Sobrien 801689857Sobrien /* The user has told us exactly how many registers to return 801789857Sobrien information about, via `nmatch'. We have to pass that on to the 801889857Sobrien matching routines. */ 801989857Sobrien private_preg.regs_allocated = REGS_FIXED; 802089857Sobrien 802189857Sobrien if (want_reg_info) 802289857Sobrien { 802389857Sobrien regs.num_regs = nmatch; 802489857Sobrien regs.start = TALLOC (nmatch * 2, regoff_t); 802589857Sobrien if (regs.start == NULL) 802689857Sobrien return (int) REG_NOMATCH; 802789857Sobrien regs.end = regs.start + nmatch; 802889857Sobrien } 802989857Sobrien 803089857Sobrien /* Perform the searching operation. 
*/ 803189857Sobrien ret = re_search (&private_preg, string, len, 803289857Sobrien /* start: */ 0, /* range: */ len, 803389857Sobrien want_reg_info ? ®s : (struct re_registers *) 0); 803489857Sobrien 803589857Sobrien /* Copy the register information to the POSIX structure. */ 803689857Sobrien if (want_reg_info) 803789857Sobrien { 803889857Sobrien if (ret >= 0) 803989857Sobrien { 804089857Sobrien unsigned r; 804189857Sobrien 804289857Sobrien for (r = 0; r < nmatch; r++) 804389857Sobrien { 804489857Sobrien pmatch[r].rm_so = regs.start[r]; 804589857Sobrien pmatch[r].rm_eo = regs.end[r]; 804689857Sobrien } 804789857Sobrien } 804889857Sobrien 804989857Sobrien /* If we needed the temporary register info, free the space now. */ 805089857Sobrien free (regs.start); 805189857Sobrien } 805289857Sobrien 805389857Sobrien /* We want zero return to mean success, unlike `re_search'. */ 805489857Sobrien return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 805589857Sobrien} 805689857Sobrien#ifdef _LIBC 805789857Sobrienweak_alias (__regexec, regexec) 805889857Sobrien#endif 805989857Sobrien 806089857Sobrien 806189857Sobrien/* Returns a message corresponding to an error code, ERRCODE, returned 806289857Sobrien from either regcomp or regexec. We don't use PREG here. */ 806389857Sobrien 806489857Sobriensize_t 8065218822Sdimregerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED, 8066218822Sdim char *errbuf, size_t errbuf_size) 806789857Sobrien{ 806889857Sobrien const char *msg; 806989857Sobrien size_t msg_size; 807089857Sobrien 807189857Sobrien if (errcode < 0 8072130561Sobrien || errcode >= (int) (sizeof (re_error_msgid) 8073130561Sobrien / sizeof (re_error_msgid[0]))) 807489857Sobrien /* Only error codes returned by the rest of the code should be passed 807589857Sobrien to this routine. If we are given anything else, or if other regex 807689857Sobrien code generates an invalid error code, then the program has a bug. 807789857Sobrien Dump core so we can fix it. 
*/ 807889857Sobrien abort (); 807989857Sobrien 8080130561Sobrien msg = gettext (re_error_msgid[errcode]); 808189857Sobrien 808289857Sobrien msg_size = strlen (msg) + 1; /* Includes the null. */ 808389857Sobrien 808489857Sobrien if (errbuf_size != 0) 808589857Sobrien { 808689857Sobrien if (msg_size > errbuf_size) 808789857Sobrien { 808889857Sobrien#if defined HAVE_MEMPCPY || defined _LIBC 8089130561Sobrien *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; 809089857Sobrien#else 809189857Sobrien memcpy (errbuf, msg, errbuf_size - 1); 809289857Sobrien errbuf[errbuf_size - 1] = 0; 809389857Sobrien#endif 809489857Sobrien } 809589857Sobrien else 809689857Sobrien memcpy (errbuf, msg, msg_size); 809789857Sobrien } 809889857Sobrien 809989857Sobrien return msg_size; 810089857Sobrien} 810189857Sobrien#ifdef _LIBC 810289857Sobrienweak_alias (__regerror, regerror) 810389857Sobrien#endif 810489857Sobrien 810589857Sobrien 810689857Sobrien/* Free dynamically allocated space used by PREG. */ 810789857Sobrien 810889857Sobrienvoid 8109218822Sdimregfree (regex_t *preg) 811089857Sobrien{ 811189857Sobrien if (preg->buffer != NULL) 811289857Sobrien free (preg->buffer); 811389857Sobrien preg->buffer = NULL; 811489857Sobrien 811589857Sobrien preg->allocated = 0; 811689857Sobrien preg->used = 0; 811789857Sobrien 811889857Sobrien if (preg->fastmap != NULL) 811989857Sobrien free (preg->fastmap); 812089857Sobrien preg->fastmap = NULL; 812189857Sobrien preg->fastmap_accurate = 0; 812289857Sobrien 812389857Sobrien if (preg->translate != NULL) 812489857Sobrien free (preg->translate); 812589857Sobrien preg->translate = NULL; 812689857Sobrien} 812789857Sobrien#ifdef _LIBC 812889857Sobrienweak_alias (__regfree, regfree) 812989857Sobrien#endif 813089857Sobrien 813189857Sobrien#endif /* not emacs */ 813289857Sobrien 813389857Sobrien#endif /* not INSIDE_RECURSION */ 813489857Sobrien 813589857Sobrien 813689857Sobrien#undef STORE_NUMBER 813789857Sobrien#undef STORE_NUMBER_AND_INCR 
813889857Sobrien#undef EXTRACT_NUMBER 813989857Sobrien#undef EXTRACT_NUMBER_AND_INCR 814089857Sobrien 814189857Sobrien#undef DEBUG_PRINT_COMPILED_PATTERN 814289857Sobrien#undef DEBUG_PRINT_DOUBLE_STRING 814389857Sobrien 814489857Sobrien#undef INIT_FAIL_STACK 814589857Sobrien#undef RESET_FAIL_STACK 814689857Sobrien#undef DOUBLE_FAIL_STACK 814789857Sobrien#undef PUSH_PATTERN_OP 814889857Sobrien#undef PUSH_FAILURE_POINTER 814989857Sobrien#undef PUSH_FAILURE_INT 815089857Sobrien#undef PUSH_FAILURE_ELT 815189857Sobrien#undef POP_FAILURE_POINTER 815289857Sobrien#undef POP_FAILURE_INT 815389857Sobrien#undef POP_FAILURE_ELT 815489857Sobrien#undef DEBUG_PUSH 815589857Sobrien#undef DEBUG_POP 815689857Sobrien#undef PUSH_FAILURE_POINT 815789857Sobrien#undef POP_FAILURE_POINT 815889857Sobrien 815989857Sobrien#undef REG_UNSET_VALUE 816089857Sobrien#undef REG_UNSET 816189857Sobrien 816289857Sobrien#undef PATFETCH 816389857Sobrien#undef PATFETCH_RAW 816489857Sobrien#undef PATUNFETCH 816589857Sobrien#undef TRANSLATE 816689857Sobrien 816789857Sobrien#undef INIT_BUF_SIZE 816889857Sobrien#undef GET_BUFFER_SPACE 816989857Sobrien#undef BUF_PUSH 817089857Sobrien#undef BUF_PUSH_2 817189857Sobrien#undef BUF_PUSH_3 817289857Sobrien#undef STORE_JUMP 817389857Sobrien#undef STORE_JUMP2 817489857Sobrien#undef INSERT_JUMP 817589857Sobrien#undef INSERT_JUMP2 817689857Sobrien#undef EXTEND_BUFFER 817789857Sobrien#undef GET_UNSIGNED_NUMBER 817889857Sobrien#undef FREE_STACK_RETURN 817989857Sobrien 818089857Sobrien# undef POINTER_TO_OFFSET 818189857Sobrien# undef MATCHING_IN_FRST_STRING 818289857Sobrien# undef PREFETCH 818389857Sobrien# undef AT_STRINGS_BEG 818489857Sobrien# undef AT_STRINGS_END 818589857Sobrien# undef WORDCHAR_P 818689857Sobrien# undef FREE_VAR 818789857Sobrien# undef FREE_VARIABLES 818889857Sobrien# undef NO_HIGHEST_ACTIVE_REG 818989857Sobrien# undef NO_LOWEST_ACTIVE_REG 819089857Sobrien 819189857Sobrien# undef CHAR_T 819289857Sobrien# undef UCHAR_T 819389857Sobrien# undef 
COMPILED_BUFFER_VAR 819489857Sobrien# undef OFFSET_ADDRESS_SIZE 819589857Sobrien# undef CHAR_CLASS_SIZE 819689857Sobrien# undef PREFIX 819789857Sobrien# undef ARG_PREFIX 819889857Sobrien# undef PUT_CHAR 819989857Sobrien# undef BYTE 820089857Sobrien# undef WCHAR 820189857Sobrien 820289857Sobrien# define DEFINED_ONCE 8203