1169695Skan/* Extended regular expression matching and search library, 2169695Skan version 0.12. 3169695Skan (Implements POSIX draft P1003.2/D11.2, except for some of the 4169695Skan internationalization features.) 5169695Skan 6169695Skan Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 7169695Skan 2002, 2005 Free Software Foundation, Inc. 8169695Skan This file is part of the GNU C Library. 9169695Skan 10169695Skan The GNU C Library is free software; you can redistribute it and/or 11169695Skan modify it under the terms of the GNU Lesser General Public 12169695Skan License as published by the Free Software Foundation; either 13169695Skan version 2.1 of the License, or (at your option) any later version. 14169695Skan 15169695Skan The GNU C Library is distributed in the hope that it will be useful, 16169695Skan but WITHOUT ANY WARRANTY; without even the implied warranty of 17169695Skan MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18169695Skan Lesser General Public License for more details. 19169695Skan 20169695Skan You should have received a copy of the GNU Lesser General Public 21169695Skan License along with the GNU C Library; if not, write to the Free 22169695Skan Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 23169695Skan 02110-1301 USA. */ 24169695Skan 25169695Skan/* This file has been modified for usage in libiberty. It includes "xregex.h" 26169695Skan instead of <regex.h>. The "xregex.h" header file renames all external 27169695Skan routines with an "x" prefix so they do not collide with the native regex 28169695Skan routines or with other components regex routines. */ 29169695Skan/* AIX requires this to be the first thing in the file. 
*/ 30169695Skan#if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC 31169695Skan #pragma alloca 32169695Skan#endif 33169695Skan 34169695Skan#undef _GNU_SOURCE 35169695Skan#define _GNU_SOURCE 36169695Skan 37169695Skan#ifndef INSIDE_RECURSION 38169695Skan# ifdef HAVE_CONFIG_H 39169695Skan# include <config.h> 40169695Skan# endif 41169695Skan#endif 42169695Skan 43169695Skan#include <ansidecl.h> 44169695Skan 45169695Skan#ifndef INSIDE_RECURSION 46169695Skan 47169695Skan# if defined STDC_HEADERS && !defined emacs 48169695Skan# include <stddef.h> 49169695Skan# else 50169695Skan/* We need this for `regex.h', and perhaps for the Emacs include files. */ 51169695Skan# include <sys/types.h> 52169695Skan# endif 53169695Skan 54169695Skan# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) 55169695Skan 56169695Skan/* For platform which support the ISO C amendement 1 functionality we 57169695Skan support user defined character classes. */ 58169695Skan# if defined _LIBC || WIDE_CHAR_SUPPORT 59169695Skan/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 60169695Skan# include <wchar.h> 61169695Skan# include <wctype.h> 62169695Skan# endif 63169695Skan 64169695Skan# ifdef _LIBC 65169695Skan/* We have to keep the namespace clean. 
*/ 66169695Skan# define regfree(preg) __regfree (preg) 67169695Skan# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 68169695Skan# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 69169695Skan# define regerror(errcode, preg, errbuf, errbuf_size) \ 70169695Skan __regerror(errcode, preg, errbuf, errbuf_size) 71169695Skan# define re_set_registers(bu, re, nu, st, en) \ 72169695Skan __re_set_registers (bu, re, nu, st, en) 73169695Skan# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 74169695Skan __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 75169695Skan# define re_match(bufp, string, size, pos, regs) \ 76169695Skan __re_match (bufp, string, size, pos, regs) 77169695Skan# define re_search(bufp, string, size, startpos, range, regs) \ 78169695Skan __re_search (bufp, string, size, startpos, range, regs) 79169695Skan# define re_compile_pattern(pattern, length, bufp) \ 80169695Skan __re_compile_pattern (pattern, length, bufp) 81169695Skan# define re_set_syntax(syntax) __re_set_syntax (syntax) 82169695Skan# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ 83169695Skan __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) 84169695Skan# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) 85169695Skan 86169695Skan# define btowc __btowc 87169695Skan 88169695Skan/* We are also using some library internals. */ 89169695Skan# include <locale/localeinfo.h> 90169695Skan# include <locale/elem-hash.h> 91169695Skan# include <langinfo.h> 92169695Skan# include <locale/coll-lookup.h> 93169695Skan# endif 94169695Skan 95169695Skan/* This is for other GNU distributions with internationalized messages. 
*/ 96169695Skan# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC 97169695Skan# include <libintl.h> 98169695Skan# ifdef _LIBC 99169695Skan# undef gettext 100169695Skan# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) 101169695Skan# endif 102169695Skan# else 103169695Skan# define gettext(msgid) (msgid) 104169695Skan# endif 105169695Skan 106169695Skan# ifndef gettext_noop 107169695Skan/* This define is so xgettext can find the internationalizable 108169695Skan strings. */ 109169695Skan# define gettext_noop(String) String 110169695Skan# endif 111169695Skan 112169695Skan/* The `emacs' switch turns on certain matching commands 113169695Skan that make sense only in Emacs. */ 114169695Skan# ifdef emacs 115169695Skan 116169695Skan# include "lisp.h" 117169695Skan# include "buffer.h" 118169695Skan# include "syntax.h" 119169695Skan 120169695Skan# else /* not emacs */ 121169695Skan 122169695Skan/* If we are not linking with Emacs proper, 123169695Skan we can't use the relocating allocator 124169695Skan even if config.h says that we can. */ 125169695Skan# undef REL_ALLOC 126169695Skan 127169695Skan# if defined STDC_HEADERS || defined _LIBC 128169695Skan# include <stdlib.h> 129169695Skan# else 130169695Skanchar *malloc (); 131169695Skanchar *realloc (); 132169695Skan# endif 133169695Skan 134169695Skan/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 135169695Skan If nothing else has been done, use the method below. */ 136169695Skan# ifdef INHIBIT_STRING_HEADER 137169695Skan# if !(defined HAVE_BZERO && defined HAVE_BCOPY) 138169695Skan# if !defined bzero && !defined bcopy 139169695Skan# undef INHIBIT_STRING_HEADER 140169695Skan# endif 141169695Skan# endif 142169695Skan# endif 143169695Skan 144169695Skan/* This is the normal way of making sure we have a bcopy and a bzero. 145169695Skan This is used in most programs--a few other programs avoid this 146169695Skan by defining INHIBIT_STRING_HEADER. 
*/ 147169695Skan# ifndef INHIBIT_STRING_HEADER 148169695Skan# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 149169695Skan# include <string.h> 150169695Skan# ifndef bzero 151169695Skan# ifndef _LIBC 152222203Sbenl# define bzero(s, n) ((void) (memset (s, '\0', n), (s))) 153169695Skan# else 154169695Skan# define bzero(s, n) __bzero (s, n) 155169695Skan# endif 156169695Skan# endif 157169695Skan# else 158169695Skan# include <strings.h> 159169695Skan# ifndef memcmp 160169695Skan# define memcmp(s1, s2, n) bcmp (s1, s2, n) 161169695Skan# endif 162169695Skan# ifndef memcpy 163169695Skan# define memcpy(d, s, n) (bcopy (s, d, n), (d)) 164169695Skan# endif 165169695Skan# endif 166169695Skan# endif 167169695Skan 168169695Skan/* Define the syntax stuff for \<, \>, etc. */ 169169695Skan 170169695Skan/* This must be nonzero for the wordchar and notwordchar pattern 171169695Skan commands in re_match_2. */ 172169695Skan# ifndef Sword 173169695Skan# define Sword 1 174169695Skan# endif 175169695Skan 176169695Skan# ifdef SWITCH_ENUM_BUG 177169695Skan# define SWITCH_ENUM_CAST(x) ((int)(x)) 178169695Skan# else 179169695Skan# define SWITCH_ENUM_CAST(x) (x) 180169695Skan# endif 181169695Skan 182169695Skan# endif /* not emacs */ 183169695Skan 184169695Skan# if defined _LIBC || HAVE_LIMITS_H 185169695Skan# include <limits.h> 186169695Skan# endif 187169695Skan 188169695Skan# ifndef MB_LEN_MAX 189169695Skan# define MB_LEN_MAX 1 190169695Skan# endif 191169695Skan 192169695Skan/* Get the interface, including the syntax bits. */ 193169695Skan# include "xregex.h" /* change for libiberty */ 194169695Skan 195169695Skan/* isalpha etc. are used for the character classes. */ 196169695Skan# include <ctype.h> 197169695Skan 198169695Skan/* Jim Meyering writes: 199169695Skan 200169695Skan "... 
Some ctype macros are valid only for character codes that 201169695Skan isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 202169695Skan using /bin/cc or gcc but without giving an ansi option). So, all 203169695Skan ctype uses should be through macros like ISPRINT... If 204169695Skan STDC_HEADERS is defined, then autoconf has verified that the ctype 205169695Skan macros don't need to be guarded with references to isascii. ... 206169695Skan Defining isascii to 1 should let any compiler worth its salt 207169695Skan eliminate the && through constant folding." 208169695Skan Solaris defines some of these symbols so we must undefine them first. */ 209169695Skan 210169695Skan# undef ISASCII 211169695Skan# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 212169695Skan# define ISASCII(c) 1 213169695Skan# else 214169695Skan# define ISASCII(c) isascii(c) 215169695Skan# endif 216169695Skan 217169695Skan# ifdef isblank 218169695Skan# define ISBLANK(c) (ISASCII (c) && isblank (c)) 219169695Skan# else 220169695Skan# define ISBLANK(c) ((c) == ' ' || (c) == '\t') 221169695Skan# endif 222169695Skan# ifdef isgraph 223169695Skan# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 224169695Skan# else 225169695Skan# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 226169695Skan# endif 227169695Skan 228169695Skan# undef ISPRINT 229169695Skan# define ISPRINT(c) (ISASCII (c) && isprint (c)) 230169695Skan# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 231169695Skan# define ISALNUM(c) (ISASCII (c) && isalnum (c)) 232169695Skan# define ISALPHA(c) (ISASCII (c) && isalpha (c)) 233169695Skan# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 234169695Skan# define ISLOWER(c) (ISASCII (c) && islower (c)) 235169695Skan# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) 236169695Skan# define ISSPACE(c) (ISASCII (c) && isspace (c)) 237169695Skan# define ISUPPER(c) (ISASCII (c) && isupper (c)) 238169695Skan# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 
239169695Skan 240169695Skan# ifdef _tolower 241169695Skan# define TOLOWER(c) _tolower(c) 242169695Skan# else 243169695Skan# define TOLOWER(c) tolower(c) 244169695Skan# endif 245169695Skan 246169695Skan# ifndef NULL 247169695Skan# define NULL (void *)0 248169695Skan# endif 249169695Skan 250169695Skan/* We remove any previous definition of `SIGN_EXTEND_CHAR', 251169695Skan since ours (we hope) works properly with all combinations of 252169695Skan machines, compilers, `char' and `unsigned char' argument types. 253169695Skan (Per Bothner suggested the basic approach.) */ 254169695Skan# undef SIGN_EXTEND_CHAR 255169695Skan# if __STDC__ 256169695Skan# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 257169695Skan# else /* not __STDC__ */ 258169695Skan/* As in Harbison and Steele. */ 259169695Skan# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 260169695Skan# endif 261169695Skan 262169695Skan# ifndef emacs 263169695Skan/* How many characters in the character set. */ 264169695Skan# define CHAR_SET_SIZE 256 265169695Skan 266169695Skan# ifdef SYNTAX_TABLE 267169695Skan 268169695Skanextern char *re_syntax_table; 269169695Skan 270169695Skan# else /* not SYNTAX_TABLE */ 271169695Skan 272169695Skanstatic char re_syntax_table[CHAR_SET_SIZE]; 273169695Skan 274169695Skanstatic void init_syntax_once (void); 275169695Skan 276169695Skanstatic void 277169695Skaninit_syntax_once (void) 278169695Skan{ 279169695Skan register int c; 280169695Skan static int done = 0; 281169695Skan 282169695Skan if (done) 283169695Skan return; 284169695Skan bzero (re_syntax_table, sizeof re_syntax_table); 285169695Skan 286169695Skan for (c = 0; c < CHAR_SET_SIZE; ++c) 287169695Skan if (ISALNUM (c)) 288169695Skan re_syntax_table[c] = Sword; 289169695Skan 290169695Skan re_syntax_table['_'] = Sword; 291169695Skan 292169695Skan done = 1; 293169695Skan} 294169695Skan 295169695Skan# endif /* not SYNTAX_TABLE */ 296169695Skan 297169695Skan# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] 
298169695Skan 299169695Skan# endif /* emacs */ 300169695Skan 301169695Skan/* Integer type for pointers. */ 302169695Skan# if !defined _LIBC && !defined HAVE_UINTPTR_T 303169695Skantypedef unsigned long int uintptr_t; 304169695Skan# endif 305169695Skan 306169695Skan/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 307169695Skan use `alloca' instead of `malloc'. This is because using malloc in 308169695Skan re_search* or re_match* could cause memory leaks when C-g is used in 309169695Skan Emacs; also, malloc is slower and causes storage fragmentation. On 310169695Skan the other hand, malloc is more portable, and easier to debug. 311169695Skan 312169695Skan Because we sometimes use alloca, some routines have to be macros, 313169695Skan not functions -- `alloca'-allocated space disappears at the end of the 314169695Skan function it is called in. */ 315169695Skan 316169695Skan# ifdef REGEX_MALLOC 317169695Skan 318169695Skan# define REGEX_ALLOCATE malloc 319169695Skan# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 320169695Skan# define REGEX_FREE free 321169695Skan 322169695Skan# else /* not REGEX_MALLOC */ 323169695Skan 324169695Skan/* Emacs already defines alloca, sometimes. */ 325169695Skan# ifndef alloca 326169695Skan 327169695Skan/* Make alloca work the best possible way. */ 328169695Skan# ifdef __GNUC__ 329169695Skan# define alloca __builtin_alloca 330169695Skan# else /* not __GNUC__ */ 331169695Skan# if HAVE_ALLOCA_H 332169695Skan# include <alloca.h> 333169695Skan# endif /* HAVE_ALLOCA_H */ 334169695Skan# endif /* not __GNUC__ */ 335169695Skan 336169695Skan# endif /* not alloca */ 337169695Skan 338169695Skan# define REGEX_ALLOCATE alloca 339169695Skan 340169695Skan/* Assumes a `char *destination' variable. 
*/ 341169695Skan# define REGEX_REALLOCATE(source, osize, nsize) \ 342169695Skan (destination = (char *) alloca (nsize), \ 343169695Skan memcpy (destination, source, osize)) 344169695Skan 345169695Skan/* No need to do anything to free, after alloca. */ 346169695Skan# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 347169695Skan 348169695Skan# endif /* not REGEX_MALLOC */ 349169695Skan 350169695Skan/* Define how to allocate the failure stack. */ 351169695Skan 352169695Skan# if defined REL_ALLOC && defined REGEX_MALLOC 353169695Skan 354169695Skan# define REGEX_ALLOCATE_STACK(size) \ 355169695Skan r_alloc (&failure_stack_ptr, (size)) 356169695Skan# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 357169695Skan r_re_alloc (&failure_stack_ptr, (nsize)) 358169695Skan# define REGEX_FREE_STACK(ptr) \ 359169695Skan r_alloc_free (&failure_stack_ptr) 360169695Skan 361169695Skan# else /* not using relocating allocator */ 362169695Skan 363169695Skan# ifdef REGEX_MALLOC 364169695Skan 365169695Skan# define REGEX_ALLOCATE_STACK malloc 366169695Skan# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 367169695Skan# define REGEX_FREE_STACK free 368169695Skan 369169695Skan# else /* not REGEX_MALLOC */ 370169695Skan 371169695Skan# define REGEX_ALLOCATE_STACK alloca 372169695Skan 373169695Skan# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 374169695Skan REGEX_REALLOCATE (source, osize, nsize) 375169695Skan/* No need to explicitly free anything. */ 376169695Skan# define REGEX_FREE_STACK(arg) 377169695Skan 378169695Skan# endif /* not REGEX_MALLOC */ 379169695Skan# endif /* not using relocating allocator */ 380169695Skan 381169695Skan 382169695Skan/* True if `size1' is non-NULL and PTR is pointing anywhere inside 383169695Skan `string1' or just past its end. This works if PTR is NULL, which is 384169695Skan a good thing. 
*/ 385169695Skan# define FIRST_STRING_P(ptr) \ 386169695Skan (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 387169695Skan 388169695Skan/* (Re)Allocate N items of type T using malloc, or fail. */ 389169695Skan# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 390169695Skan# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 391169695Skan# define RETALLOC_IF(addr, n, t) \ 392169695Skan if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 393169695Skan# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 394169695Skan 395169695Skan# define BYTEWIDTH 8 /* In bits. */ 396169695Skan 397169695Skan# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 398169695Skan 399169695Skan# undef MAX 400169695Skan# undef MIN 401169695Skan# define MAX(a, b) ((a) > (b) ? (a) : (b)) 402169695Skan# define MIN(a, b) ((a) < (b) ? (a) : (b)) 403169695Skan 404169695Skantypedef char boolean; 405169695Skan# define false 0 406169695Skan# define true 1 407169695Skan 408169695Skanstatic reg_errcode_t byte_regex_compile (const char *pattern, size_t size, 409169695Skan reg_syntax_t syntax, 410169695Skan struct re_pattern_buffer *bufp); 411169695Skan 412169695Skanstatic int byte_re_match_2_internal (struct re_pattern_buffer *bufp, 413169695Skan const char *string1, int size1, 414169695Skan const char *string2, int size2, 415169695Skan int pos, 416169695Skan struct re_registers *regs, 417169695Skan int stop); 418169695Skanstatic int byte_re_search_2 (struct re_pattern_buffer *bufp, 419169695Skan const char *string1, int size1, 420169695Skan const char *string2, int size2, 421169695Skan int startpos, int range, 422169695Skan struct re_registers *regs, int stop); 423169695Skanstatic int byte_re_compile_fastmap (struct re_pattern_buffer *bufp); 424169695Skan 425169695Skan#ifdef MBS_SUPPORT 426169695Skanstatic reg_errcode_t wcs_regex_compile (const char *pattern, size_t size, 427169695Skan reg_syntax_t syntax, 428169695Skan struct 
re_pattern_buffer *bufp); 429169695Skan 430169695Skan 431169695Skanstatic int wcs_re_match_2_internal (struct re_pattern_buffer *bufp, 432169695Skan const char *cstring1, int csize1, 433169695Skan const char *cstring2, int csize2, 434169695Skan int pos, 435169695Skan struct re_registers *regs, 436169695Skan int stop, 437169695Skan wchar_t *string1, int size1, 438169695Skan wchar_t *string2, int size2, 439169695Skan int *mbs_offset1, int *mbs_offset2); 440169695Skanstatic int wcs_re_search_2 (struct re_pattern_buffer *bufp, 441169695Skan const char *string1, int size1, 442169695Skan const char *string2, int size2, 443169695Skan int startpos, int range, 444169695Skan struct re_registers *regs, int stop); 445169695Skanstatic int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp); 446169695Skan#endif 447169695Skan 448169695Skan/* These are the command codes that appear in compiled regular 449169695Skan expressions. Some opcodes are followed by argument bytes. A 450169695Skan command code can specify any interpretation whatsoever for its 451169695Skan arguments. Zero bytes may appear in the compiled regular expression. */ 452169695Skan 453169695Skantypedef enum 454169695Skan{ 455169695Skan no_op = 0, 456169695Skan 457169695Skan /* Succeed right away--no more backtracking. */ 458169695Skan succeed, 459169695Skan 460169695Skan /* Followed by one byte giving n, then by n literal bytes. */ 461169695Skan exactn, 462169695Skan 463169695Skan# ifdef MBS_SUPPORT 464169695Skan /* Same as exactn, but contains binary data. */ 465169695Skan exactn_bin, 466169695Skan# endif 467169695Skan 468169695Skan /* Matches any (more or less) character. */ 469169695Skan anychar, 470169695Skan 471169695Skan /* Matches any one char belonging to specified set. First 472169695Skan following byte is number of bitmap bytes. Then come bytes 473169695Skan for a bitmap saying which chars are in. Bits in each byte 474169695Skan are ordered low-bit-first. 
A character is in the set if its 475169695Skan bit is 1. A character too large to have a bit in the map is 476169695Skan automatically not in the set. */ 477169695Skan /* ifdef MBS_SUPPORT, following element is length of character 478169695Skan classes, length of collating symbols, length of equivalence 479169695Skan classes, length of character ranges, and length of characters. 480169695Skan Next, character class element, collating symbols elements, 481169695Skan equivalence class elements, range elements, and character 482169695Skan elements follow. 483169695Skan See regex_compile function. */ 484169695Skan charset, 485169695Skan 486169695Skan /* Same parameters as charset, but match any character that is 487169695Skan not one of those specified. */ 488169695Skan charset_not, 489169695Skan 490169695Skan /* Start remembering the text that is matched, for storing in a 491169695Skan register. Followed by one byte with the register number, in 492169695Skan the range 0 to one less than the pattern buffer's re_nsub 493169695Skan field. Then followed by one byte with the number of groups 494169695Skan inner to this one. (This last has to be part of the 495169695Skan start_memory only because we need it in the on_failure_jump 496169695Skan of re_match_2.) */ 497169695Skan start_memory, 498169695Skan 499169695Skan /* Stop remembering the text that is matched and store it in a 500169695Skan memory register. Followed by one byte with the register 501169695Skan number, in the range 0 to one less than `re_nsub' in the 502169695Skan pattern buffer, and one byte with the number of inner groups, 503169695Skan just like `start_memory'. (We need the number of inner 504169695Skan groups here because we don't have any easy way of finding the 505169695Skan corresponding start_memory when we're at a stop_memory.) */ 506169695Skan stop_memory, 507169695Skan 508169695Skan /* Match a duplicate of something remembered. Followed by one 509169695Skan byte containing the register number. 
*/ 510169695Skan duplicate, 511169695Skan 512169695Skan /* Fail unless at beginning of line. */ 513169695Skan begline, 514169695Skan 515169695Skan /* Fail unless at end of line. */ 516169695Skan endline, 517169695Skan 518169695Skan /* Succeeds if at beginning of buffer (if emacs) or at beginning 519169695Skan of string to be matched (if not). */ 520169695Skan begbuf, 521169695Skan 522169695Skan /* Analogously, for end of buffer/string. */ 523169695Skan endbuf, 524169695Skan 525169695Skan /* Followed by two byte relative address to which to jump. */ 526169695Skan jump, 527169695Skan 528169695Skan /* Same as jump, but marks the end of an alternative. */ 529169695Skan jump_past_alt, 530169695Skan 531169695Skan /* Followed by two-byte relative address of place to resume at 532169695Skan in case of failure. */ 533169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. */ 534169695Skan on_failure_jump, 535169695Skan 536169695Skan /* Like on_failure_jump, but pushes a placeholder instead of the 537169695Skan current string position when executed. */ 538169695Skan on_failure_keep_string_jump, 539169695Skan 540169695Skan /* Throw away latest failure point and then jump to following 541169695Skan two-byte relative address. */ 542169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. */ 543169695Skan pop_failure_jump, 544169695Skan 545169695Skan /* Change to pop_failure_jump if know won't have to backtrack to 546169695Skan match; otherwise change to jump. This is used to jump 547169695Skan back to the beginning of a repeat. If what follows this jump 548169695Skan clearly won't match what the repeat does, such that we can be 549169695Skan sure that there is no use backtracking out of repetitions 550169695Skan already matched, then we change it to a pop_failure_jump. 551169695Skan Followed by two-byte address. */ 552169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. 
*/ 553169695Skan maybe_pop_jump, 554169695Skan 555169695Skan /* Jump to following two-byte address, and push a dummy failure 556169695Skan point. This failure point will be thrown away if an attempt 557169695Skan is made to use it for a failure. A `+' construct makes this 558169695Skan before the first repeat. Also used as an intermediary kind 559169695Skan of jump when compiling an alternative. */ 560169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. */ 561169695Skan dummy_failure_jump, 562169695Skan 563169695Skan /* Push a dummy failure point and continue. Used at the end of 564169695Skan alternatives. */ 565169695Skan push_dummy_failure, 566169695Skan 567169695Skan /* Followed by two-byte relative address and two-byte number n. 568169695Skan After matching N times, jump to the address upon failure. */ 569169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. */ 570169695Skan succeed_n, 571169695Skan 572169695Skan /* Followed by two-byte relative address, and two-byte number n. 573169695Skan Jump to the address N times, then fail. */ 574169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. */ 575169695Skan jump_n, 576169695Skan 577169695Skan /* Set the following two-byte relative address to the 578169695Skan subsequent two-byte number. The address *includes* the two 579169695Skan bytes of number. */ 580169695Skan /* ifdef MBS_SUPPORT, the size of address is 1. */ 581169695Skan set_number_at, 582169695Skan 583169695Skan wordchar, /* Matches any word-constituent character. */ 584169695Skan notwordchar, /* Matches any char that is not a word-constituent. */ 585169695Skan 586169695Skan wordbeg, /* Succeeds if at word beginning. */ 587169695Skan wordend, /* Succeeds if at word end. */ 588169695Skan 589169695Skan wordbound, /* Succeeds if at a word boundary. */ 590169695Skan notwordbound /* Succeeds if not at a word boundary. */ 591169695Skan 592169695Skan# ifdef emacs 593169695Skan ,before_dot, /* Succeeds if before point. 
*/ 594169695Skan at_dot, /* Succeeds if at point. */ 595169695Skan after_dot, /* Succeeds if after point. */ 596169695Skan 597169695Skan /* Matches any character whose syntax is specified. Followed by 598169695Skan a byte which contains a syntax code, e.g., Sword. */ 599169695Skan syntaxspec, 600169695Skan 601169695Skan /* Matches any character whose syntax is not that specified. */ 602169695Skan notsyntaxspec 603169695Skan# endif /* emacs */ 604169695Skan} re_opcode_t; 605169695Skan#endif /* not INSIDE_RECURSION */ 606169695Skan 607169695Skan 608169695Skan#ifdef BYTE 609169695Skan# define CHAR_T char 610169695Skan# define UCHAR_T unsigned char 611169695Skan# define COMPILED_BUFFER_VAR bufp->buffer 612169695Skan# define OFFSET_ADDRESS_SIZE 2 613169695Skan# define PREFIX(name) byte_##name 614169695Skan# define ARG_PREFIX(name) name 615169695Skan# define PUT_CHAR(c) putchar (c) 616169695Skan#else 617169695Skan# ifdef WCHAR 618169695Skan# define CHAR_T wchar_t 619169695Skan# define UCHAR_T wchar_t 620169695Skan# define COMPILED_BUFFER_VAR wc_buffer 621169695Skan# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ 622169695Skan# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) 623169695Skan# define PREFIX(name) wcs_##name 624169695Skan# define ARG_PREFIX(name) c##name 625169695Skan/* Should we use wide stream?? */ 626169695Skan# define PUT_CHAR(c) printf ("%C", c); 627169695Skan# define TRUE 1 628169695Skan# define FALSE 0 629169695Skan# else 630169695Skan# ifdef MBS_SUPPORT 631169695Skan# define WCHAR 632169695Skan# define INSIDE_RECURSION 633169695Skan# include "regex.c" 634169695Skan# undef INSIDE_RECURSION 635169695Skan# endif 636169695Skan# define BYTE 637169695Skan# define INSIDE_RECURSION 638169695Skan# include "regex.c" 639169695Skan# undef INSIDE_RECURSION 640169695Skan# endif 641169695Skan#endif 642169695Skan 643169695Skan#ifdef INSIDE_RECURSION 644169695Skan/* Common operations on the compiled pattern. 
*/ 645169695Skan 646169695Skan/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 647169695Skan/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 648169695Skan 649169695Skan# ifdef WCHAR 650169695Skan# define STORE_NUMBER(destination, number) \ 651169695Skan do { \ 652169695Skan *(destination) = (UCHAR_T)(number); \ 653169695Skan } while (0) 654169695Skan# else /* BYTE */ 655169695Skan# define STORE_NUMBER(destination, number) \ 656169695Skan do { \ 657169695Skan (destination)[0] = (number) & 0377; \ 658169695Skan (destination)[1] = (number) >> 8; \ 659169695Skan } while (0) 660169695Skan# endif /* WCHAR */ 661169695Skan 662169695Skan/* Same as STORE_NUMBER, except increment DESTINATION to 663169695Skan the byte after where the number is stored. Therefore, DESTINATION 664169695Skan must be an lvalue. */ 665169695Skan/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ 666169695Skan 667169695Skan# define STORE_NUMBER_AND_INCR(destination, number) \ 668169695Skan do { \ 669169695Skan STORE_NUMBER (destination, number); \ 670169695Skan (destination) += OFFSET_ADDRESS_SIZE; \ 671169695Skan } while (0) 672169695Skan 673169695Skan/* Put into DESTINATION a number stored in two contiguous bytes starting 674169695Skan at SOURCE. */ 675169695Skan/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. 
*/ 676169695Skan 677169695Skan# ifdef WCHAR 678169695Skan# define EXTRACT_NUMBER(destination, source) \ 679169695Skan do { \ 680169695Skan (destination) = *(source); \ 681169695Skan } while (0) 682169695Skan# else /* BYTE */ 683169695Skan# define EXTRACT_NUMBER(destination, source) \ 684169695Skan do { \ 685169695Skan (destination) = *(source) & 0377; \ 686169695Skan (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 687169695Skan } while (0) 688169695Skan# endif 689169695Skan 690169695Skan# ifdef DEBUG 691169695Skanstatic void PREFIX(extract_number) (int *dest, UCHAR_T *source); 692169695Skanstatic void 693169695SkanPREFIX(extract_number) (int *dest, UCHAR_T *source) 694169695Skan{ 695169695Skan# ifdef WCHAR 696169695Skan *dest = *source; 697169695Skan# else /* BYTE */ 698169695Skan int temp = SIGN_EXTEND_CHAR (*(source + 1)); 699169695Skan *dest = *source & 0377; 700169695Skan *dest += temp << 8; 701169695Skan# endif 702169695Skan} 703169695Skan 704169695Skan# ifndef EXTRACT_MACROS /* To debug the macros. */ 705169695Skan# undef EXTRACT_NUMBER 706169695Skan# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) 707169695Skan# endif /* not EXTRACT_MACROS */ 708169695Skan 709169695Skan# endif /* DEBUG */ 710169695Skan 711169695Skan/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 712169695Skan SOURCE must be an lvalue. 
*/ 713169695Skan 714169695Skan# define EXTRACT_NUMBER_AND_INCR(destination, source) \ 715169695Skan do { \ 716169695Skan EXTRACT_NUMBER (destination, source); \ 717169695Skan (source) += OFFSET_ADDRESS_SIZE; \ 718169695Skan } while (0) 719169695Skan 720169695Skan# ifdef DEBUG 721169695Skanstatic void PREFIX(extract_number_and_incr) (int *destination, 722169695Skan UCHAR_T **source); 723169695Skanstatic void 724169695SkanPREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source) 725169695Skan{ 726169695Skan PREFIX(extract_number) (destination, *source); 727169695Skan *source += OFFSET_ADDRESS_SIZE; 728169695Skan} 729169695Skan 730169695Skan# ifndef EXTRACT_MACROS 731169695Skan# undef EXTRACT_NUMBER_AND_INCR 732169695Skan# define EXTRACT_NUMBER_AND_INCR(dest, src) \ 733169695Skan PREFIX(extract_number_and_incr) (&dest, &src) 734169695Skan# endif /* not EXTRACT_MACROS */ 735169695Skan 736169695Skan# endif /* DEBUG */ 737169695Skan 738169695Skan 739169695Skan 740169695Skan/* If DEBUG is defined, Regex prints many voluminous messages about what 741169695Skan it is doing (if the variable `debug' is nonzero). If linked with the 742169695Skan main program in `iregex.c', you can enter patterns and strings 743169695Skan interactively. And if linked with the main program in `main.c' and 744169695Skan the other test files, you can run the already-written tests. */ 745169695Skan 746169695Skan# ifdef DEBUG 747169695Skan 748169695Skan# ifndef DEFINED_ONCE 749169695Skan 750169695Skan/* We use standard I/O for debugging. */ 751169695Skan# include <stdio.h> 752169695Skan 753169695Skan/* It is useful to test things that ``must'' be true when debugging. 
*/ 754169695Skan# include <assert.h> 755169695Skan 756169695Skanstatic int debug; 757169695Skan 758169695Skan# define DEBUG_STATEMENT(e) e 759169695Skan# define DEBUG_PRINT1(x) if (debug) printf (x) 760169695Skan# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 761169695Skan# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 762169695Skan# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 763169695Skan# endif /* not DEFINED_ONCE */ 764169695Skan 765169695Skan# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 766169695Skan if (debug) PREFIX(print_partial_compiled_pattern) (s, e) 767169695Skan# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 768169695Skan if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) 769169695Skan 770169695Skan 771169695Skan/* Print the fastmap in human-readable form. */ 772169695Skan 773169695Skan# ifndef DEFINED_ONCE 774169695Skanvoid 775169695Skanprint_fastmap (char *fastmap) 776169695Skan{ 777169695Skan unsigned was_a_range = 0; 778169695Skan unsigned i = 0; 779169695Skan 780169695Skan while (i < (1 << BYTEWIDTH)) 781169695Skan { 782169695Skan if (fastmap[i++]) 783169695Skan { 784169695Skan was_a_range = 0; 785169695Skan putchar (i - 1); 786169695Skan while (i < (1 << BYTEWIDTH) && fastmap[i]) 787169695Skan { 788169695Skan was_a_range = 1; 789169695Skan i++; 790169695Skan } 791169695Skan if (was_a_range) 792169695Skan { 793169695Skan printf ("-"); 794169695Skan putchar (i - 1); 795169695Skan } 796169695Skan } 797169695Skan } 798169695Skan putchar ('\n'); 799169695Skan} 800169695Skan# endif /* not DEFINED_ONCE */ 801169695Skan 802169695Skan 803169695Skan/* Print a compiled pattern string in human-readable form, starting at 804169695Skan the START pointer into it and ending just before the pointer END. 
*/ 805169695Skan 806169695Skanvoid 807169695SkanPREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end) 808169695Skan{ 809169695Skan int mcnt, mcnt2; 810169695Skan UCHAR_T *p1; 811169695Skan UCHAR_T *p = start; 812169695Skan UCHAR_T *pend = end; 813169695Skan 814169695Skan if (start == NULL) 815169695Skan { 816169695Skan printf ("(null)\n"); 817169695Skan return; 818169695Skan } 819169695Skan 820169695Skan /* Loop over pattern commands. */ 821169695Skan while (p < pend) 822169695Skan { 823169695Skan# ifdef _LIBC 824169695Skan printf ("%td:\t", p - start); 825169695Skan# else 826169695Skan printf ("%ld:\t", (long int) (p - start)); 827169695Skan# endif 828169695Skan 829169695Skan switch ((re_opcode_t) *p++) 830169695Skan { 831169695Skan case no_op: 832169695Skan printf ("/no_op"); 833169695Skan break; 834169695Skan 835169695Skan case exactn: 836169695Skan mcnt = *p++; 837169695Skan printf ("/exactn/%d", mcnt); 838169695Skan do 839169695Skan { 840169695Skan putchar ('/'); 841169695Skan PUT_CHAR (*p++); 842169695Skan } 843169695Skan while (--mcnt); 844169695Skan break; 845169695Skan 846169695Skan# ifdef MBS_SUPPORT 847169695Skan case exactn_bin: 848169695Skan mcnt = *p++; 849169695Skan printf ("/exactn_bin/%d", mcnt); 850169695Skan do 851169695Skan { 852169695Skan printf("/%lx", (long int) *p++); 853169695Skan } 854169695Skan while (--mcnt); 855169695Skan break; 856169695Skan# endif /* MBS_SUPPORT */ 857169695Skan 858169695Skan case start_memory: 859169695Skan mcnt = *p++; 860169695Skan printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); 861169695Skan break; 862169695Skan 863169695Skan case stop_memory: 864169695Skan mcnt = *p++; 865169695Skan printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); 866169695Skan break; 867169695Skan 868169695Skan case duplicate: 869169695Skan printf ("/duplicate/%ld", (long int) *p++); 870169695Skan break; 871169695Skan 872169695Skan case anychar: 873169695Skan printf ("/anychar"); 874169695Skan break; 875169695Skan 
876169695Skan case charset: 877169695Skan case charset_not: 878169695Skan { 879169695Skan# ifdef WCHAR 880169695Skan int i, length; 881169695Skan wchar_t *workp = p; 882169695Skan printf ("/charset [%s", 883169695Skan (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); 884169695Skan p += 5; 885169695Skan length = *workp++; /* the length of char_classes */ 886169695Skan for (i=0 ; i<length ; i++) 887169695Skan printf("[:%lx:]", (long int) *p++); 888169695Skan length = *workp++; /* the length of collating_symbol */ 889169695Skan for (i=0 ; i<length ;) 890169695Skan { 891169695Skan printf("[."); 892169695Skan while(*p != 0) 893169695Skan PUT_CHAR((i++,*p++)); 894169695Skan i++,p++; 895169695Skan printf(".]"); 896169695Skan } 897169695Skan length = *workp++; /* the length of equivalence_class */ 898169695Skan for (i=0 ; i<length ;) 899169695Skan { 900169695Skan printf("[="); 901169695Skan while(*p != 0) 902169695Skan PUT_CHAR((i++,*p++)); 903169695Skan i++,p++; 904169695Skan printf("=]"); 905169695Skan } 906169695Skan length = *workp++; /* the length of char_range */ 907169695Skan for (i=0 ; i<length ; i++) 908169695Skan { 909169695Skan wchar_t range_start = *p++; 910169695Skan wchar_t range_end = *p++; 911169695Skan printf("%C-%C", range_start, range_end); 912169695Skan } 913169695Skan length = *workp++; /* the length of char */ 914169695Skan for (i=0 ; i<length ; i++) 915169695Skan printf("%C", *p++); 916169695Skan putchar (']'); 917169695Skan# else 918169695Skan register int c, last = -100; 919169695Skan register int in_range = 0; 920169695Skan 921169695Skan printf ("/charset [%s", 922169695Skan (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 923169695Skan 924169695Skan assert (p + *p < pend); 925169695Skan 926169695Skan for (c = 0; c < 256; c++) 927169695Skan if (c / 8 < *p 928169695Skan && (p[1 + (c/8)] & (1 << (c % 8)))) 929169695Skan { 930169695Skan /* Are we starting a range? */ 931169695Skan if (last + 1 == c && ! 
in_range) 932169695Skan { 933169695Skan putchar ('-'); 934169695Skan in_range = 1; 935169695Skan } 936169695Skan /* Have we broken a range? */ 937169695Skan else if (last + 1 != c && in_range) 938169695Skan { 939169695Skan putchar (last); 940169695Skan in_range = 0; 941169695Skan } 942169695Skan 943169695Skan if (! in_range) 944169695Skan putchar (c); 945169695Skan 946169695Skan last = c; 947169695Skan } 948169695Skan 949169695Skan if (in_range) 950169695Skan putchar (last); 951169695Skan 952169695Skan putchar (']'); 953169695Skan 954169695Skan p += 1 + *p; 955169695Skan# endif /* WCHAR */ 956169695Skan } 957169695Skan break; 958169695Skan 959169695Skan case begline: 960169695Skan printf ("/begline"); 961169695Skan break; 962169695Skan 963169695Skan case endline: 964169695Skan printf ("/endline"); 965169695Skan break; 966169695Skan 967169695Skan case on_failure_jump: 968169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 969169695Skan# ifdef _LIBC 970169695Skan printf ("/on_failure_jump to %td", p + mcnt - start); 971169695Skan# else 972169695Skan printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start)); 973169695Skan# endif 974169695Skan break; 975169695Skan 976169695Skan case on_failure_keep_string_jump: 977169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 978169695Skan# ifdef _LIBC 979169695Skan printf ("/on_failure_keep_string_jump to %td", p + mcnt - start); 980169695Skan# else 981169695Skan printf ("/on_failure_keep_string_jump to %ld", 982169695Skan (long int) (p + mcnt - start)); 983169695Skan# endif 984169695Skan break; 985169695Skan 986169695Skan case dummy_failure_jump: 987169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 988169695Skan# ifdef _LIBC 989169695Skan printf ("/dummy_failure_jump to %td", p + mcnt - start); 990169695Skan# else 991169695Skan printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); 992169695Skan# endif 993169695Skan break; 994169695Skan 995169695Skan case push_dummy_failure: 996169695Skan 
printf ("/push_dummy_failure"); 997169695Skan break; 998169695Skan 999169695Skan case maybe_pop_jump: 1000169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 1001169695Skan# ifdef _LIBC 1002169695Skan printf ("/maybe_pop_jump to %td", p + mcnt - start); 1003169695Skan# else 1004169695Skan printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); 1005169695Skan# endif 1006169695Skan break; 1007169695Skan 1008169695Skan case pop_failure_jump: 1009169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 1010169695Skan# ifdef _LIBC 1011169695Skan printf ("/pop_failure_jump to %td", p + mcnt - start); 1012169695Skan# else 1013169695Skan printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); 1014169695Skan# endif 1015169695Skan break; 1016169695Skan 1017169695Skan case jump_past_alt: 1018169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 1019169695Skan# ifdef _LIBC 1020169695Skan printf ("/jump_past_alt to %td", p + mcnt - start); 1021169695Skan# else 1022169695Skan printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start)); 1023169695Skan# endif 1024169695Skan break; 1025169695Skan 1026169695Skan case jump: 1027169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 1028169695Skan# ifdef _LIBC 1029169695Skan printf ("/jump to %td", p + mcnt - start); 1030169695Skan# else 1031169695Skan printf ("/jump to %ld", (long int) (p + mcnt - start)); 1032169695Skan# endif 1033169695Skan break; 1034169695Skan 1035169695Skan case succeed_n: 1036169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 1037169695Skan p1 = p + mcnt; 1038169695Skan PREFIX(extract_number_and_incr) (&mcnt2, &p); 1039169695Skan# ifdef _LIBC 1040169695Skan printf ("/succeed_n to %td, %d times", p1 - start, mcnt2); 1041169695Skan# else 1042169695Skan printf ("/succeed_n to %ld, %d times", 1043169695Skan (long int) (p1 - start), mcnt2); 1044169695Skan# endif 1045169695Skan break; 1046169695Skan 1047169695Skan case jump_n: 1048169695Skan PREFIX(extract_number_and_incr) (&mcnt, 
&p); 1049169695Skan p1 = p + mcnt; 1050169695Skan PREFIX(extract_number_and_incr) (&mcnt2, &p); 1051169695Skan printf ("/jump_n to %d, %d times", p1 - start, mcnt2); 1052169695Skan break; 1053169695Skan 1054169695Skan case set_number_at: 1055169695Skan PREFIX(extract_number_and_incr) (&mcnt, &p); 1056169695Skan p1 = p + mcnt; 1057169695Skan PREFIX(extract_number_and_incr) (&mcnt2, &p); 1058169695Skan# ifdef _LIBC 1059169695Skan printf ("/set_number_at location %td to %d", p1 - start, mcnt2); 1060169695Skan# else 1061169695Skan printf ("/set_number_at location %ld to %d", 1062169695Skan (long int) (p1 - start), mcnt2); 1063169695Skan# endif 1064169695Skan break; 1065169695Skan 1066169695Skan case wordbound: 1067169695Skan printf ("/wordbound"); 1068169695Skan break; 1069169695Skan 1070169695Skan case notwordbound: 1071169695Skan printf ("/notwordbound"); 1072169695Skan break; 1073169695Skan 1074169695Skan case wordbeg: 1075169695Skan printf ("/wordbeg"); 1076169695Skan break; 1077169695Skan 1078169695Skan case wordend: 1079169695Skan printf ("/wordend"); 1080169695Skan break; 1081169695Skan 1082169695Skan# ifdef emacs 1083169695Skan case before_dot: 1084169695Skan printf ("/before_dot"); 1085169695Skan break; 1086169695Skan 1087169695Skan case at_dot: 1088169695Skan printf ("/at_dot"); 1089169695Skan break; 1090169695Skan 1091169695Skan case after_dot: 1092169695Skan printf ("/after_dot"); 1093169695Skan break; 1094169695Skan 1095169695Skan case syntaxspec: 1096169695Skan printf ("/syntaxspec"); 1097169695Skan mcnt = *p++; 1098169695Skan printf ("/%d", mcnt); 1099169695Skan break; 1100169695Skan 1101169695Skan case notsyntaxspec: 1102169695Skan printf ("/notsyntaxspec"); 1103169695Skan mcnt = *p++; 1104169695Skan printf ("/%d", mcnt); 1105169695Skan break; 1106169695Skan# endif /* emacs */ 1107169695Skan 1108169695Skan case wordchar: 1109169695Skan printf ("/wordchar"); 1110169695Skan break; 1111169695Skan 1112169695Skan case notwordchar: 1113169695Skan printf 
("/notwordchar"); 1114169695Skan break; 1115169695Skan 1116169695Skan case begbuf: 1117169695Skan printf ("/begbuf"); 1118169695Skan break; 1119169695Skan 1120169695Skan case endbuf: 1121169695Skan printf ("/endbuf"); 1122169695Skan break; 1123169695Skan 1124169695Skan default: 1125169695Skan printf ("?%ld", (long int) *(p-1)); 1126169695Skan } 1127169695Skan 1128169695Skan putchar ('\n'); 1129169695Skan } 1130169695Skan 1131169695Skan# ifdef _LIBC 1132169695Skan printf ("%td:\tend of pattern.\n", p - start); 1133169695Skan# else 1134169695Skan printf ("%ld:\tend of pattern.\n", (long int) (p - start)); 1135169695Skan# endif 1136169695Skan} 1137169695Skan 1138169695Skan 1139169695Skanvoid 1140169695SkanPREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp) 1141169695Skan{ 1142169695Skan UCHAR_T *buffer = (UCHAR_T*) bufp->buffer; 1143169695Skan 1144169695Skan PREFIX(print_partial_compiled_pattern) (buffer, buffer 1145169695Skan + bufp->used / sizeof(UCHAR_T)); 1146169695Skan printf ("%ld bytes used/%ld bytes allocated.\n", 1147169695Skan bufp->used, bufp->allocated); 1148169695Skan 1149169695Skan if (bufp->fastmap_accurate && bufp->fastmap) 1150169695Skan { 1151169695Skan printf ("fastmap: "); 1152169695Skan print_fastmap (bufp->fastmap); 1153169695Skan } 1154169695Skan 1155169695Skan# ifdef _LIBC 1156169695Skan printf ("re_nsub: %Zd\t", bufp->re_nsub); 1157169695Skan# else 1158169695Skan printf ("re_nsub: %ld\t", (long int) bufp->re_nsub); 1159169695Skan# endif 1160169695Skan printf ("regs_alloc: %d\t", bufp->regs_allocated); 1161169695Skan printf ("can_be_null: %d\t", bufp->can_be_null); 1162169695Skan printf ("newline_anchor: %d\n", bufp->newline_anchor); 1163169695Skan printf ("no_sub: %d\t", bufp->no_sub); 1164169695Skan printf ("not_bol: %d\t", bufp->not_bol); 1165169695Skan printf ("not_eol: %d\t", bufp->not_eol); 1166169695Skan printf ("syntax: %lx\n", bufp->syntax); 1167169695Skan /* Perhaps we should print the translate table? 
*/ 1168169695Skan} 1169169695Skan 1170169695Skan 1171169695Skanvoid 1172169695SkanPREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1, 1173169695Skan int size1, const CHAR_T *string2, int size2) 1174169695Skan{ 1175169695Skan int this_char; 1176169695Skan 1177169695Skan if (where == NULL) 1178169695Skan printf ("(null)"); 1179169695Skan else 1180169695Skan { 1181169695Skan int cnt; 1182169695Skan 1183169695Skan if (FIRST_STRING_P (where)) 1184169695Skan { 1185169695Skan for (this_char = where - string1; this_char < size1; this_char++) 1186169695Skan PUT_CHAR (string1[this_char]); 1187169695Skan 1188169695Skan where = string2; 1189169695Skan } 1190169695Skan 1191169695Skan cnt = 0; 1192169695Skan for (this_char = where - string2; this_char < size2; this_char++) 1193169695Skan { 1194169695Skan PUT_CHAR (string2[this_char]); 1195169695Skan if (++cnt > 100) 1196169695Skan { 1197169695Skan fputs ("...", stdout); 1198169695Skan break; 1199169695Skan } 1200169695Skan } 1201169695Skan } 1202169695Skan} 1203169695Skan 1204169695Skan# ifndef DEFINED_ONCE 1205169695Skanvoid 1206169695Skanprintchar (int c) 1207169695Skan{ 1208169695Skan putc (c, stderr); 1209169695Skan} 1210169695Skan# endif 1211169695Skan 1212169695Skan# else /* not DEBUG */ 1213169695Skan 1214169695Skan# ifndef DEFINED_ONCE 1215169695Skan# undef assert 1216169695Skan# define assert(e) 1217169695Skan 1218169695Skan# define DEBUG_STATEMENT(e) 1219169695Skan# define DEBUG_PRINT1(x) 1220169695Skan# define DEBUG_PRINT2(x1, x2) 1221169695Skan# define DEBUG_PRINT3(x1, x2, x3) 1222169695Skan# define DEBUG_PRINT4(x1, x2, x3, x4) 1223169695Skan# endif /* not DEFINED_ONCE */ 1224169695Skan# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 1225169695Skan# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 1226169695Skan 1227169695Skan# endif /* not DEBUG */ 1228169695Skan 1229169695Skan 1230169695Skan 1231169695Skan# ifdef WCHAR 1232169695Skan/* This convert a multibyte string to a wide character 
string. 1233169695Skan And write their correspondances to offset_buffer(see below) 1234169695Skan and write whether each wchar_t is binary data to is_binary. 1235169695Skan This assume invalid multibyte sequences as binary data. 1236169695Skan We assume offset_buffer and is_binary is already allocated 1237169695Skan enough space. */ 1238169695Skan 1239169695Skanstatic size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src, 1240169695Skan size_t len, int *offset_buffer, 1241169695Skan char *is_binary); 1242169695Skanstatic size_t 1243169695Skanconvert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len, 1244169695Skan int *offset_buffer, char *is_binary) 1245169695Skan /* It hold correspondances between src(char string) and 1246169695Skan dest(wchar_t string) for optimization. 1247169695Skan e.g. src = "xxxyzz" 1248169695Skan dest = {'X', 'Y', 'Z'} 1249169695Skan (each "xxx", "y" and "zz" represent one multibyte character 1250169695Skan corresponding to 'X', 'Y' and 'Z'.) 1251169695Skan offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")} 1252169695Skan = {0, 3, 4, 6} 1253169695Skan */ 1254169695Skan{ 1255169695Skan wchar_t *pdest = dest; 1256169695Skan const unsigned char *psrc = src; 1257169695Skan size_t wc_count = 0; 1258169695Skan 1259169695Skan mbstate_t mbs; 1260169695Skan int i, consumed; 1261169695Skan size_t mb_remain = len; 1262169695Skan size_t mb_count = 0; 1263169695Skan 1264169695Skan /* Initialize the conversion state. 
*/ 1265169695Skan memset (&mbs, 0, sizeof (mbstate_t)); 1266169695Skan 1267169695Skan offset_buffer[0] = 0; 1268169695Skan for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, 1269169695Skan psrc += consumed) 1270169695Skan { 1271169695Skan#ifdef _LIBC 1272169695Skan consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs); 1273169695Skan#else 1274169695Skan consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); 1275169695Skan#endif 1276169695Skan 1277169695Skan if (consumed <= 0) 1278169695Skan /* failed to convert. maybe src contains binary data. 1279169695Skan So we consume 1 byte manualy. */ 1280169695Skan { 1281169695Skan *pdest = *psrc; 1282169695Skan consumed = 1; 1283169695Skan is_binary[wc_count] = TRUE; 1284169695Skan } 1285169695Skan else 1286169695Skan is_binary[wc_count] = FALSE; 1287169695Skan /* In sjis encoding, we use yen sign as escape character in 1288169695Skan place of reverse solidus. So we convert 0x5c(yen sign in 1289169695Skan sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse 1290169695Skan solidus in UCS2). */ 1291169695Skan if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) 1292169695Skan *pdest = (wchar_t) *psrc; 1293169695Skan 1294169695Skan offset_buffer[wc_count + 1] = mb_count += consumed; 1295169695Skan } 1296169695Skan 1297169695Skan /* Fill remain of the buffer with sentinel. */ 1298169695Skan for (i = wc_count + 1 ; i <= len ; i++) 1299169695Skan offset_buffer[i] = mb_count + 1; 1300169695Skan 1301169695Skan return wc_count; 1302169695Skan} 1303169695Skan 1304169695Skan# endif /* WCHAR */ 1305169695Skan 1306169695Skan#else /* not INSIDE_RECURSION */ 1307169695Skan 1308169695Skan/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 1309169695Skan also be assigned to arbitrarily: each pattern buffer stores its own 1310169695Skan syntax, so it can be changed between regex compilations. 
*/ 1311169695Skan/* This has no initializer because initialized variables in Emacs 1312169695Skan become read-only after dumping. */ 1313169695Skanreg_syntax_t re_syntax_options; 1314169695Skan 1315169695Skan 1316169695Skan/* Specify the precise syntax of regexps for compilation. This provides 1317169695Skan for compatibility for various utilities which historically have 1318169695Skan different, incompatible syntaxes. 1319169695Skan 1320169695Skan The argument SYNTAX is a bit mask comprised of the various bits 1321169695Skan defined in regex.h. We return the old syntax. */ 1322169695Skan 1323169695Skanreg_syntax_t 1324169695Skanre_set_syntax (reg_syntax_t syntax) 1325169695Skan{ 1326169695Skan reg_syntax_t ret = re_syntax_options; 1327169695Skan 1328169695Skan re_syntax_options = syntax; 1329169695Skan# ifdef DEBUG 1330169695Skan if (syntax & RE_DEBUG) 1331169695Skan debug = 1; 1332169695Skan else if (debug) /* was on but now is not */ 1333169695Skan debug = 0; 1334169695Skan# endif /* DEBUG */ 1335169695Skan return ret; 1336169695Skan} 1337169695Skan# ifdef _LIBC 1338169695Skanweak_alias (__re_set_syntax, re_set_syntax) 1339169695Skan# endif 1340169695Skan 1341169695Skan/* This table gives an error message for each of the error codes listed 1342169695Skan in regex.h. Obviously the order here has to be same as there. 1343169695Skan POSIX doesn't require that we do anything for REG_NOERROR, 1344169695Skan but why not be nice? 
*/ 1345169695Skan 1346169695Skanstatic const char *re_error_msgid[] = 1347169695Skan { 1348169695Skan gettext_noop ("Success"), /* REG_NOERROR */ 1349169695Skan gettext_noop ("No match"), /* REG_NOMATCH */ 1350169695Skan gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ 1351169695Skan gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ 1352169695Skan gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ 1353169695Skan gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ 1354169695Skan gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ 1355169695Skan gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ 1356169695Skan gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ 1357169695Skan gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ 1358169695Skan gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ 1359169695Skan gettext_noop ("Invalid range end"), /* REG_ERANGE */ 1360169695Skan gettext_noop ("Memory exhausted"), /* REG_ESPACE */ 1361169695Skan gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ 1362169695Skan gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 1363169695Skan gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 1364169695Skan gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ 1365169695Skan }; 1366169695Skan 1367169695Skan#endif /* INSIDE_RECURSION */ 1368169695Skan 1369169695Skan#ifndef DEFINED_ONCE 1370169695Skan/* Avoiding alloca during matching, to placate r_alloc. */ 1371169695Skan 1372169695Skan/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1373169695Skan searching and matching functions should not call alloca. 
On some 1374169695Skan systems, alloca is implemented in terms of malloc, and if we're 1375169695Skan using the relocating allocator routines, then malloc could cause a 1376169695Skan relocation, which might (if the strings being searched are in the 1377169695Skan ralloc heap) shift the data out from underneath the regexp 1378169695Skan routines. 1379169695Skan 1380169695Skan Here's another reason to avoid allocation: Emacs 1381169695Skan processes input from X in a signal handler; processing X input may 1382169695Skan call malloc; if input arrives while a matching routine is calling 1383169695Skan malloc, then we're scrod. But Emacs can't just block input while 1384169695Skan calling matching routines; then we don't notice interrupts when 1385169695Skan they come in. So, Emacs blocks input around all regexp calls 1386169695Skan except the matching calls, which it leaves unprotected, in the 1387169695Skan faith that they will not malloc. */ 1388169695Skan 1389169695Skan/* Normally, this is fine. */ 1390169695Skan# define MATCH_MAY_ALLOCATE 1391169695Skan 1392169695Skan/* When using GNU C, we are not REALLY using the C alloca, no matter 1393169695Skan what config.h may say. So don't take precautions for it. */ 1394169695Skan# ifdef __GNUC__ 1395169695Skan# undef C_ALLOCA 1396169695Skan# endif 1397169695Skan 1398169695Skan/* The match routines may not allocate if (1) they would do it with malloc 1399169695Skan and (2) it's not safe for them to use malloc. 1400169695Skan Note that if REL_ALLOC is defined, matching would not use malloc for the 1401169695Skan failure stack, but we would still use it for the register vectors; 1402169695Skan so REL_ALLOC should not affect this. 
*/ 1403169695Skan# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 1404169695Skan# undef MATCH_MAY_ALLOCATE 1405169695Skan# endif 1406169695Skan#endif /* not DEFINED_ONCE */ 1407169695Skan 1408169695Skan#ifdef INSIDE_RECURSION 1409169695Skan/* Failure stack declarations and macros; both re_compile_fastmap and 1410169695Skan re_match_2 use a failure stack. These have to be macros because of 1411169695Skan REGEX_ALLOCATE_STACK. */ 1412169695Skan 1413169695Skan 1414169695Skan/* Number of failure points for which to initially allocate space 1415169695Skan when matching. If this number is exceeded, we allocate more 1416169695Skan space, so it is not a hard limit. */ 1417169695Skan# ifndef INIT_FAILURE_ALLOC 1418169695Skan# define INIT_FAILURE_ALLOC 5 1419169695Skan# endif 1420169695Skan 1421169695Skan/* Roughly the maximum number of failure points on the stack. Would be 1422169695Skan exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 1423169695Skan This is a variable only so users of regex can assign to it; we never 1424169695Skan change it ourselves. */ 1425169695Skan 1426169695Skan# ifdef INT_IS_16BIT 1427169695Skan 1428169695Skan# ifndef DEFINED_ONCE 1429169695Skan# if defined MATCH_MAY_ALLOCATE 1430169695Skan/* 4400 was enough to cause a crash on Alpha OSF/1, 1431169695Skan whose default stack limit is 2mb. */ 1432169695Skanlong int re_max_failures = 4000; 1433169695Skan# else 1434169695Skanlong int re_max_failures = 2000; 1435169695Skan# endif 1436169695Skan# endif 1437169695Skan 1438169695Skanunion PREFIX(fail_stack_elt) 1439169695Skan{ 1440169695Skan UCHAR_T *pointer; 1441169695Skan long int integer; 1442169695Skan}; 1443169695Skan 1444169695Skantypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 1445169695Skan 1446169695Skantypedef struct 1447169695Skan{ 1448169695Skan PREFIX(fail_stack_elt_t) *stack; 1449169695Skan unsigned long int size; 1450169695Skan unsigned long int avail; /* Offset of next open position. 
*/ 1451169695Skan} PREFIX(fail_stack_type); 1452169695Skan 1453169695Skan# else /* not INT_IS_16BIT */ 1454169695Skan 1455169695Skan# ifndef DEFINED_ONCE 1456169695Skan# if defined MATCH_MAY_ALLOCATE 1457169695Skan/* 4400 was enough to cause a crash on Alpha OSF/1, 1458169695Skan whose default stack limit is 2mb. */ 1459169695Skanint re_max_failures = 4000; 1460169695Skan# else 1461169695Skanint re_max_failures = 2000; 1462169695Skan# endif 1463169695Skan# endif 1464169695Skan 1465169695Skanunion PREFIX(fail_stack_elt) 1466169695Skan{ 1467169695Skan UCHAR_T *pointer; 1468169695Skan int integer; 1469169695Skan}; 1470169695Skan 1471169695Skantypedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); 1472169695Skan 1473169695Skantypedef struct 1474169695Skan{ 1475169695Skan PREFIX(fail_stack_elt_t) *stack; 1476169695Skan unsigned size; 1477169695Skan unsigned avail; /* Offset of next open position. */ 1478169695Skan} PREFIX(fail_stack_type); 1479169695Skan 1480169695Skan# endif /* INT_IS_16BIT */ 1481169695Skan 1482169695Skan# ifndef DEFINED_ONCE 1483169695Skan# define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 1484169695Skan# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 1485169695Skan# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1486169695Skan# endif 1487169695Skan 1488169695Skan 1489169695Skan/* Define macros to initialize and free the failure stack. 1490169695Skan Do `return -2' if the alloc fails. 
*/ 1491169695Skan 1492169695Skan# ifdef MATCH_MAY_ALLOCATE 1493169695Skan# define INIT_FAIL_STACK() \ 1494169695Skan do { \ 1495169695Skan fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \ 1496169695Skan REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \ 1497169695Skan \ 1498169695Skan if (fail_stack.stack == NULL) \ 1499169695Skan return -2; \ 1500169695Skan \ 1501169695Skan fail_stack.size = INIT_FAILURE_ALLOC; \ 1502169695Skan fail_stack.avail = 0; \ 1503169695Skan } while (0) 1504169695Skan 1505169695Skan# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 1506169695Skan# else 1507169695Skan# define INIT_FAIL_STACK() \ 1508169695Skan do { \ 1509169695Skan fail_stack.avail = 0; \ 1510169695Skan } while (0) 1511169695Skan 1512169695Skan# define RESET_FAIL_STACK() 1513169695Skan# endif 1514169695Skan 1515169695Skan 1516169695Skan/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 1517169695Skan 1518169695Skan Return 1 if succeeds, and 0 if either ran out of memory 1519169695Skan allocating space for it or it was already too large. 1520169695Skan 1521169695Skan REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1522169695Skan 1523169695Skan# define DOUBLE_FAIL_STACK(fail_stack) \ 1524169695Skan ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ 1525169695Skan ? 0 \ 1526169695Skan : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \ 1527169695Skan REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1528169695Skan (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \ 1529169695Skan ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\ 1530169695Skan \ 1531169695Skan (fail_stack).stack == NULL \ 1532169695Skan ? 0 \ 1533169695Skan : ((fail_stack).size <<= 1, \ 1534169695Skan 1))) 1535169695Skan 1536169695Skan 1537169695Skan/* Push pointer POINTER on FAIL_STACK. 
1538169695Skan Return 1 if was able to do so and 0 if ran out of memory allocating 1539169695Skan space to do so. */ 1540169695Skan# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 1541169695Skan ((FAIL_STACK_FULL () \ 1542169695Skan && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ 1543169695Skan ? 0 \ 1544169695Skan : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1545169695Skan 1)) 1546169695Skan 1547169695Skan/* Push a pointer value onto the failure stack. 1548169695Skan Assumes the variable `fail_stack'. Probably should only 1549169695Skan be called from within `PUSH_FAILURE_POINT'. */ 1550169695Skan# define PUSH_FAILURE_POINTER(item) \ 1551169695Skan fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item) 1552169695Skan 1553169695Skan/* This pushes an integer-valued item onto the failure stack. 1554169695Skan Assumes the variable `fail_stack'. Probably should only 1555169695Skan be called from within `PUSH_FAILURE_POINT'. */ 1556169695Skan# define PUSH_FAILURE_INT(item) \ 1557169695Skan fail_stack.stack[fail_stack.avail++].integer = (item) 1558169695Skan 1559169695Skan/* Push a fail_stack_elt_t value onto the failure stack. 1560169695Skan Assumes the variable `fail_stack'. Probably should only 1561169695Skan be called from within `PUSH_FAILURE_POINT'. */ 1562169695Skan# define PUSH_FAILURE_ELT(item) \ 1563169695Skan fail_stack.stack[fail_stack.avail++] = (item) 1564169695Skan 1565169695Skan/* These three POP... operations complement the three PUSH... operations. 1566169695Skan All assume that `fail_stack' is nonempty. */ 1567169695Skan# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 1568169695Skan# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 1569169695Skan# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 1570169695Skan 1571169695Skan/* Used to omit pushing failure point id's when we're not debugging. 
*/
/* With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores
   a popped integer through ITEM_ADDR.  Without DEBUG both expand to
   nothing, so their arguments are not evaluated.  */
# ifdef DEBUG
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# endif


/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   be declared.

   The expansion also reads lowest_active_reg and highest_active_reg from
   the enclosing scope; the debugging statements additionally mention
   failure_id, nfailure_points_pushed, bufp, pend, string1, size1,
   string2 and size2.

   Items are pushed in this order: for every register from
   lowest_active_reg up to highest_active_reg its regstart, regend and
   reg_info word; then lowest_active_reg, highest_active_reg,
   PATTERN_PLACE, STRING_PLACE and, via DEBUG_PUSH, the failure id.

   Does `return FAILURE_CODE' if runs out of memory.  The `return' leaves
   the function in which the macro is expanded.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    char *destination; \
    /* Must be int, so when we don't save any registers, the arithmetic \
       of 0 + -1 isn't done as unsigned.  */ \
    /* Can't be int, since there is not a shred of a guarantee that int \
       is wide enough to hold a value of something to which pointer can \
       be assigned */ \
    /* NOTE(review): the two notes above contradict each other; the \
       declaration below is active_reg_t, i.e. it follows the second.  */ \
    active_reg_t this_reg; \
 \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
 \
    DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
 \
    /* Ensure we have enough space allocated for what we will push.  \
       All growth happens here; the PUSH_FAILURE_* calls below do no \
       checking of their own.  */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
 \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      } \
 \
    /* Push the info, starting with the registers.  */ \
    DEBUG_PRINT1 ("\n"); \
 \
    /* `if (1)' is always true, so the register loop always runs.  */ \
    if (1) \
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
           this_reg++) \
        { \
          DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
          DEBUG_STATEMENT (num_regs_pushed++); \
 \
          DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
          PUSH_FAILURE_POINTER (regstart[this_reg]); \
 \
          DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
          PUSH_FAILURE_POINTER (regend[this_reg]); \
 \
          DEBUG_PRINT2 (" info: %p\n ", \
                        reg_info[this_reg].word.pointer); \
          DEBUG_PRINT2 (" match_null=%d", \
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
          DEBUG_PRINT2 (" matched_something=%d", \
                        MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT2 (" ever_matched=%d", \
                        EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT1 ("\n"); \
          PUSH_FAILURE_ELT (reg_info[this_reg].word); \
        } \
 \
    DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg); \
 \
    DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg); \
 \
    DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_POINTER (pattern_place); \
 \
    DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
 \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)

# ifndef DEFINED_ONCE
/* This is the number of items that are pushed and popped on the stack
   for each register.  PUSH_FAILURE_POINT pushes regstart, regend and the
   reg_info word for each one.  */
#  define NUM_REG_ITEMS 3

/* Individual items aside from the registers.  PUSH_FAILURE_POINT pushes
   the lowest and highest active register numbers, the pattern position
   and the string position, plus the failure id when DEBUG is defined.  */
#  ifdef DEBUG
#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#  else
#   define NUM_NONREG_ITEMS 4
#  endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  The `0 ? 0 : ...' conditional always
   selects its third operand, so this is the number of registers between
   lowest_active_reg and highest_active_reg inclusive, times
   NUM_REG_ITEMS, plus NUM_NONREG_ITEMS.  */
#  define NUM_FAILURE_ITEMS \
  (((0 \
     ? 0 : highest_active_reg - lowest_active_reg + 1) \
    * NUM_REG_ITEMS) \
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.
*/
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */


/* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.

   The expansion additionally assigns 0 to `set_regs_matched_done'.
   It is a bare `{ ... }' block, not a `do ... while (0)'.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
  DEBUG_STATEMENT (unsigned failure_id;) \
  active_reg_t this_reg; \
  const UCHAR_T *string_temp; \
 \
  assert (!FAIL_STACK_EMPTY ()); \
 \
  /* Remove failure points and point to how many regs pushed.  */ \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
 \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
 \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
 \
  /* If the saved string location is NULL, it came from an \
     on_failure_keep_string_jump opcode, and we want to throw away the \
     saved NULL, thus retaining our current position in the string.  */ \
  string_temp = POP_FAILURE_POINTER (); \
  if (string_temp != NULL) \
    str = (const CHAR_T *) string_temp; \
 \
  DEBUG_PRINT2 (" Popping string %p: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
 \
  pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
  DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
 \
  /* Restore register info.  */ \
  high_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
 \
  low_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
 \
  /* `if (1)' is always true: the registers are always restored, from \
     HIGH_REG down to LOW_REG, and the `else' branch never executes.  */ \
  if (1) \
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
      { \
        DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
 \
        reg_info[this_reg].word = POP_FAILURE_ELT (); \
        DEBUG_PRINT2 (" info: %p\n", \
                      reg_info[this_reg].word.pointer); \
 \
        regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
 \
        regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
      } \
  else \
    { \
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
        { \
          reg_info[this_reg].word.integer = 0; \
          regend[this_reg] = 0; \
          regstart[this_reg] = 0; \
        } \
      highest_active_reg = high_reg; \
    } \
 \
  set_regs_matched_done = 0; \
  DEBUG_STATEMENT (nfailure_points_popped++); \
} /* POP_FAILURE_POINT */

/* Structure for per-register (a.k.a. per-group) information.
   Other register information, such as the
   starting and ending positions (which are addresses), and the list of
   inner groups (which is a bits list) are maintained in separate
   variables.

   We are making a (strictly speaking) nonportable assumption here: that
   the compiler will pack our bit fields into something that fits into
   the type of `word', i.e., is something that fits into one item on the
   failure stack.  */


/* Declarations and macros for re_match_2.  */

/* `word' is the view of this union that PUSH_FAILURE_POINT and
   POP_FAILURE_POINT save and restore; `bits' is the view read and
   written through the accessor macros defined after the typedef.  */
typedef union
{
  PREFIX(fail_stack_elt_t) word;
  struct
  {
      /* This field is one if this group can match the empty string,
         zero if not.  If not yet determined, `MATCH_NULL_UNSET_VALUE'.  */
# define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    unsigned is_active : 1;
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} PREFIX(register_info_type);

# ifndef DEFINED_ONCE
/* Accessors for the bit fields of a register_info_type object R.  Each
   expands to the bit field itself, so each can also be assigned to.  */
#  define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
#  define IS_ACTIVE(R) ((R).bits.is_active)
#  define MATCHED_SOMETHING(R) ((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)


/* Call this when have matched a real character; it sets `matched' flags
   for the subexpressions which we are currently inside.  Also records
   that those subexprs have matched.

   Does nothing when `set_regs_matched_done' is already nonzero;
   otherwise sets it to 1 and marks every register from
   lowest_active_reg to highest_active_reg.  Uses `reg_info' from the
   enclosing scope.  */
#  define SET_REGS_MATCHED() \
  do \
    { \
      if (!set_regs_matched_done) \
        { \
          active_reg_t r; \
          set_regs_matched_done = 1; \
          for (r = lowest_active_reg; r <= highest_active_reg; r++) \
            { \
              MATCHED_SOMETHING (reg_info[r]) \
                = EVER_MATCHED_SOMETHING (reg_info[r]) \
                = 1; \
            } \
        } \
    } \
  while (0)
# endif /* not DEFINED_ONCE */

/* Registers are set to a sentinel when they haven't yet matched.
   The sentinel is the address of this otherwise unused static object.  */
static CHAR_T PREFIX(reg_unset_dummy);
# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
# define REG_UNSET(e) ((e) == REG_UNSET_VALUE)

/* Subroutine declarations and macros for regex_compile.
*/
/* Forward declarations only; the definitions appear later in the file.
   The store_op and insert_op functions are the ones invoked by the
   STORE_JUMP, STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2 macros.  */
static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
                               int arg1, int arg2);
static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
                                int arg, UCHAR_T *end);
static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
                                int arg1, int arg2, UCHAR_T *end);
static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
                                         const CHAR_T *p,
                                         reg_syntax_t syntax);
static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
                                         const CHAR_T *pend,
                                         reg_syntax_t syntax);
/* The range compiler has a different name and parameter list in the
   wide-character (WCHAR) and byte builds.  */
# ifdef WCHAR
static reg_errcode_t wcs_compile_range (CHAR_T range_start,
                                        const CHAR_T **p_ptr,
                                        const CHAR_T *pend,
                                        char *translate,
                                        reg_syntax_t syntax,
                                        UCHAR_T *b,
                                        CHAR_T *char_set);
static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
# else /* BYTE */
static reg_errcode_t byte_compile_range (unsigned int range_start,
                                         const char **p_ptr,
                                         const char *pend,
                                         char *translate,
                                         reg_syntax_t syntax,
                                         unsigned char *b);
# endif /* WCHAR */

/* Fetch the next character in the uncompiled pattern---translating it
   if necessary.  Also cast from a signed character in the constant
   string passed to us by the user to an unsigned char that we can use
   as an array index (in, e.g., `translate').  */
/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   because it is impossible to allocate 4GB array for some encodings
   which have 4 byte character_set like UCS4.
*/
/* PATFETCH reads `p', `pend' and `translate' from the enclosing scope,
   advances `p', and assigns the fetched character to C.  When the
   pattern is exhausted (p == pend) it does `return REG_EEND' from the
   function in which it is expanded.  */
# ifndef PATFETCH
#  ifdef WCHAR
#   define PATFETCH(c) \
  do {if (p == pend) return REG_EEND; \
    c = (UCHAR_T) *p++; \
    if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
  } while (0)
#  else /* BYTE */
#   define PATFETCH(c) \
  do {if (p == pend) return REG_EEND; \
    c = (unsigned char) *p++; \
    if (translate) c = (unsigned char) translate[c]; \
  } while (0)
#  endif /* WCHAR */
# endif

/* Fetch the next character in the uncompiled pattern, with no
   translation.  Like PATFETCH, returns REG_EEND from the enclosing
   function when p == pend.  */
# define PATFETCH_RAW(c) \
  do {if (p == pend) return REG_EEND; \
    c = (UCHAR_T) *p++; \
  } while (0)

/* Go backwards one character in the pattern.  Expands to the bare
   expression `p--'; no lower bound is checked.  */
# define PATUNFETCH p--


/* If `translate' is non-null, return translate[D], else just D.  We
   cast the subscript to translate because some data is declared as
   `char *', to avoid warnings when a string constant is passed.  But
   when we use a character as a subscript we must make it unsigned.  */
/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   because it is impossible to allocate 4GB array for some encodings
   which have 4 byte character_set like UCS4.  */

/* D may be evaluated more than once, so it should have no side effects.  */
# ifndef TRANSLATE
#  ifdef WCHAR
#   define TRANSLATE(d) \
  ((translate && ((UCHAR_T) (d)) <= 0xff) \
   ? (char) translate[(unsigned char) (d)] : (d))
#  else /* BYTE */
#   define TRANSLATE(d) \
  (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
#  endif /* WCHAR */
# endif


/* Macros for outputting the compiled pattern into `buffer'.  */

/* If the buffer isn't allocated when it comes in, use this.  */
# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))

/* Make sure we have at least N more bytes of space in buffer.
   In the WCHAR build N is multiplied by sizeof(CHAR_T), i.e. it counts
   characters there.  Expands to a `while' loop around EXTEND_BUFFER,
   which can itself return from the enclosing function.  Uses `b' and
   `bufp' from the enclosing scope.  */
# ifdef WCHAR
#  define GET_BUFFER_SPACE(n) \
    while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
            + (n)*sizeof(CHAR_T)) > bufp->allocated) \
      EXTEND_BUFFER ()
# else /* BYTE */
#  define GET_BUFFER_SPACE(n) \
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
      EXTEND_BUFFER ()
# endif /* WCHAR */

/* Make sure we have one more byte of buffer space and then add C to it.
   C is converted to UCHAR_T and stored at `b', which is advanced.  */
# define BUF_PUSH(c) \
  do { \
    GET_BUFFER_SPACE (1); \
    *b++ = (UCHAR_T) (c); \
  } while (0)


/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
# define BUF_PUSH_2(c1, c2) \
  do { \
    GET_BUFFER_SPACE (2); \
    *b++ = (UCHAR_T) (c1); \
    *b++ = (UCHAR_T) (c2); \
  } while (0)


/* As with BUF_PUSH_2, except for three bytes.  */
# define BUF_PUSH_3(c1, c2, c3) \
  do { \
    GET_BUFFER_SPACE (3); \
    *b++ = (UCHAR_T) (c1); \
    *b++ = (UCHAR_T) (c2); \
    *b++ = (UCHAR_T) (c3); \
  } while (0)

/* Store a jump with opcode OP at LOC to location TO.
We store a
   relative address offset by the three bytes the jump itself occupies.  */
/* The amount actually subtracted is (1 + OFFSET_ADDRESS_SIZE) elements.
   NOTE(review): "three bytes" presumably describes the case where
   OFFSET_ADDRESS_SIZE is 2 -- confirm against its definition.
   The computed displacement is converted to int before being passed to
   store_op1.  */
# define STORE_JUMP(op, loc, to) \
  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))

/* Likewise, for a two-argument jump.  */
# define STORE_JUMP2(op, loc, to, arg) \
  PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP(op, loc, to) \
  PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP2(op, loc, to, arg) \
  PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
                      arg, b)

/* This is not an arbitrary limit: the arguments which represent offsets
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
   be too small, many things would have to change.  */
/* Any other compiler which, like MSC, has allocation limit below 2^16
   bytes will have to use approach similar to what was done below for
   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   reallocating to 0 bytes.  Such thing is not going to work too well.
   You have been warned!!  */
# ifndef DEFINED_ONCE
#  if defined _MSC_VER && !defined WIN32
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   The REALLOC define eliminates a flurry of conversion warnings,
   but is not required.
*/ 2002169695Skan# define MAX_BUF_SIZE 65500L 2003169695Skan# define REALLOC(p,s) realloc ((p), (size_t) (s)) 2004169695Skan# else 2005169695Skan# define MAX_BUF_SIZE (1L << 16) 2006169695Skan# define REALLOC(p,s) realloc ((p), (s)) 2007169695Skan# endif 2008169695Skan 2009169695Skan/* Extend the buffer by twice its current size via realloc and 2010169695Skan reset the pointers that pointed into the old block to point to the 2011169695Skan correct places in the new one. If extending the buffer results in it 2012169695Skan being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 2013169695Skan# if __BOUNDED_POINTERS__ 2014169695Skan# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) 2015169695Skan# define MOVE_BUFFER_POINTER(P) \ 2016169695Skan (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) 2017169695Skan# define ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2018169695Skan else \ 2019169695Skan { \ 2020169695Skan SET_HIGH_BOUND (b); \ 2021169695Skan SET_HIGH_BOUND (begalt); \ 2022169695Skan if (fixup_alt_jump) \ 2023169695Skan SET_HIGH_BOUND (fixup_alt_jump); \ 2024169695Skan if (laststart) \ 2025169695Skan SET_HIGH_BOUND (laststart); \ 2026169695Skan if (pending_exact) \ 2027169695Skan SET_HIGH_BOUND (pending_exact); \ 2028169695Skan } 2029169695Skan# else 2030169695Skan# define MOVE_BUFFER_POINTER(P) (P) += incr 2031169695Skan# define ELSE_EXTEND_BUFFER_HIGH_BOUND 2032169695Skan# endif 2033169695Skan# endif /* not DEFINED_ONCE */ 2034169695Skan 2035169695Skan# ifdef WCHAR 2036169695Skan# define EXTEND_BUFFER() \ 2037169695Skan do { \ 2038169695Skan UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 2039169695Skan int wchar_count; \ 2040169695Skan if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \ 2041169695Skan return REG_ESIZE; \ 2042169695Skan bufp->allocated <<= 1; \ 2043169695Skan if (bufp->allocated > MAX_BUF_SIZE) \ 2044169695Skan bufp->allocated = MAX_BUF_SIZE; \ 2045169695Skan /* How many characters the new buffer can 
have? */ \ 2046169695Skan wchar_count = bufp->allocated / sizeof(UCHAR_T); \ 2047169695Skan if (wchar_count == 0) wchar_count = 1; \ 2048169695Skan /* Truncate the buffer to CHAR_T align. */ \ 2049169695Skan bufp->allocated = wchar_count * sizeof(UCHAR_T); \ 2050169695Skan RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \ 2051169695Skan bufp->buffer = (char*)COMPILED_BUFFER_VAR; \ 2052169695Skan if (COMPILED_BUFFER_VAR == NULL) \ 2053169695Skan return REG_ESPACE; \ 2054169695Skan /* If the buffer moved, move all the pointers into it. */ \ 2055169695Skan if (old_buffer != COMPILED_BUFFER_VAR) \ 2056169695Skan { \ 2057169695Skan int incr = COMPILED_BUFFER_VAR - old_buffer; \ 2058169695Skan MOVE_BUFFER_POINTER (b); \ 2059169695Skan MOVE_BUFFER_POINTER (begalt); \ 2060169695Skan if (fixup_alt_jump) \ 2061169695Skan MOVE_BUFFER_POINTER (fixup_alt_jump); \ 2062169695Skan if (laststart) \ 2063169695Skan MOVE_BUFFER_POINTER (laststart); \ 2064169695Skan if (pending_exact) \ 2065169695Skan MOVE_BUFFER_POINTER (pending_exact); \ 2066169695Skan } \ 2067169695Skan ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2068169695Skan } while (0) 2069169695Skan# else /* BYTE */ 2070169695Skan# define EXTEND_BUFFER() \ 2071169695Skan do { \ 2072169695Skan UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \ 2073169695Skan if (bufp->allocated == MAX_BUF_SIZE) \ 2074169695Skan return REG_ESIZE; \ 2075169695Skan bufp->allocated <<= 1; \ 2076169695Skan if (bufp->allocated > MAX_BUF_SIZE) \ 2077169695Skan bufp->allocated = MAX_BUF_SIZE; \ 2078169695Skan bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \ 2079169695Skan bufp->allocated); \ 2080169695Skan if (COMPILED_BUFFER_VAR == NULL) \ 2081169695Skan return REG_ESPACE; \ 2082169695Skan /* If the buffer moved, move all the pointers into it. 
*/ \ 2083169695Skan if (old_buffer != COMPILED_BUFFER_VAR) \ 2084169695Skan { \ 2085169695Skan int incr = COMPILED_BUFFER_VAR - old_buffer; \ 2086169695Skan MOVE_BUFFER_POINTER (b); \ 2087169695Skan MOVE_BUFFER_POINTER (begalt); \ 2088169695Skan if (fixup_alt_jump) \ 2089169695Skan MOVE_BUFFER_POINTER (fixup_alt_jump); \ 2090169695Skan if (laststart) \ 2091169695Skan MOVE_BUFFER_POINTER (laststart); \ 2092169695Skan if (pending_exact) \ 2093169695Skan MOVE_BUFFER_POINTER (pending_exact); \ 2094169695Skan } \ 2095169695Skan ELSE_EXTEND_BUFFER_HIGH_BOUND \ 2096169695Skan } while (0) 2097169695Skan# endif /* WCHAR */ 2098169695Skan 2099169695Skan# ifndef DEFINED_ONCE 2100169695Skan/* Since we have one byte reserved for the register number argument to 2101169695Skan {start,stop}_memory, the maximum number of groups we can report 2102169695Skan things about is what fits in that byte. */ 2103169695Skan# define MAX_REGNUM 255 2104169695Skan 2105169695Skan/* But patterns can have more than `MAX_REGNUM' registers. We just 2106169695Skan ignore the excess. */ 2107169695Skantypedef unsigned regnum_t; 2108169695Skan 2109169695Skan 2110169695Skan/* Macros for the compile stack. */ 2111169695Skan 2112169695Skan/* Since offsets can go either forwards or backwards, this type needs to 2113169695Skan be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 2114169695Skan/* int may be not enough when sizeof(int) == 2. 
*/ 2115169695Skantypedef long pattern_offset_t; 2116169695Skan 2117169695Skantypedef struct 2118169695Skan{ 2119169695Skan pattern_offset_t begalt_offset; 2120169695Skan pattern_offset_t fixup_alt_jump; 2121169695Skan pattern_offset_t inner_group_offset; 2122169695Skan pattern_offset_t laststart_offset; 2123169695Skan regnum_t regnum; 2124169695Skan} compile_stack_elt_t; 2125169695Skan 2126169695Skan 2127169695Skantypedef struct 2128169695Skan{ 2129169695Skan compile_stack_elt_t *stack; 2130169695Skan unsigned size; 2131169695Skan unsigned avail; /* Offset of next open position. */ 2132169695Skan} compile_stack_type; 2133169695Skan 2134169695Skan 2135169695Skan# define INIT_COMPILE_STACK_SIZE 32 2136169695Skan 2137169695Skan# define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 2138169695Skan# define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 2139169695Skan 2140169695Skan/* The next available element. */ 2141169695Skan# define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 2142169695Skan 2143169695Skan# endif /* not DEFINED_ONCE */ 2144169695Skan 2145169695Skan/* Set the bit for character C in a list. */ 2146169695Skan# ifndef DEFINED_ONCE 2147169695Skan# define SET_LIST_BIT(c) \ 2148169695Skan (b[((unsigned char) (c)) / BYTEWIDTH] \ 2149169695Skan |= 1 << (((unsigned char) c) % BYTEWIDTH)) 2150169695Skan# endif /* DEFINED_ONCE */ 2151169695Skan 2152169695Skan/* Get the next unsigned number in the uncompiled pattern. 
*/
/* Uses `c', `p' and `pend' from the enclosing scope (through PATFETCH).
   Characters are fetched until the pattern ends or a non-digit has been
   fetched; that non-digit is left in `c' and has been consumed.
   A negative NUM is reset to 0 before the first digit is added.
   NOTE(review): presumably callers start NUM at a negative "no number"
   value -- confirm at the call sites.
   Once NUM exceeds RE_DUP_MAX further digits are still consumed but no
   longer accumulated.
   Expands to a bare `{ ... }' block, not a `do ... while (0)'.  */
# define GET_UNSIGNED_NUMBER(num) \
  { \
    while (p != pend) \
      { \
        PATFETCH (c); \
        if (c < '0' || c > '9') \
          break; \
        if (num <= RE_DUP_MAX) \
          { \
            if (num < 0) \
              num = 0; \
            num = num * 10 + c - '0'; \
          } \
      } \
  }

# ifndef DEFINED_ONCE
#  if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#   ifdef CHARCLASS_NAME_MAX
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   else
/* This shouldn't happen but some implementation might still have this
   problem.  Use a reasonable default value.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   endif

/* In this configuration a class name is checked by passing it to
   wctype (__wctype inside the C library).  */
#   ifdef _LIBC
#    define IS_CHAR_CLASS(string) __wctype (string)
#   else
#    define IS_CHAR_CLASS(string) wctype (string)
#   endif
#  else
#   define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'.
*/

/* Without wide-character support only these twelve class names are
   accepted; STRING is compared against each of them with STREQ.  */
#   define IS_CHAR_CLASS(string) \
   (STREQ (string, "alpha") || STREQ (string, "upper") \
    || STREQ (string, "lower") || STREQ (string, "digit") \
    || STREQ (string, "alnum") || STREQ (string, "xdigit") \
    || STREQ (string, "space") || STREQ (string, "print") \
    || STREQ (string, "punct") || STREQ (string, "graph") \
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
#  endif
# endif /* DEFINED_ONCE */

# ifndef MATCH_MAY_ALLOCATE

/* If we cannot allocate large objects within re_match_2_internal,
   we make the fail stack and register vectors global.
   The fail stack, we grow to the maximum size when a regexp
   is compiled.
   The register vectors, we adjust in size each time we
   compile a regexp, according to the number of registers it needs.  */

static PREFIX(fail_stack_type) fail_stack;

/* Size with which the following vectors are currently allocated.
   That is so we can make them bigger as needed,
   but never make them smaller.  */
/* These are declared only under DEFINED_ONCE, whereas the PREFIX'd
   reg_info vectors below are declared regardless of it.  */
# ifdef DEFINED_ONCE
static int regs_allocated_size;

static const char ** regstart, ** regend;
static const char ** old_regstart, ** old_regend;
static const char **best_regstart, **best_regend;
static const char **reg_dummy;
# endif /* DEFINED_ONCE */

static PREFIX(register_info_type) *PREFIX(reg_info);
static PREFIX(register_info_type) *PREFIX(reg_info_dummy);

/* Make the register vectors big enough for NUM_REGS registers,
   but don't make them smaller.
*/ 2227169695Skan 2228169695Skanstatic void 2229169695SkanPREFIX(regex_grow_registers) (int num_regs) 2230169695Skan{ 2231169695Skan if (num_regs > regs_allocated_size) 2232169695Skan { 2233169695Skan RETALLOC_IF (regstart, num_regs, const char *); 2234169695Skan RETALLOC_IF (regend, num_regs, const char *); 2235169695Skan RETALLOC_IF (old_regstart, num_regs, const char *); 2236169695Skan RETALLOC_IF (old_regend, num_regs, const char *); 2237169695Skan RETALLOC_IF (best_regstart, num_regs, const char *); 2238169695Skan RETALLOC_IF (best_regend, num_regs, const char *); 2239169695Skan RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type)); 2240169695Skan RETALLOC_IF (reg_dummy, num_regs, const char *); 2241169695Skan RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type)); 2242169695Skan 2243169695Skan regs_allocated_size = num_regs; 2244169695Skan } 2245169695Skan} 2246169695Skan 2247169695Skan# endif /* not MATCH_MAY_ALLOCATE */ 2248169695Skan 2249169695Skan# ifndef DEFINED_ONCE 2250169695Skanstatic boolean group_in_compile_stack (compile_stack_type compile_stack, 2251169695Skan regnum_t regnum); 2252169695Skan# endif /* not DEFINED_ONCE */ 2253169695Skan 2254169695Skan/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. 2255169695Skan Returns one of error codes defined in `regex.h', or zero for success. 2256169695Skan 2257169695Skan Assumes the `allocated' (and perhaps `buffer') and `translate' 2258169695Skan fields are set in BUFP on entry. 
2259169695Skan 2260169695Skan If it succeeds, results are put in BUFP (if it returns an error, the 2261169695Skan contents of BUFP are undefined): 2262169695Skan `buffer' is the compiled pattern; 2263169695Skan `syntax' is set to SYNTAX; 2264169695Skan `used' is set to the length of the compiled pattern; 2265169695Skan `fastmap_accurate' is zero; 2266169695Skan `re_nsub' is the number of subexpressions in PATTERN; 2267169695Skan `not_bol' and `not_eol' are zero; 2268169695Skan 2269169695Skan The `fastmap' and `newline_anchor' fields are neither 2270169695Skan examined nor set. */ 2271169695Skan 2272169695Skan/* Return, freeing storage we allocated. */ 2273169695Skan# ifdef WCHAR 2274169695Skan# define FREE_STACK_RETURN(value) \ 2275169695Skan return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value) 2276169695Skan# else 2277169695Skan# define FREE_STACK_RETURN(value) \ 2278169695Skan return (free (compile_stack.stack), value) 2279169695Skan# endif /* WCHAR */ 2280169695Skan 2281169695Skanstatic reg_errcode_t 2282169695SkanPREFIX(regex_compile) (const char *ARG_PREFIX(pattern), 2283169695Skan size_t ARG_PREFIX(size), reg_syntax_t syntax, 2284169695Skan struct re_pattern_buffer *bufp) 2285169695Skan{ 2286169695Skan /* We fetch characters from PATTERN here. Even though PATTERN is 2287169695Skan `char *' (i.e., signed), we declare these variables as unsigned, so 2288169695Skan they can be reliably used as array indices. */ 2289169695Skan register UCHAR_T c, c1; 2290169695Skan 2291169695Skan#ifdef WCHAR 2292169695Skan /* A temporary space to keep wchar_t pattern and compiled pattern. */ 2293169695Skan CHAR_T *pattern, *COMPILED_BUFFER_VAR; 2294169695Skan size_t size; 2295169695Skan /* offset buffer for optimization. See convert_mbs_to_wc. */ 2296169695Skan int *mbs_offset = NULL; 2297169695Skan /* It hold whether each wchar_t is binary data or not. 
*/ 2298169695Skan char *is_binary = NULL; 2299169695Skan /* A flag whether exactn is handling binary data or not. */ 2300169695Skan char is_exactn_bin = FALSE; 2301169695Skan#endif /* WCHAR */ 2302169695Skan 2303169695Skan /* A random temporary spot in PATTERN. */ 2304169695Skan const CHAR_T *p1; 2305169695Skan 2306169695Skan /* Points to the end of the buffer, where we should append. */ 2307169695Skan register UCHAR_T *b; 2308169695Skan 2309169695Skan /* Keeps track of unclosed groups. */ 2310169695Skan compile_stack_type compile_stack; 2311169695Skan 2312169695Skan /* Points to the current (ending) position in the pattern. */ 2313169695Skan#ifdef WCHAR 2314169695Skan const CHAR_T *p; 2315169695Skan const CHAR_T *pend; 2316169695Skan#else /* BYTE */ 2317169695Skan const CHAR_T *p = pattern; 2318169695Skan const CHAR_T *pend = pattern + size; 2319169695Skan#endif /* WCHAR */ 2320169695Skan 2321169695Skan /* How to translate the characters in the pattern. */ 2322169695Skan RE_TRANSLATE_TYPE translate = bufp->translate; 2323169695Skan 2324169695Skan /* Address of the count-byte of the most recently inserted `exactn' 2325169695Skan command. This makes it possible to tell if a new exact-match 2326169695Skan character can be added to that command or if the character requires 2327169695Skan a new `exactn' command. */ 2328169695Skan UCHAR_T *pending_exact = 0; 2329169695Skan 2330169695Skan /* Address of start of the most recently finished expression. 2331169695Skan This tells, e.g., postfix * where to find the start of its 2332169695Skan operand. Reset at the beginning of groups and alternatives. */ 2333169695Skan UCHAR_T *laststart = 0; 2334169695Skan 2335169695Skan /* Address of beginning of regexp, or inside of last group. */ 2336169695Skan UCHAR_T *begalt; 2337169695Skan 2338169695Skan /* Address of the place where a forward jump should go to the end of 2339169695Skan the containing expression. 
Each alternative of an `or' -- except the 2340169695Skan last -- ends with a forward jump of this sort. */ 2341169695Skan UCHAR_T *fixup_alt_jump = 0; 2342169695Skan 2343169695Skan /* Counts open-groups as they are encountered. Remembered for the 2344169695Skan matching close-group on the compile stack, so the same register 2345169695Skan number is put in the stop_memory as the start_memory. */ 2346169695Skan regnum_t regnum = 0; 2347169695Skan 2348169695Skan#ifdef WCHAR 2349169695Skan /* Initialize the wchar_t PATTERN and offset_buffer. */ 2350169695Skan p = pend = pattern = TALLOC(csize + 1, CHAR_T); 2351169695Skan mbs_offset = TALLOC(csize + 1, int); 2352169695Skan is_binary = TALLOC(csize + 1, char); 2353169695Skan if (pattern == NULL || mbs_offset == NULL || is_binary == NULL) 2354169695Skan { 2355169695Skan free(pattern); 2356169695Skan free(mbs_offset); 2357169695Skan free(is_binary); 2358169695Skan return REG_ESPACE; 2359169695Skan } 2360169695Skan pattern[csize] = L'\0'; /* sentinel */ 2361169695Skan size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary); 2362169695Skan pend = p + size; 2363169695Skan if (size < 0) 2364169695Skan { 2365169695Skan free(pattern); 2366169695Skan free(mbs_offset); 2367169695Skan free(is_binary); 2368169695Skan return REG_BADPAT; 2369169695Skan } 2370169695Skan#endif 2371169695Skan 2372169695Skan#ifdef DEBUG 2373169695Skan DEBUG_PRINT1 ("\nCompiling pattern: "); 2374169695Skan if (debug) 2375169695Skan { 2376169695Skan unsigned debug_count; 2377169695Skan 2378169695Skan for (debug_count = 0; debug_count < size; debug_count++) 2379169695Skan PUT_CHAR (pattern[debug_count]); 2380169695Skan putchar ('\n'); 2381169695Skan } 2382169695Skan#endif /* DEBUG */ 2383169695Skan 2384169695Skan /* Initialize the compile stack. 
*/ 2385169695Skan compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 2386169695Skan if (compile_stack.stack == NULL) 2387169695Skan { 2388169695Skan#ifdef WCHAR 2389169695Skan free(pattern); 2390169695Skan free(mbs_offset); 2391169695Skan free(is_binary); 2392169695Skan#endif 2393169695Skan return REG_ESPACE; 2394169695Skan } 2395169695Skan 2396169695Skan compile_stack.size = INIT_COMPILE_STACK_SIZE; 2397169695Skan compile_stack.avail = 0; 2398169695Skan 2399169695Skan /* Initialize the pattern buffer. */ 2400169695Skan bufp->syntax = syntax; 2401169695Skan bufp->fastmap_accurate = 0; 2402169695Skan bufp->not_bol = bufp->not_eol = 0; 2403169695Skan 2404169695Skan /* Set `used' to zero, so that if we return an error, the pattern 2405169695Skan printer (for debugging) will think there's no pattern. We reset it 2406169695Skan at the end. */ 2407169695Skan bufp->used = 0; 2408169695Skan 2409169695Skan /* Always count groups, whether or not bufp->no_sub is set. */ 2410169695Skan bufp->re_nsub = 0; 2411169695Skan 2412169695Skan#if !defined emacs && !defined SYNTAX_TABLE 2413169695Skan /* Initialize the syntax table. */ 2414169695Skan init_syntax_once (); 2415169695Skan#endif 2416169695Skan 2417169695Skan if (bufp->allocated == 0) 2418169695Skan { 2419169695Skan if (bufp->buffer) 2420169695Skan { /* If zero allocated, but buffer is non-null, try to realloc 2421169695Skan enough space. This loses if buffer's address is bogus, but 2422169695Skan that is the user's responsibility. */ 2423169695Skan#ifdef WCHAR 2424169695Skan /* Free bufp->buffer and allocate an array for wchar_t pattern 2425169695Skan buffer. 
*/ 2426169695Skan free(bufp->buffer); 2427169695Skan COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T), 2428169695Skan UCHAR_T); 2429169695Skan#else 2430169695Skan RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T); 2431169695Skan#endif /* WCHAR */ 2432169695Skan } 2433169695Skan else 2434169695Skan { /* Caller did not allocate a buffer. Do it for them. */ 2435169695Skan COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T), 2436169695Skan UCHAR_T); 2437169695Skan } 2438169695Skan 2439169695Skan if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE); 2440169695Skan#ifdef WCHAR 2441169695Skan bufp->buffer = (char*)COMPILED_BUFFER_VAR; 2442169695Skan#endif /* WCHAR */ 2443169695Skan bufp->allocated = INIT_BUF_SIZE; 2444169695Skan } 2445169695Skan#ifdef WCHAR 2446169695Skan else 2447169695Skan COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer; 2448169695Skan#endif 2449169695Skan 2450169695Skan begalt = b = COMPILED_BUFFER_VAR; 2451169695Skan 2452169695Skan /* Loop through the uncompiled pattern until we're at the end. */ 2453169695Skan while (p != pend) 2454169695Skan { 2455169695Skan PATFETCH (c); 2456169695Skan 2457169695Skan switch (c) 2458169695Skan { 2459169695Skan case '^': 2460169695Skan { 2461169695Skan if ( /* If at start of pattern, it's an operator. */ 2462169695Skan p == pattern + 1 2463169695Skan /* If context independent, it's an operator. */ 2464169695Skan || syntax & RE_CONTEXT_INDEP_ANCHORS 2465169695Skan /* Otherwise, depends on what's come before. */ 2466169695Skan || PREFIX(at_begline_loc_p) (pattern, p, syntax)) 2467169695Skan BUF_PUSH (begline); 2468169695Skan else 2469169695Skan goto normal_char; 2470169695Skan } 2471169695Skan break; 2472169695Skan 2473169695Skan 2474169695Skan case '$': 2475169695Skan { 2476169695Skan if ( /* If at end of pattern, it's an operator. */ 2477169695Skan p == pend 2478169695Skan /* If context independent, it's an operator. 
*/ 2479169695Skan || syntax & RE_CONTEXT_INDEP_ANCHORS 2480169695Skan /* Otherwise, depends on what's next. */ 2481169695Skan || PREFIX(at_endline_loc_p) (p, pend, syntax)) 2482169695Skan BUF_PUSH (endline); 2483169695Skan else 2484169695Skan goto normal_char; 2485169695Skan } 2486169695Skan break; 2487169695Skan 2488169695Skan 2489169695Skan case '+': 2490169695Skan case '?': 2491169695Skan if ((syntax & RE_BK_PLUS_QM) 2492169695Skan || (syntax & RE_LIMITED_OPS)) 2493169695Skan goto normal_char; 2494169695Skan handle_plus: 2495169695Skan case '*': 2496169695Skan /* If there is no previous pattern... */ 2497169695Skan if (!laststart) 2498169695Skan { 2499169695Skan if (syntax & RE_CONTEXT_INVALID_OPS) 2500169695Skan FREE_STACK_RETURN (REG_BADRPT); 2501169695Skan else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 2502169695Skan goto normal_char; 2503169695Skan } 2504169695Skan 2505169695Skan { 2506169695Skan /* Are we optimizing this jump? */ 2507169695Skan boolean keep_string_p = false; 2508169695Skan 2509169695Skan /* 1 means zero (many) matches is allowed. */ 2510169695Skan char zero_times_ok = 0, many_times_ok = 0; 2511169695Skan 2512169695Skan /* If there is a sequence of repetition chars, collapse it 2513169695Skan down to just one (the right one). We can't combine 2514169695Skan interval operators with these because of, e.g., `a{2}*', 2515169695Skan which should only match an even number of `a's. 
*/ 2516169695Skan 2517169695Skan for (;;) 2518169695Skan { 2519169695Skan zero_times_ok |= c != '+'; 2520169695Skan many_times_ok |= c != '?'; 2521169695Skan 2522169695Skan if (p == pend) 2523169695Skan break; 2524169695Skan 2525169695Skan PATFETCH (c); 2526169695Skan 2527169695Skan if (c == '*' 2528169695Skan || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 2529169695Skan ; 2530169695Skan 2531169695Skan else if (syntax & RE_BK_PLUS_QM && c == '\\') 2532169695Skan { 2533169695Skan if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2534169695Skan 2535169695Skan PATFETCH (c1); 2536169695Skan if (!(c1 == '+' || c1 == '?')) 2537169695Skan { 2538169695Skan PATUNFETCH; 2539169695Skan PATUNFETCH; 2540169695Skan break; 2541169695Skan } 2542169695Skan 2543169695Skan c = c1; 2544169695Skan } 2545169695Skan else 2546169695Skan { 2547169695Skan PATUNFETCH; 2548169695Skan break; 2549169695Skan } 2550169695Skan 2551169695Skan /* If we get here, we found another repeat character. */ 2552169695Skan } 2553169695Skan 2554169695Skan /* Star, etc. applied to an empty pattern is equivalent 2555169695Skan to an empty pattern. */ 2556169695Skan if (!laststart) 2557169695Skan break; 2558169695Skan 2559169695Skan /* Now we know whether or not zero matches is allowed 2560169695Skan and also whether or not two or more matches is allowed. */ 2561169695Skan if (many_times_ok) 2562169695Skan { /* More than one repetition is allowed, so put in at the 2563169695Skan end a backward relative jump from `b' to before the next 2564169695Skan jump we're going to put in below (which jumps from 2565169695Skan laststart to after this jump). 2566169695Skan 2567169695Skan But if we are at the `*' in the exact sequence `.*\n', 2568169695Skan insert an unconditional jump backwards to the ., 2569169695Skan instead of the beginning of the loop. This way we only 2570169695Skan push a failure point once, instead of every time 2571169695Skan through the loop. 
*/ 2572169695Skan assert (p - 1 > pattern); 2573169695Skan 2574169695Skan /* Allocate the space for the jump. */ 2575169695Skan GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2576169695Skan 2577169695Skan /* We know we are not at the first character of the pattern, 2578169695Skan because laststart was nonzero. And we've already 2579169695Skan incremented `p', by the way, to be the character after 2580169695Skan the `*'. Do we have to do something analogous here 2581169695Skan for null bytes, because of RE_DOT_NOT_NULL? */ 2582169695Skan if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 2583169695Skan && zero_times_ok 2584169695Skan && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 2585169695Skan && !(syntax & RE_DOT_NEWLINE)) 2586169695Skan { /* We have .*\n. */ 2587169695Skan STORE_JUMP (jump, b, laststart); 2588169695Skan keep_string_p = true; 2589169695Skan } 2590169695Skan else 2591169695Skan /* Anything else. */ 2592169695Skan STORE_JUMP (maybe_pop_jump, b, laststart - 2593169695Skan (1 + OFFSET_ADDRESS_SIZE)); 2594169695Skan 2595169695Skan /* We've added more stuff to the buffer. */ 2596169695Skan b += 1 + OFFSET_ADDRESS_SIZE; 2597169695Skan } 2598169695Skan 2599169695Skan /* On failure, jump from laststart to b + 3, which will be the 2600169695Skan end of the buffer after this jump is inserted. */ 2601169695Skan /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of 2602169695Skan 'b + 3'. */ 2603169695Skan GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2604169695Skan INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 2605169695Skan : on_failure_jump, 2606169695Skan laststart, b + 1 + OFFSET_ADDRESS_SIZE); 2607169695Skan pending_exact = 0; 2608169695Skan b += 1 + OFFSET_ADDRESS_SIZE; 2609169695Skan 2610169695Skan if (!zero_times_ok) 2611169695Skan { 2612169695Skan /* At least one repetition is required, so insert a 2613169695Skan `dummy_failure_jump' before the initial 2614169695Skan `on_failure_jump' instruction of the loop. 
This 2615169695Skan effects a skip over that instruction the first time 2616169695Skan we hit that loop. */ 2617169695Skan GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 2618169695Skan INSERT_JUMP (dummy_failure_jump, laststart, laststart + 2619169695Skan 2 + 2 * OFFSET_ADDRESS_SIZE); 2620169695Skan b += 1 + OFFSET_ADDRESS_SIZE; 2621169695Skan } 2622169695Skan } 2623169695Skan break; 2624169695Skan 2625169695Skan 2626169695Skan case '.': 2627169695Skan laststart = b; 2628169695Skan BUF_PUSH (anychar); 2629169695Skan break; 2630169695Skan 2631169695Skan 2632169695Skan case '[': 2633169695Skan { 2634169695Skan boolean had_char_class = false; 2635169695Skan#ifdef WCHAR 2636169695Skan CHAR_T range_start = 0xffffffff; 2637169695Skan#else 2638169695Skan unsigned int range_start = 0xffffffff; 2639169695Skan#endif 2640169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2641169695Skan 2642169695Skan#ifdef WCHAR 2643169695Skan /* We assume a charset(_not) structure as a wchar_t array. 2644169695Skan charset[0] = (re_opcode_t) charset(_not) 2645169695Skan charset[1] = l (= length of char_classes) 2646169695Skan charset[2] = m (= length of collating_symbols) 2647169695Skan charset[3] = n (= length of equivalence_classes) 2648169695Skan charset[4] = o (= length of char_ranges) 2649169695Skan charset[5] = p (= length of chars) 2650169695Skan 2651169695Skan charset[6] = char_class (wctype_t) 2652169695Skan charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t) 2653169695Skan ... 2654169695Skan charset[l+5] = char_class (wctype_t) 2655169695Skan 2656169695Skan charset[l+6] = collating_symbol (wchar_t) 2657169695Skan ... 2658169695Skan charset[l+m+5] = collating_symbol (wchar_t) 2659169695Skan ifdef _LIBC we use the index if 2660169695Skan _NL_COLLATE_SYMB_EXTRAMB instead of 2661169695Skan wchar_t string. 2662169695Skan 2663169695Skan charset[l+m+6] = equivalence_classes (wchar_t) 2664169695Skan ... 
2665169695Skan charset[l+m+n+5] = equivalence_classes (wchar_t) 2666169695Skan ifdef _LIBC we use the index in 2667169695Skan _NL_COLLATE_WEIGHT instead of 2668169695Skan wchar_t string. 2669169695Skan 2670169695Skan charset[l+m+n+6] = range_start 2671169695Skan charset[l+m+n+7] = range_end 2672169695Skan ... 2673169695Skan charset[l+m+n+2o+4] = range_start 2674169695Skan charset[l+m+n+2o+5] = range_end 2675169695Skan ifdef _LIBC we use the value looked up 2676169695Skan in _NL_COLLATE_COLLSEQ instead of 2677169695Skan wchar_t character. 2678169695Skan 2679169695Skan charset[l+m+n+2o+6] = char 2680169695Skan ... 2681169695Skan charset[l+m+n+2o+p+5] = char 2682169695Skan 2683169695Skan */ 2684169695Skan 2685169695Skan /* We need at least 6 spaces: the opcode, the length of 2686169695Skan char_classes, the length of collating_symbols, the length of 2687169695Skan equivalence_classes, the length of char_ranges, the length of 2688169695Skan chars. */ 2689169695Skan GET_BUFFER_SPACE (6); 2690169695Skan 2691169695Skan /* Save b as laststart. And We use laststart as the pointer 2692169695Skan to the first element of the charset here. 2693169695Skan In other words, laststart[i] indicates charset[i]. */ 2694169695Skan laststart = b; 2695169695Skan 2696169695Skan /* We test `*p == '^' twice, instead of using an if 2697169695Skan statement, so we only need one BUF_PUSH. */ 2698169695Skan BUF_PUSH (*p == '^' ? charset_not : charset); 2699169695Skan if (*p == '^') 2700169695Skan p++; 2701169695Skan 2702169695Skan /* Push the length of char_classes, the length of 2703169695Skan collating_symbols, the length of equivalence_classes, the 2704169695Skan length of char_ranges and the length of chars. */ 2705169695Skan BUF_PUSH_3 (0, 0, 0); 2706169695Skan BUF_PUSH_2 (0, 0); 2707169695Skan 2708169695Skan /* Remember the first position in the bracket expression. */ 2709169695Skan p1 = p; 2710169695Skan 2711169695Skan /* charset_not matches newline according to a syntax bit. 
*/ 2712169695Skan if ((re_opcode_t) b[-6] == charset_not 2713169695Skan && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2714169695Skan { 2715169695Skan BUF_PUSH('\n'); 2716169695Skan laststart[5]++; /* Update the length of characters */ 2717169695Skan } 2718169695Skan 2719169695Skan /* Read in characters and ranges, setting map bits. */ 2720169695Skan for (;;) 2721169695Skan { 2722169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2723169695Skan 2724169695Skan PATFETCH (c); 2725169695Skan 2726169695Skan /* \ might escape characters inside [...] and [^...]. */ 2727169695Skan if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 2728169695Skan { 2729169695Skan if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2730169695Skan 2731169695Skan PATFETCH (c1); 2732169695Skan BUF_PUSH(c1); 2733169695Skan laststart[5]++; /* Update the length of chars */ 2734169695Skan range_start = c1; 2735169695Skan continue; 2736169695Skan } 2737169695Skan 2738169695Skan /* Could be the end of the bracket expression. If it's 2739169695Skan not (i.e., when the bracket expression is `[]' so 2740169695Skan far), the ']' character bit gets set way below. */ 2741169695Skan if (c == ']' && p != p1 + 1) 2742169695Skan break; 2743169695Skan 2744169695Skan /* Look ahead to see if it's a range when the last thing 2745169695Skan was a character class. */ 2746169695Skan if (had_char_class && c == '-' && *p != ']') 2747169695Skan FREE_STACK_RETURN (REG_ERANGE); 2748169695Skan 2749169695Skan /* Look ahead to see if it's a range when the last thing 2750169695Skan was a character: if this is a hyphen not at the 2751169695Skan beginning or the end of a list, then it's the range 2752169695Skan operator. */ 2753169695Skan if (c == '-' 2754169695Skan && !(p - 2 >= pattern && p[-2] == '[') 2755169695Skan && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 2756169695Skan && *p != ']') 2757169695Skan { 2758169695Skan reg_errcode_t ret; 2759169695Skan /* Allocate the space for range_start and range_end. 
*/ 2760169695Skan GET_BUFFER_SPACE (2); 2761169695Skan /* Update the pointer to indicate end of buffer. */ 2762169695Skan b += 2; 2763169695Skan ret = wcs_compile_range (range_start, &p, pend, translate, 2764169695Skan syntax, b, laststart); 2765169695Skan if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2766169695Skan range_start = 0xffffffff; 2767169695Skan } 2768169695Skan else if (p[0] == '-' && p[1] != ']') 2769169695Skan { /* This handles ranges made up of characters only. */ 2770169695Skan reg_errcode_t ret; 2771169695Skan 2772169695Skan /* Move past the `-'. */ 2773169695Skan PATFETCH (c1); 2774169695Skan /* Allocate the space for range_start and range_end. */ 2775169695Skan GET_BUFFER_SPACE (2); 2776169695Skan /* Update the pointer to indicate end of buffer. */ 2777169695Skan b += 2; 2778169695Skan ret = wcs_compile_range (c, &p, pend, translate, syntax, b, 2779169695Skan laststart); 2780169695Skan if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2781169695Skan range_start = 0xffffffff; 2782169695Skan } 2783169695Skan 2784169695Skan /* See if we're at the beginning of a possible character 2785169695Skan class. */ 2786169695Skan else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2787169695Skan { /* Leave room for the null. */ 2788169695Skan char str[CHAR_CLASS_MAX_LENGTH + 1]; 2789169695Skan 2790169695Skan PATFETCH (c); 2791169695Skan c1 = 0; 2792169695Skan 2793169695Skan /* If pattern is `[[:'. */ 2794169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2795169695Skan 2796169695Skan for (;;) 2797169695Skan { 2798169695Skan PATFETCH (c); 2799169695Skan if ((c == ':' && *p == ']') || p == pend) 2800169695Skan break; 2801169695Skan if (c1 < CHAR_CLASS_MAX_LENGTH) 2802169695Skan str[c1++] = c; 2803169695Skan else 2804169695Skan /* This is in any case an invalid class name. 
*/ 2805169695Skan str[0] = '\0'; 2806169695Skan } 2807169695Skan str[c1] = '\0'; 2808169695Skan 2809169695Skan /* If isn't a word bracketed by `[:' and `:]': 2810169695Skan undo the ending character, the letters, and leave 2811169695Skan the leading `:' and `[' (but store them as character). */ 2812169695Skan if (c == ':' && *p == ']') 2813169695Skan { 2814169695Skan wctype_t wt; 2815169695Skan uintptr_t alignedp; 2816169695Skan 2817169695Skan /* Query the character class as wctype_t. */ 2818169695Skan wt = IS_CHAR_CLASS (str); 2819169695Skan if (wt == 0) 2820169695Skan FREE_STACK_RETURN (REG_ECTYPE); 2821169695Skan 2822169695Skan /* Throw away the ] at the end of the character 2823169695Skan class. */ 2824169695Skan PATFETCH (c); 2825169695Skan 2826169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2827169695Skan 2828169695Skan /* Allocate the space for character class. */ 2829169695Skan GET_BUFFER_SPACE(CHAR_CLASS_SIZE); 2830169695Skan /* Update the pointer to indicate end of buffer. */ 2831169695Skan b += CHAR_CLASS_SIZE; 2832169695Skan /* Move data which follow character classes 2833169695Skan not to violate the data. */ 2834169695Skan insert_space(CHAR_CLASS_SIZE, 2835169695Skan laststart + 6 + laststart[1], 2836169695Skan b - 1); 2837169695Skan alignedp = ((uintptr_t)(laststart + 6 + laststart[1]) 2838169695Skan + __alignof__(wctype_t) - 1) 2839169695Skan & ~(uintptr_t)(__alignof__(wctype_t) - 1); 2840169695Skan /* Store the character class. 
*/ 2841169695Skan *((wctype_t*)alignedp) = wt; 2842169695Skan /* Update length of char_classes */ 2843169695Skan laststart[1] += CHAR_CLASS_SIZE; 2844169695Skan 2845169695Skan had_char_class = true; 2846169695Skan } 2847169695Skan else 2848169695Skan { 2849169695Skan c1++; 2850169695Skan while (c1--) 2851169695Skan PATUNFETCH; 2852169695Skan BUF_PUSH ('['); 2853169695Skan BUF_PUSH (':'); 2854169695Skan laststart[5] += 2; /* Update the length of characters */ 2855169695Skan range_start = ':'; 2856169695Skan had_char_class = false; 2857169695Skan } 2858169695Skan } 2859169695Skan else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '=' 2860169695Skan || *p == '.')) 2861169695Skan { 2862169695Skan CHAR_T str[128]; /* Should be large enough. */ 2863169695Skan CHAR_T delim = *p; /* '=' or '.' */ 2864169695Skan# ifdef _LIBC 2865169695Skan uint32_t nrules = 2866169695Skan _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 2867169695Skan# endif 2868169695Skan PATFETCH (c); 2869169695Skan c1 = 0; 2870169695Skan 2871169695Skan /* If pattern is `[[=' or '[[.'. */ 2872169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2873169695Skan 2874169695Skan for (;;) 2875169695Skan { 2876169695Skan PATFETCH (c); 2877169695Skan if ((c == delim && *p == ']') || p == pend) 2878169695Skan break; 2879169695Skan if (c1 < sizeof (str) - 1) 2880169695Skan str[c1++] = c; 2881169695Skan else 2882169695Skan /* This is in any case an invalid class name. */ 2883169695Skan str[0] = '\0'; 2884169695Skan } 2885169695Skan str[c1] = '\0'; 2886169695Skan 2887169695Skan if (c == delim && *p == ']' && str[0] != '\0') 2888169695Skan { 2889169695Skan unsigned int i, offset; 2890169695Skan /* If we have no collation data we use the default 2891169695Skan collation in which each character is in a class 2892169695Skan by itself. 
It also means that ASCII is the 2893169695Skan character set and therefore we cannot have character 2894169695Skan with more than one byte in the multibyte 2895169695Skan representation. */ 2896169695Skan 2897169695Skan /* If not defined _LIBC, we push the name and 2898169695Skan `\0' for the sake of matching performance. */ 2899169695Skan int datasize = c1 + 1; 2900169695Skan 2901169695Skan# ifdef _LIBC 2902169695Skan int32_t idx = 0; 2903169695Skan if (nrules == 0) 2904169695Skan# endif 2905169695Skan { 2906169695Skan if (c1 != 1) 2907169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 2908169695Skan } 2909169695Skan# ifdef _LIBC 2910169695Skan else 2911169695Skan { 2912169695Skan const int32_t *table; 2913169695Skan const int32_t *weights; 2914169695Skan const int32_t *extra; 2915169695Skan const int32_t *indirect; 2916169695Skan wint_t *cp; 2917169695Skan 2918169695Skan /* This #include defines a local function! */ 2919169695Skan# include <locale/weightwc.h> 2920169695Skan 2921169695Skan if(delim == '=') 2922169695Skan { 2923169695Skan /* We push the index for equivalence class. */ 2924169695Skan cp = (wint_t*)str; 2925169695Skan 2926169695Skan table = (const int32_t *) 2927169695Skan _NL_CURRENT (LC_COLLATE, 2928169695Skan _NL_COLLATE_TABLEWC); 2929169695Skan weights = (const int32_t *) 2930169695Skan _NL_CURRENT (LC_COLLATE, 2931169695Skan _NL_COLLATE_WEIGHTWC); 2932169695Skan extra = (const int32_t *) 2933169695Skan _NL_CURRENT (LC_COLLATE, 2934169695Skan _NL_COLLATE_EXTRAWC); 2935169695Skan indirect = (const int32_t *) 2936169695Skan _NL_CURRENT (LC_COLLATE, 2937169695Skan _NL_COLLATE_INDIRECTWC); 2938169695Skan 2939169695Skan idx = findidx ((const wint_t**)&cp); 2940169695Skan if (idx == 0 || cp < (wint_t*) str + c1) 2941169695Skan /* This is no valid character. */ 2942169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 2943169695Skan 2944169695Skan str[0] = (wchar_t)idx; 2945169695Skan } 2946169695Skan else /* delim == '.' 
*/ 2947169695Skan { 2948169695Skan /* We push collation sequence value 2949169695Skan for collating symbol. */ 2950169695Skan int32_t table_size; 2951169695Skan const int32_t *symb_table; 2952169695Skan const unsigned char *extra; 2953169695Skan int32_t idx; 2954169695Skan int32_t elem; 2955169695Skan int32_t second; 2956169695Skan int32_t hash; 2957169695Skan char char_str[c1]; 2958169695Skan 2959169695Skan /* We have to convert the name to a single-byte 2960169695Skan string. This is possible since the names 2961169695Skan consist of ASCII characters and the internal 2962169695Skan representation is UCS4. */ 2963169695Skan for (i = 0; i < c1; ++i) 2964169695Skan char_str[i] = str[i]; 2965169695Skan 2966169695Skan table_size = 2967169695Skan _NL_CURRENT_WORD (LC_COLLATE, 2968169695Skan _NL_COLLATE_SYMB_HASH_SIZEMB); 2969169695Skan symb_table = (const int32_t *) 2970169695Skan _NL_CURRENT (LC_COLLATE, 2971169695Skan _NL_COLLATE_SYMB_TABLEMB); 2972169695Skan extra = (const unsigned char *) 2973169695Skan _NL_CURRENT (LC_COLLATE, 2974169695Skan _NL_COLLATE_SYMB_EXTRAMB); 2975169695Skan 2976169695Skan /* Locate the character in the hashing table. */ 2977169695Skan hash = elem_hash (char_str, c1); 2978169695Skan 2979169695Skan idx = 0; 2980169695Skan elem = hash % table_size; 2981169695Skan second = hash % (table_size - 2); 2982169695Skan while (symb_table[2 * elem] != 0) 2983169695Skan { 2984169695Skan /* First compare the hashing value. */ 2985169695Skan if (symb_table[2 * elem] == hash 2986169695Skan && c1 == extra[symb_table[2 * elem + 1]] 2987169695Skan && memcmp (char_str, 2988169695Skan &extra[symb_table[2 * elem + 1] 2989169695Skan + 1], c1) == 0) 2990169695Skan { 2991169695Skan /* Yep, this is the entry. */ 2992169695Skan idx = symb_table[2 * elem + 1]; 2993169695Skan idx += 1 + extra[idx]; 2994169695Skan break; 2995169695Skan } 2996169695Skan 2997169695Skan /* Next entry. 
*/ 2998169695Skan elem += second; 2999169695Skan } 3000169695Skan 3001169695Skan if (symb_table[2 * elem] != 0) 3002169695Skan { 3003169695Skan /* Compute the index of the byte sequence 3004169695Skan in the table. */ 3005169695Skan idx += 1 + extra[idx]; 3006169695Skan /* Adjust for the alignment. */ 3007169695Skan idx = (idx + 3) & ~3; 3008169695Skan 3009169695Skan str[0] = (wchar_t) idx + 4; 3010169695Skan } 3011169695Skan else if (symb_table[2 * elem] == 0 && c1 == 1) 3012169695Skan { 3013169695Skan /* No valid character. Match it as a 3014169695Skan single byte character. */ 3015169695Skan had_char_class = false; 3016169695Skan BUF_PUSH(str[0]); 3017169695Skan /* Update the length of characters */ 3018169695Skan laststart[5]++; 3019169695Skan range_start = str[0]; 3020169695Skan 3021169695Skan /* Throw away the ] at the end of the 3022169695Skan collating symbol. */ 3023169695Skan PATFETCH (c); 3024169695Skan /* exit from the switch block. */ 3025169695Skan continue; 3026169695Skan } 3027169695Skan else 3028169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 3029169695Skan } 3030169695Skan datasize = 1; 3031169695Skan } 3032169695Skan# endif 3033169695Skan /* Throw away the ] at the end of the equivalence 3034169695Skan class (or collating symbol). */ 3035169695Skan PATFETCH (c); 3036169695Skan 3037169695Skan /* Allocate the space for the equivalence class 3038169695Skan (or collating symbol) (and '\0' if needed). */ 3039169695Skan GET_BUFFER_SPACE(datasize); 3040169695Skan /* Update the pointer to indicate end of buffer. */ 3041169695Skan b += datasize; 3042169695Skan 3043169695Skan if (delim == '=') 3044169695Skan { /* equivalence class */ 3045169695Skan /* Calculate the offset of char_ranges, 3046169695Skan which is next to equivalence_classes. */ 3047169695Skan offset = laststart[1] + laststart[2] 3048169695Skan + laststart[3] +6; 3049169695Skan /* Insert space. 
*/ 3050169695Skan insert_space(datasize, laststart + offset, b - 1); 3051169695Skan 3052169695Skan /* Write the equivalence_class and \0. */ 3053169695Skan for (i = 0 ; i < datasize ; i++) 3054169695Skan laststart[offset + i] = str[i]; 3055169695Skan 3056169695Skan /* Update the length of equivalence_classes. */ 3057169695Skan laststart[3] += datasize; 3058169695Skan had_char_class = true; 3059169695Skan } 3060169695Skan else /* delim == '.' */ 3061169695Skan { /* collating symbol */ 3062169695Skan /* Calculate the offset of the equivalence_classes, 3063169695Skan which is next to collating_symbols. */ 3064169695Skan offset = laststart[1] + laststart[2] + 6; 3065169695Skan /* Insert space and write the collationg_symbol 3066169695Skan and \0. */ 3067169695Skan insert_space(datasize, laststart + offset, b-1); 3068169695Skan for (i = 0 ; i < datasize ; i++) 3069169695Skan laststart[offset + i] = str[i]; 3070169695Skan 3071169695Skan /* In re_match_2_internal if range_start < -1, we 3072169695Skan assume -range_start is the offset of the 3073169695Skan collating symbol which is specified as 3074169695Skan the character of the range start. So we assign 3075169695Skan -(laststart[1] + laststart[2] + 6) to 3076169695Skan range_start. */ 3077169695Skan range_start = -(laststart[1] + laststart[2] + 6); 3078169695Skan /* Update the length of collating_symbol. 
*/ 3079169695Skan laststart[2] += datasize; 3080169695Skan had_char_class = false; 3081169695Skan } 3082169695Skan } 3083169695Skan else 3084169695Skan { 3085169695Skan c1++; 3086169695Skan while (c1--) 3087169695Skan PATUNFETCH; 3088169695Skan BUF_PUSH ('['); 3089169695Skan BUF_PUSH (delim); 3090169695Skan laststart[5] += 2; /* Update the length of characters */ 3091169695Skan range_start = delim; 3092169695Skan had_char_class = false; 3093169695Skan } 3094169695Skan } 3095169695Skan else 3096169695Skan { 3097169695Skan had_char_class = false; 3098169695Skan BUF_PUSH(c); 3099169695Skan laststart[5]++; /* Update the length of characters */ 3100169695Skan range_start = c; 3101169695Skan } 3102169695Skan } 3103169695Skan 3104169695Skan#else /* BYTE */ 3105169695Skan /* Ensure that we have enough space to push a charset: the 3106169695Skan opcode, the length count, and the bitset; 34 bytes in all. */ 3107169695Skan GET_BUFFER_SPACE (34); 3108169695Skan 3109169695Skan laststart = b; 3110169695Skan 3111169695Skan /* We test `*p == '^' twice, instead of using an if 3112169695Skan statement, so we only need one BUF_PUSH. */ 3113169695Skan BUF_PUSH (*p == '^' ? charset_not : charset); 3114169695Skan if (*p == '^') 3115169695Skan p++; 3116169695Skan 3117169695Skan /* Remember the first position in the bracket expression. */ 3118169695Skan p1 = p; 3119169695Skan 3120169695Skan /* Push the number of bytes in the bitmap. */ 3121169695Skan BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 3122169695Skan 3123169695Skan /* Clear the whole map. */ 3124169695Skan bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 3125169695Skan 3126169695Skan /* charset_not matches newline according to a syntax bit. */ 3127169695Skan if ((re_opcode_t) b[-2] == charset_not 3128169695Skan && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 3129169695Skan SET_LIST_BIT ('\n'); 3130169695Skan 3131169695Skan /* Read in characters and ranges, setting map bits. 
*/ 3132169695Skan for (;;) 3133169695Skan { 3134169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3135169695Skan 3136169695Skan PATFETCH (c); 3137169695Skan 3138169695Skan /* \ might escape characters inside [...] and [^...]. */ 3139169695Skan if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 3140169695Skan { 3141169695Skan if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 3142169695Skan 3143169695Skan PATFETCH (c1); 3144169695Skan SET_LIST_BIT (c1); 3145169695Skan range_start = c1; 3146169695Skan continue; 3147169695Skan } 3148169695Skan 3149169695Skan /* Could be the end of the bracket expression. If it's 3150169695Skan not (i.e., when the bracket expression is `[]' so 3151169695Skan far), the ']' character bit gets set way below. */ 3152169695Skan if (c == ']' && p != p1 + 1) 3153169695Skan break; 3154169695Skan 3155169695Skan /* Look ahead to see if it's a range when the last thing 3156169695Skan was a character class. */ 3157169695Skan if (had_char_class && c == '-' && *p != ']') 3158169695Skan FREE_STACK_RETURN (REG_ERANGE); 3159169695Skan 3160169695Skan /* Look ahead to see if it's a range when the last thing 3161169695Skan was a character: if this is a hyphen not at the 3162169695Skan beginning or the end of a list, then it's the range 3163169695Skan operator. */ 3164169695Skan if (c == '-' 3165169695Skan && !(p - 2 >= pattern && p[-2] == '[') 3166169695Skan && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 3167169695Skan && *p != ']') 3168169695Skan { 3169169695Skan reg_errcode_t ret 3170169695Skan = byte_compile_range (range_start, &p, pend, translate, 3171169695Skan syntax, b); 3172169695Skan if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 3173169695Skan range_start = 0xffffffff; 3174169695Skan } 3175169695Skan 3176169695Skan else if (p[0] == '-' && p[1] != ']') 3177169695Skan { /* This handles ranges made up of characters only. */ 3178169695Skan reg_errcode_t ret; 3179169695Skan 3180169695Skan /* Move past the `-'. 
*/ 3181169695Skan PATFETCH (c1); 3182169695Skan 3183169695Skan ret = byte_compile_range (c, &p, pend, translate, syntax, b); 3184169695Skan if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 3185169695Skan range_start = 0xffffffff; 3186169695Skan } 3187169695Skan 3188169695Skan /* See if we're at the beginning of a possible character 3189169695Skan class. */ 3190169695Skan 3191169695Skan else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 3192169695Skan { /* Leave room for the null. */ 3193169695Skan char str[CHAR_CLASS_MAX_LENGTH + 1]; 3194169695Skan 3195169695Skan PATFETCH (c); 3196169695Skan c1 = 0; 3197169695Skan 3198169695Skan /* If pattern is `[[:'. */ 3199169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3200169695Skan 3201169695Skan for (;;) 3202169695Skan { 3203169695Skan PATFETCH (c); 3204169695Skan if ((c == ':' && *p == ']') || p == pend) 3205169695Skan break; 3206169695Skan if (c1 < CHAR_CLASS_MAX_LENGTH) 3207169695Skan str[c1++] = c; 3208169695Skan else 3209169695Skan /* This is in any case an invalid class name. */ 3210169695Skan str[0] = '\0'; 3211169695Skan } 3212169695Skan str[c1] = '\0'; 3213169695Skan 3214169695Skan /* If isn't a word bracketed by `[:' and `:]': 3215169695Skan undo the ending character, the letters, and leave 3216169695Skan the leading `:' and `[' (but set bits for them). */ 3217169695Skan if (c == ':' && *p == ']') 3218169695Skan { 3219169695Skan# if defined _LIBC || WIDE_CHAR_SUPPORT 3220169695Skan boolean is_lower = STREQ (str, "lower"); 3221169695Skan boolean is_upper = STREQ (str, "upper"); 3222169695Skan wctype_t wt; 3223169695Skan int ch; 3224169695Skan 3225169695Skan wt = IS_CHAR_CLASS (str); 3226169695Skan if (wt == 0) 3227169695Skan FREE_STACK_RETURN (REG_ECTYPE); 3228169695Skan 3229169695Skan /* Throw away the ] at the end of the character 3230169695Skan class. 
*/ 3231169695Skan PATFETCH (c); 3232169695Skan 3233169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3234169695Skan 3235169695Skan for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 3236169695Skan { 3237169695Skan# ifdef _LIBC 3238169695Skan if (__iswctype (__btowc (ch), wt)) 3239169695Skan SET_LIST_BIT (ch); 3240169695Skan# else 3241169695Skan if (iswctype (btowc (ch), wt)) 3242169695Skan SET_LIST_BIT (ch); 3243169695Skan# endif 3244169695Skan 3245169695Skan if (translate && (is_upper || is_lower) 3246169695Skan && (ISUPPER (ch) || ISLOWER (ch))) 3247169695Skan SET_LIST_BIT (ch); 3248169695Skan } 3249169695Skan 3250169695Skan had_char_class = true; 3251169695Skan# else 3252169695Skan int ch; 3253169695Skan boolean is_alnum = STREQ (str, "alnum"); 3254169695Skan boolean is_alpha = STREQ (str, "alpha"); 3255169695Skan boolean is_blank = STREQ (str, "blank"); 3256169695Skan boolean is_cntrl = STREQ (str, "cntrl"); 3257169695Skan boolean is_digit = STREQ (str, "digit"); 3258169695Skan boolean is_graph = STREQ (str, "graph"); 3259169695Skan boolean is_lower = STREQ (str, "lower"); 3260169695Skan boolean is_print = STREQ (str, "print"); 3261169695Skan boolean is_punct = STREQ (str, "punct"); 3262169695Skan boolean is_space = STREQ (str, "space"); 3263169695Skan boolean is_upper = STREQ (str, "upper"); 3264169695Skan boolean is_xdigit = STREQ (str, "xdigit"); 3265169695Skan 3266169695Skan if (!IS_CHAR_CLASS (str)) 3267169695Skan FREE_STACK_RETURN (REG_ECTYPE); 3268169695Skan 3269169695Skan /* Throw away the ] at the end of the character 3270169695Skan class. */ 3271169695Skan PATFETCH (c); 3272169695Skan 3273169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3274169695Skan 3275169695Skan for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 3276169695Skan { 3277169695Skan /* This was split into 3 if's to 3278169695Skan avoid an arbitrary limit in some compiler. 
*/ 3279169695Skan if ( (is_alnum && ISALNUM (ch)) 3280169695Skan || (is_alpha && ISALPHA (ch)) 3281169695Skan || (is_blank && ISBLANK (ch)) 3282169695Skan || (is_cntrl && ISCNTRL (ch))) 3283169695Skan SET_LIST_BIT (ch); 3284169695Skan if ( (is_digit && ISDIGIT (ch)) 3285169695Skan || (is_graph && ISGRAPH (ch)) 3286169695Skan || (is_lower && ISLOWER (ch)) 3287169695Skan || (is_print && ISPRINT (ch))) 3288169695Skan SET_LIST_BIT (ch); 3289169695Skan if ( (is_punct && ISPUNCT (ch)) 3290169695Skan || (is_space && ISSPACE (ch)) 3291169695Skan || (is_upper && ISUPPER (ch)) 3292169695Skan || (is_xdigit && ISXDIGIT (ch))) 3293169695Skan SET_LIST_BIT (ch); 3294169695Skan if ( translate && (is_upper || is_lower) 3295169695Skan && (ISUPPER (ch) || ISLOWER (ch))) 3296169695Skan SET_LIST_BIT (ch); 3297169695Skan } 3298169695Skan had_char_class = true; 3299169695Skan# endif /* libc || wctype.h */ 3300169695Skan } 3301169695Skan else 3302169695Skan { 3303169695Skan c1++; 3304169695Skan while (c1--) 3305169695Skan PATUNFETCH; 3306169695Skan SET_LIST_BIT ('['); 3307169695Skan SET_LIST_BIT (':'); 3308169695Skan range_start = ':'; 3309169695Skan had_char_class = false; 3310169695Skan } 3311169695Skan } 3312169695Skan else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=') 3313169695Skan { 3314169695Skan unsigned char str[MB_LEN_MAX + 1]; 3315169695Skan# ifdef _LIBC 3316169695Skan uint32_t nrules = 3317169695Skan _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3318169695Skan# endif 3319169695Skan 3320169695Skan PATFETCH (c); 3321169695Skan c1 = 0; 3322169695Skan 3323169695Skan /* If pattern is `[[='. */ 3324169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3325169695Skan 3326169695Skan for (;;) 3327169695Skan { 3328169695Skan PATFETCH (c); 3329169695Skan if ((c == '=' && *p == ']') || p == pend) 3330169695Skan break; 3331169695Skan if (c1 < MB_LEN_MAX) 3332169695Skan str[c1++] = c; 3333169695Skan else 3334169695Skan /* This is in any case an invalid class name. 
*/ 3335169695Skan str[0] = '\0'; 3336169695Skan } 3337169695Skan str[c1] = '\0'; 3338169695Skan 3339169695Skan if (c == '=' && *p == ']' && str[0] != '\0') 3340169695Skan { 3341169695Skan /* If we have no collation data we use the default 3342169695Skan collation in which each character is in a class 3343169695Skan by itself. It also means that ASCII is the 3344169695Skan character set and therefore we cannot have character 3345169695Skan with more than one byte in the multibyte 3346169695Skan representation. */ 3347169695Skan# ifdef _LIBC 3348169695Skan if (nrules == 0) 3349169695Skan# endif 3350169695Skan { 3351169695Skan if (c1 != 1) 3352169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 3353169695Skan 3354169695Skan /* Throw away the ] at the end of the equivalence 3355169695Skan class. */ 3356169695Skan PATFETCH (c); 3357169695Skan 3358169695Skan /* Set the bit for the character. */ 3359169695Skan SET_LIST_BIT (str[0]); 3360169695Skan } 3361169695Skan# ifdef _LIBC 3362169695Skan else 3363169695Skan { 3364169695Skan /* Try to match the byte sequence in `str' against 3365169695Skan those known to the collate implementation. 3366169695Skan First find out whether the bytes in `str' are 3367169695Skan actually from exactly one character. */ 3368169695Skan const int32_t *table; 3369169695Skan const unsigned char *weights; 3370169695Skan const unsigned char *extra; 3371169695Skan const int32_t *indirect; 3372169695Skan int32_t idx; 3373169695Skan const unsigned char *cp = str; 3374169695Skan int ch; 3375169695Skan 3376169695Skan /* This #include defines a local function! 
*/ 3377169695Skan# include <locale/weight.h> 3378169695Skan 3379169695Skan table = (const int32_t *) 3380169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3381169695Skan weights = (const unsigned char *) 3382169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 3383169695Skan extra = (const unsigned char *) 3384169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3385169695Skan indirect = (const int32_t *) 3386169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3387169695Skan 3388169695Skan idx = findidx (&cp); 3389169695Skan if (idx == 0 || cp < str + c1) 3390169695Skan /* This is no valid character. */ 3391169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 3392169695Skan 3393169695Skan /* Throw away the ] at the end of the equivalence 3394169695Skan class. */ 3395169695Skan PATFETCH (c); 3396169695Skan 3397169695Skan /* Now we have to go throught the whole table 3398169695Skan and find all characters which have the same 3399169695Skan first level weight. 3400169695Skan 3401169695Skan XXX Note that this is not entirely correct. 3402169695Skan we would have to match multibyte sequences 3403169695Skan but this is not possible with the current 3404169695Skan implementation. */ 3405169695Skan for (ch = 1; ch < 256; ++ch) 3406169695Skan /* XXX This test would have to be changed if we 3407169695Skan would allow matching multibyte sequences. */ 3408169695Skan if (table[ch] > 0) 3409169695Skan { 3410169695Skan int32_t idx2 = table[ch]; 3411169695Skan size_t len = weights[idx2]; 3412169695Skan 3413169695Skan /* Test whether the lenghts match. */ 3414169695Skan if (weights[idx] == len) 3415169695Skan { 3416169695Skan /* They do. New compare the bytes of 3417169695Skan the weight. */ 3418169695Skan size_t cnt = 0; 3419169695Skan 3420169695Skan while (cnt < len 3421169695Skan && (weights[idx + 1 + cnt] 3422169695Skan == weights[idx2 + 1 + cnt])) 3423169695Skan ++cnt; 3424169695Skan 3425169695Skan if (cnt == len) 3426169695Skan /* They match. 
Mark the character as 3427169695Skan acceptable. */ 3428169695Skan SET_LIST_BIT (ch); 3429169695Skan } 3430169695Skan } 3431169695Skan } 3432169695Skan# endif 3433169695Skan had_char_class = true; 3434169695Skan } 3435169695Skan else 3436169695Skan { 3437169695Skan c1++; 3438169695Skan while (c1--) 3439169695Skan PATUNFETCH; 3440169695Skan SET_LIST_BIT ('['); 3441169695Skan SET_LIST_BIT ('='); 3442169695Skan range_start = '='; 3443169695Skan had_char_class = false; 3444169695Skan } 3445169695Skan } 3446169695Skan else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.') 3447169695Skan { 3448169695Skan unsigned char str[128]; /* Should be large enough. */ 3449169695Skan# ifdef _LIBC 3450169695Skan uint32_t nrules = 3451169695Skan _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3452169695Skan# endif 3453169695Skan 3454169695Skan PATFETCH (c); 3455169695Skan c1 = 0; 3456169695Skan 3457169695Skan /* If pattern is `[[.'. */ 3458169695Skan if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 3459169695Skan 3460169695Skan for (;;) 3461169695Skan { 3462169695Skan PATFETCH (c); 3463169695Skan if ((c == '.' && *p == ']') || p == pend) 3464169695Skan break; 3465169695Skan if (c1 < sizeof (str)) 3466169695Skan str[c1++] = c; 3467169695Skan else 3468169695Skan /* This is in any case an invalid class name. */ 3469169695Skan str[0] = '\0'; 3470169695Skan } 3471169695Skan str[c1] = '\0'; 3472169695Skan 3473169695Skan if (c == '.' && *p == ']' && str[0] != '\0') 3474169695Skan { 3475169695Skan /* If we have no collation data we use the default 3476169695Skan collation in which each character is the name 3477169695Skan for its own class which contains only the one 3478169695Skan character. It also means that ASCII is the 3479169695Skan character set and therefore we cannot have character 3480169695Skan with more than one byte in the multibyte 3481169695Skan representation. 
*/ 3482169695Skan# ifdef _LIBC 3483169695Skan if (nrules == 0) 3484169695Skan# endif 3485169695Skan { 3486169695Skan if (c1 != 1) 3487169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 3488169695Skan 3489169695Skan /* Throw away the ] at the end of the equivalence 3490169695Skan class. */ 3491169695Skan PATFETCH (c); 3492169695Skan 3493169695Skan /* Set the bit for the character. */ 3494169695Skan SET_LIST_BIT (str[0]); 3495169695Skan range_start = ((const unsigned char *) str)[0]; 3496169695Skan } 3497169695Skan# ifdef _LIBC 3498169695Skan else 3499169695Skan { 3500169695Skan /* Try to match the byte sequence in `str' against 3501169695Skan those known to the collate implementation. 3502169695Skan First find out whether the bytes in `str' are 3503169695Skan actually from exactly one character. */ 3504169695Skan int32_t table_size; 3505169695Skan const int32_t *symb_table; 3506169695Skan const unsigned char *extra; 3507169695Skan int32_t idx; 3508169695Skan int32_t elem; 3509169695Skan int32_t second; 3510169695Skan int32_t hash; 3511169695Skan 3512169695Skan table_size = 3513169695Skan _NL_CURRENT_WORD (LC_COLLATE, 3514169695Skan _NL_COLLATE_SYMB_HASH_SIZEMB); 3515169695Skan symb_table = (const int32_t *) 3516169695Skan _NL_CURRENT (LC_COLLATE, 3517169695Skan _NL_COLLATE_SYMB_TABLEMB); 3518169695Skan extra = (const unsigned char *) 3519169695Skan _NL_CURRENT (LC_COLLATE, 3520169695Skan _NL_COLLATE_SYMB_EXTRAMB); 3521169695Skan 3522169695Skan /* Locate the character in the hashing table. */ 3523169695Skan hash = elem_hash (str, c1); 3524169695Skan 3525169695Skan idx = 0; 3526169695Skan elem = hash % table_size; 3527169695Skan second = hash % (table_size - 2); 3528169695Skan while (symb_table[2 * elem] != 0) 3529169695Skan { 3530169695Skan /* First compare the hashing value. 
*/ 3531169695Skan if (symb_table[2 * elem] == hash 3532169695Skan && c1 == extra[symb_table[2 * elem + 1]] 3533169695Skan && memcmp (str, 3534169695Skan &extra[symb_table[2 * elem + 1] 3535169695Skan + 1], 3536169695Skan c1) == 0) 3537169695Skan { 3538169695Skan /* Yep, this is the entry. */ 3539169695Skan idx = symb_table[2 * elem + 1]; 3540169695Skan idx += 1 + extra[idx]; 3541169695Skan break; 3542169695Skan } 3543169695Skan 3544169695Skan /* Next entry. */ 3545169695Skan elem += second; 3546169695Skan } 3547169695Skan 3548169695Skan if (symb_table[2 * elem] == 0) 3549169695Skan /* This is no valid character. */ 3550169695Skan FREE_STACK_RETURN (REG_ECOLLATE); 3551169695Skan 3552169695Skan /* Throw away the ] at the end of the equivalence 3553169695Skan class. */ 3554169695Skan PATFETCH (c); 3555169695Skan 3556169695Skan /* Now add the multibyte character(s) we found 3557169695Skan to the accept list. 3558169695Skan 3559169695Skan XXX Note that this is not entirely correct. 3560169695Skan we would have to match multibyte sequences 3561169695Skan but this is not possible with the current 3562169695Skan implementation. Also, we have to match 3563169695Skan collating symbols, which expand to more than 3564169695Skan one file, as a whole and not allow the 3565169695Skan individual bytes. 
*/ 3566169695Skan c1 = extra[idx++]; 3567169695Skan if (c1 == 1) 3568169695Skan range_start = extra[idx]; 3569169695Skan while (c1-- > 0) 3570169695Skan { 3571169695Skan SET_LIST_BIT (extra[idx]); 3572169695Skan ++idx; 3573169695Skan } 3574169695Skan } 3575169695Skan# endif 3576169695Skan had_char_class = false; 3577169695Skan } 3578169695Skan else 3579169695Skan { 3580169695Skan c1++; 3581169695Skan while (c1--) 3582169695Skan PATUNFETCH; 3583169695Skan SET_LIST_BIT ('['); 3584169695Skan SET_LIST_BIT ('.'); 3585169695Skan range_start = '.'; 3586169695Skan had_char_class = false; 3587169695Skan } 3588169695Skan } 3589169695Skan else 3590169695Skan { 3591169695Skan had_char_class = false; 3592169695Skan SET_LIST_BIT (c); 3593169695Skan range_start = c; 3594169695Skan } 3595169695Skan } 3596169695Skan 3597169695Skan /* Discard any (non)matching list bytes that are all 0 at the 3598169695Skan end of the map. Decrease the map-length byte too. */ 3599169695Skan while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 3600169695Skan b[-1]--; 3601169695Skan b += b[-1]; 3602169695Skan#endif /* WCHAR */ 3603169695Skan } 3604169695Skan break; 3605169695Skan 3606169695Skan 3607169695Skan case '(': 3608169695Skan if (syntax & RE_NO_BK_PARENS) 3609169695Skan goto handle_open; 3610169695Skan else 3611169695Skan goto normal_char; 3612169695Skan 3613169695Skan 3614169695Skan case ')': 3615169695Skan if (syntax & RE_NO_BK_PARENS) 3616169695Skan goto handle_close; 3617169695Skan else 3618169695Skan goto normal_char; 3619169695Skan 3620169695Skan 3621169695Skan case '\n': 3622169695Skan if (syntax & RE_NEWLINE_ALT) 3623169695Skan goto handle_alt; 3624169695Skan else 3625169695Skan goto normal_char; 3626169695Skan 3627169695Skan 3628169695Skan case '|': 3629169695Skan if (syntax & RE_NO_BK_VBAR) 3630169695Skan goto handle_alt; 3631169695Skan else 3632169695Skan goto normal_char; 3633169695Skan 3634169695Skan 3635169695Skan case '{': 3636169695Skan if (syntax & RE_INTERVALS && syntax & 
RE_NO_BK_BRACES) 3637169695Skan goto handle_interval; 3638169695Skan else 3639169695Skan goto normal_char; 3640169695Skan 3641169695Skan 3642169695Skan case '\\': 3643169695Skan if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 3644169695Skan 3645169695Skan /* Do not translate the character after the \, so that we can 3646169695Skan distinguish, e.g., \B from \b, even if we normally would 3647169695Skan translate, e.g., B to b. */ 3648169695Skan PATFETCH_RAW (c); 3649169695Skan 3650169695Skan switch (c) 3651169695Skan { 3652169695Skan case '(': 3653169695Skan if (syntax & RE_NO_BK_PARENS) 3654169695Skan goto normal_backslash; 3655169695Skan 3656169695Skan handle_open: 3657169695Skan bufp->re_nsub++; 3658169695Skan regnum++; 3659169695Skan 3660169695Skan if (COMPILE_STACK_FULL) 3661169695Skan { 3662169695Skan RETALLOC (compile_stack.stack, compile_stack.size << 1, 3663169695Skan compile_stack_elt_t); 3664169695Skan if (compile_stack.stack == NULL) return REG_ESPACE; 3665169695Skan 3666169695Skan compile_stack.size <<= 1; 3667169695Skan } 3668169695Skan 3669169695Skan /* These are the values to restore when we hit end of this 3670169695Skan group. They are all relative offsets, so that if the 3671169695Skan whole pattern moves because of realloc, they will still 3672169695Skan be valid. */ 3673169695Skan COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR; 3674169695Skan COMPILE_STACK_TOP.fixup_alt_jump 3675169695Skan = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0; 3676169695Skan COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR; 3677169695Skan COMPILE_STACK_TOP.regnum = regnum; 3678169695Skan 3679169695Skan /* We will eventually replace the 0 with the number of 3680169695Skan groups inner to this one. But do not push a 3681169695Skan start_memory for groups beyond the last one we can 3682169695Skan represent in the compiled pattern. 
*/ 3683169695Skan if (regnum <= MAX_REGNUM) 3684169695Skan { 3685169695Skan COMPILE_STACK_TOP.inner_group_offset = b 3686169695Skan - COMPILED_BUFFER_VAR + 2; 3687169695Skan BUF_PUSH_3 (start_memory, regnum, 0); 3688169695Skan } 3689169695Skan 3690169695Skan compile_stack.avail++; 3691169695Skan 3692169695Skan fixup_alt_jump = 0; 3693169695Skan laststart = 0; 3694169695Skan begalt = b; 3695169695Skan /* If we've reached MAX_REGNUM groups, then this open 3696169695Skan won't actually generate any code, so we'll have to 3697169695Skan clear pending_exact explicitly. */ 3698169695Skan pending_exact = 0; 3699169695Skan break; 3700169695Skan 3701169695Skan 3702169695Skan case ')': 3703169695Skan if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 3704169695Skan 3705169695Skan if (COMPILE_STACK_EMPTY) 3706169695Skan { 3707169695Skan if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 3708169695Skan goto normal_backslash; 3709169695Skan else 3710169695Skan FREE_STACK_RETURN (REG_ERPAREN); 3711169695Skan } 3712169695Skan 3713169695Skan handle_close: 3714169695Skan if (fixup_alt_jump) 3715169695Skan { /* Push a dummy failure point at the end of the 3716169695Skan alternative for a possible future 3717169695Skan `pop_failure_jump' to pop. See comments at 3718169695Skan `push_dummy_failure' in `re_match_2'. */ 3719169695Skan BUF_PUSH (push_dummy_failure); 3720169695Skan 3721169695Skan /* We allocated space for this jump when we assigned 3722169695Skan to `fixup_alt_jump', in the `handle_alt' case below. */ 3723169695Skan STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 3724169695Skan } 3725169695Skan 3726169695Skan /* See similar code for backslashed left paren above. 
*/ 3727169695Skan if (COMPILE_STACK_EMPTY) 3728169695Skan { 3729169695Skan if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 3730169695Skan goto normal_char; 3731169695Skan else 3732169695Skan FREE_STACK_RETURN (REG_ERPAREN); 3733169695Skan } 3734169695Skan 3735169695Skan /* Since we just checked for an empty stack above, this 3736169695Skan ``can't happen''. */ 3737169695Skan assert (compile_stack.avail != 0); 3738169695Skan { 3739169695Skan /* We don't just want to restore into `regnum', because 3740169695Skan later groups should continue to be numbered higher, 3741169695Skan as in `(ab)c(de)' -- the second group is #2. */ 3742169695Skan regnum_t this_group_regnum; 3743169695Skan 3744169695Skan compile_stack.avail--; 3745169695Skan begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset; 3746169695Skan fixup_alt_jump 3747169695Skan = COMPILE_STACK_TOP.fixup_alt_jump 3748169695Skan ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1 3749169695Skan : 0; 3750169695Skan laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset; 3751169695Skan this_group_regnum = COMPILE_STACK_TOP.regnum; 3752169695Skan /* If we've reached MAX_REGNUM groups, then this open 3753169695Skan won't actually generate any code, so we'll have to 3754169695Skan clear pending_exact explicitly. */ 3755169695Skan pending_exact = 0; 3756169695Skan 3757169695Skan /* We're at the end of the group, so now we know how many 3758169695Skan groups were inside this one. */ 3759169695Skan if (this_group_regnum <= MAX_REGNUM) 3760169695Skan { 3761169695Skan UCHAR_T *inner_group_loc 3762169695Skan = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset; 3763169695Skan 3764169695Skan *inner_group_loc = regnum - this_group_regnum; 3765169695Skan BUF_PUSH_3 (stop_memory, this_group_regnum, 3766169695Skan regnum - this_group_regnum); 3767169695Skan } 3768169695Skan } 3769169695Skan break; 3770169695Skan 3771169695Skan 3772169695Skan case '|': /* `\|'. 
*/ 3773169695Skan if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 3774169695Skan goto normal_backslash; 3775169695Skan handle_alt: 3776169695Skan if (syntax & RE_LIMITED_OPS) 3777169695Skan goto normal_char; 3778169695Skan 3779169695Skan /* Insert before the previous alternative a jump which 3780169695Skan jumps to this alternative if the former fails. */ 3781169695Skan GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3782169695Skan INSERT_JUMP (on_failure_jump, begalt, 3783169695Skan b + 2 + 2 * OFFSET_ADDRESS_SIZE); 3784169695Skan pending_exact = 0; 3785169695Skan b += 1 + OFFSET_ADDRESS_SIZE; 3786169695Skan 3787169695Skan /* The alternative before this one has a jump after it 3788169695Skan which gets executed if it gets matched. Adjust that 3789169695Skan jump so it will jump to this alternative's analogous 3790169695Skan jump (put in below, which in turn will jump to the next 3791169695Skan (if any) alternative's such jump, etc.). The last such 3792169695Skan jump jumps to the correct final destination. A picture: 3793169695Skan _____ _____ 3794169695Skan | | | | 3795169695Skan | v | v 3796169695Skan a | b | c 3797169695Skan 3798169695Skan If we are at `b', then fixup_alt_jump right now points to a 3799169695Skan three-byte space after `a'. We'll put in the jump, set 3800169695Skan fixup_alt_jump to right after `b', and leave behind three 3801169695Skan bytes which we'll fill in when we get to after `c'. */ 3802169695Skan 3803169695Skan if (fixup_alt_jump) 3804169695Skan STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 3805169695Skan 3806169695Skan /* Mark and leave space for a jump after this alternative, 3807169695Skan to be filled in later either by next alternative or 3808169695Skan when know we're at the end of a series of alternatives. 
*/ 3809169695Skan fixup_alt_jump = b; 3810169695Skan GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3811169695Skan b += 1 + OFFSET_ADDRESS_SIZE; 3812169695Skan 3813169695Skan laststart = 0; 3814169695Skan begalt = b; 3815169695Skan break; 3816169695Skan 3817169695Skan 3818169695Skan case '{': 3819169695Skan /* If \{ is a literal. */ 3820169695Skan if (!(syntax & RE_INTERVALS) 3821169695Skan /* If we're at `\{' and it's not the open-interval 3822169695Skan operator. */ 3823169695Skan || (syntax & RE_NO_BK_BRACES)) 3824169695Skan goto normal_backslash; 3825169695Skan 3826169695Skan handle_interval: 3827169695Skan { 3828169695Skan /* If got here, then the syntax allows intervals. */ 3829169695Skan 3830169695Skan /* At least (most) this many matches must be made. */ 3831169695Skan int lower_bound = -1, upper_bound = -1; 3832169695Skan 3833169695Skan /* Place in the uncompiled pattern (i.e., just after 3834169695Skan the '{') to go back to if the interval is invalid. */ 3835169695Skan const CHAR_T *beg_interval = p; 3836169695Skan 3837169695Skan if (p == pend) 3838169695Skan goto invalid_interval; 3839169695Skan 3840169695Skan GET_UNSIGNED_NUMBER (lower_bound); 3841169695Skan 3842169695Skan if (c == ',') 3843169695Skan { 3844169695Skan GET_UNSIGNED_NUMBER (upper_bound); 3845169695Skan if (upper_bound < 0) 3846169695Skan upper_bound = RE_DUP_MAX; 3847169695Skan } 3848169695Skan else 3849169695Skan /* Interval such as `{1}' => match exactly once. */ 3850169695Skan upper_bound = lower_bound; 3851169695Skan 3852169695Skan if (! (0 <= lower_bound && lower_bound <= upper_bound)) 3853169695Skan goto invalid_interval; 3854169695Skan 3855169695Skan if (!(syntax & RE_NO_BK_BRACES)) 3856169695Skan { 3857169695Skan if (c != '\\' || p == pend) 3858169695Skan goto invalid_interval; 3859169695Skan PATFETCH (c); 3860169695Skan } 3861169695Skan 3862169695Skan if (c != '}') 3863169695Skan goto invalid_interval; 3864169695Skan 3865169695Skan /* If it's invalid to have no preceding re. 
*/ 3866169695Skan if (!laststart) 3867169695Skan { 3868169695Skan if (syntax & RE_CONTEXT_INVALID_OPS 3869169695Skan && !(syntax & RE_INVALID_INTERVAL_ORD)) 3870169695Skan FREE_STACK_RETURN (REG_BADRPT); 3871169695Skan else if (syntax & RE_CONTEXT_INDEP_OPS) 3872169695Skan laststart = b; 3873169695Skan else 3874169695Skan goto unfetch_interval; 3875169695Skan } 3876169695Skan 3877169695Skan /* We just parsed a valid interval. */ 3878169695Skan 3879169695Skan if (RE_DUP_MAX < upper_bound) 3880169695Skan FREE_STACK_RETURN (REG_BADBR); 3881169695Skan 3882169695Skan /* If the upper bound is zero, don't want to succeed at 3883169695Skan all; jump from `laststart' to `b + 3', which will be 3884169695Skan the end of the buffer after we insert the jump. */ 3885169695Skan /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' 3886169695Skan instead of 'b + 3'. */ 3887169695Skan if (upper_bound == 0) 3888169695Skan { 3889169695Skan GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE); 3890169695Skan INSERT_JUMP (jump, laststart, b + 1 3891169695Skan + OFFSET_ADDRESS_SIZE); 3892169695Skan b += 1 + OFFSET_ADDRESS_SIZE; 3893169695Skan } 3894169695Skan 3895169695Skan /* Otherwise, we have a nontrivial interval. When 3896169695Skan we're all done, the pattern will look like: 3897169695Skan set_number_at <jump count> <upper bound> 3898169695Skan set_number_at <succeed_n count> <lower bound> 3899169695Skan succeed_n <after jump addr> <succeed_n count> 3900169695Skan <body of loop> 3901169695Skan jump_n <succeed_n addr> <jump count> 3902169695Skan (The upper bound and `jump_n' are omitted if 3903169695Skan `upper_bound' is 1, though.) */ 3904169695Skan else 3905169695Skan { /* If the upper bound is > 1, we need to insert 3906169695Skan more at the end of the loop. 
*/ 3907169695Skan unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE + 3908169695Skan (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE); 3909169695Skan 3910169695Skan GET_BUFFER_SPACE (nbytes); 3911169695Skan 3912169695Skan /* Initialize lower bound of the `succeed_n', even 3913169695Skan though it will be set during matching by its 3914169695Skan attendant `set_number_at' (inserted next), 3915169695Skan because `re_compile_fastmap' needs to know. 3916169695Skan Jump to the `jump_n' we might insert below. */ 3917169695Skan INSERT_JUMP2 (succeed_n, laststart, 3918169695Skan b + 1 + 2 * OFFSET_ADDRESS_SIZE 3919169695Skan + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE) 3920169695Skan , lower_bound); 3921169695Skan b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3922169695Skan 3923169695Skan /* Code to initialize the lower bound. Insert 3924169695Skan before the `succeed_n'. The `5' is the last two 3925169695Skan bytes of this `set_number_at', plus 3 bytes of 3926169695Skan the following `succeed_n'. */ 3927169695Skan /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE' 3928169695Skan is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE' 3929169695Skan of the following `succeed_n'. */ 3930169695Skan PREFIX(insert_op2) (set_number_at, laststart, 1 3931169695Skan + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b); 3932169695Skan b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3933169695Skan 3934169695Skan if (upper_bound > 1) 3935169695Skan { /* More than one repetition is allowed, so 3936169695Skan append a backward jump to the `succeed_n' 3937169695Skan that starts this interval. 3938169695Skan 3939169695Skan When we've reached this during matching, 3940169695Skan we'll have matched the interval once, so 3941169695Skan jump back only `upper_bound - 1' times. 
*/ 3942169695Skan STORE_JUMP2 (jump_n, b, laststart 3943169695Skan + 2 * OFFSET_ADDRESS_SIZE + 1, 3944169695Skan upper_bound - 1); 3945169695Skan b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3946169695Skan 3947169695Skan /* The location we want to set is the second 3948169695Skan parameter of the `jump_n'; that is `b-2' as 3949169695Skan an absolute address. `laststart' will be 3950169695Skan the `set_number_at' we're about to insert; 3951169695Skan `laststart+3' the number to set, the source 3952169695Skan for the relative address. But we are 3953169695Skan inserting into the middle of the pattern -- 3954169695Skan so everything is getting moved up by 5. 3955169695Skan Conclusion: (b - 2) - (laststart + 3) + 5, 3956169695Skan i.e., b - laststart. 3957169695Skan 3958169695Skan We insert this at the beginning of the loop 3959169695Skan so that if we fail during matching, we'll 3960169695Skan reinitialize the bounds. */ 3961169695Skan PREFIX(insert_op2) (set_number_at, laststart, 3962169695Skan b - laststart, 3963169695Skan upper_bound - 1, b); 3964169695Skan b += 1 + 2 * OFFSET_ADDRESS_SIZE; 3965169695Skan } 3966169695Skan } 3967169695Skan pending_exact = 0; 3968169695Skan break; 3969169695Skan 3970169695Skan invalid_interval: 3971169695Skan if (!(syntax & RE_INVALID_INTERVAL_ORD)) 3972169695Skan FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR); 3973169695Skan unfetch_interval: 3974169695Skan /* Match the characters as literals. */ 3975169695Skan p = beg_interval; 3976169695Skan c = '{'; 3977169695Skan if (syntax & RE_NO_BK_BRACES) 3978169695Skan goto normal_char; 3979169695Skan else 3980169695Skan goto normal_backslash; 3981169695Skan } 3982169695Skan 3983169695Skan#ifdef emacs 3984169695Skan /* There is no way to specify the before_dot and after_dot 3985169695Skan operators. rms says this is ok. 
--karl */ 3986169695Skan case '=': 3987169695Skan BUF_PUSH (at_dot); 3988169695Skan break; 3989169695Skan 3990169695Skan case 's': 3991169695Skan laststart = b; 3992169695Skan PATFETCH (c); 3993169695Skan BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 3994169695Skan break; 3995169695Skan 3996169695Skan case 'S': 3997169695Skan laststart = b; 3998169695Skan PATFETCH (c); 3999169695Skan BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 4000169695Skan break; 4001169695Skan#endif /* emacs */ 4002169695Skan 4003169695Skan 4004169695Skan case 'w': 4005169695Skan if (syntax & RE_NO_GNU_OPS) 4006169695Skan goto normal_char; 4007169695Skan laststart = b; 4008169695Skan BUF_PUSH (wordchar); 4009169695Skan break; 4010169695Skan 4011169695Skan 4012169695Skan case 'W': 4013169695Skan if (syntax & RE_NO_GNU_OPS) 4014169695Skan goto normal_char; 4015169695Skan laststart = b; 4016169695Skan BUF_PUSH (notwordchar); 4017169695Skan break; 4018169695Skan 4019169695Skan 4020169695Skan case '<': 4021169695Skan if (syntax & RE_NO_GNU_OPS) 4022169695Skan goto normal_char; 4023169695Skan BUF_PUSH (wordbeg); 4024169695Skan break; 4025169695Skan 4026169695Skan case '>': 4027169695Skan if (syntax & RE_NO_GNU_OPS) 4028169695Skan goto normal_char; 4029169695Skan BUF_PUSH (wordend); 4030169695Skan break; 4031169695Skan 4032169695Skan case 'b': 4033169695Skan if (syntax & RE_NO_GNU_OPS) 4034169695Skan goto normal_char; 4035169695Skan BUF_PUSH (wordbound); 4036169695Skan break; 4037169695Skan 4038169695Skan case 'B': 4039169695Skan if (syntax & RE_NO_GNU_OPS) 4040169695Skan goto normal_char; 4041169695Skan BUF_PUSH (notwordbound); 4042169695Skan break; 4043169695Skan 4044169695Skan case '`': 4045169695Skan if (syntax & RE_NO_GNU_OPS) 4046169695Skan goto normal_char; 4047169695Skan BUF_PUSH (begbuf); 4048169695Skan break; 4049169695Skan 4050169695Skan case '\'': 4051169695Skan if (syntax & RE_NO_GNU_OPS) 4052169695Skan goto normal_char; 4053169695Skan BUF_PUSH (endbuf); 4054169695Skan break; 
4055169695Skan 4056169695Skan case '1': case '2': case '3': case '4': case '5': 4057169695Skan case '6': case '7': case '8': case '9': 4058169695Skan if (syntax & RE_NO_BK_REFS) 4059169695Skan goto normal_char; 4060169695Skan 4061169695Skan c1 = c - '0'; 4062169695Skan 4063169695Skan if (c1 > regnum) 4064169695Skan FREE_STACK_RETURN (REG_ESUBREG); 4065169695Skan 4066169695Skan /* Can't back reference to a subexpression if inside of it. */ 4067169695Skan if (group_in_compile_stack (compile_stack, (regnum_t) c1)) 4068169695Skan goto normal_char; 4069169695Skan 4070169695Skan laststart = b; 4071169695Skan BUF_PUSH_2 (duplicate, c1); 4072169695Skan break; 4073169695Skan 4074169695Skan 4075169695Skan case '+': 4076169695Skan case '?': 4077169695Skan if (syntax & RE_BK_PLUS_QM) 4078169695Skan goto handle_plus; 4079169695Skan else 4080169695Skan goto normal_backslash; 4081169695Skan 4082169695Skan default: 4083169695Skan normal_backslash: 4084169695Skan /* You might think it would be useful for \ to mean 4085169695Skan not to translate; but if we don't translate it 4086169695Skan it will never match anything. */ 4087169695Skan c = TRANSLATE (c); 4088169695Skan goto normal_char; 4089169695Skan } 4090169695Skan break; 4091169695Skan 4092169695Skan 4093169695Skan default: 4094169695Skan /* Expects the character in `c'. */ 4095169695Skan normal_char: 4096169695Skan /* If no exactn currently being built. */ 4097169695Skan if (!pending_exact 4098169695Skan#ifdef WCHAR 4099169695Skan /* If last exactn handle binary(or character) and 4100169695Skan new exactn handle character(or binary). */ 4101169695Skan || is_exactn_bin != is_binary[p - 1 - pattern] 4102169695Skan#endif /* WCHAR */ 4103169695Skan 4104169695Skan /* If last exactn not at current position. */ 4105169695Skan || pending_exact + *pending_exact + 1 != b 4106169695Skan 4107169695Skan /* We have only one byte following the exactn for the count. 
*/ 4108169695Skan || *pending_exact == (1 << BYTEWIDTH) - 1 4109169695Skan 4110169695Skan /* If followed by a repetition operator. */ 4111169695Skan || *p == '*' || *p == '^' 4112169695Skan || ((syntax & RE_BK_PLUS_QM) 4113169695Skan ? *p == '\\' && (p[1] == '+' || p[1] == '?') 4114169695Skan : (*p == '+' || *p == '?')) 4115169695Skan || ((syntax & RE_INTERVALS) 4116169695Skan && ((syntax & RE_NO_BK_BRACES) 4117169695Skan ? *p == '{' 4118169695Skan : (p[0] == '\\' && p[1] == '{')))) 4119169695Skan { 4120169695Skan /* Start building a new exactn. */ 4121169695Skan 4122169695Skan laststart = b; 4123169695Skan 4124169695Skan#ifdef WCHAR 4125169695Skan /* Is this exactn binary data or character? */ 4126169695Skan is_exactn_bin = is_binary[p - 1 - pattern]; 4127169695Skan if (is_exactn_bin) 4128169695Skan BUF_PUSH_2 (exactn_bin, 0); 4129169695Skan else 4130169695Skan BUF_PUSH_2 (exactn, 0); 4131169695Skan#else 4132169695Skan BUF_PUSH_2 (exactn, 0); 4133169695Skan#endif /* WCHAR */ 4134169695Skan pending_exact = b - 1; 4135169695Skan } 4136169695Skan 4137169695Skan BUF_PUSH (c); 4138169695Skan (*pending_exact)++; 4139169695Skan break; 4140169695Skan } /* switch (c) */ 4141169695Skan } /* while p != pend */ 4142169695Skan 4143169695Skan 4144169695Skan /* Through the pattern now. */ 4145169695Skan 4146169695Skan if (fixup_alt_jump) 4147169695Skan STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 4148169695Skan 4149169695Skan if (!COMPILE_STACK_EMPTY) 4150169695Skan FREE_STACK_RETURN (REG_EPAREN); 4151169695Skan 4152169695Skan /* If we don't want backtracking, force success 4153169695Skan the first time we reach the end of the compiled pattern. 
*/ 4154169695Skan if (syntax & RE_NO_POSIX_BACKTRACKING) 4155169695Skan BUF_PUSH (succeed); 4156169695Skan 4157169695Skan#ifdef WCHAR 4158169695Skan free (pattern); 4159169695Skan free (mbs_offset); 4160169695Skan free (is_binary); 4161169695Skan#endif 4162169695Skan free (compile_stack.stack); 4163169695Skan 4164169695Skan /* We have succeeded; set the length of the buffer. */ 4165169695Skan#ifdef WCHAR 4166169695Skan bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR; 4167169695Skan#else 4168169695Skan bufp->used = b - bufp->buffer; 4169169695Skan#endif 4170169695Skan 4171169695Skan#ifdef DEBUG 4172169695Skan if (debug) 4173169695Skan { 4174169695Skan DEBUG_PRINT1 ("\nCompiled pattern: \n"); 4175169695Skan PREFIX(print_compiled_pattern) (bufp); 4176169695Skan } 4177169695Skan#endif /* DEBUG */ 4178169695Skan 4179169695Skan#ifndef MATCH_MAY_ALLOCATE 4180169695Skan /* Initialize the failure stack to the largest possible stack. This 4181169695Skan isn't necessary unless we're trying to avoid calling alloca in 4182169695Skan the search and match routines. */ 4183169695Skan { 4184169695Skan int num_regs = bufp->re_nsub + 1; 4185169695Skan 4186169695Skan /* Since DOUBLE_FAIL_STACK refuses to double only if the current size 4187169695Skan is strictly greater than re_max_failures, the largest possible stack 4188169695Skan is 2 * re_max_failures failure points. */ 4189169695Skan if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) 4190169695Skan { 4191169695Skan fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 4192169695Skan 4193169695Skan# ifdef emacs 4194169695Skan if (! 
fail_stack.stack) 4195169695Skan fail_stack.stack 4196169695Skan = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size 4197169695Skan * sizeof (PREFIX(fail_stack_elt_t))); 4198169695Skan else 4199169695Skan fail_stack.stack 4200169695Skan = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack, 4201169695Skan (fail_stack.size 4202169695Skan * sizeof (PREFIX(fail_stack_elt_t)))); 4203169695Skan# else /* not emacs */ 4204169695Skan if (! fail_stack.stack) 4205169695Skan fail_stack.stack 4206169695Skan = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size 4207169695Skan * sizeof (PREFIX(fail_stack_elt_t))); 4208169695Skan else 4209169695Skan fail_stack.stack 4210169695Skan = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack, 4211169695Skan (fail_stack.size 4212169695Skan * sizeof (PREFIX(fail_stack_elt_t)))); 4213169695Skan# endif /* not emacs */ 4214169695Skan } 4215169695Skan 4216169695Skan PREFIX(regex_grow_registers) (num_regs); 4217169695Skan } 4218169695Skan#endif /* not MATCH_MAY_ALLOCATE */ 4219169695Skan 4220169695Skan return REG_NOERROR; 4221169695Skan} /* regex_compile */ 4222169695Skan 4223169695Skan/* Subroutines for `regex_compile'. */ 4224169695Skan 4225169695Skan/* Store OP at LOC followed by two-byte integer parameter ARG. */ 4226169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t. */ 4227169695Skan 4228169695Skanstatic void 4229169695SkanPREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg) 4230169695Skan{ 4231169695Skan *loc = (UCHAR_T) op; 4232169695Skan STORE_NUMBER (loc + 1, arg); 4233169695Skan} 4234169695Skan 4235169695Skan 4236169695Skan/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ 4237169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t. 
*/ 4238169695Skan 4239169695Skanstatic void 4240169695SkanPREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2) 4241169695Skan{ 4242169695Skan *loc = (UCHAR_T) op; 4243169695Skan STORE_NUMBER (loc + 1, arg1); 4244169695Skan STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2); 4245169695Skan} 4246169695Skan 4247169695Skan 4248169695Skan/* Copy the bytes from LOC to END to open up three bytes of space at LOC 4249169695Skan for OP followed by two-byte integer parameter ARG. */ 4250169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t. */ 4251169695Skan 4252169695Skanstatic void 4253169695SkanPREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end) 4254169695Skan{ 4255169695Skan register UCHAR_T *pfrom = end; 4256169695Skan register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE; 4257169695Skan 4258169695Skan while (pfrom != loc) 4259169695Skan *--pto = *--pfrom; 4260169695Skan 4261169695Skan PREFIX(store_op1) (op, loc, arg); 4262169695Skan} 4263169695Skan 4264169695Skan 4265169695Skan/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 4266169695Skan/* ifdef WCHAR, integer parameter is 1 wchar_t. */ 4267169695Skan 4268169695Skanstatic void 4269169695SkanPREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, 4270169695Skan int arg2, UCHAR_T *end) 4271169695Skan{ 4272169695Skan register UCHAR_T *pfrom = end; 4273169695Skan register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE; 4274169695Skan 4275169695Skan while (pfrom != loc) 4276169695Skan *--pto = *--pfrom; 4277169695Skan 4278169695Skan PREFIX(store_op2) (op, loc, arg1, arg2); 4279169695Skan} 4280169695Skan 4281169695Skan 4282169695Skan/* P points to just after a ^ in PATTERN. Return true if that ^ comes 4283169695Skan after an alternative or a begin-subexpression. We assume there is at 4284169695Skan least one character before the ^. 
*/ 4285169695Skan 4286169695Skanstatic boolean 4287169695SkanPREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p, 4288169695Skan reg_syntax_t syntax) 4289169695Skan{ 4290169695Skan const CHAR_T *prev = p - 2; 4291169695Skan boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 4292169695Skan 4293169695Skan return 4294169695Skan /* After a subexpression? */ 4295169695Skan (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 4296169695Skan /* After an alternative? */ 4297169695Skan || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 4298169695Skan} 4299169695Skan 4300169695Skan 4301169695Skan/* The dual of at_begline_loc_p. This one is for $. We assume there is 4302169695Skan at least one character after the $, i.e., `P < PEND'. */ 4303169695Skan 4304169695Skanstatic boolean 4305169695SkanPREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend, 4306169695Skan reg_syntax_t syntax) 4307169695Skan{ 4308169695Skan const CHAR_T *next = p; 4309169695Skan boolean next_backslash = *next == '\\'; 4310169695Skan const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0; 4311169695Skan 4312169695Skan return 4313169695Skan /* Before a subexpression? */ 4314169695Skan (syntax & RE_NO_BK_PARENS ? *next == ')' 4315169695Skan : next_backslash && next_next && *next_next == ')') 4316169695Skan /* Before an alternative? */ 4317169695Skan || (syntax & RE_NO_BK_VBAR ? *next == '|' 4318169695Skan : next_backslash && next_next && *next_next == '|'); 4319169695Skan} 4320169695Skan 4321169695Skan#else /* not INSIDE_RECURSION */ 4322169695Skan 4323169695Skan/* Returns true if REGNUM is in one of COMPILE_STACK's elements and 4324169695Skan false if it's not. 
*/ 4325169695Skan 4326169695Skanstatic boolean 4327169695Skangroup_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) 4328169695Skan{ 4329169695Skan int this_element; 4330169695Skan 4331169695Skan for (this_element = compile_stack.avail - 1; 4332169695Skan this_element >= 0; 4333169695Skan this_element--) 4334169695Skan if (compile_stack.stack[this_element].regnum == regnum) 4335169695Skan return true; 4336169695Skan 4337169695Skan return false; 4338169695Skan} 4339169695Skan#endif /* not INSIDE_RECURSION */ 4340169695Skan 4341169695Skan#ifdef INSIDE_RECURSION 4342169695Skan 4343169695Skan#ifdef WCHAR 4344169695Skan/* This insert space, which size is "num", into the pattern at "loc". 4345169695Skan "end" must point the end of the allocated buffer. */ 4346169695Skanstatic void 4347169695Skaninsert_space (int num, CHAR_T *loc, CHAR_T *end) 4348169695Skan{ 4349169695Skan register CHAR_T *pto = end; 4350169695Skan register CHAR_T *pfrom = end - num; 4351169695Skan 4352169695Skan while (pfrom >= loc) 4353169695Skan *pto-- = *pfrom--; 4354169695Skan} 4355169695Skan#endif /* WCHAR */ 4356169695Skan 4357169695Skan#ifdef WCHAR 4358169695Skanstatic reg_errcode_t 4359169695Skanwcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr, 4360169695Skan const CHAR_T *pend, RE_TRANSLATE_TYPE translate, 4361169695Skan reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set) 4362169695Skan{ 4363169695Skan const CHAR_T *p = *p_ptr; 4364169695Skan CHAR_T range_start, range_end; 4365169695Skan reg_errcode_t ret; 4366169695Skan# ifdef _LIBC 4367169695Skan uint32_t nrules; 4368169695Skan uint32_t start_val, end_val; 4369169695Skan# endif 4370169695Skan if (p == pend) 4371169695Skan return REG_ERANGE; 4372169695Skan 4373169695Skan# ifdef _LIBC 4374169695Skan nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 4375169695Skan if (nrules != 0) 4376169695Skan { 4377169695Skan const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE, 4378169695Skan 
_NL_COLLATE_COLLSEQWC); 4379169695Skan const unsigned char *extra = (const unsigned char *) 4380169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 4381169695Skan 4382169695Skan if (range_start_char < -1) 4383169695Skan { 4384169695Skan /* range_start is a collating symbol. */ 4385169695Skan int32_t *wextra; 4386169695Skan /* Retreive the index and get collation sequence value. */ 4387169695Skan wextra = (int32_t*)(extra + char_set[-range_start_char]); 4388169695Skan start_val = wextra[1 + *wextra]; 4389169695Skan } 4390169695Skan else 4391169695Skan start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char)); 4392169695Skan 4393169695Skan end_val = collseq_table_lookup (collseq, TRANSLATE (p[0])); 4394169695Skan 4395169695Skan /* Report an error if the range is empty and the syntax prohibits 4396169695Skan this. */ 4397169695Skan ret = ((syntax & RE_NO_EMPTY_RANGES) 4398169695Skan && (start_val > end_val))? REG_ERANGE : REG_NOERROR; 4399169695Skan 4400169695Skan /* Insert space to the end of the char_ranges. */ 4401169695Skan insert_space(2, b - char_set[5] - 2, b - 1); 4402169695Skan *(b - char_set[5] - 2) = (wchar_t)start_val; 4403169695Skan *(b - char_set[5] - 1) = (wchar_t)end_val; 4404169695Skan char_set[4]++; /* ranges_index */ 4405169695Skan } 4406169695Skan else 4407169695Skan# endif 4408169695Skan { 4409169695Skan range_start = (range_start_char >= 0)? TRANSLATE (range_start_char): 4410169695Skan range_start_char; 4411169695Skan range_end = TRANSLATE (p[0]); 4412169695Skan /* Report an error if the range is empty and the syntax prohibits 4413169695Skan this. */ 4414169695Skan ret = ((syntax & RE_NO_EMPTY_RANGES) 4415169695Skan && (range_start > range_end))? REG_ERANGE : REG_NOERROR; 4416169695Skan 4417169695Skan /* Insert space to the end of the char_ranges. 
*/ 4418169695Skan insert_space(2, b - char_set[5] - 2, b - 1); 4419169695Skan *(b - char_set[5] - 2) = range_start; 4420169695Skan *(b - char_set[5] - 1) = range_end; 4421169695Skan char_set[4]++; /* ranges_index */ 4422169695Skan } 4423169695Skan /* Have to increment the pointer into the pattern string, so the 4424169695Skan caller isn't still at the ending character. */ 4425169695Skan (*p_ptr)++; 4426169695Skan 4427169695Skan return ret; 4428169695Skan} 4429169695Skan#else /* BYTE */ 4430169695Skan/* Read the ending character of a range (in a bracket expression) from the 4431169695Skan uncompiled pattern *P_PTR (which ends at PEND). We assume the 4432169695Skan starting character is in `P[-2]'. (`P[-1]' is the character `-'.) 4433169695Skan Then we set the translation of all bits between the starting and 4434169695Skan ending characters (inclusive) in the compiled pattern B. 4435169695Skan 4436169695Skan Return an error code. 4437169695Skan 4438169695Skan We use these short variable names so we can use the same macros as 4439169695Skan `regex_compile' itself. */ 4440169695Skan 4441169695Skanstatic reg_errcode_t 4442169695Skanbyte_compile_range (unsigned int range_start_char, const char **p_ptr, 4443169695Skan const char *pend, RE_TRANSLATE_TYPE translate, 4444169695Skan reg_syntax_t syntax, unsigned char *b) 4445169695Skan{ 4446169695Skan unsigned this_char; 4447169695Skan const char *p = *p_ptr; 4448169695Skan reg_errcode_t ret; 4449169695Skan# if _LIBC 4450169695Skan const unsigned char *collseq; 4451169695Skan unsigned int start_colseq; 4452169695Skan unsigned int end_colseq; 4453169695Skan# else 4454169695Skan unsigned end_char; 4455169695Skan# endif 4456169695Skan 4457169695Skan if (p == pend) 4458169695Skan return REG_ERANGE; 4459169695Skan 4460169695Skan /* Have to increment the pointer into the pattern string, so the 4461169695Skan caller isn't still at the ending character. 
*/ 4462169695Skan (*p_ptr)++; 4463169695Skan 4464169695Skan /* Report an error if the range is empty and the syntax prohibits this. */ 4465169695Skan ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 4466169695Skan 4467169695Skan# if _LIBC 4468169695Skan collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 4469169695Skan _NL_COLLATE_COLLSEQMB); 4470169695Skan 4471169695Skan start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)]; 4472169695Skan end_colseq = collseq[(unsigned char) TRANSLATE (p[0])]; 4473169695Skan for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) 4474169695Skan { 4475169695Skan unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)]; 4476169695Skan 4477169695Skan if (start_colseq <= this_colseq && this_colseq <= end_colseq) 4478169695Skan { 4479169695Skan SET_LIST_BIT (TRANSLATE (this_char)); 4480169695Skan ret = REG_NOERROR; 4481169695Skan } 4482169695Skan } 4483169695Skan# else 4484169695Skan /* Here we see why `this_char' has to be larger than an `unsigned 4485169695Skan char' -- we would otherwise go into an infinite loop, since all 4486169695Skan characters <= 0xff. */ 4487169695Skan range_start_char = TRANSLATE (range_start_char); 4488169695Skan /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE, 4489169695Skan and some compilers cast it to int implicitly, so following for_loop 4490169695Skan may fall to (almost) infinite loop. 4491169695Skan e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff. 4492169695Skan To avoid this, we cast p[0] to unsigned int and truncate it. 
*/ 4493169695Skan end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1)); 4494169695Skan 4495169695Skan for (this_char = range_start_char; this_char <= end_char; ++this_char) 4496169695Skan { 4497169695Skan SET_LIST_BIT (TRANSLATE (this_char)); 4498169695Skan ret = REG_NOERROR; 4499169695Skan } 4500169695Skan# endif 4501169695Skan 4502169695Skan return ret; 4503169695Skan} 4504169695Skan#endif /* WCHAR */ 4505169695Skan 4506169695Skan/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 4507169695Skan BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 4508169695Skan characters can start a string that matches the pattern. This fastmap 4509169695Skan is used by re_search to skip quickly over impossible starting points. 4510169695Skan 4511169695Skan The caller must supply the address of a (1 << BYTEWIDTH)-byte data 4512169695Skan area as BUFP->fastmap. 4513169695Skan 4514169695Skan We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 4515169695Skan the pattern buffer. 4516169695Skan 4517169695Skan Returns 0 if we succeed, -2 if an internal error. */ 4518169695Skan 4519169695Skan#ifdef WCHAR 4520169695Skan/* local function for re_compile_fastmap. 4521169695Skan truncate wchar_t character to char. */ 4522169695Skanstatic unsigned char truncate_wchar (CHAR_T c); 4523169695Skan 4524169695Skanstatic unsigned char 4525169695Skantruncate_wchar (CHAR_T c) 4526169695Skan{ 4527169695Skan unsigned char buf[MB_CUR_MAX]; 4528169695Skan mbstate_t state; 4529169695Skan int retval; 4530169695Skan memset (&state, '\0', sizeof (state)); 4531169695Skan# ifdef _LIBC 4532169695Skan retval = __wcrtomb (buf, c, &state); 4533169695Skan# else 4534169695Skan retval = wcrtomb (buf, c, &state); 4535169695Skan# endif 4536169695Skan return retval > 0 ? 
buf[0] : (unsigned char) c; 4537169695Skan} 4538169695Skan#endif /* WCHAR */ 4539169695Skan 4540169695Skanstatic int 4541169695SkanPREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp) 4542169695Skan{ 4543169695Skan int j, k; 4544169695Skan#ifdef MATCH_MAY_ALLOCATE 4545169695Skan PREFIX(fail_stack_type) fail_stack; 4546169695Skan#endif 4547169695Skan#ifndef REGEX_MALLOC 4548169695Skan char *destination; 4549169695Skan#endif 4550169695Skan 4551169695Skan register char *fastmap = bufp->fastmap; 4552169695Skan 4553169695Skan#ifdef WCHAR 4554169695Skan /* We need to cast pattern to (wchar_t*), because we casted this compiled 4555169695Skan pattern to (char*) in regex_compile. */ 4556169695Skan UCHAR_T *pattern = (UCHAR_T*)bufp->buffer; 4557169695Skan register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used); 4558169695Skan#else /* BYTE */ 4559169695Skan UCHAR_T *pattern = bufp->buffer; 4560169695Skan register UCHAR_T *pend = pattern + bufp->used; 4561169695Skan#endif /* WCHAR */ 4562169695Skan UCHAR_T *p = pattern; 4563169695Skan 4564169695Skan#ifdef REL_ALLOC 4565169695Skan /* This holds the pointer to the failure stack, when 4566169695Skan it is allocated relocatably. */ 4567169695Skan fail_stack_elt_t *failure_stack_ptr; 4568169695Skan#endif 4569169695Skan 4570169695Skan /* Assume that each path through the pattern can be null until 4571169695Skan proven otherwise. We set this false at the bottom of switch 4572169695Skan statement, to which we get only if a particular path doesn't 4573169695Skan match the empty string. */ 4574169695Skan boolean path_can_be_null = true; 4575169695Skan 4576169695Skan /* We aren't doing a `succeed_n' to begin with. */ 4577169695Skan boolean succeed_n_p = false; 4578169695Skan 4579169695Skan assert (fastmap != NULL && p != NULL); 4580169695Skan 4581169695Skan INIT_FAIL_STACK (); 4582169695Skan bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. 
*/ 4583169695Skan bufp->fastmap_accurate = 1; /* It will be when we're done. */ 4584169695Skan bufp->can_be_null = 0; 4585169695Skan 4586169695Skan while (1) 4587169695Skan { 4588169695Skan if (p == pend || *p == (UCHAR_T) succeed) 4589169695Skan { 4590169695Skan /* We have reached the (effective) end of pattern. */ 4591169695Skan if (!FAIL_STACK_EMPTY ()) 4592169695Skan { 4593169695Skan bufp->can_be_null |= path_can_be_null; 4594169695Skan 4595169695Skan /* Reset for next path. */ 4596169695Skan path_can_be_null = true; 4597169695Skan 4598169695Skan p = fail_stack.stack[--fail_stack.avail].pointer; 4599169695Skan 4600169695Skan continue; 4601169695Skan } 4602169695Skan else 4603169695Skan break; 4604169695Skan } 4605169695Skan 4606169695Skan /* We should never be about to go beyond the end of the pattern. */ 4607169695Skan assert (p < pend); 4608169695Skan 4609169695Skan switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 4610169695Skan { 4611169695Skan 4612169695Skan /* I guess the idea here is to simply not bother with a fastmap 4613169695Skan if a backreference is used, since it's too hard to figure out 4614169695Skan the fastmap for the corresponding group. Setting 4615169695Skan `can_be_null' stops `re_search_2' from using the fastmap, so 4616169695Skan that is all we do. */ 4617169695Skan case duplicate: 4618169695Skan bufp->can_be_null = 1; 4619169695Skan goto done; 4620169695Skan 4621169695Skan 4622169695Skan /* Following are the cases which match a character. These end 4623169695Skan with `break'. 
*/ 4624169695Skan 4625169695Skan#ifdef WCHAR 4626169695Skan case exactn: 4627169695Skan fastmap[truncate_wchar(p[1])] = 1; 4628169695Skan break; 4629169695Skan#else /* BYTE */ 4630169695Skan case exactn: 4631169695Skan fastmap[p[1]] = 1; 4632169695Skan break; 4633169695Skan#endif /* WCHAR */ 4634169695Skan#ifdef MBS_SUPPORT 4635169695Skan case exactn_bin: 4636169695Skan fastmap[p[1]] = 1; 4637169695Skan break; 4638169695Skan#endif 4639169695Skan 4640169695Skan#ifdef WCHAR 4641169695Skan /* It is hard to distinguish fastmap from (multi byte) characters 4642169695Skan which depends on current locale. */ 4643169695Skan case charset: 4644169695Skan case charset_not: 4645169695Skan case wordchar: 4646169695Skan case notwordchar: 4647169695Skan bufp->can_be_null = 1; 4648169695Skan goto done; 4649169695Skan#else /* BYTE */ 4650169695Skan case charset: 4651169695Skan for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 4652169695Skan if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 4653169695Skan fastmap[j] = 1; 4654169695Skan break; 4655169695Skan 4656169695Skan 4657169695Skan case charset_not: 4658169695Skan /* Chars beyond end of map must be allowed. 
*/ 4659169695Skan for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 4660169695Skan fastmap[j] = 1; 4661169695Skan 4662169695Skan for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 4663169695Skan if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 4664169695Skan fastmap[j] = 1; 4665169695Skan break; 4666169695Skan 4667169695Skan 4668169695Skan case wordchar: 4669169695Skan for (j = 0; j < (1 << BYTEWIDTH); j++) 4670169695Skan if (SYNTAX (j) == Sword) 4671169695Skan fastmap[j] = 1; 4672169695Skan break; 4673169695Skan 4674169695Skan 4675169695Skan case notwordchar: 4676169695Skan for (j = 0; j < (1 << BYTEWIDTH); j++) 4677169695Skan if (SYNTAX (j) != Sword) 4678169695Skan fastmap[j] = 1; 4679169695Skan break; 4680169695Skan#endif /* WCHAR */ 4681169695Skan 4682169695Skan case anychar: 4683169695Skan { 4684169695Skan int fastmap_newline = fastmap['\n']; 4685169695Skan 4686169695Skan /* `.' matches anything ... */ 4687169695Skan for (j = 0; j < (1 << BYTEWIDTH); j++) 4688169695Skan fastmap[j] = 1; 4689169695Skan 4690169695Skan /* ... except perhaps newline. */ 4691169695Skan if (!(bufp->syntax & RE_DOT_NEWLINE)) 4692169695Skan fastmap['\n'] = fastmap_newline; 4693169695Skan 4694169695Skan /* Return if we have already set `can_be_null'; if we have, 4695169695Skan then the fastmap is irrelevant. Something's wrong here. */ 4696169695Skan else if (bufp->can_be_null) 4697169695Skan goto done; 4698169695Skan 4699169695Skan /* Otherwise, have to check alternative paths. 
*/ 4700169695Skan break; 4701169695Skan } 4702169695Skan 4703169695Skan#ifdef emacs 4704169695Skan case syntaxspec: 4705169695Skan k = *p++; 4706169695Skan for (j = 0; j < (1 << BYTEWIDTH); j++) 4707169695Skan if (SYNTAX (j) == (enum syntaxcode) k) 4708169695Skan fastmap[j] = 1; 4709169695Skan break; 4710169695Skan 4711169695Skan 4712169695Skan case notsyntaxspec: 4713169695Skan k = *p++; 4714169695Skan for (j = 0; j < (1 << BYTEWIDTH); j++) 4715169695Skan if (SYNTAX (j) != (enum syntaxcode) k) 4716169695Skan fastmap[j] = 1; 4717169695Skan break; 4718169695Skan 4719169695Skan 4720169695Skan /* All cases after this match the empty string. These end with 4721169695Skan `continue'. */ 4722169695Skan 4723169695Skan 4724169695Skan case before_dot: 4725169695Skan case at_dot: 4726169695Skan case after_dot: 4727169695Skan continue; 4728169695Skan#endif /* emacs */ 4729169695Skan 4730169695Skan 4731169695Skan case no_op: 4732169695Skan case begline: 4733169695Skan case endline: 4734169695Skan case begbuf: 4735169695Skan case endbuf: 4736169695Skan case wordbound: 4737169695Skan case notwordbound: 4738169695Skan case wordbeg: 4739169695Skan case wordend: 4740169695Skan case push_dummy_failure: 4741169695Skan continue; 4742169695Skan 4743169695Skan 4744169695Skan case jump_n: 4745169695Skan case pop_failure_jump: 4746169695Skan case maybe_pop_jump: 4747169695Skan case jump: 4748169695Skan case jump_past_alt: 4749169695Skan case dummy_failure_jump: 4750169695Skan EXTRACT_NUMBER_AND_INCR (j, p); 4751169695Skan p += j; 4752169695Skan if (j > 0) 4753169695Skan continue; 4754169695Skan 4755169695Skan /* Jump backward implies we just went through the body of a 4756169695Skan loop and matched nothing. Opcode jumped to should be 4757169695Skan `on_failure_jump' or `succeed_n'. Just treat it like an 4758169695Skan ordinary jump. For a * loop, it has pushed its failure 4759169695Skan point already; if so, discard that as redundant. 
*/ 4760169695Skan if ((re_opcode_t) *p != on_failure_jump 4761169695Skan && (re_opcode_t) *p != succeed_n) 4762169695Skan continue; 4763169695Skan 4764169695Skan p++; 4765169695Skan EXTRACT_NUMBER_AND_INCR (j, p); 4766169695Skan p += j; 4767169695Skan 4768169695Skan /* If what's on the stack is where we are now, pop it. */ 4769169695Skan if (!FAIL_STACK_EMPTY () 4770169695Skan && fail_stack.stack[fail_stack.avail - 1].pointer == p) 4771169695Skan fail_stack.avail--; 4772169695Skan 4773169695Skan continue; 4774169695Skan 4775169695Skan 4776169695Skan case on_failure_jump: 4777169695Skan case on_failure_keep_string_jump: 4778169695Skan handle_on_failure_jump: 4779169695Skan EXTRACT_NUMBER_AND_INCR (j, p); 4780169695Skan 4781169695Skan /* For some patterns, e.g., `(a?)?', `p+j' here points to the 4782169695Skan end of the pattern. We don't want to push such a point, 4783169695Skan since when we restore it above, entering the switch will 4784169695Skan increment `p' past the end of the pattern. We don't need 4785169695Skan to push such a point since we obviously won't find any more 4786169695Skan fastmap entries beyond `pend'. Such a pattern can match 4787169695Skan the null string, though. */ 4788169695Skan if (p + j < pend) 4789169695Skan { 4790169695Skan if (!PUSH_PATTERN_OP (p + j, fail_stack)) 4791169695Skan { 4792169695Skan RESET_FAIL_STACK (); 4793169695Skan return -2; 4794169695Skan } 4795169695Skan } 4796169695Skan else 4797169695Skan bufp->can_be_null = 1; 4798169695Skan 4799169695Skan if (succeed_n_p) 4800169695Skan { 4801169695Skan EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ 4802169695Skan succeed_n_p = false; 4803169695Skan } 4804169695Skan 4805169695Skan continue; 4806169695Skan 4807169695Skan 4808169695Skan case succeed_n: 4809169695Skan /* Get to the number of times to succeed. */ 4810169695Skan p += OFFSET_ADDRESS_SIZE; 4811169695Skan 4812169695Skan /* Increment p past the n for when k != 0. 
*/ 4813169695Skan EXTRACT_NUMBER_AND_INCR (k, p); 4814169695Skan if (k == 0) 4815169695Skan { 4816169695Skan p -= 2 * OFFSET_ADDRESS_SIZE; 4817169695Skan succeed_n_p = true; /* Spaghetti code alert. */ 4818169695Skan goto handle_on_failure_jump; 4819169695Skan } 4820169695Skan continue; 4821169695Skan 4822169695Skan 4823169695Skan case set_number_at: 4824169695Skan p += 2 * OFFSET_ADDRESS_SIZE; 4825169695Skan continue; 4826169695Skan 4827169695Skan 4828169695Skan case start_memory: 4829169695Skan case stop_memory: 4830169695Skan p += 2; 4831169695Skan continue; 4832169695Skan 4833169695Skan 4834169695Skan default: 4835169695Skan abort (); /* We have listed all the cases. */ 4836169695Skan } /* switch *p++ */ 4837169695Skan 4838169695Skan /* Getting here means we have found the possible starting 4839169695Skan characters for one path of the pattern -- and that the empty 4840169695Skan string does not match. We need not follow this path further. 4841169695Skan Instead, look at the next alternative (remembered on the 4842169695Skan stack), or quit if no more. The test at the top of the loop 4843169695Skan does these things. */ 4844169695Skan path_can_be_null = false; 4845169695Skan p = pend; 4846169695Skan } /* while p */ 4847169695Skan 4848169695Skan /* Set `can_be_null' for the last path (also the first path, if the 4849169695Skan pattern is empty). 
*/ 4850169695Skan bufp->can_be_null |= path_can_be_null; 4851169695Skan 4852169695Skan done: 4853169695Skan RESET_FAIL_STACK (); 4854169695Skan return 0; 4855169695Skan} 4856169695Skan 4857169695Skan#else /* not INSIDE_RECURSION */ 4858169695Skan 4859169695Skanint 4860169695Skanre_compile_fastmap (struct re_pattern_buffer *bufp) 4861169695Skan{ 4862169695Skan# ifdef MBS_SUPPORT 4863169695Skan if (MB_CUR_MAX != 1) 4864169695Skan return wcs_re_compile_fastmap(bufp); 4865169695Skan else 4866169695Skan# endif 4867169695Skan return byte_re_compile_fastmap(bufp); 4868169695Skan} /* re_compile_fastmap */ 4869169695Skan#ifdef _LIBC 4870169695Skanweak_alias (__re_compile_fastmap, re_compile_fastmap) 4871169695Skan#endif 4872169695Skan 4873169695Skan 4874169695Skan/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 4875169695Skan ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 4876169695Skan this memory for recording register information. STARTS and ENDS 4877169695Skan must be allocated using the malloc library routine, and must each 4878169695Skan be at least NUM_REGS * sizeof (regoff_t) bytes long. 4879169695Skan 4880169695Skan If NUM_REGS == 0, then subsequent matches should allocate their own 4881169695Skan register data. 4882169695Skan 4883169695Skan Unless this function is called, the first search or match using 4884169695Skan PATTERN_BUFFER will allocate its own register data, without 4885169695Skan freeing the old data. 
*/ 4886169695Skan 4887169695Skanvoid 4888169695Skanre_set_registers (struct re_pattern_buffer *bufp, 4889169695Skan struct re_registers *regs, unsigned num_regs, 4890169695Skan regoff_t *starts, regoff_t *ends) 4891169695Skan{ 4892169695Skan if (num_regs) 4893169695Skan { 4894169695Skan bufp->regs_allocated = REGS_REALLOCATE; 4895169695Skan regs->num_regs = num_regs; 4896169695Skan regs->start = starts; 4897169695Skan regs->end = ends; 4898169695Skan } 4899169695Skan else 4900169695Skan { 4901169695Skan bufp->regs_allocated = REGS_UNALLOCATED; 4902169695Skan regs->num_regs = 0; 4903169695Skan regs->start = regs->end = (regoff_t *) 0; 4904169695Skan } 4905169695Skan} 4906169695Skan#ifdef _LIBC 4907169695Skanweak_alias (__re_set_registers, re_set_registers) 4908169695Skan#endif 4909169695Skan 4910169695Skan/* Searching routines. */ 4911169695Skan 4912169695Skan/* Like re_search_2, below, but only one string is specified, and 4913169695Skan doesn't let you say where to stop matching. */ 4914169695Skan 4915169695Skanint 4916169695Skanre_search (struct re_pattern_buffer *bufp, const char *string, int size, 4917169695Skan int startpos, int range, struct re_registers *regs) 4918169695Skan{ 4919169695Skan return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 4920169695Skan regs, size); 4921169695Skan} 4922169695Skan#ifdef _LIBC 4923169695Skanweak_alias (__re_search, re_search) 4924169695Skan#endif 4925169695Skan 4926169695Skan 4927169695Skan/* Using the compiled pattern in BUFP->buffer, first tries to match the 4928169695Skan virtual concatenation of STRING1 and STRING2, starting first at index 4929169695Skan STARTPOS, then at STARTPOS + 1, and so on. 4930169695Skan 4931169695Skan STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 4932169695Skan 4933169695Skan RANGE is how far to scan while trying to match. RANGE = 0 means try 4934169695Skan only at STARTPOS; in general, the last start tried is STARTPOS + 4935169695Skan RANGE. 
4936169695Skan 4937169695Skan In REGS, return the indices of the virtual concatenation of STRING1 4938169695Skan and STRING2 that matched the entire BUFP->buffer and its contained 4939169695Skan subexpressions. 4940169695Skan 4941169695Skan Do not consider matching one past the index STOP in the virtual 4942169695Skan concatenation of STRING1 and STRING2. 4943169695Skan 4944169695Skan We return either the position in the strings at which the match was 4945169695Skan found, -1 if no match, or -2 if error (such as failure 4946169695Skan stack overflow). */ 4947169695Skan 4948169695Skanint 4949169695Skanre_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, 4950169695Skan const char *string2, int size2, int startpos, int range, 4951169695Skan struct re_registers *regs, int stop) 4952169695Skan{ 4953169695Skan# ifdef MBS_SUPPORT 4954169695Skan if (MB_CUR_MAX != 1) 4955169695Skan return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos, 4956169695Skan range, regs, stop); 4957169695Skan else 4958169695Skan# endif 4959169695Skan return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos, 4960169695Skan range, regs, stop); 4961169695Skan} /* re_search_2 */ 4962169695Skan#ifdef _LIBC 4963169695Skanweak_alias (__re_search_2, re_search_2) 4964169695Skan#endif 4965169695Skan 4966169695Skan#endif /* not INSIDE_RECURSION */ 4967169695Skan 4968169695Skan#ifdef INSIDE_RECURSION 4969169695Skan 4970169695Skan#ifdef MATCH_MAY_ALLOCATE 4971169695Skan# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL 4972169695Skan#else 4973169695Skan# define FREE_VAR(var) if (var) free (var); var = NULL 4974169695Skan#endif 4975169695Skan 4976169695Skan#ifdef WCHAR 4977169695Skan# define MAX_ALLOCA_SIZE 2000 4978169695Skan 4979169695Skan# define FREE_WCS_BUFFERS() \ 4980169695Skan do { \ 4981169695Skan if (size1 > MAX_ALLOCA_SIZE) \ 4982169695Skan { \ 4983169695Skan free (wcs_string1); \ 4984169695Skan free (mbs_offset1); \ 4985169695Skan } \ 
4986169695Skan else \ 4987169695Skan { \ 4988169695Skan FREE_VAR (wcs_string1); \ 4989169695Skan FREE_VAR (mbs_offset1); \ 4990169695Skan } \ 4991169695Skan if (size2 > MAX_ALLOCA_SIZE) \ 4992169695Skan { \ 4993169695Skan free (wcs_string2); \ 4994169695Skan free (mbs_offset2); \ 4995169695Skan } \ 4996169695Skan else \ 4997169695Skan { \ 4998169695Skan FREE_VAR (wcs_string2); \ 4999169695Skan FREE_VAR (mbs_offset2); \ 5000169695Skan } \ 5001169695Skan } while (0) 5002169695Skan 5003169695Skan#endif 5004169695Skan 5005169695Skan 5006169695Skanstatic int 5007169695SkanPREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1, 5008169695Skan int size1, const char *string2, int size2, 5009169695Skan int startpos, int range, 5010169695Skan struct re_registers *regs, int stop) 5011169695Skan{ 5012169695Skan int val; 5013169695Skan register char *fastmap = bufp->fastmap; 5014169695Skan register RE_TRANSLATE_TYPE translate = bufp->translate; 5015169695Skan int total_size = size1 + size2; 5016169695Skan int endpos = startpos + range; 5017169695Skan#ifdef WCHAR 5018169695Skan /* We need wchar_t* buffers correspond to cstring1, cstring2. */ 5019169695Skan wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL; 5020169695Skan /* We need the size of wchar_t buffers correspond to csize1, csize2. */ 5021169695Skan int wcs_size1 = 0, wcs_size2 = 0; 5022169695Skan /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ 5023169695Skan int *mbs_offset1 = NULL, *mbs_offset2 = NULL; 5024169695Skan /* They hold whether each wchar_t is binary data or not. */ 5025169695Skan char *is_binary = NULL; 5026169695Skan#endif /* WCHAR */ 5027169695Skan 5028169695Skan /* Check for out-of-range STARTPOS. */ 5029169695Skan if (startpos < 0 || startpos > total_size) 5030169695Skan return -1; 5031169695Skan 5032169695Skan /* Fix up RANGE if it might eventually take us outside 5033169695Skan the virtual concatenation of STRING1 and STRING2. 
5034169695Skan Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ 5035169695Skan if (endpos < 0) 5036169695Skan range = 0 - startpos; 5037169695Skan else if (endpos > total_size) 5038169695Skan range = total_size - startpos; 5039169695Skan 5040169695Skan /* If the search isn't to be a backwards one, don't waste time in a 5041169695Skan search for a pattern that must be anchored. */ 5042169695Skan if (bufp->used > 0 && range > 0 5043169695Skan && ((re_opcode_t) bufp->buffer[0] == begbuf 5044169695Skan /* `begline' is like `begbuf' if it cannot match at newlines. */ 5045169695Skan || ((re_opcode_t) bufp->buffer[0] == begline 5046169695Skan && !bufp->newline_anchor))) 5047169695Skan { 5048169695Skan if (startpos > 0) 5049169695Skan return -1; 5050169695Skan else 5051169695Skan range = 1; 5052169695Skan } 5053169695Skan 5054169695Skan#ifdef emacs 5055169695Skan /* In a forward search for something that starts with \=. 5056169695Skan don't keep searching past point. */ 5057169695Skan if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 5058169695Skan { 5059169695Skan range = PT - startpos; 5060169695Skan if (range <= 0) 5061169695Skan return -1; 5062169695Skan } 5063169695Skan#endif /* emacs */ 5064169695Skan 5065169695Skan /* Update the fastmap now if not correct already. */ 5066169695Skan if (fastmap && !bufp->fastmap_accurate) 5067169695Skan if (re_compile_fastmap (bufp) == -2) 5068169695Skan return -2; 5069169695Skan 5070169695Skan#ifdef WCHAR 5071169695Skan /* Allocate wchar_t array for wcs_string1 and wcs_string2 and 5072169695Skan fill them with converted string. 
*/ 5073169695Skan if (size1 != 0) 5074169695Skan { 5075169695Skan if (size1 > MAX_ALLOCA_SIZE) 5076169695Skan { 5077169695Skan wcs_string1 = TALLOC (size1 + 1, CHAR_T); 5078169695Skan mbs_offset1 = TALLOC (size1 + 1, int); 5079169695Skan is_binary = TALLOC (size1 + 1, char); 5080169695Skan } 5081169695Skan else 5082169695Skan { 5083169695Skan wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T); 5084169695Skan mbs_offset1 = REGEX_TALLOC (size1 + 1, int); 5085169695Skan is_binary = REGEX_TALLOC (size1 + 1, char); 5086169695Skan } 5087169695Skan if (!wcs_string1 || !mbs_offset1 || !is_binary) 5088169695Skan { 5089169695Skan if (size1 > MAX_ALLOCA_SIZE) 5090169695Skan { 5091169695Skan free (wcs_string1); 5092169695Skan free (mbs_offset1); 5093169695Skan free (is_binary); 5094169695Skan } 5095169695Skan else 5096169695Skan { 5097169695Skan FREE_VAR (wcs_string1); 5098169695Skan FREE_VAR (mbs_offset1); 5099169695Skan FREE_VAR (is_binary); 5100169695Skan } 5101169695Skan return -2; 5102169695Skan } 5103169695Skan wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1, 5104169695Skan mbs_offset1, is_binary); 5105169695Skan wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */ 5106169695Skan if (size1 > MAX_ALLOCA_SIZE) 5107169695Skan free (is_binary); 5108169695Skan else 5109169695Skan FREE_VAR (is_binary); 5110169695Skan } 5111169695Skan if (size2 != 0) 5112169695Skan { 5113169695Skan if (size2 > MAX_ALLOCA_SIZE) 5114169695Skan { 5115169695Skan wcs_string2 = TALLOC (size2 + 1, CHAR_T); 5116169695Skan mbs_offset2 = TALLOC (size2 + 1, int); 5117169695Skan is_binary = TALLOC (size2 + 1, char); 5118169695Skan } 5119169695Skan else 5120169695Skan { 5121169695Skan wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T); 5122169695Skan mbs_offset2 = REGEX_TALLOC (size2 + 1, int); 5123169695Skan is_binary = REGEX_TALLOC (size2 + 1, char); 5124169695Skan } 5125169695Skan if (!wcs_string2 || !mbs_offset2 || !is_binary) 5126169695Skan { 5127169695Skan FREE_WCS_BUFFERS (); 5128169695Skan if 
(size2 > MAX_ALLOCA_SIZE) 5129169695Skan free (is_binary); 5130169695Skan else 5131169695Skan FREE_VAR (is_binary); 5132169695Skan return -2; 5133169695Skan } 5134169695Skan wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2, 5135169695Skan mbs_offset2, is_binary); 5136169695Skan wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */ 5137169695Skan if (size2 > MAX_ALLOCA_SIZE) 5138169695Skan free (is_binary); 5139169695Skan else 5140169695Skan FREE_VAR (is_binary); 5141169695Skan } 5142169695Skan#endif /* WCHAR */ 5143169695Skan 5144169695Skan 5145169695Skan /* Loop through the string, looking for a place to start matching. */ 5146169695Skan for (;;) 5147169695Skan { 5148169695Skan /* If a fastmap is supplied, skip quickly over characters that 5149169695Skan cannot be the start of a match. If the pattern can match the 5150169695Skan null string, however, we don't need to skip characters; we want 5151169695Skan the first null string. */ 5152169695Skan if (fastmap && startpos < total_size && !bufp->can_be_null) 5153169695Skan { 5154169695Skan if (range > 0) /* Searching forwards. */ 5155169695Skan { 5156169695Skan register const char *d; 5157169695Skan register int lim = 0; 5158169695Skan int irange = range; 5159169695Skan 5160169695Skan if (startpos < size1 && startpos + range >= size1) 5161169695Skan lim = range - (size1 - startpos); 5162169695Skan 5163169695Skan d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 5164169695Skan 5165169695Skan /* Written out as an if-else to avoid testing `translate' 5166169695Skan inside the loop. */ 5167169695Skan if (translate) 5168169695Skan while (range > lim 5169169695Skan && !fastmap[(unsigned char) 5170169695Skan translate[(unsigned char) *d++]]) 5171169695Skan range--; 5172169695Skan else 5173169695Skan while (range > lim && !fastmap[(unsigned char) *d++]) 5174169695Skan range--; 5175169695Skan 5176169695Skan startpos += irange - range; 5177169695Skan } 5178169695Skan else /* Searching backwards. 
*/ 5179169695Skan { 5180169695Skan register CHAR_T c = (size1 == 0 || startpos >= size1 5181169695Skan ? string2[startpos - size1] 5182169695Skan : string1[startpos]); 5183169695Skan 5184169695Skan if (!fastmap[(unsigned char) TRANSLATE (c)]) 5185169695Skan goto advance; 5186169695Skan } 5187169695Skan } 5188169695Skan 5189169695Skan /* If can't match the null string, and that's all we have left, fail. */ 5190169695Skan if (range >= 0 && startpos == total_size && fastmap 5191169695Skan && !bufp->can_be_null) 5192169695Skan { 5193169695Skan#ifdef WCHAR 5194169695Skan FREE_WCS_BUFFERS (); 5195169695Skan#endif 5196169695Skan return -1; 5197169695Skan } 5198169695Skan 5199169695Skan#ifdef WCHAR 5200169695Skan val = wcs_re_match_2_internal (bufp, string1, size1, string2, 5201169695Skan size2, startpos, regs, stop, 5202169695Skan wcs_string1, wcs_size1, 5203169695Skan wcs_string2, wcs_size2, 5204169695Skan mbs_offset1, mbs_offset2); 5205169695Skan#else /* BYTE */ 5206169695Skan val = byte_re_match_2_internal (bufp, string1, size1, string2, 5207169695Skan size2, startpos, regs, stop); 5208169695Skan#endif /* BYTE */ 5209169695Skan 5210169695Skan#ifndef REGEX_MALLOC 5211169695Skan# ifdef C_ALLOCA 5212169695Skan alloca (0); 5213169695Skan# endif 5214169695Skan#endif 5215169695Skan 5216169695Skan if (val >= 0) 5217169695Skan { 5218169695Skan#ifdef WCHAR 5219169695Skan FREE_WCS_BUFFERS (); 5220169695Skan#endif 5221169695Skan return startpos; 5222169695Skan } 5223169695Skan 5224169695Skan if (val == -2) 5225169695Skan { 5226169695Skan#ifdef WCHAR 5227169695Skan FREE_WCS_BUFFERS (); 5228169695Skan#endif 5229169695Skan return -2; 5230169695Skan } 5231169695Skan 5232169695Skan advance: 5233169695Skan if (!range) 5234169695Skan break; 5235169695Skan else if (range > 0) 5236169695Skan { 5237169695Skan range--; 5238169695Skan startpos++; 5239169695Skan } 5240169695Skan else 5241169695Skan { 5242169695Skan range++; 5243169695Skan startpos--; 5244169695Skan } 5245169695Skan } 
5246169695Skan#ifdef WCHAR 5247169695Skan FREE_WCS_BUFFERS (); 5248169695Skan#endif 5249169695Skan return -1; 5250169695Skan} 5251169695Skan 5252169695Skan#ifdef WCHAR 5253169695Skan/* This converts PTR, a pointer into one of the search wchar_t strings 5254169695Skan `string1' and `string2' into an multibyte string offset from the 5255169695Skan beginning of that string. We use mbs_offset to optimize. 5256169695Skan See convert_mbs_to_wcs. */ 5257169695Skan# define POINTER_TO_OFFSET(ptr) \ 5258169695Skan (FIRST_STRING_P (ptr) \ 5259169695Skan ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0)) \ 5260169695Skan : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0) \ 5261169695Skan + csize1))) 5262169695Skan#else /* BYTE */ 5263169695Skan/* This converts PTR, a pointer into one of the search strings `string1' 5264169695Skan and `string2' into an offset from the beginning of that string. */ 5265169695Skan# define POINTER_TO_OFFSET(ptr) \ 5266169695Skan (FIRST_STRING_P (ptr) \ 5267169695Skan ? ((regoff_t) ((ptr) - string1)) \ 5268169695Skan : ((regoff_t) ((ptr) - string2 + size1))) 5269169695Skan#endif /* WCHAR */ 5270169695Skan 5271169695Skan/* Macros for dealing with the split strings in re_match_2. */ 5272169695Skan 5273169695Skan#define MATCHING_IN_FIRST_STRING (dend == end_match_1) 5274169695Skan 5275169695Skan/* Call before fetching a character with *d. This switches over to 5276169695Skan string2 if necessary. */ 5277169695Skan#define PREFETCH() \ 5278169695Skan while (d == dend) \ 5279169695Skan { \ 5280169695Skan /* End of string2 => fail. */ \ 5281169695Skan if (dend == end_match_2) \ 5282169695Skan goto fail; \ 5283169695Skan /* End of string1 => advance to string2. */ \ 5284169695Skan d = string2; \ 5285169695Skan dend = end_match_2; \ 5286169695Skan } 5287169695Skan 5288169695Skan/* Test if at very beginning or at very end of the virtual concatenation 5289169695Skan of `string1' and `string2'. 
If only one string, it's `string2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)


/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX.  A character counts as
   word-constituent here if iswalnum accepts it or it is L'_'.  Note
   that the character-selection expression is evaluated twice.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t)((d) == end1 ? *string2				\
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
   || ((d) == end1 ? *string2						\
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
/* A character is word-constituent if its SYNTAX class is Sword.  */
# define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif

/* Free everything we malloc.  FREE_VARIABLES has four variants,
   selected by MATCH_MAY_ALLOCATE and WCHAR.  All of them refer by name
   to variables at the expansion site.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
/* Release the failure stack and the nine register vectors, and, unless
   cant_free_wcs_buf is set, the wide-character strings and their
   offset buffers as well.  */
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* BYTE */
/* Release the failure stack and the nine register vectors.  */
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# endif /* WCHAR */
#else
# ifdef WCHAR
/* Only the wide-character buffers are released here, and only when
   cant_free_wcs_buf is clear.  */
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */

/* These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)

#else /* not INSIDE_RECURSION */
/* Matching routines.  */

#ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.
*/ 5391169695Skan 5392169695Skanint 5393169695Skanre_match (struct re_pattern_buffer *bufp, const char *string, 5394169695Skan int size, int pos, struct re_registers *regs) 5395169695Skan{ 5396169695Skan int result; 5397169695Skan# ifdef MBS_SUPPORT 5398169695Skan if (MB_CUR_MAX != 1) 5399169695Skan result = wcs_re_match_2_internal (bufp, NULL, 0, string, size, 5400169695Skan pos, regs, size, 5401169695Skan NULL, 0, NULL, 0, NULL, NULL); 5402169695Skan else 5403169695Skan# endif 5404169695Skan result = byte_re_match_2_internal (bufp, NULL, 0, string, size, 5405169695Skan pos, regs, size); 5406169695Skan# ifndef REGEX_MALLOC 5407169695Skan# ifdef C_ALLOCA 5408169695Skan alloca (0); 5409169695Skan# endif 5410169695Skan# endif 5411169695Skan return result; 5412169695Skan} 5413169695Skan# ifdef _LIBC 5414169695Skanweak_alias (__re_match, re_match) 5415169695Skan# endif 5416169695Skan#endif /* not emacs */ 5417169695Skan 5418169695Skan#endif /* not INSIDE_RECURSION */ 5419169695Skan 5420169695Skan#ifdef INSIDE_RECURSION 5421169695Skanstatic boolean PREFIX(group_match_null_string_p) (UCHAR_T **p, 5422169695Skan UCHAR_T *end, 5423169695Skan PREFIX(register_info_type) *reg_info); 5424169695Skanstatic boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p, 5425169695Skan UCHAR_T *end, 5426169695Skan PREFIX(register_info_type) *reg_info); 5427169695Skanstatic boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p, 5428169695Skan UCHAR_T *end, 5429169695Skan PREFIX(register_info_type) *reg_info); 5430169695Skanstatic int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, 5431169695Skan int len, char *translate); 5432169695Skan#else /* not INSIDE_RECURSION */ 5433169695Skan 5434169695Skan/* re_match_2 matches the compiled pattern in BUFP against the 5435169695Skan the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 5436169695Skan and SIZE2, respectively). We start matching at POS, and stop 5437169695Skan matching at STOP. 
5438169695Skan 5439169695Skan If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 5440169695Skan store offsets for the substring each group matched in REGS. See the 5441169695Skan documentation for exactly how many groups we fill. 5442169695Skan 5443169695Skan We return -1 if no match, -2 if an internal error (such as the 5444169695Skan failure stack overflowing). Otherwise, we return the length of the 5445169695Skan matched substring. */ 5446169695Skan 5447169695Skanint 5448169695Skanre_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, 5449169695Skan const char *string2, int size2, int pos, 5450169695Skan struct re_registers *regs, int stop) 5451169695Skan{ 5452169695Skan int result; 5453169695Skan# ifdef MBS_SUPPORT 5454169695Skan if (MB_CUR_MAX != 1) 5455169695Skan result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2, 5456169695Skan pos, regs, stop, 5457169695Skan NULL, 0, NULL, 0, NULL, NULL); 5458169695Skan else 5459169695Skan# endif 5460169695Skan result = byte_re_match_2_internal (bufp, string1, size1, string2, size2, 5461169695Skan pos, regs, stop); 5462169695Skan 5463169695Skan#ifndef REGEX_MALLOC 5464169695Skan# ifdef C_ALLOCA 5465169695Skan alloca (0); 5466169695Skan# endif 5467169695Skan#endif 5468169695Skan return result; 5469169695Skan} 5470169695Skan#ifdef _LIBC 5471169695Skanweak_alias (__re_match_2, re_match_2) 5472169695Skan#endif 5473169695Skan 5474169695Skan#endif /* not INSIDE_RECURSION */ 5475169695Skan 5476169695Skan#ifdef INSIDE_RECURSION 5477169695Skan 5478169695Skan#ifdef WCHAR 5479169695Skanstatic int count_mbs_length (int *, int); 5480169695Skan 5481169695Skan/* This check the substring (from 0, to length) of the multibyte string, 5482169695Skan to which offset_buffer correspond. And count how many wchar_t_characters 5483169695Skan the substring occupy. We use offset_buffer to optimization. 5484169695Skan See convert_mbs_to_wcs. 
*/ 5485169695Skan 5486169695Skanstatic int 5487169695Skancount_mbs_length(int *offset_buffer, int length) 5488169695Skan{ 5489169695Skan int upper, lower; 5490169695Skan 5491169695Skan /* Check whether the size is valid. */ 5492169695Skan if (length < 0) 5493169695Skan return -1; 5494169695Skan 5495169695Skan if (offset_buffer == NULL) 5496169695Skan return 0; 5497169695Skan 5498169695Skan /* If there are no multibyte character, offset_buffer[i] == i. 5499169695Skan Optmize for this case. */ 5500169695Skan if (offset_buffer[length] == length) 5501169695Skan return length; 5502169695Skan 5503169695Skan /* Set up upper with length. (because for all i, offset_buffer[i] >= i) */ 5504169695Skan upper = length; 5505169695Skan lower = 0; 5506169695Skan 5507169695Skan while (true) 5508169695Skan { 5509169695Skan int middle = (lower + upper) / 2; 5510169695Skan if (middle == lower || middle == upper) 5511169695Skan break; 5512169695Skan if (offset_buffer[middle] > length) 5513169695Skan upper = middle; 5514169695Skan else if (offset_buffer[middle] < length) 5515169695Skan lower = middle; 5516169695Skan else 5517169695Skan return middle; 5518169695Skan } 5519169695Skan 5520169695Skan return -1; 5521169695Skan} 5522169695Skan#endif /* WCHAR */ 5523169695Skan 5524169695Skan/* This is a separate function so that we can force an alloca cleanup 5525169695Skan afterwards. */ 5526169695Skan#ifdef WCHAR 5527169695Skanstatic int 5528169695Skanwcs_re_match_2_internal (struct re_pattern_buffer *bufp, 5529169695Skan const char *cstring1, int csize1, 5530169695Skan const char *cstring2, int csize2, 5531169695Skan int pos, 5532169695Skan struct re_registers *regs, 5533169695Skan int stop, 5534169695Skan /* string1 == string2 == NULL means string1/2, size1/2 and 5535169695Skan mbs_offset1/2 need seting up in this function. */ 5536169695Skan /* We need wchar_t* buffers correspond to cstring1, cstring2. 
*/ 5537169695Skan wchar_t *string1, int size1, 5538169695Skan wchar_t *string2, int size2, 5539169695Skan /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ 5540169695Skan int *mbs_offset1, int *mbs_offset2) 5541169695Skan#else /* BYTE */ 5542169695Skanstatic int 5543169695Skanbyte_re_match_2_internal (struct re_pattern_buffer *bufp, 5544169695Skan const char *string1, int size1, 5545169695Skan const char *string2, int size2, 5546169695Skan int pos, 5547169695Skan struct re_registers *regs, int stop) 5548169695Skan#endif /* BYTE */ 5549169695Skan{ 5550169695Skan /* General temporaries. */ 5551169695Skan int mcnt; 5552169695Skan UCHAR_T *p1; 5553169695Skan#ifdef WCHAR 5554169695Skan /* They hold whether each wchar_t is binary data or not. */ 5555169695Skan char *is_binary = NULL; 5556169695Skan /* If true, we can't free string1/2, mbs_offset1/2. */ 5557169695Skan int cant_free_wcs_buf = 1; 5558169695Skan#endif /* WCHAR */ 5559169695Skan 5560169695Skan /* Just past the end of the corresponding string. */ 5561169695Skan const CHAR_T *end1, *end2; 5562169695Skan 5563169695Skan /* Pointers into string1 and string2, just past the last characters in 5564169695Skan each to consider matching. */ 5565169695Skan const CHAR_T *end_match_1, *end_match_2; 5566169695Skan 5567169695Skan /* Where we are in the data, and the end of the current string. */ 5568169695Skan const CHAR_T *d, *dend; 5569169695Skan 5570169695Skan /* Where we are in the pattern, and the end of the pattern. */ 5571169695Skan#ifdef WCHAR 5572169695Skan UCHAR_T *pattern, *p; 5573169695Skan register UCHAR_T *pend; 5574169695Skan#else /* BYTE */ 5575169695Skan UCHAR_T *p = bufp->buffer; 5576169695Skan register UCHAR_T *pend = p + bufp->used; 5577169695Skan#endif /* WCHAR */ 5578169695Skan 5579169695Skan /* Mark the opcode just after a start_memory, so we can test for an 5580169695Skan empty subpattern when we get to the stop_memory. 
*/ 5581169695Skan UCHAR_T *just_past_start_mem = 0; 5582169695Skan 5583169695Skan /* We use this to map every character in the string. */ 5584169695Skan RE_TRANSLATE_TYPE translate = bufp->translate; 5585169695Skan 5586169695Skan /* Failure point stack. Each place that can handle a failure further 5587169695Skan down the line pushes a failure point on this stack. It consists of 5588169695Skan restart, regend, and reg_info for all registers corresponding to 5589169695Skan the subexpressions we're currently inside, plus the number of such 5590169695Skan registers, and, finally, two char *'s. The first char * is where 5591169695Skan to resume scanning the pattern; the second one is where to resume 5592169695Skan scanning the strings. If the latter is zero, the failure point is 5593169695Skan a ``dummy''; if a failure happens and the failure point is a dummy, 5594169695Skan it gets discarded and the next next one is tried. */ 5595169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 5596169695Skan PREFIX(fail_stack_type) fail_stack; 5597169695Skan#endif 5598169695Skan#ifdef DEBUG 5599169695Skan static unsigned failure_id; 5600169695Skan unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 5601169695Skan#endif 5602169695Skan 5603169695Skan#ifdef REL_ALLOC 5604169695Skan /* This holds the pointer to the failure stack, when 5605169695Skan it is allocated relocatably. */ 5606169695Skan fail_stack_elt_t *failure_stack_ptr; 5607169695Skan#endif 5608169695Skan 5609169695Skan /* We fill all the registers internally, independent of what we 5610169695Skan return, for use in backreferences. The number here includes 5611169695Skan an element for register zero. */ 5612169695Skan size_t num_regs = bufp->re_nsub + 1; 5613169695Skan 5614169695Skan /* The currently active registers. 
*/ 5615169695Skan active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; 5616169695Skan active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; 5617169695Skan 5618169695Skan /* Information on the contents of registers. These are pointers into 5619169695Skan the input strings; they record just what was matched (on this 5620169695Skan attempt) by a subexpression part of the pattern, that is, the 5621169695Skan regnum-th regstart pointer points to where in the pattern we began 5622169695Skan matching and the regnum-th regend points to right after where we 5623169695Skan stopped matching the regnum-th subexpression. (The zeroth register 5624169695Skan keeps track of what the whole pattern matches.) */ 5625169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5626169695Skan const CHAR_T **regstart, **regend; 5627169695Skan#endif 5628169695Skan 5629169695Skan /* If a group that's operated upon by a repetition operator fails to 5630169695Skan match anything, then the register for its start will need to be 5631169695Skan restored because it will have been set to wherever in the string we 5632169695Skan are when we last see its open-group operator. Similarly for a 5633169695Skan register's end. */ 5634169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5635169695Skan const CHAR_T **old_regstart, **old_regend; 5636169695Skan#endif 5637169695Skan 5638169695Skan /* The is_active field of reg_info helps us keep track of which (possibly 5639169695Skan nested) subexpressions we are currently in. The matched_something 5640169695Skan field of reg_info[reg_num] helps us tell whether or not we have 5641169695Skan matched any of the pattern so far this time through the reg_num-th 5642169695Skan subexpression. These two fields get reset each time through any 5643169695Skan loop their register is in. */ 5644169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. 
*/ 5645169695Skan PREFIX(register_info_type) *reg_info; 5646169695Skan#endif 5647169695Skan 5648169695Skan /* The following record the register info as found in the above 5649169695Skan variables when we find a match better than any we've seen before. 5650169695Skan This happens as we backtrack through the failure points, which in 5651169695Skan turn happens only if we have not yet matched the entire string. */ 5652169695Skan unsigned best_regs_set = false; 5653169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5654169695Skan const CHAR_T **best_regstart, **best_regend; 5655169695Skan#endif 5656169695Skan 5657169695Skan /* Logically, this is `best_regend[0]'. But we don't want to have to 5658169695Skan allocate space for that if we're not allocating space for anything 5659169695Skan else (see below). Also, we never need info about register 0 for 5660169695Skan any of the other register vectors, and it seems rather a kludge to 5661169695Skan treat `best_regend' differently than the rest. So we keep track of 5662169695Skan the end of the best match so far in a separate variable. We 5663169695Skan initialize this to NULL so that when we backtrack the first time 5664169695Skan and need to test it, it's not garbage. */ 5665169695Skan const CHAR_T *match_end = NULL; 5666169695Skan 5667169695Skan /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 5668169695Skan int set_regs_matched_done = 0; 5669169695Skan 5670169695Skan /* Used when we pop values we don't care about. */ 5671169695Skan#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 5672169695Skan const CHAR_T **reg_dummy; 5673169695Skan PREFIX(register_info_type) *reg_info_dummy; 5674169695Skan#endif 5675169695Skan 5676169695Skan#ifdef DEBUG 5677169695Skan /* Counts the total number of registers pushed. 
*/ 5678169695Skan unsigned num_regs_pushed = 0; 5679169695Skan#endif 5680169695Skan 5681169695Skan DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 5682169695Skan 5683169695Skan INIT_FAIL_STACK (); 5684169695Skan 5685169695Skan#ifdef MATCH_MAY_ALLOCATE 5686169695Skan /* Do not bother to initialize all the register variables if there are 5687169695Skan no groups in the pattern, as it takes a fair amount of time. If 5688169695Skan there are groups, we include space for register 0 (the whole 5689169695Skan pattern), even though we never use it, since it simplifies the 5690169695Skan array indexing. We should fix this. */ 5691169695Skan if (bufp->re_nsub) 5692169695Skan { 5693169695Skan regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5694169695Skan regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5695169695Skan old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5696169695Skan old_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5697169695Skan best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *); 5698169695Skan best_regend = REGEX_TALLOC (num_regs, const CHAR_T *); 5699169695Skan reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 5700169695Skan reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *); 5701169695Skan reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type)); 5702169695Skan 5703169695Skan if (!(regstart && regend && old_regstart && old_regend && reg_info 5704169695Skan && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 5705169695Skan { 5706169695Skan FREE_VARIABLES (); 5707169695Skan return -2; 5708169695Skan } 5709169695Skan } 5710169695Skan else 5711169695Skan { 5712169695Skan /* We must initialize all our variables to NULL, so that 5713169695Skan `FREE_VARIABLES' doesn't try to free them. 
*/ 5714169695Skan regstart = regend = old_regstart = old_regend = best_regstart 5715169695Skan = best_regend = reg_dummy = NULL; 5716169695Skan reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL; 5717169695Skan } 5718169695Skan#endif /* MATCH_MAY_ALLOCATE */ 5719169695Skan 5720169695Skan /* The starting position is bogus. */ 5721169695Skan#ifdef WCHAR 5722169695Skan if (pos < 0 || pos > csize1 + csize2) 5723169695Skan#else /* BYTE */ 5724169695Skan if (pos < 0 || pos > size1 + size2) 5725169695Skan#endif 5726169695Skan { 5727169695Skan FREE_VARIABLES (); 5728169695Skan return -1; 5729169695Skan } 5730169695Skan 5731169695Skan#ifdef WCHAR 5732169695Skan /* Allocate wchar_t array for string1 and string2 and 5733169695Skan fill them with converted string. */ 5734169695Skan if (string1 == NULL && string2 == NULL) 5735169695Skan { 5736169695Skan /* We need seting up buffers here. */ 5737169695Skan 5738169695Skan /* We must free wcs buffers in this function. */ 5739169695Skan cant_free_wcs_buf = 0; 5740169695Skan 5741169695Skan if (csize1 != 0) 5742169695Skan { 5743169695Skan string1 = REGEX_TALLOC (csize1 + 1, CHAR_T); 5744169695Skan mbs_offset1 = REGEX_TALLOC (csize1 + 1, int); 5745169695Skan is_binary = REGEX_TALLOC (csize1 + 1, char); 5746169695Skan if (!string1 || !mbs_offset1 || !is_binary) 5747169695Skan { 5748169695Skan FREE_VAR (string1); 5749169695Skan FREE_VAR (mbs_offset1); 5750169695Skan FREE_VAR (is_binary); 5751169695Skan return -2; 5752169695Skan } 5753169695Skan } 5754169695Skan if (csize2 != 0) 5755169695Skan { 5756169695Skan string2 = REGEX_TALLOC (csize2 + 1, CHAR_T); 5757169695Skan mbs_offset2 = REGEX_TALLOC (csize2 + 1, int); 5758169695Skan is_binary = REGEX_TALLOC (csize2 + 1, char); 5759169695Skan if (!string2 || !mbs_offset2 || !is_binary) 5760169695Skan { 5761169695Skan FREE_VAR (string1); 5762169695Skan FREE_VAR (mbs_offset1); 5763169695Skan FREE_VAR (string2); 5764169695Skan FREE_VAR (mbs_offset2); 5765169695Skan FREE_VAR 
(is_binary); 5766169695Skan return -2; 5767169695Skan } 5768169695Skan size2 = convert_mbs_to_wcs(string2, cstring2, csize2, 5769169695Skan mbs_offset2, is_binary); 5770169695Skan string2[size2] = L'\0'; /* for a sentinel */ 5771169695Skan FREE_VAR (is_binary); 5772169695Skan } 5773169695Skan } 5774169695Skan 5775169695Skan /* We need to cast pattern to (wchar_t*), because we casted this compiled 5776169695Skan pattern to (char*) in regex_compile. */ 5777169695Skan p = pattern = (CHAR_T*)bufp->buffer; 5778169695Skan pend = (CHAR_T*)(bufp->buffer + bufp->used); 5779169695Skan 5780169695Skan#endif /* WCHAR */ 5781169695Skan 5782169695Skan /* Initialize subexpression text positions to -1 to mark ones that no 5783169695Skan start_memory/stop_memory has been seen for. Also initialize the 5784169695Skan register information struct. */ 5785169695Skan for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 5786169695Skan { 5787169695Skan regstart[mcnt] = regend[mcnt] 5788169695Skan = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 5789169695Skan 5790169695Skan REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 5791169695Skan IS_ACTIVE (reg_info[mcnt]) = 0; 5792169695Skan MATCHED_SOMETHING (reg_info[mcnt]) = 0; 5793169695Skan EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 5794169695Skan } 5795169695Skan 5796169695Skan /* We move `string1' into `string2' if the latter's empty -- but not if 5797169695Skan `string1' is null. 
*/ 5798169695Skan if (size2 == 0 && string1 != NULL) 5799169695Skan { 5800169695Skan string2 = string1; 5801169695Skan size2 = size1; 5802169695Skan string1 = 0; 5803169695Skan size1 = 0; 5804169695Skan#ifdef WCHAR 5805169695Skan mbs_offset2 = mbs_offset1; 5806169695Skan csize2 = csize1; 5807169695Skan mbs_offset1 = NULL; 5808169695Skan csize1 = 0; 5809169695Skan#endif 5810169695Skan } 5811169695Skan end1 = string1 + size1; 5812169695Skan end2 = string2 + size2; 5813169695Skan 5814169695Skan /* Compute where to stop matching, within the two strings. */ 5815169695Skan#ifdef WCHAR 5816169695Skan if (stop <= csize1) 5817169695Skan { 5818169695Skan mcnt = count_mbs_length(mbs_offset1, stop); 5819169695Skan end_match_1 = string1 + mcnt; 5820169695Skan end_match_2 = string2; 5821169695Skan } 5822169695Skan else 5823169695Skan { 5824169695Skan if (stop > csize1 + csize2) 5825169695Skan stop = csize1 + csize2; 5826169695Skan end_match_1 = end1; 5827169695Skan mcnt = count_mbs_length(mbs_offset2, stop-csize1); 5828169695Skan end_match_2 = string2 + mcnt; 5829169695Skan } 5830169695Skan if (mcnt < 0) 5831169695Skan { /* count_mbs_length return error. */ 5832169695Skan FREE_VARIABLES (); 5833169695Skan return -1; 5834169695Skan } 5835169695Skan#else 5836169695Skan if (stop <= size1) 5837169695Skan { 5838169695Skan end_match_1 = string1 + stop; 5839169695Skan end_match_2 = string2; 5840169695Skan } 5841169695Skan else 5842169695Skan { 5843169695Skan end_match_1 = end1; 5844169695Skan end_match_2 = string2 + stop - size1; 5845169695Skan } 5846169695Skan#endif /* WCHAR */ 5847169695Skan 5848169695Skan /* `p' scans through the pattern as `d' scans through the data. 5849169695Skan `dend' is the end of the input string that `d' points within. 
`d' 5850169695Skan is advanced into the following input string whenever necessary, but 5851169695Skan this happens before fetching; therefore, at the beginning of the 5852169695Skan loop, `d' can be pointing at the end of a string, but it cannot 5853169695Skan equal `string2'. */ 5854169695Skan#ifdef WCHAR 5855169695Skan if (size1 > 0 && pos <= csize1) 5856169695Skan { 5857169695Skan mcnt = count_mbs_length(mbs_offset1, pos); 5858169695Skan d = string1 + mcnt; 5859169695Skan dend = end_match_1; 5860169695Skan } 5861169695Skan else 5862169695Skan { 5863169695Skan mcnt = count_mbs_length(mbs_offset2, pos-csize1); 5864169695Skan d = string2 + mcnt; 5865169695Skan dend = end_match_2; 5866169695Skan } 5867169695Skan 5868169695Skan if (mcnt < 0) 5869169695Skan { /* count_mbs_length return error. */ 5870169695Skan FREE_VARIABLES (); 5871169695Skan return -1; 5872169695Skan } 5873169695Skan#else 5874169695Skan if (size1 > 0 && pos <= size1) 5875169695Skan { 5876169695Skan d = string1 + pos; 5877169695Skan dend = end_match_1; 5878169695Skan } 5879169695Skan else 5880169695Skan { 5881169695Skan d = string2 + pos - size1; 5882169695Skan dend = end_match_2; 5883169695Skan } 5884169695Skan#endif /* WCHAR */ 5885169695Skan 5886169695Skan DEBUG_PRINT1 ("The compiled pattern is:\n"); 5887169695Skan DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 5888169695Skan DEBUG_PRINT1 ("The string to match is: `"); 5889169695Skan DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 5890169695Skan DEBUG_PRINT1 ("'\n"); 5891169695Skan 5892169695Skan /* This loops over pattern commands. It exits by returning from the 5893169695Skan function if the match is complete, or it drops through if the match 5894169695Skan fails at this starting point in the input data. 
*/ 5895169695Skan for (;;) 5896169695Skan { 5897169695Skan#ifdef _LIBC 5898169695Skan DEBUG_PRINT2 ("\n%p: ", p); 5899169695Skan#else 5900169695Skan DEBUG_PRINT2 ("\n0x%x: ", p); 5901169695Skan#endif 5902169695Skan 5903169695Skan if (p == pend) 5904169695Skan { /* End of pattern means we might have succeeded. */ 5905169695Skan DEBUG_PRINT1 ("end of pattern ... "); 5906169695Skan 5907169695Skan /* If we haven't matched the entire string, and we want the 5908169695Skan longest match, try backtracking. */ 5909169695Skan if (d != end_match_2) 5910169695Skan { 5911169695Skan /* 1 if this match ends in the same string (string1 or string2) 5912169695Skan as the best previous match. */ 5913169695Skan boolean same_str_p = (FIRST_STRING_P (match_end) 5914169695Skan == MATCHING_IN_FIRST_STRING); 5915169695Skan /* 1 if this match is the best seen so far. */ 5916169695Skan boolean best_match_p; 5917169695Skan 5918169695Skan /* AIX compiler got confused when this was combined 5919169695Skan with the previous declaration. */ 5920169695Skan if (same_str_p) 5921169695Skan best_match_p = d > match_end; 5922169695Skan else 5923169695Skan best_match_p = !MATCHING_IN_FIRST_STRING; 5924169695Skan 5925169695Skan DEBUG_PRINT1 ("backtracking.\n"); 5926169695Skan 5927169695Skan if (!FAIL_STACK_EMPTY ()) 5928169695Skan { /* More failure points to try. */ 5929169695Skan 5930169695Skan /* If exceeds best match so far, save it. */ 5931169695Skan if (!best_regs_set || best_match_p) 5932169695Skan { 5933169695Skan best_regs_set = true; 5934169695Skan match_end = d; 5935169695Skan 5936169695Skan DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 5937169695Skan 5938169695Skan for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 5939169695Skan { 5940169695Skan best_regstart[mcnt] = regstart[mcnt]; 5941169695Skan best_regend[mcnt] = regend[mcnt]; 5942169695Skan } 5943169695Skan } 5944169695Skan goto fail; 5945169695Skan } 5946169695Skan 5947169695Skan /* If no failure points, don't restore garbage. 
And if 5948169695Skan last match is real best match, don't restore second 5949169695Skan best one. */ 5950169695Skan else if (best_regs_set && !best_match_p) 5951169695Skan { 5952169695Skan restore_best_regs: 5953169695Skan /* Restore best match. It may happen that `dend == 5954169695Skan end_match_1' while the restored d is in string2. 5955169695Skan For example, the pattern `x.*y.*z' against the 5956169695Skan strings `x-' and `y-z-', if the two strings are 5957169695Skan not consecutive in memory. */ 5958169695Skan DEBUG_PRINT1 ("Restoring best registers.\n"); 5959169695Skan 5960169695Skan d = match_end; 5961169695Skan dend = ((d >= string1 && d <= end1) 5962169695Skan ? end_match_1 : end_match_2); 5963169695Skan 5964169695Skan for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) 5965169695Skan { 5966169695Skan regstart[mcnt] = best_regstart[mcnt]; 5967169695Skan regend[mcnt] = best_regend[mcnt]; 5968169695Skan } 5969169695Skan } 5970169695Skan } /* d != end_match_2 */ 5971169695Skan 5972169695Skan succeed_label: 5973169695Skan DEBUG_PRINT1 ("Accepting match.\n"); 5974169695Skan /* If caller wants register contents data back, do it. */ 5975169695Skan if (regs && !bufp->no_sub) 5976169695Skan { 5977169695Skan /* Have the register data arrays been allocated? */ 5978169695Skan if (bufp->regs_allocated == REGS_UNALLOCATED) 5979169695Skan { /* No. So allocate them with malloc. We need one 5980169695Skan extra element beyond `num_regs' for the `-1' marker 5981169695Skan GNU code uses. 
*/ 5982169695Skan regs->num_regs = MAX (RE_NREGS, num_regs + 1); 5983169695Skan regs->start = TALLOC (regs->num_regs, regoff_t); 5984169695Skan regs->end = TALLOC (regs->num_regs, regoff_t); 5985169695Skan if (regs->start == NULL || regs->end == NULL) 5986169695Skan { 5987169695Skan FREE_VARIABLES (); 5988169695Skan return -2; 5989169695Skan } 5990169695Skan bufp->regs_allocated = REGS_REALLOCATE; 5991169695Skan } 5992169695Skan else if (bufp->regs_allocated == REGS_REALLOCATE) 5993169695Skan { /* Yes. If we need more elements than were already 5994169695Skan allocated, reallocate them. If we need fewer, just 5995169695Skan leave it alone. */ 5996169695Skan if (regs->num_regs < num_regs + 1) 5997169695Skan { 5998169695Skan regs->num_regs = num_regs + 1; 5999169695Skan RETALLOC (regs->start, regs->num_regs, regoff_t); 6000169695Skan RETALLOC (regs->end, regs->num_regs, regoff_t); 6001169695Skan if (regs->start == NULL || regs->end == NULL) 6002169695Skan { 6003169695Skan FREE_VARIABLES (); 6004169695Skan return -2; 6005169695Skan } 6006169695Skan } 6007169695Skan } 6008169695Skan else 6009169695Skan { 6010169695Skan /* These braces fend off a "empty body in an else-statement" 6011169695Skan warning under GCC when assert expands to nothing. */ 6012169695Skan assert (bufp->regs_allocated == REGS_FIXED); 6013169695Skan } 6014169695Skan 6015169695Skan /* Convert the pointer data in `regstart' and `regend' to 6016169695Skan indices. Register zero has to be set differently, 6017169695Skan since we haven't kept track of any info for it. */ 6018169695Skan if (regs->num_regs > 0) 6019169695Skan { 6020169695Skan regs->start[0] = pos; 6021169695Skan#ifdef WCHAR 6022169695Skan if (MATCHING_IN_FIRST_STRING) 6023169695Skan regs->end[0] = mbs_offset1 != NULL ? 6024169695Skan mbs_offset1[d-string1] : 0; 6025169695Skan else 6026169695Skan regs->end[0] = csize1 + (mbs_offset2 != NULL ? 
6027169695Skan mbs_offset2[d-string2] : 0); 6028169695Skan#else 6029169695Skan regs->end[0] = (MATCHING_IN_FIRST_STRING 6030169695Skan ? ((regoff_t) (d - string1)) 6031169695Skan : ((regoff_t) (d - string2 + size1))); 6032169695Skan#endif /* WCHAR */ 6033169695Skan } 6034169695Skan 6035169695Skan /* Go through the first `min (num_regs, regs->num_regs)' 6036169695Skan registers, since that is all we initialized. */ 6037169695Skan for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); 6038169695Skan mcnt++) 6039169695Skan { 6040169695Skan if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 6041169695Skan regs->start[mcnt] = regs->end[mcnt] = -1; 6042169695Skan else 6043169695Skan { 6044169695Skan regs->start[mcnt] 6045169695Skan = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 6046169695Skan regs->end[mcnt] 6047169695Skan = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 6048169695Skan } 6049169695Skan } 6050169695Skan 6051169695Skan /* If the regs structure we return has more elements than 6052169695Skan were in the pattern, set the extra elements to -1. If 6053169695Skan we (re)allocated the registers, this is the case, 6054169695Skan because we always allocate enough to have at least one 6055169695Skan -1 at the end. */ 6056169695Skan for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) 6057169695Skan regs->start[mcnt] = regs->end[mcnt] = -1; 6058169695Skan } /* regs && !bufp->no_sub */ 6059169695Skan 6060169695Skan DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 6061169695Skan nfailure_points_pushed, nfailure_points_popped, 6062169695Skan nfailure_points_pushed - nfailure_points_popped); 6063169695Skan DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 6064169695Skan 6065169695Skan#ifdef WCHAR 6066169695Skan if (MATCHING_IN_FIRST_STRING) 6067169695Skan mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0; 6068169695Skan else 6069169695Skan mcnt = (mbs_offset2 != NULL ? 
mbs_offset2[d-string2] : 0) + 6070169695Skan csize1; 6071169695Skan mcnt -= pos; 6072169695Skan#else 6073169695Skan mcnt = d - pos - (MATCHING_IN_FIRST_STRING 6074169695Skan ? string1 6075169695Skan : string2 - size1); 6076169695Skan#endif /* WCHAR */ 6077169695Skan 6078169695Skan DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 6079169695Skan 6080169695Skan FREE_VARIABLES (); 6081169695Skan return mcnt; 6082169695Skan } 6083169695Skan 6084169695Skan /* Otherwise match next pattern command. */ 6085169695Skan switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 6086169695Skan { 6087169695Skan /* Ignore these. Used to ignore the n of succeed_n's which 6088169695Skan currently have n == 0. */ 6089169695Skan case no_op: 6090169695Skan DEBUG_PRINT1 ("EXECUTING no_op.\n"); 6091169695Skan break; 6092169695Skan 6093169695Skan case succeed: 6094169695Skan DEBUG_PRINT1 ("EXECUTING succeed.\n"); 6095169695Skan goto succeed_label; 6096169695Skan 6097169695Skan /* Match the next n pattern characters exactly. The following 6098169695Skan byte in the pattern defines n, and the n bytes after that 6099169695Skan are the characters to match. */ 6100169695Skan case exactn: 6101169695Skan#ifdef MBS_SUPPORT 6102169695Skan case exactn_bin: 6103169695Skan#endif 6104169695Skan mcnt = *p++; 6105169695Skan DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 6106169695Skan 6107169695Skan /* This is written out as an if-else so we don't waste time 6108169695Skan testing `translate' inside the loop. 
*/ 6109169695Skan if (translate) 6110169695Skan { 6111169695Skan do 6112169695Skan { 6113169695Skan PREFETCH (); 6114169695Skan#ifdef WCHAR 6115169695Skan if (*d <= 0xff) 6116169695Skan { 6117169695Skan if ((UCHAR_T) translate[(unsigned char) *d++] 6118169695Skan != (UCHAR_T) *p++) 6119169695Skan goto fail; 6120169695Skan } 6121169695Skan else 6122169695Skan { 6123169695Skan if (*d++ != (CHAR_T) *p++) 6124169695Skan goto fail; 6125169695Skan } 6126169695Skan#else 6127169695Skan if ((UCHAR_T) translate[(unsigned char) *d++] 6128169695Skan != (UCHAR_T) *p++) 6129169695Skan goto fail; 6130169695Skan#endif /* WCHAR */ 6131169695Skan } 6132169695Skan while (--mcnt); 6133169695Skan } 6134169695Skan else 6135169695Skan { 6136169695Skan do 6137169695Skan { 6138169695Skan PREFETCH (); 6139169695Skan if (*d++ != (CHAR_T) *p++) goto fail; 6140169695Skan } 6141169695Skan while (--mcnt); 6142169695Skan } 6143169695Skan SET_REGS_MATCHED (); 6144169695Skan break; 6145169695Skan 6146169695Skan 6147169695Skan /* Match any character except possibly a newline or a null. 
*/ 6148169695Skan case anychar: 6149169695Skan DEBUG_PRINT1 ("EXECUTING anychar.\n"); 6150169695Skan 6151169695Skan PREFETCH (); 6152169695Skan 6153169695Skan if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 6154169695Skan || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 6155169695Skan goto fail; 6156169695Skan 6157169695Skan SET_REGS_MATCHED (); 6158169695Skan DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d); 6159169695Skan d++; 6160169695Skan break; 6161169695Skan 6162169695Skan 6163169695Skan case charset: 6164169695Skan case charset_not: 6165169695Skan { 6166169695Skan register UCHAR_T c; 6167169695Skan#ifdef WCHAR 6168169695Skan unsigned int i, char_class_length, coll_symbol_length, 6169169695Skan equiv_class_length, ranges_length, chars_length, length; 6170169695Skan CHAR_T *workp, *workp2, *charset_top; 6171169695Skan#define WORK_BUFFER_SIZE 128 6172169695Skan CHAR_T str_buf[WORK_BUFFER_SIZE]; 6173169695Skan# ifdef _LIBC 6174169695Skan uint32_t nrules; 6175169695Skan# endif /* _LIBC */ 6176169695Skan#endif /* WCHAR */ 6177169695Skan boolean negate = (re_opcode_t) *(p - 1) == charset_not; 6178169695Skan 6179169695Skan DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : ""); 6180169695Skan PREFETCH (); 6181169695Skan c = TRANSLATE (*d); /* The character to match. 
*/ 6182169695Skan#ifdef WCHAR 6183169695Skan# ifdef _LIBC 6184169695Skan nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 6185169695Skan# endif /* _LIBC */ 6186169695Skan charset_top = p - 1; 6187169695Skan char_class_length = *p++; 6188169695Skan coll_symbol_length = *p++; 6189169695Skan equiv_class_length = *p++; 6190169695Skan ranges_length = *p++; 6191169695Skan chars_length = *p++; 6192169695Skan /* p points charset[6], so the address of the next instruction 6193169695Skan (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'], 6194169695Skan where l=length of char_classes, m=length of collating_symbol, 6195169695Skan n=equivalence_class, o=length of char_range, 6196169695Skan p'=length of character. */ 6197169695Skan workp = p; 6198169695Skan /* Update p to indicate the next instruction. */ 6199169695Skan p += char_class_length + coll_symbol_length+ equiv_class_length + 6200169695Skan 2*ranges_length + chars_length; 6201169695Skan 6202169695Skan /* match with char_class? */ 6203169695Skan for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE) 6204169695Skan { 6205169695Skan wctype_t wctype; 6206169695Skan uintptr_t alignedp = ((uintptr_t)workp 6207169695Skan + __alignof__(wctype_t) - 1) 6208169695Skan & ~(uintptr_t)(__alignof__(wctype_t) - 1); 6209169695Skan wctype = *((wctype_t*)alignedp); 6210169695Skan workp += CHAR_CLASS_SIZE; 6211169695Skan# ifdef _LIBC 6212169695Skan if (__iswctype((wint_t)c, wctype)) 6213169695Skan goto char_set_matched; 6214169695Skan# else 6215169695Skan if (iswctype((wint_t)c, wctype)) 6216169695Skan goto char_set_matched; 6217169695Skan# endif 6218169695Skan } 6219169695Skan 6220169695Skan /* match with collating_symbol? 
*/ 6221169695Skan# ifdef _LIBC 6222169695Skan if (nrules != 0) 6223169695Skan { 6224169695Skan const unsigned char *extra = (const unsigned char *) 6225169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 6226169695Skan 6227169695Skan for (workp2 = workp + coll_symbol_length ; workp < workp2 ; 6228169695Skan workp++) 6229169695Skan { 6230169695Skan int32_t *wextra; 6231169695Skan wextra = (int32_t*)(extra + *workp++); 6232169695Skan for (i = 0; i < *wextra; ++i) 6233169695Skan if (TRANSLATE(d[i]) != wextra[1 + i]) 6234169695Skan break; 6235169695Skan 6236169695Skan if (i == *wextra) 6237169695Skan { 6238169695Skan /* Update d, however d will be incremented at 6239169695Skan char_set_matched:, we decrement d here. */ 6240169695Skan d += i - 1; 6241169695Skan goto char_set_matched; 6242169695Skan } 6243169695Skan } 6244169695Skan } 6245169695Skan else /* (nrules == 0) */ 6246169695Skan# endif 6247169695Skan /* If we can't look up collation data, we use wcscoll 6248169695Skan instead. */ 6249169695Skan { 6250169695Skan for (workp2 = workp + coll_symbol_length ; workp < workp2 ;) 6251169695Skan { 6252169695Skan const CHAR_T *backup_d = d, *backup_dend = dend; 6253169695Skan# ifdef _LIBC 6254169695Skan length = __wcslen (workp); 6255169695Skan# else 6256169695Skan length = wcslen (workp); 6257169695Skan# endif 6258169695Skan 6259169695Skan /* If wcscoll(the collating symbol, whole string) > 0, 6260169695Skan any substring of the string never match with the 6261169695Skan collating symbol. */ 6262169695Skan# ifdef _LIBC 6263169695Skan if (__wcscoll (workp, d) > 0) 6264169695Skan# else 6265169695Skan if (wcscoll (workp, d) > 0) 6266169695Skan# endif 6267169695Skan { 6268169695Skan workp += length + 1; 6269169695Skan continue; 6270169695Skan } 6271169695Skan 6272169695Skan /* First, we compare the collating symbol with 6273169695Skan the first character of the string. 
6274169695Skan If it don't match, we add the next character to 6275169695Skan the compare buffer in turn. */ 6276169695Skan for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++) 6277169695Skan { 6278169695Skan int match; 6279169695Skan if (d == dend) 6280169695Skan { 6281169695Skan if (dend == end_match_2) 6282169695Skan break; 6283169695Skan d = string2; 6284169695Skan dend = end_match_2; 6285169695Skan } 6286169695Skan 6287169695Skan /* add next character to the compare buffer. */ 6288169695Skan str_buf[i] = TRANSLATE(*d); 6289169695Skan str_buf[i+1] = '\0'; 6290169695Skan 6291169695Skan# ifdef _LIBC 6292169695Skan match = __wcscoll (workp, str_buf); 6293169695Skan# else 6294169695Skan match = wcscoll (workp, str_buf); 6295169695Skan# endif 6296169695Skan if (match == 0) 6297169695Skan goto char_set_matched; 6298169695Skan 6299169695Skan if (match < 0) 6300169695Skan /* (str_buf > workp) indicate (str_buf + X > workp), 6301169695Skan because for all X (str_buf + X > str_buf). 6302169695Skan So we don't need continue this loop. */ 6303169695Skan break; 6304169695Skan 6305169695Skan /* Otherwise(str_buf < workp), 6306169695Skan (str_buf+next_character) may equals (workp). 6307169695Skan So we continue this loop. */ 6308169695Skan } 6309169695Skan /* not matched */ 6310169695Skan d = backup_d; 6311169695Skan dend = backup_dend; 6312169695Skan workp += length + 1; 6313169695Skan } 6314169695Skan } 6315169695Skan /* match with equivalence_class? */ 6316169695Skan# ifdef _LIBC 6317169695Skan if (nrules != 0) 6318169695Skan { 6319169695Skan const CHAR_T *backup_d = d, *backup_dend = dend; 6320169695Skan /* Try to match the equivalence class against 6321169695Skan those known to the collate implementation. 
*/ 6322169695Skan const int32_t *table; 6323169695Skan const int32_t *weights; 6324169695Skan const int32_t *extra; 6325169695Skan const int32_t *indirect; 6326169695Skan int32_t idx, idx2; 6327169695Skan wint_t *cp; 6328169695Skan size_t len; 6329169695Skan 6330169695Skan /* This #include defines a local function! */ 6331169695Skan# include <locale/weightwc.h> 6332169695Skan 6333169695Skan table = (const int32_t *) 6334169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 6335169695Skan weights = (const wint_t *) 6336169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 6337169695Skan extra = (const wint_t *) 6338169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 6339169695Skan indirect = (const int32_t *) 6340169695Skan _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 6341169695Skan 6342169695Skan /* Write 1 collating element to str_buf, and 6343169695Skan get its index. */ 6344169695Skan idx2 = 0; 6345169695Skan 6346169695Skan for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++) 6347169695Skan { 6348169695Skan cp = (wint_t*)str_buf; 6349169695Skan if (d == dend) 6350169695Skan { 6351169695Skan if (dend == end_match_2) 6352169695Skan break; 6353169695Skan d = string2; 6354169695Skan dend = end_match_2; 6355169695Skan } 6356169695Skan str_buf[i] = TRANSLATE(*(d+i)); 6357169695Skan str_buf[i+1] = '\0'; /* sentinel */ 6358169695Skan idx2 = findidx ((const wint_t**)&cp); 6359169695Skan } 6360169695Skan 6361169695Skan /* Update d, however d will be incremented at 6362169695Skan char_set_matched:, we decrement d here. 
*/ 6363169695Skan d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1); 6364169695Skan if (d >= dend) 6365169695Skan { 6366169695Skan if (dend == end_match_2) 6367169695Skan d = dend; 6368169695Skan else 6369169695Skan { 6370169695Skan d = string2; 6371169695Skan dend = end_match_2; 6372169695Skan } 6373169695Skan } 6374169695Skan 6375169695Skan len = weights[idx2]; 6376169695Skan 6377169695Skan for (workp2 = workp + equiv_class_length ; workp < workp2 ; 6378169695Skan workp++) 6379169695Skan { 6380169695Skan idx = (int32_t)*workp; 6381169695Skan /* We already checked idx != 0 in regex_compile. */ 6382169695Skan 6383169695Skan if (idx2 != 0 && len == weights[idx]) 6384169695Skan { 6385169695Skan int cnt = 0; 6386169695Skan while (cnt < len && (weights[idx + 1 + cnt] 6387169695Skan == weights[idx2 + 1 + cnt])) 6388169695Skan ++cnt; 6389169695Skan 6390169695Skan if (cnt == len) 6391169695Skan goto char_set_matched; 6392169695Skan } 6393169695Skan } 6394169695Skan /* not matched */ 6395169695Skan d = backup_d; 6396169695Skan dend = backup_dend; 6397169695Skan } 6398169695Skan else /* (nrules == 0) */ 6399169695Skan# endif 6400169695Skan /* If we can't look up collation data, we use wcscoll 6401169695Skan instead. */ 6402169695Skan { 6403169695Skan for (workp2 = workp + equiv_class_length ; workp < workp2 ;) 6404169695Skan { 6405169695Skan const CHAR_T *backup_d = d, *backup_dend = dend; 6406169695Skan# ifdef _LIBC 6407169695Skan length = __wcslen (workp); 6408169695Skan# else 6409169695Skan length = wcslen (workp); 6410169695Skan# endif 6411169695Skan 6412169695Skan /* If wcscoll(the collating symbol, whole string) > 0, 6413169695Skan any substring of the string never match with the 6414169695Skan collating symbol. 
*/ 6415169695Skan# ifdef _LIBC 6416169695Skan if (__wcscoll (workp, d) > 0) 6417169695Skan# else 6418169695Skan if (wcscoll (workp, d) > 0) 6419169695Skan# endif 6420169695Skan { 6421169695Skan workp += length + 1; 6422169695Skan break; 6423169695Skan } 6424169695Skan 6425169695Skan /* First, we compare the equivalence class with 6426169695Skan the first character of the string. 6427169695Skan If it don't match, we add the next character to 6428169695Skan the compare buffer in turn. */ 6429169695Skan for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++) 6430169695Skan { 6431169695Skan int match; 6432169695Skan if (d == dend) 6433169695Skan { 6434169695Skan if (dend == end_match_2) 6435169695Skan break; 6436169695Skan d = string2; 6437169695Skan dend = end_match_2; 6438169695Skan } 6439169695Skan 6440169695Skan /* add next character to the compare buffer. */ 6441169695Skan str_buf[i] = TRANSLATE(*d); 6442169695Skan str_buf[i+1] = '\0'; 6443169695Skan 6444169695Skan# ifdef _LIBC 6445169695Skan match = __wcscoll (workp, str_buf); 6446169695Skan# else 6447169695Skan match = wcscoll (workp, str_buf); 6448169695Skan# endif 6449169695Skan 6450169695Skan if (match == 0) 6451169695Skan goto char_set_matched; 6452169695Skan 6453169695Skan if (match < 0) 6454169695Skan /* (str_buf > workp) indicate (str_buf + X > workp), 6455169695Skan because for all X (str_buf + X > str_buf). 6456169695Skan So we don't need continue this loop. */ 6457169695Skan break; 6458169695Skan 6459169695Skan /* Otherwise(str_buf < workp), 6460169695Skan (str_buf+next_character) may equals (workp). 6461169695Skan So we continue this loop. */ 6462169695Skan } 6463169695Skan /* not matched */ 6464169695Skan d = backup_d; 6465169695Skan dend = backup_dend; 6466169695Skan workp += length + 1; 6467169695Skan } 6468169695Skan } 6469169695Skan 6470169695Skan /* match with char_range? 
*/ 6471169695Skan# ifdef _LIBC 6472169695Skan if (nrules != 0) 6473169695Skan { 6474169695Skan uint32_t collseqval; 6475169695Skan const char *collseq = (const char *) 6476169695Skan _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 6477169695Skan 6478169695Skan collseqval = collseq_table_lookup (collseq, c); 6479169695Skan 6480169695Skan for (; workp < p - chars_length ;) 6481169695Skan { 6482169695Skan uint32_t start_val, end_val; 6483169695Skan 6484169695Skan /* We already compute the collation sequence value 6485169695Skan of the characters (or collating symbols). */ 6486169695Skan start_val = (uint32_t) *workp++; /* range_start */ 6487169695Skan end_val = (uint32_t) *workp++; /* range_end */ 6488169695Skan 6489169695Skan if (start_val <= collseqval && collseqval <= end_val) 6490169695Skan goto char_set_matched; 6491169695Skan } 6492169695Skan } 6493169695Skan else 6494169695Skan# endif 6495169695Skan { 6496169695Skan /* We set range_start_char at str_buf[0], range_end_char 6497169695Skan at str_buf[4], and compared char at str_buf[2]. */ 6498169695Skan str_buf[1] = 0; 6499169695Skan str_buf[2] = c; 6500169695Skan str_buf[3] = 0; 6501169695Skan str_buf[5] = 0; 6502169695Skan for (; workp < p - chars_length ;) 6503169695Skan { 6504169695Skan wchar_t *range_start_char, *range_end_char; 6505169695Skan 6506169695Skan /* match if (range_start_char <= c <= range_end_char). */ 6507169695Skan 6508169695Skan /* If range_start(or end) < 0, we assume -range_start(end) 6509169695Skan is the offset of the collating symbol which is specified 6510169695Skan as the character of the range start(end). 
*/ 6511169695Skan 6512169695Skan /* range_start */ 6513169695Skan if (*workp < 0) 6514169695Skan range_start_char = charset_top - (*workp++); 6515169695Skan else 6516169695Skan { 6517169695Skan str_buf[0] = *workp++; 6518169695Skan range_start_char = str_buf; 6519169695Skan } 6520169695Skan 6521169695Skan /* range_end */ 6522169695Skan if (*workp < 0) 6523169695Skan range_end_char = charset_top - (*workp++); 6524169695Skan else 6525169695Skan { 6526169695Skan str_buf[4] = *workp++; 6527169695Skan range_end_char = str_buf + 4; 6528169695Skan } 6529169695Skan 6530169695Skan# ifdef _LIBC 6531169695Skan if (__wcscoll (range_start_char, str_buf+2) <= 0 6532169695Skan && __wcscoll (str_buf+2, range_end_char) <= 0) 6533169695Skan# else 6534169695Skan if (wcscoll (range_start_char, str_buf+2) <= 0 6535169695Skan && wcscoll (str_buf+2, range_end_char) <= 0) 6536169695Skan# endif 6537169695Skan goto char_set_matched; 6538169695Skan } 6539169695Skan } 6540169695Skan 6541169695Skan /* match with char? */ 6542169695Skan for (; workp < p ; workp++) 6543169695Skan if (c == *workp) 6544169695Skan goto char_set_matched; 6545169695Skan 6546169695Skan negate = !negate; 6547169695Skan 6548169695Skan char_set_matched: 6549169695Skan if (negate) goto fail; 6550169695Skan#else 6551169695Skan /* Cast to `unsigned' instead of `unsigned char' in case the 6552169695Skan bit list is a full 32 bytes long. */ 6553169695Skan if (c < (unsigned) (*p * BYTEWIDTH) 6554169695Skan && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 6555169695Skan negate = !negate; 6556169695Skan 6557169695Skan p += 1 + *p; 6558169695Skan 6559169695Skan if (!negate) goto fail; 6560169695Skan#undef WORK_BUFFER_SIZE 6561169695Skan#endif /* WCHAR */ 6562169695Skan SET_REGS_MATCHED (); 6563169695Skan d++; 6564169695Skan break; 6565169695Skan } 6566169695Skan 6567169695Skan 6568169695Skan /* The beginning of a group is represented by start_memory. 
6569169695Skan The arguments are the register number in the next byte, and the 6570169695Skan number of groups inner to this one in the next. The text 6571169695Skan matched within the group is recorded (in the internal 6572169695Skan registers data structure) under the register number. */ 6573169695Skan case start_memory: 6574169695Skan DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n", 6575169695Skan (long int) *p, (long int) p[1]); 6576169695Skan 6577169695Skan /* Find out if this group can match the empty string. */ 6578169695Skan p1 = p; /* To send to group_match_null_string_p. */ 6579169695Skan 6580169695Skan if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 6581169695Skan REG_MATCH_NULL_STRING_P (reg_info[*p]) 6582169695Skan = PREFIX(group_match_null_string_p) (&p1, pend, reg_info); 6583169695Skan 6584169695Skan /* Save the position in the string where we were the last time 6585169695Skan we were at this open-group operator in case the group is 6586169695Skan operated upon by a repetition operator, e.g., with `(a*)*b' 6587169695Skan against `ab'; then we want to ignore where we are now in 6588169695Skan the string in case this attempt to match fails. */ 6589169695Skan old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 6590169695Skan ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 6591169695Skan : regstart[*p]; 6592169695Skan DEBUG_PRINT2 (" old_regstart: %d\n", 6593169695Skan POINTER_TO_OFFSET (old_regstart[*p])); 6594169695Skan 6595169695Skan regstart[*p] = d; 6596169695Skan DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 6597169695Skan 6598169695Skan IS_ACTIVE (reg_info[*p]) = 1; 6599169695Skan MATCHED_SOMETHING (reg_info[*p]) = 0; 6600169695Skan 6601169695Skan /* Clear this whenever we change the register activity status. */ 6602169695Skan set_regs_matched_done = 0; 6603169695Skan 6604169695Skan /* This is the new highest active register. 
*/ 6605169695Skan highest_active_reg = *p; 6606169695Skan 6607169695Skan /* If nothing was active before, this is the new lowest active 6608169695Skan register. */ 6609169695Skan if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 6610169695Skan lowest_active_reg = *p; 6611169695Skan 6612169695Skan /* Move past the register number and inner group count. */ 6613169695Skan p += 2; 6614169695Skan just_past_start_mem = p; 6615169695Skan 6616169695Skan break; 6617169695Skan 6618169695Skan 6619169695Skan /* The stop_memory opcode represents the end of a group. Its 6620169695Skan arguments are the same as start_memory's: the register 6621169695Skan number, and the number of inner groups. */ 6622169695Skan case stop_memory: 6623169695Skan DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n", 6624169695Skan (long int) *p, (long int) p[1]); 6625169695Skan 6626169695Skan /* We need to save the string position the last time we were at 6627169695Skan this close-group operator in case the group is operated 6628169695Skan upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 6629169695Skan against `aba'; then we want to ignore where we are now in 6630169695Skan the string in case this attempt to match fails. */ 6631169695Skan old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 6632169695Skan ? REG_UNSET (regend[*p]) ? d : regend[*p] 6633169695Skan : regend[*p]; 6634169695Skan DEBUG_PRINT2 (" old_regend: %d\n", 6635169695Skan POINTER_TO_OFFSET (old_regend[*p])); 6636169695Skan 6637169695Skan regend[*p] = d; 6638169695Skan DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 6639169695Skan 6640169695Skan /* This register isn't active anymore. */ 6641169695Skan IS_ACTIVE (reg_info[*p]) = 0; 6642169695Skan 6643169695Skan /* Clear this whenever we change the register activity status. */ 6644169695Skan set_regs_matched_done = 0; 6645169695Skan 6646169695Skan /* If this was the only register active, nothing is active 6647169695Skan anymore. 
*/ 6648169695Skan if (lowest_active_reg == highest_active_reg) 6649169695Skan { 6650169695Skan lowest_active_reg = NO_LOWEST_ACTIVE_REG; 6651169695Skan highest_active_reg = NO_HIGHEST_ACTIVE_REG; 6652169695Skan } 6653169695Skan else 6654169695Skan { /* We must scan for the new highest active register, since 6655169695Skan it isn't necessarily one less than now: consider 6656169695Skan (a(b)c(d(e)f)g). When group 3 ends, after the f), the 6657169695Skan new highest active register is 1. */ 6658169695Skan UCHAR_T r = *p - 1; 6659169695Skan while (r > 0 && !IS_ACTIVE (reg_info[r])) 6660169695Skan r--; 6661169695Skan 6662169695Skan /* If we end up at register zero, that means that we saved 6663169695Skan the registers as the result of an `on_failure_jump', not 6664169695Skan a `start_memory', and we jumped to past the innermost 6665169695Skan `stop_memory'. For example, in ((.)*) we save 6666169695Skan registers 1 and 2 as a result of the *, but when we pop 6667169695Skan back to the second ), we are at the stop_memory 1. 6668169695Skan Thus, nothing is active. */ 6669169695Skan if (r == 0) 6670169695Skan { 6671169695Skan lowest_active_reg = NO_LOWEST_ACTIVE_REG; 6672169695Skan highest_active_reg = NO_HIGHEST_ACTIVE_REG; 6673169695Skan } 6674169695Skan else 6675169695Skan highest_active_reg = r; 6676169695Skan } 6677169695Skan 6678169695Skan /* If just failed to match something this time around with a 6679169695Skan group that's operated on by a repetition operator, try to 6680169695Skan force exit from the ``loop'', and restore the register 6681169695Skan information for this group that we had before trying this 6682169695Skan last match. 
*/ 6683169695Skan if ((!MATCHED_SOMETHING (reg_info[*p]) 6684169695Skan || just_past_start_mem == p - 1) 6685169695Skan && (p + 2) < pend) 6686169695Skan { 6687169695Skan boolean is_a_jump_n = false; 6688169695Skan 6689169695Skan p1 = p + 2; 6690169695Skan mcnt = 0; 6691169695Skan switch ((re_opcode_t) *p1++) 6692169695Skan { 6693169695Skan case jump_n: 6694169695Skan is_a_jump_n = true; 6695169695Skan case pop_failure_jump: 6696169695Skan case maybe_pop_jump: 6697169695Skan case jump: 6698169695Skan case dummy_failure_jump: 6699169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 6700169695Skan if (is_a_jump_n) 6701169695Skan p1 += OFFSET_ADDRESS_SIZE; 6702169695Skan break; 6703169695Skan 6704169695Skan default: 6705169695Skan /* do nothing */ ; 6706169695Skan } 6707169695Skan p1 += mcnt; 6708169695Skan 6709169695Skan /* If the next operation is a jump backwards in the pattern 6710169695Skan to an on_failure_jump right before the start_memory 6711169695Skan corresponding to this stop_memory, exit from the loop 6712169695Skan by forcing a failure after pushing on the stack the 6713169695Skan on_failure_jump's jump in the pattern, and d. */ 6714169695Skan if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 6715169695Skan && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory 6716169695Skan && p1[2+OFFSET_ADDRESS_SIZE] == *p) 6717169695Skan { 6718169695Skan /* If this group ever matched anything, then restore 6719169695Skan what its registers were before trying this last 6720169695Skan failed match, e.g., with `(a*)*b' against `ab' for 6721169695Skan regstart[1], and, e.g., with `((a*)*(b*)*)*' 6722169695Skan against `aba' for regend[3]. 6723169695Skan 6724169695Skan Also restore the registers for inner groups for, 6725169695Skan e.g., `((a*)(b*))*' against `aba' (register 3 would 6726169695Skan otherwise get trashed). 
*/ 6727169695Skan 6728169695Skan if (EVER_MATCHED_SOMETHING (reg_info[*p])) 6729169695Skan { 6730169695Skan unsigned r; 6731169695Skan 6732169695Skan EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 6733169695Skan 6734169695Skan /* Restore this and inner groups' (if any) registers. */ 6735169695Skan for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); 6736169695Skan r++) 6737169695Skan { 6738169695Skan regstart[r] = old_regstart[r]; 6739169695Skan 6740169695Skan /* xx why this test? */ 6741169695Skan if (old_regend[r] >= regstart[r]) 6742169695Skan regend[r] = old_regend[r]; 6743169695Skan } 6744169695Skan } 6745169695Skan p1++; 6746169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 6747169695Skan PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 6748169695Skan 6749169695Skan goto fail; 6750169695Skan } 6751169695Skan } 6752169695Skan 6753169695Skan /* Move past the register number and the inner group count. */ 6754169695Skan p += 2; 6755169695Skan break; 6756169695Skan 6757169695Skan 6758169695Skan /* \<digit> has been turned into a `duplicate' command which is 6759169695Skan followed by the numeric value of <digit> as the register number. */ 6760169695Skan case duplicate: 6761169695Skan { 6762169695Skan register const CHAR_T *d2, *dend2; 6763169695Skan int regno = *p++; /* Get which register to match against. */ 6764169695Skan DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 6765169695Skan 6766169695Skan /* Can't back reference a group which we've never matched. */ 6767169695Skan if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 6768169695Skan goto fail; 6769169695Skan 6770169695Skan /* Where in input to try to start matching. */ 6771169695Skan d2 = regstart[regno]; 6772169695Skan 6773169695Skan /* Where to stop matching; if both the place to start and 6774169695Skan the place to stop matching are in the same string, then 6775169695Skan set to the place to stop, otherwise, for now have to use 6776169695Skan the end of the first string. 
*/ 6777169695Skan 6778169695Skan dend2 = ((FIRST_STRING_P (regstart[regno]) 6779169695Skan == FIRST_STRING_P (regend[regno])) 6780169695Skan ? regend[regno] : end_match_1); 6781169695Skan for (;;) 6782169695Skan { 6783169695Skan /* If necessary, advance to next segment in register 6784169695Skan contents. */ 6785169695Skan while (d2 == dend2) 6786169695Skan { 6787169695Skan if (dend2 == end_match_2) break; 6788169695Skan if (dend2 == regend[regno]) break; 6789169695Skan 6790169695Skan /* End of string1 => advance to string2. */ 6791169695Skan d2 = string2; 6792169695Skan dend2 = regend[regno]; 6793169695Skan } 6794169695Skan /* At end of register contents => success */ 6795169695Skan if (d2 == dend2) break; 6796169695Skan 6797169695Skan /* If necessary, advance to next segment in data. */ 6798169695Skan PREFETCH (); 6799169695Skan 6800169695Skan /* How many characters left in this segment to match. */ 6801169695Skan mcnt = dend - d; 6802169695Skan 6803169695Skan /* Want how many consecutive characters we can match in 6804169695Skan one shot, so, if necessary, adjust the count. */ 6805169695Skan if (mcnt > dend2 - d2) 6806169695Skan mcnt = dend2 - d2; 6807169695Skan 6808169695Skan /* Compare that many; failure if mismatch, else move 6809169695Skan past them. */ 6810169695Skan if (translate 6811169695Skan ? PREFIX(bcmp_translate) (d, d2, mcnt, translate) 6812169695Skan : memcmp (d, d2, mcnt*sizeof(UCHAR_T))) 6813169695Skan goto fail; 6814169695Skan d += mcnt, d2 += mcnt; 6815169695Skan 6816169695Skan /* Do this because we've match some characters. */ 6817169695Skan SET_REGS_MATCHED (); 6818169695Skan } 6819169695Skan } 6820169695Skan break; 6821169695Skan 6822169695Skan 6823169695Skan /* begline matches the empty string at the beginning of the string 6824169695Skan (unless `not_bol' is set in `bufp'), and, if 6825169695Skan `newline_anchor' is set, after newlines. 
*/ 6826169695Skan case begline: 6827169695Skan DEBUG_PRINT1 ("EXECUTING begline.\n"); 6828169695Skan 6829169695Skan if (AT_STRINGS_BEG (d)) 6830169695Skan { 6831169695Skan if (!bufp->not_bol) break; 6832169695Skan } 6833169695Skan else if (d[-1] == '\n' && bufp->newline_anchor) 6834169695Skan { 6835169695Skan break; 6836169695Skan } 6837169695Skan /* In all other cases, we fail. */ 6838169695Skan goto fail; 6839169695Skan 6840169695Skan 6841169695Skan /* endline is the dual of begline. */ 6842169695Skan case endline: 6843169695Skan DEBUG_PRINT1 ("EXECUTING endline.\n"); 6844169695Skan 6845169695Skan if (AT_STRINGS_END (d)) 6846169695Skan { 6847169695Skan if (!bufp->not_eol) break; 6848169695Skan } 6849169695Skan 6850169695Skan /* We have to ``prefetch'' the next character. */ 6851169695Skan else if ((d == end1 ? *string2 : *d) == '\n' 6852169695Skan && bufp->newline_anchor) 6853169695Skan { 6854169695Skan break; 6855169695Skan } 6856169695Skan goto fail; 6857169695Skan 6858169695Skan 6859169695Skan /* Match at the very beginning of the data. */ 6860169695Skan case begbuf: 6861169695Skan DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 6862169695Skan if (AT_STRINGS_BEG (d)) 6863169695Skan break; 6864169695Skan goto fail; 6865169695Skan 6866169695Skan 6867169695Skan /* Match at the very end of the data. */ 6868169695Skan case endbuf: 6869169695Skan DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 6870169695Skan if (AT_STRINGS_END (d)) 6871169695Skan break; 6872169695Skan goto fail; 6873169695Skan 6874169695Skan 6875169695Skan /* on_failure_keep_string_jump is used to optimize `.*\n'. It 6876169695Skan pushes NULL as the value for the string on the stack. Then 6877169695Skan `pop_failure_point' will keep the current value for the 6878169695Skan string, instead of restoring it. To see why, consider 6879169695Skan matching `foo\nbar' against `.*\n'. The .* matches the foo; 6880169695Skan then the . fails against the \n. 
But the next thing we want 6881169695Skan to do is match the \n against the \n; if we restored the 6882169695Skan string value, we would be back at the foo. 6883169695Skan 6884169695Skan Because this is used only in specific cases, we don't need to 6885169695Skan check all the things that `on_failure_jump' does, to make 6886169695Skan sure the right things get saved on the stack. Hence we don't 6887169695Skan share its code. The only reason to push anything on the 6888169695Skan stack at all is that otherwise we would have to change 6889169695Skan `anychar's code to do something besides goto fail in this 6890169695Skan case; that seems worse than this. */ 6891169695Skan case on_failure_keep_string_jump: 6892169695Skan DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 6893169695Skan 6894169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p); 6895169695Skan#ifdef _LIBC 6896169695Skan DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); 6897169695Skan#else 6898169695Skan DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 6899169695Skan#endif 6900169695Skan 6901169695Skan PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 6902169695Skan break; 6903169695Skan 6904169695Skan 6905169695Skan /* Uses of on_failure_jump: 6906169695Skan 6907169695Skan Each alternative starts with an on_failure_jump that points 6908169695Skan to the beginning of the next alternative. Each alternative 6909169695Skan except the last ends with a jump that in effect jumps past 6910169695Skan the rest of the alternatives. (They really jump to the 6911169695Skan ending jump of the following alternative, because tensioning 6912169695Skan these jumps is a hassle.) 6913169695Skan 6914169695Skan Repeats start with an on_failure_jump that points past both 6915169695Skan the repetition text and either the following jump or 6916169695Skan pop_failure_jump back to this on_failure_jump. 
*/ 6917169695Skan case on_failure_jump: 6918169695Skan on_failure: 6919169695Skan DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 6920169695Skan 6921169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p); 6922169695Skan#ifdef _LIBC 6923169695Skan DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); 6924169695Skan#else 6925169695Skan DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 6926169695Skan#endif 6927169695Skan 6928169695Skan /* If this on_failure_jump comes right before a group (i.e., 6929169695Skan the original * applied to a group), save the information 6930169695Skan for that group and all inner ones, so that if we fail back 6931169695Skan to this point, the group's information will be correct. 6932169695Skan For example, in \(a*\)*\1, we need the preceding group, 6933169695Skan and in \(zz\(a*\)b*\)\2, we need the inner group. */ 6934169695Skan 6935169695Skan /* We can't use `p' to check ahead because we push 6936169695Skan a failure point to `p + mcnt' after we do this. */ 6937169695Skan p1 = p; 6938169695Skan 6939169695Skan /* We need to skip no_op's before we look for the 6940169695Skan start_memory in case this on_failure_jump is happening as 6941169695Skan the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 6942169695Skan against aba. */ 6943169695Skan while (p1 < pend && (re_opcode_t) *p1 == no_op) 6944169695Skan p1++; 6945169695Skan 6946169695Skan if (p1 < pend && (re_opcode_t) *p1 == start_memory) 6947169695Skan { 6948169695Skan /* We have a new highest active register now. This will 6949169695Skan get reset at the start_memory we are about to get to, 6950169695Skan but we will have saved all the registers relevant to 6951169695Skan this repetition op, as described above. 
*/ 6952169695Skan highest_active_reg = *(p1 + 1) + *(p1 + 2); 6953169695Skan if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 6954169695Skan lowest_active_reg = *(p1 + 1); 6955169695Skan } 6956169695Skan 6957169695Skan DEBUG_PRINT1 (":\n"); 6958169695Skan PUSH_FAILURE_POINT (p + mcnt, d, -2); 6959169695Skan break; 6960169695Skan 6961169695Skan 6962169695Skan /* A smart repeat ends with `maybe_pop_jump'. 6963169695Skan We change it to either `pop_failure_jump' or `jump'. */ 6964169695Skan case maybe_pop_jump: 6965169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p); 6966169695Skan DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 6967169695Skan { 6968169695Skan register UCHAR_T *p2 = p; 6969169695Skan 6970169695Skan /* Compare the beginning of the repeat with what in the 6971169695Skan pattern follows its end. If we can establish that there 6972169695Skan is nothing that they would both match, i.e., that we 6973169695Skan would have to backtrack because of (as in, e.g., `a*a') 6974169695Skan then we can change to pop_failure_jump, because we'll 6975169695Skan never have to backtrack. 6976169695Skan 6977169695Skan This is not true in the case of alternatives: in 6978169695Skan `(a|ab)*' we do need to backtrack to the `ab' alternative 6979169695Skan (e.g., if the string was `ab'). But instead of trying to 6980169695Skan detect that here, the alternative has put on a dummy 6981169695Skan failure point which is what we will end up popping. */ 6982169695Skan 6983169695Skan /* Skip over open/close-group commands. 6984169695Skan If what follows this loop is a ...+ construct, 6985169695Skan look at what begins its body, since we will have to 6986169695Skan match at least one of that. 
*/ 6987169695Skan while (1) 6988169695Skan { 6989169695Skan if (p2 + 2 < pend 6990169695Skan && ((re_opcode_t) *p2 == stop_memory 6991169695Skan || (re_opcode_t) *p2 == start_memory)) 6992169695Skan p2 += 3; 6993169695Skan else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend 6994169695Skan && (re_opcode_t) *p2 == dummy_failure_jump) 6995169695Skan p2 += 2 + 2 * OFFSET_ADDRESS_SIZE; 6996169695Skan else 6997169695Skan break; 6998169695Skan } 6999169695Skan 7000169695Skan p1 = p + mcnt; 7001169695Skan /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 7002169695Skan to the `maybe_finalize_jump' of this case. Examine what 7003169695Skan follows. */ 7004169695Skan 7005169695Skan /* If we're at the end of the pattern, we can change. */ 7006169695Skan if (p2 == pend) 7007169695Skan { 7008169695Skan /* Consider what happens when matching ":\(.*\)" 7009169695Skan against ":/". I don't really understand this code 7010169695Skan yet. */ 7011169695Skan p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 7012169695Skan pop_failure_jump; 7013169695Skan DEBUG_PRINT1 7014169695Skan (" End of pattern: change to `pop_failure_jump'.\n"); 7015169695Skan } 7016169695Skan 7017169695Skan else if ((re_opcode_t) *p2 == exactn 7018169695Skan#ifdef MBS_SUPPORT 7019169695Skan || (re_opcode_t) *p2 == exactn_bin 7020169695Skan#endif 7021169695Skan || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 7022169695Skan { 7023169695Skan register UCHAR_T c 7024169695Skan = *p2 == (UCHAR_T) endline ? 
'\n' : p2[2]; 7025169695Skan 7026169695Skan if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn 7027169695Skan#ifdef MBS_SUPPORT 7028169695Skan || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin 7029169695Skan#endif 7030169695Skan ) && p1[3+OFFSET_ADDRESS_SIZE] != c) 7031169695Skan { 7032169695Skan p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T) 7033169695Skan pop_failure_jump; 7034169695Skan#ifdef WCHAR 7035169695Skan DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n", 7036169695Skan (wint_t) c, 7037169695Skan (wint_t) p1[3+OFFSET_ADDRESS_SIZE]); 7038169695Skan#else 7039169695Skan DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 7040169695Skan (char) c, 7041169695Skan (char) p1[3+OFFSET_ADDRESS_SIZE]); 7042169695Skan#endif 7043169695Skan } 7044169695Skan 7045169695Skan#ifndef WCHAR 7046169695Skan else if ((re_opcode_t) p1[3] == charset 7047169695Skan || (re_opcode_t) p1[3] == charset_not) 7048169695Skan { 7049169695Skan int negate = (re_opcode_t) p1[3] == charset_not; 7050169695Skan 7051169695Skan if (c < (unsigned) (p1[4] * BYTEWIDTH) 7052169695Skan && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 7053169695Skan negate = !negate; 7054169695Skan 7055169695Skan /* `negate' is equal to 1 if c would match, which means 7056169695Skan that we can't change to pop_failure_jump. */ 7057169695Skan if (!negate) 7058169695Skan { 7059169695Skan p[-3] = (unsigned char) pop_failure_jump; 7060169695Skan DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7061169695Skan } 7062169695Skan } 7063169695Skan#endif /* not WCHAR */ 7064169695Skan } 7065169695Skan#ifndef WCHAR 7066169695Skan else if ((re_opcode_t) *p2 == charset) 7067169695Skan { 7068169695Skan /* We win if the first character of the loop is not part 7069169695Skan of the charset. */ 7070169695Skan if ((re_opcode_t) p1[3] == exactn 7071169695Skan && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[5] 7072169695Skan && (p2[2 + p1[5] / BYTEWIDTH] 7073169695Skan & (1 << (p1[5] % BYTEWIDTH))))) 7074169695Skan { 7075169695Skan p[-3] = (unsigned char) pop_failure_jump; 7076169695Skan DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7077169695Skan } 7078169695Skan 7079169695Skan else if ((re_opcode_t) p1[3] == charset_not) 7080169695Skan { 7081169695Skan int idx; 7082169695Skan /* We win if the charset_not inside the loop 7083169695Skan lists every character listed in the charset after. */ 7084169695Skan for (idx = 0; idx < (int) p2[1]; idx++) 7085169695Skan if (! (p2[2 + idx] == 0 7086169695Skan || (idx < (int) p1[4] 7087169695Skan && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 7088169695Skan break; 7089169695Skan 7090169695Skan if (idx == p2[1]) 7091169695Skan { 7092169695Skan p[-3] = (unsigned char) pop_failure_jump; 7093169695Skan DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7094169695Skan } 7095169695Skan } 7096169695Skan else if ((re_opcode_t) p1[3] == charset) 7097169695Skan { 7098169695Skan int idx; 7099169695Skan /* We win if the charset inside the loop 7100169695Skan has no overlap with the one after the loop. */ 7101169695Skan for (idx = 0; 7102169695Skan idx < (int) p2[1] && idx < (int) p1[4]; 7103169695Skan idx++) 7104169695Skan if ((p2[2 + idx] & p1[5 + idx]) != 0) 7105169695Skan break; 7106169695Skan 7107169695Skan if (idx == p2[1] || idx == p1[4]) 7108169695Skan { 7109169695Skan p[-3] = (unsigned char) pop_failure_jump; 7110169695Skan DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 7111169695Skan } 7112169695Skan } 7113169695Skan } 7114169695Skan#endif /* not WCHAR */ 7115169695Skan } 7116169695Skan p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. 
*/ 7117169695Skan if ((re_opcode_t) p[-1] != pop_failure_jump) 7118169695Skan { 7119169695Skan p[-1] = (UCHAR_T) jump; 7120169695Skan DEBUG_PRINT1 (" Match => jump.\n"); 7121169695Skan goto unconditional_jump; 7122169695Skan } 7123169695Skan /* Note fall through. */ 7124169695Skan 7125169695Skan 7126169695Skan /* The end of a simple repeat has a pop_failure_jump back to 7127169695Skan its matching on_failure_jump, where the latter will push a 7128169695Skan failure point. The pop_failure_jump takes off failure 7129169695Skan points put on by this pop_failure_jump's matching 7130169695Skan on_failure_jump; we got through the pattern to here from the 7131169695Skan matching on_failure_jump, so didn't fail. */ 7132169695Skan case pop_failure_jump: 7133169695Skan { 7134169695Skan /* We need to pass separate storage for the lowest and 7135169695Skan highest registers, even though we don't care about the 7136169695Skan actual values. Otherwise, we will restore only one 7137169695Skan register from the stack, since lowest will == highest in 7138169695Skan `pop_failure_point'. */ 7139169695Skan active_reg_t dummy_low_reg, dummy_high_reg; 7140169695Skan UCHAR_T *pdummy = NULL; 7141169695Skan const CHAR_T *sdummy = NULL; 7142169695Skan 7143169695Skan DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 7144169695Skan POP_FAILURE_POINT (sdummy, pdummy, 7145169695Skan dummy_low_reg, dummy_high_reg, 7146169695Skan reg_dummy, reg_dummy, reg_info_dummy); 7147169695Skan } 7148169695Skan /* Note fall through. */ 7149169695Skan 7150169695Skan unconditional_jump: 7151169695Skan#ifdef _LIBC 7152169695Skan DEBUG_PRINT2 ("\n%p: ", p); 7153169695Skan#else 7154169695Skan DEBUG_PRINT2 ("\n0x%x: ", p); 7155169695Skan#endif 7156169695Skan /* Note fall through. */ 7157169695Skan 7158169695Skan /* Unconditionally jump (without popping any failure points). */ 7159169695Skan case jump: 7160169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. 
*/ 7161169695Skan DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 7162169695Skan p += mcnt; /* Do the jump. */ 7163169695Skan#ifdef _LIBC 7164169695Skan DEBUG_PRINT2 ("(to %p).\n", p); 7165169695Skan#else 7166169695Skan DEBUG_PRINT2 ("(to 0x%x).\n", p); 7167169695Skan#endif 7168169695Skan break; 7169169695Skan 7170169695Skan 7171169695Skan /* We need this opcode so we can detect where alternatives end 7172169695Skan in `group_match_null_string_p' et al. */ 7173169695Skan case jump_past_alt: 7174169695Skan DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 7175169695Skan goto unconditional_jump; 7176169695Skan 7177169695Skan 7178169695Skan /* Normally, the on_failure_jump pushes a failure point, which 7179169695Skan then gets popped at pop_failure_jump. We will end up at 7180169695Skan pop_failure_jump, also, and with a pattern of, say, `a+', we 7181169695Skan are skipping over the on_failure_jump, so we have to push 7182169695Skan something meaningless for pop_failure_jump to pop. */ 7183169695Skan case dummy_failure_jump: 7184169695Skan DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 7185169695Skan /* It doesn't matter what we push for the string here. What 7186169695Skan the code at `fail' tests is the value for the pattern. */ 7187169695Skan PUSH_FAILURE_POINT (NULL, NULL, -2); 7188169695Skan goto unconditional_jump; 7189169695Skan 7190169695Skan 7191169695Skan /* At the end of an alternative, we need to push a dummy failure 7192169695Skan point in case we are followed by a `pop_failure_jump', because 7193169695Skan we don't want the failure point for the alternative to be 7194169695Skan popped. For example, matching `(a|ab)*' against `aab' 7195169695Skan requires that we match the `ab' alternative. */ 7196169695Skan case push_dummy_failure: 7197169695Skan DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 7198169695Skan /* See comments just above at `dummy_failure_jump' about the 7199169695Skan two zeroes. 
*/ 7200169695Skan PUSH_FAILURE_POINT (NULL, NULL, -2); 7201169695Skan break; 7202169695Skan 7203169695Skan /* Have to succeed matching what follows at least n times. 7204169695Skan After that, handle like `on_failure_jump'. */ 7205169695Skan case succeed_n: 7206169695Skan EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 7207169695Skan DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 7208169695Skan 7209169695Skan assert (mcnt >= 0); 7210169695Skan /* Originally, this is how many times we HAVE to succeed. */ 7211169695Skan if (mcnt > 0) 7212169695Skan { 7213169695Skan mcnt--; 7214169695Skan p += OFFSET_ADDRESS_SIZE; 7215169695Skan STORE_NUMBER_AND_INCR (p, mcnt); 7216169695Skan#ifdef _LIBC 7217169695Skan DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE 7218169695Skan , mcnt); 7219169695Skan#else 7220169695Skan DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE 7221169695Skan , mcnt); 7222169695Skan#endif 7223169695Skan } 7224169695Skan else if (mcnt == 0) 7225169695Skan { 7226169695Skan#ifdef _LIBC 7227169695Skan DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", 7228169695Skan p + OFFSET_ADDRESS_SIZE); 7229169695Skan#else 7230169695Skan DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", 7231169695Skan p + OFFSET_ADDRESS_SIZE); 7232169695Skan#endif /* _LIBC */ 7233169695Skan 7234169695Skan#ifdef WCHAR 7235169695Skan p[1] = (UCHAR_T) no_op; 7236169695Skan#else 7237169695Skan p[2] = (UCHAR_T) no_op; 7238169695Skan p[3] = (UCHAR_T) no_op; 7239169695Skan#endif /* WCHAR */ 7240169695Skan goto on_failure; 7241169695Skan } 7242169695Skan break; 7243169695Skan 7244169695Skan case jump_n: 7245169695Skan EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE); 7246169695Skan DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 7247169695Skan 7248169695Skan /* Originally, this is how many times we CAN jump. 
*/ 7249169695Skan if (mcnt) 7250169695Skan { 7251169695Skan mcnt--; 7252169695Skan STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt); 7253169695Skan 7254169695Skan#ifdef _LIBC 7255169695Skan DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE, 7256169695Skan mcnt); 7257169695Skan#else 7258169695Skan DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE, 7259169695Skan mcnt); 7260169695Skan#endif /* _LIBC */ 7261169695Skan goto unconditional_jump; 7262169695Skan } 7263169695Skan /* If don't have to jump any more, skip over the rest of command. */ 7264169695Skan else 7265169695Skan p += 2 * OFFSET_ADDRESS_SIZE; 7266169695Skan break; 7267169695Skan 7268169695Skan case set_number_at: 7269169695Skan { 7270169695Skan DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 7271169695Skan 7272169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p); 7273169695Skan p1 = p + mcnt; 7274169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p); 7275169695Skan#ifdef _LIBC 7276169695Skan DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); 7277169695Skan#else 7278169695Skan DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 7279169695Skan#endif 7280169695Skan STORE_NUMBER (p1, mcnt); 7281169695Skan break; 7282169695Skan } 7283169695Skan 7284169695Skan#if 0 7285169695Skan /* The DEC Alpha C compiler 3.x generates incorrect code for the 7286169695Skan test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 7287169695Skan AT_WORD_BOUNDARY, so this code is disabled. Expanding the 7288169695Skan macro and introducing temporary variables works around the bug. 
*/ 7289169695Skan 7290169695Skan case wordbound: 7291169695Skan DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 7292169695Skan if (AT_WORD_BOUNDARY (d)) 7293169695Skan break; 7294169695Skan goto fail; 7295169695Skan 7296169695Skan case notwordbound: 7297169695Skan DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 7298169695Skan if (AT_WORD_BOUNDARY (d)) 7299169695Skan goto fail; 7300169695Skan break; 7301169695Skan#else 7302169695Skan case wordbound: 7303169695Skan { 7304169695Skan boolean prevchar, thischar; 7305169695Skan 7306169695Skan DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 7307169695Skan if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 7308169695Skan break; 7309169695Skan 7310169695Skan prevchar = WORDCHAR_P (d - 1); 7311169695Skan thischar = WORDCHAR_P (d); 7312169695Skan if (prevchar != thischar) 7313169695Skan break; 7314169695Skan goto fail; 7315169695Skan } 7316169695Skan 7317169695Skan case notwordbound: 7318169695Skan { 7319169695Skan boolean prevchar, thischar; 7320169695Skan 7321169695Skan DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 7322169695Skan if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 7323169695Skan goto fail; 7324169695Skan 7325169695Skan prevchar = WORDCHAR_P (d - 1); 7326169695Skan thischar = WORDCHAR_P (d); 7327169695Skan if (prevchar != thischar) 7328169695Skan goto fail; 7329169695Skan break; 7330169695Skan } 7331169695Skan#endif 7332169695Skan 7333169695Skan case wordbeg: 7334169695Skan DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 7335169695Skan if (!AT_STRINGS_END (d) && WORDCHAR_P (d) 7336169695Skan && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 7337169695Skan break; 7338169695Skan goto fail; 7339169695Skan 7340169695Skan case wordend: 7341169695Skan DEBUG_PRINT1 ("EXECUTING wordend.\n"); 7342169695Skan if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 7343169695Skan && (AT_STRINGS_END (d) || !WORDCHAR_P (d))) 7344169695Skan break; 7345169695Skan goto fail; 7346169695Skan 7347169695Skan#ifdef emacs 7348169695Skan case before_dot: 7349169695Skan DEBUG_PRINT1 
("EXECUTING before_dot.\n"); 7350169695Skan if (PTR_CHAR_POS ((unsigned char *) d) >= point) 7351169695Skan goto fail; 7352169695Skan break; 7353169695Skan 7354169695Skan case at_dot: 7355169695Skan DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 7356169695Skan if (PTR_CHAR_POS ((unsigned char *) d) != point) 7357169695Skan goto fail; 7358169695Skan break; 7359169695Skan 7360169695Skan case after_dot: 7361169695Skan DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 7362169695Skan if (PTR_CHAR_POS ((unsigned char *) d) <= point) 7363169695Skan goto fail; 7364169695Skan break; 7365169695Skan 7366169695Skan case syntaxspec: 7367169695Skan DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 7368169695Skan mcnt = *p++; 7369169695Skan goto matchsyntax; 7370169695Skan 7371169695Skan case wordchar: 7372169695Skan DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 7373169695Skan mcnt = (int) Sword; 7374169695Skan matchsyntax: 7375169695Skan PREFETCH (); 7376169695Skan /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 7377169695Skan d++; 7378169695Skan if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 7379169695Skan goto fail; 7380169695Skan SET_REGS_MATCHED (); 7381169695Skan break; 7382169695Skan 7383169695Skan case notsyntaxspec: 7384169695Skan DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 7385169695Skan mcnt = *p++; 7386169695Skan goto matchnotsyntax; 7387169695Skan 7388169695Skan case notwordchar: 7389169695Skan DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 7390169695Skan mcnt = (int) Sword; 7391169695Skan matchnotsyntax: 7392169695Skan PREFETCH (); 7393169695Skan /* Can't use *d++ here; SYNTAX may be an unsafe macro. 
*/ 7394169695Skan d++; 7395169695Skan if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 7396169695Skan goto fail; 7397169695Skan SET_REGS_MATCHED (); 7398169695Skan break; 7399169695Skan 7400169695Skan#else /* not emacs */ 7401169695Skan case wordchar: 7402169695Skan DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 7403169695Skan PREFETCH (); 7404169695Skan if (!WORDCHAR_P (d)) 7405169695Skan goto fail; 7406169695Skan SET_REGS_MATCHED (); 7407169695Skan d++; 7408169695Skan break; 7409169695Skan 7410169695Skan case notwordchar: 7411169695Skan DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 7412169695Skan PREFETCH (); 7413169695Skan if (WORDCHAR_P (d)) 7414169695Skan goto fail; 7415169695Skan SET_REGS_MATCHED (); 7416169695Skan d++; 7417169695Skan break; 7418169695Skan#endif /* not emacs */ 7419169695Skan 7420169695Skan default: 7421169695Skan abort (); 7422169695Skan } 7423169695Skan continue; /* Successfully executed one pattern command; keep going. */ 7424169695Skan 7425169695Skan 7426169695Skan /* We goto here if a matching operation fails. */ 7427169695Skan fail: 7428169695Skan if (!FAIL_STACK_EMPTY ()) 7429169695Skan { /* A restart point is known. Restore to that state. */ 7430169695Skan DEBUG_PRINT1 ("\nFAIL:\n"); 7431169695Skan POP_FAILURE_POINT (d, p, 7432169695Skan lowest_active_reg, highest_active_reg, 7433169695Skan regstart, regend, reg_info); 7434169695Skan 7435169695Skan /* If this failure point is a dummy, try the next one. */ 7436169695Skan if (!p) 7437169695Skan goto fail; 7438169695Skan 7439169695Skan /* If we failed to the end of the pattern, don't examine *p. */ 7440169695Skan assert (p <= pend); 7441169695Skan if (p < pend) 7442169695Skan { 7443169695Skan boolean is_a_jump_n = false; 7444169695Skan 7445169695Skan /* If failed to a backwards jump that's part of a repetition 7446169695Skan loop, need to pop this failure point and use the next one. 
*/ 7447169695Skan switch ((re_opcode_t) *p) 7448169695Skan { 7449169695Skan case jump_n: 7450169695Skan is_a_jump_n = true; 7451169695Skan case maybe_pop_jump: 7452169695Skan case pop_failure_jump: 7453169695Skan case jump: 7454169695Skan p1 = p + 1; 7455169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7456169695Skan p1 += mcnt; 7457169695Skan 7458169695Skan if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 7459169695Skan || (!is_a_jump_n 7460169695Skan && (re_opcode_t) *p1 == on_failure_jump)) 7461169695Skan goto fail; 7462169695Skan break; 7463169695Skan default: 7464169695Skan /* do nothing */ ; 7465169695Skan } 7466169695Skan } 7467169695Skan 7468169695Skan if (d >= string1 && d <= end1) 7469169695Skan dend = end_match_1; 7470169695Skan } 7471169695Skan else 7472169695Skan break; /* Matching at this starting point really fails. */ 7473169695Skan } /* for (;;) */ 7474169695Skan 7475169695Skan if (best_regs_set) 7476169695Skan goto restore_best_regs; 7477169695Skan 7478169695Skan FREE_VARIABLES (); 7479169695Skan 7480169695Skan return -1; /* Failure to match. */ 7481169695Skan} /* re_match_2 */ 7482169695Skan 7483169695Skan/* Subroutine definitions for re_match_2. */ 7484169695Skan 7485169695Skan 7486169695Skan/* We are passed P pointing to a register number after a start_memory. 7487169695Skan 7488169695Skan Return true if the pattern up to the corresponding stop_memory can 7489169695Skan match the empty string, and false otherwise. 7490169695Skan 7491169695Skan If we find the matching stop_memory, sets P to point to one past its number. 7492169695Skan Otherwise, sets P to an undefined byte less than or equal to END. 7493169695Skan 7494169695Skan We don't handle duplicates properly (yet). 
*/ 7495169695Skan 7496169695Skanstatic boolean 7497169695SkanPREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end, 7498169695Skan PREFIX(register_info_type) *reg_info) 7499169695Skan{ 7500169695Skan int mcnt; 7501169695Skan /* Point to after the args to the start_memory. */ 7502169695Skan UCHAR_T *p1 = *p + 2; 7503169695Skan 7504169695Skan while (p1 < end) 7505169695Skan { 7506169695Skan /* Skip over opcodes that can match nothing, and return true or 7507169695Skan false, as appropriate, when we get to one that can't, or to the 7508169695Skan matching stop_memory. */ 7509169695Skan 7510169695Skan switch ((re_opcode_t) *p1) 7511169695Skan { 7512169695Skan /* Could be either a loop or a series of alternatives. */ 7513169695Skan case on_failure_jump: 7514169695Skan p1++; 7515169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7516169695Skan 7517169695Skan /* If the next operation is not a jump backwards in the 7518169695Skan pattern. */ 7519169695Skan 7520169695Skan if (mcnt >= 0) 7521169695Skan { 7522169695Skan /* Go through the on_failure_jumps of the alternatives, 7523169695Skan seeing if any of the alternatives cannot match nothing. 7524169695Skan The last alternative starts with only a jump, 7525169695Skan whereas the rest start with on_failure_jump and end 7526169695Skan with a jump, e.g., here is the pattern for `a|b|c': 7527169695Skan 7528169695Skan /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 7529169695Skan /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 7530169695Skan /exactn/1/c 7531169695Skan 7532169695Skan So, we have to first go through the first (n-1) 7533169695Skan alternatives and then deal with the last one separately. */ 7534169695Skan 7535169695Skan 7536169695Skan /* Deal with the first (n-1) alternatives, which start 7537169695Skan with an on_failure_jump (see above) that jumps to right 7538169695Skan past a jump_past_alt. 
*/ 7539169695Skan 7540169695Skan while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] == 7541169695Skan jump_past_alt) 7542169695Skan { 7543169695Skan /* `mcnt' holds how many bytes long the alternative 7544169695Skan is, including the ending `jump_past_alt' and 7545169695Skan its number. */ 7546169695Skan 7547169695Skan if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt - 7548169695Skan (1 + OFFSET_ADDRESS_SIZE), 7549169695Skan reg_info)) 7550169695Skan return false; 7551169695Skan 7552169695Skan /* Move to right after this alternative, including the 7553169695Skan jump_past_alt. */ 7554169695Skan p1 += mcnt; 7555169695Skan 7556169695Skan /* Break if it's the beginning of an n-th alternative 7557169695Skan that doesn't begin with an on_failure_jump. */ 7558169695Skan if ((re_opcode_t) *p1 != on_failure_jump) 7559169695Skan break; 7560169695Skan 7561169695Skan /* Still have to check that it's not an n-th 7562169695Skan alternative that starts with an on_failure_jump. */ 7563169695Skan p1++; 7564169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7565169695Skan if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] != 7566169695Skan jump_past_alt) 7567169695Skan { 7568169695Skan /* Get to the beginning of the n-th alternative. */ 7569169695Skan p1 -= 1 + OFFSET_ADDRESS_SIZE; 7570169695Skan break; 7571169695Skan } 7572169695Skan } 7573169695Skan 7574169695Skan /* Deal with the last alternative: go back and get number 7575169695Skan of the `jump_past_alt' just before it. `mcnt' contains 7576169695Skan the length of the alternative. */ 7577169695Skan EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE); 7578169695Skan 7579169695Skan if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info)) 7580169695Skan return false; 7581169695Skan 7582169695Skan p1 += mcnt; /* Get past the n-th alternative. 
*/ 7583169695Skan } /* if mcnt > 0 */ 7584169695Skan break; 7585169695Skan 7586169695Skan 7587169695Skan case stop_memory: 7588169695Skan assert (p1[1] == **p); 7589169695Skan *p = p1 + 2; 7590169695Skan return true; 7591169695Skan 7592169695Skan 7593169695Skan default: 7594169695Skan if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info)) 7595169695Skan return false; 7596169695Skan } 7597169695Skan } /* while p1 < end */ 7598169695Skan 7599169695Skan return false; 7600169695Skan} /* group_match_null_string_p */ 7601169695Skan 7602169695Skan 7603169695Skan/* Similar to group_match_null_string_p, but doesn't deal with alternatives: 7604169695Skan It expects P to be the first byte of a single alternative and END one 7605169695Skan byte past the last. The alternative can contain groups. */ 7606169695Skan 7607169695Skanstatic boolean 7608169695SkanPREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end, 7609169695Skan PREFIX(register_info_type) *reg_info) 7610169695Skan{ 7611169695Skan int mcnt; 7612169695Skan UCHAR_T *p1 = p; 7613169695Skan 7614169695Skan while (p1 < end) 7615169695Skan { 7616169695Skan /* Skip over opcodes that can match nothing, and break when we get 7617169695Skan to one that can't. */ 7618169695Skan 7619169695Skan switch ((re_opcode_t) *p1) 7620169695Skan { 7621169695Skan /* It's a loop. */ 7622169695Skan case on_failure_jump: 7623169695Skan p1++; 7624169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7625169695Skan p1 += mcnt; 7626169695Skan break; 7627169695Skan 7628169695Skan default: 7629169695Skan if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info)) 7630169695Skan return false; 7631169695Skan } 7632169695Skan } /* while p1 < end */ 7633169695Skan 7634169695Skan return true; 7635169695Skan} /* alt_match_null_string_p */ 7636169695Skan 7637169695Skan 7638169695Skan/* Deals with the ops common to group_match_null_string_p and 7639169695Skan alt_match_null_string_p. 
7640169695Skan 7641169695Skan Sets P to one after the op and its arguments, if any. */ 7642169695Skan 7643169695Skanstatic boolean 7644169695SkanPREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end, 7645169695Skan PREFIX(register_info_type) *reg_info) 7646169695Skan{ 7647169695Skan int mcnt; 7648169695Skan boolean ret; 7649169695Skan int reg_no; 7650169695Skan UCHAR_T *p1 = *p; 7651169695Skan 7652169695Skan switch ((re_opcode_t) *p1++) 7653169695Skan { 7654169695Skan case no_op: 7655169695Skan case begline: 7656169695Skan case endline: 7657169695Skan case begbuf: 7658169695Skan case endbuf: 7659169695Skan case wordbeg: 7660169695Skan case wordend: 7661169695Skan case wordbound: 7662169695Skan case notwordbound: 7663169695Skan#ifdef emacs 7664169695Skan case before_dot: 7665169695Skan case at_dot: 7666169695Skan case after_dot: 7667169695Skan#endif 7668169695Skan break; 7669169695Skan 7670169695Skan case start_memory: 7671169695Skan reg_no = *p1; 7672169695Skan assert (reg_no > 0 && reg_no <= MAX_REGNUM); 7673169695Skan ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info); 7674169695Skan 7675169695Skan /* Have to set this here in case we're checking a group which 7676169695Skan contains a group and a back reference to it. */ 7677169695Skan 7678169695Skan if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 7679169695Skan REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 7680169695Skan 7681169695Skan if (!ret) 7682169695Skan return false; 7683169695Skan break; 7684169695Skan 7685169695Skan /* If this is an optimized succeed_n for zero times, make the jump. */ 7686169695Skan case jump: 7687169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7688169695Skan if (mcnt >= 0) 7689169695Skan p1 += mcnt; 7690169695Skan else 7691169695Skan return false; 7692169695Skan break; 7693169695Skan 7694169695Skan case succeed_n: 7695169695Skan /* Get to the number of times to succeed. 
*/ 7696169695Skan p1 += OFFSET_ADDRESS_SIZE; 7697169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7698169695Skan 7699169695Skan if (mcnt == 0) 7700169695Skan { 7701169695Skan p1 -= 2 * OFFSET_ADDRESS_SIZE; 7702169695Skan EXTRACT_NUMBER_AND_INCR (mcnt, p1); 7703169695Skan p1 += mcnt; 7704169695Skan } 7705169695Skan else 7706169695Skan return false; 7707169695Skan break; 7708169695Skan 7709169695Skan case duplicate: 7710169695Skan if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 7711169695Skan return false; 7712169695Skan break; 7713169695Skan 7714169695Skan case set_number_at: 7715169695Skan p1 += 2 * OFFSET_ADDRESS_SIZE; 7716169695Skan 7717169695Skan default: 7718169695Skan /* All other opcodes mean we cannot match the empty string. */ 7719169695Skan return false; 7720169695Skan } 7721169695Skan 7722169695Skan *p = p1; 7723169695Skan return true; 7724169695Skan} /* common_op_match_null_string_p */ 7725169695Skan 7726169695Skan 7727169695Skan/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 7728169695Skan bytes; nonzero otherwise. */ 7729169695Skan 7730169695Skanstatic int 7731169695SkanPREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len, 7732169695Skan RE_TRANSLATE_TYPE translate) 7733169695Skan{ 7734169695Skan register const UCHAR_T *p1 = (const UCHAR_T *) s1; 7735169695Skan register const UCHAR_T *p2 = (const UCHAR_T *) s2; 7736169695Skan while (len) 7737169695Skan { 7738169695Skan#ifdef WCHAR 7739169695Skan if (((*p1<=0xff)?translate[*p1++]:*p1++) 7740169695Skan != ((*p2<=0xff)?translate[*p2++]:*p2++)) 7741169695Skan return 1; 7742169695Skan#else /* BYTE */ 7743169695Skan if (translate[*p1++] != translate[*p2++]) return 1; 7744169695Skan#endif /* WCHAR */ 7745169695Skan len--; 7746169695Skan } 7747169695Skan return 0; 7748169695Skan} 7749169695Skan 7750169695Skan 7751169695Skan#else /* not INSIDE_RECURSION */ 7752169695Skan 7753169695Skan/* Entry points for GNU code. 
*/ 7754169695Skan 7755169695Skan/* re_compile_pattern is the GNU regular expression compiler: it 7756169695Skan compiles PATTERN (of length SIZE) and puts the result in BUFP. 7757169695Skan Returns 0 if the pattern was valid, otherwise an error string. 7758169695Skan 7759169695Skan Assumes the `allocated' (and perhaps `buffer') and `translate' fields 7760169695Skan are set in BUFP on entry. 7761169695Skan 7762169695Skan We call regex_compile to do the actual compilation. */ 7763169695Skan 7764169695Skanconst char * 7765169695Skanre_compile_pattern (const char *pattern, size_t length, 7766169695Skan struct re_pattern_buffer *bufp) 7767169695Skan{ 7768169695Skan reg_errcode_t ret; 7769169695Skan 7770169695Skan /* GNU code is written to assume at least RE_NREGS registers will be set 7771169695Skan (and at least one extra will be -1). */ 7772169695Skan bufp->regs_allocated = REGS_UNALLOCATED; 7773169695Skan 7774169695Skan /* And GNU code determines whether or not to get register information 7775169695Skan by passing null for the REGS argument to re_match, etc., not by 7776169695Skan setting no_sub. */ 7777169695Skan bufp->no_sub = 0; 7778169695Skan 7779169695Skan /* Match anchors at newline. */ 7780169695Skan bufp->newline_anchor = 1; 7781169695Skan 7782169695Skan# ifdef MBS_SUPPORT 7783169695Skan if (MB_CUR_MAX != 1) 7784169695Skan ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp); 7785169695Skan else 7786169695Skan# endif 7787169695Skan ret = byte_regex_compile (pattern, length, re_syntax_options, bufp); 7788169695Skan 7789169695Skan if (!ret) 7790169695Skan return NULL; 7791169695Skan return gettext (re_error_msgid[(int) ret]); 7792169695Skan} 7793169695Skan#ifdef _LIBC 7794169695Skanweak_alias (__re_compile_pattern, re_compile_pattern) 7795169695Skan#endif 7796169695Skan 7797169695Skan/* Entry points compatible with 4.2 BSD regex library. We don't define 7798169695Skan them unless specifically requested. 
*/ 7799169695Skan 7800169695Skan#if defined _REGEX_RE_COMP || defined _LIBC 7801169695Skan 7802169695Skan/* BSD has one and only one pattern buffer. */ 7803169695Skanstatic struct re_pattern_buffer re_comp_buf; 7804169695Skan 7805169695Skanchar * 7806169695Skan#ifdef _LIBC 7807169695Skan/* Make these definitions weak in libc, so POSIX programs can redefine 7808169695Skan these names if they don't use our functions, and still use 7809169695Skan regcomp/regexec below without link errors. */ 7810169695Skanweak_function 7811169695Skan#endif 7812169695Skanre_comp (const char *s) 7813169695Skan{ 7814169695Skan reg_errcode_t ret; 7815169695Skan 7816169695Skan if (!s) 7817169695Skan { 7818169695Skan if (!re_comp_buf.buffer) 7819169695Skan return (char *) gettext ("No previous regular expression"); 7820169695Skan return 0; 7821169695Skan } 7822169695Skan 7823169695Skan if (!re_comp_buf.buffer) 7824169695Skan { 7825169695Skan re_comp_buf.buffer = (unsigned char *) malloc (200); 7826169695Skan if (re_comp_buf.buffer == NULL) 7827169695Skan return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 7828169695Skan re_comp_buf.allocated = 200; 7829169695Skan 7830169695Skan re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 7831169695Skan if (re_comp_buf.fastmap == NULL) 7832169695Skan return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 7833169695Skan } 7834169695Skan 7835169695Skan /* Since `re_exec' always passes NULL for the `regs' argument, we 7836169695Skan don't need to initialize the pattern buffer fields which affect it. */ 7837169695Skan 7838169695Skan /* Match anchors at newlines. 
*/ 7839169695Skan re_comp_buf.newline_anchor = 1; 7840169695Skan 7841169695Skan# ifdef MBS_SUPPORT 7842169695Skan if (MB_CUR_MAX != 1) 7843169695Skan ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 7844169695Skan else 7845169695Skan# endif 7846169695Skan ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 7847169695Skan 7848169695Skan if (!ret) 7849169695Skan return NULL; 7850169695Skan 7851169695Skan /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 7852169695Skan return (char *) gettext (re_error_msgid[(int) ret]); 7853169695Skan} 7854169695Skan 7855169695Skan 7856169695Skanint 7857169695Skan#ifdef _LIBC 7858169695Skanweak_function 7859169695Skan#endif 7860169695Skanre_exec (const char *s) 7861169695Skan{ 7862169695Skan const int len = strlen (s); 7863169695Skan return 7864169695Skan 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); 7865169695Skan} 7866169695Skan 7867169695Skan#endif /* _REGEX_RE_COMP */ 7868169695Skan 7869169695Skan/* POSIX.2 functions. Don't define these for Emacs. */ 7870169695Skan 7871169695Skan#ifndef emacs 7872169695Skan 7873169695Skan/* regcomp takes a regular expression as a string and compiles it. 7874169695Skan 7875169695Skan PREG is a regex_t *. We do not expect any fields to be initialized, 7876169695Skan since POSIX says we shouldn't. Thus, we set 7877169695Skan 7878169695Skan `buffer' to the compiled pattern; 7879169695Skan `used' to the length of the compiled pattern; 7880169695Skan `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 7881169695Skan REG_EXTENDED bit in CFLAGS is set; otherwise, to 7882169695Skan RE_SYNTAX_POSIX_BASIC; 7883169695Skan `newline_anchor' to REG_NEWLINE being set in CFLAGS; 7884169695Skan `fastmap' to an allocated space for the fastmap; 7885169695Skan `fastmap_accurate' to zero; 7886169695Skan `re_nsub' to the number of subexpressions in PATTERN. 7887169695Skan 7888169695Skan PATTERN is the address of the pattern string. 
7889169695Skan 7890169695Skan CFLAGS is a series of bits which affect compilation. 7891169695Skan 7892169695Skan If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 7893169695Skan use POSIX basic syntax. 7894169695Skan 7895169695Skan If REG_NEWLINE is set, then . and [^...] don't match newline. 7896169695Skan Also, regexec will try a match beginning after every newline. 7897169695Skan 7898169695Skan If REG_ICASE is set, then we considers upper- and lowercase 7899169695Skan versions of letters to be equivalent when matching. 7900169695Skan 7901169695Skan If REG_NOSUB is set, then when PREG is passed to regexec, that 7902169695Skan routine will report only success or failure, and nothing about the 7903169695Skan registers. 7904169695Skan 7905169695Skan It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 7906169695Skan the return codes and their meanings.) */ 7907169695Skan 7908169695Skanint 7909169695Skanregcomp (regex_t *preg, const char *pattern, int cflags) 7910169695Skan{ 7911169695Skan reg_errcode_t ret; 7912169695Skan reg_syntax_t syntax 7913169695Skan = (cflags & REG_EXTENDED) ? 7914169695Skan RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 7915169695Skan 7916169695Skan /* regex_compile will allocate the space for the compiled pattern. */ 7917169695Skan preg->buffer = 0; 7918169695Skan preg->allocated = 0; 7919169695Skan preg->used = 0; 7920169695Skan 7921169695Skan /* Try to allocate space for the fastmap. */ 7922169695Skan preg->fastmap = (char *) malloc (1 << BYTEWIDTH); 7923169695Skan 7924169695Skan if (cflags & REG_ICASE) 7925169695Skan { 7926169695Skan int i; 7927169695Skan 7928169695Skan preg->translate 7929169695Skan = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 7930169695Skan * sizeof (*(RE_TRANSLATE_TYPE)0)); 7931169695Skan if (preg->translate == NULL) 7932169695Skan return (int) REG_ESPACE; 7933169695Skan 7934169695Skan /* Map uppercase characters to corresponding lowercase ones. 
*/ 7935169695Skan for (i = 0; i < CHAR_SET_SIZE; i++) 7936169695Skan preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; 7937169695Skan } 7938169695Skan else 7939169695Skan preg->translate = NULL; 7940169695Skan 7941169695Skan /* If REG_NEWLINE is set, newlines are treated differently. */ 7942169695Skan if (cflags & REG_NEWLINE) 7943169695Skan { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 7944169695Skan syntax &= ~RE_DOT_NEWLINE; 7945169695Skan syntax |= RE_HAT_LISTS_NOT_NEWLINE; 7946169695Skan /* It also changes the matching behavior. */ 7947169695Skan preg->newline_anchor = 1; 7948169695Skan } 7949169695Skan else 7950169695Skan preg->newline_anchor = 0; 7951169695Skan 7952169695Skan preg->no_sub = !!(cflags & REG_NOSUB); 7953169695Skan 7954169695Skan /* POSIX says a null character in the pattern terminates it, so we 7955169695Skan can use strlen here in compiling the pattern. */ 7956169695Skan# ifdef MBS_SUPPORT 7957169695Skan if (MB_CUR_MAX != 1) 7958169695Skan ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg); 7959169695Skan else 7960169695Skan# endif 7961169695Skan ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg); 7962169695Skan 7963169695Skan /* POSIX doesn't distinguish between an unmatched open-group and an 7964169695Skan unmatched close-group: both are REG_EPAREN. */ 7965169695Skan if (ret == REG_ERPAREN) ret = REG_EPAREN; 7966169695Skan 7967169695Skan if (ret == REG_NOERROR && preg->fastmap) 7968169695Skan { 7969169695Skan /* Compute the fastmap now, since regexec cannot modify the pattern 7970169695Skan buffer. */ 7971169695Skan if (re_compile_fastmap (preg) == -2) 7972169695Skan { 7973169695Skan /* Some error occurred while computing the fastmap, just forget 7974169695Skan about it. 
*/ 7975169695Skan free (preg->fastmap); 7976169695Skan preg->fastmap = NULL; 7977169695Skan } 7978169695Skan } 7979169695Skan 7980169695Skan return (int) ret; 7981169695Skan} 7982169695Skan#ifdef _LIBC 7983169695Skanweak_alias (__regcomp, regcomp) 7984169695Skan#endif 7985169695Skan 7986169695Skan 7987169695Skan/* regexec searches for a given pattern, specified by PREG, in the 7988169695Skan string STRING. 7989169695Skan 7990169695Skan If NMATCH is zero or REG_NOSUB was set in the cflags argument to 7991169695Skan `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 7992169695Skan least NMATCH elements, and we set them to the offsets of the 7993169695Skan corresponding matched substrings. 7994169695Skan 7995169695Skan EFLAGS specifies `execution flags' which affect matching: if 7996169695Skan REG_NOTBOL is set, then ^ does not match at the beginning of the 7997169695Skan string; if REG_NOTEOL is set, then $ does not match at the end. 7998169695Skan 7999169695Skan We return 0 if we find a match and REG_NOMATCH if not. */ 8000169695Skan 8001169695Skanint 8002169695Skanregexec (const regex_t *preg, const char *string, size_t nmatch, 8003169695Skan regmatch_t pmatch[], int eflags) 8004169695Skan{ 8005169695Skan int ret; 8006169695Skan struct re_registers regs; 8007169695Skan regex_t private_preg; 8008169695Skan int len = strlen (string); 8009169695Skan boolean want_reg_info = !preg->no_sub && nmatch > 0; 8010169695Skan 8011169695Skan private_preg = *preg; 8012169695Skan 8013169695Skan private_preg.not_bol = !!(eflags & REG_NOTBOL); 8014169695Skan private_preg.not_eol = !!(eflags & REG_NOTEOL); 8015169695Skan 8016169695Skan /* The user has told us exactly how many registers to return 8017169695Skan information about, via `nmatch'. We have to pass that on to the 8018169695Skan matching routines. 
*/ 8019169695Skan private_preg.regs_allocated = REGS_FIXED; 8020169695Skan 8021169695Skan if (want_reg_info) 8022169695Skan { 8023169695Skan regs.num_regs = nmatch; 8024169695Skan regs.start = TALLOC (nmatch * 2, regoff_t); 8025169695Skan if (regs.start == NULL) 8026169695Skan return (int) REG_NOMATCH; 8027169695Skan regs.end = regs.start + nmatch; 8028169695Skan } 8029169695Skan 8030169695Skan /* Perform the searching operation. */ 8031169695Skan ret = re_search (&private_preg, string, len, 8032169695Skan /* start: */ 0, /* range: */ len, 8033169695Skan want_reg_info ? ®s : (struct re_registers *) 0); 8034169695Skan 8035169695Skan /* Copy the register information to the POSIX structure. */ 8036169695Skan if (want_reg_info) 8037169695Skan { 8038169695Skan if (ret >= 0) 8039169695Skan { 8040169695Skan unsigned r; 8041169695Skan 8042169695Skan for (r = 0; r < nmatch; r++) 8043169695Skan { 8044169695Skan pmatch[r].rm_so = regs.start[r]; 8045169695Skan pmatch[r].rm_eo = regs.end[r]; 8046169695Skan } 8047169695Skan } 8048169695Skan 8049169695Skan /* If we needed the temporary register info, free the space now. */ 8050169695Skan free (regs.start); 8051169695Skan } 8052169695Skan 8053169695Skan /* We want zero return to mean success, unlike `re_search'. */ 8054169695Skan return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 8055169695Skan} 8056169695Skan#ifdef _LIBC 8057169695Skanweak_alias (__regexec, regexec) 8058169695Skan#endif 8059169695Skan 8060169695Skan 8061169695Skan/* Returns a message corresponding to an error code, ERRCODE, returned 8062169695Skan from either regcomp or regexec. We don't use PREG here. 
*/ 8063169695Skan 8064169695Skansize_t 8065169695Skanregerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED, 8066169695Skan char *errbuf, size_t errbuf_size) 8067169695Skan{ 8068169695Skan const char *msg; 8069169695Skan size_t msg_size; 8070169695Skan 8071169695Skan if (errcode < 0 8072169695Skan || errcode >= (int) (sizeof (re_error_msgid) 8073169695Skan / sizeof (re_error_msgid[0]))) 8074169695Skan /* Only error codes returned by the rest of the code should be passed 8075169695Skan to this routine. If we are given anything else, or if other regex 8076169695Skan code generates an invalid error code, then the program has a bug. 8077169695Skan Dump core so we can fix it. */ 8078169695Skan abort (); 8079169695Skan 8080169695Skan msg = gettext (re_error_msgid[errcode]); 8081169695Skan 8082169695Skan msg_size = strlen (msg) + 1; /* Includes the null. */ 8083169695Skan 8084169695Skan if (errbuf_size != 0) 8085169695Skan { 8086169695Skan if (msg_size > errbuf_size) 8087169695Skan { 8088169695Skan#if defined HAVE_MEMPCPY || defined _LIBC 8089169695Skan *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; 8090169695Skan#else 8091169695Skan memcpy (errbuf, msg, errbuf_size - 1); 8092169695Skan errbuf[errbuf_size - 1] = 0; 8093169695Skan#endif 8094169695Skan } 8095169695Skan else 8096169695Skan memcpy (errbuf, msg, msg_size); 8097169695Skan } 8098169695Skan 8099169695Skan return msg_size; 8100169695Skan} 8101169695Skan#ifdef _LIBC 8102169695Skanweak_alias (__regerror, regerror) 8103169695Skan#endif 8104169695Skan 8105169695Skan 8106169695Skan/* Free dynamically allocated space used by PREG. 
*/ 8107169695Skan 8108169695Skanvoid 8109169695Skanregfree (regex_t *preg) 8110169695Skan{ 8111169695Skan if (preg->buffer != NULL) 8112169695Skan free (preg->buffer); 8113169695Skan preg->buffer = NULL; 8114169695Skan 8115169695Skan preg->allocated = 0; 8116169695Skan preg->used = 0; 8117169695Skan 8118169695Skan if (preg->fastmap != NULL) 8119169695Skan free (preg->fastmap); 8120169695Skan preg->fastmap = NULL; 8121169695Skan preg->fastmap_accurate = 0; 8122169695Skan 8123169695Skan if (preg->translate != NULL) 8124169695Skan free (preg->translate); 8125169695Skan preg->translate = NULL; 8126169695Skan} 8127169695Skan#ifdef _LIBC 8128169695Skanweak_alias (__regfree, regfree) 8129169695Skan#endif 8130169695Skan 8131169695Skan#endif /* not emacs */ 8132169695Skan 8133169695Skan#endif /* not INSIDE_RECURSION */ 8134169695Skan 8135169695Skan 8136169695Skan#undef STORE_NUMBER 8137169695Skan#undef STORE_NUMBER_AND_INCR 8138169695Skan#undef EXTRACT_NUMBER 8139169695Skan#undef EXTRACT_NUMBER_AND_INCR 8140169695Skan 8141169695Skan#undef DEBUG_PRINT_COMPILED_PATTERN 8142169695Skan#undef DEBUG_PRINT_DOUBLE_STRING 8143169695Skan 8144169695Skan#undef INIT_FAIL_STACK 8145169695Skan#undef RESET_FAIL_STACK 8146169695Skan#undef DOUBLE_FAIL_STACK 8147169695Skan#undef PUSH_PATTERN_OP 8148169695Skan#undef PUSH_FAILURE_POINTER 8149169695Skan#undef PUSH_FAILURE_INT 8150169695Skan#undef PUSH_FAILURE_ELT 8151169695Skan#undef POP_FAILURE_POINTER 8152169695Skan#undef POP_FAILURE_INT 8153169695Skan#undef POP_FAILURE_ELT 8154169695Skan#undef DEBUG_PUSH 8155169695Skan#undef DEBUG_POP 8156169695Skan#undef PUSH_FAILURE_POINT 8157169695Skan#undef POP_FAILURE_POINT 8158169695Skan 8159169695Skan#undef REG_UNSET_VALUE 8160169695Skan#undef REG_UNSET 8161169695Skan 8162169695Skan#undef PATFETCH 8163169695Skan#undef PATFETCH_RAW 8164169695Skan#undef PATUNFETCH 8165169695Skan#undef TRANSLATE 8166169695Skan 8167169695Skan#undef INIT_BUF_SIZE 8168169695Skan#undef GET_BUFFER_SPACE 8169169695Skan#undef 
BUF_PUSH 8170169695Skan#undef BUF_PUSH_2 8171169695Skan#undef BUF_PUSH_3 8172169695Skan#undef STORE_JUMP 8173169695Skan#undef STORE_JUMP2 8174169695Skan#undef INSERT_JUMP 8175169695Skan#undef INSERT_JUMP2 8176169695Skan#undef EXTEND_BUFFER 8177169695Skan#undef GET_UNSIGNED_NUMBER 8178169695Skan#undef FREE_STACK_RETURN 8179169695Skan 8180169695Skan# undef POINTER_TO_OFFSET 8181169695Skan# undef MATCHING_IN_FRST_STRING 8182169695Skan# undef PREFETCH 8183169695Skan# undef AT_STRINGS_BEG 8184169695Skan# undef AT_STRINGS_END 8185169695Skan# undef WORDCHAR_P 8186169695Skan# undef FREE_VAR 8187169695Skan# undef FREE_VARIABLES 8188169695Skan# undef NO_HIGHEST_ACTIVE_REG 8189169695Skan# undef NO_LOWEST_ACTIVE_REG 8190169695Skan 8191169695Skan# undef CHAR_T 8192169695Skan# undef UCHAR_T 8193169695Skan# undef COMPILED_BUFFER_VAR 8194169695Skan# undef OFFSET_ADDRESS_SIZE 8195169695Skan# undef CHAR_CLASS_SIZE 8196169695Skan# undef PREFIX 8197169695Skan# undef ARG_PREFIX 8198169695Skan# undef PUT_CHAR 8199169695Skan# undef BYTE 8200169695Skan# undef WCHAR 8201169695Skan 8202169695Skan# define DEFINED_ONCE 8203