134461Speter/* Extended regular expression matching and search library, version 234461Speter 0.12. (Implements POSIX draft P10003.2/D11.2, except for 317721Speter internationalization features.) 417721Speter 566525Speter Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc. 617721Speter 717721Speter This program is free software; you can redistribute it and/or modify 817721Speter it under the terms of the GNU General Public License as published by 917721Speter the Free Software Foundation; either version 2, or (at your option) 1017721Speter any later version. 1117721Speter 1217721Speter This program is distributed in the hope that it will be useful, 1317721Speter but WITHOUT ANY WARRANTY; without even the implied warranty of 1466525Speter MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1566525Speter GNU General Public License for more details. 1617721Speter 1766525Speter You should have received a copy of the GNU General Public License 1866525Speter along with this program; if not, write to the Free Software 1966525Speter Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 2066525Speter USA. */ 2117721Speter 2217721Speter/* AIX requires this to be the first thing in the file. */ 2317721Speter#if defined (_AIX) && !defined (REGEX_MALLOC) 2417721Speter #pragma alloca 2517721Speter#endif 2617721Speter 2766525Speter#undef _GNU_SOURCE 2817721Speter#define _GNU_SOURCE 2917721Speter 3066525Speter#ifdef emacs 3166525Speter/* Converts the pointer to the char to BEG-based offset from the start. */ 3266525Speter#define PTR_TO_OFFSET(d) \ 3366525Speter POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \ 3466525Speter ? (d) - string1 : (d) - (string2 - size1)) 3566525Speter#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) 3666525Speter#else 3766525Speter#define PTR_TO_OFFSET(d) 0 3866525Speter#endif 3917721Speter 4017721Speter#ifdef HAVE_CONFIG_H 4166525Speter#include <config.h> 4217721Speter#endif 4317721Speter 4466525Speter/* We need this for `regex.h', and perhaps for the Emacs include files. */ 4566525Speter#include <sys/types.h> 4666525Speter 4766525Speter/* This is for other GNU distributions with internationalized messages. */ 4866525Speter#if HAVE_LIBINTL_H || defined (_LIBC) 4966525Speter# include <libintl.h> 5066525Speter#else 5166525Speter# define gettext(msgid) (msgid) 5266525Speter#endif 5366525Speter 5466525Speter#ifndef gettext_noop 5566525Speter/* This define is so xgettext can find the internationalizable 5666525Speter strings. */ 5766525Speter#define gettext_noop(String) String 5866525Speter#endif 5966525Speter 6017721Speter/* The `emacs' switch turns on certain matching commands 6117721Speter that make sense only in Emacs. */ 6217721Speter#ifdef emacs 6317721Speter 6417721Speter#include "lisp.h" 6517721Speter#include "buffer.h" 6634461Speter 6734461Speter/* Make syntax table lookup grant data in gl_state. */ 6834461Speter#define SYNTAX_ENTRY_VIA_PROPERTY 6934461Speter 7017721Speter#include "syntax.h" 7134461Speter#include "charset.h" 7234461Speter#include "category.h" 7317721Speter 7434461Speter#define malloc xmalloc 7566525Speter#define realloc xrealloc 7634461Speter#define free xfree 7717721Speter 7817721Speter#else /* not emacs */ 7917721Speter 8066525Speter/* If we are not linking with Emacs proper, 8166525Speter we can't use the relocating allocator 8266525Speter even if config.h says that we can. */ 8366525Speter#undef REL_ALLOC 8466525Speter 8566525Speter#if defined (STDC_HEADERS) || defined (_LIBC) 8666525Speter#include <stdlib.h> 8766525Speter#else 8866525Speterchar *malloc (); 8966525Speterchar *realloc (); 9066525Speter#endif 9166525Speter 9266525Speter/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 9366525Speter If nothing else has been done, use the method below. */ 9466525Speter#ifdef INHIBIT_STRING_HEADER 9566525Speter#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) 9666525Speter#if !defined (bzero) && !defined (bcopy) 9766525Speter#undef INHIBIT_STRING_HEADER 9866525Speter#endif 9966525Speter#endif 10066525Speter#endif 10166525Speter 10266525Speter/* This is the normal way of making sure we have a bcopy and a bzero. 10366525Speter This is used in most programs--a few other programs avoid this 10466525Speter by defining INHIBIT_STRING_HEADER. */ 10566525Speter#ifndef INHIBIT_STRING_HEADER 10666525Speter#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) 10717721Speter#include <string.h> 10817721Speter#ifndef bcmp 10917721Speter#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) 11017721Speter#endif 11117721Speter#ifndef bcopy 11217721Speter#define bcopy(s, d, n) memcpy ((d), (s), (n)) 11317721Speter#endif 11417721Speter#ifndef bzero 11517721Speter#define bzero(s, n) memset ((s), 0, (n)) 11617721Speter#endif 11717721Speter#else 11817721Speter#include <strings.h> 11917721Speter#endif 12017721Speter#endif 12117721Speter 12217721Speter/* Define the syntax stuff for \<, \>, etc. */ 12317721Speter 12417721Speter/* This must be nonzero for the wordchar and notwordchar pattern 12517721Speter commands in re_match_2. */ 12625839Speter#ifndef Sword 12717721Speter#define Sword 1 12817721Speter#endif 12917721Speter 13066525Speter#ifdef SWITCH_ENUM_BUG 13166525Speter#define SWITCH_ENUM_CAST(x) ((int)(x)) 13266525Speter#else 13366525Speter#define SWITCH_ENUM_CAST(x) (x) 13466525Speter#endif 13566525Speter 13617721Speter#ifdef SYNTAX_TABLE 13717721Speter 13817721Speterextern char *re_syntax_table; 13917721Speter 14017721Speter#else /* not SYNTAX_TABLE */ 14117721Speter 14217721Speter/* How many characters in the character set. */ 14317721Speter#define CHAR_SET_SIZE 256 14417721Speter 14517721Speterstatic char re_syntax_table[CHAR_SET_SIZE]; 14617721Speter 14717721Speterstatic void 14817721Speterinit_syntax_once () 14917721Speter{ 15017721Speter register int c; 15117721Speter static int done = 0; 15217721Speter 15317721Speter if (done) 15417721Speter return; 15517721Speter 15617721Speter bzero (re_syntax_table, sizeof re_syntax_table); 15717721Speter 15817721Speter for (c = 'a'; c <= 'z'; c++) 15917721Speter re_syntax_table[c] = Sword; 16017721Speter 16117721Speter for (c = 'A'; c <= 'Z'; c++) 16217721Speter re_syntax_table[c] = Sword; 16317721Speter 16417721Speter for (c = '0'; c <= '9'; c++) 16517721Speter re_syntax_table[c] = Sword; 16617721Speter 16717721Speter re_syntax_table['_'] = Sword; 16817721Speter 16917721Speter done = 1; 17017721Speter} 17117721Speter 17217721Speter#endif /* not SYNTAX_TABLE */ 17317721Speter 17417721Speter#define SYNTAX(c) re_syntax_table[c] 17517721Speter 17666525Speter/* Dummy macros for non-Emacs environments. */ 17766525Speter#define BASE_LEADING_CODE_P(c) (0) 17866525Speter#define WORD_BOUNDARY_P(c1, c2) (0) 17966525Speter#define CHAR_HEAD_P(p) (1) 18066525Speter#define SINGLE_BYTE_CHAR_P(c) (1) 18166525Speter#define SAME_CHARSET_P(c1, c2) (1) 18266525Speter#define MULTIBYTE_FORM_LENGTH(p, s) (1) 18366525Speter#define STRING_CHAR(p, s) (*(p)) 18466525Speter#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) 18566525Speter#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \ 18666525Speter (c = ((p) == (end1) ? *(str2) : *(p))) 18766525Speter#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 18866525Speter (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) 18917721Speter#endif /* not emacs */ 19017721Speter 19117721Speter/* Get the interface, including the syntax bits. */ 19217721Speter#include "regex.h" 19317721Speter 19417721Speter/* isalpha etc. are used for the character classes. */ 19517721Speter#include <ctype.h> 19617721Speter 19766525Speter/* Jim Meyering writes: 19866525Speter 19966525Speter "... Some ctype macros are valid only for character codes that 20066525Speter isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 20166525Speter using /bin/cc or gcc but without giving an ansi option). So, all 20266525Speter ctype uses should be through macros like ISPRINT... If 20366525Speter STDC_HEADERS is defined, then autoconf has verified that the ctype 20466525Speter macros don't need to be guarded with references to isascii. ... 20566525Speter Defining isascii to 1 should let any compiler worth its salt 20666525Speter eliminate the && through constant folding." */ 20766525Speter 20866525Speter#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 20966525Speter#define ISASCII(c) 1 21066525Speter#else 21166525Speter#define ISASCII(c) isascii(c) 21217721Speter#endif 21317721Speter 21417721Speter#ifdef isblank 21566525Speter#define ISBLANK(c) (ISASCII (c) && isblank (c)) 21617721Speter#else 21717721Speter#define ISBLANK(c) ((c) == ' ' || (c) == '\t') 21817721Speter#endif 21917721Speter#ifdef isgraph 22066525Speter#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 22117721Speter#else 22266525Speter#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 22317721Speter#endif 22417721Speter 22566525Speter#define ISPRINT(c) (ISASCII (c) && isprint (c)) 22666525Speter#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 22766525Speter#define ISALNUM(c) (ISASCII (c) && isalnum (c)) 22866525Speter#define ISALPHA(c) (ISASCII (c) && isalpha (c)) 22966525Speter#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 23066525Speter#define ISLOWER(c) (ISASCII (c) && islower (c)) 23166525Speter#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) 23266525Speter#define ISSPACE(c) (ISASCII (c) && isspace (c)) 23366525Speter#define ISUPPER(c) (ISASCII (c) && isupper (c)) 23466525Speter#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 23517721Speter 23617721Speter#ifndef NULL 23766525Speter#define NULL (void *)0 23817721Speter#endif 23917721Speter 24017721Speter/* We remove any previous definition of `SIGN_EXTEND_CHAR', 24117721Speter since ours (we hope) works properly with all combinations of 24217721Speter machines, compilers, `char' and `unsigned char' argument types. 24334461Speter (Per Bothner suggested the basic approach.) */ 24417721Speter#undef SIGN_EXTEND_CHAR 24517721Speter#if __STDC__ 24617721Speter#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 24717721Speter#else /* not __STDC__ */ 24817721Speter/* As in Harbison and Steele. */ 24917721Speter#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) 25017721Speter#endif 25117721Speter 25217721Speter/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 25317721Speter use `alloca' instead of `malloc'. This is because using malloc in 25417721Speter re_search* or re_match* could cause memory leaks when C-g is used in 25517721Speter Emacs; also, malloc is slower and causes storage fragmentation. On 25625839Speter the other hand, malloc is more portable, and easier to debug. 25725839Speter 25817721Speter Because we sometimes use alloca, some routines have to be macros, 25917721Speter not functions -- `alloca'-allocated space disappears at the end of the 26017721Speter function it is called in. */ 26117721Speter 26217721Speter#ifdef REGEX_MALLOC 26317721Speter 26417721Speter#define REGEX_ALLOCATE malloc 26517721Speter#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) 26666525Speter#define REGEX_FREE free 26717721Speter 26817721Speter#else /* not REGEX_MALLOC */ 26917721Speter 27017721Speter/* Emacs already defines alloca, sometimes. */ 27117721Speter#ifndef alloca 27217721Speter 27317721Speter/* Make alloca work the best possible way. */ 27417721Speter#ifdef __GNUC__ 27517721Speter#define alloca __builtin_alloca 27617721Speter#else /* not __GNUC__ */ 27717721Speter#if HAVE_ALLOCA_H 27817721Speter#include <alloca.h> 27917721Speter#else /* not __GNUC__ or HAVE_ALLOCA_H */ 28066525Speter#if 0 /* It is a bad idea to declare alloca. We always cast the result. */ 28166525Speter#ifndef _AIX /* Already did AIX, up at the top. */ 28217721Speterchar *alloca (); 28317721Speter#endif /* not _AIX */ 28466525Speter#endif 28566525Speter#endif /* not HAVE_ALLOCA_H */ 28617721Speter#endif /* not __GNUC__ */ 28717721Speter 28817721Speter#endif /* not alloca */ 28917721Speter 29017721Speter#define REGEX_ALLOCATE alloca 29117721Speter 29217721Speter/* Assumes a `char *destination' variable. */ 29317721Speter#define REGEX_REALLOCATE(source, osize, nsize) \ 29417721Speter (destination = (char *) alloca (nsize), \ 29517721Speter bcopy (source, destination, osize), \ 29617721Speter destination) 29717721Speter 29866525Speter/* No need to do anything to free, after alloca. */ 29966525Speter#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 30066525Speter 30117721Speter#endif /* not REGEX_MALLOC */ 30217721Speter 30366525Speter/* Define how to allocate the failure stack. */ 30417721Speter 30566525Speter#if defined (REL_ALLOC) && defined (REGEX_MALLOC) 30666525Speter 30766525Speter#define REGEX_ALLOCATE_STACK(size) \ 30866525Speter r_alloc (&failure_stack_ptr, (size)) 30966525Speter#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 31066525Speter r_re_alloc (&failure_stack_ptr, (nsize)) 31166525Speter#define REGEX_FREE_STACK(ptr) \ 31266525Speter r_alloc_free (&failure_stack_ptr) 31366525Speter 31466525Speter#else /* not using relocating allocator */ 31566525Speter 31666525Speter#ifdef REGEX_MALLOC 31766525Speter 31866525Speter#define REGEX_ALLOCATE_STACK malloc 31966525Speter#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) 32066525Speter#define REGEX_FREE_STACK free 32166525Speter 32266525Speter#else /* not REGEX_MALLOC */ 32366525Speter 32466525Speter#define REGEX_ALLOCATE_STACK alloca 32566525Speter 32666525Speter#define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 32766525Speter REGEX_REALLOCATE (source, osize, nsize) 32866525Speter/* No need to explicitly free anything. */ 32966525Speter#define REGEX_FREE_STACK(arg) 33066525Speter 33166525Speter#endif /* not REGEX_MALLOC */ 33266525Speter#endif /* not using relocating allocator */ 33366525Speter 33466525Speter 33517721Speter/* True if `size1' is non-NULL and PTR is pointing anywhere inside 33617721Speter `string1' or just past its end. This works if PTR is NULL, which is 33717721Speter a good thing. */ 33866525Speter#define FIRST_STRING_P(ptr) \ 33917721Speter (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 34017721Speter 34117721Speter/* (Re)Allocate N items of type T using malloc, or fail. */ 34217721Speter#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 34317721Speter#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 34466525Speter#define RETALLOC_IF(addr, n, t) \ 34566525Speter if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 34617721Speter#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 34717721Speter 34866525Speter#define BYTEWIDTH 8 /* In bits. */ 34917721Speter 35017721Speter#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 35117721Speter 35225839Speter#undef MAX 35325839Speter#undef MIN 35417721Speter#define MAX(a, b) ((a) > (b) ? (a) : (b)) 35517721Speter#define MIN(a, b) ((a) < (b) ? (a) : (b)) 35617721Speter 35717721Spetertypedef char boolean; 35817721Speter#define false 0 35917721Speter#define true 1 36066525Speter 36166525Speterstatic int re_match_2_internal (); 36217721Speter 36317721Speter/* These are the command codes that appear in compiled regular 36466525Speter expressions. Some opcodes are followed by argument bytes. A 36517721Speter command code can specify any interpretation whatsoever for its 36666525Speter arguments. Zero bytes may appear in the compiled regular expression. */ 36717721Speter 36817721Spetertypedef enum 36917721Speter{ 37017721Speter no_op = 0, 37117721Speter 37266525Speter /* Succeed right away--no more backtracking. */ 37366525Speter succeed, 37417721Speter 37566525Speter /* Followed by one byte giving n, then by n literal bytes. */ 37666525Speter exactn, 37766525Speter 37866525Speter /* Matches any (more or less) character. */ 37917721Speter anychar, 38017721Speter 38166525Speter /* Matches any one char belonging to specified set. First 38266525Speter following byte is number of bitmap bytes. Then come bytes 38366525Speter for a bitmap saying which chars are in. Bits in each byte 38466525Speter are ordered low-bit-first. A character is in the set if its 38566525Speter bit is 1. A character too large to have a bit in the map is 38666525Speter automatically not in the set. */ 38717721Speter charset, 38817721Speter 38966525Speter /* Same parameters as charset, but match any character that is 39066525Speter not one of those specified. */ 39117721Speter charset_not, 39217721Speter 39366525Speter /* Start remembering the text that is matched, for storing in a 39466525Speter register. Followed by one byte with the register number, in 39566525Speter the range 0 to one less than the pattern buffer's re_nsub 39666525Speter field. Then followed by one byte with the number of groups 39766525Speter inner to this one. (This last has to be part of the 39866525Speter start_memory only because we need it in the on_failure_jump 39966525Speter of re_match_2.) */ 40017721Speter start_memory, 40117721Speter 40266525Speter /* Stop remembering the text that is matched and store it in a 40366525Speter memory register. Followed by one byte with the register 40466525Speter number, in the range 0 to one less than `re_nsub' in the 40566525Speter pattern buffer, and one byte with the number of inner groups, 40666525Speter just like `start_memory'. (We need the number of inner 40766525Speter groups here because we don't have any easy way of finding the 40866525Speter corresponding start_memory when we're at a stop_memory.) */ 40917721Speter stop_memory, 41017721Speter 41166525Speter /* Match a duplicate of something remembered. Followed by one 41266525Speter byte containing the register number. */ 41317721Speter duplicate, 41417721Speter 41566525Speter /* Fail unless at beginning of line. */ 41617721Speter begline, 41717721Speter 41866525Speter /* Fail unless at end of line. */ 41917721Speter endline, 42017721Speter 42166525Speter /* Succeeds if at beginning of buffer (if emacs) or at beginning 42266525Speter of string to be matched (if not). */ 42317721Speter begbuf, 42417721Speter 42566525Speter /* Analogously, for end of buffer/string. */ 42617721Speter endbuf, 42725839Speter 42866525Speter /* Followed by two byte relative address to which to jump. */ 42925839Speter jump, 43017721Speter 43117721Speter /* Same as jump, but marks the end of an alternative. */ 43217721Speter jump_past_alt, 43317721Speter 43466525Speter /* Followed by two-byte relative address of place to resume at 43566525Speter in case of failure. */ 43617721Speter on_failure_jump, 43725839Speter 43866525Speter /* Like on_failure_jump, but pushes a placeholder instead of the 43966525Speter current string position when executed. */ 44017721Speter on_failure_keep_string_jump, 44125839Speter 44266525Speter /* Throw away latest failure point and then jump to following 44366525Speter two-byte relative address. */ 44417721Speter pop_failure_jump, 44517721Speter 44666525Speter /* Change to pop_failure_jump if know won't have to backtrack to 44766525Speter match; otherwise change to jump. This is used to jump 44866525Speter back to the beginning of a repeat. If what follows this jump 44966525Speter clearly won't match what the repeat does, such that we can be 45066525Speter sure that there is no use backtracking out of repetitions 45166525Speter already matched, then we change it to a pop_failure_jump. 45266525Speter Followed by two-byte address. */ 45317721Speter maybe_pop_jump, 45417721Speter 45566525Speter /* Jump to following two-byte address, and push a dummy failure 45666525Speter point. This failure point will be thrown away if an attempt 45766525Speter is made to use it for a failure. A `+' construct makes this 45866525Speter before the first repeat. Also used as an intermediary kind 45966525Speter of jump when compiling an alternative. */ 46017721Speter dummy_failure_jump, 46117721Speter 46217721Speter /* Push a dummy failure point and continue. Used at the end of 46317721Speter alternatives. */ 46417721Speter push_dummy_failure, 46517721Speter 46666525Speter /* Followed by two-byte relative address and two-byte number n. 46766525Speter After matching N times, jump to the address upon failure. */ 46817721Speter succeed_n, 46917721Speter 47066525Speter /* Followed by two-byte relative address, and two-byte number n. 47166525Speter Jump to the address N times, then fail. */ 47217721Speter jump_n, 47317721Speter 47466525Speter /* Set the following two-byte relative address to the 47566525Speter subsequent two-byte number. The address *includes* the two 47666525Speter bytes of number. */ 47717721Speter set_number_at, 47817721Speter 47917721Speter wordchar, /* Matches any word-constituent character. */ 48017721Speter notwordchar, /* Matches any char that is not a word-constituent. */ 48117721Speter 48217721Speter wordbeg, /* Succeeds if at word beginning. */ 48317721Speter wordend, /* Succeeds if at word end. */ 48417721Speter 48517721Speter wordbound, /* Succeeds if at a word boundary. */ 48666525Speter notwordbound /* Succeeds if not at a word boundary. */ 48717721Speter 48817721Speter#ifdef emacs 48917721Speter ,before_dot, /* Succeeds if before point. */ 49017721Speter at_dot, /* Succeeds if at point. */ 49117721Speter after_dot, /* Succeeds if after point. */ 49217721Speter 49317721Speter /* Matches any character whose syntax is specified. Followed by 49466525Speter a byte which contains a syntax code, e.g., Sword. */ 49517721Speter syntaxspec, 49617721Speter 49717721Speter /* Matches any character whose syntax is not that specified. */ 49866525Speter notsyntaxspec, 49966525Speter 50066525Speter /* Matches any character whose category-set contains the specified 50166525Speter category. The operator is followed by a byte which contains a 50266525Speter category code (mnemonic ASCII character). */ 50366525Speter categoryspec, 50466525Speter 50566525Speter /* Matches any character whose category-set does not contain the 50666525Speter specified category. The operator is followed by a byte which 50766525Speter contains the category code (mnemonic ASCII character). */ 50866525Speter notcategoryspec 50917721Speter#endif /* emacs */ 51017721Speter} re_opcode_t; 51117721Speter 51217721Speter/* Common operations on the compiled pattern. */ 51317721Speter 51417721Speter/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 51517721Speter 51617721Speter#define STORE_NUMBER(destination, number) \ 51717721Speter do { \ 51817721Speter (destination)[0] = (number) & 0377; \ 51917721Speter (destination)[1] = (number) >> 8; \ 52017721Speter } while (0) 52117721Speter 52217721Speter/* Same as STORE_NUMBER, except increment DESTINATION to 52317721Speter the byte after where the number is stored. Therefore, DESTINATION 52417721Speter must be an lvalue. */ 52517721Speter 52617721Speter#define STORE_NUMBER_AND_INCR(destination, number) \ 52717721Speter do { \ 52817721Speter STORE_NUMBER (destination, number); \ 52917721Speter (destination) += 2; \ 53017721Speter } while (0) 53117721Speter 53217721Speter/* Put into DESTINATION a number stored in two contiguous bytes starting 53317721Speter at SOURCE. */ 53417721Speter 53517721Speter#define EXTRACT_NUMBER(destination, source) \ 53617721Speter do { \ 53717721Speter (destination) = *(source) & 0377; \ 53817721Speter (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 53917721Speter } while (0) 54017721Speter 54117721Speter#ifdef DEBUG 54217721Speterstatic void 54317721Speterextract_number (dest, source) 54417721Speter int *dest; 54517721Speter unsigned char *source; 54617721Speter{ 54725839Speter int temp = SIGN_EXTEND_CHAR (*(source + 1)); 54817721Speter *dest = *source & 0377; 54917721Speter *dest += temp << 8; 55017721Speter} 55117721Speter 55266525Speter#ifndef EXTRACT_MACROS /* To debug the macros. */ 55317721Speter#undef EXTRACT_NUMBER 55417721Speter#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 55517721Speter#endif /* not EXTRACT_MACROS */ 55617721Speter 55717721Speter#endif /* DEBUG */ 55817721Speter 55917721Speter/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. 56017721Speter SOURCE must be an lvalue. */ 56117721Speter 56217721Speter#define EXTRACT_NUMBER_AND_INCR(destination, source) \ 56317721Speter do { \ 56417721Speter EXTRACT_NUMBER (destination, source); \ 56566525Speter (source) += 2; \ 56617721Speter } while (0) 56717721Speter 56817721Speter#ifdef DEBUG 56917721Speterstatic void 57017721Speterextract_number_and_incr (destination, source) 57117721Speter int *destination; 57217721Speter unsigned char **source; 57325839Speter{ 57417721Speter extract_number (destination, *source); 57517721Speter *source += 2; 57617721Speter} 57717721Speter 57817721Speter#ifndef EXTRACT_MACROS 57917721Speter#undef EXTRACT_NUMBER_AND_INCR 58017721Speter#define EXTRACT_NUMBER_AND_INCR(dest, src) \ 58117721Speter extract_number_and_incr (&dest, &src) 58217721Speter#endif /* not EXTRACT_MACROS */ 58317721Speter 58417721Speter#endif /* DEBUG */ 58517721Speter 58666525Speter/* Store a multibyte character in three contiguous bytes starting 58766525Speter DESTINATION, and increment DESTINATION to the byte after where the 58866525Speter character is stored. Therefore, DESTINATION must be an lvalue. */ 58966525Speter 59066525Speter#define STORE_CHARACTER_AND_INCR(destination, character) \ 59166525Speter do { \ 59266525Speter (destination)[0] = (character) & 0377; \ 59366525Speter (destination)[1] = ((character) >> 8) & 0377; \ 59466525Speter (destination)[2] = (character) >> 16; \ 59566525Speter (destination) += 3; \ 59666525Speter } while (0) 59766525Speter 59866525Speter/* Put into DESTINATION a character stored in three contiguous bytes 59966525Speter starting at SOURCE. */ 60066525Speter 60166525Speter#define EXTRACT_CHARACTER(destination, source) \ 60266525Speter do { \ 60366525Speter (destination) = ((source)[0] \ 60466525Speter | ((source)[1] << 8) \ 60566525Speter | ((source)[2] << 16)); \ 60666525Speter } while (0) 60766525Speter 60866525Speter 60966525Speter/* Macros for charset. */ 61066525Speter 61166525Speter/* Size of bitmap of charset P in bytes. P is a start of charset, 61266525Speter i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not. */ 61366525Speter#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F) 61466525Speter 61566525Speter/* Nonzero if charset P has range table. */ 61666525Speter#define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80) 61766525Speter 61866525Speter/* Return the address of range table of charset P. But not the start 61966525Speter of table itself, but the before where the number of ranges is 62066525Speter stored. `2 +' means to skip re_opcode_t and size of bitmap. */ 62166525Speter#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)]) 62266525Speter 62366525Speter/* Test if C is listed in the bitmap of charset P. */ 62466525Speter#define CHARSET_LOOKUP_BITMAP(p, c) \ 62566525Speter ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \ 62666525Speter && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH))) 62766525Speter 62866525Speter/* Return the address of end of RANGE_TABLE. COUNT is number of 62966525Speter ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' 63066525Speter is start of range and end of range. `* 3' is size of each start 63166525Speter and end. */ 63266525Speter#define CHARSET_RANGE_TABLE_END(range_table, count) \ 63366525Speter ((range_table) + (count) * 2 * 3) 63466525Speter 63566525Speter/* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in. 63666525Speter COUNT is number of ranges in RANGE_TABLE. */ 63766525Speter#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \ 63866525Speter do \ 63966525Speter { \ 64066525Speter int range_start, range_end; \ 64166525Speter unsigned char *p; \ 64266525Speter unsigned char *range_table_end \ 64366525Speter = CHARSET_RANGE_TABLE_END ((range_table), (count)); \ 64466525Speter \ 64566525Speter for (p = (range_table); p < range_table_end; p += 2 * 3) \ 64666525Speter { \ 64766525Speter EXTRACT_CHARACTER (range_start, p); \ 64866525Speter EXTRACT_CHARACTER (range_end, p + 3); \ 64966525Speter \ 65066525Speter if (range_start <= (c) && (c) <= range_end) \ 65166525Speter { \ 65266525Speter (not) = !(not); \ 65366525Speter break; \ 65466525Speter } \ 65566525Speter } \ 65666525Speter } \ 65766525Speter while (0) 65866525Speter 65966525Speter/* Test if C is in range table of CHARSET. The flag NOT is negated if 66066525Speter C is listed in it. */ 66166525Speter#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset) \ 66266525Speter do \ 66366525Speter { \ 66466525Speter /* Number of ranges in range table. */ \ 66566525Speter int count; \ 66666525Speter unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \ 66766525Speter \ 66866525Speter EXTRACT_NUMBER_AND_INCR (count, range_table); \ 66966525Speter CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \ 67066525Speter } \ 67166525Speter while (0) 67266525Speter 67317721Speter/* If DEBUG is defined, Regex prints many voluminous messages about what 67417721Speter it is doing (if the variable `debug' is nonzero). If linked with the 67517721Speter main program in `iregex.c', you can enter patterns and strings 67617721Speter interactively. And if linked with the main program in `main.c' and 67766525Speter the other test files, you can run the already-written tests. */ 67817721Speter 67917721Speter#ifdef DEBUG 68017721Speter 68117721Speter/* We use standard I/O for debugging. */ 68217721Speter#include <stdio.h> 68317721Speter 68417721Speter/* It is useful to test things that ``must'' be true when debugging. */ 68517721Speter#include <assert.h> 68617721Speter 68717721Speterstatic int debug = 0; 68817721Speter 68917721Speter#define DEBUG_STATEMENT(e) e 69017721Speter#define DEBUG_PRINT1(x) if (debug) printf (x) 69117721Speter#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 69217721Speter#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 69317721Speter#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 69466525Speter#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 69517721Speter if (debug) print_partial_compiled_pattern (s, e) 69617721Speter#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 69717721Speter if (debug) print_double_string (w, s1, sz1, s2, sz2) 69817721Speter 69917721Speter 70017721Speter/* Print the fastmap in human-readable form. */ 70117721Speter 70217721Spetervoid 70317721Speterprint_fastmap (fastmap) 70417721Speter char *fastmap; 70517721Speter{ 70617721Speter unsigned was_a_range = 0; 70725839Speter unsigned i = 0; 70825839Speter 70917721Speter while (i < (1 << BYTEWIDTH)) 71017721Speter { 71117721Speter if (fastmap[i++]) 71217721Speter { 71317721Speter was_a_range = 0; 71466525Speter putchar (i - 1); 71566525Speter while (i < (1 << BYTEWIDTH) && fastmap[i]) 71666525Speter { 71766525Speter was_a_range = 1; 71866525Speter i++; 71966525Speter } 72017721Speter if (was_a_range) 72166525Speter { 72266525Speter printf ("-"); 72366525Speter putchar (i - 1); 72466525Speter } 72566525Speter } 72617721Speter } 72725839Speter putchar ('\n'); 72817721Speter} 72917721Speter 73017721Speter 73117721Speter/* Print a compiled pattern string in human-readable form, starting at 73217721Speter the START pointer into it and ending just before the pointer END. */ 73317721Speter 73417721Spetervoid 73517721Speterprint_partial_compiled_pattern (start, end) 73617721Speter unsigned char *start; 73717721Speter unsigned char *end; 73817721Speter{ 73917721Speter int mcnt, mcnt2; 74017721Speter unsigned char *p = start; 74117721Speter unsigned char *pend = end; 74217721Speter 74317721Speter if (start == NULL) 74417721Speter { 74517721Speter printf ("(null)\n"); 74617721Speter return; 74717721Speter } 74825839Speter 74917721Speter /* Loop over pattern commands. */ 75017721Speter while (p < pend) 75117721Speter { 75266525Speter printf ("%d:\t", p - start); 75366525Speter 75417721Speter switch ((re_opcode_t) *p++) 75517721Speter { 75666525Speter case no_op: 75766525Speter printf ("/no_op"); 75866525Speter break; 75917721Speter 76017721Speter case exactn: 76117721Speter mcnt = *p++; 76266525Speter printf ("/exactn/%d", mcnt); 76366525Speter do 76417721Speter { 76566525Speter putchar ('/'); 76666525Speter putchar (*p++); 76766525Speter } 76866525Speter while (--mcnt); 76966525Speter break; 77017721Speter 77117721Speter case start_memory: 77266525Speter mcnt = *p++; 77366525Speter printf ("/start_memory/%d/%d", mcnt, *p++); 77466525Speter break; 77517721Speter 77617721Speter case stop_memory: 77766525Speter mcnt = *p++; 77817721Speter printf ("/stop_memory/%d/%d", mcnt, *p++); 77966525Speter break; 78017721Speter 78117721Speter case duplicate: 78217721Speter printf ("/duplicate/%d", *p++); 78317721Speter break; 78417721Speter 78517721Speter case anychar: 78617721Speter printf ("/anychar"); 78717721Speter break; 78817721Speter 78917721Speter case charset: 79066525Speter case charset_not: 79166525Speter { 79266525Speter register int c, last = -100; 79366525Speter register int in_range = 0; 79417721Speter 79566525Speter printf ("/charset [%s", 79666525Speter (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 79717721Speter 79866525Speter assert (p + *p < pend); 79917721Speter 80066525Speter for (c = 0; c < 256; c++) 80166525Speter if (c / 8 < *p 80266525Speter && (p[1 + (c/8)] & (1 << (c % 8)))) 80366525Speter { 80466525Speter /* Are we starting a range? */ 80566525Speter if (last + 1 == c && ! in_range) 80666525Speter { 80766525Speter putchar ('-'); 80866525Speter in_range = 1; 80966525Speter } 81066525Speter /* Have we broken a range? */ 81166525Speter else if (last + 1 != c && in_range) 81266525Speter { 81366525Speter putchar (last); 81466525Speter in_range = 0; 81566525Speter } 81666525Speter 81766525Speter if (! in_range) 81866525Speter putchar (c); 81966525Speter 82066525Speter last = c; 82166525Speter } 82266525Speter 82366525Speter if (in_range) 82466525Speter putchar (last); 82566525Speter 82666525Speter putchar (']'); 82766525Speter 82817721Speter p += 1 + *p; 82917721Speter } 83066525Speter break; 83117721Speter 83217721Speter case begline: 83317721Speter printf ("/begline"); 83466525Speter break; 83517721Speter 83617721Speter case endline: 83766525Speter printf ("/endline"); 83866525Speter break; 83917721Speter 84017721Speter case on_failure_jump: 84166525Speter extract_number_and_incr (&mcnt, &p); 84266525Speter printf ("/on_failure_jump to %d", p + mcnt - start); 84366525Speter break; 84417721Speter 84517721Speter case on_failure_keep_string_jump: 84666525Speter extract_number_and_incr (&mcnt, &p); 84766525Speter printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 84866525Speter break; 84917721Speter 85017721Speter case dummy_failure_jump: 85166525Speter extract_number_and_incr (&mcnt, &p); 85266525Speter printf ("/dummy_failure_jump to %d", p + mcnt - start); 85366525Speter break; 85417721Speter 85517721Speter case push_dummy_failure: 85666525Speter printf ("/push_dummy_failure"); 85717721Speter break; 85817721Speter 85966525Speter case maybe_pop_jump: 86017721Speter extract_number_and_incr (&mcnt, &p); 86166525Speter printf ("/maybe_pop_jump to %d", p + mcnt - start); 86266525Speter break; 86366525Speter 86466525Speter case pop_failure_jump: 86517721Speter extract_number_and_incr (&mcnt, &p); 86666525Speter printf ("/pop_failure_jump to %d", p + mcnt - start); 86766525Speter break; 86866525Speter 86966525Speter case jump_past_alt: 87017721Speter extract_number_and_incr (&mcnt, &p); 87166525Speter printf ("/jump_past_alt to %d", p + mcnt - start); 87217721Speter break; 87317721Speter 87466525Speter case jump: 87566525Speter extract_number_and_incr (&mcnt, &p); 87666525Speter printf ("/jump to %d", p + mcnt - start); 87766525Speter break; 87825839Speter 87966525Speter case succeed_n: 88066525Speter extract_number_and_incr (&mcnt, &p); 88166525Speter extract_number_and_incr (&mcnt2, &p); 88266525Speter printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); 88366525Speter break; 88425839Speter 88566525Speter case jump_n: 88666525Speter extract_number_and_incr (&mcnt, &p); 88766525Speter extract_number_and_incr (&mcnt2, &p); 88866525Speter printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); 88966525Speter break; 89025839Speter 89166525Speter case set_number_at: 89266525Speter extract_number_and_incr (&mcnt, &p); 89366525Speter extract_number_and_incr (&mcnt2, &p); 89466525Speter printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); 89566525Speter break; 89666525Speter 89766525Speter case wordbound: 89817721Speter printf ("/wordbound"); 89917721Speter break; 90017721Speter 90117721Speter case notwordbound: 90217721Speter printf ("/notwordbound"); 90366525Speter break; 90417721Speter 90517721Speter case wordbeg: 90617721Speter printf ("/wordbeg"); 90717721Speter break; 90825839Speter 90917721Speter case wordend: 91017721Speter printf ("/wordend"); 91125839Speter 91217721Speter#ifdef emacs 91317721Speter case before_dot: 91417721Speter printf ("/before_dot"); 91566525Speter break; 91617721Speter 91717721Speter case at_dot: 91817721Speter printf ("/at_dot"); 91966525Speter break; 92017721Speter 92117721Speter case after_dot: 92217721Speter printf ("/after_dot"); 92366525Speter break; 92417721Speter 92517721Speter case syntaxspec: 92666525Speter printf ("/syntaxspec"); 92717721Speter mcnt = *p++; 92817721Speter printf ("/%d", mcnt); 92966525Speter break; 93025839Speter 93117721Speter case notsyntaxspec: 93266525Speter printf ("/notsyntaxspec"); 93317721Speter mcnt = *p++; 93417721Speter printf ("/%d", mcnt); 93517721Speter break; 93617721Speter#endif /* emacs */ 93717721Speter 93817721Speter case wordchar: 93917721Speter printf ("/wordchar"); 94066525Speter break; 94125839Speter 94217721Speter case notwordchar: 94317721Speter printf ("/notwordchar"); 94466525Speter break; 94517721Speter 94617721Speter case begbuf: 94717721Speter printf ("/begbuf"); 94866525Speter break; 94917721Speter 95017721Speter case endbuf: 95117721Speter printf ("/endbuf"); 95266525Speter break; 95317721Speter 95466525Speter default: 95566525Speter printf ("?%d", *(p-1)); 95617721Speter } 95766525Speter 95866525Speter putchar ('\n'); 95917721Speter } 96066525Speter 96166525Speter printf ("%d:\tend of pattern.\n", p - start); 96217721Speter} 96317721Speter 96417721Speter 96517721Spetervoid 96617721Speterprint_compiled_pattern (bufp) 96717721Speter struct re_pattern_buffer *bufp; 96817721Speter{ 96917721Speter unsigned char *buffer = bufp->buffer; 97017721Speter 97117721Speter print_partial_compiled_pattern (buffer, buffer + bufp->used); 97217721Speter printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); 97317721Speter 97417721Speter if (bufp->fastmap_accurate && bufp->fastmap) 97517721Speter { 97617721Speter printf ("fastmap: "); 97717721Speter print_fastmap (bufp->fastmap); 97817721Speter } 97917721Speter 98017721Speter printf ("re_nsub: %d\t", bufp->re_nsub); 98117721Speter printf ("regs_alloc: %d\t", bufp->regs_allocated); 98217721Speter printf ("can_be_null: %d\t", bufp->can_be_null); 98317721Speter printf ("newline_anchor: %d\n", bufp->newline_anchor); 98417721Speter printf ("no_sub: %d\t", bufp->no_sub); 98517721Speter printf ("not_bol: %d\t", bufp->not_bol); 98617721Speter printf ("not_eol: %d\t", bufp->not_eol); 98717721Speter printf ("syntax: %d\n", bufp->syntax); 98817721Speter /* Perhaps we should print the translate table? */ 98917721Speter} 99017721Speter 99117721Speter 99217721Spetervoid 99317721Speterprint_double_string (where, string1, size1, string2, size2) 99417721Speter const char *where; 99517721Speter const char *string1; 99617721Speter const char *string2; 99717721Speter int size1; 99817721Speter int size2; 99917721Speter{ 100017721Speter unsigned this_char; 100125839Speter 100217721Speter if (where == NULL) 100317721Speter printf ("(null)"); 100417721Speter else 100517721Speter { 100617721Speter if (FIRST_STRING_P (where)) 100766525Speter { 100866525Speter for (this_char = where - string1; this_char < size1; this_char++) 100966525Speter putchar (string1[this_char]); 101017721Speter 101166525Speter where = string2; 101266525Speter } 101317721Speter 101417721Speter for (this_char = where - string2; this_char < size2; this_char++) 101566525Speter putchar (string2[this_char]); 101617721Speter } 101717721Speter} 101817721Speter 101917721Speter#else /* not DEBUG */ 102017721Speter 102117721Speter#undef assert 102217721Speter#define assert(e) 102317721Speter 102417721Speter#define DEBUG_STATEMENT(e) 102517721Speter#define DEBUG_PRINT1(x) 102617721Speter#define DEBUG_PRINT2(x1, x2) 102717721Speter#define DEBUG_PRINT3(x1, x2, x3) 102817721Speter#define DEBUG_PRINT4(x1, x2, x3, x4) 102917721Speter#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 103017721Speter#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 103117721Speter 103217721Speter#endif /* not DEBUG */ 103317721Speter 103417721Speter/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 103517721Speter also be assigned to arbitrarily: each pattern buffer stores its own 103617721Speter syntax, so it can be changed between regex compilations. */ 103766525Speter/* This has no initializer because initialized variables in Emacs 103866525Speter become read-only after dumping. */ 103966525Speterreg_syntax_t re_syntax_options; 104017721Speter 104117721Speter 104217721Speter/* Specify the precise syntax of regexps for compilation. This provides 104317721Speter for compatibility for various utilities which historically have 104417721Speter different, incompatible syntaxes. 104517721Speter 104617721Speter The argument SYNTAX is a bit mask comprised of the various bits 104766525Speter defined in regex.h. We return the old syntax. */ 104817721Speter 104917721Speterreg_syntax_t 105017721Speterre_set_syntax (syntax) 105117721Speter reg_syntax_t syntax; 105217721Speter{ 105317721Speter reg_syntax_t ret = re_syntax_options; 105425839Speter 105517721Speter re_syntax_options = syntax; 105617721Speter return ret; 105717721Speter} 105817721Speter 105917721Speter/* This table gives an error message for each of the error codes listed 106066525Speter in regex.h. Obviously the order here has to be same as there. 106166525Speter POSIX doesn't require that we do anything for REG_NOERROR, 106266525Speter but why not be nice? */ 106317721Speter 106466525Speterstatic const char *re_error_msgid[] = 106566525Speter { 106666525Speter gettext_noop ("Success"), /* REG_NOERROR */ 106766525Speter gettext_noop ("No match"), /* REG_NOMATCH */ 106866525Speter gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ 106966525Speter gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ 107066525Speter gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ 107166525Speter gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ 107266525Speter gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ 107366525Speter gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ 107466525Speter gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ 107566525Speter gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ 107666525Speter gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ 107766525Speter gettext_noop ("Invalid range end"), /* REG_ERANGE */ 107866525Speter gettext_noop ("Memory exhausted"), /* REG_ESPACE */ 107966525Speter gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ 108066525Speter gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 108166525Speter gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 108266525Speter gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 108317721Speter }; 108417721Speter 108566525Speter/* Avoiding alloca during matching, to placate r_alloc. */ 108666525Speter 108766525Speter/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 108866525Speter searching and matching functions should not call alloca. On some 108966525Speter systems, alloca is implemented in terms of malloc, and if we're 109066525Speter using the relocating allocator routines, then malloc could cause a 109166525Speter relocation, which might (if the strings being searched are in the 109266525Speter ralloc heap) shift the data out from underneath the regexp 109366525Speter routines. 109466525Speter 109566525Speter Here's another reason to avoid allocation: Emacs 109666525Speter processes input from X in a signal handler; processing X input may 109766525Speter call malloc; if input arrives while a matching routine is calling 109866525Speter malloc, then we're scrod. But Emacs can't just block input while 109966525Speter calling matching routines; then we don't notice interrupts when 110066525Speter they come in. So, Emacs blocks input around all regexp calls 110166525Speter except the matching calls, which it leaves unprotected, in the 110266525Speter faith that they will not malloc. */ 110366525Speter 110466525Speter/* Normally, this is fine. */ 110566525Speter#define MATCH_MAY_ALLOCATE 110666525Speter 110766525Speter/* When using GNU C, we are not REALLY using the C alloca, no matter 110866525Speter what config.h may say. So don't take precautions for it. */ 110966525Speter#ifdef __GNUC__ 111066525Speter#undef C_ALLOCA 111166525Speter#endif 111266525Speter 111366525Speter/* The match routines may not allocate if (1) they would do it with malloc 111466525Speter and (2) it's not safe for them to use malloc. 111566525Speter Note that if REL_ALLOC is defined, matching would not use malloc for the 111666525Speter failure stack, but we would still use it for the register vectors; 111766525Speter so REL_ALLOC should not affect this. */ 111866525Speter#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) 111966525Speter#undef MATCH_MAY_ALLOCATE 112066525Speter#endif 112166525Speter 112266525Speter 112366525Speter/* Failure stack declarations and macros; both re_compile_fastmap and 112466525Speter re_match_2 use a failure stack. These have to be macros because of 112566525Speter REGEX_ALLOCATE_STACK. */ 112666525Speter 112766525Speter 112866525Speter/* Approximate number of failure points for which to initially allocate space 112966525Speter when matching. If this number is exceeded, we allocate more 113066525Speter space, so it is not a hard limit. */ 113166525Speter#ifndef INIT_FAILURE_ALLOC 113266525Speter#define INIT_FAILURE_ALLOC 20 113366525Speter#endif 113466525Speter 113566525Speter/* Roughly the maximum number of failure points on the stack. Would be 113666525Speter exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. 113766525Speter This is a variable only so users of regex can assign to it; we never 113866525Speter change it ourselves. */ 113966525Speter#if defined (MATCH_MAY_ALLOCATE) 114066525Speter/* Note that 4400 is enough to cause a crash on Alpha OSF/1, 114166525Speter whose default stack limit is 2mb. In order for a larger 114266525Speter value to work reliably, you have to try to make it accord 114366525Speter with the process stack limit. */ 114466525Speterint re_max_failures = 40000; 114566525Speter#else 114666525Speterint re_max_failures = 4000; 114766525Speter#endif 114866525Speter 114966525Speterunion fail_stack_elt 115066525Speter{ 115166525Speter unsigned char *pointer; 115266525Speter int integer; 115366525Speter}; 115466525Speter 115566525Spetertypedef union fail_stack_elt fail_stack_elt_t; 115666525Speter 115766525Spetertypedef struct 115866525Speter{ 115966525Speter fail_stack_elt_t *stack; 116066525Speter unsigned size; 116166525Speter unsigned avail; /* Offset of next open position. */ 116266525Speter} fail_stack_type; 116366525Speter 116466525Speter#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 116566525Speter#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 116666525Speter#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 116766525Speter 116866525Speter 116966525Speter/* Define macros to initialize and free the failure stack. 117066525Speter Do `return -2' if the alloc fails. */ 117166525Speter 117266525Speter#ifdef MATCH_MAY_ALLOCATE 117366525Speter#define INIT_FAIL_STACK() \ 117466525Speter do { \ 117566525Speter fail_stack.stack = (fail_stack_elt_t *) \ 117666525Speter REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ 117766525Speter * sizeof (fail_stack_elt_t)); \ 117866525Speter \ 117966525Speter if (fail_stack.stack == NULL) \ 118066525Speter return -2; \ 118166525Speter \ 118266525Speter fail_stack.size = INIT_FAILURE_ALLOC; \ 118366525Speter fail_stack.avail = 0; \ 118466525Speter } while (0) 118566525Speter 118666525Speter#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) 118766525Speter#else 118866525Speter#define INIT_FAIL_STACK() \ 118966525Speter do { \ 119066525Speter fail_stack.avail = 0; \ 119166525Speter } while (0) 119266525Speter 119366525Speter#define RESET_FAIL_STACK() 119466525Speter#endif 119566525Speter 119666525Speter 119766525Speter/* Double the size of FAIL_STACK, up to a limit 119866525Speter which allows approximately `re_max_failures' items. 119966525Speter 120066525Speter Return 1 if succeeds, and 0 if either ran out of memory 120166525Speter allocating space for it or it was already too large. 120266525Speter 120366525Speter REGEX_REALLOCATE_STACK requires `destination' be declared. */ 120466525Speter 120566525Speter/* Factor to increase the failure stack size by 120666525Speter when we increase it. 120766525Speter This used to be 2, but 2 was too wasteful 120866525Speter because the old discarded stacks added up to as much space 120966525Speter were as ultimate, maximum-size stack. */ 121066525Speter#define FAIL_STACK_GROWTH_FACTOR 4 121166525Speter 121266525Speter#define GROW_FAIL_STACK(fail_stack) \ 121366525Speter (((fail_stack).size * sizeof (fail_stack_elt_t) \ 121466525Speter >= re_max_failures * TYPICAL_FAILURE_SIZE) \ 121566525Speter ? 0 \ 121666525Speter : ((fail_stack).stack \ 121766525Speter = (fail_stack_elt_t *) \ 121866525Speter REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 121966525Speter (fail_stack).size * sizeof (fail_stack_elt_t), \ 122066525Speter MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \ 122166525Speter ((fail_stack).size * sizeof (fail_stack_elt_t) \ 122266525Speter * FAIL_STACK_GROWTH_FACTOR))), \ 122366525Speter \ 122466525Speter (fail_stack).stack == NULL \ 122566525Speter ? 0 \ 122666525Speter : ((fail_stack).size \ 122766525Speter = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \ 122866525Speter ((fail_stack).size * sizeof (fail_stack_elt_t) \ 122966525Speter * FAIL_STACK_GROWTH_FACTOR)) \ 123066525Speter / sizeof (fail_stack_elt_t)), \ 123166525Speter 1))) 123266525Speter 123366525Speter 123466525Speter/* Push pointer POINTER on FAIL_STACK. 123566525Speter Return 1 if was able to do so and 0 if ran out of memory allocating 123666525Speter space to do so. */ 123766525Speter#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 123866525Speter ((FAIL_STACK_FULL () \ 123966525Speter && !GROW_FAIL_STACK (FAIL_STACK)) \ 124066525Speter ? 0 \ 124166525Speter : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 124266525Speter 1)) 124366525Speter 124466525Speter/* Push a pointer value onto the failure stack. 124566525Speter Assumes the variable `fail_stack'. Probably should only 124666525Speter be called from within `PUSH_FAILURE_POINT'. */ 124766525Speter#define PUSH_FAILURE_POINTER(item) \ 124866525Speter fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 124966525Speter 125066525Speter/* This pushes an integer-valued item onto the failure stack. 125166525Speter Assumes the variable `fail_stack'. Probably should only 125266525Speter be called from within `PUSH_FAILURE_POINT'. */ 125366525Speter#define PUSH_FAILURE_INT(item) \ 125466525Speter fail_stack.stack[fail_stack.avail++].integer = (item) 125566525Speter 125666525Speter/* Push a fail_stack_elt_t value onto the failure stack. 125766525Speter Assumes the variable `fail_stack'. Probably should only 125866525Speter be called from within `PUSH_FAILURE_POINT'. */ 125966525Speter#define PUSH_FAILURE_ELT(item) \ 126066525Speter fail_stack.stack[fail_stack.avail++] = (item) 126166525Speter 126266525Speter/* These three POP... operations complement the three PUSH... operations. 126366525Speter All assume that `fail_stack' is nonempty. */ 126466525Speter#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer 126566525Speter#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer 126666525Speter#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] 126766525Speter 126866525Speter/* Used to omit pushing failure point id's when we're not debugging. */ 126966525Speter#ifdef DEBUG 127066525Speter#define DEBUG_PUSH PUSH_FAILURE_INT 127166525Speter#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () 127266525Speter#else 127366525Speter#define DEBUG_PUSH(item) 127466525Speter#define DEBUG_POP(item_addr) 127566525Speter#endif 127666525Speter 127766525Speter 127866525Speter/* Push the information about the state we will need 127966525Speter if we ever fail back to it. 128066525Speter 128166525Speter Requires variables fail_stack, regstart, regend, reg_info, and 128266525Speter num_regs be declared. GROW_FAIL_STACK requires `destination' be 128366525Speter declared. 128466525Speter 128566525Speter Does `return FAILURE_CODE' if runs out of memory. */ 128666525Speter 128766525Speter#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ 128866525Speter do { \ 128966525Speter char *destination; \ 129066525Speter /* Must be int, so when we don't save any registers, the arithmetic \ 129166525Speter of 0 + -1 isn't done as unsigned. */ \ 129266525Speter int this_reg; \ 129366525Speter \ 129466525Speter DEBUG_STATEMENT (failure_id++); \ 129566525Speter DEBUG_STATEMENT (nfailure_points_pushed++); \ 129666525Speter DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 129766525Speter DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 129866525Speter DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 129966525Speter \ 130066525Speter DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ 130166525Speter DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 130266525Speter \ 130366525Speter /* Ensure we have enough space allocated for what we will push. */ \ 130466525Speter while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 130566525Speter { \ 130666525Speter if (!GROW_FAIL_STACK (fail_stack)) \ 130766525Speter return failure_code; \ 130866525Speter \ 130966525Speter DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 131066525Speter (fail_stack).size); \ 131166525Speter DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 131266525Speter } \ 131366525Speter \ 131466525Speter /* Push the info, starting with the registers. */ \ 131566525Speter DEBUG_PRINT1 ("\n"); \ 131666525Speter \ 131766525Speter if (1) \ 131866525Speter for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ 131966525Speter this_reg++) \ 132066525Speter { \ 132166525Speter DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ 132266525Speter DEBUG_STATEMENT (num_regs_pushed++); \ 132366525Speter \ 132466525Speter DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 132566525Speter PUSH_FAILURE_POINTER (regstart[this_reg]); \ 132666525Speter \ 132766525Speter DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ 132866525Speter PUSH_FAILURE_POINTER (regend[this_reg]); \ 132966525Speter \ 133066525Speter DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ 133166525Speter DEBUG_PRINT2 (" match_null=%d", \ 133266525Speter REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ 133366525Speter DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ 133466525Speter DEBUG_PRINT2 (" matched_something=%d", \ 133566525Speter MATCHED_SOMETHING (reg_info[this_reg])); \ 133666525Speter DEBUG_PRINT2 (" ever_matched=%d", \ 133766525Speter EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ 133866525Speter DEBUG_PRINT1 ("\n"); \ 133966525Speter PUSH_FAILURE_ELT (reg_info[this_reg].word); \ 134066525Speter } \ 134166525Speter \ 134266525Speter DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ 134366525Speter PUSH_FAILURE_INT (lowest_active_reg); \ 134466525Speter \ 134566525Speter DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ 134666525Speter PUSH_FAILURE_INT (highest_active_reg); \ 134766525Speter \ 134866525Speter DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ 134966525Speter DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 135066525Speter PUSH_FAILURE_POINTER (pattern_place); \ 135166525Speter \ 135266525Speter DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ 135366525Speter DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 135466525Speter size2); \ 135566525Speter DEBUG_PRINT1 ("'\n"); \ 135666525Speter PUSH_FAILURE_POINTER (string_place); \ 135766525Speter \ 135866525Speter DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 135966525Speter DEBUG_PUSH (failure_id); \ 136066525Speter } while (0) 136166525Speter 136266525Speter/* This is the number of items that are pushed and popped on the stack 136366525Speter for each register. */ 136466525Speter#define NUM_REG_ITEMS 3 136566525Speter 136666525Speter/* Individual items aside from the registers. */ 136766525Speter#ifdef DEBUG 136866525Speter#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ 136966525Speter#else 137066525Speter#define NUM_NONREG_ITEMS 4 137166525Speter#endif 137266525Speter 137366525Speter/* Estimate the size of data pushed by a typical failure stack entry. 137466525Speter An estimate is all we need, because all we use this for 137566525Speter is to choose a limit for how big to make the failure stack. */ 137666525Speter 137766525Speter#define TYPICAL_FAILURE_SIZE 20 137866525Speter 137966525Speter/* This is how many items we actually use for a failure point. 138066525Speter It depends on the regexp. */ 138166525Speter#define NUM_FAILURE_ITEMS \ 138266525Speter (((0 \ 138366525Speter ? 0 : highest_active_reg - lowest_active_reg + 1) \ 138466525Speter * NUM_REG_ITEMS) \ 138566525Speter + NUM_NONREG_ITEMS) 138666525Speter 138766525Speter/* How many items can still be added to the stack without overflowing it. */ 138866525Speter#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) 138966525Speter 139066525Speter 139166525Speter/* Pops what PUSH_FAIL_STACK pushes. 139266525Speter 139366525Speter We restore into the parameters, all of which should be lvalues: 139466525Speter STR -- the saved data position. 139566525Speter PAT -- the saved pattern position. 139666525Speter LOW_REG, HIGH_REG -- the highest and lowest active registers. 139766525Speter REGSTART, REGEND -- arrays of string positions. 139866525Speter REG_INFO -- array of information about each subexpression. 139966525Speter 140066525Speter Also assumes the variables `fail_stack' and (if debugging), `bufp', 140166525Speter `pend', `string1', `size1', `string2', and `size2'. */ 140266525Speter 140366525Speter#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 140466525Speter{ \ 140566525Speter DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ 140666525Speter int this_reg; \ 140766525Speter const unsigned char *string_temp; \ 140866525Speter \ 140966525Speter assert (!FAIL_STACK_EMPTY ()); \ 141066525Speter \ 141166525Speter /* Remove failure points and point to how many regs pushed. */ \ 141266525Speter DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 141366525Speter DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 141466525Speter DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 141566525Speter \ 141666525Speter assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 141766525Speter \ 141866525Speter DEBUG_POP (&failure_id); \ 141966525Speter DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 142066525Speter \ 142166525Speter /* If the saved string location is NULL, it came from an \ 142266525Speter on_failure_keep_string_jump opcode, and we want to throw away the \ 142366525Speter saved NULL, thus retaining our current position in the string. */ \ 142466525Speter string_temp = POP_FAILURE_POINTER (); \ 142566525Speter if (string_temp != NULL) \ 142666525Speter str = (const char *) string_temp; \ 142766525Speter \ 142866525Speter DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ 142966525Speter DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 143066525Speter DEBUG_PRINT1 ("'\n"); \ 143166525Speter \ 143266525Speter pat = (unsigned char *) POP_FAILURE_POINTER (); \ 143366525Speter DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ 143466525Speter DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ 143566525Speter \ 143666525Speter /* Restore register info. */ \ 143766525Speter high_reg = (unsigned) POP_FAILURE_INT (); \ 143866525Speter DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ 143966525Speter \ 144066525Speter low_reg = (unsigned) POP_FAILURE_INT (); \ 144166525Speter DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ 144266525Speter \ 144366525Speter if (1) \ 144466525Speter for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 144566525Speter { \ 144666525Speter DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ 144766525Speter \ 144866525Speter reg_info[this_reg].word = POP_FAILURE_ELT (); \ 144966525Speter DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ 145066525Speter \ 145166525Speter regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 145266525Speter DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ 145366525Speter \ 145466525Speter regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 145566525Speter DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 145666525Speter } \ 145766525Speter else \ 145866525Speter { \ 145966525Speter for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 146066525Speter { \ 146166525Speter reg_info[this_reg].word.integer = 0; \ 146266525Speter regend[this_reg] = 0; \ 146366525Speter regstart[this_reg] = 0; \ 146466525Speter } \ 146566525Speter highest_active_reg = high_reg; \ 146666525Speter } \ 146766525Speter \ 146866525Speter set_regs_matched_done = 0; \ 146966525Speter DEBUG_STATEMENT (nfailure_points_popped++); \ 147066525Speter} /* POP_FAILURE_POINT */ 147166525Speter 147266525Speter 147366525Speter 147466525Speter/* Structure for per-register (a.k.a. per-group) information. 147566525Speter Other register information, such as the 147666525Speter starting and ending positions (which are addresses), and the list of 147766525Speter inner groups (which is a bits list) are maintained in separate 147866525Speter variables. 147966525Speter 148066525Speter We are making a (strictly speaking) nonportable assumption here: that 148166525Speter the compiler will pack our bit fields into something that fits into 148266525Speter the type of `word', i.e., is something that fits into one item on the 148366525Speter failure stack. */ 148466525Speter 148566525Spetertypedef union 148666525Speter{ 148766525Speter fail_stack_elt_t word; 148866525Speter struct 148966525Speter { 149066525Speter /* This field is one if this group can match the empty string, 149166525Speter zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ 149266525Speter#define MATCH_NULL_UNSET_VALUE 3 149366525Speter unsigned match_null_string_p : 2; 149466525Speter unsigned is_active : 1; 149566525Speter unsigned matched_something : 1; 149666525Speter unsigned ever_matched_something : 1; 149766525Speter } bits; 149866525Speter} register_info_type; 149966525Speter 150066525Speter#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) 150166525Speter#define IS_ACTIVE(R) ((R).bits.is_active) 150266525Speter#define MATCHED_SOMETHING(R) ((R).bits.matched_something) 150366525Speter#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) 150466525Speter 150566525Speter 150666525Speter/* Call this when have matched a real character; it sets `matched' flags 150766525Speter for the subexpressions which we are currently inside. Also records 150866525Speter that those subexprs have matched. */ 150966525Speter#define SET_REGS_MATCHED() \ 151066525Speter do \ 151166525Speter { \ 151266525Speter if (!set_regs_matched_done) \ 151366525Speter { \ 151466525Speter unsigned r; \ 151566525Speter set_regs_matched_done = 1; \ 151666525Speter for (r = lowest_active_reg; r <= highest_active_reg; r++) \ 151766525Speter { \ 151866525Speter MATCHED_SOMETHING (reg_info[r]) \ 151966525Speter = EVER_MATCHED_SOMETHING (reg_info[r]) \ 152066525Speter = 1; \ 152166525Speter } \ 152266525Speter } \ 152366525Speter } \ 152466525Speter while (0) 152566525Speter 152666525Speter/* Registers are set to a sentinel when they haven't yet matched. */ 152766525Speterstatic char reg_unset_dummy; 152866525Speter#define REG_UNSET_VALUE (®_unset_dummy) 152966525Speter#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 153066525Speter 153117721Speter/* Subroutine declarations and macros for regex_compile. */ 153217721Speter 153317721Speterstatic void store_op1 (), store_op2 (); 153417721Speterstatic void insert_op1 (), insert_op2 (); 153517721Speterstatic boolean at_begline_loc_p (), at_endline_loc_p (); 153617721Speterstatic boolean group_in_compile_stack (); 153717721Speter 153866525Speter/* Fetch the next character in the uncompiled pattern---translating it 153917721Speter if necessary. Also cast from a signed character in the constant 154017721Speter string passed to us by the user to an unsigned char that we can use 154117721Speter as an array index (in, e.g., `translate'). */ 154266525Speter#ifndef PATFETCH 154317721Speter#define PATFETCH(c) \ 154417721Speter do {if (p == pend) return REG_EEND; \ 154517721Speter c = (unsigned char) *p++; \ 154666525Speter if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ 154717721Speter } while (0) 154866525Speter#endif 154917721Speter 155017721Speter/* Fetch the next character in the uncompiled pattern, with no 155166525Speter translation. */ 155217721Speter#define PATFETCH_RAW(c) \ 155317721Speter do {if (p == pend) return REG_EEND; \ 155466525Speter c = (unsigned char) *p++; \ 155517721Speter } while (0) 155617721Speter 155717721Speter/* Go backwards one character in the pattern. */ 155817721Speter#define PATUNFETCH p-- 155917721Speter 156017721Speter 156117721Speter/* If `translate' is non-null, return translate[D], else just D. We 156217721Speter cast the subscript to translate because some data is declared as 156317721Speter `char *', to avoid warnings when a string constant is passed. But 156417721Speter when we use a character as a subscript we must make it unsigned. */ 156566525Speter#ifndef TRANSLATE 156666525Speter#define TRANSLATE(d) \ 156766525Speter (RE_TRANSLATE_P (translate) \ 156866525Speter ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d)) 156966525Speter#endif 157017721Speter 157117721Speter 157217721Speter/* Macros for outputting the compiled pattern into `buffer'. */ 157317721Speter 157417721Speter/* If the buffer isn't allocated when it comes in, use this. */ 157517721Speter#define INIT_BUF_SIZE 32 157617721Speter 157766525Speter/* Make sure we have at least N more bytes of space in buffer. */ 157817721Speter#define GET_BUFFER_SPACE(n) \ 157917721Speter while (b - bufp->buffer + (n) > bufp->allocated) \ 158017721Speter EXTEND_BUFFER () 158117721Speter 158217721Speter/* Make sure we have one more byte of buffer space and then add C to it. */ 158317721Speter#define BUF_PUSH(c) \ 158417721Speter do { \ 158517721Speter GET_BUFFER_SPACE (1); \ 158617721Speter *b++ = (unsigned char) (c); \ 158717721Speter } while (0) 158817721Speter 158917721Speter 159017721Speter/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ 159117721Speter#define BUF_PUSH_2(c1, c2) \ 159217721Speter do { \ 159317721Speter GET_BUFFER_SPACE (2); \ 159417721Speter *b++ = (unsigned char) (c1); \ 159517721Speter *b++ = (unsigned char) (c2); \ 159617721Speter } while (0) 159717721Speter 159817721Speter 159966525Speter/* As with BUF_PUSH_2, except for three bytes. */ 160017721Speter#define BUF_PUSH_3(c1, c2, c3) \ 160117721Speter do { \ 160217721Speter GET_BUFFER_SPACE (3); \ 160317721Speter *b++ = (unsigned char) (c1); \ 160417721Speter *b++ = (unsigned char) (c2); \ 160517721Speter *b++ = (unsigned char) (c3); \ 160617721Speter } while (0) 160717721Speter 160817721Speter 160917721Speter/* Store a jump with opcode OP at LOC to location TO. We store a 161066525Speter relative address offset by the three bytes the jump itself occupies. */ 161117721Speter#define STORE_JUMP(op, loc, to) \ 161217721Speter store_op1 (op, loc, (to) - (loc) - 3) 161317721Speter 161417721Speter/* Likewise, for a two-argument jump. */ 161517721Speter#define STORE_JUMP2(op, loc, to, arg) \ 161617721Speter store_op2 (op, loc, (to) - (loc) - 3, arg) 161717721Speter 161866525Speter/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 161917721Speter#define INSERT_JUMP(op, loc, to) \ 162017721Speter insert_op1 (op, loc, (to) - (loc) - 3, b) 162117721Speter 162217721Speter/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 162317721Speter#define INSERT_JUMP2(op, loc, to, arg) \ 162417721Speter insert_op2 (op, loc, (to) - (loc) - 3, arg, b) 162517721Speter 162617721Speter 162717721Speter/* This is not an arbitrary limit: the arguments which represent offsets 162866525Speter into the pattern are two bytes long. So if 2^16 bytes turns out to 162917721Speter be too small, many things would have to change. */ 163017721Speter#define MAX_BUF_SIZE (1L << 16) 163117721Speter 163217721Speter 163317721Speter/* Extend the buffer by twice its current size via realloc and 163417721Speter reset the pointers that pointed into the old block to point to the 163517721Speter correct places in the new one. If extending the buffer results in it 163666525Speter being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 163717721Speter#define EXTEND_BUFFER() \ 163866525Speter do { \ 163917721Speter unsigned char *old_buffer = bufp->buffer; \ 164066525Speter if (bufp->allocated == MAX_BUF_SIZE) \ 164117721Speter return REG_ESIZE; \ 164217721Speter bufp->allocated <<= 1; \ 164317721Speter if (bufp->allocated > MAX_BUF_SIZE) \ 164466525Speter bufp->allocated = MAX_BUF_SIZE; \ 164517721Speter bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ 164617721Speter if (bufp->buffer == NULL) \ 164717721Speter return REG_ESPACE; \ 164817721Speter /* If the buffer moved, move all the pointers into it. */ \ 164917721Speter if (old_buffer != bufp->buffer) \ 165017721Speter { \ 165166525Speter b = (b - old_buffer) + bufp->buffer; \ 165266525Speter begalt = (begalt - old_buffer) + bufp->buffer; \ 165366525Speter if (fixup_alt_jump) \ 165466525Speter fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 165566525Speter if (laststart) \ 165666525Speter laststart = (laststart - old_buffer) + bufp->buffer; \ 165766525Speter if (pending_exact) \ 165866525Speter pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ 165917721Speter } \ 166017721Speter } while (0) 166117721Speter 166217721Speter 166317721Speter/* Since we have one byte reserved for the register number argument to 166417721Speter {start,stop}_memory, the maximum number of groups we can report 166517721Speter things about is what fits in that byte. */ 166617721Speter#define MAX_REGNUM 255 166717721Speter 166817721Speter/* But patterns can have more than `MAX_REGNUM' registers. We just 166917721Speter ignore the excess. */ 167017721Spetertypedef unsigned regnum_t; 167117721Speter 167217721Speter 167317721Speter/* Macros for the compile stack. */ 167417721Speter 167517721Speter/* Since offsets can go either forwards or backwards, this type needs to 167666525Speter be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 167717721Spetertypedef int pattern_offset_t; 167817721Speter 167917721Spetertypedef struct 168017721Speter{ 168117721Speter pattern_offset_t begalt_offset; 168217721Speter pattern_offset_t fixup_alt_jump; 168317721Speter pattern_offset_t inner_group_offset; 168425839Speter pattern_offset_t laststart_offset; 168517721Speter regnum_t regnum; 168617721Speter} compile_stack_elt_t; 168717721Speter 168817721Speter 168917721Spetertypedef struct 169017721Speter{ 169117721Speter compile_stack_elt_t *stack; 169217721Speter unsigned size; 169317721Speter unsigned avail; /* Offset of next open position. */ 169417721Speter} compile_stack_type; 169517721Speter 169617721Speter 169717721Speter#define INIT_COMPILE_STACK_SIZE 32 169817721Speter 169917721Speter#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 170017721Speter#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 170117721Speter 170266525Speter/* The next available element. */ 170317721Speter#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 170417721Speter 170517721Speter 170666525Speter/* Structure to manage work area for range table. */ 170766525Speterstruct range_table_work_area 170866525Speter{ 170966525Speter int *table; /* actual work area. */ 171066525Speter int allocated; /* allocated size for work area in bytes. */ 171166525Speter int used; /* actually used size in words. */ 171266525Speter}; 171366525Speter 171466525Speter/* Make sure that WORK_AREA can hold more N multibyte characters. */ 171566525Speter#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \ 171666525Speter do { \ 171766525Speter if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \ 171866525Speter { \ 171966525Speter (work_area).allocated += 16 * sizeof (int); \ 172066525Speter if ((work_area).table) \ 172166525Speter (work_area).table \ 172266525Speter = (int *) realloc ((work_area).table, (work_area).allocated); \ 172366525Speter else \ 172466525Speter (work_area).table \ 172566525Speter = (int *) malloc ((work_area).allocated); \ 172666525Speter if ((work_area).table == 0) \ 172766525Speter FREE_STACK_RETURN (REG_ESPACE); \ 172866525Speter } \ 172966525Speter } while (0) 173066525Speter 173166525Speter/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */ 173266525Speter#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \ 173366525Speter do { \ 173466525Speter EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \ 173566525Speter (work_area).table[(work_area).used++] = (range_start); \ 173666525Speter (work_area).table[(work_area).used++] = (range_end); \ 173766525Speter } while (0) 173866525Speter 173966525Speter/* Free allocated memory for WORK_AREA. */ 174066525Speter#define FREE_RANGE_TABLE_WORK_AREA(work_area) \ 174166525Speter do { \ 174266525Speter if ((work_area).table) \ 174366525Speter free ((work_area).table); \ 174466525Speter } while (0) 174566525Speter 174666525Speter#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0) 174766525Speter#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) 174866525Speter#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) 174966525Speter 175066525Speter 175117721Speter/* Set the bit for character C in a list. */ 175266525Speter#define SET_LIST_BIT(c) \ 175366525Speter (b[((unsigned char) (c)) / BYTEWIDTH] \ 175417721Speter |= 1 << (((unsigned char) c) % BYTEWIDTH)) 175517721Speter 175617721Speter 175717721Speter/* Get the next unsigned number in the uncompiled pattern. */ 175866525Speter#define GET_UNSIGNED_NUMBER(num) \ 175917721Speter { if (p != pend) \ 176017721Speter { \ 176166525Speter PATFETCH (c); \ 176266525Speter while (ISDIGIT (c)) \ 176366525Speter { \ 176466525Speter if (num < 0) \ 176566525Speter num = 0; \ 176666525Speter num = num * 10 + c - '0'; \ 176766525Speter if (p == pend) \ 176866525Speter break; \ 176966525Speter PATFETCH (c); \ 177066525Speter } \ 177166525Speter } \ 177225839Speter } 177317721Speter 177417721Speter#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ 177517721Speter 177617721Speter#define IS_CHAR_CLASS(string) \ 177717721Speter (STREQ (string, "alpha") || STREQ (string, "upper") \ 177817721Speter || STREQ (string, "lower") || STREQ (string, "digit") \ 177917721Speter || STREQ (string, "alnum") || STREQ (string, "xdigit") \ 178017721Speter || STREQ (string, "space") || STREQ (string, "print") \ 178117721Speter || STREQ (string, "punct") || STREQ (string, "graph") \ 178217721Speter || STREQ (string, "cntrl") || STREQ (string, "blank")) 178317721Speter 178466525Speter#ifndef MATCH_MAY_ALLOCATE 178566525Speter 178666525Speter/* If we cannot allocate large objects within re_match_2_internal, 178766525Speter we make the fail stack and register vectors global. 178866525Speter The fail stack, we grow to the maximum size when a regexp 178966525Speter is compiled. 179066525Speter The register vectors, we adjust in size each time we 179166525Speter compile a regexp, according to the number of registers it needs. */ 179266525Speter 179366525Speterstatic fail_stack_type fail_stack; 179466525Speter 179566525Speter/* Size with which the following vectors are currently allocated. 179666525Speter That is so we can make them bigger as needed, 179766525Speter but never make them smaller. */ 179866525Speterstatic int regs_allocated_size; 179966525Speter 180066525Speterstatic const char ** regstart, ** regend; 180166525Speterstatic const char ** old_regstart, ** old_regend; 180266525Speterstatic const char **best_regstart, **best_regend; 180366525Speterstatic register_info_type *reg_info; 180466525Speterstatic const char **reg_dummy; 180566525Speterstatic register_info_type *reg_info_dummy; 180666525Speter 180766525Speter/* Make the register vectors big enough for NUM_REGS registers, 180866525Speter but don't make them smaller. */ 180966525Speter 181066525Speterstatic 181166525Speterregex_grow_registers (num_regs) 181266525Speter int num_regs; 181366525Speter{ 181466525Speter if (num_regs > regs_allocated_size) 181566525Speter { 181666525Speter RETALLOC_IF (regstart, num_regs, const char *); 181766525Speter RETALLOC_IF (regend, num_regs, const char *); 181866525Speter RETALLOC_IF (old_regstart, num_regs, const char *); 181966525Speter RETALLOC_IF (old_regend, num_regs, const char *); 182066525Speter RETALLOC_IF (best_regstart, num_regs, const char *); 182166525Speter RETALLOC_IF (best_regend, num_regs, const char *); 182266525Speter RETALLOC_IF (reg_info, num_regs, register_info_type); 182366525Speter RETALLOC_IF (reg_dummy, num_regs, const char *); 182466525Speter RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); 182566525Speter 182666525Speter regs_allocated_size = num_regs; 182766525Speter } 182866525Speter} 182966525Speter 183066525Speter#endif /* not MATCH_MAY_ALLOCATE */ 183166525Speter 183217721Speter/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. 183317721Speter Returns one of error codes defined in `regex.h', or zero for success. 183417721Speter 183517721Speter Assumes the `allocated' (and perhaps `buffer') and `translate' 183617721Speter fields are set in BUFP on entry. 183717721Speter 183817721Speter If it succeeds, results are put in BUFP (if it returns an error, the 183917721Speter contents of BUFP are undefined): 184017721Speter `buffer' is the compiled pattern; 184117721Speter `syntax' is set to SYNTAX; 184217721Speter `used' is set to the length of the compiled pattern; 184317721Speter `fastmap_accurate' is zero; 184417721Speter `re_nsub' is the number of subexpressions in PATTERN; 184517721Speter `not_bol' and `not_eol' are zero; 184625839Speter 184717721Speter The `fastmap' and `newline_anchor' fields are neither 184817721Speter examined nor set. */ 184917721Speter 185066525Speter/* Return, freeing storage we allocated. */ 185166525Speter#define FREE_STACK_RETURN(value) \ 185266525Speter do { \ 185366525Speter FREE_RANGE_TABLE_WORK_AREA (range_table_work); \ 185466525Speter free (compile_stack.stack); \ 185566525Speter return value; \ 185666525Speter } while (0) 185766525Speter 185817721Speterstatic reg_errcode_t 185917721Speterregex_compile (pattern, size, syntax, bufp) 186017721Speter const char *pattern; 186117721Speter int size; 186217721Speter reg_syntax_t syntax; 186317721Speter struct re_pattern_buffer *bufp; 186417721Speter{ 186517721Speter /* We fetch characters from PATTERN here. Even though PATTERN is 186617721Speter `char *' (i.e., signed), we declare these variables as unsigned, so 186717721Speter they can be reliably used as array indices. */ 186866525Speter register unsigned int c, c1; 186925839Speter 187025839Speter /* A random temporary spot in PATTERN. */ 187117721Speter const char *p1; 187217721Speter 187317721Speter /* Points to the end of the buffer, where we should append. */ 187417721Speter register unsigned char *b; 187525839Speter 187617721Speter /* Keeps track of unclosed groups. */ 187717721Speter compile_stack_type compile_stack; 187817721Speter 187917721Speter /* Points to the current (ending) position in the pattern. */ 188066525Speter#ifdef AIX 188166525Speter /* `const' makes AIX compiler fail. */ 188266525Speter char *p = pattern; 188366525Speter#else 188417721Speter const char *p = pattern; 188566525Speter#endif 188617721Speter const char *pend = pattern + size; 188725839Speter 188817721Speter /* How to translate the characters in the pattern. */ 188966525Speter RE_TRANSLATE_TYPE translate = bufp->translate; 189017721Speter 189117721Speter /* Address of the count-byte of the most recently inserted `exactn' 189217721Speter command. This makes it possible to tell if a new exact-match 189317721Speter character can be added to that command or if the character requires 189417721Speter a new `exactn' command. */ 189517721Speter unsigned char *pending_exact = 0; 189617721Speter 189717721Speter /* Address of start of the most recently finished expression. 189817721Speter This tells, e.g., postfix * where to find the start of its 189917721Speter operand. Reset at the beginning of groups and alternatives. */ 190017721Speter unsigned char *laststart = 0; 190117721Speter 190217721Speter /* Address of beginning of regexp, or inside of last group. */ 190317721Speter unsigned char *begalt; 190417721Speter 190517721Speter /* Place in the uncompiled pattern (i.e., the {) to 190617721Speter which to go back if the interval is invalid. */ 190717721Speter const char *beg_interval; 190825839Speter 190917721Speter /* Address of the place where a forward jump should go to the end of 191066525Speter the containing expression. Each alternative of an `or' -- except the 191117721Speter last -- ends with a forward jump of this sort. */ 191217721Speter unsigned char *fixup_alt_jump = 0; 191317721Speter 191417721Speter /* Counts open-groups as they are encountered. Remembered for the 191517721Speter matching close-group on the compile stack, so the same register 191617721Speter number is put in the stop_memory as the start_memory. */ 191717721Speter regnum_t regnum = 0; 191817721Speter 191966525Speter /* Work area for range table of charset. */ 192066525Speter struct range_table_work_area range_table_work; 192166525Speter 192217721Speter#ifdef DEBUG 192317721Speter DEBUG_PRINT1 ("\nCompiling pattern: "); 192417721Speter if (debug) 192517721Speter { 192617721Speter unsigned debug_count; 192725839Speter 192817721Speter for (debug_count = 0; debug_count < size; debug_count++) 192966525Speter putchar (pattern[debug_count]); 193017721Speter putchar ('\n'); 193117721Speter } 193217721Speter#endif /* DEBUG */ 193317721Speter 193417721Speter /* Initialize the compile stack. */ 193517721Speter compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); 193617721Speter if (compile_stack.stack == NULL) 193717721Speter return REG_ESPACE; 193817721Speter 193917721Speter compile_stack.size = INIT_COMPILE_STACK_SIZE; 194017721Speter compile_stack.avail = 0; 194117721Speter 194266525Speter range_table_work.table = 0; 194366525Speter range_table_work.allocated = 0; 194466525Speter 194517721Speter /* Initialize the pattern buffer. */ 194617721Speter bufp->syntax = syntax; 194717721Speter bufp->fastmap_accurate = 0; 194817721Speter bufp->not_bol = bufp->not_eol = 0; 194917721Speter 195017721Speter /* Set `used' to zero, so that if we return an error, the pattern 195117721Speter printer (for debugging) will think there's no pattern. We reset it 195217721Speter at the end. */ 195317721Speter bufp->used = 0; 195425839Speter 195517721Speter /* Always count groups, whether or not bufp->no_sub is set. */ 195625839Speter bufp->re_nsub = 0; 195717721Speter 195866525Speter#ifdef emacs 195966525Speter /* bufp->multibyte is set before regex_compile is called, so don't alter 196066525Speter it. */ 196166525Speter#else /* not emacs */ 196266525Speter /* Nothing is recognized as a multibyte character. */ 196366525Speter bufp->multibyte = 0; 196466525Speter#endif 196566525Speter 196617721Speter#if !defined (emacs) && !defined (SYNTAX_TABLE) 196717721Speter /* Initialize the syntax table. */ 196817721Speter init_syntax_once (); 196917721Speter#endif 197017721Speter 197117721Speter if (bufp->allocated == 0) 197217721Speter { 197317721Speter if (bufp->buffer) 197417721Speter { /* If zero allocated, but buffer is non-null, try to realloc 197566525Speter enough space. This loses if buffer's address is bogus, but 197666525Speter that is the user's responsibility. */ 197766525Speter RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); 197866525Speter } 197917721Speter else 198066525Speter { /* Caller did not allocate a buffer. Do it for them. */ 198166525Speter bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 198266525Speter } 198366525Speter if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 198417721Speter 198517721Speter bufp->allocated = INIT_BUF_SIZE; 198617721Speter } 198717721Speter 198817721Speter begalt = b = bufp->buffer; 198917721Speter 199017721Speter /* Loop through the uncompiled pattern until we're at the end. */ 199117721Speter while (p != pend) 199217721Speter { 199317721Speter PATFETCH (c); 199417721Speter 199517721Speter switch (c) 199666525Speter { 199766525Speter case '^': 199866525Speter { 199966525Speter if ( /* If at start of pattern, it's an operator. */ 200066525Speter p == pattern + 1 200166525Speter /* If context independent, it's an operator. */ 200266525Speter || syntax & RE_CONTEXT_INDEP_ANCHORS 200366525Speter /* Otherwise, depends on what's come before. */ 200466525Speter || at_begline_loc_p (pattern, p, syntax)) 200566525Speter BUF_PUSH (begline); 200666525Speter else 200766525Speter goto normal_char; 200866525Speter } 200966525Speter break; 201017721Speter 201117721Speter 201266525Speter case '$': 201366525Speter { 201466525Speter if ( /* If at end of pattern, it's an operator. */ 201566525Speter p == pend 201666525Speter /* If context independent, it's an operator. */ 201766525Speter || syntax & RE_CONTEXT_INDEP_ANCHORS 201866525Speter /* Otherwise, depends on what's next. */ 201966525Speter || at_endline_loc_p (p, pend, syntax)) 202066525Speter BUF_PUSH (endline); 202166525Speter else 202266525Speter goto normal_char; 202366525Speter } 202466525Speter break; 202517721Speter 202617721Speter 202717721Speter case '+': 202866525Speter case '?': 202966525Speter if ((syntax & RE_BK_PLUS_QM) 203066525Speter || (syntax & RE_LIMITED_OPS)) 203166525Speter goto normal_char; 203266525Speter handle_plus: 203366525Speter case '*': 203466525Speter /* If there is no previous pattern... */ 203566525Speter if (!laststart) 203666525Speter { 203766525Speter if (syntax & RE_CONTEXT_INVALID_OPS) 203866525Speter FREE_STACK_RETURN (REG_BADRPT); 203966525Speter else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 204066525Speter goto normal_char; 204166525Speter } 204217721Speter 204366525Speter { 204466525Speter /* Are we optimizing this jump? */ 204566525Speter boolean keep_string_p = false; 204625839Speter 204766525Speter /* 1 means zero (many) matches is allowed. */ 204866525Speter char zero_times_ok = 0, many_times_ok = 0; 204917721Speter 205066525Speter /* If there is a sequence of repetition chars, collapse it 205166525Speter down to just one (the right one). We can't combine 205266525Speter interval operators with these because of, e.g., `a{2}*', 205366525Speter which should only match an even number of `a's. */ 205417721Speter 205566525Speter for (;;) 205666525Speter { 205766525Speter zero_times_ok |= c != '+'; 205866525Speter many_times_ok |= c != '?'; 205917721Speter 206066525Speter if (p == pend) 206166525Speter break; 206217721Speter 206366525Speter PATFETCH (c); 206417721Speter 206566525Speter if (c == '*' 206666525Speter || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 206766525Speter ; 206817721Speter 206966525Speter else if (syntax & RE_BK_PLUS_QM && c == '\\') 207066525Speter { 207166525Speter if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 207217721Speter 207366525Speter PATFETCH (c1); 207466525Speter if (!(c1 == '+' || c1 == '?')) 207566525Speter { 207666525Speter PATUNFETCH; 207766525Speter PATUNFETCH; 207866525Speter break; 207966525Speter } 208017721Speter 208166525Speter c = c1; 208266525Speter } 208366525Speter else 208466525Speter { 208566525Speter PATUNFETCH; 208666525Speter break; 208766525Speter } 208817721Speter 208966525Speter /* If we get here, we found another repeat character. */ 209066525Speter } 209117721Speter 209266525Speter /* Star, etc. applied to an empty pattern is equivalent 209366525Speter to an empty pattern. */ 209466525Speter if (!laststart) 209566525Speter break; 209617721Speter 209766525Speter /* Now we know whether or not zero matches is allowed 209866525Speter and also whether or not two or more matches is allowed. */ 209966525Speter if (many_times_ok) 210066525Speter { /* More than one repetition is allowed, so put in at the 210166525Speter end a backward relative jump from `b' to before the next 210266525Speter jump we're going to put in below (which jumps from 210366525Speter laststart to after this jump). 210417721Speter 210566525Speter But if we are at the `*' in the exact sequence `.*\n', 210666525Speter insert an unconditional jump backwards to the ., 210766525Speter instead of the beginning of the loop. This way we only 210866525Speter push a failure point once, instead of every time 210966525Speter through the loop. */ 211066525Speter assert (p - 1 > pattern); 211117721Speter 211266525Speter /* Allocate the space for the jump. */ 211366525Speter GET_BUFFER_SPACE (3); 211417721Speter 211566525Speter /* We know we are not at the first character of the pattern, 211666525Speter because laststart was nonzero. And we've already 211766525Speter incremented `p', by the way, to be the character after 211866525Speter the `*'. Do we have to do something analogous here 211966525Speter for null bytes, because of RE_DOT_NOT_NULL? */ 212066525Speter if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.') 212117721Speter && zero_times_ok 212266525Speter && p < pend 212366525Speter && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n') 212466525Speter && !(syntax & RE_DOT_NEWLINE)) 212566525Speter { /* We have .*\n. */ 212666525Speter STORE_JUMP (jump, b, laststart); 212766525Speter keep_string_p = true; 212866525Speter } 212966525Speter else 213066525Speter /* Anything else. */ 213166525Speter STORE_JUMP (maybe_pop_jump, b, laststart - 3); 213217721Speter 213366525Speter /* We've added more stuff to the buffer. */ 213466525Speter b += 3; 213566525Speter } 213617721Speter 213766525Speter /* On failure, jump from laststart to b + 3, which will be the 213866525Speter end of the buffer after this jump is inserted. */ 213966525Speter GET_BUFFER_SPACE (3); 214066525Speter INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 214166525Speter : on_failure_jump, 214266525Speter laststart, b + 3); 214366525Speter pending_exact = 0; 214466525Speter b += 3; 214517721Speter 214666525Speter if (!zero_times_ok) 214766525Speter { 214866525Speter /* At least one repetition is required, so insert a 214966525Speter `dummy_failure_jump' before the initial 215066525Speter `on_failure_jump' instruction of the loop. This 215166525Speter effects a skip over that instruction the first time 215266525Speter we hit that loop. */ 215366525Speter GET_BUFFER_SPACE (3); 215466525Speter INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 215566525Speter b += 3; 215666525Speter } 215766525Speter } 215817721Speter break; 215917721Speter 216017721Speter 216117721Speter case '.': 216266525Speter laststart = b; 216366525Speter BUF_PUSH (anychar); 216466525Speter break; 216517721Speter 216617721Speter 216766525Speter case '[': 216866525Speter { 216966525Speter CLEAR_RANGE_TABLE_WORK_USED (range_table_work); 217017721Speter 217166525Speter if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 217217721Speter 217366525Speter /* Ensure that we have enough space to push a charset: the 217466525Speter opcode, the length count, and the bitset; 34 bytes in all. */ 217517721Speter GET_BUFFER_SPACE (34); 217617721Speter 217766525Speter laststart = b; 217817721Speter 217966525Speter /* We test `*p == '^' twice, instead of using an if 218066525Speter statement, so we only need one BUF_PUSH. */ 218166525Speter BUF_PUSH (*p == '^' ? charset_not : charset); 218266525Speter if (*p == '^') 218366525Speter p++; 218417721Speter 218566525Speter /* Remember the first position in the bracket expression. */ 218666525Speter p1 = p; 218717721Speter 218866525Speter /* Push the number of bytes in the bitmap. */ 218966525Speter BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 219017721Speter 219166525Speter /* Clear the whole map. */ 219266525Speter bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 219317721Speter 219466525Speter /* charset_not matches newline according to a syntax bit. */ 219566525Speter if ((re_opcode_t) b[-2] == charset_not 219666525Speter && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 219766525Speter SET_LIST_BIT ('\n'); 219817721Speter 219966525Speter /* Read in characters and ranges, setting map bits. */ 220066525Speter for (;;) 220166525Speter { 220266525Speter int len; 220366525Speter boolean escaped_char = false; 220417721Speter 220566525Speter if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 220617721Speter 220766525Speter PATFETCH (c); 220817721Speter 220966525Speter /* \ might escape characters inside [...] and [^...]. */ 221066525Speter if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 221166525Speter { 221266525Speter if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 221317721Speter 221466525Speter PATFETCH (c); 221566525Speter escaped_char = true; 221666525Speter } 221766525Speter else 221866525Speter { 221966525Speter /* Could be the end of the bracket expression. If it's 222066525Speter not (i.e., when the bracket expression is `[]' so 222166525Speter far), the ']' character bit gets set way below. */ 222266525Speter if (c == ']' && p != p1 + 1) 222366525Speter break; 222466525Speter } 222517721Speter 222666525Speter /* If C indicates start of multibyte char, get the 222766525Speter actual character code in C, and set the pattern 222866525Speter pointer P to the next character boundary. */ 222966525Speter if (bufp->multibyte && BASE_LEADING_CODE_P (c)) 223066525Speter { 223166525Speter PATUNFETCH; 223266525Speter c = STRING_CHAR_AND_LENGTH (p, pend - p, len); 223366525Speter p += len; 223466525Speter } 223566525Speter /* What should we do for the character which is 223666525Speter greater than 0x7F, but not BASE_LEADING_CODE_P? 223766525Speter XXX */ 223817721Speter 223966525Speter /* See if we're at the beginning of a possible character 224066525Speter class. */ 224117721Speter 224266525Speter else if (!escaped_char && 224366525Speter syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 224466525Speter { 224566525Speter /* Leave room for the null. */ 224666525Speter char str[CHAR_CLASS_MAX_LENGTH + 1]; 224717721Speter 224866525Speter PATFETCH (c); 224966525Speter c1 = 0; 225025839Speter 225166525Speter /* If pattern is `[[:'. */ 225266525Speter if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 225317721Speter 225466525Speter for (;;) 225566525Speter { 225666525Speter PATFETCH (c); 225766525Speter if (c == ':' || c == ']' || p == pend 225866525Speter || c1 == CHAR_CLASS_MAX_LENGTH) 225966525Speter break; 226066525Speter str[c1++] = c; 226166525Speter } 226266525Speter str[c1] = '\0'; 226317721Speter 226466525Speter /* If isn't a word bracketed by `[:' and `:]': 226566525Speter undo the ending character, the letters, and 226666525Speter leave the leading `:' and `[' (but set bits for 226766525Speter them). */ 226866525Speter if (c == ':' && *p == ']') 226966525Speter { 227066525Speter int ch; 227166525Speter boolean is_alnum = STREQ (str, "alnum"); 227266525Speter boolean is_alpha = STREQ (str, "alpha"); 227366525Speter boolean is_blank = STREQ (str, "blank"); 227466525Speter boolean is_cntrl = STREQ (str, "cntrl"); 227566525Speter boolean is_digit = STREQ (str, "digit"); 227666525Speter boolean is_graph = STREQ (str, "graph"); 227766525Speter boolean is_lower = STREQ (str, "lower"); 227866525Speter boolean is_print = STREQ (str, "print"); 227966525Speter boolean is_punct = STREQ (str, "punct"); 228066525Speter boolean is_space = STREQ (str, "space"); 228166525Speter boolean is_upper = STREQ (str, "upper"); 228266525Speter boolean is_xdigit = STREQ (str, "xdigit"); 228317721Speter 228466525Speter if (!IS_CHAR_CLASS (str)) 228566525Speter FREE_STACK_RETURN (REG_ECTYPE); 228617721Speter 228766525Speter /* Throw away the ] at the end of the character 228866525Speter class. */ 228966525Speter PATFETCH (c); 229017721Speter 229166525Speter if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 229217721Speter 229366525Speter for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 229466525Speter { 229566525Speter int translated = TRANSLATE (ch); 229666525Speter /* This was split into 3 if's to 229766525Speter avoid an arbitrary limit in some compiler. */ 229866525Speter if ( (is_alnum && ISALNUM (ch)) 229966525Speter || (is_alpha && ISALPHA (ch)) 230066525Speter || (is_blank && ISBLANK (ch)) 230166525Speter || (is_cntrl && ISCNTRL (ch))) 230266525Speter SET_LIST_BIT (translated); 230366525Speter if ( (is_digit && ISDIGIT (ch)) 230466525Speter || (is_graph && ISGRAPH (ch)) 230566525Speter || (is_lower && ISLOWER (ch)) 230666525Speter || (is_print && ISPRINT (ch))) 230766525Speter SET_LIST_BIT (translated); 230866525Speter if ( (is_punct && ISPUNCT (ch)) 230966525Speter || (is_space && ISSPACE (ch)) 231066525Speter || (is_upper && ISUPPER (ch)) 231166525Speter || (is_xdigit && ISXDIGIT (ch))) 231266525Speter SET_LIST_BIT (translated); 231366525Speter } 231425839Speter 231566525Speter /* Repeat the loop. */ 231666525Speter continue; 231766525Speter } 231866525Speter else 231966525Speter { 232066525Speter c1++; 232166525Speter while (c1--) 232266525Speter PATUNFETCH; 232366525Speter SET_LIST_BIT ('['); 232417721Speter 232566525Speter /* Because the `:' may starts the range, we 232666525Speter can't simply set bit and repeat the loop. 232766525Speter Instead, just set it to C and handle below. */ 232866525Speter c = ':'; 232966525Speter } 233066525Speter } 233117721Speter 233266525Speter if (p < pend && p[0] == '-' && p[1] != ']') 233366525Speter { 233417721Speter 233566525Speter /* Discard the `-'. */ 233666525Speter PATFETCH (c1); 233717721Speter 233866525Speter /* Fetch the character which ends the range. */ 233966525Speter PATFETCH (c1); 234066525Speter if (bufp->multibyte && BASE_LEADING_CODE_P (c1)) 234166525Speter { 234266525Speter PATUNFETCH; 234366525Speter c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len); 234466525Speter p += len; 234566525Speter } 234617721Speter 234766525Speter if (SINGLE_BYTE_CHAR_P (c) 234866525Speter && ! SINGLE_BYTE_CHAR_P (c1)) 234966525Speter { 235066525Speter /* Handle a range such as \177-\377 in multibyte mode. 235166525Speter Split that into two ranges,, 235266525Speter the low one ending at 0237, and the high one 235366525Speter starting at ...040. */ 235466525Speter int c1_base = (c1 & ~0177) | 040; 235566525Speter SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); 235666525Speter c1 = 0237; 235766525Speter } 235866525Speter else if (!SAME_CHARSET_P (c, c1)) 235966525Speter FREE_STACK_RETURN (REG_ERANGE); 236066525Speter } 236166525Speter else 236266525Speter /* Range from C to C. */ 236366525Speter c1 = c; 236417721Speter 236566525Speter /* Set the range ... */ 236666525Speter if (SINGLE_BYTE_CHAR_P (c)) 236766525Speter /* ... into bitmap. */ 236866525Speter { 236966525Speter unsigned this_char; 237066525Speter int range_start = c, range_end = c1; 237166525Speter 237266525Speter /* If the start is after the end, the range is empty. */ 237366525Speter if (range_start > range_end) 237466525Speter { 237566525Speter if (syntax & RE_NO_EMPTY_RANGES) 237666525Speter FREE_STACK_RETURN (REG_ERANGE); 237766525Speter /* Else, repeat the loop. */ 237866525Speter } 237966525Speter else 238066525Speter { 238166525Speter for (this_char = range_start; this_char <= range_end; 238266525Speter this_char++) 238366525Speter SET_LIST_BIT (TRANSLATE (this_char)); 238466525Speter } 238566525Speter } 238666525Speter else 238766525Speter /* ... into range table. */ 238866525Speter SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); 238966525Speter } 239066525Speter 239166525Speter /* Discard any (non)matching list bytes that are all 0 at the 239266525Speter end of the map. Decrease the map-length byte too. */ 239366525Speter while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 239466525Speter b[-1]--; 239566525Speter b += b[-1]; 239666525Speter 239766525Speter /* Build real range table from work area. */ 239866525Speter if (RANGE_TABLE_WORK_USED (range_table_work)) 239966525Speter { 240066525Speter int i; 240166525Speter int used = RANGE_TABLE_WORK_USED (range_table_work); 240266525Speter 240366525Speter /* Allocate space for COUNT + RANGE_TABLE. Needs two 240466525Speter bytes for COUNT and three bytes for each character. */ 240566525Speter GET_BUFFER_SPACE (2 + used * 3); 240666525Speter 240766525Speter /* Indicate the existence of range table. */ 240866525Speter laststart[1] |= 0x80; 240966525Speter 241066525Speter STORE_NUMBER_AND_INCR (b, used / 2); 241166525Speter for (i = 0; i < used; i++) 241266525Speter STORE_CHARACTER_AND_INCR 241366525Speter (b, RANGE_TABLE_WORK_ELT (range_table_work, i)); 241466525Speter } 241566525Speter } 241666525Speter break; 241766525Speter 241866525Speter 241917721Speter case '(': 242066525Speter if (syntax & RE_NO_BK_PARENS) 242166525Speter goto handle_open; 242266525Speter else 242366525Speter goto normal_char; 242417721Speter 242517721Speter 242666525Speter case ')': 242766525Speter if (syntax & RE_NO_BK_PARENS) 242866525Speter goto handle_close; 242966525Speter else 243066525Speter goto normal_char; 243117721Speter 243217721Speter 243366525Speter case '\n': 243466525Speter if (syntax & RE_NEWLINE_ALT) 243566525Speter goto handle_alt; 243666525Speter else 243766525Speter goto normal_char; 243817721Speter 243917721Speter 244017721Speter case '|': 244166525Speter if (syntax & RE_NO_BK_VBAR) 244266525Speter goto handle_alt; 244366525Speter else 244466525Speter goto normal_char; 244517721Speter 244617721Speter 244766525Speter case '{': 244866525Speter if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 244966525Speter goto handle_interval; 245066525Speter else 245166525Speter goto normal_char; 245217721Speter 245317721Speter 245466525Speter case '\\': 245566525Speter if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 245617721Speter 245766525Speter /* Do not translate the character after the \, so that we can 245866525Speter distinguish, e.g., \B from \b, even if we normally would 245966525Speter translate, e.g., B to b. */ 246066525Speter PATFETCH_RAW (c); 246117721Speter 246266525Speter switch (c) 246366525Speter { 246466525Speter case '(': 246566525Speter if (syntax & RE_NO_BK_PARENS) 246666525Speter goto normal_backslash; 246717721Speter 246866525Speter handle_open: 246966525Speter bufp->re_nsub++; 247066525Speter regnum++; 247117721Speter 247266525Speter if (COMPILE_STACK_FULL) 247366525Speter { 247466525Speter RETALLOC (compile_stack.stack, compile_stack.size << 1, 247566525Speter compile_stack_elt_t); 247666525Speter if (compile_stack.stack == NULL) return REG_ESPACE; 247717721Speter 247866525Speter compile_stack.size <<= 1; 247966525Speter } 248017721Speter 248166525Speter /* These are the values to restore when we hit end of this 248266525Speter group. They are all relative offsets, so that if the 248366525Speter whole pattern moves because of realloc, they will still 248466525Speter be valid. */ 248566525Speter COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 248666525Speter COMPILE_STACK_TOP.fixup_alt_jump 248766525Speter = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 248866525Speter COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 248966525Speter COMPILE_STACK_TOP.regnum = regnum; 249017721Speter 249166525Speter /* We will eventually replace the 0 with the number of 249266525Speter groups inner to this one. But do not push a 249366525Speter start_memory for groups beyond the last one we can 249466525Speter represent in the compiled pattern. */ 249566525Speter if (regnum <= MAX_REGNUM) 249666525Speter { 249766525Speter COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; 249866525Speter BUF_PUSH_3 (start_memory, regnum, 0); 249966525Speter } 250025839Speter 250166525Speter compile_stack.avail++; 250217721Speter 250366525Speter fixup_alt_jump = 0; 250466525Speter laststart = 0; 250566525Speter begalt = b; 250617721Speter /* If we've reached MAX_REGNUM groups, then this open 250717721Speter won't actually generate any code, so we'll have to 250817721Speter clear pending_exact explicitly. */ 250917721Speter pending_exact = 0; 251066525Speter break; 251117721Speter 251217721Speter 251366525Speter case ')': 251466525Speter if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 251517721Speter 251666525Speter if (COMPILE_STACK_EMPTY) 251766525Speter if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 251866525Speter goto normal_backslash; 251966525Speter else 252066525Speter FREE_STACK_RETURN (REG_ERPAREN); 252117721Speter 252266525Speter handle_close: 252366525Speter if (fixup_alt_jump) 252466525Speter { /* Push a dummy failure point at the end of the 252566525Speter alternative for a possible future 252666525Speter `pop_failure_jump' to pop. See comments at 252766525Speter `push_dummy_failure' in `re_match_2'. */ 252866525Speter BUF_PUSH (push_dummy_failure); 252925839Speter 253066525Speter /* We allocated space for this jump when we assigned 253166525Speter to `fixup_alt_jump', in the `handle_alt' case below. */ 253266525Speter STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 253366525Speter } 253417721Speter 253566525Speter /* See similar code for backslashed left paren above. */ 253666525Speter if (COMPILE_STACK_EMPTY) 253766525Speter if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 253866525Speter goto normal_char; 253966525Speter else 254066525Speter FREE_STACK_RETURN (REG_ERPAREN); 254117721Speter 254266525Speter /* Since we just checked for an empty stack above, this 254366525Speter ``can't happen''. */ 254466525Speter assert (compile_stack.avail != 0); 254566525Speter { 254666525Speter /* We don't just want to restore into `regnum', because 254766525Speter later groups should continue to be numbered higher, 254866525Speter as in `(ab)c(de)' -- the second group is #2. */ 254966525Speter regnum_t this_group_regnum; 255017721Speter 255166525Speter compile_stack.avail--; 255266525Speter begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 255366525Speter fixup_alt_jump 255466525Speter = COMPILE_STACK_TOP.fixup_alt_jump 255566525Speter ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 255666525Speter : 0; 255766525Speter laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 255866525Speter this_group_regnum = COMPILE_STACK_TOP.regnum; 255917721Speter /* If we've reached MAX_REGNUM groups, then this open 256017721Speter won't actually generate any code, so we'll have to 256117721Speter clear pending_exact explicitly. */ 256217721Speter pending_exact = 0; 256317721Speter 256466525Speter /* We're at the end of the group, so now we know how many 256566525Speter groups were inside this one. */ 256666525Speter if (this_group_regnum <= MAX_REGNUM) 256766525Speter { 256866525Speter unsigned char *inner_group_loc 256966525Speter = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; 257025839Speter 257166525Speter *inner_group_loc = regnum - this_group_regnum; 257266525Speter BUF_PUSH_3 (stop_memory, this_group_regnum, 257366525Speter regnum - this_group_regnum); 257466525Speter } 257566525Speter } 257666525Speter break; 257717721Speter 257817721Speter 257966525Speter case '|': /* `\|'. */ 258066525Speter if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 258166525Speter goto normal_backslash; 258266525Speter handle_alt: 258366525Speter if (syntax & RE_LIMITED_OPS) 258466525Speter goto normal_char; 258517721Speter 258666525Speter /* Insert before the previous alternative a jump which 258766525Speter jumps to this alternative if the former fails. */ 258866525Speter GET_BUFFER_SPACE (3); 258966525Speter INSERT_JUMP (on_failure_jump, begalt, b + 6); 259066525Speter pending_exact = 0; 259166525Speter b += 3; 259217721Speter 259366525Speter /* The alternative before this one has a jump after it 259466525Speter which gets executed if it gets matched. Adjust that 259566525Speter jump so it will jump to this alternative's analogous 259666525Speter jump (put in below, which in turn will jump to the next 259766525Speter (if any) alternative's such jump, etc.). The last such 259866525Speter jump jumps to the correct final destination. A picture: 259966525Speter _____ _____ 260066525Speter | | | | 260166525Speter | v | v 260266525Speter a | b | c 260317721Speter 260466525Speter If we are at `b', then fixup_alt_jump right now points to a 260566525Speter three-byte space after `a'. We'll put in the jump, set 260666525Speter fixup_alt_jump to right after `b', and leave behind three 260766525Speter bytes which we'll fill in when we get to after `c'. */ 260817721Speter 260966525Speter if (fixup_alt_jump) 261066525Speter STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 261117721Speter 261266525Speter /* Mark and leave space for a jump after this alternative, 261366525Speter to be filled in later either by next alternative or 261466525Speter when know we're at the end of a series of alternatives. */ 261566525Speter fixup_alt_jump = b; 261666525Speter GET_BUFFER_SPACE (3); 261766525Speter b += 3; 261817721Speter 261966525Speter laststart = 0; 262066525Speter begalt = b; 262166525Speter break; 262217721Speter 262317721Speter 262466525Speter case '{': 262566525Speter /* If \{ is a literal. */ 262666525Speter if (!(syntax & RE_INTERVALS) 262766525Speter /* If we're at `\{' and it's not the open-interval 262866525Speter operator. */ 262966525Speter || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 263066525Speter || (p - 2 == pattern && p == pend)) 263166525Speter goto normal_backslash; 263217721Speter 263366525Speter handle_interval: 263466525Speter { 263566525Speter /* If got here, then the syntax allows intervals. */ 263617721Speter 263766525Speter /* At least (most) this many matches must be made. */ 263866525Speter int lower_bound = -1, upper_bound = -1; 263917721Speter 264066525Speter beg_interval = p - 1; 264117721Speter 264266525Speter if (p == pend) 264366525Speter { 264466525Speter if (syntax & RE_NO_BK_BRACES) 264566525Speter goto unfetch_interval; 264666525Speter else 264766525Speter FREE_STACK_RETURN (REG_EBRACE); 264866525Speter } 264917721Speter 265066525Speter GET_UNSIGNED_NUMBER (lower_bound); 265117721Speter 265266525Speter if (c == ',') 265366525Speter { 265466525Speter GET_UNSIGNED_NUMBER (upper_bound); 265566525Speter if (upper_bound < 0) upper_bound = RE_DUP_MAX; 265666525Speter } 265766525Speter else 265866525Speter /* Interval such as `{1}' => match exactly once. */ 265966525Speter upper_bound = lower_bound; 266017721Speter 266166525Speter if (lower_bound < 0 || upper_bound > RE_DUP_MAX 266266525Speter || lower_bound > upper_bound) 266366525Speter { 266466525Speter if (syntax & RE_NO_BK_BRACES) 266566525Speter goto unfetch_interval; 266666525Speter else 266766525Speter FREE_STACK_RETURN (REG_BADBR); 266866525Speter } 266917721Speter 267066525Speter if (!(syntax & RE_NO_BK_BRACES)) 267166525Speter { 267266525Speter if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 267317721Speter 267466525Speter PATFETCH (c); 267566525Speter } 267617721Speter 267766525Speter if (c != '}') 267866525Speter { 267966525Speter if (syntax & RE_NO_BK_BRACES) 268066525Speter goto unfetch_interval; 268166525Speter else 268266525Speter FREE_STACK_RETURN (REG_BADBR); 268366525Speter } 268417721Speter 268566525Speter /* We just parsed a valid interval. */ 268617721Speter 268766525Speter /* If it's invalid to have no preceding re. */ 268866525Speter if (!laststart) 268966525Speter { 269066525Speter if (syntax & RE_CONTEXT_INVALID_OPS) 269166525Speter FREE_STACK_RETURN (REG_BADRPT); 269266525Speter else if (syntax & RE_CONTEXT_INDEP_OPS) 269366525Speter laststart = b; 269466525Speter else 269566525Speter goto unfetch_interval; 269666525Speter } 269717721Speter 269866525Speter /* If the upper bound is zero, don't want to succeed at 269966525Speter all; jump from `laststart' to `b + 3', which will be 270066525Speter the end of the buffer after we insert the jump. */ 270166525Speter if (upper_bound == 0) 270266525Speter { 270366525Speter GET_BUFFER_SPACE (3); 270466525Speter INSERT_JUMP (jump, laststart, b + 3); 270566525Speter b += 3; 270666525Speter } 270717721Speter 270866525Speter /* Otherwise, we have a nontrivial interval. When 270966525Speter we're all done, the pattern will look like: 271066525Speter set_number_at <jump count> <upper bound> 271166525Speter set_number_at <succeed_n count> <lower bound> 271266525Speter succeed_n <after jump addr> <succeed_n count> 271366525Speter <body of loop> 271466525Speter jump_n <succeed_n addr> <jump count> 271566525Speter (The upper bound and `jump_n' are omitted if 271666525Speter `upper_bound' is 1, though.) */ 271766525Speter else 271866525Speter { /* If the upper bound is > 1, we need to insert 271966525Speter more at the end of the loop. */ 272066525Speter unsigned nbytes = 10 + (upper_bound > 1) * 10; 272117721Speter 272266525Speter GET_BUFFER_SPACE (nbytes); 272317721Speter 272466525Speter /* Initialize lower bound of the `succeed_n', even 272566525Speter though it will be set during matching by its 272666525Speter attendant `set_number_at' (inserted next), 272766525Speter because `re_compile_fastmap' needs to know. 272866525Speter Jump to the `jump_n' we might insert below. */ 272966525Speter INSERT_JUMP2 (succeed_n, laststart, 273066525Speter b + 5 + (upper_bound > 1) * 5, 273166525Speter lower_bound); 273266525Speter b += 5; 273317721Speter 273466525Speter /* Code to initialize the lower bound. Insert 273566525Speter before the `succeed_n'. The `5' is the last two 273666525Speter bytes of this `set_number_at', plus 3 bytes of 273766525Speter the following `succeed_n'. */ 273866525Speter insert_op2 (set_number_at, laststart, 5, lower_bound, b); 273966525Speter b += 5; 274017721Speter 274166525Speter if (upper_bound > 1) 274266525Speter { /* More than one repetition is allowed, so 274366525Speter append a backward jump to the `succeed_n' 274466525Speter that starts this interval. 274525839Speter 274666525Speter When we've reached this during matching, 274766525Speter we'll have matched the interval once, so 274866525Speter jump back only `upper_bound - 1' times. */ 274966525Speter STORE_JUMP2 (jump_n, b, laststart + 5, 275066525Speter upper_bound - 1); 275166525Speter b += 5; 275217721Speter 275366525Speter /* The location we want to set is the second 275466525Speter parameter of the `jump_n'; that is `b-2' as 275566525Speter an absolute address. `laststart' will be 275666525Speter the `set_number_at' we're about to insert; 275766525Speter `laststart+3' the number to set, the source 275866525Speter for the relative address. But we are 275966525Speter inserting into the middle of the pattern -- 276066525Speter so everything is getting moved up by 5. 276166525Speter Conclusion: (b - 2) - (laststart + 3) + 5, 276266525Speter i.e., b - laststart. 276325839Speter 276466525Speter We insert this at the beginning of the loop 276566525Speter so that if we fail during matching, we'll 276666525Speter reinitialize the bounds. */ 276766525Speter insert_op2 (set_number_at, laststart, b - laststart, 276866525Speter upper_bound - 1, b); 276966525Speter b += 5; 277066525Speter } 277166525Speter } 277266525Speter pending_exact = 0; 277366525Speter beg_interval = NULL; 277466525Speter } 277566525Speter break; 277617721Speter 277766525Speter unfetch_interval: 277866525Speter /* If an invalid interval, match the characters as literals. */ 277966525Speter assert (beg_interval); 278066525Speter p = beg_interval; 278166525Speter beg_interval = NULL; 278217721Speter 278366525Speter /* normal_char and normal_backslash need `c'. */ 278466525Speter PATFETCH (c); 278517721Speter 278666525Speter if (!(syntax & RE_NO_BK_BRACES)) 278766525Speter { 278866525Speter if (p > pattern && p[-1] == '\\') 278966525Speter goto normal_backslash; 279066525Speter } 279166525Speter goto normal_char; 279217721Speter 279317721Speter#ifdef emacs 279466525Speter /* There is no way to specify the before_dot and after_dot 279566525Speter operators. rms says this is ok. --karl */ 279666525Speter case '=': 279766525Speter BUF_PUSH (at_dot); 279866525Speter break; 279917721Speter 280066525Speter case 's': 280166525Speter laststart = b; 280266525Speter PATFETCH (c); 280366525Speter BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 280466525Speter break; 280517721Speter 280666525Speter case 'S': 280766525Speter laststart = b; 280866525Speter PATFETCH (c); 280966525Speter BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 281066525Speter break; 281166525Speter 281266525Speter case 'c': 281366525Speter laststart = b; 281466525Speter PATFETCH_RAW (c); 281566525Speter BUF_PUSH_2 (categoryspec, c); 281666525Speter break; 281766525Speter 281866525Speter case 'C': 281966525Speter laststart = b; 282066525Speter PATFETCH_RAW (c); 282166525Speter BUF_PUSH_2 (notcategoryspec, c); 282266525Speter break; 282317721Speter#endif /* emacs */ 282417721Speter 282517721Speter 282666525Speter case 'w': 282766525Speter laststart = b; 282866525Speter BUF_PUSH (wordchar); 282966525Speter break; 283017721Speter 283117721Speter 283266525Speter case 'W': 283366525Speter laststart = b; 283466525Speter BUF_PUSH (notwordchar); 283566525Speter break; 283617721Speter 283717721Speter 283866525Speter case '<': 283966525Speter BUF_PUSH (wordbeg); 284066525Speter break; 284117721Speter 284266525Speter case '>': 284366525Speter BUF_PUSH (wordend); 284466525Speter break; 284517721Speter 284666525Speter case 'b': 284766525Speter BUF_PUSH (wordbound); 284866525Speter break; 284917721Speter 285066525Speter case 'B': 285166525Speter BUF_PUSH (notwordbound); 285266525Speter break; 285317721Speter 285466525Speter case '`': 285566525Speter BUF_PUSH (begbuf); 285666525Speter break; 285717721Speter 285866525Speter case '\'': 285966525Speter BUF_PUSH (endbuf); 286066525Speter break; 286117721Speter 286266525Speter case '1': case '2': case '3': case '4': case '5': 286366525Speter case '6': case '7': case '8': case '9': 286466525Speter if (syntax & RE_NO_BK_REFS) 286566525Speter goto normal_char; 286617721Speter 286766525Speter c1 = c - '0'; 286817721Speter 286966525Speter if (c1 > regnum) 287066525Speter FREE_STACK_RETURN (REG_ESUBREG); 287117721Speter 287266525Speter /* Can't back reference to a subexpression if inside of it. */ 287366525Speter if (group_in_compile_stack (compile_stack, c1)) 287466525Speter goto normal_char; 287517721Speter 287666525Speter laststart = b; 287766525Speter BUF_PUSH_2 (duplicate, c1); 287866525Speter break; 287917721Speter 288017721Speter 288166525Speter case '+': 288266525Speter case '?': 288366525Speter if (syntax & RE_BK_PLUS_QM) 288466525Speter goto handle_plus; 288566525Speter else 288666525Speter goto normal_backslash; 288717721Speter 288866525Speter default: 288966525Speter normal_backslash: 289066525Speter /* You might think it would be useful for \ to mean 289166525Speter not to translate; but if we don't translate it 289266525Speter it will never match anything. */ 289366525Speter c = TRANSLATE (c); 289466525Speter goto normal_char; 289566525Speter } 289666525Speter break; 289717721Speter 289817721Speter 289917721Speter default: 290066525Speter /* Expects the character in `c'. */ 290117721Speter normal_char: 290266525Speter p1 = p - 1; /* P1 points the head of C. */ 290366525Speter#ifdef emacs 290466525Speter if (bufp->multibyte) 290566525Speter { 290666525Speter c = STRING_CHAR (p1, pend - p1); 290766525Speter c = TRANSLATE (c); 290866525Speter /* Set P to the next character boundary. */ 290966525Speter p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1; 291066525Speter } 291166525Speter#endif 291217721Speter /* If no exactn currently being built. */ 291366525Speter if (!pending_exact 291417721Speter 291566525Speter /* If last exactn not at current position. */ 291666525Speter || pending_exact + *pending_exact + 1 != b 291725839Speter 291866525Speter /* We have only one byte following the exactn for the count. */ 291966525Speter || *pending_exact >= (1 << BYTEWIDTH) - (p - p1) 292017721Speter 292166525Speter /* If followed by a repetition operator. */ 292266525Speter || (p != pend && (*p == '*' || *p == '^')) 292317721Speter || ((syntax & RE_BK_PLUS_QM) 292466525Speter ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') 292566525Speter : p != pend && (*p == '+' || *p == '?')) 292617721Speter || ((syntax & RE_INTERVALS) 292766525Speter && ((syntax & RE_NO_BK_BRACES) 292866525Speter ? p != pend && *p == '{' 292966525Speter : p + 1 < pend && p[0] == '\\' && p[1] == '{'))) 293017721Speter { 293117721Speter /* Start building a new exactn. */ 293225839Speter 293366525Speter laststart = b; 293417721Speter 293517721Speter BUF_PUSH_2 (exactn, 0); 293617721Speter pending_exact = b - 1; 293766525Speter } 293825839Speter 293966525Speter#ifdef emacs 294066525Speter if (! SINGLE_BYTE_CHAR_P (c)) 294166525Speter { 294266525Speter unsigned char work[4], *str; 294366525Speter int i = CHAR_STRING (c, work, str); 294466525Speter int j; 294566525Speter for (j = 0; j < i; j++) 294666525Speter { 294766525Speter BUF_PUSH (str[j]); 294866525Speter (*pending_exact)++; 294966525Speter } 295066525Speter } 295166525Speter else 295266525Speter#endif 295366525Speter { 295466525Speter BUF_PUSH (c); 295566525Speter (*pending_exact)++; 295666525Speter } 295717721Speter break; 295866525Speter } /* switch (c) */ 295917721Speter } /* while p != pend */ 296017721Speter 296125839Speter 296217721Speter /* Through the pattern now. */ 296325839Speter 296417721Speter if (fixup_alt_jump) 296517721Speter STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 296617721Speter 296725839Speter if (!COMPILE_STACK_EMPTY) 296866525Speter FREE_STACK_RETURN (REG_EPAREN); 296917721Speter 297066525Speter /* If we don't want backtracking, force success 297166525Speter the first time we reach the end of the compiled pattern. */ 297266525Speter if (syntax & RE_NO_POSIX_BACKTRACKING) 297366525Speter BUF_PUSH (succeed); 297466525Speter 297517721Speter free (compile_stack.stack); 297617721Speter 297717721Speter /* We have succeeded; set the length of the buffer. */ 297817721Speter bufp->used = b - bufp->buffer; 297917721Speter 298017721Speter#ifdef DEBUG 298117721Speter if (debug) 298217721Speter { 298366525Speter DEBUG_PRINT1 ("\nCompiled pattern: \n"); 298417721Speter print_compiled_pattern (bufp); 298517721Speter } 298617721Speter#endif /* DEBUG */ 298717721Speter 298866525Speter#ifndef MATCH_MAY_ALLOCATE 298966525Speter /* Initialize the failure stack to the largest possible stack. This 299066525Speter isn't necessary unless we're trying to avoid calling alloca in 299166525Speter the search and match routines. */ 299266525Speter { 299366525Speter int num_regs = bufp->re_nsub + 1; 299466525Speter 299566525Speter if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE) 299666525Speter { 299766525Speter fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE; 299866525Speter 299966525Speter#ifdef emacs 300066525Speter if (! fail_stack.stack) 300166525Speter fail_stack.stack 300266525Speter = (fail_stack_elt_t *) xmalloc (fail_stack.size 300366525Speter * sizeof (fail_stack_elt_t)); 300466525Speter else 300566525Speter fail_stack.stack 300666525Speter = (fail_stack_elt_t *) xrealloc (fail_stack.stack, 300766525Speter (fail_stack.size 300866525Speter * sizeof (fail_stack_elt_t))); 300966525Speter#else /* not emacs */ 301066525Speter if (! fail_stack.stack) 301166525Speter fail_stack.stack 301266525Speter = (fail_stack_elt_t *) malloc (fail_stack.size 301366525Speter * sizeof (fail_stack_elt_t)); 301466525Speter else 301566525Speter fail_stack.stack 301666525Speter = (fail_stack_elt_t *) realloc (fail_stack.stack, 301766525Speter (fail_stack.size 301866525Speter * sizeof (fail_stack_elt_t))); 301966525Speter#endif /* not emacs */ 302066525Speter } 302166525Speter 302266525Speter regex_grow_registers (num_regs); 302366525Speter } 302466525Speter#endif /* not MATCH_MAY_ALLOCATE */ 302566525Speter 302617721Speter return REG_NOERROR; 302717721Speter} /* regex_compile */ 302817721Speter 302917721Speter/* Subroutines for `regex_compile'. */ 303017721Speter 303166525Speter/* Store OP at LOC followed by two-byte integer parameter ARG. */ 303217721Speter 303317721Speterstatic void 303417721Speterstore_op1 (op, loc, arg) 303517721Speter re_opcode_t op; 303617721Speter unsigned char *loc; 303717721Speter int arg; 303817721Speter{ 303917721Speter *loc = (unsigned char) op; 304017721Speter STORE_NUMBER (loc + 1, arg); 304117721Speter} 304217721Speter 304317721Speter 304417721Speter/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ 304517721Speter 304617721Speterstatic void 304717721Speterstore_op2 (op, loc, arg1, arg2) 304817721Speter re_opcode_t op; 304917721Speter unsigned char *loc; 305017721Speter int arg1, arg2; 305117721Speter{ 305217721Speter *loc = (unsigned char) op; 305317721Speter STORE_NUMBER (loc + 1, arg1); 305417721Speter STORE_NUMBER (loc + 3, arg2); 305517721Speter} 305617721Speter 305717721Speter 305817721Speter/* Copy the bytes from LOC to END to open up three bytes of space at LOC 305917721Speter for OP followed by two-byte integer parameter ARG. */ 306017721Speter 306117721Speterstatic void 306217721Speterinsert_op1 (op, loc, arg, end) 306317721Speter re_opcode_t op; 306417721Speter unsigned char *loc; 306517721Speter int arg; 306625839Speter unsigned char *end; 306717721Speter{ 306817721Speter register unsigned char *pfrom = end; 306917721Speter register unsigned char *pto = end + 3; 307017721Speter 307117721Speter while (pfrom != loc) 307217721Speter *--pto = *--pfrom; 307325839Speter 307417721Speter store_op1 (op, loc, arg); 307517721Speter} 307617721Speter 307717721Speter 307817721Speter/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 307917721Speter 308017721Speterstatic void 308117721Speterinsert_op2 (op, loc, arg1, arg2, end) 308217721Speter re_opcode_t op; 308317721Speter unsigned char *loc; 308417721Speter int arg1, arg2; 308525839Speter unsigned char *end; 308617721Speter{ 308717721Speter register unsigned char *pfrom = end; 308817721Speter register unsigned char *pto = end + 5; 308917721Speter 309017721Speter while (pfrom != loc) 309117721Speter *--pto = *--pfrom; 309225839Speter 309317721Speter store_op2 (op, loc, arg1, arg2); 309417721Speter} 309517721Speter 309617721Speter 309717721Speter/* P points to just after a ^ in PATTERN. Return true if that ^ comes 309817721Speter after an alternative or a begin-subexpression. We assume there is at 309917721Speter least one character before the ^. */ 310017721Speter 310117721Speterstatic boolean 310217721Speterat_begline_loc_p (pattern, p, syntax) 310317721Speter const char *pattern, *p; 310417721Speter reg_syntax_t syntax; 310517721Speter{ 310617721Speter const char *prev = p - 2; 310717721Speter boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 310825839Speter 310917721Speter return 311017721Speter /* After a subexpression? */ 311117721Speter (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 311266525Speter /* After an alternative? */ 311317721Speter || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 311417721Speter} 311517721Speter 311617721Speter 311717721Speter/* The dual of at_begline_loc_p. This one is for $. We assume there is 311817721Speter at least one character after the $, i.e., `P < PEND'. */ 311917721Speter 312017721Speterstatic boolean 312117721Speterat_endline_loc_p (p, pend, syntax) 312217721Speter const char *p, *pend; 312317721Speter int syntax; 312417721Speter{ 312517721Speter const char *next = p; 312617721Speter boolean next_backslash = *next == '\\'; 312766525Speter const char *next_next = p + 1 < pend ? p + 1 : 0; 312866525Speter 312917721Speter return 313017721Speter /* Before a subexpression? */ 313117721Speter (syntax & RE_NO_BK_PARENS ? *next == ')' 313266525Speter : next_backslash && next_next && *next_next == ')') 313317721Speter /* Before an alternative? */ 313417721Speter || (syntax & RE_NO_BK_VBAR ? *next == '|' 313566525Speter : next_backslash && next_next && *next_next == '|'); 313617721Speter} 313717721Speter 313817721Speter 313966525Speter/* Returns true if REGNUM is in one of COMPILE_STACK's elements and 314017721Speter false if it's not. */ 314117721Speter 314217721Speterstatic boolean 314317721Spetergroup_in_compile_stack (compile_stack, regnum) 314417721Speter compile_stack_type compile_stack; 314517721Speter regnum_t regnum; 314617721Speter{ 314717721Speter int this_element; 314817721Speter 314966525Speter for (this_element = compile_stack.avail - 1; 315066525Speter this_element >= 0; 315117721Speter this_element--) 315217721Speter if (compile_stack.stack[this_element].regnum == regnum) 315317721Speter return true; 315417721Speter 315517721Speter return false; 315617721Speter} 315717721Speter 315817721Speter/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in 315917721Speter BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible 316017721Speter characters can start a string that matches the pattern. This fastmap 316117721Speter is used by re_search to skip quickly over impossible starting points. 316217721Speter 316317721Speter The caller must supply the address of a (1 << BYTEWIDTH)-byte data 316417721Speter area as BUFP->fastmap. 316525839Speter 316617721Speter We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 316717721Speter the pattern buffer. 316817721Speter 316917721Speter Returns 0 if we succeed, -2 if an internal error. */ 317017721Speter 317117721Speterint 317217721Speterre_compile_fastmap (bufp) 317317721Speter struct re_pattern_buffer *bufp; 317417721Speter{ 317566525Speter int i, j, k; 317666525Speter#ifdef MATCH_MAY_ALLOCATE 317717721Speter fail_stack_type fail_stack; 317866525Speter#endif 317917721Speter#ifndef REGEX_MALLOC 318017721Speter char *destination; 318117721Speter#endif 318217721Speter /* We don't push any register information onto the failure stack. */ 318317721Speter unsigned num_regs = 0; 318425839Speter 318517721Speter register char *fastmap = bufp->fastmap; 318617721Speter unsigned char *pattern = bufp->buffer; 318717721Speter unsigned long size = bufp->used; 318825839Speter unsigned char *p = pattern; 318917721Speter register unsigned char *pend = pattern + size; 319017721Speter 319166525Speter /* This holds the pointer to the failure stack, when 319266525Speter it is allocated relocatably. */ 319366525Speter fail_stack_elt_t *failure_stack_ptr; 319466525Speter 319517721Speter /* Assume that each path through the pattern can be null until 319666525Speter proven otherwise. We set this false at the bottom of switch 319717721Speter statement, to which we get only if a particular path doesn't 319817721Speter match the empty string. */ 319917721Speter boolean path_can_be_null = true; 320017721Speter 320117721Speter /* We aren't doing a `succeed_n' to begin with. */ 320217721Speter boolean succeed_n_p = false; 320317721Speter 320466525Speter /* If all elements for base leading-codes in fastmap is set, this 320566525Speter flag is set true. */ 320666525Speter boolean match_any_multibyte_characters = false; 320766525Speter 320866525Speter /* Maximum code of simple (single byte) character. */ 320966525Speter int simple_char_max; 321066525Speter 321117721Speter assert (fastmap != NULL && p != NULL); 321225839Speter 321317721Speter INIT_FAIL_STACK (); 321466525Speter bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 321517721Speter bufp->fastmap_accurate = 1; /* It will be when we're done. */ 321617721Speter bufp->can_be_null = 0; 321725839Speter 321866525Speter while (1) 321917721Speter { 322066525Speter if (p == pend || *p == succeed) 322166525Speter { 322266525Speter /* We have reached the (effective) end of pattern. */ 322366525Speter if (!FAIL_STACK_EMPTY ()) 322466525Speter { 322566525Speter bufp->can_be_null |= path_can_be_null; 322666525Speter 322766525Speter /* Reset for next path. */ 322866525Speter path_can_be_null = true; 322966525Speter 323066525Speter p = fail_stack.stack[--fail_stack.avail].pointer; 323166525Speter 323266525Speter continue; 323366525Speter } 323466525Speter else 323566525Speter break; 323617721Speter } 323717721Speter 323866525Speter /* We should never be about to go beyond the end of the pattern. */ 323917721Speter assert (p < pend); 324066525Speter 324166525Speter switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 324217721Speter { 324317721Speter 324466525Speter /* I guess the idea here is to simply not bother with a fastmap 324566525Speter if a backreference is used, since it's too hard to figure out 324666525Speter the fastmap for the corresponding group. Setting 324766525Speter `can_be_null' stops `re_search_2' from using the fastmap, so 324866525Speter that is all we do. */ 324917721Speter case duplicate: 325017721Speter bufp->can_be_null = 1; 325166525Speter goto done; 325217721Speter 325317721Speter 325417721Speter /* Following are the cases which match a character. These end 325566525Speter with `break'. */ 325617721Speter 325717721Speter case exactn: 325866525Speter fastmap[p[1]] = 1; 325917721Speter break; 326017721Speter 326117721Speter 326266525Speter#ifndef emacs 326366525Speter case charset: 326466525Speter for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 326517721Speter if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 326666525Speter fastmap[j] = 1; 326717721Speter break; 326817721Speter 326917721Speter 327017721Speter case charset_not: 327117721Speter /* Chars beyond end of map must be allowed. */ 327217721Speter for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 327366525Speter fastmap[j] = 1; 327417721Speter 327517721Speter for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 327617721Speter if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 327766525Speter fastmap[j] = 1; 327866525Speter break; 327917721Speter 328017721Speter 328117721Speter case wordchar: 328217721Speter for (j = 0; j < (1 << BYTEWIDTH); j++) 328317721Speter if (SYNTAX (j) == Sword) 328417721Speter fastmap[j] = 1; 328517721Speter break; 328617721Speter 328717721Speter 328817721Speter case notwordchar: 328917721Speter for (j = 0; j < (1 << BYTEWIDTH); j++) 329017721Speter if (SYNTAX (j) != Sword) 329117721Speter fastmap[j] = 1; 329217721Speter break; 329366525Speter#else /* emacs */ 329466525Speter case charset: 329566525Speter for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; 329666525Speter j >= 0; j--) 329766525Speter if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 329866525Speter fastmap[j] = 1; 329917721Speter 330066525Speter if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) 330166525Speter && match_any_multibyte_characters == false) 330266525Speter { 330366525Speter /* Set fastmap[I] 1 where I is a base leading code of each 330466525Speter multibyte character in the range table. */ 330566525Speter int c, count; 330617721Speter 330766525Speter /* Make P points the range table. */ 330866525Speter p += CHARSET_BITMAP_SIZE (&p[-2]); 330917721Speter 331066525Speter /* Extract the number of ranges in range table into 331166525Speter COUNT. */ 331266525Speter EXTRACT_NUMBER_AND_INCR (count, p); 331366525Speter for (; count > 0; count--, p += 2 * 3) /* XXX */ 331466525Speter { 331566525Speter /* Extract the start of each range. */ 331666525Speter EXTRACT_CHARACTER (c, p); 331766525Speter j = CHAR_CHARSET (c); 331866525Speter fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1; 331966525Speter } 332066525Speter } 332166525Speter break; 332217721Speter 332317721Speter 332466525Speter case charset_not: 332566525Speter /* Chars beyond end of bitmap are possible matches. 332666525Speter All the single-byte codes can occur in multibyte buffers. 332766525Speter So any that are not listed in the charset 332866525Speter are possible matches, even in multibyte buffers. */ 332966525Speter simple_char_max = (1 << BYTEWIDTH); 333066525Speter for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH; 333166525Speter j < simple_char_max; j++) 333266525Speter fastmap[j] = 1; 333366525Speter 333466525Speter for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++; 333566525Speter j >= 0; j--) 333666525Speter if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 333766525Speter fastmap[j] = 1; 333866525Speter 333966525Speter if (bufp->multibyte) 334066525Speter /* Any character set can possibly contain a character 334166525Speter which doesn't match the specified set of characters. */ 334266525Speter { 334366525Speter set_fastmap_for_multibyte_characters: 334466525Speter if (match_any_multibyte_characters == false) 334566525Speter { 334666525Speter for (j = 0x80; j < 0xA0; j++) /* XXX */ 334766525Speter if (BASE_LEADING_CODE_P (j)) 334866525Speter fastmap[j] = 1; 334966525Speter match_any_multibyte_characters = true; 335066525Speter } 335166525Speter } 335217721Speter break; 335317721Speter 335417721Speter 335566525Speter case wordchar: 335666525Speter /* All the single-byte codes can occur in multibyte buffers, 335766525Speter and they may have word syntax. So do consider them. */ 335866525Speter simple_char_max = (1 << BYTEWIDTH); 335966525Speter for (j = 0; j < simple_char_max; j++) 336066525Speter if (SYNTAX (j) == Sword) 336166525Speter fastmap[j] = 1; 336266525Speter 336366525Speter if (bufp->multibyte) 336466525Speter /* Any character set can possibly contain a character 336566525Speter whose syntax is `Sword'. */ 336666525Speter goto set_fastmap_for_multibyte_characters; 336766525Speter break; 336866525Speter 336966525Speter 337066525Speter case notwordchar: 337166525Speter /* All the single-byte codes can occur in multibyte buffers, 337266525Speter and they may not have word syntax. So do consider them. */ 337366525Speter simple_char_max = (1 << BYTEWIDTH); 337466525Speter for (j = 0; j < simple_char_max; j++) 337566525Speter if (SYNTAX (j) != Sword) 337666525Speter fastmap[j] = 1; 337766525Speter 337866525Speter if (bufp->multibyte) 337966525Speter /* Any character set can possibly contain a character 338066525Speter whose syntax is not `Sword'. */ 338166525Speter goto set_fastmap_for_multibyte_characters; 338266525Speter break; 338366525Speter#endif 338466525Speter 338566525Speter case anychar: 338666525Speter { 338766525Speter int fastmap_newline = fastmap['\n']; 338866525Speter 338966525Speter /* `.' matches anything, except perhaps newline. 339066525Speter Even in a multibyte buffer, it should match any 339166525Speter conceivable byte value for the fastmap. */ 339266525Speter if (bufp->multibyte) 339366525Speter match_any_multibyte_characters = true; 339466525Speter 339566525Speter simple_char_max = (1 << BYTEWIDTH); 339666525Speter for (j = 0; j < simple_char_max; j++) 339766525Speter fastmap[j] = 1; 339866525Speter 339966525Speter /* ... except perhaps newline. */ 340066525Speter if (!(bufp->syntax & RE_DOT_NEWLINE)) 340166525Speter fastmap['\n'] = fastmap_newline; 340266525Speter 340366525Speter /* Return if we have already set `can_be_null'; if we have, 340466525Speter then the fastmap is irrelevant. Something's wrong here. */ 340566525Speter else if (bufp->can_be_null) 340666525Speter goto done; 340766525Speter 340866525Speter /* Otherwise, have to check alternative paths. */ 340966525Speter break; 341066525Speter } 341166525Speter 341217721Speter#ifdef emacs 341366525Speter case wordbound: 341466525Speter case notwordbound: 341566525Speter case wordbeg: 341666525Speter case wordend: 341766525Speter case notsyntaxspec: 341866525Speter case syntaxspec: 341966525Speter /* This match depends on text properties. These end with 342066525Speter aborting optimizations. */ 342166525Speter bufp->can_be_null = 1; 342266525Speter goto done; 342366525Speter#if 0 342417721Speter k = *p++; 342566525Speter simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); 342666525Speter for (j = 0; j < simple_char_max; j++) 342717721Speter if (SYNTAX (j) == (enum syntaxcode) k) 342817721Speter fastmap[j] = 1; 342966525Speter 343066525Speter if (bufp->multibyte) 343166525Speter /* Any character set can possibly contain a character 343266525Speter whose syntax is K. */ 343366525Speter goto set_fastmap_for_multibyte_characters; 343417721Speter break; 343517721Speter 343617721Speter case notsyntaxspec: 343717721Speter k = *p++; 343866525Speter simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); 343966525Speter for (j = 0; j < simple_char_max; j++) 344017721Speter if (SYNTAX (j) != (enum syntaxcode) k) 344117721Speter fastmap[j] = 1; 344266525Speter 344366525Speter if (bufp->multibyte) 344466525Speter /* Any character set can possibly contain a character 344566525Speter whose syntax is not K. */ 344666525Speter goto set_fastmap_for_multibyte_characters; 344717721Speter break; 344866525Speter#endif 344917721Speter 345017721Speter 345166525Speter case categoryspec: 345266525Speter k = *p++; 345366525Speter simple_char_max = (1 << BYTEWIDTH); 345466525Speter for (j = 0; j < simple_char_max; j++) 345566525Speter if (CHAR_HAS_CATEGORY (j, k)) 345666525Speter fastmap[j] = 1; 345766525Speter 345866525Speter if (bufp->multibyte) 345966525Speter /* Any character set can possibly contain a character 346066525Speter whose category is K. */ 346166525Speter goto set_fastmap_for_multibyte_characters; 346266525Speter break; 346366525Speter 346466525Speter 346566525Speter case notcategoryspec: 346666525Speter k = *p++; 346766525Speter simple_char_max = (1 << BYTEWIDTH); 346866525Speter for (j = 0; j < simple_char_max; j++) 346966525Speter if (!CHAR_HAS_CATEGORY (j, k)) 347066525Speter fastmap[j] = 1; 347166525Speter 347266525Speter if (bufp->multibyte) 347366525Speter /* Any character set can possibly contain a character 347466525Speter whose category is not K. */ 347566525Speter goto set_fastmap_for_multibyte_characters; 347666525Speter break; 347766525Speter 347817721Speter /* All cases after this match the empty string. These end with 347966525Speter `continue'. */ 348017721Speter 348117721Speter 348217721Speter case before_dot: 348317721Speter case at_dot: 348417721Speter case after_dot: 348566525Speter continue; 348666525Speter#endif /* emacs */ 348717721Speter 348817721Speter 348966525Speter case no_op: 349066525Speter case begline: 349166525Speter case endline: 349217721Speter case begbuf: 349317721Speter case endbuf: 349466525Speter#ifndef emacs 349517721Speter case wordbound: 349617721Speter case notwordbound: 349717721Speter case wordbeg: 349817721Speter case wordend: 349966525Speter#endif 350066525Speter case push_dummy_failure: 350166525Speter continue; 350217721Speter 350317721Speter 350417721Speter case jump_n: 350566525Speter case pop_failure_jump: 350617721Speter case maybe_pop_jump: 350717721Speter case jump: 350866525Speter case jump_past_alt: 350917721Speter case dummy_failure_jump: 351066525Speter EXTRACT_NUMBER_AND_INCR (j, p); 351125839Speter p += j; 351217721Speter if (j > 0) 351317721Speter continue; 351425839Speter 351566525Speter /* Jump backward implies we just went through the body of a 351666525Speter loop and matched nothing. Opcode jumped to should be 351766525Speter `on_failure_jump' or `succeed_n'. Just treat it like an 351866525Speter ordinary jump. For a * loop, it has pushed its failure 351966525Speter point already; if so, discard that as redundant. */ 352066525Speter if ((re_opcode_t) *p != on_failure_jump 352117721Speter && (re_opcode_t) *p != succeed_n) 352217721Speter continue; 352317721Speter 352466525Speter p++; 352566525Speter EXTRACT_NUMBER_AND_INCR (j, p); 352666525Speter p += j; 352725839Speter 352866525Speter /* If what's on the stack is where we are now, pop it. */ 352966525Speter if (!FAIL_STACK_EMPTY () 353025839Speter && fail_stack.stack[fail_stack.avail - 1].pointer == p) 353166525Speter fail_stack.avail--; 353217721Speter 353366525Speter continue; 353417721Speter 353517721Speter 353666525Speter case on_failure_jump: 353766525Speter case on_failure_keep_string_jump: 353817721Speter handle_on_failure_jump: 353966525Speter EXTRACT_NUMBER_AND_INCR (j, p); 354017721Speter 354166525Speter /* For some patterns, e.g., `(a?)?', `p+j' here points to the 354266525Speter end of the pattern. We don't want to push such a point, 354366525Speter since when we restore it above, entering the switch will 354466525Speter increment `p' past the end of the pattern. We don't need 354566525Speter to push such a point since we obviously won't find any more 354666525Speter fastmap entries beyond `pend'. Such a pattern can match 354766525Speter the null string, though. */ 354866525Speter if (p + j < pend) 354966525Speter { 355066525Speter if (!PUSH_PATTERN_OP (p + j, fail_stack)) 355166525Speter { 355266525Speter RESET_FAIL_STACK (); 355366525Speter return -2; 355466525Speter } 355566525Speter } 355666525Speter else 355766525Speter bufp->can_be_null = 1; 355817721Speter 355966525Speter if (succeed_n_p) 356066525Speter { 356166525Speter EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ 356266525Speter succeed_n_p = false; 356317721Speter } 356417721Speter 356566525Speter continue; 356617721Speter 356717721Speter 356817721Speter case succeed_n: 356966525Speter /* Get to the number of times to succeed. */ 357066525Speter p += 2; 357117721Speter 357266525Speter /* Increment p past the n for when k != 0. */ 357366525Speter EXTRACT_NUMBER_AND_INCR (k, p); 357466525Speter if (k == 0) 357517721Speter { 357666525Speter p -= 4; 357766525Speter succeed_n_p = true; /* Spaghetti code alert. */ 357866525Speter goto handle_on_failure_jump; 357966525Speter } 358066525Speter continue; 358117721Speter 358217721Speter 358317721Speter case set_number_at: 358466525Speter p += 4; 358566525Speter continue; 358617721Speter 358717721Speter 358817721Speter case start_memory: 358966525Speter case stop_memory: 359017721Speter p += 2; 359117721Speter continue; 359217721Speter 359317721Speter 359417721Speter default: 359566525Speter abort (); /* We have listed all the cases. */ 359666525Speter } /* switch *p++ */ 359717721Speter 359817721Speter /* Getting here means we have found the possible starting 359966525Speter characters for one path of the pattern -- and that the empty 360066525Speter string does not match. We need not follow this path further. 360166525Speter Instead, look at the next alternative (remembered on the 360266525Speter stack), or quit if no more. The test at the top of the loop 360366525Speter does these things. */ 360417721Speter path_can_be_null = false; 360517721Speter p = pend; 360617721Speter } /* while p */ 360717721Speter 360817721Speter /* Set `can_be_null' for the last path (also the first path, if the 360966525Speter pattern is empty). */ 361017721Speter bufp->can_be_null |= path_can_be_null; 361166525Speter 361266525Speter done: 361366525Speter RESET_FAIL_STACK (); 361417721Speter return 0; 361517721Speter} /* re_compile_fastmap */ 361617721Speter 361717721Speter/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 361817721Speter ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 361917721Speter this memory for recording register information. STARTS and ENDS 362017721Speter must be allocated using the malloc library routine, and must each 362117721Speter be at least NUM_REGS * sizeof (regoff_t) bytes long. 362217721Speter 362317721Speter If NUM_REGS == 0, then subsequent matches should allocate their own 362417721Speter register data. 362517721Speter 362617721Speter Unless this function is called, the first search or match using 362717721Speter PATTERN_BUFFER will allocate its own register data, without 362817721Speter freeing the old data. */ 362917721Speter 363017721Spetervoid 363117721Speterre_set_registers (bufp, regs, num_regs, starts, ends) 363217721Speter struct re_pattern_buffer *bufp; 363317721Speter struct re_registers *regs; 363417721Speter unsigned num_regs; 363517721Speter regoff_t *starts, *ends; 363617721Speter{ 363717721Speter if (num_regs) 363817721Speter { 363917721Speter bufp->regs_allocated = REGS_REALLOCATE; 364017721Speter regs->num_regs = num_regs; 364117721Speter regs->start = starts; 364217721Speter regs->end = ends; 364317721Speter } 364417721Speter else 364517721Speter { 364617721Speter bufp->regs_allocated = REGS_UNALLOCATED; 364717721Speter regs->num_regs = 0; 364817721Speter regs->start = regs->end = (regoff_t *) 0; 364917721Speter } 365017721Speter} 365117721Speter 365266525Speter/* Searching routines. */ 365317721Speter 365417721Speter/* Like re_search_2, below, but only one string is specified, and 365517721Speter doesn't let you say where to stop matching. */ 365617721Speter 365717721Speterint 365817721Speterre_search (bufp, string, size, startpos, range, regs) 365917721Speter struct re_pattern_buffer *bufp; 366017721Speter const char *string; 366117721Speter int size, startpos, range; 366217721Speter struct re_registers *regs; 366317721Speter{ 366425839Speter return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 366517721Speter regs, size); 366617721Speter} 366717721Speter 366866525Speter/* End address of virtual concatenation of string. */ 366966525Speter#define STOP_ADDR_VSTRING(P) \ 367066525Speter (((P) >= size1 ? string2 + size2 : string1 + size1)) 367117721Speter 367266525Speter/* Address of POS in the concatenation of virtual string. */ 367366525Speter#define POS_ADDR_VSTRING(POS) \ 367466525Speter (((POS) >= size1 ? string2 - size1 : string1) + (POS)) 367566525Speter 367617721Speter/* Using the compiled pattern in BUFP->buffer, first tries to match the 367717721Speter virtual concatenation of STRING1 and STRING2, starting first at index 367817721Speter STARTPOS, then at STARTPOS + 1, and so on. 367925839Speter 368017721Speter STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 368125839Speter 368217721Speter RANGE is how far to scan while trying to match. RANGE = 0 means try 368317721Speter only at STARTPOS; in general, the last start tried is STARTPOS + 368417721Speter RANGE. 368525839Speter 368617721Speter In REGS, return the indices of the virtual concatenation of STRING1 368717721Speter and STRING2 that matched the entire BUFP->buffer and its contained 368817721Speter subexpressions. 368925839Speter 369017721Speter Do not consider matching one past the index STOP in the virtual 369117721Speter concatenation of STRING1 and STRING2. 369217721Speter 369317721Speter We return either the position in the strings at which the match was 369417721Speter found, -1 if no match, or -2 if error (such as failure 369517721Speter stack overflow). */ 369617721Speter 369717721Speterint 369817721Speterre_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) 369917721Speter struct re_pattern_buffer *bufp; 370017721Speter const char *string1, *string2; 370117721Speter int size1, size2; 370217721Speter int startpos; 370317721Speter int range; 370417721Speter struct re_registers *regs; 370517721Speter int stop; 370617721Speter{ 370717721Speter int val; 370817721Speter register char *fastmap = bufp->fastmap; 370966525Speter register RE_TRANSLATE_TYPE translate = bufp->translate; 371017721Speter int total_size = size1 + size2; 371117721Speter int endpos = startpos + range; 371266525Speter int anchored_start = 0; 371317721Speter 371466525Speter /* Nonzero if we have to concern multibyte character. */ 371566525Speter int multibyte = bufp->multibyte; 371666525Speter 371717721Speter /* Check for out-of-range STARTPOS. */ 371817721Speter if (startpos < 0 || startpos > total_size) 371917721Speter return -1; 372025839Speter 372117721Speter /* Fix up RANGE if it might eventually take us outside 372266525Speter the virtual concatenation of STRING1 and STRING2. 372366525Speter Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ 372466525Speter if (endpos < 0) 372566525Speter range = 0 - startpos; 372617721Speter else if (endpos > total_size) 372717721Speter range = total_size - startpos; 372817721Speter 372917721Speter /* If the search isn't to be a backwards one, don't waste time in a 373066525Speter search for a pattern anchored at beginning of buffer. */ 373117721Speter if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) 373217721Speter { 373317721Speter if (startpos > 0) 373417721Speter return -1; 373517721Speter else 373666525Speter range = 0; 373717721Speter } 373817721Speter 373925839Speter#ifdef emacs 374025839Speter /* In a forward search for something that starts with \=. 374125839Speter don't keep searching past point. */ 374225839Speter if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) 374325839Speter { 374466525Speter range = PT_BYTE - BEGV_BYTE - startpos; 374566525Speter if (range < 0) 374625839Speter return -1; 374725839Speter } 374825839Speter#endif /* emacs */ 374925839Speter 375017721Speter /* Update the fastmap now if not correct already. */ 375117721Speter if (fastmap && !bufp->fastmap_accurate) 375217721Speter if (re_compile_fastmap (bufp) == -2) 375317721Speter return -2; 375425839Speter 375566525Speter /* See whether the pattern is anchored. */ 375666525Speter if (bufp->buffer[0] == begline) 375766525Speter anchored_start = 1; 375866525Speter 375966525Speter#ifdef emacs 376066525Speter gl_state.object = re_match_object; 376166525Speter { 376266525Speter int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); 376366525Speter int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos); 376466525Speter 376566525Speter SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 376666525Speter } 376766525Speter#endif 376866525Speter 376917721Speter /* Loop through the string, looking for a place to start matching. */ 377017721Speter for (;;) 377125839Speter { 377266525Speter /* If the pattern is anchored, 377366525Speter skip quickly past places we cannot match. 377466525Speter We don't bother to treat startpos == 0 specially 377566525Speter because that case doesn't repeat. */ 377666525Speter if (anchored_start && startpos > 0) 377766525Speter { 377866525Speter if (! (bufp->newline_anchor 377966525Speter && ((startpos <= size1 ? string1[startpos - 1] 378066525Speter : string2[startpos - size1 - 1]) 378166525Speter == '\n'))) 378266525Speter goto advance; 378366525Speter } 378466525Speter 378517721Speter /* If a fastmap is supplied, skip quickly over characters that 378666525Speter cannot be the start of a match. If the pattern can match the 378766525Speter null string, however, we don't need to skip characters; we want 378866525Speter the first null string. */ 378917721Speter if (fastmap && startpos < total_size && !bufp->can_be_null) 379017721Speter { 379166525Speter register const char *d; 379266525Speter register unsigned int buf_ch; 379366525Speter 379466525Speter d = POS_ADDR_VSTRING (startpos); 379566525Speter 379666525Speter if (range > 0) /* Searching forwards. */ 379717721Speter { 379817721Speter register int lim = 0; 379917721Speter int irange = range; 380017721Speter 380166525Speter if (startpos < size1 && startpos + range >= size1) 380266525Speter lim = range - (size1 - startpos); 380317721Speter 380466525Speter /* Written out as an if-else to avoid testing `translate' 380566525Speter inside the loop. */ 380666525Speter if (RE_TRANSLATE_P (translate)) 380766525Speter { 380866525Speter if (multibyte) 380966525Speter while (range > lim) 381066525Speter { 381166525Speter int buf_charlen; 381225839Speter 381366525Speter buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim, 381466525Speter buf_charlen); 381566525Speter 381666525Speter buf_ch = RE_TRANSLATE (translate, buf_ch); 381766525Speter if (buf_ch >= 0400 381866525Speter || fastmap[buf_ch]) 381966525Speter break; 382066525Speter 382166525Speter range -= buf_charlen; 382266525Speter d += buf_charlen; 382366525Speter } 382466525Speter else 382566525Speter while (range > lim 382666525Speter && !fastmap[(unsigned char) 382766525Speter RE_TRANSLATE (translate, (unsigned char) *d)]) 382866525Speter { 382966525Speter d++; 383066525Speter range--; 383166525Speter } 383266525Speter } 383317721Speter else 383466525Speter while (range > lim && !fastmap[(unsigned char) *d]) 383566525Speter { 383666525Speter d++; 383766525Speter range--; 383866525Speter } 383917721Speter 384017721Speter startpos += irange - range; 384117721Speter } 384266525Speter else /* Searching backwards. */ 384317721Speter { 384466525Speter int room = (size1 == 0 || startpos >= size1 384566525Speter ? size2 + size1 - startpos 384666525Speter : size1 - startpos); 384717721Speter 384866525Speter buf_ch = STRING_CHAR (d, room); 384966525Speter if (RE_TRANSLATE_P (translate)) 385066525Speter buf_ch = RE_TRANSLATE (translate, buf_ch); 385166525Speter 385266525Speter if (! (buf_ch >= 0400 385366525Speter || fastmap[buf_ch])) 385417721Speter goto advance; 385517721Speter } 385617721Speter } 385717721Speter 385817721Speter /* If can't match the null string, and that's all we have left, fail. */ 385917721Speter if (range >= 0 && startpos == total_size && fastmap 386066525Speter && !bufp->can_be_null) 386117721Speter return -1; 386217721Speter 386366525Speter val = re_match_2_internal (bufp, string1, size1, string2, size2, 386466525Speter startpos, regs, stop); 386566525Speter#ifndef REGEX_MALLOC 386666525Speter#ifdef C_ALLOCA 386766525Speter alloca (0); 386866525Speter#endif 386966525Speter#endif 387066525Speter 387117721Speter if (val >= 0) 387217721Speter return startpos; 387325839Speter 387417721Speter if (val == -2) 387517721Speter return -2; 387617721Speter 387717721Speter advance: 387825839Speter if (!range) 387966525Speter break; 388025839Speter else if (range > 0) 388166525Speter { 388266525Speter /* Update STARTPOS to the next character boundary. */ 388366525Speter if (multibyte) 388466525Speter { 388566525Speter const unsigned char *p 388666525Speter = (const unsigned char *) POS_ADDR_VSTRING (startpos); 388766525Speter const unsigned char *pend 388866525Speter = (const unsigned char *) STOP_ADDR_VSTRING (startpos); 388966525Speter int len = MULTIBYTE_FORM_LENGTH (p, pend - p); 389066525Speter 389166525Speter range -= len; 389266525Speter if (range < 0) 389366525Speter break; 389466525Speter startpos += len; 389566525Speter } 389666525Speter else 389766525Speter { 389866525Speter range--; 389966525Speter startpos++; 390066525Speter } 390166525Speter } 390217721Speter else 390366525Speter { 390466525Speter range++; 390566525Speter startpos--; 390666525Speter 390766525Speter /* Update STARTPOS to the previous character boundary. */ 390866525Speter if (multibyte) 390966525Speter { 391066525Speter const unsigned char *p 391166525Speter = (const unsigned char *) POS_ADDR_VSTRING (startpos); 391266525Speter int len = 0; 391366525Speter 391466525Speter /* Find the head of multibyte form. */ 391566525Speter while (!CHAR_HEAD_P (*p)) 391666525Speter p--, len++; 391766525Speter 391866525Speter /* Adjust it. */ 391966525Speter#if 0 /* XXX */ 392066525Speter if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1)) 392166525Speter ; 392266525Speter else 392366525Speter#endif 392466525Speter { 392566525Speter range += len; 392666525Speter if (range > 0) 392766525Speter break; 392866525Speter 392966525Speter startpos -= len; 393066525Speter } 393166525Speter } 393266525Speter } 393317721Speter } 393417721Speter return -1; 393517721Speter} /* re_search_2 */ 393617721Speter 393717721Speter/* Declarations and macros for re_match_2. */ 393817721Speter 393917721Speterstatic int bcmp_translate (); 394017721Speterstatic boolean alt_match_null_string_p (), 394166525Speter common_op_match_null_string_p (), 394266525Speter group_match_null_string_p (); 394317721Speter 394417721Speter/* This converts PTR, a pointer into one of the search strings `string1' 394517721Speter and `string2' into an offset from the beginning of that string. */ 394666525Speter#define POINTER_TO_OFFSET(ptr) \ 394766525Speter (FIRST_STRING_P (ptr) \ 394866525Speter ? ((regoff_t) ((ptr) - string1)) \ 394966525Speter : ((regoff_t) ((ptr) - string2 + size1))) 395017721Speter 395117721Speter/* Macros for dealing with the split strings in re_match_2. */ 395217721Speter 395317721Speter#define MATCHING_IN_FIRST_STRING (dend == end_match_1) 395417721Speter 395517721Speter/* Call before fetching a character with *d. This switches over to 395617721Speter string2 if necessary. */ 395717721Speter#define PREFETCH() \ 395866525Speter while (d == dend) \ 395917721Speter { \ 396017721Speter /* End of string2 => fail. */ \ 396166525Speter if (dend == end_match_2) \ 396266525Speter goto fail; \ 396366525Speter /* End of string1 => advance to string2. */ \ 396466525Speter d = string2; \ 396517721Speter dend = end_match_2; \ 396617721Speter } 396717721Speter 396817721Speter 396917721Speter/* Test if at very beginning or at very end of the virtual concatenation 397066525Speter of `string1' and `string2'. If only one string, it's `string2'. */ 397117721Speter#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 397225839Speter#define AT_STRINGS_END(d) ((d) == end2) 397317721Speter 397417721Speter 397517721Speter/* Test if D points to a character which is word-constituent. We have 397617721Speter two special cases to check for: if past the end of string1, look at 397717721Speter the first character in string2; and if before the beginning of 397817721Speter string2, look at the last character in string1. */ 397917721Speter#define WORDCHAR_P(d) \ 398017721Speter (SYNTAX ((d) == end1 ? *string2 \ 398166525Speter : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ 398217721Speter == Sword) 398317721Speter 398466525Speter/* Disabled due to a compiler bug -- see comment at case wordbound */ 398566525Speter 398666525Speter/* The comment at case wordbound is following one, but we don't use 398766525Speter AT_WORD_BOUNDARY anymore to support multibyte form. 398866525Speter 398966525Speter The DEC Alpha C compiler 3.x generates incorrect code for the 399066525Speter test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 399166525Speter AT_WORD_BOUNDARY, so this code is disabled. Expanding the 399266525Speter macro and introducing temporary variables works around the bug. */ 399366525Speter 399466525Speter#if 0 399517721Speter/* Test if the character before D and the one at D differ with respect 399617721Speter to being word-constituent. */ 399717721Speter#define AT_WORD_BOUNDARY(d) \ 399817721Speter (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ 399917721Speter || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) 400066525Speter#endif 400117721Speter 400217721Speter/* Free everything we malloc. */ 400366525Speter#ifdef MATCH_MAY_ALLOCATE 4004175261Sobrien#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else 400517721Speter#define FREE_VARIABLES() \ 400617721Speter do { \ 400766525Speter REGEX_FREE_STACK (fail_stack.stack); \ 400817721Speter FREE_VAR (regstart); \ 400917721Speter FREE_VAR (regend); \ 401017721Speter FREE_VAR (old_regstart); \ 401117721Speter FREE_VAR (old_regend); \ 401217721Speter FREE_VAR (best_regstart); \ 401317721Speter FREE_VAR (best_regend); \ 401417721Speter FREE_VAR (reg_info); \ 401517721Speter FREE_VAR (reg_dummy); \ 401617721Speter FREE_VAR (reg_info_dummy); \ 401717721Speter } while (0) 401866525Speter#else 401966525Speter#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 402066525Speter#endif /* not MATCH_MAY_ALLOCATE */ 402117721Speter 402266525Speter/* These values must meet several constraints. They must not be valid 402317721Speter register values; since we have a limit of 255 registers (because 402417721Speter we use only one byte in the pattern for the register number), we can 402566525Speter use numbers larger than 255. They must differ by 1, because of 402617721Speter NUM_FAILURE_ITEMS above. And the value for the lowest register must 402717721Speter be larger than the value for the highest register, so we do not try 402866525Speter to actually save any registers when none are active. */ 402917721Speter#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) 403017721Speter#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) 403117721Speter 403217721Speter/* Matching routines. */ 403317721Speter 403466525Speter#ifndef emacs /* Emacs never uses this. */ 403517721Speter/* re_match is like re_match_2 except it takes only a single string. */ 403617721Speter 403717721Speterint 403817721Speterre_match (bufp, string, size, pos, regs) 403917721Speter struct re_pattern_buffer *bufp; 404017721Speter const char *string; 404117721Speter int size, pos; 404217721Speter struct re_registers *regs; 404366525Speter{ 404466525Speter int result = re_match_2_internal (bufp, NULL, 0, string, size, 404566525Speter pos, regs, size); 404666525Speter#ifndef REGEX_MALLOC /* CVS */ 404766525Speter#ifdef C_ALLOCA /* CVS */ 404866525Speter alloca (0); 404966525Speter#endif /* CVS */ 405066525Speter#endif /* CVS */ 405166525Speter return result; 405217721Speter} 405317721Speter#endif /* not emacs */ 405417721Speter 405566525Speter#ifdef emacs 405666525Speter/* In Emacs, this is the string or buffer in which we 405766525Speter are matching. It is used for looking up syntax properties. */ 405866525SpeterLisp_Object re_match_object; 405966525Speter#endif 406017721Speter 406117721Speter/* re_match_2 matches the compiled pattern in BUFP against the 406217721Speter the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 406317721Speter and SIZE2, respectively). We start matching at POS, and stop 406417721Speter matching at STOP. 406525839Speter 406617721Speter If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 406766525Speter store offsets for the substring each group matched in REGS. See the 406817721Speter documentation for exactly how many groups we fill. 406917721Speter 407017721Speter We return -1 if no match, -2 if an internal error (such as the 407166525Speter failure stack overflowing). Otherwise, we return the length of the 407217721Speter matched substring. */ 407317721Speter 407417721Speterint 407517721Speterre_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 407617721Speter struct re_pattern_buffer *bufp; 407717721Speter const char *string1, *string2; 407817721Speter int size1, size2; 407917721Speter int pos; 408017721Speter struct re_registers *regs; 408117721Speter int stop; 408217721Speter{ 408366525Speter int result; 408466525Speter 408566525Speter#ifdef emacs 408666525Speter int charpos; 408766525Speter int adjpos = NILP (re_match_object) || BUFFERP (re_match_object); 408866525Speter gl_state.object = re_match_object; 408966525Speter charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos); 409066525Speter SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 409166525Speter#endif 409266525Speter 409366525Speter result = re_match_2_internal (bufp, string1, size1, string2, size2, 409466525Speter pos, regs, stop); 409566525Speter#ifndef REGEX_MALLOC /* CVS */ 409666525Speter#ifdef C_ALLOCA /* CVS */ 409766525Speter alloca (0); 409866525Speter#endif /* CVS */ 409966525Speter#endif /* CVS */ 410066525Speter return result; 410166525Speter} 410266525Speter 410366525Speter/* This is a separate function so that we can force an alloca cleanup 410466525Speter afterwards. */ 410566525Speterstatic int 410666525Speterre_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 410766525Speter struct re_pattern_buffer *bufp; 410866525Speter const char *string1, *string2; 410966525Speter int size1, size2; 411066525Speter int pos; 411166525Speter struct re_registers *regs; 411266525Speter int stop; 411366525Speter{ 411417721Speter /* General temporaries. */ 411517721Speter int mcnt; 411617721Speter unsigned char *p1; 411717721Speter 411817721Speter /* Just past the end of the corresponding string. */ 411917721Speter const char *end1, *end2; 412017721Speter 412117721Speter /* Pointers into string1 and string2, just past the last characters in 412266525Speter each to consider matching. */ 412317721Speter const char *end_match_1, *end_match_2; 412417721Speter 412517721Speter /* Where we are in the data, and the end of the current string. */ 412617721Speter const char *d, *dend; 412725839Speter 412817721Speter /* Where we are in the pattern, and the end of the pattern. */ 412917721Speter unsigned char *p = bufp->buffer; 413017721Speter register unsigned char *pend = p + bufp->used; 413117721Speter 413266525Speter /* Mark the opcode just after a start_memory, so we can test for an 413366525Speter empty subpattern when we get to the stop_memory. */ 413466525Speter unsigned char *just_past_start_mem = 0; 413517721Speter 413666525Speter /* We use this to map every character in the string. */ 413766525Speter RE_TRANSLATE_TYPE translate = bufp->translate; 413866525Speter 413966525Speter /* Nonzero if we have to concern multibyte character. */ 414066525Speter int multibyte = bufp->multibyte; 414166525Speter 414217721Speter /* Failure point stack. Each place that can handle a failure further 414317721Speter down the line pushes a failure point on this stack. It consists of 414417721Speter restart, regend, and reg_info for all registers corresponding to 414517721Speter the subexpressions we're currently inside, plus the number of such 414617721Speter registers, and, finally, two char *'s. The first char * is where 414717721Speter to resume scanning the pattern; the second one is where to resume 414817721Speter scanning the strings. If the latter is zero, the failure point is 414917721Speter a ``dummy''; if a failure happens and the failure point is a dummy, 415066525Speter it gets discarded and the next next one is tried. */ 415166525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 415217721Speter fail_stack_type fail_stack; 415366525Speter#endif 415417721Speter#ifdef DEBUG 415517721Speter static unsigned failure_id = 0; 415617721Speter unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 415717721Speter#endif 415817721Speter 415966525Speter /* This holds the pointer to the failure stack, when 416066525Speter it is allocated relocatably. */ 416166525Speter fail_stack_elt_t *failure_stack_ptr; 416266525Speter 416317721Speter /* We fill all the registers internally, independent of what we 416466525Speter return, for use in backreferences. The number here includes 416517721Speter an element for register zero. */ 416617721Speter unsigned num_regs = bufp->re_nsub + 1; 416725839Speter 416817721Speter /* The currently active registers. */ 416917721Speter unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; 417017721Speter unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; 417117721Speter 417217721Speter /* Information on the contents of registers. These are pointers into 417317721Speter the input strings; they record just what was matched (on this 417417721Speter attempt) by a subexpression part of the pattern, that is, the 417517721Speter regnum-th regstart pointer points to where in the pattern we began 417617721Speter matching and the regnum-th regend points to right after where we 417717721Speter stopped matching the regnum-th subexpression. (The zeroth register 417817721Speter keeps track of what the whole pattern matches.) */ 417966525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 418017721Speter const char **regstart, **regend; 418166525Speter#endif 418217721Speter 418317721Speter /* If a group that's operated upon by a repetition operator fails to 418417721Speter match anything, then the register for its start will need to be 418517721Speter restored because it will have been set to wherever in the string we 418617721Speter are when we last see its open-group operator. Similarly for a 418717721Speter register's end. */ 418866525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 418917721Speter const char **old_regstart, **old_regend; 419066525Speter#endif 419117721Speter 419217721Speter /* The is_active field of reg_info helps us keep track of which (possibly 419317721Speter nested) subexpressions we are currently in. The matched_something 419417721Speter field of reg_info[reg_num] helps us tell whether or not we have 419517721Speter matched any of the pattern so far this time through the reg_num-th 419617721Speter subexpression. These two fields get reset each time through any 419766525Speter loop their register is in. */ 419866525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 419925839Speter register_info_type *reg_info; 420066525Speter#endif 420117721Speter 420217721Speter /* The following record the register info as found in the above 420325839Speter variables when we find a match better than any we've seen before. 420417721Speter This happens as we backtrack through the failure points, which in 420517721Speter turn happens only if we have not yet matched the entire string. */ 420617721Speter unsigned best_regs_set = false; 420766525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 420817721Speter const char **best_regstart, **best_regend; 420966525Speter#endif 421066525Speter 421117721Speter /* Logically, this is `best_regend[0]'. But we don't want to have to 421217721Speter allocate space for that if we're not allocating space for anything 421366525Speter else (see below). Also, we never need info about register 0 for 421417721Speter any of the other register vectors, and it seems rather a kludge to 421517721Speter treat `best_regend' differently than the rest. So we keep track of 421617721Speter the end of the best match so far in a separate variable. We 421717721Speter initialize this to NULL so that when we backtrack the first time 421817721Speter and need to test it, it's not garbage. */ 421917721Speter const char *match_end = NULL; 422017721Speter 422166525Speter /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 422266525Speter int set_regs_matched_done = 0; 422366525Speter 422417721Speter /* Used when we pop values we don't care about. */ 422566525Speter#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 422617721Speter const char **reg_dummy; 422717721Speter register_info_type *reg_info_dummy; 422866525Speter#endif 422917721Speter 423017721Speter#ifdef DEBUG 423117721Speter /* Counts the total number of registers pushed. */ 423225839Speter unsigned num_regs_pushed = 0; 423317721Speter#endif 423417721Speter 423517721Speter DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 423625839Speter 423717721Speter INIT_FAIL_STACK (); 423825839Speter 423966525Speter#ifdef MATCH_MAY_ALLOCATE 424017721Speter /* Do not bother to initialize all the register variables if there are 424117721Speter no groups in the pattern, as it takes a fair amount of time. If 424217721Speter there are groups, we include space for register 0 (the whole 424317721Speter pattern), even though we never use it, since it simplifies the 424417721Speter array indexing. We should fix this. */ 424517721Speter if (bufp->re_nsub) 424617721Speter { 424717721Speter regstart = REGEX_TALLOC (num_regs, const char *); 424817721Speter regend = REGEX_TALLOC (num_regs, const char *); 424917721Speter old_regstart = REGEX_TALLOC (num_regs, const char *); 425017721Speter old_regend = REGEX_TALLOC (num_regs, const char *); 425117721Speter best_regstart = REGEX_TALLOC (num_regs, const char *); 425217721Speter best_regend = REGEX_TALLOC (num_regs, const char *); 425317721Speter reg_info = REGEX_TALLOC (num_regs, register_info_type); 425417721Speter reg_dummy = REGEX_TALLOC (num_regs, const char *); 425517721Speter reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 425617721Speter 425725839Speter if (!(regstart && regend && old_regstart && old_regend && reg_info 425866525Speter && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 425966525Speter { 426066525Speter FREE_VARIABLES (); 426166525Speter return -2; 426266525Speter } 426317721Speter } 426417721Speter else 426517721Speter { 426617721Speter /* We must initialize all our variables to NULL, so that 426766525Speter `FREE_VARIABLES' doesn't try to free them. */ 426817721Speter regstart = regend = old_regstart = old_regend = best_regstart 426966525Speter = best_regend = reg_dummy = NULL; 427017721Speter reg_info = reg_info_dummy = (register_info_type *) NULL; 427117721Speter } 427266525Speter#endif /* MATCH_MAY_ALLOCATE */ 427317721Speter 427417721Speter /* The starting position is bogus. */ 427517721Speter if (pos < 0 || pos > size1 + size2) 427617721Speter { 427717721Speter FREE_VARIABLES (); 427817721Speter return -1; 427917721Speter } 428025839Speter 428117721Speter /* Initialize subexpression text positions to -1 to mark ones that no 428217721Speter start_memory/stop_memory has been seen for. Also initialize the 428317721Speter register information struct. */ 428417721Speter for (mcnt = 1; mcnt < num_regs; mcnt++) 428517721Speter { 428625839Speter regstart[mcnt] = regend[mcnt] 428766525Speter = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 428825839Speter 428917721Speter REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 429017721Speter IS_ACTIVE (reg_info[mcnt]) = 0; 429117721Speter MATCHED_SOMETHING (reg_info[mcnt]) = 0; 429217721Speter EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 429317721Speter } 429425839Speter 429517721Speter /* We move `string1' into `string2' if the latter's empty -- but not if 429666525Speter `string1' is null. */ 429717721Speter if (size2 == 0 && string1 != NULL) 429817721Speter { 429917721Speter string2 = string1; 430017721Speter size2 = size1; 430117721Speter string1 = 0; 430217721Speter size1 = 0; 430317721Speter } 430417721Speter end1 = string1 + size1; 430517721Speter end2 = string2 + size2; 430617721Speter 430717721Speter /* Compute where to stop matching, within the two strings. */ 430817721Speter if (stop <= size1) 430917721Speter { 431017721Speter end_match_1 = string1 + stop; 431117721Speter end_match_2 = string2; 431217721Speter } 431317721Speter else 431417721Speter { 431517721Speter end_match_1 = end1; 431617721Speter end_match_2 = string2 + stop - size1; 431717721Speter } 431817721Speter 431925839Speter /* `p' scans through the pattern as `d' scans through the data. 432017721Speter `dend' is the end of the input string that `d' points within. `d' 432117721Speter is advanced into the following input string whenever necessary, but 432217721Speter this happens before fetching; therefore, at the beginning of the 432317721Speter loop, `d' can be pointing at the end of a string, but it cannot 432417721Speter equal `string2'. */ 432517721Speter if (size1 > 0 && pos <= size1) 432617721Speter { 432717721Speter d = string1 + pos; 432817721Speter dend = end_match_1; 432917721Speter } 433017721Speter else 433117721Speter { 433217721Speter d = string2 + pos - size1; 433317721Speter dend = end_match_2; 433417721Speter } 433517721Speter 433617721Speter DEBUG_PRINT1 ("The compiled pattern is: "); 433717721Speter DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 433817721Speter DEBUG_PRINT1 ("The string to match is: `"); 433917721Speter DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 434017721Speter DEBUG_PRINT1 ("'\n"); 434125839Speter 434266525Speter /* This loops over pattern commands. It exits by returning from the 434317721Speter function if the match is complete, or it drops through if the match 434417721Speter fails at this starting point in the input data. */ 434517721Speter for (;;) 434617721Speter { 434717721Speter DEBUG_PRINT2 ("\n0x%x: ", p); 434817721Speter 434917721Speter if (p == pend) 435017721Speter { /* End of pattern means we might have succeeded. */ 435166525Speter DEBUG_PRINT1 ("end of pattern ... "); 435225839Speter 435317721Speter /* If we haven't matched the entire string, and we want the 435466525Speter longest match, try backtracking. */ 435566525Speter if (d != end_match_2) 435617721Speter { 435766525Speter /* 1 if this match ends in the same string (string1 or string2) 435866525Speter as the best previous match. */ 435966525Speter boolean same_str_p = (FIRST_STRING_P (match_end) 436066525Speter == MATCHING_IN_FIRST_STRING); 436166525Speter /* 1 if this match is the best seen so far. */ 436266525Speter boolean best_match_p; 436325839Speter 436466525Speter /* AIX compiler got confused when this was combined 436566525Speter with the previous declaration. */ 436666525Speter if (same_str_p) 436766525Speter best_match_p = d > match_end; 436866525Speter else 436966525Speter best_match_p = !MATCHING_IN_FIRST_STRING; 437017721Speter 437166525Speter DEBUG_PRINT1 ("backtracking.\n"); 437225839Speter 437366525Speter if (!FAIL_STACK_EMPTY ()) 437466525Speter { /* More failure points to try. */ 437525839Speter 437666525Speter /* If exceeds best match so far, save it. */ 437766525Speter if (!best_regs_set || best_match_p) 437866525Speter { 437966525Speter best_regs_set = true; 438066525Speter match_end = d; 438117721Speter 438266525Speter DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 438325839Speter 438466525Speter for (mcnt = 1; mcnt < num_regs; mcnt++) 438566525Speter { 438666525Speter best_regstart[mcnt] = regstart[mcnt]; 438766525Speter best_regend[mcnt] = regend[mcnt]; 438866525Speter } 438966525Speter } 439066525Speter goto fail; 439166525Speter } 439217721Speter 439366525Speter /* If no failure points, don't restore garbage. And if 439466525Speter last match is real best match, don't restore second 439566525Speter best one. */ 439666525Speter else if (best_regs_set && !best_match_p) 439766525Speter { 439866525Speter restore_best_regs: 439966525Speter /* Restore best match. It may happen that `dend == 440066525Speter end_match_1' while the restored d is in string2. 440166525Speter For example, the pattern `x.*y.*z' against the 440266525Speter strings `x-' and `y-z-', if the two strings are 440366525Speter not consecutive in memory. */ 440466525Speter DEBUG_PRINT1 ("Restoring best registers.\n"); 440566525Speter 440666525Speter d = match_end; 440766525Speter dend = ((d >= string1 && d <= end1) 440866525Speter ? end_match_1 : end_match_2); 440966525Speter 441017721Speter for (mcnt = 1; mcnt < num_regs; mcnt++) 441117721Speter { 441217721Speter regstart[mcnt] = best_regstart[mcnt]; 441317721Speter regend[mcnt] = best_regend[mcnt]; 441417721Speter } 441566525Speter } 441666525Speter } /* d != end_match_2 */ 441717721Speter 441866525Speter succeed_label: 441966525Speter DEBUG_PRINT1 ("Accepting match.\n"); 442017721Speter 442166525Speter /* If caller wants register contents data back, do it. */ 442266525Speter if (regs && !bufp->no_sub) 442317721Speter { 442466525Speter /* Have the register data arrays been allocated? */ 442566525Speter if (bufp->regs_allocated == REGS_UNALLOCATED) 442666525Speter { /* No. So allocate them with malloc. We need one 442766525Speter extra element beyond `num_regs' for the `-1' marker 442866525Speter GNU code uses. */ 442966525Speter regs->num_regs = MAX (RE_NREGS, num_regs + 1); 443066525Speter regs->start = TALLOC (regs->num_regs, regoff_t); 443166525Speter regs->end = TALLOC (regs->num_regs, regoff_t); 443266525Speter if (regs->start == NULL || regs->end == NULL) 443366525Speter { 443466525Speter FREE_VARIABLES (); 443566525Speter return -2; 443666525Speter } 443766525Speter bufp->regs_allocated = REGS_REALLOCATE; 443866525Speter } 443966525Speter else if (bufp->regs_allocated == REGS_REALLOCATE) 444066525Speter { /* Yes. If we need more elements than were already 444166525Speter allocated, reallocate them. If we need fewer, just 444266525Speter leave it alone. */ 444366525Speter if (regs->num_regs < num_regs + 1) 444466525Speter { 444566525Speter regs->num_regs = num_regs + 1; 444666525Speter RETALLOC (regs->start, regs->num_regs, regoff_t); 444766525Speter RETALLOC (regs->end, regs->num_regs, regoff_t); 444866525Speter if (regs->start == NULL || regs->end == NULL) 444966525Speter { 445066525Speter FREE_VARIABLES (); 445166525Speter return -2; 445266525Speter } 445366525Speter } 445466525Speter } 445566525Speter else 445625839Speter { 445725839Speter /* These braces fend off a "empty body in an else-statement" 445866525Speter warning under GCC when assert expands to nothing. */ 445925839Speter assert (bufp->regs_allocated == REGS_FIXED); 446025839Speter } 446117721Speter 446266525Speter /* Convert the pointer data in `regstart' and `regend' to 446366525Speter indices. Register zero has to be set differently, 446466525Speter since we haven't kept track of any info for it. */ 446566525Speter if (regs->num_regs > 0) 446666525Speter { 446766525Speter regs->start[0] = pos; 446866525Speter regs->end[0] = (MATCHING_IN_FIRST_STRING 446966525Speter ? ((regoff_t) (d - string1)) 447066525Speter : ((regoff_t) (d - string2 + size1))); 447166525Speter } 447225839Speter 447366525Speter /* Go through the first `min (num_regs, regs->num_regs)' 447466525Speter registers, since that is all we initialized. */ 447517721Speter for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) 447617721Speter { 447766525Speter if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 447866525Speter regs->start[mcnt] = regs->end[mcnt] = -1; 447966525Speter else 448066525Speter { 448166525Speter regs->start[mcnt] 448266525Speter = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 448366525Speter regs->end[mcnt] 448466525Speter = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 448566525Speter } 448617721Speter } 448725839Speter 448866525Speter /* If the regs structure we return has more elements than 448966525Speter were in the pattern, set the extra elements to -1. If 449066525Speter we (re)allocated the registers, this is the case, 449166525Speter because we always allocate enough to have at least one 449266525Speter -1 at the end. */ 449366525Speter for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) 449466525Speter regs->start[mcnt] = regs->end[mcnt] = -1; 449517721Speter } /* regs && !bufp->no_sub */ 449617721Speter 449766525Speter DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 449866525Speter nfailure_points_pushed, nfailure_points_popped, 449966525Speter nfailure_points_pushed - nfailure_points_popped); 450066525Speter DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 450117721Speter 450266525Speter mcnt = d - pos - (MATCHING_IN_FIRST_STRING 450325839Speter ? string1 450417721Speter : string2 - size1); 450517721Speter 450666525Speter DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 450717721Speter 450866525Speter FREE_VARIABLES (); 450966525Speter return mcnt; 451066525Speter } 451117721Speter 451266525Speter /* Otherwise match next pattern command. */ 451366525Speter switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 451417721Speter { 451566525Speter /* Ignore these. Used to ignore the n of succeed_n's which 451666525Speter currently have n == 0. */ 451766525Speter case no_op: 451866525Speter DEBUG_PRINT1 ("EXECUTING no_op.\n"); 451966525Speter break; 452017721Speter 452166525Speter case succeed: 452266525Speter DEBUG_PRINT1 ("EXECUTING succeed.\n"); 452366525Speter goto succeed_label; 452417721Speter 452566525Speter /* Match the next n pattern characters exactly. The following 452666525Speter byte in the pattern defines n, and the n bytes after that 452766525Speter are the characters to match. */ 452817721Speter case exactn: 452917721Speter mcnt = *p++; 453066525Speter DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 453117721Speter 453266525Speter /* This is written out as an if-else so we don't waste time 453366525Speter testing `translate' inside the loop. */ 453466525Speter if (RE_TRANSLATE_P (translate)) 453517721Speter { 453666525Speter#ifdef emacs 453766525Speter if (multibyte) 453866525Speter do 453966525Speter { 454066525Speter int pat_charlen, buf_charlen; 454166525Speter unsigned int pat_ch, buf_ch; 454266525Speter 454366525Speter PREFETCH (); 454466525Speter pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); 454566525Speter buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); 454666525Speter 454766525Speter if (RE_TRANSLATE (translate, buf_ch) 454866525Speter != pat_ch) 454966525Speter goto fail; 455066525Speter 455166525Speter p += pat_charlen; 455266525Speter d += buf_charlen; 455366525Speter mcnt -= pat_charlen; 455466525Speter } 455566525Speter while (mcnt > 0); 455666525Speter else 455766525Speter#endif /* not emacs */ 455866525Speter do 455966525Speter { 456066525Speter PREFETCH (); 456166525Speter if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d) 456266525Speter != (unsigned char) *p++) 456366525Speter goto fail; 456466525Speter d++; 456566525Speter } 456666525Speter while (--mcnt); 456717721Speter } 456817721Speter else 456917721Speter { 457017721Speter do 457117721Speter { 457217721Speter PREFETCH (); 457317721Speter if (*d++ != (char) *p++) goto fail; 457417721Speter } 457517721Speter while (--mcnt); 457617721Speter } 457717721Speter SET_REGS_MATCHED (); 457866525Speter break; 457917721Speter 458017721Speter 458166525Speter /* Match any character except possibly a newline or a null. */ 458217721Speter case anychar: 458366525Speter { 458466525Speter int buf_charlen; 458566525Speter unsigned int buf_ch; 458617721Speter 458766525Speter DEBUG_PRINT1 ("EXECUTING anychar.\n"); 458817721Speter 458966525Speter PREFETCH (); 459017721Speter 459166525Speter#ifdef emacs 459266525Speter if (multibyte) 459366525Speter buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); 459466525Speter else 459566525Speter#endif /* not emacs */ 459666525Speter { 459766525Speter buf_ch = (unsigned char) *d; 459866525Speter buf_charlen = 1; 459966525Speter } 460066525Speter 460166525Speter buf_ch = TRANSLATE (buf_ch); 460266525Speter 460366525Speter if ((!(bufp->syntax & RE_DOT_NEWLINE) 460466525Speter && buf_ch == '\n') 460566525Speter || ((bufp->syntax & RE_DOT_NOT_NULL) 460666525Speter && buf_ch == '\000')) 460766525Speter goto fail; 460866525Speter 460966525Speter SET_REGS_MATCHED (); 461066525Speter DEBUG_PRINT2 (" Matched `%d'.\n", *d); 461166525Speter d += buf_charlen; 461266525Speter } 461317721Speter break; 461417721Speter 461517721Speter 461617721Speter case charset: 461717721Speter case charset_not: 461817721Speter { 461966525Speter register unsigned int c; 462017721Speter boolean not = (re_opcode_t) *(p - 1) == charset_not; 462166525Speter int len; 462217721Speter 462366525Speter /* Start of actual range_table, or end of bitmap if there is no 462466525Speter range table. */ 462566525Speter unsigned char *range_table; 462617721Speter 462766525Speter /* Nonzero if there is range table. */ 462866525Speter int range_table_exists; 462966525Speter 463066525Speter /* Number of ranges of range table. Not in bytes. */ 463166525Speter int count; 463266525Speter 463366525Speter DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 463466525Speter 463517721Speter PREFETCH (); 463666525Speter c = (unsigned char) *d; 463717721Speter 463866525Speter range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */ 463966525Speter range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]); 464066525Speter if (range_table_exists) 464166525Speter EXTRACT_NUMBER_AND_INCR (count, range_table); 464266525Speter else 464366525Speter count = 0; 464466525Speter 464566525Speter if (multibyte && BASE_LEADING_CODE_P (c)) 464666525Speter c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 464766525Speter 464866525Speter if (SINGLE_BYTE_CHAR_P (c)) 464966525Speter { /* Lookup bitmap. */ 465066525Speter c = TRANSLATE (c); /* The character to match. */ 465166525Speter len = 1; 465266525Speter 465366525Speter /* Cast to `unsigned' instead of `unsigned char' in 465466525Speter case the bit list is a full 32 bytes long. */ 465566525Speter if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH) 465617721Speter && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 465717721Speter not = !not; 465866525Speter } 465966525Speter else if (range_table_exists) 466066525Speter CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count); 466117721Speter 466266525Speter p = CHARSET_RANGE_TABLE_END (range_table, count); 466317721Speter 466417721Speter if (!not) goto fail; 466525839Speter 466617721Speter SET_REGS_MATCHED (); 466766525Speter d += len; 466817721Speter break; 466917721Speter } 467017721Speter 467117721Speter 467266525Speter /* The beginning of a group is represented by start_memory. 467366525Speter The arguments are the register number in the next byte, and the 467466525Speter number of groups inner to this one in the next. The text 467566525Speter matched within the group is recorded (in the internal 467666525Speter registers data structure) under the register number. */ 467766525Speter case start_memory: 467817721Speter DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 467917721Speter 468066525Speter /* Find out if this group can match the empty string. */ 468117721Speter p1 = p; /* To send to group_match_null_string_p. */ 468225839Speter 468366525Speter if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 468466525Speter REG_MATCH_NULL_STRING_P (reg_info[*p]) 468566525Speter = group_match_null_string_p (&p1, pend, reg_info); 468617721Speter 468766525Speter /* Save the position in the string where we were the last time 468866525Speter we were at this open-group operator in case the group is 468966525Speter operated upon by a repetition operator, e.g., with `(a*)*b' 469066525Speter against `ab'; then we want to ignore where we are now in 469166525Speter the string in case this attempt to match fails. */ 469266525Speter old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 469366525Speter ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 469466525Speter : regstart[*p]; 469525839Speter DEBUG_PRINT2 (" old_regstart: %d\n", 469617721Speter POINTER_TO_OFFSET (old_regstart[*p])); 469717721Speter 469866525Speter regstart[*p] = d; 469917721Speter DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 470017721Speter 470166525Speter IS_ACTIVE (reg_info[*p]) = 1; 470266525Speter MATCHED_SOMETHING (reg_info[*p]) = 0; 470325839Speter 470466525Speter /* Clear this whenever we change the register activity status. */ 470566525Speter set_regs_matched_done = 0; 470625839Speter 470766525Speter /* This is the new highest active register. */ 470866525Speter highest_active_reg = *p; 470917721Speter 471066525Speter /* If nothing was active before, this is the new lowest active 471166525Speter register. */ 471266525Speter if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 471366525Speter lowest_active_reg = *p; 471417721Speter 471566525Speter /* Move past the register number and inner group count. */ 471666525Speter p += 2; 471766525Speter just_past_start_mem = p; 471817721Speter 471966525Speter break; 472066525Speter 472166525Speter 472266525Speter /* The stop_memory opcode represents the end of a group. Its 472366525Speter arguments are the same as start_memory's: the register 472466525Speter number, and the number of inner groups. */ 472517721Speter case stop_memory: 472617721Speter DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 472725839Speter 472866525Speter /* We need to save the string position the last time we were at 472966525Speter this close-group operator in case the group is operated 473066525Speter upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 473166525Speter against `aba'; then we want to ignore where we are now in 473266525Speter the string in case this attempt to match fails. */ 473366525Speter old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 473466525Speter ? REG_UNSET (regend[*p]) ? d : regend[*p] 473517721Speter : regend[*p]; 473625839Speter DEBUG_PRINT2 (" old_regend: %d\n", 473717721Speter POINTER_TO_OFFSET (old_regend[*p])); 473817721Speter 473966525Speter regend[*p] = d; 474017721Speter DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 474117721Speter 474266525Speter /* This register isn't active anymore. */ 474366525Speter IS_ACTIVE (reg_info[*p]) = 0; 474425839Speter 474566525Speter /* Clear this whenever we change the register activity status. */ 474666525Speter set_regs_matched_done = 0; 474725839Speter 474866525Speter /* If this was the only register active, nothing is active 474966525Speter anymore. */ 475066525Speter if (lowest_active_reg == highest_active_reg) 475166525Speter { 475266525Speter lowest_active_reg = NO_LOWEST_ACTIVE_REG; 475366525Speter highest_active_reg = NO_HIGHEST_ACTIVE_REG; 475466525Speter } 475566525Speter else 475666525Speter { /* We must scan for the new highest active register, since 475766525Speter it isn't necessarily one less than now: consider 475866525Speter (a(b)c(d(e)f)g). When group 3 ends, after the f), the 475966525Speter new highest active register is 1. */ 476066525Speter unsigned char r = *p - 1; 476166525Speter while (r > 0 && !IS_ACTIVE (reg_info[r])) 476266525Speter r--; 476366525Speter 476466525Speter /* If we end up at register zero, that means that we saved 476566525Speter the registers as the result of an `on_failure_jump', not 476666525Speter a `start_memory', and we jumped to past the innermost 476766525Speter `stop_memory'. For example, in ((.)*) we save 476866525Speter registers 1 and 2 as a result of the *, but when we pop 476966525Speter back to the second ), we are at the stop_memory 1. 477066525Speter Thus, nothing is active. */ 477117721Speter if (r == 0) 477266525Speter { 477366525Speter lowest_active_reg = NO_LOWEST_ACTIVE_REG; 477466525Speter highest_active_reg = NO_HIGHEST_ACTIVE_REG; 477566525Speter } 477666525Speter else 477766525Speter highest_active_reg = r; 477866525Speter } 477925839Speter 478066525Speter /* If just failed to match something this time around with a 478166525Speter group that's operated on by a repetition operator, try to 478266525Speter force exit from the ``loop'', and restore the register 478366525Speter information for this group that we had before trying this 478466525Speter last match. */ 478566525Speter if ((!MATCHED_SOMETHING (reg_info[*p]) 478666525Speter || just_past_start_mem == p - 1) 478766525Speter && (p + 2) < pend) 478866525Speter { 478966525Speter boolean is_a_jump_n = false; 479025839Speter 479166525Speter p1 = p + 2; 479266525Speter mcnt = 0; 479366525Speter switch ((re_opcode_t) *p1++) 479466525Speter { 479566525Speter case jump_n: 479617721Speter is_a_jump_n = true; 479766525Speter case pop_failure_jump: 479817721Speter case maybe_pop_jump: 479917721Speter case jump: 480017721Speter case dummy_failure_jump: 480166525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 480217721Speter if (is_a_jump_n) 480317721Speter p1 += 2; 480466525Speter break; 480525839Speter 480666525Speter default: 480766525Speter /* do nothing */ ; 480866525Speter } 480917721Speter p1 += mcnt; 481025839Speter 481166525Speter /* If the next operation is a jump backwards in the pattern 481266525Speter to an on_failure_jump right before the start_memory 481366525Speter corresponding to this stop_memory, exit from the loop 481466525Speter by forcing a failure after pushing on the stack the 481566525Speter on_failure_jump's jump in the pattern, and d. */ 481666525Speter if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 481766525Speter && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) 481817721Speter { 481966525Speter /* If this group ever matched anything, then restore 482066525Speter what its registers were before trying this last 482166525Speter failed match, e.g., with `(a*)*b' against `ab' for 482266525Speter regstart[1], and, e.g., with `((a*)*(b*)*)*' 482366525Speter against `aba' for regend[3]. 482425839Speter 482566525Speter Also restore the registers for inner groups for, 482666525Speter e.g., `((a*)(b*))*' against `aba' (register 3 would 482766525Speter otherwise get trashed). */ 482825839Speter 482966525Speter if (EVER_MATCHED_SOMETHING (reg_info[*p])) 483017721Speter { 483125839Speter unsigned r; 483225839Speter 483366525Speter EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 483425839Speter 483517721Speter /* Restore this and inner groups' (if any) registers. */ 483634461Speter for (r = *p; r < *p + *(p + 1); r++) 483734461Speter { 483834461Speter regstart[r] = old_regstart[r]; 483917721Speter 484034461Speter /* xx why this test? */ 484134461Speter if (old_regend[r] >= regstart[r]) 484234461Speter regend[r] = old_regend[r]; 484334461Speter } 484434461Speter } 484517721Speter p1++; 484666525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 484766525Speter PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 484817721Speter 484966525Speter goto fail; 485066525Speter } 485166525Speter } 485225839Speter 485366525Speter /* Move past the register number and the inner group count. */ 485466525Speter p += 2; 485566525Speter break; 485617721Speter 485717721Speter 485817721Speter /* \<digit> has been turned into a `duplicate' command which is 485966525Speter followed by the numeric value of <digit> as the register number. */ 486066525Speter case duplicate: 486117721Speter { 486217721Speter register const char *d2, *dend2; 486366525Speter int regno = *p++; /* Get which register to match against. */ 486417721Speter DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 486517721Speter 486666525Speter /* Can't back reference a group which we've never matched. */ 486766525Speter if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 486866525Speter goto fail; 486925839Speter 487066525Speter /* Where in input to try to start matching. */ 487166525Speter d2 = regstart[regno]; 487225839Speter 487366525Speter /* Where to stop matching; if both the place to start and 487466525Speter the place to stop matching are in the same string, then 487566525Speter set to the place to stop, otherwise, for now have to use 487666525Speter the end of the first string. */ 487717721Speter 487866525Speter dend2 = ((FIRST_STRING_P (regstart[regno]) 487917721Speter == FIRST_STRING_P (regend[regno])) 488017721Speter ? regend[regno] : end_match_1); 488117721Speter for (;;) 488217721Speter { 488317721Speter /* If necessary, advance to next segment in register 488466525Speter contents. */ 488517721Speter while (d2 == dend2) 488617721Speter { 488717721Speter if (dend2 == end_match_2) break; 488817721Speter if (dend2 == regend[regno]) break; 488917721Speter 489066525Speter /* End of string1 => advance to string2. */ 489166525Speter d2 = string2; 489266525Speter dend2 = regend[regno]; 489317721Speter } 489417721Speter /* At end of register contents => success */ 489517721Speter if (d2 == dend2) break; 489617721Speter 489717721Speter /* If necessary, advance to next segment in data. */ 489817721Speter PREFETCH (); 489917721Speter 490017721Speter /* How many characters left in this segment to match. */ 490117721Speter mcnt = dend - d; 490225839Speter 490317721Speter /* Want how many consecutive characters we can match in 490466525Speter one shot, so, if necessary, adjust the count. */ 490566525Speter if (mcnt > dend2 - d2) 490617721Speter mcnt = dend2 - d2; 490725839Speter 490817721Speter /* Compare that many; failure if mismatch, else move 490966525Speter past them. */ 491066525Speter if (RE_TRANSLATE_P (translate) 491166525Speter ? bcmp_translate (d, d2, mcnt, translate) 491266525Speter : bcmp (d, d2, mcnt)) 491317721Speter goto fail; 491417721Speter d += mcnt, d2 += mcnt; 491566525Speter 491666525Speter /* Do this because we've match some characters. */ 491766525Speter SET_REGS_MATCHED (); 491817721Speter } 491917721Speter } 492017721Speter break; 492117721Speter 492217721Speter 492366525Speter /* begline matches the empty string at the beginning of the string 492466525Speter (unless `not_bol' is set in `bufp'), and, if 492566525Speter `newline_anchor' is set, after newlines. */ 492617721Speter case begline: 492766525Speter DEBUG_PRINT1 ("EXECUTING begline.\n"); 492825839Speter 492966525Speter if (AT_STRINGS_BEG (d)) 493066525Speter { 493166525Speter if (!bufp->not_bol) break; 493266525Speter } 493366525Speter else if (d[-1] == '\n' && bufp->newline_anchor) 493466525Speter { 493566525Speter break; 493666525Speter } 493766525Speter /* In all other cases, we fail. */ 493866525Speter goto fail; 493917721Speter 494017721Speter 494166525Speter /* endline is the dual of begline. */ 494217721Speter case endline: 494366525Speter DEBUG_PRINT1 ("EXECUTING endline.\n"); 494417721Speter 494566525Speter if (AT_STRINGS_END (d)) 494666525Speter { 494766525Speter if (!bufp->not_eol) break; 494866525Speter } 494925839Speter 495066525Speter /* We have to ``prefetch'' the next character. */ 495166525Speter else if ((d == end1 ? *string2 : *d) == '\n' 495266525Speter && bufp->newline_anchor) 495366525Speter { 495466525Speter break; 495566525Speter } 495666525Speter goto fail; 495717721Speter 495817721Speter 495917721Speter /* Match at the very beginning of the data. */ 496066525Speter case begbuf: 496166525Speter DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 496266525Speter if (AT_STRINGS_BEG (d)) 496366525Speter break; 496466525Speter goto fail; 496517721Speter 496617721Speter 496717721Speter /* Match at the very end of the data. */ 496866525Speter case endbuf: 496966525Speter DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 497017721Speter if (AT_STRINGS_END (d)) 497117721Speter break; 497266525Speter goto fail; 497317721Speter 497417721Speter 497566525Speter /* on_failure_keep_string_jump is used to optimize `.*\n'. It 497666525Speter pushes NULL as the value for the string on the stack. Then 497766525Speter `pop_failure_point' will keep the current value for the 497866525Speter string, instead of restoring it. To see why, consider 497966525Speter matching `foo\nbar' against `.*\n'. The .* matches the foo; 498066525Speter then the . fails against the \n. But the next thing we want 498166525Speter to do is match the \n against the \n; if we restored the 498266525Speter string value, we would be back at the foo. 498325839Speter 498466525Speter Because this is used only in specific cases, we don't need to 498566525Speter check all the things that `on_failure_jump' does, to make 498666525Speter sure the right things get saved on the stack. Hence we don't 498766525Speter share its code. The only reason to push anything on the 498866525Speter stack at all is that otherwise we would have to change 498966525Speter `anychar's code to do something besides goto fail in this 499066525Speter case; that seems worse than this. */ 499166525Speter case on_failure_keep_string_jump: 499266525Speter DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 499325839Speter 499466525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p); 499566525Speter DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 499617721Speter 499766525Speter PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 499866525Speter break; 499917721Speter 500017721Speter 500117721Speter /* Uses of on_failure_jump: 500225839Speter 500366525Speter Each alternative starts with an on_failure_jump that points 500466525Speter to the beginning of the next alternative. Each alternative 500566525Speter except the last ends with a jump that in effect jumps past 500666525Speter the rest of the alternatives. (They really jump to the 500766525Speter ending jump of the following alternative, because tensioning 500866525Speter these jumps is a hassle.) 500917721Speter 501066525Speter Repeats start with an on_failure_jump that points past both 501166525Speter the repetition text and either the following jump or 501266525Speter pop_failure_jump back to this on_failure_jump. */ 501317721Speter case on_failure_jump: 501466525Speter on_failure: 501566525Speter DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 501617721Speter 501766525Speter#if defined (WINDOWSNT) && defined (emacs) 501866525Speter QUIT; 501966525Speter#endif 502017721Speter 502166525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p); 502266525Speter DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 502317721Speter 502466525Speter /* If this on_failure_jump comes right before a group (i.e., 502566525Speter the original * applied to a group), save the information 502666525Speter for that group and all inner ones, so that if we fail back 502766525Speter to this point, the group's information will be correct. 502866525Speter For example, in \(a*\)*\1, we need the preceding group, 502966525Speter and in \(zz\(a*\)b*\)\2, we need the inner group. */ 503017721Speter 503166525Speter /* We can't use `p' to check ahead because we push 503266525Speter a failure point to `p + mcnt' after we do this. */ 503366525Speter p1 = p; 503417721Speter 503566525Speter /* We need to skip no_op's before we look for the 503666525Speter start_memory in case this on_failure_jump is happening as 503766525Speter the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 503866525Speter against aba. */ 503966525Speter while (p1 < pend && (re_opcode_t) *p1 == no_op) 504066525Speter p1++; 504117721Speter 504266525Speter if (p1 < pend && (re_opcode_t) *p1 == start_memory) 504366525Speter { 504466525Speter /* We have a new highest active register now. This will 504566525Speter get reset at the start_memory we are about to get to, 504666525Speter but we will have saved all the registers relevant to 504766525Speter this repetition op, as described above. */ 504866525Speter highest_active_reg = *(p1 + 1) + *(p1 + 2); 504966525Speter if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 505066525Speter lowest_active_reg = *(p1 + 1); 505166525Speter } 505217721Speter 505366525Speter DEBUG_PRINT1 (":\n"); 505466525Speter PUSH_FAILURE_POINT (p + mcnt, d, -2); 505566525Speter break; 505617721Speter 505766525Speter 505866525Speter /* A smart repeat ends with `maybe_pop_jump'. 505966525Speter We change it to either `pop_failure_jump' or `jump'. */ 506066525Speter case maybe_pop_jump: 506166525Speter#if defined (WINDOWSNT) && defined (emacs) 506266525Speter QUIT; 506366525Speter#endif 506466525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p); 506566525Speter DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 506666525Speter { 506717721Speter register unsigned char *p2 = p; 506817721Speter 506966525Speter /* Compare the beginning of the repeat with what in the 507066525Speter pattern follows its end. If we can establish that there 507166525Speter is nothing that they would both match, i.e., that we 507266525Speter would have to backtrack because of (as in, e.g., `a*a') 507366525Speter then we can change to pop_failure_jump, because we'll 507466525Speter never have to backtrack. 507525839Speter 507666525Speter This is not true in the case of alternatives: in 507766525Speter `(a|ab)*' we do need to backtrack to the `ab' alternative 507866525Speter (e.g., if the string was `ab'). But instead of trying to 507966525Speter detect that here, the alternative has put on a dummy 508066525Speter failure point which is what we will end up popping. */ 508117721Speter 508266525Speter /* Skip over open/close-group commands. 508366525Speter If what follows this loop is a ...+ construct, 508466525Speter look at what begins its body, since we will have to 508566525Speter match at least one of that. */ 508666525Speter while (1) 508766525Speter { 508866525Speter if (p2 + 2 < pend 508966525Speter && ((re_opcode_t) *p2 == stop_memory 509066525Speter || (re_opcode_t) *p2 == start_memory)) 509166525Speter p2 += 3; 509266525Speter else if (p2 + 6 < pend 509366525Speter && (re_opcode_t) *p2 == dummy_failure_jump) 509466525Speter p2 += 6; 509566525Speter else 509666525Speter break; 509766525Speter } 509817721Speter 509966525Speter p1 = p + mcnt; 510066525Speter /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 510166525Speter to the `maybe_finalize_jump' of this case. Examine what 510266525Speter follows. */ 510366525Speter 510466525Speter /* If we're at the end of the pattern, we can change. */ 510566525Speter if (p2 == pend) 510617721Speter { 510717721Speter /* Consider what happens when matching ":\(.*\)" 510817721Speter against ":/". I don't really understand this code 510966525Speter yet. */ 511066525Speter p[-3] = (unsigned char) pop_failure_jump; 511166525Speter DEBUG_PRINT1 511266525Speter (" End of pattern: change to `pop_failure_jump'.\n"); 511366525Speter } 511417721Speter 511566525Speter else if ((re_opcode_t) *p2 == exactn 511617721Speter || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 511717721Speter { 511866525Speter register unsigned int c 511966525Speter = *p2 == (unsigned char) endline ? '\n' : p2[2]; 512017721Speter 512166525Speter if ((re_opcode_t) p1[3] == exactn) 512266525Speter { 512366525Speter if (!(multibyte /* && (c != '\n') */ 512466525Speter && BASE_LEADING_CODE_P (c)) 512566525Speter ? c != p1[5] 512666525Speter : (STRING_CHAR (&p2[2], pend - &p2[2]) 512766525Speter != STRING_CHAR (&p1[5], pend - &p1[5]))) 512866525Speter { 512966525Speter p[-3] = (unsigned char) pop_failure_jump; 513066525Speter DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 513166525Speter c, p1[5]); 513266525Speter } 513366525Speter } 513425839Speter 513517721Speter else if ((re_opcode_t) p1[3] == charset 513617721Speter || (re_opcode_t) p1[3] == charset_not) 513717721Speter { 513817721Speter int not = (re_opcode_t) p1[3] == charset_not; 513925839Speter 514066525Speter if (multibyte /* && (c != '\n') */ 514166525Speter && BASE_LEADING_CODE_P (c)) 514266525Speter c = STRING_CHAR (&p2[2], pend - &p2[2]); 514366525Speter 514466525Speter /* Test if C is listed in charset (or charset_not) 514566525Speter at `&p1[3]'. */ 514666525Speter if (SINGLE_BYTE_CHAR_P (c)) 514766525Speter { 514866525Speter if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH 514917721Speter && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 515017721Speter not = !not; 515166525Speter } 515266525Speter else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3])) 515366525Speter CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]); 515417721Speter 515566525Speter /* `not' is equal to 1 if c would match, which means 515666525Speter that we can't change to pop_failure_jump. */ 515717721Speter if (!not) 515866525Speter { 515966525Speter p[-3] = (unsigned char) pop_failure_jump; 516066525Speter DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 516166525Speter } 516217721Speter } 516317721Speter } 516466525Speter else if ((re_opcode_t) *p2 == charset) 516566525Speter { 516666525Speter if ((re_opcode_t) p1[3] == exactn) 516766525Speter { 516866525Speter register unsigned int c = p1[5]; 516966525Speter int not = 0; 517066525Speter 517166525Speter if (multibyte && BASE_LEADING_CODE_P (c)) 517266525Speter c = STRING_CHAR (&p1[5], pend - &p1[5]); 517366525Speter 517466525Speter /* Test if C is listed in charset at `p2'. */ 517566525Speter if (SINGLE_BYTE_CHAR_P (c)) 517666525Speter { 517766525Speter if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH 517866525Speter && (p2[2 + c / BYTEWIDTH] 517966525Speter & (1 << (c % BYTEWIDTH)))) 518066525Speter not = !not; 518166525Speter } 518266525Speter else if (CHARSET_RANGE_TABLE_EXISTS_P (p2)) 518366525Speter CHARSET_LOOKUP_RANGE_TABLE (not, c, p2); 518466525Speter 518566525Speter if (!not) 518666525Speter { 518766525Speter p[-3] = (unsigned char) pop_failure_jump; 518866525Speter DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 518966525Speter } 519066525Speter } 519166525Speter 519266525Speter /* It is hard to list up all the character in charset 519366525Speter P2 if it includes multibyte character. Give up in 519466525Speter such case. */ 519566525Speter else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) 519666525Speter { 519766525Speter /* Now, we are sure that P2 has no range table. 519866525Speter So, for the size of bitmap in P2, `p2[1]' is 519966525Speter enough. But P1 may have range table, so the 520066525Speter size of bitmap table of P1 is extracted by 520166525Speter using macro `CHARSET_BITMAP_SIZE'. 520266525Speter 520366525Speter Since we know that all the character listed in 520466525Speter P2 is ASCII, it is enough to test only bitmap 520566525Speter table of P1. */ 520666525Speter 520766525Speter if ((re_opcode_t) p1[3] == charset_not) 520866525Speter { 520966525Speter int idx; 521066525Speter /* We win if the charset_not inside the loop lists 521166525Speter every character listed in the charset after. */ 521266525Speter for (idx = 0; idx < (int) p2[1]; idx++) 521366525Speter if (! (p2[2 + idx] == 0 521466525Speter || (idx < CHARSET_BITMAP_SIZE (&p1[3]) 521566525Speter && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 521666525Speter break; 521766525Speter 521866525Speter if (idx == p2[1]) 521966525Speter { 522066525Speter p[-3] = (unsigned char) pop_failure_jump; 522166525Speter DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 522266525Speter } 522366525Speter } 522466525Speter else if ((re_opcode_t) p1[3] == charset) 522566525Speter { 522666525Speter int idx; 522766525Speter /* We win if the charset inside the loop 522866525Speter has no overlap with the one after the loop. */ 522966525Speter for (idx = 0; 523066525Speter (idx < (int) p2[1] 523166525Speter && idx < CHARSET_BITMAP_SIZE (&p1[3])); 523266525Speter idx++) 523366525Speter if ((p2[2 + idx] & p1[5 + idx]) != 0) 523466525Speter break; 523566525Speter 523666525Speter if (idx == p2[1] 523766525Speter || idx == CHARSET_BITMAP_SIZE (&p1[3])) 523866525Speter { 523966525Speter p[-3] = (unsigned char) pop_failure_jump; 524066525Speter DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 524166525Speter } 524266525Speter } 524366525Speter } 524417721Speter } 524566525Speter } 524617721Speter p -= 2; /* Point at relative address again. */ 524717721Speter if ((re_opcode_t) p[-1] != pop_failure_jump) 524817721Speter { 524917721Speter p[-1] = (unsigned char) jump; 525066525Speter DEBUG_PRINT1 (" Match => jump.\n"); 525117721Speter goto unconditional_jump; 525217721Speter } 525366525Speter /* Note fall through. */ 525417721Speter 525517721Speter 525617721Speter /* The end of a simple repeat has a pop_failure_jump back to 525766525Speter its matching on_failure_jump, where the latter will push a 525866525Speter failure point. The pop_failure_jump takes off failure 525966525Speter points put on by this pop_failure_jump's matching 526066525Speter on_failure_jump; we got through the pattern to here from the 526166525Speter matching on_failure_jump, so didn't fail. */ 526266525Speter case pop_failure_jump: 526366525Speter { 526466525Speter /* We need to pass separate storage for the lowest and 526566525Speter highest registers, even though we don't care about the 526666525Speter actual values. Otherwise, we will restore only one 526766525Speter register from the stack, since lowest will == highest in 526866525Speter `pop_failure_point'. */ 526966525Speter unsigned dummy_low_reg, dummy_high_reg; 527066525Speter unsigned char *pdummy; 527166525Speter const char *sdummy; 527217721Speter 527366525Speter DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 527466525Speter POP_FAILURE_POINT (sdummy, pdummy, 527566525Speter dummy_low_reg, dummy_high_reg, 527666525Speter reg_dummy, reg_dummy, reg_info_dummy); 527766525Speter } 527866525Speter /* Note fall through. */ 527917721Speter 528025839Speter 528166525Speter /* Unconditionally jump (without popping any failure points). */ 528266525Speter case jump: 528317721Speter unconditional_jump: 528466525Speter#if defined (WINDOWSNT) && defined (emacs) 528566525Speter QUIT; 528666525Speter#endif 528717721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 528866525Speter DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 528966525Speter p += mcnt; /* Do the jump. */ 529066525Speter DEBUG_PRINT2 ("(to 0x%x).\n", p); 529117721Speter break; 529217721Speter 529325839Speter 529466525Speter /* We need this opcode so we can detect where alternatives end 529566525Speter in `group_match_null_string_p' et al. */ 529666525Speter case jump_past_alt: 529766525Speter DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 529866525Speter goto unconditional_jump; 529917721Speter 530017721Speter 530166525Speter /* Normally, the on_failure_jump pushes a failure point, which 530266525Speter then gets popped at pop_failure_jump. We will end up at 530366525Speter pop_failure_jump, also, and with a pattern of, say, `a+', we 530466525Speter are skipping over the on_failure_jump, so we have to push 530566525Speter something meaningless for pop_failure_jump to pop. */ 530666525Speter case dummy_failure_jump: 530766525Speter DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 530866525Speter /* It doesn't matter what we push for the string here. What 530966525Speter the code at `fail' tests is the value for the pattern. */ 531066525Speter PUSH_FAILURE_POINT (0, 0, -2); 531166525Speter goto unconditional_jump; 531217721Speter 531317721Speter 531466525Speter /* At the end of an alternative, we need to push a dummy failure 531566525Speter point in case we are followed by a `pop_failure_jump', because 531666525Speter we don't want the failure point for the alternative to be 531766525Speter popped. For example, matching `(a|ab)*' against `aab' 531866525Speter requires that we match the `ab' alternative. */ 531966525Speter case push_dummy_failure: 532066525Speter DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 532166525Speter /* See comments just above at `dummy_failure_jump' about the 532266525Speter two zeroes. */ 532366525Speter PUSH_FAILURE_POINT (0, 0, -2); 532466525Speter break; 532517721Speter 532666525Speter /* Have to succeed matching what follows at least n times. 532766525Speter After that, handle like `on_failure_jump'. */ 532866525Speter case succeed_n: 532966525Speter EXTRACT_NUMBER (mcnt, p + 2); 533066525Speter DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 533117721Speter 533266525Speter assert (mcnt >= 0); 533366525Speter /* Originally, this is how many times we HAVE to succeed. */ 533466525Speter if (mcnt > 0) 533566525Speter { 533666525Speter mcnt--; 533717721Speter p += 2; 533866525Speter STORE_NUMBER_AND_INCR (p, mcnt); 533966525Speter DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); 534066525Speter } 534117721Speter else if (mcnt == 0) 534266525Speter { 534366525Speter DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); 534417721Speter p[2] = (unsigned char) no_op; 534566525Speter p[3] = (unsigned char) no_op; 534666525Speter goto on_failure; 534766525Speter } 534866525Speter break; 534925839Speter 535066525Speter case jump_n: 535166525Speter EXTRACT_NUMBER (mcnt, p + 2); 535266525Speter DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 535317721Speter 535466525Speter /* Originally, this is how many times we CAN jump. */ 535566525Speter if (mcnt) 535666525Speter { 535766525Speter mcnt--; 535866525Speter STORE_NUMBER (p + 2, mcnt); 535925839Speter goto unconditional_jump; 536066525Speter } 536166525Speter /* If don't have to jump any more, skip over the rest of command. */ 536225839Speter else 536325839Speter p += 4; 536466525Speter break; 536525839Speter 536617721Speter case set_number_at: 536717721Speter { 536866525Speter DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 536917721Speter 537066525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p); 537166525Speter p1 = p + mcnt; 537266525Speter EXTRACT_NUMBER_AND_INCR (mcnt, p); 537366525Speter DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 537417721Speter STORE_NUMBER (p1, mcnt); 537566525Speter break; 537666525Speter } 537717721Speter 537866525Speter case wordbound: 537966525Speter DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 538066525Speter 538166525Speter /* We SUCCEED in one of the following cases: */ 538266525Speter 538366525Speter /* Case 1: D is at the beginning or the end of string. */ 538466525Speter if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 538517721Speter break; 538666525Speter else 538766525Speter { 538866525Speter /* C1 is the character before D, S1 is the syntax of C1, C2 538966525Speter is the character at D, and S2 is the syntax of C2. */ 539066525Speter int c1, c2, s1, s2; 539166525Speter int pos1 = PTR_TO_OFFSET (d - 1); 539266525Speter int charpos; 539317721Speter 539466525Speter GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 539566525Speter GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 539666525Speter#ifdef emacs 539766525Speter charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); 539866525Speter UPDATE_SYNTAX_TABLE (charpos); 539966525Speter#endif 540066525Speter s1 = SYNTAX (c1); 540166525Speter#ifdef emacs 540266525Speter UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 540366525Speter#endif 540466525Speter s2 = SYNTAX (c2); 540566525Speter 540666525Speter if (/* Case 2: Only one of S1 and S2 is Sword. */ 540766525Speter ((s1 == Sword) != (s2 == Sword)) 540866525Speter /* Case 3: Both of S1 and S2 are Sword, and macro 540966525Speter WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 541066525Speter || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 541166525Speter break; 541266525Speter } 541366525Speter goto fail; 541466525Speter 541566525Speter case notwordbound: 541666525Speter DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); 541766525Speter 541866525Speter /* We FAIL in one of the following cases: */ 541966525Speter 542066525Speter /* Case 1: D is at the beginning or the end of string. */ 542166525Speter if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 542217721Speter goto fail; 542366525Speter else 542466525Speter { 542566525Speter /* C1 is the character before D, S1 is the syntax of C1, C2 542666525Speter is the character at D, and S2 is the syntax of C2. */ 542766525Speter int c1, c2, s1, s2; 542866525Speter int pos1 = PTR_TO_OFFSET (d - 1); 542966525Speter int charpos; 543017721Speter 543166525Speter GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 543266525Speter GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 543366525Speter#ifdef emacs 543466525Speter charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); 543566525Speter UPDATE_SYNTAX_TABLE (charpos); 543666525Speter#endif 543766525Speter s1 = SYNTAX (c1); 543866525Speter#ifdef emacs 543966525Speter UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 544066525Speter#endif 544166525Speter s2 = SYNTAX (c2); 544266525Speter 544366525Speter if (/* Case 2: Only one of S1 and S2 is Sword. */ 544466525Speter ((s1 == Sword) != (s2 == Sword)) 544566525Speter /* Case 3: Both of S1 and S2 are Sword, and macro 544666525Speter WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 544766525Speter || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 544866525Speter goto fail; 544966525Speter } 545066525Speter break; 545166525Speter 545217721Speter case wordbeg: 545366525Speter DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 545417721Speter 545566525Speter /* We FAIL in one of the following cases: */ 545666525Speter 545766525Speter /* Case 1: D is at the end of string. */ 545866525Speter if (AT_STRINGS_END (d)) 545966525Speter goto fail; 546066525Speter else 546166525Speter { 546266525Speter /* C1 is the character before D, S1 is the syntax of C1, C2 546366525Speter is the character at D, and S2 is the syntax of C2. */ 546466525Speter int c1, c2, s1, s2; 546566525Speter int pos1 = PTR_TO_OFFSET (d); 546666525Speter int charpos; 546766525Speter 546866525Speter GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 546966525Speter#ifdef emacs 547066525Speter charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); 547166525Speter UPDATE_SYNTAX_TABLE (charpos); 547266525Speter#endif 547366525Speter s2 = SYNTAX (c2); 5474107484Speter 547566525Speter /* Case 2: S2 is not Sword. */ 547666525Speter if (s2 != Sword) 547766525Speter goto fail; 547866525Speter 547966525Speter /* Case 3: D is not at the beginning of string ... */ 548066525Speter if (!AT_STRINGS_BEG (d)) 548166525Speter { 548266525Speter GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 548366525Speter#ifdef emacs 548466525Speter UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); 548566525Speter#endif 548666525Speter s1 = SYNTAX (c1); 548766525Speter 548866525Speter /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) 548966525Speter returns 0. */ 549066525Speter if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2)) 549166525Speter goto fail; 549266525Speter } 549366525Speter } 549466525Speter break; 549566525Speter 549617721Speter case wordend: 549766525Speter DEBUG_PRINT1 ("EXECUTING wordend.\n"); 549817721Speter 549966525Speter /* We FAIL in one of the following cases: */ 550066525Speter 550166525Speter /* Case 1: D is at the beginning of string. */ 550266525Speter if (AT_STRINGS_BEG (d)) 550366525Speter goto fail; 550466525Speter else 550566525Speter { 550666525Speter /* C1 is the character before D, S1 is the syntax of C1, C2 550766525Speter is the character at D, and S2 is the syntax of C2. */ 550866525Speter int c1, c2, s1, s2; 550966525Speter int pos1 = PTR_TO_OFFSET (d); 551066525Speter int charpos; 551166525Speter 551266525Speter GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 551317721Speter#ifdef emacs 551466525Speter charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1); 551566525Speter UPDATE_SYNTAX_TABLE (charpos); 551666525Speter#endif 551766525Speter s1 = SYNTAX (c1); 551825839Speter 551966525Speter /* Case 2: S1 is not Sword. */ 552066525Speter if (s1 != Sword) 552166525Speter goto fail; 552225839Speter 552366525Speter /* Case 3: D is not at the end of string ... */ 552466525Speter if (!AT_STRINGS_END (d)) 552566525Speter { 552666525Speter GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 552766525Speter#ifdef emacs 552866525Speter UPDATE_SYNTAX_TABLE_FORWARD (charpos); 552966525Speter#endif 553066525Speter s2 = SYNTAX (c2); 553117721Speter 553266525Speter /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) 553366525Speter returns 0. */ 553466525Speter if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2)) 553566525Speter goto fail; 553666525Speter } 553766525Speter } 553866525Speter break; 553966525Speter 554066525Speter#ifdef emacs 554166525Speter case before_dot: 554266525Speter DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 554366525Speter if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE) 554466525Speter goto fail; 554566525Speter break; 554666525Speter 554766525Speter case at_dot: 554866525Speter DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 554966525Speter if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE) 555066525Speter goto fail; 555166525Speter break; 555266525Speter 555366525Speter case after_dot: 555466525Speter DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 555566525Speter if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE) 555666525Speter goto fail; 555766525Speter break; 555866525Speter 555917721Speter case syntaxspec: 556066525Speter DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 556117721Speter mcnt = *p++; 556217721Speter goto matchsyntax; 556317721Speter 556466525Speter case wordchar: 556566525Speter DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 556617721Speter mcnt = (int) Sword; 556766525Speter matchsyntax: 556817721Speter PREFETCH (); 556966525Speter#ifdef emacs 557066525Speter { 557166525Speter int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); 557266525Speter UPDATE_SYNTAX_TABLE (pos1); 557366525Speter } 557466525Speter#endif 557566525Speter { 557666525Speter int c, len; 557766525Speter 557866525Speter if (multibyte) 557966525Speter /* we must concern about multibyte form, ... */ 558066525Speter c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 558166525Speter else 558266525Speter /* everything should be handled as ASCII, even though it 558366525Speter looks like multibyte form. */ 558466525Speter c = *d, len = 1; 558566525Speter 558666525Speter if (SYNTAX (c) != (enum syntaxcode) mcnt) 558725839Speter goto fail; 558866525Speter d += len; 558966525Speter } 559066525Speter SET_REGS_MATCHED (); 559117721Speter break; 559217721Speter 559317721Speter case notsyntaxspec: 559466525Speter DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 559517721Speter mcnt = *p++; 559617721Speter goto matchnotsyntax; 559717721Speter 559866525Speter case notwordchar: 559966525Speter DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 560017721Speter mcnt = (int) Sword; 560166525Speter matchnotsyntax: 560217721Speter PREFETCH (); 560366525Speter#ifdef emacs 560466525Speter { 560566525Speter int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); 560666525Speter UPDATE_SYNTAX_TABLE (pos1); 560766525Speter } 560866525Speter#endif 560966525Speter { 561066525Speter int c, len; 561166525Speter 561266525Speter if (multibyte) 561366525Speter c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 561466525Speter else 561566525Speter c = *d, len = 1; 561666525Speter 561766525Speter if (SYNTAX (c) == (enum syntaxcode) mcnt) 561825839Speter goto fail; 561966525Speter d += len; 562066525Speter } 562117721Speter SET_REGS_MATCHED (); 562266525Speter break; 562366525Speter 562466525Speter case categoryspec: 562566525Speter DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p); 562666525Speter mcnt = *p++; 562766525Speter PREFETCH (); 562866525Speter { 562966525Speter int c, len; 563066525Speter 563166525Speter if (multibyte) 563266525Speter c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 563366525Speter else 563466525Speter c = *d, len = 1; 563566525Speter 563666525Speter if (!CHAR_HAS_CATEGORY (c, mcnt)) 563766525Speter goto fail; 563866525Speter d += len; 563966525Speter } 564066525Speter SET_REGS_MATCHED (); 564166525Speter break; 564266525Speter 564366525Speter case notcategoryspec: 564466525Speter DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p); 564566525Speter mcnt = *p++; 564666525Speter PREFETCH (); 564766525Speter { 564866525Speter int c, len; 564966525Speter 565066525Speter if (multibyte) 565166525Speter c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 565266525Speter else 565366525Speter c = *d, len = 1; 565466525Speter 565566525Speter if (CHAR_HAS_CATEGORY (c, mcnt)) 565666525Speter goto fail; 565766525Speter d += len; 565866525Speter } 565966525Speter SET_REGS_MATCHED (); 566017721Speter break; 566117721Speter 566217721Speter#else /* not emacs */ 566317721Speter case wordchar: 566417721Speter DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 566517721Speter PREFETCH (); 566617721Speter if (!WORDCHAR_P (d)) 566717721Speter goto fail; 566817721Speter SET_REGS_MATCHED (); 566917721Speter d++; 567017721Speter break; 567125839Speter 567217721Speter case notwordchar: 567317721Speter DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 567417721Speter PREFETCH (); 567517721Speter if (WORDCHAR_P (d)) 567617721Speter goto fail; 567717721Speter SET_REGS_MATCHED (); 567817721Speter d++; 567917721Speter break; 568017721Speter#endif /* not emacs */ 568125839Speter 568217721Speter default: 568317721Speter abort (); 568417721Speter } 568517721Speter continue; /* Successfully executed one pattern command; keep going. */ 568617721Speter 568717721Speter 568817721Speter /* We goto here if a matching operation fails. */ 568917721Speter fail: 569066525Speter#if defined (WINDOWSNT) && defined (emacs) 569166525Speter QUIT; 569266525Speter#endif 569317721Speter if (!FAIL_STACK_EMPTY ()) 569417721Speter { /* A restart point is known. Restore to that state. */ 569517721Speter DEBUG_PRINT1 ("\nFAIL:\n"); 569617721Speter POP_FAILURE_POINT (d, p, 569717721Speter lowest_active_reg, highest_active_reg, 569817721Speter regstart, regend, reg_info); 569917721Speter 570017721Speter /* If this failure point is a dummy, try the next one. */ 570117721Speter if (!p) 570217721Speter goto fail; 570317721Speter 570417721Speter /* If we failed to the end of the pattern, don't examine *p. */ 570517721Speter assert (p <= pend); 570617721Speter if (p < pend) 570717721Speter { 570817721Speter boolean is_a_jump_n = false; 570925839Speter 571017721Speter /* If failed to a backwards jump that's part of a repetition 571117721Speter loop, need to pop this failure point and use the next one. */ 571217721Speter switch ((re_opcode_t) *p) 571317721Speter { 571417721Speter case jump_n: 571517721Speter is_a_jump_n = true; 571617721Speter case maybe_pop_jump: 571717721Speter case pop_failure_jump: 571817721Speter case jump: 571917721Speter p1 = p + 1; 572017721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 572125839Speter p1 += mcnt; 572217721Speter 572317721Speter if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 572417721Speter || (!is_a_jump_n 572517721Speter && (re_opcode_t) *p1 == on_failure_jump)) 572617721Speter goto fail; 572717721Speter break; 572817721Speter default: 572917721Speter /* do nothing */ ; 573017721Speter } 573117721Speter } 573217721Speter 573317721Speter if (d >= string1 && d <= end1) 573417721Speter dend = end_match_1; 573517721Speter } 573617721Speter else 573717721Speter break; /* Matching at this starting point really fails. */ 573817721Speter } /* for (;;) */ 573917721Speter 574017721Speter if (best_regs_set) 574117721Speter goto restore_best_regs; 574217721Speter 574317721Speter FREE_VARIABLES (); 574417721Speter 574517721Speter return -1; /* Failure to match. */ 574617721Speter} /* re_match_2 */ 574717721Speter 574817721Speter/* Subroutine definitions for re_match_2. */ 574917721Speter 575017721Speter 575117721Speter/* We are passed P pointing to a register number after a start_memory. 575225839Speter 575317721Speter Return true if the pattern up to the corresponding stop_memory can 575417721Speter match the empty string, and false otherwise. 575525839Speter 575617721Speter If we find the matching stop_memory, sets P to point to one past its number. 575717721Speter Otherwise, sets P to an undefined byte less than or equal to END. 575817721Speter 575917721Speter We don't handle duplicates properly (yet). */ 576017721Speter 576117721Speterstatic boolean 576217721Spetergroup_match_null_string_p (p, end, reg_info) 576317721Speter unsigned char **p, *end; 576417721Speter register_info_type *reg_info; 576517721Speter{ 576617721Speter int mcnt; 576717721Speter /* Point to after the args to the start_memory. */ 576817721Speter unsigned char *p1 = *p + 2; 576925839Speter 577017721Speter while (p1 < end) 577117721Speter { 577217721Speter /* Skip over opcodes that can match nothing, and return true or 577317721Speter false, as appropriate, when we get to one that can't, or to the 577417721Speter matching stop_memory. */ 577525839Speter 577617721Speter switch ((re_opcode_t) *p1) 577717721Speter { 577817721Speter /* Could be either a loop or a series of alternatives. */ 577917721Speter case on_failure_jump: 578017721Speter p1++; 578117721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 578225839Speter 578317721Speter /* If the next operation is not a jump backwards in the 578417721Speter pattern. */ 578517721Speter 578617721Speter if (mcnt >= 0) 578717721Speter { 578817721Speter /* Go through the on_failure_jumps of the alternatives, 578917721Speter seeing if any of the alternatives cannot match nothing. 579017721Speter The last alternative starts with only a jump, 579117721Speter whereas the rest start with on_failure_jump and end 579217721Speter with a jump, e.g., here is the pattern for `a|b|c': 579317721Speter 579417721Speter /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 579517721Speter /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 579625839Speter /exactn/1/c 579717721Speter 579817721Speter So, we have to first go through the first (n-1) 579917721Speter alternatives and then deal with the last one separately. */ 580017721Speter 580117721Speter 580217721Speter /* Deal with the first (n-1) alternatives, which start 580317721Speter with an on_failure_jump (see above) that jumps to right 580417721Speter past a jump_past_alt. */ 580517721Speter 580617721Speter while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) 580717721Speter { 580817721Speter /* `mcnt' holds how many bytes long the alternative 580917721Speter is, including the ending `jump_past_alt' and 581017721Speter its number. */ 581117721Speter 581225839Speter if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 581317721Speter reg_info)) 581417721Speter return false; 581517721Speter 581617721Speter /* Move to right after this alternative, including the 581717721Speter jump_past_alt. */ 581825839Speter p1 += mcnt; 581917721Speter 582017721Speter /* Break if it's the beginning of an n-th alternative 582117721Speter that doesn't begin with an on_failure_jump. */ 582217721Speter if ((re_opcode_t) *p1 != on_failure_jump) 582317721Speter break; 582425839Speter 582517721Speter /* Still have to check that it's not an n-th 582617721Speter alternative that starts with an on_failure_jump. */ 582717721Speter p1++; 582817721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 582917721Speter if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) 583017721Speter { 583117721Speter /* Get to the beginning of the n-th alternative. */ 583217721Speter p1 -= 3; 583317721Speter break; 583417721Speter } 583517721Speter } 583617721Speter 583717721Speter /* Deal with the last alternative: go back and get number 583817721Speter of the `jump_past_alt' just before it. `mcnt' contains 583917721Speter the length of the alternative. */ 584017721Speter EXTRACT_NUMBER (mcnt, p1 - 2); 584117721Speter 584217721Speter if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) 584317721Speter return false; 584417721Speter 584517721Speter p1 += mcnt; /* Get past the n-th alternative. */ 584617721Speter } /* if mcnt > 0 */ 584717721Speter break; 584817721Speter 584925839Speter 585017721Speter case stop_memory: 585117721Speter assert (p1[1] == **p); 585217721Speter *p = p1 + 2; 585317721Speter return true; 585417721Speter 585525839Speter 585625839Speter default: 585717721Speter if (!common_op_match_null_string_p (&p1, end, reg_info)) 585817721Speter return false; 585917721Speter } 586017721Speter } /* while p1 < end */ 586117721Speter 586217721Speter return false; 586317721Speter} /* group_match_null_string_p */ 586417721Speter 586517721Speter 586617721Speter/* Similar to group_match_null_string_p, but doesn't deal with alternatives: 586717721Speter It expects P to be the first byte of a single alternative and END one 586817721Speter byte past the last. The alternative can contain groups. */ 586925839Speter 587017721Speterstatic boolean 587117721Speteralt_match_null_string_p (p, end, reg_info) 587217721Speter unsigned char *p, *end; 587317721Speter register_info_type *reg_info; 587417721Speter{ 587517721Speter int mcnt; 587617721Speter unsigned char *p1 = p; 587725839Speter 587817721Speter while (p1 < end) 587917721Speter { 588025839Speter /* Skip over opcodes that can match nothing, and break when we get 588117721Speter to one that can't. */ 588225839Speter 588317721Speter switch ((re_opcode_t) *p1) 588417721Speter { 588517721Speter /* It's a loop. */ 588617721Speter case on_failure_jump: 588717721Speter p1++; 588817721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 588917721Speter p1 += mcnt; 589017721Speter break; 589125839Speter 589225839Speter default: 589317721Speter if (!common_op_match_null_string_p (&p1, end, reg_info)) 589417721Speter return false; 589517721Speter } 589617721Speter } /* while p1 < end */ 589717721Speter 589817721Speter return true; 589917721Speter} /* alt_match_null_string_p */ 590017721Speter 590117721Speter 590217721Speter/* Deals with the ops common to group_match_null_string_p and 590325839Speter alt_match_null_string_p. 590425839Speter 590517721Speter Sets P to one after the op and its arguments, if any. */ 590617721Speter 590717721Speterstatic boolean 590817721Spetercommon_op_match_null_string_p (p, end, reg_info) 590917721Speter unsigned char **p, *end; 591017721Speter register_info_type *reg_info; 591117721Speter{ 591217721Speter int mcnt; 591317721Speter boolean ret; 591417721Speter int reg_no; 591517721Speter unsigned char *p1 = *p; 591617721Speter 591717721Speter switch ((re_opcode_t) *p1++) 591817721Speter { 591917721Speter case no_op: 592017721Speter case begline: 592117721Speter case endline: 592217721Speter case begbuf: 592317721Speter case endbuf: 592417721Speter case wordbeg: 592517721Speter case wordend: 592617721Speter case wordbound: 592717721Speter case notwordbound: 592817721Speter#ifdef emacs 592917721Speter case before_dot: 593017721Speter case at_dot: 593117721Speter case after_dot: 593217721Speter#endif 593317721Speter break; 593417721Speter 593517721Speter case start_memory: 593617721Speter reg_no = *p1; 593717721Speter assert (reg_no > 0 && reg_no <= MAX_REGNUM); 593817721Speter ret = group_match_null_string_p (&p1, end, reg_info); 593925839Speter 594017721Speter /* Have to set this here in case we're checking a group which 594117721Speter contains a group and a back reference to it. */ 594217721Speter 594317721Speter if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 594417721Speter REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 594517721Speter 594617721Speter if (!ret) 594717721Speter return false; 594817721Speter break; 594925839Speter 595017721Speter /* If this is an optimized succeed_n for zero times, make the jump. */ 595117721Speter case jump: 595217721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 595317721Speter if (mcnt >= 0) 595417721Speter p1 += mcnt; 595517721Speter else 595617721Speter return false; 595717721Speter break; 595817721Speter 595917721Speter case succeed_n: 596017721Speter /* Get to the number of times to succeed. */ 596125839Speter p1 += 2; 596217721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 596317721Speter 596417721Speter if (mcnt == 0) 596517721Speter { 596617721Speter p1 -= 4; 596717721Speter EXTRACT_NUMBER_AND_INCR (mcnt, p1); 596817721Speter p1 += mcnt; 596917721Speter } 597017721Speter else 597117721Speter return false; 597217721Speter break; 597317721Speter 597425839Speter case duplicate: 597517721Speter if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 597617721Speter return false; 597717721Speter break; 597817721Speter 597917721Speter case set_number_at: 598017721Speter p1 += 4; 598117721Speter 598217721Speter default: 598317721Speter /* All other opcodes mean we cannot match the empty string. */ 598417721Speter return false; 598517721Speter } 598617721Speter 598717721Speter *p = p1; 598817721Speter return true; 598917721Speter} /* common_op_match_null_string_p */ 599017721Speter 599117721Speter 599217721Speter/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 599317721Speter bytes; nonzero otherwise. */ 599425839Speter 599517721Speterstatic int 599617721Speterbcmp_translate (s1, s2, len, translate) 599717721Speter unsigned char *s1, *s2; 599817721Speter register int len; 599966525Speter RE_TRANSLATE_TYPE translate; 600017721Speter{ 600117721Speter register unsigned char *p1 = s1, *p2 = s2; 600266525Speter unsigned char *p1_end = s1 + len; 600366525Speter unsigned char *p2_end = s2 + len; 600466525Speter 600566525Speter while (p1 != p1_end && p2 != p2_end) 600617721Speter { 600766525Speter int p1_charlen, p2_charlen; 600866525Speter int p1_ch, p2_ch; 600966525Speter 601066525Speter p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); 601166525Speter p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen); 601266525Speter 601366525Speter if (RE_TRANSLATE (translate, p1_ch) 601466525Speter != RE_TRANSLATE (translate, p2_ch)) 601566525Speter return 1; 601666525Speter 601766525Speter p1 += p1_charlen, p2 += p2_charlen; 601817721Speter } 601966525Speter 602066525Speter if (p1 != p1_end || p2 != p2_end) 602166525Speter return 1; 602266525Speter 602317721Speter return 0; 602417721Speter} 602517721Speter 602617721Speter/* Entry points for GNU code. */ 602717721Speter 602817721Speter/* re_compile_pattern is the GNU regular expression compiler: it 602917721Speter compiles PATTERN (of length SIZE) and puts the result in BUFP. 603017721Speter Returns 0 if the pattern was valid, otherwise an error string. 603125839Speter 603217721Speter Assumes the `allocated' (and perhaps `buffer') and `translate' fields 603317721Speter are set in BUFP on entry. 603425839Speter 603517721Speter We call regex_compile to do the actual compilation. */ 603617721Speter 603717721Speterconst char * 603817721Speterre_compile_pattern (pattern, length, bufp) 603917721Speter const char *pattern; 604017721Speter int length; 604117721Speter struct re_pattern_buffer *bufp; 604217721Speter{ 604317721Speter reg_errcode_t ret; 604425839Speter 604517721Speter /* GNU code is written to assume at least RE_NREGS registers will be set 604617721Speter (and at least one extra will be -1). */ 604717721Speter bufp->regs_allocated = REGS_UNALLOCATED; 604825839Speter 604917721Speter /* And GNU code determines whether or not to get register information 605017721Speter by passing null for the REGS argument to re_match, etc., not by 605117721Speter setting no_sub. */ 605217721Speter bufp->no_sub = 0; 605325839Speter 605417721Speter /* Match anchors at newline. */ 605517721Speter bufp->newline_anchor = 1; 605625839Speter 605717721Speter ret = regex_compile (pattern, length, re_syntax_options, bufp); 605817721Speter 605966525Speter if (!ret) 606066525Speter return NULL; 606166525Speter return gettext (re_error_msgid[(int) ret]); 606266525Speter} 606317721Speter 606417721Speter/* Entry points compatible with 4.2 BSD regex library. We don't define 606566525Speter them unless specifically requested. */ 606617721Speter 606766525Speter#if defined (_REGEX_RE_COMP) || defined (_LIBC) 606817721Speter 606917721Speter/* BSD has one and only one pattern buffer. */ 607017721Speterstatic struct re_pattern_buffer re_comp_buf; 607117721Speter 607217721Speterchar * 607366525Speter#ifdef _LIBC 607466525Speter/* Make these definitions weak in libc, so POSIX programs can redefine 607566525Speter these names if they don't use our functions, and still use 607666525Speter regcomp/regexec below without link errors. */ 607766525Speterweak_function 607866525Speter#endif 607917721Speterre_comp (s) 608017721Speter const char *s; 608117721Speter{ 608217721Speter reg_errcode_t ret; 608325839Speter 608417721Speter if (!s) 608517721Speter { 608617721Speter if (!re_comp_buf.buffer) 6087175261Sobrien return (char *) gettext ("No previous regular expression"); 608817721Speter return 0; 608917721Speter } 609017721Speter 609117721Speter if (!re_comp_buf.buffer) 609217721Speter { 609317721Speter re_comp_buf.buffer = (unsigned char *) malloc (200); 609417721Speter if (re_comp_buf.buffer == NULL) 609566525Speter /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 609666525Speter return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 609717721Speter re_comp_buf.allocated = 200; 609817721Speter 609917721Speter re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 610017721Speter if (re_comp_buf.fastmap == NULL) 610166525Speter /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 610266525Speter return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 610317721Speter } 610417721Speter 610517721Speter /* Since `re_exec' always passes NULL for the `regs' argument, we 610617721Speter don't need to initialize the pattern buffer fields which affect it. */ 610717721Speter 610817721Speter /* Match anchors at newlines. */ 610917721Speter re_comp_buf.newline_anchor = 1; 611017721Speter 611117721Speter ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 611266525Speter 611366525Speter if (!ret) 611466525Speter return NULL; 611566525Speter 611666525Speter /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 611766525Speter return (char *) gettext (re_error_msgid[(int) ret]); 611817721Speter} 611917721Speter 612017721Speter 612117721Speterint 612266525Speter#ifdef _LIBC 612366525Speterweak_function 612466525Speter#endif 612517721Speterre_exec (s) 612617721Speter const char *s; 612717721Speter{ 612817721Speter const int len = strlen (s); 612917721Speter return 613017721Speter 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); 613117721Speter} 613266525Speter#endif /* _REGEX_RE_COMP */ 613317721Speter 613417721Speter/* POSIX.2 functions. Don't define these for Emacs. */ 613517721Speter 613617721Speter#ifndef emacs 613717721Speter 613817721Speter/* regcomp takes a regular expression as a string and compiles it. 613917721Speter 614017721Speter PREG is a regex_t *. We do not expect any fields to be initialized, 614117721Speter since POSIX says we shouldn't. Thus, we set 614217721Speter 614317721Speter `buffer' to the compiled pattern; 614417721Speter `used' to the length of the compiled pattern; 614517721Speter `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 614617721Speter REG_EXTENDED bit in CFLAGS is set; otherwise, to 614717721Speter RE_SYNTAX_POSIX_BASIC; 614817721Speter `newline_anchor' to REG_NEWLINE being set in CFLAGS; 614917721Speter `fastmap' and `fastmap_accurate' to zero; 615017721Speter `re_nsub' to the number of subexpressions in PATTERN. 615117721Speter 615217721Speter PATTERN is the address of the pattern string. 615317721Speter 615417721Speter CFLAGS is a series of bits which affect compilation. 615517721Speter 615617721Speter If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we 615717721Speter use POSIX basic syntax. 615817721Speter 615917721Speter If REG_NEWLINE is set, then . and [^...] don't match newline. 616017721Speter Also, regexec will try a match beginning after every newline. 616117721Speter 616217721Speter If REG_ICASE is set, then we considers upper- and lowercase 616317721Speter versions of letters to be equivalent when matching. 616417721Speter 616517721Speter If REG_NOSUB is set, then when PREG is passed to regexec, that 616617721Speter routine will report only success or failure, and nothing about the 616717721Speter registers. 616817721Speter 616917721Speter It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 617017721Speter the return codes and their meanings.) */ 617117721Speter 617217721Speterint 617317721Speterregcomp (preg, pattern, cflags) 617417721Speter regex_t *preg; 617525839Speter const char *pattern; 617617721Speter int cflags; 617717721Speter{ 617817721Speter reg_errcode_t ret; 617917721Speter unsigned syntax 618017721Speter = (cflags & REG_EXTENDED) ? 618117721Speter RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 618217721Speter 618317721Speter /* regex_compile will allocate the space for the compiled pattern. */ 618417721Speter preg->buffer = 0; 618517721Speter preg->allocated = 0; 618666525Speter preg->used = 0; 618725839Speter 618817721Speter /* Don't bother to use a fastmap when searching. This simplifies the 618917721Speter REG_NEWLINE case: if we used a fastmap, we'd have to put all the 619017721Speter characters after newlines into the fastmap. This way, we just try 619117721Speter every character. */ 619217721Speter preg->fastmap = 0; 619325839Speter 619417721Speter if (cflags & REG_ICASE) 619517721Speter { 619617721Speter unsigned i; 619725839Speter 619866525Speter preg->translate 619966525Speter = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 620066525Speter * sizeof (*(RE_TRANSLATE_TYPE)0)); 620117721Speter if (preg->translate == NULL) 620217721Speter return (int) REG_ESPACE; 620317721Speter 620417721Speter /* Map uppercase characters to corresponding lowercase ones. */ 620517721Speter for (i = 0; i < CHAR_SET_SIZE; i++) 620617721Speter preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 620717721Speter } 620817721Speter else 620917721Speter preg->translate = NULL; 621017721Speter 621117721Speter /* If REG_NEWLINE is set, newlines are treated differently. */ 621217721Speter if (cflags & REG_NEWLINE) 621317721Speter { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 621417721Speter syntax &= ~RE_DOT_NEWLINE; 621517721Speter syntax |= RE_HAT_LISTS_NOT_NEWLINE; 621617721Speter /* It also changes the matching behavior. */ 621717721Speter preg->newline_anchor = 1; 621817721Speter } 621917721Speter else 622017721Speter preg->newline_anchor = 0; 622117721Speter 622217721Speter preg->no_sub = !!(cflags & REG_NOSUB); 622317721Speter 622425839Speter /* POSIX says a null character in the pattern terminates it, so we 622517721Speter can use strlen here in compiling the pattern. */ 622617721Speter ret = regex_compile (pattern, strlen (pattern), syntax, preg); 622725839Speter 622817721Speter /* POSIX doesn't distinguish between an unmatched open-group and an 622917721Speter unmatched close-group: both are REG_EPAREN. */ 623017721Speter if (ret == REG_ERPAREN) ret = REG_EPAREN; 623125839Speter 623217721Speter return (int) ret; 623317721Speter} 623417721Speter 623517721Speter 623617721Speter/* regexec searches for a given pattern, specified by PREG, in the 623717721Speter string STRING. 623825839Speter 623917721Speter If NMATCH is zero or REG_NOSUB was set in the cflags argument to 624017721Speter `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 624117721Speter least NMATCH elements, and we set them to the offsets of the 624217721Speter corresponding matched substrings. 624325839Speter 624417721Speter EFLAGS specifies `execution flags' which affect matching: if 624517721Speter REG_NOTBOL is set, then ^ does not match at the beginning of the 624617721Speter string; if REG_NOTEOL is set, then $ does not match at the end. 624725839Speter 624817721Speter We return 0 if we find a match and REG_NOMATCH if not. */ 624917721Speter 625017721Speterint 625117721Speterregexec (preg, string, nmatch, pmatch, eflags) 625217721Speter const regex_t *preg; 625325839Speter const char *string; 625425839Speter size_t nmatch; 625525839Speter regmatch_t pmatch[]; 625617721Speter int eflags; 625717721Speter{ 625817721Speter int ret; 625917721Speter struct re_registers regs; 626017721Speter regex_t private_preg; 626117721Speter int len = strlen (string); 626217721Speter boolean want_reg_info = !preg->no_sub && nmatch > 0; 626317721Speter 626417721Speter private_preg = *preg; 626525839Speter 626617721Speter private_preg.not_bol = !!(eflags & REG_NOTBOL); 626717721Speter private_preg.not_eol = !!(eflags & REG_NOTEOL); 626825839Speter 626917721Speter /* The user has told us exactly how many registers to return 627017721Speter information about, via `nmatch'. We have to pass that on to the 627117721Speter matching routines. */ 627217721Speter private_preg.regs_allocated = REGS_FIXED; 627325839Speter 627417721Speter if (want_reg_info) 627517721Speter { 627617721Speter regs.num_regs = nmatch; 627717721Speter regs.start = TALLOC (nmatch, regoff_t); 627817721Speter regs.end = TALLOC (nmatch, regoff_t); 627917721Speter if (regs.start == NULL || regs.end == NULL) 628017721Speter return (int) REG_NOMATCH; 628117721Speter } 628217721Speter 628317721Speter /* Perform the searching operation. */ 628417721Speter ret = re_search (&private_preg, string, len, 628517721Speter /* start: */ 0, /* range: */ len, 628617721Speter want_reg_info ? ®s : (struct re_registers *) 0); 628725839Speter 628817721Speter /* Copy the register information to the POSIX structure. */ 628917721Speter if (want_reg_info) 629017721Speter { 629117721Speter if (ret >= 0) 629217721Speter { 629317721Speter unsigned r; 629417721Speter 629517721Speter for (r = 0; r < nmatch; r++) 629617721Speter { 629717721Speter pmatch[r].rm_so = regs.start[r]; 629817721Speter pmatch[r].rm_eo = regs.end[r]; 629917721Speter } 630017721Speter } 630117721Speter 630217721Speter /* If we needed the temporary register info, free the space now. */ 630317721Speter free (regs.start); 630417721Speter free (regs.end); 630517721Speter } 630617721Speter 630717721Speter /* We want zero return to mean success, unlike `re_search'. */ 630817721Speter return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 630917721Speter} 631017721Speter 631117721Speter 631217721Speter/* Returns a message corresponding to an error code, ERRCODE, returned 631317721Speter from either regcomp or regexec. We don't use PREG here. */ 631417721Speter 631517721Spetersize_t 631617721Speterregerror (errcode, preg, errbuf, errbuf_size) 631717721Speter int errcode; 631817721Speter const regex_t *preg; 631917721Speter char *errbuf; 632017721Speter size_t errbuf_size; 632117721Speter{ 632217721Speter const char *msg; 632317721Speter size_t msg_size; 632417721Speter 632517721Speter if (errcode < 0 632666525Speter || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) 632725839Speter /* Only error codes returned by the rest of the code should be passed 632817721Speter to this routine. If we are given anything else, or if other regex 632917721Speter code generates an invalid error code, then the program has a bug. 633017721Speter Dump core so we can fix it. */ 633117721Speter abort (); 633217721Speter 633366525Speter msg = gettext (re_error_msgid[errcode]); 633417721Speter 633517721Speter msg_size = strlen (msg) + 1; /* Includes the null. */ 633625839Speter 633717721Speter if (errbuf_size != 0) 633817721Speter { 633917721Speter if (msg_size > errbuf_size) 634017721Speter { 634117721Speter strncpy (errbuf, msg, errbuf_size - 1); 634217721Speter errbuf[errbuf_size - 1] = 0; 634317721Speter } 634417721Speter else 634517721Speter strcpy (errbuf, msg); 634617721Speter } 634717721Speter 634817721Speter return msg_size; 634917721Speter} 635017721Speter 635117721Speter 635217721Speter/* Free dynamically allocated space used by PREG. */ 635317721Speter 635417721Spetervoid 635517721Speterregfree (preg) 635617721Speter regex_t *preg; 635717721Speter{ 635817721Speter if (preg->buffer != NULL) 635917721Speter free (preg->buffer); 636017721Speter preg->buffer = NULL; 636125839Speter 636217721Speter preg->allocated = 0; 636317721Speter preg->used = 0; 636417721Speter 636517721Speter if (preg->fastmap != NULL) 636617721Speter free (preg->fastmap); 636717721Speter preg->fastmap = NULL; 636817721Speter preg->fastmap_accurate = 0; 636917721Speter 637017721Speter if (preg->translate != NULL) 637117721Speter free (preg->translate); 637217721Speter preg->translate = NULL; 637317721Speter} 637417721Speter 637517721Speter#endif /* not emacs */ 6376