/* Definitions for data structures and routines for the regular
   expression library, version 0.12.

   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993, 1995, 1996, 1997,
   1998, 2000, 2005 Free Software Foundation, Inc.

   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301 USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  This header relies on that for `size_t'.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating lower
     than the starting range point, as in [z-a], is invalid.
   If not set, then when ending range point collates lower than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

/* Note: RE_SYNTAX_POSIX_EXTENDED is defined further down; that is fine
   because macros are only expanded at the point of use.  */
#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))

#define RE_SYNTAX_POSIX_AWK						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors do not match at newline
     characters in the string.
   If not set, then anchors do match at newlines.
   NOTE(review): POSIX describes REG_NEWLINE the other way round (anchors
   match at newlines when the flag is set) -- confirm against regcomp in
   regex.c before relying on the wording above.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Not implemented.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

	/* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

/* Every routine below is declared once, with a full ANSI C prototype;
   pre-ANSI (K&R) compilers are not supported by these declarations.  */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern (const char *pattern, size_t length,
                                       struct re_pattern_buffer *buffer);


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap (struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is
   zero; see the `no_sub' field of `struct re_pattern_buffer').  */
extern int re_search (struct re_pattern_buffer *buffer, const char *string,
                      int length, int start, int range,
                      struct re_registers *regs);


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
                        int length1, const char *string2, int length2,
                        int start, int range, struct re_registers *regs,
                        int stop);


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match (struct re_pattern_buffer *buffer, const char *string,
                     int length, int start, struct re_registers *regs);


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
                       int length1, const char *string2, int length2,
                       int start, struct re_registers *regs, int stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
                              struct re_registers *regs,
                              unsigned num_regs, regoff_t *starts,
                              regoff_t *ends);

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict].  The qualifier is dropped for
   C++ (__GNUG__), where that array syntax is not accepted.  */
#ifndef __restrict_arr
# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
#  define __restrict_arr
# else
#  define __restrict_arr __restrict
# endif
#endif

/* POSIX compatibility.  */
extern int regcomp (regex_t *__restrict __preg,
                    const char *__restrict __pattern,
                    int __cflags);

/* `__extension__' keeps GCC's pedantic modes quiet about the
   `[__restrict_arr]' array-parameter syntax used below.  */
#if (__GNUC__)
__extension__
#endif
extern int regexec (const regex_t *__restrict __preg,
                    const char *__restrict __string, size_t __nmatch,
                    regmatch_t __pmatch[__restrict_arr],
                    int __eflags);

extern size_t regerror (int __errcode, const regex_t *__preg,
                        char *__errbuf, size_t __errbuf_size);

extern void regfree (regex_t *__preg);


#ifdef __cplusplus
}
#endif	/* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/