/* Definitions for data structures and routines for the regular
   expression library, version 0.12.
   Copyright (C) 1985,1989-1993,1995-1998, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  This header relies on that for `size_t'.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
   If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating higher
     than the starting range point, as in [z-a], is invalid.
   If not set, then when ending range point collates higher than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))

#define RE_SYNTAX_POSIX_AWK						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then newline is treated specially: the anchors
     ^ and $ also match immediately after and before a newline in the
     string, and neither . nor a nonmatching list [^...] matches a
     newline.
   If not set, then newline is an ordinary character and the anchors
     match only at the ends of the string.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then regexec reports only success or failure and
     ignores its `nmatch' and `pmatch' arguments.
   If not set, then regexec also records the offsets of the matched
     substrings.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Not implemented.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

	/* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

/* To avoid duplicating every routine declaration -- once with a
   prototype (if we are ANSI), and once without (if we aren't) -- we
   use the following macro to declare argument types.  This
   unfortunately clutters up the declarations a bit, but I think it's
   worth it.  */

#if __STDC__

# define _RE_ARGS(args) args

#else /* not __STDC__ */

# define _RE_ARGS(args) ()

#endif /* not __STDC__ */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern
  _RE_ARGS ((const char *pattern, size_t length,
             struct re_pattern_buffer *buffer));


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is
   zero).  */
extern int re_search
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
            int length, int start, int range, struct re_registers *regs));


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
             int length1, const char *string2, int length2,
             int start, int range, struct re_registers *regs, int stop));


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
             int length, int start, struct re_registers *regs));


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
             int length1, const char *string2, int length2,
             int start, struct re_registers *regs, int stop));


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers
  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
             unsigned num_regs, regoff_t *starts, regoff_t *ends));

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict]  */
#ifndef __restrict_arr
# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
#  define __restrict_arr
# else
#  define __restrict_arr __restrict
# endif
#endif

/* POSIX compatibility.  */
extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
			      const char *__restrict __pattern,
			      int __cflags));

extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
			      const char *__restrict __string, size_t __nmatch,
			      regmatch_t __pmatch[__restrict_arr],
			      int __eflags));

extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
				  char *__errbuf, size_t __errbuf_size));

extern void regfree _RE_ARGS ((regex_t *__preg));


#ifdef __cplusplus
}
#endif	/* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/