/* Definitions for data structures and routines for the regular
   expression library, version 0.12.

   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993, 1995, 1996, 1997,
   1998, 2000, 2005 Free Software Foundation, Inc.

   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301 USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating lower
     than the starting range point, as in [z-a], is invalid.
   If not set, then when ending range point collates lower than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK                                                   \
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL                     \
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS                       \
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES                  \
   | RE_DOT_NEWLINE               | RE_CONTEXT_INDEP_ANCHORS            \
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK                                               \
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))

#define RE_SYNTAX_POSIX_AWK                                             \
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS              \
   | RE_INTERVALS           | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP                                                  \
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES                         \
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS                            \
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP                                                 \
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS                    \
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE                    \
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS                             \
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP                                           \
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES                     \
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON                                         \
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL              \
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC                                           \
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC                                   \
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED                                        \
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES                           \
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR                             \
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED                                \
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES                           \
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS                             \
   | RE_NO_BK_VBAR          | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors do not match at newline
     characters in the string.
   If not set, then anchors do match at newlines.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,      /* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,      /* Success.  */
  REG_NOMATCH,          /* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,           /* Invalid pattern.  */
  REG_ECOLLATE,         /* Not implemented.  */
  REG_ECTYPE,           /* Invalid character class name.  */
  REG_EESCAPE,          /* Trailing backslash.  */
  REG_ESUBREG,          /* Invalid back reference.  */
  REG_EBRACK,           /* Unmatched left bracket.  */
  REG_EPAREN,           /* Parenthesis imbalance.  */
  REG_EBRACE,           /* Unmatched \{.  */
  REG_BADBR,            /* Invalid contents of \{\}.  */
  REG_ERANGE,           /* Invalid range end.  */
  REG_ESPACE,           /* Ran out of memory.  */
  REG_BADRPT,           /* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,             /* Premature end.  */
  REG_ESIZE,            /* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN           /* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
        /* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

        /* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

        /* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

        /* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

/* Every routine below is declared exactly once, with a full prototype;
   no argument-wrapping macro is used for pre-ANSI compilers.  */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern (const char *pattern, size_t length,
                                       struct re_pattern_buffer *buffer);


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap (struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
extern int re_search (struct re_pattern_buffer *buffer, const char *string,
                      int length, int start, int range,
                      struct re_registers *regs);


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
                        int length1, const char *string2, int length2,
                        int start, int range, struct re_registers *regs,
                        int stop);


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match (struct re_pattern_buffer *buffer, const char *string,
                     int length, int start, struct re_registers *regs);


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
                       int length1, const char *string2, int length2,
                       int start, struct re_registers *regs, int stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
                              struct re_registers *regs,
                              unsigned num_regs, regoff_t *starts,
                              regoff_t *ends);

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict]  */
#ifndef __restrict_arr
# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
#  define __restrict_arr
# else
#  define __restrict_arr __restrict
# endif
#endif

/* POSIX compatibility.  */
extern int regcomp (regex_t *__restrict __preg,
                    const char *__restrict __pattern,
                    int __cflags);

#if (__GNUC__)
__extension__
#endif
extern int regexec (const regex_t *__restrict __preg,
                    const char *__restrict __string, size_t __nmatch,
                    regmatch_t __pmatch[__restrict_arr],
                    int __eflags);

extern size_t regerror (int __errcode, const regex_t *__preg,
                        char *__errbuf, size_t __errbuf_size);

extern void regfree (regex_t *__preg);


#ifdef __cplusplus
}
#endif  /* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/