/* Definitions for data structures and routines for the regular
   expression library.
   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

#include <sys/types.h>

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>; it is also included above so that this header can be used
   on its own.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then a starting range point collating higher
     than the ending range point, as in [z-a], is invalid.
   If not set, then when the starting range point collates higher than
     the ending range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)

/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
   for ^, because it is difficult to scan the regex backwards to find
   whether ^ should be special.  */
#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)

/* If this bit is set, then \{ cannot be first in a BRE or
   immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)

/* If this bit is set, then no_sub will be set to 1 during
   re_compile_pattern.  */
#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
       | RE_CONTEXT_INVALID_OPS ))

#define RE_SYNTAX_POSIX_AWK						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors do not match at newline
     characters in the string.
   If not set, then anchors do match at newlines.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)

/* Use PMATCH[0] to delimit the start and end of the search in the
   buffer.  */
#define REG_STARTEND (1 << 2)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Invalid collating element.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

	/* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

/* To avoid duplicating every routine declaration -- once with a
   prototype (if we are ANSI), and once without (if we aren't) -- we
   use the following macro to declare argument types.  This
   unfortunately clutters up the declarations a bit, but I think it's
   worth it.

   C++ always needs the full prototype: there an empty parameter list
   means "takes no arguments", so it is selected for C++ even when the
   compiler does not define __STDC__.  */

#if (defined __STDC__ && __STDC__) || defined __cplusplus

# define _RE_ARGS(args) args

#else /* not __STDC__ */

# define _RE_ARGS(args) ()

#endif /* not __STDC__ */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern
  _RE_ARGS ((const char *pattern, size_t length,
             struct re_pattern_buffer *buffer));


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is
   zero).  */
extern int re_search
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
            int length, int start, int range, struct re_registers *regs));


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
             int length1, const char *string2, int length2,
             int start, int range, struct re_registers *regs, int stop));


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
             int length, int start, struct re_registers *regs));


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
             int length1, const char *string2, int length2,
             int start, struct re_registers *regs, int stop));


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers
  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
             unsigned num_regs, regoff_t *starts, regoff_t *ends));

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (defined __GNUC__						\
	&& (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)))
#  if defined restrict							\
      || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__)
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif
/* gcc 3.1 and up support the [restrict] syntax.  That syntax is C
   only: a qualifier inside array brackets is not valid C++, so the
   macro expands to nothing when compiling as C++.  */
#ifndef __restrict_arr
# if (defined __GNUC__ && !defined __cplusplus				\
      && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)))
#  define __restrict_arr __restrict
# else
#  define __restrict_arr
# endif
#endif

/* POSIX compatibility.  */
extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
			      const char *__restrict __pattern,
			      int __cflags));

extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
			      const char *__restrict __string, size_t __nmatch,
			      regmatch_t __pmatch[__restrict_arr],
			      int __eflags));

extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
				  char *__errbuf, size_t __errbuf_size));

extern void regfree _RE_ARGS ((regex_t *__preg));


#ifdef __cplusplus
}
#endif	/* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/