189857Sobrien/* Definitions for data structures and routines for the regular
289857Sobrien   expression library, version 0.12.
3218822Sdim
4218822Sdim   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993, 1995, 1996, 1997,
5218822Sdim   1998, 2000, 2005 Free Software Foundation, Inc.
6218822Sdim
789857Sobrien   This file is part of the GNU C Library.  Its master source is NOT part of
889857Sobrien   the C library, however.  The master source lives in /gd/gnu/lib.
989857Sobrien
1089857Sobrien   The GNU C Library is free software; you can redistribute it and/or
1189857Sobrien   modify it under the terms of the GNU Lesser General Public
1289857Sobrien   License as published by the Free Software Foundation; either
1389857Sobrien   version 2.1 of the License, or (at your option) any later version.
1489857Sobrien
1589857Sobrien   The GNU C Library is distributed in the hope that it will be useful,
1689857Sobrien   but WITHOUT ANY WARRANTY; without even the implied warranty of
1789857Sobrien   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1889857Sobrien   Lesser General Public License for more details.
1989857Sobrien
2089857Sobrien   You should have received a copy of the GNU Lesser General Public
2189857Sobrien   License along with the GNU C Library; if not, write to the Free
22218822Sdim   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23218822Sdim   02110-1301 USA.  */
2489857Sobrien
2589857Sobrien#ifndef _REGEX_H
2689857Sobrien#define _REGEX_H 1
2789857Sobrien
2889857Sobrien/* Allow the use in C++ code.  */
2989857Sobrien#ifdef __cplusplus
3089857Sobrienextern "C" {
3189857Sobrien#endif
3289857Sobrien
3389857Sobrien/* POSIX says that <sys/types.h> must be included (by the caller) before
3489857Sobrien   <regex.h>.  */
3589857Sobrien
3689857Sobrien#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
3789857Sobrien/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
3889857Sobrien   should be there.  */
3989857Sobrien# include <stddef.h>
4089857Sobrien#endif
4189857Sobrien
4289857Sobrien/* The following two types have to be signed and unsigned integer types
4389857Sobrien   wide enough to hold the value of a pointer.  For most ANSI compilers
4489857Sobrien   ptrdiff_t and size_t should likely be OK.  Still, the size of these two
4589857Sobrien   types is 2 for Microsoft C.  Ugh... */
4689857Sobrientypedef long int s_reg_t;
4789857Sobrientypedef unsigned long int active_reg_t;
4889857Sobrien
4989857Sobrien/* The following bits are used to determine the regexp syntax we
5089857Sobrien   recognize.  The set/not-set meanings are chosen so that Emacs syntax
5189857Sobrien   remains the value 0.  The bits are given in alphabetical order, and
5289857Sobrien   the definitions shifted by one from the previous bit; thus, when we
5389857Sobrien   add or remove a bit, only one other definition need change.  */
5489857Sobrientypedef unsigned long int reg_syntax_t;
5589857Sobrien
5689857Sobrien/* If this bit is not set, then \ inside a bracket expression is literal.
5789857Sobrien   If set, then such a \ quotes the following character.  */
5889857Sobrien#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
5989857Sobrien
6089857Sobrien/* If this bit is not set, then + and ? are operators, and \+ and \? are
6189857Sobrien     literals.
6289857Sobrien   If set, then \+ and \? are operators and + and ? are literals.  */
6389857Sobrien#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
6489857Sobrien
6589857Sobrien/* If this bit is set, then character classes are supported.  They are:
6689857Sobrien     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
6789857Sobrien     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
6889857Sobrien   If not set, then character classes are not supported.  */
6989857Sobrien#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
7089857Sobrien
7189857Sobrien/* If this bit is set, then ^ and $ are always anchors (outside bracket
7289857Sobrien     expressions, of course).
7389857Sobrien   If this bit is not set, then it depends:
7489857Sobrien        ^  is an anchor if it is at the beginning of a regular
7589857Sobrien           expression or after an open-group or an alternation operator;
7689857Sobrien        $  is an anchor if it is at the end of a regular expression, or
7789857Sobrien           before a close-group or an alternation operator.
7889857Sobrien
7989857Sobrien   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
8089857Sobrien   POSIX draft 11.2 says that * etc. in leading positions is undefined.
8189857Sobrien   We already implemented a previous draft which made those constructs
8289857Sobrien   invalid, though, so we haven't changed the code back.  */
8389857Sobrien#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
8489857Sobrien
8589857Sobrien/* If this bit is set, then special characters are always special
8689857Sobrien     regardless of where they are in the pattern.
8789857Sobrien   If this bit is not set, then special characters are special only in
8889857Sobrien     some contexts; otherwise they are ordinary.  Specifically,
8989857Sobrien     * + ? and intervals are only special when not after the beginning,
9089857Sobrien     open-group, or alternation operator.  */
9189857Sobrien#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
9289857Sobrien
9389857Sobrien/* If this bit is set, then *, +, ?, and { cannot be first in an re or
9489857Sobrien     immediately after an alternation or begin-group operator.  */
9589857Sobrien#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
9689857Sobrien
9789857Sobrien/* If this bit is set, then . matches newline.
9889857Sobrien   If not set, then it doesn't.  */
9989857Sobrien#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
10089857Sobrien
10189857Sobrien/* If this bit is set, then . doesn't match NUL.
10289857Sobrien   If not set, then it does.  */
10389857Sobrien#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
10489857Sobrien
10589857Sobrien/* If this bit is set, nonmatching lists [^...] do not match newline.
10689857Sobrien   If not set, they do.  */
10789857Sobrien#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
10889857Sobrien
10989857Sobrien/* If this bit is set, either \{...\} or {...} defines an
11089857Sobrien     interval, depending on RE_NO_BK_BRACES.
11189857Sobrien   If not set, \{, \}, {, and } are literals.  */
11289857Sobrien#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
11389857Sobrien
11489857Sobrien/* If this bit is set, +, ? and | aren't recognized as operators.
11589857Sobrien   If not set, they are.  */
11689857Sobrien#define RE_LIMITED_OPS (RE_INTERVALS << 1)
11789857Sobrien
11889857Sobrien/* If this bit is set, newline is an alternation operator.
11989857Sobrien   If not set, newline is literal.  */
12089857Sobrien#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
12189857Sobrien
12289857Sobrien/* If this bit is set, then `{...}' defines an interval, and \{ and \}
12389857Sobrien     are literals.
12489857Sobrien  If not set, then `\{...\}' defines an interval.  */
12589857Sobrien#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
12689857Sobrien
12789857Sobrien/* If this bit is set, (...) defines a group, and \( and \) are literals.
12889857Sobrien   If not set, \(...\) defines a group, and ( and ) are literals.  */
12989857Sobrien#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
13089857Sobrien
13189857Sobrien/* If this bit is set, then \<digit> matches <digit>.
13289857Sobrien   If not set, then \<digit> is a back-reference.  */
13389857Sobrien#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
13489857Sobrien
13589857Sobrien/* If this bit is set, then | is an alternation operator, and \| is literal.
13689857Sobrien   If not set, then \| is an alternation operator, and | is literal.  */
13789857Sobrien#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
13889857Sobrien
13989857Sobrien/* If this bit is set, then an ending range point collating lower
14089857Sobrien     than the starting range point, as in [z-a], is invalid.
14189857Sobrien   If not set, then when the ending range point collates lower than the
14289857Sobrien     starting range point, the range is ignored.  */
14389857Sobrien#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
14489857Sobrien
14589857Sobrien/* If this bit is set, then an unmatched ) is ordinary.
14689857Sobrien   If not set, then an unmatched ) is invalid.  */
14789857Sobrien#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
14889857Sobrien
14989857Sobrien/* If this bit is set, succeed as soon as we match the whole pattern,
15089857Sobrien   without further backtracking.  */
15189857Sobrien#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
15289857Sobrien
15389857Sobrien/* If this bit is set, do not process the GNU regex operators.
15489857Sobrien   If not set, then the GNU regex operators are recognized. */
15589857Sobrien#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
15689857Sobrien
15789857Sobrien/* If this bit is set, turn on internal regex debugging.
15889857Sobrien   If not set, and debugging was on, turn it off.
15989857Sobrien   This only works if regex.c is compiled -DDEBUG.
16089857Sobrien   We define this bit always, so that all that's needed to turn on
16189857Sobrien   debugging is to recompile regex.c; the calling code can always have
16289857Sobrien   this bit set, and it won't affect anything in the normal case. */
16389857Sobrien#define RE_DEBUG (RE_NO_GNU_OPS << 1)
16489857Sobrien
16589857Sobrien/* If this bit is set, a syntactically invalid interval is treated as
16689857Sobrien   a string of ordinary characters.  For example, the ERE 'a{1' is
16789857Sobrien   treated as 'a\{1'.  */
16889857Sobrien#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
16989857Sobrien
17089857Sobrien/* This global variable defines the particular regexp syntax to use (for
17189857Sobrien   some interfaces).  When a regexp is compiled, the syntax used is
17289857Sobrien   stored in the pattern buffer, so changing this does not affect
17389857Sobrien   already-compiled regexps.  */
17489857Sobrienextern reg_syntax_t re_syntax_options;
17589857Sobrien
17689857Sobrien/* Define combinations of the above bits for the standard possibilities.
17789857Sobrien   (The [[[ comments delimit what gets put into the Texinfo file, so
17889857Sobrien   don't delete them!)  */
17989857Sobrien/* [[[begin syntaxes]]] */
18089857Sobrien#define RE_SYNTAX_EMACS 0
18189857Sobrien
18289857Sobrien#define RE_SYNTAX_AWK							\
18389857Sobrien  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
18489857Sobrien   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
18589857Sobrien   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
18689857Sobrien   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
18789857Sobrien   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
18889857Sobrien
18989857Sobrien#define RE_SYNTAX_GNU_AWK						\
19089857Sobrien  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
19189857Sobrien   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
19289857Sobrien
19389857Sobrien#define RE_SYNTAX_POSIX_AWK 						\
19489857Sobrien  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
19589857Sobrien   | RE_INTERVALS	    | RE_NO_GNU_OPS)
19689857Sobrien
19789857Sobrien#define RE_SYNTAX_GREP							\
19889857Sobrien  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
19989857Sobrien   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
20089857Sobrien   | RE_NEWLINE_ALT)
20189857Sobrien
20289857Sobrien#define RE_SYNTAX_EGREP							\
20389857Sobrien  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
20489857Sobrien   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
20589857Sobrien   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
20689857Sobrien   | RE_NO_BK_VBAR)
20789857Sobrien
20889857Sobrien#define RE_SYNTAX_POSIX_EGREP						\
20989857Sobrien  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
21089857Sobrien   | RE_INVALID_INTERVAL_ORD)
21189857Sobrien
21289857Sobrien/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
21389857Sobrien#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
21489857Sobrien
21589857Sobrien#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
21689857Sobrien
21789857Sobrien/* Syntax bits common to both basic and extended POSIX regex syntax.  */
21889857Sobrien#define _RE_SYNTAX_POSIX_COMMON						\
21989857Sobrien  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
22089857Sobrien   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
22189857Sobrien
22289857Sobrien#define RE_SYNTAX_POSIX_BASIC						\
22389857Sobrien  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
22489857Sobrien
22589857Sobrien/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
22689857Sobrien   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
22789857Sobrien   isn't minimal, since other operators, such as \`, aren't disabled.  */
22889857Sobrien#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
22989857Sobrien  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
23089857Sobrien
23189857Sobrien#define RE_SYNTAX_POSIX_EXTENDED					\
23289857Sobrien  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
23389857Sobrien   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
23489857Sobrien   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
23589857Sobrien   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
23689857Sobrien
23789857Sobrien/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
23889857Sobrien   removed and RE_NO_BK_REFS is added.  */
23989857Sobrien#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
24089857Sobrien  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
24189857Sobrien   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
24289857Sobrien   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
24389857Sobrien   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
24489857Sobrien/* [[[end syntaxes]]] */
24589857Sobrien
24689857Sobrien/* Maximum number of duplicates an interval can allow.  Some systems
24789857Sobrien   (erroneously) define this in other header files, but we want our
24889857Sobrien   value, so remove any previous define.  */
24989857Sobrien#ifdef RE_DUP_MAX
25089857Sobrien# undef RE_DUP_MAX
25189857Sobrien#endif
25289857Sobrien/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
25389857Sobrien#define RE_DUP_MAX (0x7fff)
25489857Sobrien
25589857Sobrien
25689857Sobrien/* POSIX `cflags' bits (i.e., information for `regcomp').  */
25789857Sobrien
25889857Sobrien/* If this bit is set, then use extended regular expression syntax.
25989857Sobrien   If not set, then use basic regular expression syntax.  */
26089857Sobrien#define REG_EXTENDED 1
26189857Sobrien
26289857Sobrien/* If this bit is set, then ignore case when matching.
26389857Sobrien   If not set, then case is significant.  */
26489857Sobrien#define REG_ICASE (REG_EXTENDED << 1)
26589857Sobrien
26689857Sobrien/* If this bit is set, then anchors do not match at newline
26789857Sobrien     characters in the string.
26889857Sobrien   If not set, then anchors do match at newlines.  */
26989857Sobrien#define REG_NEWLINE (REG_ICASE << 1)
27089857Sobrien
27189857Sobrien/* If this bit is set, then report only success or fail in regexec.
27289857Sobrien   If not set, then returns differ between not matching and errors.  */
27389857Sobrien#define REG_NOSUB (REG_NEWLINE << 1)
27489857Sobrien
27589857Sobrien
27689857Sobrien/* POSIX `eflags' bits (i.e., information for regexec).  */
27789857Sobrien
27889857Sobrien/* If this bit is set, then the beginning-of-line operator doesn't match
27989857Sobrien     the beginning of the string (presumably because it's not the
28089857Sobrien     beginning of a line).
28189857Sobrien   If not set, then the beginning-of-line operator does match the
28289857Sobrien     beginning of the string.  */
28389857Sobrien#define REG_NOTBOL 1
28489857Sobrien
28589857Sobrien/* Like REG_NOTBOL, except for the end-of-line.  */
28689857Sobrien#define REG_NOTEOL (1 << 1)
28789857Sobrien
28889857Sobrien
28989857Sobrien/* If any error codes are removed, changed, or added, update the
29089857Sobrien   `re_error_msg' table in regex.c.  */
29189857Sobrientypedef enum
29289857Sobrien{
29389857Sobrien#ifdef _XOPEN_SOURCE
29489857Sobrien  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
29589857Sobrien#endif
29689857Sobrien
29789857Sobrien  REG_NOERROR = 0,	/* Success.  */
29889857Sobrien  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
29989857Sobrien
30089857Sobrien  /* POSIX regcomp return error codes.  (In the order listed in the
30189857Sobrien     standard.)  */
30289857Sobrien  REG_BADPAT,		/* Invalid pattern.  */
30389857Sobrien  REG_ECOLLATE,		/* Not implemented.  */
30489857Sobrien  REG_ECTYPE,		/* Invalid character class name.  */
30589857Sobrien  REG_EESCAPE,		/* Trailing backslash.  */
30689857Sobrien  REG_ESUBREG,		/* Invalid back reference.  */
30789857Sobrien  REG_EBRACK,		/* Unmatched left bracket.  */
30889857Sobrien  REG_EPAREN,		/* Parenthesis imbalance.  */
30989857Sobrien  REG_EBRACE,		/* Unmatched \{.  */
31089857Sobrien  REG_BADBR,		/* Invalid contents of \{\}.  */
31189857Sobrien  REG_ERANGE,		/* Invalid range end.  */
31289857Sobrien  REG_ESPACE,		/* Ran out of memory.  */
31389857Sobrien  REG_BADRPT,		/* No preceding re for repetition op.  */
31489857Sobrien
31589857Sobrien  /* Error codes we've added.  */
31689857Sobrien  REG_EEND,		/* Premature end.  */
31789857Sobrien  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
31889857Sobrien  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
31989857Sobrien} reg_errcode_t;
32089857Sobrien
32189857Sobrien/* This data structure represents a compiled pattern.  Before calling
32289857Sobrien   the pattern compiler, the fields `buffer', `allocated', `fastmap',
32389857Sobrien   `translate', and `no_sub' can be set.  After the pattern has been
32489857Sobrien   compiled, the `re_nsub' field is available.  All other fields are
32589857Sobrien   private to the regex routines.  */
32689857Sobrien
32789857Sobrien#ifndef RE_TRANSLATE_TYPE
32889857Sobrien# define RE_TRANSLATE_TYPE char *
32989857Sobrien#endif
33089857Sobrien
33189857Sobrienstruct re_pattern_buffer
33289857Sobrien{
33389857Sobrien/* [[[begin pattern_buffer]]] */
33489857Sobrien	/* Space that holds the compiled pattern.  It is declared as
33589857Sobrien          `unsigned char *' because its elements are
33689857Sobrien           sometimes used as array indexes.  */
33789857Sobrien  unsigned char *buffer;
33889857Sobrien
33989857Sobrien	/* Number of bytes to which `buffer' points.  */
34089857Sobrien  unsigned long int allocated;
34189857Sobrien
34289857Sobrien	/* Number of bytes actually used in `buffer'.  */
34389857Sobrien  unsigned long int used;
34489857Sobrien
34589857Sobrien        /* Syntax setting with which the pattern was compiled.  */
34689857Sobrien  reg_syntax_t syntax;
34789857Sobrien
34889857Sobrien        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
34989857Sobrien           the fastmap, if there is one, to skip over impossible
35089857Sobrien           starting points for matches.  */
35189857Sobrien  char *fastmap;
35289857Sobrien
35389857Sobrien        /* Either a translate table to apply to all characters before
35489857Sobrien           comparing them, or zero for no translation.  The translation
35589857Sobrien           is applied to a pattern when it is compiled and to a string
35689857Sobrien           when it is matched.  */
35789857Sobrien  RE_TRANSLATE_TYPE translate;
35889857Sobrien
35989857Sobrien	/* Number of subexpressions found by the compiler.  */
36089857Sobrien  size_t re_nsub;
36189857Sobrien
36289857Sobrien        /* Zero if this pattern cannot match the empty string, one else.
36389857Sobrien           Well, in truth it's used only in `re_search_2', to see
36489857Sobrien           whether or not we should use the fastmap, so we don't set
36589857Sobrien           this absolutely perfectly; see `re_compile_fastmap' (the
36689857Sobrien           `duplicate' case).  */
36789857Sobrien  unsigned can_be_null : 1;
36889857Sobrien
36989857Sobrien        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
37089857Sobrien             for `max (RE_NREGS, re_nsub + 1)' groups.
37189857Sobrien           If REGS_REALLOCATE, reallocate space if necessary.
37289857Sobrien           If REGS_FIXED, use what's there.  */
37389857Sobrien#define REGS_UNALLOCATED 0
37489857Sobrien#define REGS_REALLOCATE 1
37589857Sobrien#define REGS_FIXED 2
37689857Sobrien  unsigned regs_allocated : 2;
37789857Sobrien
37889857Sobrien        /* Set to zero when `regex_compile' compiles a pattern; set to one
37989857Sobrien           by `re_compile_fastmap' if it updates the fastmap.  */
38089857Sobrien  unsigned fastmap_accurate : 1;
38189857Sobrien
38289857Sobrien        /* If set, `re_match_2' does not return information about
38389857Sobrien           subexpressions.  */
38489857Sobrien  unsigned no_sub : 1;
38589857Sobrien
38689857Sobrien        /* If set, a beginning-of-line anchor doesn't match at the
38789857Sobrien           beginning of the string.  */
38889857Sobrien  unsigned not_bol : 1;
38989857Sobrien
39089857Sobrien        /* Similarly for an end-of-line anchor.  */
39189857Sobrien  unsigned not_eol : 1;
39289857Sobrien
39389857Sobrien        /* If true, an anchor at a newline matches.  */
39489857Sobrien  unsigned newline_anchor : 1;
39589857Sobrien
39689857Sobrien/* [[[end pattern_buffer]]] */
39789857Sobrien};
39889857Sobrien
39989857Sobrientypedef struct re_pattern_buffer regex_t;
40089857Sobrien
40189857Sobrien/* Type for byte offsets within the string.  POSIX mandates this.  */
40289857Sobrientypedef int regoff_t;
40389857Sobrien
40489857Sobrien
40589857Sobrien/* This is the structure we store register match data in.  See
40689857Sobrien   regex.texinfo for a full description of what registers match.  */
40789857Sobrienstruct re_registers
40889857Sobrien{
40989857Sobrien  unsigned num_regs;
41089857Sobrien  regoff_t *start;
41189857Sobrien  regoff_t *end;
41289857Sobrien};
41389857Sobrien
41489857Sobrien
41589857Sobrien/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
41689857Sobrien   `re_match_2' returns information about at least this many registers
41789857Sobrien   the first time a `regs' structure is passed.  */
41889857Sobrien#ifndef RE_NREGS
41989857Sobrien# define RE_NREGS 30
42089857Sobrien#endif
42189857Sobrien
42289857Sobrien
42389857Sobrien/* POSIX specification for registers.  Aside from using different names than
42489857Sobrien   `re_registers', POSIX uses an array of structures, instead of a
42589857Sobrien   structure of arrays.  */
42689857Sobrientypedef struct
42789857Sobrien{
42889857Sobrien  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
42989857Sobrien  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
43089857Sobrien} regmatch_t;
43189857Sobrien
43289857Sobrien/* Declarations for routines.  */
43389857Sobrien
43489857Sobrien/* Historical note: to avoid duplicating every routine declaration --
43589857Sobrien   once with a prototype (if we are ANSI), and once without (if we
43689857Sobrien   aren't) -- these declarations used to wrap their argument types in a
43789857Sobrien   macro.  No such macro is defined in this file; the routines below
43889857Sobrien   are declared with plain ANSI prototypes.  */
43989857Sobrien
44089857Sobrien/* Sets the current default syntax to SYNTAX, and return the old syntax.
44189857Sobrien   You can also simply assign to the `re_syntax_options' variable.  */
442218822Sdimextern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
44389857Sobrien
44489857Sobrien/* Compile the regular expression PATTERN, with length LENGTH
44589857Sobrien   and syntax given by the global `re_syntax_options', into the buffer
44689857Sobrien   BUFFER.  Return NULL if successful, and an error string if not.  */
447218822Sdimextern const char *re_compile_pattern (const char *pattern, size_t length,
448218822Sdim                                       struct re_pattern_buffer *buffer);
44989857Sobrien
45089857Sobrien
45189857Sobrien/* Compile a fastmap for the compiled pattern in BUFFER; used to
45289857Sobrien   accelerate searches.  Return 0 if successful and -2 if there was an
45389857Sobrien   internal error.  */
454218822Sdimextern int re_compile_fastmap (struct re_pattern_buffer *buffer);
45589857Sobrien
45689857Sobrien
45789857Sobrien/* Search in the string STRING (with length LENGTH) for the pattern
45889857Sobrien   compiled into BUFFER.  Start searching at position START, for RANGE
45989857Sobrien   characters.  Return the starting position of the match, -1 for no
46089857Sobrien   match, or -2 for an internal error.  Also return register
46189857Sobrien   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
462218822Sdimextern int re_search (struct re_pattern_buffer *buffer, const char *string,
463218822Sdim                      int length, int start, int range,
464218822Sdim                      struct re_registers *regs);
46589857Sobrien
46689857Sobrien
46789857Sobrien/* Like `re_search', but search in the concatenation of STRING1 and
46889857Sobrien   STRING2.  Also, stop searching at index START + STOP.  */
469218822Sdimextern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
470218822Sdim                        int length1, const char *string2, int length2,
471218822Sdim                        int start, int range, struct re_registers *regs,
472218822Sdim                        int stop);
47389857Sobrien
47489857Sobrien
47589857Sobrien/* Like `re_search', but return how many characters in STRING the regexp
47689857Sobrien   in BUFFER matched, starting at position START.  */
477218822Sdimextern int re_match (struct re_pattern_buffer *buffer, const char *string,
478218822Sdim                     int length, int start, struct re_registers *regs);
47989857Sobrien
48089857Sobrien
48189857Sobrien/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
482218822Sdimextern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
483218822Sdim                       int length1, const char *string2, int length2,
484218822Sdim                       int start, struct re_registers *regs, int stop);
48589857Sobrien
48689857Sobrien
48789857Sobrien/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
48889857Sobrien   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
48989857Sobrien   for recording register information.  STARTS and ENDS must be
49089857Sobrien   allocated with malloc, and must each be at least `NUM_REGS * sizeof
49189857Sobrien   (regoff_t)' bytes long.
49289857Sobrien
49389857Sobrien   If NUM_REGS == 0, then subsequent matches should allocate their own
49489857Sobrien   register data.
49589857Sobrien
49689857Sobrien   Unless this function is called, the first search or match using
49789857Sobrien   BUFFER will allocate its own register data, without
49889857Sobrien   freeing the old data.  */
499218822Sdimextern void re_set_registers (struct re_pattern_buffer *buffer,
500218822Sdim                              struct re_registers *regs,
501218822Sdim                              unsigned num_regs, regoff_t *starts,
502218822Sdim                              regoff_t *ends);
50389857Sobrien
50489857Sobrien#if defined _REGEX_RE_COMP || defined _LIBC
50589857Sobrien# ifndef _CRAY
50689857Sobrien/* 4.2 bsd compatibility.  */
507218822Sdimextern char *re_comp (const char *);
508218822Sdimextern int re_exec (const char *);
50989857Sobrien# endif
51089857Sobrien#endif
51189857Sobrien
51289857Sobrien/* GCC 2.95 and later have "__restrict"; C99 compilers have
51389857Sobrien   "restrict", and "configure" may have defined "restrict".  */
51489857Sobrien#ifndef __restrict
51589857Sobrien# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
51689857Sobrien#  if defined restrict || 199901L <= __STDC_VERSION__
51789857Sobrien#   define __restrict restrict
51889857Sobrien#  else
51989857Sobrien#   define __restrict
52089857Sobrien#  endif
52189857Sobrien# endif
52289857Sobrien#endif
52389857Sobrien
524104834Sobrien/* GCC 3.1 and later support declaring arrays as non-overlapping
525104834Sobrien   using the syntax array_name[restrict]  */
526104834Sobrien#ifndef __restrict_arr
527104834Sobrien# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
528104834Sobrien#  define __restrict_arr
529104834Sobrien# else
530104834Sobrien#  define __restrict_arr __restrict
531104834Sobrien# endif
532104834Sobrien#endif
533104834Sobrien
53489857Sobrien/* POSIX compatibility.  */
535218822Sdimextern int regcomp (regex_t *__restrict __preg,
536218822Sdim                    const char *__restrict __pattern,
537218822Sdim                    int __cflags);
53889857Sobrien
539218822Sdim#if (__GNUC__)
540218822Sdim__extension__
541218822Sdim#endif
542218822Sdimextern int regexec (const regex_t *__restrict __preg,
543218822Sdim                    const char *__restrict __string, size_t __nmatch,
544218822Sdim                    regmatch_t __pmatch[__restrict_arr],
545218822Sdim                    int __eflags);
54689857Sobrien
547218822Sdimextern size_t regerror (int __errcode, const regex_t *__preg,
548218822Sdim                        char *__errbuf, size_t __errbuf_size);
54989857Sobrien
550218822Sdimextern void regfree (regex_t *__preg);
55189857Sobrien
55289857Sobrien
55389857Sobrien#ifdef __cplusplus
55489857Sobrien}
55589857Sobrien#endif	/* C++ */
55689857Sobrien
55789857Sobrien#endif /* regex.h */
55889857Sobrien
55989857Sobrien/*
56089857SobrienLocal variables:
56189857Sobrienmake-backup-files: t
56289857Sobrienversion-control: t
56389857Sobrientrim-versions-without-asking: nil
56489857SobrienEnd:
56589857Sobrien*/
566