regex.h revision 146040
/* Definitions for data structures and routines for the regular
   expression library.
   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
2114339Sssadetsky
2214339Sssadetsky#ifndef _REGEX_H
2314339Sssadetsky#define _REGEX_H 1
2414339Sssadetsky
2514339Sssadetsky#include <sys/types.h>
2614339Sssadetsky
2714339Sssadetsky/* Allow the use in C++ code.  */
2814339Sssadetsky#ifdef __cplusplus
2914339Sssadetskyextern "C" {
3014339Sssadetsky#endif
3114339Sssadetsky
3214339Sssadetsky/* POSIX says that <sys/types.h> must be included (by the caller) before
3314339Sssadetsky   <regex.h>.  */
3414339Sssadetsky
3514339Sssadetsky#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
3614339Sssadetsky/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
3714339Sssadetsky   should be there.  */
3815939Sssadetsky# include <stddef.h>
3914339Sssadetsky#endif
4014339Sssadetsky
/* The following two types have to be a signed and an unsigned integer
   type wide enough to hold a value of a pointer.  For most ANSI
   compilers ptrdiff_t and size_t should be likely OK.  Still size of
   these two types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;
4714339Sssadetsky
/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
   If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating higher
     than the starting range point, as in [z-a], is invalid.
   If not set, then when ending range point collates higher than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)

/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
   for ^, because it is difficult to scan the regex backwards to find
   whether ^ should be special.  */
#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)

/* If this bit is set, then \{ cannot be first in a BRE or
   immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)

/* If this bit is set, then no_sub will be set to 1 during
   re_compile_pattern.  */
#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;
19114339Sssadetsky
/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
       | RE_CONTEXT_INVALID_OPS ))

#define RE_SYNTAX_POSIX_AWK						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
26214339Sssadetsky
/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)
27114339Sssadetsky
27214339Sssadetsky
/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then newline is treated specially, as POSIX
     specifies for REG_NEWLINE: the match-any-character operator and
     nonmatching lists do not match a newline, and the ^ and $ anchors
     match at newline characters in the string.
   If not set, then newline is an ordinary character and the anchors
     match only at the ends of the string.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)
29114339Sssadetsky
29214339Sssadetsky
/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)

/* Use PMATCH[0] to delimit the start and end of the search in the
   buffer.  */
#define REG_STARTEND (1 << 2)
30814339Sssadetsky
30914339Sssadetsky
/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Invalid collating element.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;
34114339Sssadetsky
34214339Sssadetsky/* This data structure represents a compiled pattern.  Before calling
34314339Sssadetsky   the pattern compiler, the fields `buffer', `allocated', `fastmap',
34414339Sssadetsky   `translate', and `no_sub' can be set.  After the pattern has been
34514339Sssadetsky   compiled, the `re_nsub' field is available.  All other fields are
34614339Sssadetsky   private to the regex routines.  */
34714339Sssadetsky
34814339Sssadetsky#ifndef RE_TRANSLATE_TYPE
34914339Sssadetsky# define RE_TRANSLATE_TYPE char *
35014339Sssadetsky#endif
35114339Sssadetsky
35214339Sssadetskystruct re_pattern_buffer
35314339Sssadetsky{
35414339Sssadetsky/* [[[begin pattern_buffer]]] */
35514339Sssadetsky	/* Space that holds the compiled pattern.  It is declared as
35614339Sssadetsky          `unsigned char *' because its elements are
35714339Sssadetsky           sometimes used as array indexes.  */
35814339Sssadetsky  unsigned char *buffer;
35914339Sssadetsky
36014339Sssadetsky	/* Number of bytes to which `buffer' points.  */
36114339Sssadetsky  unsigned long int allocated;
36214339Sssadetsky
36314339Sssadetsky	/* Number of bytes actually used in `buffer'.  */
36414339Sssadetsky  unsigned long int used;
36514339Sssadetsky
36614339Sssadetsky        /* Syntax setting with which the pattern was compiled.  */
36714339Sssadetsky  reg_syntax_t syntax;
36814339Sssadetsky
36914339Sssadetsky        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
37014339Sssadetsky           the fastmap, if there is one, to skip over impossible
37114339Sssadetsky           starting points for matches.  */
37214339Sssadetsky  char *fastmap;
37314339Sssadetsky
37414339Sssadetsky        /* Either a translate table to apply to all characters before
37514339Sssadetsky           comparing them, or zero for no translation.  The translation
37614339Sssadetsky           is applied to a pattern when it is compiled and to a string
37714339Sssadetsky           when it is matched.  */
37814339Sssadetsky  RE_TRANSLATE_TYPE translate;
37914339Sssadetsky
38014339Sssadetsky	/* Number of subexpressions found by the compiler.  */
38114339Sssadetsky  size_t re_nsub;
38214339Sssadetsky
38314339Sssadetsky        /* Zero if this pattern cannot match the empty string, one else.
38414339Sssadetsky           Well, in truth it's used only in `re_search_2', to see
38514339Sssadetsky           whether or not we should use the fastmap, so we don't set
38614339Sssadetsky           this absolutely perfectly; see `re_compile_fastmap' (the
38714339Sssadetsky           `duplicate' case).  */
38814339Sssadetsky  unsigned can_be_null : 1;
38914339Sssadetsky
39014339Sssadetsky        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
39114339Sssadetsky             for `max (RE_NREGS, re_nsub + 1)' groups.
39214339Sssadetsky           If REGS_REALLOCATE, reallocate space if necessary.
39314339Sssadetsky           If REGS_FIXED, use what's there.  */
39414339Sssadetsky#define REGS_UNALLOCATED 0
39514339Sssadetsky#define REGS_REALLOCATE 1
39614339Sssadetsky#define REGS_FIXED 2
39714339Sssadetsky  unsigned regs_allocated : 2;
39814339Sssadetsky
39914339Sssadetsky        /* Set to zero when `regex_compile' compiles a pattern; set to one
40014339Sssadetsky           by `re_compile_fastmap' if it updates the fastmap.  */
40114339Sssadetsky  unsigned fastmap_accurate : 1;
40214339Sssadetsky
40314339Sssadetsky        /* If set, `re_match_2' does not return information about
40414339Sssadetsky           subexpressions.  */
40514339Sssadetsky  unsigned no_sub : 1;
40614339Sssadetsky
40714339Sssadetsky        /* If set, a beginning-of-line anchor doesn't match at the
40814339Sssadetsky           beginning of the string.  */
40914339Sssadetsky  unsigned not_bol : 1;
41014339Sssadetsky
41114339Sssadetsky        /* Similarly for an end-of-line anchor.  */
41214339Sssadetsky  unsigned not_eol : 1;
41314339Sssadetsky
41414339Sssadetsky        /* If true, an anchor at a newline matches.  */
41514339Sssadetsky  unsigned newline_anchor : 1;
41614339Sssadetsky
41714339Sssadetsky/* [[[end pattern_buffer]]] */
41814339Sssadetsky};
41914339Sssadetsky
42014339Sssadetskytypedef struct re_pattern_buffer regex_t;
42114339Sssadetsky
/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;
45214339Sssadetsky
/* Declarations for routines.  */

/* To avoid duplicating every routine declaration -- once with a
   prototype (if we are ANSI), and once without (if we aren't) -- we
   use the following macro to declare argument types.  This
   unfortunately clutters up the declarations a bit, but I think it's
   worth it.

   C++ always gets the full prototype: there an empty parameter list
   means "takes no arguments", and not every C++ compiler defines
   __STDC__.  */

#if (defined __STDC__ && __STDC__) || defined __cplusplus

# define _RE_ARGS(args) args

#else /* neither __STDC__ nor C++ */

# define _RE_ARGS(args) ()

#endif /* neither __STDC__ nor C++ */
47014339Sssadetsky
47114339Sssadetsky/* Sets the current default syntax to SYNTAX, and return the old syntax.
47214339Sssadetsky   You can also simply assign to the `re_syntax_options' variable.  */
47314339Sssadetskyextern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
47414339Sssadetsky
47514339Sssadetsky/* Compile the regular expression PATTERN, with length LENGTH
47614339Sssadetsky   and syntax given by the global `re_syntax_options', into the buffer
47714339Sssadetsky   BUFFER.  Return NULL if successful, and an error string if not.  */
47814339Sssadetskyextern const char *re_compile_pattern
47914339Sssadetsky  _RE_ARGS ((const char *pattern, size_t length,
48014339Sssadetsky             struct re_pattern_buffer *buffer));
48114339Sssadetsky
48214339Sssadetsky
48314339Sssadetsky/* Compile a fastmap for the compiled pattern in BUFFER; used to
48414339Sssadetsky   accelerate searches.  Return 0 if successful and -2 if was an
48514339Sssadetsky   internal error.  */
48614339Sssadetskyextern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
48714339Sssadetsky
48814339Sssadetsky
48914339Sssadetsky/* Search in the string STRING (with length LENGTH) for the pattern
49014339Sssadetsky   compiled into BUFFER.  Start searching at position START, for RANGE
49114339Sssadetsky   characters.  Return the starting position of the match, -1 for no
49214339Sssadetsky   match, or -2 for an internal error.  Also return register
49314339Sssadetsky   information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
49414339Sssadetskyextern int re_search
49514339Sssadetsky  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
49614339Sssadetsky            int length, int start, int range, struct re_registers *regs));
49714339Sssadetsky
49814339Sssadetsky
49914339Sssadetsky/* Like `re_search', but search in the concatenation of STRING1 and
50014339Sssadetsky   STRING2.  Also, stop searching at index START + STOP.  */
50114339Sssadetskyextern int re_search_2
50214339Sssadetsky  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
50314339Sssadetsky             int length1, const char *string2, int length2,
50414339Sssadetsky             int start, int range, struct re_registers *regs, int stop));
50514339Sssadetsky
50614339Sssadetsky
50714339Sssadetsky/* Like `re_search', but return how many characters in STRING the regexp
50814339Sssadetsky   in BUFFER matched, starting at position START.  */
50914339Sssadetskyextern int re_match
51014339Sssadetsky  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
51114339Sssadetsky             int length, int start, struct re_registers *regs));
51214339Sssadetsky
51314339Sssadetsky
51414339Sssadetsky/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
51514339Sssadetskyextern int re_match_2
51614339Sssadetsky  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
51714339Sssadetsky             int length1, const char *string2, int length2,
51814339Sssadetsky             int start, struct re_registers *regs, int stop));
51914339Sssadetsky
52014339Sssadetsky
52114339Sssadetsky/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
52214339Sssadetsky   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
52314339Sssadetsky   for recording register information.  STARTS and ENDS must be
52414339Sssadetsky   allocated with malloc, and must each be at least `NUM_REGS * sizeof
52514339Sssadetsky   (regoff_t)' bytes long.
52614339Sssadetsky
52714339Sssadetsky   If NUM_REGS == 0, then subsequent matches should allocate their own
52814339Sssadetsky   register data.
52914339Sssadetsky
53014339Sssadetsky   Unless this function is called, the first search or match using
53114339Sssadetsky   PATTERN_BUFFER will allocate its own register data, without
53214339Sssadetsky   freeing the old data.  */
53314339Sssadetskyextern void re_set_registers
53414339Sssadetsky  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
53514339Sssadetsky             unsigned num_regs, regoff_t *starts, regoff_t *ends));
53614339Sssadetsky
53714339Sssadetsky#if defined _REGEX_RE_COMP || defined _LIBC
53814339Sssadetsky# ifndef _CRAY
53914339Sssadetsky/* 4.2 bsd compatibility.  */
54014339Sssadetskyextern char *re_comp _RE_ARGS ((const char *));
54114339Sssadetskyextern int re_exec _RE_ARGS ((const char *));
54214339Sssadetsky# endif
54314339Sssadetsky#endif
54414339Sssadetsky
/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif
/* gcc 3.1 and up support the [restrict] syntax.  */
#ifndef __restrict_arr
# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
#  define __restrict_arr __restrict
# else
#  define __restrict_arr
# endif
#endif
56414339Sssadetsky
56514339Sssadetsky/* POSIX compatibility.  */
56614339Sssadetskyextern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
56714339Sssadetsky			      const char *__restrict __pattern,
56814339Sssadetsky			      int __cflags));
56914339Sssadetsky
57014339Sssadetskyextern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
57114339Sssadetsky			      const char *__restrict __string, size_t __nmatch,
57214339Sssadetsky			      regmatch_t __pmatch[__restrict_arr],
57314339Sssadetsky			      int __eflags));
57414339Sssadetsky
57514339Sssadetskyextern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
57615939Sssadetsky				  char *__errbuf, size_t __errbuf_size));
57715939Sssadetsky
57815939Sssadetskyextern void regfree _RE_ARGS ((regex_t *__preg));
57914339Sssadetsky
58014339Sssadetsky
58114339Sssadetsky#ifdef __cplusplus
58214339Sssadetsky}
58314339Sssadetsky#endif	/* C++ */
58414339Sssadetsky
58514339Sssadetsky#endif /* regex.h */
58614339Sssadetsky
/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/
59414339Sssadetsky