/* Definitions for data structures and routines for the regular
   expression library, version 0.12.
   Copyright (C) 1985,1989-1993,1995-1998, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* Include guard.  The matching #endif is the last directive of this
   header.  */
#ifndef _REGEX_H
#define _REGEX_H 1

/* Allow the use in C++ code.  The brace opened here is closed again
   just before the end of the include guard.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  This header relies on that for `size_t', which it uses
   without declaring it.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* Integer types for register values: one signed, one unsigned, each
   of which has to be wide enough to hold the value of a pointer.
   ptrdiff_t and size_t would likely be fine with most ANSI compilers,
   but the size of those two types is only 2 for Microsoft C, so the
   `long' types are spelled out here.  */
typedef long s_reg_t;
typedef unsigned long active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.

   The value is written with a UL suffix instead of a cast: the type is
   still `unsigned long', but unlike a cast the constant (and every bit
   derived from it below) is also valid inside #if directives.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS 1UL

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating lower
     than the starting range point, as in [z-a], is invalid.
   If not set, then when the ending range point collates lower than
     the starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  The variable is only declared here; its
   definition is not part of this header.  */
extern reg_syntax_t re_syntax_options;

/* Ready-made combinations of the syntax bits for the standard
   possibilities.  Each combination lists its bits one per line, in
   alphabetical order.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK \
  (RE_BACKSLASH_ESCAPE_IN_LISTS \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_DOT_NEWLINE \
   | RE_DOT_NOT_NULL \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_REFS \
   | RE_NO_BK_VBAR \
   | RE_NO_EMPTY_RANGES \
   | RE_NO_GNU_OPS \
   | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* POSIX extended syntax plus two bits, with three bits masked out.  */
#define RE_SYNTAX_GNU_AWK \
  ((RE_SYNTAX_POSIX_EXTENDED \
    | RE_BACKSLASH_ESCAPE_IN_LISTS \
    | RE_DEBUG) \
   & ~(RE_CONTEXT_INDEP_OPS \
       | RE_DOT_NOT_NULL \
       | RE_INTERVALS))

#define RE_SYNTAX_POSIX_AWK \
  (RE_SYNTAX_POSIX_EXTENDED \
   | RE_BACKSLASH_ESCAPE_IN_LISTS \
   | RE_INTERVALS \
   | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP \
  (RE_BK_PLUS_QM \
   | RE_CHAR_CLASSES \
   | RE_HAT_LISTS_NOT_NEWLINE \
   | RE_INTERVALS \
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP \
  (RE_CHAR_CLASSES \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INDEP_OPS \
   | RE_HAT_LISTS_NOT_NEWLINE \
   | RE_NEWLINE_ALT \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP \
  (RE_SYNTAX_EGREP \
   | RE_INTERVALS \
   | RE_INVALID_INTERVAL_ORD \
   | RE_NO_BK_BRACES)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits shared by the basic and the extended POSIX regex
   syntax.  */
#define _RE_SYNTAX_POSIX_COMMON \
  (RE_CHAR_CLASSES \
   | RE_DOT_NEWLINE \
   | RE_DOT_NOT_NULL \
   | RE_INTERVALS \
   | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INDEP_OPS \
   | RE_CONTEXT_INVALID_OPS \
   | RE_NO_BK_BRACES \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_VBAR \
   | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INVALID_OPS \
   | RE_NO_BK_BRACES \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_REFS \
   | RE_NO_BK_VBAR \
   | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define RE_DUP_MAX in other header files, but we want
   our value, so any earlier definition is dropped first.  #undef of a
   name that is not a macro is harmless, so no #ifdef is needed around
   it.  The value is spelled in hex because ((1 << 15) - 1) overflows
   if sizeof(int) == 2.  */
#undef RE_DUP_MAX
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  Each flag is
   written as an explicit single-bit shift.  */

/* Set: use extended regular expression syntax.
   Clear: use basic regular expression syntax.  */
#define REG_EXTENDED (1 << 0)

/* Set: ignore case when matching.
   Clear: case is significant.  */
#define REG_ICASE (1 << 1)

/* Set: newline characters in the string are treated specially, and
     anchors match at them (cf. the `newline_anchor' field of the
     pattern buffer); this is the meaning POSIX gives to REG_NEWLINE.
   Clear: newline is an ordinary character, and anchors match only at
     the ends of the string.  */
#define REG_NEWLINE (1 << 2)

/* Set: regexec reports only success or failure; no information about
     subexpression matches is returned (cf. the `no_sub' field of the
     pattern buffer).
   Clear: subexpression match information is reported as well.  */
#define REG_NOSUB (1 << 3)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* Set: the beginning-of-line operator doesn't match the beginning of
     the string (presumably because the string does not start at the
     beginning of a line).
   Clear: the beginning-of-line operator does match the beginning of
     the string.  */
#define REG_NOTBOL (1 << 0)

/* The same as REG_NOTBOL, but for the end-of-line operator and the end
   of the string.  */
#define REG_NOTEOL (1 << 1)


/* Error codes of the regex routines.  The numeric value of every code
   is written out explicitly.  If any error codes are removed, changed,
   or added, update the `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  /* This will never happen for this implementation.  */
  REG_ENOSYS = -1,
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH = 1,	/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT = 2,	/* Invalid pattern.  */
  REG_ECOLLATE = 3,	/* Not implemented.  */
  REG_ECTYPE = 4,	/* Invalid character class name.  */
  REG_EESCAPE = 5,	/* Trailing backslash.  */
  REG_ESUBREG = 6,	/* Invalid back reference.  */
  REG_EBRACK = 7,	/* Unmatched left bracket.  */
  REG_EPAREN = 8,	/* Parenthesis imbalance.  */
  REG_EBRACE = 9,	/* Unmatched \{.  */
  REG_BADBR = 10,	/* Invalid contents of \{\}.  */
  REG_ERANGE = 11,	/* Invalid range end.  */
  REG_ESPACE = 12,	/* Ran out of memory.  */
  REG_BADRPT = 13,	/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND = 14,	/* Premature end.  */
  REG_ESIZE = 15,	/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN = 16	/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

318157779Sume/* This data structure represents a compiled pattern.  Before calling
319157779Sume   the pattern compiler, the fields `buffer', `allocated', `fastmap',
320157779Sume   `translate', and `no_sub' can be set.  After the pattern has been
321157779Sume   compiled, the `re_nsub' field is available.  All other fields are
322157779Sume   private to the regex routines.  */
323157779Sume
324157779Sume#ifndef RE_TRANSLATE_TYPE
325157779Sume# define RE_TRANSLATE_TYPE char *
326157779Sume#endif
327157779Sume
328157779Sumestruct re_pattern_buffer
329157779Sume{
330157779Sume/* [[[begin pattern_buffer]]] */
331157779Sume	/* Space that holds the compiled pattern.  It is declared as
332157779Sume          `unsigned char *' because its elements are
333157779Sume           sometimes used as array indexes.  */
334157779Sume  unsigned char *buffer;
335157779Sume
336157779Sume	/* Number of bytes to which `buffer' points.  */
337157779Sume  unsigned long int allocated;
338157779Sume
339145279Sume	/* Number of bytes actually used in `buffer'.  */
340145279Sume  unsigned long int used;
3411573Srgrimes
342157779Sume        /* Syntax setting with which the pattern was compiled.  */
3431573Srgrimes  reg_syntax_t syntax;
344145279Sume
345254700Sjilles        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
3461573Srgrimes           the fastmap, if there is one, to skip over impossible
347145279Sume           starting points for matches.  */
348145279Sume  char *fastmap;
3491573Srgrimes
3501573Srgrimes        /* Either a translate table to apply to all characters before
3511573Srgrimes           comparing them, or zero for no translation.  The translation
352157779Sume           is applied to a pattern when it is compiled and to a string
3531573Srgrimes           when it is matched.  */
354145279Sume  RE_TRANSLATE_TYPE translate;
355145279Sume
356145279Sume	/* Number of subexpressions found by the compiler.  */
3571573Srgrimes  size_t re_nsub;
358145279Sume
3591573Srgrimes        /* Zero if this pattern cannot match the empty string, one else.
3601573Srgrimes           Well, in truth it's used only in `re_search_2', to see
361145279Sume           whether or not we should use the fastmap, so we don't set
362157779Sume           this absolutely perfectly; see `re_compile_fastmap' (the
3631573Srgrimes           `duplicate' case).  */
3641573Srgrimes  unsigned can_be_null : 1;
365145279Sume
366145279Sume        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
3671573Srgrimes             for `max (RE_NREGS, re_nsub + 1)' groups.
368254700Sjilles           If REGS_REALLOCATE, reallocate space if necessary.
369145279Sume           If REGS_FIXED, use what's there.  */
3701573Srgrimes#define REGS_UNALLOCATED 0
371145626Sume#define REGS_REALLOCATE 1
372145279Sume#define REGS_FIXED 2
3731573Srgrimes  unsigned regs_allocated : 2;
3741573Srgrimes
3751573Srgrimes        /* Set to zero when `regex_compile' compiles a pattern; set to one
376139612Ssobomax           by `re_compile_fastmap' if it updates the fastmap.  */
377139612Ssobomax  unsigned fastmap_accurate : 1;
378145279Sume
3791573Srgrimes        /* If set, `re_match_2' does not return information about
3801573Srgrimes           subexpressions.  */
3811573Srgrimes  unsigned no_sub : 1;
3821573Srgrimes
3831573Srgrimes        /* If set, a beginning-of-line anchor doesn't match at the
3841573Srgrimes           beginning of the string.  */
3851573Srgrimes  unsigned not_bol : 1;
3861573Srgrimes
3871573Srgrimes        /* Similarly for an end-of-line anchor.  */
388145279Sume  unsigned not_eol : 1;
389145279Sume
390145279Sume        /* If true, an anchor at a newline matches.  */
391145279Sume  unsigned newline_anchor : 1;
392145279Sume
3931573Srgrimes/* [[[end pattern_buffer]]] */
3941573Srgrimes};
3951573Srgrimes
/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;

/* POSIX name for the pattern buffer.  */
typedef struct re_pattern_buffer regex_t;


4021573Srgrimes/* This is the structure we store register match data in.  See
4031573Srgrimes   regex.texinfo for a full description of what registers match.  */
4041573Srgrimesstruct re_registers
4051573Srgrimes{
4061573Srgrimes  unsigned num_regs;
4071573Srgrimes  regoff_t *start;
408145279Sume  regoff_t *end;
4091573Srgrimes};
410145279Sume
411158115Sume
/* When `regs_allocated' in the pattern buffer is REGS_UNALLOCATED,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  A definition made
   before this point is left alone.  */
#if !defined RE_NREGS
# define RE_NREGS 30
#endif


420158115Sume/* POSIX specification for registers.  Aside from the different names than
421158115Sume   `re_registers', POSIX uses an array of structures, instead of a
422158115Sume   structure of arrays.  */
423158115Sumetypedef struct
424158115Sume{
425158115Sume  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
426158115Sume  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
427211276Sume} regmatch_t;
428211276Sume
/* Declarations for routines.  */

/* Every routine below is declared only once: _RE_ARGS wraps the
   argument types, keeping them when the compiler is ANSI (so the
   declaration is a prototype) and dropping them when it is not.  This
   clutters up the declarations a bit, but it avoids writing each of
   them twice.  */

#if !__STDC__

/* Not ANSI: declare with an empty argument list.  */
# define _RE_ARGS(args) ()

#else /* __STDC__ */

/* ANSI: keep the argument types.  */
# define _RE_ARGS(args) args

#endif /* __STDC__ */

447158115Sume/* Sets the current default syntax to SYNTAX, and return the old syntax.
448145279Sume   You can also simply assign to the `re_syntax_options' variable.  */
449157779Sumeextern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
450158115Sume
451145279Sume/* Compile the regular expression PATTERN, with length LENGTH
452158115Sume   and syntax given by the global `re_syntax_options', into the buffer
453157779Sume   BUFFER.  Return NULL if successful, and an error string if not.  */
454158115Sumeextern const char *re_compile_pattern
455158115Sume  _RE_ARGS ((const char *pattern, size_t length,
456157779Sume             struct re_pattern_buffer *buffer));
457158115Sume
458145279Sume
459145279Sume/* Compile a fastmap for the compiled pattern in BUFFER; used to
460158115Sume   accelerate searches.  Return 0 if successful and -2 if was an
461158115Sume   internal error.  */
462145279Sumeextern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
463157779Sume
464145279Sume
465157779Sume/* Search in the string STRING (with length LENGTH) for the pattern
466158115Sume   compiled into BUFFER.  Start searching at position START, for RANGE
467158115Sume   characters.  Return the starting position of the match, -1 for no
468157779Sume   match, or -2 for an internal error.  Also return register
469158115Sume   information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
470145279Sumeextern int re_search
471145279Sume  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
472158115Sume            int length, int start, int range, struct re_registers *regs));
473158115Sume
474158115Sume
475158115Sume/* Like `re_search', but search in the concatenation of STRING1 and
476158115Sume   STRING2.  Also, stop searching at index START + STOP.  */
477158115Sumeextern int re_search_2
478158115Sume  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
479158115Sume             int length1, const char *string2, int length2,
480158115Sume             int start, int range, struct re_registers *regs, int stop));
481158115Sume
482158115Sume
483158115Sume/* Like `re_search', but return how many characters in STRING the regexp
484158115Sume   in BUFFER matched, starting at position START.  */
485158115Sumeextern int re_match
486158115Sume  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
487158115Sume             int length, int start, struct re_registers *regs));
488158115Sume
489158115Sume
490158115Sume/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
491158115Sumeextern int re_match_2
492158115Sume  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
493158115Sume             int length1, const char *string2, int length2,
494158115Sume             int start, struct re_registers *regs, int stop));
495211276Sume
496211276Sume
497213453Sume/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
498211276Sume   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
499211276Sume   for recording register information.  STARTS and ENDS must be
500158115Sume   allocated with malloc, and must each be at least `NUM_REGS * sizeof
501158115Sume   (regoff_t)' bytes long.
502158115Sume
503158115Sume   If NUM_REGS == 0, then subsequent matches should allocate their own
504158115Sume   register data.
505158115Sume
506158115Sume   Unless this function is called, the first search or match using
507158115Sume   PATTERN_BUFFER will allocate its own register data, without
508158115Sume   freeing the old data.  */
509158115Sumeextern void re_set_registers
510158115Sume  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
511158115Sume             unsigned num_regs, regoff_t *starts, regoff_t *ends));
512158115Sume
/* 4.2 bsd compatibility.  These two are declared only when
   _REGEX_RE_COMP or _LIBC is defined, and never when _CRAY is
   defined.  */
#if (defined _REGEX_RE_COMP || defined _LIBC) && !defined _CRAY
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  So, unless
   __restrict is already a macro, compilers older than GCC 2.95 (or not
   GCC at all) get it mapped to "restrict" when that is available, and
   to nothing otherwise.  */
#if !defined __restrict \
    && (__GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95))
# if defined restrict || 199901L <= __STDC_VERSION__
#  define __restrict restrict
# else
#  define __restrict
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict].  __restrict_arr expands to
   __restrict for those compilers, except when compiling C++
   (__GNUG__), and to nothing everywhere else.  An existing definition
   is left alone.  */
#if !defined __restrict_arr
# if (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \
     && !defined (__GNUG__)
#  define __restrict_arr __restrict
# else
#  define __restrict_arr
# endif
#endif

543145279Sume/* POSIX compatibility.  */
544145279Sumeextern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
545145279Sume			      const char *__restrict __pattern,
546145279Sume			      int __cflags));
547157779Sume
548145279Sumeextern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
549145279Sume			      const char *__restrict __string, size_t __nmatch,
550145279Sume			      regmatch_t __pmatch[__restrict_arr],
551157779Sume			      int __eflags));
552145279Sume
553157779Sumeextern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
554145279Sume				  char *__errbuf, size_t __errbuf_size));
555
556extern void regfree _RE_ARGS ((regex_t *__preg));
557
558
/* Close the extern "C" block opened at the top of this header.  */
#ifdef __cplusplus
}
#endif	/* C++ */

/* End of the _REGEX_H include guard.  */
#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/
