1126209Sache/* Definitions for data structures and routines for the regular
2146040Stjr   expression library.
3146040Stjr   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
4146040Stjr   Free Software Foundation, Inc.
5146040Stjr   This file is part of the GNU C Library.
6126209Sache
7126209Sache   The GNU C Library is free software; you can redistribute it and/or
8146040Stjr   modify it under the terms of the GNU Lesser General Public
9146040Stjr   License as published by the Free Software Foundation; either
10146040Stjr   version 2.1 of the License, or (at your option) any later version.
11126209Sache
12126209Sache   The GNU C Library is distributed in the hope that it will be useful,
13126209Sache   but WITHOUT ANY WARRANTY; without even the implied warranty of
14126209Sache   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15146040Stjr   Lesser General Public License for more details.
16126209Sache
17146040Stjr   You should have received a copy of the GNU Lesser General Public
18146040Stjr   License along with the GNU C Library; if not, write to the Free
19146040Stjr   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20146040Stjr   02111-1307 USA.  */
21126209Sache
22126209Sache#ifndef _REGEX_H
23126209Sache#define _REGEX_H 1
24126209Sache
25146040Stjr#include <sys/types.h>
26146040Stjr
27126209Sache/* Allow the use in C++ code.  */
28126209Sache#ifdef __cplusplus
29126209Sacheextern "C" {
30126209Sache#endif
31126209Sache
32126209Sache/* POSIX says that <sys/types.h> must be included (by the caller) before
33126209Sache   <regex.h>.  */
34126209Sache
35126209Sache#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
36126209Sache/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
37126209Sache   should be there.  */
38126209Sache# include <stddef.h>
39126209Sache#endif
40126209Sache
41126209Sache/* The following two types must be signed and unsigned integer types
42126209Sache   wide enough to hold the value of a pointer.  For most ANSI compilers
43126209Sache   ptrdiff_t and size_t would likely be OK, but the size of those two
44126209Sache   types is 2 for Microsoft C.  Ugh... */
45126209Sachetypedef long int s_reg_t;
46126209Sachetypedef unsigned long int active_reg_t;
47126209Sache
48126209Sache/* The following bits are used to determine the regexp syntax we
49126209Sache   recognize.  The set/not-set meanings are chosen so that Emacs syntax
50126209Sache   remains the value 0.  The bits are given in alphabetical order, and
51126209Sache   the definitions shifted by one from the previous bit; thus, when we
52126209Sache   add or remove a bit, only one other definition need change.  */
53126209Sachetypedef unsigned long int reg_syntax_t;
54126209Sache
55126209Sache/* If this bit is not set, then \ inside a bracket expression is literal.
56126209Sache   If set, then such a \ quotes the following character.  */
57126209Sache#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
58126209Sache
59126209Sache/* If this bit is not set, then + and ? are operators, and \+ and \? are
60126209Sache     literals.
61126209Sache   If set, then \+ and \? are operators and + and ? are literals.  */
62126209Sache#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
63126209Sache
64126209Sache/* If this bit is set, then character classes are supported.  They are:
65126209Sache     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
66126209Sache     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
67126209Sache   If not set, then character classes are not supported.  */
68126209Sache#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
69126209Sache
70126209Sache/* If this bit is set, then ^ and $ are always anchors (outside bracket
71126209Sache     expressions, of course).
72126209Sache   If this bit is not set, then it depends:
73126209Sache        ^  is an anchor if it is at the beginning of a regular
74126209Sache           expression or after an open-group or an alternation operator;
75126209Sache        $  is an anchor if it is at the end of a regular expression, or
76126209Sache           before a close-group or an alternation operator.
77126209Sache
78126209Sache   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
79126209Sache   POSIX draft 11.2 says that * etc. in leading positions is undefined.
80126209Sache   We already implemented a previous draft which made those constructs
81126209Sache   invalid, though, so we haven't changed the code back.  */
82126209Sache#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
83126209Sache
84126209Sache/* If this bit is set, then special characters are always special
85126209Sache     regardless of where they are in the pattern.
86126209Sache   If this bit is not set, then special characters are special only in
87126209Sache     some contexts; otherwise they are ordinary.  Specifically,
88126209Sache     * + ? and intervals are only special when not after the beginning,
89126209Sache     open-group, or alternation operator.  */
90126209Sache#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
91126209Sache
92126209Sache/* If this bit is set, then *, +, ?, and { cannot be first in an re or
93126209Sache     immediately after an alternation or begin-group operator.  */
94126209Sache#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
95126209Sache
96126209Sache/* If this bit is set, then . matches newline.
97126209Sache   If not set, then it doesn't.  */
98126209Sache#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
99126209Sache
100126209Sache/* If this bit is set, then . doesn't match NUL.
101126209Sache   If not set, then it does.  */
102126209Sache#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
103126209Sache
104126209Sache/* If this bit is set, nonmatching lists [^...] do not match newline.
105126209Sache   If not set, they do.  */
106126209Sache#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
107126209Sache
108126209Sache/* If this bit is set, either \{...\} or {...} defines an
109126209Sache     interval, depending on RE_NO_BK_BRACES.
110126209Sache   If not set, \{, \}, {, and } are literals.  */
111126209Sache#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
112126209Sache
113126209Sache/* If this bit is set, +, ? and | aren't recognized as operators.
114126209Sache   If not set, they are.  */
115126209Sache#define RE_LIMITED_OPS (RE_INTERVALS << 1)
116126209Sache
117126209Sache/* If this bit is set, newline is an alternation operator.
118126209Sache   If not set, newline is literal.  */
119126209Sache#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
120126209Sache
121126209Sache/* If this bit is set, then `{...}' defines an interval, and \{ and \}
122126209Sache     are literals.
123126209Sache  If not set, then `\{...\}' defines an interval.  */
124126209Sache#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
125126209Sache
126126209Sache/* If this bit is set, (...) defines a group, and \( and \) are literals.
127126209Sache   If not set, \(...\) defines a group, and ( and ) are literals.  */
128126209Sache#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
129126209Sache
130126209Sache/* If this bit is set, then \<digit> matches <digit>.
131126209Sache   If not set, then \<digit> is a back-reference.  */
132126209Sache#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
133126209Sache
134126209Sache/* If this bit is set, then | is an alternation operator, and \| is literal.
135126209Sache   If not set, then \| is an alternation operator, and | is literal.  */
136126209Sache#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
137126209Sache
138126209Sache/* If this bit is set, then an ending range point collating lower
139126209Sache     than the starting range point, as in [z-a], is invalid.
140126209Sache   If not set, then when the ending range point collates lower than
141126209Sache     the starting range point, the range is ignored.  */
142126209Sache#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
143126209Sache
144126209Sache/* If this bit is set, then an unmatched ) is ordinary.
145126209Sache   If not set, then an unmatched ) is invalid.  */
146126209Sache#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
147126209Sache
148126209Sache/* If this bit is set, succeed as soon as we match the whole pattern,
149126209Sache   without further backtracking.  */
150126209Sache#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
151126209Sache
152126209Sache/* If this bit is set, do not process the GNU regex operators.
153126209Sache   If not set, then the GNU regex operators are recognized. */
154126209Sache#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
155126209Sache
156126209Sache/* If this bit is set, turn on internal regex debugging.
157126209Sache   If not set, and debugging was on, turn it off.
158126209Sache   This only works if regex.c is compiled -DDEBUG.
159126209Sache   We define this bit always, so that all that's needed to turn on
160126209Sache   debugging is to recompile regex.c; the calling code can always have
161126209Sache   this bit set, and it won't affect anything in the normal case. */
162126209Sache#define RE_DEBUG (RE_NO_GNU_OPS << 1)
163126209Sache
164131543Stjr/* If this bit is set, a syntactically invalid interval is treated as
165131543Stjr   a string of ordinary characters.  For example, the ERE 'a{1' is
166131543Stjr   treated as 'a\{1'.  */
167131543Stjr#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
168131543Stjr
169146040Stjr/* If this bit is set, then ignore case when matching.
170146040Stjr   If not set, then case is significant.  */
171146040Stjr#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
172146040Stjr
173146040Stjr/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
174146040Stjr   for ^, because it is difficult to scan the regex backwards to find
175146040Stjr   whether ^ should be special.  */
176146040Stjr#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
177146040Stjr
178146040Stjr/* If this bit is set, then \{ cannot be first in a BRE or
179146040Stjr   immediately after an alternation or begin-group operator.  */
180146040Stjr#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
181146040Stjr
182146040Stjr/* If this bit is set, then no_sub will be set to 1 during
183146040Stjr   re_compile_pattern.  */
184146040Stjr#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
185146040Stjr
186126209Sache/* This global variable defines the particular regexp syntax to use (for
187126209Sache   some interfaces).  When a regexp is compiled, the syntax used is
188126209Sache   stored in the pattern buffer, so changing this does not affect
189126209Sache   already-compiled regexps.  */
190126209Sacheextern reg_syntax_t re_syntax_options;
191126209Sache
192126209Sache/* Define combinations of the above bits for the standard possibilities.
193126209Sache   (The [[[ comments delimit what gets put into the Texinfo file, so
194126209Sache   don't delete them!)  */
195126209Sache/* [[[begin syntaxes]]] */
196126209Sache#define RE_SYNTAX_EMACS 0
197126209Sache
198126209Sache#define RE_SYNTAX_AWK							\
199126209Sache  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
200126209Sache   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
201126209Sache   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
202126209Sache   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
203126209Sache   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
204126209Sache
205126209Sache#define RE_SYNTAX_GNU_AWK						\
206126209Sache  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
207146040Stjr   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
208146040Stjr       | RE_CONTEXT_INVALID_OPS ))
209126209Sache
210126209Sache#define RE_SYNTAX_POSIX_AWK 						\
211126209Sache  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
212126209Sache   | RE_INTERVALS	    | RE_NO_GNU_OPS)
213126209Sache
214126209Sache#define RE_SYNTAX_GREP							\
215126209Sache  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
216126209Sache   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
217126209Sache   | RE_NEWLINE_ALT)
218126209Sache
219126209Sache#define RE_SYNTAX_EGREP							\
220126209Sache  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
221126209Sache   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
222126209Sache   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
223126209Sache   | RE_NO_BK_VBAR)
224126209Sache
225126209Sache#define RE_SYNTAX_POSIX_EGREP						\
226131543Stjr  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
227131543Stjr   | RE_INVALID_INTERVAL_ORD)
228126209Sache
229126209Sache/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
230126209Sache#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
231126209Sache
232126209Sache#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
233126209Sache
234126209Sache/* Syntax bits common to both basic and extended POSIX regex syntax.  */
235126209Sache#define _RE_SYNTAX_POSIX_COMMON						\
236126209Sache  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
237126209Sache   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
238126209Sache
239126209Sache#define RE_SYNTAX_POSIX_BASIC						\
240146040Stjr  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
241126209Sache
242126209Sache/* Differs from ..._POSIX_BASIC in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS
243126209Sache   (i.e., \? \+ \| are not recognized) and RE_CONTEXT_INVALID_DUP is dropped.
244126209Sache   This isn't minimal, since other operators, such as \`, aren't disabled.  */
245126209Sache#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
246126209Sache  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
247126209Sache
248126209Sache#define RE_SYNTAX_POSIX_EXTENDED					\
249131543Stjr  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
250131543Stjr   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
251131543Stjr   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
252131543Stjr   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
253126209Sache
254131543Stjr/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
255131543Stjr   removed and RE_NO_BK_REFS is added.  */
256126209Sache#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
257126209Sache  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
258126209Sache   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
259126209Sache   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
260126209Sache   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
261126209Sache/* [[[end syntaxes]]] */
262126209Sache
263126209Sache/* Maximum number of duplicates an interval can allow.  Some systems
264126209Sache   (erroneously) define this in other header files, but we want our
265126209Sache   value, so remove any previous define.  */
266126209Sache#ifdef RE_DUP_MAX
267126209Sache# undef RE_DUP_MAX
268126209Sache#endif
269126209Sache/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
270126209Sache#define RE_DUP_MAX (0x7fff)
271126209Sache
272126209Sache
273126209Sache/* POSIX `cflags' bits (i.e., information for `regcomp').  */
274126209Sache
275126209Sache/* If this bit is set, then use extended regular expression syntax.
276126209Sache   If not set, then use basic regular expression syntax.  */
277126209Sache#define REG_EXTENDED 1
278126209Sache
279126209Sache/* If this bit is set, then ignore case when matching.
280126209Sache   If not set, then case is significant.  */
281126209Sache#define REG_ICASE (REG_EXTENDED << 1)
282126209Sache
283126209Sache/* If this bit is set, then anchors match at newline characters in
284126209Sache     the string.
285126209Sache   If not set, then anchors do not match at newlines.  */
286126209Sache#define REG_NEWLINE (REG_ICASE << 1)
287126209Sache
288126209Sache/* If this bit is set, then report only success or fail in regexec.
289126209Sache   If not set, then returns differ between not matching and errors.  */
290126209Sache#define REG_NOSUB (REG_NEWLINE << 1)
291126209Sache
292126209Sache
293126209Sache/* POSIX `eflags' bits (i.e., information for regexec).  */
294126209Sache
295126209Sache/* If this bit is set, then the beginning-of-line operator doesn't match
296126209Sache     the beginning of the string (presumably because it's not the
297126209Sache     beginning of a line).
298126209Sache   If not set, then the beginning-of-line operator does match the
299126209Sache     beginning of the string.  */
300126209Sache#define REG_NOTBOL 1
301126209Sache
302126209Sache/* Like REG_NOTBOL, except for the end-of-line.  */
303126209Sache#define REG_NOTEOL (1 << 1)
304126209Sache
305146040Stjr/* Use PMATCH[0] to delimit the start and end of the search in the
306146040Stjr   buffer.  */
307146040Stjr#define REG_STARTEND (1 << 2)
308126209Sache
309146040Stjr
310126209Sache/* If any error codes are removed, changed, or added, update the
311126209Sache   `re_error_msg' table in regex.c.  */
312126209Sachetypedef enum
313126209Sache{
314126209Sache#ifdef _XOPEN_SOURCE
315126209Sache  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
316126209Sache#endif
317126209Sache
318126209Sache  REG_NOERROR = 0,	/* Success.  */
319126209Sache  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
320126209Sache
321126209Sache  /* POSIX regcomp return error codes.  (In the order listed in the
322126209Sache     standard.)  */
323126209Sache  REG_BADPAT,		/* Invalid pattern.  */
324146040Stjr  REG_ECOLLATE,		/* Invalid collating element.  */
325126209Sache  REG_ECTYPE,		/* Invalid character class name.  */
326126209Sache  REG_EESCAPE,		/* Trailing backslash.  */
327126209Sache  REG_ESUBREG,		/* Invalid back reference.  */
328126209Sache  REG_EBRACK,		/* Unmatched left bracket.  */
329126209Sache  REG_EPAREN,		/* Parenthesis imbalance.  */
330126209Sache  REG_EBRACE,		/* Unmatched \{.  */
331126209Sache  REG_BADBR,		/* Invalid contents of \{\}.  */
332126209Sache  REG_ERANGE,		/* Invalid range end.  */
333126209Sache  REG_ESPACE,		/* Ran out of memory.  */
334126209Sache  REG_BADRPT,		/* No preceding re for repetition op.  */
335126209Sache
336126209Sache  /* Error codes we've added.  */
337126209Sache  REG_EEND,		/* Premature end.  */
338126209Sache  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
339126209Sache  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
340126209Sache} reg_errcode_t;
341126209Sache
342126209Sache/* This data structure represents a compiled pattern.  Before calling
343126209Sache   the pattern compiler, the fields `buffer', `allocated', `fastmap',
344126209Sache   `translate', and `no_sub' can be set.  After the pattern has been
345126209Sache   compiled, the `re_nsub' field is available.  All other fields are
346126209Sache   private to the regex routines.  */
347126209Sache
348126209Sache#ifndef RE_TRANSLATE_TYPE
349126209Sache# define RE_TRANSLATE_TYPE char *
350126209Sache#endif
351126209Sache
352126209Sachestruct re_pattern_buffer
353126209Sache{
354126209Sache/* [[[begin pattern_buffer]]] */
355126209Sache	/* Space that holds the compiled pattern.  It is declared as
356126209Sache          `unsigned char *' because its elements are
357126209Sache           sometimes used as array indexes.  */
358126209Sache  unsigned char *buffer;
359126209Sache
360126209Sache	/* Number of bytes to which `buffer' points.  */
361126209Sache  unsigned long int allocated;
362126209Sache
363126209Sache	/* Number of bytes actually used in `buffer'.  */
364126209Sache  unsigned long int used;
365126209Sache
366126209Sache        /* Syntax setting with which the pattern was compiled.  */
367126209Sache  reg_syntax_t syntax;
368126209Sache
369126209Sache        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
370126209Sache           the fastmap, if there is one, to skip over impossible
371126209Sache           starting points for matches.  */
372126209Sache  char *fastmap;
373126209Sache
374126209Sache        /* Either a translate table to apply to all characters before
375126209Sache           comparing them, or zero for no translation.  The translation
376126209Sache           is applied to a pattern when it is compiled and to a string
377126209Sache           when it is matched.  */
378126209Sache  RE_TRANSLATE_TYPE translate;
379126209Sache
380126209Sache	/* Number of subexpressions found by the compiler.  */
381126209Sache  size_t re_nsub;
382126209Sache
383126209Sache        /* Zero if this pattern cannot match the empty string, one otherwise.
384126209Sache           Well, in truth it's used only in `re_search_2', to see
385126209Sache           whether or not we should use the fastmap, so we don't set
386126209Sache           this absolutely perfectly; see `re_compile_fastmap' (the
387126209Sache           `duplicate' case).  */
388126209Sache  unsigned can_be_null : 1;
389126209Sache
390126209Sache        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
391126209Sache             for `max (RE_NREGS, re_nsub + 1)' groups.
392126209Sache           If REGS_REALLOCATE, reallocate space if necessary.
393126209Sache           If REGS_FIXED, use what's there.  */
394126209Sache#define REGS_UNALLOCATED 0
395126209Sache#define REGS_REALLOCATE 1
396126209Sache#define REGS_FIXED 2
397126209Sache  unsigned regs_allocated : 2;
398126209Sache
399126209Sache        /* Set to zero when `regex_compile' compiles a pattern; set to one
400126209Sache           by `re_compile_fastmap' if it updates the fastmap.  */
401126209Sache  unsigned fastmap_accurate : 1;
402126209Sache
403126209Sache        /* If set, `re_match_2' does not return information about
404126209Sache           subexpressions.  RE_NO_SUB makes re_compile_pattern set it.  */
405126209Sache  unsigned no_sub : 1;
406126209Sache
407126209Sache        /* If set, a beginning-of-line anchor doesn't match at the
408126209Sache           beginning of the string.  */
409126209Sache  unsigned not_bol : 1;
410126209Sache
411126209Sache        /* Similarly for an end-of-line anchor.  */
412126209Sache  unsigned not_eol : 1;
413126209Sache
414126209Sache        /* If true, an anchor at a newline matches.  */
415126209Sache  unsigned newline_anchor : 1;
416126209Sache
417126209Sache/* [[[end pattern_buffer]]] */
418126209Sache};
419126209Sache
420126209Sachetypedef struct re_pattern_buffer regex_t;
421126209Sache
422126209Sache/* Type for byte offsets within the string.  POSIX mandates this.  */
423126209Sachetypedef int regoff_t;
424126209Sache
425126209Sache
426126209Sache/* This is the structure we store register match data in.  See
427126209Sache   regex.texinfo for a full description of what registers match.  */
428126209Sachestruct re_registers
429126209Sache{
430126209Sache  unsigned num_regs;  /* Number of registers START and END can hold.  */
431126209Sache  regoff_t *start;    /* NUM_REGS byte offsets; presumably each register's match start.  */
432126209Sache  regoff_t *end;      /* NUM_REGS byte offsets; presumably each register's match end.  */
433126209Sache};
434126209Sache
435126209Sache
436126209Sache/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
437126209Sache   `re_match_2' returns information about at least this many registers
438126209Sache   the first time a `regs' structure is passed.  */
439126209Sache#ifndef RE_NREGS
440126209Sache# define RE_NREGS 30
441126209Sache#endif
442126209Sache
443126209Sache
444126209Sache/* POSIX specification for registers.  Aside from the different names than
445126209Sache   `re_registers', POSIX uses an array of structures, instead of a
446126209Sache   structure of arrays.  */
447126209Sachetypedef struct
448126209Sache{
449126209Sache  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
450126209Sache  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
451126209Sache} regmatch_t;
452126209Sache
453126209Sache/* Declarations for routines.  */
454126209Sache
455126209Sache/* To avoid duplicating every routine declaration -- once with a
456126209Sache   prototype (if we are ANSI), and once without (if we aren't) -- we
457126209Sache   use the following macro to declare argument types.  This
458126209Sache   unfortunately clutters up the declarations a bit, but I think it's
459126209Sache   worth it.  */
460126209Sache
461126209Sache#if __STDC__
462126209Sache
463126209Sache# define _RE_ARGS(args) args
464126209Sache
465126209Sache#else /* not __STDC__ */
466126209Sache
467126209Sache# define _RE_ARGS(args) ()
468126209Sache
469126209Sache#endif /* not __STDC__ */
470126209Sache
471126209Sache/* Sets the current default syntax to SYNTAX, and return the old syntax.
472126209Sache   You can also simply assign to the `re_syntax_options' variable.  */
473126209Sacheextern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
474126209Sache
475126209Sache/* Compile the regular expression PATTERN, with length LENGTH
476126209Sache   and syntax given by the global `re_syntax_options', into the buffer
477126209Sache   BUFFER.  Return NULL if successful, and an error string if not.  */
478126209Sacheextern const char *re_compile_pattern
479126209Sache  _RE_ARGS ((const char *pattern, size_t length,
480126209Sache             struct re_pattern_buffer *buffer));
481126209Sache
482126209Sache
483126209Sache/* Compile a fastmap for the compiled pattern in BUFFER; used to
484126209Sache   accelerate searches.  Return 0 if successful and -2 if there was an
485126209Sache   internal error.  */
486126209Sacheextern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
487126209Sache
488126209Sache
489126209Sache/* Search in the string STRING (with length LENGTH) for the pattern
490126209Sache   compiled into BUFFER.  Start searching at position START, for RANGE
491126209Sache   characters.  Return the starting position of the match, -1 for no
492126209Sache   match, or -2 for an internal error.  Also return register
493126209Sache   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
494126209Sacheextern int re_search
495126209Sache  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
496126209Sache            int length, int start, int range, struct re_registers *regs));
497126209Sache
498126209Sache
499126209Sache/* Like `re_search', but search in the concatenation of STRING1 and
500126209Sache   STRING2.  Also, stop searching at index START + STOP.  */
501126209Sacheextern int re_search_2
502126209Sache  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
503126209Sache             int length1, const char *string2, int length2,
504126209Sache             int start, int range, struct re_registers *regs, int stop));
505126209Sache
506126209Sache
507126209Sache/* Like `re_search', but return how many characters in STRING the regexp
508126209Sache   in BUFFER matched, starting at position START.  */
509126209Sacheextern int re_match
510126209Sache  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
511126209Sache             int length, int start, struct re_registers *regs));
512126209Sache
513126209Sache
514126209Sache/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
515126209Sacheextern int re_match_2
516126209Sache  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
517126209Sache             int length1, const char *string2, int length2,
518126209Sache             int start, struct re_registers *regs, int stop));
519126209Sache
520126209Sache
521126209Sache/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
522126209Sache   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
523126209Sache   for recording register information.  STARTS and ENDS must be
524126209Sache   allocated with malloc, and must each be at least `NUM_REGS * sizeof
525126209Sache   (regoff_t)' bytes long.
526126209Sache
527126209Sache   If NUM_REGS == 0, then subsequent matches should allocate their own
528126209Sache   register data.
529126209Sache
530126209Sache   Unless this function is called, the first search or match using
531126209Sache   PATTERN_BUFFER will allocate its own register data, without
532126209Sache   freeing the old data.  */
533126209Sacheextern void re_set_registers
534126209Sache  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
535126209Sache             unsigned num_regs, regoff_t *starts, regoff_t *ends));
536126209Sache
537126209Sache#if defined _REGEX_RE_COMP || defined _LIBC
538126209Sache# ifndef _CRAY
539126209Sache/* 4.2 bsd compatibility.  */
540126209Sacheextern char *re_comp _RE_ARGS ((const char *));
541126209Sacheextern int re_exec _RE_ARGS ((const char *));
542126209Sache# endif
543126209Sache#endif
544126209Sache
545131543Stjr/* GCC 2.95 and later have "__restrict"; C99 compilers have
546131543Stjr   "restrict", and "configure" may have defined "restrict".  */
547131543Stjr#ifndef __restrict
548131543Stjr# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
549131543Stjr#  if defined restrict || 199901L <= __STDC_VERSION__
550131543Stjr#   define __restrict restrict
551131543Stjr#  else
552131543Stjr#   define __restrict
553131543Stjr#  endif
554131543Stjr# endif
555131543Stjr#endif
556146040Stjr/* gcc 3.1 and up support the [restrict] syntax.  */
557146040Stjr#ifndef __restrict_arr
558146040Stjr# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
559146040Stjr#  define __restrict_arr __restrict
560146040Stjr# else
561146040Stjr#  define __restrict_arr
562146040Stjr# endif
563146040Stjr#endif
564131543Stjr
565126209Sache/* POSIX compatibility.  */
566131543Stjrextern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
567131543Stjr			      const char *__restrict __pattern,
568126209Sache			      int __cflags));
569126209Sache
570131543Stjrextern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
571131543Stjr			      const char *__restrict __string, size_t __nmatch,
572131543Stjr			      regmatch_t __pmatch[__restrict_arr],
573131543Stjr			      int __eflags));
574126209Sache
575126209Sacheextern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
576126209Sache				  char *__errbuf, size_t __errbuf_size));
577126209Sache
578126209Sacheextern void regfree _RE_ARGS ((regex_t *__preg));
579126209Sache
580126209Sache
581126209Sache#ifdef __cplusplus
582126209Sache}
583126209Sache#endif	/* C++ */
584126209Sache
585126209Sache#endif /* regex.h */
586126209Sache
587126209Sache/*
588126209SacheLocal variables:
589126209Sachemake-backup-files: t
590126209Sacheversion-control: t
591126209Sachetrim-versions-without-asking: nil
592126209SacheEnd:
593126209Sache*/
594