1#ifndef _REGEX_H_
2#define	_REGEX_H_	/* never again */
3/*
4 * regular expressions
5 *
6 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
7 *
8 * Development of this software was funded, in part, by Cray Research Inc.,
9 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
10 * Corporation, none of whom are responsible for the results.  The author
11 * thanks all of them.
12 *
13 * Redistribution and use in source and binary forms -- with or without
14 * modification -- are permitted for any purpose, provided that
15 * redistributions in source form retain this entire copyright notice and
16 * indicate the origin and nature of any modifications.
17 *
18 * I'd appreciate being given credit for this package in the documentation
19 * of software which uses it, but that is not a requirement.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
22 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
23 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
24 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 *
33 *
34 * Prototypes etc. marked with "^" within comments get gathered up (and
35 * possibly edited) by the regfwd program and inserted near the bottom of
36 * this file.
37 *
38 * We offer the option of declaring one wide-character version of the
39 * RE functions as well as the char versions.  To do that, define
40 * __REG_WIDE_T to the type of wide characters (unfortunately, there
41 * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
42 * __REG_WIDE_EXEC to the names to be used for the compile and execute
43 * functions (suggestion:  re_Xcomp and re_Xexec, where X is a letter
44 * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
45 * For cranky old compilers, it may be necessary to do something like:
46 * #define	__REG_WIDE_COMPILE(a,b,c,d)	re_Xcomp(a,b,c,d)
47 * #define	__REG_WIDE_EXEC(a,b,c,d,e,f,g)	re_Xexec(a,b,c,d,e,f,g)
48 * rather than just #defining the names as parameterless macros.
49 *
50 * For some specialized purposes, it may be desirable to suppress the
51 * declarations of the "front end" functions, regcomp() and regexec(),
52 * or of the char versions of the compile and execute functions.  To
53 * suppress the front-end functions, define __REG_NOFRONT.  To suppress
54 * the char versions, define __REG_NOCHAR.
55 *
56 * The right place to do those defines (and some others you may want, see
57 * below) would be <sys/types.h>.  If you don't have control of that file,
58 * the right place to add your own defines to this file is marked below.
59 * This is normally done automatically, by the makefile and regmkhdr, based
60 * on the contents of regcustom.h.
61 */
62
63
64
65/*
66 * voodoo for C++
67 */
68#ifdef __cplusplus
69extern "C" {
70#endif
71
72
73
74/*
75 * Add your own defines, if needed, here.
76 */
77
78
79
/*
 * Location where a chunk of regcustom.h is automatically spliced into
 * this file (working from its prototype, regproto.h).
 *
 * As spliced in this copy, the chunk first #undefs every __REG_*
 * configuration macro, so nothing inherited from system headers survives,
 * and then selects this configuration:
 *   - wide characters are Tcl_UniChar, compiled and executed through
 *     TclReComp and TclReExec;
 *   - the regcomp()/regexec() front ends and the char versions are
 *     suppressed (__REG_NOFRONT, __REG_NOCHAR);
 *   - regfree and regerror are macro-renamed to TclReFree and TclReError,
 *     so the prototypes further down declare those names.
 * Since the chunk is spliced automatically, changes presumably belong in
 * regcustom.h rather than between the begin/end markers.
 */
/* --- begin --- */
/* ensure certain things don't sneak in from system headers */
#ifdef __REG_WIDE_T
#undef __REG_WIDE_T
#endif
#ifdef __REG_WIDE_COMPILE
#undef __REG_WIDE_COMPILE
#endif
#ifdef __REG_WIDE_EXEC
#undef __REG_WIDE_EXEC
#endif
#ifdef __REG_REGOFF_T
#undef __REG_REGOFF_T
#endif
#ifdef __REG_VOID_T
#undef __REG_VOID_T
#endif
#ifdef __REG_CONST
#undef __REG_CONST
#endif
#ifdef __REG_NOFRONT
#undef __REG_NOFRONT
#endif
#ifdef __REG_NOCHAR
#undef __REG_NOCHAR
#endif
/* interface types */
#define	__REG_WIDE_T	Tcl_UniChar
#define	__REG_REGOFF_T	long	/* not really right, but good enough... */
#define	__REG_VOID_T	VOID
#define	__REG_CONST	CONST
/* names and declarations */
#define	__REG_WIDE_COMPILE	TclReComp
#define	__REG_WIDE_EXEC		TclReExec
#define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
#define	__REG_NOCHAR		/* or the char versions */
#define	regfree		TclReFree
#define	regerror	TclReError
/* --- end --- */
123
124
125/*
126 * interface types etc.
127 */
128
/*
 * regoff_t is the signed offset type; it must be wide enough for both
 * off_t and ssize_t.  Plain long is only an educated guess, which is why
 * <sys/types.h> may substitute another type through __REG_REGOFF_T.
 */
#if !defined(__REG_REGOFF_T)
typedef long regoff_t;			/* default guess */
#else
typedef __REG_REGOFF_T regoff_t;	/* supplied override */
#endif
139
/*
 * re_void names the "returns nothing" type.  Old compilers without a
 * usable `void' can have <sys/types.h> supply a replacement through
 * __REG_VOID_T; everyone else gets plain void.
 */
#if !defined(__REG_VOID_T)
typedef void re_void;			/* the normal case */
#else
typedef __REG_VOID_T re_void;		/* supplied override */
#endif
149
/*
 * Old compilers again: __REG_CONST stands in for `const' in the
 * declarations.  <sys/types.h> may predefine it to a substitute;
 * when it has not, fall back to the real keyword.
 */
#if !defined(__REG_CONST)
#	define	__REG_CONST	const
#endif
157
158
159
160/*
161 * other interface types
162 */
163
/*
 * the biggie, a compiled RE (or rather, a front end to same)
 *
 * The REG_U* macros nested below are ordinary file-scope macros (the
 * preprocessor does not care that they sit inside the struct braces).
 * They are listed right after re_info and are distinct single-bit octal
 * values -- presumably the bits reported in re_info; confirm against the
 * compiler sources before relying on that.
 */
typedef struct {
	int re_magic;		/* magic number */
	size_t re_nsub;		/* number of subexpressions */
	long re_info;		/* information about RE */
#		define	REG_UBACKREF		000001
#		define	REG_ULOOKAHEAD		000002
#		define	REG_UBOUNDS		000004
#		define	REG_UBRACES		000010
#		define	REG_UBSALNUM		000020
#		define	REG_UPBOTCH		000040
#		define	REG_UBBS		000100
#		define	REG_UNONPOSIX		000200
#		define	REG_UUNSPEC		000400
#		define	REG_UUNPORT		001000
#		define	REG_ULOCALE		002000
#		define	REG_UEMPTYMATCH		004000
#		define	REG_UIMPOSSIBLE		010000
#		define	REG_USHORTEST		020000
	int re_csize;		/* sizeof(character) */
	char *re_endp;		/* backward compatibility kludge */
	/* the rest is opaque pointers to hidden innards */
	char *re_guts;		/* `char *' is more portable than `void *' */
	char *re_fns;		/* opaque, like re_guts */
} regex_t;
189
190/* result reporting (may acquire more fields later) */
191typedef struct {
192	regoff_t rm_so;		/* start of substring */
193	regoff_t rm_eo;		/* end of substring */
194} regmatch_t;
195
/*
 * supplementary control and reporting
 *
 * This is the rm_detail_t * parameter type of the execute functions
 * declared further down.  Its single field is tied to REG_EXPECT, the
 * compilation flag described as "report details on partial/limited
 * matches".
 */
typedef struct {
	regmatch_t rm_extend;	/* see REG_EXPECT */
} rm_detail_t;
200
201
202
203/*
204 * compilation
205 ^ #ifndef __REG_NOCHAR
206 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
207 ^ #endif
208 ^ #ifndef __REG_NOFRONT
209 ^ int regcomp(regex_t *, __REG_CONST char *, int);
210 ^ #endif
211 ^ #ifdef __REG_WIDE_T
212 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
213 ^ #endif
214 */
/* flag values for compilation, written in octal; composite values are noted */
#define	REG_BASIC	000000	/* BREs (convenience) */
#define	REG_EXTENDED	000001	/* EREs */
#define	REG_ADVF	000002	/* advanced features in EREs */
#define	REG_ADVANCED	000003	/* AREs (which are also EREs); == REG_EXTENDED|REG_ADVF */
#define	REG_QUOTE	000004	/* no special characters, none */
#define	REG_NOSPEC	REG_QUOTE	/* historical synonym */
#define	REG_ICASE	000010	/* ignore case */
#define	REG_NOSUB	000020	/* don't care about subexpressions */
#define	REG_EXPANDED	000040	/* expanded format, white space & comments */
#define	REG_NLSTOP	000100	/* \n doesn't match . or [^ ] */
#define	REG_NLANCH	000200	/* ^ matches after \n, $ before */
#define	REG_NEWLINE	000300	/* newlines are line terminators; == REG_NLSTOP|REG_NLANCH */
#define	REG_PEND	000400	/* ugh -- backward-compatibility hack */
#define	REG_EXPECT	001000	/* report details on partial/limited matches */
#define	REG_BOSONLY	002000	/* temporary kludge for BOS-only matches */
#define	REG_DUMP	004000	/* none of your business :-) */
#define	REG_FAKE	010000	/* none of your business :-) */
#define	REG_PROGRESS	020000	/* none of your business :-) */
233
234
235
236/*
237 * execution
238 ^ #ifndef __REG_NOCHAR
239 ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
240 ^				rm_detail_t *, size_t, regmatch_t [], int);
241 ^ #endif
242 ^ #ifndef __REG_NOFRONT
243 ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
244 ^ #endif
245 ^ #ifdef __REG_WIDE_T
246 ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
247 ^				rm_detail_t *, size_t, regmatch_t [], int);
248 ^ #endif
249 */
/*
 * Flag values for execution, written in octal.  They reuse the same low
 * bit positions as the compilation flags (REG_NOTBOL and REG_EXTENDED are
 * both 1, for instance), so a numeric value alone does not say which of
 * the two sets it belongs to.
 */
#define	REG_NOTBOL	0001	/* BOS is not BOL */
#define	REG_NOTEOL	0002	/* EOS is not EOL */
#define	REG_STARTEND	0004	/* backward compatibility kludge */
#define	REG_FTRACE	0010	/* none of your business */
#define	REG_MTRACE	0020	/* none of your business */
#define	REG_SMALL	0040	/* none of your business */
256
257
258
259/*
260 * misc generics (may be more functions here eventually)
261 ^ re_void regfree(regex_t *);
262 */
263
264
265
/*
 * error reporting
 * Be careful if modifying the list of error codes -- the table used by
 * regerror() is generated automatically from this file!
 *
 * NOTE(review): the codes are not contiguous -- 14 is unassigned (13 is
 * followed by 15), and REG_ATOI/REG_ITOA sit apart at 101/102.  Presumably
 * 14 is a retired code; confirm before reusing it.
 *
 * Note that there is no wide-char variant of regerror at this time; what
 * kind of character is used for error reports is independent of what kind
 * is used in matching.
 *
 ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
 */
#define	REG_OKAY	 0	/* no errors detected */
#define	REG_NOMATCH	 1	/* failed to match */
#define	REG_BADPAT	 2	/* invalid regexp */
#define	REG_ECOLLATE	 3	/* invalid collating element */
#define	REG_ECTYPE	 4	/* invalid character class */
#define	REG_EESCAPE	 5	/* invalid escape \ sequence */
#define	REG_ESUBREG	 6	/* invalid backreference number */
#define	REG_EBRACK	 7	/* brackets [] not balanced */
#define	REG_EPAREN	 8	/* parentheses () not balanced */
#define	REG_EBRACE	 9	/* braces {} not balanced */
#define	REG_BADBR	10	/* invalid repetition count(s) */
#define	REG_ERANGE	11	/* invalid character range */
#define	REG_ESPACE	12	/* out of memory */
#define	REG_BADRPT	13	/* quantifier operand invalid */
#define	REG_ASSERT	15	/* "can't happen" -- you found a bug */
#define	REG_INVARG	16	/* invalid argument to regex function */
#define	REG_MIXED	17	/* character widths of regex and string differ */
#define	REG_BADOPT	18	/* invalid embedded option */
#define	REG_ETOOBIG	19	/* nfa has too many states */
/* two specials for debugging and testing */
#define	REG_ATOI	101	/* convert error-code name to number */
#define	REG_ITOA	102	/* convert error-code number to name */
299
300
301
/*
 * the prototypes, as possibly munched by regfwd
 *
 * With __REG_NOCHAR, __REG_NOFRONT and __REG_WIDE_T all defined by the
 * chunk spliced in earlier in this file, only the wide-character compile
 * and execute functions, regfree and regerror are actually declared here.
 * _ANSI_ARGS_ is not defined in this header; it must come from a header
 * included beforehand.
 */
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
/* === regproto.h === */
#ifndef __REG_NOCHAR
int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
#endif
#ifndef __REG_NOFRONT
int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
#endif
#ifndef __REG_NOCHAR
int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
#ifndef __REG_NOFRONT
int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
re_void regfree _ANSI_ARGS_((regex_t *));
extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
330
331
332
333/*
334 * more C++ voodoo
335 */
336#ifdef __cplusplus
337}
338#endif
339
340
341
342#endif
343