1#ifndef _REGEX_H_ 2#define _REGEX_H_ /* never again */ 3/* 4 * regular expressions 5 * 6 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 7 * 8 * Development of this software was funded, in part, by Cray Research Inc., 9 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 10 * Corporation, none of whom are responsible for the results. The author 11 * thanks all of them. 12 * 13 * Redistribution and use in source and binary forms -- with or without 14 * modification -- are permitted for any purpose, provided that 15 * redistributions in source form retain this entire copyright notice and 16 * indicate the origin and nature of any modifications. 17 * 18 * I'd appreciate being given credit for this package in the documentation of 19 * software which uses it, but that is not a requirement. 20 * 21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 22 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 23 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 24 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 27 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 28 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 * 32 * 33 * Prototypes etc. marked with "^" within comments get gathered up (and 34 * possibly edited) by the regfwd program and inserted near the bottom of this 35 * file. 36 * 37 * We offer the option of declaring one wide-character version of the RE 38 * functions as well as the char versions. To do that, define __REG_WIDE_T to 39 * the type of wide characters (unfortunately, there is no consensus that 40 * wchar_t is suitable) and __REG_WIDE_COMPILE and __REG_WIDE_EXEC to the 41 * names to be used for the compile and execute functions (suggestion: 42 * re_Xcomp and re_Xexec, where X is a letter suggestive of the wide type, 43 * e.g. re_ucomp and re_uexec for Unicode). For cranky old compilers, it may 44 * be necessary to do something like: 45 * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) 46 * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) 47 * rather than just #defining the names as parameterless macros. 48 * 49 * For some specialized purposes, it may be desirable to suppress the 50 * declarations of the "front end" functions, regcomp() and regexec(), or of 51 * the char versions of the compile and execute functions. To suppress the 52 * front-end functions, define __REG_NOFRONT. To suppress the char versions, 53 * define __REG_NOCHAR. 54 * 55 * The right place to do those defines (and some others you may want, see 56 * below) would be <sys/types.h>. If you don't have control of that file, the 57 * right place to add your own defines to this file is marked below. This is 58 * normally done automatically, by the makefile and regmkhdr, based on the 59 * contents of regcustom.h. 60 */ 61 62/* 63 * voodoo for C++ 64 */ 65#ifdef __cplusplus 66extern "C" { 67#endif 68 69/* 70 * Add your own defines, if needed, here. 71 */ 72 73/* 74 * Location where a chunk of regcustom.h is automatically spliced into this 75 * file (working from its prototype, regproto.h). 76 */ 77 78/* --- begin --- */ 79/* ensure certain things don't sneak in from system headers */ 80#ifdef __REG_WIDE_T 81#undef __REG_WIDE_T 82#endif 83#ifdef __REG_WIDE_COMPILE 84#undef __REG_WIDE_COMPILE 85#endif 86#ifdef __REG_WIDE_EXEC 87#undef __REG_WIDE_EXEC 88#endif 89#ifdef __REG_REGOFF_T 90#undef __REG_REGOFF_T 91#endif 92#ifdef __REG_VOID_T 93#undef __REG_VOID_T 94#endif 95#ifdef __REG_CONST 96#undef __REG_CONST 97#endif 98#ifdef __REG_NOFRONT 99#undef __REG_NOFRONT 100#endif 101#ifdef __REG_NOCHAR 102#undef __REG_NOCHAR 103#endif 104/* interface types */ 105#define __REG_WIDE_T Tcl_UniChar 106#define __REG_REGOFF_T long /* not really right, but good enough... */ 107#define __REG_VOID_T VOID 108#define __REG_CONST CONST 109/* names and declarations */ 110#define __REG_WIDE_COMPILE TclReComp 111#define __REG_WIDE_EXEC TclReExec 112#define __REG_NOFRONT /* don't want regcomp() and regexec() */ 113#define __REG_NOCHAR /* or the char versions */ 114#define regfree TclReFree 115#define regerror TclReError 116/* --- end --- */ 117 118/* 119 * interface types etc. 120 */ 121 122/* 123 * regoff_t has to be large enough to hold either off_t or ssize_t, and must 124 * be signed; it's only a guess that long is suitable, so we offer 125 * <sys/types.h> an override. 126 */ 127#ifdef __REG_REGOFF_T 128typedef __REG_REGOFF_T regoff_t; 129#else 130typedef long regoff_t; 131#endif 132 133/* 134 * For benefit of old compilers, we offer <sys/types.h> the option of 135 * overriding the `void' type used to declare nonexistent return types. 136 */ 137#ifdef __REG_VOID_T 138typedef __REG_VOID_T re_void; 139#else 140typedef void re_void; 141#endif 142 143/* 144 * Also for benefit of old compilers, <sys/types.h> can supply a macro which 145 * expands to a substitute for `const'. 146 */ 147#ifndef __REG_CONST 148#define __REG_CONST const 149#endif 150 151 152 153/* 154 * other interface types 155 */ 156 157/* the biggie, a compiled RE (or rather, a front end to same) */ 158typedef struct { 159 int re_magic; /* magic number */ 160 size_t re_nsub; /* number of subexpressions */ 161 long re_info; /* information about RE */ 162#define REG_UBACKREF 000001 163#define REG_ULOOKAHEAD 000002 164#define REG_UBOUNDS 000004 165#define REG_UBRACES 000010 166#define REG_UBSALNUM 000020 167#define REG_UPBOTCH 000040 168#define REG_UBBS 000100 169#define REG_UNONPOSIX 000200 170#define REG_UUNSPEC 000400 171#define REG_UUNPORT 001000 172#define REG_ULOCALE 002000 173#define REG_UEMPTYMATCH 004000 174#define REG_UIMPOSSIBLE 010000 175#define REG_USHORTEST 020000 176 int re_csize; /* sizeof(character) */ 177 char *re_endp; /* backward compatibility kludge */ 178 /* the rest is opaque pointers to hidden innards */ 179 char *re_guts; /* `char *' is more portable than `void *' */ 180 char *re_fns; 181} regex_t; 182 183/* result reporting (may acquire more fields later) */ 184typedef struct { 185 regoff_t rm_so; /* start of substring */ 186 regoff_t rm_eo; /* end of substring */ 187} regmatch_t; 188 189/* supplementary control and reporting */ 190typedef struct { 191 regmatch_t rm_extend; /* see REG_EXPECT */ 192} rm_detail_t; 193 194/* 195 * compilation 196 ^ #ifndef __REG_NOCHAR 197 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); 198 ^ #endif 199 ^ #ifndef __REG_NOFRONT 200 ^ int regcomp(regex_t *, __REG_CONST char *, int); 201 ^ #endif 202 ^ #ifdef __REG_WIDE_T 203 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); 204 ^ #endif 205 */ 206#define REG_BASIC 000000 /* BREs (convenience) */ 207#define REG_EXTENDED 000001 /* EREs */ 208#define REG_ADVF 000002 /* advanced features in EREs */ 209#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ 210#define REG_QUOTE 000004 /* no special characters, none */ 211#define REG_NOSPEC REG_QUOTE /* historical synonym */ 212#define REG_ICASE 000010 /* ignore case */ 213#define REG_NOSUB 000020 /* don't care about subexpressions */ 214#define REG_EXPANDED 000040 /* expanded format, white space & comments */ 215#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ 216#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ 217#define REG_NEWLINE 000300 /* newlines are line terminators */ 218#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ 219#define REG_EXPECT 001000 /* report details on partial/limited matches */ 220#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ 221#define REG_DUMP 004000 /* none of your business :-) */ 222#define REG_FAKE 010000 /* none of your business :-) */ 223#define REG_PROGRESS 020000 /* none of your business :-) */ 224 225/* 226 * execution 227 ^ #ifndef __REG_NOCHAR 228 ^ int re_exec(regex_t *, __REG_CONST char *, size_t, 229 ^ rm_detail_t *, size_t, regmatch_t [], int); 230 ^ #endif 231 ^ #ifndef __REG_NOFRONT 232 ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); 233 ^ #endif 234 ^ #ifdef __REG_WIDE_T 235 ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, 236 ^ rm_detail_t *, size_t, regmatch_t [], int); 237 ^ #endif 238 */ 239#define REG_NOTBOL 0001 /* BOS is not BOL */ 240#define REG_NOTEOL 0002 /* EOS is not EOL */ 241#define REG_STARTEND 0004 /* backward compatibility kludge */ 242#define REG_FTRACE 0010 /* none of your business */ 243#define REG_MTRACE 0020 /* none of your business */ 244#define REG_SMALL 0040 /* none of your business */ 245 246/* 247 * misc generics (may be more functions here eventually) 248 ^ re_void regfree(regex_t *); 249 */ 250 251/* 252 * error reporting 253 * Be careful if modifying the list of error codes -- the table used by 254 * regerror() is generated automatically from this file! 255 * 256 * Note that there is no wide-char variant of regerror at this time; what kind 257 * of character is used for error reports is independent of what kind is used 258 * in matching. 259 * 260 ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); 261 */ 262#define REG_OKAY 0 /* no errors detected */ 263#define REG_NOMATCH 1 /* failed to match */ 264#define REG_BADPAT 2 /* invalid regexp */ 265#define REG_ECOLLATE 3 /* invalid collating element */ 266#define REG_ECTYPE 4 /* invalid character class */ 267#define REG_EESCAPE 5 /* invalid escape \ sequence */ 268#define REG_ESUBREG 6 /* invalid backreference number */ 269#define REG_EBRACK 7 /* brackets [] not balanced */ 270#define REG_EPAREN 8 /* parentheses () not balanced */ 271#define REG_EBRACE 9 /* braces {} not balanced */ 272#define REG_BADBR 10 /* invalid repetition count(s) */ 273#define REG_ERANGE 11 /* invalid character range */ 274#define REG_ESPACE 12 /* out of memory */ 275#define REG_BADRPT 13 /* quantifier operand invalid */ 276#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ 277#define REG_INVARG 16 /* invalid argument to regex function */ 278#define REG_MIXED 17 /* character widths of regex and string differ */ 279#define REG_BADOPT 18 /* invalid embedded option */ 280#define REG_ETOOBIG 19 /* nfa has too many states */ 281/* two specials for debugging and testing */ 282#define REG_ATOI 101 /* convert error-code name to number */ 283#define REG_ITOA 102 /* convert error-code number to name */ 284 285/* 286 * the prototypes, as possibly munched by regfwd 287 */ 288/* =====^!^===== begin forwards =====^!^===== */ 289/* automatically gathered by fwd; do not hand-edit */ 290/* === regproto.h === */ 291#ifndef __REG_NOCHAR 292int re_comp(regex_t *, __REG_CONST char *, size_t, int); 293#endif 294#ifndef __REG_NOFRONT 295int regcomp(regex_t *, __REG_CONST char *, int); 296#endif 297#ifdef __REG_WIDE_T 298MODULE_SCOPE int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); 299#endif 300#ifndef __REG_NOCHAR 301int re_exec(regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int); 302#endif 303#ifndef __REG_NOFRONT 304int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); 305#endif 306#ifdef __REG_WIDE_T 307MODULE_SCOPE int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int); 308#endif 309MODULE_SCOPE re_void regfree(regex_t *); 310MODULE_SCOPE size_t regerror(int, __REG_CONST regex_t *, char *, size_t); 311/* automatically gathered by fwd; do not hand-edit */ 312/* =====^!^===== end forwards =====^!^===== */ 313 314/* 315 * more C++ voodoo 316 */ 317#ifdef __cplusplus 318} 319#endif 320 321#endif 322