1/*
2  tre.h - TRE public API definitions
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9#ifndef TRE_H
10#define TRE_H 1
11
12#include "tre-config.h"
13
14#ifdef HAVE_SYS_TYPES_H
15#include <sys/types.h>
16#endif /* HAVE_SYS_TYPES_H */
17
18#ifdef HAVE_LIBUTF8_H
19#include <libutf8.h>
20#endif /* HAVE_LIBUTF8_H */
21
#ifdef TRE_USE_SYSTEM_REGEX_H
/* ABI-compatibility mode: include the system regex.h and map every
   tre_-prefixed entry point onto the corresponding unprefixed name, so
   that the prototypes in this header declare the unprefixed symbols. */
#include TRE_SYSTEM_REGEX_H_PATH

/* POSIX.2 entry points. */
#define tre_regcomp      regcomp
#define tre_regexec      regexec
#define tre_regerror     regerror
#define tre_regfree      regfree

/* Approximate matching entry points (only when built with TRE_APPROX). */
#ifdef TRE_APPROX
#define tre_regaexec     regaexec
#define tre_reganexec    reganexec
#define tre_regawexec    regawexec
#define tre_regawnexec   regawnexec
#endif /* TRE_APPROX */

/* Length-limited and wide character entry points. */
#define tre_regncomp     regncomp
#define tre_regnexec     regnexec
#define tre_regwcomp     regwcomp
#define tre_regwexec     regwexec
#define tre_regwncomp    regwncomp
#define tre_regwnexec    regwnexec

/* Extended locale entry points. */
#define tre_regcomp_l    regcomp_l
#define tre_regncomp_l   regncomp_l
#define tre_regwcomp_l   regwcomp_l
#define tre_regwncomp_l  regwncomp_l
#endif /* TRE_USE_SYSTEM_REGEX_H */
49
50#ifdef __cplusplus
51extern "C" {
52#endif
53
54#ifdef TRE_USE_SYSTEM_REGEX_H
55
/* Fallbacks for names the system regex.h may not provide. */
#ifndef REG_OK
#define REG_OK 0
#endif /* !REG_OK */

#ifndef HAVE_REG_ERRCODE_T
typedef int reg_errcode_t;
#endif /* !HAVE_REG_ERRCODE_T */

/* Only invent a REG_LITERAL value when the system header offers neither
   REG_LITERAL nor its synonym REG_NOSPEC. */
#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL 0x1000
#endif

/* Extra tre_regcomp() flags. */
#ifndef REG_BASIC
#define REG_BASIC	0
#endif /* !REG_BASIC */

/* Disabled: the remaining TRE extension flags are not defined in
   system-regex mode. */
#if 0
#define REG_LEFT_ASSOC  (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_LEFT_ASSOC << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER	 0x1000
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
#endif

/* Evaluates to 0xF265 with an ASCII execution character set. */
#define RE_MAGIC  ((('r'^0200)<<8) | 'e')
82
83#else /* !TRE_USE_SYSTEM_REGEX_H */
84
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */
87
/* Type of the offsets stored in regmatch_t. */
typedef int regoff_t;

/* Compiled pattern object. */
typedef struct {
  size_t re_nsub;      /* Number of parenthesized subexpressions. */
  const void *re_endp; /* Regex string end pointer (used with REG_PEND). */
  void *value;         /* For internal use only. */
} regex_t;

/* Position of one (sub)match.  The names follow POSIX regmatch_t, where
   rm_so is the start offset and rm_eo the offset just past the end. */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
99
100
/* Error codes.  The numeric values are written out explicitly; they are
   exactly the values the implicit numbering (0, 1, 2, ...) produces.
   There is deliberately no comma after the last enumerator so that the
   header is also accepted by C89 and C++98 compilers. */
typedef enum {
  REG_OK       = 0,   /* No error. */
  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
     standard.) */
  REG_NOMATCH  = 1,   /* No match. */
  REG_BADPAT   = 2,   /* Invalid regexp. */
  REG_ECOLLATE = 3,   /* Unknown collating element. */
  REG_ECTYPE   = 4,   /* Unknown character class name. */
  REG_EESCAPE  = 5,   /* Trailing backslash. */
  REG_ESUBREG  = 6,   /* Invalid back reference. */
  REG_EBRACK   = 7,   /* "[]" imbalance */
  REG_EPAREN   = 8,   /* "\(\)" or "()" imbalance */
  REG_EBRACE   = 9,   /* "\{\}" or "{}" imbalance */
  REG_BADBR    = 10,  /* Invalid content of {} */
  REG_ERANGE   = 11,  /* Invalid use of range operator */
  REG_ESPACE   = 12,  /* Out of memory. */
  REG_BADRPT   = 13,  /* Invalid use of repetition operators. */
  REG_INVARG   = 14,  /* Invalid argument, e.g. negative-length string */
  REG_ILLSEQ   = 15   /* Illegal byte sequence (bad multibyte character) */
} reg_errcode_t;
121
/* Each flag is derived from the previous one by a one-bit shift, so the
   flags of one group never share a bit.  Compile flags and exec flags are
   two independent bit spaces and reuse the same numeric values. */

/* POSIX tre_regcomp() flags. */
#define REG_EXTENDED	1			/* 0x01 */
#define REG_ICASE	(REG_EXTENDED << 1)	/* 0x02 */
#define REG_NEWLINE	(REG_ICASE << 1)	/* 0x04 */
#define REG_NOSUB	(REG_NEWLINE << 1)	/* 0x08 */

/* Extra tre_regcomp() flags. */
#define REG_BASIC	0			/* sets no bits */
#define REG_LITERAL	(REG_NOSUB << 1)	/* 0x10 */
#define REG_LEFT_ASSOC  (REG_LITERAL << 1)	/* 0x20 */
#define REG_UNGREEDY    (REG_LEFT_ASSOC << 1)	/* 0x40 */
#define REG_PEND        (REG_UNGREEDY << 1)	/* 0x80 */

/* POSIX tre_regexec() flags. */
#define REG_NOTBOL 1				/* 0x01 */
#define REG_NOTEOL (REG_NOTBOL << 1)		/* 0x02 */

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER	 (REG_NOTEOL << 1)		/* 0x04 */
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)	/* 0x08 */
#define REG_STARTEND             (REG_BACKTRACKING_MATCHER << 1) /* 0x10 */
143
144#endif /* !TRE_USE_SYSTEM_REGEX_H */
145
/* REG_NOSPEC and REG_LITERAL mean the same thing: whichever of the two is
   already defined becomes the definition of the other. */
#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
#define REG_NOSPEC	REG_LITERAL
#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL	REG_NOSPEC
#endif /* REG_NOSPEC / REG_LITERAL aliasing */

/* The maximum number of iterations in a bound expression.  Any earlier
   definition of the macro is discarded first. */
#undef RE_DUP_MAX
#define RE_DUP_MAX 255
156
/* The POSIX.2 regexp functions.  When TRE_USE_SYSTEM_REGEX_H is defined
   the tre_ names are macros for the unprefixed POSIX names, so these
   prototypes then declare regcomp(), regexec(), regerror() and regfree(). */

/* Compiles the pattern string `regex' into `preg' according to `cflags'
   (a combination of the tre_regcomp() flags). */
extern int
tre_regcomp(regex_t * __restrict preg, const char * __restrict regex,
	    int cflags);

/* Matches `string' against the compiled pattern `preg'.  `pmatch' is an
   array of `nmatch' elements for the match positions; `eflags' is a
   combination of the tre_regexec() flags. */
extern int
tre_regexec(const regex_t * __restrict preg, const char * __restrict string,
	    size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags);

/* Produces a message for `errcode' in `errbuf', which has room for
   `errbuf_size' bytes.  (Presumably returns the size needed for the whole
   message, as POSIX regerror() does -- confirm against the
   implementation.) */
extern size_t
tre_regerror(int errcode, const regex_t * __restrict preg,
	     char * __restrict errbuf, size_t errbuf_size);

/* Releases the resources held by the compiled pattern `preg'. */
extern void
tre_regfree(regex_t *preg);
171
172#ifdef TRE_WCHAR
173#ifdef HAVE_WCHAR_H
174#include <wchar.h>
175#endif /* HAVE_WCHAR_H */
176
/* Wide character versions (not in POSIX.2).  Same parameters as
   tre_regcomp() and tre_regexec(), except that the pattern and the
   subject string are arrays of wchar_t. */
extern int
tre_regwcomp(regex_t * __restrict preg, const wchar_t * __restrict regex,
	     int cflags);

extern int
tre_regwexec(const regex_t * __restrict preg,
	     const wchar_t * __restrict string, size_t nmatch,
	     regmatch_t pmatch[ __restrict ], int eflags);
184#endif /* TRE_WCHAR */
185
/* Versions with a maximum length argument and therefore the capability to
   handle null characters in the middle of the strings (not in POSIX.2).
   `len' is the length of `regex' or `string', respectively. */
extern int
tre_regncomp(regex_t * __restrict preg, const char * __restrict regex,
	     size_t len, int cflags);

extern int
tre_regnexec(const regex_t * __restrict preg, const char * __restrict string,
	     size_t len, size_t nmatch, regmatch_t pmatch[ __restrict ],
	     int eflags);
194
195#ifdef TRE_WCHAR
/* Wide character versions of the length-limited functions; `len' is the
   length of the wchar_t array `regex' or `string', respectively. */
extern int
tre_regwncomp(regex_t * __restrict preg, const wchar_t * __restrict regex,
	      size_t len, int cflags);

extern int
tre_regwnexec(const regex_t * __restrict preg,
	      const wchar_t * __restrict string, size_t len,
	      size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags);
202#endif /* TRE_WCHAR */
203
204/* Extended locale versions */
205#include <xlocale.h>
206
207extern int
208tre_regcomp_l(regex_t * __restrict preg, const char * __restrict regex, int cflags, locale_t __restrict loc);
209
210#ifdef TRE_WCHAR
211extern int
212tre_regwcomp_l(regex_t * __restrict preg, const wchar_t * __restrict regex, int cflags, locale_t __restrict loc);
213#endif /* TRE_WCHAR */
214
215extern int
216tre_regncomp_l(regex_t * __restrict preg, const char * __restrict regex, size_t len, int cflags, locale_t __restrict loc);
217
218#ifdef TRE_WCHAR
219extern int
220tre_regwncomp_l(regex_t * __restrict preg, const wchar_t * __restrict regex, size_t len, int cflags, locale_t __restrict loc);
221#endif /* TRE_WCHAR */
222
223#ifdef TRE_APPROX
224
/* Approximate matching parameter struct.  The first four members are the
   edit costs and the cost limit; the last four limit the number of
   edits. */
typedef struct {
  int cost_ins;        /* Default cost of an inserted character. */
  int cost_del;        /* Default cost of a deleted character. */
  int cost_subst;      /* Default cost of a substituted character. */
  int max_cost;        /* Maximum allowed cost of a match. */

  int max_ins;         /* Maximum allowed number of inserts. */
  int max_del;         /* Maximum allowed number of deletes. */
  int max_subst;       /* Maximum allowed number of substitutes. */
  int max_err;         /* Maximum allowed number of errors total. */
} regaparams_t;
237
/* Approximate matching result struct.  `nmatch' and `pmatch' describe the
   submatch array; the remaining members describe the match's cost and
   edit counts. */
typedef struct {
  size_t nmatch;       /* Length of pmatch[] array. */
  regmatch_t *pmatch;  /* Submatch data. */
  int cost;            /* Cost of the match. */
  int num_ins;         /* Number of inserts in the match. */
  int num_del;         /* Number of deletes in the match. */
  int num_subst;       /* Number of substitutes in the match. */
} regamatch_t;
247
248
249/* Approximate matching functions. */
250extern int
251tre_regaexec(const regex_t * __restrict preg, const char * __restrict string,
252	 regamatch_t * __restrict match, regaparams_t params, int eflags);
253
254extern int
255tre_reganexec(const regex_t * __restrict preg, const char * __restrict string, size_t len,
256	  regamatch_t * __restrict match, regaparams_t params, int eflags);
257#ifdef TRE_WCHAR
258/* Wide character approximate matching. */
259extern int
260tre_regawexec(const regex_t * __restrict preg, const wchar_t * __restrict string,
261	  regamatch_t * __restrict match, regaparams_t params, int eflags);
262
263extern int
264tre_regawnexec(const regex_t * __restrict preg, const wchar_t * __restrict string, size_t len,
265	   regamatch_t * __restrict match, regaparams_t params, int eflags);
266#endif /* TRE_WCHAR */
267
268/* Sets the parameters to default values. */
269extern void
270tre_regaparams_default(regaparams_t *params);
271#endif /* TRE_APPROX */
272
/* Character type handed to the get_next_char() callback of
   tre_str_source: wchar_t in a TRE_WCHAR build, unsigned char
   otherwise. */
#ifdef TRE_WCHAR
typedef wchar_t tre_char_t;
#else /* !TRE_WCHAR */
typedef unsigned char tre_char_t;
#endif /* !TRE_WCHAR */
278
#ifdef TRE_STR_USER
/* Caller-supplied string source: three callbacks plus an opaque `context'
   pointer that is passed as the last argument of every callback.
   NOTE(review): the exact contracts (return values, units of the
   positions) are not visible in this header -- confirm against the TRE
   documentation before relying on the hedged notes below. */
typedef struct {
  /* Stores the next character through `c' and a position increment
     through `pos_add'; the meaning of the int result is presumably an
     end-of-input indication. */
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  /* Repositions the source to `pos'. */
  void (*rewind)(size_t pos, void *context);
  /* Compares the `len'-long spans starting at `pos1' and `pos2';
     presumably zero means equal. */
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  void *context;
} tre_str_source;

/* Like tre_regexec(), but the subject is read through `string'. */
extern int
tre_reguexec(const regex_t * __restrict preg,
	     const tre_str_source * __restrict string,
	     size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags);
#endif /* TRE_STR_USER */
291
292#ifndef __LIBC__
/* Returns the version string.  The returned string is static, so the
   caller must not free it. */
extern char *
tre_version(void);
296
/* Values accepted as the `query' argument of tre_config().  The explicit
   numbers are the ones the implicit enumeration produces. */
enum {
  TRE_CONFIG_APPROX     = 0,
  TRE_CONFIG_WCHAR      = 1,
  TRE_CONFIG_MULTIBYTE  = 2,
  TRE_CONFIG_SYSTEM_ABI = 3,
  TRE_CONFIG_VERSION    = 4
};

/* Returns the value for a config parameter.  The type that `result' must
   point to depends on the value of `query'; see the documentation for
   more details. */
extern int
tre_config(int query, void *result);
310
/* Returns 1 if the compiled pattern has back references, 0 if not. */
extern int
tre_have_backrefs(const regex_t *preg);

#ifdef TRE_APPROX
/* Returns 1 if the compiled pattern uses approximate matching features,
   0 if not.  Only declared in a TRE_APPROX build. */
extern int
tre_have_approx(const regex_t *preg);
#endif /* TRE_APPROX */
321#endif /* !__LIBC__ */
322
323#ifdef __cplusplus
324}
325#endif
326#endif				/* TRE_H */
327
328/* EOF */
329