/*
  tre.h - TRE public API definitions

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/
8
9#ifndef TRE_H
10#define TRE_H 1
11
12#include "tre-config.h"
13
14#ifdef HAVE_SYS_TYPES_H
15#include <sys/types.h>
16#endif /* HAVE_SYS_TYPES_H */
17
18#ifdef HAVE_LIBUTF8_H
19#include <libutf8.h>
20#endif /* HAVE_LIBUTF8_H */
21
#ifdef TRE_USE_SYSTEM_REGEX_H
/* Include the system regex.h to make TRE ABI compatible with the
   system regex.  TRE_SYSTEM_REGEX_H_PATH is used directly as the
   operand of #include, so it must expand to a header name. */
#include TRE_SYSTEM_REGEX_H_PATH

/* Rename the tre_-prefixed POSIX entry points to the plain POSIX names.
   Because these are macros, the tre_regcomp(), tre_regexec(),
   tre_regerror() and tre_regfree() prototypes later in this header are
   seen by the compiler as declarations of regcomp(), regexec(),
   regerror() and regfree(). */
#define tre_regcomp  regcomp
#define tre_regexec  regexec
#define tre_regerror regerror
#define tre_regfree  regfree
#endif /* TRE_USE_SYSTEM_REGEX_H */
31
32#ifdef __cplusplus
33extern "C" {
34#endif
35
36#ifdef TRE_USE_SYSTEM_REGEX_H
37
/* Compatibility definitions for the case where the system regex.h has
   been included: only the names it may be missing are supplied here. */

/* Success code, if the system header did not provide one. */
#ifndef REG_OK
#define REG_OK 0
#endif /* !REG_OK */

/* Error code type; a plain int unless the configuration says the system
   header already has reg_errcode_t. */
#ifndef HAVE_REG_ERRCODE_T
typedef int reg_errcode_t;
#endif /* !HAVE_REG_ERRCODE_T */

/* Fallback value for REG_LITERAL when the system header offers neither
   REG_NOSPEC nor REG_LITERAL. */
#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL 0x1000
#endif

/* Extra tre_regcomp() flags. */
#ifndef REG_BASIC
#define REG_BASIC	0
#endif /* !REG_BASIC */
/* The two flags below take the next two bits above REG_LITERAL.  They
   are macros, so REG_LITERAL only has to be defined at the point of use. */
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* Extra tre_regexec() flags.  In this branch they are fixed constants
   (0x1000 and 0x2000) rather than being derived from REG_NOTEOL. */
#define REG_APPROX_MATCHER	 0x1000
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
60
61#else /* !TRE_USE_SYSTEM_REGEX_H */
62
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */
65
/* Offset type used for the members of regmatch_t. */
typedef int regoff_t;

/* Compiled regular expression object. */
typedef struct {
  size_t re_nsub;  /* Number of parenthesized subexpressions. */
  void *value;	   /* For internal use only. */
} regex_t;

/* One (sub)match position.  The members carry the POSIX <regex.h> names;
   presumably rm_so is the start offset and rm_eo the end offset as in
   POSIX -- confirm against the matcher implementation. */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
76
77
/* Error codes.  Only REG_OK has an explicit value, so the remaining
   enumerators are numbered consecutively from 1 in the order written. */
typedef enum {
  REG_OK = 0,		/* No error. */
  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
     standard.)	 */
  REG_NOMATCH,		/* No match. */
  REG_BADPAT,		/* Invalid regexp. */
  REG_ECOLLATE,		/* Unknown collating element. */
  REG_ECTYPE,		/* Unknown character class name. */
  REG_EESCAPE,		/* Trailing backslash. */
  REG_ESUBREG,		/* Invalid back reference. */
  REG_EBRACK,		/* "[]" imbalance */
  REG_EPAREN,		/* "\(\)" or "()" imbalance */
  REG_EBRACE,		/* "\{\}" or "{}" imbalance */
  REG_BADBR,		/* Invalid content of {} */
  REG_ERANGE,		/* Invalid use of range operator */
  REG_ESPACE,		/* Out of memory.  */
  REG_BADRPT            /* Invalid use of repetition operators. */
} reg_errcode_t;
96
/* Each flag below is the previous one shifted left by one bit, so the
   compile flags form one bit set and the exec flags another. */

/* POSIX tre_regcomp() flags. */
#define REG_EXTENDED	1
#define REG_ICASE	(REG_EXTENDED << 1)
#define REG_NEWLINE	(REG_ICASE << 1)
#define REG_NOSUB	(REG_NEWLINE << 1)

/* Extra tre_regcomp() flags.  REG_BASIC is 0, i.e. it sets no bit. */
#define REG_BASIC	0
#define REG_LITERAL	(REG_NOSUB << 1)
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* POSIX tre_regexec() flags. */
#define REG_NOTBOL 1
#define REG_NOTEOL (REG_NOTBOL << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER	 (REG_NOTEOL << 1)
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
116
117#endif /* !TRE_USE_SYSTEM_REGEX_H */
118
/* REG_NOSPEC and REG_LITERAL mean the same thing.  Whichever of the two
   is already defined is used to define the other; if both or neither are
   defined, nothing is changed. */
#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
#define REG_NOSPEC	REG_LITERAL
#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL	REG_NOSPEC
#endif /* defined(REG_NOSPEC) */

/* The maximum number of iterations in a bound expression.  Any earlier
   definition of RE_DUP_MAX is discarded first so that this value wins. */
#undef RE_DUP_MAX
#define RE_DUP_MAX 255
129
/* The POSIX.2 regexp functions.  When TRE_USE_SYSTEM_REGEX_H is defined,
   the four names below are macros for regcomp, regexec, regerror and
   regfree, so these prototypes then declare the system-named functions. */

/* POSIX regcomp() counterpart: takes the pattern as a C string and the
   compile flags in `cflags'. */
extern int
tre_regcomp(regex_t *preg, const char *regex, int cflags);

/* POSIX regexec() counterpart: `pmatch' is an array with room for
   `nmatch' entries, `eflags' takes the tre_regexec() flags. */
extern int
tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
	    regmatch_t pmatch[], int eflags);

/* POSIX regerror() counterpart: `errbuf' is a caller-supplied buffer of
   `errbuf_size' bytes. */
extern size_t
tre_regerror(int errcode, const regex_t *preg, char *errbuf,
	     size_t errbuf_size);

/* POSIX regfree() counterpart. */
extern void
tre_regfree(regex_t *preg);
144
#ifdef TRE_WCHAR
/* wchar_t comes from <wchar.h>, which is only included when the
   configuration reports that the header exists. */
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif /* HAVE_WCHAR_H */

/* Wide character versions (not in POSIX.2).  Same parameter lists as
   tre_regcomp() and tre_regexec(), except that the pattern and the
   subject string are wchar_t strings. */
extern int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

extern int
tre_regwexec(const regex_t *preg, const wchar_t *string,
	     size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
158
/* Versions with a maximum length argument and therefore the capability to
   handle null characters in the middle of the strings (not in POSIX.2).
   `len' is that length argument; the remaining parameters match
   tre_regcomp() and tre_regexec(). */
extern int
tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);

extern int
tre_regnexec(const regex_t *preg, const char *string, size_t len,
	     size_t nmatch, regmatch_t pmatch[], int eflags);
167
#ifdef TRE_WCHAR
/* Wide character versions of tre_regncomp() and tre_regnexec(): same
   parameter lists, with wchar_t strings and an explicit `len'. */
extern int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);

extern int
tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
	      size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
176
#ifdef TRE_APPROX

/* Approximate matching parameter struct.  The first group of members
   sets costs, the second group sets limits on error counts. */
typedef struct {
  int cost_ins;	       /* Default cost of an inserted character. */
  int cost_del;	       /* Default cost of a deleted character. */
  int cost_subst;      /* Default cost of a substituted character. */
  int max_cost;	       /* Maximum allowed cost of a match. */

  int max_ins;	       /* Maximum allowed number of inserts. */
  int max_del;	       /* Maximum allowed number of deletes. */
  int max_subst;       /* Maximum allowed number of substitutes. */
  int max_err;	       /* Maximum allowed number of errors total. */
} regaparams_t;

/* Approximate matching result struct.  It carries the pmatch[] array and
   its length itself, so the approximate matching functions take no
   separate nmatch/pmatch arguments. */
typedef struct {
  size_t nmatch;       /* Length of pmatch[] array. */
  regmatch_t *pmatch;  /* Submatch data. */
  int cost;	       /* Cost of the match. */
  int num_ins;	       /* Number of inserts in the match. */
  int num_del;	       /* Number of deletes in the match. */
  int num_subst;       /* Number of substitutes in the match. */
} regamatch_t;


/* Approximate matching functions.  `params' is passed by value; `match'
   is passed by pointer.  The `n' variant additionally takes the string
   length in `len'. */
extern int
tre_regaexec(const regex_t *preg, const char *string,
	     regamatch_t *match, regaparams_t params, int eflags);

extern int
tre_reganexec(const regex_t *preg, const char *string, size_t len,
	      regamatch_t *match, regaparams_t params, int eflags);
#ifdef TRE_WCHAR
/* Wide character approximate matching. */
extern int
tre_regawexec(const regex_t *preg, const wchar_t *string,
	      regamatch_t *match, regaparams_t params, int eflags);

extern int
tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
	       regamatch_t *match, regaparams_t params, int eflags);
#endif /* TRE_WCHAR */

/* Sets the parameters to default values. */
extern void
tre_regaparams_default(regaparams_t *params);
#endif /* TRE_APPROX */
226
/* Character type handed to tre_str_source callbacks: wchar_t when TRE is
   built with wide character support, unsigned char otherwise. */
#ifdef TRE_WCHAR
typedef wchar_t tre_char_t;
#else /* !TRE_WCHAR */
typedef unsigned char tre_char_t;
#endif /* !TRE_WCHAR */

/* Caller-supplied string source for tre_reguexec(): three callbacks plus
   an opaque `context' pointer that each callback receives as its last
   argument.
   NOTE(review): the callback contracts are not stated in this header.
   Presumably get_next_char stores the next character in *c and a position
   increment in *pos_add, rewind repositions the source to `pos', and
   compare compares `len' characters at pos1 and pos2 -- confirm against
   the matcher implementation. */
typedef struct {
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  void (*rewind)(size_t pos, void *context);
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  void *context;
} tre_str_source;

/* Matching function that reads its input through a tre_str_source instead
   of a string pointer; the remaining parameters match tre_regexec(). */
extern int
tre_reguexec(const regex_t *preg, const tre_str_source *string,
	     size_t nmatch, regmatch_t pmatch[], int eflags);
243
/* Returns the version string.  The returned string is static, so the
   caller must not free it. */
extern char *
tre_version(void);
247
/* Returns the value for a config parameter.  The type to which `result'
   must point depends on the value of `query'; see the documentation for
   more details. */
extern int
tre_config(int query, void *result);

/* Configuration parameter identifiers, numbered 0..4 in the order listed.
   Presumably these are the values accepted as the `query' argument of
   tre_config() -- confirm against the documentation. */
enum {
  TRE_CONFIG_APPROX,
  TRE_CONFIG_WCHAR,
  TRE_CONFIG_MULTIBYTE,
  TRE_CONFIG_SYSTEM_ABI,
  TRE_CONFIG_VERSION
};
261
/* Returns 1 if the compiled pattern has back references, 0 if not. */
extern int
tre_have_backrefs(const regex_t *preg);

/* Returns 1 if the compiled pattern uses approximate matching features,
   0 if not. */
extern int
tre_have_approx(const regex_t *preg);
270
271#ifdef __cplusplus
272}
273#endif
274#endif				/* TRE_H */
275
276/* EOF */
277