1/*
2  tre.h - TRE public API definitions
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9#ifndef TRE_H
10#define TRE_H 1
11
12#include "tre-config.h"
13
14#ifdef HAVE_SYS_TYPES_H
15#include <sys/types.h>
16#endif /* HAVE_SYS_TYPES_H */
17
18#ifdef HAVE_LIBUTF8_H
19#include <libutf8.h>
20#endif /* HAVE_LIBUTF8_H */
21
22#ifdef TRE_USE_SYSTEM_REGEX_H
23/* Include the system regex.h to make TRE ABI compatible with the
24   system regex. */
25#include TRE_SYSTEM_REGEX_H_PATH
26#ifdef __weak_alias
27__weak_alias(regcomp, tre_regcomp)
28__weak_alias(regexec, tre_regexec)
29__weak_alias(regerror, tre_regerror)
30__weak_alias(regfree, tre_regfree)
31#else
32#define tre_regcomp  regcomp
33#define tre_regexec  regexec
34#define tre_regerror regerror
35#define tre_regfree  regfree
36#endif
37#endif /* TRE_USE_SYSTEM_REGEX_H */
38
39#ifdef __cplusplus
40extern "C" {
41#endif
42
43#ifdef TRE_USE_SYSTEM_REGEX_H
44
45#ifndef REG_OK
46#define REG_OK 0
47#endif /* !REG_OK */
48
49#ifndef HAVE_REG_ERRCODE_T
50typedef int reg_errcode_t;
51#endif /* !HAVE_REG_ERRCODE_T */
52
53#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
54#define REG_LITERAL 0x1000
55#endif
56
57/* Extra tre_regcomp() flags. */
58#ifndef REG_BASIC
59#define REG_BASIC	0
60#endif /* !REG_BASIC */
61#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
62#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)
63
64/* Extra tre_regexec() flags. */
65#define REG_APPROX_MATCHER	 0x1000
66#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
67
68#else /* !TRE_USE_SYSTEM_REGEX_H */
69
70/* If the we're not using system regex.h, we need to define the
71   structs and enums ourselves. */
72
73typedef int regoff_t;
74typedef struct {
75  size_t re_nsub;  /* Number of parenthesized subexpressions. */
76  void *value;	   /* For internal use only. */
77} regex_t;
78
79typedef struct {
80  regoff_t rm_so;
81  regoff_t rm_eo;
82} regmatch_t;
83
84
85typedef enum {
86  REG_OK = 0,		/* No error. */
87  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
88     standard.)	 */
89  REG_NOMATCH,		/* No match. */
90  REG_BADPAT,		/* Invalid regexp. */
91  REG_ECOLLATE,		/* Unknown collating element. */
92  REG_ECTYPE,		/* Unknown character class name. */
93  REG_EESCAPE,		/* Trailing backslash. */
94  REG_ESUBREG,		/* Invalid back reference. */
95  REG_EBRACK,		/* "[]" imbalance */
96  REG_EPAREN,		/* "\(\)" or "()" imbalance */
97  REG_EBRACE,		/* "\{\}" or "{}" imbalance */
98  REG_BADBR,		/* Invalid content of {} */
99  REG_ERANGE,		/* Invalid use of range operator */
100  REG_ESPACE,		/* Out of memory.  */
101  REG_BADRPT            /* Invalid use of repetition operators. */
102} reg_errcode_t;
103
104/* POSIX tre_regcomp() flags. */
105#define REG_EXTENDED	1
106#define REG_ICASE	(REG_EXTENDED << 1)
107#define REG_NEWLINE	(REG_ICASE << 1)
108#define REG_NOSUB	(REG_NEWLINE << 1)
109
110/* Extra tre_regcomp() flags. */
111#define REG_BASIC	0
112#define REG_LITERAL	(REG_NOSUB << 1)
113#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
114#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)
115
116/* POSIX tre_regexec() flags. */
117#define REG_NOTBOL 1
118#define REG_NOTEOL (REG_NOTBOL << 1)
119
120/* Extra tre_regexec() flags. */
121#define REG_APPROX_MATCHER	 (REG_NOTEOL << 1)
122#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
123
124#endif /* !TRE_USE_SYSTEM_REGEX_H */
125
126/* REG_NOSPEC and REG_LITERAL mean the same thing. */
127#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
128#define REG_NOSPEC	REG_LITERAL
129#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
130#define REG_LITERAL	REG_NOSPEC
131#endif /* defined(REG_NOSPEC) */
132
133/* The maximum number of iterations in a bound expression. */
134#undef RE_DUP_MAX
135#define RE_DUP_MAX 255
136
137/* The POSIX.2 regexp functions */
138extern int
139tre_regcomp(regex_t *preg, const char *regex, int cflags);
140
141extern int
142tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
143	regmatch_t pmatch[], int eflags);
144
145extern size_t
146tre_regerror(int errcode, const regex_t *preg, char *errbuf,
147	 size_t errbuf_size);
148
149extern void
150tre_regfree(regex_t *preg);
151
152#ifdef TRE_WCHAR
153#ifdef HAVE_WCHAR_H
154#include <wchar.h>
155#endif /* HAVE_WCHAR_H */
156
157/* Wide character versions (not in POSIX.2). */
158extern int
159tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
160
161extern int
162tre_regwexec(const regex_t *preg, const wchar_t *string,
163	 size_t nmatch, regmatch_t pmatch[], int eflags);
164#endif /* TRE_WCHAR */
165
166/* Versions with a maximum length argument and therefore the capability to
167   handle null characters in the middle of the strings (not in POSIX.2). */
168extern int
169tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
170
171extern int
172tre_regnexec(const regex_t *preg, const char *string, size_t len,
173	 size_t nmatch, regmatch_t pmatch[], int eflags);
174
175#ifdef TRE_WCHAR
176extern int
177tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
178
179extern int
180tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
181	  size_t nmatch, regmatch_t pmatch[], int eflags);
182#endif /* TRE_WCHAR */
183
184#ifdef TRE_APPROX
185
186/* Approximate matching parameter struct. */
187typedef struct {
188  int cost_ins;	       /* Default cost of an inserted character. */
189  int cost_del;	       /* Default cost of a deleted character. */
190  int cost_subst;      /* Default cost of a substituted character. */
191  int max_cost;	       /* Maximum allowed cost of a match. */
192
193  int max_ins;	       /* Maximum allowed number of inserts. */
194  int max_del;	       /* Maximum allowed number of deletes. */
195  int max_subst;       /* Maximum allowed number of substitutes. */
196  int max_err;	       /* Maximum allowed number of errors total. */
197} regaparams_t;
198
199/* Approximate matching result struct. */
200typedef struct {
201  size_t nmatch;       /* Length of pmatch[] array. */
202  regmatch_t *pmatch;  /* Submatch data. */
203  int cost;	       /* Cost of the match. */
204  int num_ins;	       /* Number of inserts in the match. */
205  int num_del;	       /* Number of deletes in the match. */
206  int num_subst;       /* Number of substitutes in the match. */
207} regamatch_t;
208
209
210/* Approximate matching functions. */
211extern int
212tre_regaexec(const regex_t *preg, const char *string,
213	 regamatch_t *match, regaparams_t params, int eflags);
214
215extern int
216tre_reganexec(const regex_t *preg, const char *string, size_t len,
217	  regamatch_t *match, regaparams_t params, int eflags);
218#ifdef TRE_WCHAR
219/* Wide character approximate matching. */
220extern int
221tre_regawexec(const regex_t *preg, const wchar_t *string,
222	  regamatch_t *match, regaparams_t params, int eflags);
223
224extern int
225tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
226	   regamatch_t *match, regaparams_t params, int eflags);
227#endif /* TRE_WCHAR */
228
229/* Sets the parameters to default values. */
230extern void
231tre_regaparams_default(regaparams_t *params);
232#endif /* TRE_APPROX */
233
234#ifdef TRE_WCHAR
235typedef wchar_t tre_char_t;
236#else /* !TRE_WCHAR */
237typedef unsigned char tre_char_t;
238#endif /* !TRE_WCHAR */
239
240typedef struct {
241  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
242  void (*rewind)(size_t pos, void *context);
243  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
244  void *context;
245} tre_str_source;
246
247extern int
248tre_reguexec(const regex_t *preg, const tre_str_source *string,
249	 size_t nmatch, regmatch_t pmatch[], int eflags);
250
251/* Returns the version string.	The returned string is static. */
252extern char *
253tre_version(void);
254
255/* Returns the value for a config parameter.  The type to which `result'
256   must point to depends of the value of `query', see documentation for
257   more details. */
258extern int
259tre_config(int query, void *result);
260
261enum {
262  TRE_CONFIG_APPROX,
263  TRE_CONFIG_WCHAR,
264  TRE_CONFIG_MULTIBYTE,
265  TRE_CONFIG_SYSTEM_ABI,
266  TRE_CONFIG_VERSION
267};
268
269/* Returns 1 if the compiled pattern has back references, 0 if not. */
270extern int
271tre_have_backrefs(const regex_t *preg);
272
273/* Returns 1 if the compiled pattern uses approximate matching features,
274   0 if not. */
275extern int
276tre_have_approx(const regex_t *preg);
277
278#ifdef __cplusplus
279}
280#endif
281#endif				/* TRE_H */
282
283/* EOF */
284