1/*
2  tre-internal.h - TRE internal definitions
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9#ifndef TRE_INTERNAL_H
10#define TRE_INTERNAL_H 1
11
12#ifdef HAVE_WCHAR_H
13#include <wchar.h>
14#endif /* HAVE_WCHAR_H */
15
16#ifdef HAVE_WCTYPE_H
17#include <wctype.h>
18#endif /* !HAVE_WCTYPE_H */
19
20#include <ctype.h>
21
22#ifdef __LIBC__
23#include <xlocale_private.h>
24#else /* !__LIBC__ */
25#include <xlocale.h>
26#endif /* !__LIBC__ */
27
28#include "tre.h"
29#include "tre-last-matched.h"
30
31#ifdef TRE_DEBUG
32#include <stdio.h>
33#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
34#else /* !TRE_DEBUG */
35#define DPRINT(msg) do { } while(/*CONSTCOND*/0)
36#endif /* !TRE_DEBUG */
37
38#define elementsof(x)	( sizeof(x) / sizeof(x[0]) )
39
40#ifdef HAVE_MBRTOWC
41#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
42/* xlocale */
43#define tre_mbrtowc_l(pwc, s, n, ps, l) (mbrtowc_l((pwc), (s), (n), (ps), (l)))
44#else /* !HAVE_MBRTOWC */
45#ifdef HAVE_MBTOWC
46#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
47#endif /* HAVE_MBTOWC */
48#endif /* !HAVE_MBRTOWC */
49
50#ifdef TRE_MULTIBYTE
51#ifdef HAVE_MBSTATE_T
52#define TRE_MBSTATE
53#endif /* TRE_MULTIBYTE */
54#endif /* HAVE_MBSTATE_T */
55
56/* Define the character types and functions. */
57#ifdef TRE_WCHAR
58
59/* Wide characters. */
60typedef wint_t tre_cint_t;
61#define TRE_CHAR_MAX WCHAR_MAX
62
63#ifdef TRE_MULTIBYTE
64#define TRE_MB_CUR_MAX MB_CUR_MAX
65/* xlocale */
66#define TRE_MB_CUR_MAX_L MB_CUR_MAX_L
67#else /* !TRE_MULTIBYTE */
68#define TRE_MB_CUR_MAX 1
69#endif /* !TRE_MULTIBYTE */
70
71#define tre_isalnum iswalnum
72#define tre_isalpha iswalpha
73#ifdef HAVE_ISWBLANK
74#define tre_isblank iswblank
75#endif /* HAVE_ISWBLANK */
76#define tre_iscntrl iswcntrl
77#define tre_isdigit iswdigit
78#define tre_isgraph iswgraph
79#define tre_islower iswlower
80#define tre_isprint iswprint
81#define tre_ispunct iswpunct
82#define tre_isspace iswspace
83#define tre_isupper iswupper
84#define tre_isxdigit iswxdigit
85
86#define tre_tolower towlower
87#define tre_toupper towupper
88#define tre_strlen  wcslen
89
90/* xlocale */
91#define tre_isalnum_l iswalnum_l
92#define tre_isdigit_l iswdigit_l
93#define tre_islower_l iswlower_l
94#define tre_isupper_l iswupper_l
95#define tre_isxdigit_l iswxdigit_l
96#define tre_tolower_l towlower_l
97#define tre_toupper_l towupper_l
98
99#else /* !TRE_WCHAR */
100
101/* 8 bit characters. */
102typedef short tre_cint_t;
103#define TRE_CHAR_MAX 255
104#define TRE_MB_CUR_MAX 1
105
106#define tre_isalnum isalnum
107#define tre_isalpha isalpha
108#ifdef HAVE_ISASCII
109#define tre_isascii isascii
110#endif /* HAVE_ISASCII */
111#ifdef HAVE_ISBLANK
112#define tre_isblank isblank
113#endif /* HAVE_ISBLANK */
114#define tre_iscntrl iscntrl
115#define tre_isdigit isdigit
116#define tre_isgraph isgraph
117#define tre_islower islower
118#define tre_isprint isprint
119#define tre_ispunct ispunct
120#define tre_isspace isspace
121#define tre_isupper isupper
122#define tre_isxdigit isxdigit
123
124#define tre_tolower(c) (tre_cint_t)(tolower(c))
125#define tre_toupper(c) (tre_cint_t)(toupper(c))
126#define tre_strlen(s)  (strlen((const char*)s))
127
128#endif /* !TRE_WCHAR */
129
130#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
131#define TRE_USE_SYSTEM_WCTYPE 1
132#endif
133
134#ifdef TRE_USE_SYSTEM_WCTYPE
135/* Use system provided iswctype() and wctype(). */
136typedef wctype_t tre_ctype_t;
137#define tre_isctype iswctype
138#define tre_ctype   wctype
139
140/* xlocale */
141#define tre_isctype_l iswctype_l
142#define tre_ctype_l   wctype_l
143
144#else /* !TRE_USE_SYSTEM_WCTYPE */
145/* Define our own versions of iswctype() and wctype(). */
146typedef int (*tre_ctype_t)(tre_cint_t);
147#define tre_isctype(c, type) ( (type)(c) )
148tre_ctype_t tre_ctype(const char *name);
149#endif /* !TRE_USE_SYSTEM_WCTYPE */
150
151typedef enum { STR_WIDE, STR_BYTE, STR_MBS,
152#ifdef TRE_STR_USER
153	       STR_USER
154#endif /* TRE_STR_USER */
155	     } tre_str_type_t;
156
157/* Returns number of bytes to add to (char *)ptr to make it
158   properly aligned for the type. */
159#define ALIGN(ptr, type) \
160  ((((long)ptr) % sizeof(type)) \
161   ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
162   : 0)
163
164#undef MAX
165#undef MIN
166#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
167#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
168
169/* Define STRF to the correct printf formatter for strings. */
170#ifdef TRE_WCHAR
171#define STRF "ls"
172#else /* !TRE_WCHAR */
173#define STRF "s"
174#endif /* !TRE_WCHAR */
175
176/* Types to handle bracket expressions. */
177typedef enum {
178  TRE_BRACKET_MATCH_TYPE_UNUSED = 0,
179  TRE_BRACKET_MATCH_TYPE_CHAR,		/* Single character value */
180  TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN,	/* Collation range begin */
181  TRE_BRACKET_MATCH_TYPE_RANGE_END,	/* Collation range end */
182  TRE_BRACKET_MATCH_TYPE_CLASS,		/* Character class */
183  TRE_BRACKET_MATCH_TYPE_EQUIVALENCE,	/* Collation equivalence value */
184} tre_bracket_match_type_t;
185
186typedef struct {
187  tre_bracket_match_type_t type;
188  tre_cint_t value;
189} tre_bracket_match_t;
190
191#define TRE_BRACKET_MATCH_FLAG_NEGATE	1
192
193typedef struct {
194  int num_bracket_matches;
195  int flags;
196  tre_bracket_match_t bracket_matches[0];
197} tre_bracket_match_list_t;
198
199#define SIZEOF_BRACKET_MATCH_LIST_N(n)	(sizeof(tre_bracket_match_list_t) + \
200					 sizeof(tre_bracket_match_t) * (n))
201#define SIZEOF_BRACKET_MATCH_LIST(l)	SIZEOF_BRACKET_MATCH_LIST_N( \
202					 (l)->num_bracket_matches)
203
204/* The "count" field is the number of time the tag was set, initially zero.
205   The "first" field contains the first set value (when "count" equals 1).
206   The "value" field contains the current value of the tag, if "count" is
207   greater than zero (the tag's current value is -1 if "count" is zero).
208   The "touch" field is the touch value, a montonically increasing value
209   (maintained by the caller) set each time the tag itself is set. */
210typedef struct {
211  int count;
212  int first;
213  int value;
214  int touch;
215} tre_tag_t;
216
217/* TNFA transition type. A TNFA state is an array of transitions,
218   the terminator is a transition with NULL `state'. */
219typedef struct tnfa_transition tre_tnfa_transition_t;
220
221struct tnfa_transition {
222  /* Range of accepted characters. */
223  tre_cint_t code_min;
224  tre_cint_t code_max;
225  /* Pointer to the destination state. */
226  tre_tnfa_transition_t *state;
227  /* ID number of the destination state. */
228  int state_id;
229  /* -1 terminated array of tags (or NULL). */
230  int *tags;
231  /* Matching parameters settings (or NULL). */
232  int *params;
233  /* Assertion bitmap. */
234  int assertions;
235  /* Assertion parameters. */
236  union {
237    /* Bracket matches. */
238    tre_bracket_match_list_t *bracket_match_list;
239    /* Back reference assertion. */
240    int backref;
241  } u;
242};
243
244
245/* Assertions. */
246#define ASSERT_AT_BOL		  1   /* Beginning of line. */
247#define ASSERT_AT_EOL		  2   /* End of line. */
248#define ASSERT_BRACKET_MATCH	  4   /* Matches in `bracket_match_list'. */
249#define ASSERT_AT_BOW		  8   /* Beginning of word. */
250#define ASSERT_AT_EOW		 16   /* End of word. */
251#define ASSERT_AT_WB		 32   /* Word boundary. */
252#define ASSERT_AT_WB_NEG	 64   /* Not a word boundary. */
253#define ASSERT_BACKREF		128   /* A back reference in `backref'. */
254#define ASSERT_LAST		128
255
256/* Tag directions. */
257typedef enum {
258  TRE_TAG_MINIMIZE = 0,
259  TRE_TAG_MAXIMIZE,
260  TRE_TAG_LEFT_MAXIMIZE,
261} tre_tag_direction_t;
262
263/* Parameters that can be changed dynamically while matching. */
264typedef enum {
265  TRE_PARAM_COST_INS	    = 0,
266  TRE_PARAM_COST_DEL	    = 1,
267  TRE_PARAM_COST_SUBST	    = 2,
268  TRE_PARAM_COST_MAX	    = 3,
269  TRE_PARAM_MAX_INS	    = 4,
270  TRE_PARAM_MAX_DEL	    = 5,
271  TRE_PARAM_MAX_SUBST	    = 6,
272  TRE_PARAM_MAX_ERR	    = 7,
273  TRE_PARAM_DEPTH	    = 8,
274  TRE_PARAM_LAST	    = 9
275} tre_param_t;
276
277/* Unset matching parameter */
278#define TRE_PARAM_UNSET -1
279
280/* Signifies the default matching parameter value. */
281#define TRE_PARAM_DEFAULT -2
282
283/* Instructions to compute submatch register values from tag values
284   after a successful match.  */
285struct tre_submatch_data {
286  /* Tag that gives the value for rm_so (submatch start offset). */
287  int so_tag;
288  /* Tag that gives the value for rm_eo (submatch end offset). */
289  int eo_tag;
290};
291
292typedef struct tre_submatch_data tre_submatch_data_t;
293
294
295/* TNFA definition. */
296typedef struct tnfa tre_tnfa_t;
297
298struct tnfa {
299  tre_tnfa_transition_t *transitions;
300  tre_tnfa_transition_t *initial;
301  tre_tnfa_transition_t *final;
302  tre_submatch_data_t *submatch_data;
303#ifdef USE_FIRSTPOS_CHARS /* not defined */
304  char *firstpos_chars;
305#endif /* USE_FIRSTPOS_CHARS */
306  tre_tag_direction_t *tag_directions;
307  int *minimal_tags;
308  tre_last_matched_branch_t *last_matched_branch;
309  locale_t loc;
310  unsigned int num_transitions;
311  int first_char;
312  unsigned int num_submatches;
313  unsigned int num_submatches_invisible;
314  int num_tags;
315  int num_minimals;
316  int end_tag;
317  int num_states;
318  int cflags;
319  int have_backrefs;
320  int num_reorder_tags;
321  int have_approx;
322  int params_depth;
323};
324
325__private_extern__ int
326tre_compile(regex_t * __restrict preg, const tre_char_t * __restrict regex, size_t n, int cflags,
327	    locale_t __restrict loc);
328
329__private_extern__ void
330tre_free(regex_t *preg);
331
332__private_extern__ reg_errcode_t
333tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[ __restrict ], int cflags,
334		const tre_tnfa_t * __restrict tnfa, const tre_tag_t * __restrict tags, int match_eo);
335
336__private_extern__ reg_errcode_t
337tre_tnfa_run_parallel(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len,
338		      tre_str_type_t type, tre_tag_t * __restrict match_tags, int eflags,
339		      int * __restrict match_end_ofs);
340
341__private_extern__ reg_errcode_t
342tre_tnfa_run_backtrack(const tre_tnfa_t * __restrict tnfa, const void * __restrict string,
343		       int len, tre_str_type_t type, tre_tag_t * __restrict match_tags,
344		       int eflags, int * __restrict match_end_ofs);
345
346#ifdef TRE_APPROX
347__private_extern__ reg_errcode_t
348tre_tnfa_run_approx(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len,
349		    tre_str_type_t type, tre_tag_t * __restrict match_tags,
350		    regamatch_t * __restrict match, regaparams_t params,
351		    int eflags, int * __restrict match_end_ofs);
352#endif /* TRE_APPROX */
353
354#endif /* TRE_INTERNAL_H */
355
356/* EOF */
357