1/* 2 tre-internal.h - TRE internal definitions 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7*/ 8 9#ifndef TRE_INTERNAL_H 10#define TRE_INTERNAL_H 1 11 12#ifdef HAVE_WCHAR_H 13#include <wchar.h> 14#endif /* HAVE_WCHAR_H */ 15 16#ifdef HAVE_WCTYPE_H 17#include <wctype.h> 18#endif /* !HAVE_WCTYPE_H */ 19 20#include <ctype.h> 21 22#ifdef __LIBC__ 23#include <xlocale_private.h> 24#else /* !__LIBC__ */ 25#include <xlocale.h> 26#endif /* !__LIBC__ */ 27 28#include "tre.h" 29#include "tre-last-matched.h" 30 31#ifdef TRE_DEBUG 32#include <stdio.h> 33#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0) 34#else /* !TRE_DEBUG */ 35#define DPRINT(msg) do { } while(/*CONSTCOND*/0) 36#endif /* !TRE_DEBUG */ 37 38#define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) 39 40#ifdef HAVE_MBRTOWC 41#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) 42/* xlocale */ 43#define tre_mbrtowc_l(pwc, s, n, ps, l) (mbrtowc_l((pwc), (s), (n), (ps), (l))) 44#else /* !HAVE_MBRTOWC */ 45#ifdef HAVE_MBTOWC 46#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) 47#endif /* HAVE_MBTOWC */ 48#endif /* !HAVE_MBRTOWC */ 49 50#ifdef TRE_MULTIBYTE 51#ifdef HAVE_MBSTATE_T 52#define TRE_MBSTATE 53#endif /* TRE_MULTIBYTE */ 54#endif /* HAVE_MBSTATE_T */ 55 56/* Define the character types and functions. */ 57#ifdef TRE_WCHAR 58 59/* Wide characters. */ 60typedef wint_t tre_cint_t; 61#define TRE_CHAR_MAX WCHAR_MAX 62 63#ifdef TRE_MULTIBYTE 64#define TRE_MB_CUR_MAX MB_CUR_MAX 65/* xlocale */ 66#define TRE_MB_CUR_MAX_L MB_CUR_MAX_L 67#else /* !TRE_MULTIBYTE */ 68#define TRE_MB_CUR_MAX 1 69#endif /* !TRE_MULTIBYTE */ 70 71#define tre_isalnum iswalnum 72#define tre_isalpha iswalpha 73#ifdef HAVE_ISWBLANK 74#define tre_isblank iswblank 75#endif /* HAVE_ISWBLANK */ 76#define tre_iscntrl iswcntrl 77#define tre_isdigit iswdigit 78#define tre_isgraph iswgraph 79#define tre_islower iswlower 80#define tre_isprint iswprint 81#define tre_ispunct iswpunct 82#define tre_isspace iswspace 83#define tre_isupper iswupper 84#define tre_isxdigit iswxdigit 85 86#define tre_tolower towlower 87#define tre_toupper towupper 88#define tre_strlen wcslen 89 90/* xlocale */ 91#define tre_isalnum_l iswalnum_l 92#define tre_isdigit_l iswdigit_l 93#define tre_islower_l iswlower_l 94#define tre_isupper_l iswupper_l 95#define tre_isxdigit_l iswxdigit_l 96#define tre_tolower_l towlower_l 97#define tre_toupper_l towupper_l 98 99#else /* !TRE_WCHAR */ 100 101/* 8 bit characters. */ 102typedef short tre_cint_t; 103#define TRE_CHAR_MAX 255 104#define TRE_MB_CUR_MAX 1 105 106#define tre_isalnum isalnum 107#define tre_isalpha isalpha 108#ifdef HAVE_ISASCII 109#define tre_isascii isascii 110#endif /* HAVE_ISASCII */ 111#ifdef HAVE_ISBLANK 112#define tre_isblank isblank 113#endif /* HAVE_ISBLANK */ 114#define tre_iscntrl iscntrl 115#define tre_isdigit isdigit 116#define tre_isgraph isgraph 117#define tre_islower islower 118#define tre_isprint isprint 119#define tre_ispunct ispunct 120#define tre_isspace isspace 121#define tre_isupper isupper 122#define tre_isxdigit isxdigit 123 124#define tre_tolower(c) (tre_cint_t)(tolower(c)) 125#define tre_toupper(c) (tre_cint_t)(toupper(c)) 126#define tre_strlen(s) (strlen((const char*)s)) 127 128#endif /* !TRE_WCHAR */ 129 130#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) 131#define TRE_USE_SYSTEM_WCTYPE 1 132#endif 133 134#ifdef TRE_USE_SYSTEM_WCTYPE 135/* Use system provided iswctype() and wctype(). */ 136typedef wctype_t tre_ctype_t; 137#define tre_isctype iswctype 138#define tre_ctype wctype 139 140/* xlocale */ 141#define tre_isctype_l iswctype_l 142#define tre_ctype_l wctype_l 143 144#else /* !TRE_USE_SYSTEM_WCTYPE */ 145/* Define our own versions of iswctype() and wctype(). */ 146typedef int (*tre_ctype_t)(tre_cint_t); 147#define tre_isctype(c, type) ( (type)(c) ) 148tre_ctype_t tre_ctype(const char *name); 149#endif /* !TRE_USE_SYSTEM_WCTYPE */ 150 151typedef enum { STR_WIDE, STR_BYTE, STR_MBS, 152#ifdef TRE_STR_USER 153 STR_USER 154#endif /* TRE_STR_USER */ 155 } tre_str_type_t; 156 157/* Returns number of bytes to add to (char *)ptr to make it 158 properly aligned for the type. */ 159#define ALIGN(ptr, type) \ 160 ((((long)ptr) % sizeof(type)) \ 161 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ 162 : 0) 163 164#undef MAX 165#undef MIN 166#define MAX(a, b) (((a) >= (b)) ? (a) : (b)) 167#define MIN(a, b) (((a) <= (b)) ? (a) : (b)) 168 169/* Define STRF to the correct printf formatter for strings. */ 170#ifdef TRE_WCHAR 171#define STRF "ls" 172#else /* !TRE_WCHAR */ 173#define STRF "s" 174#endif /* !TRE_WCHAR */ 175 176/* Types to handle bracket expressions. */ 177typedef enum { 178 TRE_BRACKET_MATCH_TYPE_UNUSED = 0, 179 TRE_BRACKET_MATCH_TYPE_CHAR, /* Single character value */ 180 TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN, /* Collation range begin */ 181 TRE_BRACKET_MATCH_TYPE_RANGE_END, /* Collation range end */ 182 TRE_BRACKET_MATCH_TYPE_CLASS, /* Character class */ 183 TRE_BRACKET_MATCH_TYPE_EQUIVALENCE, /* Collation equivalence value */ 184} tre_bracket_match_type_t; 185 186typedef struct { 187 tre_bracket_match_type_t type; 188 tre_cint_t value; 189} tre_bracket_match_t; 190 191#define TRE_BRACKET_MATCH_FLAG_NEGATE 1 192 193typedef struct { 194 int num_bracket_matches; 195 int flags; 196 tre_bracket_match_t bracket_matches[0]; 197} tre_bracket_match_list_t; 198 199#define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \ 200 sizeof(tre_bracket_match_t) * (n)) 201#define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \ 202 (l)->num_bracket_matches) 203 204/* The "count" field is the number of time the tag was set, initially zero. 205 The "first" field contains the first set value (when "count" equals 1). 206 The "value" field contains the current value of the tag, if "count" is 207 greater than zero (the tag's current value is -1 if "count" is zero). 208 The "touch" field is the touch value, a montonically increasing value 209 (maintained by the caller) set each time the tag itself is set. */ 210typedef struct { 211 int count; 212 int first; 213 int value; 214 int touch; 215} tre_tag_t; 216 217/* TNFA transition type. A TNFA state is an array of transitions, 218 the terminator is a transition with NULL `state'. */ 219typedef struct tnfa_transition tre_tnfa_transition_t; 220 221struct tnfa_transition { 222 /* Range of accepted characters. */ 223 tre_cint_t code_min; 224 tre_cint_t code_max; 225 /* Pointer to the destination state. */ 226 tre_tnfa_transition_t *state; 227 /* ID number of the destination state. */ 228 int state_id; 229 /* -1 terminated array of tags (or NULL). */ 230 int *tags; 231 /* Matching parameters settings (or NULL). */ 232 int *params; 233 /* Assertion bitmap. */ 234 int assertions; 235 /* Assertion parameters. */ 236 union { 237 /* Bracket matches. */ 238 tre_bracket_match_list_t *bracket_match_list; 239 /* Back reference assertion. */ 240 int backref; 241 } u; 242}; 243 244 245/* Assertions. */ 246#define ASSERT_AT_BOL 1 /* Beginning of line. */ 247#define ASSERT_AT_EOL 2 /* End of line. */ 248#define ASSERT_BRACKET_MATCH 4 /* Matches in `bracket_match_list'. */ 249#define ASSERT_AT_BOW 8 /* Beginning of word. */ 250#define ASSERT_AT_EOW 16 /* End of word. */ 251#define ASSERT_AT_WB 32 /* Word boundary. */ 252#define ASSERT_AT_WB_NEG 64 /* Not a word boundary. */ 253#define ASSERT_BACKREF 128 /* A back reference in `backref'. */ 254#define ASSERT_LAST 128 255 256/* Tag directions. */ 257typedef enum { 258 TRE_TAG_MINIMIZE = 0, 259 TRE_TAG_MAXIMIZE, 260 TRE_TAG_LEFT_MAXIMIZE, 261} tre_tag_direction_t; 262 263/* Parameters that can be changed dynamically while matching. */ 264typedef enum { 265 TRE_PARAM_COST_INS = 0, 266 TRE_PARAM_COST_DEL = 1, 267 TRE_PARAM_COST_SUBST = 2, 268 TRE_PARAM_COST_MAX = 3, 269 TRE_PARAM_MAX_INS = 4, 270 TRE_PARAM_MAX_DEL = 5, 271 TRE_PARAM_MAX_SUBST = 6, 272 TRE_PARAM_MAX_ERR = 7, 273 TRE_PARAM_DEPTH = 8, 274 TRE_PARAM_LAST = 9 275} tre_param_t; 276 277/* Unset matching parameter */ 278#define TRE_PARAM_UNSET -1 279 280/* Signifies the default matching parameter value. */ 281#define TRE_PARAM_DEFAULT -2 282 283/* Instructions to compute submatch register values from tag values 284 after a successful match. */ 285struct tre_submatch_data { 286 /* Tag that gives the value for rm_so (submatch start offset). */ 287 int so_tag; 288 /* Tag that gives the value for rm_eo (submatch end offset). */ 289 int eo_tag; 290}; 291 292typedef struct tre_submatch_data tre_submatch_data_t; 293 294 295/* TNFA definition. */ 296typedef struct tnfa tre_tnfa_t; 297 298struct tnfa { 299 tre_tnfa_transition_t *transitions; 300 tre_tnfa_transition_t *initial; 301 tre_tnfa_transition_t *final; 302 tre_submatch_data_t *submatch_data; 303#ifdef USE_FIRSTPOS_CHARS /* not defined */ 304 char *firstpos_chars; 305#endif /* USE_FIRSTPOS_CHARS */ 306 tre_tag_direction_t *tag_directions; 307 int *minimal_tags; 308 tre_last_matched_branch_t *last_matched_branch; 309 locale_t loc; 310 unsigned int num_transitions; 311 int first_char; 312 unsigned int num_submatches; 313 unsigned int num_submatches_invisible; 314 int num_tags; 315 int num_minimals; 316 int end_tag; 317 int num_states; 318 int cflags; 319 int have_backrefs; 320 int num_reorder_tags; 321 int have_approx; 322 int params_depth; 323}; 324 325__private_extern__ int 326tre_compile(regex_t * __restrict preg, const tre_char_t * __restrict regex, size_t n, int cflags, 327 locale_t __restrict loc); 328 329__private_extern__ void 330tre_free(regex_t *preg); 331 332__private_extern__ reg_errcode_t 333tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[ __restrict ], int cflags, 334 const tre_tnfa_t * __restrict tnfa, const tre_tag_t * __restrict tags, int match_eo); 335 336__private_extern__ reg_errcode_t 337tre_tnfa_run_parallel(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len, 338 tre_str_type_t type, tre_tag_t * __restrict match_tags, int eflags, 339 int * __restrict match_end_ofs); 340 341__private_extern__ reg_errcode_t 342tre_tnfa_run_backtrack(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, 343 int len, tre_str_type_t type, tre_tag_t * __restrict match_tags, 344 int eflags, int * __restrict match_end_ofs); 345 346#ifdef TRE_APPROX 347__private_extern__ reg_errcode_t 348tre_tnfa_run_approx(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len, 349 tre_str_type_t type, tre_tag_t * __restrict match_tags, 350 regamatch_t * __restrict match, regaparams_t params, 351 int eflags, int * __restrict match_end_ofs); 352#endif /* TRE_APPROX */ 353 354#endif /* TRE_INTERNAL_H */ 355 356/* EOF */ 357