1146040Stjr/* Extended regular expression matching and search library. 2250724Sjkim Copyright (C) 2002-2005,2007,2009,2010,2011 Free Software Foundation, Inc. 3146040Stjr This file is part of the GNU C Library. 4146040Stjr Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 5146040Stjr 6146040Stjr The GNU C Library is free software; you can redistribute it and/or 7146040Stjr modify it under the terms of the GNU Lesser General Public 8146040Stjr License as published by the Free Software Foundation; either 9146040Stjr version 2.1 of the License, or (at your option) any later version. 10146040Stjr 11146040Stjr The GNU C Library is distributed in the hope that it will be useful, 12146040Stjr but WITHOUT ANY WARRANTY; without even the implied warranty of 13146040Stjr MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14146040Stjr Lesser General Public License for more details. 15146040Stjr 16146040Stjr You should have received a copy of the GNU Lesser General Public 17250724Sjkim License along with the GNU C Library; if not, see 18250724Sjkim <http://www.gnu.org/licenses/>. */ 19146040Stjr 20146040Stjrstatic reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, 21146040Stjr int n) internal_function; 22146040Stjrstatic void match_ctx_clean (re_match_context_t *mctx) internal_function; 23146040Stjrstatic void match_ctx_free (re_match_context_t *cache) internal_function; 24146040Stjrstatic reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, 25146040Stjr int str_idx, int from, int to) 26146040Stjr internal_function; 27250724Sjkimstatic int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) 28146040Stjr internal_function; 29146040Stjrstatic reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, 30146040Stjr int str_idx) internal_function; 31146040Stjrstatic re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, 32146040Stjr int node, int str_idx) 33146040Stjr internal_function; 34146040Stjrstatic void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 35146040Stjr re_dfastate_t **limited_sts, int last_node, 36146040Stjr int last_str_idx) 37146040Stjr internal_function; 38146040Stjrstatic reg_errcode_t re_search_internal (const regex_t *preg, 39146040Stjr const char *string, int length, 40146040Stjr int start, int range, int stop, 41146040Stjr size_t nmatch, regmatch_t pmatch[], 42146040Stjr int eflags) internal_function; 43146040Stjrstatic int re_search_2_stub (struct re_pattern_buffer *bufp, 44146040Stjr const char *string1, int length1, 45146040Stjr const char *string2, int length2, 46146040Stjr int start, int range, struct re_registers *regs, 47146040Stjr int stop, int ret_len) internal_function; 48146040Stjrstatic int re_search_stub (struct re_pattern_buffer *bufp, 49146040Stjr const char *string, int length, int start, 50146040Stjr int range, int stop, struct re_registers *regs, 51146040Stjr int ret_len) internal_function; 52146040Stjrstatic unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, 53146040Stjr int nregs, int regs_allocated) internal_function; 54146040Stjrstatic reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) 55146040Stjr internal_function; 56146040Stjrstatic int check_matching (re_match_context_t *mctx, int fl_longest_match, 57250724Sjkim int *p_match_first) internal_function; 58146040Stjrstatic int check_halt_state_context (const re_match_context_t *mctx, 59146040Stjr const re_dfastate_t *state, int idx) 60146040Stjr internal_function; 61250724Sjkimstatic void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 62146040Stjr regmatch_t *prev_idx_match, int cur_node, 63146040Stjr int cur_idx, int nmatch) internal_function; 64146040Stjrstatic reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, 65146040Stjr int str_idx, int dest_node, int nregs, 66146040Stjr regmatch_t *regs, 67250724Sjkim re_node_set *eps_via_nodes) 68250724Sjkim internal_function; 69146040Stjrstatic reg_errcode_t set_regs (const regex_t *preg, 70146040Stjr const re_match_context_t *mctx, 71146040Stjr size_t nmatch, regmatch_t *pmatch, 72146040Stjr int fl_backtrack) internal_function; 73250724Sjkimstatic reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) 74250724Sjkim internal_function; 75146040Stjr 76146040Stjr#ifdef RE_ENABLE_I18N 77146040Stjrstatic int sift_states_iter_mb (const re_match_context_t *mctx, 78146040Stjr re_sift_context_t *sctx, 79250724Sjkim int node_idx, int str_idx, int max_str_idx) 80250724Sjkim internal_function; 81146040Stjr#endif /* RE_ENABLE_I18N */ 82250724Sjkimstatic reg_errcode_t sift_states_backward (const re_match_context_t *mctx, 83250724Sjkim re_sift_context_t *sctx) 84250724Sjkim internal_function; 85250724Sjkimstatic reg_errcode_t build_sifted_states (const re_match_context_t *mctx, 86146040Stjr re_sift_context_t *sctx, int str_idx, 87250724Sjkim re_node_set *cur_dest) 88250724Sjkim internal_function; 89250724Sjkimstatic reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, 90146040Stjr re_sift_context_t *sctx, 91146040Stjr int str_idx, 92250724Sjkim re_node_set *dest_nodes) 93250724Sjkim internal_function; 94250724Sjkimstatic reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, 95146040Stjr re_node_set *dest_nodes, 96250724Sjkim const re_node_set *candidates) 97250724Sjkim internal_function; 98250724Sjkimstatic int check_dst_limits (const re_match_context_t *mctx, 99250724Sjkim re_node_set *limits, 100146040Stjr int dst_node, int dst_idx, int src_node, 101146040Stjr int src_idx) internal_function; 102250724Sjkimstatic int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, 103146040Stjr int boundaries, int subexp_idx, 104250724Sjkim int from_node, int bkref_idx) 105250724Sjkim internal_function; 106250724Sjkimstatic int check_dst_limits_calc_pos (const re_match_context_t *mctx, 107146040Stjr int limit, int subexp_idx, 108146040Stjr int node, int str_idx, 109146040Stjr int bkref_idx) internal_function; 110250724Sjkimstatic reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, 111146040Stjr re_node_set *dest_nodes, 112146040Stjr const re_node_set *candidates, 113146040Stjr re_node_set *limits, 114146040Stjr struct re_backref_cache_entry *bkref_ents, 115146040Stjr int str_idx) internal_function; 116250724Sjkimstatic reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, 117146040Stjr re_sift_context_t *sctx, 118250724Sjkim int str_idx, const re_node_set *candidates) 119250724Sjkim internal_function; 120250724Sjkimstatic reg_errcode_t merge_state_array (const re_dfa_t *dfa, 121250724Sjkim re_dfastate_t **dst, 122250724Sjkim re_dfastate_t **src, int num) 123250724Sjkim internal_function; 124146040Stjrstatic re_dfastate_t *find_recover_state (reg_errcode_t *err, 125146040Stjr re_match_context_t *mctx) internal_function; 126146040Stjrstatic re_dfastate_t *transit_state (reg_errcode_t *err, 127146040Stjr re_match_context_t *mctx, 128146040Stjr re_dfastate_t *state) internal_function; 129146040Stjrstatic re_dfastate_t *merge_state_with_log (reg_errcode_t *err, 130146040Stjr re_match_context_t *mctx, 131250724Sjkim re_dfastate_t *next_state) 132250724Sjkim internal_function; 133146040Stjrstatic reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, 134146040Stjr re_node_set *cur_nodes, 135146040Stjr int str_idx) internal_function; 136146040Stjr#if 0 137146040Stjrstatic re_dfastate_t *transit_state_sb (reg_errcode_t *err, 138146040Stjr re_match_context_t *mctx, 139250724Sjkim re_dfastate_t *pstate) 140250724Sjkim internal_function; 141146040Stjr#endif 142146040Stjr#ifdef RE_ENABLE_I18N 143146040Stjrstatic reg_errcode_t transit_state_mb (re_match_context_t *mctx, 144250724Sjkim re_dfastate_t *pstate) 145250724Sjkim internal_function; 146146040Stjr#endif /* RE_ENABLE_I18N */ 147146040Stjrstatic reg_errcode_t transit_state_bkref (re_match_context_t *mctx, 148250724Sjkim const re_node_set *nodes) 149250724Sjkim internal_function; 150146040Stjrstatic reg_errcode_t get_subexp (re_match_context_t *mctx, 151250724Sjkim int bkref_node, int bkref_str_idx) 152250724Sjkim internal_function; 153146040Stjrstatic reg_errcode_t get_subexp_sub (re_match_context_t *mctx, 154146040Stjr const re_sub_match_top_t *sub_top, 155146040Stjr re_sub_match_last_t *sub_last, 156250724Sjkim int bkref_node, int bkref_str) 157250724Sjkim internal_function; 158146040Stjrstatic int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 159146040Stjr int subexp_idx, int type) internal_function; 160146040Stjrstatic reg_errcode_t check_arrival (re_match_context_t *mctx, 161146040Stjr state_array_t *path, int top_node, 162146040Stjr int top_str, int last_node, int last_str, 163146040Stjr int type) internal_function; 164146040Stjrstatic reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, 165146040Stjr int str_idx, 166146040Stjr re_node_set *cur_nodes, 167250724Sjkim re_node_set *next_nodes) 168250724Sjkim internal_function; 169250724Sjkimstatic reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, 170146040Stjr re_node_set *cur_nodes, 171250724Sjkim int ex_subexp, int type) 172250724Sjkim internal_function; 173250724Sjkimstatic reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, 174146040Stjr re_node_set *dst_nodes, 175146040Stjr int target, int ex_subexp, 176146040Stjr int type) internal_function; 177146040Stjrstatic reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, 178146040Stjr re_node_set *cur_nodes, int cur_str, 179250724Sjkim int subexp_num, int type) 180250724Sjkim internal_function; 181250724Sjkimstatic int build_trtable (const re_dfa_t *dfa, 182146040Stjr re_dfastate_t *state) internal_function; 183146040Stjr#ifdef RE_ENABLE_I18N 184250724Sjkimstatic int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, 185250724Sjkim const re_string_t *input, int idx) 186250724Sjkim internal_function; 187146040Stjr# ifdef _LIBC 188146040Stjrstatic unsigned int find_collation_sequence_value (const unsigned char *mbs, 189250724Sjkim size_t name_len) 190250724Sjkim internal_function; 191146040Stjr# endif /* _LIBC */ 192146040Stjr#endif /* RE_ENABLE_I18N */ 193250724Sjkimstatic int group_nodes_into_DFAstates (const re_dfa_t *dfa, 194146040Stjr const re_dfastate_t *state, 195146040Stjr re_node_set *states_node, 196250724Sjkim bitset_t *states_ch) internal_function; 197146040Stjrstatic int check_node_accept (const re_match_context_t *mctx, 198250724Sjkim const re_token_t *node, int idx) 199250724Sjkim internal_function; 200251435Sjkimstatic reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) 201250724Sjkim internal_function; 202146040Stjr 203146040Stjr/* Entry point for POSIX code. */ 204146040Stjr 205146040Stjr/* regexec searches for a given pattern, specified by PREG, in the 206146040Stjr string STRING. 207146040Stjr 208146040Stjr If NMATCH is zero or REG_NOSUB was set in the cflags argument to 209146040Stjr `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 210146040Stjr least NMATCH elements, and we set them to the offsets of the 211146040Stjr corresponding matched substrings. 212146040Stjr 213146040Stjr EFLAGS specifies `execution flags' which affect matching: if 214146040Stjr REG_NOTBOL is set, then ^ does not match at the beginning of the 215146040Stjr string; if REG_NOTEOL is set, then $ does not match at the end. 216146040Stjr 217146040Stjr We return 0 if we find a match and REG_NOMATCH if not. */ 218146040Stjr 219146040Stjrint 220146040Stjrregexec (preg, string, nmatch, pmatch, eflags) 221146040Stjr const regex_t *__restrict preg; 222146040Stjr const char *__restrict string; 223146040Stjr size_t nmatch; 224146040Stjr regmatch_t pmatch[]; 225146040Stjr int eflags; 226146040Stjr{ 227146040Stjr reg_errcode_t err; 228146040Stjr int start, length; 229250724Sjkim re_dfa_t *dfa = (re_dfa_t *) preg->buffer; 230146040Stjr 231146040Stjr if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) 232146040Stjr return REG_BADPAT; 233146040Stjr 234146040Stjr if (eflags & REG_STARTEND) 235146040Stjr { 236146040Stjr start = pmatch[0].rm_so; 237146040Stjr length = pmatch[0].rm_eo; 238146040Stjr } 239146040Stjr else 240146040Stjr { 241146040Stjr start = 0; 242146040Stjr length = strlen (string); 243146040Stjr } 244250724Sjkim 245250724Sjkim __libc_lock_lock (dfa->lock); 246146040Stjr if (preg->no_sub) 247146040Stjr err = re_search_internal (preg, string, length, start, length - start, 248146040Stjr length, 0, NULL, eflags); 249146040Stjr else 250146040Stjr err = re_search_internal (preg, string, length, start, length - start, 251146040Stjr length, nmatch, pmatch, eflags); 252250724Sjkim __libc_lock_unlock (dfa->lock); 253146040Stjr return err != REG_NOERROR; 254146040Stjr} 255146040Stjr 256146040Stjr#ifdef _LIBC 257146040Stjr# include <shlib-compat.h> 258146040Stjrversioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); 259146040Stjr 260146040Stjr# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) 261146040Stjr__typeof__ (__regexec) __compat_regexec; 262146040Stjr 263146040Stjrint 264146040Stjrattribute_compat_text_section 265146040Stjr__compat_regexec (const regex_t *__restrict preg, 266146040Stjr const char *__restrict string, size_t nmatch, 267146040Stjr regmatch_t pmatch[], int eflags) 268146040Stjr{ 269146040Stjr return regexec (preg, string, nmatch, pmatch, 270146040Stjr eflags & (REG_NOTBOL | REG_NOTEOL)); 271146040Stjr} 272146040Stjrcompat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); 273146040Stjr# endif 274146040Stjr#endif 275146040Stjr 276146040Stjr/* Entry points for GNU code. */ 277146040Stjr 278146040Stjr/* re_match, re_search, re_match_2, re_search_2 279146040Stjr 280146040Stjr The former two functions operate on STRING with length LENGTH, 281146040Stjr while the later two operate on concatenation of STRING1 and STRING2 282146040Stjr with lengths LENGTH1 and LENGTH2, respectively. 283146040Stjr 284146040Stjr re_match() matches the compiled pattern in BUFP against the string, 285146040Stjr starting at index START. 286146040Stjr 287146040Stjr re_search() first tries matching at index START, then it tries to match 288146040Stjr starting from index START + 1, and so on. The last start position tried 289146040Stjr is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same 290146040Stjr way as re_match().) 291146040Stjr 292146040Stjr The parameter STOP of re_{match,search}_2 specifies that no match exceeding 293146040Stjr the first STOP characters of the concatenation of the strings should be 294146040Stjr concerned. 295146040Stjr 296146040Stjr If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match 297146040Stjr and all groups is stroed in REGS. (For the "_2" variants, the offsets are 298146040Stjr computed relative to the concatenation, not relative to the individual 299146040Stjr strings.) 300146040Stjr 301146040Stjr On success, re_match* functions return the length of the match, re_search* 302146040Stjr return the position of the start of the match. Return value -1 means no 303146040Stjr match was found and -2 indicates an internal error. */ 304146040Stjr 305146040Stjrint 306146040Stjrre_match (bufp, string, length, start, regs) 307146040Stjr struct re_pattern_buffer *bufp; 308146040Stjr const char *string; 309146040Stjr int length, start; 310146040Stjr struct re_registers *regs; 311146040Stjr{ 312146040Stjr return re_search_stub (bufp, string, length, start, 0, length, regs, 1); 313146040Stjr} 314146040Stjr#ifdef _LIBC 315146040Stjrweak_alias (__re_match, re_match) 316146040Stjr#endif 317146040Stjr 318146040Stjrint 319146040Stjrre_search (bufp, string, length, start, range, regs) 320146040Stjr struct re_pattern_buffer *bufp; 321146040Stjr const char *string; 322146040Stjr int length, start, range; 323146040Stjr struct re_registers *regs; 324146040Stjr{ 325146040Stjr return re_search_stub (bufp, string, length, start, range, length, regs, 0); 326146040Stjr} 327146040Stjr#ifdef _LIBC 328146040Stjrweak_alias (__re_search, re_search) 329146040Stjr#endif 330146040Stjr 331146040Stjrint 332146040Stjrre_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) 333146040Stjr struct re_pattern_buffer *bufp; 334146040Stjr const char *string1, *string2; 335146040Stjr int length1, length2, start, stop; 336146040Stjr struct re_registers *regs; 337146040Stjr{ 338146040Stjr return re_search_2_stub (bufp, string1, length1, string2, length2, 339146040Stjr start, 0, regs, stop, 1); 340146040Stjr} 341146040Stjr#ifdef _LIBC 342146040Stjrweak_alias (__re_match_2, re_match_2) 343146040Stjr#endif 344146040Stjr 345146040Stjrint 346146040Stjrre_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) 347146040Stjr struct re_pattern_buffer *bufp; 348146040Stjr const char *string1, *string2; 349146040Stjr int length1, length2, start, range, stop; 350146040Stjr struct re_registers *regs; 351146040Stjr{ 352146040Stjr return re_search_2_stub (bufp, string1, length1, string2, length2, 353146040Stjr start, range, regs, stop, 0); 354146040Stjr} 355146040Stjr#ifdef _LIBC 356146040Stjrweak_alias (__re_search_2, re_search_2) 357146040Stjr#endif 358146040Stjr 359146040Stjrstatic int 360146040Stjrre_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, 361146040Stjr stop, ret_len) 362146040Stjr struct re_pattern_buffer *bufp; 363146040Stjr const char *string1, *string2; 364146040Stjr int length1, length2, start, range, stop, ret_len; 365146040Stjr struct re_registers *regs; 366146040Stjr{ 367146040Stjr const char *str; 368146040Stjr int rval; 369146040Stjr int len = length1 + length2; 370250724Sjkim char *s = NULL; 371146040Stjr 372250724Sjkim if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) 373146040Stjr return -2; 374146040Stjr 375146040Stjr /* Concatenate the strings. */ 376146040Stjr if (length2 > 0) 377146040Stjr if (length1 > 0) 378146040Stjr { 379250724Sjkim s = re_malloc (char, len); 380146040Stjr 381146040Stjr if (BE (s == NULL, 0)) 382146040Stjr return -2; 383250724Sjkim#ifdef _LIBC 384250724Sjkim memcpy (__mempcpy (s, string1, length1), string2, length2); 385250724Sjkim#else 386146040Stjr memcpy (s, string1, length1); 387146040Stjr memcpy (s + length1, string2, length2); 388250724Sjkim#endif 389146040Stjr str = s; 390146040Stjr } 391146040Stjr else 392146040Stjr str = string2; 393146040Stjr else 394146040Stjr str = string1; 395146040Stjr 396250724Sjkim rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); 397250724Sjkim re_free (s); 398146040Stjr return rval; 399146040Stjr} 400146040Stjr 401146040Stjr/* The parameters have the same meaning as those of re_search. 402146040Stjr Additional parameters: 403146040Stjr If RET_LEN is nonzero the length of the match is returned (re_match style); 404146040Stjr otherwise the position of the match is returned. */ 405146040Stjr 406146040Stjrstatic int 407146040Stjrre_search_stub (bufp, string, length, start, range, stop, regs, ret_len) 408146040Stjr struct re_pattern_buffer *bufp; 409146040Stjr const char *string; 410146040Stjr int length, start, range, stop, ret_len; 411146040Stjr struct re_registers *regs; 412146040Stjr{ 413146040Stjr reg_errcode_t result; 414146040Stjr regmatch_t *pmatch; 415146040Stjr int nregs, rval; 416146040Stjr int eflags = 0; 417250724Sjkim re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; 418146040Stjr 419146040Stjr /* Check for out-of-range. */ 420146040Stjr if (BE (start < 0 || start > length, 0)) 421146040Stjr return -1; 422146040Stjr if (BE (start + range > length, 0)) 423146040Stjr range = length - start; 424146040Stjr else if (BE (start + range < 0, 0)) 425146040Stjr range = -start; 426146040Stjr 427250724Sjkim __libc_lock_lock (dfa->lock); 428250724Sjkim 429146040Stjr eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; 430146040Stjr eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; 431146040Stjr 432146040Stjr /* Compile fastmap if we haven't yet. */ 433146040Stjr if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) 434146040Stjr re_compile_fastmap (bufp); 435146040Stjr 436146040Stjr if (BE (bufp->no_sub, 0)) 437146040Stjr regs = NULL; 438146040Stjr 439146040Stjr /* We need at least 1 register. */ 440146040Stjr if (regs == NULL) 441146040Stjr nregs = 1; 442146040Stjr else if (BE (bufp->regs_allocated == REGS_FIXED && 443146040Stjr regs->num_regs < bufp->re_nsub + 1, 0)) 444146040Stjr { 445146040Stjr nregs = regs->num_regs; 446146040Stjr if (BE (nregs < 1, 0)) 447146040Stjr { 448146040Stjr /* Nothing can be copied to regs. */ 449146040Stjr regs = NULL; 450146040Stjr nregs = 1; 451146040Stjr } 452146040Stjr } 453146040Stjr else 454146040Stjr nregs = bufp->re_nsub + 1; 455146040Stjr pmatch = re_malloc (regmatch_t, nregs); 456146040Stjr if (BE (pmatch == NULL, 0)) 457250724Sjkim { 458250724Sjkim rval = -2; 459250724Sjkim goto out; 460250724Sjkim } 461146040Stjr 462146040Stjr result = re_search_internal (bufp, string, length, start, range, stop, 463146040Stjr nregs, pmatch, eflags); 464146040Stjr 465146040Stjr rval = 0; 466146040Stjr 467146040Stjr /* I hope we needn't fill ther regs with -1's when no match was found. */ 468146040Stjr if (result != REG_NOERROR) 469146040Stjr rval = -1; 470146040Stjr else if (regs != NULL) 471146040Stjr { 472146040Stjr /* If caller wants register contents data back, copy them. */ 473146040Stjr bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, 474146040Stjr bufp->regs_allocated); 475146040Stjr if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) 476146040Stjr rval = -2; 477146040Stjr } 478146040Stjr 479146040Stjr if (BE (rval == 0, 1)) 480146040Stjr { 481146040Stjr if (ret_len) 482146040Stjr { 483146040Stjr assert (pmatch[0].rm_so == start); 484146040Stjr rval = pmatch[0].rm_eo - start; 485146040Stjr } 486146040Stjr else 487146040Stjr rval = pmatch[0].rm_so; 488146040Stjr } 489146040Stjr re_free (pmatch); 490250724Sjkim out: 491250724Sjkim __libc_lock_unlock (dfa->lock); 492146040Stjr return rval; 493146040Stjr} 494146040Stjr 495146040Stjrstatic unsigned 496146040Stjrre_copy_regs (regs, pmatch, nregs, regs_allocated) 497146040Stjr struct re_registers *regs; 498146040Stjr regmatch_t *pmatch; 499146040Stjr int nregs, regs_allocated; 500146040Stjr{ 501146040Stjr int rval = REGS_REALLOCATE; 502146040Stjr int i; 503146040Stjr int need_regs = nregs + 1; 504146040Stjr /* We need one extra element beyond `num_regs' for the `-1' marker GNU code 505146040Stjr uses. */ 506146040Stjr 507146040Stjr /* Have the register data arrays been allocated? */ 508146040Stjr if (regs_allocated == REGS_UNALLOCATED) 509146040Stjr { /* No. So allocate them with malloc. */ 510146040Stjr regs->start = re_malloc (regoff_t, need_regs); 511250724Sjkim if (BE (regs->start == NULL, 0)) 512250724Sjkim return REGS_UNALLOCATED; 513146040Stjr regs->end = re_malloc (regoff_t, need_regs); 514250724Sjkim if (BE (regs->end == NULL, 0)) 515250724Sjkim { 516250724Sjkim re_free (regs->start); 517250724Sjkim return REGS_UNALLOCATED; 518250724Sjkim } 519146040Stjr regs->num_regs = need_regs; 520146040Stjr } 521146040Stjr else if (regs_allocated == REGS_REALLOCATE) 522146040Stjr { /* Yes. If we need more elements than were already 523146040Stjr allocated, reallocate them. If we need fewer, just 524146040Stjr leave it alone. */ 525146040Stjr if (BE (need_regs > regs->num_regs, 0)) 526146040Stjr { 527146040Stjr regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); 528250724Sjkim regoff_t *new_end; 529250724Sjkim if (BE (new_start == NULL, 0)) 530146040Stjr return REGS_UNALLOCATED; 531250724Sjkim new_end = re_realloc (regs->end, regoff_t, need_regs); 532250724Sjkim if (BE (new_end == NULL, 0)) 533250724Sjkim { 534250724Sjkim re_free (new_start); 535250724Sjkim return REGS_UNALLOCATED; 536250724Sjkim } 537146040Stjr regs->start = new_start; 538146040Stjr regs->end = new_end; 539146040Stjr regs->num_regs = need_regs; 540146040Stjr } 541146040Stjr } 542146040Stjr else 543146040Stjr { 544146040Stjr assert (regs_allocated == REGS_FIXED); 545146040Stjr /* This function may not be called with REGS_FIXED and nregs too big. */ 546146040Stjr assert (regs->num_regs >= nregs); 547146040Stjr rval = REGS_FIXED; 548146040Stjr } 549146040Stjr 550146040Stjr /* Copy the regs. */ 551146040Stjr for (i = 0; i < nregs; ++i) 552146040Stjr { 553146040Stjr regs->start[i] = pmatch[i].rm_so; 554146040Stjr regs->end[i] = pmatch[i].rm_eo; 555146040Stjr } 556146040Stjr for ( ; i < regs->num_regs; ++i) 557146040Stjr regs->start[i] = regs->end[i] = -1; 558146040Stjr 559146040Stjr return rval; 560146040Stjr} 561146040Stjr 562146040Stjr/* Set REGS to hold NUM_REGS registers, storing them in STARTS and 563146040Stjr ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 564146040Stjr this memory for recording register information. STARTS and ENDS 565146040Stjr must be allocated using the malloc library routine, and must each 566146040Stjr be at least NUM_REGS * sizeof (regoff_t) bytes long. 567146040Stjr 568146040Stjr If NUM_REGS == 0, then subsequent matches should allocate their own 569146040Stjr register data. 570146040Stjr 571146040Stjr Unless this function is called, the first search or match using 572146040Stjr PATTERN_BUFFER will allocate its own register data, without 573146040Stjr freeing the old data. */ 574146040Stjr 575146040Stjrvoid 576146040Stjrre_set_registers (bufp, regs, num_regs, starts, ends) 577146040Stjr struct re_pattern_buffer *bufp; 578146040Stjr struct re_registers *regs; 579146040Stjr unsigned num_regs; 580146040Stjr regoff_t *starts, *ends; 581146040Stjr{ 582146040Stjr if (num_regs) 583146040Stjr { 584146040Stjr bufp->regs_allocated = REGS_REALLOCATE; 585146040Stjr regs->num_regs = num_regs; 586146040Stjr regs->start = starts; 587146040Stjr regs->end = ends; 588146040Stjr } 589146040Stjr else 590146040Stjr { 591146040Stjr bufp->regs_allocated = REGS_UNALLOCATED; 592146040Stjr regs->num_regs = 0; 593146040Stjr regs->start = regs->end = (regoff_t *) 0; 594146040Stjr } 595146040Stjr} 596146040Stjr#ifdef _LIBC 597146040Stjrweak_alias (__re_set_registers, re_set_registers) 598146040Stjr#endif 599146040Stjr 600146040Stjr/* Entry points compatible with 4.2 BSD regex library. We don't define 601146040Stjr them unless specifically requested. */ 602146040Stjr 603146040Stjr#if defined _REGEX_RE_COMP || defined _LIBC 604146040Stjrint 605146040Stjr# ifdef _LIBC 606146040Stjrweak_function 607146040Stjr# endif 608146040Stjrre_exec (s) 609146040Stjr const char *s; 610146040Stjr{ 611146040Stjr return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); 612146040Stjr} 613146040Stjr#endif /* _REGEX_RE_COMP */ 614146040Stjr 615146040Stjr/* Internal entry point. */ 616146040Stjr 617146040Stjr/* Searches for a compiled pattern PREG in the string STRING, whose 618146040Stjr length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same 619146040Stjr mingings with regexec. START, and RANGE have the same meanings 620146040Stjr with re_search. 621146040Stjr Return REG_NOERROR if we find a match, and REG_NOMATCH if not, 622146040Stjr otherwise return the error code. 623146040Stjr Note: We assume front end functions already check ranges. 624146040Stjr (START + RANGE >= 0 && START + RANGE <= LENGTH) */ 625146040Stjr 626146040Stjrstatic reg_errcode_t 627250724Sjkim__attribute_warn_unused_result__ 628146040Stjrre_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, 629146040Stjr eflags) 630146040Stjr const regex_t *preg; 631146040Stjr const char *string; 632146040Stjr int length, start, range, stop, eflags; 633146040Stjr size_t nmatch; 634146040Stjr regmatch_t pmatch[]; 635146040Stjr{ 636146040Stjr reg_errcode_t err; 637250724Sjkim const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; 638146040Stjr int left_lim, right_lim, incr; 639146040Stjr int fl_longest_match, match_first, match_kind, match_last = -1; 640146040Stjr int extra_nmatch; 641146040Stjr int sb, ch; 642146040Stjr#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) 643146040Stjr re_match_context_t mctx = { .dfa = dfa }; 644146040Stjr#else 645146040Stjr re_match_context_t mctx; 646146040Stjr#endif 647146040Stjr char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate 648146040Stjr && range && !preg->can_be_null) ? preg->fastmap : NULL; 649250724Sjkim RE_TRANSLATE_TYPE t = preg->translate; 650146040Stjr 651146040Stjr#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) 652146040Stjr memset (&mctx, '\0', sizeof (re_match_context_t)); 653146040Stjr mctx.dfa = dfa; 654146040Stjr#endif 655146040Stjr 656146040Stjr extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; 657146040Stjr nmatch -= extra_nmatch; 658146040Stjr 659146040Stjr /* Check if the DFA haven't been compiled. */ 660146040Stjr if (BE (preg->used == 0 || dfa->init_state == NULL 661146040Stjr || dfa->init_state_word == NULL || dfa->init_state_nl == NULL 662146040Stjr || dfa->init_state_begbuf == NULL, 0)) 663146040Stjr return REG_NOMATCH; 664146040Stjr 665146040Stjr#ifdef DEBUG 666146040Stjr /* We assume front-end functions already check them. */ 667146040Stjr assert (start + range >= 0 && start + range <= length); 668146040Stjr#endif 669146040Stjr 670146040Stjr /* If initial states with non-begbuf contexts have no elements, 671146040Stjr the regex must be anchored. If preg->newline_anchor is set, 672146040Stjr we'll never use init_state_nl, so do not check it. */ 673146040Stjr if (dfa->init_state->nodes.nelem == 0 674146040Stjr && dfa->init_state_word->nodes.nelem == 0 675146040Stjr && (dfa->init_state_nl->nodes.nelem == 0 676146040Stjr || !preg->newline_anchor)) 677146040Stjr { 678146040Stjr if (start != 0 && start + range != 0) 679250724Sjkim return REG_NOMATCH; 680146040Stjr start = range = 0; 681146040Stjr } 682146040Stjr 683146040Stjr /* We must check the longest matching, if nmatch > 0. */ 684146040Stjr fl_longest_match = (nmatch != 0 || dfa->nbackref); 685146040Stjr 686146040Stjr err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, 687146040Stjr preg->translate, preg->syntax & RE_ICASE, dfa); 688146040Stjr if (BE (err != REG_NOERROR, 0)) 689146040Stjr goto free_return; 690146040Stjr mctx.input.stop = stop; 691146040Stjr mctx.input.raw_stop = stop; 692146040Stjr mctx.input.newline_anchor = preg->newline_anchor; 693146040Stjr 694146040Stjr err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); 695146040Stjr if (BE (err != REG_NOERROR, 0)) 696146040Stjr goto free_return; 697146040Stjr 698146040Stjr /* We will log all the DFA states through which the dfa pass, 699146040Stjr if nmatch > 1, or this dfa has "multibyte node", which is a 700146040Stjr back-reference or a node which can accept multibyte character or 701146040Stjr multi character collating element. */ 702146040Stjr if (nmatch > 1 || dfa->has_mb_node) 703146040Stjr { 704250724Sjkim /* Avoid overflow. */ 705250724Sjkim if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) 706250724Sjkim { 707250724Sjkim err = REG_ESPACE; 708250724Sjkim goto free_return; 709250724Sjkim } 710250724Sjkim 711146040Stjr mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); 712146040Stjr if (BE (mctx.state_log == NULL, 0)) 713146040Stjr { 714146040Stjr err = REG_ESPACE; 715146040Stjr goto free_return; 716146040Stjr } 717146040Stjr } 718146040Stjr else 719146040Stjr mctx.state_log = NULL; 720146040Stjr 721146040Stjr match_first = start; 722146040Stjr mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF 723146040Stjr : CONTEXT_NEWLINE | CONTEXT_BEGBUF; 724146040Stjr 725146040Stjr /* Check incrementally whether of not the input string match. */ 726146040Stjr incr = (range < 0) ? -1 : 1; 727146040Stjr left_lim = (range < 0) ? start + range : start; 728146040Stjr right_lim = (range < 0) ? start : start + range; 729146040Stjr sb = dfa->mb_cur_max == 1; 730146040Stjr match_kind = 731146040Stjr (fastmap 732146040Stjr ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) 733146040Stjr | (range >= 0 ? 2 : 0) 734146040Stjr | (t != NULL ? 1 : 0)) 735146040Stjr : 8); 736146040Stjr 737146040Stjr for (;; match_first += incr) 738146040Stjr { 739146040Stjr err = REG_NOMATCH; 740146040Stjr if (match_first < left_lim || right_lim < match_first) 741146040Stjr goto free_return; 742146040Stjr 743146040Stjr /* Advance as rapidly as possible through the string, until we 744146040Stjr find a plausible place to start matching. This may be done 745146040Stjr with varying efficiency, so there are various possibilities: 746146040Stjr only the most common of them are specialized, in order to 747146040Stjr save on code size. We use a switch statement for speed. */ 748146040Stjr switch (match_kind) 749146040Stjr { 750146040Stjr case 8: 751146040Stjr /* No fastmap. */ 752146040Stjr break; 753146040Stjr 754146040Stjr case 7: 755146040Stjr /* Fastmap with single-byte translation, match forward. */ 756146040Stjr while (BE (match_first < right_lim, 1) 757146040Stjr && !fastmap[t[(unsigned char) string[match_first]]]) 758146040Stjr ++match_first; 759146040Stjr goto forward_match_found_start_or_reached_end; 760146040Stjr 761146040Stjr case 6: 762146040Stjr /* Fastmap without translation, match forward. */ 763146040Stjr while (BE (match_first < right_lim, 1) 764146040Stjr && !fastmap[(unsigned char) string[match_first]]) 765146040Stjr ++match_first; 766146040Stjr 767146040Stjr forward_match_found_start_or_reached_end: 768146040Stjr if (BE (match_first == right_lim, 0)) 769146040Stjr { 770146040Stjr ch = match_first >= length 771146040Stjr ? 0 : (unsigned char) string[match_first]; 772146040Stjr if (!fastmap[t ? t[ch] : ch]) 773146040Stjr goto free_return; 774146040Stjr } 775146040Stjr break; 776146040Stjr 777146040Stjr case 4: 778146040Stjr case 5: 779146040Stjr /* Fastmap without multi-byte translation, match backwards. */ 780146040Stjr while (match_first >= left_lim) 781146040Stjr { 782146040Stjr ch = match_first >= length 783146040Stjr ? 0 : (unsigned char) string[match_first]; 784146040Stjr if (fastmap[t ? t[ch] : ch]) 785146040Stjr break; 786146040Stjr --match_first; 787146040Stjr } 788146040Stjr if (match_first < left_lim) 789146040Stjr goto free_return; 790146040Stjr break; 791146040Stjr 792146040Stjr default: 793146040Stjr /* In this case, we can't determine easily the current byte, 794146040Stjr since it might be a component byte of a multibyte 795146040Stjr character. Then we use the constructed buffer instead. */ 796146040Stjr for (;;) 797146040Stjr { 798146040Stjr /* If MATCH_FIRST is out of the valid range, reconstruct the 799146040Stjr buffers. */ 800146040Stjr unsigned int offset = match_first - mctx.input.raw_mbs_idx; 801146040Stjr if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) 802146040Stjr { 803146040Stjr err = re_string_reconstruct (&mctx.input, match_first, 804146040Stjr eflags); 805146040Stjr if (BE (err != REG_NOERROR, 0)) 806146040Stjr goto free_return; 807146040Stjr 808146040Stjr offset = match_first - mctx.input.raw_mbs_idx; 809146040Stjr } 810146040Stjr /* If MATCH_FIRST is out of the buffer, leave it as '\0'. 811146040Stjr Note that MATCH_FIRST must not be smaller than 0. */ 812146040Stjr ch = (match_first >= length 813146040Stjr ? 0 : re_string_byte_at (&mctx.input, offset)); 814146040Stjr if (fastmap[ch]) 815146040Stjr break; 816146040Stjr match_first += incr; 817146040Stjr if (match_first < left_lim || match_first > right_lim) 818250724Sjkim { 819250724Sjkim err = REG_NOMATCH; 820250724Sjkim goto free_return; 821250724Sjkim } 822146040Stjr } 823146040Stjr break; 824146040Stjr } 825146040Stjr 826146040Stjr /* Reconstruct the buffers so that the matcher can assume that 827146040Stjr the matching starts from the beginning of the buffer. */ 828146040Stjr err = re_string_reconstruct (&mctx.input, match_first, eflags); 829146040Stjr if (BE (err != REG_NOERROR, 0)) 830146040Stjr goto free_return; 831146040Stjr 832146040Stjr#ifdef RE_ENABLE_I18N 833146040Stjr /* Don't consider this char as a possible match start if it part, 834146040Stjr yet isn't the head, of a multibyte character. */ 835146040Stjr if (!sb && !re_string_first_byte (&mctx.input, 0)) 836146040Stjr continue; 837146040Stjr#endif 838146040Stjr 839146040Stjr /* It seems to be appropriate one, then use the matcher. */ 840146040Stjr /* We assume that the matching starts from 0. */ 841146040Stjr mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; 842146040Stjr match_last = check_matching (&mctx, fl_longest_match, 843146040Stjr range >= 0 ? &match_first : NULL); 844146040Stjr if (match_last != -1) 845146040Stjr { 846146040Stjr if (BE (match_last == -2, 0)) 847146040Stjr { 848146040Stjr err = REG_ESPACE; 849146040Stjr goto free_return; 850146040Stjr } 851146040Stjr else 852146040Stjr { 853146040Stjr mctx.match_last = match_last; 854146040Stjr if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) 855146040Stjr { 856146040Stjr re_dfastate_t *pstate = mctx.state_log[match_last]; 857146040Stjr mctx.last_node = check_halt_state_context (&mctx, pstate, 858146040Stjr match_last); 859146040Stjr } 860146040Stjr if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) 861146040Stjr || dfa->nbackref) 862146040Stjr { 863146040Stjr err = prune_impossible_nodes (&mctx); 864146040Stjr if (err == REG_NOERROR) 865146040Stjr break; 866146040Stjr if (BE (err != REG_NOMATCH, 0)) 867146040Stjr goto free_return; 868146040Stjr match_last = -1; 869146040Stjr } 870146040Stjr else 871146040Stjr break; /* We found a match. */ 872146040Stjr } 873146040Stjr } 874146040Stjr 875146040Stjr match_ctx_clean (&mctx); 876146040Stjr } 877146040Stjr 878146040Stjr#ifdef DEBUG 879146040Stjr assert (match_last != -1); 880146040Stjr assert (err == REG_NOERROR); 881146040Stjr#endif 882146040Stjr 883146040Stjr /* Set pmatch[] if we need. */ 884146040Stjr if (nmatch > 0) 885146040Stjr { 886146040Stjr int reg_idx; 887146040Stjr 888146040Stjr /* Initialize registers. */ 889146040Stjr for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) 890146040Stjr pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; 891146040Stjr 892146040Stjr /* Set the points where matching start/end. */ 893146040Stjr pmatch[0].rm_so = 0; 894146040Stjr pmatch[0].rm_eo = mctx.match_last; 895146040Stjr 896146040Stjr if (!preg->no_sub && nmatch > 1) 897146040Stjr { 898146040Stjr err = set_regs (preg, &mctx, nmatch, pmatch, 899146040Stjr dfa->has_plural_match && dfa->nbackref > 0); 900146040Stjr if (BE (err != REG_NOERROR, 0)) 901146040Stjr goto free_return; 902146040Stjr } 903146040Stjr 904146040Stjr /* At last, add the offset to the each registers, since we slided 905146040Stjr the buffers so that we could assume that the matching starts 906146040Stjr from 0. */ 907146040Stjr for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 908146040Stjr if (pmatch[reg_idx].rm_so != -1) 909146040Stjr { 910146040Stjr#ifdef RE_ENABLE_I18N 911146040Stjr if (BE (mctx.input.offsets_needed != 0, 0)) 912146040Stjr { 913250724Sjkim pmatch[reg_idx].rm_so = 914250724Sjkim (pmatch[reg_idx].rm_so == mctx.input.valid_len 915250724Sjkim ? mctx.input.valid_raw_len 916250724Sjkim : mctx.input.offsets[pmatch[reg_idx].rm_so]); 917250724Sjkim pmatch[reg_idx].rm_eo = 918250724Sjkim (pmatch[reg_idx].rm_eo == mctx.input.valid_len 919250724Sjkim ? mctx.input.valid_raw_len 920250724Sjkim : mctx.input.offsets[pmatch[reg_idx].rm_eo]); 921146040Stjr } 922146040Stjr#else 923146040Stjr assert (mctx.input.offsets_needed == 0); 924146040Stjr#endif 925146040Stjr pmatch[reg_idx].rm_so += match_first; 926146040Stjr pmatch[reg_idx].rm_eo += match_first; 927146040Stjr } 928146040Stjr for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) 929146040Stjr { 930146040Stjr pmatch[nmatch + reg_idx].rm_so = -1; 931146040Stjr pmatch[nmatch + reg_idx].rm_eo = -1; 932146040Stjr } 933146040Stjr 934146040Stjr if (dfa->subexp_map) 935250724Sjkim for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) 936250724Sjkim if (dfa->subexp_map[reg_idx] != reg_idx) 937250724Sjkim { 938250724Sjkim pmatch[reg_idx + 1].rm_so 939250724Sjkim = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; 940250724Sjkim pmatch[reg_idx + 1].rm_eo 941250724Sjkim = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; 942250724Sjkim } 943146040Stjr } 944146040Stjr 945146040Stjr free_return: 946146040Stjr re_free (mctx.state_log); 947146040Stjr if (dfa->nbackref) 948146040Stjr match_ctx_free (&mctx); 949146040Stjr re_string_destruct (&mctx.input); 950146040Stjr return err; 951146040Stjr} 952146040Stjr 953146040Stjrstatic reg_errcode_t 954250724Sjkim__attribute_warn_unused_result__ 955146040Stjrprune_impossible_nodes (mctx) 956146040Stjr re_match_context_t *mctx; 957146040Stjr{ 958250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 959146040Stjr int halt_node, match_last; 960146040Stjr reg_errcode_t ret; 961146040Stjr re_dfastate_t **sifted_states; 962146040Stjr re_dfastate_t **lim_states = NULL; 963146040Stjr re_sift_context_t sctx; 964146040Stjr#ifdef DEBUG 965146040Stjr assert (mctx->state_log != NULL); 966146040Stjr#endif 967146040Stjr match_last = mctx->match_last; 968146040Stjr halt_node = mctx->last_node; 969250724Sjkim 970250724Sjkim /* Avoid overflow. */ 971250724Sjkim if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) 972250724Sjkim return REG_ESPACE; 973250724Sjkim 974146040Stjr sifted_states = re_malloc (re_dfastate_t *, match_last + 1); 975146040Stjr if (BE (sifted_states == NULL, 0)) 976146040Stjr { 977146040Stjr ret = REG_ESPACE; 978146040Stjr goto free_return; 979146040Stjr } 980146040Stjr if (dfa->nbackref) 981146040Stjr { 982146040Stjr lim_states = re_malloc (re_dfastate_t *, match_last + 1); 983146040Stjr if (BE (lim_states == NULL, 0)) 984146040Stjr { 985146040Stjr ret = REG_ESPACE; 986146040Stjr goto free_return; 987146040Stjr } 988146040Stjr while (1) 989146040Stjr { 990146040Stjr memset (lim_states, '\0', 991146040Stjr sizeof (re_dfastate_t *) * (match_last + 1)); 992146040Stjr sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, 993146040Stjr match_last); 994146040Stjr ret = sift_states_backward (mctx, &sctx); 995146040Stjr re_node_set_free (&sctx.limits); 996146040Stjr if (BE (ret != REG_NOERROR, 0)) 997146040Stjr goto free_return; 998146040Stjr if (sifted_states[0] != NULL || lim_states[0] != NULL) 999146040Stjr break; 1000146040Stjr do 1001146040Stjr { 1002146040Stjr --match_last; 1003146040Stjr if (match_last < 0) 1004146040Stjr { 1005146040Stjr ret = REG_NOMATCH; 1006146040Stjr goto free_return; 1007146040Stjr } 1008146040Stjr } while (mctx->state_log[match_last] == NULL 1009146040Stjr || !mctx->state_log[match_last]->halt); 1010146040Stjr halt_node = check_halt_state_context (mctx, 1011146040Stjr mctx->state_log[match_last], 1012146040Stjr match_last); 1013146040Stjr } 1014146040Stjr ret = merge_state_array (dfa, sifted_states, lim_states, 1015146040Stjr match_last + 1); 1016146040Stjr re_free (lim_states); 1017146040Stjr lim_states = NULL; 1018146040Stjr if (BE (ret != REG_NOERROR, 0)) 1019146040Stjr goto free_return; 1020146040Stjr } 1021146040Stjr else 1022146040Stjr { 1023146040Stjr sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); 1024146040Stjr ret = sift_states_backward (mctx, &sctx); 1025146040Stjr re_node_set_free (&sctx.limits); 1026146040Stjr if (BE (ret != REG_NOERROR, 0)) 1027146040Stjr goto free_return; 1028250724Sjkim if (sifted_states[0] == NULL) 1029250724Sjkim { 1030250724Sjkim ret = REG_NOMATCH; 1031250724Sjkim goto free_return; 1032250724Sjkim } 1033146040Stjr } 1034146040Stjr re_free (mctx->state_log); 1035146040Stjr mctx->state_log = sifted_states; 1036146040Stjr sifted_states = NULL; 1037146040Stjr mctx->last_node = halt_node; 1038146040Stjr mctx->match_last = match_last; 1039146040Stjr ret = REG_NOERROR; 1040146040Stjr free_return: 1041146040Stjr re_free (sifted_states); 1042146040Stjr re_free (lim_states); 1043146040Stjr return ret; 1044146040Stjr} 1045146040Stjr 1046146040Stjr/* Acquire an initial state and return it. 1047146040Stjr We must select appropriate initial state depending on the context, 1048146040Stjr since initial states may have constraints like "\<", "^", etc.. */ 1049146040Stjr 1050146040Stjrstatic inline re_dfastate_t * 1051250724Sjkim__attribute ((always_inline)) internal_function 1052250724Sjkimacquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, 1053250724Sjkim int idx) 1054146040Stjr{ 1055250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1056146040Stjr if (dfa->init_state->has_constraint) 1057146040Stjr { 1058146040Stjr unsigned int context; 1059146040Stjr context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); 1060146040Stjr if (IS_WORD_CONTEXT (context)) 1061146040Stjr return dfa->init_state_word; 1062146040Stjr else if (IS_ORDINARY_CONTEXT (context)) 1063146040Stjr return dfa->init_state; 1064146040Stjr else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) 1065146040Stjr return dfa->init_state_begbuf; 1066146040Stjr else if (IS_NEWLINE_CONTEXT (context)) 1067146040Stjr return dfa->init_state_nl; 1068146040Stjr else if (IS_BEGBUF_CONTEXT (context)) 1069146040Stjr { 1070146040Stjr /* It is relatively rare case, then calculate on demand. */ 1071146040Stjr return re_acquire_state_context (err, dfa, 1072146040Stjr dfa->init_state->entrance_nodes, 1073146040Stjr context); 1074146040Stjr } 1075146040Stjr else 1076146040Stjr /* Must not happen? */ 1077146040Stjr return dfa->init_state; 1078146040Stjr } 1079146040Stjr else 1080146040Stjr return dfa->init_state; 1081146040Stjr} 1082146040Stjr 1083146040Stjr/* Check whether the regular expression match input string INPUT or not, 1084146040Stjr and return the index where the matching end, return -1 if not match, 1085146040Stjr or return -2 in case of an error. 1086146040Stjr FL_LONGEST_MATCH means we want the POSIX longest matching. 1087146040Stjr If P_MATCH_FIRST is not NULL, and the match fails, it is set to the 1088146040Stjr next place where we may want to try matching. 1089146040Stjr Note that the matcher assume that the maching starts from the current 1090146040Stjr index of the buffer. */ 1091146040Stjr 1092146040Stjrstatic int 1093250724Sjkiminternal_function __attribute_warn_unused_result__ 1094250724Sjkimcheck_matching (re_match_context_t *mctx, int fl_longest_match, 1095250724Sjkim int *p_match_first) 1096146040Stjr{ 1097250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1098146040Stjr reg_errcode_t err; 1099146040Stjr int match = 0; 1100146040Stjr int match_last = -1; 1101146040Stjr int cur_str_idx = re_string_cur_idx (&mctx->input); 1102146040Stjr re_dfastate_t *cur_state; 1103146040Stjr int at_init_state = p_match_first != NULL; 1104146040Stjr int next_start_idx = cur_str_idx; 1105146040Stjr 1106146040Stjr err = REG_NOERROR; 1107146040Stjr cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); 1108146040Stjr /* An initial state must not be NULL (invalid). */ 1109146040Stjr if (BE (cur_state == NULL, 0)) 1110146040Stjr { 1111146040Stjr assert (err == REG_ESPACE); 1112146040Stjr return -2; 1113146040Stjr } 1114146040Stjr 1115146040Stjr if (mctx->state_log != NULL) 1116146040Stjr { 1117146040Stjr mctx->state_log[cur_str_idx] = cur_state; 1118146040Stjr 1119146040Stjr /* Check OP_OPEN_SUBEXP in the initial state in case that we use them 1120146040Stjr later. E.g. Processing back references. */ 1121146040Stjr if (BE (dfa->nbackref, 0)) 1122146040Stjr { 1123146040Stjr at_init_state = 0; 1124146040Stjr err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); 1125146040Stjr if (BE (err != REG_NOERROR, 0)) 1126146040Stjr return err; 1127146040Stjr 1128146040Stjr if (cur_state->has_backref) 1129146040Stjr { 1130146040Stjr err = transit_state_bkref (mctx, &cur_state->nodes); 1131146040Stjr if (BE (err != REG_NOERROR, 0)) 1132250724Sjkim return err; 1133146040Stjr } 1134146040Stjr } 1135146040Stjr } 1136146040Stjr 1137146040Stjr /* If the RE accepts NULL string. */ 1138146040Stjr if (BE (cur_state->halt, 0)) 1139146040Stjr { 1140146040Stjr if (!cur_state->has_constraint 1141146040Stjr || check_halt_state_context (mctx, cur_state, cur_str_idx)) 1142146040Stjr { 1143146040Stjr if (!fl_longest_match) 1144146040Stjr return cur_str_idx; 1145146040Stjr else 1146146040Stjr { 1147146040Stjr match_last = cur_str_idx; 1148146040Stjr match = 1; 1149146040Stjr } 1150146040Stjr } 1151146040Stjr } 1152146040Stjr 1153146040Stjr while (!re_string_eoi (&mctx->input)) 1154146040Stjr { 1155146040Stjr re_dfastate_t *old_state = cur_state; 1156146040Stjr int next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1157146040Stjr 1158250724Sjkim if ((BE (next_char_idx >= mctx->input.bufs_len, 0) 1159250724Sjkim && mctx->input.bufs_len < mctx->input.len) 1160250724Sjkim || (BE (next_char_idx >= mctx->input.valid_len, 0) 1161250724Sjkim && mctx->input.valid_len < mctx->input.len)) 1162250724Sjkim { 1163251435Sjkim err = extend_buffers (mctx, next_char_idx + 1); 1164250724Sjkim if (BE (err != REG_NOERROR, 0)) 1165146040Stjr { 1166146040Stjr assert (err == REG_ESPACE); 1167146040Stjr return -2; 1168146040Stjr } 1169250724Sjkim } 1170146040Stjr 1171146040Stjr cur_state = transit_state (&err, mctx, cur_state); 1172146040Stjr if (mctx->state_log != NULL) 1173146040Stjr cur_state = merge_state_with_log (&err, mctx, cur_state); 1174146040Stjr 1175146040Stjr if (cur_state == NULL) 1176146040Stjr { 1177146040Stjr /* Reached the invalid state or an error. Try to recover a valid 1178146040Stjr state using the state log, if available and if we have not 1179146040Stjr already found a valid (even if not the longest) match. */ 1180146040Stjr if (BE (err != REG_NOERROR, 0)) 1181146040Stjr return -2; 1182146040Stjr 1183146040Stjr if (mctx->state_log == NULL 1184146040Stjr || (match && !fl_longest_match) 1185146040Stjr || (cur_state = find_recover_state (&err, mctx)) == NULL) 1186146040Stjr break; 1187146040Stjr } 1188146040Stjr 1189146040Stjr if (BE (at_init_state, 0)) 1190146040Stjr { 1191146040Stjr if (old_state == cur_state) 1192146040Stjr next_start_idx = next_char_idx; 1193146040Stjr else 1194146040Stjr at_init_state = 0; 1195146040Stjr } 1196146040Stjr 1197146040Stjr if (cur_state->halt) 1198146040Stjr { 1199146040Stjr /* Reached a halt state. 1200146040Stjr Check the halt state can satisfy the current context. */ 1201146040Stjr if (!cur_state->has_constraint 1202146040Stjr || check_halt_state_context (mctx, cur_state, 1203146040Stjr re_string_cur_idx (&mctx->input))) 1204146040Stjr { 1205146040Stjr /* We found an appropriate halt state. */ 1206146040Stjr match_last = re_string_cur_idx (&mctx->input); 1207146040Stjr match = 1; 1208146040Stjr 1209146040Stjr /* We found a match, do not modify match_first below. */ 1210146040Stjr p_match_first = NULL; 1211146040Stjr if (!fl_longest_match) 1212146040Stjr break; 1213146040Stjr } 1214146040Stjr } 1215146040Stjr } 1216146040Stjr 1217146040Stjr if (p_match_first) 1218146040Stjr *p_match_first += next_start_idx; 1219146040Stjr 1220146040Stjr return match_last; 1221146040Stjr} 1222146040Stjr 1223146040Stjr/* Check NODE match the current context. */ 1224146040Stjr 1225250724Sjkimstatic int 1226250724Sjkiminternal_function 1227250724Sjkimcheck_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) 1228146040Stjr{ 1229146040Stjr re_token_type_t type = dfa->nodes[node].type; 1230146040Stjr unsigned int constraint = dfa->nodes[node].constraint; 1231146040Stjr if (type != END_OF_RE) 1232146040Stjr return 0; 1233146040Stjr if (!constraint) 1234146040Stjr return 1; 1235146040Stjr if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) 1236146040Stjr return 0; 1237146040Stjr return 1; 1238146040Stjr} 1239146040Stjr 1240146040Stjr/* Check the halt state STATE match the current context. 1241146040Stjr Return 0 if not match, if the node, STATE has, is a halt node and 1242146040Stjr match the context, return the node. */ 1243146040Stjr 1244146040Stjrstatic int 1245250724Sjkiminternal_function 1246250724Sjkimcheck_halt_state_context (const re_match_context_t *mctx, 1247250724Sjkim const re_dfastate_t *state, int idx) 1248146040Stjr{ 1249146040Stjr int i; 1250146040Stjr unsigned int context; 1251146040Stjr#ifdef DEBUG 1252146040Stjr assert (state->halt); 1253146040Stjr#endif 1254146040Stjr context = re_string_context_at (&mctx->input, idx, mctx->eflags); 1255146040Stjr for (i = 0; i < state->nodes.nelem; ++i) 1256146040Stjr if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) 1257146040Stjr return state->nodes.elems[i]; 1258146040Stjr return 0; 1259146040Stjr} 1260146040Stjr 1261146040Stjr/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA 1262146040Stjr corresponding to the DFA). 1263146040Stjr Return the destination node, and update EPS_VIA_NODES, return -1 in case 1264146040Stjr of errors. */ 1265146040Stjr 1266146040Stjrstatic int 1267250724Sjkiminternal_function 1268250724Sjkimproceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, 1269250724Sjkim int *pidx, int node, re_node_set *eps_via_nodes, 1270250724Sjkim struct re_fail_stack_t *fs) 1271146040Stjr{ 1272250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1273250724Sjkim int i, err; 1274146040Stjr if (IS_EPSILON_NODE (dfa->nodes[node].type)) 1275146040Stjr { 1276146040Stjr re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; 1277146040Stjr re_node_set *edests = &dfa->edests[node]; 1278146040Stjr int dest_node; 1279146040Stjr err = re_node_set_insert (eps_via_nodes, node); 1280146040Stjr if (BE (err < 0, 0)) 1281146040Stjr return -2; 1282146040Stjr /* Pick up a valid destination, or return -1 if none is found. */ 1283146040Stjr for (dest_node = -1, i = 0; i < edests->nelem; ++i) 1284146040Stjr { 1285146040Stjr int candidate = edests->elems[i]; 1286146040Stjr if (!re_node_set_contains (cur_nodes, candidate)) 1287146040Stjr continue; 1288250724Sjkim if (dest_node == -1) 1289146040Stjr dest_node = candidate; 1290146040Stjr 1291250724Sjkim else 1292146040Stjr { 1293146040Stjr /* In order to avoid infinite loop like "(a*)*", return the second 1294250724Sjkim epsilon-transition if the first was already considered. */ 1295146040Stjr if (re_node_set_contains (eps_via_nodes, dest_node)) 1296250724Sjkim return candidate; 1297146040Stjr 1298146040Stjr /* Otherwise, push the second epsilon-transition on the fail stack. */ 1299146040Stjr else if (fs != NULL 1300146040Stjr && push_fail_stack (fs, *pidx, candidate, nregs, regs, 1301250724Sjkim eps_via_nodes)) 1302146040Stjr return -2; 1303146040Stjr 1304146040Stjr /* We know we are going to exit. */ 1305146040Stjr break; 1306146040Stjr } 1307146040Stjr } 1308146040Stjr return dest_node; 1309146040Stjr } 1310146040Stjr else 1311146040Stjr { 1312146040Stjr int naccepted = 0; 1313146040Stjr re_token_type_t type = dfa->nodes[node].type; 1314146040Stjr 1315146040Stjr#ifdef RE_ENABLE_I18N 1316146040Stjr if (dfa->nodes[node].accept_mb) 1317146040Stjr naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); 1318146040Stjr else 1319146040Stjr#endif /* RE_ENABLE_I18N */ 1320146040Stjr if (type == OP_BACK_REF) 1321146040Stjr { 1322146040Stjr int subexp_idx = dfa->nodes[node].opr.idx + 1; 1323146040Stjr naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1324146040Stjr if (fs != NULL) 1325146040Stjr { 1326146040Stjr if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) 1327146040Stjr return -1; 1328146040Stjr else if (naccepted) 1329146040Stjr { 1330146040Stjr char *buf = (char *) re_string_get_buffer (&mctx->input); 1331146040Stjr if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1332146040Stjr naccepted) != 0) 1333146040Stjr return -1; 1334146040Stjr } 1335146040Stjr } 1336146040Stjr 1337146040Stjr if (naccepted == 0) 1338146040Stjr { 1339250724Sjkim int dest_node; 1340146040Stjr err = re_node_set_insert (eps_via_nodes, node); 1341146040Stjr if (BE (err < 0, 0)) 1342146040Stjr return -2; 1343146040Stjr dest_node = dfa->edests[node].elems[0]; 1344146040Stjr if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, 1345146040Stjr dest_node)) 1346146040Stjr return dest_node; 1347146040Stjr } 1348146040Stjr } 1349146040Stjr 1350146040Stjr if (naccepted != 0 1351146040Stjr || check_node_accept (mctx, dfa->nodes + node, *pidx)) 1352146040Stjr { 1353250724Sjkim int dest_node = dfa->nexts[node]; 1354146040Stjr *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; 1355146040Stjr if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL 1356146040Stjr || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, 1357146040Stjr dest_node))) 1358146040Stjr return -1; 1359146040Stjr re_node_set_empty (eps_via_nodes); 1360146040Stjr return dest_node; 1361146040Stjr } 1362146040Stjr } 1363146040Stjr return -1; 1364146040Stjr} 1365146040Stjr 1366146040Stjrstatic reg_errcode_t 1367250724Sjkiminternal_function __attribute_warn_unused_result__ 1368250724Sjkimpush_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, 1369250724Sjkim int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1370146040Stjr{ 1371146040Stjr reg_errcode_t err; 1372146040Stjr int num = fs->num++; 1373146040Stjr if (fs->num == fs->alloc) 1374146040Stjr { 1375146040Stjr struct re_fail_stack_ent_t *new_array; 1376146040Stjr new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) 1377146040Stjr * fs->alloc * 2)); 1378146040Stjr if (new_array == NULL) 1379146040Stjr return REG_ESPACE; 1380146040Stjr fs->alloc *= 2; 1381146040Stjr fs->stack = new_array; 1382146040Stjr } 1383146040Stjr fs->stack[num].idx = str_idx; 1384146040Stjr fs->stack[num].node = dest_node; 1385146040Stjr fs->stack[num].regs = re_malloc (regmatch_t, nregs); 1386146040Stjr if (fs->stack[num].regs == NULL) 1387146040Stjr return REG_ESPACE; 1388146040Stjr memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); 1389146040Stjr err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); 1390146040Stjr return err; 1391146040Stjr} 1392146040Stjr 1393146040Stjrstatic int 1394250724Sjkiminternal_function 1395250724Sjkimpop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, 1396250724Sjkim regmatch_t *regs, re_node_set *eps_via_nodes) 1397146040Stjr{ 1398146040Stjr int num = --fs->num; 1399146040Stjr assert (num >= 0); 1400146040Stjr *pidx = fs->stack[num].idx; 1401146040Stjr memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); 1402146040Stjr re_node_set_free (eps_via_nodes); 1403146040Stjr re_free (fs->stack[num].regs); 1404146040Stjr *eps_via_nodes = fs->stack[num].eps_via_nodes; 1405146040Stjr return fs->stack[num].node; 1406146040Stjr} 1407146040Stjr 1408146040Stjr/* Set the positions where the subexpressions are starts/ends to registers 1409146040Stjr PMATCH. 1410146040Stjr Note: We assume that pmatch[0] is already set, and 1411146040Stjr pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ 1412146040Stjr 1413146040Stjrstatic reg_errcode_t 1414250724Sjkiminternal_function __attribute_warn_unused_result__ 1415250724Sjkimset_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, 1416250724Sjkim regmatch_t *pmatch, int fl_backtrack) 1417146040Stjr{ 1418250724Sjkim const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; 1419146040Stjr int idx, cur_node; 1420146040Stjr re_node_set eps_via_nodes; 1421146040Stjr struct re_fail_stack_t *fs; 1422146040Stjr struct re_fail_stack_t fs_body = { 0, 2, NULL }; 1423146040Stjr regmatch_t *prev_idx_match; 1424250724Sjkim int prev_idx_match_malloced = 0; 1425146040Stjr 1426146040Stjr#ifdef DEBUG 1427146040Stjr assert (nmatch > 1); 1428146040Stjr assert (mctx->state_log != NULL); 1429146040Stjr#endif 1430146040Stjr if (fl_backtrack) 1431146040Stjr { 1432146040Stjr fs = &fs_body; 1433146040Stjr fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); 1434146040Stjr if (fs->stack == NULL) 1435146040Stjr return REG_ESPACE; 1436146040Stjr } 1437146040Stjr else 1438146040Stjr fs = NULL; 1439146040Stjr 1440146040Stjr cur_node = dfa->init_node; 1441146040Stjr re_node_set_init_empty (&eps_via_nodes); 1442146040Stjr 1443250724Sjkim if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) 1444250724Sjkim prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); 1445250724Sjkim else 1446250724Sjkim { 1447250724Sjkim prev_idx_match = re_malloc (regmatch_t, nmatch); 1448250724Sjkim if (prev_idx_match == NULL) 1449250724Sjkim { 1450250724Sjkim free_fail_stack_return (fs); 1451250724Sjkim return REG_ESPACE; 1452250724Sjkim } 1453250724Sjkim prev_idx_match_malloced = 1; 1454250724Sjkim } 1455146040Stjr memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1456146040Stjr 1457146040Stjr for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) 1458146040Stjr { 1459146040Stjr update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); 1460146040Stjr 1461146040Stjr if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1462146040Stjr { 1463146040Stjr int reg_idx; 1464146040Stjr if (fs) 1465146040Stjr { 1466146040Stjr for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 1467146040Stjr if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) 1468146040Stjr break; 1469146040Stjr if (reg_idx == nmatch) 1470146040Stjr { 1471146040Stjr re_node_set_free (&eps_via_nodes); 1472250724Sjkim if (prev_idx_match_malloced) 1473250724Sjkim re_free (prev_idx_match); 1474146040Stjr return free_fail_stack_return (fs); 1475146040Stjr } 1476146040Stjr cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1477146040Stjr &eps_via_nodes); 1478146040Stjr } 1479146040Stjr else 1480146040Stjr { 1481146040Stjr re_node_set_free (&eps_via_nodes); 1482250724Sjkim if (prev_idx_match_malloced) 1483250724Sjkim re_free (prev_idx_match); 1484146040Stjr return REG_NOERROR; 1485146040Stjr } 1486146040Stjr } 1487146040Stjr 1488146040Stjr /* Proceed to next node. */ 1489146040Stjr cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, 1490146040Stjr &eps_via_nodes, fs); 1491146040Stjr 1492146040Stjr if (BE (cur_node < 0, 0)) 1493146040Stjr { 1494146040Stjr if (BE (cur_node == -2, 0)) 1495146040Stjr { 1496146040Stjr re_node_set_free (&eps_via_nodes); 1497250724Sjkim if (prev_idx_match_malloced) 1498250724Sjkim re_free (prev_idx_match); 1499146040Stjr free_fail_stack_return (fs); 1500146040Stjr return REG_ESPACE; 1501146040Stjr } 1502146040Stjr if (fs) 1503146040Stjr cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1504146040Stjr &eps_via_nodes); 1505146040Stjr else 1506146040Stjr { 1507146040Stjr re_node_set_free (&eps_via_nodes); 1508250724Sjkim if (prev_idx_match_malloced) 1509250724Sjkim re_free (prev_idx_match); 1510146040Stjr return REG_NOMATCH; 1511146040Stjr } 1512146040Stjr } 1513146040Stjr } 1514146040Stjr re_node_set_free (&eps_via_nodes); 1515250724Sjkim if (prev_idx_match_malloced) 1516250724Sjkim re_free (prev_idx_match); 1517146040Stjr return free_fail_stack_return (fs); 1518146040Stjr} 1519146040Stjr 1520146040Stjrstatic reg_errcode_t 1521250724Sjkiminternal_function 1522250724Sjkimfree_fail_stack_return (struct re_fail_stack_t *fs) 1523146040Stjr{ 1524146040Stjr if (fs) 1525146040Stjr { 1526146040Stjr int fs_idx; 1527146040Stjr for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) 1528146040Stjr { 1529146040Stjr re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); 1530146040Stjr re_free (fs->stack[fs_idx].regs); 1531146040Stjr } 1532146040Stjr re_free (fs->stack); 1533146040Stjr } 1534146040Stjr return REG_NOERROR; 1535146040Stjr} 1536146040Stjr 1537146040Stjrstatic void 1538250724Sjkiminternal_function 1539250724Sjkimupdate_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 1540250724Sjkim regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) 1541146040Stjr{ 1542146040Stjr int type = dfa->nodes[cur_node].type; 1543146040Stjr if (type == OP_OPEN_SUBEXP) 1544146040Stjr { 1545146040Stjr int reg_num = dfa->nodes[cur_node].opr.idx + 1; 1546146040Stjr 1547146040Stjr /* We are at the first node of this sub expression. */ 1548146040Stjr if (reg_num < nmatch) 1549146040Stjr { 1550146040Stjr pmatch[reg_num].rm_so = cur_idx; 1551146040Stjr pmatch[reg_num].rm_eo = -1; 1552146040Stjr } 1553146040Stjr } 1554146040Stjr else if (type == OP_CLOSE_SUBEXP) 1555146040Stjr { 1556146040Stjr int reg_num = dfa->nodes[cur_node].opr.idx + 1; 1557146040Stjr if (reg_num < nmatch) 1558146040Stjr { 1559146040Stjr /* We are at the last node of this sub expression. */ 1560146040Stjr if (pmatch[reg_num].rm_so < cur_idx) 1561146040Stjr { 1562146040Stjr pmatch[reg_num].rm_eo = cur_idx; 1563146040Stjr /* This is a non-empty match or we are not inside an optional 1564146040Stjr subexpression. Accept this right away. */ 1565146040Stjr memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1566146040Stjr } 1567146040Stjr else 1568146040Stjr { 1569146040Stjr if (dfa->nodes[cur_node].opt_subexp 1570146040Stjr && prev_idx_match[reg_num].rm_so != -1) 1571146040Stjr /* We transited through an empty match for an optional 1572146040Stjr subexpression, like (a?)*, and this is not the subexp's 1573146040Stjr first match. Copy back the old content of the registers 1574146040Stjr so that matches of an inner subexpression are undone as 1575146040Stjr well, like in ((a?))*. */ 1576146040Stjr memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); 1577146040Stjr else 1578146040Stjr /* We completed a subexpression, but it may be part of 1579146040Stjr an optional one, so do not update PREV_IDX_MATCH. */ 1580146040Stjr pmatch[reg_num].rm_eo = cur_idx; 1581146040Stjr } 1582146040Stjr } 1583146040Stjr } 1584146040Stjr} 1585146040Stjr 1586146040Stjr/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 1587146040Stjr and sift the nodes in each states according to the following rules. 1588146040Stjr Updated state_log will be wrote to STATE_LOG. 1589146040Stjr 1590146040Stjr Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1591146040Stjr 1. When STR_IDX == MATCH_LAST(the last index in the state_log): 1592146040Stjr If `a' isn't the LAST_NODE and `a' can't epsilon transit to 1593146040Stjr the LAST_NODE, we throw away the node `a'. 1594146040Stjr 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts 1595146040Stjr string `s' and transit to `b': 1596146040Stjr i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw 1597146040Stjr away the node `a'. 1598146040Stjr ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is 1599146040Stjr thrown away, we throw away the node `a'. 1600146040Stjr 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': 1601146040Stjr i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the 1602146040Stjr node `a'. 1603146040Stjr ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, 1604146040Stjr we throw away the node `a'. */ 1605146040Stjr 1606146040Stjr#define STATE_NODE_CONTAINS(state,node) \ 1607146040Stjr ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) 1608146040Stjr 1609146040Stjrstatic reg_errcode_t 1610250724Sjkiminternal_function 1611250724Sjkimsift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) 1612146040Stjr{ 1613146040Stjr reg_errcode_t err; 1614146040Stjr int null_cnt = 0; 1615146040Stjr int str_idx = sctx->last_str_idx; 1616146040Stjr re_node_set cur_dest; 1617146040Stjr 1618146040Stjr#ifdef DEBUG 1619146040Stjr assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); 1620146040Stjr#endif 1621146040Stjr 1622146040Stjr /* Build sifted state_log[str_idx]. It has the nodes which can epsilon 1623146040Stjr transit to the last_node and the last_node itself. */ 1624146040Stjr err = re_node_set_init_1 (&cur_dest, sctx->last_node); 1625146040Stjr if (BE (err != REG_NOERROR, 0)) 1626146040Stjr return err; 1627146040Stjr err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1628146040Stjr if (BE (err != REG_NOERROR, 0)) 1629146040Stjr goto free_return; 1630146040Stjr 1631146040Stjr /* Then check each states in the state_log. */ 1632146040Stjr while (str_idx > 0) 1633146040Stjr { 1634146040Stjr /* Update counters. */ 1635146040Stjr null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; 1636146040Stjr if (null_cnt > mctx->max_mb_elem_len) 1637146040Stjr { 1638146040Stjr memset (sctx->sifted_states, '\0', 1639146040Stjr sizeof (re_dfastate_t *) * str_idx); 1640146040Stjr re_node_set_free (&cur_dest); 1641146040Stjr return REG_NOERROR; 1642146040Stjr } 1643146040Stjr re_node_set_empty (&cur_dest); 1644146040Stjr --str_idx; 1645146040Stjr 1646146040Stjr if (mctx->state_log[str_idx]) 1647146040Stjr { 1648146040Stjr err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); 1649250724Sjkim if (BE (err != REG_NOERROR, 0)) 1650146040Stjr goto free_return; 1651146040Stjr } 1652146040Stjr 1653146040Stjr /* Add all the nodes which satisfy the following conditions: 1654146040Stjr - It can epsilon transit to a node in CUR_DEST. 1655146040Stjr - It is in CUR_SRC. 1656146040Stjr And update state_log. */ 1657146040Stjr err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1658146040Stjr if (BE (err != REG_NOERROR, 0)) 1659146040Stjr goto free_return; 1660146040Stjr } 1661146040Stjr err = REG_NOERROR; 1662146040Stjr free_return: 1663146040Stjr re_node_set_free (&cur_dest); 1664146040Stjr return err; 1665146040Stjr} 1666146040Stjr 1667146040Stjrstatic reg_errcode_t 1668250724Sjkiminternal_function __attribute_warn_unused_result__ 1669250724Sjkimbuild_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, 1670250724Sjkim int str_idx, re_node_set *cur_dest) 1671146040Stjr{ 1672250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1673250724Sjkim const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; 1674146040Stjr int i; 1675146040Stjr 1676146040Stjr /* Then build the next sifted state. 1677146040Stjr We build the next sifted state on `cur_dest', and update 1678146040Stjr `sifted_states[str_idx]' with `cur_dest'. 1679146040Stjr Note: 1680146040Stjr `cur_dest' is the sifted state from `state_log[str_idx + 1]'. 1681146040Stjr `cur_src' points the node_set of the old `state_log[str_idx]' 1682146040Stjr (with the epsilon nodes pre-filtered out). */ 1683146040Stjr for (i = 0; i < cur_src->nelem; i++) 1684146040Stjr { 1685146040Stjr int prev_node = cur_src->elems[i]; 1686146040Stjr int naccepted = 0; 1687146040Stjr int ret; 1688146040Stjr 1689146040Stjr#ifdef DEBUG 1690146040Stjr re_token_type_t type = dfa->nodes[prev_node].type; 1691146040Stjr assert (!IS_EPSILON_NODE (type)); 1692146040Stjr#endif 1693146040Stjr#ifdef RE_ENABLE_I18N 1694146040Stjr /* If the node may accept `multi byte'. */ 1695146040Stjr if (dfa->nodes[prev_node].accept_mb) 1696146040Stjr naccepted = sift_states_iter_mb (mctx, sctx, prev_node, 1697146040Stjr str_idx, sctx->last_str_idx); 1698146040Stjr#endif /* RE_ENABLE_I18N */ 1699146040Stjr 1700146040Stjr /* We don't check backreferences here. 1701146040Stjr See update_cur_sifted_state(). */ 1702146040Stjr if (!naccepted 1703146040Stjr && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) 1704146040Stjr && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], 1705146040Stjr dfa->nexts[prev_node])) 1706146040Stjr naccepted = 1; 1707146040Stjr 1708146040Stjr if (naccepted == 0) 1709146040Stjr continue; 1710146040Stjr 1711146040Stjr if (sctx->limits.nelem) 1712146040Stjr { 1713146040Stjr int to_idx = str_idx + naccepted; 1714146040Stjr if (check_dst_limits (mctx, &sctx->limits, 1715146040Stjr dfa->nexts[prev_node], to_idx, 1716146040Stjr prev_node, str_idx)) 1717146040Stjr continue; 1718146040Stjr } 1719146040Stjr ret = re_node_set_insert (cur_dest, prev_node); 1720146040Stjr if (BE (ret == -1, 0)) 1721146040Stjr return REG_ESPACE; 1722146040Stjr } 1723146040Stjr 1724146040Stjr return REG_NOERROR; 1725146040Stjr} 1726146040Stjr 1727146040Stjr/* Helper functions. */ 1728146040Stjr 1729146040Stjrstatic reg_errcode_t 1730250724Sjkiminternal_function 1731250724Sjkimclean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) 1732146040Stjr{ 1733146040Stjr int top = mctx->state_log_top; 1734146040Stjr 1735250724Sjkim if ((next_state_log_idx >= mctx->input.bufs_len 1736250724Sjkim && mctx->input.bufs_len < mctx->input.len) 1737146040Stjr || (next_state_log_idx >= mctx->input.valid_len 1738146040Stjr && mctx->input.valid_len < mctx->input.len)) 1739146040Stjr { 1740146040Stjr reg_errcode_t err; 1741251435Sjkim err = extend_buffers (mctx, next_state_log_idx + 1); 1742146040Stjr if (BE (err != REG_NOERROR, 0)) 1743146040Stjr return err; 1744146040Stjr } 1745146040Stjr 1746146040Stjr if (top < next_state_log_idx) 1747146040Stjr { 1748146040Stjr memset (mctx->state_log + top + 1, '\0', 1749146040Stjr sizeof (re_dfastate_t *) * (next_state_log_idx - top)); 1750146040Stjr mctx->state_log_top = next_state_log_idx; 1751146040Stjr } 1752146040Stjr return REG_NOERROR; 1753146040Stjr} 1754146040Stjr 1755146040Stjrstatic reg_errcode_t 1756250724Sjkiminternal_function 1757250724Sjkimmerge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, 1758250724Sjkim re_dfastate_t **src, int num) 1759146040Stjr{ 1760146040Stjr int st_idx; 1761146040Stjr reg_errcode_t err; 1762146040Stjr for (st_idx = 0; st_idx < num; ++st_idx) 1763146040Stjr { 1764146040Stjr if (dst[st_idx] == NULL) 1765146040Stjr dst[st_idx] = src[st_idx]; 1766146040Stjr else if (src[st_idx] != NULL) 1767146040Stjr { 1768146040Stjr re_node_set merged_set; 1769146040Stjr err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, 1770146040Stjr &src[st_idx]->nodes); 1771146040Stjr if (BE (err != REG_NOERROR, 0)) 1772146040Stjr return err; 1773146040Stjr dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); 1774146040Stjr re_node_set_free (&merged_set); 1775146040Stjr if (BE (err != REG_NOERROR, 0)) 1776146040Stjr return err; 1777146040Stjr } 1778146040Stjr } 1779146040Stjr return REG_NOERROR; 1780146040Stjr} 1781146040Stjr 1782146040Stjrstatic reg_errcode_t 1783250724Sjkiminternal_function 1784250724Sjkimupdate_cur_sifted_state (const re_match_context_t *mctx, 1785250724Sjkim re_sift_context_t *sctx, int str_idx, 1786250724Sjkim re_node_set *dest_nodes) 1787146040Stjr{ 1788250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1789250724Sjkim reg_errcode_t err = REG_NOERROR; 1790146040Stjr const re_node_set *candidates; 1791146040Stjr candidates = ((mctx->state_log[str_idx] == NULL) ? NULL 1792146040Stjr : &mctx->state_log[str_idx]->nodes); 1793146040Stjr 1794146040Stjr if (dest_nodes->nelem == 0) 1795146040Stjr sctx->sifted_states[str_idx] = NULL; 1796146040Stjr else 1797146040Stjr { 1798146040Stjr if (candidates) 1799146040Stjr { 1800146040Stjr /* At first, add the nodes which can epsilon transit to a node in 1801146040Stjr DEST_NODE. */ 1802146040Stjr err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); 1803146040Stjr if (BE (err != REG_NOERROR, 0)) 1804146040Stjr return err; 1805146040Stjr 1806146040Stjr /* Then, check the limitations in the current sift_context. */ 1807146040Stjr if (sctx->limits.nelem) 1808146040Stjr { 1809146040Stjr err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, 1810146040Stjr mctx->bkref_ents, str_idx); 1811146040Stjr if (BE (err != REG_NOERROR, 0)) 1812146040Stjr return err; 1813146040Stjr } 1814146040Stjr } 1815146040Stjr 1816146040Stjr sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); 1817146040Stjr if (BE (err != REG_NOERROR, 0)) 1818146040Stjr return err; 1819146040Stjr } 1820146040Stjr 1821146040Stjr if (candidates && mctx->state_log[str_idx]->has_backref) 1822146040Stjr { 1823146040Stjr err = sift_states_bkref (mctx, sctx, str_idx, candidates); 1824146040Stjr if (BE (err != REG_NOERROR, 0)) 1825146040Stjr return err; 1826146040Stjr } 1827146040Stjr return REG_NOERROR; 1828146040Stjr} 1829146040Stjr 1830146040Stjrstatic reg_errcode_t 1831250724Sjkiminternal_function __attribute_warn_unused_result__ 1832250724Sjkimadd_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, 1833250724Sjkim const re_node_set *candidates) 1834146040Stjr{ 1835146040Stjr reg_errcode_t err = REG_NOERROR; 1836146040Stjr int i; 1837146040Stjr 1838146040Stjr re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); 1839146040Stjr if (BE (err != REG_NOERROR, 0)) 1840146040Stjr return err; 1841146040Stjr 1842146040Stjr if (!state->inveclosure.alloc) 1843146040Stjr { 1844146040Stjr err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); 1845146040Stjr if (BE (err != REG_NOERROR, 0)) 1846250724Sjkim return REG_ESPACE; 1847146040Stjr for (i = 0; i < dest_nodes->nelem; i++) 1848250724Sjkim { 1849250724Sjkim err = re_node_set_merge (&state->inveclosure, 1850250724Sjkim dfa->inveclosures + dest_nodes->elems[i]); 1851250724Sjkim if (BE (err != REG_NOERROR, 0)) 1852250724Sjkim return REG_ESPACE; 1853250724Sjkim } 1854146040Stjr } 1855146040Stjr return re_node_set_add_intersect (dest_nodes, candidates, 1856146040Stjr &state->inveclosure); 1857146040Stjr} 1858146040Stjr 1859146040Stjrstatic reg_errcode_t 1860250724Sjkiminternal_function 1861250724Sjkimsub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, 1862250724Sjkim const re_node_set *candidates) 1863146040Stjr{ 1864146040Stjr int ecl_idx; 1865146040Stjr reg_errcode_t err; 1866146040Stjr re_node_set *inv_eclosure = dfa->inveclosures + node; 1867146040Stjr re_node_set except_nodes; 1868146040Stjr re_node_set_init_empty (&except_nodes); 1869146040Stjr for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1870146040Stjr { 1871146040Stjr int cur_node = inv_eclosure->elems[ecl_idx]; 1872146040Stjr if (cur_node == node) 1873146040Stjr continue; 1874146040Stjr if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) 1875146040Stjr { 1876146040Stjr int edst1 = dfa->edests[cur_node].elems[0]; 1877146040Stjr int edst2 = ((dfa->edests[cur_node].nelem > 1) 1878146040Stjr ? dfa->edests[cur_node].elems[1] : -1); 1879146040Stjr if ((!re_node_set_contains (inv_eclosure, edst1) 1880146040Stjr && re_node_set_contains (dest_nodes, edst1)) 1881146040Stjr || (edst2 > 0 1882146040Stjr && !re_node_set_contains (inv_eclosure, edst2) 1883146040Stjr && re_node_set_contains (dest_nodes, edst2))) 1884146040Stjr { 1885146040Stjr err = re_node_set_add_intersect (&except_nodes, candidates, 1886146040Stjr dfa->inveclosures + cur_node); 1887146040Stjr if (BE (err != REG_NOERROR, 0)) 1888146040Stjr { 1889146040Stjr re_node_set_free (&except_nodes); 1890146040Stjr return err; 1891146040Stjr } 1892146040Stjr } 1893146040Stjr } 1894146040Stjr } 1895146040Stjr for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1896146040Stjr { 1897146040Stjr int cur_node = inv_eclosure->elems[ecl_idx]; 1898146040Stjr if (!re_node_set_contains (&except_nodes, cur_node)) 1899146040Stjr { 1900146040Stjr int idx = re_node_set_contains (dest_nodes, cur_node) - 1; 1901146040Stjr re_node_set_remove_at (dest_nodes, idx); 1902146040Stjr } 1903146040Stjr } 1904146040Stjr re_node_set_free (&except_nodes); 1905146040Stjr return REG_NOERROR; 1906146040Stjr} 1907146040Stjr 1908146040Stjrstatic int 1909250724Sjkiminternal_function 1910250724Sjkimcheck_dst_limits (const re_match_context_t *mctx, re_node_set *limits, 1911250724Sjkim int dst_node, int dst_idx, int src_node, int src_idx) 1912146040Stjr{ 1913250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1914146040Stjr int lim_idx, src_pos, dst_pos; 1915146040Stjr 1916146040Stjr int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); 1917146040Stjr int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); 1918146040Stjr for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 1919146040Stjr { 1920146040Stjr int subexp_idx; 1921146040Stjr struct re_backref_cache_entry *ent; 1922146040Stjr ent = mctx->bkref_ents + limits->elems[lim_idx]; 1923146040Stjr subexp_idx = dfa->nodes[ent->node].opr.idx; 1924146040Stjr 1925146040Stjr dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], 1926146040Stjr subexp_idx, dst_node, dst_idx, 1927146040Stjr dst_bkref_idx); 1928146040Stjr src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], 1929146040Stjr subexp_idx, src_node, src_idx, 1930146040Stjr src_bkref_idx); 1931146040Stjr 1932146040Stjr /* In case of: 1933146040Stjr <src> <dst> ( <subexp> ) 1934146040Stjr ( <subexp> ) <src> <dst> 1935146040Stjr ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ 1936146040Stjr if (src_pos == dst_pos) 1937146040Stjr continue; /* This is unrelated limitation. */ 1938146040Stjr else 1939146040Stjr return 1; 1940146040Stjr } 1941146040Stjr return 0; 1942146040Stjr} 1943146040Stjr 1944146040Stjrstatic int 1945250724Sjkiminternal_function 1946250724Sjkimcheck_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, 1947250724Sjkim int subexp_idx, int from_node, int bkref_idx) 1948146040Stjr{ 1949250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 1950250724Sjkim const re_node_set *eclosures = dfa->eclosures + from_node; 1951146040Stjr int node_idx; 1952146040Stjr 1953146040Stjr /* Else, we are on the boundary: examine the nodes on the epsilon 1954146040Stjr closure. */ 1955146040Stjr for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) 1956146040Stjr { 1957146040Stjr int node = eclosures->elems[node_idx]; 1958146040Stjr switch (dfa->nodes[node].type) 1959146040Stjr { 1960146040Stjr case OP_BACK_REF: 1961146040Stjr if (bkref_idx != -1) 1962146040Stjr { 1963146040Stjr struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; 1964146040Stjr do 1965250724Sjkim { 1966146040Stjr int dst, cpos; 1967146040Stjr 1968146040Stjr if (ent->node != node) 1969146040Stjr continue; 1970146040Stjr 1971250724Sjkim if (subexp_idx < BITSET_WORD_BITS 1972250724Sjkim && !(ent->eps_reachable_subexps_map 1973250724Sjkim & ((bitset_word_t) 1 << subexp_idx))) 1974146040Stjr continue; 1975146040Stjr 1976146040Stjr /* Recurse trying to reach the OP_OPEN_SUBEXP and 1977146040Stjr OP_CLOSE_SUBEXP cases below. But, if the 1978146040Stjr destination node is the same node as the source 1979146040Stjr node, don't recurse because it would cause an 1980146040Stjr infinite loop: a regex that exhibits this behavior 1981146040Stjr is ()\1*\1* */ 1982146040Stjr dst = dfa->edests[node].elems[0]; 1983146040Stjr if (dst == from_node) 1984146040Stjr { 1985146040Stjr if (boundaries & 1) 1986250724Sjkim return -1; 1987146040Stjr else /* if (boundaries & 2) */ 1988250724Sjkim return 0; 1989146040Stjr } 1990146040Stjr 1991146040Stjr cpos = 1992146040Stjr check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, 1993146040Stjr dst, bkref_idx); 1994146040Stjr if (cpos == -1 /* && (boundaries & 1) */) 1995146040Stjr return -1; 1996146040Stjr if (cpos == 0 && (boundaries & 2)) 1997146040Stjr return 0; 1998146040Stjr 1999250724Sjkim if (subexp_idx < BITSET_WORD_BITS) 2000250724Sjkim ent->eps_reachable_subexps_map 2001250724Sjkim &= ~((bitset_word_t) 1 << subexp_idx); 2002250724Sjkim } 2003146040Stjr while (ent++->more); 2004146040Stjr } 2005146040Stjr break; 2006146040Stjr 2007146040Stjr case OP_OPEN_SUBEXP: 2008146040Stjr if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) 2009146040Stjr return -1; 2010146040Stjr break; 2011146040Stjr 2012146040Stjr case OP_CLOSE_SUBEXP: 2013146040Stjr if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) 2014146040Stjr return 0; 2015146040Stjr break; 2016146040Stjr 2017146040Stjr default: 2018146040Stjr break; 2019146040Stjr } 2020146040Stjr } 2021146040Stjr 2022146040Stjr return (boundaries & 2) ? 1 : 0; 2023146040Stjr} 2024146040Stjr 2025146040Stjrstatic int 2026250724Sjkiminternal_function 2027250724Sjkimcheck_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, 2028250724Sjkim int subexp_idx, int from_node, int str_idx, 2029250724Sjkim int bkref_idx) 2030146040Stjr{ 2031146040Stjr struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; 2032146040Stjr int boundaries; 2033146040Stjr 2034146040Stjr /* If we are outside the range of the subexpression, return -1 or 1. */ 2035146040Stjr if (str_idx < lim->subexp_from) 2036146040Stjr return -1; 2037146040Stjr 2038146040Stjr if (lim->subexp_to < str_idx) 2039146040Stjr return 1; 2040146040Stjr 2041146040Stjr /* If we are within the subexpression, return 0. */ 2042146040Stjr boundaries = (str_idx == lim->subexp_from); 2043146040Stjr boundaries |= (str_idx == lim->subexp_to) << 1; 2044146040Stjr if (boundaries == 0) 2045146040Stjr return 0; 2046146040Stjr 2047146040Stjr /* Else, examine epsilon closure. */ 2048146040Stjr return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, 2049146040Stjr from_node, bkref_idx); 2050146040Stjr} 2051146040Stjr 2052146040Stjr/* Check the limitations of sub expressions LIMITS, and remove the nodes 2053146040Stjr which are against limitations from DEST_NODES. */ 2054146040Stjr 2055146040Stjrstatic reg_errcode_t 2056250724Sjkiminternal_function 2057250724Sjkimcheck_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, 2058250724Sjkim const re_node_set *candidates, re_node_set *limits, 2059250724Sjkim struct re_backref_cache_entry *bkref_ents, int str_idx) 2060146040Stjr{ 2061146040Stjr reg_errcode_t err; 2062146040Stjr int node_idx, lim_idx; 2063146040Stjr 2064146040Stjr for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 2065146040Stjr { 2066146040Stjr int subexp_idx; 2067146040Stjr struct re_backref_cache_entry *ent; 2068146040Stjr ent = bkref_ents + limits->elems[lim_idx]; 2069146040Stjr 2070146040Stjr if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) 2071146040Stjr continue; /* This is unrelated limitation. */ 2072146040Stjr 2073146040Stjr subexp_idx = dfa->nodes[ent->node].opr.idx; 2074146040Stjr if (ent->subexp_to == str_idx) 2075146040Stjr { 2076146040Stjr int ops_node = -1; 2077146040Stjr int cls_node = -1; 2078146040Stjr for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2079146040Stjr { 2080146040Stjr int node = dest_nodes->elems[node_idx]; 2081146040Stjr re_token_type_t type = dfa->nodes[node].type; 2082146040Stjr if (type == OP_OPEN_SUBEXP 2083146040Stjr && subexp_idx == dfa->nodes[node].opr.idx) 2084146040Stjr ops_node = node; 2085146040Stjr else if (type == OP_CLOSE_SUBEXP 2086146040Stjr && subexp_idx == dfa->nodes[node].opr.idx) 2087146040Stjr cls_node = node; 2088146040Stjr } 2089146040Stjr 2090146040Stjr /* Check the limitation of the open subexpression. */ 2091146040Stjr /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ 2092146040Stjr if (ops_node >= 0) 2093146040Stjr { 2094146040Stjr err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, 2095146040Stjr candidates); 2096146040Stjr if (BE (err != REG_NOERROR, 0)) 2097146040Stjr return err; 2098146040Stjr } 2099146040Stjr 2100146040Stjr /* Check the limitation of the close subexpression. */ 2101146040Stjr if (cls_node >= 0) 2102146040Stjr for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2103146040Stjr { 2104146040Stjr int node = dest_nodes->elems[node_idx]; 2105146040Stjr if (!re_node_set_contains (dfa->inveclosures + node, 2106146040Stjr cls_node) 2107146040Stjr && !re_node_set_contains (dfa->eclosures + node, 2108146040Stjr cls_node)) 2109146040Stjr { 2110146040Stjr /* It is against this limitation. 2111146040Stjr Remove it form the current sifted state. */ 2112146040Stjr err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2113146040Stjr candidates); 2114146040Stjr if (BE (err != REG_NOERROR, 0)) 2115146040Stjr return err; 2116146040Stjr --node_idx; 2117146040Stjr } 2118146040Stjr } 2119146040Stjr } 2120146040Stjr else /* (ent->subexp_to != str_idx) */ 2121146040Stjr { 2122146040Stjr for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2123146040Stjr { 2124146040Stjr int node = dest_nodes->elems[node_idx]; 2125146040Stjr re_token_type_t type = dfa->nodes[node].type; 2126146040Stjr if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) 2127146040Stjr { 2128146040Stjr if (subexp_idx != dfa->nodes[node].opr.idx) 2129146040Stjr continue; 2130146040Stjr /* It is against this limitation. 2131146040Stjr Remove it form the current sifted state. */ 2132146040Stjr err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2133146040Stjr candidates); 2134146040Stjr if (BE (err != REG_NOERROR, 0)) 2135146040Stjr return err; 2136146040Stjr } 2137146040Stjr } 2138146040Stjr } 2139146040Stjr } 2140146040Stjr return REG_NOERROR; 2141146040Stjr} 2142146040Stjr 2143146040Stjrstatic reg_errcode_t 2144250724Sjkiminternal_function __attribute_warn_unused_result__ 2145250724Sjkimsift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, 2146250724Sjkim int str_idx, const re_node_set *candidates) 2147146040Stjr{ 2148250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2149146040Stjr reg_errcode_t err; 2150146040Stjr int node_idx, node; 2151146040Stjr re_sift_context_t local_sctx; 2152146040Stjr int first_idx = search_cur_bkref_entry (mctx, str_idx); 2153146040Stjr 2154146040Stjr if (first_idx == -1) 2155146040Stjr return REG_NOERROR; 2156146040Stjr 2157146040Stjr local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ 2158146040Stjr 2159146040Stjr for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) 2160146040Stjr { 2161146040Stjr int enabled_idx; 2162146040Stjr re_token_type_t type; 2163146040Stjr struct re_backref_cache_entry *entry; 2164146040Stjr node = candidates->elems[node_idx]; 2165146040Stjr type = dfa->nodes[node].type; 2166146040Stjr /* Avoid infinite loop for the REs like "()\1+". */ 2167146040Stjr if (node == sctx->last_node && str_idx == sctx->last_str_idx) 2168146040Stjr continue; 2169146040Stjr if (type != OP_BACK_REF) 2170146040Stjr continue; 2171146040Stjr 2172146040Stjr entry = mctx->bkref_ents + first_idx; 2173146040Stjr enabled_idx = first_idx; 2174146040Stjr do 2175146040Stjr { 2176250724Sjkim int subexp_len; 2177250724Sjkim int to_idx; 2178250724Sjkim int dst_node; 2179250724Sjkim int ret; 2180146040Stjr re_dfastate_t *cur_state; 2181146040Stjr 2182146040Stjr if (entry->node != node) 2183146040Stjr continue; 2184146040Stjr subexp_len = entry->subexp_to - entry->subexp_from; 2185146040Stjr to_idx = str_idx + subexp_len; 2186146040Stjr dst_node = (subexp_len ? dfa->nexts[node] 2187146040Stjr : dfa->edests[node].elems[0]); 2188146040Stjr 2189146040Stjr if (to_idx > sctx->last_str_idx 2190146040Stjr || sctx->sifted_states[to_idx] == NULL 2191146040Stjr || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) 2192146040Stjr || check_dst_limits (mctx, &sctx->limits, node, 2193146040Stjr str_idx, dst_node, to_idx)) 2194146040Stjr continue; 2195146040Stjr 2196146040Stjr if (local_sctx.sifted_states == NULL) 2197146040Stjr { 2198146040Stjr local_sctx = *sctx; 2199146040Stjr err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); 2200146040Stjr if (BE (err != REG_NOERROR, 0)) 2201146040Stjr goto free_return; 2202146040Stjr } 2203146040Stjr local_sctx.last_node = node; 2204146040Stjr local_sctx.last_str_idx = str_idx; 2205250724Sjkim ret = re_node_set_insert (&local_sctx.limits, enabled_idx); 2206250724Sjkim if (BE (ret < 0, 0)) 2207146040Stjr { 2208146040Stjr err = REG_ESPACE; 2209146040Stjr goto free_return; 2210146040Stjr } 2211146040Stjr cur_state = local_sctx.sifted_states[str_idx]; 2212146040Stjr err = sift_states_backward (mctx, &local_sctx); 2213146040Stjr if (BE (err != REG_NOERROR, 0)) 2214146040Stjr goto free_return; 2215146040Stjr if (sctx->limited_states != NULL) 2216146040Stjr { 2217146040Stjr err = merge_state_array (dfa, sctx->limited_states, 2218146040Stjr local_sctx.sifted_states, 2219146040Stjr str_idx + 1); 2220146040Stjr if (BE (err != REG_NOERROR, 0)) 2221146040Stjr goto free_return; 2222146040Stjr } 2223146040Stjr local_sctx.sifted_states[str_idx] = cur_state; 2224146040Stjr re_node_set_remove (&local_sctx.limits, enabled_idx); 2225146040Stjr 2226146040Stjr /* mctx->bkref_ents may have changed, reload the pointer. */ 2227250724Sjkim entry = mctx->bkref_ents + enabled_idx; 2228146040Stjr } 2229146040Stjr while (enabled_idx++, entry++->more); 2230146040Stjr } 2231146040Stjr err = REG_NOERROR; 2232146040Stjr free_return: 2233146040Stjr if (local_sctx.sifted_states != NULL) 2234146040Stjr { 2235146040Stjr re_node_set_free (&local_sctx.limits); 2236146040Stjr } 2237146040Stjr 2238146040Stjr return err; 2239146040Stjr} 2240146040Stjr 2241146040Stjr 2242146040Stjr#ifdef RE_ENABLE_I18N 2243146040Stjrstatic int 2244250724Sjkiminternal_function 2245250724Sjkimsift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, 2246250724Sjkim int node_idx, int str_idx, int max_str_idx) 2247146040Stjr{ 2248250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2249146040Stjr int naccepted; 2250146040Stjr /* Check the node can accept `multi byte'. */ 2251146040Stjr naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); 2252146040Stjr if (naccepted > 0 && str_idx + naccepted <= max_str_idx && 2253146040Stjr !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], 2254146040Stjr dfa->nexts[node_idx])) 2255146040Stjr /* The node can't accept the `multi byte', or the 2256146040Stjr destination was already thrown away, then the node 2257146040Stjr could't accept the current input `multi byte'. */ 2258146040Stjr naccepted = 0; 2259146040Stjr /* Otherwise, it is sure that the node could accept 2260146040Stjr `naccepted' bytes input. */ 2261146040Stjr return naccepted; 2262146040Stjr} 2263146040Stjr#endif /* RE_ENABLE_I18N */ 2264146040Stjr 2265146040Stjr 2266146040Stjr/* Functions for state transition. */ 2267146040Stjr 2268146040Stjr/* Return the next state to which the current state STATE will transit by 2269146040Stjr accepting the current input byte, and update STATE_LOG if necessary. 2270146040Stjr If STATE can accept a multibyte char/collating element/back reference 2271146040Stjr update the destination of STATE_LOG. */ 2272146040Stjr 2273146040Stjrstatic re_dfastate_t * 2274250724Sjkiminternal_function __attribute_warn_unused_result__ 2275250724Sjkimtransit_state (reg_errcode_t *err, re_match_context_t *mctx, 2276250724Sjkim re_dfastate_t *state) 2277146040Stjr{ 2278146040Stjr re_dfastate_t **trtable; 2279146040Stjr unsigned char ch; 2280146040Stjr 2281146040Stjr#ifdef RE_ENABLE_I18N 2282146040Stjr /* If the current state can accept multibyte. */ 2283146040Stjr if (BE (state->accept_mb, 0)) 2284146040Stjr { 2285146040Stjr *err = transit_state_mb (mctx, state); 2286146040Stjr if (BE (*err != REG_NOERROR, 0)) 2287146040Stjr return NULL; 2288146040Stjr } 2289146040Stjr#endif /* RE_ENABLE_I18N */ 2290146040Stjr 2291146040Stjr /* Then decide the next state with the single byte. */ 2292146040Stjr#if 0 2293146040Stjr if (0) 2294146040Stjr /* don't use transition table */ 2295146040Stjr return transit_state_sb (err, mctx, state); 2296146040Stjr#endif 2297146040Stjr 2298146040Stjr /* Use transition table */ 2299146040Stjr ch = re_string_fetch_byte (&mctx->input); 2300146040Stjr for (;;) 2301146040Stjr { 2302146040Stjr trtable = state->trtable; 2303146040Stjr if (BE (trtable != NULL, 1)) 2304146040Stjr return trtable[ch]; 2305146040Stjr 2306146040Stjr trtable = state->word_trtable; 2307146040Stjr if (BE (trtable != NULL, 1)) 2308250724Sjkim { 2309146040Stjr unsigned int context; 2310146040Stjr context 2311146040Stjr = re_string_context_at (&mctx->input, 2312146040Stjr re_string_cur_idx (&mctx->input) - 1, 2313146040Stjr mctx->eflags); 2314146040Stjr if (IS_WORD_CONTEXT (context)) 2315146040Stjr return trtable[ch + SBC_MAX]; 2316146040Stjr else 2317146040Stjr return trtable[ch]; 2318146040Stjr } 2319146040Stjr 2320146040Stjr if (!build_trtable (mctx->dfa, state)) 2321146040Stjr { 2322146040Stjr *err = REG_ESPACE; 2323146040Stjr return NULL; 2324146040Stjr } 2325146040Stjr 2326146040Stjr /* Retry, we now have a transition table. */ 2327146040Stjr } 2328146040Stjr} 2329146040Stjr 2330146040Stjr/* Update the state_log if we need */ 2331146040Stjrre_dfastate_t * 2332250724Sjkiminternal_function 2333250724Sjkimmerge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, 2334250724Sjkim re_dfastate_t *next_state) 2335146040Stjr{ 2336250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2337146040Stjr int cur_idx = re_string_cur_idx (&mctx->input); 2338146040Stjr 2339146040Stjr if (cur_idx > mctx->state_log_top) 2340146040Stjr { 2341146040Stjr mctx->state_log[cur_idx] = next_state; 2342146040Stjr mctx->state_log_top = cur_idx; 2343146040Stjr } 2344146040Stjr else if (mctx->state_log[cur_idx] == 0) 2345146040Stjr { 2346146040Stjr mctx->state_log[cur_idx] = next_state; 2347146040Stjr } 2348146040Stjr else 2349146040Stjr { 2350146040Stjr re_dfastate_t *pstate; 2351146040Stjr unsigned int context; 2352146040Stjr re_node_set next_nodes, *log_nodes, *table_nodes = NULL; 2353146040Stjr /* If (state_log[cur_idx] != 0), it implies that cur_idx is 2354250724Sjkim the destination of a multibyte char/collating element/ 2355250724Sjkim back reference. Then the next state is the union set of 2356250724Sjkim these destinations and the results of the transition table. */ 2357146040Stjr pstate = mctx->state_log[cur_idx]; 2358146040Stjr log_nodes = pstate->entrance_nodes; 2359146040Stjr if (next_state != NULL) 2360250724Sjkim { 2361250724Sjkim table_nodes = next_state->entrance_nodes; 2362250724Sjkim *err = re_node_set_init_union (&next_nodes, table_nodes, 2363146040Stjr log_nodes); 2364250724Sjkim if (BE (*err != REG_NOERROR, 0)) 2365146040Stjr return NULL; 2366250724Sjkim } 2367146040Stjr else 2368250724Sjkim next_nodes = *log_nodes; 2369146040Stjr /* Note: We already add the nodes of the initial state, 2370146040Stjr then we don't need to add them here. */ 2371146040Stjr 2372146040Stjr context = re_string_context_at (&mctx->input, 2373146040Stjr re_string_cur_idx (&mctx->input) - 1, 2374146040Stjr mctx->eflags); 2375146040Stjr next_state = mctx->state_log[cur_idx] 2376250724Sjkim = re_acquire_state_context (err, dfa, &next_nodes, context); 2377146040Stjr /* We don't need to check errors here, since the return value of 2378250724Sjkim this function is next_state and ERR is already set. */ 2379146040Stjr 2380146040Stjr if (table_nodes != NULL) 2381250724Sjkim re_node_set_free (&next_nodes); 2382146040Stjr } 2383146040Stjr 2384146040Stjr if (BE (dfa->nbackref, 0) && next_state != NULL) 2385146040Stjr { 2386146040Stjr /* Check OP_OPEN_SUBEXP in the current state in case that we use them 2387146040Stjr later. We must check them here, since the back references in the 2388146040Stjr next state might use them. */ 2389146040Stjr *err = check_subexp_matching_top (mctx, &next_state->nodes, 2390146040Stjr cur_idx); 2391146040Stjr if (BE (*err != REG_NOERROR, 0)) 2392146040Stjr return NULL; 2393146040Stjr 2394146040Stjr /* If the next state has back references. */ 2395146040Stjr if (next_state->has_backref) 2396146040Stjr { 2397146040Stjr *err = transit_state_bkref (mctx, &next_state->nodes); 2398146040Stjr if (BE (*err != REG_NOERROR, 0)) 2399146040Stjr return NULL; 2400146040Stjr next_state = mctx->state_log[cur_idx]; 2401146040Stjr } 2402146040Stjr } 2403146040Stjr 2404146040Stjr return next_state; 2405146040Stjr} 2406146040Stjr 2407146040Stjr/* Skip bytes in the input that correspond to part of a 2408146040Stjr multi-byte match, then look in the log for a state 2409146040Stjr from which to restart matching. */ 2410146040Stjrre_dfastate_t * 2411250724Sjkiminternal_function 2412250724Sjkimfind_recover_state (reg_errcode_t *err, re_match_context_t *mctx) 2413146040Stjr{ 2414250724Sjkim re_dfastate_t *cur_state; 2415146040Stjr do 2416146040Stjr { 2417146040Stjr int max = mctx->state_log_top; 2418146040Stjr int cur_str_idx = re_string_cur_idx (&mctx->input); 2419146040Stjr 2420146040Stjr do 2421146040Stjr { 2422250724Sjkim if (++cur_str_idx > max) 2423250724Sjkim return NULL; 2424250724Sjkim re_string_skip_bytes (&mctx->input, 1); 2425146040Stjr } 2426146040Stjr while (mctx->state_log[cur_str_idx] == NULL); 2427146040Stjr 2428146040Stjr cur_state = merge_state_with_log (err, mctx, NULL); 2429146040Stjr } 2430250724Sjkim while (*err == REG_NOERROR && cur_state == NULL); 2431146040Stjr return cur_state; 2432146040Stjr} 2433146040Stjr 2434146040Stjr/* Helper functions for transit_state. */ 2435146040Stjr 2436146040Stjr/* From the node set CUR_NODES, pick up the nodes whose types are 2437146040Stjr OP_OPEN_SUBEXP and which have corresponding back references in the regular 2438146040Stjr expression. And register them to use them later for evaluating the 2439146040Stjr correspoding back references. */ 2440146040Stjr 2441146040Stjrstatic reg_errcode_t 2442250724Sjkiminternal_function 2443250724Sjkimcheck_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, 2444250724Sjkim int str_idx) 2445146040Stjr{ 2446250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2447146040Stjr int node_idx; 2448146040Stjr reg_errcode_t err; 2449146040Stjr 2450146040Stjr /* TODO: This isn't efficient. 2451146040Stjr Because there might be more than one nodes whose types are 2452146040Stjr OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all 2453146040Stjr nodes. 2454146040Stjr E.g. RE: (a){2} */ 2455146040Stjr for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) 2456146040Stjr { 2457146040Stjr int node = cur_nodes->elems[node_idx]; 2458146040Stjr if (dfa->nodes[node].type == OP_OPEN_SUBEXP 2459250724Sjkim && dfa->nodes[node].opr.idx < BITSET_WORD_BITS 2460250724Sjkim && (dfa->used_bkref_map 2461250724Sjkim & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) 2462146040Stjr { 2463146040Stjr err = match_ctx_add_subtop (mctx, node, str_idx); 2464146040Stjr if (BE (err != REG_NOERROR, 0)) 2465146040Stjr return err; 2466146040Stjr } 2467146040Stjr } 2468146040Stjr return REG_NOERROR; 2469146040Stjr} 2470146040Stjr 2471146040Stjr#if 0 2472146040Stjr/* Return the next state to which the current state STATE will transit by 2473146040Stjr accepting the current input byte. */ 2474146040Stjr 2475146040Stjrstatic re_dfastate_t * 2476250724Sjkimtransit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, 2477250724Sjkim re_dfastate_t *state) 2478146040Stjr{ 2479250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2480146040Stjr re_node_set next_nodes; 2481146040Stjr re_dfastate_t *next_state; 2482146040Stjr int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); 2483146040Stjr unsigned int context; 2484146040Stjr 2485146040Stjr *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); 2486146040Stjr if (BE (*err != REG_NOERROR, 0)) 2487146040Stjr return NULL; 2488146040Stjr for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) 2489146040Stjr { 2490146040Stjr int cur_node = state->nodes.elems[node_cnt]; 2491146040Stjr if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) 2492146040Stjr { 2493146040Stjr *err = re_node_set_merge (&next_nodes, 2494146040Stjr dfa->eclosures + dfa->nexts[cur_node]); 2495146040Stjr if (BE (*err != REG_NOERROR, 0)) 2496146040Stjr { 2497146040Stjr re_node_set_free (&next_nodes); 2498146040Stjr return NULL; 2499146040Stjr } 2500146040Stjr } 2501146040Stjr } 2502146040Stjr context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); 2503146040Stjr next_state = re_acquire_state_context (err, dfa, &next_nodes, context); 2504146040Stjr /* We don't need to check errors here, since the return value of 2505146040Stjr this function is next_state and ERR is already set. */ 2506146040Stjr 2507146040Stjr re_node_set_free (&next_nodes); 2508146040Stjr re_string_skip_bytes (&mctx->input, 1); 2509146040Stjr return next_state; 2510146040Stjr} 2511146040Stjr#endif 2512146040Stjr 2513146040Stjr#ifdef RE_ENABLE_I18N 2514146040Stjrstatic reg_errcode_t 2515250724Sjkiminternal_function 2516250724Sjkimtransit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) 2517146040Stjr{ 2518250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2519146040Stjr reg_errcode_t err; 2520146040Stjr int i; 2521146040Stjr 2522146040Stjr for (i = 0; i < pstate->nodes.nelem; ++i) 2523146040Stjr { 2524146040Stjr re_node_set dest_nodes, *new_nodes; 2525146040Stjr int cur_node_idx = pstate->nodes.elems[i]; 2526146040Stjr int naccepted, dest_idx; 2527146040Stjr unsigned int context; 2528146040Stjr re_dfastate_t *dest_state; 2529146040Stjr 2530146040Stjr if (!dfa->nodes[cur_node_idx].accept_mb) 2531250724Sjkim continue; 2532146040Stjr 2533146040Stjr if (dfa->nodes[cur_node_idx].constraint) 2534146040Stjr { 2535146040Stjr context = re_string_context_at (&mctx->input, 2536146040Stjr re_string_cur_idx (&mctx->input), 2537146040Stjr mctx->eflags); 2538146040Stjr if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, 2539146040Stjr context)) 2540146040Stjr continue; 2541146040Stjr } 2542146040Stjr 2543146040Stjr /* How many bytes the node can accept? */ 2544146040Stjr naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, 2545146040Stjr re_string_cur_idx (&mctx->input)); 2546146040Stjr if (naccepted == 0) 2547146040Stjr continue; 2548146040Stjr 2549146040Stjr /* The node can accepts `naccepted' bytes. */ 2550146040Stjr dest_idx = re_string_cur_idx (&mctx->input) + naccepted; 2551146040Stjr mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted 2552146040Stjr : mctx->max_mb_elem_len); 2553146040Stjr err = clean_state_log_if_needed (mctx, dest_idx); 2554146040Stjr if (BE (err != REG_NOERROR, 0)) 2555146040Stjr return err; 2556146040Stjr#ifdef DEBUG 2557146040Stjr assert (dfa->nexts[cur_node_idx] != -1); 2558146040Stjr#endif 2559146040Stjr new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; 2560146040Stjr 2561146040Stjr dest_state = mctx->state_log[dest_idx]; 2562146040Stjr if (dest_state == NULL) 2563146040Stjr dest_nodes = *new_nodes; 2564146040Stjr else 2565146040Stjr { 2566146040Stjr err = re_node_set_init_union (&dest_nodes, 2567146040Stjr dest_state->entrance_nodes, new_nodes); 2568146040Stjr if (BE (err != REG_NOERROR, 0)) 2569146040Stjr return err; 2570146040Stjr } 2571250724Sjkim context = re_string_context_at (&mctx->input, dest_idx - 1, 2572250724Sjkim mctx->eflags); 2573146040Stjr mctx->state_log[dest_idx] 2574146040Stjr = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2575146040Stjr if (dest_state != NULL) 2576146040Stjr re_node_set_free (&dest_nodes); 2577146040Stjr if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) 2578146040Stjr return err; 2579146040Stjr } 2580146040Stjr return REG_NOERROR; 2581146040Stjr} 2582146040Stjr#endif /* RE_ENABLE_I18N */ 2583146040Stjr 2584146040Stjrstatic reg_errcode_t 2585250724Sjkiminternal_function 2586250724Sjkimtransit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) 2587146040Stjr{ 2588250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2589146040Stjr reg_errcode_t err; 2590146040Stjr int i; 2591146040Stjr int cur_str_idx = re_string_cur_idx (&mctx->input); 2592146040Stjr 2593146040Stjr for (i = 0; i < nodes->nelem; ++i) 2594146040Stjr { 2595146040Stjr int dest_str_idx, prev_nelem, bkc_idx; 2596146040Stjr int node_idx = nodes->elems[i]; 2597146040Stjr unsigned int context; 2598146040Stjr const re_token_t *node = dfa->nodes + node_idx; 2599146040Stjr re_node_set *new_dest_nodes; 2600146040Stjr 2601146040Stjr /* Check whether `node' is a backreference or not. */ 2602146040Stjr if (node->type != OP_BACK_REF) 2603146040Stjr continue; 2604146040Stjr 2605146040Stjr if (node->constraint) 2606146040Stjr { 2607146040Stjr context = re_string_context_at (&mctx->input, cur_str_idx, 2608146040Stjr mctx->eflags); 2609146040Stjr if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) 2610146040Stjr continue; 2611146040Stjr } 2612146040Stjr 2613146040Stjr /* `node' is a backreference. 2614146040Stjr Check the substring which the substring matched. */ 2615146040Stjr bkc_idx = mctx->nbkref_ents; 2616146040Stjr err = get_subexp (mctx, node_idx, cur_str_idx); 2617146040Stjr if (BE (err != REG_NOERROR, 0)) 2618146040Stjr goto free_return; 2619146040Stjr 2620146040Stjr /* And add the epsilon closures (which is `new_dest_nodes') of 2621146040Stjr the backreference to appropriate state_log. */ 2622146040Stjr#ifdef DEBUG 2623146040Stjr assert (dfa->nexts[node_idx] != -1); 2624146040Stjr#endif 2625146040Stjr for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) 2626146040Stjr { 2627146040Stjr int subexp_len; 2628146040Stjr re_dfastate_t *dest_state; 2629146040Stjr struct re_backref_cache_entry *bkref_ent; 2630146040Stjr bkref_ent = mctx->bkref_ents + bkc_idx; 2631146040Stjr if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) 2632146040Stjr continue; 2633146040Stjr subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; 2634146040Stjr new_dest_nodes = (subexp_len == 0 2635146040Stjr ? dfa->eclosures + dfa->edests[node_idx].elems[0] 2636146040Stjr : dfa->eclosures + dfa->nexts[node_idx]); 2637146040Stjr dest_str_idx = (cur_str_idx + bkref_ent->subexp_to 2638146040Stjr - bkref_ent->subexp_from); 2639146040Stjr context = re_string_context_at (&mctx->input, dest_str_idx - 1, 2640146040Stjr mctx->eflags); 2641146040Stjr dest_state = mctx->state_log[dest_str_idx]; 2642146040Stjr prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 2643146040Stjr : mctx->state_log[cur_str_idx]->nodes.nelem); 2644146040Stjr /* Add `new_dest_node' to state_log. */ 2645146040Stjr if (dest_state == NULL) 2646146040Stjr { 2647146040Stjr mctx->state_log[dest_str_idx] 2648146040Stjr = re_acquire_state_context (&err, dfa, new_dest_nodes, 2649146040Stjr context); 2650146040Stjr if (BE (mctx->state_log[dest_str_idx] == NULL 2651146040Stjr && err != REG_NOERROR, 0)) 2652146040Stjr goto free_return; 2653146040Stjr } 2654146040Stjr else 2655146040Stjr { 2656146040Stjr re_node_set dest_nodes; 2657146040Stjr err = re_node_set_init_union (&dest_nodes, 2658146040Stjr dest_state->entrance_nodes, 2659146040Stjr new_dest_nodes); 2660146040Stjr if (BE (err != REG_NOERROR, 0)) 2661146040Stjr { 2662146040Stjr re_node_set_free (&dest_nodes); 2663146040Stjr goto free_return; 2664146040Stjr } 2665146040Stjr mctx->state_log[dest_str_idx] 2666146040Stjr = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2667146040Stjr re_node_set_free (&dest_nodes); 2668146040Stjr if (BE (mctx->state_log[dest_str_idx] == NULL 2669146040Stjr && err != REG_NOERROR, 0)) 2670146040Stjr goto free_return; 2671146040Stjr } 2672146040Stjr /* We need to check recursively if the backreference can epsilon 2673146040Stjr transit. */ 2674146040Stjr if (subexp_len == 0 2675146040Stjr && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) 2676146040Stjr { 2677146040Stjr err = check_subexp_matching_top (mctx, new_dest_nodes, 2678146040Stjr cur_str_idx); 2679146040Stjr if (BE (err != REG_NOERROR, 0)) 2680146040Stjr goto free_return; 2681146040Stjr err = transit_state_bkref (mctx, new_dest_nodes); 2682146040Stjr if (BE (err != REG_NOERROR, 0)) 2683146040Stjr goto free_return; 2684146040Stjr } 2685146040Stjr } 2686146040Stjr } 2687146040Stjr err = REG_NOERROR; 2688146040Stjr free_return: 2689146040Stjr return err; 2690146040Stjr} 2691146040Stjr 2692146040Stjr/* Enumerate all the candidates which the backreference BKREF_NODE can match 2693146040Stjr at BKREF_STR_IDX, and register them by match_ctx_add_entry(). 2694146040Stjr Note that we might collect inappropriate candidates here. 2695146040Stjr However, the cost of checking them strictly here is too high, then we 2696146040Stjr delay these checking for prune_impossible_nodes(). */ 2697146040Stjr 2698146040Stjrstatic reg_errcode_t 2699250724Sjkiminternal_function __attribute_warn_unused_result__ 2700250724Sjkimget_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) 2701146040Stjr{ 2702250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2703146040Stjr int subexp_num, sub_top_idx; 2704146040Stjr const char *buf = (const char *) re_string_get_buffer (&mctx->input); 2705146040Stjr /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ 2706146040Stjr int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); 2707146040Stjr if (cache_idx != -1) 2708146040Stjr { 2709250724Sjkim const struct re_backref_cache_entry *entry 2710250724Sjkim = mctx->bkref_ents + cache_idx; 2711146040Stjr do 2712250724Sjkim if (entry->node == bkref_node) 2713146040Stjr return REG_NOERROR; /* We already checked it. */ 2714146040Stjr while (entry++->more); 2715146040Stjr } 2716146040Stjr 2717146040Stjr subexp_num = dfa->nodes[bkref_node].opr.idx; 2718146040Stjr 2719146040Stjr /* For each sub expression */ 2720146040Stjr for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) 2721146040Stjr { 2722146040Stjr reg_errcode_t err; 2723146040Stjr re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; 2724146040Stjr re_sub_match_last_t *sub_last; 2725146040Stjr int sub_last_idx, sl_str, bkref_str_off; 2726146040Stjr 2727146040Stjr if (dfa->nodes[sub_top->node].opr.idx != subexp_num) 2728146040Stjr continue; /* It isn't related. */ 2729146040Stjr 2730146040Stjr sl_str = sub_top->str_idx; 2731146040Stjr bkref_str_off = bkref_str_idx; 2732146040Stjr /* At first, check the last node of sub expressions we already 2733146040Stjr evaluated. */ 2734146040Stjr for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) 2735146040Stjr { 2736146040Stjr int sl_str_diff; 2737146040Stjr sub_last = sub_top->lasts[sub_last_idx]; 2738146040Stjr sl_str_diff = sub_last->str_idx - sl_str; 2739146040Stjr /* The matched string by the sub expression match with the substring 2740146040Stjr at the back reference? */ 2741146040Stjr if (sl_str_diff > 0) 2742146040Stjr { 2743146040Stjr if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) 2744146040Stjr { 2745146040Stjr /* Not enough chars for a successful match. */ 2746146040Stjr if (bkref_str_off + sl_str_diff > mctx->input.len) 2747146040Stjr break; 2748146040Stjr 2749146040Stjr err = clean_state_log_if_needed (mctx, 2750146040Stjr bkref_str_off 2751146040Stjr + sl_str_diff); 2752146040Stjr if (BE (err != REG_NOERROR, 0)) 2753146040Stjr return err; 2754146040Stjr buf = (const char *) re_string_get_buffer (&mctx->input); 2755146040Stjr } 2756146040Stjr if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) 2757250724Sjkim /* We don't need to search this sub expression any more. */ 2758250724Sjkim break; 2759146040Stjr } 2760146040Stjr bkref_str_off += sl_str_diff; 2761146040Stjr sl_str += sl_str_diff; 2762146040Stjr err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2763146040Stjr bkref_str_idx); 2764146040Stjr 2765146040Stjr /* Reload buf, since the preceding call might have reallocated 2766146040Stjr the buffer. */ 2767146040Stjr buf = (const char *) re_string_get_buffer (&mctx->input); 2768146040Stjr 2769146040Stjr if (err == REG_NOMATCH) 2770146040Stjr continue; 2771146040Stjr if (BE (err != REG_NOERROR, 0)) 2772146040Stjr return err; 2773146040Stjr } 2774146040Stjr 2775146040Stjr if (sub_last_idx < sub_top->nlasts) 2776146040Stjr continue; 2777146040Stjr if (sub_last_idx > 0) 2778146040Stjr ++sl_str; 2779146040Stjr /* Then, search for the other last nodes of the sub expression. */ 2780146040Stjr for (; sl_str <= bkref_str_idx; ++sl_str) 2781146040Stjr { 2782146040Stjr int cls_node, sl_str_off; 2783146040Stjr const re_node_set *nodes; 2784146040Stjr sl_str_off = sl_str - sub_top->str_idx; 2785146040Stjr /* The matched string by the sub expression match with the substring 2786146040Stjr at the back reference? */ 2787146040Stjr if (sl_str_off > 0) 2788146040Stjr { 2789146040Stjr if (BE (bkref_str_off >= mctx->input.valid_len, 0)) 2790146040Stjr { 2791146040Stjr /* If we are at the end of the input, we cannot match. */ 2792146040Stjr if (bkref_str_off >= mctx->input.len) 2793146040Stjr break; 2794146040Stjr 2795251435Sjkim err = extend_buffers (mctx, bkref_str_off + 1); 2796146040Stjr if (BE (err != REG_NOERROR, 0)) 2797146040Stjr return err; 2798146040Stjr 2799146040Stjr buf = (const char *) re_string_get_buffer (&mctx->input); 2800146040Stjr } 2801146040Stjr if (buf [bkref_str_off++] != buf[sl_str - 1]) 2802146040Stjr break; /* We don't need to search this sub expression 2803146040Stjr any more. */ 2804146040Stjr } 2805146040Stjr if (mctx->state_log[sl_str] == NULL) 2806146040Stjr continue; 2807146040Stjr /* Does this state have a ')' of the sub expression? */ 2808146040Stjr nodes = &mctx->state_log[sl_str]->nodes; 2809250724Sjkim cls_node = find_subexp_node (dfa, nodes, subexp_num, 2810250724Sjkim OP_CLOSE_SUBEXP); 2811146040Stjr if (cls_node == -1) 2812146040Stjr continue; /* No. */ 2813146040Stjr if (sub_top->path == NULL) 2814146040Stjr { 2815146040Stjr sub_top->path = calloc (sizeof (state_array_t), 2816146040Stjr sl_str - sub_top->str_idx + 1); 2817146040Stjr if (sub_top->path == NULL) 2818146040Stjr return REG_ESPACE; 2819146040Stjr } 2820146040Stjr /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node 2821146040Stjr in the current context? */ 2822146040Stjr err = check_arrival (mctx, sub_top->path, sub_top->node, 2823250724Sjkim sub_top->str_idx, cls_node, sl_str, 2824250724Sjkim OP_CLOSE_SUBEXP); 2825146040Stjr if (err == REG_NOMATCH) 2826146040Stjr continue; 2827146040Stjr if (BE (err != REG_NOERROR, 0)) 2828146040Stjr return err; 2829146040Stjr sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); 2830146040Stjr if (BE (sub_last == NULL, 0)) 2831146040Stjr return REG_ESPACE; 2832146040Stjr err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2833146040Stjr bkref_str_idx); 2834146040Stjr if (err == REG_NOMATCH) 2835146040Stjr continue; 2836146040Stjr } 2837146040Stjr } 2838146040Stjr return REG_NOERROR; 2839146040Stjr} 2840146040Stjr 2841146040Stjr/* Helper functions for get_subexp(). */ 2842146040Stjr 2843146040Stjr/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. 2844146040Stjr If it can arrive, register the sub expression expressed with SUB_TOP 2845146040Stjr and SUB_LAST. */ 2846146040Stjr 2847146040Stjrstatic reg_errcode_t 2848250724Sjkiminternal_function 2849250724Sjkimget_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, 2850250724Sjkim re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) 2851146040Stjr{ 2852146040Stjr reg_errcode_t err; 2853146040Stjr int to_idx; 2854146040Stjr /* Can the subexpression arrive the back reference? */ 2855146040Stjr err = check_arrival (mctx, &sub_last->path, sub_last->node, 2856250724Sjkim sub_last->str_idx, bkref_node, bkref_str, 2857250724Sjkim OP_OPEN_SUBEXP); 2858146040Stjr if (err != REG_NOERROR) 2859146040Stjr return err; 2860146040Stjr err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, 2861146040Stjr sub_last->str_idx); 2862146040Stjr if (BE (err != REG_NOERROR, 0)) 2863146040Stjr return err; 2864146040Stjr to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; 2865146040Stjr return clean_state_log_if_needed (mctx, to_idx); 2866146040Stjr} 2867146040Stjr 2868146040Stjr/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. 2869146040Stjr Search '(' if FL_OPEN, or search ')' otherwise. 2870146040Stjr TODO: This function isn't efficient... 2871146040Stjr Because there might be more than one nodes whose types are 2872146040Stjr OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all 2873146040Stjr nodes. 2874146040Stjr E.g. RE: (a){2} */ 2875146040Stjr 2876146040Stjrstatic int 2877250724Sjkiminternal_function 2878250724Sjkimfind_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 2879250724Sjkim int subexp_idx, int type) 2880146040Stjr{ 2881146040Stjr int cls_idx; 2882146040Stjr for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) 2883146040Stjr { 2884146040Stjr int cls_node = nodes->elems[cls_idx]; 2885146040Stjr const re_token_t *node = dfa->nodes + cls_node; 2886146040Stjr if (node->type == type 2887146040Stjr && node->opr.idx == subexp_idx) 2888146040Stjr return cls_node; 2889146040Stjr } 2890146040Stjr return -1; 2891146040Stjr} 2892146040Stjr 2893146040Stjr/* Check whether the node TOP_NODE at TOP_STR can arrive to the node 2894146040Stjr LAST_NODE at LAST_STR. We record the path onto PATH since it will be 2895146040Stjr heavily reused. 2896146040Stjr Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ 2897146040Stjr 2898146040Stjrstatic reg_errcode_t 2899250724Sjkiminternal_function __attribute_warn_unused_result__ 2900250724Sjkimcheck_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, 2901250724Sjkim int top_str, int last_node, int last_str, int type) 2902146040Stjr{ 2903250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 2904250724Sjkim reg_errcode_t err = REG_NOERROR; 2905146040Stjr int subexp_num, backup_cur_idx, str_idx, null_cnt; 2906146040Stjr re_dfastate_t *cur_state = NULL; 2907146040Stjr re_node_set *cur_nodes, next_nodes; 2908146040Stjr re_dfastate_t **backup_state_log; 2909146040Stjr unsigned int context; 2910146040Stjr 2911146040Stjr subexp_num = dfa->nodes[top_node].opr.idx; 2912146040Stjr /* Extend the buffer if we need. */ 2913146040Stjr if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) 2914146040Stjr { 2915146040Stjr re_dfastate_t **new_array; 2916146040Stjr int old_alloc = path->alloc; 2917146040Stjr path->alloc += last_str + mctx->max_mb_elem_len + 1; 2918146040Stjr new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); 2919250724Sjkim if (BE (new_array == NULL, 0)) 2920146040Stjr { 2921146040Stjr path->alloc = old_alloc; 2922146040Stjr return REG_ESPACE; 2923146040Stjr } 2924146040Stjr path->array = new_array; 2925146040Stjr memset (new_array + old_alloc, '\0', 2926146040Stjr sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); 2927146040Stjr } 2928146040Stjr 2929250724Sjkim str_idx = path->next_idx ?: top_str; 2930146040Stjr 2931146040Stjr /* Temporary modify MCTX. */ 2932146040Stjr backup_state_log = mctx->state_log; 2933146040Stjr backup_cur_idx = mctx->input.cur_idx; 2934146040Stjr mctx->state_log = path->array; 2935146040Stjr mctx->input.cur_idx = str_idx; 2936146040Stjr 2937146040Stjr /* Setup initial node set. */ 2938146040Stjr context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 2939146040Stjr if (str_idx == top_str) 2940146040Stjr { 2941146040Stjr err = re_node_set_init_1 (&next_nodes, top_node); 2942146040Stjr if (BE (err != REG_NOERROR, 0)) 2943146040Stjr return err; 2944146040Stjr err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2945146040Stjr if (BE (err != REG_NOERROR, 0)) 2946146040Stjr { 2947146040Stjr re_node_set_free (&next_nodes); 2948146040Stjr return err; 2949146040Stjr } 2950146040Stjr } 2951146040Stjr else 2952146040Stjr { 2953146040Stjr cur_state = mctx->state_log[str_idx]; 2954146040Stjr if (cur_state && cur_state->has_backref) 2955146040Stjr { 2956146040Stjr err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); 2957250724Sjkim if (BE (err != REG_NOERROR, 0)) 2958146040Stjr return err; 2959146040Stjr } 2960146040Stjr else 2961146040Stjr re_node_set_init_empty (&next_nodes); 2962146040Stjr } 2963146040Stjr if (str_idx == top_str || (cur_state && cur_state->has_backref)) 2964146040Stjr { 2965146040Stjr if (next_nodes.nelem) 2966146040Stjr { 2967146040Stjr err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2968146040Stjr subexp_num, type); 2969250724Sjkim if (BE (err != REG_NOERROR, 0)) 2970146040Stjr { 2971146040Stjr re_node_set_free (&next_nodes); 2972146040Stjr return err; 2973146040Stjr } 2974146040Stjr } 2975146040Stjr cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2976146040Stjr if (BE (cur_state == NULL && err != REG_NOERROR, 0)) 2977146040Stjr { 2978146040Stjr re_node_set_free (&next_nodes); 2979146040Stjr return err; 2980146040Stjr } 2981146040Stjr mctx->state_log[str_idx] = cur_state; 2982146040Stjr } 2983146040Stjr 2984146040Stjr for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) 2985146040Stjr { 2986146040Stjr re_node_set_empty (&next_nodes); 2987146040Stjr if (mctx->state_log[str_idx + 1]) 2988146040Stjr { 2989146040Stjr err = re_node_set_merge (&next_nodes, 2990146040Stjr &mctx->state_log[str_idx + 1]->nodes); 2991146040Stjr if (BE (err != REG_NOERROR, 0)) 2992146040Stjr { 2993146040Stjr re_node_set_free (&next_nodes); 2994146040Stjr return err; 2995146040Stjr } 2996146040Stjr } 2997146040Stjr if (cur_state) 2998146040Stjr { 2999146040Stjr err = check_arrival_add_next_nodes (mctx, str_idx, 3000250724Sjkim &cur_state->non_eps_nodes, 3001250724Sjkim &next_nodes); 3002146040Stjr if (BE (err != REG_NOERROR, 0)) 3003146040Stjr { 3004146040Stjr re_node_set_free (&next_nodes); 3005146040Stjr return err; 3006146040Stjr } 3007146040Stjr } 3008146040Stjr ++str_idx; 3009146040Stjr if (next_nodes.nelem) 3010146040Stjr { 3011146040Stjr err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 3012146040Stjr if (BE (err != REG_NOERROR, 0)) 3013146040Stjr { 3014146040Stjr re_node_set_free (&next_nodes); 3015146040Stjr return err; 3016146040Stjr } 3017146040Stjr err = expand_bkref_cache (mctx, &next_nodes, str_idx, 3018146040Stjr subexp_num, type); 3019250724Sjkim if (BE (err != REG_NOERROR, 0)) 3020146040Stjr { 3021146040Stjr re_node_set_free (&next_nodes); 3022146040Stjr return err; 3023146040Stjr } 3024146040Stjr } 3025146040Stjr context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 3026146040Stjr cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 3027146040Stjr if (BE (cur_state == NULL && err != REG_NOERROR, 0)) 3028146040Stjr { 3029146040Stjr re_node_set_free (&next_nodes); 3030146040Stjr return err; 3031146040Stjr } 3032146040Stjr mctx->state_log[str_idx] = cur_state; 3033146040Stjr null_cnt = cur_state == NULL ? null_cnt + 1 : 0; 3034146040Stjr } 3035146040Stjr re_node_set_free (&next_nodes); 3036146040Stjr cur_nodes = (mctx->state_log[last_str] == NULL ? NULL 3037146040Stjr : &mctx->state_log[last_str]->nodes); 3038146040Stjr path->next_idx = str_idx; 3039146040Stjr 3040146040Stjr /* Fix MCTX. */ 3041146040Stjr mctx->state_log = backup_state_log; 3042146040Stjr mctx->input.cur_idx = backup_cur_idx; 3043146040Stjr 3044146040Stjr /* Then check the current node set has the node LAST_NODE. */ 3045146040Stjr if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) 3046146040Stjr return REG_NOERROR; 3047146040Stjr 3048146040Stjr return REG_NOMATCH; 3049146040Stjr} 3050146040Stjr 3051146040Stjr/* Helper functions for check_arrival. */ 3052146040Stjr 3053146040Stjr/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them 3054146040Stjr to NEXT_NODES. 3055146040Stjr TODO: This function is similar to the functions transit_state*(), 3056146040Stjr however this function has many additional works. 3057146040Stjr Can't we unify them? */ 3058146040Stjr 3059146040Stjrstatic reg_errcode_t 3060250724Sjkiminternal_function __attribute_warn_unused_result__ 3061250724Sjkimcheck_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, 3062250724Sjkim re_node_set *cur_nodes, re_node_set *next_nodes) 3063146040Stjr{ 3064250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 3065146040Stjr int result; 3066146040Stjr int cur_idx; 3067250724Sjkim reg_errcode_t err = REG_NOERROR; 3068146040Stjr re_node_set union_set; 3069146040Stjr re_node_set_init_empty (&union_set); 3070146040Stjr for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) 3071146040Stjr { 3072146040Stjr int naccepted = 0; 3073146040Stjr int cur_node = cur_nodes->elems[cur_idx]; 3074146040Stjr#ifdef DEBUG 3075146040Stjr re_token_type_t type = dfa->nodes[cur_node].type; 3076146040Stjr assert (!IS_EPSILON_NODE (type)); 3077146040Stjr#endif 3078146040Stjr#ifdef RE_ENABLE_I18N 3079146040Stjr /* If the node may accept `multi byte'. */ 3080146040Stjr if (dfa->nodes[cur_node].accept_mb) 3081146040Stjr { 3082146040Stjr naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, 3083146040Stjr str_idx); 3084146040Stjr if (naccepted > 1) 3085146040Stjr { 3086146040Stjr re_dfastate_t *dest_state; 3087146040Stjr int next_node = dfa->nexts[cur_node]; 3088146040Stjr int next_idx = str_idx + naccepted; 3089146040Stjr dest_state = mctx->state_log[next_idx]; 3090146040Stjr re_node_set_empty (&union_set); 3091146040Stjr if (dest_state) 3092146040Stjr { 3093146040Stjr err = re_node_set_merge (&union_set, &dest_state->nodes); 3094146040Stjr if (BE (err != REG_NOERROR, 0)) 3095146040Stjr { 3096146040Stjr re_node_set_free (&union_set); 3097146040Stjr return err; 3098146040Stjr } 3099146040Stjr } 3100146040Stjr result = re_node_set_insert (&union_set, next_node); 3101146040Stjr if (BE (result < 0, 0)) 3102146040Stjr { 3103146040Stjr re_node_set_free (&union_set); 3104146040Stjr return REG_ESPACE; 3105146040Stjr } 3106146040Stjr mctx->state_log[next_idx] = re_acquire_state (&err, dfa, 3107146040Stjr &union_set); 3108146040Stjr if (BE (mctx->state_log[next_idx] == NULL 3109146040Stjr && err != REG_NOERROR, 0)) 3110146040Stjr { 3111146040Stjr re_node_set_free (&union_set); 3112146040Stjr return err; 3113146040Stjr } 3114146040Stjr } 3115146040Stjr } 3116146040Stjr#endif /* RE_ENABLE_I18N */ 3117146040Stjr if (naccepted 3118146040Stjr || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) 3119146040Stjr { 3120146040Stjr result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); 3121146040Stjr if (BE (result < 0, 0)) 3122146040Stjr { 3123146040Stjr re_node_set_free (&union_set); 3124146040Stjr return REG_ESPACE; 3125146040Stjr } 3126146040Stjr } 3127146040Stjr } 3128146040Stjr re_node_set_free (&union_set); 3129146040Stjr return REG_NOERROR; 3130146040Stjr} 3131146040Stjr 3132146040Stjr/* For all the nodes in CUR_NODES, add the epsilon closures of them to 3133146040Stjr CUR_NODES, however exclude the nodes which are: 3134146040Stjr - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. 3135146040Stjr - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. 3136146040Stjr*/ 3137146040Stjr 3138146040Stjrstatic reg_errcode_t 3139250724Sjkiminternal_function 3140250724Sjkimcheck_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, 3141250724Sjkim int ex_subexp, int type) 3142146040Stjr{ 3143146040Stjr reg_errcode_t err; 3144146040Stjr int idx, outside_node; 3145146040Stjr re_node_set new_nodes; 3146146040Stjr#ifdef DEBUG 3147146040Stjr assert (cur_nodes->nelem); 3148146040Stjr#endif 3149146040Stjr err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); 3150146040Stjr if (BE (err != REG_NOERROR, 0)) 3151146040Stjr return err; 3152146040Stjr /* Create a new node set NEW_NODES with the nodes which are epsilon 3153146040Stjr closures of the node in CUR_NODES. */ 3154146040Stjr 3155146040Stjr for (idx = 0; idx < cur_nodes->nelem; ++idx) 3156146040Stjr { 3157146040Stjr int cur_node = cur_nodes->elems[idx]; 3158250724Sjkim const re_node_set *eclosure = dfa->eclosures + cur_node; 3159146040Stjr outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); 3160146040Stjr if (outside_node == -1) 3161146040Stjr { 3162146040Stjr /* There are no problematic nodes, just merge them. */ 3163146040Stjr err = re_node_set_merge (&new_nodes, eclosure); 3164146040Stjr if (BE (err != REG_NOERROR, 0)) 3165146040Stjr { 3166146040Stjr re_node_set_free (&new_nodes); 3167146040Stjr return err; 3168146040Stjr } 3169146040Stjr } 3170146040Stjr else 3171146040Stjr { 3172146040Stjr /* There are problematic nodes, re-calculate incrementally. */ 3173146040Stjr err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, 3174146040Stjr ex_subexp, type); 3175146040Stjr if (BE (err != REG_NOERROR, 0)) 3176146040Stjr { 3177146040Stjr re_node_set_free (&new_nodes); 3178146040Stjr return err; 3179146040Stjr } 3180146040Stjr } 3181146040Stjr } 3182146040Stjr re_node_set_free (cur_nodes); 3183146040Stjr *cur_nodes = new_nodes; 3184146040Stjr return REG_NOERROR; 3185146040Stjr} 3186146040Stjr 3187146040Stjr/* Helper function for check_arrival_expand_ecl. 3188146040Stjr Check incrementally the epsilon closure of TARGET, and if it isn't 3189146040Stjr problematic append it to DST_NODES. */ 3190146040Stjr 3191146040Stjrstatic reg_errcode_t 3192250724Sjkiminternal_function __attribute_warn_unused_result__ 3193250724Sjkimcheck_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, 3194250724Sjkim int target, int ex_subexp, int type) 3195146040Stjr{ 3196146040Stjr int cur_node; 3197146040Stjr for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) 3198146040Stjr { 3199146040Stjr int err; 3200146040Stjr 3201146040Stjr if (dfa->nodes[cur_node].type == type 3202146040Stjr && dfa->nodes[cur_node].opr.idx == ex_subexp) 3203146040Stjr { 3204146040Stjr if (type == OP_CLOSE_SUBEXP) 3205146040Stjr { 3206146040Stjr err = re_node_set_insert (dst_nodes, cur_node); 3207146040Stjr if (BE (err == -1, 0)) 3208146040Stjr return REG_ESPACE; 3209146040Stjr } 3210146040Stjr break; 3211146040Stjr } 3212146040Stjr err = re_node_set_insert (dst_nodes, cur_node); 3213146040Stjr if (BE (err == -1, 0)) 3214146040Stjr return REG_ESPACE; 3215146040Stjr if (dfa->edests[cur_node].nelem == 0) 3216146040Stjr break; 3217146040Stjr if (dfa->edests[cur_node].nelem == 2) 3218146040Stjr { 3219146040Stjr err = check_arrival_expand_ecl_sub (dfa, dst_nodes, 3220146040Stjr dfa->edests[cur_node].elems[1], 3221146040Stjr ex_subexp, type); 3222146040Stjr if (BE (err != REG_NOERROR, 0)) 3223146040Stjr return err; 3224146040Stjr } 3225146040Stjr cur_node = dfa->edests[cur_node].elems[0]; 3226146040Stjr } 3227146040Stjr return REG_NOERROR; 3228146040Stjr} 3229146040Stjr 3230146040Stjr 3231146040Stjr/* For all the back references in the current state, calculate the 3232146040Stjr destination of the back references by the appropriate entry 3233146040Stjr in MCTX->BKREF_ENTS. */ 3234146040Stjr 3235146040Stjrstatic reg_errcode_t 3236250724Sjkiminternal_function __attribute_warn_unused_result__ 3237250724Sjkimexpand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, 3238250724Sjkim int cur_str, int subexp_num, int type) 3239146040Stjr{ 3240250724Sjkim const re_dfa_t *const dfa = mctx->dfa; 3241146040Stjr reg_errcode_t err; 3242146040Stjr int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); 3243146040Stjr struct re_backref_cache_entry *ent; 3244146040Stjr 3245146040Stjr if (cache_idx_start == -1) 3246146040Stjr return REG_NOERROR; 3247146040Stjr 3248146040Stjr restart: 3249146040Stjr ent = mctx->bkref_ents + cache_idx_start; 3250146040Stjr do 3251146040Stjr { 3252146040Stjr int to_idx, next_node; 3253146040Stjr 3254146040Stjr /* Is this entry ENT is appropriate? */ 3255146040Stjr if (!re_node_set_contains (cur_nodes, ent->node)) 3256146040Stjr continue; /* No. */ 3257146040Stjr 3258146040Stjr to_idx = cur_str + ent->subexp_to - ent->subexp_from; 3259146040Stjr /* Calculate the destination of the back reference, and append it 3260146040Stjr to MCTX->STATE_LOG. */ 3261146040Stjr if (to_idx == cur_str) 3262146040Stjr { 3263146040Stjr /* The backreference did epsilon transit, we must re-check all the 3264146040Stjr node in the current state. */ 3265146040Stjr re_node_set new_dests; 3266146040Stjr reg_errcode_t err2, err3; 3267146040Stjr next_node = dfa->edests[ent->node].elems[0]; 3268146040Stjr if (re_node_set_contains (cur_nodes, next_node)) 3269146040Stjr continue; 3270146040Stjr err = re_node_set_init_1 (&new_dests, next_node); 3271146040Stjr err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); 3272146040Stjr err3 = re_node_set_merge (cur_nodes, &new_dests); 3273146040Stjr re_node_set_free (&new_dests); 3274146040Stjr if (BE (err != REG_NOERROR || err2 != REG_NOERROR 3275146040Stjr || err3 != REG_NOERROR, 0)) 3276146040Stjr { 3277146040Stjr err = (err != REG_NOERROR ? err 3278146040Stjr : (err2 != REG_NOERROR ? err2 : err3)); 3279146040Stjr return err; 3280146040Stjr } 3281146040Stjr /* TODO: It is still inefficient... */ 3282146040Stjr goto restart; 3283146040Stjr } 3284146040Stjr else 3285146040Stjr { 3286146040Stjr re_node_set union_set; 3287146040Stjr next_node = dfa->nexts[ent->node]; 3288146040Stjr if (mctx->state_log[to_idx]) 3289146040Stjr { 3290146040Stjr int ret; 3291146040Stjr if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, 3292146040Stjr next_node)) 3293146040Stjr continue; 3294146040Stjr err = re_node_set_init_copy (&union_set, 3295146040Stjr &mctx->state_log[to_idx]->nodes); 3296146040Stjr ret = re_node_set_insert (&union_set, next_node); 3297146040Stjr if (BE (err != REG_NOERROR || ret < 0, 0)) 3298146040Stjr { 3299146040Stjr re_node_set_free (&union_set); 3300146040Stjr err = err != REG_NOERROR ? err : REG_ESPACE; 3301146040Stjr return err; 3302146040Stjr } 3303146040Stjr } 3304146040Stjr else 3305146040Stjr { 3306146040Stjr err = re_node_set_init_1 (&union_set, next_node); 3307146040Stjr if (BE (err != REG_NOERROR, 0)) 3308146040Stjr return err; 3309146040Stjr } 3310146040Stjr mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); 3311146040Stjr re_node_set_free (&union_set); 3312146040Stjr if (BE (mctx->state_log[to_idx] == NULL 3313146040Stjr && err != REG_NOERROR, 0)) 3314146040Stjr return err; 3315146040Stjr } 3316146040Stjr } 3317146040Stjr while (ent++->more); 3318146040Stjr return REG_NOERROR; 3319146040Stjr} 3320146040Stjr 3321146040Stjr/* Build transition table for the state. 3322146040Stjr Return 1 if succeeded, otherwise return NULL. */ 3323146040Stjr 3324146040Stjrstatic int 3325250724Sjkiminternal_function 3326250724Sjkimbuild_trtable (const re_dfa_t *dfa, re_dfastate_t *state) 3327146040Stjr{ 3328146040Stjr reg_errcode_t err; 3329146040Stjr int i, j, ch, need_word_trtable = 0; 3330250724Sjkim bitset_word_t elem, mask; 3331250724Sjkim bool dests_node_malloced = false; 3332250724Sjkim bool dest_states_malloced = false; 3333146040Stjr int ndests; /* Number of the destination states from `state'. */ 3334146040Stjr re_dfastate_t **trtable; 3335146040Stjr re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; 3336146040Stjr re_node_set follows, *dests_node; 3337250724Sjkim bitset_t *dests_ch; 3338250724Sjkim bitset_t acceptable; 3339146040Stjr 3340250724Sjkim struct dests_alloc 3341250724Sjkim { 3342250724Sjkim re_node_set dests_node[SBC_MAX]; 3343250724Sjkim bitset_t dests_ch[SBC_MAX]; 3344250724Sjkim } *dests_alloc; 3345250724Sjkim 3346146040Stjr /* We build DFA states which corresponds to the destination nodes 3347146040Stjr from `state'. `dests_node[i]' represents the nodes which i-th 3348146040Stjr destination state contains, and `dests_ch[i]' represents the 3349146040Stjr characters which i-th destination state accepts. */ 3350250724Sjkim if (__libc_use_alloca (sizeof (struct dests_alloc))) 3351250724Sjkim dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); 3352146040Stjr else 3353146040Stjr { 3354250724Sjkim dests_alloc = re_malloc (struct dests_alloc, 1); 3355250724Sjkim if (BE (dests_alloc == NULL, 0)) 3356146040Stjr return 0; 3357250724Sjkim dests_node_malloced = true; 3358146040Stjr } 3359250724Sjkim dests_node = dests_alloc->dests_node; 3360250724Sjkim dests_ch = dests_alloc->dests_ch; 3361146040Stjr 3362146040Stjr /* Initialize transiton table. */ 3363146040Stjr state->word_trtable = state->trtable = NULL; 3364146040Stjr 3365146040Stjr /* At first, group all nodes belonging to `state' into several 3366146040Stjr destinations. */ 3367146040Stjr ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); 3368146040Stjr if (BE (ndests <= 0, 0)) 3369146040Stjr { 3370146040Stjr if (dests_node_malloced) 3371250724Sjkim free (dests_alloc); 3372146040Stjr /* Return 0 in case of an error, 1 otherwise. */ 3373146040Stjr if (ndests == 0) 3374146040Stjr { 3375146040Stjr state->trtable = (re_dfastate_t **) 3376146040Stjr calloc (sizeof (re_dfastate_t *), SBC_MAX); 3377250724Sjkim if (BE (state->trtable == NULL, 0)) 3378250724Sjkim return 0; 3379146040Stjr return 1; 3380146040Stjr } 3381146040Stjr return 0; 3382146040Stjr } 3383146040Stjr 3384146040Stjr err = re_node_set_alloc (&follows, ndests + 1); 3385146040Stjr if (BE (err != REG_NOERROR, 0)) 3386146040Stjr goto out_free; 3387146040Stjr 3388250724Sjkim /* Avoid arithmetic overflow in size calculation. */ 3389250724Sjkim if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) 3390250724Sjkim / (3 * sizeof (re_dfastate_t *))) 3391250724Sjkim < ndests), 3392250724Sjkim 0)) 3393250724Sjkim goto out_free; 3394250724Sjkim 3395250724Sjkim if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX 3396146040Stjr + ndests * 3 * sizeof (re_dfastate_t *))) 3397146040Stjr dest_states = (re_dfastate_t **) 3398146040Stjr alloca (ndests * 3 * sizeof (re_dfastate_t *)); 3399146040Stjr else 3400146040Stjr { 3401146040Stjr dest_states = (re_dfastate_t **) 3402146040Stjr malloc (ndests * 3 * sizeof (re_dfastate_t *)); 3403146040Stjr if (BE (dest_states == NULL, 0)) 3404146040Stjr { 3405146040Stjrout_free: 3406146040Stjr if (dest_states_malloced) 3407146040Stjr free (dest_states); 3408146040Stjr re_node_set_free (&follows); 3409146040Stjr for (i = 0; i < ndests; ++i) 3410146040Stjr re_node_set_free (dests_node + i); 3411146040Stjr if (dests_node_malloced) 3412250724Sjkim free (dests_alloc); 3413146040Stjr return 0; 3414146040Stjr } 3415250724Sjkim dest_states_malloced = true; 3416146040Stjr } 3417146040Stjr dest_states_word = dest_states + ndests; 3418146040Stjr dest_states_nl = dest_states_word + ndests; 3419146040Stjr bitset_empty (acceptable); 3420146040Stjr 3421146040Stjr /* Then build the states for all destinations. */ 3422146040Stjr for (i = 0; i < ndests; ++i) 3423146040Stjr { 3424146040Stjr int next_node; 3425146040Stjr re_node_set_empty (&follows); 3426146040Stjr /* Merge the follows of this destination states. */ 3427146040Stjr for (j = 0; j < dests_node[i].nelem; ++j) 3428146040Stjr { 3429146040Stjr next_node = dfa->nexts[dests_node[i].elems[j]]; 3430146040Stjr if (next_node != -1) 3431146040Stjr { 3432146040Stjr err = re_node_set_merge (&follows, dfa->eclosures + next_node); 3433146040Stjr if (BE (err != REG_NOERROR, 0)) 3434146040Stjr goto out_free; 3435146040Stjr } 3436146040Stjr } 3437146040Stjr dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); 3438146040Stjr if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) 3439146040Stjr goto out_free; 3440146040Stjr /* If the new state has context constraint, 3441146040Stjr build appropriate states for these contexts. */ 3442146040Stjr if (dest_states[i]->has_constraint) 3443146040Stjr { 3444146040Stjr dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, 3445146040Stjr CONTEXT_WORD); 3446146040Stjr if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) 3447146040Stjr goto out_free; 3448146040Stjr 3449146040Stjr if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) 3450146040Stjr need_word_trtable = 1; 3451146040Stjr 3452146040Stjr dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, 3453146040Stjr CONTEXT_NEWLINE); 3454146040Stjr if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) 3455146040Stjr goto out_free; 3456146040Stjr } 3457146040Stjr else 3458146040Stjr { 3459146040Stjr dest_states_word[i] = dest_states[i]; 3460146040Stjr dest_states_nl[i] = dest_states[i]; 3461146040Stjr } 3462146040Stjr bitset_merge (acceptable, dests_ch[i]); 3463146040Stjr } 3464146040Stjr 3465146040Stjr if (!BE (need_word_trtable, 0)) 3466146040Stjr { 3467146040Stjr /* We don't care about whether the following character is a word 3468146040Stjr character, or we are in a single-byte character set so we can 3469146040Stjr discern by looking at the character code: allocate a 3470146040Stjr 256-entry transition table. */ 3471146040Stjr trtable = state->trtable = 3472146040Stjr (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); 3473146040Stjr if (BE (trtable == NULL, 0)) 3474146040Stjr goto out_free; 3475146040Stjr 3476146040Stjr /* For all characters ch...: */ 3477250724Sjkim for (i = 0; i < BITSET_WORDS; ++i) 3478250724Sjkim for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; 3479146040Stjr elem; 3480146040Stjr mask <<= 1, elem >>= 1, ++ch) 3481146040Stjr if (BE (elem & 1, 0)) 3482146040Stjr { 3483146040Stjr /* There must be exactly one destination which accepts 3484146040Stjr character ch. See group_nodes_into_DFAstates. */ 3485146040Stjr for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) 3486146040Stjr ; 3487146040Stjr 3488146040Stjr /* j-th destination accepts the word character ch. */ 3489146040Stjr if (dfa->word_char[i] & mask) 3490146040Stjr trtable[ch] = dest_states_word[j]; 3491146040Stjr else 3492146040Stjr trtable[ch] = dest_states[j]; 3493146040Stjr } 3494146040Stjr } 3495146040Stjr else 3496146040Stjr { 3497146040Stjr /* We care about whether the following character is a word 3498146040Stjr character, and we are in a multi-byte character set: discern 3499146040Stjr by looking at the character code: build two 256-entry 3500146040Stjr transition tables, one starting at trtable[0] and one 3501146040Stjr starting at trtable[SBC_MAX]. */ 3502146040Stjr trtable = state->word_trtable = 3503146040Stjr (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); 3504146040Stjr if (BE (trtable == NULL, 0)) 3505146040Stjr goto out_free; 3506146040Stjr 3507146040Stjr /* For all characters ch...: */ 3508250724Sjkim for (i = 0; i < BITSET_WORDS; ++i) 3509250724Sjkim for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; 3510146040Stjr elem; 3511146040Stjr mask <<= 1, elem >>= 1, ++ch) 3512146040Stjr if (BE (elem & 1, 0)) 3513146040Stjr { 3514146040Stjr /* There must be exactly one destination which accepts 3515146040Stjr character ch. See group_nodes_into_DFAstates. */ 3516146040Stjr for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) 3517146040Stjr ; 3518146040Stjr 3519146040Stjr /* j-th destination accepts the word character ch. */ 3520146040Stjr trtable[ch] = dest_states[j]; 3521146040Stjr trtable[ch + SBC_MAX] = dest_states_word[j]; 3522146040Stjr } 3523146040Stjr } 3524146040Stjr 3525146040Stjr /* new line */ 3526146040Stjr if (bitset_contain (acceptable, NEWLINE_CHAR)) 3527146040Stjr { 3528146040Stjr /* The current state accepts newline character. */ 3529146040Stjr for (j = 0; j < ndests; ++j) 3530146040Stjr if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) 3531146040Stjr { 3532146040Stjr /* k-th destination accepts newline character. */ 3533146040Stjr trtable[NEWLINE_CHAR] = dest_states_nl[j]; 3534146040Stjr if (need_word_trtable) 3535146040Stjr trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; 3536146040Stjr /* There must be only one destination which accepts 3537146040Stjr newline. See group_nodes_into_DFAstates. */ 3538146040Stjr break; 3539146040Stjr } 3540146040Stjr } 3541146040Stjr 3542146040Stjr if (dest_states_malloced) 3543146040Stjr free (dest_states); 3544146040Stjr 3545146040Stjr re_node_set_free (&follows); 3546146040Stjr for (i = 0; i < ndests; ++i) 3547146040Stjr re_node_set_free (dests_node + i); 3548146040Stjr 3549146040Stjr if (dests_node_malloced) 3550250724Sjkim free (dests_alloc); 3551146040Stjr 3552146040Stjr return 1; 3553146040Stjr} 3554146040Stjr 3555146040Stjr/* Group all nodes belonging to STATE into several destinations. 3556146040Stjr Then for all destinations, set the nodes belonging to the destination 3557146040Stjr to DESTS_NODE[i] and set the characters accepted by the destination 3558146040Stjr to DEST_CH[i]. This function return the number of destinations. */ 3559146040Stjr 3560146040Stjrstatic int 3561250724Sjkiminternal_function 3562250724Sjkimgroup_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, 3563250724Sjkim re_node_set *dests_node, bitset_t *dests_ch) 3564146040Stjr{ 3565146040Stjr reg_errcode_t err; 3566146040Stjr int result; 3567146040Stjr int i, j, k; 3568146040Stjr int ndests; /* Number of the destinations from `state'. */ 3569250724Sjkim bitset_t accepts; /* Characters a node can accept. */ 3570146040Stjr const re_node_set *cur_nodes = &state->nodes; 3571146040Stjr bitset_empty (accepts); 3572146040Stjr ndests = 0; 3573146040Stjr 3574146040Stjr /* For all the nodes belonging to `state', */ 3575146040Stjr for (i = 0; i < cur_nodes->nelem; ++i) 3576146040Stjr { 3577146040Stjr re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; 3578146040Stjr re_token_type_t type = node->type; 3579146040Stjr unsigned int constraint = node->constraint; 3580146040Stjr 3581146040Stjr /* Enumerate all single byte character this node can accept. */ 3582146040Stjr if (type == CHARACTER) 3583146040Stjr bitset_set (accepts, node->opr.c); 3584146040Stjr else if (type == SIMPLE_BRACKET) 3585146040Stjr { 3586146040Stjr bitset_merge (accepts, node->opr.sbcset); 3587146040Stjr } 3588146040Stjr else if (type == OP_PERIOD) 3589146040Stjr { 3590146040Stjr#ifdef RE_ENABLE_I18N 3591146040Stjr if (dfa->mb_cur_max > 1) 3592146040Stjr bitset_merge (accepts, dfa->sb_char); 3593146040Stjr else 3594146040Stjr#endif 3595146040Stjr bitset_set_all (accepts); 3596146040Stjr if (!(dfa->syntax & RE_DOT_NEWLINE)) 3597146040Stjr bitset_clear (accepts, '\n'); 3598146040Stjr if (dfa->syntax & RE_DOT_NOT_NULL) 3599146040Stjr bitset_clear (accepts, '\0'); 3600146040Stjr } 3601146040Stjr#ifdef RE_ENABLE_I18N 3602146040Stjr else if (type == OP_UTF8_PERIOD) 3603250724Sjkim { 3604250724Sjkim memset (accepts, '\xff', sizeof (bitset_t) / 2); 3605146040Stjr if (!(dfa->syntax & RE_DOT_NEWLINE)) 3606146040Stjr bitset_clear (accepts, '\n'); 3607146040Stjr if (dfa->syntax & RE_DOT_NOT_NULL) 3608146040Stjr bitset_clear (accepts, '\0'); 3609250724Sjkim } 3610146040Stjr#endif 3611146040Stjr else 3612146040Stjr continue; 3613146040Stjr 3614146040Stjr /* Check the `accepts' and sift the characters which are not 3615146040Stjr match it the context. */ 3616146040Stjr if (constraint) 3617146040Stjr { 3618146040Stjr if (constraint & NEXT_NEWLINE_CONSTRAINT) 3619146040Stjr { 3620250724Sjkim bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); 3621146040Stjr bitset_empty (accepts); 3622146040Stjr if (accepts_newline) 3623146040Stjr bitset_set (accepts, NEWLINE_CHAR); 3624146040Stjr else 3625146040Stjr continue; 3626146040Stjr } 3627146040Stjr if (constraint & NEXT_ENDBUF_CONSTRAINT) 3628146040Stjr { 3629146040Stjr bitset_empty (accepts); 3630146040Stjr continue; 3631146040Stjr } 3632146040Stjr 3633146040Stjr if (constraint & NEXT_WORD_CONSTRAINT) 3634146040Stjr { 3635250724Sjkim bitset_word_t any_set = 0; 3636146040Stjr if (type == CHARACTER && !node->word_char) 3637146040Stjr { 3638146040Stjr bitset_empty (accepts); 3639146040Stjr continue; 3640146040Stjr } 3641146040Stjr#ifdef RE_ENABLE_I18N 3642146040Stjr if (dfa->mb_cur_max > 1) 3643250724Sjkim for (j = 0; j < BITSET_WORDS; ++j) 3644146040Stjr any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); 3645146040Stjr else 3646146040Stjr#endif 3647250724Sjkim for (j = 0; j < BITSET_WORDS; ++j) 3648146040Stjr any_set |= (accepts[j] &= dfa->word_char[j]); 3649146040Stjr if (!any_set) 3650146040Stjr continue; 3651146040Stjr } 3652146040Stjr if (constraint & NEXT_NOTWORD_CONSTRAINT) 3653146040Stjr { 3654250724Sjkim bitset_word_t any_set = 0; 3655146040Stjr if (type == CHARACTER && node->word_char) 3656146040Stjr { 3657146040Stjr bitset_empty (accepts); 3658146040Stjr continue; 3659146040Stjr } 3660146040Stjr#ifdef RE_ENABLE_I18N 3661146040Stjr if (dfa->mb_cur_max > 1) 3662250724Sjkim for (j = 0; j < BITSET_WORDS; ++j) 3663146040Stjr any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); 3664146040Stjr else 3665146040Stjr#endif 3666250724Sjkim for (j = 0; j < BITSET_WORDS; ++j) 3667146040Stjr any_set |= (accepts[j] &= ~dfa->word_char[j]); 3668146040Stjr if (!any_set) 3669146040Stjr continue; 3670146040Stjr } 3671146040Stjr } 3672146040Stjr 3673146040Stjr /* Then divide `accepts' into DFA states, or create a new 3674146040Stjr state. Above, we make sure that accepts is not empty. */ 3675146040Stjr for (j = 0; j < ndests; ++j) 3676146040Stjr { 3677250724Sjkim bitset_t intersec; /* Intersection sets, see below. */ 3678250724Sjkim bitset_t remains; 3679146040Stjr /* Flags, see below. */ 3680250724Sjkim bitset_word_t has_intersec, not_subset, not_consumed; 3681146040Stjr 3682146040Stjr /* Optimization, skip if this state doesn't accept the character. */ 3683146040Stjr if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) 3684146040Stjr continue; 3685146040Stjr 3686146040Stjr /* Enumerate the intersection set of this state and `accepts'. */ 3687146040Stjr has_intersec = 0; 3688250724Sjkim for (k = 0; k < BITSET_WORDS; ++k) 3689146040Stjr has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; 3690146040Stjr /* And skip if the intersection set is empty. */ 3691146040Stjr if (!has_intersec) 3692146040Stjr continue; 3693146040Stjr 3694146040Stjr /* Then check if this state is a subset of `accepts'. */ 3695146040Stjr not_subset = not_consumed = 0; 3696250724Sjkim for (k = 0; k < BITSET_WORDS; ++k) 3697146040Stjr { 3698146040Stjr not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; 3699146040Stjr not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; 3700146040Stjr } 3701146040Stjr 3702146040Stjr /* If this state isn't a subset of `accepts', create a 3703146040Stjr new group state, which has the `remains'. */ 3704146040Stjr if (not_subset) 3705146040Stjr { 3706146040Stjr bitset_copy (dests_ch[ndests], remains); 3707146040Stjr bitset_copy (dests_ch[j], intersec); 3708146040Stjr err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); 3709146040Stjr if (BE (err != REG_NOERROR, 0)) 3710146040Stjr goto error_return; 3711146040Stjr ++ndests; 3712146040Stjr } 3713146040Stjr 3714146040Stjr /* Put the position in the current group. */ 3715146040Stjr result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); 3716146040Stjr if (BE (result < 0, 0)) 3717146040Stjr goto error_return; 3718146040Stjr 3719146040Stjr /* If all characters are consumed, go to next node. */ 3720146040Stjr if (!not_consumed) 3721146040Stjr break; 3722146040Stjr } 3723146040Stjr /* Some characters remain, create a new group. */ 3724146040Stjr if (j == ndests) 3725146040Stjr { 3726146040Stjr bitset_copy (dests_ch[ndests], accepts); 3727146040Stjr err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); 3728146040Stjr if (BE (err != REG_NOERROR, 0)) 3729146040Stjr goto error_return; 3730146040Stjr ++ndests; 3731146040Stjr bitset_empty (accepts); 3732146040Stjr } 3733146040Stjr } 3734146040Stjr return ndests; 3735146040Stjr error_return: 3736146040Stjr for (j = 0; j < ndests; ++j) 3737146040Stjr re_node_set_free (dests_node + j); 3738146040Stjr return -1; 3739146040Stjr} 3740146040Stjr 3741146040Stjr#ifdef RE_ENABLE_I18N 3742146040Stjr/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. 3743146040Stjr Return the number of the bytes the node accepts. 3744146040Stjr STR_IDX is the current index of the input string. 3745146040Stjr 3746146040Stjr This function handles the nodes which can accept one character, or 3747146040Stjr one collating element like '.', '[a-z]', opposite to the other nodes 3748146040Stjr can only accept one byte. */ 3749146040Stjr 3750146040Stjrstatic int 3751250724Sjkiminternal_function 3752250724Sjkimcheck_node_accept_bytes (const re_dfa_t *dfa, int node_idx, 3753250724Sjkim const re_string_t *input, int str_idx) 3754146040Stjr{ 3755146040Stjr const re_token_t *node = dfa->nodes + node_idx; 3756146040Stjr int char_len, elem_len; 3757146040Stjr int i; 3758146040Stjr 3759146040Stjr if (BE (node->type == OP_UTF8_PERIOD, 0)) 3760146040Stjr { 3761146040Stjr unsigned char c = re_string_byte_at (input, str_idx), d; 3762146040Stjr if (BE (c < 0xc2, 1)) 3763146040Stjr return 0; 3764146040Stjr 3765146040Stjr if (str_idx + 2 > input->len) 3766146040Stjr return 0; 3767146040Stjr 3768146040Stjr d = re_string_byte_at (input, str_idx + 1); 3769146040Stjr if (c < 0xe0) 3770146040Stjr return (d < 0x80 || d > 0xbf) ? 0 : 2; 3771146040Stjr else if (c < 0xf0) 3772146040Stjr { 3773146040Stjr char_len = 3; 3774146040Stjr if (c == 0xe0 && d < 0xa0) 3775146040Stjr return 0; 3776146040Stjr } 3777146040Stjr else if (c < 0xf8) 3778146040Stjr { 3779146040Stjr char_len = 4; 3780146040Stjr if (c == 0xf0 && d < 0x90) 3781146040Stjr return 0; 3782146040Stjr } 3783146040Stjr else if (c < 0xfc) 3784146040Stjr { 3785146040Stjr char_len = 5; 3786146040Stjr if (c == 0xf8 && d < 0x88) 3787146040Stjr return 0; 3788146040Stjr } 3789146040Stjr else if (c < 0xfe) 3790146040Stjr { 3791146040Stjr char_len = 6; 3792146040Stjr if (c == 0xfc && d < 0x84) 3793146040Stjr return 0; 3794146040Stjr } 3795146040Stjr else 3796146040Stjr return 0; 3797146040Stjr 3798146040Stjr if (str_idx + char_len > input->len) 3799146040Stjr return 0; 3800146040Stjr 3801146040Stjr for (i = 1; i < char_len; ++i) 3802146040Stjr { 3803146040Stjr d = re_string_byte_at (input, str_idx + i); 3804146040Stjr if (d < 0x80 || d > 0xbf) 3805146040Stjr return 0; 3806146040Stjr } 3807146040Stjr return char_len; 3808146040Stjr } 3809146040Stjr 3810146040Stjr char_len = re_string_char_size_at (input, str_idx); 3811146040Stjr if (node->type == OP_PERIOD) 3812146040Stjr { 3813146040Stjr if (char_len <= 1) 3814250724Sjkim return 0; 3815146040Stjr /* FIXME: I don't think this if is needed, as both '\n' 3816146040Stjr and '\0' are char_len == 1. */ 3817146040Stjr /* '.' accepts any one character except the following two cases. */ 3818146040Stjr if ((!(dfa->syntax & RE_DOT_NEWLINE) && 3819146040Stjr re_string_byte_at (input, str_idx) == '\n') || 3820146040Stjr ((dfa->syntax & RE_DOT_NOT_NULL) && 3821146040Stjr re_string_byte_at (input, str_idx) == '\0')) 3822146040Stjr return 0; 3823146040Stjr return char_len; 3824146040Stjr } 3825146040Stjr 3826146040Stjr elem_len = re_string_elem_size_at (input, str_idx); 3827146040Stjr if ((elem_len <= 1 && char_len <= 1) || char_len == 0) 3828146040Stjr return 0; 3829146040Stjr 3830146040Stjr if (node->type == COMPLEX_BRACKET) 3831146040Stjr { 3832146040Stjr const re_charset_t *cset = node->opr.mbcset; 3833146040Stjr# ifdef _LIBC 3834146040Stjr const unsigned char *pin 3835146040Stjr = ((const unsigned char *) re_string_get_buffer (input) + str_idx); 3836146040Stjr int j; 3837146040Stjr uint32_t nrules; 3838146040Stjr# endif /* _LIBC */ 3839146040Stjr int match_len = 0; 3840146040Stjr wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) 3841146040Stjr ? re_string_wchar_at (input, str_idx) : 0); 3842146040Stjr 3843146040Stjr /* match with multibyte character? */ 3844146040Stjr for (i = 0; i < cset->nmbchars; ++i) 3845146040Stjr if (wc == cset->mbchars[i]) 3846146040Stjr { 3847146040Stjr match_len = char_len; 3848146040Stjr goto check_node_accept_bytes_match; 3849146040Stjr } 3850146040Stjr /* match with character_class? */ 3851146040Stjr for (i = 0; i < cset->nchar_classes; ++i) 3852146040Stjr { 3853146040Stjr wctype_t wt = cset->char_classes[i]; 3854146040Stjr if (__iswctype (wc, wt)) 3855146040Stjr { 3856146040Stjr match_len = char_len; 3857146040Stjr goto check_node_accept_bytes_match; 3858146040Stjr } 3859146040Stjr } 3860146040Stjr 3861146040Stjr# ifdef _LIBC 3862146040Stjr nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3863146040Stjr if (nrules != 0) 3864146040Stjr { 3865146040Stjr unsigned int in_collseq = 0; 3866146040Stjr const int32_t *table, *indirect; 3867146040Stjr const unsigned char *weights, *extra; 3868146040Stjr const char *collseqwc; 3869146040Stjr /* This #include defines a local function! */ 3870146040Stjr# include <locale/weight.h> 3871146040Stjr 3872146040Stjr /* match with collating_symbol? */ 3873146040Stjr if (cset->ncoll_syms) 3874146040Stjr extra = (const unsigned char *) 3875146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 3876146040Stjr for (i = 0; i < cset->ncoll_syms; ++i) 3877146040Stjr { 3878146040Stjr const unsigned char *coll_sym = extra + cset->coll_syms[i]; 3879146040Stjr /* Compare the length of input collating element and 3880146040Stjr the length of current collating element. */ 3881146040Stjr if (*coll_sym != elem_len) 3882146040Stjr continue; 3883146040Stjr /* Compare each bytes. */ 3884146040Stjr for (j = 0; j < *coll_sym; j++) 3885146040Stjr if (pin[j] != coll_sym[1 + j]) 3886146040Stjr break; 3887146040Stjr if (j == *coll_sym) 3888146040Stjr { 3889146040Stjr /* Match if every bytes is equal. */ 3890146040Stjr match_len = j; 3891146040Stjr goto check_node_accept_bytes_match; 3892146040Stjr } 3893146040Stjr } 3894146040Stjr 3895146040Stjr if (cset->nranges) 3896146040Stjr { 3897146040Stjr if (elem_len <= char_len) 3898146040Stjr { 3899146040Stjr collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3900146040Stjr in_collseq = __collseq_table_lookup (collseqwc, wc); 3901146040Stjr } 3902146040Stjr else 3903146040Stjr in_collseq = find_collation_sequence_value (pin, elem_len); 3904146040Stjr } 3905146040Stjr /* match with range expression? */ 3906146040Stjr for (i = 0; i < cset->nranges; ++i) 3907146040Stjr if (cset->range_starts[i] <= in_collseq 3908146040Stjr && in_collseq <= cset->range_ends[i]) 3909146040Stjr { 3910146040Stjr match_len = elem_len; 3911146040Stjr goto check_node_accept_bytes_match; 3912146040Stjr } 3913146040Stjr 3914146040Stjr /* match with equivalence_class? */ 3915146040Stjr if (cset->nequiv_classes) 3916146040Stjr { 3917146040Stjr const unsigned char *cp = pin; 3918146040Stjr table = (const int32_t *) 3919146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3920146040Stjr weights = (const unsigned char *) 3921146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 3922146040Stjr extra = (const unsigned char *) 3923146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3924146040Stjr indirect = (const int32_t *) 3925146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3926250724Sjkim int32_t idx = findidx (&cp, elem_len); 3927146040Stjr if (idx > 0) 3928146040Stjr for (i = 0; i < cset->nequiv_classes; ++i) 3929146040Stjr { 3930146040Stjr int32_t equiv_class_idx = cset->equiv_classes[i]; 3931250724Sjkim size_t weight_len = weights[idx & 0xffffff]; 3932250724Sjkim if (weight_len == weights[equiv_class_idx & 0xffffff] 3933250724Sjkim && (idx >> 24) == (equiv_class_idx >> 24)) 3934146040Stjr { 3935146040Stjr int cnt = 0; 3936250724Sjkim 3937250724Sjkim idx &= 0xffffff; 3938250724Sjkim equiv_class_idx &= 0xffffff; 3939250724Sjkim 3940146040Stjr while (cnt <= weight_len 3941146040Stjr && (weights[equiv_class_idx + 1 + cnt] 3942146040Stjr == weights[idx + 1 + cnt])) 3943146040Stjr ++cnt; 3944146040Stjr if (cnt > weight_len) 3945146040Stjr { 3946146040Stjr match_len = elem_len; 3947146040Stjr goto check_node_accept_bytes_match; 3948146040Stjr } 3949146040Stjr } 3950146040Stjr } 3951146040Stjr } 3952146040Stjr } 3953146040Stjr else 3954146040Stjr# endif /* _LIBC */ 3955146040Stjr { 3956146040Stjr /* match with range expression? */ 3957146040Stjr#if __GNUC__ >= 2 3958146040Stjr wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; 3959146040Stjr#else 3960146040Stjr wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; 3961146040Stjr cmp_buf[2] = wc; 3962146040Stjr#endif 3963146040Stjr for (i = 0; i < cset->nranges; ++i) 3964146040Stjr { 3965146040Stjr cmp_buf[0] = cset->range_starts[i]; 3966146040Stjr cmp_buf[4] = cset->range_ends[i]; 3967146040Stjr if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 3968146040Stjr && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) 3969146040Stjr { 3970146040Stjr match_len = char_len; 3971146040Stjr goto check_node_accept_bytes_match; 3972146040Stjr } 3973146040Stjr } 3974146040Stjr } 3975146040Stjr check_node_accept_bytes_match: 3976146040Stjr if (!cset->non_match) 3977146040Stjr return match_len; 3978146040Stjr else 3979146040Stjr { 3980146040Stjr if (match_len > 0) 3981146040Stjr return 0; 3982146040Stjr else 3983146040Stjr return (elem_len > char_len) ? elem_len : char_len; 3984146040Stjr } 3985146040Stjr } 3986146040Stjr return 0; 3987146040Stjr} 3988146040Stjr 3989146040Stjr# ifdef _LIBC 3990146040Stjrstatic unsigned int 3991250724Sjkiminternal_function 3992250724Sjkimfind_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) 3993146040Stjr{ 3994146040Stjr uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3995146040Stjr if (nrules == 0) 3996146040Stjr { 3997146040Stjr if (mbs_len == 1) 3998146040Stjr { 3999146040Stjr /* No valid character. Match it as a single byte character. */ 4000146040Stjr const unsigned char *collseq = (const unsigned char *) 4001146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); 4002146040Stjr return collseq[mbs[0]]; 4003146040Stjr } 4004146040Stjr return UINT_MAX; 4005146040Stjr } 4006146040Stjr else 4007146040Stjr { 4008146040Stjr int32_t idx; 4009146040Stjr const unsigned char *extra = (const unsigned char *) 4010146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 4011146040Stjr int32_t extrasize = (const unsigned char *) 4012146040Stjr _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; 4013146040Stjr 4014146040Stjr for (idx = 0; idx < extrasize;) 4015146040Stjr { 4016146040Stjr int mbs_cnt, found = 0; 4017146040Stjr int32_t elem_mbs_len; 4018146040Stjr /* Skip the name of collating element name. */ 4019146040Stjr idx = idx + extra[idx] + 1; 4020146040Stjr elem_mbs_len = extra[idx++]; 4021146040Stjr if (mbs_len == elem_mbs_len) 4022146040Stjr { 4023146040Stjr for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) 4024146040Stjr if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) 4025146040Stjr break; 4026146040Stjr if (mbs_cnt == elem_mbs_len) 4027146040Stjr /* Found the entry. */ 4028146040Stjr found = 1; 4029146040Stjr } 4030146040Stjr /* Skip the byte sequence of the collating element. */ 4031146040Stjr idx += elem_mbs_len; 4032146040Stjr /* Adjust for the alignment. */ 4033146040Stjr idx = (idx + 3) & ~3; 4034146040Stjr /* Skip the collation sequence value. */ 4035146040Stjr idx += sizeof (uint32_t); 4036146040Stjr /* Skip the wide char sequence of the collating element. */ 4037250724Sjkim idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); 4038146040Stjr /* If we found the entry, return the sequence value. */ 4039146040Stjr if (found) 4040146040Stjr return *(uint32_t *) (extra + idx); 4041146040Stjr /* Skip the collation sequence value. */ 4042146040Stjr idx += sizeof (uint32_t); 4043146040Stjr } 4044146040Stjr return UINT_MAX; 4045146040Stjr } 4046146040Stjr} 4047146040Stjr# endif /* _LIBC */ 4048146040Stjr#endif /* RE_ENABLE_I18N */ 4049146040Stjr 4050146040Stjr/* Check whether the node accepts the byte which is IDX-th 4051146040Stjr byte of the INPUT. */ 4052146040Stjr 4053146040Stjrstatic int 4054250724Sjkiminternal_function 4055250724Sjkimcheck_node_accept (const re_match_context_t *mctx, const re_token_t *node, 4056250724Sjkim int idx) 4057146040Stjr{ 4058146040Stjr unsigned char ch; 4059146040Stjr ch = re_string_byte_at (&mctx->input, idx); 4060146040Stjr switch (node->type) 4061146040Stjr { 4062146040Stjr case CHARACTER: 4063146040Stjr if (node->opr.c != ch) 4064250724Sjkim return 0; 4065146040Stjr break; 4066146040Stjr 4067146040Stjr case SIMPLE_BRACKET: 4068146040Stjr if (!bitset_contain (node->opr.sbcset, ch)) 4069250724Sjkim return 0; 4070146040Stjr break; 4071146040Stjr 4072146040Stjr#ifdef RE_ENABLE_I18N 4073146040Stjr case OP_UTF8_PERIOD: 4074146040Stjr if (ch >= 0x80) 4075250724Sjkim return 0; 4076146040Stjr /* FALLTHROUGH */ 4077146040Stjr#endif 4078146040Stjr case OP_PERIOD: 4079146040Stjr if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) 4080146040Stjr || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) 4081146040Stjr return 0; 4082146040Stjr break; 4083146040Stjr 4084146040Stjr default: 4085146040Stjr return 0; 4086146040Stjr } 4087146040Stjr 4088146040Stjr if (node->constraint) 4089146040Stjr { 4090146040Stjr /* The node has constraints. Check whether the current context 4091146040Stjr satisfies the constraints. */ 4092146040Stjr unsigned int context = re_string_context_at (&mctx->input, idx, 4093146040Stjr mctx->eflags); 4094146040Stjr if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) 4095146040Stjr return 0; 4096146040Stjr } 4097146040Stjr 4098146040Stjr return 1; 4099146040Stjr} 4100146040Stjr 4101146040Stjr/* Extend the buffers, if the buffers have run out. */ 4102146040Stjr 4103146040Stjrstatic reg_errcode_t 4104250724Sjkiminternal_function __attribute_warn_unused_result__ 4105251435Sjkimextend_buffers (re_match_context_t *mctx, int min_len) 4106146040Stjr{ 4107146040Stjr reg_errcode_t ret; 4108146040Stjr re_string_t *pstr = &mctx->input; 4109146040Stjr 4110250724Sjkim /* Avoid overflow. */ 4111250724Sjkim if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) 4112250724Sjkim return REG_ESPACE; 4113250724Sjkim 4114251435Sjkim /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */ 4115251435Sjkim ret = re_string_realloc_buffers (pstr, 4116251435Sjkim MAX (min_len, 4117251435Sjkim MIN (pstr->len, pstr->bufs_len * 2))); 4118146040Stjr if (BE (ret != REG_NOERROR, 0)) 4119146040Stjr return ret; 4120146040Stjr 4121146040Stjr if (mctx->state_log != NULL) 4122146040Stjr { 4123146040Stjr /* And double the length of state_log. */ 4124146040Stjr /* XXX We have no indication of the size of this buffer. If this 4125146040Stjr allocation fail we have no indication that the state_log array 4126146040Stjr does not have the right size. */ 4127146040Stjr re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, 4128146040Stjr pstr->bufs_len + 1); 4129146040Stjr if (BE (new_array == NULL, 0)) 4130146040Stjr return REG_ESPACE; 4131146040Stjr mctx->state_log = new_array; 4132146040Stjr } 4133146040Stjr 4134146040Stjr /* Then reconstruct the buffers. */ 4135146040Stjr if (pstr->icase) 4136146040Stjr { 4137146040Stjr#ifdef RE_ENABLE_I18N 4138146040Stjr if (pstr->mb_cur_max > 1) 4139146040Stjr { 4140146040Stjr ret = build_wcs_upper_buffer (pstr); 4141146040Stjr if (BE (ret != REG_NOERROR, 0)) 4142146040Stjr return ret; 4143146040Stjr } 4144146040Stjr else 4145146040Stjr#endif /* RE_ENABLE_I18N */ 4146146040Stjr build_upper_buffer (pstr); 4147146040Stjr } 4148146040Stjr else 4149146040Stjr { 4150146040Stjr#ifdef RE_ENABLE_I18N 4151146040Stjr if (pstr->mb_cur_max > 1) 4152146040Stjr build_wcs_buffer (pstr); 4153146040Stjr else 4154146040Stjr#endif /* RE_ENABLE_I18N */ 4155146040Stjr { 4156146040Stjr if (pstr->trans != NULL) 4157146040Stjr re_string_translate_buffer (pstr); 4158146040Stjr } 4159146040Stjr } 4160146040Stjr return REG_NOERROR; 4161146040Stjr} 4162146040Stjr 4163146040Stjr 4164146040Stjr/* Functions for matching context. */ 4165146040Stjr 4166146040Stjr/* Initialize MCTX. */ 4167146040Stjr 4168146040Stjrstatic reg_errcode_t 4169250724Sjkiminternal_function __attribute_warn_unused_result__ 4170250724Sjkimmatch_ctx_init (re_match_context_t *mctx, int eflags, int n) 4171146040Stjr{ 4172146040Stjr mctx->eflags = eflags; 4173146040Stjr mctx->match_last = -1; 4174146040Stjr if (n > 0) 4175146040Stjr { 4176146040Stjr mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); 4177146040Stjr mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); 4178146040Stjr if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) 4179146040Stjr return REG_ESPACE; 4180146040Stjr } 4181146040Stjr /* Already zero-ed by the caller. 4182146040Stjr else 4183146040Stjr mctx->bkref_ents = NULL; 4184146040Stjr mctx->nbkref_ents = 0; 4185146040Stjr mctx->nsub_tops = 0; */ 4186146040Stjr mctx->abkref_ents = n; 4187146040Stjr mctx->max_mb_elem_len = 1; 4188146040Stjr mctx->asub_tops = n; 4189146040Stjr return REG_NOERROR; 4190146040Stjr} 4191146040Stjr 4192146040Stjr/* Clean the entries which depend on the current input in MCTX. 4193146040Stjr This function must be invoked when the matcher changes the start index 4194146040Stjr of the input, or changes the input string. */ 4195146040Stjr 4196146040Stjrstatic void 4197250724Sjkiminternal_function 4198250724Sjkimmatch_ctx_clean (re_match_context_t *mctx) 4199146040Stjr{ 4200146040Stjr int st_idx; 4201146040Stjr for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) 4202146040Stjr { 4203146040Stjr int sl_idx; 4204146040Stjr re_sub_match_top_t *top = mctx->sub_tops[st_idx]; 4205146040Stjr for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) 4206146040Stjr { 4207146040Stjr re_sub_match_last_t *last = top->lasts[sl_idx]; 4208146040Stjr re_free (last->path.array); 4209146040Stjr re_free (last); 4210146040Stjr } 4211146040Stjr re_free (top->lasts); 4212146040Stjr if (top->path) 4213146040Stjr { 4214146040Stjr re_free (top->path->array); 4215146040Stjr re_free (top->path); 4216146040Stjr } 4217146040Stjr free (top); 4218146040Stjr } 4219146040Stjr 4220146040Stjr mctx->nsub_tops = 0; 4221146040Stjr mctx->nbkref_ents = 0; 4222146040Stjr} 4223146040Stjr 4224146040Stjr/* Free all the memory associated with MCTX. */ 4225146040Stjr 4226146040Stjrstatic void 4227250724Sjkiminternal_function 4228250724Sjkimmatch_ctx_free (re_match_context_t *mctx) 4229146040Stjr{ 4230146040Stjr /* First, free all the memory associated with MCTX->SUB_TOPS. */ 4231146040Stjr match_ctx_clean (mctx); 4232146040Stjr re_free (mctx->sub_tops); 4233146040Stjr re_free (mctx->bkref_ents); 4234146040Stjr} 4235146040Stjr 4236146040Stjr/* Add a new backreference entry to MCTX. 4237146040Stjr Note that we assume that caller never call this function with duplicate 4238146040Stjr entry, and call with STR_IDX which isn't smaller than any existing entry. 4239146040Stjr*/ 4240146040Stjr 4241146040Stjrstatic reg_errcode_t 4242250724Sjkiminternal_function __attribute_warn_unused_result__ 4243250724Sjkimmatch_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, 4244250724Sjkim int to) 4245146040Stjr{ 4246146040Stjr if (mctx->nbkref_ents >= mctx->abkref_ents) 4247146040Stjr { 4248146040Stjr struct re_backref_cache_entry* new_entry; 4249146040Stjr new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, 4250146040Stjr mctx->abkref_ents * 2); 4251146040Stjr if (BE (new_entry == NULL, 0)) 4252146040Stjr { 4253146040Stjr re_free (mctx->bkref_ents); 4254146040Stjr return REG_ESPACE; 4255146040Stjr } 4256146040Stjr mctx->bkref_ents = new_entry; 4257146040Stjr memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', 4258146040Stjr sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); 4259146040Stjr mctx->abkref_ents *= 2; 4260146040Stjr } 4261146040Stjr if (mctx->nbkref_ents > 0 4262146040Stjr && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) 4263146040Stjr mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; 4264146040Stjr 4265146040Stjr mctx->bkref_ents[mctx->nbkref_ents].node = node; 4266146040Stjr mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; 4267146040Stjr mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; 4268146040Stjr mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; 4269146040Stjr 4270146040Stjr /* This is a cache that saves negative results of check_dst_limits_calc_pos. 4271146040Stjr If bit N is clear, means that this entry won't epsilon-transition to 4272146040Stjr an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If 4273146040Stjr it is set, check_dst_limits_calc_pos_1 will recurse and try to find one 4274146040Stjr such node. 4275146040Stjr 4276146040Stjr A backreference does not epsilon-transition unless it is empty, so set 4277146040Stjr to all zeros if FROM != TO. */ 4278146040Stjr mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map 4279146040Stjr = (from == to ? ~0 : 0); 4280146040Stjr 4281146040Stjr mctx->bkref_ents[mctx->nbkref_ents++].more = 0; 4282146040Stjr if (mctx->max_mb_elem_len < to - from) 4283146040Stjr mctx->max_mb_elem_len = to - from; 4284146040Stjr return REG_NOERROR; 4285146040Stjr} 4286146040Stjr 4287146040Stjr/* Search for the first entry which has the same str_idx, or -1 if none is 4288146040Stjr found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ 4289146040Stjr 4290146040Stjrstatic int 4291250724Sjkiminternal_function 4292250724Sjkimsearch_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) 4293146040Stjr{ 4294146040Stjr int left, right, mid, last; 4295146040Stjr last = right = mctx->nbkref_ents; 4296146040Stjr for (left = 0; left < right;) 4297146040Stjr { 4298146040Stjr mid = (left + right) / 2; 4299146040Stjr if (mctx->bkref_ents[mid].str_idx < str_idx) 4300146040Stjr left = mid + 1; 4301146040Stjr else 4302146040Stjr right = mid; 4303146040Stjr } 4304146040Stjr if (left < last && mctx->bkref_ents[left].str_idx == str_idx) 4305146040Stjr return left; 4306146040Stjr else 4307146040Stjr return -1; 4308146040Stjr} 4309146040Stjr 4310146040Stjr/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches 4311146040Stjr at STR_IDX. */ 4312146040Stjr 4313146040Stjrstatic reg_errcode_t 4314250724Sjkiminternal_function __attribute_warn_unused_result__ 4315250724Sjkimmatch_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) 4316146040Stjr{ 4317146040Stjr#ifdef DEBUG 4318146040Stjr assert (mctx->sub_tops != NULL); 4319146040Stjr assert (mctx->asub_tops > 0); 4320146040Stjr#endif 4321146040Stjr if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) 4322146040Stjr { 4323146040Stjr int new_asub_tops = mctx->asub_tops * 2; 4324146040Stjr re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, 4325146040Stjr re_sub_match_top_t *, 4326146040Stjr new_asub_tops); 4327146040Stjr if (BE (new_array == NULL, 0)) 4328146040Stjr return REG_ESPACE; 4329146040Stjr mctx->sub_tops = new_array; 4330146040Stjr mctx->asub_tops = new_asub_tops; 4331146040Stjr } 4332146040Stjr mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); 4333146040Stjr if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) 4334146040Stjr return REG_ESPACE; 4335146040Stjr mctx->sub_tops[mctx->nsub_tops]->node = node; 4336146040Stjr mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; 4337146040Stjr return REG_NOERROR; 4338146040Stjr} 4339146040Stjr 4340146040Stjr/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches 4341146040Stjr at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ 4342146040Stjr 4343146040Stjrstatic re_sub_match_last_t * 4344250724Sjkiminternal_function 4345250724Sjkimmatch_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) 4346146040Stjr{ 4347146040Stjr re_sub_match_last_t *new_entry; 4348146040Stjr if (BE (subtop->nlasts == subtop->alasts, 0)) 4349146040Stjr { 4350146040Stjr int new_alasts = 2 * subtop->alasts + 1; 4351146040Stjr re_sub_match_last_t **new_array = re_realloc (subtop->lasts, 4352146040Stjr re_sub_match_last_t *, 4353146040Stjr new_alasts); 4354146040Stjr if (BE (new_array == NULL, 0)) 4355146040Stjr return NULL; 4356146040Stjr subtop->lasts = new_array; 4357146040Stjr subtop->alasts = new_alasts; 4358146040Stjr } 4359146040Stjr new_entry = calloc (1, sizeof (re_sub_match_last_t)); 4360146040Stjr if (BE (new_entry != NULL, 1)) 4361146040Stjr { 4362146040Stjr subtop->lasts[subtop->nlasts] = new_entry; 4363146040Stjr new_entry->node = node; 4364146040Stjr new_entry->str_idx = str_idx; 4365146040Stjr ++subtop->nlasts; 4366146040Stjr } 4367146040Stjr return new_entry; 4368146040Stjr} 4369146040Stjr 4370146040Stjrstatic void 4371250724Sjkiminternal_function 4372250724Sjkimsift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 4373250724Sjkim re_dfastate_t **limited_sts, int last_node, int last_str_idx) 4374146040Stjr{ 4375146040Stjr sctx->sifted_states = sifted_sts; 4376146040Stjr sctx->limited_states = limited_sts; 4377146040Stjr sctx->last_node = last_node; 4378146040Stjr sctx->last_str_idx = last_str_idx; 4379146040Stjr re_node_set_init_empty (&sctx->limits); 4380146040Stjr} 4381