1146040Stjr/* Extended regular expression matching and search library.
2250724Sjkim   Copyright (C) 2002-2005,2007,2009,2010,2011 Free Software Foundation, Inc.
3146040Stjr   This file is part of the GNU C Library.
4146040Stjr   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5146040Stjr
6146040Stjr   The GNU C Library is free software; you can redistribute it and/or
7146040Stjr   modify it under the terms of the GNU Lesser General Public
8146040Stjr   License as published by the Free Software Foundation; either
9146040Stjr   version 2.1 of the License, or (at your option) any later version.
10146040Stjr
11146040Stjr   The GNU C Library is distributed in the hope that it will be useful,
12146040Stjr   but WITHOUT ANY WARRANTY; without even the implied warranty of
13146040Stjr   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14146040Stjr   Lesser General Public License for more details.
15146040Stjr
16146040Stjr   You should have received a copy of the GNU Lesser General Public
17250724Sjkim   License along with the GNU C Library; if not, see
18250724Sjkim   <http://www.gnu.org/licenses/>.  */
19146040Stjr
20146040Stjrstatic reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21146040Stjr				     int n) internal_function;
22146040Stjrstatic void match_ctx_clean (re_match_context_t *mctx) internal_function;
23146040Stjrstatic void match_ctx_free (re_match_context_t *cache) internal_function;
24146040Stjrstatic reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
25146040Stjr					  int str_idx, int from, int to)
26146040Stjr     internal_function;
27250724Sjkimstatic int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
28146040Stjr     internal_function;
29146040Stjrstatic reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
30146040Stjr					   int str_idx) internal_function;
31146040Stjrstatic re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32146040Stjr						   int node, int str_idx)
33146040Stjr     internal_function;
34146040Stjrstatic void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35146040Stjr			   re_dfastate_t **limited_sts, int last_node,
36146040Stjr			   int last_str_idx)
37146040Stjr     internal_function;
38146040Stjrstatic reg_errcode_t re_search_internal (const regex_t *preg,
39146040Stjr					 const char *string, int length,
40146040Stjr					 int start, int range, int stop,
41146040Stjr					 size_t nmatch, regmatch_t pmatch[],
42146040Stjr					 int eflags) internal_function;
43146040Stjrstatic int re_search_2_stub (struct re_pattern_buffer *bufp,
44146040Stjr			     const char *string1, int length1,
45146040Stjr			     const char *string2, int length2,
46146040Stjr			     int start, int range, struct re_registers *regs,
47146040Stjr			     int stop, int ret_len) internal_function;
48146040Stjrstatic int re_search_stub (struct re_pattern_buffer *bufp,
49146040Stjr			   const char *string, int length, int start,
50146040Stjr			   int range, int stop, struct re_registers *regs,
51146040Stjr			   int ret_len) internal_function;
52146040Stjrstatic unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
53146040Stjr			      int nregs, int regs_allocated) internal_function;
54146040Stjrstatic reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
55146040Stjr     internal_function;
56146040Stjrstatic int check_matching (re_match_context_t *mctx, int fl_longest_match,
57250724Sjkim			   int *p_match_first) internal_function;
58146040Stjrstatic int check_halt_state_context (const re_match_context_t *mctx,
59146040Stjr				     const re_dfastate_t *state, int idx)
60146040Stjr     internal_function;
61250724Sjkimstatic void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
62146040Stjr			 regmatch_t *prev_idx_match, int cur_node,
63146040Stjr			 int cur_idx, int nmatch) internal_function;
64146040Stjrstatic reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
65146040Stjr				      int str_idx, int dest_node, int nregs,
66146040Stjr				      regmatch_t *regs,
67250724Sjkim				      re_node_set *eps_via_nodes)
68250724Sjkim     internal_function;
69146040Stjrstatic reg_errcode_t set_regs (const regex_t *preg,
70146040Stjr			       const re_match_context_t *mctx,
71146040Stjr			       size_t nmatch, regmatch_t *pmatch,
72146040Stjr			       int fl_backtrack) internal_function;
73250724Sjkimstatic reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
74250724Sjkim     internal_function;
75146040Stjr
76146040Stjr#ifdef RE_ENABLE_I18N
77146040Stjrstatic int sift_states_iter_mb (const re_match_context_t *mctx,
78146040Stjr				re_sift_context_t *sctx,
79250724Sjkim				int node_idx, int str_idx, int max_str_idx)
80250724Sjkim     internal_function;
81146040Stjr#endif /* RE_ENABLE_I18N */
82250724Sjkimstatic reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
83250724Sjkim					   re_sift_context_t *sctx)
84250724Sjkim     internal_function;
85250724Sjkimstatic reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
86146040Stjr					  re_sift_context_t *sctx, int str_idx,
87250724Sjkim					  re_node_set *cur_dest)
88250724Sjkim     internal_function;
89250724Sjkimstatic reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
90146040Stjr					      re_sift_context_t *sctx,
91146040Stjr					      int str_idx,
92250724Sjkim					      re_node_set *dest_nodes)
93250724Sjkim     internal_function;
94250724Sjkimstatic reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
95146040Stjr					    re_node_set *dest_nodes,
96250724Sjkim					    const re_node_set *candidates)
97250724Sjkim     internal_function;
98250724Sjkimstatic int check_dst_limits (const re_match_context_t *mctx,
99250724Sjkim			     re_node_set *limits,
100146040Stjr			     int dst_node, int dst_idx, int src_node,
101146040Stjr			     int src_idx) internal_function;
102250724Sjkimstatic int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
103146040Stjr					int boundaries, int subexp_idx,
104250724Sjkim					int from_node, int bkref_idx)
105250724Sjkim     internal_function;
106250724Sjkimstatic int check_dst_limits_calc_pos (const re_match_context_t *mctx,
107146040Stjr				      int limit, int subexp_idx,
108146040Stjr				      int node, int str_idx,
109146040Stjr				      int bkref_idx) internal_function;
110250724Sjkimstatic reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
111146040Stjr					  re_node_set *dest_nodes,
112146040Stjr					  const re_node_set *candidates,
113146040Stjr					  re_node_set *limits,
114146040Stjr					  struct re_backref_cache_entry *bkref_ents,
115146040Stjr					  int str_idx) internal_function;
116250724Sjkimstatic reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
117146040Stjr					re_sift_context_t *sctx,
118250724Sjkim					int str_idx, const re_node_set *candidates)
119250724Sjkim     internal_function;
120250724Sjkimstatic reg_errcode_t merge_state_array (const re_dfa_t *dfa,
121250724Sjkim					re_dfastate_t **dst,
122250724Sjkim					re_dfastate_t **src, int num)
123250724Sjkim     internal_function;
124146040Stjrstatic re_dfastate_t *find_recover_state (reg_errcode_t *err,
125146040Stjr					 re_match_context_t *mctx) internal_function;
126146040Stjrstatic re_dfastate_t *transit_state (reg_errcode_t *err,
127146040Stjr				     re_match_context_t *mctx,
128146040Stjr				     re_dfastate_t *state) internal_function;
129146040Stjrstatic re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
130146040Stjr					    re_match_context_t *mctx,
131250724Sjkim					    re_dfastate_t *next_state)
132250724Sjkim     internal_function;
133146040Stjrstatic reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
134146040Stjr						re_node_set *cur_nodes,
135146040Stjr						int str_idx) internal_function;
136146040Stjr#if 0
137146040Stjrstatic re_dfastate_t *transit_state_sb (reg_errcode_t *err,
138146040Stjr					re_match_context_t *mctx,
139250724Sjkim					re_dfastate_t *pstate)
140250724Sjkim     internal_function;
141146040Stjr#endif
142146040Stjr#ifdef RE_ENABLE_I18N
143146040Stjrstatic reg_errcode_t transit_state_mb (re_match_context_t *mctx,
144250724Sjkim				       re_dfastate_t *pstate)
145250724Sjkim     internal_function;
146146040Stjr#endif /* RE_ENABLE_I18N */
147146040Stjrstatic reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
148250724Sjkim					  const re_node_set *nodes)
149250724Sjkim     internal_function;
150146040Stjrstatic reg_errcode_t get_subexp (re_match_context_t *mctx,
151250724Sjkim				 int bkref_node, int bkref_str_idx)
152250724Sjkim     internal_function;
153146040Stjrstatic reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
154146040Stjr				     const re_sub_match_top_t *sub_top,
155146040Stjr				     re_sub_match_last_t *sub_last,
156250724Sjkim				     int bkref_node, int bkref_str)
157250724Sjkim     internal_function;
158146040Stjrstatic int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
159146040Stjr			     int subexp_idx, int type) internal_function;
160146040Stjrstatic reg_errcode_t check_arrival (re_match_context_t *mctx,
161146040Stjr				    state_array_t *path, int top_node,
162146040Stjr				    int top_str, int last_node, int last_str,
163146040Stjr				    int type) internal_function;
164146040Stjrstatic reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165146040Stjr						   int str_idx,
166146040Stjr						   re_node_set *cur_nodes,
167250724Sjkim						   re_node_set *next_nodes)
168250724Sjkim     internal_function;
169250724Sjkimstatic reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
170146040Stjr					       re_node_set *cur_nodes,
171250724Sjkim					       int ex_subexp, int type)
172250724Sjkim     internal_function;
173250724Sjkimstatic reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
174146040Stjr						   re_node_set *dst_nodes,
175146040Stjr						   int target, int ex_subexp,
176146040Stjr						   int type) internal_function;
177146040Stjrstatic reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
178146040Stjr					 re_node_set *cur_nodes, int cur_str,
179250724Sjkim					 int subexp_num, int type)
180250724Sjkim     internal_function;
181250724Sjkimstatic int build_trtable (const re_dfa_t *dfa,
182146040Stjr			  re_dfastate_t *state) internal_function;
183146040Stjr#ifdef RE_ENABLE_I18N
184250724Sjkimstatic int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
185250724Sjkim				    const re_string_t *input, int idx)
186250724Sjkim     internal_function;
187146040Stjr# ifdef _LIBC
188146040Stjrstatic unsigned int find_collation_sequence_value (const unsigned char *mbs,
189250724Sjkim						   size_t name_len)
190250724Sjkim     internal_function;
191146040Stjr# endif /* _LIBC */
192146040Stjr#endif /* RE_ENABLE_I18N */
193250724Sjkimstatic int group_nodes_into_DFAstates (const re_dfa_t *dfa,
194146040Stjr				       const re_dfastate_t *state,
195146040Stjr				       re_node_set *states_node,
196250724Sjkim				       bitset_t *states_ch) internal_function;
197146040Stjrstatic int check_node_accept (const re_match_context_t *mctx,
198250724Sjkim			      const re_token_t *node, int idx)
199250724Sjkim     internal_function;
200251435Sjkimstatic reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
201250724Sjkim     internal_function;
202146040Stjr
203146040Stjr/* Entry point for POSIX code.  */
204146040Stjr
205146040Stjr/* regexec searches for a given pattern, specified by PREG, in the
206146040Stjr   string STRING.
207146040Stjr
208146040Stjr   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
209146040Stjr   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
210146040Stjr   least NMATCH elements, and we set them to the offsets of the
211146040Stjr   corresponding matched substrings.
212146040Stjr
213146040Stjr   EFLAGS specifies `execution flags' which affect matching: if
214146040Stjr   REG_NOTBOL is set, then ^ does not match at the beginning of the
215146040Stjr   string; if REG_NOTEOL is set, then $ does not match at the end.
216146040Stjr
217146040Stjr   We return 0 if we find a match and REG_NOMATCH if not.  */
218146040Stjr
219146040Stjrint
220146040Stjrregexec (preg, string, nmatch, pmatch, eflags)
221146040Stjr    const regex_t *__restrict preg;
222146040Stjr    const char *__restrict string;
223146040Stjr    size_t nmatch;
224146040Stjr    regmatch_t pmatch[];
225146040Stjr    int eflags;
226146040Stjr{
227146040Stjr  reg_errcode_t err;
228146040Stjr  int start, length;
229250724Sjkim  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
230146040Stjr
231146040Stjr  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
232146040Stjr    return REG_BADPAT;
233146040Stjr
234146040Stjr  if (eflags & REG_STARTEND)
235146040Stjr    {
236146040Stjr      start = pmatch[0].rm_so;
237146040Stjr      length = pmatch[0].rm_eo;
238146040Stjr    }
239146040Stjr  else
240146040Stjr    {
241146040Stjr      start = 0;
242146040Stjr      length = strlen (string);
243146040Stjr    }
244250724Sjkim
245250724Sjkim  __libc_lock_lock (dfa->lock);
246146040Stjr  if (preg->no_sub)
247146040Stjr    err = re_search_internal (preg, string, length, start, length - start,
248146040Stjr			      length, 0, NULL, eflags);
249146040Stjr  else
250146040Stjr    err = re_search_internal (preg, string, length, start, length - start,
251146040Stjr			      length, nmatch, pmatch, eflags);
252250724Sjkim  __libc_lock_unlock (dfa->lock);
253146040Stjr  return err != REG_NOERROR;
254146040Stjr}
255146040Stjr
256146040Stjr#ifdef _LIBC
257146040Stjr# include <shlib-compat.h>
258146040Stjrversioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
259146040Stjr
260146040Stjr# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
261146040Stjr__typeof__ (__regexec) __compat_regexec;
262146040Stjr
263146040Stjrint
264146040Stjrattribute_compat_text_section
265146040Stjr__compat_regexec (const regex_t *__restrict preg,
266146040Stjr		  const char *__restrict string, size_t nmatch,
267146040Stjr		  regmatch_t pmatch[], int eflags)
268146040Stjr{
269146040Stjr  return regexec (preg, string, nmatch, pmatch,
270146040Stjr		  eflags & (REG_NOTBOL | REG_NOTEOL));
271146040Stjr}
272146040Stjrcompat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
273146040Stjr# endif
274146040Stjr#endif
275146040Stjr
276146040Stjr/* Entry points for GNU code.  */
277146040Stjr
278146040Stjr/* re_match, re_search, re_match_2, re_search_2
279146040Stjr
280146040Stjr   The former two functions operate on STRING with length LENGTH,
281146040Stjr   while the later two operate on concatenation of STRING1 and STRING2
282146040Stjr   with lengths LENGTH1 and LENGTH2, respectively.
283146040Stjr
284146040Stjr   re_match() matches the compiled pattern in BUFP against the string,
285146040Stjr   starting at index START.
286146040Stjr
287146040Stjr   re_search() first tries matching at index START, then it tries to match
288146040Stjr   starting from index START + 1, and so on.  The last start position tried
289146040Stjr   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
290146040Stjr   way as re_match().)
291146040Stjr
292146040Stjr   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
293146040Stjr   the first STOP characters of the concatenation of the strings should be
294146040Stjr   concerned.
295146040Stjr
296146040Stjr   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
297146040Stjr   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
298146040Stjr   computed relative to the concatenation, not relative to the individual
299146040Stjr   strings.)
300146040Stjr
301146040Stjr   On success, re_match* functions return the length of the match, re_search*
302146040Stjr   return the position of the start of the match.  Return value -1 means no
303146040Stjr   match was found and -2 indicates an internal error.  */
304146040Stjr
305146040Stjrint
306146040Stjrre_match (bufp, string, length, start, regs)
307146040Stjr    struct re_pattern_buffer *bufp;
308146040Stjr    const char *string;
309146040Stjr    int length, start;
310146040Stjr    struct re_registers *regs;
311146040Stjr{
312146040Stjr  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
313146040Stjr}
314146040Stjr#ifdef _LIBC
315146040Stjrweak_alias (__re_match, re_match)
316146040Stjr#endif
317146040Stjr
318146040Stjrint
319146040Stjrre_search (bufp, string, length, start, range, regs)
320146040Stjr    struct re_pattern_buffer *bufp;
321146040Stjr    const char *string;
322146040Stjr    int length, start, range;
323146040Stjr    struct re_registers *regs;
324146040Stjr{
325146040Stjr  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
326146040Stjr}
327146040Stjr#ifdef _LIBC
328146040Stjrweak_alias (__re_search, re_search)
329146040Stjr#endif
330146040Stjr
331146040Stjrint
332146040Stjrre_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
333146040Stjr    struct re_pattern_buffer *bufp;
334146040Stjr    const char *string1, *string2;
335146040Stjr    int length1, length2, start, stop;
336146040Stjr    struct re_registers *regs;
337146040Stjr{
338146040Stjr  return re_search_2_stub (bufp, string1, length1, string2, length2,
339146040Stjr			   start, 0, regs, stop, 1);
340146040Stjr}
341146040Stjr#ifdef _LIBC
342146040Stjrweak_alias (__re_match_2, re_match_2)
343146040Stjr#endif
344146040Stjr
345146040Stjrint
346146040Stjrre_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
347146040Stjr    struct re_pattern_buffer *bufp;
348146040Stjr    const char *string1, *string2;
349146040Stjr    int length1, length2, start, range, stop;
350146040Stjr    struct re_registers *regs;
351146040Stjr{
352146040Stjr  return re_search_2_stub (bufp, string1, length1, string2, length2,
353146040Stjr			   start, range, regs, stop, 0);
354146040Stjr}
355146040Stjr#ifdef _LIBC
356146040Stjrweak_alias (__re_search_2, re_search_2)
357146040Stjr#endif
358146040Stjr
359146040Stjrstatic int
360146040Stjrre_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
361146040Stjr		  stop, ret_len)
362146040Stjr    struct re_pattern_buffer *bufp;
363146040Stjr    const char *string1, *string2;
364146040Stjr    int length1, length2, start, range, stop, ret_len;
365146040Stjr    struct re_registers *regs;
366146040Stjr{
367146040Stjr  const char *str;
368146040Stjr  int rval;
369146040Stjr  int len = length1 + length2;
370250724Sjkim  char *s = NULL;
371146040Stjr
372250724Sjkim  if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
373146040Stjr    return -2;
374146040Stjr
375146040Stjr  /* Concatenate the strings.  */
376146040Stjr  if (length2 > 0)
377146040Stjr    if (length1 > 0)
378146040Stjr      {
379250724Sjkim	s = re_malloc (char, len);
380146040Stjr
381146040Stjr	if (BE (s == NULL, 0))
382146040Stjr	  return -2;
383250724Sjkim#ifdef _LIBC
384250724Sjkim	memcpy (__mempcpy (s, string1, length1), string2, length2);
385250724Sjkim#else
386146040Stjr	memcpy (s, string1, length1);
387146040Stjr	memcpy (s + length1, string2, length2);
388250724Sjkim#endif
389146040Stjr	str = s;
390146040Stjr      }
391146040Stjr    else
392146040Stjr      str = string2;
393146040Stjr  else
394146040Stjr    str = string1;
395146040Stjr
396250724Sjkim  rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
397250724Sjkim  re_free (s);
398146040Stjr  return rval;
399146040Stjr}
400146040Stjr
401146040Stjr/* The parameters have the same meaning as those of re_search.
402146040Stjr   Additional parameters:
403146040Stjr   If RET_LEN is nonzero the length of the match is returned (re_match style);
404146040Stjr   otherwise the position of the match is returned.  */
405146040Stjr
406146040Stjrstatic int
407146040Stjrre_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
408146040Stjr    struct re_pattern_buffer *bufp;
409146040Stjr    const char *string;
410146040Stjr    int length, start, range, stop, ret_len;
411146040Stjr    struct re_registers *regs;
412146040Stjr{
413146040Stjr  reg_errcode_t result;
414146040Stjr  regmatch_t *pmatch;
415146040Stjr  int nregs, rval;
416146040Stjr  int eflags = 0;
417250724Sjkim  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
418146040Stjr
419146040Stjr  /* Check for out-of-range.  */
420146040Stjr  if (BE (start < 0 || start > length, 0))
421146040Stjr    return -1;
422146040Stjr  if (BE (start + range > length, 0))
423146040Stjr    range = length - start;
424146040Stjr  else if (BE (start + range < 0, 0))
425146040Stjr    range = -start;
426146040Stjr
427250724Sjkim  __libc_lock_lock (dfa->lock);
428250724Sjkim
429146040Stjr  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
430146040Stjr  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
431146040Stjr
432146040Stjr  /* Compile fastmap if we haven't yet.  */
433146040Stjr  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
434146040Stjr    re_compile_fastmap (bufp);
435146040Stjr
436146040Stjr  if (BE (bufp->no_sub, 0))
437146040Stjr    regs = NULL;
438146040Stjr
439146040Stjr  /* We need at least 1 register.  */
440146040Stjr  if (regs == NULL)
441146040Stjr    nregs = 1;
442146040Stjr  else if (BE (bufp->regs_allocated == REGS_FIXED &&
443146040Stjr	       regs->num_regs < bufp->re_nsub + 1, 0))
444146040Stjr    {
445146040Stjr      nregs = regs->num_regs;
446146040Stjr      if (BE (nregs < 1, 0))
447146040Stjr	{
448146040Stjr	  /* Nothing can be copied to regs.  */
449146040Stjr	  regs = NULL;
450146040Stjr	  nregs = 1;
451146040Stjr	}
452146040Stjr    }
453146040Stjr  else
454146040Stjr    nregs = bufp->re_nsub + 1;
455146040Stjr  pmatch = re_malloc (regmatch_t, nregs);
456146040Stjr  if (BE (pmatch == NULL, 0))
457250724Sjkim    {
458250724Sjkim      rval = -2;
459250724Sjkim      goto out;
460250724Sjkim    }
461146040Stjr
462146040Stjr  result = re_search_internal (bufp, string, length, start, range, stop,
463146040Stjr			       nregs, pmatch, eflags);
464146040Stjr
465146040Stjr  rval = 0;
466146040Stjr
467146040Stjr  /* I hope we needn't fill ther regs with -1's when no match was found.  */
468146040Stjr  if (result != REG_NOERROR)
469146040Stjr    rval = -1;
470146040Stjr  else if (regs != NULL)
471146040Stjr    {
472146040Stjr      /* If caller wants register contents data back, copy them.  */
473146040Stjr      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
474146040Stjr					   bufp->regs_allocated);
475146040Stjr      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
476146040Stjr	rval = -2;
477146040Stjr    }
478146040Stjr
479146040Stjr  if (BE (rval == 0, 1))
480146040Stjr    {
481146040Stjr      if (ret_len)
482146040Stjr	{
483146040Stjr	  assert (pmatch[0].rm_so == start);
484146040Stjr	  rval = pmatch[0].rm_eo - start;
485146040Stjr	}
486146040Stjr      else
487146040Stjr	rval = pmatch[0].rm_so;
488146040Stjr    }
489146040Stjr  re_free (pmatch);
490250724Sjkim out:
491250724Sjkim  __libc_lock_unlock (dfa->lock);
492146040Stjr  return rval;
493146040Stjr}
494146040Stjr
495146040Stjrstatic unsigned
496146040Stjrre_copy_regs (regs, pmatch, nregs, regs_allocated)
497146040Stjr    struct re_registers *regs;
498146040Stjr    regmatch_t *pmatch;
499146040Stjr    int nregs, regs_allocated;
500146040Stjr{
501146040Stjr  int rval = REGS_REALLOCATE;
502146040Stjr  int i;
503146040Stjr  int need_regs = nregs + 1;
504146040Stjr  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
505146040Stjr     uses.  */
506146040Stjr
507146040Stjr  /* Have the register data arrays been allocated?  */
508146040Stjr  if (regs_allocated == REGS_UNALLOCATED)
509146040Stjr    { /* No.  So allocate them with malloc.  */
510146040Stjr      regs->start = re_malloc (regoff_t, need_regs);
511250724Sjkim      if (BE (regs->start == NULL, 0))
512250724Sjkim	return REGS_UNALLOCATED;
513146040Stjr      regs->end = re_malloc (regoff_t, need_regs);
514250724Sjkim      if (BE (regs->end == NULL, 0))
515250724Sjkim	{
516250724Sjkim	  re_free (regs->start);
517250724Sjkim	  return REGS_UNALLOCATED;
518250724Sjkim	}
519146040Stjr      regs->num_regs = need_regs;
520146040Stjr    }
521146040Stjr  else if (regs_allocated == REGS_REALLOCATE)
522146040Stjr    { /* Yes.  If we need more elements than were already
523146040Stjr	 allocated, reallocate them.  If we need fewer, just
524146040Stjr	 leave it alone.  */
525146040Stjr      if (BE (need_regs > regs->num_regs, 0))
526146040Stjr	{
527146040Stjr	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
528250724Sjkim	  regoff_t *new_end;
529250724Sjkim	  if (BE (new_start == NULL, 0))
530146040Stjr	    return REGS_UNALLOCATED;
531250724Sjkim	  new_end = re_realloc (regs->end, regoff_t, need_regs);
532250724Sjkim	  if (BE (new_end == NULL, 0))
533250724Sjkim	    {
534250724Sjkim	      re_free (new_start);
535250724Sjkim	      return REGS_UNALLOCATED;
536250724Sjkim	    }
537146040Stjr	  regs->start = new_start;
538146040Stjr	  regs->end = new_end;
539146040Stjr	  regs->num_regs = need_regs;
540146040Stjr	}
541146040Stjr    }
542146040Stjr  else
543146040Stjr    {
544146040Stjr      assert (regs_allocated == REGS_FIXED);
545146040Stjr      /* This function may not be called with REGS_FIXED and nregs too big.  */
546146040Stjr      assert (regs->num_regs >= nregs);
547146040Stjr      rval = REGS_FIXED;
548146040Stjr    }
549146040Stjr
550146040Stjr  /* Copy the regs.  */
551146040Stjr  for (i = 0; i < nregs; ++i)
552146040Stjr    {
553146040Stjr      regs->start[i] = pmatch[i].rm_so;
554146040Stjr      regs->end[i] = pmatch[i].rm_eo;
555146040Stjr    }
556146040Stjr  for ( ; i < regs->num_regs; ++i)
557146040Stjr    regs->start[i] = regs->end[i] = -1;
558146040Stjr
559146040Stjr  return rval;
560146040Stjr}
561146040Stjr
562146040Stjr/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
563146040Stjr   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
564146040Stjr   this memory for recording register information.  STARTS and ENDS
565146040Stjr   must be allocated using the malloc library routine, and must each
566146040Stjr   be at least NUM_REGS * sizeof (regoff_t) bytes long.
567146040Stjr
568146040Stjr   If NUM_REGS == 0, then subsequent matches should allocate their own
569146040Stjr   register data.
570146040Stjr
571146040Stjr   Unless this function is called, the first search or match using
572146040Stjr   PATTERN_BUFFER will allocate its own register data, without
573146040Stjr   freeing the old data.  */
574146040Stjr
575146040Stjrvoid
576146040Stjrre_set_registers (bufp, regs, num_regs, starts, ends)
577146040Stjr    struct re_pattern_buffer *bufp;
578146040Stjr    struct re_registers *regs;
579146040Stjr    unsigned num_regs;
580146040Stjr    regoff_t *starts, *ends;
581146040Stjr{
582146040Stjr  if (num_regs)
583146040Stjr    {
584146040Stjr      bufp->regs_allocated = REGS_REALLOCATE;
585146040Stjr      regs->num_regs = num_regs;
586146040Stjr      regs->start = starts;
587146040Stjr      regs->end = ends;
588146040Stjr    }
589146040Stjr  else
590146040Stjr    {
591146040Stjr      bufp->regs_allocated = REGS_UNALLOCATED;
592146040Stjr      regs->num_regs = 0;
593146040Stjr      regs->start = regs->end = (regoff_t *) 0;
594146040Stjr    }
595146040Stjr}
596146040Stjr#ifdef _LIBC
597146040Stjrweak_alias (__re_set_registers, re_set_registers)
598146040Stjr#endif
599146040Stjr
600146040Stjr/* Entry points compatible with 4.2 BSD regex library.  We don't define
601146040Stjr   them unless specifically requested.  */
602146040Stjr
603146040Stjr#if defined _REGEX_RE_COMP || defined _LIBC
604146040Stjrint
605146040Stjr# ifdef _LIBC
606146040Stjrweak_function
607146040Stjr# endif
608146040Stjrre_exec (s)
609146040Stjr     const char *s;
610146040Stjr{
611146040Stjr  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
612146040Stjr}
613146040Stjr#endif /* _REGEX_RE_COMP */
614146040Stjr
615146040Stjr/* Internal entry point.  */
616146040Stjr
617146040Stjr/* Searches for a compiled pattern PREG in the string STRING, whose
618146040Stjr   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
619146040Stjr   mingings with regexec.  START, and RANGE have the same meanings
620146040Stjr   with re_search.
621146040Stjr   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
622146040Stjr   otherwise return the error code.
623146040Stjr   Note: We assume front end functions already check ranges.
624146040Stjr   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
625146040Stjr
626146040Stjrstatic reg_errcode_t
627250724Sjkim__attribute_warn_unused_result__
628146040Stjrre_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
629146040Stjr		    eflags)
630146040Stjr    const regex_t *preg;
631146040Stjr    const char *string;
632146040Stjr    int length, start, range, stop, eflags;
633146040Stjr    size_t nmatch;
634146040Stjr    regmatch_t pmatch[];
635146040Stjr{
636146040Stjr  reg_errcode_t err;
637250724Sjkim  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
638146040Stjr  int left_lim, right_lim, incr;
639146040Stjr  int fl_longest_match, match_first, match_kind, match_last = -1;
640146040Stjr  int extra_nmatch;
641146040Stjr  int sb, ch;
642146040Stjr#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
643146040Stjr  re_match_context_t mctx = { .dfa = dfa };
644146040Stjr#else
645146040Stjr  re_match_context_t mctx;
646146040Stjr#endif
647146040Stjr  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
648146040Stjr		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
649250724Sjkim  RE_TRANSLATE_TYPE t = preg->translate;
650146040Stjr
651146040Stjr#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
652146040Stjr  memset (&mctx, '\0', sizeof (re_match_context_t));
653146040Stjr  mctx.dfa = dfa;
654146040Stjr#endif
655146040Stjr
656146040Stjr  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
657146040Stjr  nmatch -= extra_nmatch;
658146040Stjr
659146040Stjr  /* Check if the DFA haven't been compiled.  */
660146040Stjr  if (BE (preg->used == 0 || dfa->init_state == NULL
661146040Stjr	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
662146040Stjr	  || dfa->init_state_begbuf == NULL, 0))
663146040Stjr    return REG_NOMATCH;
664146040Stjr
665146040Stjr#ifdef DEBUG
666146040Stjr  /* We assume front-end functions already check them.  */
667146040Stjr  assert (start + range >= 0 && start + range <= length);
668146040Stjr#endif
669146040Stjr
670146040Stjr  /* If initial states with non-begbuf contexts have no elements,
671146040Stjr     the regex must be anchored.  If preg->newline_anchor is set,
672146040Stjr     we'll never use init_state_nl, so do not check it.  */
673146040Stjr  if (dfa->init_state->nodes.nelem == 0
674146040Stjr      && dfa->init_state_word->nodes.nelem == 0
675146040Stjr      && (dfa->init_state_nl->nodes.nelem == 0
676146040Stjr	  || !preg->newline_anchor))
677146040Stjr    {
678146040Stjr      if (start != 0 && start + range != 0)
679250724Sjkim	return REG_NOMATCH;
680146040Stjr      start = range = 0;
681146040Stjr    }
682146040Stjr
683146040Stjr  /* We must check the longest matching, if nmatch > 0.  */
684146040Stjr  fl_longest_match = (nmatch != 0 || dfa->nbackref);
685146040Stjr
686146040Stjr  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
687146040Stjr			    preg->translate, preg->syntax & RE_ICASE, dfa);
688146040Stjr  if (BE (err != REG_NOERROR, 0))
689146040Stjr    goto free_return;
690146040Stjr  mctx.input.stop = stop;
691146040Stjr  mctx.input.raw_stop = stop;
692146040Stjr  mctx.input.newline_anchor = preg->newline_anchor;
693146040Stjr
694146040Stjr  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
695146040Stjr  if (BE (err != REG_NOERROR, 0))
696146040Stjr    goto free_return;
697146040Stjr
698146040Stjr  /* We will log all the DFA states through which the dfa pass,
699146040Stjr     if nmatch > 1, or this dfa has "multibyte node", which is a
700146040Stjr     back-reference or a node which can accept multibyte character or
701146040Stjr     multi character collating element.  */
702146040Stjr  if (nmatch > 1 || dfa->has_mb_node)
703146040Stjr    {
704250724Sjkim      /* Avoid overflow.  */
705250724Sjkim      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
706250724Sjkim	{
707250724Sjkim	  err = REG_ESPACE;
708250724Sjkim	  goto free_return;
709250724Sjkim	}
710250724Sjkim
711146040Stjr      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
712146040Stjr      if (BE (mctx.state_log == NULL, 0))
713146040Stjr	{
714146040Stjr	  err = REG_ESPACE;
715146040Stjr	  goto free_return;
716146040Stjr	}
717146040Stjr    }
718146040Stjr  else
719146040Stjr    mctx.state_log = NULL;
720146040Stjr
721146040Stjr  match_first = start;
722146040Stjr  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
723146040Stjr			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
724146040Stjr
725146040Stjr  /* Check incrementally whether of not the input string match.  */
726146040Stjr  incr = (range < 0) ? -1 : 1;
727146040Stjr  left_lim = (range < 0) ? start + range : start;
728146040Stjr  right_lim = (range < 0) ? start : start + range;
729146040Stjr  sb = dfa->mb_cur_max == 1;
730146040Stjr  match_kind =
731146040Stjr    (fastmap
732146040Stjr     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
733146040Stjr	| (range >= 0 ? 2 : 0)
734146040Stjr	| (t != NULL ? 1 : 0))
735146040Stjr     : 8);
736146040Stjr
737146040Stjr  for (;; match_first += incr)
738146040Stjr    {
739146040Stjr      err = REG_NOMATCH;
740146040Stjr      if (match_first < left_lim || right_lim < match_first)
741146040Stjr	goto free_return;
742146040Stjr
743146040Stjr      /* Advance as rapidly as possible through the string, until we
744146040Stjr	 find a plausible place to start matching.  This may be done
745146040Stjr	 with varying efficiency, so there are various possibilities:
746146040Stjr	 only the most common of them are specialized, in order to
747146040Stjr	 save on code size.  We use a switch statement for speed.  */
748146040Stjr      switch (match_kind)
749146040Stjr	{
750146040Stjr	case 8:
751146040Stjr	  /* No fastmap.  */
752146040Stjr	  break;
753146040Stjr
754146040Stjr	case 7:
755146040Stjr	  /* Fastmap with single-byte translation, match forward.  */
756146040Stjr	  while (BE (match_first < right_lim, 1)
757146040Stjr		 && !fastmap[t[(unsigned char) string[match_first]]])
758146040Stjr	    ++match_first;
759146040Stjr	  goto forward_match_found_start_or_reached_end;
760146040Stjr
761146040Stjr	case 6:
762146040Stjr	  /* Fastmap without translation, match forward.  */
763146040Stjr	  while (BE (match_first < right_lim, 1)
764146040Stjr		 && !fastmap[(unsigned char) string[match_first]])
765146040Stjr	    ++match_first;
766146040Stjr
767146040Stjr	forward_match_found_start_or_reached_end:
768146040Stjr	  if (BE (match_first == right_lim, 0))
769146040Stjr	    {
770146040Stjr	      ch = match_first >= length
771146040Stjr		       ? 0 : (unsigned char) string[match_first];
772146040Stjr	      if (!fastmap[t ? t[ch] : ch])
773146040Stjr		goto free_return;
774146040Stjr	    }
775146040Stjr	  break;
776146040Stjr
777146040Stjr	case 4:
778146040Stjr	case 5:
779146040Stjr	  /* Fastmap without multi-byte translation, match backwards.  */
780146040Stjr	  while (match_first >= left_lim)
781146040Stjr	    {
782146040Stjr	      ch = match_first >= length
783146040Stjr		       ? 0 : (unsigned char) string[match_first];
784146040Stjr	      if (fastmap[t ? t[ch] : ch])
785146040Stjr		break;
786146040Stjr	      --match_first;
787146040Stjr	    }
788146040Stjr	  if (match_first < left_lim)
789146040Stjr	    goto free_return;
790146040Stjr	  break;
791146040Stjr
792146040Stjr	default:
793146040Stjr	  /* In this case, we can't determine easily the current byte,
794146040Stjr	     since it might be a component byte of a multibyte
795146040Stjr	     character.  Then we use the constructed buffer instead.  */
796146040Stjr	  for (;;)
797146040Stjr	    {
798146040Stjr	      /* If MATCH_FIRST is out of the valid range, reconstruct the
799146040Stjr		 buffers.  */
800146040Stjr	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
801146040Stjr	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
802146040Stjr		{
803146040Stjr		  err = re_string_reconstruct (&mctx.input, match_first,
804146040Stjr					       eflags);
805146040Stjr		  if (BE (err != REG_NOERROR, 0))
806146040Stjr		    goto free_return;
807146040Stjr
808146040Stjr		  offset = match_first - mctx.input.raw_mbs_idx;
809146040Stjr		}
810146040Stjr	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
811146040Stjr		 Note that MATCH_FIRST must not be smaller than 0.  */
812146040Stjr	      ch = (match_first >= length
813146040Stjr		    ? 0 : re_string_byte_at (&mctx.input, offset));
814146040Stjr	      if (fastmap[ch])
815146040Stjr		break;
816146040Stjr	      match_first += incr;
817146040Stjr	      if (match_first < left_lim || match_first > right_lim)
818250724Sjkim		{
819250724Sjkim		  err = REG_NOMATCH;
820250724Sjkim		  goto free_return;
821250724Sjkim		}
822146040Stjr	    }
823146040Stjr	  break;
824146040Stjr	}
825146040Stjr
826146040Stjr      /* Reconstruct the buffers so that the matcher can assume that
827146040Stjr	 the matching starts from the beginning of the buffer.  */
828146040Stjr      err = re_string_reconstruct (&mctx.input, match_first, eflags);
829146040Stjr      if (BE (err != REG_NOERROR, 0))
830146040Stjr	goto free_return;
831146040Stjr
832146040Stjr#ifdef RE_ENABLE_I18N
833146040Stjr     /* Don't consider this char as a possible match start if it part,
834146040Stjr	yet isn't the head, of a multibyte character.  */
835146040Stjr      if (!sb && !re_string_first_byte (&mctx.input, 0))
836146040Stjr	continue;
837146040Stjr#endif
838146040Stjr
839146040Stjr      /* It seems to be appropriate one, then use the matcher.  */
840146040Stjr      /* We assume that the matching starts from 0.  */
841146040Stjr      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
842146040Stjr      match_last = check_matching (&mctx, fl_longest_match,
843146040Stjr				   range >= 0 ? &match_first : NULL);
844146040Stjr      if (match_last != -1)
845146040Stjr	{
846146040Stjr	  if (BE (match_last == -2, 0))
847146040Stjr	    {
848146040Stjr	      err = REG_ESPACE;
849146040Stjr	      goto free_return;
850146040Stjr	    }
851146040Stjr	  else
852146040Stjr	    {
853146040Stjr	      mctx.match_last = match_last;
854146040Stjr	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
855146040Stjr		{
856146040Stjr		  re_dfastate_t *pstate = mctx.state_log[match_last];
857146040Stjr		  mctx.last_node = check_halt_state_context (&mctx, pstate,
858146040Stjr							     match_last);
859146040Stjr		}
860146040Stjr	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
861146040Stjr		  || dfa->nbackref)
862146040Stjr		{
863146040Stjr		  err = prune_impossible_nodes (&mctx);
864146040Stjr		  if (err == REG_NOERROR)
865146040Stjr		    break;
866146040Stjr		  if (BE (err != REG_NOMATCH, 0))
867146040Stjr		    goto free_return;
868146040Stjr		  match_last = -1;
869146040Stjr		}
870146040Stjr	      else
871146040Stjr		break; /* We found a match.  */
872146040Stjr	    }
873146040Stjr	}
874146040Stjr
875146040Stjr      match_ctx_clean (&mctx);
876146040Stjr    }
877146040Stjr
878146040Stjr#ifdef DEBUG
879146040Stjr  assert (match_last != -1);
880146040Stjr  assert (err == REG_NOERROR);
881146040Stjr#endif
882146040Stjr
883146040Stjr  /* Set pmatch[] if we need.  */
884146040Stjr  if (nmatch > 0)
885146040Stjr    {
886146040Stjr      int reg_idx;
887146040Stjr
888146040Stjr      /* Initialize registers.  */
889146040Stjr      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
890146040Stjr	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
891146040Stjr
892146040Stjr      /* Set the points where matching start/end.  */
893146040Stjr      pmatch[0].rm_so = 0;
894146040Stjr      pmatch[0].rm_eo = mctx.match_last;
895146040Stjr
896146040Stjr      if (!preg->no_sub && nmatch > 1)
897146040Stjr	{
898146040Stjr	  err = set_regs (preg, &mctx, nmatch, pmatch,
899146040Stjr			  dfa->has_plural_match && dfa->nbackref > 0);
900146040Stjr	  if (BE (err != REG_NOERROR, 0))
901146040Stjr	    goto free_return;
902146040Stjr	}
903146040Stjr
904146040Stjr      /* At last, add the offset to the each registers, since we slided
905146040Stjr	 the buffers so that we could assume that the matching starts
906146040Stjr	 from 0.  */
907146040Stjr      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
908146040Stjr	if (pmatch[reg_idx].rm_so != -1)
909146040Stjr	  {
910146040Stjr#ifdef RE_ENABLE_I18N
911146040Stjr	    if (BE (mctx.input.offsets_needed != 0, 0))
912146040Stjr	      {
913250724Sjkim		pmatch[reg_idx].rm_so =
914250724Sjkim		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
915250724Sjkim		   ? mctx.input.valid_raw_len
916250724Sjkim		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
917250724Sjkim		pmatch[reg_idx].rm_eo =
918250724Sjkim		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
919250724Sjkim		   ? mctx.input.valid_raw_len
920250724Sjkim		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
921146040Stjr	      }
922146040Stjr#else
923146040Stjr	    assert (mctx.input.offsets_needed == 0);
924146040Stjr#endif
925146040Stjr	    pmatch[reg_idx].rm_so += match_first;
926146040Stjr	    pmatch[reg_idx].rm_eo += match_first;
927146040Stjr	  }
928146040Stjr      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
929146040Stjr	{
930146040Stjr	  pmatch[nmatch + reg_idx].rm_so = -1;
931146040Stjr	  pmatch[nmatch + reg_idx].rm_eo = -1;
932146040Stjr	}
933146040Stjr
934146040Stjr      if (dfa->subexp_map)
935250724Sjkim	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
936250724Sjkim	  if (dfa->subexp_map[reg_idx] != reg_idx)
937250724Sjkim	    {
938250724Sjkim	      pmatch[reg_idx + 1].rm_so
939250724Sjkim		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
940250724Sjkim	      pmatch[reg_idx + 1].rm_eo
941250724Sjkim		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
942250724Sjkim	    }
943146040Stjr    }
944146040Stjr
945146040Stjr free_return:
946146040Stjr  re_free (mctx.state_log);
947146040Stjr  if (dfa->nbackref)
948146040Stjr    match_ctx_free (&mctx);
949146040Stjr  re_string_destruct (&mctx.input);
950146040Stjr  return err;
951146040Stjr}
952146040Stjr
953146040Stjrstatic reg_errcode_t
954250724Sjkim__attribute_warn_unused_result__
955146040Stjrprune_impossible_nodes (mctx)
956146040Stjr     re_match_context_t *mctx;
957146040Stjr{
958250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
959146040Stjr  int halt_node, match_last;
960146040Stjr  reg_errcode_t ret;
961146040Stjr  re_dfastate_t **sifted_states;
962146040Stjr  re_dfastate_t **lim_states = NULL;
963146040Stjr  re_sift_context_t sctx;
964146040Stjr#ifdef DEBUG
965146040Stjr  assert (mctx->state_log != NULL);
966146040Stjr#endif
967146040Stjr  match_last = mctx->match_last;
968146040Stjr  halt_node = mctx->last_node;
969250724Sjkim
970250724Sjkim  /* Avoid overflow.  */
971250724Sjkim  if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
972250724Sjkim    return REG_ESPACE;
973250724Sjkim
974146040Stjr  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
975146040Stjr  if (BE (sifted_states == NULL, 0))
976146040Stjr    {
977146040Stjr      ret = REG_ESPACE;
978146040Stjr      goto free_return;
979146040Stjr    }
980146040Stjr  if (dfa->nbackref)
981146040Stjr    {
982146040Stjr      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
983146040Stjr      if (BE (lim_states == NULL, 0))
984146040Stjr	{
985146040Stjr	  ret = REG_ESPACE;
986146040Stjr	  goto free_return;
987146040Stjr	}
988146040Stjr      while (1)
989146040Stjr	{
990146040Stjr	  memset (lim_states, '\0',
991146040Stjr		  sizeof (re_dfastate_t *) * (match_last + 1));
992146040Stjr	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
993146040Stjr			 match_last);
994146040Stjr	  ret = sift_states_backward (mctx, &sctx);
995146040Stjr	  re_node_set_free (&sctx.limits);
996146040Stjr	  if (BE (ret != REG_NOERROR, 0))
997146040Stjr	      goto free_return;
998146040Stjr	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
999146040Stjr	    break;
1000146040Stjr	  do
1001146040Stjr	    {
1002146040Stjr	      --match_last;
1003146040Stjr	      if (match_last < 0)
1004146040Stjr		{
1005146040Stjr		  ret = REG_NOMATCH;
1006146040Stjr		  goto free_return;
1007146040Stjr		}
1008146040Stjr	    } while (mctx->state_log[match_last] == NULL
1009146040Stjr		     || !mctx->state_log[match_last]->halt);
1010146040Stjr	  halt_node = check_halt_state_context (mctx,
1011146040Stjr						mctx->state_log[match_last],
1012146040Stjr						match_last);
1013146040Stjr	}
1014146040Stjr      ret = merge_state_array (dfa, sifted_states, lim_states,
1015146040Stjr			       match_last + 1);
1016146040Stjr      re_free (lim_states);
1017146040Stjr      lim_states = NULL;
1018146040Stjr      if (BE (ret != REG_NOERROR, 0))
1019146040Stjr	goto free_return;
1020146040Stjr    }
1021146040Stjr  else
1022146040Stjr    {
1023146040Stjr      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1024146040Stjr      ret = sift_states_backward (mctx, &sctx);
1025146040Stjr      re_node_set_free (&sctx.limits);
1026146040Stjr      if (BE (ret != REG_NOERROR, 0))
1027146040Stjr	goto free_return;
1028250724Sjkim      if (sifted_states[0] == NULL)
1029250724Sjkim	{
1030250724Sjkim	  ret = REG_NOMATCH;
1031250724Sjkim	  goto free_return;
1032250724Sjkim	}
1033146040Stjr    }
1034146040Stjr  re_free (mctx->state_log);
1035146040Stjr  mctx->state_log = sifted_states;
1036146040Stjr  sifted_states = NULL;
1037146040Stjr  mctx->last_node = halt_node;
1038146040Stjr  mctx->match_last = match_last;
1039146040Stjr  ret = REG_NOERROR;
1040146040Stjr free_return:
1041146040Stjr  re_free (sifted_states);
1042146040Stjr  re_free (lim_states);
1043146040Stjr  return ret;
1044146040Stjr}
1045146040Stjr
1046146040Stjr/* Acquire an initial state and return it.
1047146040Stjr   We must select appropriate initial state depending on the context,
1048146040Stjr   since initial states may have constraints like "\<", "^", etc..  */
1049146040Stjr
1050146040Stjrstatic inline re_dfastate_t *
1051250724Sjkim__attribute ((always_inline)) internal_function
1052250724Sjkimacquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1053250724Sjkim			    int idx)
1054146040Stjr{
1055250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1056146040Stjr  if (dfa->init_state->has_constraint)
1057146040Stjr    {
1058146040Stjr      unsigned int context;
1059146040Stjr      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1060146040Stjr      if (IS_WORD_CONTEXT (context))
1061146040Stjr	return dfa->init_state_word;
1062146040Stjr      else if (IS_ORDINARY_CONTEXT (context))
1063146040Stjr	return dfa->init_state;
1064146040Stjr      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1065146040Stjr	return dfa->init_state_begbuf;
1066146040Stjr      else if (IS_NEWLINE_CONTEXT (context))
1067146040Stjr	return dfa->init_state_nl;
1068146040Stjr      else if (IS_BEGBUF_CONTEXT (context))
1069146040Stjr	{
1070146040Stjr	  /* It is relatively rare case, then calculate on demand.  */
1071146040Stjr	  return re_acquire_state_context (err, dfa,
1072146040Stjr					   dfa->init_state->entrance_nodes,
1073146040Stjr					   context);
1074146040Stjr	}
1075146040Stjr      else
1076146040Stjr	/* Must not happen?  */
1077146040Stjr	return dfa->init_state;
1078146040Stjr    }
1079146040Stjr  else
1080146040Stjr    return dfa->init_state;
1081146040Stjr}
1082146040Stjr
1083146040Stjr/* Check whether the regular expression match input string INPUT or not,
1084146040Stjr   and return the index where the matching end, return -1 if not match,
1085146040Stjr   or return -2 in case of an error.
1086146040Stjr   FL_LONGEST_MATCH means we want the POSIX longest matching.
1087146040Stjr   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1088146040Stjr   next place where we may want to try matching.
1089146040Stjr   Note that the matcher assume that the maching starts from the current
1090146040Stjr   index of the buffer.  */
1091146040Stjr
1092146040Stjrstatic int
1093250724Sjkiminternal_function __attribute_warn_unused_result__
1094250724Sjkimcheck_matching (re_match_context_t *mctx, int fl_longest_match,
1095250724Sjkim		int *p_match_first)
1096146040Stjr{
1097250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1098146040Stjr  reg_errcode_t err;
1099146040Stjr  int match = 0;
1100146040Stjr  int match_last = -1;
1101146040Stjr  int cur_str_idx = re_string_cur_idx (&mctx->input);
1102146040Stjr  re_dfastate_t *cur_state;
1103146040Stjr  int at_init_state = p_match_first != NULL;
1104146040Stjr  int next_start_idx = cur_str_idx;
1105146040Stjr
1106146040Stjr  err = REG_NOERROR;
1107146040Stjr  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1108146040Stjr  /* An initial state must not be NULL (invalid).  */
1109146040Stjr  if (BE (cur_state == NULL, 0))
1110146040Stjr    {
1111146040Stjr      assert (err == REG_ESPACE);
1112146040Stjr      return -2;
1113146040Stjr    }
1114146040Stjr
1115146040Stjr  if (mctx->state_log != NULL)
1116146040Stjr    {
1117146040Stjr      mctx->state_log[cur_str_idx] = cur_state;
1118146040Stjr
1119146040Stjr      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1120146040Stjr	 later.  E.g. Processing back references.  */
1121146040Stjr      if (BE (dfa->nbackref, 0))
1122146040Stjr	{
1123146040Stjr	  at_init_state = 0;
1124146040Stjr	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1125146040Stjr	  if (BE (err != REG_NOERROR, 0))
1126146040Stjr	    return err;
1127146040Stjr
1128146040Stjr	  if (cur_state->has_backref)
1129146040Stjr	    {
1130146040Stjr	      err = transit_state_bkref (mctx, &cur_state->nodes);
1131146040Stjr	      if (BE (err != REG_NOERROR, 0))
1132250724Sjkim		return err;
1133146040Stjr	    }
1134146040Stjr	}
1135146040Stjr    }
1136146040Stjr
1137146040Stjr  /* If the RE accepts NULL string.  */
1138146040Stjr  if (BE (cur_state->halt, 0))
1139146040Stjr    {
1140146040Stjr      if (!cur_state->has_constraint
1141146040Stjr	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1142146040Stjr	{
1143146040Stjr	  if (!fl_longest_match)
1144146040Stjr	    return cur_str_idx;
1145146040Stjr	  else
1146146040Stjr	    {
1147146040Stjr	      match_last = cur_str_idx;
1148146040Stjr	      match = 1;
1149146040Stjr	    }
1150146040Stjr	}
1151146040Stjr    }
1152146040Stjr
1153146040Stjr  while (!re_string_eoi (&mctx->input))
1154146040Stjr    {
1155146040Stjr      re_dfastate_t *old_state = cur_state;
1156146040Stjr      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1157146040Stjr
1158250724Sjkim      if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1159250724Sjkim	   && mctx->input.bufs_len < mctx->input.len)
1160250724Sjkim	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1161250724Sjkim	      && mctx->input.valid_len < mctx->input.len))
1162250724Sjkim	{
1163251435Sjkim	  err = extend_buffers (mctx, next_char_idx + 1);
1164250724Sjkim	  if (BE (err != REG_NOERROR, 0))
1165146040Stjr	    {
1166146040Stjr	      assert (err == REG_ESPACE);
1167146040Stjr	      return -2;
1168146040Stjr	    }
1169250724Sjkim	}
1170146040Stjr
1171146040Stjr      cur_state = transit_state (&err, mctx, cur_state);
1172146040Stjr      if (mctx->state_log != NULL)
1173146040Stjr	cur_state = merge_state_with_log (&err, mctx, cur_state);
1174146040Stjr
1175146040Stjr      if (cur_state == NULL)
1176146040Stjr	{
1177146040Stjr	  /* Reached the invalid state or an error.  Try to recover a valid
1178146040Stjr	     state using the state log, if available and if we have not
1179146040Stjr	     already found a valid (even if not the longest) match.  */
1180146040Stjr	  if (BE (err != REG_NOERROR, 0))
1181146040Stjr	    return -2;
1182146040Stjr
1183146040Stjr	  if (mctx->state_log == NULL
1184146040Stjr	      || (match && !fl_longest_match)
1185146040Stjr	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1186146040Stjr	    break;
1187146040Stjr	}
1188146040Stjr
1189146040Stjr      if (BE (at_init_state, 0))
1190146040Stjr	{
1191146040Stjr	  if (old_state == cur_state)
1192146040Stjr	    next_start_idx = next_char_idx;
1193146040Stjr	  else
1194146040Stjr	    at_init_state = 0;
1195146040Stjr	}
1196146040Stjr
1197146040Stjr      if (cur_state->halt)
1198146040Stjr	{
1199146040Stjr	  /* Reached a halt state.
1200146040Stjr	     Check the halt state can satisfy the current context.  */
1201146040Stjr	  if (!cur_state->has_constraint
1202146040Stjr	      || check_halt_state_context (mctx, cur_state,
1203146040Stjr					   re_string_cur_idx (&mctx->input)))
1204146040Stjr	    {
1205146040Stjr	      /* We found an appropriate halt state.  */
1206146040Stjr	      match_last = re_string_cur_idx (&mctx->input);
1207146040Stjr	      match = 1;
1208146040Stjr
1209146040Stjr	      /* We found a match, do not modify match_first below.  */
1210146040Stjr	      p_match_first = NULL;
1211146040Stjr	      if (!fl_longest_match)
1212146040Stjr		break;
1213146040Stjr	    }
1214146040Stjr	}
1215146040Stjr    }
1216146040Stjr
1217146040Stjr  if (p_match_first)
1218146040Stjr    *p_match_first += next_start_idx;
1219146040Stjr
1220146040Stjr  return match_last;
1221146040Stjr}
1222146040Stjr
1223146040Stjr/* Check NODE match the current context.  */
1224146040Stjr
1225250724Sjkimstatic int
1226250724Sjkiminternal_function
1227250724Sjkimcheck_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1228146040Stjr{
1229146040Stjr  re_token_type_t type = dfa->nodes[node].type;
1230146040Stjr  unsigned int constraint = dfa->nodes[node].constraint;
1231146040Stjr  if (type != END_OF_RE)
1232146040Stjr    return 0;
1233146040Stjr  if (!constraint)
1234146040Stjr    return 1;
1235146040Stjr  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1236146040Stjr    return 0;
1237146040Stjr  return 1;
1238146040Stjr}
1239146040Stjr
1240146040Stjr/* Check the halt state STATE match the current context.
1241146040Stjr   Return 0 if not match, if the node, STATE has, is a halt node and
1242146040Stjr   match the context, return the node.  */
1243146040Stjr
1244146040Stjrstatic int
1245250724Sjkiminternal_function
1246250724Sjkimcheck_halt_state_context (const re_match_context_t *mctx,
1247250724Sjkim			  const re_dfastate_t *state, int idx)
1248146040Stjr{
1249146040Stjr  int i;
1250146040Stjr  unsigned int context;
1251146040Stjr#ifdef DEBUG
1252146040Stjr  assert (state->halt);
1253146040Stjr#endif
1254146040Stjr  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1255146040Stjr  for (i = 0; i < state->nodes.nelem; ++i)
1256146040Stjr    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1257146040Stjr      return state->nodes.elems[i];
1258146040Stjr  return 0;
1259146040Stjr}
1260146040Stjr
1261146040Stjr/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1262146040Stjr   corresponding to the DFA).
1263146040Stjr   Return the destination node, and update EPS_VIA_NODES, return -1 in case
1264146040Stjr   of errors.  */
1265146040Stjr
1266146040Stjrstatic int
1267250724Sjkiminternal_function
1268250724Sjkimproceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1269250724Sjkim		   int *pidx, int node, re_node_set *eps_via_nodes,
1270250724Sjkim		   struct re_fail_stack_t *fs)
1271146040Stjr{
1272250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1273250724Sjkim  int i, err;
1274146040Stjr  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1275146040Stjr    {
1276146040Stjr      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1277146040Stjr      re_node_set *edests = &dfa->edests[node];
1278146040Stjr      int dest_node;
1279146040Stjr      err = re_node_set_insert (eps_via_nodes, node);
1280146040Stjr      if (BE (err < 0, 0))
1281146040Stjr	return -2;
1282146040Stjr      /* Pick up a valid destination, or return -1 if none is found.  */
1283146040Stjr      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1284146040Stjr	{
1285146040Stjr	  int candidate = edests->elems[i];
1286146040Stjr	  if (!re_node_set_contains (cur_nodes, candidate))
1287146040Stjr	    continue;
1288250724Sjkim	  if (dest_node == -1)
1289146040Stjr	    dest_node = candidate;
1290146040Stjr
1291250724Sjkim	  else
1292146040Stjr	    {
1293146040Stjr	      /* In order to avoid infinite loop like "(a*)*", return the second
1294250724Sjkim		 epsilon-transition if the first was already considered.  */
1295146040Stjr	      if (re_node_set_contains (eps_via_nodes, dest_node))
1296250724Sjkim		return candidate;
1297146040Stjr
1298146040Stjr	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1299146040Stjr	      else if (fs != NULL
1300146040Stjr		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1301250724Sjkim					   eps_via_nodes))
1302146040Stjr		return -2;
1303146040Stjr
1304146040Stjr	      /* We know we are going to exit.  */
1305146040Stjr	      break;
1306146040Stjr	    }
1307146040Stjr	}
1308146040Stjr      return dest_node;
1309146040Stjr    }
1310146040Stjr  else
1311146040Stjr    {
1312146040Stjr      int naccepted = 0;
1313146040Stjr      re_token_type_t type = dfa->nodes[node].type;
1314146040Stjr
1315146040Stjr#ifdef RE_ENABLE_I18N
1316146040Stjr      if (dfa->nodes[node].accept_mb)
1317146040Stjr	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1318146040Stjr      else
1319146040Stjr#endif /* RE_ENABLE_I18N */
1320146040Stjr      if (type == OP_BACK_REF)
1321146040Stjr	{
1322146040Stjr	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
1323146040Stjr	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1324146040Stjr	  if (fs != NULL)
1325146040Stjr	    {
1326146040Stjr	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1327146040Stjr		return -1;
1328146040Stjr	      else if (naccepted)
1329146040Stjr		{
1330146040Stjr		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1331146040Stjr		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1332146040Stjr			      naccepted) != 0)
1333146040Stjr		    return -1;
1334146040Stjr		}
1335146040Stjr	    }
1336146040Stjr
1337146040Stjr	  if (naccepted == 0)
1338146040Stjr	    {
1339250724Sjkim	      int dest_node;
1340146040Stjr	      err = re_node_set_insert (eps_via_nodes, node);
1341146040Stjr	      if (BE (err < 0, 0))
1342146040Stjr		return -2;
1343146040Stjr	      dest_node = dfa->edests[node].elems[0];
1344146040Stjr	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1345146040Stjr					dest_node))
1346146040Stjr		return dest_node;
1347146040Stjr	    }
1348146040Stjr	}
1349146040Stjr
1350146040Stjr      if (naccepted != 0
1351146040Stjr	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1352146040Stjr	{
1353250724Sjkim	  int dest_node = dfa->nexts[node];
1354146040Stjr	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1355146040Stjr	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1356146040Stjr		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1357146040Stjr					       dest_node)))
1358146040Stjr	    return -1;
1359146040Stjr	  re_node_set_empty (eps_via_nodes);
1360146040Stjr	  return dest_node;
1361146040Stjr	}
1362146040Stjr    }
1363146040Stjr  return -1;
1364146040Stjr}
1365146040Stjr
1366146040Stjrstatic reg_errcode_t
1367250724Sjkiminternal_function __attribute_warn_unused_result__
1368250724Sjkimpush_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1369250724Sjkim		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1370146040Stjr{
1371146040Stjr  reg_errcode_t err;
1372146040Stjr  int num = fs->num++;
1373146040Stjr  if (fs->num == fs->alloc)
1374146040Stjr    {
1375146040Stjr      struct re_fail_stack_ent_t *new_array;
1376146040Stjr      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1377146040Stjr				       * fs->alloc * 2));
1378146040Stjr      if (new_array == NULL)
1379146040Stjr	return REG_ESPACE;
1380146040Stjr      fs->alloc *= 2;
1381146040Stjr      fs->stack = new_array;
1382146040Stjr    }
1383146040Stjr  fs->stack[num].idx = str_idx;
1384146040Stjr  fs->stack[num].node = dest_node;
1385146040Stjr  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1386146040Stjr  if (fs->stack[num].regs == NULL)
1387146040Stjr    return REG_ESPACE;
1388146040Stjr  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1389146040Stjr  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1390146040Stjr  return err;
1391146040Stjr}
1392146040Stjr
1393146040Stjrstatic int
1394250724Sjkiminternal_function
1395250724Sjkimpop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1396250724Sjkim		regmatch_t *regs, re_node_set *eps_via_nodes)
1397146040Stjr{
1398146040Stjr  int num = --fs->num;
1399146040Stjr  assert (num >= 0);
1400146040Stjr  *pidx = fs->stack[num].idx;
1401146040Stjr  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1402146040Stjr  re_node_set_free (eps_via_nodes);
1403146040Stjr  re_free (fs->stack[num].regs);
1404146040Stjr  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1405146040Stjr  return fs->stack[num].node;
1406146040Stjr}
1407146040Stjr
1408146040Stjr/* Set the positions where the subexpressions are starts/ends to registers
1409146040Stjr   PMATCH.
1410146040Stjr   Note: We assume that pmatch[0] is already set, and
1411146040Stjr   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1412146040Stjr
1413146040Stjrstatic reg_errcode_t
1414250724Sjkiminternal_function __attribute_warn_unused_result__
1415250724Sjkimset_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1416250724Sjkim	  regmatch_t *pmatch, int fl_backtrack)
1417146040Stjr{
1418250724Sjkim  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1419146040Stjr  int idx, cur_node;
1420146040Stjr  re_node_set eps_via_nodes;
1421146040Stjr  struct re_fail_stack_t *fs;
1422146040Stjr  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1423146040Stjr  regmatch_t *prev_idx_match;
1424250724Sjkim  int prev_idx_match_malloced = 0;
1425146040Stjr
1426146040Stjr#ifdef DEBUG
1427146040Stjr  assert (nmatch > 1);
1428146040Stjr  assert (mctx->state_log != NULL);
1429146040Stjr#endif
1430146040Stjr  if (fl_backtrack)
1431146040Stjr    {
1432146040Stjr      fs = &fs_body;
1433146040Stjr      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1434146040Stjr      if (fs->stack == NULL)
1435146040Stjr	return REG_ESPACE;
1436146040Stjr    }
1437146040Stjr  else
1438146040Stjr    fs = NULL;
1439146040Stjr
1440146040Stjr  cur_node = dfa->init_node;
1441146040Stjr  re_node_set_init_empty (&eps_via_nodes);
1442146040Stjr
1443250724Sjkim  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1444250724Sjkim    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1445250724Sjkim  else
1446250724Sjkim    {
1447250724Sjkim      prev_idx_match = re_malloc (regmatch_t, nmatch);
1448250724Sjkim      if (prev_idx_match == NULL)
1449250724Sjkim	{
1450250724Sjkim	  free_fail_stack_return (fs);
1451250724Sjkim	  return REG_ESPACE;
1452250724Sjkim	}
1453250724Sjkim      prev_idx_match_malloced = 1;
1454250724Sjkim    }
1455146040Stjr  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1456146040Stjr
1457146040Stjr  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1458146040Stjr    {
1459146040Stjr      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1460146040Stjr
1461146040Stjr      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1462146040Stjr	{
1463146040Stjr	  int reg_idx;
1464146040Stjr	  if (fs)
1465146040Stjr	    {
1466146040Stjr	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1467146040Stjr		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1468146040Stjr		  break;
1469146040Stjr	      if (reg_idx == nmatch)
1470146040Stjr		{
1471146040Stjr		  re_node_set_free (&eps_via_nodes);
1472250724Sjkim		  if (prev_idx_match_malloced)
1473250724Sjkim		    re_free (prev_idx_match);
1474146040Stjr		  return free_fail_stack_return (fs);
1475146040Stjr		}
1476146040Stjr	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1477146040Stjr					 &eps_via_nodes);
1478146040Stjr	    }
1479146040Stjr	  else
1480146040Stjr	    {
1481146040Stjr	      re_node_set_free (&eps_via_nodes);
1482250724Sjkim	      if (prev_idx_match_malloced)
1483250724Sjkim		re_free (prev_idx_match);
1484146040Stjr	      return REG_NOERROR;
1485146040Stjr	    }
1486146040Stjr	}
1487146040Stjr
1488146040Stjr      /* Proceed to next node.  */
1489146040Stjr      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1490146040Stjr				    &eps_via_nodes, fs);
1491146040Stjr
1492146040Stjr      if (BE (cur_node < 0, 0))
1493146040Stjr	{
1494146040Stjr	  if (BE (cur_node == -2, 0))
1495146040Stjr	    {
1496146040Stjr	      re_node_set_free (&eps_via_nodes);
1497250724Sjkim	      if (prev_idx_match_malloced)
1498250724Sjkim		re_free (prev_idx_match);
1499146040Stjr	      free_fail_stack_return (fs);
1500146040Stjr	      return REG_ESPACE;
1501146040Stjr	    }
1502146040Stjr	  if (fs)
1503146040Stjr	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1504146040Stjr				       &eps_via_nodes);
1505146040Stjr	  else
1506146040Stjr	    {
1507146040Stjr	      re_node_set_free (&eps_via_nodes);
1508250724Sjkim	      if (prev_idx_match_malloced)
1509250724Sjkim		re_free (prev_idx_match);
1510146040Stjr	      return REG_NOMATCH;
1511146040Stjr	    }
1512146040Stjr	}
1513146040Stjr    }
1514146040Stjr  re_node_set_free (&eps_via_nodes);
1515250724Sjkim  if (prev_idx_match_malloced)
1516250724Sjkim    re_free (prev_idx_match);
1517146040Stjr  return free_fail_stack_return (fs);
1518146040Stjr}
1519146040Stjr
1520146040Stjrstatic reg_errcode_t
1521250724Sjkiminternal_function
1522250724Sjkimfree_fail_stack_return (struct re_fail_stack_t *fs)
1523146040Stjr{
1524146040Stjr  if (fs)
1525146040Stjr    {
1526146040Stjr      int fs_idx;
1527146040Stjr      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1528146040Stjr	{
1529146040Stjr	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1530146040Stjr	  re_free (fs->stack[fs_idx].regs);
1531146040Stjr	}
1532146040Stjr      re_free (fs->stack);
1533146040Stjr    }
1534146040Stjr  return REG_NOERROR;
1535146040Stjr}
1536146040Stjr
1537146040Stjrstatic void
1538250724Sjkiminternal_function
1539250724Sjkimupdate_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1540250724Sjkim	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1541146040Stjr{
1542146040Stjr  int type = dfa->nodes[cur_node].type;
1543146040Stjr  if (type == OP_OPEN_SUBEXP)
1544146040Stjr    {
1545146040Stjr      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1546146040Stjr
1547146040Stjr      /* We are at the first node of this sub expression.  */
1548146040Stjr      if (reg_num < nmatch)
1549146040Stjr	{
1550146040Stjr	  pmatch[reg_num].rm_so = cur_idx;
1551146040Stjr	  pmatch[reg_num].rm_eo = -1;
1552146040Stjr	}
1553146040Stjr    }
1554146040Stjr  else if (type == OP_CLOSE_SUBEXP)
1555146040Stjr    {
1556146040Stjr      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1557146040Stjr      if (reg_num < nmatch)
1558146040Stjr	{
1559146040Stjr	  /* We are at the last node of this sub expression.  */
1560146040Stjr	  if (pmatch[reg_num].rm_so < cur_idx)
1561146040Stjr	    {
1562146040Stjr	      pmatch[reg_num].rm_eo = cur_idx;
1563146040Stjr	      /* This is a non-empty match or we are not inside an optional
1564146040Stjr		 subexpression.  Accept this right away.  */
1565146040Stjr	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1566146040Stjr	    }
1567146040Stjr	  else
1568146040Stjr	    {
1569146040Stjr	      if (dfa->nodes[cur_node].opt_subexp
1570146040Stjr		  && prev_idx_match[reg_num].rm_so != -1)
1571146040Stjr		/* We transited through an empty match for an optional
1572146040Stjr		   subexpression, like (a?)*, and this is not the subexp's
1573146040Stjr		   first match.  Copy back the old content of the registers
1574146040Stjr		   so that matches of an inner subexpression are undone as
1575146040Stjr		   well, like in ((a?))*.  */
1576146040Stjr		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1577146040Stjr	      else
1578146040Stjr		/* We completed a subexpression, but it may be part of
1579146040Stjr		   an optional one, so do not update PREV_IDX_MATCH.  */
1580146040Stjr		pmatch[reg_num].rm_eo = cur_idx;
1581146040Stjr	    }
1582146040Stjr	}
1583146040Stjr    }
1584146040Stjr}
1585146040Stjr
1586146040Stjr/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1587146040Stjr   and sift the nodes in each states according to the following rules.
1588146040Stjr   Updated state_log will be wrote to STATE_LOG.
1589146040Stjr
1590146040Stjr   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1591146040Stjr     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1592146040Stjr	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1593146040Stjr	the LAST_NODE, we throw away the node `a'.
1594146040Stjr     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1595146040Stjr	string `s' and transit to `b':
1596146040Stjr	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1597146040Stjr	   away the node `a'.
1598146040Stjr	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1599146040Stjr	    thrown away, we throw away the node `a'.
1600146040Stjr     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1601146040Stjr	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1602146040Stjr	   node `a'.
1603146040Stjr	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1604146040Stjr	    we throw away the node `a'.  */
1605146040Stjr
1606146040Stjr#define STATE_NODE_CONTAINS(state,node) \
1607146040Stjr  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1608146040Stjr
1609146040Stjrstatic reg_errcode_t
1610250724Sjkiminternal_function
1611250724Sjkimsift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1612146040Stjr{
1613146040Stjr  reg_errcode_t err;
1614146040Stjr  int null_cnt = 0;
1615146040Stjr  int str_idx = sctx->last_str_idx;
1616146040Stjr  re_node_set cur_dest;
1617146040Stjr
1618146040Stjr#ifdef DEBUG
1619146040Stjr  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1620146040Stjr#endif
1621146040Stjr
1622146040Stjr  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1623146040Stjr     transit to the last_node and the last_node itself.  */
1624146040Stjr  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1625146040Stjr  if (BE (err != REG_NOERROR, 0))
1626146040Stjr    return err;
1627146040Stjr  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1628146040Stjr  if (BE (err != REG_NOERROR, 0))
1629146040Stjr    goto free_return;
1630146040Stjr
1631146040Stjr  /* Then check each states in the state_log.  */
1632146040Stjr  while (str_idx > 0)
1633146040Stjr    {
1634146040Stjr      /* Update counters.  */
1635146040Stjr      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1636146040Stjr      if (null_cnt > mctx->max_mb_elem_len)
1637146040Stjr	{
1638146040Stjr	  memset (sctx->sifted_states, '\0',
1639146040Stjr		  sizeof (re_dfastate_t *) * str_idx);
1640146040Stjr	  re_node_set_free (&cur_dest);
1641146040Stjr	  return REG_NOERROR;
1642146040Stjr	}
1643146040Stjr      re_node_set_empty (&cur_dest);
1644146040Stjr      --str_idx;
1645146040Stjr
1646146040Stjr      if (mctx->state_log[str_idx])
1647146040Stjr	{
1648146040Stjr	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1649250724Sjkim	  if (BE (err != REG_NOERROR, 0))
1650146040Stjr	    goto free_return;
1651146040Stjr	}
1652146040Stjr
1653146040Stjr      /* Add all the nodes which satisfy the following conditions:
1654146040Stjr	 - It can epsilon transit to a node in CUR_DEST.
1655146040Stjr	 - It is in CUR_SRC.
1656146040Stjr	 And update state_log.  */
1657146040Stjr      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1658146040Stjr      if (BE (err != REG_NOERROR, 0))
1659146040Stjr	goto free_return;
1660146040Stjr    }
1661146040Stjr  err = REG_NOERROR;
1662146040Stjr free_return:
1663146040Stjr  re_node_set_free (&cur_dest);
1664146040Stjr  return err;
1665146040Stjr}
1666146040Stjr
1667146040Stjrstatic reg_errcode_t
1668250724Sjkiminternal_function __attribute_warn_unused_result__
1669250724Sjkimbuild_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1670250724Sjkim		     int str_idx, re_node_set *cur_dest)
1671146040Stjr{
1672250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1673250724Sjkim  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1674146040Stjr  int i;
1675146040Stjr
1676146040Stjr  /* Then build the next sifted state.
1677146040Stjr     We build the next sifted state on `cur_dest', and update
1678146040Stjr     `sifted_states[str_idx]' with `cur_dest'.
1679146040Stjr     Note:
1680146040Stjr     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1681146040Stjr     `cur_src' points the node_set of the old `state_log[str_idx]'
1682146040Stjr     (with the epsilon nodes pre-filtered out).  */
1683146040Stjr  for (i = 0; i < cur_src->nelem; i++)
1684146040Stjr    {
1685146040Stjr      int prev_node = cur_src->elems[i];
1686146040Stjr      int naccepted = 0;
1687146040Stjr      int ret;
1688146040Stjr
1689146040Stjr#ifdef DEBUG
1690146040Stjr      re_token_type_t type = dfa->nodes[prev_node].type;
1691146040Stjr      assert (!IS_EPSILON_NODE (type));
1692146040Stjr#endif
1693146040Stjr#ifdef RE_ENABLE_I18N
1694146040Stjr      /* If the node may accept `multi byte'.  */
1695146040Stjr      if (dfa->nodes[prev_node].accept_mb)
1696146040Stjr	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1697146040Stjr					 str_idx, sctx->last_str_idx);
1698146040Stjr#endif /* RE_ENABLE_I18N */
1699146040Stjr
1700146040Stjr      /* We don't check backreferences here.
1701146040Stjr	 See update_cur_sifted_state().  */
1702146040Stjr      if (!naccepted
1703146040Stjr	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1704146040Stjr	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1705146040Stjr				  dfa->nexts[prev_node]))
1706146040Stjr	naccepted = 1;
1707146040Stjr
1708146040Stjr      if (naccepted == 0)
1709146040Stjr	continue;
1710146040Stjr
1711146040Stjr      if (sctx->limits.nelem)
1712146040Stjr	{
1713146040Stjr	  int to_idx = str_idx + naccepted;
1714146040Stjr	  if (check_dst_limits (mctx, &sctx->limits,
1715146040Stjr				dfa->nexts[prev_node], to_idx,
1716146040Stjr				prev_node, str_idx))
1717146040Stjr	    continue;
1718146040Stjr	}
1719146040Stjr      ret = re_node_set_insert (cur_dest, prev_node);
1720146040Stjr      if (BE (ret == -1, 0))
1721146040Stjr	return REG_ESPACE;
1722146040Stjr    }
1723146040Stjr
1724146040Stjr  return REG_NOERROR;
1725146040Stjr}
1726146040Stjr
1727146040Stjr/* Helper functions.  */
1728146040Stjr
1729146040Stjrstatic reg_errcode_t
1730250724Sjkiminternal_function
1731250724Sjkimclean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1732146040Stjr{
1733146040Stjr  int top = mctx->state_log_top;
1734146040Stjr
1735250724Sjkim  if ((next_state_log_idx >= mctx->input.bufs_len
1736250724Sjkim       && mctx->input.bufs_len < mctx->input.len)
1737146040Stjr      || (next_state_log_idx >= mctx->input.valid_len
1738146040Stjr	  && mctx->input.valid_len < mctx->input.len))
1739146040Stjr    {
1740146040Stjr      reg_errcode_t err;
1741251435Sjkim      err = extend_buffers (mctx, next_state_log_idx + 1);
1742146040Stjr      if (BE (err != REG_NOERROR, 0))
1743146040Stjr	return err;
1744146040Stjr    }
1745146040Stjr
1746146040Stjr  if (top < next_state_log_idx)
1747146040Stjr    {
1748146040Stjr      memset (mctx->state_log + top + 1, '\0',
1749146040Stjr	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1750146040Stjr      mctx->state_log_top = next_state_log_idx;
1751146040Stjr    }
1752146040Stjr  return REG_NOERROR;
1753146040Stjr}
1754146040Stjr
1755146040Stjrstatic reg_errcode_t
1756250724Sjkiminternal_function
1757250724Sjkimmerge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1758250724Sjkim		   re_dfastate_t **src, int num)
1759146040Stjr{
1760146040Stjr  int st_idx;
1761146040Stjr  reg_errcode_t err;
1762146040Stjr  for (st_idx = 0; st_idx < num; ++st_idx)
1763146040Stjr    {
1764146040Stjr      if (dst[st_idx] == NULL)
1765146040Stjr	dst[st_idx] = src[st_idx];
1766146040Stjr      else if (src[st_idx] != NULL)
1767146040Stjr	{
1768146040Stjr	  re_node_set merged_set;
1769146040Stjr	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1770146040Stjr					&src[st_idx]->nodes);
1771146040Stjr	  if (BE (err != REG_NOERROR, 0))
1772146040Stjr	    return err;
1773146040Stjr	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1774146040Stjr	  re_node_set_free (&merged_set);
1775146040Stjr	  if (BE (err != REG_NOERROR, 0))
1776146040Stjr	    return err;
1777146040Stjr	}
1778146040Stjr    }
1779146040Stjr  return REG_NOERROR;
1780146040Stjr}
1781146040Stjr
1782146040Stjrstatic reg_errcode_t
1783250724Sjkiminternal_function
1784250724Sjkimupdate_cur_sifted_state (const re_match_context_t *mctx,
1785250724Sjkim			 re_sift_context_t *sctx, int str_idx,
1786250724Sjkim			 re_node_set *dest_nodes)
1787146040Stjr{
1788250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1789250724Sjkim  reg_errcode_t err = REG_NOERROR;
1790146040Stjr  const re_node_set *candidates;
1791146040Stjr  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1792146040Stjr		: &mctx->state_log[str_idx]->nodes);
1793146040Stjr
1794146040Stjr  if (dest_nodes->nelem == 0)
1795146040Stjr    sctx->sifted_states[str_idx] = NULL;
1796146040Stjr  else
1797146040Stjr    {
1798146040Stjr      if (candidates)
1799146040Stjr	{
1800146040Stjr	  /* At first, add the nodes which can epsilon transit to a node in
1801146040Stjr	     DEST_NODE.  */
1802146040Stjr	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1803146040Stjr	  if (BE (err != REG_NOERROR, 0))
1804146040Stjr	    return err;
1805146040Stjr
1806146040Stjr	  /* Then, check the limitations in the current sift_context.  */
1807146040Stjr	  if (sctx->limits.nelem)
1808146040Stjr	    {
1809146040Stjr	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1810146040Stjr					 mctx->bkref_ents, str_idx);
1811146040Stjr	      if (BE (err != REG_NOERROR, 0))
1812146040Stjr		return err;
1813146040Stjr	    }
1814146040Stjr	}
1815146040Stjr
1816146040Stjr      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1817146040Stjr      if (BE (err != REG_NOERROR, 0))
1818146040Stjr	return err;
1819146040Stjr    }
1820146040Stjr
1821146040Stjr  if (candidates && mctx->state_log[str_idx]->has_backref)
1822146040Stjr    {
1823146040Stjr      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1824146040Stjr      if (BE (err != REG_NOERROR, 0))
1825146040Stjr	return err;
1826146040Stjr    }
1827146040Stjr  return REG_NOERROR;
1828146040Stjr}
1829146040Stjr
1830146040Stjrstatic reg_errcode_t
1831250724Sjkiminternal_function __attribute_warn_unused_result__
1832250724Sjkimadd_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1833250724Sjkim		       const re_node_set *candidates)
1834146040Stjr{
1835146040Stjr  reg_errcode_t err = REG_NOERROR;
1836146040Stjr  int i;
1837146040Stjr
1838146040Stjr  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1839146040Stjr  if (BE (err != REG_NOERROR, 0))
1840146040Stjr    return err;
1841146040Stjr
1842146040Stjr  if (!state->inveclosure.alloc)
1843146040Stjr    {
1844146040Stjr      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1845146040Stjr      if (BE (err != REG_NOERROR, 0))
1846250724Sjkim	return REG_ESPACE;
1847146040Stjr      for (i = 0; i < dest_nodes->nelem; i++)
1848250724Sjkim	{
1849250724Sjkim	  err = re_node_set_merge (&state->inveclosure,
1850250724Sjkim				   dfa->inveclosures + dest_nodes->elems[i]);
1851250724Sjkim	  if (BE (err != REG_NOERROR, 0))
1852250724Sjkim	    return REG_ESPACE;
1853250724Sjkim	}
1854146040Stjr    }
1855146040Stjr  return re_node_set_add_intersect (dest_nodes, candidates,
1856146040Stjr				    &state->inveclosure);
1857146040Stjr}
1858146040Stjr
1859146040Stjrstatic reg_errcode_t
1860250724Sjkiminternal_function
1861250724Sjkimsub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1862250724Sjkim		       const re_node_set *candidates)
1863146040Stjr{
1864146040Stjr    int ecl_idx;
1865146040Stjr    reg_errcode_t err;
1866146040Stjr    re_node_set *inv_eclosure = dfa->inveclosures + node;
1867146040Stjr    re_node_set except_nodes;
1868146040Stjr    re_node_set_init_empty (&except_nodes);
1869146040Stjr    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1870146040Stjr      {
1871146040Stjr	int cur_node = inv_eclosure->elems[ecl_idx];
1872146040Stjr	if (cur_node == node)
1873146040Stjr	  continue;
1874146040Stjr	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1875146040Stjr	  {
1876146040Stjr	    int edst1 = dfa->edests[cur_node].elems[0];
1877146040Stjr	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1878146040Stjr			 ? dfa->edests[cur_node].elems[1] : -1);
1879146040Stjr	    if ((!re_node_set_contains (inv_eclosure, edst1)
1880146040Stjr		 && re_node_set_contains (dest_nodes, edst1))
1881146040Stjr		|| (edst2 > 0
1882146040Stjr		    && !re_node_set_contains (inv_eclosure, edst2)
1883146040Stjr		    && re_node_set_contains (dest_nodes, edst2)))
1884146040Stjr	      {
1885146040Stjr		err = re_node_set_add_intersect (&except_nodes, candidates,
1886146040Stjr						 dfa->inveclosures + cur_node);
1887146040Stjr		if (BE (err != REG_NOERROR, 0))
1888146040Stjr		  {
1889146040Stjr		    re_node_set_free (&except_nodes);
1890146040Stjr		    return err;
1891146040Stjr		  }
1892146040Stjr	      }
1893146040Stjr	  }
1894146040Stjr      }
1895146040Stjr    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1896146040Stjr      {
1897146040Stjr	int cur_node = inv_eclosure->elems[ecl_idx];
1898146040Stjr	if (!re_node_set_contains (&except_nodes, cur_node))
1899146040Stjr	  {
1900146040Stjr	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1901146040Stjr	    re_node_set_remove_at (dest_nodes, idx);
1902146040Stjr	  }
1903146040Stjr      }
1904146040Stjr    re_node_set_free (&except_nodes);
1905146040Stjr    return REG_NOERROR;
1906146040Stjr}
1907146040Stjr
1908146040Stjrstatic int
1909250724Sjkiminternal_function
1910250724Sjkimcheck_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1911250724Sjkim		  int dst_node, int dst_idx, int src_node, int src_idx)
1912146040Stjr{
1913250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1914146040Stjr  int lim_idx, src_pos, dst_pos;
1915146040Stjr
1916146040Stjr  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1917146040Stjr  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1918146040Stjr  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1919146040Stjr    {
1920146040Stjr      int subexp_idx;
1921146040Stjr      struct re_backref_cache_entry *ent;
1922146040Stjr      ent = mctx->bkref_ents + limits->elems[lim_idx];
1923146040Stjr      subexp_idx = dfa->nodes[ent->node].opr.idx;
1924146040Stjr
1925146040Stjr      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1926146040Stjr					   subexp_idx, dst_node, dst_idx,
1927146040Stjr					   dst_bkref_idx);
1928146040Stjr      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1929146040Stjr					   subexp_idx, src_node, src_idx,
1930146040Stjr					   src_bkref_idx);
1931146040Stjr
1932146040Stjr      /* In case of:
1933146040Stjr	 <src> <dst> ( <subexp> )
1934146040Stjr	 ( <subexp> ) <src> <dst>
1935146040Stjr	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1936146040Stjr      if (src_pos == dst_pos)
1937146040Stjr	continue; /* This is unrelated limitation.  */
1938146040Stjr      else
1939146040Stjr	return 1;
1940146040Stjr    }
1941146040Stjr  return 0;
1942146040Stjr}
1943146040Stjr
1944146040Stjrstatic int
1945250724Sjkiminternal_function
1946250724Sjkimcheck_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1947250724Sjkim			     int subexp_idx, int from_node, int bkref_idx)
1948146040Stjr{
1949250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
1950250724Sjkim  const re_node_set *eclosures = dfa->eclosures + from_node;
1951146040Stjr  int node_idx;
1952146040Stjr
1953146040Stjr  /* Else, we are on the boundary: examine the nodes on the epsilon
1954146040Stjr     closure.  */
1955146040Stjr  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1956146040Stjr    {
1957146040Stjr      int node = eclosures->elems[node_idx];
1958146040Stjr      switch (dfa->nodes[node].type)
1959146040Stjr	{
1960146040Stjr	case OP_BACK_REF:
1961146040Stjr	  if (bkref_idx != -1)
1962146040Stjr	    {
1963146040Stjr	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1964146040Stjr	      do
1965250724Sjkim		{
1966146040Stjr		  int dst, cpos;
1967146040Stjr
1968146040Stjr		  if (ent->node != node)
1969146040Stjr		    continue;
1970146040Stjr
1971250724Sjkim		  if (subexp_idx < BITSET_WORD_BITS
1972250724Sjkim		      && !(ent->eps_reachable_subexps_map
1973250724Sjkim			   & ((bitset_word_t) 1 << subexp_idx)))
1974146040Stjr		    continue;
1975146040Stjr
1976146040Stjr		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1977146040Stjr		     OP_CLOSE_SUBEXP cases below.  But, if the
1978146040Stjr		     destination node is the same node as the source
1979146040Stjr		     node, don't recurse because it would cause an
1980146040Stjr		     infinite loop: a regex that exhibits this behavior
1981146040Stjr		     is ()\1*\1*  */
1982146040Stjr		  dst = dfa->edests[node].elems[0];
1983146040Stjr		  if (dst == from_node)
1984146040Stjr		    {
1985146040Stjr		      if (boundaries & 1)
1986250724Sjkim			return -1;
1987146040Stjr		      else /* if (boundaries & 2) */
1988250724Sjkim			return 0;
1989146040Stjr		    }
1990146040Stjr
1991146040Stjr		  cpos =
1992146040Stjr		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1993146040Stjr						 dst, bkref_idx);
1994146040Stjr		  if (cpos == -1 /* && (boundaries & 1) */)
1995146040Stjr		    return -1;
1996146040Stjr		  if (cpos == 0 && (boundaries & 2))
1997146040Stjr		    return 0;
1998146040Stjr
1999250724Sjkim		  if (subexp_idx < BITSET_WORD_BITS)
2000250724Sjkim		    ent->eps_reachable_subexps_map
2001250724Sjkim		      &= ~((bitset_word_t) 1 << subexp_idx);
2002250724Sjkim		}
2003146040Stjr	      while (ent++->more);
2004146040Stjr	    }
2005146040Stjr	  break;
2006146040Stjr
2007146040Stjr	case OP_OPEN_SUBEXP:
2008146040Stjr	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2009146040Stjr	    return -1;
2010146040Stjr	  break;
2011146040Stjr
2012146040Stjr	case OP_CLOSE_SUBEXP:
2013146040Stjr	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2014146040Stjr	    return 0;
2015146040Stjr	  break;
2016146040Stjr
2017146040Stjr	default:
2018146040Stjr	    break;
2019146040Stjr	}
2020146040Stjr    }
2021146040Stjr
2022146040Stjr  return (boundaries & 2) ? 1 : 0;
2023146040Stjr}
2024146040Stjr
2025146040Stjrstatic int
2026250724Sjkiminternal_function
2027250724Sjkimcheck_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2028250724Sjkim			   int subexp_idx, int from_node, int str_idx,
2029250724Sjkim			   int bkref_idx)
2030146040Stjr{
2031146040Stjr  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2032146040Stjr  int boundaries;
2033146040Stjr
2034146040Stjr  /* If we are outside the range of the subexpression, return -1 or 1.  */
2035146040Stjr  if (str_idx < lim->subexp_from)
2036146040Stjr    return -1;
2037146040Stjr
2038146040Stjr  if (lim->subexp_to < str_idx)
2039146040Stjr    return 1;
2040146040Stjr
2041146040Stjr  /* If we are within the subexpression, return 0.  */
2042146040Stjr  boundaries = (str_idx == lim->subexp_from);
2043146040Stjr  boundaries |= (str_idx == lim->subexp_to) << 1;
2044146040Stjr  if (boundaries == 0)
2045146040Stjr    return 0;
2046146040Stjr
2047146040Stjr  /* Else, examine epsilon closure.  */
2048146040Stjr  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2049146040Stjr				      from_node, bkref_idx);
2050146040Stjr}
2051146040Stjr
2052146040Stjr/* Check the limitations of sub expressions LIMITS, and remove the nodes
2053146040Stjr   which are against limitations from DEST_NODES. */
2054146040Stjr
2055146040Stjrstatic reg_errcode_t
2056250724Sjkiminternal_function
2057250724Sjkimcheck_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2058250724Sjkim		     const re_node_set *candidates, re_node_set *limits,
2059250724Sjkim		     struct re_backref_cache_entry *bkref_ents, int str_idx)
2060146040Stjr{
2061146040Stjr  reg_errcode_t err;
2062146040Stjr  int node_idx, lim_idx;
2063146040Stjr
2064146040Stjr  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2065146040Stjr    {
2066146040Stjr      int subexp_idx;
2067146040Stjr      struct re_backref_cache_entry *ent;
2068146040Stjr      ent = bkref_ents + limits->elems[lim_idx];
2069146040Stjr
2070146040Stjr      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2071146040Stjr	continue; /* This is unrelated limitation.  */
2072146040Stjr
2073146040Stjr      subexp_idx = dfa->nodes[ent->node].opr.idx;
2074146040Stjr      if (ent->subexp_to == str_idx)
2075146040Stjr	{
2076146040Stjr	  int ops_node = -1;
2077146040Stjr	  int cls_node = -1;
2078146040Stjr	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2079146040Stjr	    {
2080146040Stjr	      int node = dest_nodes->elems[node_idx];
2081146040Stjr	      re_token_type_t type = dfa->nodes[node].type;
2082146040Stjr	      if (type == OP_OPEN_SUBEXP
2083146040Stjr		  && subexp_idx == dfa->nodes[node].opr.idx)
2084146040Stjr		ops_node = node;
2085146040Stjr	      else if (type == OP_CLOSE_SUBEXP
2086146040Stjr		       && subexp_idx == dfa->nodes[node].opr.idx)
2087146040Stjr		cls_node = node;
2088146040Stjr	    }
2089146040Stjr
2090146040Stjr	  /* Check the limitation of the open subexpression.  */
2091146040Stjr	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2092146040Stjr	  if (ops_node >= 0)
2093146040Stjr	    {
2094146040Stjr	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2095146040Stjr					   candidates);
2096146040Stjr	      if (BE (err != REG_NOERROR, 0))
2097146040Stjr		return err;
2098146040Stjr	    }
2099146040Stjr
2100146040Stjr	  /* Check the limitation of the close subexpression.  */
2101146040Stjr	  if (cls_node >= 0)
2102146040Stjr	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2103146040Stjr	      {
2104146040Stjr		int node = dest_nodes->elems[node_idx];
2105146040Stjr		if (!re_node_set_contains (dfa->inveclosures + node,
2106146040Stjr					   cls_node)
2107146040Stjr		    && !re_node_set_contains (dfa->eclosures + node,
2108146040Stjr					      cls_node))
2109146040Stjr		  {
2110146040Stjr		    /* It is against this limitation.
2111146040Stjr		       Remove it form the current sifted state.  */
2112146040Stjr		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2113146040Stjr						 candidates);
2114146040Stjr		    if (BE (err != REG_NOERROR, 0))
2115146040Stjr		      return err;
2116146040Stjr		    --node_idx;
2117146040Stjr		  }
2118146040Stjr	      }
2119146040Stjr	}
2120146040Stjr      else /* (ent->subexp_to != str_idx)  */
2121146040Stjr	{
2122146040Stjr	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2123146040Stjr	    {
2124146040Stjr	      int node = dest_nodes->elems[node_idx];
2125146040Stjr	      re_token_type_t type = dfa->nodes[node].type;
2126146040Stjr	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2127146040Stjr		{
2128146040Stjr		  if (subexp_idx != dfa->nodes[node].opr.idx)
2129146040Stjr		    continue;
2130146040Stjr		  /* It is against this limitation.
2131146040Stjr		     Remove it form the current sifted state.  */
2132146040Stjr		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2133146040Stjr					       candidates);
2134146040Stjr		  if (BE (err != REG_NOERROR, 0))
2135146040Stjr		    return err;
2136146040Stjr		}
2137146040Stjr	    }
2138146040Stjr	}
2139146040Stjr    }
2140146040Stjr  return REG_NOERROR;
2141146040Stjr}
2142146040Stjr
2143146040Stjrstatic reg_errcode_t
2144250724Sjkiminternal_function __attribute_warn_unused_result__
2145250724Sjkimsift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2146250724Sjkim		   int str_idx, const re_node_set *candidates)
2147146040Stjr{
2148250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2149146040Stjr  reg_errcode_t err;
2150146040Stjr  int node_idx, node;
2151146040Stjr  re_sift_context_t local_sctx;
2152146040Stjr  int first_idx = search_cur_bkref_entry (mctx, str_idx);
2153146040Stjr
2154146040Stjr  if (first_idx == -1)
2155146040Stjr    return REG_NOERROR;
2156146040Stjr
2157146040Stjr  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2158146040Stjr
2159146040Stjr  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2160146040Stjr    {
2161146040Stjr      int enabled_idx;
2162146040Stjr      re_token_type_t type;
2163146040Stjr      struct re_backref_cache_entry *entry;
2164146040Stjr      node = candidates->elems[node_idx];
2165146040Stjr      type = dfa->nodes[node].type;
2166146040Stjr      /* Avoid infinite loop for the REs like "()\1+".  */
2167146040Stjr      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2168146040Stjr	continue;
2169146040Stjr      if (type != OP_BACK_REF)
2170146040Stjr	continue;
2171146040Stjr
2172146040Stjr      entry = mctx->bkref_ents + first_idx;
2173146040Stjr      enabled_idx = first_idx;
2174146040Stjr      do
2175146040Stjr	{
2176250724Sjkim	  int subexp_len;
2177250724Sjkim	  int to_idx;
2178250724Sjkim	  int dst_node;
2179250724Sjkim	  int ret;
2180146040Stjr	  re_dfastate_t *cur_state;
2181146040Stjr
2182146040Stjr	  if (entry->node != node)
2183146040Stjr	    continue;
2184146040Stjr	  subexp_len = entry->subexp_to - entry->subexp_from;
2185146040Stjr	  to_idx = str_idx + subexp_len;
2186146040Stjr	  dst_node = (subexp_len ? dfa->nexts[node]
2187146040Stjr		      : dfa->edests[node].elems[0]);
2188146040Stjr
2189146040Stjr	  if (to_idx > sctx->last_str_idx
2190146040Stjr	      || sctx->sifted_states[to_idx] == NULL
2191146040Stjr	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2192146040Stjr	      || check_dst_limits (mctx, &sctx->limits, node,
2193146040Stjr				   str_idx, dst_node, to_idx))
2194146040Stjr	    continue;
2195146040Stjr
2196146040Stjr	  if (local_sctx.sifted_states == NULL)
2197146040Stjr	    {
2198146040Stjr	      local_sctx = *sctx;
2199146040Stjr	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2200146040Stjr	      if (BE (err != REG_NOERROR, 0))
2201146040Stjr		goto free_return;
2202146040Stjr	    }
2203146040Stjr	  local_sctx.last_node = node;
2204146040Stjr	  local_sctx.last_str_idx = str_idx;
2205250724Sjkim	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2206250724Sjkim	  if (BE (ret < 0, 0))
2207146040Stjr	    {
2208146040Stjr	      err = REG_ESPACE;
2209146040Stjr	      goto free_return;
2210146040Stjr	    }
2211146040Stjr	  cur_state = local_sctx.sifted_states[str_idx];
2212146040Stjr	  err = sift_states_backward (mctx, &local_sctx);
2213146040Stjr	  if (BE (err != REG_NOERROR, 0))
2214146040Stjr	    goto free_return;
2215146040Stjr	  if (sctx->limited_states != NULL)
2216146040Stjr	    {
2217146040Stjr	      err = merge_state_array (dfa, sctx->limited_states,
2218146040Stjr				       local_sctx.sifted_states,
2219146040Stjr				       str_idx + 1);
2220146040Stjr	      if (BE (err != REG_NOERROR, 0))
2221146040Stjr		goto free_return;
2222146040Stjr	    }
2223146040Stjr	  local_sctx.sifted_states[str_idx] = cur_state;
2224146040Stjr	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2225146040Stjr
2226146040Stjr	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2227250724Sjkim	  entry = mctx->bkref_ents + enabled_idx;
2228146040Stjr	}
2229146040Stjr      while (enabled_idx++, entry++->more);
2230146040Stjr    }
2231146040Stjr  err = REG_NOERROR;
2232146040Stjr free_return:
2233146040Stjr  if (local_sctx.sifted_states != NULL)
2234146040Stjr    {
2235146040Stjr      re_node_set_free (&local_sctx.limits);
2236146040Stjr    }
2237146040Stjr
2238146040Stjr  return err;
2239146040Stjr}
2240146040Stjr
2241146040Stjr
2242146040Stjr#ifdef RE_ENABLE_I18N
2243146040Stjrstatic int
2244250724Sjkiminternal_function
2245250724Sjkimsift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2246250724Sjkim		     int node_idx, int str_idx, int max_str_idx)
2247146040Stjr{
2248250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2249146040Stjr  int naccepted;
2250146040Stjr  /* Check the node can accept `multi byte'.  */
2251146040Stjr  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2252146040Stjr  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2253146040Stjr      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2254146040Stjr			    dfa->nexts[node_idx]))
2255146040Stjr    /* The node can't accept the `multi byte', or the
2256146040Stjr       destination was already thrown away, then the node
2257146040Stjr       could't accept the current input `multi byte'.   */
2258146040Stjr    naccepted = 0;
2259146040Stjr  /* Otherwise, it is sure that the node could accept
2260146040Stjr     `naccepted' bytes input.  */
2261146040Stjr  return naccepted;
2262146040Stjr}
2263146040Stjr#endif /* RE_ENABLE_I18N */
2264146040Stjr
2265146040Stjr
2266146040Stjr/* Functions for state transition.  */
2267146040Stjr
2268146040Stjr/* Return the next state to which the current state STATE will transit by
2269146040Stjr   accepting the current input byte, and update STATE_LOG if necessary.
2270146040Stjr   If STATE can accept a multibyte char/collating element/back reference
2271146040Stjr   update the destination of STATE_LOG.  */
2272146040Stjr
2273146040Stjrstatic re_dfastate_t *
2274250724Sjkiminternal_function __attribute_warn_unused_result__
2275250724Sjkimtransit_state (reg_errcode_t *err, re_match_context_t *mctx,
2276250724Sjkim	       re_dfastate_t *state)
2277146040Stjr{
2278146040Stjr  re_dfastate_t **trtable;
2279146040Stjr  unsigned char ch;
2280146040Stjr
2281146040Stjr#ifdef RE_ENABLE_I18N
2282146040Stjr  /* If the current state can accept multibyte.  */
2283146040Stjr  if (BE (state->accept_mb, 0))
2284146040Stjr    {
2285146040Stjr      *err = transit_state_mb (mctx, state);
2286146040Stjr      if (BE (*err != REG_NOERROR, 0))
2287146040Stjr	return NULL;
2288146040Stjr    }
2289146040Stjr#endif /* RE_ENABLE_I18N */
2290146040Stjr
2291146040Stjr  /* Then decide the next state with the single byte.  */
2292146040Stjr#if 0
2293146040Stjr  if (0)
2294146040Stjr    /* don't use transition table  */
2295146040Stjr    return transit_state_sb (err, mctx, state);
2296146040Stjr#endif
2297146040Stjr
2298146040Stjr  /* Use transition table  */
2299146040Stjr  ch = re_string_fetch_byte (&mctx->input);
2300146040Stjr  for (;;)
2301146040Stjr    {
2302146040Stjr      trtable = state->trtable;
2303146040Stjr      if (BE (trtable != NULL, 1))
2304146040Stjr	return trtable[ch];
2305146040Stjr
2306146040Stjr      trtable = state->word_trtable;
2307146040Stjr      if (BE (trtable != NULL, 1))
2308250724Sjkim	{
2309146040Stjr	  unsigned int context;
2310146040Stjr	  context
2311146040Stjr	    = re_string_context_at (&mctx->input,
2312146040Stjr				    re_string_cur_idx (&mctx->input) - 1,
2313146040Stjr				    mctx->eflags);
2314146040Stjr	  if (IS_WORD_CONTEXT (context))
2315146040Stjr	    return trtable[ch + SBC_MAX];
2316146040Stjr	  else
2317146040Stjr	    return trtable[ch];
2318146040Stjr	}
2319146040Stjr
2320146040Stjr      if (!build_trtable (mctx->dfa, state))
2321146040Stjr	{
2322146040Stjr	  *err = REG_ESPACE;
2323146040Stjr	  return NULL;
2324146040Stjr	}
2325146040Stjr
2326146040Stjr      /* Retry, we now have a transition table.  */
2327146040Stjr    }
2328146040Stjr}
2329146040Stjr
2330146040Stjr/* Update the state_log if we need */
2331146040Stjrre_dfastate_t *
2332250724Sjkiminternal_function
2333250724Sjkimmerge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2334250724Sjkim		      re_dfastate_t *next_state)
2335146040Stjr{
2336250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2337146040Stjr  int cur_idx = re_string_cur_idx (&mctx->input);
2338146040Stjr
2339146040Stjr  if (cur_idx > mctx->state_log_top)
2340146040Stjr    {
2341146040Stjr      mctx->state_log[cur_idx] = next_state;
2342146040Stjr      mctx->state_log_top = cur_idx;
2343146040Stjr    }
2344146040Stjr  else if (mctx->state_log[cur_idx] == 0)
2345146040Stjr    {
2346146040Stjr      mctx->state_log[cur_idx] = next_state;
2347146040Stjr    }
2348146040Stjr  else
2349146040Stjr    {
2350146040Stjr      re_dfastate_t *pstate;
2351146040Stjr      unsigned int context;
2352146040Stjr      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2353146040Stjr      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2354250724Sjkim	 the destination of a multibyte char/collating element/
2355250724Sjkim	 back reference.  Then the next state is the union set of
2356250724Sjkim	 these destinations and the results of the transition table.  */
2357146040Stjr      pstate = mctx->state_log[cur_idx];
2358146040Stjr      log_nodes = pstate->entrance_nodes;
2359146040Stjr      if (next_state != NULL)
2360250724Sjkim	{
2361250724Sjkim	  table_nodes = next_state->entrance_nodes;
2362250724Sjkim	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2363146040Stjr					     log_nodes);
2364250724Sjkim	  if (BE (*err != REG_NOERROR, 0))
2365146040Stjr	    return NULL;
2366250724Sjkim	}
2367146040Stjr      else
2368250724Sjkim	next_nodes = *log_nodes;
2369146040Stjr      /* Note: We already add the nodes of the initial state,
2370146040Stjr	 then we don't need to add them here.  */
2371146040Stjr
2372146040Stjr      context = re_string_context_at (&mctx->input,
2373146040Stjr				      re_string_cur_idx (&mctx->input) - 1,
2374146040Stjr				      mctx->eflags);
2375146040Stjr      next_state = mctx->state_log[cur_idx]
2376250724Sjkim	= re_acquire_state_context (err, dfa, &next_nodes, context);
2377146040Stjr      /* We don't need to check errors here, since the return value of
2378250724Sjkim	 this function is next_state and ERR is already set.  */
2379146040Stjr
2380146040Stjr      if (table_nodes != NULL)
2381250724Sjkim	re_node_set_free (&next_nodes);
2382146040Stjr    }
2383146040Stjr
2384146040Stjr  if (BE (dfa->nbackref, 0) && next_state != NULL)
2385146040Stjr    {
2386146040Stjr      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2387146040Stjr	 later.  We must check them here, since the back references in the
2388146040Stjr	 next state might use them.  */
2389146040Stjr      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2390146040Stjr					cur_idx);
2391146040Stjr      if (BE (*err != REG_NOERROR, 0))
2392146040Stjr	return NULL;
2393146040Stjr
2394146040Stjr      /* If the next state has back references.  */
2395146040Stjr      if (next_state->has_backref)
2396146040Stjr	{
2397146040Stjr	  *err = transit_state_bkref (mctx, &next_state->nodes);
2398146040Stjr	  if (BE (*err != REG_NOERROR, 0))
2399146040Stjr	    return NULL;
2400146040Stjr	  next_state = mctx->state_log[cur_idx];
2401146040Stjr	}
2402146040Stjr    }
2403146040Stjr
2404146040Stjr  return next_state;
2405146040Stjr}
2406146040Stjr
2407146040Stjr/* Skip bytes in the input that correspond to part of a
2408146040Stjr   multi-byte match, then look in the log for a state
2409146040Stjr   from which to restart matching.  */
2410146040Stjrre_dfastate_t *
2411250724Sjkiminternal_function
2412250724Sjkimfind_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2413146040Stjr{
2414250724Sjkim  re_dfastate_t *cur_state;
2415146040Stjr  do
2416146040Stjr    {
2417146040Stjr      int max = mctx->state_log_top;
2418146040Stjr      int cur_str_idx = re_string_cur_idx (&mctx->input);
2419146040Stjr
2420146040Stjr      do
2421146040Stjr	{
2422250724Sjkim	  if (++cur_str_idx > max)
2423250724Sjkim	    return NULL;
2424250724Sjkim	  re_string_skip_bytes (&mctx->input, 1);
2425146040Stjr	}
2426146040Stjr      while (mctx->state_log[cur_str_idx] == NULL);
2427146040Stjr
2428146040Stjr      cur_state = merge_state_with_log (err, mctx, NULL);
2429146040Stjr    }
2430250724Sjkim  while (*err == REG_NOERROR && cur_state == NULL);
2431146040Stjr  return cur_state;
2432146040Stjr}
2433146040Stjr
2434146040Stjr/* Helper functions for transit_state.  */
2435146040Stjr
2436146040Stjr/* From the node set CUR_NODES, pick up the nodes whose types are
2437146040Stjr   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2438146040Stjr   expression. And register them to use them later for evaluating the
2439146040Stjr   correspoding back references.  */
2440146040Stjr
2441146040Stjrstatic reg_errcode_t
2442250724Sjkiminternal_function
2443250724Sjkimcheck_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2444250724Sjkim			   int str_idx)
2445146040Stjr{
2446250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2447146040Stjr  int node_idx;
2448146040Stjr  reg_errcode_t err;
2449146040Stjr
2450146040Stjr  /* TODO: This isn't efficient.
2451146040Stjr	   Because there might be more than one nodes whose types are
2452146040Stjr	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2453146040Stjr	   nodes.
2454146040Stjr	   E.g. RE: (a){2}  */
2455146040Stjr  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2456146040Stjr    {
2457146040Stjr      int node = cur_nodes->elems[node_idx];
2458146040Stjr      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2459250724Sjkim	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2460250724Sjkim	  && (dfa->used_bkref_map
2461250724Sjkim	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2462146040Stjr	{
2463146040Stjr	  err = match_ctx_add_subtop (mctx, node, str_idx);
2464146040Stjr	  if (BE (err != REG_NOERROR, 0))
2465146040Stjr	    return err;
2466146040Stjr	}
2467146040Stjr    }
2468146040Stjr  return REG_NOERROR;
2469146040Stjr}
2470146040Stjr
2471146040Stjr#if 0
2472146040Stjr/* Return the next state to which the current state STATE will transit by
2473146040Stjr   accepting the current input byte.  */
2474146040Stjr
2475146040Stjrstatic re_dfastate_t *
2476250724Sjkimtransit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2477250724Sjkim		  re_dfastate_t *state)
2478146040Stjr{
2479250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2480146040Stjr  re_node_set next_nodes;
2481146040Stjr  re_dfastate_t *next_state;
2482146040Stjr  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2483146040Stjr  unsigned int context;
2484146040Stjr
2485146040Stjr  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2486146040Stjr  if (BE (*err != REG_NOERROR, 0))
2487146040Stjr    return NULL;
2488146040Stjr  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2489146040Stjr    {
2490146040Stjr      int cur_node = state->nodes.elems[node_cnt];
2491146040Stjr      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2492146040Stjr	{
2493146040Stjr	  *err = re_node_set_merge (&next_nodes,
2494146040Stjr				    dfa->eclosures + dfa->nexts[cur_node]);
2495146040Stjr	  if (BE (*err != REG_NOERROR, 0))
2496146040Stjr	    {
2497146040Stjr	      re_node_set_free (&next_nodes);
2498146040Stjr	      return NULL;
2499146040Stjr	    }
2500146040Stjr	}
2501146040Stjr    }
2502146040Stjr  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2503146040Stjr  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2504146040Stjr  /* We don't need to check errors here, since the return value of
2505146040Stjr     this function is next_state and ERR is already set.  */
2506146040Stjr
2507146040Stjr  re_node_set_free (&next_nodes);
2508146040Stjr  re_string_skip_bytes (&mctx->input, 1);
2509146040Stjr  return next_state;
2510146040Stjr}
2511146040Stjr#endif
2512146040Stjr
2513146040Stjr#ifdef RE_ENABLE_I18N
2514146040Stjrstatic reg_errcode_t
2515250724Sjkiminternal_function
2516250724Sjkimtransit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2517146040Stjr{
2518250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2519146040Stjr  reg_errcode_t err;
2520146040Stjr  int i;
2521146040Stjr
2522146040Stjr  for (i = 0; i < pstate->nodes.nelem; ++i)
2523146040Stjr    {
2524146040Stjr      re_node_set dest_nodes, *new_nodes;
2525146040Stjr      int cur_node_idx = pstate->nodes.elems[i];
2526146040Stjr      int naccepted, dest_idx;
2527146040Stjr      unsigned int context;
2528146040Stjr      re_dfastate_t *dest_state;
2529146040Stjr
2530146040Stjr      if (!dfa->nodes[cur_node_idx].accept_mb)
2531250724Sjkim	continue;
2532146040Stjr
2533146040Stjr      if (dfa->nodes[cur_node_idx].constraint)
2534146040Stjr	{
2535146040Stjr	  context = re_string_context_at (&mctx->input,
2536146040Stjr					  re_string_cur_idx (&mctx->input),
2537146040Stjr					  mctx->eflags);
2538146040Stjr	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2539146040Stjr					   context))
2540146040Stjr	    continue;
2541146040Stjr	}
2542146040Stjr
2543146040Stjr      /* How many bytes the node can accept?  */
2544146040Stjr      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2545146040Stjr					   re_string_cur_idx (&mctx->input));
2546146040Stjr      if (naccepted == 0)
2547146040Stjr	continue;
2548146040Stjr
2549146040Stjr      /* The node can accepts `naccepted' bytes.  */
2550146040Stjr      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2551146040Stjr      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2552146040Stjr			       : mctx->max_mb_elem_len);
2553146040Stjr      err = clean_state_log_if_needed (mctx, dest_idx);
2554146040Stjr      if (BE (err != REG_NOERROR, 0))
2555146040Stjr	return err;
2556146040Stjr#ifdef DEBUG
2557146040Stjr      assert (dfa->nexts[cur_node_idx] != -1);
2558146040Stjr#endif
2559146040Stjr      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2560146040Stjr
2561146040Stjr      dest_state = mctx->state_log[dest_idx];
2562146040Stjr      if (dest_state == NULL)
2563146040Stjr	dest_nodes = *new_nodes;
2564146040Stjr      else
2565146040Stjr	{
2566146040Stjr	  err = re_node_set_init_union (&dest_nodes,
2567146040Stjr					dest_state->entrance_nodes, new_nodes);
2568146040Stjr	  if (BE (err != REG_NOERROR, 0))
2569146040Stjr	    return err;
2570146040Stjr	}
2571250724Sjkim      context = re_string_context_at (&mctx->input, dest_idx - 1,
2572250724Sjkim				      mctx->eflags);
2573146040Stjr      mctx->state_log[dest_idx]
2574146040Stjr	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2575146040Stjr      if (dest_state != NULL)
2576146040Stjr	re_node_set_free (&dest_nodes);
2577146040Stjr      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2578146040Stjr	return err;
2579146040Stjr    }
2580146040Stjr  return REG_NOERROR;
2581146040Stjr}
2582146040Stjr#endif /* RE_ENABLE_I18N */
2583146040Stjr
2584146040Stjrstatic reg_errcode_t
2585250724Sjkiminternal_function
2586250724Sjkimtransit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2587146040Stjr{
2588250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2589146040Stjr  reg_errcode_t err;
2590146040Stjr  int i;
2591146040Stjr  int cur_str_idx = re_string_cur_idx (&mctx->input);
2592146040Stjr
2593146040Stjr  for (i = 0; i < nodes->nelem; ++i)
2594146040Stjr    {
2595146040Stjr      int dest_str_idx, prev_nelem, bkc_idx;
2596146040Stjr      int node_idx = nodes->elems[i];
2597146040Stjr      unsigned int context;
2598146040Stjr      const re_token_t *node = dfa->nodes + node_idx;
2599146040Stjr      re_node_set *new_dest_nodes;
2600146040Stjr
2601146040Stjr      /* Check whether `node' is a backreference or not.  */
2602146040Stjr      if (node->type != OP_BACK_REF)
2603146040Stjr	continue;
2604146040Stjr
2605146040Stjr      if (node->constraint)
2606146040Stjr	{
2607146040Stjr	  context = re_string_context_at (&mctx->input, cur_str_idx,
2608146040Stjr					  mctx->eflags);
2609146040Stjr	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2610146040Stjr	    continue;
2611146040Stjr	}
2612146040Stjr
2613146040Stjr      /* `node' is a backreference.
2614146040Stjr	 Check the substring which the substring matched.  */
2615146040Stjr      bkc_idx = mctx->nbkref_ents;
2616146040Stjr      err = get_subexp (mctx, node_idx, cur_str_idx);
2617146040Stjr      if (BE (err != REG_NOERROR, 0))
2618146040Stjr	goto free_return;
2619146040Stjr
2620146040Stjr      /* And add the epsilon closures (which is `new_dest_nodes') of
2621146040Stjr	 the backreference to appropriate state_log.  */
2622146040Stjr#ifdef DEBUG
2623146040Stjr      assert (dfa->nexts[node_idx] != -1);
2624146040Stjr#endif
2625146040Stjr      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2626146040Stjr	{
2627146040Stjr	  int subexp_len;
2628146040Stjr	  re_dfastate_t *dest_state;
2629146040Stjr	  struct re_backref_cache_entry *bkref_ent;
2630146040Stjr	  bkref_ent = mctx->bkref_ents + bkc_idx;
2631146040Stjr	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2632146040Stjr	    continue;
2633146040Stjr	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2634146040Stjr	  new_dest_nodes = (subexp_len == 0
2635146040Stjr			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2636146040Stjr			    : dfa->eclosures + dfa->nexts[node_idx]);
2637146040Stjr	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2638146040Stjr			  - bkref_ent->subexp_from);
2639146040Stjr	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2640146040Stjr					  mctx->eflags);
2641146040Stjr	  dest_state = mctx->state_log[dest_str_idx];
2642146040Stjr	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2643146040Stjr			: mctx->state_log[cur_str_idx]->nodes.nelem);
2644146040Stjr	  /* Add `new_dest_node' to state_log.  */
2645146040Stjr	  if (dest_state == NULL)
2646146040Stjr	    {
2647146040Stjr	      mctx->state_log[dest_str_idx]
2648146040Stjr		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2649146040Stjr					    context);
2650146040Stjr	      if (BE (mctx->state_log[dest_str_idx] == NULL
2651146040Stjr		      && err != REG_NOERROR, 0))
2652146040Stjr		goto free_return;
2653146040Stjr	    }
2654146040Stjr	  else
2655146040Stjr	    {
2656146040Stjr	      re_node_set dest_nodes;
2657146040Stjr	      err = re_node_set_init_union (&dest_nodes,
2658146040Stjr					    dest_state->entrance_nodes,
2659146040Stjr					    new_dest_nodes);
2660146040Stjr	      if (BE (err != REG_NOERROR, 0))
2661146040Stjr		{
2662146040Stjr		  re_node_set_free (&dest_nodes);
2663146040Stjr		  goto free_return;
2664146040Stjr		}
2665146040Stjr	      mctx->state_log[dest_str_idx]
2666146040Stjr		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2667146040Stjr	      re_node_set_free (&dest_nodes);
2668146040Stjr	      if (BE (mctx->state_log[dest_str_idx] == NULL
2669146040Stjr		      && err != REG_NOERROR, 0))
2670146040Stjr		goto free_return;
2671146040Stjr	    }
2672146040Stjr	  /* We need to check recursively if the backreference can epsilon
2673146040Stjr	     transit.  */
2674146040Stjr	  if (subexp_len == 0
2675146040Stjr	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2676146040Stjr	    {
2677146040Stjr	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2678146040Stjr					       cur_str_idx);
2679146040Stjr	      if (BE (err != REG_NOERROR, 0))
2680146040Stjr		goto free_return;
2681146040Stjr	      err = transit_state_bkref (mctx, new_dest_nodes);
2682146040Stjr	      if (BE (err != REG_NOERROR, 0))
2683146040Stjr		goto free_return;
2684146040Stjr	    }
2685146040Stjr	}
2686146040Stjr    }
2687146040Stjr  err = REG_NOERROR;
2688146040Stjr free_return:
2689146040Stjr  return err;
2690146040Stjr}
2691146040Stjr
2692146040Stjr/* Enumerate all the candidates which the backreference BKREF_NODE can match
2693146040Stjr   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2694146040Stjr   Note that we might collect inappropriate candidates here.
2695146040Stjr   However, the cost of checking them strictly here is too high, then we
2696146040Stjr   delay these checking for prune_impossible_nodes().  */
2697146040Stjr
2698146040Stjrstatic reg_errcode_t
2699250724Sjkiminternal_function __attribute_warn_unused_result__
2700250724Sjkimget_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2701146040Stjr{
2702250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2703146040Stjr  int subexp_num, sub_top_idx;
2704146040Stjr  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2705146040Stjr  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2706146040Stjr  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2707146040Stjr  if (cache_idx != -1)
2708146040Stjr    {
2709250724Sjkim      const struct re_backref_cache_entry *entry
2710250724Sjkim	= mctx->bkref_ents + cache_idx;
2711146040Stjr      do
2712250724Sjkim	if (entry->node == bkref_node)
2713146040Stjr	  return REG_NOERROR; /* We already checked it.  */
2714146040Stjr      while (entry++->more);
2715146040Stjr    }
2716146040Stjr
2717146040Stjr  subexp_num = dfa->nodes[bkref_node].opr.idx;
2718146040Stjr
2719146040Stjr  /* For each sub expression  */
2720146040Stjr  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2721146040Stjr    {
2722146040Stjr      reg_errcode_t err;
2723146040Stjr      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2724146040Stjr      re_sub_match_last_t *sub_last;
2725146040Stjr      int sub_last_idx, sl_str, bkref_str_off;
2726146040Stjr
2727146040Stjr      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2728146040Stjr	continue; /* It isn't related.  */
2729146040Stjr
2730146040Stjr      sl_str = sub_top->str_idx;
2731146040Stjr      bkref_str_off = bkref_str_idx;
2732146040Stjr      /* At first, check the last node of sub expressions we already
2733146040Stjr	 evaluated.  */
2734146040Stjr      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2735146040Stjr	{
2736146040Stjr	  int sl_str_diff;
2737146040Stjr	  sub_last = sub_top->lasts[sub_last_idx];
2738146040Stjr	  sl_str_diff = sub_last->str_idx - sl_str;
2739146040Stjr	  /* The matched string by the sub expression match with the substring
2740146040Stjr	     at the back reference?  */
2741146040Stjr	  if (sl_str_diff > 0)
2742146040Stjr	    {
2743146040Stjr	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2744146040Stjr		{
2745146040Stjr		  /* Not enough chars for a successful match.  */
2746146040Stjr		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2747146040Stjr		    break;
2748146040Stjr
2749146040Stjr		  err = clean_state_log_if_needed (mctx,
2750146040Stjr						   bkref_str_off
2751146040Stjr						   + sl_str_diff);
2752146040Stjr		  if (BE (err != REG_NOERROR, 0))
2753146040Stjr		    return err;
2754146040Stjr		  buf = (const char *) re_string_get_buffer (&mctx->input);
2755146040Stjr		}
2756146040Stjr	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2757250724Sjkim		/* We don't need to search this sub expression any more.  */
2758250724Sjkim		break;
2759146040Stjr	    }
2760146040Stjr	  bkref_str_off += sl_str_diff;
2761146040Stjr	  sl_str += sl_str_diff;
2762146040Stjr	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2763146040Stjr				bkref_str_idx);
2764146040Stjr
2765146040Stjr	  /* Reload buf, since the preceding call might have reallocated
2766146040Stjr	     the buffer.  */
2767146040Stjr	  buf = (const char *) re_string_get_buffer (&mctx->input);
2768146040Stjr
2769146040Stjr	  if (err == REG_NOMATCH)
2770146040Stjr	    continue;
2771146040Stjr	  if (BE (err != REG_NOERROR, 0))
2772146040Stjr	    return err;
2773146040Stjr	}
2774146040Stjr
2775146040Stjr      if (sub_last_idx < sub_top->nlasts)
2776146040Stjr	continue;
2777146040Stjr      if (sub_last_idx > 0)
2778146040Stjr	++sl_str;
2779146040Stjr      /* Then, search for the other last nodes of the sub expression.  */
2780146040Stjr      for (; sl_str <= bkref_str_idx; ++sl_str)
2781146040Stjr	{
2782146040Stjr	  int cls_node, sl_str_off;
2783146040Stjr	  const re_node_set *nodes;
2784146040Stjr	  sl_str_off = sl_str - sub_top->str_idx;
2785146040Stjr	  /* The matched string by the sub expression match with the substring
2786146040Stjr	     at the back reference?  */
2787146040Stjr	  if (sl_str_off > 0)
2788146040Stjr	    {
2789146040Stjr	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2790146040Stjr		{
2791146040Stjr		  /* If we are at the end of the input, we cannot match.  */
2792146040Stjr		  if (bkref_str_off >= mctx->input.len)
2793146040Stjr		    break;
2794146040Stjr
2795251435Sjkim		  err = extend_buffers (mctx, bkref_str_off + 1);
2796146040Stjr		  if (BE (err != REG_NOERROR, 0))
2797146040Stjr		    return err;
2798146040Stjr
2799146040Stjr		  buf = (const char *) re_string_get_buffer (&mctx->input);
2800146040Stjr		}
2801146040Stjr	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2802146040Stjr		break; /* We don't need to search this sub expression
2803146040Stjr			  any more.  */
2804146040Stjr	    }
2805146040Stjr	  if (mctx->state_log[sl_str] == NULL)
2806146040Stjr	    continue;
2807146040Stjr	  /* Does this state have a ')' of the sub expression?  */
2808146040Stjr	  nodes = &mctx->state_log[sl_str]->nodes;
2809250724Sjkim	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2810250724Sjkim				       OP_CLOSE_SUBEXP);
2811146040Stjr	  if (cls_node == -1)
2812146040Stjr	    continue; /* No.  */
2813146040Stjr	  if (sub_top->path == NULL)
2814146040Stjr	    {
2815146040Stjr	      sub_top->path = calloc (sizeof (state_array_t),
2816146040Stjr				      sl_str - sub_top->str_idx + 1);
2817146040Stjr	      if (sub_top->path == NULL)
2818146040Stjr		return REG_ESPACE;
2819146040Stjr	    }
2820146040Stjr	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2821146040Stjr	     in the current context?  */
2822146040Stjr	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2823250724Sjkim			       sub_top->str_idx, cls_node, sl_str,
2824250724Sjkim			       OP_CLOSE_SUBEXP);
2825146040Stjr	  if (err == REG_NOMATCH)
2826146040Stjr	      continue;
2827146040Stjr	  if (BE (err != REG_NOERROR, 0))
2828146040Stjr	      return err;
2829146040Stjr	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2830146040Stjr	  if (BE (sub_last == NULL, 0))
2831146040Stjr	    return REG_ESPACE;
2832146040Stjr	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2833146040Stjr				bkref_str_idx);
2834146040Stjr	  if (err == REG_NOMATCH)
2835146040Stjr	    continue;
2836146040Stjr	}
2837146040Stjr    }
2838146040Stjr  return REG_NOERROR;
2839146040Stjr}
2840146040Stjr
2841146040Stjr/* Helper functions for get_subexp().  */
2842146040Stjr
2843146040Stjr/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2844146040Stjr   If it can arrive, register the sub expression expressed with SUB_TOP
2845146040Stjr   and SUB_LAST.  */
2846146040Stjr
2847146040Stjrstatic reg_errcode_t
2848250724Sjkiminternal_function
2849250724Sjkimget_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2850250724Sjkim		re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2851146040Stjr{
2852146040Stjr  reg_errcode_t err;
2853146040Stjr  int to_idx;
2854146040Stjr  /* Can the subexpression arrive the back reference?  */
2855146040Stjr  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2856250724Sjkim		       sub_last->str_idx, bkref_node, bkref_str,
2857250724Sjkim		       OP_OPEN_SUBEXP);
2858146040Stjr  if (err != REG_NOERROR)
2859146040Stjr    return err;
2860146040Stjr  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2861146040Stjr			     sub_last->str_idx);
2862146040Stjr  if (BE (err != REG_NOERROR, 0))
2863146040Stjr    return err;
2864146040Stjr  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2865146040Stjr  return clean_state_log_if_needed (mctx, to_idx);
2866146040Stjr}
2867146040Stjr
2868146040Stjr/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2869146040Stjr   Search '(' if FL_OPEN, or search ')' otherwise.
2870146040Stjr   TODO: This function isn't efficient...
2871146040Stjr	 Because there might be more than one nodes whose types are
2872146040Stjr	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2873146040Stjr	 nodes.
2874146040Stjr	 E.g. RE: (a){2}  */
2875146040Stjr
2876146040Stjrstatic int
2877250724Sjkiminternal_function
2878250724Sjkimfind_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2879250724Sjkim		  int subexp_idx, int type)
2880146040Stjr{
2881146040Stjr  int cls_idx;
2882146040Stjr  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2883146040Stjr    {
2884146040Stjr      int cls_node = nodes->elems[cls_idx];
2885146040Stjr      const re_token_t *node = dfa->nodes + cls_node;
2886146040Stjr      if (node->type == type
2887146040Stjr	  && node->opr.idx == subexp_idx)
2888146040Stjr	return cls_node;
2889146040Stjr    }
2890146040Stjr  return -1;
2891146040Stjr}
2892146040Stjr
2893146040Stjr/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2894146040Stjr   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2895146040Stjr   heavily reused.
2896146040Stjr   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2897146040Stjr
2898146040Stjrstatic reg_errcode_t
2899250724Sjkiminternal_function __attribute_warn_unused_result__
2900250724Sjkimcheck_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2901250724Sjkim	       int top_str, int last_node, int last_str, int type)
2902146040Stjr{
2903250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
2904250724Sjkim  reg_errcode_t err = REG_NOERROR;
2905146040Stjr  int subexp_num, backup_cur_idx, str_idx, null_cnt;
2906146040Stjr  re_dfastate_t *cur_state = NULL;
2907146040Stjr  re_node_set *cur_nodes, next_nodes;
2908146040Stjr  re_dfastate_t **backup_state_log;
2909146040Stjr  unsigned int context;
2910146040Stjr
2911146040Stjr  subexp_num = dfa->nodes[top_node].opr.idx;
2912146040Stjr  /* Extend the buffer if we need.  */
2913146040Stjr  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2914146040Stjr    {
2915146040Stjr      re_dfastate_t **new_array;
2916146040Stjr      int old_alloc = path->alloc;
2917146040Stjr      path->alloc += last_str + mctx->max_mb_elem_len + 1;
2918146040Stjr      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2919250724Sjkim      if (BE (new_array == NULL, 0))
2920146040Stjr	{
2921146040Stjr	  path->alloc = old_alloc;
2922146040Stjr	  return REG_ESPACE;
2923146040Stjr	}
2924146040Stjr      path->array = new_array;
2925146040Stjr      memset (new_array + old_alloc, '\0',
2926146040Stjr	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2927146040Stjr    }
2928146040Stjr
2929250724Sjkim  str_idx = path->next_idx ?: top_str;
2930146040Stjr
2931146040Stjr  /* Temporary modify MCTX.  */
2932146040Stjr  backup_state_log = mctx->state_log;
2933146040Stjr  backup_cur_idx = mctx->input.cur_idx;
2934146040Stjr  mctx->state_log = path->array;
2935146040Stjr  mctx->input.cur_idx = str_idx;
2936146040Stjr
2937146040Stjr  /* Setup initial node set.  */
2938146040Stjr  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2939146040Stjr  if (str_idx == top_str)
2940146040Stjr    {
2941146040Stjr      err = re_node_set_init_1 (&next_nodes, top_node);
2942146040Stjr      if (BE (err != REG_NOERROR, 0))
2943146040Stjr	return err;
2944146040Stjr      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2945146040Stjr      if (BE (err != REG_NOERROR, 0))
2946146040Stjr	{
2947146040Stjr	  re_node_set_free (&next_nodes);
2948146040Stjr	  return err;
2949146040Stjr	}
2950146040Stjr    }
2951146040Stjr  else
2952146040Stjr    {
2953146040Stjr      cur_state = mctx->state_log[str_idx];
2954146040Stjr      if (cur_state && cur_state->has_backref)
2955146040Stjr	{
2956146040Stjr	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2957250724Sjkim	  if (BE (err != REG_NOERROR, 0))
2958146040Stjr	    return err;
2959146040Stjr	}
2960146040Stjr      else
2961146040Stjr	re_node_set_init_empty (&next_nodes);
2962146040Stjr    }
2963146040Stjr  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2964146040Stjr    {
2965146040Stjr      if (next_nodes.nelem)
2966146040Stjr	{
2967146040Stjr	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2968146040Stjr				    subexp_num, type);
2969250724Sjkim	  if (BE (err != REG_NOERROR, 0))
2970146040Stjr	    {
2971146040Stjr	      re_node_set_free (&next_nodes);
2972146040Stjr	      return err;
2973146040Stjr	    }
2974146040Stjr	}
2975146040Stjr      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2976146040Stjr      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2977146040Stjr	{
2978146040Stjr	  re_node_set_free (&next_nodes);
2979146040Stjr	  return err;
2980146040Stjr	}
2981146040Stjr      mctx->state_log[str_idx] = cur_state;
2982146040Stjr    }
2983146040Stjr
2984146040Stjr  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2985146040Stjr    {
2986146040Stjr      re_node_set_empty (&next_nodes);
2987146040Stjr      if (mctx->state_log[str_idx + 1])
2988146040Stjr	{
2989146040Stjr	  err = re_node_set_merge (&next_nodes,
2990146040Stjr				   &mctx->state_log[str_idx + 1]->nodes);
2991146040Stjr	  if (BE (err != REG_NOERROR, 0))
2992146040Stjr	    {
2993146040Stjr	      re_node_set_free (&next_nodes);
2994146040Stjr	      return err;
2995146040Stjr	    }
2996146040Stjr	}
2997146040Stjr      if (cur_state)
2998146040Stjr	{
2999146040Stjr	  err = check_arrival_add_next_nodes (mctx, str_idx,
3000250724Sjkim					      &cur_state->non_eps_nodes,
3001250724Sjkim					      &next_nodes);
3002146040Stjr	  if (BE (err != REG_NOERROR, 0))
3003146040Stjr	    {
3004146040Stjr	      re_node_set_free (&next_nodes);
3005146040Stjr	      return err;
3006146040Stjr	    }
3007146040Stjr	}
3008146040Stjr      ++str_idx;
3009146040Stjr      if (next_nodes.nelem)
3010146040Stjr	{
3011146040Stjr	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3012146040Stjr	  if (BE (err != REG_NOERROR, 0))
3013146040Stjr	    {
3014146040Stjr	      re_node_set_free (&next_nodes);
3015146040Stjr	      return err;
3016146040Stjr	    }
3017146040Stjr	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3018146040Stjr				    subexp_num, type);
3019250724Sjkim	  if (BE (err != REG_NOERROR, 0))
3020146040Stjr	    {
3021146040Stjr	      re_node_set_free (&next_nodes);
3022146040Stjr	      return err;
3023146040Stjr	    }
3024146040Stjr	}
3025146040Stjr      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3026146040Stjr      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3027146040Stjr      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3028146040Stjr	{
3029146040Stjr	  re_node_set_free (&next_nodes);
3030146040Stjr	  return err;
3031146040Stjr	}
3032146040Stjr      mctx->state_log[str_idx] = cur_state;
3033146040Stjr      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3034146040Stjr    }
3035146040Stjr  re_node_set_free (&next_nodes);
3036146040Stjr  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3037146040Stjr	       : &mctx->state_log[last_str]->nodes);
3038146040Stjr  path->next_idx = str_idx;
3039146040Stjr
3040146040Stjr  /* Fix MCTX.  */
3041146040Stjr  mctx->state_log = backup_state_log;
3042146040Stjr  mctx->input.cur_idx = backup_cur_idx;
3043146040Stjr
3044146040Stjr  /* Then check the current node set has the node LAST_NODE.  */
3045146040Stjr  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3046146040Stjr    return REG_NOERROR;
3047146040Stjr
3048146040Stjr  return REG_NOMATCH;
3049146040Stjr}
3050146040Stjr
3051146040Stjr/* Helper functions for check_arrival.  */
3052146040Stjr
3053146040Stjr/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3054146040Stjr   to NEXT_NODES.
3055146040Stjr   TODO: This function is similar to the functions transit_state*(),
3056146040Stjr	 however this function has many additional works.
3057146040Stjr	 Can't we unify them?  */
3058146040Stjr
3059146040Stjrstatic reg_errcode_t
3060250724Sjkiminternal_function __attribute_warn_unused_result__
3061250724Sjkimcheck_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3062250724Sjkim			      re_node_set *cur_nodes, re_node_set *next_nodes)
3063146040Stjr{
3064250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
3065146040Stjr  int result;
3066146040Stjr  int cur_idx;
3067250724Sjkim  reg_errcode_t err = REG_NOERROR;
3068146040Stjr  re_node_set union_set;
3069146040Stjr  re_node_set_init_empty (&union_set);
3070146040Stjr  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3071146040Stjr    {
3072146040Stjr      int naccepted = 0;
3073146040Stjr      int cur_node = cur_nodes->elems[cur_idx];
3074146040Stjr#ifdef DEBUG
3075146040Stjr      re_token_type_t type = dfa->nodes[cur_node].type;
3076146040Stjr      assert (!IS_EPSILON_NODE (type));
3077146040Stjr#endif
3078146040Stjr#ifdef RE_ENABLE_I18N
3079146040Stjr      /* If the node may accept `multi byte'.  */
3080146040Stjr      if (dfa->nodes[cur_node].accept_mb)
3081146040Stjr	{
3082146040Stjr	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3083146040Stjr					       str_idx);
3084146040Stjr	  if (naccepted > 1)
3085146040Stjr	    {
3086146040Stjr	      re_dfastate_t *dest_state;
3087146040Stjr	      int next_node = dfa->nexts[cur_node];
3088146040Stjr	      int next_idx = str_idx + naccepted;
3089146040Stjr	      dest_state = mctx->state_log[next_idx];
3090146040Stjr	      re_node_set_empty (&union_set);
3091146040Stjr	      if (dest_state)
3092146040Stjr		{
3093146040Stjr		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3094146040Stjr		  if (BE (err != REG_NOERROR, 0))
3095146040Stjr		    {
3096146040Stjr		      re_node_set_free (&union_set);
3097146040Stjr		      return err;
3098146040Stjr		    }
3099146040Stjr		}
3100146040Stjr	      result = re_node_set_insert (&union_set, next_node);
3101146040Stjr	      if (BE (result < 0, 0))
3102146040Stjr		{
3103146040Stjr		  re_node_set_free (&union_set);
3104146040Stjr		  return REG_ESPACE;
3105146040Stjr		}
3106146040Stjr	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3107146040Stjr							    &union_set);
3108146040Stjr	      if (BE (mctx->state_log[next_idx] == NULL
3109146040Stjr		      && err != REG_NOERROR, 0))
3110146040Stjr		{
3111146040Stjr		  re_node_set_free (&union_set);
3112146040Stjr		  return err;
3113146040Stjr		}
3114146040Stjr	    }
3115146040Stjr	}
3116146040Stjr#endif /* RE_ENABLE_I18N */
3117146040Stjr      if (naccepted
3118146040Stjr	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3119146040Stjr	{
3120146040Stjr	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3121146040Stjr	  if (BE (result < 0, 0))
3122146040Stjr	    {
3123146040Stjr	      re_node_set_free (&union_set);
3124146040Stjr	      return REG_ESPACE;
3125146040Stjr	    }
3126146040Stjr	}
3127146040Stjr    }
3128146040Stjr  re_node_set_free (&union_set);
3129146040Stjr  return REG_NOERROR;
3130146040Stjr}
3131146040Stjr
3132146040Stjr/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3133146040Stjr   CUR_NODES, however exclude the nodes which are:
3134146040Stjr    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3135146040Stjr    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3136146040Stjr*/
3137146040Stjr
3138146040Stjrstatic reg_errcode_t
3139250724Sjkiminternal_function
3140250724Sjkimcheck_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3141250724Sjkim			  int ex_subexp, int type)
3142146040Stjr{
3143146040Stjr  reg_errcode_t err;
3144146040Stjr  int idx, outside_node;
3145146040Stjr  re_node_set new_nodes;
3146146040Stjr#ifdef DEBUG
3147146040Stjr  assert (cur_nodes->nelem);
3148146040Stjr#endif
3149146040Stjr  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3150146040Stjr  if (BE (err != REG_NOERROR, 0))
3151146040Stjr    return err;
3152146040Stjr  /* Create a new node set NEW_NODES with the nodes which are epsilon
3153146040Stjr     closures of the node in CUR_NODES.  */
3154146040Stjr
3155146040Stjr  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3156146040Stjr    {
3157146040Stjr      int cur_node = cur_nodes->elems[idx];
3158250724Sjkim      const re_node_set *eclosure = dfa->eclosures + cur_node;
3159146040Stjr      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3160146040Stjr      if (outside_node == -1)
3161146040Stjr	{
3162146040Stjr	  /* There are no problematic nodes, just merge them.  */
3163146040Stjr	  err = re_node_set_merge (&new_nodes, eclosure);
3164146040Stjr	  if (BE (err != REG_NOERROR, 0))
3165146040Stjr	    {
3166146040Stjr	      re_node_set_free (&new_nodes);
3167146040Stjr	      return err;
3168146040Stjr	    }
3169146040Stjr	}
3170146040Stjr      else
3171146040Stjr	{
3172146040Stjr	  /* There are problematic nodes, re-calculate incrementally.  */
3173146040Stjr	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3174146040Stjr					      ex_subexp, type);
3175146040Stjr	  if (BE (err != REG_NOERROR, 0))
3176146040Stjr	    {
3177146040Stjr	      re_node_set_free (&new_nodes);
3178146040Stjr	      return err;
3179146040Stjr	    }
3180146040Stjr	}
3181146040Stjr    }
3182146040Stjr  re_node_set_free (cur_nodes);
3183146040Stjr  *cur_nodes = new_nodes;
3184146040Stjr  return REG_NOERROR;
3185146040Stjr}
3186146040Stjr
3187146040Stjr/* Helper function for check_arrival_expand_ecl.
3188146040Stjr   Check incrementally the epsilon closure of TARGET, and if it isn't
3189146040Stjr   problematic append it to DST_NODES.  */
3190146040Stjr
3191146040Stjrstatic reg_errcode_t
3192250724Sjkiminternal_function __attribute_warn_unused_result__
3193250724Sjkimcheck_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3194250724Sjkim			      int target, int ex_subexp, int type)
3195146040Stjr{
3196146040Stjr  int cur_node;
3197146040Stjr  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3198146040Stjr    {
3199146040Stjr      int err;
3200146040Stjr
3201146040Stjr      if (dfa->nodes[cur_node].type == type
3202146040Stjr	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3203146040Stjr	{
3204146040Stjr	  if (type == OP_CLOSE_SUBEXP)
3205146040Stjr	    {
3206146040Stjr	      err = re_node_set_insert (dst_nodes, cur_node);
3207146040Stjr	      if (BE (err == -1, 0))
3208146040Stjr		return REG_ESPACE;
3209146040Stjr	    }
3210146040Stjr	  break;
3211146040Stjr	}
3212146040Stjr      err = re_node_set_insert (dst_nodes, cur_node);
3213146040Stjr      if (BE (err == -1, 0))
3214146040Stjr	return REG_ESPACE;
3215146040Stjr      if (dfa->edests[cur_node].nelem == 0)
3216146040Stjr	break;
3217146040Stjr      if (dfa->edests[cur_node].nelem == 2)
3218146040Stjr	{
3219146040Stjr	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3220146040Stjr					      dfa->edests[cur_node].elems[1],
3221146040Stjr					      ex_subexp, type);
3222146040Stjr	  if (BE (err != REG_NOERROR, 0))
3223146040Stjr	    return err;
3224146040Stjr	}
3225146040Stjr      cur_node = dfa->edests[cur_node].elems[0];
3226146040Stjr    }
3227146040Stjr  return REG_NOERROR;
3228146040Stjr}
3229146040Stjr
3230146040Stjr
3231146040Stjr/* For all the back references in the current state, calculate the
3232146040Stjr   destination of the back references by the appropriate entry
3233146040Stjr   in MCTX->BKREF_ENTS.  */
3234146040Stjr
3235146040Stjrstatic reg_errcode_t
3236250724Sjkiminternal_function __attribute_warn_unused_result__
3237250724Sjkimexpand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3238250724Sjkim		    int cur_str, int subexp_num, int type)
3239146040Stjr{
3240250724Sjkim  const re_dfa_t *const dfa = mctx->dfa;
3241146040Stjr  reg_errcode_t err;
3242146040Stjr  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3243146040Stjr  struct re_backref_cache_entry *ent;
3244146040Stjr
3245146040Stjr  if (cache_idx_start == -1)
3246146040Stjr    return REG_NOERROR;
3247146040Stjr
3248146040Stjr restart:
3249146040Stjr  ent = mctx->bkref_ents + cache_idx_start;
3250146040Stjr  do
3251146040Stjr    {
3252146040Stjr      int to_idx, next_node;
3253146040Stjr
3254146040Stjr      /* Is this entry ENT is appropriate?  */
3255146040Stjr      if (!re_node_set_contains (cur_nodes, ent->node))
3256146040Stjr	continue; /* No.  */
3257146040Stjr
3258146040Stjr      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3259146040Stjr      /* Calculate the destination of the back reference, and append it
3260146040Stjr	 to MCTX->STATE_LOG.  */
3261146040Stjr      if (to_idx == cur_str)
3262146040Stjr	{
3263146040Stjr	  /* The backreference did epsilon transit, we must re-check all the
3264146040Stjr	     node in the current state.  */
3265146040Stjr	  re_node_set new_dests;
3266146040Stjr	  reg_errcode_t err2, err3;
3267146040Stjr	  next_node = dfa->edests[ent->node].elems[0];
3268146040Stjr	  if (re_node_set_contains (cur_nodes, next_node))
3269146040Stjr	    continue;
3270146040Stjr	  err = re_node_set_init_1 (&new_dests, next_node);
3271146040Stjr	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3272146040Stjr	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3273146040Stjr	  re_node_set_free (&new_dests);
3274146040Stjr	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3275146040Stjr		  || err3 != REG_NOERROR, 0))
3276146040Stjr	    {
3277146040Stjr	      err = (err != REG_NOERROR ? err
3278146040Stjr		     : (err2 != REG_NOERROR ? err2 : err3));
3279146040Stjr	      return err;
3280146040Stjr	    }
3281146040Stjr	  /* TODO: It is still inefficient...  */
3282146040Stjr	  goto restart;
3283146040Stjr	}
3284146040Stjr      else
3285146040Stjr	{
3286146040Stjr	  re_node_set union_set;
3287146040Stjr	  next_node = dfa->nexts[ent->node];
3288146040Stjr	  if (mctx->state_log[to_idx])
3289146040Stjr	    {
3290146040Stjr	      int ret;
3291146040Stjr	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3292146040Stjr					next_node))
3293146040Stjr		continue;
3294146040Stjr	      err = re_node_set_init_copy (&union_set,
3295146040Stjr					   &mctx->state_log[to_idx]->nodes);
3296146040Stjr	      ret = re_node_set_insert (&union_set, next_node);
3297146040Stjr	      if (BE (err != REG_NOERROR || ret < 0, 0))
3298146040Stjr		{
3299146040Stjr		  re_node_set_free (&union_set);
3300146040Stjr		  err = err != REG_NOERROR ? err : REG_ESPACE;
3301146040Stjr		  return err;
3302146040Stjr		}
3303146040Stjr	    }
3304146040Stjr	  else
3305146040Stjr	    {
3306146040Stjr	      err = re_node_set_init_1 (&union_set, next_node);
3307146040Stjr	      if (BE (err != REG_NOERROR, 0))
3308146040Stjr		return err;
3309146040Stjr	    }
3310146040Stjr	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3311146040Stjr	  re_node_set_free (&union_set);
3312146040Stjr	  if (BE (mctx->state_log[to_idx] == NULL
3313146040Stjr		  && err != REG_NOERROR, 0))
3314146040Stjr	    return err;
3315146040Stjr	}
3316146040Stjr    }
3317146040Stjr  while (ent++->more);
3318146040Stjr  return REG_NOERROR;
3319146040Stjr}
3320146040Stjr
3321146040Stjr/* Build transition table for the state.
3322146040Stjr   Return 1 if succeeded, otherwise return NULL.  */
3323146040Stjr
3324146040Stjrstatic int
3325250724Sjkiminternal_function
3326250724Sjkimbuild_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3327146040Stjr{
3328146040Stjr  reg_errcode_t err;
3329146040Stjr  int i, j, ch, need_word_trtable = 0;
3330250724Sjkim  bitset_word_t elem, mask;
3331250724Sjkim  bool dests_node_malloced = false;
3332250724Sjkim  bool dest_states_malloced = false;
3333146040Stjr  int ndests; /* Number of the destination states from `state'.  */
3334146040Stjr  re_dfastate_t **trtable;
3335146040Stjr  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3336146040Stjr  re_node_set follows, *dests_node;
3337250724Sjkim  bitset_t *dests_ch;
3338250724Sjkim  bitset_t acceptable;
3339146040Stjr
3340250724Sjkim  struct dests_alloc
3341250724Sjkim  {
3342250724Sjkim    re_node_set dests_node[SBC_MAX];
3343250724Sjkim    bitset_t dests_ch[SBC_MAX];
3344250724Sjkim  } *dests_alloc;
3345250724Sjkim
3346146040Stjr  /* We build DFA states which corresponds to the destination nodes
3347146040Stjr     from `state'.  `dests_node[i]' represents the nodes which i-th
3348146040Stjr     destination state contains, and `dests_ch[i]' represents the
3349146040Stjr     characters which i-th destination state accepts.  */
3350250724Sjkim  if (__libc_use_alloca (sizeof (struct dests_alloc)))
3351250724Sjkim    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3352146040Stjr  else
3353146040Stjr    {
3354250724Sjkim      dests_alloc = re_malloc (struct dests_alloc, 1);
3355250724Sjkim      if (BE (dests_alloc == NULL, 0))
3356146040Stjr	return 0;
3357250724Sjkim      dests_node_malloced = true;
3358146040Stjr    }
3359250724Sjkim  dests_node = dests_alloc->dests_node;
3360250724Sjkim  dests_ch = dests_alloc->dests_ch;
3361146040Stjr
3362146040Stjr  /* Initialize transiton table.  */
3363146040Stjr  state->word_trtable = state->trtable = NULL;
3364146040Stjr
3365146040Stjr  /* At first, group all nodes belonging to `state' into several
3366146040Stjr     destinations.  */
3367146040Stjr  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3368146040Stjr  if (BE (ndests <= 0, 0))
3369146040Stjr    {
3370146040Stjr      if (dests_node_malloced)
3371250724Sjkim	free (dests_alloc);
3372146040Stjr      /* Return 0 in case of an error, 1 otherwise.  */
3373146040Stjr      if (ndests == 0)
3374146040Stjr	{
3375146040Stjr	  state->trtable = (re_dfastate_t **)
3376146040Stjr	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3377250724Sjkim	  if (BE (state->trtable == NULL, 0))
3378250724Sjkim	    return 0;
3379146040Stjr	  return 1;
3380146040Stjr	}
3381146040Stjr      return 0;
3382146040Stjr    }
3383146040Stjr
3384146040Stjr  err = re_node_set_alloc (&follows, ndests + 1);
3385146040Stjr  if (BE (err != REG_NOERROR, 0))
3386146040Stjr    goto out_free;
3387146040Stjr
3388250724Sjkim  /* Avoid arithmetic overflow in size calculation.  */
3389250724Sjkim  if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3390250724Sjkim	    / (3 * sizeof (re_dfastate_t *)))
3391250724Sjkim	   < ndests),
3392250724Sjkim	  0))
3393250724Sjkim    goto out_free;
3394250724Sjkim
3395250724Sjkim  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3396146040Stjr			 + ndests * 3 * sizeof (re_dfastate_t *)))
3397146040Stjr    dest_states = (re_dfastate_t **)
3398146040Stjr      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3399146040Stjr  else
3400146040Stjr    {
3401146040Stjr      dest_states = (re_dfastate_t **)
3402146040Stjr	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3403146040Stjr      if (BE (dest_states == NULL, 0))
3404146040Stjr	{
3405146040Stjrout_free:
3406146040Stjr	  if (dest_states_malloced)
3407146040Stjr	    free (dest_states);
3408146040Stjr	  re_node_set_free (&follows);
3409146040Stjr	  for (i = 0; i < ndests; ++i)
3410146040Stjr	    re_node_set_free (dests_node + i);
3411146040Stjr	  if (dests_node_malloced)
3412250724Sjkim	    free (dests_alloc);
3413146040Stjr	  return 0;
3414146040Stjr	}
3415250724Sjkim      dest_states_malloced = true;
3416146040Stjr    }
3417146040Stjr  dest_states_word = dest_states + ndests;
3418146040Stjr  dest_states_nl = dest_states_word + ndests;
3419146040Stjr  bitset_empty (acceptable);
3420146040Stjr
3421146040Stjr  /* Then build the states for all destinations.  */
3422146040Stjr  for (i = 0; i < ndests; ++i)
3423146040Stjr    {
3424146040Stjr      int next_node;
3425146040Stjr      re_node_set_empty (&follows);
3426146040Stjr      /* Merge the follows of this destination states.  */
3427146040Stjr      for (j = 0; j < dests_node[i].nelem; ++j)
3428146040Stjr	{
3429146040Stjr	  next_node = dfa->nexts[dests_node[i].elems[j]];
3430146040Stjr	  if (next_node != -1)
3431146040Stjr	    {
3432146040Stjr	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3433146040Stjr	      if (BE (err != REG_NOERROR, 0))
3434146040Stjr		goto out_free;
3435146040Stjr	    }
3436146040Stjr	}
3437146040Stjr      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3438146040Stjr      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3439146040Stjr	goto out_free;
3440146040Stjr      /* If the new state has context constraint,
3441146040Stjr	 build appropriate states for these contexts.  */
3442146040Stjr      if (dest_states[i]->has_constraint)
3443146040Stjr	{
3444146040Stjr	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3445146040Stjr							  CONTEXT_WORD);
3446146040Stjr	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3447146040Stjr	    goto out_free;
3448146040Stjr
3449146040Stjr	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3450146040Stjr	    need_word_trtable = 1;
3451146040Stjr
3452146040Stjr	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3453146040Stjr							CONTEXT_NEWLINE);
3454146040Stjr	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3455146040Stjr	    goto out_free;
3456146040Stjr 	}
3457146040Stjr      else
3458146040Stjr	{
3459146040Stjr	  dest_states_word[i] = dest_states[i];
3460146040Stjr	  dest_states_nl[i] = dest_states[i];
3461146040Stjr	}
3462146040Stjr      bitset_merge (acceptable, dests_ch[i]);
3463146040Stjr    }
3464146040Stjr
3465146040Stjr  if (!BE (need_word_trtable, 0))
3466146040Stjr    {
3467146040Stjr      /* We don't care about whether the following character is a word
3468146040Stjr	 character, or we are in a single-byte character set so we can
3469146040Stjr	 discern by looking at the character code: allocate a
3470146040Stjr	 256-entry transition table.  */
3471146040Stjr      trtable = state->trtable =
3472146040Stjr	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3473146040Stjr      if (BE (trtable == NULL, 0))
3474146040Stjr	goto out_free;
3475146040Stjr
3476146040Stjr      /* For all characters ch...:  */
3477250724Sjkim      for (i = 0; i < BITSET_WORDS; ++i)
3478250724Sjkim	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3479146040Stjr	     elem;
3480146040Stjr	     mask <<= 1, elem >>= 1, ++ch)
3481146040Stjr	  if (BE (elem & 1, 0))
3482146040Stjr	    {
3483146040Stjr	      /* There must be exactly one destination which accepts
3484146040Stjr		 character ch.  See group_nodes_into_DFAstates.  */
3485146040Stjr	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3486146040Stjr		;
3487146040Stjr
3488146040Stjr	      /* j-th destination accepts the word character ch.  */
3489146040Stjr	      if (dfa->word_char[i] & mask)
3490146040Stjr		trtable[ch] = dest_states_word[j];
3491146040Stjr	      else
3492146040Stjr		trtable[ch] = dest_states[j];
3493146040Stjr	    }
3494146040Stjr    }
3495146040Stjr  else
3496146040Stjr    {
3497146040Stjr      /* We care about whether the following character is a word
3498146040Stjr	 character, and we are in a multi-byte character set: discern
3499146040Stjr	 by looking at the character code: build two 256-entry
3500146040Stjr	 transition tables, one starting at trtable[0] and one
3501146040Stjr	 starting at trtable[SBC_MAX].  */
3502146040Stjr      trtable = state->word_trtable =
3503146040Stjr	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3504146040Stjr      if (BE (trtable == NULL, 0))
3505146040Stjr	goto out_free;
3506146040Stjr
3507146040Stjr      /* For all characters ch...:  */
3508250724Sjkim      for (i = 0; i < BITSET_WORDS; ++i)
3509250724Sjkim	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3510146040Stjr	     elem;
3511146040Stjr	     mask <<= 1, elem >>= 1, ++ch)
3512146040Stjr	  if (BE (elem & 1, 0))
3513146040Stjr	    {
3514146040Stjr	      /* There must be exactly one destination which accepts
3515146040Stjr		 character ch.  See group_nodes_into_DFAstates.  */
3516146040Stjr	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3517146040Stjr		;
3518146040Stjr
3519146040Stjr	      /* j-th destination accepts the word character ch.  */
3520146040Stjr	      trtable[ch] = dest_states[j];
3521146040Stjr	      trtable[ch + SBC_MAX] = dest_states_word[j];
3522146040Stjr	    }
3523146040Stjr    }
3524146040Stjr
3525146040Stjr  /* new line */
3526146040Stjr  if (bitset_contain (acceptable, NEWLINE_CHAR))
3527146040Stjr    {
3528146040Stjr      /* The current state accepts newline character.  */
3529146040Stjr      for (j = 0; j < ndests; ++j)
3530146040Stjr	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3531146040Stjr	  {
3532146040Stjr	    /* k-th destination accepts newline character.  */
3533146040Stjr	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3534146040Stjr	    if (need_word_trtable)
3535146040Stjr	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3536146040Stjr	    /* There must be only one destination which accepts
3537146040Stjr	       newline.  See group_nodes_into_DFAstates.  */
3538146040Stjr	    break;
3539146040Stjr	  }
3540146040Stjr    }
3541146040Stjr
3542146040Stjr  if (dest_states_malloced)
3543146040Stjr    free (dest_states);
3544146040Stjr
3545146040Stjr  re_node_set_free (&follows);
3546146040Stjr  for (i = 0; i < ndests; ++i)
3547146040Stjr    re_node_set_free (dests_node + i);
3548146040Stjr
3549146040Stjr  if (dests_node_malloced)
3550250724Sjkim    free (dests_alloc);
3551146040Stjr
3552146040Stjr  return 1;
3553146040Stjr}
3554146040Stjr
3555146040Stjr/* Group all nodes belonging to STATE into several destinations.
3556146040Stjr   Then for all destinations, set the nodes belonging to the destination
3557146040Stjr   to DESTS_NODE[i] and set the characters accepted by the destination
3558146040Stjr   to DEST_CH[i].  This function return the number of destinations.  */
3559146040Stjr
3560146040Stjrstatic int
3561250724Sjkiminternal_function
3562250724Sjkimgroup_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3563250724Sjkim			    re_node_set *dests_node, bitset_t *dests_ch)
3564146040Stjr{
3565146040Stjr  reg_errcode_t err;
3566146040Stjr  int result;
3567146040Stjr  int i, j, k;
3568146040Stjr  int ndests; /* Number of the destinations from `state'.  */
3569250724Sjkim  bitset_t accepts; /* Characters a node can accept.  */
3570146040Stjr  const re_node_set *cur_nodes = &state->nodes;
3571146040Stjr  bitset_empty (accepts);
3572146040Stjr  ndests = 0;
3573146040Stjr
3574146040Stjr  /* For all the nodes belonging to `state',  */
3575146040Stjr  for (i = 0; i < cur_nodes->nelem; ++i)
3576146040Stjr    {
3577146040Stjr      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3578146040Stjr      re_token_type_t type = node->type;
3579146040Stjr      unsigned int constraint = node->constraint;
3580146040Stjr
3581146040Stjr      /* Enumerate all single byte character this node can accept.  */
3582146040Stjr      if (type == CHARACTER)
3583146040Stjr	bitset_set (accepts, node->opr.c);
3584146040Stjr      else if (type == SIMPLE_BRACKET)
3585146040Stjr	{
3586146040Stjr	  bitset_merge (accepts, node->opr.sbcset);
3587146040Stjr	}
3588146040Stjr      else if (type == OP_PERIOD)
3589146040Stjr	{
3590146040Stjr#ifdef RE_ENABLE_I18N
3591146040Stjr	  if (dfa->mb_cur_max > 1)
3592146040Stjr	    bitset_merge (accepts, dfa->sb_char);
3593146040Stjr	  else
3594146040Stjr#endif
3595146040Stjr	    bitset_set_all (accepts);
3596146040Stjr	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3597146040Stjr	    bitset_clear (accepts, '\n');
3598146040Stjr	  if (dfa->syntax & RE_DOT_NOT_NULL)
3599146040Stjr	    bitset_clear (accepts, '\0');
3600146040Stjr	}
3601146040Stjr#ifdef RE_ENABLE_I18N
3602146040Stjr      else if (type == OP_UTF8_PERIOD)
3603250724Sjkim	{
3604250724Sjkim	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
3605146040Stjr	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3606146040Stjr	    bitset_clear (accepts, '\n');
3607146040Stjr	  if (dfa->syntax & RE_DOT_NOT_NULL)
3608146040Stjr	    bitset_clear (accepts, '\0');
3609250724Sjkim	}
3610146040Stjr#endif
3611146040Stjr      else
3612146040Stjr	continue;
3613146040Stjr
3614146040Stjr      /* Check the `accepts' and sift the characters which are not
3615146040Stjr	 match it the context.  */
3616146040Stjr      if (constraint)
3617146040Stjr	{
3618146040Stjr	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3619146040Stjr	    {
3620250724Sjkim	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3621146040Stjr	      bitset_empty (accepts);
3622146040Stjr	      if (accepts_newline)
3623146040Stjr		bitset_set (accepts, NEWLINE_CHAR);
3624146040Stjr	      else
3625146040Stjr		continue;
3626146040Stjr	    }
3627146040Stjr	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3628146040Stjr	    {
3629146040Stjr	      bitset_empty (accepts);
3630146040Stjr	      continue;
3631146040Stjr	    }
3632146040Stjr
3633146040Stjr	  if (constraint & NEXT_WORD_CONSTRAINT)
3634146040Stjr	    {
3635250724Sjkim	      bitset_word_t any_set = 0;
3636146040Stjr	      if (type == CHARACTER && !node->word_char)
3637146040Stjr		{
3638146040Stjr		  bitset_empty (accepts);
3639146040Stjr		  continue;
3640146040Stjr		}
3641146040Stjr#ifdef RE_ENABLE_I18N
3642146040Stjr	      if (dfa->mb_cur_max > 1)
3643250724Sjkim		for (j = 0; j < BITSET_WORDS; ++j)
3644146040Stjr		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3645146040Stjr	      else
3646146040Stjr#endif
3647250724Sjkim		for (j = 0; j < BITSET_WORDS; ++j)
3648146040Stjr		  any_set |= (accepts[j] &= dfa->word_char[j]);
3649146040Stjr	      if (!any_set)
3650146040Stjr		continue;
3651146040Stjr	    }
3652146040Stjr	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3653146040Stjr	    {
3654250724Sjkim	      bitset_word_t any_set = 0;
3655146040Stjr	      if (type == CHARACTER && node->word_char)
3656146040Stjr		{
3657146040Stjr		  bitset_empty (accepts);
3658146040Stjr		  continue;
3659146040Stjr		}
3660146040Stjr#ifdef RE_ENABLE_I18N
3661146040Stjr	      if (dfa->mb_cur_max > 1)
3662250724Sjkim		for (j = 0; j < BITSET_WORDS; ++j)
3663146040Stjr		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3664146040Stjr	      else
3665146040Stjr#endif
3666250724Sjkim		for (j = 0; j < BITSET_WORDS; ++j)
3667146040Stjr		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3668146040Stjr	      if (!any_set)
3669146040Stjr		continue;
3670146040Stjr	    }
3671146040Stjr	}
3672146040Stjr
3673146040Stjr      /* Then divide `accepts' into DFA states, or create a new
3674146040Stjr	 state.  Above, we make sure that accepts is not empty.  */
3675146040Stjr      for (j = 0; j < ndests; ++j)
3676146040Stjr	{
3677250724Sjkim	  bitset_t intersec; /* Intersection sets, see below.  */
3678250724Sjkim	  bitset_t remains;
3679146040Stjr	  /* Flags, see below.  */
3680250724Sjkim	  bitset_word_t has_intersec, not_subset, not_consumed;
3681146040Stjr
3682146040Stjr	  /* Optimization, skip if this state doesn't accept the character.  */
3683146040Stjr	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3684146040Stjr	    continue;
3685146040Stjr
3686146040Stjr	  /* Enumerate the intersection set of this state and `accepts'.  */
3687146040Stjr	  has_intersec = 0;
3688250724Sjkim	  for (k = 0; k < BITSET_WORDS; ++k)
3689146040Stjr	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3690146040Stjr	  /* And skip if the intersection set is empty.  */
3691146040Stjr	  if (!has_intersec)
3692146040Stjr	    continue;
3693146040Stjr
3694146040Stjr	  /* Then check if this state is a subset of `accepts'.  */
3695146040Stjr	  not_subset = not_consumed = 0;
3696250724Sjkim	  for (k = 0; k < BITSET_WORDS; ++k)
3697146040Stjr	    {
3698146040Stjr	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3699146040Stjr	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3700146040Stjr	    }
3701146040Stjr
3702146040Stjr	  /* If this state isn't a subset of `accepts', create a
3703146040Stjr	     new group state, which has the `remains'. */
3704146040Stjr	  if (not_subset)
3705146040Stjr	    {
3706146040Stjr	      bitset_copy (dests_ch[ndests], remains);
3707146040Stjr	      bitset_copy (dests_ch[j], intersec);
3708146040Stjr	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3709146040Stjr	      if (BE (err != REG_NOERROR, 0))
3710146040Stjr		goto error_return;
3711146040Stjr	      ++ndests;
3712146040Stjr	    }
3713146040Stjr
3714146040Stjr	  /* Put the position in the current group. */
3715146040Stjr	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3716146040Stjr	  if (BE (result < 0, 0))
3717146040Stjr	    goto error_return;
3718146040Stjr
3719146040Stjr	  /* If all characters are consumed, go to next node. */
3720146040Stjr	  if (!not_consumed)
3721146040Stjr	    break;
3722146040Stjr	}
3723146040Stjr      /* Some characters remain, create a new group. */
3724146040Stjr      if (j == ndests)
3725146040Stjr	{
3726146040Stjr	  bitset_copy (dests_ch[ndests], accepts);
3727146040Stjr	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3728146040Stjr	  if (BE (err != REG_NOERROR, 0))
3729146040Stjr	    goto error_return;
3730146040Stjr	  ++ndests;
3731146040Stjr	  bitset_empty (accepts);
3732146040Stjr	}
3733146040Stjr    }
3734146040Stjr  return ndests;
3735146040Stjr error_return:
3736146040Stjr  for (j = 0; j < ndests; ++j)
3737146040Stjr    re_node_set_free (dests_node + j);
3738146040Stjr  return -1;
3739146040Stjr}
3740146040Stjr
3741146040Stjr#ifdef RE_ENABLE_I18N
3742146040Stjr/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3743146040Stjr   Return the number of the bytes the node accepts.
3744146040Stjr   STR_IDX is the current index of the input string.
3745146040Stjr
3746146040Stjr   This function handles the nodes which can accept one character, or
3747146040Stjr   one collating element like '.', '[a-z]', opposite to the other nodes
3748146040Stjr   can only accept one byte.  */
3749146040Stjr
3750146040Stjrstatic int
3751250724Sjkiminternal_function
3752250724Sjkimcheck_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3753250724Sjkim			 const re_string_t *input, int str_idx)
3754146040Stjr{
3755146040Stjr  const re_token_t *node = dfa->nodes + node_idx;
3756146040Stjr  int char_len, elem_len;
3757146040Stjr  int i;
3758146040Stjr
3759146040Stjr  if (BE (node->type == OP_UTF8_PERIOD, 0))
3760146040Stjr    {
3761146040Stjr      unsigned char c = re_string_byte_at (input, str_idx), d;
3762146040Stjr      if (BE (c < 0xc2, 1))
3763146040Stjr	return 0;
3764146040Stjr
3765146040Stjr      if (str_idx + 2 > input->len)
3766146040Stjr	return 0;
3767146040Stjr
3768146040Stjr      d = re_string_byte_at (input, str_idx + 1);
3769146040Stjr      if (c < 0xe0)
3770146040Stjr	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3771146040Stjr      else if (c < 0xf0)
3772146040Stjr	{
3773146040Stjr	  char_len = 3;
3774146040Stjr	  if (c == 0xe0 && d < 0xa0)
3775146040Stjr	    return 0;
3776146040Stjr	}
3777146040Stjr      else if (c < 0xf8)
3778146040Stjr	{
3779146040Stjr	  char_len = 4;
3780146040Stjr	  if (c == 0xf0 && d < 0x90)
3781146040Stjr	    return 0;
3782146040Stjr	}
3783146040Stjr      else if (c < 0xfc)
3784146040Stjr	{
3785146040Stjr	  char_len = 5;
3786146040Stjr	  if (c == 0xf8 && d < 0x88)
3787146040Stjr	    return 0;
3788146040Stjr	}
3789146040Stjr      else if (c < 0xfe)
3790146040Stjr	{
3791146040Stjr	  char_len = 6;
3792146040Stjr	  if (c == 0xfc && d < 0x84)
3793146040Stjr	    return 0;
3794146040Stjr	}
3795146040Stjr      else
3796146040Stjr	return 0;
3797146040Stjr
3798146040Stjr      if (str_idx + char_len > input->len)
3799146040Stjr	return 0;
3800146040Stjr
3801146040Stjr      for (i = 1; i < char_len; ++i)
3802146040Stjr	{
3803146040Stjr	  d = re_string_byte_at (input, str_idx + i);
3804146040Stjr	  if (d < 0x80 || d > 0xbf)
3805146040Stjr	    return 0;
3806146040Stjr	}
3807146040Stjr      return char_len;
3808146040Stjr    }
3809146040Stjr
3810146040Stjr  char_len = re_string_char_size_at (input, str_idx);
3811146040Stjr  if (node->type == OP_PERIOD)
3812146040Stjr    {
3813146040Stjr      if (char_len <= 1)
3814250724Sjkim	return 0;
3815146040Stjr      /* FIXME: I don't think this if is needed, as both '\n'
3816146040Stjr	 and '\0' are char_len == 1.  */
3817146040Stjr      /* '.' accepts any one character except the following two cases.  */
3818146040Stjr      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3819146040Stjr	   re_string_byte_at (input, str_idx) == '\n') ||
3820146040Stjr	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3821146040Stjr	   re_string_byte_at (input, str_idx) == '\0'))
3822146040Stjr	return 0;
3823146040Stjr      return char_len;
3824146040Stjr    }
3825146040Stjr
3826146040Stjr  elem_len = re_string_elem_size_at (input, str_idx);
3827146040Stjr  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3828146040Stjr    return 0;
3829146040Stjr
3830146040Stjr  if (node->type == COMPLEX_BRACKET)
3831146040Stjr    {
3832146040Stjr      const re_charset_t *cset = node->opr.mbcset;
3833146040Stjr# ifdef _LIBC
3834146040Stjr      const unsigned char *pin
3835146040Stjr	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3836146040Stjr      int j;
3837146040Stjr      uint32_t nrules;
3838146040Stjr# endif /* _LIBC */
3839146040Stjr      int match_len = 0;
3840146040Stjr      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3841146040Stjr		    ? re_string_wchar_at (input, str_idx) : 0);
3842146040Stjr
3843146040Stjr      /* match with multibyte character?  */
3844146040Stjr      for (i = 0; i < cset->nmbchars; ++i)
3845146040Stjr	if (wc == cset->mbchars[i])
3846146040Stjr	  {
3847146040Stjr	    match_len = char_len;
3848146040Stjr	    goto check_node_accept_bytes_match;
3849146040Stjr	  }
3850146040Stjr      /* match with character_class?  */
3851146040Stjr      for (i = 0; i < cset->nchar_classes; ++i)
3852146040Stjr	{
3853146040Stjr	  wctype_t wt = cset->char_classes[i];
3854146040Stjr	  if (__iswctype (wc, wt))
3855146040Stjr	    {
3856146040Stjr	      match_len = char_len;
3857146040Stjr	      goto check_node_accept_bytes_match;
3858146040Stjr	    }
3859146040Stjr	}
3860146040Stjr
3861146040Stjr# ifdef _LIBC
3862146040Stjr      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3863146040Stjr      if (nrules != 0)
3864146040Stjr	{
3865146040Stjr	  unsigned int in_collseq = 0;
3866146040Stjr	  const int32_t *table, *indirect;
3867146040Stjr	  const unsigned char *weights, *extra;
3868146040Stjr	  const char *collseqwc;
3869146040Stjr	  /* This #include defines a local function!  */
3870146040Stjr#  include <locale/weight.h>
3871146040Stjr
3872146040Stjr	  /* match with collating_symbol?  */
3873146040Stjr	  if (cset->ncoll_syms)
3874146040Stjr	    extra = (const unsigned char *)
3875146040Stjr	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3876146040Stjr	  for (i = 0; i < cset->ncoll_syms; ++i)
3877146040Stjr	    {
3878146040Stjr	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3879146040Stjr	      /* Compare the length of input collating element and
3880146040Stjr		 the length of current collating element.  */
3881146040Stjr	      if (*coll_sym != elem_len)
3882146040Stjr		continue;
3883146040Stjr	      /* Compare each bytes.  */
3884146040Stjr	      for (j = 0; j < *coll_sym; j++)
3885146040Stjr		if (pin[j] != coll_sym[1 + j])
3886146040Stjr		  break;
3887146040Stjr	      if (j == *coll_sym)
3888146040Stjr		{
3889146040Stjr		  /* Match if every bytes is equal.  */
3890146040Stjr		  match_len = j;
3891146040Stjr		  goto check_node_accept_bytes_match;
3892146040Stjr		}
3893146040Stjr	    }
3894146040Stjr
3895146040Stjr	  if (cset->nranges)
3896146040Stjr	    {
3897146040Stjr	      if (elem_len <= char_len)
3898146040Stjr		{
3899146040Stjr		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3900146040Stjr		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3901146040Stjr		}
3902146040Stjr	      else
3903146040Stjr		in_collseq = find_collation_sequence_value (pin, elem_len);
3904146040Stjr	    }
3905146040Stjr	  /* match with range expression?  */
3906146040Stjr	  for (i = 0; i < cset->nranges; ++i)
3907146040Stjr	    if (cset->range_starts[i] <= in_collseq
3908146040Stjr		&& in_collseq <= cset->range_ends[i])
3909146040Stjr	      {
3910146040Stjr		match_len = elem_len;
3911146040Stjr		goto check_node_accept_bytes_match;
3912146040Stjr	      }
3913146040Stjr
3914146040Stjr	  /* match with equivalence_class?  */
3915146040Stjr	  if (cset->nequiv_classes)
3916146040Stjr	    {
3917146040Stjr	      const unsigned char *cp = pin;
3918146040Stjr	      table = (const int32_t *)
3919146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3920146040Stjr	      weights = (const unsigned char *)
3921146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3922146040Stjr	      extra = (const unsigned char *)
3923146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3924146040Stjr	      indirect = (const int32_t *)
3925146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3926250724Sjkim	      int32_t idx = findidx (&cp, elem_len);
3927146040Stjr	      if (idx > 0)
3928146040Stjr		for (i = 0; i < cset->nequiv_classes; ++i)
3929146040Stjr		  {
3930146040Stjr		    int32_t equiv_class_idx = cset->equiv_classes[i];
3931250724Sjkim		    size_t weight_len = weights[idx & 0xffffff];
3932250724Sjkim		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3933250724Sjkim			&& (idx >> 24) == (equiv_class_idx >> 24))
3934146040Stjr		      {
3935146040Stjr			int cnt = 0;
3936250724Sjkim
3937250724Sjkim			idx &= 0xffffff;
3938250724Sjkim			equiv_class_idx &= 0xffffff;
3939250724Sjkim
3940146040Stjr			while (cnt <= weight_len
3941146040Stjr			       && (weights[equiv_class_idx + 1 + cnt]
3942146040Stjr				   == weights[idx + 1 + cnt]))
3943146040Stjr			  ++cnt;
3944146040Stjr			if (cnt > weight_len)
3945146040Stjr			  {
3946146040Stjr			    match_len = elem_len;
3947146040Stjr			    goto check_node_accept_bytes_match;
3948146040Stjr			  }
3949146040Stjr		      }
3950146040Stjr		  }
3951146040Stjr	    }
3952146040Stjr	}
3953146040Stjr      else
3954146040Stjr# endif /* _LIBC */
3955146040Stjr	{
3956146040Stjr	  /* match with range expression?  */
3957146040Stjr#if __GNUC__ >= 2
3958146040Stjr	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3959146040Stjr#else
3960146040Stjr	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3961146040Stjr	  cmp_buf[2] = wc;
3962146040Stjr#endif
3963146040Stjr	  for (i = 0; i < cset->nranges; ++i)
3964146040Stjr	    {
3965146040Stjr	      cmp_buf[0] = cset->range_starts[i];
3966146040Stjr	      cmp_buf[4] = cset->range_ends[i];
3967146040Stjr	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3968146040Stjr		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3969146040Stjr		{
3970146040Stjr		  match_len = char_len;
3971146040Stjr		  goto check_node_accept_bytes_match;
3972146040Stjr		}
3973146040Stjr	    }
3974146040Stjr	}
3975146040Stjr    check_node_accept_bytes_match:
3976146040Stjr      if (!cset->non_match)
3977146040Stjr	return match_len;
3978146040Stjr      else
3979146040Stjr	{
3980146040Stjr	  if (match_len > 0)
3981146040Stjr	    return 0;
3982146040Stjr	  else
3983146040Stjr	    return (elem_len > char_len) ? elem_len : char_len;
3984146040Stjr	}
3985146040Stjr    }
3986146040Stjr  return 0;
3987146040Stjr}
3988146040Stjr
3989146040Stjr# ifdef _LIBC
3990146040Stjrstatic unsigned int
3991250724Sjkiminternal_function
3992250724Sjkimfind_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3993146040Stjr{
3994146040Stjr  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3995146040Stjr  if (nrules == 0)
3996146040Stjr    {
3997146040Stjr      if (mbs_len == 1)
3998146040Stjr	{
3999146040Stjr	  /* No valid character.  Match it as a single byte character.  */
4000146040Stjr	  const unsigned char *collseq = (const unsigned char *)
4001146040Stjr	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4002146040Stjr	  return collseq[mbs[0]];
4003146040Stjr	}
4004146040Stjr      return UINT_MAX;
4005146040Stjr    }
4006146040Stjr  else
4007146040Stjr    {
4008146040Stjr      int32_t idx;
4009146040Stjr      const unsigned char *extra = (const unsigned char *)
4010146040Stjr	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4011146040Stjr      int32_t extrasize = (const unsigned char *)
4012146040Stjr	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4013146040Stjr
4014146040Stjr      for (idx = 0; idx < extrasize;)
4015146040Stjr	{
4016146040Stjr	  int mbs_cnt, found = 0;
4017146040Stjr	  int32_t elem_mbs_len;
4018146040Stjr	  /* Skip the name of collating element name.  */
4019146040Stjr	  idx = idx + extra[idx] + 1;
4020146040Stjr	  elem_mbs_len = extra[idx++];
4021146040Stjr	  if (mbs_len == elem_mbs_len)
4022146040Stjr	    {
4023146040Stjr	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4024146040Stjr		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4025146040Stjr		  break;
4026146040Stjr	      if (mbs_cnt == elem_mbs_len)
4027146040Stjr		/* Found the entry.  */
4028146040Stjr		found = 1;
4029146040Stjr	    }
4030146040Stjr	  /* Skip the byte sequence of the collating element.  */
4031146040Stjr	  idx += elem_mbs_len;
4032146040Stjr	  /* Adjust for the alignment.  */
4033146040Stjr	  idx = (idx + 3) & ~3;
4034146040Stjr	  /* Skip the collation sequence value.  */
4035146040Stjr	  idx += sizeof (uint32_t);
4036146040Stjr	  /* Skip the wide char sequence of the collating element.  */
4037250724Sjkim	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4038146040Stjr	  /* If we found the entry, return the sequence value.  */
4039146040Stjr	  if (found)
4040146040Stjr	    return *(uint32_t *) (extra + idx);
4041146040Stjr	  /* Skip the collation sequence value.  */
4042146040Stjr	  idx += sizeof (uint32_t);
4043146040Stjr	}
4044146040Stjr      return UINT_MAX;
4045146040Stjr    }
4046146040Stjr}
4047146040Stjr# endif /* _LIBC */
4048146040Stjr#endif /* RE_ENABLE_I18N */
4049146040Stjr
4050146040Stjr/* Check whether the node accepts the byte which is IDX-th
4051146040Stjr   byte of the INPUT.  */
4052146040Stjr
4053146040Stjrstatic int
4054250724Sjkiminternal_function
4055250724Sjkimcheck_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4056250724Sjkim		   int idx)
4057146040Stjr{
4058146040Stjr  unsigned char ch;
4059146040Stjr  ch = re_string_byte_at (&mctx->input, idx);
4060146040Stjr  switch (node->type)
4061146040Stjr    {
4062146040Stjr    case CHARACTER:
4063146040Stjr      if (node->opr.c != ch)
4064250724Sjkim	return 0;
4065146040Stjr      break;
4066146040Stjr
4067146040Stjr    case SIMPLE_BRACKET:
4068146040Stjr      if (!bitset_contain (node->opr.sbcset, ch))
4069250724Sjkim	return 0;
4070146040Stjr      break;
4071146040Stjr
4072146040Stjr#ifdef RE_ENABLE_I18N
4073146040Stjr    case OP_UTF8_PERIOD:
4074146040Stjr      if (ch >= 0x80)
4075250724Sjkim	return 0;
4076146040Stjr      /* FALLTHROUGH */
4077146040Stjr#endif
4078146040Stjr    case OP_PERIOD:
4079146040Stjr      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4080146040Stjr	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4081146040Stjr	return 0;
4082146040Stjr      break;
4083146040Stjr
4084146040Stjr    default:
4085146040Stjr      return 0;
4086146040Stjr    }
4087146040Stjr
4088146040Stjr  if (node->constraint)
4089146040Stjr    {
4090146040Stjr      /* The node has constraints.  Check whether the current context
4091146040Stjr	 satisfies the constraints.  */
4092146040Stjr      unsigned int context = re_string_context_at (&mctx->input, idx,
4093146040Stjr						   mctx->eflags);
4094146040Stjr      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4095146040Stjr	return 0;
4096146040Stjr    }
4097146040Stjr
4098146040Stjr  return 1;
4099146040Stjr}
4100146040Stjr
4101146040Stjr/* Extend the buffers, if the buffers have run out.  */
4102146040Stjr
4103146040Stjrstatic reg_errcode_t
4104250724Sjkiminternal_function __attribute_warn_unused_result__
4105251435Sjkimextend_buffers (re_match_context_t *mctx, int min_len)
4106146040Stjr{
4107146040Stjr  reg_errcode_t ret;
4108146040Stjr  re_string_t *pstr = &mctx->input;
4109146040Stjr
4110250724Sjkim  /* Avoid overflow.  */
4111250724Sjkim  if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4112250724Sjkim    return REG_ESPACE;
4113250724Sjkim
4114251435Sjkim  /* Double the lengthes of the buffers, but allocate at least MIN_LEN.  */
4115251435Sjkim  ret = re_string_realloc_buffers (pstr,
4116251435Sjkim				   MAX (min_len,
4117251435Sjkim					MIN (pstr->len, pstr->bufs_len * 2)));
4118146040Stjr  if (BE (ret != REG_NOERROR, 0))
4119146040Stjr    return ret;
4120146040Stjr
4121146040Stjr  if (mctx->state_log != NULL)
4122146040Stjr    {
4123146040Stjr      /* And double the length of state_log.  */
4124146040Stjr      /* XXX We have no indication of the size of this buffer.  If this
4125146040Stjr	 allocation fail we have no indication that the state_log array
4126146040Stjr	 does not have the right size.  */
4127146040Stjr      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4128146040Stjr					      pstr->bufs_len + 1);
4129146040Stjr      if (BE (new_array == NULL, 0))
4130146040Stjr	return REG_ESPACE;
4131146040Stjr      mctx->state_log = new_array;
4132146040Stjr    }
4133146040Stjr
4134146040Stjr  /* Then reconstruct the buffers.  */
4135146040Stjr  if (pstr->icase)
4136146040Stjr    {
4137146040Stjr#ifdef RE_ENABLE_I18N
4138146040Stjr      if (pstr->mb_cur_max > 1)
4139146040Stjr	{
4140146040Stjr	  ret = build_wcs_upper_buffer (pstr);
4141146040Stjr	  if (BE (ret != REG_NOERROR, 0))
4142146040Stjr	    return ret;
4143146040Stjr	}
4144146040Stjr      else
4145146040Stjr#endif /* RE_ENABLE_I18N  */
4146146040Stjr	build_upper_buffer (pstr);
4147146040Stjr    }
4148146040Stjr  else
4149146040Stjr    {
4150146040Stjr#ifdef RE_ENABLE_I18N
4151146040Stjr      if (pstr->mb_cur_max > 1)
4152146040Stjr	build_wcs_buffer (pstr);
4153146040Stjr      else
4154146040Stjr#endif /* RE_ENABLE_I18N  */
4155146040Stjr	{
4156146040Stjr	  if (pstr->trans != NULL)
4157146040Stjr	    re_string_translate_buffer (pstr);
4158146040Stjr	}
4159146040Stjr    }
4160146040Stjr  return REG_NOERROR;
4161146040Stjr}
4162146040Stjr
4163146040Stjr
4164146040Stjr/* Functions for matching context.  */
4165146040Stjr
4166146040Stjr/* Initialize MCTX.  */
4167146040Stjr
4168146040Stjrstatic reg_errcode_t
4169250724Sjkiminternal_function __attribute_warn_unused_result__
4170250724Sjkimmatch_ctx_init (re_match_context_t *mctx, int eflags, int n)
4171146040Stjr{
4172146040Stjr  mctx->eflags = eflags;
4173146040Stjr  mctx->match_last = -1;
4174146040Stjr  if (n > 0)
4175146040Stjr    {
4176146040Stjr      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4177146040Stjr      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4178146040Stjr      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4179146040Stjr	return REG_ESPACE;
4180146040Stjr    }
4181146040Stjr  /* Already zero-ed by the caller.
4182146040Stjr     else
4183146040Stjr       mctx->bkref_ents = NULL;
4184146040Stjr     mctx->nbkref_ents = 0;
4185146040Stjr     mctx->nsub_tops = 0;  */
4186146040Stjr  mctx->abkref_ents = n;
4187146040Stjr  mctx->max_mb_elem_len = 1;
4188146040Stjr  mctx->asub_tops = n;
4189146040Stjr  return REG_NOERROR;
4190146040Stjr}
4191146040Stjr
4192146040Stjr/* Clean the entries which depend on the current input in MCTX.
4193146040Stjr   This function must be invoked when the matcher changes the start index
4194146040Stjr   of the input, or changes the input string.  */
4195146040Stjr
4196146040Stjrstatic void
4197250724Sjkiminternal_function
4198250724Sjkimmatch_ctx_clean (re_match_context_t *mctx)
4199146040Stjr{
4200146040Stjr  int st_idx;
4201146040Stjr  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4202146040Stjr    {
4203146040Stjr      int sl_idx;
4204146040Stjr      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4205146040Stjr      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4206146040Stjr	{
4207146040Stjr	  re_sub_match_last_t *last = top->lasts[sl_idx];
4208146040Stjr	  re_free (last->path.array);
4209146040Stjr	  re_free (last);
4210146040Stjr	}
4211146040Stjr      re_free (top->lasts);
4212146040Stjr      if (top->path)
4213146040Stjr	{
4214146040Stjr	  re_free (top->path->array);
4215146040Stjr	  re_free (top->path);
4216146040Stjr	}
4217146040Stjr      free (top);
4218146040Stjr    }
4219146040Stjr
4220146040Stjr  mctx->nsub_tops = 0;
4221146040Stjr  mctx->nbkref_ents = 0;
4222146040Stjr}
4223146040Stjr
4224146040Stjr/* Free all the memory associated with MCTX.  */
4225146040Stjr
4226146040Stjrstatic void
4227250724Sjkiminternal_function
4228250724Sjkimmatch_ctx_free (re_match_context_t *mctx)
4229146040Stjr{
4230146040Stjr  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4231146040Stjr  match_ctx_clean (mctx);
4232146040Stjr  re_free (mctx->sub_tops);
4233146040Stjr  re_free (mctx->bkref_ents);
4234146040Stjr}
4235146040Stjr
4236146040Stjr/* Add a new backreference entry to MCTX.
4237146040Stjr   Note that we assume that caller never call this function with duplicate
4238146040Stjr   entry, and call with STR_IDX which isn't smaller than any existing entry.
4239146040Stjr*/
4240146040Stjr
4241146040Stjrstatic reg_errcode_t
4242250724Sjkiminternal_function __attribute_warn_unused_result__
4243250724Sjkimmatch_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4244250724Sjkim		     int to)
4245146040Stjr{
4246146040Stjr  if (mctx->nbkref_ents >= mctx->abkref_ents)
4247146040Stjr    {
4248146040Stjr      struct re_backref_cache_entry* new_entry;
4249146040Stjr      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4250146040Stjr			      mctx->abkref_ents * 2);
4251146040Stjr      if (BE (new_entry == NULL, 0))
4252146040Stjr	{
4253146040Stjr	  re_free (mctx->bkref_ents);
4254146040Stjr	  return REG_ESPACE;
4255146040Stjr	}
4256146040Stjr      mctx->bkref_ents = new_entry;
4257146040Stjr      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4258146040Stjr	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4259146040Stjr      mctx->abkref_ents *= 2;
4260146040Stjr    }
4261146040Stjr  if (mctx->nbkref_ents > 0
4262146040Stjr      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4263146040Stjr    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4264146040Stjr
4265146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4266146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4267146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4268146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4269146040Stjr
4270146040Stjr  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4271146040Stjr     If bit N is clear, means that this entry won't epsilon-transition to
4272146040Stjr     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4273146040Stjr     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4274146040Stjr     such node.
4275146040Stjr
4276146040Stjr     A backreference does not epsilon-transition unless it is empty, so set
4277146040Stjr     to all zeros if FROM != TO.  */
4278146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4279146040Stjr    = (from == to ? ~0 : 0);
4280146040Stjr
4281146040Stjr  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4282146040Stjr  if (mctx->max_mb_elem_len < to - from)
4283146040Stjr    mctx->max_mb_elem_len = to - from;
4284146040Stjr  return REG_NOERROR;
4285146040Stjr}
4286146040Stjr
4287146040Stjr/* Search for the first entry which has the same str_idx, or -1 if none is
4288146040Stjr   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4289146040Stjr
4290146040Stjrstatic int
4291250724Sjkiminternal_function
4292250724Sjkimsearch_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4293146040Stjr{
4294146040Stjr  int left, right, mid, last;
4295146040Stjr  last = right = mctx->nbkref_ents;
4296146040Stjr  for (left = 0; left < right;)
4297146040Stjr    {
4298146040Stjr      mid = (left + right) / 2;
4299146040Stjr      if (mctx->bkref_ents[mid].str_idx < str_idx)
4300146040Stjr	left = mid + 1;
4301146040Stjr      else
4302146040Stjr	right = mid;
4303146040Stjr    }
4304146040Stjr  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4305146040Stjr    return left;
4306146040Stjr  else
4307146040Stjr    return -1;
4308146040Stjr}
4309146040Stjr
4310146040Stjr/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4311146040Stjr   at STR_IDX.  */
4312146040Stjr
4313146040Stjrstatic reg_errcode_t
4314250724Sjkiminternal_function __attribute_warn_unused_result__
4315250724Sjkimmatch_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4316146040Stjr{
4317146040Stjr#ifdef DEBUG
4318146040Stjr  assert (mctx->sub_tops != NULL);
4319146040Stjr  assert (mctx->asub_tops > 0);
4320146040Stjr#endif
4321146040Stjr  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4322146040Stjr    {
4323146040Stjr      int new_asub_tops = mctx->asub_tops * 2;
4324146040Stjr      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4325146040Stjr						   re_sub_match_top_t *,
4326146040Stjr						   new_asub_tops);
4327146040Stjr      if (BE (new_array == NULL, 0))
4328146040Stjr	return REG_ESPACE;
4329146040Stjr      mctx->sub_tops = new_array;
4330146040Stjr      mctx->asub_tops = new_asub_tops;
4331146040Stjr    }
4332146040Stjr  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4333146040Stjr  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4334146040Stjr    return REG_ESPACE;
4335146040Stjr  mctx->sub_tops[mctx->nsub_tops]->node = node;
4336146040Stjr  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4337146040Stjr  return REG_NOERROR;
4338146040Stjr}
4339146040Stjr
4340146040Stjr/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4341146040Stjr   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4342146040Stjr
4343146040Stjrstatic re_sub_match_last_t *
4344250724Sjkiminternal_function
4345250724Sjkimmatch_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4346146040Stjr{
4347146040Stjr  re_sub_match_last_t *new_entry;
4348146040Stjr  if (BE (subtop->nlasts == subtop->alasts, 0))
4349146040Stjr    {
4350146040Stjr      int new_alasts = 2 * subtop->alasts + 1;
4351146040Stjr      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4352146040Stjr						    re_sub_match_last_t *,
4353146040Stjr						    new_alasts);
4354146040Stjr      if (BE (new_array == NULL, 0))
4355146040Stjr	return NULL;
4356146040Stjr      subtop->lasts = new_array;
4357146040Stjr      subtop->alasts = new_alasts;
4358146040Stjr    }
4359146040Stjr  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4360146040Stjr  if (BE (new_entry != NULL, 1))
4361146040Stjr    {
4362146040Stjr      subtop->lasts[subtop->nlasts] = new_entry;
4363146040Stjr      new_entry->node = node;
4364146040Stjr      new_entry->str_idx = str_idx;
4365146040Stjr      ++subtop->nlasts;
4366146040Stjr    }
4367146040Stjr  return new_entry;
4368146040Stjr}
4369146040Stjr
4370146040Stjrstatic void
4371250724Sjkiminternal_function
4372250724Sjkimsift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4373250724Sjkim	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4374146040Stjr{
4375146040Stjr  sctx->sifted_states = sifted_sts;
4376146040Stjr  sctx->limited_states = limited_sts;
4377146040Stjr  sctx->last_node = last_node;
4378146040Stjr  sctx->last_str_idx = last_str_idx;
4379146040Stjr  re_node_set_init_empty (&sctx->limits);
4380146040Stjr}
4381