regexec.c revision 146040
1146040Stjr/* Extended regular expression matching and search library.
2146040Stjr   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3146040Stjr   This file is part of the GNU C Library.
4146040Stjr   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5146040Stjr
6146040Stjr   The GNU C Library is free software; you can redistribute it and/or
7146040Stjr   modify it under the terms of the GNU Lesser General Public
8146040Stjr   License as published by the Free Software Foundation; either
9146040Stjr   version 2.1 of the License, or (at your option) any later version.
10146040Stjr
11146040Stjr   The GNU C Library is distributed in the hope that it will be useful,
12146040Stjr   but WITHOUT ANY WARRANTY; without even the implied warranty of
13146040Stjr   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14146040Stjr   Lesser General Public License for more details.
15146040Stjr
16146040Stjr   You should have received a copy of the GNU Lesser General Public
17146040Stjr   License along with the GNU C Library; if not, write to the Free
18146040Stjr   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19146040Stjr   02111-1307 USA.  */
20146040Stjr
21146040Stjrstatic reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22146040Stjr				     int n) internal_function;
23146040Stjrstatic void match_ctx_clean (re_match_context_t *mctx) internal_function;
24146040Stjrstatic void match_ctx_free (re_match_context_t *cache) internal_function;
25146040Stjrstatic reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26146040Stjr					  int str_idx, int from, int to)
27146040Stjr     internal_function;
28146040Stjrstatic int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
29146040Stjr     internal_function;
30146040Stjrstatic reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31146040Stjr					   int str_idx) internal_function;
32146040Stjrstatic re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33146040Stjr						   int node, int str_idx)
34146040Stjr     internal_function;
35146040Stjrstatic void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36146040Stjr			   re_dfastate_t **limited_sts, int last_node,
37146040Stjr			   int last_str_idx)
38146040Stjr     internal_function;
39146040Stjrstatic reg_errcode_t re_search_internal (const regex_t *preg,
40146040Stjr					 const char *string, int length,
41146040Stjr					 int start, int range, int stop,
42146040Stjr					 size_t nmatch, regmatch_t pmatch[],
43146040Stjr					 int eflags) internal_function;
44146040Stjrstatic int re_search_2_stub (struct re_pattern_buffer *bufp,
45146040Stjr			     const char *string1, int length1,
46146040Stjr			     const char *string2, int length2,
47146040Stjr			     int start, int range, struct re_registers *regs,
48146040Stjr			     int stop, int ret_len) internal_function;
49146040Stjrstatic int re_search_stub (struct re_pattern_buffer *bufp,
50146040Stjr			   const char *string, int length, int start,
51146040Stjr			   int range, int stop, struct re_registers *regs,
52146040Stjr			   int ret_len) internal_function;
53146040Stjrstatic unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
54146040Stjr			      int nregs, int regs_allocated) internal_function;
55146040Stjrstatic inline re_dfastate_t *acquire_init_state_context
56146040Stjr     (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
57146040Stjr     __attribute ((always_inline)) internal_function;
58146040Stjrstatic reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59146040Stjr     internal_function;
60146040Stjrstatic int check_matching (re_match_context_t *mctx, int fl_longest_match,
61146040Stjr			   int *p_match_first)
62146040Stjr     internal_function;
63146040Stjrstatic int check_halt_node_context (const re_dfa_t *dfa, int node,
64146040Stjr				    unsigned int context) internal_function;
65146040Stjrstatic int check_halt_state_context (const re_match_context_t *mctx,
66146040Stjr				     const re_dfastate_t *state, int idx)
67146040Stjr     internal_function;
68146040Stjrstatic void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
69146040Stjr			 regmatch_t *prev_idx_match, int cur_node,
70146040Stjr			 int cur_idx, int nmatch) internal_function;
71146040Stjrstatic int proceed_next_node (const re_match_context_t *mctx,
72146040Stjr			      int nregs, regmatch_t *regs,
73146040Stjr			      int *pidx, int node, re_node_set *eps_via_nodes,
74146040Stjr			      struct re_fail_stack_t *fs) internal_function;
75146040Stjrstatic reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
76146040Stjr				      int str_idx, int dest_node, int nregs,
77146040Stjr				      regmatch_t *regs,
78146040Stjr				      re_node_set *eps_via_nodes) internal_function;
79146040Stjrstatic int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
80146040Stjr			   regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
81146040Stjrstatic reg_errcode_t set_regs (const regex_t *preg,
82146040Stjr			       const re_match_context_t *mctx,
83146040Stjr			       size_t nmatch, regmatch_t *pmatch,
84146040Stjr			       int fl_backtrack) internal_function;
85146040Stjrstatic reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
86146040Stjr
87146040Stjr#ifdef RE_ENABLE_I18N
88146040Stjrstatic int sift_states_iter_mb (const re_match_context_t *mctx,
89146040Stjr				re_sift_context_t *sctx,
90146040Stjr				int node_idx, int str_idx, int max_str_idx) internal_function;
91146040Stjr#endif /* RE_ENABLE_I18N */
92146040Stjrstatic reg_errcode_t sift_states_backward (re_match_context_t *mctx,
93146040Stjr					   re_sift_context_t *sctx) internal_function;
94146040Stjrstatic reg_errcode_t build_sifted_states (re_match_context_t *mctx,
95146040Stjr					  re_sift_context_t *sctx, int str_idx,
96146040Stjr					  re_node_set *cur_dest) internal_function;
97146040Stjrstatic reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
98146040Stjr					      re_sift_context_t *sctx,
99146040Stjr					      int str_idx,
100146040Stjr					      re_node_set *dest_nodes) internal_function;
101146040Stjrstatic reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
102146040Stjr					    re_node_set *dest_nodes,
103146040Stjr					    const re_node_set *candidates) internal_function;
104146040Stjrstatic reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
105146040Stjr					    re_node_set *dest_nodes,
106146040Stjr					    const re_node_set *and_nodes) internal_function;
107146040Stjrstatic int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
108146040Stjr			     int dst_node, int dst_idx, int src_node,
109146040Stjr			     int src_idx) internal_function;
110146040Stjrstatic int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
111146040Stjr					int boundaries, int subexp_idx,
112146040Stjr					int from_node, int bkref_idx) internal_function;
113146040Stjrstatic int check_dst_limits_calc_pos (re_match_context_t *mctx,
114146040Stjr				      int limit, int subexp_idx,
115146040Stjr				      int node, int str_idx,
116146040Stjr				      int bkref_idx) internal_function;
117146040Stjrstatic reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
118146040Stjr					  re_node_set *dest_nodes,
119146040Stjr					  const re_node_set *candidates,
120146040Stjr					  re_node_set *limits,
121146040Stjr					  struct re_backref_cache_entry *bkref_ents,
122146040Stjr					  int str_idx) internal_function;
123146040Stjrstatic reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
124146040Stjr					re_sift_context_t *sctx,
125146040Stjr					int str_idx, const re_node_set *candidates) internal_function;
126146040Stjrstatic reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
127146040Stjr					        int next_state_log_idx) internal_function;
128146040Stjrstatic reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
129146040Stjr					re_dfastate_t **src, int num) internal_function;
130146040Stjrstatic re_dfastate_t *find_recover_state (reg_errcode_t *err,
131146040Stjr					 re_match_context_t *mctx) internal_function;
132146040Stjrstatic re_dfastate_t *transit_state (reg_errcode_t *err,
133146040Stjr				     re_match_context_t *mctx,
134146040Stjr				     re_dfastate_t *state) internal_function;
135146040Stjrstatic re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
136146040Stjr					    re_match_context_t *mctx,
137146040Stjr					    re_dfastate_t *next_state) internal_function;
138146040Stjrstatic reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
139146040Stjr						re_node_set *cur_nodes,
140146040Stjr						int str_idx) internal_function;
141146040Stjr#if 0
142146040Stjrstatic re_dfastate_t *transit_state_sb (reg_errcode_t *err,
143146040Stjr					re_match_context_t *mctx,
144146040Stjr					re_dfastate_t *pstate) internal_function;
145146040Stjr#endif
146146040Stjr#ifdef RE_ENABLE_I18N
147146040Stjrstatic reg_errcode_t transit_state_mb (re_match_context_t *mctx,
148146040Stjr				       re_dfastate_t *pstate) internal_function;
149146040Stjr#endif /* RE_ENABLE_I18N */
150146040Stjrstatic reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
151146040Stjr					  const re_node_set *nodes) internal_function;
152146040Stjrstatic reg_errcode_t get_subexp (re_match_context_t *mctx,
153146040Stjr				 int bkref_node, int bkref_str_idx) internal_function;
154146040Stjrstatic reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155146040Stjr				     const re_sub_match_top_t *sub_top,
156146040Stjr				     re_sub_match_last_t *sub_last,
157146040Stjr				     int bkref_node, int bkref_str) internal_function;
158146040Stjrstatic int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
159146040Stjr			     int subexp_idx, int type) internal_function;
160146040Stjrstatic reg_errcode_t check_arrival (re_match_context_t *mctx,
161146040Stjr				    state_array_t *path, int top_node,
162146040Stjr				    int top_str, int last_node, int last_str,
163146040Stjr				    int type) internal_function;
164146040Stjrstatic reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165146040Stjr						   int str_idx,
166146040Stjr						   re_node_set *cur_nodes,
167146040Stjr						   re_node_set *next_nodes) internal_function;
168146040Stjrstatic reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
169146040Stjr					       re_node_set *cur_nodes,
170146040Stjr					       int ex_subexp, int type) internal_function;
171146040Stjrstatic reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
172146040Stjr						   re_node_set *dst_nodes,
173146040Stjr						   int target, int ex_subexp,
174146040Stjr						   int type) internal_function;
175146040Stjrstatic reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
176146040Stjr					 re_node_set *cur_nodes, int cur_str,
177146040Stjr					 int subexp_num, int type) internal_function;
178146040Stjrstatic int build_trtable (re_dfa_t *dfa,
179146040Stjr			  re_dfastate_t *state) internal_function;
180146040Stjr#ifdef RE_ENABLE_I18N
181146040Stjrstatic int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
182146040Stjr				    const re_string_t *input, int idx) internal_function;
183146040Stjr# ifdef _LIBC
184146040Stjrstatic unsigned int find_collation_sequence_value (const unsigned char *mbs,
185146040Stjr						   size_t name_len) internal_function;
186146040Stjr# endif /* _LIBC */
187146040Stjr#endif /* RE_ENABLE_I18N */
188146040Stjrstatic int group_nodes_into_DFAstates (re_dfa_t *dfa,
189146040Stjr				       const re_dfastate_t *state,
190146040Stjr				       re_node_set *states_node,
191146040Stjr				       bitset *states_ch) internal_function;
192146040Stjrstatic int check_node_accept (const re_match_context_t *mctx,
193146040Stjr			      const re_token_t *node, int idx) internal_function;
194146040Stjrstatic reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
195146040Stjr
196146040Stjr/* Entry point for POSIX code.  */
197146040Stjr
198146040Stjr/* regexec searches for a given pattern, specified by PREG, in the
199146040Stjr   string STRING.
200146040Stjr
201146040Stjr   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
202146040Stjr   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
203146040Stjr   least NMATCH elements, and we set them to the offsets of the
204146040Stjr   corresponding matched substrings.
205146040Stjr
206146040Stjr   EFLAGS specifies `execution flags' which affect matching: if
207146040Stjr   REG_NOTBOL is set, then ^ does not match at the beginning of the
208146040Stjr   string; if REG_NOTEOL is set, then $ does not match at the end.
209146040Stjr
210146040Stjr   We return 0 if we find a match and REG_NOMATCH if not.  */
211146040Stjr
212146040Stjrint
213146040Stjrregexec (preg, string, nmatch, pmatch, eflags)
214146040Stjr    const regex_t *__restrict preg;
215146040Stjr    const char *__restrict string;
216146040Stjr    size_t nmatch;
217146040Stjr    regmatch_t pmatch[];
218146040Stjr    int eflags;
219146040Stjr{
220146040Stjr  reg_errcode_t err;
221146040Stjr  int start, length;
222146040Stjr
223146040Stjr  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
224146040Stjr    return REG_BADPAT;
225146040Stjr
226146040Stjr  if (eflags & REG_STARTEND)
227146040Stjr    {
228146040Stjr      start = pmatch[0].rm_so;
229146040Stjr      length = pmatch[0].rm_eo;
230146040Stjr    }
231146040Stjr  else
232146040Stjr    {
233146040Stjr      start = 0;
234146040Stjr      length = strlen (string);
235146040Stjr    }
236146040Stjr  if (preg->no_sub)
237146040Stjr    err = re_search_internal (preg, string, length, start, length - start,
238146040Stjr			      length, 0, NULL, eflags);
239146040Stjr  else
240146040Stjr    err = re_search_internal (preg, string, length, start, length - start,
241146040Stjr			      length, nmatch, pmatch, eflags);
242146040Stjr  return err != REG_NOERROR;
243146040Stjr}
244146040Stjr
245146040Stjr#ifdef _LIBC
246146040Stjr# include <shlib-compat.h>
247146040Stjrversioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
248146040Stjr
249146040Stjr# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
250146040Stjr__typeof__ (__regexec) __compat_regexec;
251146040Stjr
252146040Stjrint
253146040Stjrattribute_compat_text_section
254146040Stjr__compat_regexec (const regex_t *__restrict preg,
255146040Stjr		  const char *__restrict string, size_t nmatch,
256146040Stjr		  regmatch_t pmatch[], int eflags)
257146040Stjr{
258146040Stjr  return regexec (preg, string, nmatch, pmatch,
259146040Stjr		  eflags & (REG_NOTBOL | REG_NOTEOL));
260146040Stjr}
261146040Stjrcompat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
262146040Stjr# endif
263146040Stjr#endif
264146040Stjr
265146040Stjr/* Entry points for GNU code.  */
266146040Stjr
267146040Stjr/* re_match, re_search, re_match_2, re_search_2
268146040Stjr
269146040Stjr   The former two functions operate on STRING with length LENGTH,
270146040Stjr   while the later two operate on concatenation of STRING1 and STRING2
271146040Stjr   with lengths LENGTH1 and LENGTH2, respectively.
272146040Stjr
273146040Stjr   re_match() matches the compiled pattern in BUFP against the string,
274146040Stjr   starting at index START.
275146040Stjr
276146040Stjr   re_search() first tries matching at index START, then it tries to match
277146040Stjr   starting from index START + 1, and so on.  The last start position tried
278146040Stjr   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
279146040Stjr   way as re_match().)
280146040Stjr
281146040Stjr   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
282146040Stjr   the first STOP characters of the concatenation of the strings should be
283146040Stjr   concerned.
284146040Stjr
285146040Stjr   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
286146040Stjr   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
287146040Stjr   computed relative to the concatenation, not relative to the individual
288146040Stjr   strings.)
289146040Stjr
290146040Stjr   On success, re_match* functions return the length of the match, re_search*
291146040Stjr   return the position of the start of the match.  Return value -1 means no
292146040Stjr   match was found and -2 indicates an internal error.  */
293146040Stjr
294146040Stjrint
295146040Stjrre_match (bufp, string, length, start, regs)
296146040Stjr    struct re_pattern_buffer *bufp;
297146040Stjr    const char *string;
298146040Stjr    int length, start;
299146040Stjr    struct re_registers *regs;
300146040Stjr{
301146040Stjr  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
302146040Stjr}
303146040Stjr#ifdef _LIBC
304146040Stjrweak_alias (__re_match, re_match)
305146040Stjr#endif
306146040Stjr
307146040Stjrint
308146040Stjrre_search (bufp, string, length, start, range, regs)
309146040Stjr    struct re_pattern_buffer *bufp;
310146040Stjr    const char *string;
311146040Stjr    int length, start, range;
312146040Stjr    struct re_registers *regs;
313146040Stjr{
314146040Stjr  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
315146040Stjr}
316146040Stjr#ifdef _LIBC
317146040Stjrweak_alias (__re_search, re_search)
318146040Stjr#endif
319146040Stjr
320146040Stjrint
321146040Stjrre_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
322146040Stjr    struct re_pattern_buffer *bufp;
323146040Stjr    const char *string1, *string2;
324146040Stjr    int length1, length2, start, stop;
325146040Stjr    struct re_registers *regs;
326146040Stjr{
327146040Stjr  return re_search_2_stub (bufp, string1, length1, string2, length2,
328146040Stjr			   start, 0, regs, stop, 1);
329146040Stjr}
330146040Stjr#ifdef _LIBC
331146040Stjrweak_alias (__re_match_2, re_match_2)
332146040Stjr#endif
333146040Stjr
334146040Stjrint
335146040Stjrre_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
336146040Stjr    struct re_pattern_buffer *bufp;
337146040Stjr    const char *string1, *string2;
338146040Stjr    int length1, length2, start, range, stop;
339146040Stjr    struct re_registers *regs;
340146040Stjr{
341146040Stjr  return re_search_2_stub (bufp, string1, length1, string2, length2,
342146040Stjr			   start, range, regs, stop, 0);
343146040Stjr}
344146040Stjr#ifdef _LIBC
345146040Stjrweak_alias (__re_search_2, re_search_2)
346146040Stjr#endif
347146040Stjr
348146040Stjrstatic int
349146040Stjrre_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
350146040Stjr		  stop, ret_len)
351146040Stjr    struct re_pattern_buffer *bufp;
352146040Stjr    const char *string1, *string2;
353146040Stjr    int length1, length2, start, range, stop, ret_len;
354146040Stjr    struct re_registers *regs;
355146040Stjr{
356146040Stjr  const char *str;
357146040Stjr  int rval;
358146040Stjr  int len = length1 + length2;
359146040Stjr  int free_str = 0;
360146040Stjr
361146040Stjr  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
362146040Stjr    return -2;
363146040Stjr
364146040Stjr  /* Concatenate the strings.  */
365146040Stjr  if (length2 > 0)
366146040Stjr    if (length1 > 0)
367146040Stjr      {
368146040Stjr	char *s = re_malloc (char, len);
369146040Stjr
370146040Stjr	if (BE (s == NULL, 0))
371146040Stjr	  return -2;
372146040Stjr	memcpy (s, string1, length1);
373146040Stjr	memcpy (s + length1, string2, length2);
374146040Stjr	str = s;
375146040Stjr	free_str = 1;
376146040Stjr      }
377146040Stjr    else
378146040Stjr      str = string2;
379146040Stjr  else
380146040Stjr    str = string1;
381146040Stjr
382146040Stjr  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
383146040Stjr			 ret_len);
384146040Stjr  if (free_str)
385146040Stjr    re_free ((char *) str);
386146040Stjr  return rval;
387146040Stjr}
388146040Stjr
389146040Stjr/* The parameters have the same meaning as those of re_search.
390146040Stjr   Additional parameters:
391146040Stjr   If RET_LEN is nonzero the length of the match is returned (re_match style);
392146040Stjr   otherwise the position of the match is returned.  */
393146040Stjr
394146040Stjrstatic int
395146040Stjrre_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
396146040Stjr    struct re_pattern_buffer *bufp;
397146040Stjr    const char *string;
398146040Stjr    int length, start, range, stop, ret_len;
399146040Stjr    struct re_registers *regs;
400146040Stjr{
401146040Stjr  reg_errcode_t result;
402146040Stjr  regmatch_t *pmatch;
403146040Stjr  int nregs, rval;
404146040Stjr  int eflags = 0;
405146040Stjr
406146040Stjr  /* Check for out-of-range.  */
407146040Stjr  if (BE (start < 0 || start > length, 0))
408146040Stjr    return -1;
409146040Stjr  if (BE (start + range > length, 0))
410146040Stjr    range = length - start;
411146040Stjr  else if (BE (start + range < 0, 0))
412146040Stjr    range = -start;
413146040Stjr
414146040Stjr  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
415146040Stjr  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
416146040Stjr
417146040Stjr  /* Compile fastmap if we haven't yet.  */
418146040Stjr  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
419146040Stjr    re_compile_fastmap (bufp);
420146040Stjr
421146040Stjr  if (BE (bufp->no_sub, 0))
422146040Stjr    regs = NULL;
423146040Stjr
424146040Stjr  /* We need at least 1 register.  */
425146040Stjr  if (regs == NULL)
426146040Stjr    nregs = 1;
427146040Stjr  else if (BE (bufp->regs_allocated == REGS_FIXED &&
428146040Stjr	       regs->num_regs < bufp->re_nsub + 1, 0))
429146040Stjr    {
430146040Stjr      nregs = regs->num_regs;
431146040Stjr      if (BE (nregs < 1, 0))
432146040Stjr	{
433146040Stjr	  /* Nothing can be copied to regs.  */
434146040Stjr	  regs = NULL;
435146040Stjr	  nregs = 1;
436146040Stjr	}
437146040Stjr    }
438146040Stjr  else
439146040Stjr    nregs = bufp->re_nsub + 1;
440146040Stjr  pmatch = re_malloc (regmatch_t, nregs);
441146040Stjr  if (BE (pmatch == NULL, 0))
442146040Stjr    return -2;
443146040Stjr
444146040Stjr  result = re_search_internal (bufp, string, length, start, range, stop,
445146040Stjr			       nregs, pmatch, eflags);
446146040Stjr
447146040Stjr  rval = 0;
448146040Stjr
449146040Stjr  /* I hope we needn't fill ther regs with -1's when no match was found.  */
450146040Stjr  if (result != REG_NOERROR)
451146040Stjr    rval = -1;
452146040Stjr  else if (regs != NULL)
453146040Stjr    {
454146040Stjr      /* If caller wants register contents data back, copy them.  */
455146040Stjr      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
456146040Stjr					   bufp->regs_allocated);
457146040Stjr      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
458146040Stjr	rval = -2;
459146040Stjr    }
460146040Stjr
461146040Stjr  if (BE (rval == 0, 1))
462146040Stjr    {
463146040Stjr      if (ret_len)
464146040Stjr	{
465146040Stjr	  assert (pmatch[0].rm_so == start);
466146040Stjr	  rval = pmatch[0].rm_eo - start;
467146040Stjr	}
468146040Stjr      else
469146040Stjr	rval = pmatch[0].rm_so;
470146040Stjr    }
471146040Stjr  re_free (pmatch);
472146040Stjr  return rval;
473146040Stjr}
474146040Stjr
475146040Stjrstatic unsigned
476146040Stjrre_copy_regs (regs, pmatch, nregs, regs_allocated)
477146040Stjr    struct re_registers *regs;
478146040Stjr    regmatch_t *pmatch;
479146040Stjr    int nregs, regs_allocated;
480146040Stjr{
481146040Stjr  int rval = REGS_REALLOCATE;
482146040Stjr  int i;
483146040Stjr  int need_regs = nregs + 1;
484146040Stjr  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
485146040Stjr     uses.  */
486146040Stjr
487146040Stjr  /* Have the register data arrays been allocated?  */
488146040Stjr  if (regs_allocated == REGS_UNALLOCATED)
489146040Stjr    { /* No.  So allocate them with malloc.  */
490146040Stjr      regs->start = re_malloc (regoff_t, need_regs);
491146040Stjr      regs->end = re_malloc (regoff_t, need_regs);
492146040Stjr      if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
493146040Stjr	return REGS_UNALLOCATED;
494146040Stjr      regs->num_regs = need_regs;
495146040Stjr    }
496146040Stjr  else if (regs_allocated == REGS_REALLOCATE)
497146040Stjr    { /* Yes.  If we need more elements than were already
498146040Stjr	 allocated, reallocate them.  If we need fewer, just
499146040Stjr	 leave it alone.  */
500146040Stjr      if (BE (need_regs > regs->num_regs, 0))
501146040Stjr	{
502146040Stjr	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
503146040Stjr	  regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
504146040Stjr	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
505146040Stjr	    return REGS_UNALLOCATED;
506146040Stjr	  regs->start = new_start;
507146040Stjr	  regs->end = new_end;
508146040Stjr	  regs->num_regs = need_regs;
509146040Stjr	}
510146040Stjr    }
511146040Stjr  else
512146040Stjr    {
513146040Stjr      assert (regs_allocated == REGS_FIXED);
514146040Stjr      /* This function may not be called with REGS_FIXED and nregs too big.  */
515146040Stjr      assert (regs->num_regs >= nregs);
516146040Stjr      rval = REGS_FIXED;
517146040Stjr    }
518146040Stjr
519146040Stjr  /* Copy the regs.  */
520146040Stjr  for (i = 0; i < nregs; ++i)
521146040Stjr    {
522146040Stjr      regs->start[i] = pmatch[i].rm_so;
523146040Stjr      regs->end[i] = pmatch[i].rm_eo;
524146040Stjr    }
525146040Stjr  for ( ; i < regs->num_regs; ++i)
526146040Stjr    regs->start[i] = regs->end[i] = -1;
527146040Stjr
528146040Stjr  return rval;
529146040Stjr}
530146040Stjr
531146040Stjr/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
532146040Stjr   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
533146040Stjr   this memory for recording register information.  STARTS and ENDS
534146040Stjr   must be allocated using the malloc library routine, and must each
535146040Stjr   be at least NUM_REGS * sizeof (regoff_t) bytes long.
536146040Stjr
537146040Stjr   If NUM_REGS == 0, then subsequent matches should allocate their own
538146040Stjr   register data.
539146040Stjr
540146040Stjr   Unless this function is called, the first search or match using
541146040Stjr   PATTERN_BUFFER will allocate its own register data, without
542146040Stjr   freeing the old data.  */
543146040Stjr
544146040Stjrvoid
545146040Stjrre_set_registers (bufp, regs, num_regs, starts, ends)
546146040Stjr    struct re_pattern_buffer *bufp;
547146040Stjr    struct re_registers *regs;
548146040Stjr    unsigned num_regs;
549146040Stjr    regoff_t *starts, *ends;
550146040Stjr{
551146040Stjr  if (num_regs)
552146040Stjr    {
553146040Stjr      bufp->regs_allocated = REGS_REALLOCATE;
554146040Stjr      regs->num_regs = num_regs;
555146040Stjr      regs->start = starts;
556146040Stjr      regs->end = ends;
557146040Stjr    }
558146040Stjr  else
559146040Stjr    {
560146040Stjr      bufp->regs_allocated = REGS_UNALLOCATED;
561146040Stjr      regs->num_regs = 0;
562146040Stjr      regs->start = regs->end = (regoff_t *) 0;
563146040Stjr    }
564146040Stjr}
565146040Stjr#ifdef _LIBC
566146040Stjrweak_alias (__re_set_registers, re_set_registers)
567146040Stjr#endif
568146040Stjr
569146040Stjr/* Entry points compatible with 4.2 BSD regex library.  We don't define
570146040Stjr   them unless specifically requested.  */
571146040Stjr
572146040Stjr#if defined _REGEX_RE_COMP || defined _LIBC
573146040Stjrint
574146040Stjr# ifdef _LIBC
575146040Stjrweak_function
576146040Stjr# endif
577146040Stjrre_exec (s)
578146040Stjr     const char *s;
579146040Stjr{
580146040Stjr  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
581146040Stjr}
582146040Stjr#endif /* _REGEX_RE_COMP */
583146040Stjr
584146040Stjr/* Internal entry point.  */
585146040Stjr
586146040Stjr/* Searches for a compiled pattern PREG in the string STRING, whose
587146040Stjr   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
588146040Stjr   mingings with regexec.  START, and RANGE have the same meanings
589146040Stjr   with re_search.
590146040Stjr   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
591146040Stjr   otherwise return the error code.
592146040Stjr   Note: We assume front end functions already check ranges.
593146040Stjr   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
594146040Stjr
595146040Stjrstatic reg_errcode_t
596146040Stjrre_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
597146040Stjr		    eflags)
598146040Stjr    const regex_t *preg;
599146040Stjr    const char *string;
600146040Stjr    int length, start, range, stop, eflags;
601146040Stjr    size_t nmatch;
602146040Stjr    regmatch_t pmatch[];
603146040Stjr{
604146040Stjr  reg_errcode_t err;
605146040Stjr  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
606146040Stjr  int left_lim, right_lim, incr;
607146040Stjr  int fl_longest_match, match_first, match_kind, match_last = -1;
608146040Stjr  int extra_nmatch;
609146040Stjr  int sb, ch;
610146040Stjr#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
611146040Stjr  re_match_context_t mctx = { .dfa = dfa };
612146040Stjr#else
613146040Stjr  re_match_context_t mctx;
614146040Stjr#endif
615146040Stjr  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
616146040Stjr		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
617146040Stjr  unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
618146040Stjr
619146040Stjr#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
620146040Stjr  memset (&mctx, '\0', sizeof (re_match_context_t));
621146040Stjr  mctx.dfa = dfa;
622146040Stjr#endif
623146040Stjr
624146040Stjr  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
625146040Stjr  nmatch -= extra_nmatch;
626146040Stjr
627146040Stjr  /* Check if the DFA haven't been compiled.  */
628146040Stjr  if (BE (preg->used == 0 || dfa->init_state == NULL
629146040Stjr	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
630146040Stjr	  || dfa->init_state_begbuf == NULL, 0))
631146040Stjr    return REG_NOMATCH;
632146040Stjr
633146040Stjr#ifdef DEBUG
634146040Stjr  /* We assume front-end functions already check them.  */
635146040Stjr  assert (start + range >= 0 && start + range <= length);
636146040Stjr#endif
637146040Stjr
638146040Stjr  /* If initial states with non-begbuf contexts have no elements,
639146040Stjr     the regex must be anchored.  If preg->newline_anchor is set,
640146040Stjr     we'll never use init_state_nl, so do not check it.  */
641146040Stjr  if (dfa->init_state->nodes.nelem == 0
642146040Stjr      && dfa->init_state_word->nodes.nelem == 0
643146040Stjr      && (dfa->init_state_nl->nodes.nelem == 0
644146040Stjr	  || !preg->newline_anchor))
645146040Stjr    {
646146040Stjr      if (start != 0 && start + range != 0)
647146040Stjr        return REG_NOMATCH;
648146040Stjr      start = range = 0;
649146040Stjr    }
650146040Stjr
651146040Stjr  /* We must check the longest matching, if nmatch > 0.  */
652146040Stjr  fl_longest_match = (nmatch != 0 || dfa->nbackref);
653146040Stjr
654146040Stjr  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
655146040Stjr			    preg->translate, preg->syntax & RE_ICASE, dfa);
656146040Stjr  if (BE (err != REG_NOERROR, 0))
657146040Stjr    goto free_return;
658146040Stjr  mctx.input.stop = stop;
659146040Stjr  mctx.input.raw_stop = stop;
660146040Stjr  mctx.input.newline_anchor = preg->newline_anchor;
661146040Stjr
662146040Stjr  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
663146040Stjr  if (BE (err != REG_NOERROR, 0))
664146040Stjr    goto free_return;
665146040Stjr
666146040Stjr  /* We will log all the DFA states through which the dfa pass,
667146040Stjr     if nmatch > 1, or this dfa has "multibyte node", which is a
668146040Stjr     back-reference or a node which can accept multibyte character or
669146040Stjr     multi character collating element.  */
670146040Stjr  if (nmatch > 1 || dfa->has_mb_node)
671146040Stjr    {
672146040Stjr      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
673146040Stjr      if (BE (mctx.state_log == NULL, 0))
674146040Stjr	{
675146040Stjr	  err = REG_ESPACE;
676146040Stjr	  goto free_return;
677146040Stjr	}
678146040Stjr    }
679146040Stjr  else
680146040Stjr    mctx.state_log = NULL;
681146040Stjr
682146040Stjr  match_first = start;
683146040Stjr  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
684146040Stjr			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
685146040Stjr
686146040Stjr  /* Check incrementally whether of not the input string match.  */
687146040Stjr  incr = (range < 0) ? -1 : 1;
688146040Stjr  left_lim = (range < 0) ? start + range : start;
689146040Stjr  right_lim = (range < 0) ? start : start + range;
690146040Stjr  sb = dfa->mb_cur_max == 1;
691146040Stjr  match_kind =
692146040Stjr    (fastmap
693146040Stjr     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
694146040Stjr	| (range >= 0 ? 2 : 0)
695146040Stjr	| (t != NULL ? 1 : 0))
696146040Stjr     : 8);
697146040Stjr
698146040Stjr  for (;; match_first += incr)
699146040Stjr    {
700146040Stjr      err = REG_NOMATCH;
701146040Stjr      if (match_first < left_lim || right_lim < match_first)
702146040Stjr	goto free_return;
703146040Stjr
704146040Stjr      /* Advance as rapidly as possible through the string, until we
705146040Stjr	 find a plausible place to start matching.  This may be done
706146040Stjr	 with varying efficiency, so there are various possibilities:
707146040Stjr	 only the most common of them are specialized, in order to
708146040Stjr	 save on code size.  We use a switch statement for speed.  */
709146040Stjr      switch (match_kind)
710146040Stjr	{
711146040Stjr	case 8:
712146040Stjr	  /* No fastmap.  */
713146040Stjr	  break;
714146040Stjr
715146040Stjr	case 7:
716146040Stjr	  /* Fastmap with single-byte translation, match forward.  */
717146040Stjr	  while (BE (match_first < right_lim, 1)
718146040Stjr		 && !fastmap[t[(unsigned char) string[match_first]]])
719146040Stjr	    ++match_first;
720146040Stjr	  goto forward_match_found_start_or_reached_end;
721146040Stjr
722146040Stjr	case 6:
723146040Stjr	  /* Fastmap without translation, match forward.  */
724146040Stjr	  while (BE (match_first < right_lim, 1)
725146040Stjr		 && !fastmap[(unsigned char) string[match_first]])
726146040Stjr	    ++match_first;
727146040Stjr
728146040Stjr	forward_match_found_start_or_reached_end:
729146040Stjr	  if (BE (match_first == right_lim, 0))
730146040Stjr	    {
731146040Stjr	      ch = match_first >= length
732146040Stjr		       ? 0 : (unsigned char) string[match_first];
733146040Stjr	      if (!fastmap[t ? t[ch] : ch])
734146040Stjr		goto free_return;
735146040Stjr	    }
736146040Stjr	  break;
737146040Stjr
738146040Stjr	case 4:
739146040Stjr	case 5:
740146040Stjr	  /* Fastmap without multi-byte translation, match backwards.  */
741146040Stjr	  while (match_first >= left_lim)
742146040Stjr	    {
743146040Stjr	      ch = match_first >= length
744146040Stjr		       ? 0 : (unsigned char) string[match_first];
745146040Stjr	      if (fastmap[t ? t[ch] : ch])
746146040Stjr		break;
747146040Stjr	      --match_first;
748146040Stjr	    }
749146040Stjr	  if (match_first < left_lim)
750146040Stjr	    goto free_return;
751146040Stjr	  break;
752146040Stjr
753146040Stjr	default:
754146040Stjr	  /* In this case, we can't determine easily the current byte,
755146040Stjr	     since it might be a component byte of a multibyte
756146040Stjr	     character.  Then we use the constructed buffer instead.  */
757146040Stjr	  for (;;)
758146040Stjr	    {
759146040Stjr	      /* If MATCH_FIRST is out of the valid range, reconstruct the
760146040Stjr		 buffers.  */
761146040Stjr	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
762146040Stjr	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
763146040Stjr		{
764146040Stjr		  err = re_string_reconstruct (&mctx.input, match_first,
765146040Stjr					       eflags);
766146040Stjr		  if (BE (err != REG_NOERROR, 0))
767146040Stjr		    goto free_return;
768146040Stjr
769146040Stjr		  offset = match_first - mctx.input.raw_mbs_idx;
770146040Stjr		}
771146040Stjr	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
772146040Stjr		 Note that MATCH_FIRST must not be smaller than 0.  */
773146040Stjr	      ch = (match_first >= length
774146040Stjr		    ? 0 : re_string_byte_at (&mctx.input, offset));
775146040Stjr	      if (fastmap[ch])
776146040Stjr		break;
777146040Stjr	      match_first += incr;
778146040Stjr	      if (match_first < left_lim || match_first > right_lim)
779146040Stjr	        {
780146040Stjr	          err = REG_NOMATCH;
781146040Stjr	          goto free_return;
782146040Stjr	        }
783146040Stjr	    }
784146040Stjr	  break;
785146040Stjr	}
786146040Stjr
787146040Stjr      /* Reconstruct the buffers so that the matcher can assume that
788146040Stjr	 the matching starts from the beginning of the buffer.  */
789146040Stjr      err = re_string_reconstruct (&mctx.input, match_first, eflags);
790146040Stjr      if (BE (err != REG_NOERROR, 0))
791146040Stjr	goto free_return;
792146040Stjr
793146040Stjr#ifdef RE_ENABLE_I18N
794146040Stjr     /* Don't consider this char as a possible match start if it part,
795146040Stjr	yet isn't the head, of a multibyte character.  */
796146040Stjr      if (!sb && !re_string_first_byte (&mctx.input, 0))
797146040Stjr	continue;
798146040Stjr#endif
799146040Stjr
800146040Stjr      /* It seems to be appropriate one, then use the matcher.  */
801146040Stjr      /* We assume that the matching starts from 0.  */
802146040Stjr      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
803146040Stjr      match_last = check_matching (&mctx, fl_longest_match,
804146040Stjr				   range >= 0 ? &match_first : NULL);
805146040Stjr      if (match_last != -1)
806146040Stjr	{
807146040Stjr	  if (BE (match_last == -2, 0))
808146040Stjr	    {
809146040Stjr	      err = REG_ESPACE;
810146040Stjr	      goto free_return;
811146040Stjr	    }
812146040Stjr	  else
813146040Stjr	    {
814146040Stjr	      mctx.match_last = match_last;
815146040Stjr	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
816146040Stjr		{
817146040Stjr		  re_dfastate_t *pstate = mctx.state_log[match_last];
818146040Stjr		  mctx.last_node = check_halt_state_context (&mctx, pstate,
819146040Stjr							     match_last);
820146040Stjr		}
821146040Stjr	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
822146040Stjr		  || dfa->nbackref)
823146040Stjr		{
824146040Stjr		  err = prune_impossible_nodes (&mctx);
825146040Stjr		  if (err == REG_NOERROR)
826146040Stjr		    break;
827146040Stjr		  if (BE (err != REG_NOMATCH, 0))
828146040Stjr		    goto free_return;
829146040Stjr		  match_last = -1;
830146040Stjr		}
831146040Stjr	      else
832146040Stjr		break; /* We found a match.  */
833146040Stjr	    }
834146040Stjr	}
835146040Stjr
836146040Stjr      match_ctx_clean (&mctx);
837146040Stjr    }
838146040Stjr
839146040Stjr#ifdef DEBUG
840146040Stjr  assert (match_last != -1);
841146040Stjr  assert (err == REG_NOERROR);
842146040Stjr#endif
843146040Stjr
844146040Stjr  /* Set pmatch[] if we need.  */
845146040Stjr  if (nmatch > 0)
846146040Stjr    {
847146040Stjr      int reg_idx;
848146040Stjr
849146040Stjr      /* Initialize registers.  */
850146040Stjr      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
851146040Stjr	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
852146040Stjr
853146040Stjr      /* Set the points where matching start/end.  */
854146040Stjr      pmatch[0].rm_so = 0;
855146040Stjr      pmatch[0].rm_eo = mctx.match_last;
856146040Stjr
857146040Stjr      if (!preg->no_sub && nmatch > 1)
858146040Stjr	{
859146040Stjr	  err = set_regs (preg, &mctx, nmatch, pmatch,
860146040Stjr			  dfa->has_plural_match && dfa->nbackref > 0);
861146040Stjr	  if (BE (err != REG_NOERROR, 0))
862146040Stjr	    goto free_return;
863146040Stjr	}
864146040Stjr
865146040Stjr      /* At last, add the offset to the each registers, since we slided
866146040Stjr	 the buffers so that we could assume that the matching starts
867146040Stjr	 from 0.  */
868146040Stjr      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
869146040Stjr	if (pmatch[reg_idx].rm_so != -1)
870146040Stjr	  {
871146040Stjr#ifdef RE_ENABLE_I18N
872146040Stjr	    if (BE (mctx.input.offsets_needed != 0, 0))
873146040Stjr	      {
874146040Stjr		if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
875146040Stjr		  pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
876146040Stjr		else
877146040Stjr		  pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
878146040Stjr		if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
879146040Stjr		  pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
880146040Stjr		else
881146040Stjr		  pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
882146040Stjr	      }
883146040Stjr#else
884146040Stjr	    assert (mctx.input.offsets_needed == 0);
885146040Stjr#endif
886146040Stjr	    pmatch[reg_idx].rm_so += match_first;
887146040Stjr	    pmatch[reg_idx].rm_eo += match_first;
888146040Stjr	  }
889146040Stjr      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
890146040Stjr	{
891146040Stjr	  pmatch[nmatch + reg_idx].rm_so = -1;
892146040Stjr	  pmatch[nmatch + reg_idx].rm_eo = -1;
893146040Stjr	}
894146040Stjr
895146040Stjr      if (dfa->subexp_map)
896146040Stjr        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
897146040Stjr          if (dfa->subexp_map[reg_idx] != reg_idx)
898146040Stjr            {
899146040Stjr              pmatch[reg_idx + 1].rm_so
900146040Stjr                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
901146040Stjr              pmatch[reg_idx + 1].rm_eo
902146040Stjr                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
903146040Stjr            }
904146040Stjr    }
905146040Stjr
906146040Stjr free_return:
907146040Stjr  re_free (mctx.state_log);
908146040Stjr  if (dfa->nbackref)
909146040Stjr    match_ctx_free (&mctx);
910146040Stjr  re_string_destruct (&mctx.input);
911146040Stjr  return err;
912146040Stjr}
913146040Stjr
914146040Stjrstatic reg_errcode_t
915146040Stjrprune_impossible_nodes (mctx)
916146040Stjr     re_match_context_t *mctx;
917146040Stjr{
918146040Stjr  re_dfa_t *const dfa = mctx->dfa;
919146040Stjr  int halt_node, match_last;
920146040Stjr  reg_errcode_t ret;
921146040Stjr  re_dfastate_t **sifted_states;
922146040Stjr  re_dfastate_t **lim_states = NULL;
923146040Stjr  re_sift_context_t sctx;
924146040Stjr#ifdef DEBUG
925146040Stjr  assert (mctx->state_log != NULL);
926146040Stjr#endif
927146040Stjr  match_last = mctx->match_last;
928146040Stjr  halt_node = mctx->last_node;
929146040Stjr  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
930146040Stjr  if (BE (sifted_states == NULL, 0))
931146040Stjr    {
932146040Stjr      ret = REG_ESPACE;
933146040Stjr      goto free_return;
934146040Stjr    }
935146040Stjr  if (dfa->nbackref)
936146040Stjr    {
937146040Stjr      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
938146040Stjr      if (BE (lim_states == NULL, 0))
939146040Stjr	{
940146040Stjr	  ret = REG_ESPACE;
941146040Stjr	  goto free_return;
942146040Stjr	}
943146040Stjr      while (1)
944146040Stjr	{
945146040Stjr	  memset (lim_states, '\0',
946146040Stjr		  sizeof (re_dfastate_t *) * (match_last + 1));
947146040Stjr	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
948146040Stjr			 match_last);
949146040Stjr	  ret = sift_states_backward (mctx, &sctx);
950146040Stjr	  re_node_set_free (&sctx.limits);
951146040Stjr	  if (BE (ret != REG_NOERROR, 0))
952146040Stjr	      goto free_return;
953146040Stjr	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
954146040Stjr	    break;
955146040Stjr	  do
956146040Stjr	    {
957146040Stjr	      --match_last;
958146040Stjr	      if (match_last < 0)
959146040Stjr		{
960146040Stjr		  ret = REG_NOMATCH;
961146040Stjr		  goto free_return;
962146040Stjr		}
963146040Stjr	    } while (mctx->state_log[match_last] == NULL
964146040Stjr		     || !mctx->state_log[match_last]->halt);
965146040Stjr	  halt_node = check_halt_state_context (mctx,
966146040Stjr						mctx->state_log[match_last],
967146040Stjr						match_last);
968146040Stjr	}
969146040Stjr      ret = merge_state_array (dfa, sifted_states, lim_states,
970146040Stjr			       match_last + 1);
971146040Stjr      re_free (lim_states);
972146040Stjr      lim_states = NULL;
973146040Stjr      if (BE (ret != REG_NOERROR, 0))
974146040Stjr	goto free_return;
975146040Stjr    }
976146040Stjr  else
977146040Stjr    {
978146040Stjr      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
979146040Stjr      ret = sift_states_backward (mctx, &sctx);
980146040Stjr      re_node_set_free (&sctx.limits);
981146040Stjr      if (BE (ret != REG_NOERROR, 0))
982146040Stjr	goto free_return;
983146040Stjr    }
984146040Stjr  re_free (mctx->state_log);
985146040Stjr  mctx->state_log = sifted_states;
986146040Stjr  sifted_states = NULL;
987146040Stjr  mctx->last_node = halt_node;
988146040Stjr  mctx->match_last = match_last;
989146040Stjr  ret = REG_NOERROR;
990146040Stjr free_return:
991146040Stjr  re_free (sifted_states);
992146040Stjr  re_free (lim_states);
993146040Stjr  return ret;
994146040Stjr}
995146040Stjr
996146040Stjr/* Acquire an initial state and return it.
997146040Stjr   We must select appropriate initial state depending on the context,
998146040Stjr   since initial states may have constraints like "\<", "^", etc..  */
999146040Stjr
1000146040Stjrstatic inline re_dfastate_t *
1001146040Stjracquire_init_state_context (err, mctx, idx)
1002146040Stjr     reg_errcode_t *err;
1003146040Stjr     const re_match_context_t *mctx;
1004146040Stjr     int idx;
1005146040Stjr{
1006146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1007146040Stjr  if (dfa->init_state->has_constraint)
1008146040Stjr    {
1009146040Stjr      unsigned int context;
1010146040Stjr      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1011146040Stjr      if (IS_WORD_CONTEXT (context))
1012146040Stjr	return dfa->init_state_word;
1013146040Stjr      else if (IS_ORDINARY_CONTEXT (context))
1014146040Stjr	return dfa->init_state;
1015146040Stjr      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1016146040Stjr	return dfa->init_state_begbuf;
1017146040Stjr      else if (IS_NEWLINE_CONTEXT (context))
1018146040Stjr	return dfa->init_state_nl;
1019146040Stjr      else if (IS_BEGBUF_CONTEXT (context))
1020146040Stjr	{
1021146040Stjr	  /* It is relatively rare case, then calculate on demand.  */
1022146040Stjr	  return re_acquire_state_context (err, dfa,
1023146040Stjr					   dfa->init_state->entrance_nodes,
1024146040Stjr					   context);
1025146040Stjr	}
1026146040Stjr      else
1027146040Stjr	/* Must not happen?  */
1028146040Stjr	return dfa->init_state;
1029146040Stjr    }
1030146040Stjr  else
1031146040Stjr    return dfa->init_state;
1032146040Stjr}
1033146040Stjr
1034146040Stjr/* Check whether the regular expression match input string INPUT or not,
1035146040Stjr   and return the index where the matching end, return -1 if not match,
1036146040Stjr   or return -2 in case of an error.
1037146040Stjr   FL_LONGEST_MATCH means we want the POSIX longest matching.
1038146040Stjr   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1039146040Stjr   next place where we may want to try matching.
1040146040Stjr   Note that the matcher assume that the maching starts from the current
1041146040Stjr   index of the buffer.  */
1042146040Stjr
1043146040Stjrstatic int
1044146040Stjrcheck_matching (mctx, fl_longest_match, p_match_first)
1045146040Stjr    re_match_context_t *mctx;
1046146040Stjr    int fl_longest_match;
1047146040Stjr    int *p_match_first;
1048146040Stjr{
1049146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1050146040Stjr  reg_errcode_t err;
1051146040Stjr  int match = 0;
1052146040Stjr  int match_last = -1;
1053146040Stjr  int cur_str_idx = re_string_cur_idx (&mctx->input);
1054146040Stjr  re_dfastate_t *cur_state;
1055146040Stjr  int at_init_state = p_match_first != NULL;
1056146040Stjr  int next_start_idx = cur_str_idx;
1057146040Stjr
1058146040Stjr  err = REG_NOERROR;
1059146040Stjr  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1060146040Stjr  /* An initial state must not be NULL (invalid).  */
1061146040Stjr  if (BE (cur_state == NULL, 0))
1062146040Stjr    {
1063146040Stjr      assert (err == REG_ESPACE);
1064146040Stjr      return -2;
1065146040Stjr    }
1066146040Stjr
1067146040Stjr  if (mctx->state_log != NULL)
1068146040Stjr    {
1069146040Stjr      mctx->state_log[cur_str_idx] = cur_state;
1070146040Stjr
1071146040Stjr      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1072146040Stjr	 later.  E.g. Processing back references.  */
1073146040Stjr      if (BE (dfa->nbackref, 0))
1074146040Stjr	{
1075146040Stjr	  at_init_state = 0;
1076146040Stjr	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1077146040Stjr	  if (BE (err != REG_NOERROR, 0))
1078146040Stjr	    return err;
1079146040Stjr
1080146040Stjr	  if (cur_state->has_backref)
1081146040Stjr	    {
1082146040Stjr	      err = transit_state_bkref (mctx, &cur_state->nodes);
1083146040Stjr	      if (BE (err != REG_NOERROR, 0))
1084146040Stjr	        return err;
1085146040Stjr	    }
1086146040Stjr	}
1087146040Stjr    }
1088146040Stjr
1089146040Stjr  /* If the RE accepts NULL string.  */
1090146040Stjr  if (BE (cur_state->halt, 0))
1091146040Stjr    {
1092146040Stjr      if (!cur_state->has_constraint
1093146040Stjr	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1094146040Stjr	{
1095146040Stjr	  if (!fl_longest_match)
1096146040Stjr	    return cur_str_idx;
1097146040Stjr	  else
1098146040Stjr	    {
1099146040Stjr	      match_last = cur_str_idx;
1100146040Stjr	      match = 1;
1101146040Stjr	    }
1102146040Stjr	}
1103146040Stjr    }
1104146040Stjr
1105146040Stjr  while (!re_string_eoi (&mctx->input))
1106146040Stjr    {
1107146040Stjr      re_dfastate_t *old_state = cur_state;
1108146040Stjr      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1109146040Stjr
1110146040Stjr      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1111146040Stjr          || (BE (next_char_idx >= mctx->input.valid_len, 0)
1112146040Stjr              && mctx->input.valid_len < mctx->input.len))
1113146040Stjr        {
1114146040Stjr          err = extend_buffers (mctx);
1115146040Stjr          if (BE (err != REG_NOERROR, 0))
1116146040Stjr	    {
1117146040Stjr	      assert (err == REG_ESPACE);
1118146040Stjr	      return -2;
1119146040Stjr	    }
1120146040Stjr        }
1121146040Stjr
1122146040Stjr      cur_state = transit_state (&err, mctx, cur_state);
1123146040Stjr      if (mctx->state_log != NULL)
1124146040Stjr	cur_state = merge_state_with_log (&err, mctx, cur_state);
1125146040Stjr
1126146040Stjr      if (cur_state == NULL)
1127146040Stjr	{
1128146040Stjr	  /* Reached the invalid state or an error.  Try to recover a valid
1129146040Stjr	     state using the state log, if available and if we have not
1130146040Stjr	     already found a valid (even if not the longest) match.  */
1131146040Stjr	  if (BE (err != REG_NOERROR, 0))
1132146040Stjr	    return -2;
1133146040Stjr
1134146040Stjr	  if (mctx->state_log == NULL
1135146040Stjr	      || (match && !fl_longest_match)
1136146040Stjr	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1137146040Stjr	    break;
1138146040Stjr	}
1139146040Stjr
1140146040Stjr      if (BE (at_init_state, 0))
1141146040Stjr	{
1142146040Stjr	  if (old_state == cur_state)
1143146040Stjr	    next_start_idx = next_char_idx;
1144146040Stjr	  else
1145146040Stjr	    at_init_state = 0;
1146146040Stjr	}
1147146040Stjr
1148146040Stjr      if (cur_state->halt)
1149146040Stjr	{
1150146040Stjr	  /* Reached a halt state.
1151146040Stjr	     Check the halt state can satisfy the current context.  */
1152146040Stjr	  if (!cur_state->has_constraint
1153146040Stjr	      || check_halt_state_context (mctx, cur_state,
1154146040Stjr					   re_string_cur_idx (&mctx->input)))
1155146040Stjr	    {
1156146040Stjr	      /* We found an appropriate halt state.  */
1157146040Stjr	      match_last = re_string_cur_idx (&mctx->input);
1158146040Stjr	      match = 1;
1159146040Stjr
1160146040Stjr	      /* We found a match, do not modify match_first below.  */
1161146040Stjr	      p_match_first = NULL;
1162146040Stjr	      if (!fl_longest_match)
1163146040Stjr		break;
1164146040Stjr	    }
1165146040Stjr	}
1166146040Stjr    }
1167146040Stjr
1168146040Stjr  if (p_match_first)
1169146040Stjr    *p_match_first += next_start_idx;
1170146040Stjr
1171146040Stjr  return match_last;
1172146040Stjr}
1173146040Stjr
1174146040Stjr/* Check NODE match the current context.  */
1175146040Stjr
1176146040Stjrstatic int check_halt_node_context (dfa, node, context)
1177146040Stjr    const re_dfa_t *dfa;
1178146040Stjr    int node;
1179146040Stjr    unsigned int context;
1180146040Stjr{
1181146040Stjr  re_token_type_t type = dfa->nodes[node].type;
1182146040Stjr  unsigned int constraint = dfa->nodes[node].constraint;
1183146040Stjr  if (type != END_OF_RE)
1184146040Stjr    return 0;
1185146040Stjr  if (!constraint)
1186146040Stjr    return 1;
1187146040Stjr  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1188146040Stjr    return 0;
1189146040Stjr  return 1;
1190146040Stjr}
1191146040Stjr
1192146040Stjr/* Check the halt state STATE match the current context.
1193146040Stjr   Return 0 if not match, if the node, STATE has, is a halt node and
1194146040Stjr   match the context, return the node.  */
1195146040Stjr
1196146040Stjrstatic int
1197146040Stjrcheck_halt_state_context (mctx, state, idx)
1198146040Stjr    const re_match_context_t *mctx;
1199146040Stjr    const re_dfastate_t *state;
1200146040Stjr    int idx;
1201146040Stjr{
1202146040Stjr  int i;
1203146040Stjr  unsigned int context;
1204146040Stjr#ifdef DEBUG
1205146040Stjr  assert (state->halt);
1206146040Stjr#endif
1207146040Stjr  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1208146040Stjr  for (i = 0; i < state->nodes.nelem; ++i)
1209146040Stjr    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1210146040Stjr      return state->nodes.elems[i];
1211146040Stjr  return 0;
1212146040Stjr}
1213146040Stjr
1214146040Stjr/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1215146040Stjr   corresponding to the DFA).
1216146040Stjr   Return the destination node, and update EPS_VIA_NODES, return -1 in case
1217146040Stjr   of errors.  */
1218146040Stjr
1219146040Stjrstatic int
1220146040Stjrproceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
1221146040Stjr    const re_match_context_t *mctx;
1222146040Stjr    regmatch_t *regs;
1223146040Stjr    int nregs, *pidx, node;
1224146040Stjr    re_node_set *eps_via_nodes;
1225146040Stjr    struct re_fail_stack_t *fs;
1226146040Stjr{
1227146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1228146040Stjr  int i, err, dest_node;
1229146040Stjr  dest_node = -1;
1230146040Stjr  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1231146040Stjr    {
1232146040Stjr      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1233146040Stjr      re_node_set *edests = &dfa->edests[node];
1234146040Stjr      int dest_node;
1235146040Stjr      err = re_node_set_insert (eps_via_nodes, node);
1236146040Stjr      if (BE (err < 0, 0))
1237146040Stjr	return -2;
1238146040Stjr      /* Pick up a valid destination, or return -1 if none is found.  */
1239146040Stjr      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1240146040Stjr	{
1241146040Stjr	  int candidate = edests->elems[i];
1242146040Stjr	  if (!re_node_set_contains (cur_nodes, candidate))
1243146040Stjr	    continue;
1244146040Stjr          if (dest_node == -1)
1245146040Stjr	    dest_node = candidate;
1246146040Stjr
1247146040Stjr          else
1248146040Stjr	    {
1249146040Stjr	      /* In order to avoid infinite loop like "(a*)*", return the second
1250146040Stjr	         epsilon-transition if the first was already considered.  */
1251146040Stjr	      if (re_node_set_contains (eps_via_nodes, dest_node))
1252146040Stjr	        return candidate;
1253146040Stjr
1254146040Stjr	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1255146040Stjr	      else if (fs != NULL
1256146040Stjr		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1257146040Stjr				           eps_via_nodes))
1258146040Stjr		return -2;
1259146040Stjr
1260146040Stjr	      /* We know we are going to exit.  */
1261146040Stjr	      break;
1262146040Stjr	    }
1263146040Stjr	}
1264146040Stjr      return dest_node;
1265146040Stjr    }
1266146040Stjr  else
1267146040Stjr    {
1268146040Stjr      int naccepted = 0;
1269146040Stjr      re_token_type_t type = dfa->nodes[node].type;
1270146040Stjr
1271146040Stjr#ifdef RE_ENABLE_I18N
1272146040Stjr      if (dfa->nodes[node].accept_mb)
1273146040Stjr	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1274146040Stjr      else
1275146040Stjr#endif /* RE_ENABLE_I18N */
1276146040Stjr      if (type == OP_BACK_REF)
1277146040Stjr	{
1278146040Stjr	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
1279146040Stjr	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1280146040Stjr	  if (fs != NULL)
1281146040Stjr	    {
1282146040Stjr	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1283146040Stjr		return -1;
1284146040Stjr	      else if (naccepted)
1285146040Stjr		{
1286146040Stjr		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1287146040Stjr		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1288146040Stjr			      naccepted) != 0)
1289146040Stjr		    return -1;
1290146040Stjr		}
1291146040Stjr	    }
1292146040Stjr
1293146040Stjr	  if (naccepted == 0)
1294146040Stjr	    {
1295146040Stjr	      err = re_node_set_insert (eps_via_nodes, node);
1296146040Stjr	      if (BE (err < 0, 0))
1297146040Stjr		return -2;
1298146040Stjr	      dest_node = dfa->edests[node].elems[0];
1299146040Stjr	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1300146040Stjr					dest_node))
1301146040Stjr		return dest_node;
1302146040Stjr	    }
1303146040Stjr	}
1304146040Stjr
1305146040Stjr      if (naccepted != 0
1306146040Stjr	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1307146040Stjr	{
1308146040Stjr	  dest_node = dfa->nexts[node];
1309146040Stjr	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1310146040Stjr	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1311146040Stjr		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1312146040Stjr					       dest_node)))
1313146040Stjr	    return -1;
1314146040Stjr	  re_node_set_empty (eps_via_nodes);
1315146040Stjr	  return dest_node;
1316146040Stjr	}
1317146040Stjr    }
1318146040Stjr  return -1;
1319146040Stjr}
1320146040Stjr
1321146040Stjrstatic reg_errcode_t
1322146040Stjrpush_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
1323146040Stjr     struct re_fail_stack_t *fs;
1324146040Stjr     int str_idx, dest_node, nregs;
1325146040Stjr     regmatch_t *regs;
1326146040Stjr     re_node_set *eps_via_nodes;
1327146040Stjr{
1328146040Stjr  reg_errcode_t err;
1329146040Stjr  int num = fs->num++;
1330146040Stjr  if (fs->num == fs->alloc)
1331146040Stjr    {
1332146040Stjr      struct re_fail_stack_ent_t *new_array;
1333146040Stjr      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1334146040Stjr				       * fs->alloc * 2));
1335146040Stjr      if (new_array == NULL)
1336146040Stjr	return REG_ESPACE;
1337146040Stjr      fs->alloc *= 2;
1338146040Stjr      fs->stack = new_array;
1339146040Stjr    }
1340146040Stjr  fs->stack[num].idx = str_idx;
1341146040Stjr  fs->stack[num].node = dest_node;
1342146040Stjr  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1343146040Stjr  if (fs->stack[num].regs == NULL)
1344146040Stjr    return REG_ESPACE;
1345146040Stjr  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1346146040Stjr  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1347146040Stjr  return err;
1348146040Stjr}
1349146040Stjr
1350146040Stjrstatic int
1351146040Stjrpop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1352146040Stjr     struct re_fail_stack_t *fs;
1353146040Stjr     int *pidx, nregs;
1354146040Stjr     regmatch_t *regs;
1355146040Stjr     re_node_set *eps_via_nodes;
1356146040Stjr{
1357146040Stjr  int num = --fs->num;
1358146040Stjr  assert (num >= 0);
1359146040Stjr  *pidx = fs->stack[num].idx;
1360146040Stjr  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1361146040Stjr  re_node_set_free (eps_via_nodes);
1362146040Stjr  re_free (fs->stack[num].regs);
1363146040Stjr  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1364146040Stjr  return fs->stack[num].node;
1365146040Stjr}
1366146040Stjr
1367146040Stjr/* Set the positions where the subexpressions are starts/ends to registers
1368146040Stjr   PMATCH.
1369146040Stjr   Note: We assume that pmatch[0] is already set, and
1370146040Stjr   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1371146040Stjr
1372146040Stjrstatic reg_errcode_t
1373146040Stjrset_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1374146040Stjr     const regex_t *preg;
1375146040Stjr     const re_match_context_t *mctx;
1376146040Stjr     size_t nmatch;
1377146040Stjr     regmatch_t *pmatch;
1378146040Stjr     int fl_backtrack;
1379146040Stjr{
1380146040Stjr  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1381146040Stjr  int idx, cur_node;
1382146040Stjr  re_node_set eps_via_nodes;
1383146040Stjr  struct re_fail_stack_t *fs;
1384146040Stjr  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1385146040Stjr  regmatch_t *prev_idx_match;
1386146040Stjr
1387146040Stjr#ifdef DEBUG
1388146040Stjr  assert (nmatch > 1);
1389146040Stjr  assert (mctx->state_log != NULL);
1390146040Stjr#endif
1391146040Stjr  if (fl_backtrack)
1392146040Stjr    {
1393146040Stjr      fs = &fs_body;
1394146040Stjr      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1395146040Stjr      if (fs->stack == NULL)
1396146040Stjr	return REG_ESPACE;
1397146040Stjr    }
1398146040Stjr  else
1399146040Stjr    fs = NULL;
1400146040Stjr
1401146040Stjr  cur_node = dfa->init_node;
1402146040Stjr  re_node_set_init_empty (&eps_via_nodes);
1403146040Stjr
1404146040Stjr  prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
1405146040Stjr  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1406146040Stjr
1407146040Stjr  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1408146040Stjr    {
1409146040Stjr      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1410146040Stjr
1411146040Stjr      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1412146040Stjr	{
1413146040Stjr	  int reg_idx;
1414146040Stjr	  if (fs)
1415146040Stjr	    {
1416146040Stjr	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1417146040Stjr		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1418146040Stjr		  break;
1419146040Stjr	      if (reg_idx == nmatch)
1420146040Stjr		{
1421146040Stjr		  re_node_set_free (&eps_via_nodes);
1422146040Stjr		  return free_fail_stack_return (fs);
1423146040Stjr		}
1424146040Stjr	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1425146040Stjr					 &eps_via_nodes);
1426146040Stjr	    }
1427146040Stjr	  else
1428146040Stjr	    {
1429146040Stjr	      re_node_set_free (&eps_via_nodes);
1430146040Stjr	      return REG_NOERROR;
1431146040Stjr	    }
1432146040Stjr	}
1433146040Stjr
1434146040Stjr      /* Proceed to next node.  */
1435146040Stjr      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1436146040Stjr				    &eps_via_nodes, fs);
1437146040Stjr
1438146040Stjr      if (BE (cur_node < 0, 0))
1439146040Stjr	{
1440146040Stjr	  if (BE (cur_node == -2, 0))
1441146040Stjr	    {
1442146040Stjr	      re_node_set_free (&eps_via_nodes);
1443146040Stjr	      free_fail_stack_return (fs);
1444146040Stjr	      return REG_ESPACE;
1445146040Stjr	    }
1446146040Stjr	  if (fs)
1447146040Stjr	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1448146040Stjr				       &eps_via_nodes);
1449146040Stjr	  else
1450146040Stjr	    {
1451146040Stjr	      re_node_set_free (&eps_via_nodes);
1452146040Stjr	      return REG_NOMATCH;
1453146040Stjr	    }
1454146040Stjr	}
1455146040Stjr    }
1456146040Stjr  re_node_set_free (&eps_via_nodes);
1457146040Stjr  return free_fail_stack_return (fs);
1458146040Stjr}
1459146040Stjr
1460146040Stjrstatic reg_errcode_t
1461146040Stjrfree_fail_stack_return (fs)
1462146040Stjr     struct re_fail_stack_t *fs;
1463146040Stjr{
1464146040Stjr  if (fs)
1465146040Stjr    {
1466146040Stjr      int fs_idx;
1467146040Stjr      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1468146040Stjr	{
1469146040Stjr	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1470146040Stjr	  re_free (fs->stack[fs_idx].regs);
1471146040Stjr	}
1472146040Stjr      re_free (fs->stack);
1473146040Stjr    }
1474146040Stjr  return REG_NOERROR;
1475146040Stjr}
1476146040Stjr
1477146040Stjrstatic void
1478146040Stjrupdate_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
1479146040Stjr     re_dfa_t *dfa;
1480146040Stjr     regmatch_t *pmatch, *prev_idx_match;
1481146040Stjr     int cur_node, cur_idx, nmatch;
1482146040Stjr{
1483146040Stjr  int type = dfa->nodes[cur_node].type;
1484146040Stjr  if (type == OP_OPEN_SUBEXP)
1485146040Stjr    {
1486146040Stjr      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1487146040Stjr
1488146040Stjr      /* We are at the first node of this sub expression.  */
1489146040Stjr      if (reg_num < nmatch)
1490146040Stjr	{
1491146040Stjr	  pmatch[reg_num].rm_so = cur_idx;
1492146040Stjr	  pmatch[reg_num].rm_eo = -1;
1493146040Stjr	}
1494146040Stjr    }
1495146040Stjr  else if (type == OP_CLOSE_SUBEXP)
1496146040Stjr    {
1497146040Stjr      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1498146040Stjr      if (reg_num < nmatch)
1499146040Stjr	{
1500146040Stjr	  /* We are at the last node of this sub expression.  */
1501146040Stjr	  if (pmatch[reg_num].rm_so < cur_idx)
1502146040Stjr	    {
1503146040Stjr	      pmatch[reg_num].rm_eo = cur_idx;
1504146040Stjr	      /* This is a non-empty match or we are not inside an optional
1505146040Stjr		 subexpression.  Accept this right away.  */
1506146040Stjr	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1507146040Stjr	    }
1508146040Stjr	  else
1509146040Stjr	    {
1510146040Stjr	      if (dfa->nodes[cur_node].opt_subexp
1511146040Stjr		  && prev_idx_match[reg_num].rm_so != -1)
1512146040Stjr		/* We transited through an empty match for an optional
1513146040Stjr		   subexpression, like (a?)*, and this is not the subexp's
1514146040Stjr		   first match.  Copy back the old content of the registers
1515146040Stjr		   so that matches of an inner subexpression are undone as
1516146040Stjr		   well, like in ((a?))*.  */
1517146040Stjr		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1518146040Stjr	      else
1519146040Stjr		/* We completed a subexpression, but it may be part of
1520146040Stjr		   an optional one, so do not update PREV_IDX_MATCH.  */
1521146040Stjr		pmatch[reg_num].rm_eo = cur_idx;
1522146040Stjr	    }
1523146040Stjr	}
1524146040Stjr    }
1525146040Stjr}
1526146040Stjr
1527146040Stjr/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1528146040Stjr   and sift the nodes in each states according to the following rules.
1529146040Stjr   Updated state_log will be wrote to STATE_LOG.
1530146040Stjr
1531146040Stjr   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1532146040Stjr     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1533146040Stjr	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1534146040Stjr	the LAST_NODE, we throw away the node `a'.
1535146040Stjr     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1536146040Stjr	string `s' and transit to `b':
1537146040Stjr	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1538146040Stjr	   away the node `a'.
1539146040Stjr	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1540146040Stjr	    thrown away, we throw away the node `a'.
1541146040Stjr     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1542146040Stjr	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1543146040Stjr	   node `a'.
1544146040Stjr	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1545146040Stjr	    we throw away the node `a'.  */
1546146040Stjr
1547146040Stjr#define STATE_NODE_CONTAINS(state,node) \
1548146040Stjr  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1549146040Stjr
1550146040Stjrstatic reg_errcode_t
1551146040Stjrsift_states_backward (mctx, sctx)
1552146040Stjr     re_match_context_t *mctx;
1553146040Stjr     re_sift_context_t *sctx;
1554146040Stjr{
1555146040Stjr  reg_errcode_t err;
1556146040Stjr  int null_cnt = 0;
1557146040Stjr  int str_idx = sctx->last_str_idx;
1558146040Stjr  re_node_set cur_dest;
1559146040Stjr
1560146040Stjr#ifdef DEBUG
1561146040Stjr  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1562146040Stjr#endif
1563146040Stjr
1564146040Stjr  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1565146040Stjr     transit to the last_node and the last_node itself.  */
1566146040Stjr  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1567146040Stjr  if (BE (err != REG_NOERROR, 0))
1568146040Stjr    return err;
1569146040Stjr  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1570146040Stjr  if (BE (err != REG_NOERROR, 0))
1571146040Stjr    goto free_return;
1572146040Stjr
1573146040Stjr  /* Then check each states in the state_log.  */
1574146040Stjr  while (str_idx > 0)
1575146040Stjr    {
1576146040Stjr      /* Update counters.  */
1577146040Stjr      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1578146040Stjr      if (null_cnt > mctx->max_mb_elem_len)
1579146040Stjr	{
1580146040Stjr	  memset (sctx->sifted_states, '\0',
1581146040Stjr		  sizeof (re_dfastate_t *) * str_idx);
1582146040Stjr	  re_node_set_free (&cur_dest);
1583146040Stjr	  return REG_NOERROR;
1584146040Stjr	}
1585146040Stjr      re_node_set_empty (&cur_dest);
1586146040Stjr      --str_idx;
1587146040Stjr
1588146040Stjr      if (mctx->state_log[str_idx])
1589146040Stjr	{
1590146040Stjr	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1591146040Stjr          if (BE (err != REG_NOERROR, 0))
1592146040Stjr	    goto free_return;
1593146040Stjr	}
1594146040Stjr
1595146040Stjr      /* Add all the nodes which satisfy the following conditions:
1596146040Stjr	 - It can epsilon transit to a node in CUR_DEST.
1597146040Stjr	 - It is in CUR_SRC.
1598146040Stjr	 And update state_log.  */
1599146040Stjr      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1600146040Stjr      if (BE (err != REG_NOERROR, 0))
1601146040Stjr	goto free_return;
1602146040Stjr    }
1603146040Stjr  err = REG_NOERROR;
1604146040Stjr free_return:
1605146040Stjr  re_node_set_free (&cur_dest);
1606146040Stjr  return err;
1607146040Stjr}
1608146040Stjr
1609146040Stjrstatic reg_errcode_t
1610146040Stjrbuild_sifted_states (mctx, sctx, str_idx, cur_dest)
1611146040Stjr     re_match_context_t *mctx;
1612146040Stjr     re_sift_context_t *sctx;
1613146040Stjr     int str_idx;
1614146040Stjr     re_node_set *cur_dest;
1615146040Stjr{
1616146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1617146040Stjr  re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1618146040Stjr  int i;
1619146040Stjr
1620146040Stjr  /* Then build the next sifted state.
1621146040Stjr     We build the next sifted state on `cur_dest', and update
1622146040Stjr     `sifted_states[str_idx]' with `cur_dest'.
1623146040Stjr     Note:
1624146040Stjr     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1625146040Stjr     `cur_src' points the node_set of the old `state_log[str_idx]'
1626146040Stjr     (with the epsilon nodes pre-filtered out).  */
1627146040Stjr  for (i = 0; i < cur_src->nelem; i++)
1628146040Stjr    {
1629146040Stjr      int prev_node = cur_src->elems[i];
1630146040Stjr      int naccepted = 0;
1631146040Stjr      int ret;
1632146040Stjr
1633146040Stjr#ifdef DEBUG
1634146040Stjr      re_token_type_t type = dfa->nodes[prev_node].type;
1635146040Stjr      assert (!IS_EPSILON_NODE (type));
1636146040Stjr#endif
1637146040Stjr#ifdef RE_ENABLE_I18N
1638146040Stjr      /* If the node may accept `multi byte'.  */
1639146040Stjr      if (dfa->nodes[prev_node].accept_mb)
1640146040Stjr	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1641146040Stjr					 str_idx, sctx->last_str_idx);
1642146040Stjr#endif /* RE_ENABLE_I18N */
1643146040Stjr
1644146040Stjr      /* We don't check backreferences here.
1645146040Stjr	 See update_cur_sifted_state().  */
1646146040Stjr      if (!naccepted
1647146040Stjr	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1648146040Stjr	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1649146040Stjr				  dfa->nexts[prev_node]))
1650146040Stjr	naccepted = 1;
1651146040Stjr
1652146040Stjr      if (naccepted == 0)
1653146040Stjr	continue;
1654146040Stjr
1655146040Stjr      if (sctx->limits.nelem)
1656146040Stjr	{
1657146040Stjr	  int to_idx = str_idx + naccepted;
1658146040Stjr	  if (check_dst_limits (mctx, &sctx->limits,
1659146040Stjr				dfa->nexts[prev_node], to_idx,
1660146040Stjr				prev_node, str_idx))
1661146040Stjr	    continue;
1662146040Stjr	}
1663146040Stjr      ret = re_node_set_insert (cur_dest, prev_node);
1664146040Stjr      if (BE (ret == -1, 0))
1665146040Stjr	return REG_ESPACE;
1666146040Stjr    }
1667146040Stjr
1668146040Stjr  return REG_NOERROR;
1669146040Stjr}
1670146040Stjr
1671146040Stjr/* Helper functions.  */
1672146040Stjr
1673146040Stjrstatic reg_errcode_t
1674146040Stjrclean_state_log_if_needed (mctx, next_state_log_idx)
1675146040Stjr    re_match_context_t *mctx;
1676146040Stjr    int next_state_log_idx;
1677146040Stjr{
1678146040Stjr  int top = mctx->state_log_top;
1679146040Stjr
1680146040Stjr  if (next_state_log_idx >= mctx->input.bufs_len
1681146040Stjr      || (next_state_log_idx >= mctx->input.valid_len
1682146040Stjr	  && mctx->input.valid_len < mctx->input.len))
1683146040Stjr    {
1684146040Stjr      reg_errcode_t err;
1685146040Stjr      err = extend_buffers (mctx);
1686146040Stjr      if (BE (err != REG_NOERROR, 0))
1687146040Stjr	return err;
1688146040Stjr    }
1689146040Stjr
1690146040Stjr  if (top < next_state_log_idx)
1691146040Stjr    {
1692146040Stjr      memset (mctx->state_log + top + 1, '\0',
1693146040Stjr	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1694146040Stjr      mctx->state_log_top = next_state_log_idx;
1695146040Stjr    }
1696146040Stjr  return REG_NOERROR;
1697146040Stjr}
1698146040Stjr
1699146040Stjrstatic reg_errcode_t
1700146040Stjrmerge_state_array (dfa, dst, src, num)
1701146040Stjr     re_dfa_t *dfa;
1702146040Stjr     re_dfastate_t **dst;
1703146040Stjr     re_dfastate_t **src;
1704146040Stjr     int num;
1705146040Stjr{
1706146040Stjr  int st_idx;
1707146040Stjr  reg_errcode_t err;
1708146040Stjr  for (st_idx = 0; st_idx < num; ++st_idx)
1709146040Stjr    {
1710146040Stjr      if (dst[st_idx] == NULL)
1711146040Stjr	dst[st_idx] = src[st_idx];
1712146040Stjr      else if (src[st_idx] != NULL)
1713146040Stjr	{
1714146040Stjr	  re_node_set merged_set;
1715146040Stjr	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1716146040Stjr					&src[st_idx]->nodes);
1717146040Stjr	  if (BE (err != REG_NOERROR, 0))
1718146040Stjr	    return err;
1719146040Stjr	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1720146040Stjr	  re_node_set_free (&merged_set);
1721146040Stjr	  if (BE (err != REG_NOERROR, 0))
1722146040Stjr	    return err;
1723146040Stjr	}
1724146040Stjr    }
1725146040Stjr  return REG_NOERROR;
1726146040Stjr}
1727146040Stjr
1728146040Stjrstatic reg_errcode_t
1729146040Stjrupdate_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
1730146040Stjr     re_match_context_t *mctx;
1731146040Stjr     re_sift_context_t *sctx;
1732146040Stjr     int str_idx;
1733146040Stjr     re_node_set *dest_nodes;
1734146040Stjr{
1735146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1736146040Stjr  reg_errcode_t err;
1737146040Stjr  const re_node_set *candidates;
1738146040Stjr  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1739146040Stjr		: &mctx->state_log[str_idx]->nodes);
1740146040Stjr
1741146040Stjr  if (dest_nodes->nelem == 0)
1742146040Stjr    sctx->sifted_states[str_idx] = NULL;
1743146040Stjr  else
1744146040Stjr    {
1745146040Stjr      if (candidates)
1746146040Stjr	{
1747146040Stjr	  /* At first, add the nodes which can epsilon transit to a node in
1748146040Stjr	     DEST_NODE.  */
1749146040Stjr	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1750146040Stjr	  if (BE (err != REG_NOERROR, 0))
1751146040Stjr	    return err;
1752146040Stjr
1753146040Stjr	  /* Then, check the limitations in the current sift_context.  */
1754146040Stjr	  if (sctx->limits.nelem)
1755146040Stjr	    {
1756146040Stjr	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1757146040Stjr					 mctx->bkref_ents, str_idx);
1758146040Stjr	      if (BE (err != REG_NOERROR, 0))
1759146040Stjr		return err;
1760146040Stjr	    }
1761146040Stjr	}
1762146040Stjr
1763146040Stjr      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1764146040Stjr      if (BE (err != REG_NOERROR, 0))
1765146040Stjr	return err;
1766146040Stjr    }
1767146040Stjr
1768146040Stjr  if (candidates && mctx->state_log[str_idx]->has_backref)
1769146040Stjr    {
1770146040Stjr      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1771146040Stjr      if (BE (err != REG_NOERROR, 0))
1772146040Stjr	return err;
1773146040Stjr    }
1774146040Stjr  return REG_NOERROR;
1775146040Stjr}
1776146040Stjr
1777146040Stjrstatic reg_errcode_t
1778146040Stjradd_epsilon_src_nodes (dfa, dest_nodes, candidates)
1779146040Stjr     re_dfa_t *dfa;
1780146040Stjr     re_node_set *dest_nodes;
1781146040Stjr     const re_node_set *candidates;
1782146040Stjr{
1783146040Stjr  reg_errcode_t err = REG_NOERROR;
1784146040Stjr  int i;
1785146040Stjr
1786146040Stjr  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1787146040Stjr  if (BE (err != REG_NOERROR, 0))
1788146040Stjr    return err;
1789146040Stjr
1790146040Stjr  if (!state->inveclosure.alloc)
1791146040Stjr    {
1792146040Stjr      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1793146040Stjr      if (BE (err != REG_NOERROR, 0))
1794146040Stjr        return REG_ESPACE;
1795146040Stjr      for (i = 0; i < dest_nodes->nelem; i++)
1796146040Stjr        re_node_set_merge (&state->inveclosure,
1797146040Stjr			   dfa->inveclosures + dest_nodes->elems[i]);
1798146040Stjr    }
1799146040Stjr  return re_node_set_add_intersect (dest_nodes, candidates,
1800146040Stjr				    &state->inveclosure);
1801146040Stjr}
1802146040Stjr
1803146040Stjrstatic reg_errcode_t
1804146040Stjrsub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1805146040Stjr     re_dfa_t *dfa;
1806146040Stjr     int node;
1807146040Stjr     re_node_set *dest_nodes;
1808146040Stjr     const re_node_set *candidates;
1809146040Stjr{
1810146040Stjr    int ecl_idx;
1811146040Stjr    reg_errcode_t err;
1812146040Stjr    re_node_set *inv_eclosure = dfa->inveclosures + node;
1813146040Stjr    re_node_set except_nodes;
1814146040Stjr    re_node_set_init_empty (&except_nodes);
1815146040Stjr    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1816146040Stjr      {
1817146040Stjr	int cur_node = inv_eclosure->elems[ecl_idx];
1818146040Stjr	if (cur_node == node)
1819146040Stjr	  continue;
1820146040Stjr	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1821146040Stjr	  {
1822146040Stjr	    int edst1 = dfa->edests[cur_node].elems[0];
1823146040Stjr	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1824146040Stjr			 ? dfa->edests[cur_node].elems[1] : -1);
1825146040Stjr	    if ((!re_node_set_contains (inv_eclosure, edst1)
1826146040Stjr		 && re_node_set_contains (dest_nodes, edst1))
1827146040Stjr		|| (edst2 > 0
1828146040Stjr		    && !re_node_set_contains (inv_eclosure, edst2)
1829146040Stjr		    && re_node_set_contains (dest_nodes, edst2)))
1830146040Stjr	      {
1831146040Stjr		err = re_node_set_add_intersect (&except_nodes, candidates,
1832146040Stjr						 dfa->inveclosures + cur_node);
1833146040Stjr		if (BE (err != REG_NOERROR, 0))
1834146040Stjr		  {
1835146040Stjr		    re_node_set_free (&except_nodes);
1836146040Stjr		    return err;
1837146040Stjr		  }
1838146040Stjr	      }
1839146040Stjr	  }
1840146040Stjr      }
1841146040Stjr    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1842146040Stjr      {
1843146040Stjr	int cur_node = inv_eclosure->elems[ecl_idx];
1844146040Stjr	if (!re_node_set_contains (&except_nodes, cur_node))
1845146040Stjr	  {
1846146040Stjr	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1847146040Stjr	    re_node_set_remove_at (dest_nodes, idx);
1848146040Stjr	  }
1849146040Stjr      }
1850146040Stjr    re_node_set_free (&except_nodes);
1851146040Stjr    return REG_NOERROR;
1852146040Stjr}
1853146040Stjr
1854146040Stjrstatic int
1855146040Stjrcheck_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
1856146040Stjr     re_match_context_t *mctx;
1857146040Stjr     re_node_set *limits;
1858146040Stjr     int dst_node, dst_idx, src_node, src_idx;
1859146040Stjr{
1860146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1861146040Stjr  int lim_idx, src_pos, dst_pos;
1862146040Stjr
1863146040Stjr  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1864146040Stjr  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1865146040Stjr  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1866146040Stjr    {
1867146040Stjr      int subexp_idx;
1868146040Stjr      struct re_backref_cache_entry *ent;
1869146040Stjr      ent = mctx->bkref_ents + limits->elems[lim_idx];
1870146040Stjr      subexp_idx = dfa->nodes[ent->node].opr.idx;
1871146040Stjr
1872146040Stjr      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1873146040Stjr					   subexp_idx, dst_node, dst_idx,
1874146040Stjr					   dst_bkref_idx);
1875146040Stjr      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1876146040Stjr					   subexp_idx, src_node, src_idx,
1877146040Stjr					   src_bkref_idx);
1878146040Stjr
1879146040Stjr      /* In case of:
1880146040Stjr	 <src> <dst> ( <subexp> )
1881146040Stjr	 ( <subexp> ) <src> <dst>
1882146040Stjr	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1883146040Stjr      if (src_pos == dst_pos)
1884146040Stjr	continue; /* This is unrelated limitation.  */
1885146040Stjr      else
1886146040Stjr	return 1;
1887146040Stjr    }
1888146040Stjr  return 0;
1889146040Stjr}
1890146040Stjr
1891146040Stjrstatic int
1892146040Stjrcheck_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
1893146040Stjr     re_match_context_t *mctx;
1894146040Stjr     int boundaries, subexp_idx, from_node, bkref_idx;
1895146040Stjr{
1896146040Stjr  re_dfa_t *const dfa = mctx->dfa;
1897146040Stjr  re_node_set *eclosures = dfa->eclosures + from_node;
1898146040Stjr  int node_idx;
1899146040Stjr
1900146040Stjr  /* Else, we are on the boundary: examine the nodes on the epsilon
1901146040Stjr     closure.  */
1902146040Stjr  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1903146040Stjr    {
1904146040Stjr      int node = eclosures->elems[node_idx];
1905146040Stjr      switch (dfa->nodes[node].type)
1906146040Stjr	{
1907146040Stjr	case OP_BACK_REF:
1908146040Stjr	  if (bkref_idx != -1)
1909146040Stjr	    {
1910146040Stjr	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1911146040Stjr	      do
1912146040Stjr	        {
1913146040Stjr		  int dst, cpos;
1914146040Stjr
1915146040Stjr		  if (ent->node != node)
1916146040Stjr		    continue;
1917146040Stjr
1918146040Stjr		  if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
1919146040Stjr		      && !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
1920146040Stjr		    continue;
1921146040Stjr
1922146040Stjr		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1923146040Stjr		     OP_CLOSE_SUBEXP cases below.  But, if the
1924146040Stjr		     destination node is the same node as the source
1925146040Stjr		     node, don't recurse because it would cause an
1926146040Stjr		     infinite loop: a regex that exhibits this behavior
1927146040Stjr		     is ()\1*\1*  */
1928146040Stjr		  dst = dfa->edests[node].elems[0];
1929146040Stjr		  if (dst == from_node)
1930146040Stjr		    {
1931146040Stjr		      if (boundaries & 1)
1932146040Stjr		        return -1;
1933146040Stjr		      else /* if (boundaries & 2) */
1934146040Stjr		        return 0;
1935146040Stjr		    }
1936146040Stjr
1937146040Stjr		  cpos =
1938146040Stjr		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1939146040Stjr						 dst, bkref_idx);
1940146040Stjr		  if (cpos == -1 /* && (boundaries & 1) */)
1941146040Stjr		    return -1;
1942146040Stjr		  if (cpos == 0 && (boundaries & 2))
1943146040Stjr		    return 0;
1944146040Stjr
1945146040Stjr		  ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
1946146040Stjr	        }
1947146040Stjr	      while (ent++->more);
1948146040Stjr	    }
1949146040Stjr	  break;
1950146040Stjr
1951146040Stjr	case OP_OPEN_SUBEXP:
1952146040Stjr	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1953146040Stjr	    return -1;
1954146040Stjr	  break;
1955146040Stjr
1956146040Stjr	case OP_CLOSE_SUBEXP:
1957146040Stjr	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1958146040Stjr	    return 0;
1959146040Stjr	  break;
1960146040Stjr
1961146040Stjr	default:
1962146040Stjr	    break;
1963146040Stjr	}
1964146040Stjr    }
1965146040Stjr
1966146040Stjr  return (boundaries & 2) ? 1 : 0;
1967146040Stjr}
1968146040Stjr
1969146040Stjrstatic int
1970146040Stjrcheck_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
1971146040Stjr     re_match_context_t *mctx;
1972146040Stjr     int limit, subexp_idx, from_node, str_idx, bkref_idx;
1973146040Stjr{
1974146040Stjr  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1975146040Stjr  int boundaries;
1976146040Stjr
1977146040Stjr  /* If we are outside the range of the subexpression, return -1 or 1.  */
1978146040Stjr  if (str_idx < lim->subexp_from)
1979146040Stjr    return -1;
1980146040Stjr
1981146040Stjr  if (lim->subexp_to < str_idx)
1982146040Stjr    return 1;
1983146040Stjr
1984146040Stjr  /* If we are within the subexpression, return 0.  */
1985146040Stjr  boundaries = (str_idx == lim->subexp_from);
1986146040Stjr  boundaries |= (str_idx == lim->subexp_to) << 1;
1987146040Stjr  if (boundaries == 0)
1988146040Stjr    return 0;
1989146040Stjr
1990146040Stjr  /* Else, examine epsilon closure.  */
1991146040Stjr  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1992146040Stjr				      from_node, bkref_idx);
1993146040Stjr}
1994146040Stjr
1995146040Stjr/* Check the limitations of sub expressions LIMITS, and remove the nodes
1996146040Stjr   which are against limitations from DEST_NODES. */
1997146040Stjr
1998146040Stjrstatic reg_errcode_t
1999146040Stjrcheck_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
2000146040Stjr     re_dfa_t *dfa;
2001146040Stjr     re_node_set *dest_nodes;
2002146040Stjr     const re_node_set *candidates;
2003146040Stjr     re_node_set *limits;
2004146040Stjr     struct re_backref_cache_entry *bkref_ents;
2005146040Stjr     int str_idx;
2006146040Stjr{
2007146040Stjr  reg_errcode_t err;
2008146040Stjr  int node_idx, lim_idx;
2009146040Stjr
2010146040Stjr  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2011146040Stjr    {
2012146040Stjr      int subexp_idx;
2013146040Stjr      struct re_backref_cache_entry *ent;
2014146040Stjr      ent = bkref_ents + limits->elems[lim_idx];
2015146040Stjr
2016146040Stjr      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2017146040Stjr	continue; /* This is unrelated limitation.  */
2018146040Stjr
2019146040Stjr      subexp_idx = dfa->nodes[ent->node].opr.idx;
2020146040Stjr      if (ent->subexp_to == str_idx)
2021146040Stjr	{
2022146040Stjr	  int ops_node = -1;
2023146040Stjr	  int cls_node = -1;
2024146040Stjr	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2025146040Stjr	    {
2026146040Stjr	      int node = dest_nodes->elems[node_idx];
2027146040Stjr	      re_token_type_t type = dfa->nodes[node].type;
2028146040Stjr	      if (type == OP_OPEN_SUBEXP
2029146040Stjr		  && subexp_idx == dfa->nodes[node].opr.idx)
2030146040Stjr		ops_node = node;
2031146040Stjr	      else if (type == OP_CLOSE_SUBEXP
2032146040Stjr		       && subexp_idx == dfa->nodes[node].opr.idx)
2033146040Stjr		cls_node = node;
2034146040Stjr	    }
2035146040Stjr
2036146040Stjr	  /* Check the limitation of the open subexpression.  */
2037146040Stjr	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2038146040Stjr	  if (ops_node >= 0)
2039146040Stjr	    {
2040146040Stjr	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2041146040Stjr					   candidates);
2042146040Stjr	      if (BE (err != REG_NOERROR, 0))
2043146040Stjr		return err;
2044146040Stjr	    }
2045146040Stjr
2046146040Stjr	  /* Check the limitation of the close subexpression.  */
2047146040Stjr	  if (cls_node >= 0)
2048146040Stjr	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2049146040Stjr	      {
2050146040Stjr		int node = dest_nodes->elems[node_idx];
2051146040Stjr		if (!re_node_set_contains (dfa->inveclosures + node,
2052146040Stjr					   cls_node)
2053146040Stjr		    && !re_node_set_contains (dfa->eclosures + node,
2054146040Stjr					      cls_node))
2055146040Stjr		  {
2056146040Stjr		    /* It is against this limitation.
2057146040Stjr		       Remove it form the current sifted state.  */
2058146040Stjr		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2059146040Stjr						 candidates);
2060146040Stjr		    if (BE (err != REG_NOERROR, 0))
2061146040Stjr		      return err;
2062146040Stjr		    --node_idx;
2063146040Stjr		  }
2064146040Stjr	      }
2065146040Stjr	}
2066146040Stjr      else /* (ent->subexp_to != str_idx)  */
2067146040Stjr	{
2068146040Stjr	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2069146040Stjr	    {
2070146040Stjr	      int node = dest_nodes->elems[node_idx];
2071146040Stjr	      re_token_type_t type = dfa->nodes[node].type;
2072146040Stjr	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2073146040Stjr		{
2074146040Stjr		  if (subexp_idx != dfa->nodes[node].opr.idx)
2075146040Stjr		    continue;
2076146040Stjr		  /* It is against this limitation.
2077146040Stjr		     Remove it form the current sifted state.  */
2078146040Stjr		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2079146040Stjr					       candidates);
2080146040Stjr		  if (BE (err != REG_NOERROR, 0))
2081146040Stjr		    return err;
2082146040Stjr		}
2083146040Stjr	    }
2084146040Stjr	}
2085146040Stjr    }
2086146040Stjr  return REG_NOERROR;
2087146040Stjr}
2088146040Stjr
2089146040Stjrstatic reg_errcode_t
2090146040Stjrsift_states_bkref (mctx, sctx, str_idx, candidates)
2091146040Stjr     re_match_context_t *mctx;
2092146040Stjr     re_sift_context_t *sctx;
2093146040Stjr     int str_idx;
2094146040Stjr     const re_node_set *candidates;
2095146040Stjr{
2096146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2097146040Stjr  reg_errcode_t err;
2098146040Stjr  int node_idx, node;
2099146040Stjr  re_sift_context_t local_sctx;
2100146040Stjr  int first_idx = search_cur_bkref_entry (mctx, str_idx);
2101146040Stjr
2102146040Stjr  if (first_idx == -1)
2103146040Stjr    return REG_NOERROR;
2104146040Stjr
2105146040Stjr  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2106146040Stjr
2107146040Stjr  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2108146040Stjr    {
2109146040Stjr      int enabled_idx;
2110146040Stjr      re_token_type_t type;
2111146040Stjr      struct re_backref_cache_entry *entry;
2112146040Stjr      node = candidates->elems[node_idx];
2113146040Stjr      type = dfa->nodes[node].type;
2114146040Stjr      /* Avoid infinite loop for the REs like "()\1+".  */
2115146040Stjr      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2116146040Stjr	continue;
2117146040Stjr      if (type != OP_BACK_REF)
2118146040Stjr	continue;
2119146040Stjr
2120146040Stjr      entry = mctx->bkref_ents + first_idx;
2121146040Stjr      enabled_idx = first_idx;
2122146040Stjr      do
2123146040Stjr	{
2124146040Stjr	  int subexp_len, to_idx, dst_node;
2125146040Stjr	  re_dfastate_t *cur_state;
2126146040Stjr
2127146040Stjr	  if (entry->node != node)
2128146040Stjr	    continue;
2129146040Stjr	  subexp_len = entry->subexp_to - entry->subexp_from;
2130146040Stjr	  to_idx = str_idx + subexp_len;
2131146040Stjr	  dst_node = (subexp_len ? dfa->nexts[node]
2132146040Stjr		      : dfa->edests[node].elems[0]);
2133146040Stjr
2134146040Stjr	  if (to_idx > sctx->last_str_idx
2135146040Stjr	      || sctx->sifted_states[to_idx] == NULL
2136146040Stjr	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2137146040Stjr	      || check_dst_limits (mctx, &sctx->limits, node,
2138146040Stjr				   str_idx, dst_node, to_idx))
2139146040Stjr	    continue;
2140146040Stjr
2141146040Stjr	  if (local_sctx.sifted_states == NULL)
2142146040Stjr	    {
2143146040Stjr	      local_sctx = *sctx;
2144146040Stjr	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2145146040Stjr	      if (BE (err != REG_NOERROR, 0))
2146146040Stjr		goto free_return;
2147146040Stjr	    }
2148146040Stjr	  local_sctx.last_node = node;
2149146040Stjr	  local_sctx.last_str_idx = str_idx;
2150146040Stjr	  err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2151146040Stjr	  if (BE (err < 0, 0))
2152146040Stjr	    {
2153146040Stjr	      err = REG_ESPACE;
2154146040Stjr	      goto free_return;
2155146040Stjr	    }
2156146040Stjr	  cur_state = local_sctx.sifted_states[str_idx];
2157146040Stjr	  err = sift_states_backward (mctx, &local_sctx);
2158146040Stjr	  if (BE (err != REG_NOERROR, 0))
2159146040Stjr	    goto free_return;
2160146040Stjr	  if (sctx->limited_states != NULL)
2161146040Stjr	    {
2162146040Stjr	      err = merge_state_array (dfa, sctx->limited_states,
2163146040Stjr				       local_sctx.sifted_states,
2164146040Stjr				       str_idx + 1);
2165146040Stjr	      if (BE (err != REG_NOERROR, 0))
2166146040Stjr		goto free_return;
2167146040Stjr	    }
2168146040Stjr	  local_sctx.sifted_states[str_idx] = cur_state;
2169146040Stjr	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2170146040Stjr
2171146040Stjr	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2172146040Stjr          entry = mctx->bkref_ents + enabled_idx;
2173146040Stjr	}
2174146040Stjr      while (enabled_idx++, entry++->more);
2175146040Stjr    }
2176146040Stjr  err = REG_NOERROR;
2177146040Stjr free_return:
2178146040Stjr  if (local_sctx.sifted_states != NULL)
2179146040Stjr    {
2180146040Stjr      re_node_set_free (&local_sctx.limits);
2181146040Stjr    }
2182146040Stjr
2183146040Stjr  return err;
2184146040Stjr}
2185146040Stjr
2186146040Stjr
2187146040Stjr#ifdef RE_ENABLE_I18N
2188146040Stjrstatic int
2189146040Stjrsift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
2190146040Stjr    const re_match_context_t *mctx;
2191146040Stjr    re_sift_context_t *sctx;
2192146040Stjr    int node_idx, str_idx, max_str_idx;
2193146040Stjr{
2194146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2195146040Stjr  int naccepted;
2196146040Stjr  /* Check the node can accept `multi byte'.  */
2197146040Stjr  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2198146040Stjr  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2199146040Stjr      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2200146040Stjr			    dfa->nexts[node_idx]))
2201146040Stjr    /* The node can't accept the `multi byte', or the
2202146040Stjr       destination was already thrown away, then the node
2203146040Stjr       could't accept the current input `multi byte'.   */
2204146040Stjr    naccepted = 0;
2205146040Stjr  /* Otherwise, it is sure that the node could accept
2206146040Stjr     `naccepted' bytes input.  */
2207146040Stjr  return naccepted;
2208146040Stjr}
2209146040Stjr#endif /* RE_ENABLE_I18N */
2210146040Stjr
2211146040Stjr
2212146040Stjr/* Functions for state transition.  */
2213146040Stjr
2214146040Stjr/* Return the next state to which the current state STATE will transit by
2215146040Stjr   accepting the current input byte, and update STATE_LOG if necessary.
2216146040Stjr   If STATE can accept a multibyte char/collating element/back reference
2217146040Stjr   update the destination of STATE_LOG.  */
2218146040Stjr
2219146040Stjrstatic re_dfastate_t *
2220146040Stjrtransit_state (err, mctx, state)
2221146040Stjr     reg_errcode_t *err;
2222146040Stjr     re_match_context_t *mctx;
2223146040Stjr     re_dfastate_t *state;
2224146040Stjr{
2225146040Stjr  re_dfastate_t **trtable;
2226146040Stjr  unsigned char ch;
2227146040Stjr
2228146040Stjr#ifdef RE_ENABLE_I18N
2229146040Stjr  /* If the current state can accept multibyte.  */
2230146040Stjr  if (BE (state->accept_mb, 0))
2231146040Stjr    {
2232146040Stjr      *err = transit_state_mb (mctx, state);
2233146040Stjr      if (BE (*err != REG_NOERROR, 0))
2234146040Stjr	return NULL;
2235146040Stjr    }
2236146040Stjr#endif /* RE_ENABLE_I18N */
2237146040Stjr
2238146040Stjr  /* Then decide the next state with the single byte.  */
2239146040Stjr#if 0
2240146040Stjr  if (0)
2241146040Stjr    /* don't use transition table  */
2242146040Stjr    return transit_state_sb (err, mctx, state);
2243146040Stjr#endif
2244146040Stjr
2245146040Stjr  /* Use transition table  */
2246146040Stjr  ch = re_string_fetch_byte (&mctx->input);
2247146040Stjr  for (;;)
2248146040Stjr    {
2249146040Stjr      trtable = state->trtable;
2250146040Stjr      if (BE (trtable != NULL, 1))
2251146040Stjr	return trtable[ch];
2252146040Stjr
2253146040Stjr      trtable = state->word_trtable;
2254146040Stjr      if (BE (trtable != NULL, 1))
2255146040Stjr        {
2256146040Stjr	  unsigned int context;
2257146040Stjr	  context
2258146040Stjr	    = re_string_context_at (&mctx->input,
2259146040Stjr				    re_string_cur_idx (&mctx->input) - 1,
2260146040Stjr				    mctx->eflags);
2261146040Stjr	  if (IS_WORD_CONTEXT (context))
2262146040Stjr	    return trtable[ch + SBC_MAX];
2263146040Stjr	  else
2264146040Stjr	    return trtable[ch];
2265146040Stjr	}
2266146040Stjr
2267146040Stjr      if (!build_trtable (mctx->dfa, state))
2268146040Stjr	{
2269146040Stjr	  *err = REG_ESPACE;
2270146040Stjr	  return NULL;
2271146040Stjr	}
2272146040Stjr
2273146040Stjr      /* Retry, we now have a transition table.  */
2274146040Stjr    }
2275146040Stjr}
2276146040Stjr
2277146040Stjr/* Update the state_log if we need */
2278146040Stjrre_dfastate_t *
2279146040Stjrmerge_state_with_log (err, mctx, next_state)
2280146040Stjr     reg_errcode_t *err;
2281146040Stjr     re_match_context_t *mctx;
2282146040Stjr     re_dfastate_t *next_state;
2283146040Stjr{
2284146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2285146040Stjr  int cur_idx = re_string_cur_idx (&mctx->input);
2286146040Stjr
2287146040Stjr  if (cur_idx > mctx->state_log_top)
2288146040Stjr    {
2289146040Stjr      mctx->state_log[cur_idx] = next_state;
2290146040Stjr      mctx->state_log_top = cur_idx;
2291146040Stjr    }
2292146040Stjr  else if (mctx->state_log[cur_idx] == 0)
2293146040Stjr    {
2294146040Stjr      mctx->state_log[cur_idx] = next_state;
2295146040Stjr    }
2296146040Stjr  else
2297146040Stjr    {
2298146040Stjr      re_dfastate_t *pstate;
2299146040Stjr      unsigned int context;
2300146040Stjr      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2301146040Stjr      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2302146040Stjr         the destination of a multibyte char/collating element/
2303146040Stjr         back reference.  Then the next state is the union set of
2304146040Stjr         these destinations and the results of the transition table.  */
2305146040Stjr      pstate = mctx->state_log[cur_idx];
2306146040Stjr      log_nodes = pstate->entrance_nodes;
2307146040Stjr      if (next_state != NULL)
2308146040Stjr        {
2309146040Stjr          table_nodes = next_state->entrance_nodes;
2310146040Stjr          *err = re_node_set_init_union (&next_nodes, table_nodes,
2311146040Stjr					     log_nodes);
2312146040Stjr          if (BE (*err != REG_NOERROR, 0))
2313146040Stjr	    return NULL;
2314146040Stjr        }
2315146040Stjr      else
2316146040Stjr        next_nodes = *log_nodes;
2317146040Stjr      /* Note: We already add the nodes of the initial state,
2318146040Stjr	 then we don't need to add them here.  */
2319146040Stjr
2320146040Stjr      context = re_string_context_at (&mctx->input,
2321146040Stjr				      re_string_cur_idx (&mctx->input) - 1,
2322146040Stjr				      mctx->eflags);
2323146040Stjr      next_state = mctx->state_log[cur_idx]
2324146040Stjr        = re_acquire_state_context (err, dfa, &next_nodes, context);
2325146040Stjr      /* We don't need to check errors here, since the return value of
2326146040Stjr         this function is next_state and ERR is already set.  */
2327146040Stjr
2328146040Stjr      if (table_nodes != NULL)
2329146040Stjr        re_node_set_free (&next_nodes);
2330146040Stjr    }
2331146040Stjr
2332146040Stjr  if (BE (dfa->nbackref, 0) && next_state != NULL)
2333146040Stjr    {
2334146040Stjr      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2335146040Stjr	 later.  We must check them here, since the back references in the
2336146040Stjr	 next state might use them.  */
2337146040Stjr      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2338146040Stjr					cur_idx);
2339146040Stjr      if (BE (*err != REG_NOERROR, 0))
2340146040Stjr	return NULL;
2341146040Stjr
2342146040Stjr      /* If the next state has back references.  */
2343146040Stjr      if (next_state->has_backref)
2344146040Stjr	{
2345146040Stjr	  *err = transit_state_bkref (mctx, &next_state->nodes);
2346146040Stjr	  if (BE (*err != REG_NOERROR, 0))
2347146040Stjr	    return NULL;
2348146040Stjr	  next_state = mctx->state_log[cur_idx];
2349146040Stjr	}
2350146040Stjr    }
2351146040Stjr
2352146040Stjr  return next_state;
2353146040Stjr}
2354146040Stjr
2355146040Stjr/* Skip bytes in the input that correspond to part of a
2356146040Stjr   multi-byte match, then look in the log for a state
2357146040Stjr   from which to restart matching.  */
2358146040Stjrre_dfastate_t *
2359146040Stjrfind_recover_state (err, mctx)
2360146040Stjr     reg_errcode_t *err;
2361146040Stjr     re_match_context_t *mctx;
2362146040Stjr{
2363146040Stjr  re_dfastate_t *cur_state = NULL;
2364146040Stjr  do
2365146040Stjr    {
2366146040Stjr      int max = mctx->state_log_top;
2367146040Stjr      int cur_str_idx = re_string_cur_idx (&mctx->input);
2368146040Stjr
2369146040Stjr      do
2370146040Stjr	{
2371146040Stjr          if (++cur_str_idx > max)
2372146040Stjr            return NULL;
2373146040Stjr          re_string_skip_bytes (&mctx->input, 1);
2374146040Stjr	}
2375146040Stjr      while (mctx->state_log[cur_str_idx] == NULL);
2376146040Stjr
2377146040Stjr      cur_state = merge_state_with_log (err, mctx, NULL);
2378146040Stjr    }
2379146040Stjr  while (err == REG_NOERROR && cur_state == NULL);
2380146040Stjr  return cur_state;
2381146040Stjr}
2382146040Stjr
2383146040Stjr/* Helper functions for transit_state.  */
2384146040Stjr
2385146040Stjr/* From the node set CUR_NODES, pick up the nodes whose types are
2386146040Stjr   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2387146040Stjr   expression. And register them to use them later for evaluating the
2388146040Stjr   correspoding back references.  */
2389146040Stjr
2390146040Stjrstatic reg_errcode_t
2391146040Stjrcheck_subexp_matching_top (mctx, cur_nodes, str_idx)
2392146040Stjr     re_match_context_t *mctx;
2393146040Stjr     re_node_set *cur_nodes;
2394146040Stjr     int str_idx;
2395146040Stjr{
2396146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2397146040Stjr  int node_idx;
2398146040Stjr  reg_errcode_t err;
2399146040Stjr
2400146040Stjr  /* TODO: This isn't efficient.
2401146040Stjr	   Because there might be more than one nodes whose types are
2402146040Stjr	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2403146040Stjr	   nodes.
2404146040Stjr	   E.g. RE: (a){2}  */
2405146040Stjr  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2406146040Stjr    {
2407146040Stjr      int node = cur_nodes->elems[node_idx];
2408146040Stjr      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2409146040Stjr	  && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2410146040Stjr	  && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2411146040Stjr	{
2412146040Stjr	  err = match_ctx_add_subtop (mctx, node, str_idx);
2413146040Stjr	  if (BE (err != REG_NOERROR, 0))
2414146040Stjr	    return err;
2415146040Stjr	}
2416146040Stjr    }
2417146040Stjr  return REG_NOERROR;
2418146040Stjr}
2419146040Stjr
2420146040Stjr#if 0
2421146040Stjr/* Return the next state to which the current state STATE will transit by
2422146040Stjr   accepting the current input byte.  */
2423146040Stjr
2424146040Stjrstatic re_dfastate_t *
2425146040Stjrtransit_state_sb (err, mctx, state)
2426146040Stjr     reg_errcode_t *err;
2427146040Stjr     re_match_context_t *mctx;
2428146040Stjr     re_dfastate_t *state;
2429146040Stjr{
2430146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2431146040Stjr  re_node_set next_nodes;
2432146040Stjr  re_dfastate_t *next_state;
2433146040Stjr  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2434146040Stjr  unsigned int context;
2435146040Stjr
2436146040Stjr  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2437146040Stjr  if (BE (*err != REG_NOERROR, 0))
2438146040Stjr    return NULL;
2439146040Stjr  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2440146040Stjr    {
2441146040Stjr      int cur_node = state->nodes.elems[node_cnt];
2442146040Stjr      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2443146040Stjr	{
2444146040Stjr	  *err = re_node_set_merge (&next_nodes,
2445146040Stjr				    dfa->eclosures + dfa->nexts[cur_node]);
2446146040Stjr	  if (BE (*err != REG_NOERROR, 0))
2447146040Stjr	    {
2448146040Stjr	      re_node_set_free (&next_nodes);
2449146040Stjr	      return NULL;
2450146040Stjr	    }
2451146040Stjr	}
2452146040Stjr    }
2453146040Stjr  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2454146040Stjr  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2455146040Stjr  /* We don't need to check errors here, since the return value of
2456146040Stjr     this function is next_state and ERR is already set.  */
2457146040Stjr
2458146040Stjr  re_node_set_free (&next_nodes);
2459146040Stjr  re_string_skip_bytes (&mctx->input, 1);
2460146040Stjr  return next_state;
2461146040Stjr}
2462146040Stjr#endif
2463146040Stjr
2464146040Stjr#ifdef RE_ENABLE_I18N
2465146040Stjrstatic reg_errcode_t
2466146040Stjrtransit_state_mb (mctx, pstate)
2467146040Stjr    re_match_context_t *mctx;
2468146040Stjr    re_dfastate_t *pstate;
2469146040Stjr{
2470146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2471146040Stjr  reg_errcode_t err;
2472146040Stjr  int i;
2473146040Stjr
2474146040Stjr  for (i = 0; i < pstate->nodes.nelem; ++i)
2475146040Stjr    {
2476146040Stjr      re_node_set dest_nodes, *new_nodes;
2477146040Stjr      int cur_node_idx = pstate->nodes.elems[i];
2478146040Stjr      int naccepted, dest_idx;
2479146040Stjr      unsigned int context;
2480146040Stjr      re_dfastate_t *dest_state;
2481146040Stjr
2482146040Stjr      if (!dfa->nodes[cur_node_idx].accept_mb)
2483146040Stjr        continue;
2484146040Stjr
2485146040Stjr      if (dfa->nodes[cur_node_idx].constraint)
2486146040Stjr	{
2487146040Stjr	  context = re_string_context_at (&mctx->input,
2488146040Stjr					  re_string_cur_idx (&mctx->input),
2489146040Stjr					  mctx->eflags);
2490146040Stjr	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2491146040Stjr					   context))
2492146040Stjr	    continue;
2493146040Stjr	}
2494146040Stjr
2495146040Stjr      /* How many bytes the node can accept?  */
2496146040Stjr      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2497146040Stjr					   re_string_cur_idx (&mctx->input));
2498146040Stjr      if (naccepted == 0)
2499146040Stjr	continue;
2500146040Stjr
2501146040Stjr      /* The node can accepts `naccepted' bytes.  */
2502146040Stjr      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2503146040Stjr      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2504146040Stjr			       : mctx->max_mb_elem_len);
2505146040Stjr      err = clean_state_log_if_needed (mctx, dest_idx);
2506146040Stjr      if (BE (err != REG_NOERROR, 0))
2507146040Stjr	return err;
2508146040Stjr#ifdef DEBUG
2509146040Stjr      assert (dfa->nexts[cur_node_idx] != -1);
2510146040Stjr#endif
2511146040Stjr      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2512146040Stjr
2513146040Stjr      dest_state = mctx->state_log[dest_idx];
2514146040Stjr      if (dest_state == NULL)
2515146040Stjr	dest_nodes = *new_nodes;
2516146040Stjr      else
2517146040Stjr	{
2518146040Stjr	  err = re_node_set_init_union (&dest_nodes,
2519146040Stjr					dest_state->entrance_nodes, new_nodes);
2520146040Stjr	  if (BE (err != REG_NOERROR, 0))
2521146040Stjr	    return err;
2522146040Stjr	}
2523146040Stjr      context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2524146040Stjr      mctx->state_log[dest_idx]
2525146040Stjr	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2526146040Stjr      if (dest_state != NULL)
2527146040Stjr	re_node_set_free (&dest_nodes);
2528146040Stjr      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2529146040Stjr	return err;
2530146040Stjr    }
2531146040Stjr  return REG_NOERROR;
2532146040Stjr}
2533146040Stjr#endif /* RE_ENABLE_I18N */
2534146040Stjr
2535146040Stjrstatic reg_errcode_t
2536146040Stjrtransit_state_bkref (mctx, nodes)
2537146040Stjr    re_match_context_t *mctx;
2538146040Stjr    const re_node_set *nodes;
2539146040Stjr{
2540146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2541146040Stjr  reg_errcode_t err;
2542146040Stjr  int i;
2543146040Stjr  int cur_str_idx = re_string_cur_idx (&mctx->input);
2544146040Stjr
2545146040Stjr  for (i = 0; i < nodes->nelem; ++i)
2546146040Stjr    {
2547146040Stjr      int dest_str_idx, prev_nelem, bkc_idx;
2548146040Stjr      int node_idx = nodes->elems[i];
2549146040Stjr      unsigned int context;
2550146040Stjr      const re_token_t *node = dfa->nodes + node_idx;
2551146040Stjr      re_node_set *new_dest_nodes;
2552146040Stjr
2553146040Stjr      /* Check whether `node' is a backreference or not.  */
2554146040Stjr      if (node->type != OP_BACK_REF)
2555146040Stjr	continue;
2556146040Stjr
2557146040Stjr      if (node->constraint)
2558146040Stjr	{
2559146040Stjr	  context = re_string_context_at (&mctx->input, cur_str_idx,
2560146040Stjr					  mctx->eflags);
2561146040Stjr	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2562146040Stjr	    continue;
2563146040Stjr	}
2564146040Stjr
2565146040Stjr      /* `node' is a backreference.
2566146040Stjr	 Check the substring which the substring matched.  */
2567146040Stjr      bkc_idx = mctx->nbkref_ents;
2568146040Stjr      err = get_subexp (mctx, node_idx, cur_str_idx);
2569146040Stjr      if (BE (err != REG_NOERROR, 0))
2570146040Stjr	goto free_return;
2571146040Stjr
2572146040Stjr      /* And add the epsilon closures (which is `new_dest_nodes') of
2573146040Stjr	 the backreference to appropriate state_log.  */
2574146040Stjr#ifdef DEBUG
2575146040Stjr      assert (dfa->nexts[node_idx] != -1);
2576146040Stjr#endif
2577146040Stjr      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2578146040Stjr	{
2579146040Stjr	  int subexp_len;
2580146040Stjr	  re_dfastate_t *dest_state;
2581146040Stjr	  struct re_backref_cache_entry *bkref_ent;
2582146040Stjr	  bkref_ent = mctx->bkref_ents + bkc_idx;
2583146040Stjr	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2584146040Stjr	    continue;
2585146040Stjr	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2586146040Stjr	  new_dest_nodes = (subexp_len == 0
2587146040Stjr			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2588146040Stjr			    : dfa->eclosures + dfa->nexts[node_idx]);
2589146040Stjr	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2590146040Stjr			  - bkref_ent->subexp_from);
2591146040Stjr	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2592146040Stjr					  mctx->eflags);
2593146040Stjr	  dest_state = mctx->state_log[dest_str_idx];
2594146040Stjr	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2595146040Stjr			: mctx->state_log[cur_str_idx]->nodes.nelem);
2596146040Stjr	  /* Add `new_dest_node' to state_log.  */
2597146040Stjr	  if (dest_state == NULL)
2598146040Stjr	    {
2599146040Stjr	      mctx->state_log[dest_str_idx]
2600146040Stjr		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2601146040Stjr					    context);
2602146040Stjr	      if (BE (mctx->state_log[dest_str_idx] == NULL
2603146040Stjr		      && err != REG_NOERROR, 0))
2604146040Stjr		goto free_return;
2605146040Stjr	    }
2606146040Stjr	  else
2607146040Stjr	    {
2608146040Stjr	      re_node_set dest_nodes;
2609146040Stjr	      err = re_node_set_init_union (&dest_nodes,
2610146040Stjr					    dest_state->entrance_nodes,
2611146040Stjr					    new_dest_nodes);
2612146040Stjr	      if (BE (err != REG_NOERROR, 0))
2613146040Stjr		{
2614146040Stjr		  re_node_set_free (&dest_nodes);
2615146040Stjr		  goto free_return;
2616146040Stjr		}
2617146040Stjr	      mctx->state_log[dest_str_idx]
2618146040Stjr		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2619146040Stjr	      re_node_set_free (&dest_nodes);
2620146040Stjr	      if (BE (mctx->state_log[dest_str_idx] == NULL
2621146040Stjr		      && err != REG_NOERROR, 0))
2622146040Stjr		goto free_return;
2623146040Stjr	    }
2624146040Stjr	  /* We need to check recursively if the backreference can epsilon
2625146040Stjr	     transit.  */
2626146040Stjr	  if (subexp_len == 0
2627146040Stjr	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2628146040Stjr	    {
2629146040Stjr	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2630146040Stjr					       cur_str_idx);
2631146040Stjr	      if (BE (err != REG_NOERROR, 0))
2632146040Stjr		goto free_return;
2633146040Stjr	      err = transit_state_bkref (mctx, new_dest_nodes);
2634146040Stjr	      if (BE (err != REG_NOERROR, 0))
2635146040Stjr		goto free_return;
2636146040Stjr	    }
2637146040Stjr	}
2638146040Stjr    }
2639146040Stjr  err = REG_NOERROR;
2640146040Stjr free_return:
2641146040Stjr  return err;
2642146040Stjr}
2643146040Stjr
2644146040Stjr/* Enumerate all the candidates which the backreference BKREF_NODE can match
2645146040Stjr   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2646146040Stjr   Note that we might collect inappropriate candidates here.
2647146040Stjr   However, the cost of checking them strictly here is too high, then we
2648146040Stjr   delay these checking for prune_impossible_nodes().  */
2649146040Stjr
2650146040Stjrstatic reg_errcode_t
2651146040Stjrget_subexp (mctx, bkref_node, bkref_str_idx)
2652146040Stjr     re_match_context_t *mctx;
2653146040Stjr     int bkref_node, bkref_str_idx;
2654146040Stjr{
2655146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2656146040Stjr  int subexp_num, sub_top_idx;
2657146040Stjr  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2658146040Stjr  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2659146040Stjr  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2660146040Stjr  if (cache_idx != -1)
2661146040Stjr    {
2662146040Stjr      const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2663146040Stjr      do
2664146040Stjr        if (entry->node == bkref_node)
2665146040Stjr	  return REG_NOERROR; /* We already checked it.  */
2666146040Stjr      while (entry++->more);
2667146040Stjr    }
2668146040Stjr
2669146040Stjr  subexp_num = dfa->nodes[bkref_node].opr.idx;
2670146040Stjr
2671146040Stjr  /* For each sub expression  */
2672146040Stjr  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2673146040Stjr    {
2674146040Stjr      reg_errcode_t err;
2675146040Stjr      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2676146040Stjr      re_sub_match_last_t *sub_last;
2677146040Stjr      int sub_last_idx, sl_str, bkref_str_off;
2678146040Stjr
2679146040Stjr      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2680146040Stjr	continue; /* It isn't related.  */
2681146040Stjr
2682146040Stjr      sl_str = sub_top->str_idx;
2683146040Stjr      bkref_str_off = bkref_str_idx;
2684146040Stjr      /* At first, check the last node of sub expressions we already
2685146040Stjr	 evaluated.  */
2686146040Stjr      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2687146040Stjr	{
2688146040Stjr	  int sl_str_diff;
2689146040Stjr	  sub_last = sub_top->lasts[sub_last_idx];
2690146040Stjr	  sl_str_diff = sub_last->str_idx - sl_str;
2691146040Stjr	  /* The matched string by the sub expression match with the substring
2692146040Stjr	     at the back reference?  */
2693146040Stjr	  if (sl_str_diff > 0)
2694146040Stjr	    {
2695146040Stjr	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2696146040Stjr		{
2697146040Stjr		  /* Not enough chars for a successful match.  */
2698146040Stjr		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2699146040Stjr		    break;
2700146040Stjr
2701146040Stjr		  err = clean_state_log_if_needed (mctx,
2702146040Stjr						   bkref_str_off
2703146040Stjr						   + sl_str_diff);
2704146040Stjr		  if (BE (err != REG_NOERROR, 0))
2705146040Stjr		    return err;
2706146040Stjr		  buf = (const char *) re_string_get_buffer (&mctx->input);
2707146040Stjr		}
2708146040Stjr	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2709146040Stjr		break; /* We don't need to search this sub expression any more.  */
2710146040Stjr	    }
2711146040Stjr	  bkref_str_off += sl_str_diff;
2712146040Stjr	  sl_str += sl_str_diff;
2713146040Stjr	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2714146040Stjr				bkref_str_idx);
2715146040Stjr
2716146040Stjr	  /* Reload buf, since the preceding call might have reallocated
2717146040Stjr	     the buffer.  */
2718146040Stjr	  buf = (const char *) re_string_get_buffer (&mctx->input);
2719146040Stjr
2720146040Stjr	  if (err == REG_NOMATCH)
2721146040Stjr	    continue;
2722146040Stjr	  if (BE (err != REG_NOERROR, 0))
2723146040Stjr	    return err;
2724146040Stjr	}
2725146040Stjr
2726146040Stjr      if (sub_last_idx < sub_top->nlasts)
2727146040Stjr	continue;
2728146040Stjr      if (sub_last_idx > 0)
2729146040Stjr	++sl_str;
2730146040Stjr      /* Then, search for the other last nodes of the sub expression.  */
2731146040Stjr      for (; sl_str <= bkref_str_idx; ++sl_str)
2732146040Stjr	{
2733146040Stjr	  int cls_node, sl_str_off;
2734146040Stjr	  const re_node_set *nodes;
2735146040Stjr	  sl_str_off = sl_str - sub_top->str_idx;
2736146040Stjr	  /* The matched string by the sub expression match with the substring
2737146040Stjr	     at the back reference?  */
2738146040Stjr	  if (sl_str_off > 0)
2739146040Stjr	    {
2740146040Stjr	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2741146040Stjr		{
2742146040Stjr		  /* If we are at the end of the input, we cannot match.  */
2743146040Stjr		  if (bkref_str_off >= mctx->input.len)
2744146040Stjr		    break;
2745146040Stjr
2746146040Stjr		  err = extend_buffers (mctx);
2747146040Stjr		  if (BE (err != REG_NOERROR, 0))
2748146040Stjr		    return err;
2749146040Stjr
2750146040Stjr		  buf = (const char *) re_string_get_buffer (&mctx->input);
2751146040Stjr		}
2752146040Stjr	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2753146040Stjr		break; /* We don't need to search this sub expression
2754146040Stjr			  any more.  */
2755146040Stjr	    }
2756146040Stjr	  if (mctx->state_log[sl_str] == NULL)
2757146040Stjr	    continue;
2758146040Stjr	  /* Does this state have a ')' of the sub expression?  */
2759146040Stjr	  nodes = &mctx->state_log[sl_str]->nodes;
2760146040Stjr	  cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2761146040Stjr	  if (cls_node == -1)
2762146040Stjr	    continue; /* No.  */
2763146040Stjr	  if (sub_top->path == NULL)
2764146040Stjr	    {
2765146040Stjr	      sub_top->path = calloc (sizeof (state_array_t),
2766146040Stjr				      sl_str - sub_top->str_idx + 1);
2767146040Stjr	      if (sub_top->path == NULL)
2768146040Stjr		return REG_ESPACE;
2769146040Stjr	    }
2770146040Stjr	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2771146040Stjr	     in the current context?  */
2772146040Stjr	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2773146040Stjr			       sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2774146040Stjr	  if (err == REG_NOMATCH)
2775146040Stjr	      continue;
2776146040Stjr	  if (BE (err != REG_NOERROR, 0))
2777146040Stjr	      return err;
2778146040Stjr	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2779146040Stjr	  if (BE (sub_last == NULL, 0))
2780146040Stjr	    return REG_ESPACE;
2781146040Stjr	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2782146040Stjr				bkref_str_idx);
2783146040Stjr	  if (err == REG_NOMATCH)
2784146040Stjr	    continue;
2785146040Stjr	}
2786146040Stjr    }
2787146040Stjr  return REG_NOERROR;
2788146040Stjr}
2789146040Stjr
2790146040Stjr/* Helper functions for get_subexp().  */
2791146040Stjr
2792146040Stjr/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2793146040Stjr   If it can arrive, register the sub expression expressed with SUB_TOP
2794146040Stjr   and SUB_LAST.  */
2795146040Stjr
2796146040Stjrstatic reg_errcode_t
2797146040Stjrget_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
2798146040Stjr     re_match_context_t *mctx;
2799146040Stjr     const re_sub_match_top_t *sub_top;
2800146040Stjr     re_sub_match_last_t *sub_last;
2801146040Stjr     int bkref_node, bkref_str;
2802146040Stjr{
2803146040Stjr  reg_errcode_t err;
2804146040Stjr  int to_idx;
2805146040Stjr  /* Can the subexpression arrive the back reference?  */
2806146040Stjr  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2807146040Stjr		       sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2808146040Stjr  if (err != REG_NOERROR)
2809146040Stjr    return err;
2810146040Stjr  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2811146040Stjr			     sub_last->str_idx);
2812146040Stjr  if (BE (err != REG_NOERROR, 0))
2813146040Stjr    return err;
2814146040Stjr  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2815146040Stjr  return clean_state_log_if_needed (mctx, to_idx);
2816146040Stjr}
2817146040Stjr
2818146040Stjr/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2819146040Stjr   Search '(' if FL_OPEN, or search ')' otherwise.
2820146040Stjr   TODO: This function isn't efficient...
2821146040Stjr	 Because there might be more than one nodes whose types are
2822146040Stjr	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2823146040Stjr	 nodes.
2824146040Stjr	 E.g. RE: (a){2}  */
2825146040Stjr
2826146040Stjrstatic int
2827146040Stjrfind_subexp_node (dfa, nodes, subexp_idx, type)
2828146040Stjr     const re_dfa_t *dfa;
2829146040Stjr     const re_node_set *nodes;
2830146040Stjr     int subexp_idx, type;
2831146040Stjr{
2832146040Stjr  int cls_idx;
2833146040Stjr  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2834146040Stjr    {
2835146040Stjr      int cls_node = nodes->elems[cls_idx];
2836146040Stjr      const re_token_t *node = dfa->nodes + cls_node;
2837146040Stjr      if (node->type == type
2838146040Stjr	  && node->opr.idx == subexp_idx)
2839146040Stjr	return cls_node;
2840146040Stjr    }
2841146040Stjr  return -1;
2842146040Stjr}
2843146040Stjr
2844146040Stjr/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2845146040Stjr   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2846146040Stjr   heavily reused.
2847146040Stjr   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2848146040Stjr
2849146040Stjrstatic reg_errcode_t
2850146040Stjrcheck_arrival (mctx, path, top_node, top_str, last_node, last_str,
2851146040Stjr	       type)
2852146040Stjr     re_match_context_t *mctx;
2853146040Stjr     state_array_t *path;
2854146040Stjr     int top_node, top_str, last_node, last_str, type;
2855146040Stjr{
2856146040Stjr  re_dfa_t *const dfa = mctx->dfa;
2857146040Stjr  reg_errcode_t err;
2858146040Stjr  int subexp_num, backup_cur_idx, str_idx, null_cnt;
2859146040Stjr  re_dfastate_t *cur_state = NULL;
2860146040Stjr  re_node_set *cur_nodes, next_nodes;
2861146040Stjr  re_dfastate_t **backup_state_log;
2862146040Stjr  unsigned int context;
2863146040Stjr
2864146040Stjr  subexp_num = dfa->nodes[top_node].opr.idx;
2865146040Stjr  /* Extend the buffer if we need.  */
2866146040Stjr  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2867146040Stjr    {
2868146040Stjr      re_dfastate_t **new_array;
2869146040Stjr      int old_alloc = path->alloc;
2870146040Stjr      path->alloc += last_str + mctx->max_mb_elem_len + 1;
2871146040Stjr      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2872146040Stjr      if (new_array == NULL)
2873146040Stjr	{
2874146040Stjr	  path->alloc = old_alloc;
2875146040Stjr	  return REG_ESPACE;
2876146040Stjr	}
2877146040Stjr      path->array = new_array;
2878146040Stjr      memset (new_array + old_alloc, '\0',
2879146040Stjr	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2880146040Stjr    }
2881146040Stjr
2882146040Stjr  str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2883146040Stjr
2884146040Stjr  /* Temporary modify MCTX.  */
2885146040Stjr  backup_state_log = mctx->state_log;
2886146040Stjr  backup_cur_idx = mctx->input.cur_idx;
2887146040Stjr  mctx->state_log = path->array;
2888146040Stjr  mctx->input.cur_idx = str_idx;
2889146040Stjr
2890146040Stjr  /* Setup initial node set.  */
2891146040Stjr  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2892146040Stjr  if (str_idx == top_str)
2893146040Stjr    {
2894146040Stjr      err = re_node_set_init_1 (&next_nodes, top_node);
2895146040Stjr      if (BE (err != REG_NOERROR, 0))
2896146040Stjr	return err;
2897146040Stjr      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2898146040Stjr      if (BE (err != REG_NOERROR, 0))
2899146040Stjr	{
2900146040Stjr	  re_node_set_free (&next_nodes);
2901146040Stjr	  return err;
2902146040Stjr	}
2903146040Stjr    }
2904146040Stjr  else
2905146040Stjr    {
2906146040Stjr      cur_state = mctx->state_log[str_idx];
2907146040Stjr      if (cur_state && cur_state->has_backref)
2908146040Stjr	{
2909146040Stjr	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2910146040Stjr	  if (BE ( err != REG_NOERROR, 0))
2911146040Stjr	    return err;
2912146040Stjr	}
2913146040Stjr      else
2914146040Stjr	re_node_set_init_empty (&next_nodes);
2915146040Stjr    }
2916146040Stjr  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2917146040Stjr    {
2918146040Stjr      if (next_nodes.nelem)
2919146040Stjr	{
2920146040Stjr	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2921146040Stjr				    subexp_num, type);
2922146040Stjr	  if (BE ( err != REG_NOERROR, 0))
2923146040Stjr	    {
2924146040Stjr	      re_node_set_free (&next_nodes);
2925146040Stjr	      return err;
2926146040Stjr	    }
2927146040Stjr	}
2928146040Stjr      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2929146040Stjr      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2930146040Stjr	{
2931146040Stjr	  re_node_set_free (&next_nodes);
2932146040Stjr	  return err;
2933146040Stjr	}
2934146040Stjr      mctx->state_log[str_idx] = cur_state;
2935146040Stjr    }
2936146040Stjr
2937146040Stjr  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2938146040Stjr    {
2939146040Stjr      re_node_set_empty (&next_nodes);
2940146040Stjr      if (mctx->state_log[str_idx + 1])
2941146040Stjr	{
2942146040Stjr	  err = re_node_set_merge (&next_nodes,
2943146040Stjr				   &mctx->state_log[str_idx + 1]->nodes);
2944146040Stjr	  if (BE (err != REG_NOERROR, 0))
2945146040Stjr	    {
2946146040Stjr	      re_node_set_free (&next_nodes);
2947146040Stjr	      return err;
2948146040Stjr	    }
2949146040Stjr	}
2950146040Stjr      if (cur_state)
2951146040Stjr	{
2952146040Stjr	  err = check_arrival_add_next_nodes (mctx, str_idx,
2953146040Stjr					      &cur_state->non_eps_nodes, &next_nodes);
2954146040Stjr	  if (BE (err != REG_NOERROR, 0))
2955146040Stjr	    {
2956146040Stjr	      re_node_set_free (&next_nodes);
2957146040Stjr	      return err;
2958146040Stjr	    }
2959146040Stjr	}
2960146040Stjr      ++str_idx;
2961146040Stjr      if (next_nodes.nelem)
2962146040Stjr	{
2963146040Stjr	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2964146040Stjr	  if (BE (err != REG_NOERROR, 0))
2965146040Stjr	    {
2966146040Stjr	      re_node_set_free (&next_nodes);
2967146040Stjr	      return err;
2968146040Stjr	    }
2969146040Stjr	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2970146040Stjr				    subexp_num, type);
2971146040Stjr	  if (BE ( err != REG_NOERROR, 0))
2972146040Stjr	    {
2973146040Stjr	      re_node_set_free (&next_nodes);
2974146040Stjr	      return err;
2975146040Stjr	    }
2976146040Stjr	}
2977146040Stjr      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2978146040Stjr      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2979146040Stjr      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2980146040Stjr	{
2981146040Stjr	  re_node_set_free (&next_nodes);
2982146040Stjr	  return err;
2983146040Stjr	}
2984146040Stjr      mctx->state_log[str_idx] = cur_state;
2985146040Stjr      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2986146040Stjr    }
2987146040Stjr  re_node_set_free (&next_nodes);
2988146040Stjr  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2989146040Stjr	       : &mctx->state_log[last_str]->nodes);
2990146040Stjr  path->next_idx = str_idx;
2991146040Stjr
2992146040Stjr  /* Fix MCTX.  */
2993146040Stjr  mctx->state_log = backup_state_log;
2994146040Stjr  mctx->input.cur_idx = backup_cur_idx;
2995146040Stjr
2996146040Stjr  /* Then check the current node set has the node LAST_NODE.  */
2997146040Stjr  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2998146040Stjr    return REG_NOERROR;
2999146040Stjr
3000146040Stjr  return REG_NOMATCH;
3001146040Stjr}
3002146040Stjr
3003146040Stjr/* Helper functions for check_arrival.  */
3004146040Stjr
3005146040Stjr/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3006146040Stjr   to NEXT_NODES.
3007146040Stjr   TODO: This function is similar to the functions transit_state*(),
3008146040Stjr	 however this function has many additional works.
3009146040Stjr	 Can't we unify them?  */
3010146040Stjr
3011146040Stjrstatic reg_errcode_t
3012146040Stjrcheck_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
3013146040Stjr     re_match_context_t *mctx;
3014146040Stjr     int str_idx;
3015146040Stjr     re_node_set *cur_nodes, *next_nodes;
3016146040Stjr{
3017146040Stjr  re_dfa_t *const dfa = mctx->dfa;
3018146040Stjr  int result;
3019146040Stjr  int cur_idx;
3020146040Stjr  reg_errcode_t err;
3021146040Stjr  re_node_set union_set;
3022146040Stjr  re_node_set_init_empty (&union_set);
3023146040Stjr  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3024146040Stjr    {
3025146040Stjr      int naccepted = 0;
3026146040Stjr      int cur_node = cur_nodes->elems[cur_idx];
3027146040Stjr#ifdef DEBUG
3028146040Stjr      re_token_type_t type = dfa->nodes[cur_node].type;
3029146040Stjr      assert (!IS_EPSILON_NODE (type));
3030146040Stjr#endif
3031146040Stjr#ifdef RE_ENABLE_I18N
3032146040Stjr      /* If the node may accept `multi byte'.  */
3033146040Stjr      if (dfa->nodes[cur_node].accept_mb)
3034146040Stjr	{
3035146040Stjr	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3036146040Stjr					       str_idx);
3037146040Stjr	  if (naccepted > 1)
3038146040Stjr	    {
3039146040Stjr	      re_dfastate_t *dest_state;
3040146040Stjr	      int next_node = dfa->nexts[cur_node];
3041146040Stjr	      int next_idx = str_idx + naccepted;
3042146040Stjr	      dest_state = mctx->state_log[next_idx];
3043146040Stjr	      re_node_set_empty (&union_set);
3044146040Stjr	      if (dest_state)
3045146040Stjr		{
3046146040Stjr		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3047146040Stjr		  if (BE (err != REG_NOERROR, 0))
3048146040Stjr		    {
3049146040Stjr		      re_node_set_free (&union_set);
3050146040Stjr		      return err;
3051146040Stjr		    }
3052146040Stjr		}
3053146040Stjr	      result = re_node_set_insert (&union_set, next_node);
3054146040Stjr	      if (BE (result < 0, 0))
3055146040Stjr		{
3056146040Stjr		  re_node_set_free (&union_set);
3057146040Stjr		  return REG_ESPACE;
3058146040Stjr		}
3059146040Stjr	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3060146040Stjr							    &union_set);
3061146040Stjr	      if (BE (mctx->state_log[next_idx] == NULL
3062146040Stjr		      && err != REG_NOERROR, 0))
3063146040Stjr		{
3064146040Stjr		  re_node_set_free (&union_set);
3065146040Stjr		  return err;
3066146040Stjr		}
3067146040Stjr	    }
3068146040Stjr	}
3069146040Stjr#endif /* RE_ENABLE_I18N */
3070146040Stjr      if (naccepted
3071146040Stjr	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3072146040Stjr	{
3073146040Stjr	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3074146040Stjr	  if (BE (result < 0, 0))
3075146040Stjr	    {
3076146040Stjr	      re_node_set_free (&union_set);
3077146040Stjr	      return REG_ESPACE;
3078146040Stjr	    }
3079146040Stjr	}
3080146040Stjr    }
3081146040Stjr  re_node_set_free (&union_set);
3082146040Stjr  return REG_NOERROR;
3083146040Stjr}
3084146040Stjr
3085146040Stjr/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3086146040Stjr   CUR_NODES, however exclude the nodes which are:
3087146040Stjr    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3088146040Stjr    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3089146040Stjr*/
3090146040Stjr
3091146040Stjrstatic reg_errcode_t
3092146040Stjrcheck_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
3093146040Stjr     re_dfa_t *dfa;
3094146040Stjr     re_node_set *cur_nodes;
3095146040Stjr     int ex_subexp, type;
3096146040Stjr{
3097146040Stjr  reg_errcode_t err;
3098146040Stjr  int idx, outside_node;
3099146040Stjr  re_node_set new_nodes;
3100146040Stjr#ifdef DEBUG
3101146040Stjr  assert (cur_nodes->nelem);
3102146040Stjr#endif
3103146040Stjr  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3104146040Stjr  if (BE (err != REG_NOERROR, 0))
3105146040Stjr    return err;
3106146040Stjr  /* Create a new node set NEW_NODES with the nodes which are epsilon
3107146040Stjr     closures of the node in CUR_NODES.  */
3108146040Stjr
3109146040Stjr  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3110146040Stjr    {
3111146040Stjr      int cur_node = cur_nodes->elems[idx];
3112146040Stjr      re_node_set *eclosure = dfa->eclosures + cur_node;
3113146040Stjr      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3114146040Stjr      if (outside_node == -1)
3115146040Stjr	{
3116146040Stjr	  /* There are no problematic nodes, just merge them.  */
3117146040Stjr	  err = re_node_set_merge (&new_nodes, eclosure);
3118146040Stjr	  if (BE (err != REG_NOERROR, 0))
3119146040Stjr	    {
3120146040Stjr	      re_node_set_free (&new_nodes);
3121146040Stjr	      return err;
3122146040Stjr	    }
3123146040Stjr	}
3124146040Stjr      else
3125146040Stjr	{
3126146040Stjr	  /* There are problematic nodes, re-calculate incrementally.  */
3127146040Stjr	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3128146040Stjr					      ex_subexp, type);
3129146040Stjr	  if (BE (err != REG_NOERROR, 0))
3130146040Stjr	    {
3131146040Stjr	      re_node_set_free (&new_nodes);
3132146040Stjr	      return err;
3133146040Stjr	    }
3134146040Stjr	}
3135146040Stjr    }
3136146040Stjr  re_node_set_free (cur_nodes);
3137146040Stjr  *cur_nodes = new_nodes;
3138146040Stjr  return REG_NOERROR;
3139146040Stjr}
3140146040Stjr
3141146040Stjr/* Helper function for check_arrival_expand_ecl.
3142146040Stjr   Check incrementally the epsilon closure of TARGET, and if it isn't
3143146040Stjr   problematic append it to DST_NODES.  */
3144146040Stjr
3145146040Stjrstatic reg_errcode_t
3146146040Stjrcheck_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
3147146040Stjr     re_dfa_t *dfa;
3148146040Stjr     int target, ex_subexp, type;
3149146040Stjr     re_node_set *dst_nodes;
3150146040Stjr{
3151146040Stjr  int cur_node;
3152146040Stjr  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3153146040Stjr    {
3154146040Stjr      int err;
3155146040Stjr
3156146040Stjr      if (dfa->nodes[cur_node].type == type
3157146040Stjr	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3158146040Stjr	{
3159146040Stjr	  if (type == OP_CLOSE_SUBEXP)
3160146040Stjr	    {
3161146040Stjr	      err = re_node_set_insert (dst_nodes, cur_node);
3162146040Stjr	      if (BE (err == -1, 0))
3163146040Stjr		return REG_ESPACE;
3164146040Stjr	    }
3165146040Stjr	  break;
3166146040Stjr	}
3167146040Stjr      err = re_node_set_insert (dst_nodes, cur_node);
3168146040Stjr      if (BE (err == -1, 0))
3169146040Stjr	return REG_ESPACE;
3170146040Stjr      if (dfa->edests[cur_node].nelem == 0)
3171146040Stjr	break;
3172146040Stjr      if (dfa->edests[cur_node].nelem == 2)
3173146040Stjr	{
3174146040Stjr	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3175146040Stjr					      dfa->edests[cur_node].elems[1],
3176146040Stjr					      ex_subexp, type);
3177146040Stjr	  if (BE (err != REG_NOERROR, 0))
3178146040Stjr	    return err;
3179146040Stjr	}
3180146040Stjr      cur_node = dfa->edests[cur_node].elems[0];
3181146040Stjr    }
3182146040Stjr  return REG_NOERROR;
3183146040Stjr}
3184146040Stjr
3185146040Stjr
3186146040Stjr/* For all the back references in the current state, calculate the
3187146040Stjr   destination of the back references by the appropriate entry
3188146040Stjr   in MCTX->BKREF_ENTS.  */
3189146040Stjr
3190146040Stjrstatic reg_errcode_t
3191146040Stjrexpand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
3192146040Stjr		    type)
3193146040Stjr     re_match_context_t *mctx;
3194146040Stjr     int cur_str, subexp_num, type;
3195146040Stjr     re_node_set *cur_nodes;
3196146040Stjr{
3197146040Stjr  re_dfa_t *const dfa = mctx->dfa;
3198146040Stjr  reg_errcode_t err;
3199146040Stjr  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3200146040Stjr  struct re_backref_cache_entry *ent;
3201146040Stjr
3202146040Stjr  if (cache_idx_start == -1)
3203146040Stjr    return REG_NOERROR;
3204146040Stjr
3205146040Stjr restart:
3206146040Stjr  ent = mctx->bkref_ents + cache_idx_start;
3207146040Stjr  do
3208146040Stjr    {
3209146040Stjr      int to_idx, next_node;
3210146040Stjr
3211146040Stjr      /* Is this entry ENT is appropriate?  */
3212146040Stjr      if (!re_node_set_contains (cur_nodes, ent->node))
3213146040Stjr	continue; /* No.  */
3214146040Stjr
3215146040Stjr      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3216146040Stjr      /* Calculate the destination of the back reference, and append it
3217146040Stjr	 to MCTX->STATE_LOG.  */
3218146040Stjr      if (to_idx == cur_str)
3219146040Stjr	{
3220146040Stjr	  /* The backreference did epsilon transit, we must re-check all the
3221146040Stjr	     node in the current state.  */
3222146040Stjr	  re_node_set new_dests;
3223146040Stjr	  reg_errcode_t err2, err3;
3224146040Stjr	  next_node = dfa->edests[ent->node].elems[0];
3225146040Stjr	  if (re_node_set_contains (cur_nodes, next_node))
3226146040Stjr	    continue;
3227146040Stjr	  err = re_node_set_init_1 (&new_dests, next_node);
3228146040Stjr	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3229146040Stjr	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3230146040Stjr	  re_node_set_free (&new_dests);
3231146040Stjr	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3232146040Stjr		  || err3 != REG_NOERROR, 0))
3233146040Stjr	    {
3234146040Stjr	      err = (err != REG_NOERROR ? err
3235146040Stjr		     : (err2 != REG_NOERROR ? err2 : err3));
3236146040Stjr	      return err;
3237146040Stjr	    }
3238146040Stjr	  /* TODO: It is still inefficient...  */
3239146040Stjr	  goto restart;
3240146040Stjr	}
3241146040Stjr      else
3242146040Stjr	{
3243146040Stjr	  re_node_set union_set;
3244146040Stjr	  next_node = dfa->nexts[ent->node];
3245146040Stjr	  if (mctx->state_log[to_idx])
3246146040Stjr	    {
3247146040Stjr	      int ret;
3248146040Stjr	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3249146040Stjr					next_node))
3250146040Stjr		continue;
3251146040Stjr	      err = re_node_set_init_copy (&union_set,
3252146040Stjr					   &mctx->state_log[to_idx]->nodes);
3253146040Stjr	      ret = re_node_set_insert (&union_set, next_node);
3254146040Stjr	      if (BE (err != REG_NOERROR || ret < 0, 0))
3255146040Stjr		{
3256146040Stjr		  re_node_set_free (&union_set);
3257146040Stjr		  err = err != REG_NOERROR ? err : REG_ESPACE;
3258146040Stjr		  return err;
3259146040Stjr		}
3260146040Stjr	    }
3261146040Stjr	  else
3262146040Stjr	    {
3263146040Stjr	      err = re_node_set_init_1 (&union_set, next_node);
3264146040Stjr	      if (BE (err != REG_NOERROR, 0))
3265146040Stjr		return err;
3266146040Stjr	    }
3267146040Stjr	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3268146040Stjr	  re_node_set_free (&union_set);
3269146040Stjr	  if (BE (mctx->state_log[to_idx] == NULL
3270146040Stjr		  && err != REG_NOERROR, 0))
3271146040Stjr	    return err;
3272146040Stjr	}
3273146040Stjr    }
3274146040Stjr  while (ent++->more);
3275146040Stjr  return REG_NOERROR;
3276146040Stjr}
3277146040Stjr
3278146040Stjr/* Build transition table for the state.
3279146040Stjr   Return 1 if succeeded, otherwise return NULL.  */
3280146040Stjr
3281146040Stjrstatic int
3282146040Stjrbuild_trtable (dfa, state)
3283146040Stjr    re_dfa_t *dfa;
3284146040Stjr    re_dfastate_t *state;
3285146040Stjr{
3286146040Stjr  reg_errcode_t err;
3287146040Stjr  int i, j, ch, need_word_trtable = 0;
3288146040Stjr  unsigned int elem, mask;
3289146040Stjr  int dests_node_malloced = 0, dest_states_malloced = 0;
3290146040Stjr  int ndests; /* Number of the destination states from `state'.  */
3291146040Stjr  re_dfastate_t **trtable;
3292146040Stjr  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3293146040Stjr  re_node_set follows, *dests_node;
3294146040Stjr  bitset *dests_ch;
3295146040Stjr  bitset acceptable;
3296146040Stjr
3297146040Stjr  /* We build DFA states which corresponds to the destination nodes
3298146040Stjr     from `state'.  `dests_node[i]' represents the nodes which i-th
3299146040Stjr     destination state contains, and `dests_ch[i]' represents the
3300146040Stjr     characters which i-th destination state accepts.  */
3301146040Stjr#ifdef _LIBC
3302146040Stjr  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3303146040Stjr    dests_node = (re_node_set *)
3304146040Stjr      alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3305146040Stjr  else
3306146040Stjr#endif
3307146040Stjr    {
3308146040Stjr      dests_node = (re_node_set *)
3309146040Stjr	malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3310146040Stjr      if (BE (dests_node == NULL, 0))
3311146040Stjr	return 0;
3312146040Stjr      dests_node_malloced = 1;
3313146040Stjr    }
3314146040Stjr  dests_ch = (bitset *) (dests_node + SBC_MAX);
3315146040Stjr
3316146040Stjr  /* Initialize transiton table.  */
3317146040Stjr  state->word_trtable = state->trtable = NULL;
3318146040Stjr
3319146040Stjr  /* At first, group all nodes belonging to `state' into several
3320146040Stjr     destinations.  */
3321146040Stjr  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3322146040Stjr  if (BE (ndests <= 0, 0))
3323146040Stjr    {
3324146040Stjr      if (dests_node_malloced)
3325146040Stjr	free (dests_node);
3326146040Stjr      /* Return 0 in case of an error, 1 otherwise.  */
3327146040Stjr      if (ndests == 0)
3328146040Stjr	{
3329146040Stjr	  state->trtable = (re_dfastate_t **)
3330146040Stjr	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3331146040Stjr	  return 1;
3332146040Stjr	}
3333146040Stjr      return 0;
3334146040Stjr    }
3335146040Stjr
3336146040Stjr  err = re_node_set_alloc (&follows, ndests + 1);
3337146040Stjr  if (BE (err != REG_NOERROR, 0))
3338146040Stjr    goto out_free;
3339146040Stjr
3340146040Stjr#ifdef _LIBC
3341146040Stjr  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3342146040Stjr			 + ndests * 3 * sizeof (re_dfastate_t *)))
3343146040Stjr    dest_states = (re_dfastate_t **)
3344146040Stjr      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3345146040Stjr  else
3346146040Stjr#endif
3347146040Stjr    {
3348146040Stjr      dest_states = (re_dfastate_t **)
3349146040Stjr	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3350146040Stjr      if (BE (dest_states == NULL, 0))
3351146040Stjr	{
3352146040Stjrout_free:
3353146040Stjr	  if (dest_states_malloced)
3354146040Stjr	    free (dest_states);
3355146040Stjr	  re_node_set_free (&follows);
3356146040Stjr	  for (i = 0; i < ndests; ++i)
3357146040Stjr	    re_node_set_free (dests_node + i);
3358146040Stjr	  if (dests_node_malloced)
3359146040Stjr	    free (dests_node);
3360146040Stjr	  return 0;
3361146040Stjr	}
3362146040Stjr      dest_states_malloced = 1;
3363146040Stjr    }
3364146040Stjr  dest_states_word = dest_states + ndests;
3365146040Stjr  dest_states_nl = dest_states_word + ndests;
3366146040Stjr  bitset_empty (acceptable);
3367146040Stjr
3368146040Stjr  /* Then build the states for all destinations.  */
3369146040Stjr  for (i = 0; i < ndests; ++i)
3370146040Stjr    {
3371146040Stjr      int next_node;
3372146040Stjr      re_node_set_empty (&follows);
3373146040Stjr      /* Merge the follows of this destination states.  */
3374146040Stjr      for (j = 0; j < dests_node[i].nelem; ++j)
3375146040Stjr	{
3376146040Stjr	  next_node = dfa->nexts[dests_node[i].elems[j]];
3377146040Stjr	  if (next_node != -1)
3378146040Stjr	    {
3379146040Stjr	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3380146040Stjr	      if (BE (err != REG_NOERROR, 0))
3381146040Stjr		goto out_free;
3382146040Stjr	    }
3383146040Stjr	}
3384146040Stjr      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3385146040Stjr      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3386146040Stjr	goto out_free;
3387146040Stjr      /* If the new state has context constraint,
3388146040Stjr	 build appropriate states for these contexts.  */
3389146040Stjr      if (dest_states[i]->has_constraint)
3390146040Stjr	{
3391146040Stjr	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3392146040Stjr							  CONTEXT_WORD);
3393146040Stjr	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3394146040Stjr	    goto out_free;
3395146040Stjr
3396146040Stjr	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3397146040Stjr	    need_word_trtable = 1;
3398146040Stjr
3399146040Stjr	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3400146040Stjr							CONTEXT_NEWLINE);
3401146040Stjr	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3402146040Stjr	    goto out_free;
3403146040Stjr 	}
3404146040Stjr      else
3405146040Stjr	{
3406146040Stjr	  dest_states_word[i] = dest_states[i];
3407146040Stjr	  dest_states_nl[i] = dest_states[i];
3408146040Stjr	}
3409146040Stjr      bitset_merge (acceptable, dests_ch[i]);
3410146040Stjr    }
3411146040Stjr
3412146040Stjr  if (!BE (need_word_trtable, 0))
3413146040Stjr    {
3414146040Stjr      /* We don't care about whether the following character is a word
3415146040Stjr	 character, or we are in a single-byte character set so we can
3416146040Stjr	 discern by looking at the character code: allocate a
3417146040Stjr	 256-entry transition table.  */
3418146040Stjr      trtable = state->trtable =
3419146040Stjr	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3420146040Stjr      if (BE (trtable == NULL, 0))
3421146040Stjr	goto out_free;
3422146040Stjr
3423146040Stjr      /* For all characters ch...:  */
3424146040Stjr      for (i = 0; i < BITSET_UINTS; ++i)
3425146040Stjr	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3426146040Stjr	     elem;
3427146040Stjr	     mask <<= 1, elem >>= 1, ++ch)
3428146040Stjr	  if (BE (elem & 1, 0))
3429146040Stjr	    {
3430146040Stjr	      /* There must be exactly one destination which accepts
3431146040Stjr		 character ch.  See group_nodes_into_DFAstates.  */
3432146040Stjr	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3433146040Stjr		;
3434146040Stjr
3435146040Stjr	      /* j-th destination accepts the word character ch.  */
3436146040Stjr	      if (dfa->word_char[i] & mask)
3437146040Stjr		trtable[ch] = dest_states_word[j];
3438146040Stjr	      else
3439146040Stjr		trtable[ch] = dest_states[j];
3440146040Stjr	    }
3441146040Stjr    }
3442146040Stjr  else
3443146040Stjr    {
3444146040Stjr      /* We care about whether the following character is a word
3445146040Stjr	 character, and we are in a multi-byte character set: discern
3446146040Stjr	 by looking at the character code: build two 256-entry
3447146040Stjr	 transition tables, one starting at trtable[0] and one
3448146040Stjr	 starting at trtable[SBC_MAX].  */
3449146040Stjr      trtable = state->word_trtable =
3450146040Stjr	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3451146040Stjr      if (BE (trtable == NULL, 0))
3452146040Stjr	goto out_free;
3453146040Stjr
3454146040Stjr      /* For all characters ch...:  */
3455146040Stjr      for (i = 0; i < BITSET_UINTS; ++i)
3456146040Stjr	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3457146040Stjr	     elem;
3458146040Stjr	     mask <<= 1, elem >>= 1, ++ch)
3459146040Stjr	  if (BE (elem & 1, 0))
3460146040Stjr	    {
3461146040Stjr	      /* There must be exactly one destination which accepts
3462146040Stjr		 character ch.  See group_nodes_into_DFAstates.  */
3463146040Stjr	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3464146040Stjr		;
3465146040Stjr
3466146040Stjr	      /* j-th destination accepts the word character ch.  */
3467146040Stjr	      trtable[ch] = dest_states[j];
3468146040Stjr	      trtable[ch + SBC_MAX] = dest_states_word[j];
3469146040Stjr	    }
3470146040Stjr    }
3471146040Stjr
3472146040Stjr  /* new line */
3473146040Stjr  if (bitset_contain (acceptable, NEWLINE_CHAR))
3474146040Stjr    {
3475146040Stjr      /* The current state accepts newline character.  */
3476146040Stjr      for (j = 0; j < ndests; ++j)
3477146040Stjr	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3478146040Stjr	  {
3479146040Stjr	    /* k-th destination accepts newline character.  */
3480146040Stjr	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3481146040Stjr	    if (need_word_trtable)
3482146040Stjr	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3483146040Stjr	    /* There must be only one destination which accepts
3484146040Stjr	       newline.  See group_nodes_into_DFAstates.  */
3485146040Stjr	    break;
3486146040Stjr	  }
3487146040Stjr    }
3488146040Stjr
3489146040Stjr  if (dest_states_malloced)
3490146040Stjr    free (dest_states);
3491146040Stjr
3492146040Stjr  re_node_set_free (&follows);
3493146040Stjr  for (i = 0; i < ndests; ++i)
3494146040Stjr    re_node_set_free (dests_node + i);
3495146040Stjr
3496146040Stjr  if (dests_node_malloced)
3497146040Stjr    free (dests_node);
3498146040Stjr
3499146040Stjr  return 1;
3500146040Stjr}
3501146040Stjr
3502146040Stjr/* Group all nodes belonging to STATE into several destinations.
3503146040Stjr   Then for all destinations, set the nodes belonging to the destination
3504146040Stjr   to DESTS_NODE[i] and set the characters accepted by the destination
3505146040Stjr   to DEST_CH[i].  This function return the number of destinations.  */
3506146040Stjr
3507146040Stjrstatic int
3508146040Stjrgroup_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
3509146040Stjr    re_dfa_t *dfa;
3510146040Stjr    const re_dfastate_t *state;
3511146040Stjr    re_node_set *dests_node;
3512146040Stjr    bitset *dests_ch;
3513146040Stjr{
3514146040Stjr  reg_errcode_t err;
3515146040Stjr  int result;
3516146040Stjr  int i, j, k;
3517146040Stjr  int ndests; /* Number of the destinations from `state'.  */
3518146040Stjr  bitset accepts; /* Characters a node can accept.  */
3519146040Stjr  const re_node_set *cur_nodes = &state->nodes;
3520146040Stjr  bitset_empty (accepts);
3521146040Stjr  ndests = 0;
3522146040Stjr
3523146040Stjr  /* For all the nodes belonging to `state',  */
3524146040Stjr  for (i = 0; i < cur_nodes->nelem; ++i)
3525146040Stjr    {
3526146040Stjr      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3527146040Stjr      re_token_type_t type = node->type;
3528146040Stjr      unsigned int constraint = node->constraint;
3529146040Stjr
3530146040Stjr      /* Enumerate all single byte character this node can accept.  */
3531146040Stjr      if (type == CHARACTER)
3532146040Stjr	bitset_set (accepts, node->opr.c);
3533146040Stjr      else if (type == SIMPLE_BRACKET)
3534146040Stjr	{
3535146040Stjr	  bitset_merge (accepts, node->opr.sbcset);
3536146040Stjr	}
3537146040Stjr      else if (type == OP_PERIOD)
3538146040Stjr	{
3539146040Stjr#ifdef RE_ENABLE_I18N
3540146040Stjr	  if (dfa->mb_cur_max > 1)
3541146040Stjr	    bitset_merge (accepts, dfa->sb_char);
3542146040Stjr	  else
3543146040Stjr#endif
3544146040Stjr	    bitset_set_all (accepts);
3545146040Stjr	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3546146040Stjr	    bitset_clear (accepts, '\n');
3547146040Stjr	  if (dfa->syntax & RE_DOT_NOT_NULL)
3548146040Stjr	    bitset_clear (accepts, '\0');
3549146040Stjr	}
3550146040Stjr#ifdef RE_ENABLE_I18N
3551146040Stjr      else if (type == OP_UTF8_PERIOD)
3552146040Stjr        {
3553146040Stjr	  memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3554146040Stjr	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3555146040Stjr	    bitset_clear (accepts, '\n');
3556146040Stjr	  if (dfa->syntax & RE_DOT_NOT_NULL)
3557146040Stjr	    bitset_clear (accepts, '\0');
3558146040Stjr        }
3559146040Stjr#endif
3560146040Stjr      else
3561146040Stjr	continue;
3562146040Stjr
3563146040Stjr      /* Check the `accepts' and sift the characters which are not
3564146040Stjr	 match it the context.  */
3565146040Stjr      if (constraint)
3566146040Stjr	{
3567146040Stjr	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3568146040Stjr	    {
3569146040Stjr	      int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3570146040Stjr	      bitset_empty (accepts);
3571146040Stjr	      if (accepts_newline)
3572146040Stjr		bitset_set (accepts, NEWLINE_CHAR);
3573146040Stjr	      else
3574146040Stjr		continue;
3575146040Stjr	    }
3576146040Stjr	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3577146040Stjr	    {
3578146040Stjr	      bitset_empty (accepts);
3579146040Stjr	      continue;
3580146040Stjr	    }
3581146040Stjr
3582146040Stjr	  if (constraint & NEXT_WORD_CONSTRAINT)
3583146040Stjr	    {
3584146040Stjr	      unsigned int any_set = 0;
3585146040Stjr	      if (type == CHARACTER && !node->word_char)
3586146040Stjr		{
3587146040Stjr		  bitset_empty (accepts);
3588146040Stjr		  continue;
3589146040Stjr		}
3590146040Stjr#ifdef RE_ENABLE_I18N
3591146040Stjr	      if (dfa->mb_cur_max > 1)
3592146040Stjr		for (j = 0; j < BITSET_UINTS; ++j)
3593146040Stjr		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3594146040Stjr	      else
3595146040Stjr#endif
3596146040Stjr		for (j = 0; j < BITSET_UINTS; ++j)
3597146040Stjr		  any_set |= (accepts[j] &= dfa->word_char[j]);
3598146040Stjr	      if (!any_set)
3599146040Stjr		continue;
3600146040Stjr	    }
3601146040Stjr	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3602146040Stjr	    {
3603146040Stjr	      unsigned int any_set = 0;
3604146040Stjr	      if (type == CHARACTER && node->word_char)
3605146040Stjr		{
3606146040Stjr		  bitset_empty (accepts);
3607146040Stjr		  continue;
3608146040Stjr		}
3609146040Stjr#ifdef RE_ENABLE_I18N
3610146040Stjr	      if (dfa->mb_cur_max > 1)
3611146040Stjr		for (j = 0; j < BITSET_UINTS; ++j)
3612146040Stjr		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3613146040Stjr	      else
3614146040Stjr#endif
3615146040Stjr		for (j = 0; j < BITSET_UINTS; ++j)
3616146040Stjr		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3617146040Stjr	      if (!any_set)
3618146040Stjr		continue;
3619146040Stjr	    }
3620146040Stjr	}
3621146040Stjr
3622146040Stjr      /* Then divide `accepts' into DFA states, or create a new
3623146040Stjr	 state.  Above, we make sure that accepts is not empty.  */
3624146040Stjr      for (j = 0; j < ndests; ++j)
3625146040Stjr	{
3626146040Stjr	  bitset intersec; /* Intersection sets, see below.  */
3627146040Stjr	  bitset remains;
3628146040Stjr	  /* Flags, see below.  */
3629146040Stjr	  int has_intersec, not_subset, not_consumed;
3630146040Stjr
3631146040Stjr	  /* Optimization, skip if this state doesn't accept the character.  */
3632146040Stjr	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3633146040Stjr	    continue;
3634146040Stjr
3635146040Stjr	  /* Enumerate the intersection set of this state and `accepts'.  */
3636146040Stjr	  has_intersec = 0;
3637146040Stjr	  for (k = 0; k < BITSET_UINTS; ++k)
3638146040Stjr	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3639146040Stjr	  /* And skip if the intersection set is empty.  */
3640146040Stjr	  if (!has_intersec)
3641146040Stjr	    continue;
3642146040Stjr
3643146040Stjr	  /* Then check if this state is a subset of `accepts'.  */
3644146040Stjr	  not_subset = not_consumed = 0;
3645146040Stjr	  for (k = 0; k < BITSET_UINTS; ++k)
3646146040Stjr	    {
3647146040Stjr	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3648146040Stjr	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3649146040Stjr	    }
3650146040Stjr
3651146040Stjr	  /* If this state isn't a subset of `accepts', create a
3652146040Stjr	     new group state, which has the `remains'. */
3653146040Stjr	  if (not_subset)
3654146040Stjr	    {
3655146040Stjr	      bitset_copy (dests_ch[ndests], remains);
3656146040Stjr	      bitset_copy (dests_ch[j], intersec);
3657146040Stjr	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3658146040Stjr	      if (BE (err != REG_NOERROR, 0))
3659146040Stjr		goto error_return;
3660146040Stjr	      ++ndests;
3661146040Stjr	    }
3662146040Stjr
3663146040Stjr	  /* Put the position in the current group. */
3664146040Stjr	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3665146040Stjr	  if (BE (result < 0, 0))
3666146040Stjr	    goto error_return;
3667146040Stjr
3668146040Stjr	  /* If all characters are consumed, go to next node. */
3669146040Stjr	  if (!not_consumed)
3670146040Stjr	    break;
3671146040Stjr	}
3672146040Stjr      /* Some characters remain, create a new group. */
3673146040Stjr      if (j == ndests)
3674146040Stjr	{
3675146040Stjr	  bitset_copy (dests_ch[ndests], accepts);
3676146040Stjr	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3677146040Stjr	  if (BE (err != REG_NOERROR, 0))
3678146040Stjr	    goto error_return;
3679146040Stjr	  ++ndests;
3680146040Stjr	  bitset_empty (accepts);
3681146040Stjr	}
3682146040Stjr    }
3683146040Stjr  return ndests;
3684146040Stjr error_return:
3685146040Stjr  for (j = 0; j < ndests; ++j)
3686146040Stjr    re_node_set_free (dests_node + j);
3687146040Stjr  return -1;
3688146040Stjr}
3689146040Stjr
3690146040Stjr#ifdef RE_ENABLE_I18N
3691146040Stjr/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3692146040Stjr   Return the number of the bytes the node accepts.
3693146040Stjr   STR_IDX is the current index of the input string.
3694146040Stjr
3695146040Stjr   This function handles the nodes which can accept one character, or
3696146040Stjr   one collating element like '.', '[a-z]', opposite to the other nodes
3697146040Stjr   can only accept one byte.  */
3698146040Stjr
3699146040Stjrstatic int
3700146040Stjrcheck_node_accept_bytes (dfa, node_idx, input, str_idx)
3701146040Stjr    re_dfa_t *dfa;
3702146040Stjr    int node_idx, str_idx;
3703146040Stjr    const re_string_t *input;
3704146040Stjr{
3705146040Stjr  const re_token_t *node = dfa->nodes + node_idx;
3706146040Stjr  int char_len, elem_len;
3707146040Stjr  int i;
3708146040Stjr
3709146040Stjr  if (BE (node->type == OP_UTF8_PERIOD, 0))
3710146040Stjr    {
3711146040Stjr      unsigned char c = re_string_byte_at (input, str_idx), d;
3712146040Stjr      if (BE (c < 0xc2, 1))
3713146040Stjr	return 0;
3714146040Stjr
3715146040Stjr      if (str_idx + 2 > input->len)
3716146040Stjr	return 0;
3717146040Stjr
3718146040Stjr      d = re_string_byte_at (input, str_idx + 1);
3719146040Stjr      if (c < 0xe0)
3720146040Stjr	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3721146040Stjr      else if (c < 0xf0)
3722146040Stjr	{
3723146040Stjr	  char_len = 3;
3724146040Stjr	  if (c == 0xe0 && d < 0xa0)
3725146040Stjr	    return 0;
3726146040Stjr	}
3727146040Stjr      else if (c < 0xf8)
3728146040Stjr	{
3729146040Stjr	  char_len = 4;
3730146040Stjr	  if (c == 0xf0 && d < 0x90)
3731146040Stjr	    return 0;
3732146040Stjr	}
3733146040Stjr      else if (c < 0xfc)
3734146040Stjr	{
3735146040Stjr	  char_len = 5;
3736146040Stjr	  if (c == 0xf8 && d < 0x88)
3737146040Stjr	    return 0;
3738146040Stjr	}
3739146040Stjr      else if (c < 0xfe)
3740146040Stjr	{
3741146040Stjr	  char_len = 6;
3742146040Stjr	  if (c == 0xfc && d < 0x84)
3743146040Stjr	    return 0;
3744146040Stjr	}
3745146040Stjr      else
3746146040Stjr	return 0;
3747146040Stjr
3748146040Stjr      if (str_idx + char_len > input->len)
3749146040Stjr	return 0;
3750146040Stjr
3751146040Stjr      for (i = 1; i < char_len; ++i)
3752146040Stjr	{
3753146040Stjr	  d = re_string_byte_at (input, str_idx + i);
3754146040Stjr	  if (d < 0x80 || d > 0xbf)
3755146040Stjr	    return 0;
3756146040Stjr	}
3757146040Stjr      return char_len;
3758146040Stjr    }
3759146040Stjr
3760146040Stjr  char_len = re_string_char_size_at (input, str_idx);
3761146040Stjr  if (node->type == OP_PERIOD)
3762146040Stjr    {
3763146040Stjr      if (char_len <= 1)
3764146040Stjr        return 0;
3765146040Stjr      /* FIXME: I don't think this if is needed, as both '\n'
3766146040Stjr	 and '\0' are char_len == 1.  */
3767146040Stjr      /* '.' accepts any one character except the following two cases.  */
3768146040Stjr      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3769146040Stjr	   re_string_byte_at (input, str_idx) == '\n') ||
3770146040Stjr	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3771146040Stjr	   re_string_byte_at (input, str_idx) == '\0'))
3772146040Stjr	return 0;
3773146040Stjr      return char_len;
3774146040Stjr    }
3775146040Stjr
3776146040Stjr  elem_len = re_string_elem_size_at (input, str_idx);
3777146040Stjr  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3778146040Stjr    return 0;
3779146040Stjr
3780146040Stjr  if (node->type == COMPLEX_BRACKET)
3781146040Stjr    {
3782146040Stjr      const re_charset_t *cset = node->opr.mbcset;
3783146040Stjr# ifdef _LIBC
3784146040Stjr      const unsigned char *pin
3785146040Stjr	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3786146040Stjr      int j;
3787146040Stjr      uint32_t nrules;
3788146040Stjr# endif /* _LIBC */
3789146040Stjr      int match_len = 0;
3790146040Stjr      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3791146040Stjr		    ? re_string_wchar_at (input, str_idx) : 0);
3792146040Stjr
3793146040Stjr      /* match with multibyte character?  */
3794146040Stjr      for (i = 0; i < cset->nmbchars; ++i)
3795146040Stjr	if (wc == cset->mbchars[i])
3796146040Stjr	  {
3797146040Stjr	    match_len = char_len;
3798146040Stjr	    goto check_node_accept_bytes_match;
3799146040Stjr	  }
3800146040Stjr      /* match with character_class?  */
3801146040Stjr      for (i = 0; i < cset->nchar_classes; ++i)
3802146040Stjr	{
3803146040Stjr	  wctype_t wt = cset->char_classes[i];
3804146040Stjr	  if (__iswctype (wc, wt))
3805146040Stjr	    {
3806146040Stjr	      match_len = char_len;
3807146040Stjr	      goto check_node_accept_bytes_match;
3808146040Stjr	    }
3809146040Stjr	}
3810146040Stjr
3811146040Stjr# ifdef _LIBC
3812146040Stjr      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3813146040Stjr      if (nrules != 0)
3814146040Stjr	{
3815146040Stjr	  unsigned int in_collseq = 0;
3816146040Stjr	  const int32_t *table, *indirect;
3817146040Stjr	  const unsigned char *weights, *extra;
3818146040Stjr	  const char *collseqwc;
3819146040Stjr	  int32_t idx;
3820146040Stjr	  /* This #include defines a local function!  */
3821146040Stjr#  include <locale/weight.h>
3822146040Stjr
3823146040Stjr	  /* match with collating_symbol?  */
3824146040Stjr	  if (cset->ncoll_syms)
3825146040Stjr	    extra = (const unsigned char *)
3826146040Stjr	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3827146040Stjr	  for (i = 0; i < cset->ncoll_syms; ++i)
3828146040Stjr	    {
3829146040Stjr	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3830146040Stjr	      /* Compare the length of input collating element and
3831146040Stjr		 the length of current collating element.  */
3832146040Stjr	      if (*coll_sym != elem_len)
3833146040Stjr		continue;
3834146040Stjr	      /* Compare each bytes.  */
3835146040Stjr	      for (j = 0; j < *coll_sym; j++)
3836146040Stjr		if (pin[j] != coll_sym[1 + j])
3837146040Stjr		  break;
3838146040Stjr	      if (j == *coll_sym)
3839146040Stjr		{
3840146040Stjr		  /* Match if every bytes is equal.  */
3841146040Stjr		  match_len = j;
3842146040Stjr		  goto check_node_accept_bytes_match;
3843146040Stjr		}
3844146040Stjr	    }
3845146040Stjr
3846146040Stjr	  if (cset->nranges)
3847146040Stjr	    {
3848146040Stjr	      if (elem_len <= char_len)
3849146040Stjr		{
3850146040Stjr		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3851146040Stjr		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3852146040Stjr		}
3853146040Stjr	      else
3854146040Stjr		in_collseq = find_collation_sequence_value (pin, elem_len);
3855146040Stjr	    }
3856146040Stjr	  /* match with range expression?  */
3857146040Stjr	  for (i = 0; i < cset->nranges; ++i)
3858146040Stjr	    if (cset->range_starts[i] <= in_collseq
3859146040Stjr		&& in_collseq <= cset->range_ends[i])
3860146040Stjr	      {
3861146040Stjr		match_len = elem_len;
3862146040Stjr		goto check_node_accept_bytes_match;
3863146040Stjr	      }
3864146040Stjr
3865146040Stjr	  /* match with equivalence_class?  */
3866146040Stjr	  if (cset->nequiv_classes)
3867146040Stjr	    {
3868146040Stjr	      const unsigned char *cp = pin;
3869146040Stjr	      table = (const int32_t *)
3870146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3871146040Stjr	      weights = (const unsigned char *)
3872146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3873146040Stjr	      extra = (const unsigned char *)
3874146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3875146040Stjr	      indirect = (const int32_t *)
3876146040Stjr		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3877146040Stjr	      idx = findidx (&cp);
3878146040Stjr	      if (idx > 0)
3879146040Stjr		for (i = 0; i < cset->nequiv_classes; ++i)
3880146040Stjr		  {
3881146040Stjr		    int32_t equiv_class_idx = cset->equiv_classes[i];
3882146040Stjr		    size_t weight_len = weights[idx];
3883146040Stjr		    if (weight_len == weights[equiv_class_idx])
3884146040Stjr		      {
3885146040Stjr			int cnt = 0;
3886146040Stjr			while (cnt <= weight_len
3887146040Stjr			       && (weights[equiv_class_idx + 1 + cnt]
3888146040Stjr				   == weights[idx + 1 + cnt]))
3889146040Stjr			  ++cnt;
3890146040Stjr			if (cnt > weight_len)
3891146040Stjr			  {
3892146040Stjr			    match_len = elem_len;
3893146040Stjr			    goto check_node_accept_bytes_match;
3894146040Stjr			  }
3895146040Stjr		      }
3896146040Stjr		  }
3897146040Stjr	    }
3898146040Stjr	}
3899146040Stjr      else
3900146040Stjr# endif /* _LIBC */
3901146040Stjr	{
3902146040Stjr	  /* match with range expression?  */
3903146040Stjr#if __GNUC__ >= 2
3904146040Stjr	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3905146040Stjr#else
3906146040Stjr	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3907146040Stjr	  cmp_buf[2] = wc;
3908146040Stjr#endif
3909146040Stjr	  for (i = 0; i < cset->nranges; ++i)
3910146040Stjr	    {
3911146040Stjr	      cmp_buf[0] = cset->range_starts[i];
3912146040Stjr	      cmp_buf[4] = cset->range_ends[i];
3913146040Stjr	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3914146040Stjr		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3915146040Stjr		{
3916146040Stjr		  match_len = char_len;
3917146040Stjr		  goto check_node_accept_bytes_match;
3918146040Stjr		}
3919146040Stjr	    }
3920146040Stjr	}
3921146040Stjr    check_node_accept_bytes_match:
3922146040Stjr      if (!cset->non_match)
3923146040Stjr	return match_len;
3924146040Stjr      else
3925146040Stjr	{
3926146040Stjr	  if (match_len > 0)
3927146040Stjr	    return 0;
3928146040Stjr	  else
3929146040Stjr	    return (elem_len > char_len) ? elem_len : char_len;
3930146040Stjr	}
3931146040Stjr    }
3932146040Stjr  return 0;
3933146040Stjr}
3934146040Stjr
3935146040Stjr# ifdef _LIBC
3936146040Stjrstatic unsigned int
3937146040Stjrfind_collation_sequence_value (mbs, mbs_len)
3938146040Stjr    const unsigned char *mbs;
3939146040Stjr    size_t mbs_len;
3940146040Stjr{
3941146040Stjr  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3942146040Stjr  if (nrules == 0)
3943146040Stjr    {
3944146040Stjr      if (mbs_len == 1)
3945146040Stjr	{
3946146040Stjr	  /* No valid character.  Match it as a single byte character.  */
3947146040Stjr	  const unsigned char *collseq = (const unsigned char *)
3948146040Stjr	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3949146040Stjr	  return collseq[mbs[0]];
3950146040Stjr	}
3951146040Stjr      return UINT_MAX;
3952146040Stjr    }
3953146040Stjr  else
3954146040Stjr    {
3955146040Stjr      int32_t idx;
3956146040Stjr      const unsigned char *extra = (const unsigned char *)
3957146040Stjr	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3958146040Stjr      int32_t extrasize = (const unsigned char *)
3959146040Stjr	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3960146040Stjr
3961146040Stjr      for (idx = 0; idx < extrasize;)
3962146040Stjr	{
3963146040Stjr	  int mbs_cnt, found = 0;
3964146040Stjr	  int32_t elem_mbs_len;
3965146040Stjr	  /* Skip the name of collating element name.  */
3966146040Stjr	  idx = idx + extra[idx] + 1;
3967146040Stjr	  elem_mbs_len = extra[idx++];
3968146040Stjr	  if (mbs_len == elem_mbs_len)
3969146040Stjr	    {
3970146040Stjr	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3971146040Stjr		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3972146040Stjr		  break;
3973146040Stjr	      if (mbs_cnt == elem_mbs_len)
3974146040Stjr		/* Found the entry.  */
3975146040Stjr		found = 1;
3976146040Stjr	    }
3977146040Stjr	  /* Skip the byte sequence of the collating element.  */
3978146040Stjr	  idx += elem_mbs_len;
3979146040Stjr	  /* Adjust for the alignment.  */
3980146040Stjr	  idx = (idx + 3) & ~3;
3981146040Stjr	  /* Skip the collation sequence value.  */
3982146040Stjr	  idx += sizeof (uint32_t);
3983146040Stjr	  /* Skip the wide char sequence of the collating element.  */
3984146040Stjr	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3985146040Stjr	  /* If we found the entry, return the sequence value.  */
3986146040Stjr	  if (found)
3987146040Stjr	    return *(uint32_t *) (extra + idx);
3988146040Stjr	  /* Skip the collation sequence value.  */
3989146040Stjr	  idx += sizeof (uint32_t);
3990146040Stjr	}
3991146040Stjr      return UINT_MAX;
3992146040Stjr    }
3993146040Stjr}
3994146040Stjr# endif /* _LIBC */
3995146040Stjr#endif /* RE_ENABLE_I18N */
3996146040Stjr
3997146040Stjr/* Check whether the node accepts the byte which is IDX-th
3998146040Stjr   byte of the INPUT.  */
3999146040Stjr
4000146040Stjrstatic int
4001146040Stjrcheck_node_accept (mctx, node, idx)
4002146040Stjr    const re_match_context_t *mctx;
4003146040Stjr    const re_token_t *node;
4004146040Stjr    int idx;
4005146040Stjr{
4006146040Stjr  unsigned char ch;
4007146040Stjr  ch = re_string_byte_at (&mctx->input, idx);
4008146040Stjr  switch (node->type)
4009146040Stjr    {
4010146040Stjr    case CHARACTER:
4011146040Stjr      if (node->opr.c != ch)
4012146040Stjr        return 0;
4013146040Stjr      break;
4014146040Stjr
4015146040Stjr    case SIMPLE_BRACKET:
4016146040Stjr      if (!bitset_contain (node->opr.sbcset, ch))
4017146040Stjr        return 0;
4018146040Stjr      break;
4019146040Stjr
4020146040Stjr#ifdef RE_ENABLE_I18N
4021146040Stjr    case OP_UTF8_PERIOD:
4022146040Stjr      if (ch >= 0x80)
4023146040Stjr        return 0;
4024146040Stjr      /* FALLTHROUGH */
4025146040Stjr#endif
4026146040Stjr    case OP_PERIOD:
4027146040Stjr      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4028146040Stjr	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4029146040Stjr	return 0;
4030146040Stjr      break;
4031146040Stjr
4032146040Stjr    default:
4033146040Stjr      return 0;
4034146040Stjr    }
4035146040Stjr
4036146040Stjr  if (node->constraint)
4037146040Stjr    {
4038146040Stjr      /* The node has constraints.  Check whether the current context
4039146040Stjr	 satisfies the constraints.  */
4040146040Stjr      unsigned int context = re_string_context_at (&mctx->input, idx,
4041146040Stjr						   mctx->eflags);
4042146040Stjr      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4043146040Stjr	return 0;
4044146040Stjr    }
4045146040Stjr
4046146040Stjr  return 1;
4047146040Stjr}
4048146040Stjr
4049146040Stjr/* Extend the buffers, if the buffers have run out.  */
4050146040Stjr
4051146040Stjrstatic reg_errcode_t
4052146040Stjrextend_buffers (mctx)
4053146040Stjr     re_match_context_t *mctx;
4054146040Stjr{
4055146040Stjr  reg_errcode_t ret;
4056146040Stjr  re_string_t *pstr = &mctx->input;
4057146040Stjr
4058146040Stjr  /* Double the lengthes of the buffers.  */
4059146040Stjr  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4060146040Stjr  if (BE (ret != REG_NOERROR, 0))
4061146040Stjr    return ret;
4062146040Stjr
4063146040Stjr  if (mctx->state_log != NULL)
4064146040Stjr    {
4065146040Stjr      /* And double the length of state_log.  */
4066146040Stjr      /* XXX We have no indication of the size of this buffer.  If this
4067146040Stjr	 allocation fail we have no indication that the state_log array
4068146040Stjr	 does not have the right size.  */
4069146040Stjr      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4070146040Stjr					      pstr->bufs_len + 1);
4071146040Stjr      if (BE (new_array == NULL, 0))
4072146040Stjr	return REG_ESPACE;
4073146040Stjr      mctx->state_log = new_array;
4074146040Stjr    }
4075146040Stjr
4076146040Stjr  /* Then reconstruct the buffers.  */
4077146040Stjr  if (pstr->icase)
4078146040Stjr    {
4079146040Stjr#ifdef RE_ENABLE_I18N
4080146040Stjr      if (pstr->mb_cur_max > 1)
4081146040Stjr	{
4082146040Stjr	  ret = build_wcs_upper_buffer (pstr);
4083146040Stjr	  if (BE (ret != REG_NOERROR, 0))
4084146040Stjr	    return ret;
4085146040Stjr	}
4086146040Stjr      else
4087146040Stjr#endif /* RE_ENABLE_I18N  */
4088146040Stjr	build_upper_buffer (pstr);
4089146040Stjr    }
4090146040Stjr  else
4091146040Stjr    {
4092146040Stjr#ifdef RE_ENABLE_I18N
4093146040Stjr      if (pstr->mb_cur_max > 1)
4094146040Stjr	build_wcs_buffer (pstr);
4095146040Stjr      else
4096146040Stjr#endif /* RE_ENABLE_I18N  */
4097146040Stjr	{
4098146040Stjr	  if (pstr->trans != NULL)
4099146040Stjr	    re_string_translate_buffer (pstr);
4100146040Stjr	}
4101146040Stjr    }
4102146040Stjr  return REG_NOERROR;
4103146040Stjr}
4104146040Stjr
4105146040Stjr
4106146040Stjr/* Functions for matching context.  */
4107146040Stjr
4108146040Stjr/* Initialize MCTX.  */
4109146040Stjr
4110146040Stjrstatic reg_errcode_t
4111146040Stjrmatch_ctx_init (mctx, eflags, n)
4112146040Stjr    re_match_context_t *mctx;
4113146040Stjr    int eflags, n;
4114146040Stjr{
4115146040Stjr  mctx->eflags = eflags;
4116146040Stjr  mctx->match_last = -1;
4117146040Stjr  if (n > 0)
4118146040Stjr    {
4119146040Stjr      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4120146040Stjr      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4121146040Stjr      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4122146040Stjr	return REG_ESPACE;
4123146040Stjr    }
4124146040Stjr  /* Already zero-ed by the caller.
4125146040Stjr     else
4126146040Stjr       mctx->bkref_ents = NULL;
4127146040Stjr     mctx->nbkref_ents = 0;
4128146040Stjr     mctx->nsub_tops = 0;  */
4129146040Stjr  mctx->abkref_ents = n;
4130146040Stjr  mctx->max_mb_elem_len = 1;
4131146040Stjr  mctx->asub_tops = n;
4132146040Stjr  return REG_NOERROR;
4133146040Stjr}
4134146040Stjr
4135146040Stjr/* Clean the entries which depend on the current input in MCTX.
4136146040Stjr   This function must be invoked when the matcher changes the start index
4137146040Stjr   of the input, or changes the input string.  */
4138146040Stjr
4139146040Stjrstatic void
4140146040Stjrmatch_ctx_clean (mctx)
4141146040Stjr    re_match_context_t *mctx;
4142146040Stjr{
4143146040Stjr  int st_idx;
4144146040Stjr  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4145146040Stjr    {
4146146040Stjr      int sl_idx;
4147146040Stjr      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4148146040Stjr      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4149146040Stjr	{
4150146040Stjr	  re_sub_match_last_t *last = top->lasts[sl_idx];
4151146040Stjr	  re_free (last->path.array);
4152146040Stjr	  re_free (last);
4153146040Stjr	}
4154146040Stjr      re_free (top->lasts);
4155146040Stjr      if (top->path)
4156146040Stjr	{
4157146040Stjr	  re_free (top->path->array);
4158146040Stjr	  re_free (top->path);
4159146040Stjr	}
4160146040Stjr      free (top);
4161146040Stjr    }
4162146040Stjr
4163146040Stjr  mctx->nsub_tops = 0;
4164146040Stjr  mctx->nbkref_ents = 0;
4165146040Stjr}
4166146040Stjr
4167146040Stjr/* Free all the memory associated with MCTX.  */
4168146040Stjr
4169146040Stjrstatic void
4170146040Stjrmatch_ctx_free (mctx)
4171146040Stjr    re_match_context_t *mctx;
4172146040Stjr{
4173146040Stjr  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4174146040Stjr  match_ctx_clean (mctx);
4175146040Stjr  re_free (mctx->sub_tops);
4176146040Stjr  re_free (mctx->bkref_ents);
4177146040Stjr}
4178146040Stjr
4179146040Stjr/* Add a new backreference entry to MCTX.
4180146040Stjr   Note that we assume that caller never call this function with duplicate
4181146040Stjr   entry, and call with STR_IDX which isn't smaller than any existing entry.
4182146040Stjr*/
4183146040Stjr
4184146040Stjrstatic reg_errcode_t
4185146040Stjrmatch_ctx_add_entry (mctx, node, str_idx, from, to)
4186146040Stjr     re_match_context_t *mctx;
4187146040Stjr     int node, str_idx, from, to;
4188146040Stjr{
4189146040Stjr  if (mctx->nbkref_ents >= mctx->abkref_ents)
4190146040Stjr    {
4191146040Stjr      struct re_backref_cache_entry* new_entry;
4192146040Stjr      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4193146040Stjr			      mctx->abkref_ents * 2);
4194146040Stjr      if (BE (new_entry == NULL, 0))
4195146040Stjr	{
4196146040Stjr	  re_free (mctx->bkref_ents);
4197146040Stjr	  return REG_ESPACE;
4198146040Stjr	}
4199146040Stjr      mctx->bkref_ents = new_entry;
4200146040Stjr      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4201146040Stjr	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4202146040Stjr      mctx->abkref_ents *= 2;
4203146040Stjr    }
4204146040Stjr  if (mctx->nbkref_ents > 0
4205146040Stjr      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4206146040Stjr    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4207146040Stjr
4208146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4209146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4210146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4211146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4212146040Stjr
4213146040Stjr  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4214146040Stjr     If bit N is clear, means that this entry won't epsilon-transition to
4215146040Stjr     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4216146040Stjr     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4217146040Stjr     such node.
4218146040Stjr
4219146040Stjr     A backreference does not epsilon-transition unless it is empty, so set
4220146040Stjr     to all zeros if FROM != TO.  */
4221146040Stjr  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4222146040Stjr    = (from == to ? ~0 : 0);
4223146040Stjr
4224146040Stjr  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4225146040Stjr  if (mctx->max_mb_elem_len < to - from)
4226146040Stjr    mctx->max_mb_elem_len = to - from;
4227146040Stjr  return REG_NOERROR;
4228146040Stjr}
4229146040Stjr
4230146040Stjr/* Search for the first entry which has the same str_idx, or -1 if none is
4231146040Stjr   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4232146040Stjr
4233146040Stjrstatic int
4234146040Stjrsearch_cur_bkref_entry (mctx, str_idx)
4235146040Stjr     re_match_context_t *mctx;
4236146040Stjr     int str_idx;
4237146040Stjr{
4238146040Stjr  int left, right, mid, last;
4239146040Stjr  last = right = mctx->nbkref_ents;
4240146040Stjr  for (left = 0; left < right;)
4241146040Stjr    {
4242146040Stjr      mid = (left + right) / 2;
4243146040Stjr      if (mctx->bkref_ents[mid].str_idx < str_idx)
4244146040Stjr	left = mid + 1;
4245146040Stjr      else
4246146040Stjr	right = mid;
4247146040Stjr    }
4248146040Stjr  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4249146040Stjr    return left;
4250146040Stjr  else
4251146040Stjr    return -1;
4252146040Stjr}
4253146040Stjr
4254146040Stjr/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4255146040Stjr   at STR_IDX.  */
4256146040Stjr
4257146040Stjrstatic reg_errcode_t
4258146040Stjrmatch_ctx_add_subtop (mctx, node, str_idx)
4259146040Stjr     re_match_context_t *mctx;
4260146040Stjr     int node, str_idx;
4261146040Stjr{
4262146040Stjr#ifdef DEBUG
4263146040Stjr  assert (mctx->sub_tops != NULL);
4264146040Stjr  assert (mctx->asub_tops > 0);
4265146040Stjr#endif
4266146040Stjr  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4267146040Stjr    {
4268146040Stjr      int new_asub_tops = mctx->asub_tops * 2;
4269146040Stjr      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4270146040Stjr						   re_sub_match_top_t *,
4271146040Stjr						   new_asub_tops);
4272146040Stjr      if (BE (new_array == NULL, 0))
4273146040Stjr	return REG_ESPACE;
4274146040Stjr      mctx->sub_tops = new_array;
4275146040Stjr      mctx->asub_tops = new_asub_tops;
4276146040Stjr    }
4277146040Stjr  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4278146040Stjr  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4279146040Stjr    return REG_ESPACE;
4280146040Stjr  mctx->sub_tops[mctx->nsub_tops]->node = node;
4281146040Stjr  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4282146040Stjr  return REG_NOERROR;
4283146040Stjr}
4284146040Stjr
4285146040Stjr/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4286146040Stjr   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4287146040Stjr
4288146040Stjrstatic re_sub_match_last_t *
4289146040Stjrmatch_ctx_add_sublast (subtop, node, str_idx)
4290146040Stjr     re_sub_match_top_t *subtop;
4291146040Stjr     int node, str_idx;
4292146040Stjr{
4293146040Stjr  re_sub_match_last_t *new_entry;
4294146040Stjr  if (BE (subtop->nlasts == subtop->alasts, 0))
4295146040Stjr    {
4296146040Stjr      int new_alasts = 2 * subtop->alasts + 1;
4297146040Stjr      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4298146040Stjr						    re_sub_match_last_t *,
4299146040Stjr						    new_alasts);
4300146040Stjr      if (BE (new_array == NULL, 0))
4301146040Stjr	return NULL;
4302146040Stjr      subtop->lasts = new_array;
4303146040Stjr      subtop->alasts = new_alasts;
4304146040Stjr    }
4305146040Stjr  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4306146040Stjr  if (BE (new_entry != NULL, 1))
4307146040Stjr    {
4308146040Stjr      subtop->lasts[subtop->nlasts] = new_entry;
4309146040Stjr      new_entry->node = node;
4310146040Stjr      new_entry->str_idx = str_idx;
4311146040Stjr      ++subtop->nlasts;
4312146040Stjr    }
4313146040Stjr  return new_entry;
4314146040Stjr}
4315146040Stjr
4316146040Stjrstatic void
4317146040Stjrsift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
4318146040Stjr    re_sift_context_t *sctx;
4319146040Stjr    re_dfastate_t **sifted_sts, **limited_sts;
4320146040Stjr    int last_node, last_str_idx;
4321146040Stjr{
4322146040Stjr  sctx->sifted_states = sifted_sts;
4323146040Stjr  sctx->limited_states = limited_sts;
4324146040Stjr  sctx->last_node = last_node;
4325146040Stjr  sctx->last_str_idx = last_str_idx;
4326146040Stjr  re_node_set_init_empty (&sctx->limits);
4327146040Stjr}
4328