1/* Extended regular expression matching and search library.
2   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, write to the Free
18   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19   02111-1307 USA.  */
20
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22				     int n) internal_function;
23static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24static void match_ctx_free (re_match_context_t *cache) internal_function;
25static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26					  int str_idx, int from, int to)
27     internal_function;
28static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
29     internal_function;
30static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31					   int str_idx) internal_function;
32static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33						   int node, int str_idx)
34     internal_function;
35static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36			   re_dfastate_t **limited_sts, int last_node,
37			   int last_str_idx)
38     internal_function;
39static reg_errcode_t re_search_internal (const regex_t *preg,
40					 const char *string, int length,
41					 int start, int range, int stop,
42					 size_t nmatch, regmatch_t pmatch[],
43					 int eflags) internal_function;
44static int re_search_2_stub (struct re_pattern_buffer *bufp,
45			     const char *string1, int length1,
46			     const char *string2, int length2,
47			     int start, int range, struct re_registers *regs,
48			     int stop, int ret_len) internal_function;
49static int re_search_stub (struct re_pattern_buffer *bufp,
50			   const char *string, int length, int start,
51			   int range, int stop, struct re_registers *regs,
52			   int ret_len) internal_function;
53static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
54			      int nregs, int regs_allocated) internal_function;
55static inline re_dfastate_t *acquire_init_state_context
56     (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
57     __attribute ((always_inline)) internal_function;
58static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59     internal_function;
60static int check_matching (re_match_context_t *mctx, int fl_longest_match,
61			   int *p_match_first)
62     internal_function;
63static int check_halt_node_context (const re_dfa_t *dfa, int node,
64				    unsigned int context) internal_function;
65static int check_halt_state_context (const re_match_context_t *mctx,
66				     const re_dfastate_t *state, int idx)
67     internal_function;
68static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
69			 regmatch_t *prev_idx_match, int cur_node,
70			 int cur_idx, int nmatch) internal_function;
71static int proceed_next_node (const re_match_context_t *mctx,
72			      int nregs, regmatch_t *regs,
73			      int *pidx, int node, re_node_set *eps_via_nodes,
74			      struct re_fail_stack_t *fs) internal_function;
75static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
76				      int str_idx, int dest_node, int nregs,
77				      regmatch_t *regs,
78				      re_node_set *eps_via_nodes) internal_function;
79static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
80			   regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
81static reg_errcode_t set_regs (const regex_t *preg,
82			       const re_match_context_t *mctx,
83			       size_t nmatch, regmatch_t *pmatch,
84			       int fl_backtrack) internal_function;
85static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
86
87#ifdef RE_ENABLE_I18N
88static int sift_states_iter_mb (const re_match_context_t *mctx,
89				re_sift_context_t *sctx,
90				int node_idx, int str_idx, int max_str_idx) internal_function;
91#endif /* RE_ENABLE_I18N */
92static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
93					   re_sift_context_t *sctx) internal_function;
94static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
95					  re_sift_context_t *sctx, int str_idx,
96					  re_node_set *cur_dest) internal_function;
97static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
98					      re_sift_context_t *sctx,
99					      int str_idx,
100					      re_node_set *dest_nodes) internal_function;
101static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
102					    re_node_set *dest_nodes,
103					    const re_node_set *candidates) internal_function;
104static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
105					    re_node_set *dest_nodes,
106					    const re_node_set *and_nodes) internal_function;
107static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
108			     int dst_node, int dst_idx, int src_node,
109			     int src_idx) internal_function;
110static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
111					int boundaries, int subexp_idx,
112					int from_node, int bkref_idx) internal_function;
113static int check_dst_limits_calc_pos (re_match_context_t *mctx,
114				      int limit, int subexp_idx,
115				      int node, int str_idx,
116				      int bkref_idx) internal_function;
117static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
118					  re_node_set *dest_nodes,
119					  const re_node_set *candidates,
120					  re_node_set *limits,
121					  struct re_backref_cache_entry *bkref_ents,
122					  int str_idx) internal_function;
123static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
124					re_sift_context_t *sctx,
125					int str_idx, const re_node_set *candidates) internal_function;
126static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
127					        int next_state_log_idx) internal_function;
128static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
129					re_dfastate_t **src, int num) internal_function;
130static re_dfastate_t *find_recover_state (reg_errcode_t *err,
131					 re_match_context_t *mctx) internal_function;
132static re_dfastate_t *transit_state (reg_errcode_t *err,
133				     re_match_context_t *mctx,
134				     re_dfastate_t *state) internal_function;
135static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
136					    re_match_context_t *mctx,
137					    re_dfastate_t *next_state) internal_function;
138static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
139						re_node_set *cur_nodes,
140						int str_idx) internal_function;
141#if 0
142static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
143					re_match_context_t *mctx,
144					re_dfastate_t *pstate) internal_function;
145#endif
146#ifdef RE_ENABLE_I18N
147static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
148				       re_dfastate_t *pstate) internal_function;
149#endif /* RE_ENABLE_I18N */
150static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
151					  const re_node_set *nodes) internal_function;
152static reg_errcode_t get_subexp (re_match_context_t *mctx,
153				 int bkref_node, int bkref_str_idx) internal_function;
154static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155				     const re_sub_match_top_t *sub_top,
156				     re_sub_match_last_t *sub_last,
157				     int bkref_node, int bkref_str) internal_function;
158static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
159			     int subexp_idx, int type) internal_function;
160static reg_errcode_t check_arrival (re_match_context_t *mctx,
161				    state_array_t *path, int top_node,
162				    int top_str, int last_node, int last_str,
163				    int type) internal_function;
164static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165						   int str_idx,
166						   re_node_set *cur_nodes,
167						   re_node_set *next_nodes) internal_function;
168static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
169					       re_node_set *cur_nodes,
170					       int ex_subexp, int type) internal_function;
171static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
172						   re_node_set *dst_nodes,
173						   int target, int ex_subexp,
174						   int type) internal_function;
175static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
176					 re_node_set *cur_nodes, int cur_str,
177					 int subexp_num, int type) internal_function;
178static int build_trtable (re_dfa_t *dfa,
179			  re_dfastate_t *state) internal_function;
180#ifdef RE_ENABLE_I18N
181static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
182				    const re_string_t *input, int idx) internal_function;
183# ifdef _LIBC
184static unsigned int find_collation_sequence_value (const unsigned char *mbs,
185						   size_t name_len) internal_function;
186# endif /* _LIBC */
187#endif /* RE_ENABLE_I18N */
188static int group_nodes_into_DFAstates (re_dfa_t *dfa,
189				       const re_dfastate_t *state,
190				       re_node_set *states_node,
191				       bitset *states_ch) internal_function;
192static int check_node_accept (const re_match_context_t *mctx,
193			      const re_token_t *node, int idx) internal_function;
194static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
195
196/* Entry point for POSIX code.  */
197
198/* regexec searches for a given pattern, specified by PREG, in the
199   string STRING.
200
201   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
202   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
203   least NMATCH elements, and we set them to the offsets of the
204   corresponding matched substrings.
205
206   EFLAGS specifies `execution flags' which affect matching: if
207   REG_NOTBOL is set, then ^ does not match at the beginning of the
208   string; if REG_NOTEOL is set, then $ does not match at the end.
209
210   We return 0 if we find a match and REG_NOMATCH if not.  */
211
212int
213regexec (preg, string, nmatch, pmatch, eflags)
214    const regex_t *__restrict preg;
215    const char *__restrict string;
216    size_t nmatch;
217    regmatch_t pmatch[];
218    int eflags;
219{
220  reg_errcode_t err;
221  int start, length;
222
223  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
224    return REG_BADPAT;
225
226  if (eflags & REG_STARTEND)
227    {
228      start = pmatch[0].rm_so;
229      length = pmatch[0].rm_eo;
230    }
231  else
232    {
233      start = 0;
234      length = strlen (string);
235    }
236  if (preg->no_sub)
237    err = re_search_internal (preg, string, length, start, length - start,
238			      length, 0, NULL, eflags);
239  else
240    err = re_search_internal (preg, string, length, start, length - start,
241			      length, nmatch, pmatch, eflags);
242  return err != REG_NOERROR;
243}
244
245#ifdef _LIBC
246# include <shlib-compat.h>
247versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
248
249# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
250__typeof__ (__regexec) __compat_regexec;
251
252int
253attribute_compat_text_section
254__compat_regexec (const regex_t *__restrict preg,
255		  const char *__restrict string, size_t nmatch,
256		  regmatch_t pmatch[], int eflags)
257{
258  return regexec (preg, string, nmatch, pmatch,
259		  eflags & (REG_NOTBOL | REG_NOTEOL));
260}
261compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
262# endif
263#endif
264
265/* Entry points for GNU code.  */
266
267/* re_match, re_search, re_match_2, re_search_2
268
269   The former two functions operate on STRING with length LENGTH,
270   while the later two operate on concatenation of STRING1 and STRING2
271   with lengths LENGTH1 and LENGTH2, respectively.
272
273   re_match() matches the compiled pattern in BUFP against the string,
274   starting at index START.
275
276   re_search() first tries matching at index START, then it tries to match
277   starting from index START + 1, and so on.  The last start position tried
278   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
279   way as re_match().)
280
281   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
282   the first STOP characters of the concatenation of the strings should be
283   concerned.
284
285   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
286   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
287   computed relative to the concatenation, not relative to the individual
288   strings.)
289
290   On success, re_match* functions return the length of the match, re_search*
291   return the position of the start of the match.  Return value -1 means no
292   match was found and -2 indicates an internal error.  */
293
294int
295re_match (bufp, string, length, start, regs)
296    struct re_pattern_buffer *bufp;
297    const char *string;
298    int length, start;
299    struct re_registers *regs;
300{
301  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
302}
303#ifdef _LIBC
304weak_alias (__re_match, re_match)
305#endif
306
307int
308re_search (bufp, string, length, start, range, regs)
309    struct re_pattern_buffer *bufp;
310    const char *string;
311    int length, start, range;
312    struct re_registers *regs;
313{
314  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
315}
316#ifdef _LIBC
317weak_alias (__re_search, re_search)
318#endif
319
320int
321re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
322    struct re_pattern_buffer *bufp;
323    const char *string1, *string2;
324    int length1, length2, start, stop;
325    struct re_registers *regs;
326{
327  return re_search_2_stub (bufp, string1, length1, string2, length2,
328			   start, 0, regs, stop, 1);
329}
330#ifdef _LIBC
331weak_alias (__re_match_2, re_match_2)
332#endif
333
334int
335re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
336    struct re_pattern_buffer *bufp;
337    const char *string1, *string2;
338    int length1, length2, start, range, stop;
339    struct re_registers *regs;
340{
341  return re_search_2_stub (bufp, string1, length1, string2, length2,
342			   start, range, regs, stop, 0);
343}
344#ifdef _LIBC
345weak_alias (__re_search_2, re_search_2)
346#endif
347
348static int
349re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
350		  stop, ret_len)
351    struct re_pattern_buffer *bufp;
352    const char *string1, *string2;
353    int length1, length2, start, range, stop, ret_len;
354    struct re_registers *regs;
355{
356  const char *str;
357  int rval;
358  int len = length1 + length2;
359  int free_str = 0;
360
361  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
362    return -2;
363
364  /* Concatenate the strings.  */
365  if (length2 > 0)
366    if (length1 > 0)
367      {
368	char *s = re_malloc (char, len);
369
370	if (BE (s == NULL, 0))
371	  return -2;
372	memcpy (s, string1, length1);
373	memcpy (s + length1, string2, length2);
374	str = s;
375	free_str = 1;
376      }
377    else
378      str = string2;
379  else
380    str = string1;
381
382  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
383			 ret_len);
384  if (free_str)
385    re_free ((char *) str);
386  return rval;
387}
388
389/* The parameters have the same meaning as those of re_search.
390   Additional parameters:
391   If RET_LEN is nonzero the length of the match is returned (re_match style);
392   otherwise the position of the match is returned.  */
393
394static int
395re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
396    struct re_pattern_buffer *bufp;
397    const char *string;
398    int length, start, range, stop, ret_len;
399    struct re_registers *regs;
400{
401  reg_errcode_t result;
402  regmatch_t *pmatch;
403  int nregs, rval;
404  int eflags = 0;
405
406  /* Check for out-of-range.  */
407  if (BE (start < 0 || start > length, 0))
408    return -1;
409  if (BE (start + range > length, 0))
410    range = length - start;
411  else if (BE (start + range < 0, 0))
412    range = -start;
413
414  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
415  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
416
417  /* Compile fastmap if we haven't yet.  */
418  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
419    re_compile_fastmap (bufp);
420
421  if (BE (bufp->no_sub, 0))
422    regs = NULL;
423
424  /* We need at least 1 register.  */
425  if (regs == NULL)
426    nregs = 1;
427  else if (BE (bufp->regs_allocated == REGS_FIXED &&
428	       regs->num_regs < bufp->re_nsub + 1, 0))
429    {
430      nregs = regs->num_regs;
431      if (BE (nregs < 1, 0))
432	{
433	  /* Nothing can be copied to regs.  */
434	  regs = NULL;
435	  nregs = 1;
436	}
437    }
438  else
439    nregs = bufp->re_nsub + 1;
440  pmatch = re_malloc (regmatch_t, nregs);
441  if (BE (pmatch == NULL, 0))
442    return -2;
443
444  result = re_search_internal (bufp, string, length, start, range, stop,
445			       nregs, pmatch, eflags);
446
447  rval = 0;
448
449  /* I hope we needn't fill ther regs with -1's when no match was found.  */
450  if (result != REG_NOERROR)
451    rval = -1;
452  else if (regs != NULL)
453    {
454      /* If caller wants register contents data back, copy them.  */
455      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
456					   bufp->regs_allocated);
457      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
458	rval = -2;
459    }
460
461  if (BE (rval == 0, 1))
462    {
463      if (ret_len)
464	{
465	  assert (pmatch[0].rm_so == start);
466	  rval = pmatch[0].rm_eo - start;
467	}
468      else
469	rval = pmatch[0].rm_so;
470    }
471  re_free (pmatch);
472  return rval;
473}
474
475static unsigned
476re_copy_regs (regs, pmatch, nregs, regs_allocated)
477    struct re_registers *regs;
478    regmatch_t *pmatch;
479    int nregs, regs_allocated;
480{
481  int rval = REGS_REALLOCATE;
482  int i;
483  int need_regs = nregs + 1;
484  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
485     uses.  */
486
487  /* Have the register data arrays been allocated?  */
488  if (regs_allocated == REGS_UNALLOCATED)
489    { /* No.  So allocate them with malloc.  */
490      regs->start = re_malloc (regoff_t, need_regs);
491      regs->end = re_malloc (regoff_t, need_regs);
492      if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
493	return REGS_UNALLOCATED;
494      regs->num_regs = need_regs;
495    }
496  else if (regs_allocated == REGS_REALLOCATE)
497    { /* Yes.  If we need more elements than were already
498	 allocated, reallocate them.  If we need fewer, just
499	 leave it alone.  */
500      if (BE (need_regs > regs->num_regs, 0))
501	{
502	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
503	  regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
504	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
505	    return REGS_UNALLOCATED;
506	  regs->start = new_start;
507	  regs->end = new_end;
508	  regs->num_regs = need_regs;
509	}
510    }
511  else
512    {
513      assert (regs_allocated == REGS_FIXED);
514      /* This function may not be called with REGS_FIXED and nregs too big.  */
515      assert (regs->num_regs >= nregs);
516      rval = REGS_FIXED;
517    }
518
519  /* Copy the regs.  */
520  for (i = 0; i < nregs; ++i)
521    {
522      regs->start[i] = pmatch[i].rm_so;
523      regs->end[i] = pmatch[i].rm_eo;
524    }
525  for ( ; i < regs->num_regs; ++i)
526    regs->start[i] = regs->end[i] = -1;
527
528  return rval;
529}
530
531/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
532   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
533   this memory for recording register information.  STARTS and ENDS
534   must be allocated using the malloc library routine, and must each
535   be at least NUM_REGS * sizeof (regoff_t) bytes long.
536
537   If NUM_REGS == 0, then subsequent matches should allocate their own
538   register data.
539
540   Unless this function is called, the first search or match using
541   PATTERN_BUFFER will allocate its own register data, without
542   freeing the old data.  */
543
544void
545re_set_registers (bufp, regs, num_regs, starts, ends)
546    struct re_pattern_buffer *bufp;
547    struct re_registers *regs;
548    unsigned num_regs;
549    regoff_t *starts, *ends;
550{
551  if (num_regs)
552    {
553      bufp->regs_allocated = REGS_REALLOCATE;
554      regs->num_regs = num_regs;
555      regs->start = starts;
556      regs->end = ends;
557    }
558  else
559    {
560      bufp->regs_allocated = REGS_UNALLOCATED;
561      regs->num_regs = 0;
562      regs->start = regs->end = (regoff_t *) 0;
563    }
564}
565#ifdef _LIBC
566weak_alias (__re_set_registers, re_set_registers)
567#endif
568
569/* Entry points compatible with 4.2 BSD regex library.  We don't define
570   them unless specifically requested.  */
571
572#if defined _REGEX_RE_COMP || defined _LIBC
573int
574# ifdef _LIBC
575weak_function
576# endif
577re_exec (s)
578     const char *s;
579{
580  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
581}
582#endif /* _REGEX_RE_COMP */
583
584/* Internal entry point.  */
585
586/* Searches for a compiled pattern PREG in the string STRING, whose
587   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
588   mingings with regexec.  START, and RANGE have the same meanings
589   with re_search.
590   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
591   otherwise return the error code.
592   Note: We assume front end functions already check ranges.
593   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
594
595static reg_errcode_t
596re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
597		    eflags)
598    const regex_t *preg;
599    const char *string;
600    int length, start, range, stop, eflags;
601    size_t nmatch;
602    regmatch_t pmatch[];
603{
604  reg_errcode_t err;
605  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
606  int left_lim, right_lim, incr;
607  int fl_longest_match, match_first, match_kind, match_last = -1;
608  int extra_nmatch;
609  int sb, ch;
610#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
611  re_match_context_t mctx = { .dfa = dfa };
612#else
613  re_match_context_t mctx;
614#endif
615  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
616		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
617  unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
618
619#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
620  memset (&mctx, '\0', sizeof (re_match_context_t));
621  mctx.dfa = dfa;
622#endif
623
624  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
625  nmatch -= extra_nmatch;
626
627  /* Check if the DFA haven't been compiled.  */
628  if (BE (preg->used == 0 || dfa->init_state == NULL
629	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
630	  || dfa->init_state_begbuf == NULL, 0))
631    return REG_NOMATCH;
632
633#ifdef DEBUG
634  /* We assume front-end functions already check them.  */
635  assert (start + range >= 0 && start + range <= length);
636#endif
637
638  /* If initial states with non-begbuf contexts have no elements,
639     the regex must be anchored.  If preg->newline_anchor is set,
640     we'll never use init_state_nl, so do not check it.  */
641  if (dfa->init_state->nodes.nelem == 0
642      && dfa->init_state_word->nodes.nelem == 0
643      && (dfa->init_state_nl->nodes.nelem == 0
644	  || !preg->newline_anchor))
645    {
646      if (start != 0 && start + range != 0)
647        return REG_NOMATCH;
648      start = range = 0;
649    }
650
651  /* We must check the longest matching, if nmatch > 0.  */
652  fl_longest_match = (nmatch != 0 || dfa->nbackref);
653
654  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
655			    preg->translate, preg->syntax & RE_ICASE, dfa);
656  if (BE (err != REG_NOERROR, 0))
657    goto free_return;
658  mctx.input.stop = stop;
659  mctx.input.raw_stop = stop;
660  mctx.input.newline_anchor = preg->newline_anchor;
661
662  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
663  if (BE (err != REG_NOERROR, 0))
664    goto free_return;
665
666  /* We will log all the DFA states through which the dfa pass,
667     if nmatch > 1, or this dfa has "multibyte node", which is a
668     back-reference or a node which can accept multibyte character or
669     multi character collating element.  */
670  if (nmatch > 1 || dfa->has_mb_node)
671    {
672      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
673      if (BE (mctx.state_log == NULL, 0))
674	{
675	  err = REG_ESPACE;
676	  goto free_return;
677	}
678    }
679  else
680    mctx.state_log = NULL;
681
682  match_first = start;
683  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
684			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
685
686  /* Check incrementally whether of not the input string match.  */
687  incr = (range < 0) ? -1 : 1;
688  left_lim = (range < 0) ? start + range : start;
689  right_lim = (range < 0) ? start : start + range;
690  sb = dfa->mb_cur_max == 1;
691  match_kind =
692    (fastmap
693     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
694	| (range >= 0 ? 2 : 0)
695	| (t != NULL ? 1 : 0))
696     : 8);
697
698  for (;; match_first += incr)
699    {
700      err = REG_NOMATCH;
701      if (match_first < left_lim || right_lim < match_first)
702	goto free_return;
703
704      /* Advance as rapidly as possible through the string, until we
705	 find a plausible place to start matching.  This may be done
706	 with varying efficiency, so there are various possibilities:
707	 only the most common of them are specialized, in order to
708	 save on code size.  We use a switch statement for speed.  */
709      switch (match_kind)
710	{
711	case 8:
712	  /* No fastmap.  */
713	  break;
714
715	case 7:
716	  /* Fastmap with single-byte translation, match forward.  */
717	  while (BE (match_first < right_lim, 1)
718		 && !fastmap[t[(unsigned char) string[match_first]]])
719	    ++match_first;
720	  goto forward_match_found_start_or_reached_end;
721
722	case 6:
723	  /* Fastmap without translation, match forward.  */
724	  while (BE (match_first < right_lim, 1)
725		 && !fastmap[(unsigned char) string[match_first]])
726	    ++match_first;
727
728	forward_match_found_start_or_reached_end:
729	  if (BE (match_first == right_lim, 0))
730	    {
731	      ch = match_first >= length
732		       ? 0 : (unsigned char) string[match_first];
733	      if (!fastmap[t ? t[ch] : ch])
734		goto free_return;
735	    }
736	  break;
737
738	case 4:
739	case 5:
740	  /* Fastmap without multi-byte translation, match backwards.  */
741	  while (match_first >= left_lim)
742	    {
743	      ch = match_first >= length
744		       ? 0 : (unsigned char) string[match_first];
745	      if (fastmap[t ? t[ch] : ch])
746		break;
747	      --match_first;
748	    }
749	  if (match_first < left_lim)
750	    goto free_return;
751	  break;
752
753	default:
754	  /* In this case, we can't determine easily the current byte,
755	     since it might be a component byte of a multibyte
756	     character.  Then we use the constructed buffer instead.  */
757	  for (;;)
758	    {
759	      /* If MATCH_FIRST is out of the valid range, reconstruct the
760		 buffers.  */
761	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
762	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
763		{
764		  err = re_string_reconstruct (&mctx.input, match_first,
765					       eflags);
766		  if (BE (err != REG_NOERROR, 0))
767		    goto free_return;
768
769		  offset = match_first - mctx.input.raw_mbs_idx;
770		}
771	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
772		 Note that MATCH_FIRST must not be smaller than 0.  */
773	      ch = (match_first >= length
774		    ? 0 : re_string_byte_at (&mctx.input, offset));
775	      if (fastmap[ch])
776		break;
777	      match_first += incr;
778	      if (match_first < left_lim || match_first > right_lim)
779	        {
780	          err = REG_NOMATCH;
781	          goto free_return;
782	        }
783	    }
784	  break;
785	}
786
787      /* Reconstruct the buffers so that the matcher can assume that
788	 the matching starts from the beginning of the buffer.  */
789      err = re_string_reconstruct (&mctx.input, match_first, eflags);
790      if (BE (err != REG_NOERROR, 0))
791	goto free_return;
792
793#ifdef RE_ENABLE_I18N
794     /* Don't consider this char as a possible match start if it part,
795	yet isn't the head, of a multibyte character.  */
796      if (!sb && !re_string_first_byte (&mctx.input, 0))
797	continue;
798#endif
799
800      /* It seems to be appropriate one, then use the matcher.  */
801      /* We assume that the matching starts from 0.  */
802      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
803      match_last = check_matching (&mctx, fl_longest_match,
804				   range >= 0 ? &match_first : NULL);
805      if (match_last != -1)
806	{
807	  if (BE (match_last == -2, 0))
808	    {
809	      err = REG_ESPACE;
810	      goto free_return;
811	    }
812	  else
813	    {
814	      mctx.match_last = match_last;
815	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
816		{
817		  re_dfastate_t *pstate = mctx.state_log[match_last];
818		  mctx.last_node = check_halt_state_context (&mctx, pstate,
819							     match_last);
820		}
821	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
822		  || dfa->nbackref)
823		{
824		  err = prune_impossible_nodes (&mctx);
825		  if (err == REG_NOERROR)
826		    break;
827		  if (BE (err != REG_NOMATCH, 0))
828		    goto free_return;
829		  match_last = -1;
830		}
831	      else
832		break; /* We found a match.  */
833	    }
834	}
835
836      match_ctx_clean (&mctx);
837    }
838
839#ifdef DEBUG
840  assert (match_last != -1);
841  assert (err == REG_NOERROR);
842#endif
843
844  /* Set pmatch[] if we need.  */
845  if (nmatch > 0)
846    {
847      int reg_idx;
848
849      /* Initialize registers.  */
850      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
851	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
852
853      /* Set the points where matching start/end.  */
854      pmatch[0].rm_so = 0;
855      pmatch[0].rm_eo = mctx.match_last;
856
857      if (!preg->no_sub && nmatch > 1)
858	{
859	  err = set_regs (preg, &mctx, nmatch, pmatch,
860			  dfa->has_plural_match && dfa->nbackref > 0);
861	  if (BE (err != REG_NOERROR, 0))
862	    goto free_return;
863	}
864
865      /* At last, add the offset to the each registers, since we slided
866	 the buffers so that we could assume that the matching starts
867	 from 0.  */
868      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
869	if (pmatch[reg_idx].rm_so != -1)
870	  {
871#ifdef RE_ENABLE_I18N
872	    if (BE (mctx.input.offsets_needed != 0, 0))
873	      {
874		if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
875		  pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
876		else
877		  pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
878		if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
879		  pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
880		else
881		  pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
882	      }
883#else
884	    assert (mctx.input.offsets_needed == 0);
885#endif
886	    pmatch[reg_idx].rm_so += match_first;
887	    pmatch[reg_idx].rm_eo += match_first;
888	  }
889      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
890	{
891	  pmatch[nmatch + reg_idx].rm_so = -1;
892	  pmatch[nmatch + reg_idx].rm_eo = -1;
893	}
894
895      if (dfa->subexp_map)
896        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
897          if (dfa->subexp_map[reg_idx] != reg_idx)
898            {
899              pmatch[reg_idx + 1].rm_so
900                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
901              pmatch[reg_idx + 1].rm_eo
902                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
903            }
904    }
905
906 free_return:
907  re_free (mctx.state_log);
908  if (dfa->nbackref)
909    match_ctx_free (&mctx);
910  re_string_destruct (&mctx.input);
911  return err;
912}
913
914static reg_errcode_t
915prune_impossible_nodes (mctx)
916     re_match_context_t *mctx;
917{
918  re_dfa_t *const dfa = mctx->dfa;
919  int halt_node, match_last;
920  reg_errcode_t ret;
921  re_dfastate_t **sifted_states;
922  re_dfastate_t **lim_states = NULL;
923  re_sift_context_t sctx;
924#ifdef DEBUG
925  assert (mctx->state_log != NULL);
926#endif
927  match_last = mctx->match_last;
928  halt_node = mctx->last_node;
929  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
930  if (BE (sifted_states == NULL, 0))
931    {
932      ret = REG_ESPACE;
933      goto free_return;
934    }
935  if (dfa->nbackref)
936    {
937      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
938      if (BE (lim_states == NULL, 0))
939	{
940	  ret = REG_ESPACE;
941	  goto free_return;
942	}
943      while (1)
944	{
945	  memset (lim_states, '\0',
946		  sizeof (re_dfastate_t *) * (match_last + 1));
947	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
948			 match_last);
949	  ret = sift_states_backward (mctx, &sctx);
950	  re_node_set_free (&sctx.limits);
951	  if (BE (ret != REG_NOERROR, 0))
952	      goto free_return;
953	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
954	    break;
955	  do
956	    {
957	      --match_last;
958	      if (match_last < 0)
959		{
960		  ret = REG_NOMATCH;
961		  goto free_return;
962		}
963	    } while (mctx->state_log[match_last] == NULL
964		     || !mctx->state_log[match_last]->halt);
965	  halt_node = check_halt_state_context (mctx,
966						mctx->state_log[match_last],
967						match_last);
968	}
969      ret = merge_state_array (dfa, sifted_states, lim_states,
970			       match_last + 1);
971      re_free (lim_states);
972      lim_states = NULL;
973      if (BE (ret != REG_NOERROR, 0))
974	goto free_return;
975    }
976  else
977    {
978      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
979      ret = sift_states_backward (mctx, &sctx);
980      re_node_set_free (&sctx.limits);
981      if (BE (ret != REG_NOERROR, 0))
982	goto free_return;
983    }
984  re_free (mctx->state_log);
985  mctx->state_log = sifted_states;
986  sifted_states = NULL;
987  mctx->last_node = halt_node;
988  mctx->match_last = match_last;
989  ret = REG_NOERROR;
990 free_return:
991  re_free (sifted_states);
992  re_free (lim_states);
993  return ret;
994}
995
996/* Acquire an initial state and return it.
997   We must select appropriate initial state depending on the context,
998   since initial states may have constraints like "\<", "^", etc..  */
999
1000static inline re_dfastate_t *
1001acquire_init_state_context (err, mctx, idx)
1002     reg_errcode_t *err;
1003     const re_match_context_t *mctx;
1004     int idx;
1005{
1006  re_dfa_t *const dfa = mctx->dfa;
1007  if (dfa->init_state->has_constraint)
1008    {
1009      unsigned int context;
1010      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1011      if (IS_WORD_CONTEXT (context))
1012	return dfa->init_state_word;
1013      else if (IS_ORDINARY_CONTEXT (context))
1014	return dfa->init_state;
1015      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1016	return dfa->init_state_begbuf;
1017      else if (IS_NEWLINE_CONTEXT (context))
1018	return dfa->init_state_nl;
1019      else if (IS_BEGBUF_CONTEXT (context))
1020	{
1021	  /* It is relatively rare case, then calculate on demand.  */
1022	  return re_acquire_state_context (err, dfa,
1023					   dfa->init_state->entrance_nodes,
1024					   context);
1025	}
1026      else
1027	/* Must not happen?  */
1028	return dfa->init_state;
1029    }
1030  else
1031    return dfa->init_state;
1032}
1033
1034/* Check whether the regular expression match input string INPUT or not,
1035   and return the index where the matching end, return -1 if not match,
1036   or return -2 in case of an error.
1037   FL_LONGEST_MATCH means we want the POSIX longest matching.
1038   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1039   next place where we may want to try matching.
1040   Note that the matcher assume that the maching starts from the current
1041   index of the buffer.  */
1042
1043static int
1044check_matching (mctx, fl_longest_match, p_match_first)
1045    re_match_context_t *mctx;
1046    int fl_longest_match;
1047    int *p_match_first;
1048{
1049  re_dfa_t *const dfa = mctx->dfa;
1050  reg_errcode_t err;
1051  int match = 0;
1052  int match_last = -1;
1053  int cur_str_idx = re_string_cur_idx (&mctx->input);
1054  re_dfastate_t *cur_state;
1055  int at_init_state = p_match_first != NULL;
1056  int next_start_idx = cur_str_idx;
1057
1058  err = REG_NOERROR;
1059  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1060  /* An initial state must not be NULL (invalid).  */
1061  if (BE (cur_state == NULL, 0))
1062    {
1063      assert (err == REG_ESPACE);
1064      return -2;
1065    }
1066
1067  if (mctx->state_log != NULL)
1068    {
1069      mctx->state_log[cur_str_idx] = cur_state;
1070
1071      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1072	 later.  E.g. Processing back references.  */
1073      if (BE (dfa->nbackref, 0))
1074	{
1075	  at_init_state = 0;
1076	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1077	  if (BE (err != REG_NOERROR, 0))
1078	    return err;
1079
1080	  if (cur_state->has_backref)
1081	    {
1082	      err = transit_state_bkref (mctx, &cur_state->nodes);
1083	      if (BE (err != REG_NOERROR, 0))
1084	        return err;
1085	    }
1086	}
1087    }
1088
1089  /* If the RE accepts NULL string.  */
1090  if (BE (cur_state->halt, 0))
1091    {
1092      if (!cur_state->has_constraint
1093	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1094	{
1095	  if (!fl_longest_match)
1096	    return cur_str_idx;
1097	  else
1098	    {
1099	      match_last = cur_str_idx;
1100	      match = 1;
1101	    }
1102	}
1103    }
1104
1105  while (!re_string_eoi (&mctx->input))
1106    {
1107      re_dfastate_t *old_state = cur_state;
1108      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1109
1110      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1111          || (BE (next_char_idx >= mctx->input.valid_len, 0)
1112              && mctx->input.valid_len < mctx->input.len))
1113        {
1114          err = extend_buffers (mctx);
1115          if (BE (err != REG_NOERROR, 0))
1116	    {
1117	      assert (err == REG_ESPACE);
1118	      return -2;
1119	    }
1120        }
1121
1122      cur_state = transit_state (&err, mctx, cur_state);
1123      if (mctx->state_log != NULL)
1124	cur_state = merge_state_with_log (&err, mctx, cur_state);
1125
1126      if (cur_state == NULL)
1127	{
1128	  /* Reached the invalid state or an error.  Try to recover a valid
1129	     state using the state log, if available and if we have not
1130	     already found a valid (even if not the longest) match.  */
1131	  if (BE (err != REG_NOERROR, 0))
1132	    return -2;
1133
1134	  if (mctx->state_log == NULL
1135	      || (match && !fl_longest_match)
1136	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1137	    break;
1138	}
1139
1140      if (BE (at_init_state, 0))
1141	{
1142	  if (old_state == cur_state)
1143	    next_start_idx = next_char_idx;
1144	  else
1145	    at_init_state = 0;
1146	}
1147
1148      if (cur_state->halt)
1149	{
1150	  /* Reached a halt state.
1151	     Check the halt state can satisfy the current context.  */
1152	  if (!cur_state->has_constraint
1153	      || check_halt_state_context (mctx, cur_state,
1154					   re_string_cur_idx (&mctx->input)))
1155	    {
1156	      /* We found an appropriate halt state.  */
1157	      match_last = re_string_cur_idx (&mctx->input);
1158	      match = 1;
1159
1160	      /* We found a match, do not modify match_first below.  */
1161	      p_match_first = NULL;
1162	      if (!fl_longest_match)
1163		break;
1164	    }
1165	}
1166    }
1167
1168  if (p_match_first)
1169    *p_match_first += next_start_idx;
1170
1171  return match_last;
1172}
1173
1174/* Check NODE match the current context.  */
1175
1176static int check_halt_node_context (dfa, node, context)
1177    const re_dfa_t *dfa;
1178    int node;
1179    unsigned int context;
1180{
1181  re_token_type_t type = dfa->nodes[node].type;
1182  unsigned int constraint = dfa->nodes[node].constraint;
1183  if (type != END_OF_RE)
1184    return 0;
1185  if (!constraint)
1186    return 1;
1187  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1188    return 0;
1189  return 1;
1190}
1191
1192/* Check the halt state STATE match the current context.
1193   Return 0 if not match, if the node, STATE has, is a halt node and
1194   match the context, return the node.  */
1195
1196static int
1197check_halt_state_context (mctx, state, idx)
1198    const re_match_context_t *mctx;
1199    const re_dfastate_t *state;
1200    int idx;
1201{
1202  int i;
1203  unsigned int context;
1204#ifdef DEBUG
1205  assert (state->halt);
1206#endif
1207  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1208  for (i = 0; i < state->nodes.nelem; ++i)
1209    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1210      return state->nodes.elems[i];
1211  return 0;
1212}
1213
1214/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1215   corresponding to the DFA).
1216   Return the destination node, and update EPS_VIA_NODES, return -1 in case
1217   of errors.  */
1218
1219static int
1220proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
1221    const re_match_context_t *mctx;
1222    regmatch_t *regs;
1223    int nregs, *pidx, node;
1224    re_node_set *eps_via_nodes;
1225    struct re_fail_stack_t *fs;
1226{
1227  re_dfa_t *const dfa = mctx->dfa;
1228  int i, err, dest_node;
1229  dest_node = -1;
1230  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1231    {
1232      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1233      re_node_set *edests = &dfa->edests[node];
1234      int dest_node;
1235      err = re_node_set_insert (eps_via_nodes, node);
1236      if (BE (err < 0, 0))
1237	return -2;
1238      /* Pick up a valid destination, or return -1 if none is found.  */
1239      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1240	{
1241	  int candidate = edests->elems[i];
1242	  if (!re_node_set_contains (cur_nodes, candidate))
1243	    continue;
1244          if (dest_node == -1)
1245	    dest_node = candidate;
1246
1247          else
1248	    {
1249	      /* In order to avoid infinite loop like "(a*)*", return the second
1250	         epsilon-transition if the first was already considered.  */
1251	      if (re_node_set_contains (eps_via_nodes, dest_node))
1252	        return candidate;
1253
1254	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1255	      else if (fs != NULL
1256		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1257				           eps_via_nodes))
1258		return -2;
1259
1260	      /* We know we are going to exit.  */
1261	      break;
1262	    }
1263	}
1264      return dest_node;
1265    }
1266  else
1267    {
1268      int naccepted = 0;
1269      re_token_type_t type = dfa->nodes[node].type;
1270
1271#ifdef RE_ENABLE_I18N
1272      if (dfa->nodes[node].accept_mb)
1273	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1274      else
1275#endif /* RE_ENABLE_I18N */
1276      if (type == OP_BACK_REF)
1277	{
1278	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
1279	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1280	  if (fs != NULL)
1281	    {
1282	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1283		return -1;
1284	      else if (naccepted)
1285		{
1286		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1287		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1288			      naccepted) != 0)
1289		    return -1;
1290		}
1291	    }
1292
1293	  if (naccepted == 0)
1294	    {
1295	      err = re_node_set_insert (eps_via_nodes, node);
1296	      if (BE (err < 0, 0))
1297		return -2;
1298	      dest_node = dfa->edests[node].elems[0];
1299	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1300					dest_node))
1301		return dest_node;
1302	    }
1303	}
1304
1305      if (naccepted != 0
1306	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1307	{
1308	  dest_node = dfa->nexts[node];
1309	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1310	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1311		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1312					       dest_node)))
1313	    return -1;
1314	  re_node_set_empty (eps_via_nodes);
1315	  return dest_node;
1316	}
1317    }
1318  return -1;
1319}
1320
1321static reg_errcode_t
1322push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
1323     struct re_fail_stack_t *fs;
1324     int str_idx, dest_node, nregs;
1325     regmatch_t *regs;
1326     re_node_set *eps_via_nodes;
1327{
1328  reg_errcode_t err;
1329  int num = fs->num++;
1330  if (fs->num == fs->alloc)
1331    {
1332      struct re_fail_stack_ent_t *new_array;
1333      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1334				       * fs->alloc * 2));
1335      if (new_array == NULL)
1336	return REG_ESPACE;
1337      fs->alloc *= 2;
1338      fs->stack = new_array;
1339    }
1340  fs->stack[num].idx = str_idx;
1341  fs->stack[num].node = dest_node;
1342  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1343  if (fs->stack[num].regs == NULL)
1344    return REG_ESPACE;
1345  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1346  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1347  return err;
1348}
1349
1350static int
1351pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1352     struct re_fail_stack_t *fs;
1353     int *pidx, nregs;
1354     regmatch_t *regs;
1355     re_node_set *eps_via_nodes;
1356{
1357  int num = --fs->num;
1358  assert (num >= 0);
1359  *pidx = fs->stack[num].idx;
1360  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1361  re_node_set_free (eps_via_nodes);
1362  re_free (fs->stack[num].regs);
1363  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1364  return fs->stack[num].node;
1365}
1366
1367/* Set the positions where the subexpressions are starts/ends to registers
1368   PMATCH.
1369   Note: We assume that pmatch[0] is already set, and
1370   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1371
1372static reg_errcode_t
1373set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1374     const regex_t *preg;
1375     const re_match_context_t *mctx;
1376     size_t nmatch;
1377     regmatch_t *pmatch;
1378     int fl_backtrack;
1379{
1380  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1381  int idx, cur_node;
1382  re_node_set eps_via_nodes;
1383  struct re_fail_stack_t *fs;
1384  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1385  regmatch_t *prev_idx_match;
1386
1387#ifdef DEBUG
1388  assert (nmatch > 1);
1389  assert (mctx->state_log != NULL);
1390#endif
1391  if (fl_backtrack)
1392    {
1393      fs = &fs_body;
1394      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1395      if (fs->stack == NULL)
1396	return REG_ESPACE;
1397    }
1398  else
1399    fs = NULL;
1400
1401  cur_node = dfa->init_node;
1402  re_node_set_init_empty (&eps_via_nodes);
1403
1404  prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
1405  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1406
1407  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1408    {
1409      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1410
1411      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1412	{
1413	  int reg_idx;
1414	  if (fs)
1415	    {
1416	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1417		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1418		  break;
1419	      if (reg_idx == nmatch)
1420		{
1421		  re_node_set_free (&eps_via_nodes);
1422		  return free_fail_stack_return (fs);
1423		}
1424	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1425					 &eps_via_nodes);
1426	    }
1427	  else
1428	    {
1429	      re_node_set_free (&eps_via_nodes);
1430	      return REG_NOERROR;
1431	    }
1432	}
1433
1434      /* Proceed to next node.  */
1435      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1436				    &eps_via_nodes, fs);
1437
1438      if (BE (cur_node < 0, 0))
1439	{
1440	  if (BE (cur_node == -2, 0))
1441	    {
1442	      re_node_set_free (&eps_via_nodes);
1443	      free_fail_stack_return (fs);
1444	      return REG_ESPACE;
1445	    }
1446	  if (fs)
1447	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1448				       &eps_via_nodes);
1449	  else
1450	    {
1451	      re_node_set_free (&eps_via_nodes);
1452	      return REG_NOMATCH;
1453	    }
1454	}
1455    }
1456  re_node_set_free (&eps_via_nodes);
1457  return free_fail_stack_return (fs);
1458}
1459
1460static reg_errcode_t
1461free_fail_stack_return (fs)
1462     struct re_fail_stack_t *fs;
1463{
1464  if (fs)
1465    {
1466      int fs_idx;
1467      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1468	{
1469	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1470	  re_free (fs->stack[fs_idx].regs);
1471	}
1472      re_free (fs->stack);
1473    }
1474  return REG_NOERROR;
1475}
1476
1477static void
1478update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
1479     re_dfa_t *dfa;
1480     regmatch_t *pmatch, *prev_idx_match;
1481     int cur_node, cur_idx, nmatch;
1482{
1483  int type = dfa->nodes[cur_node].type;
1484  if (type == OP_OPEN_SUBEXP)
1485    {
1486      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1487
1488      /* We are at the first node of this sub expression.  */
1489      if (reg_num < nmatch)
1490	{
1491	  pmatch[reg_num].rm_so = cur_idx;
1492	  pmatch[reg_num].rm_eo = -1;
1493	}
1494    }
1495  else if (type == OP_CLOSE_SUBEXP)
1496    {
1497      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1498      if (reg_num < nmatch)
1499	{
1500	  /* We are at the last node of this sub expression.  */
1501	  if (pmatch[reg_num].rm_so < cur_idx)
1502	    {
1503	      pmatch[reg_num].rm_eo = cur_idx;
1504	      /* This is a non-empty match or we are not inside an optional
1505		 subexpression.  Accept this right away.  */
1506	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1507	    }
1508	  else
1509	    {
1510	      if (dfa->nodes[cur_node].opt_subexp
1511		  && prev_idx_match[reg_num].rm_so != -1)
1512		/* We transited through an empty match for an optional
1513		   subexpression, like (a?)*, and this is not the subexp's
1514		   first match.  Copy back the old content of the registers
1515		   so that matches of an inner subexpression are undone as
1516		   well, like in ((a?))*.  */
1517		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1518	      else
1519		/* We completed a subexpression, but it may be part of
1520		   an optional one, so do not update PREV_IDX_MATCH.  */
1521		pmatch[reg_num].rm_eo = cur_idx;
1522	    }
1523	}
1524    }
1525}
1526
1527/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1528   and sift the nodes in each states according to the following rules.
1529   Updated state_log will be wrote to STATE_LOG.
1530
1531   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1532     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1533	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1534	the LAST_NODE, we throw away the node `a'.
1535     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1536	string `s' and transit to `b':
1537	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1538	   away the node `a'.
1539	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1540	    thrown away, we throw away the node `a'.
1541     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1542	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1543	   node `a'.
1544	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1545	    we throw away the node `a'.  */
1546
1547#define STATE_NODE_CONTAINS(state,node) \
1548  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1549
1550static reg_errcode_t
1551sift_states_backward (mctx, sctx)
1552     re_match_context_t *mctx;
1553     re_sift_context_t *sctx;
1554{
1555  reg_errcode_t err;
1556  int null_cnt = 0;
1557  int str_idx = sctx->last_str_idx;
1558  re_node_set cur_dest;
1559
1560#ifdef DEBUG
1561  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1562#endif
1563
1564  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1565     transit to the last_node and the last_node itself.  */
1566  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1567  if (BE (err != REG_NOERROR, 0))
1568    return err;
1569  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1570  if (BE (err != REG_NOERROR, 0))
1571    goto free_return;
1572
1573  /* Then check each states in the state_log.  */
1574  while (str_idx > 0)
1575    {
1576      /* Update counters.  */
1577      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1578      if (null_cnt > mctx->max_mb_elem_len)
1579	{
1580	  memset (sctx->sifted_states, '\0',
1581		  sizeof (re_dfastate_t *) * str_idx);
1582	  re_node_set_free (&cur_dest);
1583	  return REG_NOERROR;
1584	}
1585      re_node_set_empty (&cur_dest);
1586      --str_idx;
1587
1588      if (mctx->state_log[str_idx])
1589	{
1590	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1591          if (BE (err != REG_NOERROR, 0))
1592	    goto free_return;
1593	}
1594
1595      /* Add all the nodes which satisfy the following conditions:
1596	 - It can epsilon transit to a node in CUR_DEST.
1597	 - It is in CUR_SRC.
1598	 And update state_log.  */
1599      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1600      if (BE (err != REG_NOERROR, 0))
1601	goto free_return;
1602    }
1603  err = REG_NOERROR;
1604 free_return:
1605  re_node_set_free (&cur_dest);
1606  return err;
1607}
1608
1609static reg_errcode_t
1610build_sifted_states (mctx, sctx, str_idx, cur_dest)
1611     re_match_context_t *mctx;
1612     re_sift_context_t *sctx;
1613     int str_idx;
1614     re_node_set *cur_dest;
1615{
1616  re_dfa_t *const dfa = mctx->dfa;
1617  re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1618  int i;
1619
1620  /* Then build the next sifted state.
1621     We build the next sifted state on `cur_dest', and update
1622     `sifted_states[str_idx]' with `cur_dest'.
1623     Note:
1624     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1625     `cur_src' points the node_set of the old `state_log[str_idx]'
1626     (with the epsilon nodes pre-filtered out).  */
1627  for (i = 0; i < cur_src->nelem; i++)
1628    {
1629      int prev_node = cur_src->elems[i];
1630      int naccepted = 0;
1631      int ret;
1632
1633#ifdef DEBUG
1634      re_token_type_t type = dfa->nodes[prev_node].type;
1635      assert (!IS_EPSILON_NODE (type));
1636#endif
1637#ifdef RE_ENABLE_I18N
1638      /* If the node may accept `multi byte'.  */
1639      if (dfa->nodes[prev_node].accept_mb)
1640	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1641					 str_idx, sctx->last_str_idx);
1642#endif /* RE_ENABLE_I18N */
1643
1644      /* We don't check backreferences here.
1645	 See update_cur_sifted_state().  */
1646      if (!naccepted
1647	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1648	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1649				  dfa->nexts[prev_node]))
1650	naccepted = 1;
1651
1652      if (naccepted == 0)
1653	continue;
1654
1655      if (sctx->limits.nelem)
1656	{
1657	  int to_idx = str_idx + naccepted;
1658	  if (check_dst_limits (mctx, &sctx->limits,
1659				dfa->nexts[prev_node], to_idx,
1660				prev_node, str_idx))
1661	    continue;
1662	}
1663      ret = re_node_set_insert (cur_dest, prev_node);
1664      if (BE (ret == -1, 0))
1665	return REG_ESPACE;
1666    }
1667
1668  return REG_NOERROR;
1669}
1670
1671/* Helper functions.  */
1672
1673static reg_errcode_t
1674clean_state_log_if_needed (mctx, next_state_log_idx)
1675    re_match_context_t *mctx;
1676    int next_state_log_idx;
1677{
1678  int top = mctx->state_log_top;
1679
1680  if (next_state_log_idx >= mctx->input.bufs_len
1681      || (next_state_log_idx >= mctx->input.valid_len
1682	  && mctx->input.valid_len < mctx->input.len))
1683    {
1684      reg_errcode_t err;
1685      err = extend_buffers (mctx);
1686      if (BE (err != REG_NOERROR, 0))
1687	return err;
1688    }
1689
1690  if (top < next_state_log_idx)
1691    {
1692      memset (mctx->state_log + top + 1, '\0',
1693	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1694      mctx->state_log_top = next_state_log_idx;
1695    }
1696  return REG_NOERROR;
1697}
1698
1699static reg_errcode_t
1700merge_state_array (dfa, dst, src, num)
1701     re_dfa_t *dfa;
1702     re_dfastate_t **dst;
1703     re_dfastate_t **src;
1704     int num;
1705{
1706  int st_idx;
1707  reg_errcode_t err;
1708  for (st_idx = 0; st_idx < num; ++st_idx)
1709    {
1710      if (dst[st_idx] == NULL)
1711	dst[st_idx] = src[st_idx];
1712      else if (src[st_idx] != NULL)
1713	{
1714	  re_node_set merged_set;
1715	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1716					&src[st_idx]->nodes);
1717	  if (BE (err != REG_NOERROR, 0))
1718	    return err;
1719	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1720	  re_node_set_free (&merged_set);
1721	  if (BE (err != REG_NOERROR, 0))
1722	    return err;
1723	}
1724    }
1725  return REG_NOERROR;
1726}
1727
1728static reg_errcode_t
1729update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
1730     re_match_context_t *mctx;
1731     re_sift_context_t *sctx;
1732     int str_idx;
1733     re_node_set *dest_nodes;
1734{
1735  re_dfa_t *const dfa = mctx->dfa;
1736  reg_errcode_t err;
1737  const re_node_set *candidates;
1738  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1739		: &mctx->state_log[str_idx]->nodes);
1740
1741  if (dest_nodes->nelem == 0)
1742    sctx->sifted_states[str_idx] = NULL;
1743  else
1744    {
1745      if (candidates)
1746	{
1747	  /* At first, add the nodes which can epsilon transit to a node in
1748	     DEST_NODE.  */
1749	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1750	  if (BE (err != REG_NOERROR, 0))
1751	    return err;
1752
1753	  /* Then, check the limitations in the current sift_context.  */
1754	  if (sctx->limits.nelem)
1755	    {
1756	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1757					 mctx->bkref_ents, str_idx);
1758	      if (BE (err != REG_NOERROR, 0))
1759		return err;
1760	    }
1761	}
1762
1763      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1764      if (BE (err != REG_NOERROR, 0))
1765	return err;
1766    }
1767
1768  if (candidates && mctx->state_log[str_idx]->has_backref)
1769    {
1770      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1771      if (BE (err != REG_NOERROR, 0))
1772	return err;
1773    }
1774  return REG_NOERROR;
1775}
1776
1777static reg_errcode_t
1778add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1779     re_dfa_t *dfa;
1780     re_node_set *dest_nodes;
1781     const re_node_set *candidates;
1782{
1783  reg_errcode_t err = REG_NOERROR;
1784  int i;
1785
1786  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1787  if (BE (err != REG_NOERROR, 0))
1788    return err;
1789
1790  if (!state->inveclosure.alloc)
1791    {
1792      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1793      if (BE (err != REG_NOERROR, 0))
1794        return REG_ESPACE;
1795      for (i = 0; i < dest_nodes->nelem; i++)
1796        re_node_set_merge (&state->inveclosure,
1797			   dfa->inveclosures + dest_nodes->elems[i]);
1798    }
1799  return re_node_set_add_intersect (dest_nodes, candidates,
1800				    &state->inveclosure);
1801}
1802
1803static reg_errcode_t
1804sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1805     re_dfa_t *dfa;
1806     int node;
1807     re_node_set *dest_nodes;
1808     const re_node_set *candidates;
1809{
1810    int ecl_idx;
1811    reg_errcode_t err;
1812    re_node_set *inv_eclosure = dfa->inveclosures + node;
1813    re_node_set except_nodes;
1814    re_node_set_init_empty (&except_nodes);
1815    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1816      {
1817	int cur_node = inv_eclosure->elems[ecl_idx];
1818	if (cur_node == node)
1819	  continue;
1820	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1821	  {
1822	    int edst1 = dfa->edests[cur_node].elems[0];
1823	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1824			 ? dfa->edests[cur_node].elems[1] : -1);
1825	    if ((!re_node_set_contains (inv_eclosure, edst1)
1826		 && re_node_set_contains (dest_nodes, edst1))
1827		|| (edst2 > 0
1828		    && !re_node_set_contains (inv_eclosure, edst2)
1829		    && re_node_set_contains (dest_nodes, edst2)))
1830	      {
1831		err = re_node_set_add_intersect (&except_nodes, candidates,
1832						 dfa->inveclosures + cur_node);
1833		if (BE (err != REG_NOERROR, 0))
1834		  {
1835		    re_node_set_free (&except_nodes);
1836		    return err;
1837		  }
1838	      }
1839	  }
1840      }
1841    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1842      {
1843	int cur_node = inv_eclosure->elems[ecl_idx];
1844	if (!re_node_set_contains (&except_nodes, cur_node))
1845	  {
1846	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1847	    re_node_set_remove_at (dest_nodes, idx);
1848	  }
1849      }
1850    re_node_set_free (&except_nodes);
1851    return REG_NOERROR;
1852}
1853
1854static int
1855check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
1856     re_match_context_t *mctx;
1857     re_node_set *limits;
1858     int dst_node, dst_idx, src_node, src_idx;
1859{
1860  re_dfa_t *const dfa = mctx->dfa;
1861  int lim_idx, src_pos, dst_pos;
1862
1863  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1864  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1865  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1866    {
1867      int subexp_idx;
1868      struct re_backref_cache_entry *ent;
1869      ent = mctx->bkref_ents + limits->elems[lim_idx];
1870      subexp_idx = dfa->nodes[ent->node].opr.idx;
1871
1872      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1873					   subexp_idx, dst_node, dst_idx,
1874					   dst_bkref_idx);
1875      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1876					   subexp_idx, src_node, src_idx,
1877					   src_bkref_idx);
1878
1879      /* In case of:
1880	 <src> <dst> ( <subexp> )
1881	 ( <subexp> ) <src> <dst>
1882	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1883      if (src_pos == dst_pos)
1884	continue; /* This is unrelated limitation.  */
1885      else
1886	return 1;
1887    }
1888  return 0;
1889}
1890
1891static int
1892check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
1893     re_match_context_t *mctx;
1894     int boundaries, subexp_idx, from_node, bkref_idx;
1895{
1896  re_dfa_t *const dfa = mctx->dfa;
1897  re_node_set *eclosures = dfa->eclosures + from_node;
1898  int node_idx;
1899
1900  /* Else, we are on the boundary: examine the nodes on the epsilon
1901     closure.  */
1902  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1903    {
1904      int node = eclosures->elems[node_idx];
1905      switch (dfa->nodes[node].type)
1906	{
1907	case OP_BACK_REF:
1908	  if (bkref_idx != -1)
1909	    {
1910	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1911	      do
1912	        {
1913		  int dst, cpos;
1914
1915		  if (ent->node != node)
1916		    continue;
1917
1918		  if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
1919		      && !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
1920		    continue;
1921
1922		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1923		     OP_CLOSE_SUBEXP cases below.  But, if the
1924		     destination node is the same node as the source
1925		     node, don't recurse because it would cause an
1926		     infinite loop: a regex that exhibits this behavior
1927		     is ()\1*\1*  */
1928		  dst = dfa->edests[node].elems[0];
1929		  if (dst == from_node)
1930		    {
1931		      if (boundaries & 1)
1932		        return -1;
1933		      else /* if (boundaries & 2) */
1934		        return 0;
1935		    }
1936
1937		  cpos =
1938		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1939						 dst, bkref_idx);
1940		  if (cpos == -1 /* && (boundaries & 1) */)
1941		    return -1;
1942		  if (cpos == 0 && (boundaries & 2))
1943		    return 0;
1944
1945		  ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
1946	        }
1947	      while (ent++->more);
1948	    }
1949	  break;
1950
1951	case OP_OPEN_SUBEXP:
1952	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1953	    return -1;
1954	  break;
1955
1956	case OP_CLOSE_SUBEXP:
1957	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1958	    return 0;
1959	  break;
1960
1961	default:
1962	    break;
1963	}
1964    }
1965
1966  return (boundaries & 2) ? 1 : 0;
1967}
1968
1969static int
1970check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
1971     re_match_context_t *mctx;
1972     int limit, subexp_idx, from_node, str_idx, bkref_idx;
1973{
1974  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1975  int boundaries;
1976
1977  /* If we are outside the range of the subexpression, return -1 or 1.  */
1978  if (str_idx < lim->subexp_from)
1979    return -1;
1980
1981  if (lim->subexp_to < str_idx)
1982    return 1;
1983
1984  /* If we are within the subexpression, return 0.  */
1985  boundaries = (str_idx == lim->subexp_from);
1986  boundaries |= (str_idx == lim->subexp_to) << 1;
1987  if (boundaries == 0)
1988    return 0;
1989
1990  /* Else, examine epsilon closure.  */
1991  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1992				      from_node, bkref_idx);
1993}
1994
1995/* Check the limitations of sub expressions LIMITS, and remove the nodes
1996   which are against limitations from DEST_NODES. */
1997
1998static reg_errcode_t
1999check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
2000     re_dfa_t *dfa;
2001     re_node_set *dest_nodes;
2002     const re_node_set *candidates;
2003     re_node_set *limits;
2004     struct re_backref_cache_entry *bkref_ents;
2005     int str_idx;
2006{
2007  reg_errcode_t err;
2008  int node_idx, lim_idx;
2009
2010  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2011    {
2012      int subexp_idx;
2013      struct re_backref_cache_entry *ent;
2014      ent = bkref_ents + limits->elems[lim_idx];
2015
2016      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2017	continue; /* This is unrelated limitation.  */
2018
2019      subexp_idx = dfa->nodes[ent->node].opr.idx;
2020      if (ent->subexp_to == str_idx)
2021	{
2022	  int ops_node = -1;
2023	  int cls_node = -1;
2024	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2025	    {
2026	      int node = dest_nodes->elems[node_idx];
2027	      re_token_type_t type = dfa->nodes[node].type;
2028	      if (type == OP_OPEN_SUBEXP
2029		  && subexp_idx == dfa->nodes[node].opr.idx)
2030		ops_node = node;
2031	      else if (type == OP_CLOSE_SUBEXP
2032		       && subexp_idx == dfa->nodes[node].opr.idx)
2033		cls_node = node;
2034	    }
2035
2036	  /* Check the limitation of the open subexpression.  */
2037	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2038	  if (ops_node >= 0)
2039	    {
2040	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2041					   candidates);
2042	      if (BE (err != REG_NOERROR, 0))
2043		return err;
2044	    }
2045
2046	  /* Check the limitation of the close subexpression.  */
2047	  if (cls_node >= 0)
2048	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2049	      {
2050		int node = dest_nodes->elems[node_idx];
2051		if (!re_node_set_contains (dfa->inveclosures + node,
2052					   cls_node)
2053		    && !re_node_set_contains (dfa->eclosures + node,
2054					      cls_node))
2055		  {
2056		    /* It is against this limitation.
2057		       Remove it form the current sifted state.  */
2058		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2059						 candidates);
2060		    if (BE (err != REG_NOERROR, 0))
2061		      return err;
2062		    --node_idx;
2063		  }
2064	      }
2065	}
2066      else /* (ent->subexp_to != str_idx)  */
2067	{
2068	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2069	    {
2070	      int node = dest_nodes->elems[node_idx];
2071	      re_token_type_t type = dfa->nodes[node].type;
2072	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2073		{
2074		  if (subexp_idx != dfa->nodes[node].opr.idx)
2075		    continue;
2076		  /* It is against this limitation.
2077		     Remove it form the current sifted state.  */
2078		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2079					       candidates);
2080		  if (BE (err != REG_NOERROR, 0))
2081		    return err;
2082		}
2083	    }
2084	}
2085    }
2086  return REG_NOERROR;
2087}
2088
2089static reg_errcode_t
2090sift_states_bkref (mctx, sctx, str_idx, candidates)
2091     re_match_context_t *mctx;
2092     re_sift_context_t *sctx;
2093     int str_idx;
2094     const re_node_set *candidates;
2095{
2096  re_dfa_t *const dfa = mctx->dfa;
2097  reg_errcode_t err;
2098  int node_idx, node;
2099  re_sift_context_t local_sctx;
2100  int first_idx = search_cur_bkref_entry (mctx, str_idx);
2101
2102  if (first_idx == -1)
2103    return REG_NOERROR;
2104
2105  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2106
2107  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2108    {
2109      int enabled_idx;
2110      re_token_type_t type;
2111      struct re_backref_cache_entry *entry;
2112      node = candidates->elems[node_idx];
2113      type = dfa->nodes[node].type;
2114      /* Avoid infinite loop for the REs like "()\1+".  */
2115      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2116	continue;
2117      if (type != OP_BACK_REF)
2118	continue;
2119
2120      entry = mctx->bkref_ents + first_idx;
2121      enabled_idx = first_idx;
2122      do
2123	{
2124	  int subexp_len, to_idx, dst_node;
2125	  re_dfastate_t *cur_state;
2126
2127	  if (entry->node != node)
2128	    continue;
2129	  subexp_len = entry->subexp_to - entry->subexp_from;
2130	  to_idx = str_idx + subexp_len;
2131	  dst_node = (subexp_len ? dfa->nexts[node]
2132		      : dfa->edests[node].elems[0]);
2133
2134	  if (to_idx > sctx->last_str_idx
2135	      || sctx->sifted_states[to_idx] == NULL
2136	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2137	      || check_dst_limits (mctx, &sctx->limits, node,
2138				   str_idx, dst_node, to_idx))
2139	    continue;
2140
2141	  if (local_sctx.sifted_states == NULL)
2142	    {
2143	      local_sctx = *sctx;
2144	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2145	      if (BE (err != REG_NOERROR, 0))
2146		goto free_return;
2147	    }
2148	  local_sctx.last_node = node;
2149	  local_sctx.last_str_idx = str_idx;
2150	  err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2151	  if (BE (err < 0, 0))
2152	    {
2153	      err = REG_ESPACE;
2154	      goto free_return;
2155	    }
2156	  cur_state = local_sctx.sifted_states[str_idx];
2157	  err = sift_states_backward (mctx, &local_sctx);
2158	  if (BE (err != REG_NOERROR, 0))
2159	    goto free_return;
2160	  if (sctx->limited_states != NULL)
2161	    {
2162	      err = merge_state_array (dfa, sctx->limited_states,
2163				       local_sctx.sifted_states,
2164				       str_idx + 1);
2165	      if (BE (err != REG_NOERROR, 0))
2166		goto free_return;
2167	    }
2168	  local_sctx.sifted_states[str_idx] = cur_state;
2169	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2170
2171	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2172          entry = mctx->bkref_ents + enabled_idx;
2173	}
2174      while (enabled_idx++, entry++->more);
2175    }
2176  err = REG_NOERROR;
2177 free_return:
2178  if (local_sctx.sifted_states != NULL)
2179    {
2180      re_node_set_free (&local_sctx.limits);
2181    }
2182
2183  return err;
2184}
2185
2186
2187#ifdef RE_ENABLE_I18N
2188static int
2189sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
2190    const re_match_context_t *mctx;
2191    re_sift_context_t *sctx;
2192    int node_idx, str_idx, max_str_idx;
2193{
2194  re_dfa_t *const dfa = mctx->dfa;
2195  int naccepted;
2196  /* Check the node can accept `multi byte'.  */
2197  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2198  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2199      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2200			    dfa->nexts[node_idx]))
2201    /* The node can't accept the `multi byte', or the
2202       destination was already thrown away, then the node
2203       could't accept the current input `multi byte'.   */
2204    naccepted = 0;
2205  /* Otherwise, it is sure that the node could accept
2206     `naccepted' bytes input.  */
2207  return naccepted;
2208}
2209#endif /* RE_ENABLE_I18N */
2210
2211
2212/* Functions for state transition.  */
2213
2214/* Return the next state to which the current state STATE will transit by
2215   accepting the current input byte, and update STATE_LOG if necessary.
2216   If STATE can accept a multibyte char/collating element/back reference
2217   update the destination of STATE_LOG.  */
2218
2219static re_dfastate_t *
2220transit_state (err, mctx, state)
2221     reg_errcode_t *err;
2222     re_match_context_t *mctx;
2223     re_dfastate_t *state;
2224{
2225  re_dfastate_t **trtable;
2226  unsigned char ch;
2227
2228#ifdef RE_ENABLE_I18N
2229  /* If the current state can accept multibyte.  */
2230  if (BE (state->accept_mb, 0))
2231    {
2232      *err = transit_state_mb (mctx, state);
2233      if (BE (*err != REG_NOERROR, 0))
2234	return NULL;
2235    }
2236#endif /* RE_ENABLE_I18N */
2237
2238  /* Then decide the next state with the single byte.  */
2239#if 0
2240  if (0)
2241    /* don't use transition table  */
2242    return transit_state_sb (err, mctx, state);
2243#endif
2244
2245  /* Use transition table  */
2246  ch = re_string_fetch_byte (&mctx->input);
2247  for (;;)
2248    {
2249      trtable = state->trtable;
2250      if (BE (trtable != NULL, 1))
2251	return trtable[ch];
2252
2253      trtable = state->word_trtable;
2254      if (BE (trtable != NULL, 1))
2255        {
2256	  unsigned int context;
2257	  context
2258	    = re_string_context_at (&mctx->input,
2259				    re_string_cur_idx (&mctx->input) - 1,
2260				    mctx->eflags);
2261	  if (IS_WORD_CONTEXT (context))
2262	    return trtable[ch + SBC_MAX];
2263	  else
2264	    return trtable[ch];
2265	}
2266
2267      if (!build_trtable (mctx->dfa, state))
2268	{
2269	  *err = REG_ESPACE;
2270	  return NULL;
2271	}
2272
2273      /* Retry, we now have a transition table.  */
2274    }
2275}
2276
2277/* Update the state_log if we need */
2278re_dfastate_t *
2279merge_state_with_log (err, mctx, next_state)
2280     reg_errcode_t *err;
2281     re_match_context_t *mctx;
2282     re_dfastate_t *next_state;
2283{
2284  re_dfa_t *const dfa = mctx->dfa;
2285  int cur_idx = re_string_cur_idx (&mctx->input);
2286
2287  if (cur_idx > mctx->state_log_top)
2288    {
2289      mctx->state_log[cur_idx] = next_state;
2290      mctx->state_log_top = cur_idx;
2291    }
2292  else if (mctx->state_log[cur_idx] == 0)
2293    {
2294      mctx->state_log[cur_idx] = next_state;
2295    }
2296  else
2297    {
2298      re_dfastate_t *pstate;
2299      unsigned int context;
2300      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2301      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2302         the destination of a multibyte char/collating element/
2303         back reference.  Then the next state is the union set of
2304         these destinations and the results of the transition table.  */
2305      pstate = mctx->state_log[cur_idx];
2306      log_nodes = pstate->entrance_nodes;
2307      if (next_state != NULL)
2308        {
2309          table_nodes = next_state->entrance_nodes;
2310          *err = re_node_set_init_union (&next_nodes, table_nodes,
2311					     log_nodes);
2312          if (BE (*err != REG_NOERROR, 0))
2313	    return NULL;
2314        }
2315      else
2316        next_nodes = *log_nodes;
2317      /* Note: We already add the nodes of the initial state,
2318	 then we don't need to add them here.  */
2319
2320      context = re_string_context_at (&mctx->input,
2321				      re_string_cur_idx (&mctx->input) - 1,
2322				      mctx->eflags);
2323      next_state = mctx->state_log[cur_idx]
2324        = re_acquire_state_context (err, dfa, &next_nodes, context);
2325      /* We don't need to check errors here, since the return value of
2326         this function is next_state and ERR is already set.  */
2327
2328      if (table_nodes != NULL)
2329        re_node_set_free (&next_nodes);
2330    }
2331
2332  if (BE (dfa->nbackref, 0) && next_state != NULL)
2333    {
2334      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2335	 later.  We must check them here, since the back references in the
2336	 next state might use them.  */
2337      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2338					cur_idx);
2339      if (BE (*err != REG_NOERROR, 0))
2340	return NULL;
2341
2342      /* If the next state has back references.  */
2343      if (next_state->has_backref)
2344	{
2345	  *err = transit_state_bkref (mctx, &next_state->nodes);
2346	  if (BE (*err != REG_NOERROR, 0))
2347	    return NULL;
2348	  next_state = mctx->state_log[cur_idx];
2349	}
2350    }
2351
2352  return next_state;
2353}
2354
2355/* Skip bytes in the input that correspond to part of a
2356   multi-byte match, then look in the log for a state
2357   from which to restart matching.  */
2358re_dfastate_t *
2359find_recover_state (err, mctx)
2360     reg_errcode_t *err;
2361     re_match_context_t *mctx;
2362{
2363  re_dfastate_t *cur_state = NULL;
2364  do
2365    {
2366      int max = mctx->state_log_top;
2367      int cur_str_idx = re_string_cur_idx (&mctx->input);
2368
2369      do
2370	{
2371          if (++cur_str_idx > max)
2372            return NULL;
2373          re_string_skip_bytes (&mctx->input, 1);
2374	}
2375      while (mctx->state_log[cur_str_idx] == NULL);
2376
2377      cur_state = merge_state_with_log (err, mctx, NULL);
2378    }
2379  while (err == REG_NOERROR && cur_state == NULL);
2380  return cur_state;
2381}
2382
2383/* Helper functions for transit_state.  */
2384
2385/* From the node set CUR_NODES, pick up the nodes whose types are
2386   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2387   expression. And register them to use them later for evaluating the
2388   correspoding back references.  */
2389
2390static reg_errcode_t
2391check_subexp_matching_top (mctx, cur_nodes, str_idx)
2392     re_match_context_t *mctx;
2393     re_node_set *cur_nodes;
2394     int str_idx;
2395{
2396  re_dfa_t *const dfa = mctx->dfa;
2397  int node_idx;
2398  reg_errcode_t err;
2399
2400  /* TODO: This isn't efficient.
2401	   Because there might be more than one nodes whose types are
2402	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2403	   nodes.
2404	   E.g. RE: (a){2}  */
2405  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2406    {
2407      int node = cur_nodes->elems[node_idx];
2408      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2409	  && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2410	  && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2411	{
2412	  err = match_ctx_add_subtop (mctx, node, str_idx);
2413	  if (BE (err != REG_NOERROR, 0))
2414	    return err;
2415	}
2416    }
2417  return REG_NOERROR;
2418}
2419
2420#if 0
2421/* Return the next state to which the current state STATE will transit by
2422   accepting the current input byte.  */
2423
2424static re_dfastate_t *
2425transit_state_sb (err, mctx, state)
2426     reg_errcode_t *err;
2427     re_match_context_t *mctx;
2428     re_dfastate_t *state;
2429{
2430  re_dfa_t *const dfa = mctx->dfa;
2431  re_node_set next_nodes;
2432  re_dfastate_t *next_state;
2433  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2434  unsigned int context;
2435
2436  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2437  if (BE (*err != REG_NOERROR, 0))
2438    return NULL;
2439  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2440    {
2441      int cur_node = state->nodes.elems[node_cnt];
2442      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2443	{
2444	  *err = re_node_set_merge (&next_nodes,
2445				    dfa->eclosures + dfa->nexts[cur_node]);
2446	  if (BE (*err != REG_NOERROR, 0))
2447	    {
2448	      re_node_set_free (&next_nodes);
2449	      return NULL;
2450	    }
2451	}
2452    }
2453  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2454  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2455  /* We don't need to check errors here, since the return value of
2456     this function is next_state and ERR is already set.  */
2457
2458  re_node_set_free (&next_nodes);
2459  re_string_skip_bytes (&mctx->input, 1);
2460  return next_state;
2461}
2462#endif
2463
2464#ifdef RE_ENABLE_I18N
2465static reg_errcode_t
2466transit_state_mb (mctx, pstate)
2467    re_match_context_t *mctx;
2468    re_dfastate_t *pstate;
2469{
2470  re_dfa_t *const dfa = mctx->dfa;
2471  reg_errcode_t err;
2472  int i;
2473
2474  for (i = 0; i < pstate->nodes.nelem; ++i)
2475    {
2476      re_node_set dest_nodes, *new_nodes;
2477      int cur_node_idx = pstate->nodes.elems[i];
2478      int naccepted, dest_idx;
2479      unsigned int context;
2480      re_dfastate_t *dest_state;
2481
2482      if (!dfa->nodes[cur_node_idx].accept_mb)
2483        continue;
2484
2485      if (dfa->nodes[cur_node_idx].constraint)
2486	{
2487	  context = re_string_context_at (&mctx->input,
2488					  re_string_cur_idx (&mctx->input),
2489					  mctx->eflags);
2490	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2491					   context))
2492	    continue;
2493	}
2494
2495      /* How many bytes the node can accept?  */
2496      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2497					   re_string_cur_idx (&mctx->input));
2498      if (naccepted == 0)
2499	continue;
2500
2501      /* The node can accepts `naccepted' bytes.  */
2502      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2503      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2504			       : mctx->max_mb_elem_len);
2505      err = clean_state_log_if_needed (mctx, dest_idx);
2506      if (BE (err != REG_NOERROR, 0))
2507	return err;
2508#ifdef DEBUG
2509      assert (dfa->nexts[cur_node_idx] != -1);
2510#endif
2511      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2512
2513      dest_state = mctx->state_log[dest_idx];
2514      if (dest_state == NULL)
2515	dest_nodes = *new_nodes;
2516      else
2517	{
2518	  err = re_node_set_init_union (&dest_nodes,
2519					dest_state->entrance_nodes, new_nodes);
2520	  if (BE (err != REG_NOERROR, 0))
2521	    return err;
2522	}
2523      context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2524      mctx->state_log[dest_idx]
2525	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2526      if (dest_state != NULL)
2527	re_node_set_free (&dest_nodes);
2528      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2529	return err;
2530    }
2531  return REG_NOERROR;
2532}
2533#endif /* RE_ENABLE_I18N */
2534
2535static reg_errcode_t
2536transit_state_bkref (mctx, nodes)
2537    re_match_context_t *mctx;
2538    const re_node_set *nodes;
2539{
2540  re_dfa_t *const dfa = mctx->dfa;
2541  reg_errcode_t err;
2542  int i;
2543  int cur_str_idx = re_string_cur_idx (&mctx->input);
2544
2545  for (i = 0; i < nodes->nelem; ++i)
2546    {
2547      int dest_str_idx, prev_nelem, bkc_idx;
2548      int node_idx = nodes->elems[i];
2549      unsigned int context;
2550      const re_token_t *node = dfa->nodes + node_idx;
2551      re_node_set *new_dest_nodes;
2552
2553      /* Check whether `node' is a backreference or not.  */
2554      if (node->type != OP_BACK_REF)
2555	continue;
2556
2557      if (node->constraint)
2558	{
2559	  context = re_string_context_at (&mctx->input, cur_str_idx,
2560					  mctx->eflags);
2561	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2562	    continue;
2563	}
2564
2565      /* `node' is a backreference.
2566	 Check the substring which the substring matched.  */
2567      bkc_idx = mctx->nbkref_ents;
2568      err = get_subexp (mctx, node_idx, cur_str_idx);
2569      if (BE (err != REG_NOERROR, 0))
2570	goto free_return;
2571
2572      /* And add the epsilon closures (which is `new_dest_nodes') of
2573	 the backreference to appropriate state_log.  */
2574#ifdef DEBUG
2575      assert (dfa->nexts[node_idx] != -1);
2576#endif
2577      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2578	{
2579	  int subexp_len;
2580	  re_dfastate_t *dest_state;
2581	  struct re_backref_cache_entry *bkref_ent;
2582	  bkref_ent = mctx->bkref_ents + bkc_idx;
2583	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2584	    continue;
2585	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2586	  new_dest_nodes = (subexp_len == 0
2587			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2588			    : dfa->eclosures + dfa->nexts[node_idx]);
2589	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2590			  - bkref_ent->subexp_from);
2591	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2592					  mctx->eflags);
2593	  dest_state = mctx->state_log[dest_str_idx];
2594	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2595			: mctx->state_log[cur_str_idx]->nodes.nelem);
2596	  /* Add `new_dest_node' to state_log.  */
2597	  if (dest_state == NULL)
2598	    {
2599	      mctx->state_log[dest_str_idx]
2600		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2601					    context);
2602	      if (BE (mctx->state_log[dest_str_idx] == NULL
2603		      && err != REG_NOERROR, 0))
2604		goto free_return;
2605	    }
2606	  else
2607	    {
2608	      re_node_set dest_nodes;
2609	      err = re_node_set_init_union (&dest_nodes,
2610					    dest_state->entrance_nodes,
2611					    new_dest_nodes);
2612	      if (BE (err != REG_NOERROR, 0))
2613		{
2614		  re_node_set_free (&dest_nodes);
2615		  goto free_return;
2616		}
2617	      mctx->state_log[dest_str_idx]
2618		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2619	      re_node_set_free (&dest_nodes);
2620	      if (BE (mctx->state_log[dest_str_idx] == NULL
2621		      && err != REG_NOERROR, 0))
2622		goto free_return;
2623	    }
2624	  /* We need to check recursively if the backreference can epsilon
2625	     transit.  */
2626	  if (subexp_len == 0
2627	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2628	    {
2629	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2630					       cur_str_idx);
2631	      if (BE (err != REG_NOERROR, 0))
2632		goto free_return;
2633	      err = transit_state_bkref (mctx, new_dest_nodes);
2634	      if (BE (err != REG_NOERROR, 0))
2635		goto free_return;
2636	    }
2637	}
2638    }
2639  err = REG_NOERROR;
2640 free_return:
2641  return err;
2642}
2643
2644/* Enumerate all the candidates which the backreference BKREF_NODE can match
2645   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2646   Note that we might collect inappropriate candidates here.
2647   However, the cost of checking them strictly here is too high, then we
2648   delay these checking for prune_impossible_nodes().  */
2649
2650static reg_errcode_t
2651get_subexp (mctx, bkref_node, bkref_str_idx)
2652     re_match_context_t *mctx;
2653     int bkref_node, bkref_str_idx;
2654{
2655  re_dfa_t *const dfa = mctx->dfa;
2656  int subexp_num, sub_top_idx;
2657  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2658  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2659  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2660  if (cache_idx != -1)
2661    {
2662      const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2663      do
2664        if (entry->node == bkref_node)
2665	  return REG_NOERROR; /* We already checked it.  */
2666      while (entry++->more);
2667    }
2668
2669  subexp_num = dfa->nodes[bkref_node].opr.idx;
2670
2671  /* For each sub expression  */
2672  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2673    {
2674      reg_errcode_t err;
2675      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2676      re_sub_match_last_t *sub_last;
2677      int sub_last_idx, sl_str, bkref_str_off;
2678
2679      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2680	continue; /* It isn't related.  */
2681
2682      sl_str = sub_top->str_idx;
2683      bkref_str_off = bkref_str_idx;
2684      /* At first, check the last node of sub expressions we already
2685	 evaluated.  */
2686      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2687	{
2688	  int sl_str_diff;
2689	  sub_last = sub_top->lasts[sub_last_idx];
2690	  sl_str_diff = sub_last->str_idx - sl_str;
2691	  /* The matched string by the sub expression match with the substring
2692	     at the back reference?  */
2693	  if (sl_str_diff > 0)
2694	    {
2695	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2696		{
2697		  /* Not enough chars for a successful match.  */
2698		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2699		    break;
2700
2701		  err = clean_state_log_if_needed (mctx,
2702						   bkref_str_off
2703						   + sl_str_diff);
2704		  if (BE (err != REG_NOERROR, 0))
2705		    return err;
2706		  buf = (const char *) re_string_get_buffer (&mctx->input);
2707		}
2708	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2709		break; /* We don't need to search this sub expression any more.  */
2710	    }
2711	  bkref_str_off += sl_str_diff;
2712	  sl_str += sl_str_diff;
2713	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2714				bkref_str_idx);
2715
2716	  /* Reload buf, since the preceding call might have reallocated
2717	     the buffer.  */
2718	  buf = (const char *) re_string_get_buffer (&mctx->input);
2719
2720	  if (err == REG_NOMATCH)
2721	    continue;
2722	  if (BE (err != REG_NOERROR, 0))
2723	    return err;
2724	}
2725
2726      if (sub_last_idx < sub_top->nlasts)
2727	continue;
2728      if (sub_last_idx > 0)
2729	++sl_str;
2730      /* Then, search for the other last nodes of the sub expression.  */
2731      for (; sl_str <= bkref_str_idx; ++sl_str)
2732	{
2733	  int cls_node, sl_str_off;
2734	  const re_node_set *nodes;
2735	  sl_str_off = sl_str - sub_top->str_idx;
2736	  /* The matched string by the sub expression match with the substring
2737	     at the back reference?  */
2738	  if (sl_str_off > 0)
2739	    {
2740	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2741		{
2742		  /* If we are at the end of the input, we cannot match.  */
2743		  if (bkref_str_off >= mctx->input.len)
2744		    break;
2745
2746		  err = extend_buffers (mctx);
2747		  if (BE (err != REG_NOERROR, 0))
2748		    return err;
2749
2750		  buf = (const char *) re_string_get_buffer (&mctx->input);
2751		}
2752	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2753		break; /* We don't need to search this sub expression
2754			  any more.  */
2755	    }
2756	  if (mctx->state_log[sl_str] == NULL)
2757	    continue;
2758	  /* Does this state have a ')' of the sub expression?  */
2759	  nodes = &mctx->state_log[sl_str]->nodes;
2760	  cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2761	  if (cls_node == -1)
2762	    continue; /* No.  */
2763	  if (sub_top->path == NULL)
2764	    {
2765	      sub_top->path = calloc (sizeof (state_array_t),
2766				      sl_str - sub_top->str_idx + 1);
2767	      if (sub_top->path == NULL)
2768		return REG_ESPACE;
2769	    }
2770	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2771	     in the current context?  */
2772	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2773			       sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2774	  if (err == REG_NOMATCH)
2775	      continue;
2776	  if (BE (err != REG_NOERROR, 0))
2777	      return err;
2778	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2779	  if (BE (sub_last == NULL, 0))
2780	    return REG_ESPACE;
2781	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2782				bkref_str_idx);
2783	  if (err == REG_NOMATCH)
2784	    continue;
2785	}
2786    }
2787  return REG_NOERROR;
2788}
2789
2790/* Helper functions for get_subexp().  */
2791
2792/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2793   If it can arrive, register the sub expression expressed with SUB_TOP
2794   and SUB_LAST.  */
2795
2796static reg_errcode_t
2797get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
2798     re_match_context_t *mctx;
2799     const re_sub_match_top_t *sub_top;
2800     re_sub_match_last_t *sub_last;
2801     int bkref_node, bkref_str;
2802{
2803  reg_errcode_t err;
2804  int to_idx;
2805  /* Can the subexpression arrive the back reference?  */
2806  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2807		       sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2808  if (err != REG_NOERROR)
2809    return err;
2810  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2811			     sub_last->str_idx);
2812  if (BE (err != REG_NOERROR, 0))
2813    return err;
2814  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2815  return clean_state_log_if_needed (mctx, to_idx);
2816}
2817
2818/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2819   Search '(' if FL_OPEN, or search ')' otherwise.
2820   TODO: This function isn't efficient...
2821	 Because there might be more than one nodes whose types are
2822	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2823	 nodes.
2824	 E.g. RE: (a){2}  */
2825
2826static int
2827find_subexp_node (dfa, nodes, subexp_idx, type)
2828     const re_dfa_t *dfa;
2829     const re_node_set *nodes;
2830     int subexp_idx, type;
2831{
2832  int cls_idx;
2833  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2834    {
2835      int cls_node = nodes->elems[cls_idx];
2836      const re_token_t *node = dfa->nodes + cls_node;
2837      if (node->type == type
2838	  && node->opr.idx == subexp_idx)
2839	return cls_node;
2840    }
2841  return -1;
2842}
2843
2844/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2845   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2846   heavily reused.
2847   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2848
2849static reg_errcode_t
2850check_arrival (mctx, path, top_node, top_str, last_node, last_str,
2851	       type)
2852     re_match_context_t *mctx;
2853     state_array_t *path;
2854     int top_node, top_str, last_node, last_str, type;
2855{
2856  re_dfa_t *const dfa = mctx->dfa;
2857  reg_errcode_t err;
2858  int subexp_num, backup_cur_idx, str_idx, null_cnt;
2859  re_dfastate_t *cur_state = NULL;
2860  re_node_set *cur_nodes, next_nodes;
2861  re_dfastate_t **backup_state_log;
2862  unsigned int context;
2863
2864  subexp_num = dfa->nodes[top_node].opr.idx;
2865  /* Extend the buffer if we need.  */
2866  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2867    {
2868      re_dfastate_t **new_array;
2869      int old_alloc = path->alloc;
2870      path->alloc += last_str + mctx->max_mb_elem_len + 1;
2871      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2872      if (new_array == NULL)
2873	{
2874	  path->alloc = old_alloc;
2875	  return REG_ESPACE;
2876	}
2877      path->array = new_array;
2878      memset (new_array + old_alloc, '\0',
2879	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2880    }
2881
2882  str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2883
2884  /* Temporary modify MCTX.  */
2885  backup_state_log = mctx->state_log;
2886  backup_cur_idx = mctx->input.cur_idx;
2887  mctx->state_log = path->array;
2888  mctx->input.cur_idx = str_idx;
2889
2890  /* Setup initial node set.  */
2891  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2892  if (str_idx == top_str)
2893    {
2894      err = re_node_set_init_1 (&next_nodes, top_node);
2895      if (BE (err != REG_NOERROR, 0))
2896	return err;
2897      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2898      if (BE (err != REG_NOERROR, 0))
2899	{
2900	  re_node_set_free (&next_nodes);
2901	  return err;
2902	}
2903    }
2904  else
2905    {
2906      cur_state = mctx->state_log[str_idx];
2907      if (cur_state && cur_state->has_backref)
2908	{
2909	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2910	  if (BE ( err != REG_NOERROR, 0))
2911	    return err;
2912	}
2913      else
2914	re_node_set_init_empty (&next_nodes);
2915    }
2916  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2917    {
2918      if (next_nodes.nelem)
2919	{
2920	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2921				    subexp_num, type);
2922	  if (BE ( err != REG_NOERROR, 0))
2923	    {
2924	      re_node_set_free (&next_nodes);
2925	      return err;
2926	    }
2927	}
2928      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2929      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2930	{
2931	  re_node_set_free (&next_nodes);
2932	  return err;
2933	}
2934      mctx->state_log[str_idx] = cur_state;
2935    }
2936
2937  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2938    {
2939      re_node_set_empty (&next_nodes);
2940      if (mctx->state_log[str_idx + 1])
2941	{
2942	  err = re_node_set_merge (&next_nodes,
2943				   &mctx->state_log[str_idx + 1]->nodes);
2944	  if (BE (err != REG_NOERROR, 0))
2945	    {
2946	      re_node_set_free (&next_nodes);
2947	      return err;
2948	    }
2949	}
2950      if (cur_state)
2951	{
2952	  err = check_arrival_add_next_nodes (mctx, str_idx,
2953					      &cur_state->non_eps_nodes, &next_nodes);
2954	  if (BE (err != REG_NOERROR, 0))
2955	    {
2956	      re_node_set_free (&next_nodes);
2957	      return err;
2958	    }
2959	}
2960      ++str_idx;
2961      if (next_nodes.nelem)
2962	{
2963	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2964	  if (BE (err != REG_NOERROR, 0))
2965	    {
2966	      re_node_set_free (&next_nodes);
2967	      return err;
2968	    }
2969	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2970				    subexp_num, type);
2971	  if (BE ( err != REG_NOERROR, 0))
2972	    {
2973	      re_node_set_free (&next_nodes);
2974	      return err;
2975	    }
2976	}
2977      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2978      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2979      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2980	{
2981	  re_node_set_free (&next_nodes);
2982	  return err;
2983	}
2984      mctx->state_log[str_idx] = cur_state;
2985      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2986    }
2987  re_node_set_free (&next_nodes);
2988  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2989	       : &mctx->state_log[last_str]->nodes);
2990  path->next_idx = str_idx;
2991
2992  /* Fix MCTX.  */
2993  mctx->state_log = backup_state_log;
2994  mctx->input.cur_idx = backup_cur_idx;
2995
2996  /* Then check the current node set has the node LAST_NODE.  */
2997  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2998    return REG_NOERROR;
2999
3000  return REG_NOMATCH;
3001}
3002
3003/* Helper functions for check_arrival.  */
3004
3005/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3006   to NEXT_NODES.
3007   TODO: This function is similar to the functions transit_state*(),
3008	 however this function has many additional works.
3009	 Can't we unify them?  */
3010
3011static reg_errcode_t
3012check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
3013     re_match_context_t *mctx;
3014     int str_idx;
3015     re_node_set *cur_nodes, *next_nodes;
3016{
3017  re_dfa_t *const dfa = mctx->dfa;
3018  int result;
3019  int cur_idx;
3020  reg_errcode_t err;
3021  re_node_set union_set;
3022  re_node_set_init_empty (&union_set);
3023  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3024    {
3025      int naccepted = 0;
3026      int cur_node = cur_nodes->elems[cur_idx];
3027#ifdef DEBUG
3028      re_token_type_t type = dfa->nodes[cur_node].type;
3029      assert (!IS_EPSILON_NODE (type));
3030#endif
3031#ifdef RE_ENABLE_I18N
3032      /* If the node may accept `multi byte'.  */
3033      if (dfa->nodes[cur_node].accept_mb)
3034	{
3035	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3036					       str_idx);
3037	  if (naccepted > 1)
3038	    {
3039	      re_dfastate_t *dest_state;
3040	      int next_node = dfa->nexts[cur_node];
3041	      int next_idx = str_idx + naccepted;
3042	      dest_state = mctx->state_log[next_idx];
3043	      re_node_set_empty (&union_set);
3044	      if (dest_state)
3045		{
3046		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3047		  if (BE (err != REG_NOERROR, 0))
3048		    {
3049		      re_node_set_free (&union_set);
3050		      return err;
3051		    }
3052		}
3053	      result = re_node_set_insert (&union_set, next_node);
3054	      if (BE (result < 0, 0))
3055		{
3056		  re_node_set_free (&union_set);
3057		  return REG_ESPACE;
3058		}
3059	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3060							    &union_set);
3061	      if (BE (mctx->state_log[next_idx] == NULL
3062		      && err != REG_NOERROR, 0))
3063		{
3064		  re_node_set_free (&union_set);
3065		  return err;
3066		}
3067	    }
3068	}
3069#endif /* RE_ENABLE_I18N */
3070      if (naccepted
3071	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3072	{
3073	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3074	  if (BE (result < 0, 0))
3075	    {
3076	      re_node_set_free (&union_set);
3077	      return REG_ESPACE;
3078	    }
3079	}
3080    }
3081  re_node_set_free (&union_set);
3082  return REG_NOERROR;
3083}
3084
3085/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3086   CUR_NODES, however exclude the nodes which are:
3087    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3088    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3089*/
3090
3091static reg_errcode_t
3092check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
3093     re_dfa_t *dfa;
3094     re_node_set *cur_nodes;
3095     int ex_subexp, type;
3096{
3097  reg_errcode_t err;
3098  int idx, outside_node;
3099  re_node_set new_nodes;
3100#ifdef DEBUG
3101  assert (cur_nodes->nelem);
3102#endif
3103  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3104  if (BE (err != REG_NOERROR, 0))
3105    return err;
3106  /* Create a new node set NEW_NODES with the nodes which are epsilon
3107     closures of the node in CUR_NODES.  */
3108
3109  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3110    {
3111      int cur_node = cur_nodes->elems[idx];
3112      re_node_set *eclosure = dfa->eclosures + cur_node;
3113      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3114      if (outside_node == -1)
3115	{
3116	  /* There are no problematic nodes, just merge them.  */
3117	  err = re_node_set_merge (&new_nodes, eclosure);
3118	  if (BE (err != REG_NOERROR, 0))
3119	    {
3120	      re_node_set_free (&new_nodes);
3121	      return err;
3122	    }
3123	}
3124      else
3125	{
3126	  /* There are problematic nodes, re-calculate incrementally.  */
3127	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3128					      ex_subexp, type);
3129	  if (BE (err != REG_NOERROR, 0))
3130	    {
3131	      re_node_set_free (&new_nodes);
3132	      return err;
3133	    }
3134	}
3135    }
3136  re_node_set_free (cur_nodes);
3137  *cur_nodes = new_nodes;
3138  return REG_NOERROR;
3139}
3140
3141/* Helper function for check_arrival_expand_ecl.
3142   Check incrementally the epsilon closure of TARGET, and if it isn't
3143   problematic append it to DST_NODES.  */
3144
3145static reg_errcode_t
3146check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
3147     re_dfa_t *dfa;
3148     int target, ex_subexp, type;
3149     re_node_set *dst_nodes;
3150{
3151  int cur_node;
3152  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3153    {
3154      int err;
3155
3156      if (dfa->nodes[cur_node].type == type
3157	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3158	{
3159	  if (type == OP_CLOSE_SUBEXP)
3160	    {
3161	      err = re_node_set_insert (dst_nodes, cur_node);
3162	      if (BE (err == -1, 0))
3163		return REG_ESPACE;
3164	    }
3165	  break;
3166	}
3167      err = re_node_set_insert (dst_nodes, cur_node);
3168      if (BE (err == -1, 0))
3169	return REG_ESPACE;
3170      if (dfa->edests[cur_node].nelem == 0)
3171	break;
3172      if (dfa->edests[cur_node].nelem == 2)
3173	{
3174	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3175					      dfa->edests[cur_node].elems[1],
3176					      ex_subexp, type);
3177	  if (BE (err != REG_NOERROR, 0))
3178	    return err;
3179	}
3180      cur_node = dfa->edests[cur_node].elems[0];
3181    }
3182  return REG_NOERROR;
3183}
3184
3185
3186/* For all the back references in the current state, calculate the
3187   destination of the back references by the appropriate entry
3188   in MCTX->BKREF_ENTS.  */
3189
3190static reg_errcode_t
3191expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
3192		    type)
3193     re_match_context_t *mctx;
3194     int cur_str, subexp_num, type;
3195     re_node_set *cur_nodes;
3196{
3197  re_dfa_t *const dfa = mctx->dfa;
3198  reg_errcode_t err;
3199  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3200  struct re_backref_cache_entry *ent;
3201
3202  if (cache_idx_start == -1)
3203    return REG_NOERROR;
3204
3205 restart:
3206  ent = mctx->bkref_ents + cache_idx_start;
3207  do
3208    {
3209      int to_idx, next_node;
3210
3211      /* Is this entry ENT is appropriate?  */
3212      if (!re_node_set_contains (cur_nodes, ent->node))
3213	continue; /* No.  */
3214
3215      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3216      /* Calculate the destination of the back reference, and append it
3217	 to MCTX->STATE_LOG.  */
3218      if (to_idx == cur_str)
3219	{
3220	  /* The backreference did epsilon transit, we must re-check all the
3221	     node in the current state.  */
3222	  re_node_set new_dests;
3223	  reg_errcode_t err2, err3;
3224	  next_node = dfa->edests[ent->node].elems[0];
3225	  if (re_node_set_contains (cur_nodes, next_node))
3226	    continue;
3227	  err = re_node_set_init_1 (&new_dests, next_node);
3228	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3229	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3230	  re_node_set_free (&new_dests);
3231	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3232		  || err3 != REG_NOERROR, 0))
3233	    {
3234	      err = (err != REG_NOERROR ? err
3235		     : (err2 != REG_NOERROR ? err2 : err3));
3236	      return err;
3237	    }
3238	  /* TODO: It is still inefficient...  */
3239	  goto restart;
3240	}
3241      else
3242	{
3243	  re_node_set union_set;
3244	  next_node = dfa->nexts[ent->node];
3245	  if (mctx->state_log[to_idx])
3246	    {
3247	      int ret;
3248	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3249					next_node))
3250		continue;
3251	      err = re_node_set_init_copy (&union_set,
3252					   &mctx->state_log[to_idx]->nodes);
3253	      ret = re_node_set_insert (&union_set, next_node);
3254	      if (BE (err != REG_NOERROR || ret < 0, 0))
3255		{
3256		  re_node_set_free (&union_set);
3257		  err = err != REG_NOERROR ? err : REG_ESPACE;
3258		  return err;
3259		}
3260	    }
3261	  else
3262	    {
3263	      err = re_node_set_init_1 (&union_set, next_node);
3264	      if (BE (err != REG_NOERROR, 0))
3265		return err;
3266	    }
3267	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3268	  re_node_set_free (&union_set);
3269	  if (BE (mctx->state_log[to_idx] == NULL
3270		  && err != REG_NOERROR, 0))
3271	    return err;
3272	}
3273    }
3274  while (ent++->more);
3275  return REG_NOERROR;
3276}
3277
3278/* Build transition table for the state.
3279   Return 1 if succeeded, otherwise return NULL.  */
3280
3281static int
3282build_trtable (dfa, state)
3283    re_dfa_t *dfa;
3284    re_dfastate_t *state;
3285{
3286  reg_errcode_t err;
3287  int i, j, ch, need_word_trtable = 0;
3288  unsigned int elem, mask;
3289  int dests_node_malloced = 0, dest_states_malloced = 0;
3290  int ndests; /* Number of the destination states from `state'.  */
3291  re_dfastate_t **trtable;
3292  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3293  re_node_set follows, *dests_node;
3294  bitset *dests_ch;
3295  bitset acceptable;
3296
3297  /* We build DFA states which corresponds to the destination nodes
3298     from `state'.  `dests_node[i]' represents the nodes which i-th
3299     destination state contains, and `dests_ch[i]' represents the
3300     characters which i-th destination state accepts.  */
3301#ifdef _LIBC
3302  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3303    dests_node = (re_node_set *)
3304      alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3305  else
3306#endif
3307    {
3308      dests_node = (re_node_set *)
3309	malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3310      if (BE (dests_node == NULL, 0))
3311	return 0;
3312      dests_node_malloced = 1;
3313    }
3314  dests_ch = (bitset *) (dests_node + SBC_MAX);
3315
3316  /* Initialize transiton table.  */
3317  state->word_trtable = state->trtable = NULL;
3318
3319  /* At first, group all nodes belonging to `state' into several
3320     destinations.  */
3321  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3322  if (BE (ndests <= 0, 0))
3323    {
3324      if (dests_node_malloced)
3325	free (dests_node);
3326      /* Return 0 in case of an error, 1 otherwise.  */
3327      if (ndests == 0)
3328	{
3329	  state->trtable = (re_dfastate_t **)
3330	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3331	  return 1;
3332	}
3333      return 0;
3334    }
3335
3336  err = re_node_set_alloc (&follows, ndests + 1);
3337  if (BE (err != REG_NOERROR, 0))
3338    goto out_free;
3339
3340#ifdef _LIBC
3341  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3342			 + ndests * 3 * sizeof (re_dfastate_t *)))
3343    dest_states = (re_dfastate_t **)
3344      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3345  else
3346#endif
3347    {
3348      dest_states = (re_dfastate_t **)
3349	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3350      if (BE (dest_states == NULL, 0))
3351	{
3352out_free:
3353	  if (dest_states_malloced)
3354	    free (dest_states);
3355	  re_node_set_free (&follows);
3356	  for (i = 0; i < ndests; ++i)
3357	    re_node_set_free (dests_node + i);
3358	  if (dests_node_malloced)
3359	    free (dests_node);
3360	  return 0;
3361	}
3362      dest_states_malloced = 1;
3363    }
3364  dest_states_word = dest_states + ndests;
3365  dest_states_nl = dest_states_word + ndests;
3366  bitset_empty (acceptable);
3367
3368  /* Then build the states for all destinations.  */
3369  for (i = 0; i < ndests; ++i)
3370    {
3371      int next_node;
3372      re_node_set_empty (&follows);
3373      /* Merge the follows of this destination states.  */
3374      for (j = 0; j < dests_node[i].nelem; ++j)
3375	{
3376	  next_node = dfa->nexts[dests_node[i].elems[j]];
3377	  if (next_node != -1)
3378	    {
3379	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3380	      if (BE (err != REG_NOERROR, 0))
3381		goto out_free;
3382	    }
3383	}
3384      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3385      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3386	goto out_free;
3387      /* If the new state has context constraint,
3388	 build appropriate states for these contexts.  */
3389      if (dest_states[i]->has_constraint)
3390	{
3391	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3392							  CONTEXT_WORD);
3393	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3394	    goto out_free;
3395
3396	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3397	    need_word_trtable = 1;
3398
3399	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3400							CONTEXT_NEWLINE);
3401	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3402	    goto out_free;
3403 	}
3404      else
3405	{
3406	  dest_states_word[i] = dest_states[i];
3407	  dest_states_nl[i] = dest_states[i];
3408	}
3409      bitset_merge (acceptable, dests_ch[i]);
3410    }
3411
3412  if (!BE (need_word_trtable, 0))
3413    {
3414      /* We don't care about whether the following character is a word
3415	 character, or we are in a single-byte character set so we can
3416	 discern by looking at the character code: allocate a
3417	 256-entry transition table.  */
3418      trtable = state->trtable =
3419	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3420      if (BE (trtable == NULL, 0))
3421	goto out_free;
3422
3423      /* For all characters ch...:  */
3424      for (i = 0; i < BITSET_UINTS; ++i)
3425	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3426	     elem;
3427	     mask <<= 1, elem >>= 1, ++ch)
3428	  if (BE (elem & 1, 0))
3429	    {
3430	      /* There must be exactly one destination which accepts
3431		 character ch.  See group_nodes_into_DFAstates.  */
3432	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3433		;
3434
3435	      /* j-th destination accepts the word character ch.  */
3436	      if (dfa->word_char[i] & mask)
3437		trtable[ch] = dest_states_word[j];
3438	      else
3439		trtable[ch] = dest_states[j];
3440	    }
3441    }
3442  else
3443    {
3444      /* We care about whether the following character is a word
3445	 character, and we are in a multi-byte character set: discern
3446	 by looking at the character code: build two 256-entry
3447	 transition tables, one starting at trtable[0] and one
3448	 starting at trtable[SBC_MAX].  */
3449      trtable = state->word_trtable =
3450	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3451      if (BE (trtable == NULL, 0))
3452	goto out_free;
3453
3454      /* For all characters ch...:  */
3455      for (i = 0; i < BITSET_UINTS; ++i)
3456	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
3457	     elem;
3458	     mask <<= 1, elem >>= 1, ++ch)
3459	  if (BE (elem & 1, 0))
3460	    {
3461	      /* There must be exactly one destination which accepts
3462		 character ch.  See group_nodes_into_DFAstates.  */
3463	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3464		;
3465
3466	      /* j-th destination accepts the word character ch.  */
3467	      trtable[ch] = dest_states[j];
3468	      trtable[ch + SBC_MAX] = dest_states_word[j];
3469	    }
3470    }
3471
3472  /* new line */
3473  if (bitset_contain (acceptable, NEWLINE_CHAR))
3474    {
3475      /* The current state accepts newline character.  */
3476      for (j = 0; j < ndests; ++j)
3477	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3478	  {
3479	    /* k-th destination accepts newline character.  */
3480	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3481	    if (need_word_trtable)
3482	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3483	    /* There must be only one destination which accepts
3484	       newline.  See group_nodes_into_DFAstates.  */
3485	    break;
3486	  }
3487    }
3488
3489  if (dest_states_malloced)
3490    free (dest_states);
3491
3492  re_node_set_free (&follows);
3493  for (i = 0; i < ndests; ++i)
3494    re_node_set_free (dests_node + i);
3495
3496  if (dests_node_malloced)
3497    free (dests_node);
3498
3499  return 1;
3500}
3501
3502/* Group all nodes belonging to STATE into several destinations.
3503   Then for all destinations, set the nodes belonging to the destination
3504   to DESTS_NODE[i] and set the characters accepted by the destination
3505   to DEST_CH[i].  This function return the number of destinations.  */
3506
3507static int
3508group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
3509    re_dfa_t *dfa;
3510    const re_dfastate_t *state;
3511    re_node_set *dests_node;
3512    bitset *dests_ch;
3513{
3514  reg_errcode_t err;
3515  int result;
3516  int i, j, k;
3517  int ndests; /* Number of the destinations from `state'.  */
3518  bitset accepts; /* Characters a node can accept.  */
3519  const re_node_set *cur_nodes = &state->nodes;
3520  bitset_empty (accepts);
3521  ndests = 0;
3522
3523  /* For all the nodes belonging to `state',  */
3524  for (i = 0; i < cur_nodes->nelem; ++i)
3525    {
3526      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3527      re_token_type_t type = node->type;
3528      unsigned int constraint = node->constraint;
3529
3530      /* Enumerate all single byte character this node can accept.  */
3531      if (type == CHARACTER)
3532	bitset_set (accepts, node->opr.c);
3533      else if (type == SIMPLE_BRACKET)
3534	{
3535	  bitset_merge (accepts, node->opr.sbcset);
3536	}
3537      else if (type == OP_PERIOD)
3538	{
3539#ifdef RE_ENABLE_I18N
3540	  if (dfa->mb_cur_max > 1)
3541	    bitset_merge (accepts, dfa->sb_char);
3542	  else
3543#endif
3544	    bitset_set_all (accepts);
3545	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3546	    bitset_clear (accepts, '\n');
3547	  if (dfa->syntax & RE_DOT_NOT_NULL)
3548	    bitset_clear (accepts, '\0');
3549	}
3550#ifdef RE_ENABLE_I18N
3551      else if (type == OP_UTF8_PERIOD)
3552        {
3553	  memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3554	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3555	    bitset_clear (accepts, '\n');
3556	  if (dfa->syntax & RE_DOT_NOT_NULL)
3557	    bitset_clear (accepts, '\0');
3558        }
3559#endif
3560      else
3561	continue;
3562
3563      /* Check the `accepts' and sift the characters which are not
3564	 match it the context.  */
3565      if (constraint)
3566	{
3567	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3568	    {
3569	      int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3570	      bitset_empty (accepts);
3571	      if (accepts_newline)
3572		bitset_set (accepts, NEWLINE_CHAR);
3573	      else
3574		continue;
3575	    }
3576	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3577	    {
3578	      bitset_empty (accepts);
3579	      continue;
3580	    }
3581
3582	  if (constraint & NEXT_WORD_CONSTRAINT)
3583	    {
3584	      unsigned int any_set = 0;
3585	      if (type == CHARACTER && !node->word_char)
3586		{
3587		  bitset_empty (accepts);
3588		  continue;
3589		}
3590#ifdef RE_ENABLE_I18N
3591	      if (dfa->mb_cur_max > 1)
3592		for (j = 0; j < BITSET_UINTS; ++j)
3593		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3594	      else
3595#endif
3596		for (j = 0; j < BITSET_UINTS; ++j)
3597		  any_set |= (accepts[j] &= dfa->word_char[j]);
3598	      if (!any_set)
3599		continue;
3600	    }
3601	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3602	    {
3603	      unsigned int any_set = 0;
3604	      if (type == CHARACTER && node->word_char)
3605		{
3606		  bitset_empty (accepts);
3607		  continue;
3608		}
3609#ifdef RE_ENABLE_I18N
3610	      if (dfa->mb_cur_max > 1)
3611		for (j = 0; j < BITSET_UINTS; ++j)
3612		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3613	      else
3614#endif
3615		for (j = 0; j < BITSET_UINTS; ++j)
3616		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3617	      if (!any_set)
3618		continue;
3619	    }
3620	}
3621
3622      /* Then divide `accepts' into DFA states, or create a new
3623	 state.  Above, we make sure that accepts is not empty.  */
3624      for (j = 0; j < ndests; ++j)
3625	{
3626	  bitset intersec; /* Intersection sets, see below.  */
3627	  bitset remains;
3628	  /* Flags, see below.  */
3629	  int has_intersec, not_subset, not_consumed;
3630
3631	  /* Optimization, skip if this state doesn't accept the character.  */
3632	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3633	    continue;
3634
3635	  /* Enumerate the intersection set of this state and `accepts'.  */
3636	  has_intersec = 0;
3637	  for (k = 0; k < BITSET_UINTS; ++k)
3638	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3639	  /* And skip if the intersection set is empty.  */
3640	  if (!has_intersec)
3641	    continue;
3642
3643	  /* Then check if this state is a subset of `accepts'.  */
3644	  not_subset = not_consumed = 0;
3645	  for (k = 0; k < BITSET_UINTS; ++k)
3646	    {
3647	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3648	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3649	    }
3650
3651	  /* If this state isn't a subset of `accepts', create a
3652	     new group state, which has the `remains'. */
3653	  if (not_subset)
3654	    {
3655	      bitset_copy (dests_ch[ndests], remains);
3656	      bitset_copy (dests_ch[j], intersec);
3657	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3658	      if (BE (err != REG_NOERROR, 0))
3659		goto error_return;
3660	      ++ndests;
3661	    }
3662
3663	  /* Put the position in the current group. */
3664	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3665	  if (BE (result < 0, 0))
3666	    goto error_return;
3667
3668	  /* If all characters are consumed, go to next node. */
3669	  if (!not_consumed)
3670	    break;
3671	}
3672      /* Some characters remain, create a new group. */
3673      if (j == ndests)
3674	{
3675	  bitset_copy (dests_ch[ndests], accepts);
3676	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3677	  if (BE (err != REG_NOERROR, 0))
3678	    goto error_return;
3679	  ++ndests;
3680	  bitset_empty (accepts);
3681	}
3682    }
3683  return ndests;
3684 error_return:
3685  for (j = 0; j < ndests; ++j)
3686    re_node_set_free (dests_node + j);
3687  return -1;
3688}
3689
3690#ifdef RE_ENABLE_I18N
3691/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3692   Return the number of the bytes the node accepts.
3693   STR_IDX is the current index of the input string.
3694
3695   This function handles the nodes which can accept one character, or
3696   one collating element like '.', '[a-z]', opposite to the other nodes
3697   can only accept one byte.  */
3698
3699static int
3700check_node_accept_bytes (dfa, node_idx, input, str_idx)
3701    re_dfa_t *dfa;
3702    int node_idx, str_idx;
3703    const re_string_t *input;
3704{
3705  const re_token_t *node = dfa->nodes + node_idx;
3706  int char_len, elem_len;
3707  int i;
3708
3709  if (BE (node->type == OP_UTF8_PERIOD, 0))
3710    {
3711      unsigned char c = re_string_byte_at (input, str_idx), d;
3712      if (BE (c < 0xc2, 1))
3713	return 0;
3714
3715      if (str_idx + 2 > input->len)
3716	return 0;
3717
3718      d = re_string_byte_at (input, str_idx + 1);
3719      if (c < 0xe0)
3720	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3721      else if (c < 0xf0)
3722	{
3723	  char_len = 3;
3724	  if (c == 0xe0 && d < 0xa0)
3725	    return 0;
3726	}
3727      else if (c < 0xf8)
3728	{
3729	  char_len = 4;
3730	  if (c == 0xf0 && d < 0x90)
3731	    return 0;
3732	}
3733      else if (c < 0xfc)
3734	{
3735	  char_len = 5;
3736	  if (c == 0xf8 && d < 0x88)
3737	    return 0;
3738	}
3739      else if (c < 0xfe)
3740	{
3741	  char_len = 6;
3742	  if (c == 0xfc && d < 0x84)
3743	    return 0;
3744	}
3745      else
3746	return 0;
3747
3748      if (str_idx + char_len > input->len)
3749	return 0;
3750
3751      for (i = 1; i < char_len; ++i)
3752	{
3753	  d = re_string_byte_at (input, str_idx + i);
3754	  if (d < 0x80 || d > 0xbf)
3755	    return 0;
3756	}
3757      return char_len;
3758    }
3759
3760  char_len = re_string_char_size_at (input, str_idx);
3761  if (node->type == OP_PERIOD)
3762    {
3763      if (char_len <= 1)
3764        return 0;
3765      /* FIXME: I don't think this if is needed, as both '\n'
3766	 and '\0' are char_len == 1.  */
3767      /* '.' accepts any one character except the following two cases.  */
3768      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3769	   re_string_byte_at (input, str_idx) == '\n') ||
3770	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3771	   re_string_byte_at (input, str_idx) == '\0'))
3772	return 0;
3773      return char_len;
3774    }
3775
3776  elem_len = re_string_elem_size_at (input, str_idx);
3777  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3778    return 0;
3779
3780  if (node->type == COMPLEX_BRACKET)
3781    {
3782      const re_charset_t *cset = node->opr.mbcset;
3783# ifdef _LIBC
3784      const unsigned char *pin
3785	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3786      int j;
3787      uint32_t nrules;
3788# endif /* _LIBC */
3789      int match_len = 0;
3790      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3791		    ? re_string_wchar_at (input, str_idx) : 0);
3792
3793      /* match with multibyte character?  */
3794      for (i = 0; i < cset->nmbchars; ++i)
3795	if (wc == cset->mbchars[i])
3796	  {
3797	    match_len = char_len;
3798	    goto check_node_accept_bytes_match;
3799	  }
3800      /* match with character_class?  */
3801      for (i = 0; i < cset->nchar_classes; ++i)
3802	{
3803	  wctype_t wt = cset->char_classes[i];
3804	  if (__iswctype (wc, wt))
3805	    {
3806	      match_len = char_len;
3807	      goto check_node_accept_bytes_match;
3808	    }
3809	}
3810
3811# ifdef _LIBC
3812      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3813      if (nrules != 0)
3814	{
3815	  unsigned int in_collseq = 0;
3816	  const int32_t *table, *indirect;
3817	  const unsigned char *weights, *extra;
3818	  const char *collseqwc;
3819	  int32_t idx;
3820	  /* This #include defines a local function!  */
3821#  include <locale/weight.h>
3822
3823	  /* match with collating_symbol?  */
3824	  if (cset->ncoll_syms)
3825	    extra = (const unsigned char *)
3826	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3827	  for (i = 0; i < cset->ncoll_syms; ++i)
3828	    {
3829	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3830	      /* Compare the length of input collating element and
3831		 the length of current collating element.  */
3832	      if (*coll_sym != elem_len)
3833		continue;
3834	      /* Compare each bytes.  */
3835	      for (j = 0; j < *coll_sym; j++)
3836		if (pin[j] != coll_sym[1 + j])
3837		  break;
3838	      if (j == *coll_sym)
3839		{
3840		  /* Match if every bytes is equal.  */
3841		  match_len = j;
3842		  goto check_node_accept_bytes_match;
3843		}
3844	    }
3845
3846	  if (cset->nranges)
3847	    {
3848	      if (elem_len <= char_len)
3849		{
3850		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3851		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3852		}
3853	      else
3854		in_collseq = find_collation_sequence_value (pin, elem_len);
3855	    }
3856	  /* match with range expression?  */
3857	  for (i = 0; i < cset->nranges; ++i)
3858	    if (cset->range_starts[i] <= in_collseq
3859		&& in_collseq <= cset->range_ends[i])
3860	      {
3861		match_len = elem_len;
3862		goto check_node_accept_bytes_match;
3863	      }
3864
3865	  /* match with equivalence_class?  */
3866	  if (cset->nequiv_classes)
3867	    {
3868	      const unsigned char *cp = pin;
3869	      table = (const int32_t *)
3870		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3871	      weights = (const unsigned char *)
3872		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3873	      extra = (const unsigned char *)
3874		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3875	      indirect = (const int32_t *)
3876		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3877	      idx = findidx (&cp);
3878	      if (idx > 0)
3879		for (i = 0; i < cset->nequiv_classes; ++i)
3880		  {
3881		    int32_t equiv_class_idx = cset->equiv_classes[i];
3882		    size_t weight_len = weights[idx];
3883		    if (weight_len == weights[equiv_class_idx])
3884		      {
3885			int cnt = 0;
3886			while (cnt <= weight_len
3887			       && (weights[equiv_class_idx + 1 + cnt]
3888				   == weights[idx + 1 + cnt]))
3889			  ++cnt;
3890			if (cnt > weight_len)
3891			  {
3892			    match_len = elem_len;
3893			    goto check_node_accept_bytes_match;
3894			  }
3895		      }
3896		  }
3897	    }
3898	}
3899      else
3900# endif /* _LIBC */
3901	{
3902	  /* match with range expression?  */
3903#if __GNUC__ >= 2
3904	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3905#else
3906	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3907	  cmp_buf[2] = wc;
3908#endif
3909	  for (i = 0; i < cset->nranges; ++i)
3910	    {
3911	      cmp_buf[0] = cset->range_starts[i];
3912	      cmp_buf[4] = cset->range_ends[i];
3913	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3914		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3915		{
3916		  match_len = char_len;
3917		  goto check_node_accept_bytes_match;
3918		}
3919	    }
3920	}
3921    check_node_accept_bytes_match:
3922      if (!cset->non_match)
3923	return match_len;
3924      else
3925	{
3926	  if (match_len > 0)
3927	    return 0;
3928	  else
3929	    return (elem_len > char_len) ? elem_len : char_len;
3930	}
3931    }
3932  return 0;
3933}
3934
3935# ifdef _LIBC
3936static unsigned int
3937find_collation_sequence_value (mbs, mbs_len)
3938    const unsigned char *mbs;
3939    size_t mbs_len;
3940{
3941  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3942  if (nrules == 0)
3943    {
3944      if (mbs_len == 1)
3945	{
3946	  /* No valid character.  Match it as a single byte character.  */
3947	  const unsigned char *collseq = (const unsigned char *)
3948	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3949	  return collseq[mbs[0]];
3950	}
3951      return UINT_MAX;
3952    }
3953  else
3954    {
3955      int32_t idx;
3956      const unsigned char *extra = (const unsigned char *)
3957	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3958      int32_t extrasize = (const unsigned char *)
3959	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3960
3961      for (idx = 0; idx < extrasize;)
3962	{
3963	  int mbs_cnt, found = 0;
3964	  int32_t elem_mbs_len;
3965	  /* Skip the name of collating element name.  */
3966	  idx = idx + extra[idx] + 1;
3967	  elem_mbs_len = extra[idx++];
3968	  if (mbs_len == elem_mbs_len)
3969	    {
3970	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3971		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3972		  break;
3973	      if (mbs_cnt == elem_mbs_len)
3974		/* Found the entry.  */
3975		found = 1;
3976	    }
3977	  /* Skip the byte sequence of the collating element.  */
3978	  idx += elem_mbs_len;
3979	  /* Adjust for the alignment.  */
3980	  idx = (idx + 3) & ~3;
3981	  /* Skip the collation sequence value.  */
3982	  idx += sizeof (uint32_t);
3983	  /* Skip the wide char sequence of the collating element.  */
3984	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3985	  /* If we found the entry, return the sequence value.  */
3986	  if (found)
3987	    return *(uint32_t *) (extra + idx);
3988	  /* Skip the collation sequence value.  */
3989	  idx += sizeof (uint32_t);
3990	}
3991      return UINT_MAX;
3992    }
3993}
3994# endif /* _LIBC */
3995#endif /* RE_ENABLE_I18N */
3996
3997/* Check whether the node accepts the byte which is IDX-th
3998   byte of the INPUT.  */
3999
4000static int
4001check_node_accept (mctx, node, idx)
4002    const re_match_context_t *mctx;
4003    const re_token_t *node;
4004    int idx;
4005{
4006  unsigned char ch;
4007  ch = re_string_byte_at (&mctx->input, idx);
4008  switch (node->type)
4009    {
4010    case CHARACTER:
4011      if (node->opr.c != ch)
4012        return 0;
4013      break;
4014
4015    case SIMPLE_BRACKET:
4016      if (!bitset_contain (node->opr.sbcset, ch))
4017        return 0;
4018      break;
4019
4020#ifdef RE_ENABLE_I18N
4021    case OP_UTF8_PERIOD:
4022      if (ch >= 0x80)
4023        return 0;
4024      /* FALLTHROUGH */
4025#endif
4026    case OP_PERIOD:
4027      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4028	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4029	return 0;
4030      break;
4031
4032    default:
4033      return 0;
4034    }
4035
4036  if (node->constraint)
4037    {
4038      /* The node has constraints.  Check whether the current context
4039	 satisfies the constraints.  */
4040      unsigned int context = re_string_context_at (&mctx->input, idx,
4041						   mctx->eflags);
4042      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4043	return 0;
4044    }
4045
4046  return 1;
4047}
4048
4049/* Extend the buffers, if the buffers have run out.  */
4050
4051static reg_errcode_t
4052extend_buffers (mctx)
4053     re_match_context_t *mctx;
4054{
4055  reg_errcode_t ret;
4056  re_string_t *pstr = &mctx->input;
4057
4058  /* Double the lengthes of the buffers.  */
4059  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4060  if (BE (ret != REG_NOERROR, 0))
4061    return ret;
4062
4063  if (mctx->state_log != NULL)
4064    {
4065      /* And double the length of state_log.  */
4066      /* XXX We have no indication of the size of this buffer.  If this
4067	 allocation fail we have no indication that the state_log array
4068	 does not have the right size.  */
4069      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4070					      pstr->bufs_len + 1);
4071      if (BE (new_array == NULL, 0))
4072	return REG_ESPACE;
4073      mctx->state_log = new_array;
4074    }
4075
4076  /* Then reconstruct the buffers.  */
4077  if (pstr->icase)
4078    {
4079#ifdef RE_ENABLE_I18N
4080      if (pstr->mb_cur_max > 1)
4081	{
4082	  ret = build_wcs_upper_buffer (pstr);
4083	  if (BE (ret != REG_NOERROR, 0))
4084	    return ret;
4085	}
4086      else
4087#endif /* RE_ENABLE_I18N  */
4088	build_upper_buffer (pstr);
4089    }
4090  else
4091    {
4092#ifdef RE_ENABLE_I18N
4093      if (pstr->mb_cur_max > 1)
4094	build_wcs_buffer (pstr);
4095      else
4096#endif /* RE_ENABLE_I18N  */
4097	{
4098	  if (pstr->trans != NULL)
4099	    re_string_translate_buffer (pstr);
4100	}
4101    }
4102  return REG_NOERROR;
4103}
4104
4105
4106/* Functions for matching context.  */
4107
4108/* Initialize MCTX.  */
4109
4110static reg_errcode_t
4111match_ctx_init (mctx, eflags, n)
4112    re_match_context_t *mctx;
4113    int eflags, n;
4114{
4115  mctx->eflags = eflags;
4116  mctx->match_last = -1;
4117  if (n > 0)
4118    {
4119      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4120      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4121      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4122	return REG_ESPACE;
4123    }
4124  /* Already zero-ed by the caller.
4125     else
4126       mctx->bkref_ents = NULL;
4127     mctx->nbkref_ents = 0;
4128     mctx->nsub_tops = 0;  */
4129  mctx->abkref_ents = n;
4130  mctx->max_mb_elem_len = 1;
4131  mctx->asub_tops = n;
4132  return REG_NOERROR;
4133}
4134
4135/* Clean the entries which depend on the current input in MCTX.
4136   This function must be invoked when the matcher changes the start index
4137   of the input, or changes the input string.  */
4138
4139static void
4140match_ctx_clean (mctx)
4141    re_match_context_t *mctx;
4142{
4143  int st_idx;
4144  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4145    {
4146      int sl_idx;
4147      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4148      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4149	{
4150	  re_sub_match_last_t *last = top->lasts[sl_idx];
4151	  re_free (last->path.array);
4152	  re_free (last);
4153	}
4154      re_free (top->lasts);
4155      if (top->path)
4156	{
4157	  re_free (top->path->array);
4158	  re_free (top->path);
4159	}
4160      free (top);
4161    }
4162
4163  mctx->nsub_tops = 0;
4164  mctx->nbkref_ents = 0;
4165}
4166
4167/* Free all the memory associated with MCTX.  */
4168
4169static void
4170match_ctx_free (mctx)
4171    re_match_context_t *mctx;
4172{
4173  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4174  match_ctx_clean (mctx);
4175  re_free (mctx->sub_tops);
4176  re_free (mctx->bkref_ents);
4177}
4178
4179/* Add a new backreference entry to MCTX.
4180   Note that we assume that caller never call this function with duplicate
4181   entry, and call with STR_IDX which isn't smaller than any existing entry.
4182*/
4183
4184static reg_errcode_t
4185match_ctx_add_entry (mctx, node, str_idx, from, to)
4186     re_match_context_t *mctx;
4187     int node, str_idx, from, to;
4188{
4189  if (mctx->nbkref_ents >= mctx->abkref_ents)
4190    {
4191      struct re_backref_cache_entry* new_entry;
4192      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4193			      mctx->abkref_ents * 2);
4194      if (BE (new_entry == NULL, 0))
4195	{
4196	  re_free (mctx->bkref_ents);
4197	  return REG_ESPACE;
4198	}
4199      mctx->bkref_ents = new_entry;
4200      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4201	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4202      mctx->abkref_ents *= 2;
4203    }
4204  if (mctx->nbkref_ents > 0
4205      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4206    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4207
4208  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4209  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4210  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4211  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4212
4213  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4214     If bit N is clear, means that this entry won't epsilon-transition to
4215     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4216     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4217     such node.
4218
4219     A backreference does not epsilon-transition unless it is empty, so set
4220     to all zeros if FROM != TO.  */
4221  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4222    = (from == to ? ~0 : 0);
4223
4224  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4225  if (mctx->max_mb_elem_len < to - from)
4226    mctx->max_mb_elem_len = to - from;
4227  return REG_NOERROR;
4228}
4229
4230/* Search for the first entry which has the same str_idx, or -1 if none is
4231   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4232
4233static int
4234search_cur_bkref_entry (mctx, str_idx)
4235     re_match_context_t *mctx;
4236     int str_idx;
4237{
4238  int left, right, mid, last;
4239  last = right = mctx->nbkref_ents;
4240  for (left = 0; left < right;)
4241    {
4242      mid = (left + right) / 2;
4243      if (mctx->bkref_ents[mid].str_idx < str_idx)
4244	left = mid + 1;
4245      else
4246	right = mid;
4247    }
4248  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4249    return left;
4250  else
4251    return -1;
4252}
4253
4254/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4255   at STR_IDX.  */
4256
4257static reg_errcode_t
4258match_ctx_add_subtop (mctx, node, str_idx)
4259     re_match_context_t *mctx;
4260     int node, str_idx;
4261{
4262#ifdef DEBUG
4263  assert (mctx->sub_tops != NULL);
4264  assert (mctx->asub_tops > 0);
4265#endif
4266  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4267    {
4268      int new_asub_tops = mctx->asub_tops * 2;
4269      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4270						   re_sub_match_top_t *,
4271						   new_asub_tops);
4272      if (BE (new_array == NULL, 0))
4273	return REG_ESPACE;
4274      mctx->sub_tops = new_array;
4275      mctx->asub_tops = new_asub_tops;
4276    }
4277  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4278  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4279    return REG_ESPACE;
4280  mctx->sub_tops[mctx->nsub_tops]->node = node;
4281  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4282  return REG_NOERROR;
4283}
4284
4285/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4286   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4287
4288static re_sub_match_last_t *
4289match_ctx_add_sublast (subtop, node, str_idx)
4290     re_sub_match_top_t *subtop;
4291     int node, str_idx;
4292{
4293  re_sub_match_last_t *new_entry;
4294  if (BE (subtop->nlasts == subtop->alasts, 0))
4295    {
4296      int new_alasts = 2 * subtop->alasts + 1;
4297      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4298						    re_sub_match_last_t *,
4299						    new_alasts);
4300      if (BE (new_array == NULL, 0))
4301	return NULL;
4302      subtop->lasts = new_array;
4303      subtop->alasts = new_alasts;
4304    }
4305  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4306  if (BE (new_entry != NULL, 1))
4307    {
4308      subtop->lasts[subtop->nlasts] = new_entry;
4309      new_entry->node = node;
4310      new_entry->str_idx = str_idx;
4311      ++subtop->nlasts;
4312    }
4313  return new_entry;
4314}
4315
4316static void
4317sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
4318    re_sift_context_t *sctx;
4319    re_dfastate_t **sifted_sts, **limited_sts;
4320    int last_node, last_str_idx;
4321{
4322  sctx->sifted_states = sifted_sts;
4323  sctx->limited_states = limited_sts;
4324  sctx->last_node = last_node;
4325  sctx->last_str_idx = last_str_idx;
4326  re_node_set_init_empty (&sctx->limits);
4327}
4328