1/* Extended regular expression matching and search library.
2   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, write to the Free
18   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19   02111-1307 USA.  */
20
21static reg_errcode_t match_ctx_init _RE_ARGS((re_match_context_t *cache, int eflags,
22				     re_string_t *input, int n));
23static void match_ctx_clean _RE_ARGS((re_match_context_t *mctx));
24static void match_ctx_free _RE_ARGS((re_match_context_t *cache));
25static void match_ctx_free_subtops _RE_ARGS((re_match_context_t *mctx));
26static reg_errcode_t match_ctx_add_entry _RE_ARGS((re_match_context_t *cache, int node,
27                                          int str_idx, int from, int to));
28static int search_cur_bkref_entry _RE_ARGS((re_match_context_t *mctx, int str_idx));
29static void match_ctx_clear_flag _RE_ARGS((re_match_context_t *mctx));
30static reg_errcode_t match_ctx_add_subtop _RE_ARGS((re_match_context_t *mctx, int node,
31					   int str_idx));
32static re_sub_match_last_t * match_ctx_add_sublast _RE_ARGS((re_sub_match_top_t *subtop,
33						   int node, int str_idx));
34static void sift_ctx_init _RE_ARGS((re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35			   re_dfastate_t **limited_sts, int last_node,
36                           int last_str_idx, int check_subexp));
37static reg_errcode_t re_search_internal _RE_ARGS((const regex_t *preg,
38					 const char *string, int length,
39					 int start, int range, int stop,
40					 size_t nmatch, regmatch_t pmatch[],
41                                         int eflags));
42static int re_search_2_stub _RE_ARGS((struct re_pattern_buffer *bufp,
43			     const char *string1, int length1,
44			     const char *string2, int length2,
45			     int start, int range, struct re_registers *regs,
46                             int stop, int ret_len));
47static int re_search_stub _RE_ARGS((struct re_pattern_buffer *bufp,
48			   const char *string, int length, int start,
49			   int range, int stop, struct re_registers *regs,
50                           int ret_len));
51static unsigned re_copy_regs _RE_ARGS((struct re_registers *regs, regmatch_t *pmatch,
52                              int nregs, int regs_allocated));
53static inline re_dfastate_t *acquire_init_state_context _RE_ARGS((reg_errcode_t *err,
54							 const regex_t *preg,
55							 const re_match_context_t *mctx,
56                                                         int idx));
57static reg_errcode_t prune_impossible_nodes _RE_ARGS((const regex_t *preg,
58					     re_match_context_t *mctx));
59static int check_matching _RE_ARGS((const regex_t *preg, re_match_context_t *mctx,
60                           int fl_search, int fl_longest_match));
61static int check_halt_node_context _RE_ARGS((const re_dfa_t *dfa, int node,
62                                    unsigned int context));
63static int check_halt_state_context _RE_ARGS((const regex_t *preg,
64				     const re_dfastate_t *state,
65                                     const re_match_context_t *mctx, int idx));
66static void update_regs _RE_ARGS((re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
67                         int cur_idx, int nmatch));
68static int proceed_next_node _RE_ARGS((const regex_t *preg, int nregs, regmatch_t *regs,
69			      const re_match_context_t *mctx,
70			      int *pidx, int node, re_node_set *eps_via_nodes,
71                              struct re_fail_stack_t *fs));
72static reg_errcode_t push_fail_stack _RE_ARGS((struct re_fail_stack_t *fs,
73				      int str_idx, int *dests, int nregs,
74				      regmatch_t *regs,
75                                      re_node_set *eps_via_nodes));
76static int pop_fail_stack _RE_ARGS((struct re_fail_stack_t *fs, int *pidx, int nregs,
77                           regmatch_t *regs, re_node_set *eps_via_nodes));
78static reg_errcode_t set_regs _RE_ARGS((const regex_t *preg,
79			       const re_match_context_t *mctx,
80			       size_t nmatch, regmatch_t *pmatch,
81                               int fl_backtrack));
82static reg_errcode_t free_fail_stack_return _RE_ARGS((struct re_fail_stack_t *fs));
83
84#ifdef RE_ENABLE_I18N
85static int sift_states_iter_mb _RE_ARGS((const regex_t *preg,
86				const re_match_context_t *mctx,
87				re_sift_context_t *sctx,
88                                int node_idx, int str_idx, int max_str_idx));
89#endif /* RE_ENABLE_I18N */
90static reg_errcode_t sift_states_backward _RE_ARGS((const regex_t *preg,
91					   re_match_context_t *mctx,
92                                           re_sift_context_t *sctx));
93static reg_errcode_t update_cur_sifted_state _RE_ARGS((const regex_t *preg,
94					      re_match_context_t *mctx,
95					      re_sift_context_t *sctx,
96					      int str_idx,
97                                              re_node_set *dest_nodes));
98static reg_errcode_t add_epsilon_src_nodes _RE_ARGS((re_dfa_t *dfa,
99					    re_node_set *dest_nodes,
100                                            const re_node_set *candidates));
101static reg_errcode_t sub_epsilon_src_nodes _RE_ARGS((re_dfa_t *dfa, int node,
102					    re_node_set *dest_nodes,
103                                            const re_node_set *and_nodes));
104static int check_dst_limits _RE_ARGS((re_dfa_t *dfa, re_node_set *limits,
105			     re_match_context_t *mctx, int dst_node,
106                             int dst_idx, int src_node, int src_idx));
107static int check_dst_limits_calc_pos _RE_ARGS((re_dfa_t *dfa, re_match_context_t *mctx,
108				      int limit, re_node_set *eclosures,
109                                      int subexp_idx, int node, int str_idx));
110static reg_errcode_t check_subexp_limits _RE_ARGS((re_dfa_t *dfa,
111					  re_node_set *dest_nodes,
112					  const re_node_set *candidates,
113					  re_node_set *limits,
114					  struct re_backref_cache_entry *bkref_ents,
115                                          int str_idx));
116static reg_errcode_t sift_states_bkref _RE_ARGS((const regex_t *preg,
117					re_match_context_t *mctx,
118					re_sift_context_t *sctx,
119                                        int str_idx, re_node_set *dest_nodes));
120static reg_errcode_t clean_state_log_if_need _RE_ARGS((re_match_context_t *mctx,
121                                              int next_state_log_idx));
122static reg_errcode_t merge_state_array _RE_ARGS((re_dfa_t *dfa, re_dfastate_t **dst,
123                                        re_dfastate_t **src, int num));
124static re_dfastate_t *transit_state _RE_ARGS((reg_errcode_t *err, const regex_t *preg,
125				     re_match_context_t *mctx,
126                                     re_dfastate_t *state, int fl_search));
127static reg_errcode_t check_subexp_matching_top _RE_ARGS((re_dfa_t *dfa,
128						re_match_context_t *mctx,
129						re_node_set *cur_nodes,
130						int str_idx));
131static re_dfastate_t *transit_state_sb _RE_ARGS((reg_errcode_t *err, const regex_t *preg,
132					re_dfastate_t *pstate,
133					int fl_search,
134                                        re_match_context_t *mctx));
135#ifdef RE_ENABLE_I18N
136static reg_errcode_t transit_state_mb _RE_ARGS((const regex_t *preg,
137				       re_dfastate_t *pstate,
138                                       re_match_context_t *mctx));
139#endif /* RE_ENABLE_I18N */
140static reg_errcode_t transit_state_bkref _RE_ARGS((const regex_t *preg,
141					  re_node_set *nodes,
142					  re_match_context_t *mctx));
143static reg_errcode_t get_subexp _RE_ARGS((const regex_t *preg, re_match_context_t *mctx,
144				 int bkref_node, int bkref_str_idx));
145static reg_errcode_t get_subexp_sub _RE_ARGS((const regex_t *preg,
146				     re_match_context_t *mctx,
147				     re_sub_match_top_t *sub_top,
148				     re_sub_match_last_t *sub_last,
149				     int bkref_node, int bkref_str));
150static int find_subexp_node _RE_ARGS((re_dfa_t *dfa, re_node_set *nodes,
151			     int subexp_idx, int fl_open));
152static reg_errcode_t check_arrival _RE_ARGS((const regex_t *preg,
153				    re_match_context_t *mctx,
154				    state_array_t *path, int top_node,
155				    int top_str, int last_node, int last_str,
156				    int fl_open));
157static reg_errcode_t check_arrival_add_next_nodes _RE_ARGS((const regex_t *preg,
158						   re_dfa_t *dfa,
159						   re_match_context_t *mctx,
160						   int str_idx,
161						   re_node_set *cur_nodes,
162						   re_node_set *next_nodes));
163static reg_errcode_t check_arrival_expand_ecl _RE_ARGS((re_dfa_t *dfa,
164					       re_node_set *cur_nodes,
165					       int ex_subexp, int fl_open));
166static reg_errcode_t check_arrival_expand_ecl_sub _RE_ARGS((re_dfa_t *dfa,
167						   re_node_set *dst_nodes,
168						   int target, int ex_subexp,
169						   int fl_open));
170static reg_errcode_t expand_bkref_cache _RE_ARGS((const regex_t *preg,
171					 re_match_context_t *mctx,
172					 re_node_set *cur_nodes, int cur_str,
173					 int last_str, int subexp_num,
174					 int fl_open));
175static re_dfastate_t **build_trtable _RE_ARGS((const regex_t *dfa,
176				      const re_dfastate_t *state,
177                                      int fl_search));
178#ifdef RE_ENABLE_I18N
179static int check_node_accept_bytes _RE_ARGS((const regex_t *preg, int node_idx,
180                                    const re_string_t *input, int idx));
181# ifdef _LIBC
182static unsigned int find_collation_sequence_value _RE_ARGS((const unsigned char *mbs,
183                                                   size_t name_len));
184# endif /* _LIBC */
185#endif /* RE_ENABLE_I18N */
186static int group_nodes_into_DFAstates _RE_ARGS((const regex_t *dfa,
187				       const re_dfastate_t *state,
188				       re_node_set *states_node,
189                                       bitset *states_ch));
190static int check_node_accept _RE_ARGS((const regex_t *preg, const re_token_t *node,
191                              const re_match_context_t *mctx, int idx));
192static reg_errcode_t extend_buffers _RE_ARGS((re_match_context_t *mctx));
193
194/* Entry point for POSIX code.  */
195
196/* regexec searches for a given pattern, specified by PREG, in the
197   string STRING.
198
199   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
200   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
201   least NMATCH elements, and we set them to the offsets of the
202   corresponding matched substrings.
203
204   EFLAGS specifies `execution flags' which affect matching: if
205   REG_NOTBOL is set, then ^ does not match at the beginning of the
206   string; if REG_NOTEOL is set, then $ does not match at the end.
207
208   We return 0 if we find a match and REG_NOMATCH if not.  */
209
210int
211regexec (preg, string, nmatch, pmatch, eflags)
212    const regex_t *__restrict preg;
213    const char *__restrict string;
214    size_t nmatch;
215    regmatch_t pmatch[];
216    int eflags;
217{
218  reg_errcode_t err;
219  int length = strlen (string);
220  if (preg->no_sub)
221    err = re_search_internal (preg, string, length, 0, length, length, 0,
222			      NULL, eflags);
223  else
224    err = re_search_internal (preg, string, length, 0, length, length, nmatch,
225			      pmatch, eflags);
226  return err != REG_NOERROR;
227}
228#ifdef _LIBC
229weak_alias (__regexec, regexec)
230#endif
231
232/* Entry points for GNU code.  */
233
234/* re_match, re_search, re_match_2, re_search_2
235
236   The former two functions operate on STRING with length LENGTH,
237   while the later two operate on concatenation of STRING1 and STRING2
238   with lengths LENGTH1 and LENGTH2, respectively.
239
240   re_match() matches the compiled pattern in BUFP against the string,
241   starting at index START.
242
243   re_search() first tries matching at index START, then it tries to match
244   starting from index START + 1, and so on.  The last start position tried
245   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
246   way as re_match().)
247
248   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
249   the first STOP characters of the concatenation of the strings should be
250   concerned.
251
252   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
253   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
254   computed relative to the concatenation, not relative to the individual
255   strings.)
256
257   On success, re_match* functions return the length of the match, re_search*
258   return the position of the start of the match.  Return value -1 means no
259   match was found and -2 indicates an internal error.  */
260
261int
262re_match (bufp, string, length, start, regs)
263    struct re_pattern_buffer *bufp;
264    const char *string;
265    int length, start;
266    struct re_registers *regs;
267{
268  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
269}
270#ifdef _LIBC
271weak_alias (__re_match, re_match)
272#endif
273
274int
275re_search (bufp, string, length, start, range, regs)
276    struct re_pattern_buffer *bufp;
277    const char *string;
278    int length, start, range;
279    struct re_registers *regs;
280{
281  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
282}
283#ifdef _LIBC
284weak_alias (__re_search, re_search)
285#endif
286
287int
288re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
289    struct re_pattern_buffer *bufp;
290    const char *string1, *string2;
291    int length1, length2, start, stop;
292    struct re_registers *regs;
293{
294  return re_search_2_stub (bufp, string1, length1, string2, length2,
295			   start, 0, regs, stop, 1);
296}
297#ifdef _LIBC
298weak_alias (__re_match_2, re_match_2)
299#endif
300
301int
302re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
303    struct re_pattern_buffer *bufp;
304    const char *string1, *string2;
305    int length1, length2, start, range, stop;
306    struct re_registers *regs;
307{
308  return re_search_2_stub (bufp, string1, length1, string2, length2,
309			   start, range, regs, stop, 0);
310}
311#ifdef _LIBC
312weak_alias (__re_search_2, re_search_2)
313#endif
314
315static int
316re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
317		  stop, ret_len)
318    struct re_pattern_buffer *bufp;
319    const char *string1, *string2;
320    int length1, length2, start, range, stop, ret_len;
321    struct re_registers *regs;
322{
323  const char *str;
324  int rval;
325  int len = length1 + length2;
326  int free_str = 0;
327
328  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
329    return -2;
330
331  /* Concatenate the strings.  */
332  if (length2 > 0)
333    if (length1 > 0)
334      {
335	char *s = re_malloc (char, len);
336
337	if (BE (s == NULL, 0))
338	  return -2;
339	memcpy (s, string1, length1);
340	memcpy (s + length1, string2, length2);
341	str = s;
342	free_str = 1;
343      }
344    else
345      str = string2;
346  else
347    str = string1;
348
349  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
350			 ret_len);
351  if (free_str)
352    re_free ((char *) str);
353  return rval;
354}
355
356/* The parameters have the same meaning as those of re_search.
357   Additional parameters:
358   If RET_LEN is nonzero the length of the match is returned (re_match style);
359   otherwise the position of the match is returned.  */
360
361static int
362re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
363    struct re_pattern_buffer *bufp;
364    const char *string;
365    int length, start, range, stop, ret_len;
366    struct re_registers *regs;
367{
368  reg_errcode_t result;
369  regmatch_t *pmatch;
370  int nregs, rval;
371  int eflags = 0;
372
373  /* Check for out-of-range.  */
374  if (BE (start < 0 || start > length, 0))
375    return -1;
376  if (BE (start + range > length, 0))
377    range = length - start;
378  else if (BE (start + range < 0, 0))
379    range = -start;
380
381  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
382  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
383
384  /* Compile fastmap if we haven't yet.  */
385  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
386    re_compile_fastmap (bufp);
387
388  if (BE (bufp->no_sub, 0))
389    regs = NULL;
390
391  /* We need at least 1 register.  */
392  if (regs == NULL)
393    nregs = 1;
394  else if (BE (bufp->regs_allocated == REGS_FIXED &&
395	       regs->num_regs < bufp->re_nsub + 1, 0))
396    {
397      nregs = regs->num_regs;
398      if (BE (nregs < 1, 0))
399	{
400	  /* Nothing can be copied to regs.  */
401	  regs = NULL;
402	  nregs = 1;
403	}
404    }
405  else
406    nregs = bufp->re_nsub + 1;
407  pmatch = re_malloc (regmatch_t, nregs);
408  if (BE (pmatch == NULL, 0))
409    return -2;
410
411  result = re_search_internal (bufp, string, length, start, range, stop,
412			       nregs, pmatch, eflags);
413
414  rval = 0;
415
416  /* I hope we needn't fill ther regs with -1's when no match was found.  */
417  if (result != REG_NOERROR)
418    rval = -1;
419  else if (regs != NULL)
420    {
421      /* If caller wants register contents data back, copy them.  */
422      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
423					   bufp->regs_allocated);
424      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
425	rval = -2;
426    }
427
428  if (BE (rval == 0, 1))
429    {
430      if (ret_len)
431	{
432	  assert (pmatch[0].rm_so == start);
433	  rval = pmatch[0].rm_eo - start;
434	}
435      else
436	rval = pmatch[0].rm_so;
437    }
438  re_free (pmatch);
439  return rval;
440}
441
442static unsigned
443re_copy_regs (regs, pmatch, nregs, regs_allocated)
444    struct re_registers *regs;
445    regmatch_t *pmatch;
446    int nregs, regs_allocated;
447{
448  int rval = REGS_REALLOCATE;
449  int i;
450  int need_regs = nregs + 1;
451  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
452     uses.  */
453
454  /* Have the register data arrays been allocated?  */
455  if (regs_allocated == REGS_UNALLOCATED)
456    { /* No.  So allocate them with malloc.  */
457      regs->start = re_malloc (regoff_t, need_regs);
458      if (BE (regs->start == NULL, 0))
459	return REGS_UNALLOCATED;
460      regs->end = re_malloc (regoff_t, need_regs);
461      if (BE (regs->end == NULL, 0))
462	{
463	  re_free (regs->start);
464	  return REGS_UNALLOCATED;
465	}
466      regs->num_regs = need_regs;
467    }
468  else if (regs_allocated == REGS_REALLOCATE)
469    { /* Yes.  If we need more elements than were already
470	 allocated, reallocate them.  If we need fewer, just
471	 leave it alone.  */
472      if (need_regs > regs->num_regs)
473	{
474	  regs->start = re_realloc (regs->start, regoff_t, need_regs);
475	  if (BE (regs->start == NULL, 0))
476	    {
477	      if (regs->end != NULL)
478		re_free (regs->end);
479	      return REGS_UNALLOCATED;
480	    }
481	  regs->end = re_realloc (regs->end, regoff_t, need_regs);
482	  if (BE (regs->end == NULL, 0))
483	    {
484	      re_free (regs->start);
485	      return REGS_UNALLOCATED;
486	    }
487	  regs->num_regs = need_regs;
488	}
489    }
490  else
491    {
492      assert (regs_allocated == REGS_FIXED);
493      /* This function may not be called with REGS_FIXED and nregs too big.  */
494      assert (regs->num_regs >= nregs);
495      rval = REGS_FIXED;
496    }
497
498  /* Copy the regs.  */
499  for (i = 0; i < nregs; ++i)
500    {
501      regs->start[i] = pmatch[i].rm_so;
502      regs->end[i] = pmatch[i].rm_eo;
503    }
504  for ( ; i < regs->num_regs; ++i)
505    regs->start[i] = regs->end[i] = -1;
506
507  return rval;
508}
509
510/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
511   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
512   this memory for recording register information.  STARTS and ENDS
513   must be allocated using the malloc library routine, and must each
514   be at least NUM_REGS * sizeof (regoff_t) bytes long.
515
516   If NUM_REGS == 0, then subsequent matches should allocate their own
517   register data.
518
519   Unless this function is called, the first search or match using
520   PATTERN_BUFFER will allocate its own register data, without
521   freeing the old data.  */
522
523void
524re_set_registers (bufp, regs, num_regs, starts, ends)
525    struct re_pattern_buffer *bufp;
526    struct re_registers *regs;
527    unsigned num_regs;
528    regoff_t *starts, *ends;
529{
530  if (num_regs)
531    {
532      bufp->regs_allocated = REGS_REALLOCATE;
533      regs->num_regs = num_regs;
534      regs->start = starts;
535      regs->end = ends;
536    }
537  else
538    {
539      bufp->regs_allocated = REGS_UNALLOCATED;
540      regs->num_regs = 0;
541      regs->start = regs->end = (regoff_t *) 0;
542    }
543}
544#ifdef _LIBC
545weak_alias (__re_set_registers, re_set_registers)
546#endif
547
548/* Entry points compatible with 4.2 BSD regex library.  We don't define
549   them unless specifically requested.  */
550
551#if defined _REGEX_RE_COMP || defined _LIBC
552int
553# ifdef _LIBC
554weak_function
555# endif
556re_exec (s)
557     const char *s;
558{
559  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
560}
561#endif /* _REGEX_RE_COMP */
562
563static re_node_set empty_set;
564
565/* Internal entry point.  */
566
567/* Searches for a compiled pattern PREG in the string STRING, whose
568   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
569   mingings with regexec.  START, and RANGE have the same meanings
570   with re_search.
571   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
572   otherwise return the error code.
573   Note: We assume front end functions already check ranges.
574   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
575
576static reg_errcode_t
577re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
578		    eflags)
579    const regex_t *preg;
580    const char *string;
581    int length, start, range, stop, eflags;
582    size_t nmatch;
583    regmatch_t pmatch[];
584{
585  reg_errcode_t err;
586  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
587  re_string_t input;
588  int left_lim, right_lim, incr;
589  int fl_longest_match, match_first, match_last = -1;
590  int fast_translate, sb;
591  re_match_context_t mctx;
592  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
593		    && range && !preg->can_be_null) ? preg->fastmap : NULL);
594
595  /* Check if the DFA haven't been compiled.  */
596  if (BE (preg->used == 0 || dfa->init_state == NULL
597	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
598	  || dfa->init_state_begbuf == NULL, 0))
599    return REG_NOMATCH;
600
601  re_node_set_init_empty (&empty_set);
602  memset (&mctx, '\0', sizeof (re_match_context_t));
603
604  /* We must check the longest matching, if nmatch > 0.  */
605  fl_longest_match = (nmatch != 0 || dfa->nbackref);
606
607  err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
608			    preg->translate, preg->syntax & RE_ICASE);
609  if (BE (err != REG_NOERROR, 0))
610    goto free_return;
611  input.stop = stop;
612
613  err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
614  if (BE (err != REG_NOERROR, 0))
615    goto free_return;
616
617  /* We will log all the DFA states through which the dfa pass,
618     if nmatch > 1, or this dfa has "multibyte node", which is a
619     back-reference or a node which can accept multibyte character or
620     multi character collating element.  */
621  if (nmatch > 1 || dfa->has_mb_node)
622    {
623      mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
624      if (BE (mctx.state_log == NULL, 0))
625	{
626	  err = REG_ESPACE;
627	  goto free_return;
628	}
629    }
630  else
631    mctx.state_log = NULL;
632
633#ifdef DEBUG
634  /* We assume front-end functions already check them.  */
635  assert (start + range >= 0 && start + range <= length);
636#endif
637
638  match_first = start;
639  input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
640		       : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
641
642  /* Check incrementally whether of not the input string match.  */
643  incr = (range < 0) ? -1 : 1;
644  left_lim = (range < 0) ? start + range : start;
645  right_lim = (range < 0) ? start : start + range;
646#ifdef RE_ENABLE_I18N
647  sb = re_mb_cur_max == 1;
648#else
649  sb = 1;
650#endif
651  fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
652
653  for (;;)
654    {
655      /* At first get the current byte from input string.  */
656      if (fastmap)
657	{
658	  if (BE (fast_translate, 1))
659	    {
660	      unsigned RE_TRANSLATE_TYPE t
661		= (unsigned RE_TRANSLATE_TYPE) preg->translate;
662	      if (BE (range >= 0, 1))
663		{
664		  if (BE (t != NULL, 0))
665		    {
666		      while (BE (match_first < right_lim, 1)
667			     && !fastmap[t[(unsigned char) string[match_first]]])
668			++match_first;
669		    }
670		  else
671		    {
672		      while (BE (match_first < right_lim, 1)
673			     && !fastmap[(unsigned char) string[match_first]])
674			++match_first;
675		    }
676		  if (BE (match_first == right_lim, 0))
677		    {
678		      int ch = match_first >= length
679			       ? 0 : (unsigned char) string[match_first];
680		      if (!fastmap[t ? t[ch] : ch])
681			break;
682		    }
683		}
684	      else
685		{
686		  while (match_first >= left_lim)
687		    {
688		      int ch = match_first >= length
689			       ? 0 : (unsigned char) string[match_first];
690		      if (fastmap[t ? t[ch] : ch])
691			break;
692		      --match_first;
693		    }
694		  if (match_first < left_lim)
695		    break;
696		}
697	    }
698	  else
699	    {
700	      int ch;
701
702	      do
703		{
704		  /* In this case, we can't determine easily the current byte,
705		     since it might be a component byte of a multibyte
706		     character.  Then we use the constructed buffer
707		     instead.  */
708		  /* If MATCH_FIRST is out of the valid range, reconstruct the
709		     buffers.  */
710		  if (input.raw_mbs_idx + input.valid_len <= match_first
711		      || match_first < input.raw_mbs_idx)
712		    {
713		      err = re_string_reconstruct (&input, match_first, eflags,
714						   preg->newline_anchor);
715		      if (BE (err != REG_NOERROR, 0))
716			goto free_return;
717		    }
718		  /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
719		     Note that MATCH_FIRST must not be smaller than 0.  */
720		  ch = ((match_first >= length) ? 0
721		       : re_string_byte_at (&input,
722					    match_first - input.raw_mbs_idx));
723		  if (fastmap[ch])
724		    break;
725		  match_first += incr;
726		}
727	      while (match_first >= left_lim && match_first <= right_lim);
728	      if (! fastmap[ch])
729		break;
730	    }
731	}
732
733      /* Reconstruct the buffers so that the matcher can assume that
734	 the matching starts from the begining of the buffer.  */
735      err = re_string_reconstruct (&input, match_first, eflags,
736				   preg->newline_anchor);
737      if (BE (err != REG_NOERROR, 0))
738	goto free_return;
739#ifdef RE_ENABLE_I18N
740     /* Eliminate it when it is a component of a multibyte character
741	 and isn't the head of a multibyte character.  */
742      if (sb || re_string_first_byte (&input, 0))
743#endif
744	{
745	  /* It seems to be appropriate one, then use the matcher.  */
746	  /* We assume that the matching starts from 0.  */
747	  mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
748	  match_last = check_matching (preg, &mctx, 0, fl_longest_match);
749	  if (match_last != -1)
750	    {
751	      if (BE (match_last == -2, 0))
752		{
753		  err = REG_ESPACE;
754		  goto free_return;
755		}
756	      else
757		{
758		  mctx.match_last = match_last;
759		  if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
760		    {
761		      re_dfastate_t *pstate = mctx.state_log[match_last];
762		      mctx.last_node = check_halt_state_context (preg, pstate,
763								 &mctx, match_last);
764		    }
765		  if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
766		      || dfa->nbackref)
767		    {
768		      err = prune_impossible_nodes (preg, &mctx);
769		      if (err == REG_NOERROR)
770			break;
771		      if (BE (err != REG_NOMATCH, 0))
772			goto free_return;
773		    }
774		  else
775		    break; /* We found a matching.  */
776		}
777	    }
778	  match_ctx_clean (&mctx);
779	}
780      /* Update counter.  */
781      match_first += incr;
782      if (match_first < left_lim || right_lim < match_first)
783	break;
784    }
785
786  /* Set pmatch[] if we need.  */
787  if (match_last != -1 && nmatch > 0)
788    {
789      int reg_idx;
790
791      /* Initialize registers.  */
792      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
793	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
794
795      /* Set the points where matching start/end.  */
796      pmatch[0].rm_so = 0;
797      pmatch[0].rm_eo = mctx.match_last;
798
799      if (!preg->no_sub && nmatch > 1)
800	{
801	  err = set_regs (preg, &mctx, nmatch, pmatch,
802			  dfa->has_plural_match && dfa->nbackref > 0);
803	  if (BE (err != REG_NOERROR, 0))
804	    goto free_return;
805	}
806
807      /* At last, add the offset to the each registers, since we slided
808	 the buffers so that We can assume that the matching starts from 0.  */
809      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
810	if (pmatch[reg_idx].rm_so != -1)
811	  {
812	    pmatch[reg_idx].rm_so += match_first;
813	    pmatch[reg_idx].rm_eo += match_first;
814	  }
815    }
816  err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
817 free_return:
818  re_free (mctx.state_log);
819  if (dfa->nbackref)
820    match_ctx_free (&mctx);
821  re_string_destruct (&input);
822  return err;
823}
824
825static reg_errcode_t
826prune_impossible_nodes (preg, mctx)
827     const regex_t *preg;
828     re_match_context_t *mctx;
829{
830  int halt_node, match_last;
831  reg_errcode_t ret;
832  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
833  re_dfastate_t **sifted_states;
834  re_dfastate_t **lim_states = NULL;
835  re_sift_context_t sctx;
836#ifdef DEBUG
837  assert (mctx->state_log != NULL);
838#endif
839  match_last = mctx->match_last;
840  halt_node = mctx->last_node;
841  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
842  if (BE (sifted_states == NULL, 0))
843    {
844      ret = REG_ESPACE;
845      goto free_return;
846    }
847  if (dfa->nbackref)
848    {
849      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
850      if (BE (lim_states == NULL, 0))
851	{
852	  ret = REG_ESPACE;
853	  goto free_return;
854	}
855      while (1)
856	{
857	  memset (lim_states, '\0',
858		  sizeof (re_dfastate_t *) * (match_last + 1));
859	  match_ctx_clear_flag (mctx);
860	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
861			 match_last, 0);
862	  ret = sift_states_backward (preg, mctx, &sctx);
863	  re_node_set_free (&sctx.limits);
864	  if (BE (ret != REG_NOERROR, 0))
865	      goto free_return;
866	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
867	    break;
868	  do
869	    {
870	      --match_last;
871	      if (match_last < 0)
872		{
873		  ret = REG_NOMATCH;
874		  goto free_return;
875		}
876	    } while (!mctx->state_log[match_last]->halt);
877	  halt_node = check_halt_state_context (preg,
878						mctx->state_log[match_last],
879						mctx, match_last);
880	}
881      ret = merge_state_array (dfa, sifted_states, lim_states,
882			       match_last + 1);
883      re_free (lim_states);
884      lim_states = NULL;
885      if (BE (ret != REG_NOERROR, 0))
886	goto free_return;
887    }
888  else
889    {
890      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
891		     match_last, 0);
892      ret = sift_states_backward (preg, mctx, &sctx);
893      re_node_set_free (&sctx.limits);
894      if (BE (ret != REG_NOERROR, 0))
895	goto free_return;
896    }
897  re_free (mctx->state_log);
898  mctx->state_log = sifted_states;
899  sifted_states = NULL;
900  mctx->last_node = halt_node;
901  mctx->match_last = match_last;
902  ret = REG_NOERROR;
903 free_return:
904  re_free (sifted_states);
905  re_free (lim_states);
906  return ret;
907}
908
909/* Acquire an initial state and return it.
910   We must select appropriate initial state depending on the context,
911   since initial states may have constraints like "\<", "^", etc..  */
912
913static inline re_dfastate_t *
914acquire_init_state_context (err, preg, mctx, idx)
915     reg_errcode_t *err;
916     const regex_t *preg;
917     const re_match_context_t *mctx;
918     int idx;
919{
920  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
921
922  *err = REG_NOERROR;
923  if (dfa->init_state->has_constraint)
924    {
925      unsigned int context;
926      context =  re_string_context_at (mctx->input, idx - 1, mctx->eflags,
927				       preg->newline_anchor);
928      if (IS_WORD_CONTEXT (context))
929	return dfa->init_state_word;
930      else if (IS_ORDINARY_CONTEXT (context))
931	return dfa->init_state;
932      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
933	return dfa->init_state_begbuf;
934      else if (IS_NEWLINE_CONTEXT (context))
935	return dfa->init_state_nl;
936      else if (IS_BEGBUF_CONTEXT (context))
937	{
938	  /* It is relatively rare case, then calculate on demand.  */
939	  return  re_acquire_state_context (err, dfa,
940					    dfa->init_state->entrance_nodes,
941					    context);
942	}
943      else
944	/* Must not happen?  */
945	return dfa->init_state;
946    }
947  else
948    return dfa->init_state;
949}
950
951/* Check whether the regular expression match input string INPUT or not,
952   and return the index where the matching end, return -1 if not match,
953   or return -2 in case of an error.
954   FL_SEARCH means we must search where the matching starts,
955   FL_LONGEST_MATCH means we want the POSIX longest matching.
956   Note that the matcher assume that the maching starts from the current
957   index of the buffer.  */
958
959static int
960check_matching (preg, mctx, fl_search, fl_longest_match)
961    const regex_t *preg;
962    re_match_context_t *mctx;
963    int fl_search, fl_longest_match;
964{
965  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
966  reg_errcode_t err;
967  int match = 0;
968  int match_last = -1;
969  int cur_str_idx = re_string_cur_idx (mctx->input);
970  re_dfastate_t *cur_state;
971
972  cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
973  /* An initial state must not be NULL(invalid state).  */
974  if (BE (cur_state == NULL, 0))
975    return -2;
976  if (mctx->state_log != NULL)
977    mctx->state_log[cur_str_idx] = cur_state;
978
979  /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
980     later.  E.g. Processing back references.  */
981  if (dfa->nbackref)
982    {
983      err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
984      if (BE (err != REG_NOERROR, 0))
985	return err;
986    }
987
988  if (cur_state->has_backref)
989    {
990      err = transit_state_bkref (preg, &cur_state->nodes, mctx);
991      if (BE (err != REG_NOERROR, 0))
992	return err;
993    }
994
995  /* If the RE accepts NULL string.  */
996  if (cur_state->halt)
997    {
998      if (!cur_state->has_constraint
999	  || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
1000	{
1001	  if (!fl_longest_match)
1002	    return cur_str_idx;
1003	  else
1004	    {
1005	      match_last = cur_str_idx;
1006	      match = 1;
1007	    }
1008	}
1009    }
1010
1011  while (!re_string_eoi (mctx->input))
1012    {
1013      cur_state = transit_state (&err, preg, mctx, cur_state,
1014				 fl_search && !match);
1015      if (cur_state == NULL) /* Reached at the invalid state or an error.  */
1016	{
1017	  cur_str_idx = re_string_cur_idx (mctx->input);
1018	  if (BE (err != REG_NOERROR, 0))
1019	    return -2;
1020	  if (fl_search && !match)
1021	    {
1022	      /* Restart from initial state, since we are searching
1023		 the point from where matching start.  */
1024#ifdef RE_ENABLE_I18N
1025	      if (re_mb_cur_max == 1
1026		  || re_string_first_byte (mctx->input, cur_str_idx))
1027#endif /* RE_ENABLE_I18N */
1028		cur_state = acquire_init_state_context (&err, preg, mctx,
1029							cur_str_idx);
1030	      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
1031		return -2;
1032	      if (mctx->state_log != NULL)
1033		mctx->state_log[cur_str_idx] = cur_state;
1034	    }
1035	  else if (!fl_longest_match && match)
1036	    break;
1037	  else /* (fl_longest_match && match) || (!fl_search && !match)  */
1038	    {
1039	      if (mctx->state_log == NULL)
1040		break;
1041	      else
1042		{
1043		  int max = mctx->state_log_top;
1044		  for (; cur_str_idx <= max; ++cur_str_idx)
1045		    if (mctx->state_log[cur_str_idx] != NULL)
1046		      break;
1047		  if (cur_str_idx > max)
1048		    break;
1049		}
1050	    }
1051	}
1052
1053      if (cur_state != NULL && cur_state->halt)
1054	{
1055	  /* Reached at a halt state.
1056	     Check the halt state can satisfy the current context.  */
1057	  if (!cur_state->has_constraint
1058	      || check_halt_state_context (preg, cur_state, mctx,
1059					   re_string_cur_idx (mctx->input)))
1060	    {
1061	      /* We found an appropriate halt state.  */
1062	      match_last = re_string_cur_idx (mctx->input);
1063	      match = 1;
1064	      if (!fl_longest_match)
1065		break;
1066	    }
1067	}
1068   }
1069  return match_last;
1070}
1071
1072/* Check NODE match the current context.  */
1073
1074static int check_halt_node_context (dfa, node, context)
1075    const re_dfa_t *dfa;
1076    int node;
1077    unsigned int context;
1078{
1079  re_token_type_t type = dfa->nodes[node].type;
1080  unsigned int constraint = dfa->nodes[node].constraint;
1081  if (type != END_OF_RE)
1082    return 0;
1083  if (!constraint)
1084    return 1;
1085  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1086    return 0;
1087  return 1;
1088}
1089
1090/* Check the halt state STATE match the current context.
1091   Return 0 if not match, if the node, STATE has, is a halt node and
1092   match the context, return the node.  */
1093
1094static int
1095check_halt_state_context (preg, state, mctx, idx)
1096    const regex_t *preg;
1097    const re_dfastate_t *state;
1098    const re_match_context_t *mctx;
1099    int idx;
1100{
1101  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1102  int i;
1103  unsigned int context;
1104#ifdef DEBUG
1105  assert (state->halt);
1106#endif
1107  context = re_string_context_at (mctx->input, idx, mctx->eflags,
1108				  preg->newline_anchor);
1109  for (i = 0; i < state->nodes.nelem; ++i)
1110    if (check_halt_node_context (dfa, state->nodes.elems[i], context))
1111      return state->nodes.elems[i];
1112  return 0;
1113}
1114
1115/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1116   corresponding to the DFA).
1117   Return the destination node, and update EPS_VIA_NODES, return -1 in case
1118   of errors.  */
1119
1120static int
1121proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1122    const regex_t *preg;
1123    regmatch_t *regs;
1124    const re_match_context_t *mctx;
1125    int nregs, *pidx, node;
1126    re_node_set *eps_via_nodes;
1127    struct re_fail_stack_t *fs;
1128{
1129  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1130  int i, err, dest_node;
1131  dest_node = -1;
1132  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1133    {
1134      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1135      int ndest, dest_nodes[2];
1136      err = re_node_set_insert (eps_via_nodes, node);
1137      if (BE (err < 0, 0))
1138	return -1;
1139      /* Pick up valid destinations.  */
1140      for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1141	{
1142	  int candidate = dfa->edests[node].elems[i];
1143	  if (!re_node_set_contains (cur_nodes, candidate))
1144	    continue;
1145	  dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1146	  dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1147	  ++ndest;
1148	}
1149      if (ndest <= 1)
1150	return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1151      /* In order to avoid infinite loop like "(a*)*".  */
1152      if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1153	return dest_nodes[1];
1154      if (fs != NULL)
1155	push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1156      return dest_nodes[0];
1157    }
1158  else
1159    {
1160      int naccepted = 0;
1161      re_token_type_t type = dfa->nodes[node].type;
1162
1163#ifdef RE_ENABLE_I18N
1164      if (ACCEPT_MB_NODE (type))
1165	naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
1166      else
1167#endif /* RE_ENABLE_I18N */
1168      if (type == OP_BACK_REF)
1169	{
1170	  int subexp_idx = dfa->nodes[node].opr.idx;
1171	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1172	  if (fs != NULL)
1173	    {
1174	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1175		return -1;
1176	      else if (naccepted)
1177		{
1178		  char *buf = (char *) re_string_get_buffer (mctx->input);
1179		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1180			      naccepted) != 0)
1181		    return -1;
1182		}
1183	    }
1184
1185	  if (naccepted == 0)
1186	    {
1187	      err = re_node_set_insert (eps_via_nodes, node);
1188	      if (BE (err < 0, 0))
1189		return -2;
1190	      dest_node = dfa->edests[node].elems[0];
1191	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1192					dest_node))
1193		return dest_node;
1194	    }
1195	}
1196
1197      if (naccepted != 0
1198	  || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1199	{
1200	  dest_node = dfa->nexts[node];
1201	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1202	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1203		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1204					       dest_node)))
1205	    return -1;
1206	  re_node_set_empty (eps_via_nodes);
1207	  return dest_node;
1208	}
1209    }
1210  return -1;
1211}
1212
1213static reg_errcode_t
1214push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1215     struct re_fail_stack_t *fs;
1216     int str_idx, *dests, nregs;
1217     regmatch_t *regs;
1218     re_node_set *eps_via_nodes;
1219{
1220  reg_errcode_t err;
1221  int num = fs->num++;
1222  if (fs->num == fs->alloc)
1223    {
1224      struct re_fail_stack_ent_t *new_array;
1225      fs->alloc *= 2;
1226      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1227				       * fs->alloc));
1228      if (new_array == NULL)
1229	return REG_ESPACE;
1230      fs->stack = new_array;
1231    }
1232  fs->stack[num].idx = str_idx;
1233  fs->stack[num].node = dests[1];
1234  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1235  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1236  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1237  return err;
1238}
1239
1240static int
1241pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1242     struct re_fail_stack_t *fs;
1243     int *pidx, nregs;
1244     regmatch_t *regs;
1245     re_node_set *eps_via_nodes;
1246{
1247  int num = --fs->num;
1248  assert (num >= 0);
1249 *pidx = fs->stack[num].idx;
1250  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1251  re_node_set_free (eps_via_nodes);
1252  re_free (fs->stack[num].regs);
1253  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1254  return fs->stack[num].node;
1255}
1256
1257/* Set the positions where the subexpressions are starts/ends to registers
1258   PMATCH.
1259   Note: We assume that pmatch[0] is already set, and
1260   pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).  */
1261
1262static reg_errcode_t
1263set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1264     const regex_t *preg;
1265     const re_match_context_t *mctx;
1266     size_t nmatch;
1267     regmatch_t *pmatch;
1268     int fl_backtrack;
1269{
1270  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1271  int idx, cur_node, real_nmatch;
1272  re_node_set eps_via_nodes;
1273  struct re_fail_stack_t *fs;
1274  struct re_fail_stack_t fs_body;
1275  fs_body.num = 0;
1276  fs_body.alloc = 2;
1277  fs_body.stack = NULL;
1278#ifdef DEBUG
1279  assert (nmatch > 1);
1280  assert (mctx->state_log != NULL);
1281#endif
1282  if (fl_backtrack)
1283    {
1284      fs = &fs_body;
1285      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1286    }
1287  else
1288    fs = NULL;
1289  cur_node = dfa->init_node;
1290  real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1291  re_node_set_init_empty (&eps_via_nodes);
1292  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1293    {
1294      update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1295      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1296	{
1297	  int reg_idx;
1298	  if (fs)
1299	    {
1300	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1301		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1302		  break;
1303	      if (reg_idx == nmatch)
1304		{
1305		  re_node_set_free (&eps_via_nodes);
1306		  return free_fail_stack_return (fs);
1307		}
1308	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1309					 &eps_via_nodes);
1310	    }
1311	  else
1312	    {
1313	      re_node_set_free (&eps_via_nodes);
1314	      return REG_NOERROR;
1315	    }
1316	}
1317
1318      /* Proceed to next node.  */
1319      cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1320				    &eps_via_nodes, fs);
1321
1322      if (BE (cur_node < 0, 0))
1323	{
1324	  if (cur_node == -2)
1325	    return REG_ESPACE;
1326	  if (fs)
1327	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1328				       &eps_via_nodes);
1329	  else
1330	    {
1331	      re_node_set_free (&eps_via_nodes);
1332	      return REG_NOMATCH;
1333	    }
1334	}
1335    }
1336  re_node_set_free (&eps_via_nodes);
1337  return free_fail_stack_return (fs);
1338}
1339
1340static reg_errcode_t
1341free_fail_stack_return (fs)
1342     struct re_fail_stack_t *fs;
1343{
1344  if (fs)
1345    {
1346      int fs_idx;
1347      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1348	{
1349	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1350	  re_free (fs->stack[fs_idx].regs);
1351	}
1352      re_free (fs->stack);
1353    }
1354  return REG_NOERROR;
1355}
1356
1357static void
1358update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1359     re_dfa_t *dfa;
1360     regmatch_t *pmatch;
1361     int cur_node, cur_idx, nmatch;
1362{
1363  int type = dfa->nodes[cur_node].type;
1364  int reg_num;
1365  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1366    return;
1367  reg_num = dfa->nodes[cur_node].opr.idx + 1;
1368  if (reg_num >= nmatch)
1369    return;
1370  if (type == OP_OPEN_SUBEXP)
1371    {
1372      /* We are at the first node of this sub expression.  */
1373      pmatch[reg_num].rm_so = cur_idx;
1374      pmatch[reg_num].rm_eo = -1;
1375    }
1376  else if (type == OP_CLOSE_SUBEXP)
1377    /* We are at the first node of this sub expression.  */
1378    pmatch[reg_num].rm_eo = cur_idx;
1379}
1380
1381#define NUMBER_OF_STATE 1
1382
1383/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1384   and sift the nodes in each states according to the following rules.
1385   Updated state_log will be wrote to STATE_LOG.
1386
1387   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1388     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1389	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1390	the LAST_NODE, we throw away the node `a'.
1391     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1392	string `s' and transit to `b':
1393	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1394	   away the node `a'.
1395	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1396	    throwed away, we throw away the node `a'.
1397     3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1398	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1399	   node `a'.
1400	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1401	    we throw away the node `a'.  */
1402
1403#define STATE_NODE_CONTAINS(state,node) \
1404  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1405
1406static reg_errcode_t
1407sift_states_backward (preg, mctx, sctx)
1408     const regex_t *preg;
1409     re_match_context_t *mctx;
1410     re_sift_context_t *sctx;
1411{
1412  reg_errcode_t err;
1413  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1414  int null_cnt = 0;
1415  int str_idx = sctx->last_str_idx;
1416  re_node_set cur_dest;
1417  re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
1418
1419#ifdef DEBUG
1420  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1421#endif
1422  cur_src = &mctx->state_log[str_idx]->nodes;
1423
1424  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1425     transit to the last_node and the last_node itself.  */
1426  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1427  if (BE (err != REG_NOERROR, 0))
1428    return err;
1429  err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1430  if (BE (err != REG_NOERROR, 0))
1431    goto free_return;
1432
1433  /* Then check each states in the state_log.  */
1434  while (str_idx > 0)
1435    {
1436      int i, ret;
1437      /* Update counters.  */
1438      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1439      if (null_cnt > mctx->max_mb_elem_len)
1440	{
1441	  memset (sctx->sifted_states, '\0',
1442		  sizeof (re_dfastate_t *) * str_idx);
1443	  re_node_set_free (&cur_dest);
1444	  return REG_NOERROR;
1445	}
1446      re_node_set_empty (&cur_dest);
1447      --str_idx;
1448      cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1449		 : &mctx->state_log[str_idx]->nodes);
1450
1451      /* Then build the next sifted state.
1452	 We build the next sifted state on `cur_dest', and update
1453	 `sifted_states[str_idx]' with `cur_dest'.
1454	 Note:
1455	 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1456	 `cur_src' points the node_set of the old `state_log[str_idx]'.  */
1457      for (i = 0; i < cur_src->nelem; i++)
1458	{
1459	  int prev_node = cur_src->elems[i];
1460	  int naccepted = 0;
1461	  re_token_type_t type = dfa->nodes[prev_node].type;
1462
1463	  if (IS_EPSILON_NODE(type))
1464	    continue;
1465#ifdef RE_ENABLE_I18N
1466	  /* If the node may accept `multi byte'.  */
1467	  if (ACCEPT_MB_NODE (type))
1468	    naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
1469					     str_idx, sctx->last_str_idx);
1470
1471#endif /* RE_ENABLE_I18N */
1472	  /* We don't check backreferences here.
1473	     See update_cur_sifted_state().  */
1474
1475	  if (!naccepted
1476	      && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1477				    str_idx)
1478	      && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1479				      dfa->nexts[prev_node]))
1480	    naccepted = 1;
1481
1482	  if (naccepted == 0)
1483	    continue;
1484
1485	  if (sctx->limits.nelem)
1486	    {
1487	      int to_idx = str_idx + naccepted;
1488	      if (check_dst_limits (dfa, &sctx->limits, mctx,
1489				    dfa->nexts[prev_node], to_idx,
1490				    prev_node, str_idx))
1491		continue;
1492	    }
1493	  ret = re_node_set_insert (&cur_dest, prev_node);
1494	  if (BE (ret == -1, 0))
1495	    {
1496	      err = REG_ESPACE;
1497	      goto free_return;
1498	    }
1499	}
1500
1501      /* Add all the nodes which satisfy the following conditions:
1502	 - It can epsilon transit to a node in CUR_DEST.
1503	 - It is in CUR_SRC.
1504	 And update state_log.  */
1505      err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1506      if (BE (err != REG_NOERROR, 0))
1507	goto free_return;
1508    }
1509  err = REG_NOERROR;
1510 free_return:
1511  re_node_set_free (&cur_dest);
1512  return err;
1513}
1514
1515/* Helper functions.  */
1516
1517static inline reg_errcode_t
1518clean_state_log_if_need (mctx, next_state_log_idx)
1519    re_match_context_t *mctx;
1520    int next_state_log_idx;
1521{
1522  int top = mctx->state_log_top;
1523
1524  if (next_state_log_idx >= mctx->input->bufs_len
1525      || (next_state_log_idx >= mctx->input->valid_len
1526	  && mctx->input->valid_len < mctx->input->len))
1527    {
1528      reg_errcode_t err;
1529      err = extend_buffers (mctx);
1530      if (BE (err != REG_NOERROR, 0))
1531	return err;
1532    }
1533
1534  if (top < next_state_log_idx)
1535    {
1536      memset (mctx->state_log + top + 1, '\0',
1537	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1538      mctx->state_log_top = next_state_log_idx;
1539    }
1540  return REG_NOERROR;
1541}
1542
1543static reg_errcode_t
1544merge_state_array (dfa, dst, src, num)
1545     re_dfa_t *dfa;
1546     re_dfastate_t **dst;
1547     re_dfastate_t **src;
1548     int num;
1549{
1550  int st_idx;
1551  reg_errcode_t err;
1552  for (st_idx = 0; st_idx < num; ++st_idx)
1553    {
1554      if (dst[st_idx] == NULL)
1555	dst[st_idx] = src[st_idx];
1556      else if (src[st_idx] != NULL)
1557	{
1558	  re_node_set merged_set;
1559	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1560					&src[st_idx]->nodes);
1561	  if (BE (err != REG_NOERROR, 0))
1562	    return err;
1563	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1564	  re_node_set_free (&merged_set);
1565	  if (BE (err != REG_NOERROR, 0))
1566	    return err;
1567	}
1568    }
1569  return REG_NOERROR;
1570}
1571
1572static reg_errcode_t
1573update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1574     const regex_t *preg;
1575     re_match_context_t *mctx;
1576     re_sift_context_t *sctx;
1577     int str_idx;
1578     re_node_set *dest_nodes;
1579{
1580  reg_errcode_t err;
1581  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1582  const re_node_set *candidates;
1583  candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1584		: &mctx->state_log[str_idx]->nodes);
1585
1586  /* At first, add the nodes which can epsilon transit to a node in
1587     DEST_NODE.  */
1588  if (dest_nodes->nelem)
1589    {
1590      err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1591      if (BE (err != REG_NOERROR, 0))
1592	return err;
1593    }
1594
1595  /* Then, check the limitations in the current sift_context.  */
1596  if (dest_nodes->nelem && sctx->limits.nelem)
1597    {
1598      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1599				 mctx->bkref_ents, str_idx);
1600      if (BE (err != REG_NOERROR, 0))
1601	return err;
1602    }
1603
1604  /* Update state_log.  */
1605  sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1606  if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1607    return err;
1608
1609  if ((mctx->state_log[str_idx] != NULL
1610       && mctx->state_log[str_idx]->has_backref))
1611    {
1612      err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1613      if (BE (err != REG_NOERROR, 0))
1614	return err;
1615    }
1616  return REG_NOERROR;
1617}
1618
1619static reg_errcode_t
1620add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1621     re_dfa_t *dfa;
1622     re_node_set *dest_nodes;
1623     const re_node_set *candidates;
1624{
1625  reg_errcode_t err;
1626  int src_idx;
1627  re_node_set src_copy;
1628
1629  err = re_node_set_init_copy (&src_copy, dest_nodes);
1630  if (BE (err != REG_NOERROR, 0))
1631    return err;
1632  for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1633    {
1634      err = re_node_set_add_intersect (dest_nodes, candidates,
1635				       dfa->inveclosures
1636				       + src_copy.elems[src_idx]);
1637      if (BE (err != REG_NOERROR, 0))
1638	{
1639	  re_node_set_free (&src_copy);
1640	  return err;
1641	}
1642    }
1643  re_node_set_free (&src_copy);
1644  return REG_NOERROR;
1645}
1646
1647static reg_errcode_t
1648sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1649     re_dfa_t *dfa;
1650     int node;
1651     re_node_set *dest_nodes;
1652     const re_node_set *candidates;
1653{
1654    int ecl_idx;
1655    reg_errcode_t err;
1656    re_node_set *inv_eclosure = dfa->inveclosures + node;
1657    re_node_set except_nodes;
1658    re_node_set_init_empty (&except_nodes);
1659    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1660      {
1661	int cur_node = inv_eclosure->elems[ecl_idx];
1662	if (cur_node == node)
1663	  continue;
1664	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1665	  {
1666	    int edst1 = dfa->edests[cur_node].elems[0];
1667	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1668			 ? dfa->edests[cur_node].elems[1] : -1);
1669	    if ((!re_node_set_contains (inv_eclosure, edst1)
1670		 && re_node_set_contains (dest_nodes, edst1))
1671		|| (edst2 > 0
1672		    && !re_node_set_contains (inv_eclosure, edst2)
1673		    && re_node_set_contains (dest_nodes, edst2)))
1674	      {
1675		err = re_node_set_add_intersect (&except_nodes, candidates,
1676						 dfa->inveclosures + cur_node);
1677		if (BE (err != REG_NOERROR, 0))
1678		  {
1679		    re_node_set_free (&except_nodes);
1680		    return err;
1681		  }
1682	      }
1683	  }
1684      }
1685    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1686      {
1687	int cur_node = inv_eclosure->elems[ecl_idx];
1688	if (!re_node_set_contains (&except_nodes, cur_node))
1689	  {
1690	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1691	    re_node_set_remove_at (dest_nodes, idx);
1692	  }
1693      }
1694    re_node_set_free (&except_nodes);
1695    return REG_NOERROR;
1696}
1697
1698static int
1699check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1700     re_dfa_t *dfa;
1701     re_node_set *limits;
1702     re_match_context_t *mctx;
1703     int dst_node, dst_idx, src_node, src_idx;
1704{
1705  int lim_idx, src_pos, dst_pos;
1706
1707  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1708    {
1709      int subexp_idx;
1710      struct re_backref_cache_entry *ent;
1711      ent = mctx->bkref_ents + limits->elems[lim_idx];
1712      subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1713
1714      dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1715					   dfa->eclosures + dst_node,
1716					   subexp_idx, dst_node, dst_idx);
1717      src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1718					   dfa->eclosures + src_node,
1719					   subexp_idx, src_node, src_idx);
1720
1721      /* In case of:
1722	 <src> <dst> ( <subexp> )
1723	 ( <subexp> ) <src> <dst>
1724	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1725      if (src_pos == dst_pos)
1726	continue; /* This is unrelated limitation.  */
1727      else
1728	return 1;
1729    }
1730  return 0;
1731}
1732
1733static int
1734check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
1735			   str_idx)
1736     re_dfa_t *dfa;
1737     re_match_context_t *mctx;
1738     re_node_set *eclosures;
1739     int limit, subexp_idx, node, str_idx;
1740{
1741  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1742  int pos = (str_idx < lim->subexp_from ? -1
1743	     : (lim->subexp_to < str_idx ? 1 : 0));
1744  if (pos == 0
1745      && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
1746    {
1747      int node_idx;
1748      for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1749	{
1750	  int node = eclosures->elems[node_idx];
1751	  re_token_type_t type= dfa->nodes[node].type;
1752	  if (type == OP_BACK_REF)
1753	    {
1754	      int bi = search_cur_bkref_entry (mctx, str_idx);
1755	      for (; bi < mctx->nbkref_ents; ++bi)
1756		{
1757		  struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1758		  if (ent->str_idx > str_idx)
1759		    break;
1760		  if (ent->node == node && ent->subexp_from == ent->subexp_to)
1761		    {
1762		      int cpos, dst;
1763		      dst = dfa->edests[node].elems[0];
1764		      cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1765							dfa->eclosures + dst,
1766							subexp_idx, dst,
1767							str_idx);
1768		      if ((str_idx == lim->subexp_from && cpos == -1)
1769			  || (str_idx == lim->subexp_to && cpos == 0))
1770			return cpos;
1771		    }
1772		}
1773	    }
1774	  if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1775	      && str_idx == lim->subexp_from)
1776	    {
1777	      pos = -1;
1778	      break;
1779	    }
1780	  if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1781	      && str_idx == lim->subexp_to)
1782	    break;
1783	}
1784      if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
1785	pos = 1;
1786    }
1787  return pos;
1788}
1789
1790/* Check the limitations of sub expressions LIMITS, and remove the nodes
1791   which are against limitations from DEST_NODES. */
1792
1793static reg_errcode_t
1794check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1795     re_dfa_t *dfa;
1796     re_node_set *dest_nodes;
1797     const re_node_set *candidates;
1798     re_node_set *limits;
1799     struct re_backref_cache_entry *bkref_ents;
1800     int str_idx;
1801{
1802  reg_errcode_t err;
1803  int node_idx, lim_idx;
1804
1805  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1806    {
1807      int subexp_idx;
1808      struct re_backref_cache_entry *ent;
1809      ent = bkref_ents + limits->elems[lim_idx];
1810
1811      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1812	continue; /* This is unrelated limitation.  */
1813
1814      subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1815      if (ent->subexp_to == str_idx)
1816	{
1817	  int ops_node = -1;
1818	  int cls_node = -1;
1819	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1820	    {
1821	      int node = dest_nodes->elems[node_idx];
1822	      re_token_type_t type= dfa->nodes[node].type;
1823	      if (type == OP_OPEN_SUBEXP
1824		  && subexp_idx == dfa->nodes[node].opr.idx)
1825		ops_node = node;
1826	      else if (type == OP_CLOSE_SUBEXP
1827		       && subexp_idx == dfa->nodes[node].opr.idx)
1828		cls_node = node;
1829	    }
1830
1831	  /* Check the limitation of the open subexpression.  */
1832	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
1833	  if (ops_node >= 0)
1834	    {
1835	      err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
1836					  candidates);
1837	      if (BE (err != REG_NOERROR, 0))
1838		return err;
1839	    }
1840	  /* Check the limitation of the close subexpression.  */
1841	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1842	    {
1843	      int node = dest_nodes->elems[node_idx];
1844	      if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
1845		  && !re_node_set_contains (dfa->eclosures + node, cls_node))
1846		{
1847		  /* It is against this limitation.
1848		     Remove it form the current sifted state.  */
1849		  err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1850					      candidates);
1851		  if (BE (err != REG_NOERROR, 0))
1852		    return err;
1853		  --node_idx;
1854		}
1855	    }
1856	}
1857      else /* (ent->subexp_to != str_idx)  */
1858	{
1859	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1860	    {
1861	      int node = dest_nodes->elems[node_idx];
1862	      re_token_type_t type= dfa->nodes[node].type;
1863	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1864		{
1865		  if (subexp_idx != dfa->nodes[node].opr.idx)
1866		    continue;
1867		  if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1868		      || (type == OP_OPEN_SUBEXP))
1869		    {
1870		      /* It is against this limitation.
1871			 Remove it form the current sifted state.  */
1872		      err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1873						  candidates);
1874		      if (BE (err != REG_NOERROR, 0))
1875			return err;
1876		    }
1877		}
1878	    }
1879	}
1880    }
1881  return REG_NOERROR;
1882}
1883
1884static reg_errcode_t
1885sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1886     const regex_t *preg;
1887     re_match_context_t *mctx;
1888     re_sift_context_t *sctx;
1889     int str_idx;
1890     re_node_set *dest_nodes;
1891{
1892  reg_errcode_t err;
1893  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1894  int node_idx, node;
1895  re_sift_context_t local_sctx;
1896  const re_node_set *candidates;
1897  candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1898		: &mctx->state_log[str_idx]->nodes);
1899  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
1900
1901  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1902    {
1903      int cur_bkref_idx = re_string_cur_idx (mctx->input);
1904      re_token_type_t type;
1905      node = candidates->elems[node_idx];
1906      type = dfa->nodes[node].type;
1907      if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1908	continue;
1909      /* Avoid infinite loop for the REs like "()\1+".  */
1910      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1911	continue;
1912      if (type == OP_BACK_REF)
1913	{
1914	  int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1915	  for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1916	    {
1917	      int disabled_idx, subexp_len, to_idx, dst_node;
1918	      struct re_backref_cache_entry *entry;
1919	      entry = mctx->bkref_ents + enabled_idx;
1920	      if (entry->str_idx > str_idx)
1921		break;
1922	      if (entry->node != node)
1923		  continue;
1924	      subexp_len = entry->subexp_to - entry->subexp_from;
1925	      to_idx = str_idx + subexp_len;
1926	      dst_node = (subexp_len ? dfa->nexts[node]
1927			  : dfa->edests[node].elems[0]);
1928
1929	      if (to_idx > sctx->last_str_idx
1930		  || sctx->sifted_states[to_idx] == NULL
1931		  || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
1932					   dst_node)
1933		  || check_dst_limits (dfa, &sctx->limits, mctx, node,
1934				       str_idx, dst_node, to_idx))
1935		continue;
1936		{
1937		  re_dfastate_t *cur_state;
1938		  entry->flag = 0;
1939		  for (disabled_idx = enabled_idx + 1;
1940		       disabled_idx < mctx->nbkref_ents; ++disabled_idx)
1941		    {
1942		      struct re_backref_cache_entry *entry2;
1943		      entry2 = mctx->bkref_ents + disabled_idx;
1944		      if (entry2->str_idx > str_idx)
1945			break;
1946		      entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
1947		    }
1948
1949		  if (local_sctx.sifted_states == NULL)
1950		    {
1951		      local_sctx = *sctx;
1952		      err = re_node_set_init_copy (&local_sctx.limits,
1953						   &sctx->limits);
1954		      if (BE (err != REG_NOERROR, 0))
1955			goto free_return;
1956		    }
1957		  local_sctx.last_node = node;
1958		  local_sctx.last_str_idx = str_idx;
1959		  err = re_node_set_insert (&local_sctx.limits, enabled_idx);
1960		  if (BE (err < 0, 0))
1961		    {
1962		      err = REG_ESPACE;
1963		      goto free_return;
1964		    }
1965		  cur_state = local_sctx.sifted_states[str_idx];
1966		  err = sift_states_backward (preg, mctx, &local_sctx);
1967		  if (BE (err != REG_NOERROR, 0))
1968		    goto free_return;
1969		  if (sctx->limited_states != NULL)
1970		    {
1971		      err = merge_state_array (dfa, sctx->limited_states,
1972					       local_sctx.sifted_states,
1973					       str_idx + 1);
1974		      if (BE (err != REG_NOERROR, 0))
1975			goto free_return;
1976		    }
1977		  local_sctx.sifted_states[str_idx] = cur_state;
1978		  re_node_set_remove (&local_sctx.limits, enabled_idx);
1979		  /* We must not use the variable entry here, since
1980		     mctx->bkref_ents might be realloced.  */
1981		  mctx->bkref_ents[enabled_idx].flag = 1;
1982		}
1983	    }
1984	  enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1985	  for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1986	    {
1987	      struct re_backref_cache_entry *entry;
1988	      entry = mctx->bkref_ents + enabled_idx;
1989	      if (entry->str_idx > str_idx)
1990		break;
1991	      if (entry->node == node)
1992		entry->flag = 0;
1993	    }
1994	}
1995    }
1996  err = REG_NOERROR;
1997 free_return:
1998  if (local_sctx.sifted_states != NULL)
1999    {
2000      re_node_set_free (&local_sctx.limits);
2001    }
2002
2003  return err;
2004}
2005
2006
2007#ifdef RE_ENABLE_I18N
2008static int
2009sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2010    const regex_t *preg;
2011    const re_match_context_t *mctx;
2012    re_sift_context_t *sctx;
2013    int node_idx, str_idx, max_str_idx;
2014{
2015  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2016  int naccepted;
2017  /* Check the node can accept `multi byte'.  */
2018  naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2019  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2020      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2021			    dfa->nexts[node_idx]))
2022    /* The node can't accept the `multi byte', or the
2023       destination was already throwed away, then the node
2024       could't accept the current input `multi byte'.   */
2025    naccepted = 0;
2026  /* Otherwise, it is sure that the node could accept
2027     `naccepted' bytes input.  */
2028  return naccepted;
2029}
2030#endif /* RE_ENABLE_I18N */
2031
2032
2033/* Functions for state transition.  */
2034
2035/* Return the next state to which the current state STATE will transit by
2036   accepting the current input byte, and update STATE_LOG if necessary.
2037   If STATE can accept a multibyte char/collating element/back reference
2038   update the destination of STATE_LOG.  */
2039
2040static re_dfastate_t *
2041transit_state (err, preg, mctx, state, fl_search)
2042     reg_errcode_t *err;
2043     const regex_t *preg;
2044     re_match_context_t *mctx;
2045     re_dfastate_t *state;
2046     int fl_search;
2047{
2048  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2049  re_dfastate_t **trtable, *next_state;
2050  unsigned char ch;
2051  int cur_idx;
2052
2053  if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2054      || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2055	  && mctx->input->valid_len < mctx->input->len))
2056    {
2057      *err = extend_buffers (mctx);
2058      if (BE (*err != REG_NOERROR, 0))
2059	return NULL;
2060    }
2061
2062  *err = REG_NOERROR;
2063  if (state == NULL)
2064    {
2065      next_state = state;
2066      re_string_skip_bytes (mctx->input, 1);
2067    }
2068  else
2069    {
2070#ifdef RE_ENABLE_I18N
2071      /* If the current state can accept multibyte.  */
2072      if (state->accept_mb)
2073	{
2074	  *err = transit_state_mb (preg, state, mctx);
2075	  if (BE (*err != REG_NOERROR, 0))
2076	    return NULL;
2077	}
2078#endif /* RE_ENABLE_I18N */
2079
2080      /* Then decide the next state with the single byte.  */
2081      if (1)
2082	{
2083	  /* Use transition table  */
2084	  ch = re_string_fetch_byte (mctx->input);
2085	  trtable = fl_search ? state->trtable_search : state->trtable;
2086	  if (trtable == NULL)
2087	    {
2088	      trtable = build_trtable (preg, state, fl_search);
2089	      if (fl_search)
2090		state->trtable_search = trtable;
2091	      else
2092		state->trtable = trtable;
2093	    }
2094	  next_state = trtable[ch];
2095	}
2096      else
2097	{
2098	  /* don't use transition table  */
2099	  next_state = transit_state_sb (err, preg, state, fl_search, mctx);
2100	  if (BE (next_state == NULL && err != REG_NOERROR, 0))
2101	    return NULL;
2102	}
2103    }
2104
2105  cur_idx = re_string_cur_idx (mctx->input);
2106  /* Update the state_log if we need.  */
2107  if (mctx->state_log != NULL)
2108    {
2109      if (cur_idx > mctx->state_log_top)
2110	{
2111	  mctx->state_log[cur_idx] = next_state;
2112	  mctx->state_log_top = cur_idx;
2113	}
2114      else if (mctx->state_log[cur_idx] == 0)
2115	{
2116	  mctx->state_log[cur_idx] = next_state;
2117	}
2118      else
2119	{
2120	  re_dfastate_t *pstate;
2121	  unsigned int context;
2122	  re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2123	  /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2124	     the destination of a multibyte char/collating element/
2125	     back reference.  Then the next state is the union set of
2126	     these destinations and the results of the transition table.  */
2127	  pstate = mctx->state_log[cur_idx];
2128	  log_nodes = pstate->entrance_nodes;
2129	  if (next_state != NULL)
2130	    {
2131	      table_nodes = next_state->entrance_nodes;
2132	      *err = re_node_set_init_union (&next_nodes, table_nodes,
2133					     log_nodes);
2134	      if (BE (*err != REG_NOERROR, 0))
2135		return NULL;
2136	    }
2137	  else
2138	    next_nodes = *log_nodes;
2139	  /* Note: We already add the nodes of the initial state,
2140		   then we don't need to add them here.  */
2141
2142	  context = re_string_context_at (mctx->input,
2143					  re_string_cur_idx (mctx->input) - 1,
2144					  mctx->eflags, preg->newline_anchor);
2145	  next_state = mctx->state_log[cur_idx]
2146	    = re_acquire_state_context (err, dfa, &next_nodes, context);
2147	  /* We don't need to check errors here, since the return value of
2148	     this function is next_state and ERR is already set.  */
2149
2150	  if (table_nodes != NULL)
2151	    re_node_set_free (&next_nodes);
2152	}
2153    }
2154
2155  /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2156     later.  We must check them here, since the back references in the
2157     next state might use them.  */
2158  if (dfa->nbackref && next_state/* && fl_process_bkref */)
2159    {
2160      *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
2161					cur_idx);
2162      if (BE (*err != REG_NOERROR, 0))
2163	return NULL;
2164    }
2165
2166  /* If the next state has back references.  */
2167  if (next_state != NULL && next_state->has_backref)
2168    {
2169      *err = transit_state_bkref (preg, &next_state->nodes, mctx);
2170      if (BE (*err != REG_NOERROR, 0))
2171	return NULL;
2172      next_state = mctx->state_log[cur_idx];
2173    }
2174  return next_state;
2175}
2176
2177/* Helper functions for transit_state.  */
2178
2179/* From the node set CUR_NODES, pick up the nodes whose types are
2180   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2181   expression. And register them to use them later for evaluating the
2182   correspoding back references.  */
2183
2184static reg_errcode_t
2185check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
2186     re_dfa_t *dfa;
2187     re_match_context_t *mctx;
2188     re_node_set *cur_nodes;
2189     int str_idx;
2190{
2191  int node_idx;
2192  reg_errcode_t err;
2193
2194  /* TODO: This isn't efficient.
2195	   Because there might be more than one nodes whose types are
2196	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2197	   nodes.
2198	   E.g. RE: (a){2}  */
2199  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2200    {
2201      int node = cur_nodes->elems[node_idx];
2202      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2203	  && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2204	{
2205	  err = match_ctx_add_subtop (mctx, node, str_idx);
2206	  if (BE (err != REG_NOERROR, 0))
2207	    return err;
2208	}
2209    }
2210  return REG_NOERROR;
2211}
2212
2213/* Return the next state to which the current state STATE will transit by
2214   accepting the current input byte.  */
2215
2216static re_dfastate_t *
2217transit_state_sb (err, preg, state, fl_search, mctx)
2218     reg_errcode_t *err;
2219     const regex_t *preg;
2220     re_dfastate_t *state;
2221     int fl_search;
2222     re_match_context_t *mctx;
2223{
2224  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2225  re_node_set next_nodes;
2226  re_dfastate_t *next_state;
2227  int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2228  unsigned int context;
2229
2230  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2231  if (BE (*err != REG_NOERROR, 0))
2232    return NULL;
2233  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2234    {
2235      int cur_node = state->nodes.elems[node_cnt];
2236      if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2237	{
2238	  *err = re_node_set_merge (&next_nodes,
2239				    dfa->eclosures + dfa->nexts[cur_node]);
2240	  if (BE (*err != REG_NOERROR, 0))
2241	    {
2242	      re_node_set_free (&next_nodes);
2243	      return NULL;
2244	    }
2245	}
2246    }
2247  if (fl_search)
2248    {
2249#ifdef RE_ENABLE_I18N
2250      int not_initial = 0;
2251      if (re_mb_cur_max > 1)
2252	for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
2253	  if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
2254	    {
2255	      not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
2256	      break;
2257	    }
2258      if (!not_initial)
2259#endif
2260	{
2261	  *err = re_node_set_merge (&next_nodes,
2262				    dfa->init_state->entrance_nodes);
2263	  if (BE (*err != REG_NOERROR, 0))
2264	    {
2265	      re_node_set_free (&next_nodes);
2266	      return NULL;
2267	    }
2268	}
2269    }
2270  context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2271				  preg->newline_anchor);
2272  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2273  /* We don't need to check errors here, since the return value of
2274     this function is next_state and ERR is already set.  */
2275
2276  re_node_set_free (&next_nodes);
2277  re_string_skip_bytes (mctx->input, 1);
2278  return next_state;
2279}
2280
2281#ifdef RE_ENABLE_I18N
2282static reg_errcode_t
2283transit_state_mb (preg, pstate, mctx)
2284    const regex_t *preg;
2285    re_dfastate_t *pstate;
2286    re_match_context_t *mctx;
2287{
2288  reg_errcode_t err;
2289  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2290  int i;
2291
2292  for (i = 0; i < pstate->nodes.nelem; ++i)
2293    {
2294      re_node_set dest_nodes, *new_nodes;
2295      int cur_node_idx = pstate->nodes.elems[i];
2296      int naccepted = 0, dest_idx;
2297      unsigned int context;
2298      re_dfastate_t *dest_state;
2299
2300      if (dfa->nodes[cur_node_idx].constraint)
2301	{
2302	  context = re_string_context_at (mctx->input,
2303					  re_string_cur_idx (mctx->input),
2304					  mctx->eflags, preg->newline_anchor);
2305	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2306					   context))
2307	    continue;
2308	}
2309
2310      /* How many bytes the node can accepts?  */
2311      if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2312	naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2313					     re_string_cur_idx (mctx->input));
2314      if (naccepted == 0)
2315	continue;
2316
2317      /* The node can accepts `naccepted' bytes.  */
2318      dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2319      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2320			       : mctx->max_mb_elem_len);
2321      err = clean_state_log_if_need (mctx, dest_idx);
2322      if (BE (err != REG_NOERROR, 0))
2323	return err;
2324#ifdef DEBUG
2325      assert (dfa->nexts[cur_node_idx] != -1);
2326#endif
2327      /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2328	 then we use pstate->nodes.elems[i] instead.  */
2329      new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2330
2331      dest_state = mctx->state_log[dest_idx];
2332      if (dest_state == NULL)
2333	dest_nodes = *new_nodes;
2334      else
2335	{
2336	  err = re_node_set_init_union (&dest_nodes,
2337					dest_state->entrance_nodes, new_nodes);
2338	  if (BE (err != REG_NOERROR, 0))
2339	    return err;
2340	}
2341      context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2342				      preg->newline_anchor);
2343      mctx->state_log[dest_idx]
2344	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2345      if (dest_state != NULL)
2346	re_node_set_free (&dest_nodes);
2347      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2348	return err;
2349    }
2350  return REG_NOERROR;
2351}
2352#endif /* RE_ENABLE_I18N */
2353
2354static reg_errcode_t
2355transit_state_bkref (preg, nodes, mctx)
2356    const regex_t *preg;
2357    re_node_set *nodes;
2358    re_match_context_t *mctx;
2359{
2360  reg_errcode_t err;
2361  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2362  int i;
2363  int cur_str_idx = re_string_cur_idx (mctx->input);
2364
2365  for (i = 0; i < nodes->nelem; ++i)
2366    {
2367      int dest_str_idx, prev_nelem, bkc_idx;
2368      int node_idx = nodes->elems[i];
2369      unsigned int context;
2370      re_token_t *node = dfa->nodes + node_idx;
2371      re_node_set *new_dest_nodes;
2372
2373      /* Check whether `node' is a backreference or not.  */
2374      if (node->type != OP_BACK_REF)
2375	continue;
2376
2377      if (node->constraint)
2378	{
2379	  context = re_string_context_at (mctx->input, cur_str_idx,
2380					  mctx->eflags, preg->newline_anchor);
2381	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2382	    continue;
2383	}
2384
2385      /* `node' is a backreference.
2386	 Check the substring which the substring matched.  */
2387      bkc_idx = mctx->nbkref_ents;
2388      err = get_subexp (preg, mctx, node_idx, cur_str_idx);
2389      if (BE (err != REG_NOERROR, 0))
2390	goto free_return;
2391
2392      /* And add the epsilon closures (which is `new_dest_nodes') of
2393	 the backreference to appropriate state_log.  */
2394#ifdef DEBUG
2395      assert (dfa->nexts[node_idx] != -1);
2396#endif
2397      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2398	{
2399	  int subexp_len;
2400	  re_dfastate_t *dest_state;
2401	  struct re_backref_cache_entry *bkref_ent;
2402	  bkref_ent = mctx->bkref_ents + bkc_idx;
2403	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2404	    continue;
2405	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2406	  new_dest_nodes = (subexp_len == 0
2407			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2408			    : dfa->eclosures + dfa->nexts[node_idx]);
2409	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2410			  - bkref_ent->subexp_from);
2411	  context = re_string_context_at (mctx->input, dest_str_idx - 1,
2412					  mctx->eflags, preg->newline_anchor);
2413	  dest_state = mctx->state_log[dest_str_idx];
2414	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2415			: mctx->state_log[cur_str_idx]->nodes.nelem);
2416	  /* Add `new_dest_node' to state_log.  */
2417	  if (dest_state == NULL)
2418	    {
2419	      mctx->state_log[dest_str_idx]
2420		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2421					    context);
2422	      if (BE (mctx->state_log[dest_str_idx] == NULL
2423		      && err != REG_NOERROR, 0))
2424		goto free_return;
2425	    }
2426	  else
2427	    {
2428	      re_node_set dest_nodes;
2429	      err = re_node_set_init_union (&dest_nodes,
2430					    dest_state->entrance_nodes,
2431					    new_dest_nodes);
2432	      if (BE (err != REG_NOERROR, 0))
2433		{
2434		  re_node_set_free (&dest_nodes);
2435		  goto free_return;
2436		}
2437	      mctx->state_log[dest_str_idx]
2438		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2439	      re_node_set_free (&dest_nodes);
2440	      if (BE (mctx->state_log[dest_str_idx] == NULL
2441		      && err != REG_NOERROR, 0))
2442		goto free_return;
2443	    }
2444	  /* We need to check recursively if the backreference can epsilon
2445	     transit.  */
2446	  if (subexp_len == 0
2447	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2448	    {
2449	      err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
2450					       cur_str_idx);
2451	      if (BE (err != REG_NOERROR, 0))
2452		goto free_return;
2453	      err = transit_state_bkref (preg, new_dest_nodes, mctx);
2454	      if (BE (err != REG_NOERROR, 0))
2455		goto free_return;
2456	    }
2457	}
2458    }
2459  err = REG_NOERROR;
2460 free_return:
2461  return err;
2462}
2463
2464/* Enumerate all the candidates which the backreference BKREF_NODE can match
2465   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2466   Note that we might collect inappropriate candidates here.
2467   However, the cost of checking them strictly here is too high, then we
2468   delay these checking for prune_impossible_nodes().  */
2469
2470static reg_errcode_t
2471get_subexp (preg, mctx, bkref_node, bkref_str_idx)
2472     const regex_t *preg;
2473     re_match_context_t *mctx;
2474     int bkref_node, bkref_str_idx;
2475{
2476  int subexp_num, sub_top_idx;
2477  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2478  char *buf = (char *) re_string_get_buffer (mctx->input);
2479  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2480  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2481  for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
2482    {
2483      struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2484      if (entry->str_idx > bkref_str_idx)
2485	break;
2486      if (entry->node == bkref_node)
2487	return REG_NOERROR; /* We already checked it.  */
2488    }
2489  subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
2490
2491  /* For each sub expression  */
2492  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2493    {
2494      reg_errcode_t err;
2495      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2496      re_sub_match_last_t *sub_last;
2497      int sub_last_idx, sl_str;
2498      char *bkref_str;
2499
2500      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2501	continue; /* It isn't related.  */
2502
2503      sl_str = sub_top->str_idx;
2504      bkref_str = buf + bkref_str_idx;
2505      /* At first, check the last node of sub expressions we already
2506	 evaluated.  */
2507      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2508	{
2509	  int sl_str_diff;
2510	  sub_last = sub_top->lasts[sub_last_idx];
2511	  sl_str_diff = sub_last->str_idx - sl_str;
2512	  /* The matched string by the sub expression match with the substring
2513	     at the back reference?  */
2514	  if (sl_str_diff > 0
2515	      && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
2516	    break; /* We don't need to search this sub expression any more.  */
2517	  bkref_str += sl_str_diff;
2518	  sl_str += sl_str_diff;
2519	  err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2520				bkref_str_idx);
2521	  if (err == REG_NOMATCH)
2522	    continue;
2523	  if (BE (err != REG_NOERROR, 0))
2524	    return err;
2525	}
2526      if (sub_last_idx < sub_top->nlasts)
2527	continue;
2528      if (sub_last_idx > 0)
2529	++sl_str;
2530      /* Then, search for the other last nodes of the sub expression.  */
2531      for (; sl_str <= bkref_str_idx; ++sl_str)
2532	{
2533	  int cls_node, sl_str_off;
2534	  re_node_set *nodes;
2535	  sl_str_off = sl_str - sub_top->str_idx;
2536	  /* The matched string by the sub expression match with the substring
2537	     at the back reference?  */
2538	  if (sl_str_off > 0
2539	      && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
2540	    break; /* We don't need to search this sub expression any more.  */
2541	  if (mctx->state_log[sl_str] == NULL)
2542	    continue;
2543	  /* Does this state have a ')' of the sub expression?  */
2544	  nodes = &mctx->state_log[sl_str]->nodes;
2545	  cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
2546	  if (cls_node == -1)
2547	    continue; /* No.  */
2548	  if (sub_top->path == NULL)
2549	    {
2550	      sub_top->path = calloc (sizeof (state_array_t),
2551				      sl_str - sub_top->str_idx + 1);
2552	      if (sub_top->path == NULL)
2553		return REG_ESPACE;
2554	    }
2555	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2556	     in the current context?  */
2557	  err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
2558			       sub_top->str_idx, cls_node, sl_str, 0);
2559	  if (err == REG_NOMATCH)
2560	      continue;
2561	  if (BE (err != REG_NOERROR, 0))
2562	      return err;
2563	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2564	  if (BE (sub_last == NULL, 0))
2565	    return REG_ESPACE;
2566	  err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2567				bkref_str_idx);
2568	  if (err == REG_NOMATCH)
2569	    continue;
2570	}
2571    }
2572  return REG_NOERROR;
2573}
2574
2575/* Helper functions for get_subexp().  */
2576
2577/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2578   If it can arrive, register the sub expression expressed with SUB_TOP
2579   and SUB_LAST.  */
2580
2581static reg_errcode_t
2582get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
2583     const regex_t *preg;
2584     re_match_context_t *mctx;
2585     re_sub_match_top_t *sub_top;
2586     re_sub_match_last_t *sub_last;
2587     int bkref_node, bkref_str;
2588{
2589  reg_errcode_t err;
2590  int to_idx;
2591  /* Can the subexpression arrive the back reference?  */
2592  err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
2593		       sub_last->str_idx, bkref_node, bkref_str, 1);
2594  if (err != REG_NOERROR)
2595    return err;
2596  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2597			     sub_last->str_idx);
2598  if (BE (err != REG_NOERROR, 0))
2599    return err;
2600  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2601  clean_state_log_if_need (mctx, to_idx);
2602  return REG_NOERROR;
2603}
2604
2605/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2606   Search '(' if FL_OPEN, or search ')' otherwise.
2607   TODO: This function isn't efficient...
2608	 Because there might be more than one nodes whose types are
2609	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2610	 nodes.
2611	 E.g. RE: (a){2}  */
2612
2613static int
2614find_subexp_node (dfa, nodes, subexp_idx, fl_open)
2615     re_dfa_t *dfa;
2616     re_node_set *nodes;
2617     int subexp_idx, fl_open;
2618{
2619  int cls_idx;
2620  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2621    {
2622      int cls_node = nodes->elems[cls_idx];
2623      re_token_t *node = dfa->nodes + cls_node;
2624      if (((fl_open && node->type == OP_OPEN_SUBEXP)
2625	  || (!fl_open && node->type == OP_CLOSE_SUBEXP))
2626	  && node->opr.idx == subexp_idx)
2627	return cls_node;
2628    }
2629  return -1;
2630}
2631
2632/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2633   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2634   heavily reused.
2635   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2636
2637static reg_errcode_t
2638check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
2639	       fl_open)
2640     const regex_t *preg;
2641     re_match_context_t *mctx;
2642     state_array_t *path;
2643     int top_node, top_str, last_node, last_str, fl_open;
2644{
2645  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2646  reg_errcode_t err;
2647  int subexp_num, backup_cur_idx, str_idx, null_cnt;
2648  re_dfastate_t *cur_state = NULL;
2649  re_node_set *cur_nodes, next_nodes;
2650  re_dfastate_t **backup_state_log;
2651  unsigned int context;
2652
2653  subexp_num = dfa->nodes[top_node].opr.idx;
2654  /* Extend the buffer if we need.  */
2655  if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
2656    {
2657      re_dfastate_t **new_array;
2658      int old_alloc = path->alloc;
2659      path->alloc += last_str + mctx->max_mb_elem_len + 1;
2660      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2661      if (new_array == NULL)
2662	return REG_ESPACE;
2663      path->array = new_array;
2664      memset (new_array + old_alloc, '\0',
2665	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2666    }
2667
2668  str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2669
2670  /* Temporary modify MCTX.  */
2671  backup_state_log = mctx->state_log;
2672  backup_cur_idx = mctx->input->cur_idx;
2673  mctx->state_log = path->array;
2674  mctx->input->cur_idx = str_idx;
2675
2676  /* Setup initial node set.  */
2677  context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2678				  preg->newline_anchor);
2679  if (str_idx == top_str)
2680    {
2681      err = re_node_set_init_1 (&next_nodes, top_node);
2682      if (BE (err != REG_NOERROR, 0))
2683	return err;
2684      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
2685      if (BE (err != REG_NOERROR, 0))
2686	{
2687	  re_node_set_free (&next_nodes);
2688	  return err;
2689	}
2690    }
2691  else
2692    {
2693      cur_state = mctx->state_log[str_idx];
2694      if (cur_state && cur_state->has_backref)
2695	{
2696	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2697	  if (BE ( err != REG_NOERROR, 0))
2698	    return err;
2699	}
2700      else
2701	re_node_set_init_empty (&next_nodes);
2702    }
2703  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2704    {
2705      if (next_nodes.nelem)
2706	{
2707	  err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2708				    subexp_num, fl_open);
2709	  if (BE ( err != REG_NOERROR, 0))
2710	    {
2711	      re_node_set_free (&next_nodes);
2712	      return err;
2713	    }
2714	}
2715      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2716      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2717	{
2718	  re_node_set_free (&next_nodes);
2719	  return err;
2720	}
2721      mctx->state_log[str_idx] = cur_state;
2722    }
2723
2724  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2725    {
2726      re_node_set_empty (&next_nodes);
2727      if (mctx->state_log[str_idx + 1])
2728	{
2729	  err = re_node_set_merge (&next_nodes,
2730				   &mctx->state_log[str_idx + 1]->nodes);
2731	  if (BE (err != REG_NOERROR, 0))
2732	    {
2733	      re_node_set_free (&next_nodes);
2734	      return err;
2735	    }
2736	}
2737      if (cur_state)
2738	{
2739	  err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
2740					     &cur_state->nodes, &next_nodes);
2741	  if (BE (err != REG_NOERROR, 0))
2742	    {
2743	      re_node_set_free (&next_nodes);
2744	      return err;
2745	    }
2746	}
2747      ++str_idx;
2748      if (next_nodes.nelem)
2749	{
2750	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
2751					  fl_open);
2752	  if (BE (err != REG_NOERROR, 0))
2753	    {
2754	      re_node_set_free (&next_nodes);
2755	      return err;
2756	    }
2757	  err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2758				    subexp_num, fl_open);
2759	  if (BE ( err != REG_NOERROR, 0))
2760	    {
2761	      re_node_set_free (&next_nodes);
2762	      return err;
2763	    }
2764	}
2765      context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2766				      preg->newline_anchor);
2767      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2768      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2769	{
2770	  re_node_set_free (&next_nodes);
2771	  return err;
2772	}
2773      mctx->state_log[str_idx] = cur_state;
2774      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2775    }
2776  re_node_set_free (&next_nodes);
2777  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2778	       : &mctx->state_log[last_str]->nodes);
2779  path->next_idx = str_idx;
2780
2781  /* Fix MCTX.  */
2782  mctx->state_log = backup_state_log;
2783  mctx->input->cur_idx = backup_cur_idx;
2784
2785  if (cur_nodes == NULL)
2786    return REG_NOMATCH;
2787  /* Then check the current node set has the node LAST_NODE.  */
2788  return (re_node_set_contains (cur_nodes, last_node)
2789	  || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
2790	  : REG_NOMATCH);
2791}
2792
2793/* Helper functions for check_arrival.  */
2794
2795/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2796   to NEXT_NODES.
2797   TODO: This function is similar to the functions transit_state*(),
2798	 however this function has many additional works.
2799	 Can't we unify them?  */
2800
2801static reg_errcode_t
2802check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
2803     const regex_t *preg;
2804     re_dfa_t *dfa;
2805     re_match_context_t *mctx;
2806     int str_idx;
2807     re_node_set *cur_nodes, *next_nodes;
2808{
2809  int cur_idx;
2810  reg_errcode_t err;
2811  re_node_set union_set;
2812  re_node_set_init_empty (&union_set);
2813  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2814    {
2815      int naccepted = 0;
2816      int cur_node = cur_nodes->elems[cur_idx];
2817      re_token_type_t type = dfa->nodes[cur_node].type;
2818      if (IS_EPSILON_NODE(type))
2819	continue;
2820#ifdef RE_ENABLE_I18N
2821      /* If the node may accept `multi byte'.  */
2822      if (ACCEPT_MB_NODE (type))
2823	{
2824	  naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
2825					       str_idx);
2826	  if (naccepted > 1)
2827	    {
2828	      re_dfastate_t *dest_state;
2829	      int next_node = dfa->nexts[cur_node];
2830	      int next_idx = str_idx + naccepted;
2831	      dest_state = mctx->state_log[next_idx];
2832	      re_node_set_empty (&union_set);
2833	      if (dest_state)
2834		{
2835		  err = re_node_set_merge (&union_set, &dest_state->nodes);
2836		  if (BE (err != REG_NOERROR, 0))
2837		    {
2838		      re_node_set_free (&union_set);
2839		      return err;
2840		    }
2841		  err = re_node_set_insert (&union_set, next_node);
2842		  if (BE (err < 0, 0))
2843		    {
2844		      re_node_set_free (&union_set);
2845		      return REG_ESPACE;
2846		    }
2847		}
2848	      else
2849		{
2850		  err = re_node_set_insert (&union_set, next_node);
2851		  if (BE (err < 0, 0))
2852		    {
2853		      re_node_set_free (&union_set);
2854		      return REG_ESPACE;
2855		    }
2856		}
2857	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
2858							    &union_set);
2859	      if (BE (mctx->state_log[next_idx] == NULL
2860		      && err != REG_NOERROR, 0))
2861		{
2862		  re_node_set_free (&union_set);
2863		  return err;
2864		}
2865	    }
2866	}
2867#endif /* RE_ENABLE_I18N */
2868      if (naccepted
2869	  || check_node_accept (preg, dfa->nodes + cur_node, mctx,
2870				str_idx))
2871	{
2872	  err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
2873	  if (BE (err < 0, 0))
2874	    {
2875	      re_node_set_free (&union_set);
2876	      return REG_ESPACE;
2877	    }
2878	}
2879    }
2880  re_node_set_free (&union_set);
2881  return REG_NOERROR;
2882}
2883
2884/* For all the nodes in CUR_NODES, add the epsilon closures of them to
2885   CUR_NODES, however exclude the nodes which are:
2886    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
2887    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
2888*/
2889
2890static reg_errcode_t
2891check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
2892     re_dfa_t *dfa;
2893     re_node_set *cur_nodes;
2894     int ex_subexp, fl_open;
2895{
2896  reg_errcode_t err;
2897  int idx, outside_node;
2898  re_node_set new_nodes;
2899#ifdef DEBUG
2900  assert (cur_nodes->nelem);
2901#endif
2902  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
2903  if (BE (err != REG_NOERROR, 0))
2904    return err;
2905  /* Create a new node set NEW_NODES with the nodes which are epsilon
2906     closures of the node in CUR_NODES.  */
2907
2908  for (idx = 0; idx < cur_nodes->nelem; ++idx)
2909    {
2910      int cur_node = cur_nodes->elems[idx];
2911      re_node_set *eclosure = dfa->eclosures + cur_node;
2912      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open);
2913      if (outside_node == -1)
2914	{
2915	  /* There are no problematic nodes, just merge them.  */
2916	  err = re_node_set_merge (&new_nodes, eclosure);
2917	  if (BE (err != REG_NOERROR, 0))
2918	    {
2919	      re_node_set_free (&new_nodes);
2920	      return err;
2921	    }
2922	}
2923      else
2924	{
2925	  /* There are problematic nodes, re-calculate incrementally.  */
2926	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
2927					      ex_subexp, fl_open);
2928	  if (BE (err != REG_NOERROR, 0))
2929	    {
2930	      re_node_set_free (&new_nodes);
2931	      return err;
2932	    }
2933	}
2934    }
2935  re_node_set_free (cur_nodes);
2936  *cur_nodes = new_nodes;
2937  return REG_NOERROR;
2938}
2939
2940/* Helper function for check_arrival_expand_ecl.
2941   Check incrementally the epsilon closure of TARGET, and if it isn't
2942   problematic append it to DST_NODES.  */
2943
2944static reg_errcode_t
2945check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
2946     re_dfa_t *dfa;
2947     int target, ex_subexp, fl_open;
2948     re_node_set *dst_nodes;
2949{
2950  int cur_node, type;
2951  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
2952    {
2953      int err;
2954      type = dfa->nodes[cur_node].type;
2955
2956      if (((type == OP_OPEN_SUBEXP && fl_open)
2957	   || (type == OP_CLOSE_SUBEXP && !fl_open))
2958	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
2959	{
2960	  if (!fl_open)
2961	    {
2962	      err = re_node_set_insert (dst_nodes, cur_node);
2963	      if (BE (err == -1, 0))
2964		return REG_ESPACE;
2965	    }
2966	  break;
2967	}
2968      err = re_node_set_insert (dst_nodes, cur_node);
2969      if (BE (err == -1, 0))
2970	return REG_ESPACE;
2971      if (dfa->edests[cur_node].nelem == 0)
2972	break;
2973      if (dfa->edests[cur_node].nelem == 2)
2974	{
2975	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
2976					      dfa->edests[cur_node].elems[1],
2977					      ex_subexp, fl_open);
2978	  if (BE (err != REG_NOERROR, 0))
2979	    return err;
2980	}
2981      cur_node = dfa->edests[cur_node].elems[0];
2982    }
2983  return REG_NOERROR;
2984}
2985
2986
2987/* For all the back references in the current state, calculate the
2988   destination of the back references by the appropriate entry
2989   in MCTX->BKREF_ENTS.  */
2990
2991static reg_errcode_t
2992expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
2993		    fl_open)
2994     const regex_t *preg;
2995     re_match_context_t *mctx;
2996     int cur_str, last_str, subexp_num, fl_open;
2997     re_node_set *cur_nodes;
2998{
2999  reg_errcode_t err;
3000  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3001  int cache_idx, cache_idx_start;
3002  /* The current state.  */
3003
3004  cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3005  for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
3006    {
3007      int to_idx, next_node;
3008      struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
3009      if (ent->str_idx > cur_str)
3010	break;
3011      /* Is this entry ENT is appropriate?  */
3012      if (!re_node_set_contains (cur_nodes, ent->node))
3013	continue; /* No.  */
3014
3015      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3016      /* Calculate the destination of the back reference, and append it
3017	 to MCTX->STATE_LOG.  */
3018      if (to_idx == cur_str)
3019	{
3020	  /* The backreference did epsilon transit, we must re-check all the
3021	     node in the current state.  */
3022	  re_node_set new_dests;
3023	  reg_errcode_t err2, err3;
3024	  next_node = dfa->edests[ent->node].elems[0];
3025	  if (re_node_set_contains (cur_nodes, next_node))
3026	    continue;
3027	  err = re_node_set_init_1 (&new_dests, next_node);
3028	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
3029					   fl_open);
3030	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3031	  re_node_set_free (&new_dests);
3032	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3033		  || err3 != REG_NOERROR, 0))
3034	    {
3035	      err = (err != REG_NOERROR ? err
3036		     : (err2 != REG_NOERROR ? err2 : err3));
3037	      return err;
3038	    }
3039	  /* TODO: It is still inefficient...  */
3040	  cache_idx = cache_idx_start - 1;
3041	  continue;
3042	}
3043      else
3044	{
3045	  re_node_set union_set;
3046	  next_node = dfa->nexts[ent->node];
3047	  if (mctx->state_log[to_idx])
3048	    {
3049	      int ret;
3050	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3051					next_node))
3052		continue;
3053	      err = re_node_set_init_copy (&union_set,
3054					   &mctx->state_log[to_idx]->nodes);
3055	      ret = re_node_set_insert (&union_set, next_node);
3056	      if (BE (err != REG_NOERROR || ret < 0, 0))
3057		{
3058		  re_node_set_free (&union_set);
3059		  err = err != REG_NOERROR ? err : REG_ESPACE;
3060		  return err;
3061		}
3062	    }
3063	  else
3064	    {
3065	      err = re_node_set_init_1 (&union_set, next_node);
3066	      if (BE (err != REG_NOERROR, 0))
3067		return err;
3068	    }
3069	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3070	  re_node_set_free (&union_set);
3071	  if (BE (mctx->state_log[to_idx] == NULL
3072		  && err != REG_NOERROR, 0))
3073	    return err;
3074	}
3075    }
3076  return REG_NOERROR;
3077}
3078
3079/* Build transition table for the state.
3080   Return the new table if succeeded, otherwise return NULL.  */
3081
3082static re_dfastate_t **
3083build_trtable (preg, state, fl_search)
3084    const regex_t *preg;
3085    const re_dfastate_t *state;
3086    int fl_search;
3087{
3088  reg_errcode_t err;
3089  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3090  int i, j, k, ch;
3091  int dests_node_malloced = 0, dest_states_malloced = 0;
3092  int ndests; /* Number of the destination states from `state'.  */
3093  re_dfastate_t **trtable;
3094  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3095  re_node_set follows, *dests_node;
3096  bitset *dests_ch;
3097  bitset acceptable;
3098
3099  /* We build DFA states which corresponds to the destination nodes
3100     from `state'.  `dests_node[i]' represents the nodes which i-th
3101     destination state contains, and `dests_ch[i]' represents the
3102     characters which i-th destination state accepts.  */
3103#ifdef _LIBC
3104  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3105    dests_node = (re_node_set *)
3106		 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3107  else
3108#endif
3109    {
3110      dests_node = (re_node_set *)
3111		   malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3112      if (BE (dests_node == NULL, 0))
3113	return NULL;
3114      dests_node_malloced = 1;
3115    }
3116  dests_ch = (bitset *) (dests_node + SBC_MAX);
3117
3118  /* Initialize transiton table.  */
3119  trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3120  if (BE (trtable == NULL, 0))
3121    {
3122      if (dests_node_malloced)
3123	free (dests_node);
3124      return NULL;
3125    }
3126
3127  /* At first, group all nodes belonging to `state' into several
3128     destinations.  */
3129  ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
3130  if (BE (ndests <= 0, 0))
3131    {
3132      if (dests_node_malloced)
3133	free (dests_node);
3134      /* Return NULL in case of an error, trtable otherwise.  */
3135      if (ndests == 0)
3136	return trtable;
3137      free (trtable);
3138      return NULL;
3139    }
3140
3141  err = re_node_set_alloc (&follows, ndests + 1);
3142  if (BE (err != REG_NOERROR, 0))
3143    goto out_free;
3144
3145#ifdef _LIBC
3146  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3147			 + ndests * 3 * sizeof (re_dfastate_t *)))
3148    dest_states = (re_dfastate_t **)
3149		  alloca (ndests * 3 * sizeof (re_dfastate_t *));
3150  else
3151#endif
3152    {
3153      dest_states = (re_dfastate_t **)
3154		    malloc (ndests * 3 * sizeof (re_dfastate_t *));
3155      if (BE (dest_states == NULL, 0))
3156	{
3157out_free:
3158	  if (dest_states_malloced)
3159	    free (dest_states);
3160	  re_node_set_free (&follows);
3161	  for (i = 0; i < ndests; ++i)
3162	    re_node_set_free (dests_node + i);
3163	  free (trtable);
3164	  if (dests_node_malloced)
3165	    free (dests_node);
3166	  return NULL;
3167	}
3168      dest_states_malloced = 1;
3169    }
3170  dest_states_word = dest_states + ndests;
3171  dest_states_nl = dest_states_word + ndests;
3172  bitset_empty (acceptable);
3173
3174  /* Then build the states for all destinations.  */
3175  for (i = 0; i < ndests; ++i)
3176    {
3177      int next_node;
3178      re_node_set_empty (&follows);
3179      /* Merge the follows of this destination states.  */
3180      for (j = 0; j < dests_node[i].nelem; ++j)
3181	{
3182	  next_node = dfa->nexts[dests_node[i].elems[j]];
3183	  if (next_node != -1)
3184	    {
3185	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3186	      if (BE (err != REG_NOERROR, 0))
3187		goto out_free;
3188	    }
3189	}
3190      /* If search flag is set, merge the initial state.  */
3191      if (fl_search)
3192	{
3193#ifdef RE_ENABLE_I18N
3194	  int not_initial = 0;
3195	  for (j = 0; j < follows.nelem; ++j)
3196	    if (dfa->nodes[follows.elems[j]].type == CHARACTER)
3197	      {
3198		not_initial = dfa->nodes[follows.elems[j]].mb_partial;
3199		break;
3200	      }
3201	  if (!not_initial)
3202#endif
3203	    {
3204	      err = re_node_set_merge (&follows,
3205				       dfa->init_state->entrance_nodes);
3206	      if (BE (err != REG_NOERROR, 0))
3207		goto out_free;
3208	    }
3209	}
3210      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3211      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3212	goto out_free;
3213      /* If the new state has context constraint,
3214	 build appropriate states for these contexts.  */
3215      if (dest_states[i]->has_constraint)
3216	{
3217	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3218							  CONTEXT_WORD);
3219	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3220	    goto out_free;
3221	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3222							CONTEXT_NEWLINE);
3223	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3224	    goto out_free;
3225	}
3226      else
3227	{
3228	  dest_states_word[i] = dest_states[i];
3229	  dest_states_nl[i] = dest_states[i];
3230	}
3231      bitset_merge (acceptable, dests_ch[i]);
3232    }
3233
3234  /* Update the transition table.  */
3235  /* For all characters ch...:  */
3236  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
3237    for (j = 0; j < UINT_BITS; ++j, ++ch)
3238      if ((acceptable[i] >> j) & 1)
3239	{
3240	  /* The current state accepts the character ch.  */
3241	  if (IS_WORD_CHAR (ch))
3242	    {
3243	      for (k = 0; k < ndests; ++k)
3244		if ((dests_ch[k][i] >> j) & 1)
3245		  {
3246		    /* k-th destination accepts the word character ch.  */
3247		    trtable[ch] = dest_states_word[k];
3248		    /* There must be only one destination which accepts
3249		       character ch.  See group_nodes_into_DFAstates.  */
3250		    break;
3251		  }
3252	    }
3253	  else /* not WORD_CHAR */
3254	    {
3255	      for (k = 0; k < ndests; ++k)
3256		if ((dests_ch[k][i] >> j) & 1)
3257		  {
3258		    /* k-th destination accepts the non-word character ch.  */
3259		    trtable[ch] = dest_states[k];
3260		    /* There must be only one destination which accepts
3261		       character ch.  See group_nodes_into_DFAstates.  */
3262		    break;
3263		  }
3264	    }
3265	}
3266  /* new line */
3267  if (bitset_contain (acceptable, NEWLINE_CHAR))
3268    {
3269      /* The current state accepts newline character.  */
3270      for (k = 0; k < ndests; ++k)
3271	if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
3272	  {
3273	    /* k-th destination accepts newline character.  */
3274	    trtable[NEWLINE_CHAR] = dest_states_nl[k];
3275	    /* There must be only one destination which accepts
3276	       newline.  See group_nodes_into_DFAstates.  */
3277	    break;
3278	  }
3279    }
3280
3281  if (dest_states_malloced)
3282    free (dest_states);
3283
3284  re_node_set_free (&follows);
3285  for (i = 0; i < ndests; ++i)
3286    re_node_set_free (dests_node + i);
3287
3288  if (dests_node_malloced)
3289    free (dests_node);
3290
3291  return trtable;
3292}
3293
3294/* Group all nodes belonging to STATE into several destinations.
3295   Then for all destinations, set the nodes belonging to the destination
3296   to DESTS_NODE[i] and set the characters accepted by the destination
3297   to DEST_CH[i].  This function return the number of destinations.  */
3298
3299static int
3300group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
3301    const regex_t *preg;
3302    const re_dfastate_t *state;
3303    re_node_set *dests_node;
3304    bitset *dests_ch;
3305{
3306  reg_errcode_t err;
3307  const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3308  int i, j, k;
3309  int ndests; /* Number of the destinations from `state'.  */
3310  bitset accepts; /* Characters a node can accept.  */
3311  const re_node_set *cur_nodes = &state->nodes;
3312  bitset_empty (accepts);
3313  ndests = 0;
3314
3315  /* For all the nodes belonging to `state',  */
3316  for (i = 0; i < cur_nodes->nelem; ++i)
3317    {
3318      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3319      re_token_type_t type = node->type;
3320      unsigned int constraint = node->constraint;
3321
3322      /* Enumerate all single byte character this node can accept.  */
3323      if (type == CHARACTER)
3324	bitset_set (accepts, node->opr.c);
3325      else if (type == SIMPLE_BRACKET)
3326	{
3327	  bitset_merge (accepts, node->opr.sbcset);
3328	}
3329      else if (type == OP_PERIOD)
3330	{
3331	  bitset_set_all (accepts);
3332	  if (!(preg->syntax & RE_DOT_NEWLINE))
3333	    bitset_clear (accepts, '\n');
3334	  if (preg->syntax & RE_DOT_NOT_NULL)
3335	    bitset_clear (accepts, '\0');
3336	}
3337      else
3338	continue;
3339
3340      /* Check the `accepts' and sift the characters which are not
3341	 match it the context.  */
3342      if (constraint)
3343	{
3344	  if (constraint & NEXT_WORD_CONSTRAINT)
3345	    for (j = 0; j < BITSET_UINTS; ++j)
3346	      accepts[j] &= dfa->word_char[j];
3347	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3348	    for (j = 0; j < BITSET_UINTS; ++j)
3349	      accepts[j] &= ~dfa->word_char[j];
3350	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3351	    {
3352	      int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3353	      bitset_empty (accepts);
3354	      if (accepts_newline)
3355		bitset_set (accepts, NEWLINE_CHAR);
3356	      else
3357		continue;
3358	    }
3359	}
3360
3361      /* Then divide `accepts' into DFA states, or create a new
3362	 state.  */
3363      for (j = 0; j < ndests; ++j)
3364	{
3365	  bitset intersec; /* Intersection sets, see below.  */
3366	  bitset remains;
3367	  /* Flags, see below.  */
3368	  int has_intersec, not_subset, not_consumed;
3369
3370	  /* Optimization, skip if this state doesn't accept the character.  */
3371	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3372	    continue;
3373
3374	  /* Enumerate the intersection set of this state and `accepts'.  */
3375	  has_intersec = 0;
3376	  for (k = 0; k < BITSET_UINTS; ++k)
3377	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3378	  /* And skip if the intersection set is empty.  */
3379	  if (!has_intersec)
3380	    continue;
3381
3382	  /* Then check if this state is a subset of `accepts'.  */
3383	  not_subset = not_consumed = 0;
3384	  for (k = 0; k < BITSET_UINTS; ++k)
3385	    {
3386	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3387	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3388	    }
3389
3390	  /* If this state isn't a subset of `accepts', create a
3391	     new group state, which has the `remains'. */
3392	  if (not_subset)
3393	    {
3394	      bitset_copy (dests_ch[ndests], remains);
3395	      bitset_copy (dests_ch[j], intersec);
3396	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3397	      if (BE (err != REG_NOERROR, 0))
3398		goto error_return;
3399	      ++ndests;
3400	    }
3401
3402	  /* Put the position in the current group. */
3403	  err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3404	  if (BE (err < 0, 0))
3405	    goto error_return;
3406
3407	  /* If all characters are consumed, go to next node. */
3408	  if (!not_consumed)
3409	    break;
3410	}
3411      /* Some characters remain, create a new group. */
3412      if (j == ndests)
3413	{
3414	  bitset_copy (dests_ch[ndests], accepts);
3415	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3416	  if (BE (err != REG_NOERROR, 0))
3417	    goto error_return;
3418	  ++ndests;
3419	  bitset_empty (accepts);
3420	}
3421    }
3422  return ndests;
3423 error_return:
3424  for (j = 0; j < ndests; ++j)
3425    re_node_set_free (dests_node + j);
3426  return -1;
3427}
3428
3429#ifdef RE_ENABLE_I18N
3430/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3431   Return the number of the bytes the node accepts.
3432   STR_IDX is the current index of the input string.
3433
3434   This function handles the nodes which can accept one character, or
3435   one collating element like '.', '[a-z]', opposite to the other nodes
3436   can only accept one byte.  */
3437
3438static int
3439check_node_accept_bytes (preg, node_idx, input, str_idx)
3440    const regex_t *preg;
3441    int node_idx, str_idx;
3442    const re_string_t *input;
3443{
3444  const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3445  const re_token_t *node = dfa->nodes + node_idx;
3446  int elem_len = re_string_elem_size_at (input, str_idx);
3447  int char_len = re_string_char_size_at (input, str_idx);
3448  int i;
3449# ifdef _LIBC
3450  int j;
3451  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3452# endif /* _LIBC */
3453  if (elem_len <= 1 && char_len <= 1)
3454    return 0;
3455  if (node->type == OP_PERIOD)
3456    {
3457      /* '.' accepts any one character except the following two cases.  */
3458      if ((!(preg->syntax & RE_DOT_NEWLINE) &&
3459	   re_string_byte_at (input, str_idx) == '\n') ||
3460	  ((preg->syntax & RE_DOT_NOT_NULL) &&
3461	   re_string_byte_at (input, str_idx) == '\0'))
3462	return 0;
3463      return char_len;
3464    }
3465  else if (node->type == COMPLEX_BRACKET)
3466    {
3467      const re_charset_t *cset = node->opr.mbcset;
3468# ifdef _LIBC
3469      const unsigned char *pin = re_string_get_buffer (input) + str_idx;
3470# endif /* _LIBC */
3471      int match_len = 0;
3472      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3473		    ? re_string_wchar_at (input, str_idx) : 0);
3474
3475      /* match with multibyte character?  */
3476      for (i = 0; i < cset->nmbchars; ++i)
3477	if (wc == cset->mbchars[i])
3478	  {
3479	    match_len = char_len;
3480	    goto check_node_accept_bytes_match;
3481	  }
3482      /* match with character_class?  */
3483      for (i = 0; i < cset->nchar_classes; ++i)
3484	{
3485	  wctype_t wt = cset->char_classes[i];
3486	  if (__iswctype (wc, wt))
3487	    {
3488	      match_len = char_len;
3489	      goto check_node_accept_bytes_match;
3490	    }
3491	}
3492
3493# ifdef _LIBC
3494      if (nrules != 0)
3495	{
3496	  unsigned int in_collseq = 0;
3497	  const int32_t *table, *indirect;
3498	  const unsigned char *weights, *extra;
3499	  const char *collseqwc;
3500	  int32_t idx;
3501	  /* This #include defines a local function!  */
3502#  include <locale/weight.h>
3503
3504	  /* match with collating_symbol?  */
3505	  if (cset->ncoll_syms)
3506	    extra = (const unsigned char *)
3507	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3508	  for (i = 0; i < cset->ncoll_syms; ++i)
3509	    {
3510	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3511	      /* Compare the length of input collating element and
3512		 the length of current collating element.  */
3513	      if (*coll_sym != elem_len)
3514		continue;
3515	      /* Compare each bytes.  */
3516	      for (j = 0; j < *coll_sym; j++)
3517		if (pin[j] != coll_sym[1 + j])
3518		  break;
3519	      if (j == *coll_sym)
3520		{
3521		  /* Match if every bytes is equal.  */
3522		  match_len = j;
3523		  goto check_node_accept_bytes_match;
3524		}
3525	    }
3526
3527	  if (cset->nranges)
3528	    {
3529	      if (elem_len <= char_len)
3530		{
3531		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3532		  in_collseq = collseq_table_lookup (collseqwc, wc);
3533		}
3534	      else
3535		in_collseq = find_collation_sequence_value (pin, elem_len);
3536	    }
3537	  /* match with range expression?  */
3538	  for (i = 0; i < cset->nranges; ++i)
3539	    if (cset->range_starts[i] <= in_collseq
3540		&& in_collseq <= cset->range_ends[i])
3541	      {
3542		match_len = elem_len;
3543		goto check_node_accept_bytes_match;
3544	      }
3545
3546	  /* match with equivalence_class?  */
3547	  if (cset->nequiv_classes)
3548	    {
3549	      const unsigned char *cp = pin;
3550	      table = (const int32_t *)
3551		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3552	      weights = (const unsigned char *)
3553		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3554	      extra = (const unsigned char *)
3555		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3556	      indirect = (const int32_t *)
3557		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3558	      idx = findidx (&cp);
3559	      if (idx > 0)
3560		for (i = 0; i < cset->nequiv_classes; ++i)
3561		  {
3562		    int32_t equiv_class_idx = cset->equiv_classes[i];
3563		    size_t weight_len = weights[idx];
3564		    if (weight_len == weights[equiv_class_idx])
3565		      {
3566			int cnt = 0;
3567			while (cnt <= weight_len
3568			       && (weights[equiv_class_idx + 1 + cnt]
3569				   == weights[idx + 1 + cnt]))
3570			  ++cnt;
3571			if (cnt > weight_len)
3572			  {
3573			    match_len = elem_len;
3574			    goto check_node_accept_bytes_match;
3575			  }
3576		      }
3577		  }
3578	    }
3579	}
3580      else
3581# endif /* _LIBC */
3582	{
3583	  /* match with range expression?  */
3584#if __GNUC__ >= 2
3585	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3586#else
3587	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3588	  cmp_buf[2] = wc;
3589#endif
3590	  for (i = 0; i < cset->nranges; ++i)
3591	    {
3592	      cmp_buf[0] = cset->range_starts[i];
3593	      cmp_buf[4] = cset->range_ends[i];
3594	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3595		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3596		{
3597		  match_len = char_len;
3598		  goto check_node_accept_bytes_match;
3599		}
3600	    }
3601	}
3602    check_node_accept_bytes_match:
3603      if (!cset->non_match)
3604	return match_len;
3605      else
3606	{
3607	  if (match_len > 0)
3608	    return 0;
3609	  else
3610	    return (elem_len > char_len) ? elem_len : char_len;
3611	}
3612    }
3613  return 0;
3614}
3615
3616# ifdef _LIBC
3617static unsigned int
3618find_collation_sequence_value (mbs, mbs_len)
3619    const unsigned char *mbs;
3620    size_t mbs_len;
3621{
3622  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3623  if (nrules == 0)
3624    {
3625      if (mbs_len == 1)
3626	{
3627	  /* No valid character.  Match it as a single byte character.  */
3628	  const unsigned char *collseq = (const unsigned char *)
3629	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3630	  return collseq[mbs[0]];
3631	}
3632      return UINT_MAX;
3633    }
3634  else
3635    {
3636      int32_t idx;
3637      const unsigned char *extra = (const unsigned char *)
3638	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3639
3640      for (idx = 0; ;)
3641	{
3642	  int mbs_cnt, found = 0;
3643	  int32_t elem_mbs_len;
3644	  /* Skip the name of collating element name.  */
3645	  idx = idx + extra[idx] + 1;
3646	  elem_mbs_len = extra[idx++];
3647	  if (mbs_len == elem_mbs_len)
3648	    {
3649	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3650		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3651		  break;
3652	      if (mbs_cnt == elem_mbs_len)
3653		/* Found the entry.  */
3654		found = 1;
3655	    }
3656	  /* Skip the byte sequence of the collating element.  */
3657	  idx += elem_mbs_len;
3658	  /* Adjust for the alignment.  */
3659	  idx = (idx + 3) & ~3;
3660	  /* Skip the collation sequence value.  */
3661	  idx += sizeof (uint32_t);
3662	  /* Skip the wide char sequence of the collating element.  */
3663	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3664	  /* If we found the entry, return the sequence value.  */
3665	  if (found)
3666	    return *(uint32_t *) (extra + idx);
3667	  /* Skip the collation sequence value.  */
3668	  idx += sizeof (uint32_t);
3669	}
3670    }
3671}
3672# endif /* _LIBC */
3673#endif /* RE_ENABLE_I18N */
3674
3675/* Check whether the node accepts the byte which is IDX-th
3676   byte of the INPUT.  */
3677
3678static int
3679check_node_accept (preg, node, mctx, idx)
3680    const regex_t *preg;
3681    const re_token_t *node;
3682    const re_match_context_t *mctx;
3683    int idx;
3684{
3685  unsigned char ch;
3686  if (node->constraint)
3687    {
3688      /* The node has constraints.  Check whether the current context
3689	 satisfies the constraints.  */
3690      unsigned int context = re_string_context_at (mctx->input, idx,
3691						   mctx->eflags,
3692						   preg->newline_anchor);
3693      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3694	return 0;
3695    }
3696  ch = re_string_byte_at (mctx->input, idx);
3697  if (node->type == CHARACTER)
3698    return node->opr.c == ch;
3699  else if (node->type == SIMPLE_BRACKET)
3700    return bitset_contain (node->opr.sbcset, ch);
3701  else if (node->type == OP_PERIOD)
3702    return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3703	     || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3704  else
3705    return 0;
3706}
3707
3708/* Extend the buffers, if the buffers have run out.  */
3709
3710static reg_errcode_t
3711extend_buffers (mctx)
3712     re_match_context_t *mctx;
3713{
3714  reg_errcode_t ret;
3715  re_string_t *pstr = mctx->input;
3716
3717  /* Double the lengthes of the buffers.  */
3718  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3719  if (BE (ret != REG_NOERROR, 0))
3720    return ret;
3721
3722  if (mctx->state_log != NULL)
3723    {
3724      /* And double the length of state_log.  */
3725      re_dfastate_t **new_array;
3726      new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3727			      pstr->bufs_len * 2);
3728      if (BE (new_array == NULL, 0))
3729	return REG_ESPACE;
3730      mctx->state_log = new_array;
3731    }
3732
3733  /* Then reconstruct the buffers.  */
3734  if (pstr->icase)
3735    {
3736#ifdef RE_ENABLE_I18N
3737      if (re_mb_cur_max > 1)
3738	build_wcs_upper_buffer (pstr);
3739      else
3740#endif /* RE_ENABLE_I18N  */
3741	build_upper_buffer (pstr);
3742    }
3743  else
3744    {
3745#ifdef RE_ENABLE_I18N
3746      if (re_mb_cur_max > 1)
3747	build_wcs_buffer (pstr);
3748      else
3749#endif /* RE_ENABLE_I18N  */
3750	{
3751	  if (pstr->trans != NULL)
3752	    re_string_translate_buffer (pstr);
3753	  else
3754	    pstr->valid_len = pstr->bufs_len;
3755	}
3756    }
3757  return REG_NOERROR;
3758}
3759
3760
3761/* Functions for matching context.  */
3762
3763/* Initialize MCTX.  */
3764
3765static reg_errcode_t
3766match_ctx_init (mctx, eflags, input, n)
3767    re_match_context_t *mctx;
3768    int eflags, n;
3769    re_string_t *input;
3770{
3771  mctx->eflags = eflags;
3772  mctx->input = input;
3773  mctx->match_last = -1;
3774  if (n > 0)
3775    {
3776      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3777      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
3778      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
3779	return REG_ESPACE;
3780    }
3781  else
3782    mctx->bkref_ents = NULL;
3783  mctx->nbkref_ents = 0;
3784  mctx->abkref_ents = n;
3785  mctx->max_mb_elem_len = 1;
3786  mctx->nsub_tops = 0;
3787  mctx->asub_tops = n;
3788  return REG_NOERROR;
3789}
3790
3791/* Clean the entries which depend on the current input in MCTX.
3792   This function must be invoked when the matcher changes the start index
3793   of the input, or changes the input string.  */
3794
3795static void
3796match_ctx_clean (mctx)
3797    re_match_context_t *mctx;
3798{
3799  match_ctx_free_subtops (mctx);
3800  mctx->nsub_tops = 0;
3801  mctx->nbkref_ents = 0;
3802}
3803
3804/* Free all the memory associated with MCTX.  */
3805
3806static void
3807match_ctx_free (mctx)
3808    re_match_context_t *mctx;
3809{
3810  match_ctx_free_subtops (mctx);
3811  re_free (mctx->sub_tops);
3812  re_free (mctx->bkref_ents);
3813}
3814
3815/* Free all the memory associated with MCTX->SUB_TOPS.  */
3816
3817static void
3818match_ctx_free_subtops (mctx)
3819     re_match_context_t *mctx;
3820{
3821  int st_idx;
3822  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
3823    {
3824      int sl_idx;
3825      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
3826      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
3827	{
3828	  re_sub_match_last_t *last = top->lasts[sl_idx];
3829	  re_free (last->path.array);
3830	  re_free (last);
3831	}
3832      re_free (top->lasts);
3833      if (top->path)
3834	{
3835	  re_free (top->path->array);
3836	  re_free (top->path);
3837	}
3838      free (top);
3839    }
3840}
3841
3842/* Add a new backreference entry to MCTX.
3843   Note that we assume that caller never call this function with duplicate
3844   entry, and call with STR_IDX which isn't smaller than any existing entry.
3845*/
3846
3847static reg_errcode_t
3848match_ctx_add_entry (mctx, node, str_idx, from, to)
3849     re_match_context_t *mctx;
3850     int node, str_idx, from, to;
3851{
3852  if (mctx->nbkref_ents >= mctx->abkref_ents)
3853    {
3854      struct re_backref_cache_entry* new_entry;
3855      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
3856			      mctx->abkref_ents * 2);
3857      if (BE (new_entry == NULL, 0))
3858	{
3859	  re_free (mctx->bkref_ents);
3860	  return REG_ESPACE;
3861	}
3862      mctx->bkref_ents = new_entry;
3863      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
3864	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3865      mctx->abkref_ents *= 2;
3866    }
3867  mctx->bkref_ents[mctx->nbkref_ents].node = node;
3868  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
3869  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
3870  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
3871  mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
3872  if (mctx->max_mb_elem_len < to - from)
3873    mctx->max_mb_elem_len = to - from;
3874  return REG_NOERROR;
3875}
3876
3877/* Search for the first entry which has the same str_idx.
3878   Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
3879
3880static int
3881search_cur_bkref_entry (mctx, str_idx)
3882     re_match_context_t *mctx;
3883     int str_idx;
3884{
3885  int left, right, mid;
3886  right = mctx->nbkref_ents;
3887  for (left = 0; left < right;)
3888    {
3889      mid = (left + right) / 2;
3890      if (mctx->bkref_ents[mid].str_idx < str_idx)
3891	left = mid + 1;
3892      else
3893	right = mid;
3894    }
3895  return left;
3896}
3897
3898static void
3899match_ctx_clear_flag (mctx)
3900     re_match_context_t *mctx;
3901{
3902  int i;
3903  for (i = 0; i < mctx->nbkref_ents; ++i)
3904    {
3905      mctx->bkref_ents[i].flag = 0;
3906    }
3907}
3908
3909/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
3910   at STR_IDX.  */
3911
3912static reg_errcode_t
3913match_ctx_add_subtop (mctx, node, str_idx)
3914     re_match_context_t *mctx;
3915     int node, str_idx;
3916{
3917#ifdef DEBUG
3918  assert (mctx->sub_tops != NULL);
3919  assert (mctx->asub_tops > 0);
3920#endif
3921  if (mctx->nsub_tops == mctx->asub_tops)
3922    {
3923      re_sub_match_top_t **new_array;
3924      mctx->asub_tops *= 2;
3925      new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
3926			      mctx->asub_tops);
3927      if (BE (new_array == NULL, 0))
3928	return REG_ESPACE;
3929      mctx->sub_tops = new_array;
3930    }
3931  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
3932  if (mctx->sub_tops[mctx->nsub_tops] == NULL)
3933    return REG_ESPACE;
3934  mctx->sub_tops[mctx->nsub_tops]->node = node;
3935  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
3936  return REG_NOERROR;
3937}
3938
3939/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
3940   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
3941
3942static re_sub_match_last_t *
3943match_ctx_add_sublast (subtop, node, str_idx)
3944     re_sub_match_top_t *subtop;
3945     int node, str_idx;
3946{
3947  re_sub_match_last_t *new_entry;
3948  if (subtop->nlasts == subtop->alasts)
3949    {
3950      re_sub_match_last_t **new_array;
3951      subtop->alasts = 2 * subtop->alasts + 1;
3952      new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
3953			      subtop->alasts);
3954      if (BE (new_array == NULL, 0))
3955	return NULL;
3956      subtop->lasts = new_array;
3957    }
3958  new_entry = calloc (1, sizeof (re_sub_match_last_t));
3959  if (BE (new_entry == NULL, 0))
3960    return NULL;
3961  subtop->lasts[subtop->nlasts] = new_entry;
3962  new_entry->node = node;
3963  new_entry->str_idx = str_idx;
3964  ++subtop->nlasts;
3965  return new_entry;
3966}
3967
3968static void
3969sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
3970	       check_subexp)
3971    re_sift_context_t *sctx;
3972    re_dfastate_t **sifted_sts, **limited_sts;
3973    int last_node, last_str_idx, check_subexp;
3974{
3975  sctx->sifted_states = sifted_sts;
3976  sctx->limited_states = limited_sts;
3977  sctx->last_node = last_node;
3978  sctx->last_str_idx = last_str_idx;
3979  sctx->check_subexp = check_subexp;
3980  sctx->cur_bkref = -1;
3981  sctx->cls_subexp_idx = -1;
3982  re_node_set_init_empty (&sctx->limits);
3983}
3984