1/* Extended regular expression matching and search library.
2   Copyright (C) 2002-2014 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU General Public
8   License as published by the Free Software Foundation; either
9   version 3 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   General Public License for more details.
15
16   You should have received a copy of the GNU General Public
17   License along with the GNU C Library; if not, see
18   <http://www.gnu.org/licenses/>.  */
19
20static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21				     Idx n) internal_function;
22static void match_ctx_clean (re_match_context_t *mctx) internal_function;
23static void match_ctx_free (re_match_context_t *cache) internal_function;
24static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25					  Idx str_idx, Idx from, Idx to)
26     internal_function;
27static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
28     internal_function;
29static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
30					   Idx str_idx) internal_function;
31static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32						    Idx node, Idx str_idx)
33     internal_function;
34static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35			   re_dfastate_t **limited_sts, Idx last_node,
36			   Idx last_str_idx)
37     internal_function;
38static reg_errcode_t re_search_internal (const regex_t *preg,
39					 const char *string, Idx length,
40					 Idx start, Idx last_start, Idx stop,
41					 size_t nmatch, regmatch_t pmatch[],
42					 int eflags) internal_function;
43static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
44				  const char *string1, Idx length1,
45				  const char *string2, Idx length2,
46				  Idx start, regoff_t range,
47				  struct re_registers *regs,
48				  Idx stop, bool ret_len) internal_function;
49static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
50				const char *string, Idx length, Idx start,
51				regoff_t range, Idx stop,
52				struct re_registers *regs,
53				bool ret_len) internal_function;
54static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
55                              Idx nregs, int regs_allocated) internal_function;
56static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57     internal_function;
58static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
59			   Idx *p_match_first) internal_function;
60static Idx check_halt_state_context (const re_match_context_t *mctx,
61				     const re_dfastate_t *state, Idx idx)
62     internal_function;
63static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
64			 regmatch_t *prev_idx_match, Idx cur_node,
65			 Idx cur_idx, Idx nmatch) internal_function;
66static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
67				      Idx str_idx, Idx dest_node, Idx nregs,
68				      regmatch_t *regs,
69				      re_node_set *eps_via_nodes)
70     internal_function;
71static reg_errcode_t set_regs (const regex_t *preg,
72			       const re_match_context_t *mctx,
73			       size_t nmatch, regmatch_t *pmatch,
74			       bool fl_backtrack) internal_function;
75static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
76     internal_function;
77
78#ifdef RE_ENABLE_I18N
79static int sift_states_iter_mb (const re_match_context_t *mctx,
80				re_sift_context_t *sctx,
81				Idx node_idx, Idx str_idx, Idx max_str_idx)
82     internal_function;
83#endif /* RE_ENABLE_I18N */
84static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
85					   re_sift_context_t *sctx)
86     internal_function;
87static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
88					  re_sift_context_t *sctx, Idx str_idx,
89					  re_node_set *cur_dest)
90     internal_function;
91static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
92					      re_sift_context_t *sctx,
93					      Idx str_idx,
94					      re_node_set *dest_nodes)
95     internal_function;
96static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
97					    re_node_set *dest_nodes,
98					    const re_node_set *candidates)
99     internal_function;
100static bool check_dst_limits (const re_match_context_t *mctx,
101			      const re_node_set *limits,
102			      Idx dst_node, Idx dst_idx, Idx src_node,
103			      Idx src_idx) internal_function;
104static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
105					int boundaries, Idx subexp_idx,
106					Idx from_node, Idx bkref_idx)
107     internal_function;
108static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
109				      Idx limit, Idx subexp_idx,
110				      Idx node, Idx str_idx,
111				      Idx bkref_idx) internal_function;
112static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
113					  re_node_set *dest_nodes,
114					  const re_node_set *candidates,
115					  re_node_set *limits,
116					  struct re_backref_cache_entry *bkref_ents,
117					  Idx str_idx) internal_function;
118static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
119					re_sift_context_t *sctx,
120					Idx str_idx, const re_node_set *candidates)
121     internal_function;
122static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
123					re_dfastate_t **dst,
124					re_dfastate_t **src, Idx num)
125     internal_function;
126static re_dfastate_t *find_recover_state (reg_errcode_t *err,
127					 re_match_context_t *mctx) internal_function;
128static re_dfastate_t *transit_state (reg_errcode_t *err,
129				     re_match_context_t *mctx,
130				     re_dfastate_t *state) internal_function;
131static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
132					    re_match_context_t *mctx,
133					    re_dfastate_t *next_state)
134     internal_function;
135static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
136						re_node_set *cur_nodes,
137						Idx str_idx) internal_function;
138#if 0
139static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
140					re_match_context_t *mctx,
141					re_dfastate_t *pstate)
142     internal_function;
143#endif
144#ifdef RE_ENABLE_I18N
145static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
146				       re_dfastate_t *pstate)
147     internal_function;
148#endif /* RE_ENABLE_I18N */
149static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
150					  const re_node_set *nodes)
151     internal_function;
152static reg_errcode_t get_subexp (re_match_context_t *mctx,
153				 Idx bkref_node, Idx bkref_str_idx)
154     internal_function;
155static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
156				     const re_sub_match_top_t *sub_top,
157				     re_sub_match_last_t *sub_last,
158				     Idx bkref_node, Idx bkref_str)
159     internal_function;
160static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
161			     Idx subexp_idx, int type) internal_function;
162static reg_errcode_t check_arrival (re_match_context_t *mctx,
163				    state_array_t *path, Idx top_node,
164				    Idx top_str, Idx last_node, Idx last_str,
165				    int type) internal_function;
166static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
167						   Idx str_idx,
168						   re_node_set *cur_nodes,
169						   re_node_set *next_nodes)
170     internal_function;
171static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
172					       re_node_set *cur_nodes,
173					       Idx ex_subexp, int type)
174     internal_function;
175static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
176						   re_node_set *dst_nodes,
177						   Idx target, Idx ex_subexp,
178						   int type) internal_function;
179static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
180					 re_node_set *cur_nodes, Idx cur_str,
181					 Idx subexp_num, int type)
182     internal_function;
183static bool build_trtable (const re_dfa_t *dfa,
184			   re_dfastate_t *state) internal_function;
185#ifdef RE_ENABLE_I18N
186static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
187				    const re_string_t *input, Idx idx)
188     internal_function;
189# ifdef _LIBC
190static unsigned int find_collation_sequence_value (const unsigned char *mbs,
191						   size_t name_len)
192     internal_function;
193# endif /* _LIBC */
194#endif /* RE_ENABLE_I18N */
195static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
196				       const re_dfastate_t *state,
197				       re_node_set *states_node,
198				       bitset_t *states_ch) internal_function;
199static bool check_node_accept (const re_match_context_t *mctx,
200			       const re_token_t *node, Idx idx)
201     internal_function;
202static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
203     internal_function;
204
205/* Entry point for POSIX code.  */
206
207/* regexec searches for a given pattern, specified by PREG, in the
208   string STRING.
209
210   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211   'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
212   least NMATCH elements, and we set them to the offsets of the
213   corresponding matched substrings.
214
215   EFLAGS specifies "execution flags" which affect matching: if
216   REG_NOTBOL is set, then ^ does not match at the beginning of the
217   string; if REG_NOTEOL is set, then $ does not match at the end.
218
219   We return 0 if we find a match and REG_NOMATCH if not.  */
220
221int
222regexec (preg, string, nmatch, pmatch, eflags)
223    const regex_t *_Restrict_ preg;
224    const char *_Restrict_ string;
225    size_t nmatch;
226    regmatch_t pmatch[_Restrict_arr_];
227    int eflags;
228{
229  reg_errcode_t err;
230  Idx start, length;
231  re_dfa_t *dfa = preg->buffer;
232
233  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
234    return REG_BADPAT;
235
236  if (eflags & REG_STARTEND)
237    {
238      start = pmatch[0].rm_so;
239      length = pmatch[0].rm_eo;
240    }
241  else
242    {
243      start = 0;
244      length = strlen (string);
245    }
246
247  lock_lock (dfa->lock);
248  if (preg->no_sub)
249    err = re_search_internal (preg, string, length, start, length,
250			      length, 0, NULL, eflags);
251  else
252    err = re_search_internal (preg, string, length, start, length,
253			      length, nmatch, pmatch, eflags);
254  lock_unlock (dfa->lock);
255  return err != REG_NOERROR;
256}
257
258#ifdef _LIBC
259# include <shlib-compat.h>
260versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
261
262# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
263__typeof__ (__regexec) __compat_regexec;
264
265int
266attribute_compat_text_section
267__compat_regexec (const regex_t *_Restrict_ preg,
268		  const char *_Restrict_ string, size_t nmatch,
269		  regmatch_t pmatch[], int eflags)
270{
271  return regexec (preg, string, nmatch, pmatch,
272		  eflags & (REG_NOTBOL | REG_NOTEOL));
273}
274compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
275# endif
276#endif
277
278/* Entry points for GNU code.  */
279
280/* re_match, re_search, re_match_2, re_search_2
281
282   The former two functions operate on STRING with length LENGTH,
283   while the later two operate on concatenation of STRING1 and STRING2
284   with lengths LENGTH1 and LENGTH2, respectively.
285
286   re_match() matches the compiled pattern in BUFP against the string,
287   starting at index START.
288
289   re_search() first tries matching at index START, then it tries to match
290   starting from index START + 1, and so on.  The last start position tried
291   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
292   way as re_match().)
293
294   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
295   the first STOP characters of the concatenation of the strings should be
296   concerned.
297
298   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
299   and all groups is stored in REGS.  (For the "_2" variants, the offsets are
300   computed relative to the concatenation, not relative to the individual
301   strings.)
302
303   On success, re_match* functions return the length of the match, re_search*
304   return the position of the start of the match.  Return value -1 means no
305   match was found and -2 indicates an internal error.  */
306
307regoff_t
308re_match (bufp, string, length, start, regs)
309    struct re_pattern_buffer *bufp;
310    const char *string;
311    Idx length, start;
312    struct re_registers *regs;
313{
314  return re_search_stub (bufp, string, length, start, 0, length, regs, true);
315}
316#ifdef _LIBC
317weak_alias (__re_match, re_match)
318#endif
319
320regoff_t
321re_search (bufp, string, length, start, range, regs)
322    struct re_pattern_buffer *bufp;
323    const char *string;
324    Idx length, start;
325    regoff_t range;
326    struct re_registers *regs;
327{
328  return re_search_stub (bufp, string, length, start, range, length, regs,
329			 false);
330}
331#ifdef _LIBC
332weak_alias (__re_search, re_search)
333#endif
334
335regoff_t
336re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
337    struct re_pattern_buffer *bufp;
338    const char *string1, *string2;
339    Idx length1, length2, start, stop;
340    struct re_registers *regs;
341{
342  return re_search_2_stub (bufp, string1, length1, string2, length2,
343			   start, 0, regs, stop, true);
344}
345#ifdef _LIBC
346weak_alias (__re_match_2, re_match_2)
347#endif
348
349regoff_t
350re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
351    struct re_pattern_buffer *bufp;
352    const char *string1, *string2;
353    Idx length1, length2, start, stop;
354    regoff_t range;
355    struct re_registers *regs;
356{
357  return re_search_2_stub (bufp, string1, length1, string2, length2,
358			   start, range, regs, stop, false);
359}
360#ifdef _LIBC
361weak_alias (__re_search_2, re_search_2)
362#endif
363
364static regoff_t
365re_search_2_stub (struct re_pattern_buffer *bufp,
366		  const char *string1, Idx length1,
367		  const char *string2, Idx length2,
368		  Idx start, regoff_t range, struct re_registers *regs,
369		  Idx stop, bool ret_len)
370{
371  const char *str;
372  regoff_t rval;
373  Idx len = length1 + length2;
374  char *s = NULL;
375
376  if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
377    return -2;
378
379  /* Concatenate the strings.  */
380  if (length2 > 0)
381    if (length1 > 0)
382      {
383	s = re_malloc (char, len);
384
385	if (BE (s == NULL, 0))
386	  return -2;
387#ifdef _LIBC
388	memcpy (__mempcpy (s, string1, length1), string2, length2);
389#else
390	memcpy (s, string1, length1);
391	memcpy (s + length1, string2, length2);
392#endif
393	str = s;
394      }
395    else
396      str = string2;
397  else
398    str = string1;
399
400  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
401			 ret_len);
402  re_free (s);
403  return rval;
404}
405
406/* The parameters have the same meaning as those of re_search.
407   Additional parameters:
408   If RET_LEN is true the length of the match is returned (re_match style);
409   otherwise the position of the match is returned.  */
410
411static regoff_t
412re_search_stub (struct re_pattern_buffer *bufp,
413		const char *string, Idx length,
414		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
415		bool ret_len)
416{
417  reg_errcode_t result;
418  regmatch_t *pmatch;
419  Idx nregs;
420  regoff_t rval;
421  int eflags = 0;
422  re_dfa_t *dfa = bufp->buffer;
423  Idx last_start = start + range;
424
425  /* Check for out-of-range.  */
426  if (BE (start < 0 || start > length, 0))
427    return -1;
428  if (BE (length < last_start || (0 <= range && last_start < start), 0))
429    last_start = length;
430  else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
431    last_start = 0;
432
433  lock_lock (dfa->lock);
434
435  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
436  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
437
438  /* Compile fastmap if we haven't yet.  */
439  if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
440    re_compile_fastmap (bufp);
441
442  if (BE (bufp->no_sub, 0))
443    regs = NULL;
444
445  /* We need at least 1 register.  */
446  if (regs == NULL)
447    nregs = 1;
448  else if (BE (bufp->regs_allocated == REGS_FIXED
449	       && regs->num_regs <= bufp->re_nsub, 0))
450    {
451      nregs = regs->num_regs;
452      if (BE (nregs < 1, 0))
453	{
454	  /* Nothing can be copied to regs.  */
455	  regs = NULL;
456	  nregs = 1;
457	}
458    }
459  else
460    nregs = bufp->re_nsub + 1;
461  pmatch = re_malloc (regmatch_t, nregs);
462  if (BE (pmatch == NULL, 0))
463    {
464      rval = -2;
465      goto out;
466    }
467
468  result = re_search_internal (bufp, string, length, start, last_start, stop,
469			       nregs, pmatch, eflags);
470
471  rval = 0;
472
473  /* I hope we needn't fill their regs with -1's when no match was found.  */
474  if (result != REG_NOERROR)
475    rval = result == REG_NOMATCH ? -1 : -2;
476  else if (regs != NULL)
477    {
478      /* If caller wants register contents data back, copy them.  */
479      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
480					   bufp->regs_allocated);
481      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
482	rval = -2;
483    }
484
485  if (BE (rval == 0, 1))
486    {
487      if (ret_len)
488	{
489	  assert (pmatch[0].rm_so == start);
490	  rval = pmatch[0].rm_eo - start;
491	}
492      else
493	rval = pmatch[0].rm_so;
494    }
495  re_free (pmatch);
496 out:
497  lock_unlock (dfa->lock);
498  return rval;
499}
500
501static unsigned
502re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
503	      int regs_allocated)
504{
505  int rval = REGS_REALLOCATE;
506  Idx i;
507  Idx need_regs = nregs + 1;
508  /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
509     uses.  */
510
511  /* Have the register data arrays been allocated?  */
512  if (regs_allocated == REGS_UNALLOCATED)
513    { /* No.  So allocate them with malloc.  */
514      regs->start = re_malloc (regoff_t, need_regs);
515      if (BE (regs->start == NULL, 0))
516	return REGS_UNALLOCATED;
517      regs->end = re_malloc (regoff_t, need_regs);
518      if (BE (regs->end == NULL, 0))
519	{
520	  re_free (regs->start);
521	  return REGS_UNALLOCATED;
522	}
523      regs->num_regs = need_regs;
524    }
525  else if (regs_allocated == REGS_REALLOCATE)
526    { /* Yes.  If we need more elements than were already
527	 allocated, reallocate them.  If we need fewer, just
528	 leave it alone.  */
529      if (BE (need_regs > regs->num_regs, 0))
530	{
531	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
532	  regoff_t *new_end;
533	  if (BE (new_start == NULL, 0))
534	    return REGS_UNALLOCATED;
535	  new_end = re_realloc (regs->end, regoff_t, need_regs);
536	  if (BE (new_end == NULL, 0))
537	    {
538	      re_free (new_start);
539	      return REGS_UNALLOCATED;
540	    }
541	  regs->start = new_start;
542	  regs->end = new_end;
543	  regs->num_regs = need_regs;
544	}
545    }
546  else
547    {
548      assert (regs_allocated == REGS_FIXED);
549      /* This function may not be called with REGS_FIXED and nregs too big.  */
550      assert (regs->num_regs >= nregs);
551      rval = REGS_FIXED;
552    }
553
554  /* Copy the regs.  */
555  for (i = 0; i < nregs; ++i)
556    {
557      regs->start[i] = pmatch[i].rm_so;
558      regs->end[i] = pmatch[i].rm_eo;
559    }
560  for ( ; i < regs->num_regs; ++i)
561    regs->start[i] = regs->end[i] = -1;
562
563  return rval;
564}
565
566/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
567   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
568   this memory for recording register information.  STARTS and ENDS
569   must be allocated using the malloc library routine, and must each
570   be at least NUM_REGS * sizeof (regoff_t) bytes long.
571
572   If NUM_REGS == 0, then subsequent matches should allocate their own
573   register data.
574
575   Unless this function is called, the first search or match using
576   PATTERN_BUFFER will allocate its own register data, without
577   freeing the old data.  */
578
579void
580re_set_registers (bufp, regs, num_regs, starts, ends)
581    struct re_pattern_buffer *bufp;
582    struct re_registers *regs;
583    __re_size_t num_regs;
584    regoff_t *starts, *ends;
585{
586  if (num_regs)
587    {
588      bufp->regs_allocated = REGS_REALLOCATE;
589      regs->num_regs = num_regs;
590      regs->start = starts;
591      regs->end = ends;
592    }
593  else
594    {
595      bufp->regs_allocated = REGS_UNALLOCATED;
596      regs->num_regs = 0;
597      regs->start = regs->end = NULL;
598    }
599}
600#ifdef _LIBC
601weak_alias (__re_set_registers, re_set_registers)
602#endif
603
604/* Entry points compatible with 4.2 BSD regex library.  We don't define
605   them unless specifically requested.  */
606
607#if defined _REGEX_RE_COMP || defined _LIBC
608int
609# ifdef _LIBC
610weak_function
611# endif
612re_exec (s)
613     const char *s;
614{
615  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
616}
617#endif /* _REGEX_RE_COMP */
618
619/* Internal entry point.  */
620
621/* Searches for a compiled pattern PREG in the string STRING, whose
622   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
623   meaning as with regexec.  LAST_START is START + RANGE, where
624   START and RANGE have the same meaning as with re_search.
625   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
626   otherwise return the error code.
627   Note: We assume front end functions already check ranges.
628   (0 <= LAST_START && LAST_START <= LENGTH)  */
629
630static reg_errcode_t
631__attribute_warn_unused_result__
632re_search_internal (const regex_t *preg,
633		    const char *string, Idx length,
634		    Idx start, Idx last_start, Idx stop,
635		    size_t nmatch, regmatch_t pmatch[],
636		    int eflags)
637{
638  reg_errcode_t err;
639  const re_dfa_t *dfa = preg->buffer;
640  Idx left_lim, right_lim;
641  int incr;
642  bool fl_longest_match;
643  int match_kind;
644  Idx match_first;
645  Idx match_last = REG_MISSING;
646  Idx extra_nmatch;
647  bool sb;
648  int ch;
649#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
650  re_match_context_t mctx = { .dfa = dfa };
651#else
652  re_match_context_t mctx;
653#endif
654  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
655		    && start != last_start && !preg->can_be_null)
656		   ? preg->fastmap : NULL);
657  RE_TRANSLATE_TYPE t = preg->translate;
658
659#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
660  memset (&mctx, '\0', sizeof (re_match_context_t));
661  mctx.dfa = dfa;
662#endif
663
664  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
665  nmatch -= extra_nmatch;
666
667  /* Check if the DFA haven't been compiled.  */
668  if (BE (preg->used == 0 || dfa->init_state == NULL
669	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
670	  || dfa->init_state_begbuf == NULL, 0))
671    return REG_NOMATCH;
672
673#ifdef DEBUG
674  /* We assume front-end functions already check them.  */
675  assert (0 <= last_start && last_start <= length);
676#endif
677
678  /* If initial states with non-begbuf contexts have no elements,
679     the regex must be anchored.  If preg->newline_anchor is set,
680     we'll never use init_state_nl, so do not check it.  */
681  if (dfa->init_state->nodes.nelem == 0
682      && dfa->init_state_word->nodes.nelem == 0
683      && (dfa->init_state_nl->nodes.nelem == 0
684	  || !preg->newline_anchor))
685    {
686      if (start != 0 && last_start != 0)
687        return REG_NOMATCH;
688      start = last_start = 0;
689    }
690
691  /* We must check the longest matching, if nmatch > 0.  */
692  fl_longest_match = (nmatch != 0 || dfa->nbackref);
693
694  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
695			    preg->translate, (preg->syntax & RE_ICASE) != 0,
696			    dfa);
697  if (BE (err != REG_NOERROR, 0))
698    goto free_return;
699  mctx.input.stop = stop;
700  mctx.input.raw_stop = stop;
701  mctx.input.newline_anchor = preg->newline_anchor;
702
703  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
704  if (BE (err != REG_NOERROR, 0))
705    goto free_return;
706
707  /* We will log all the DFA states through which the dfa pass,
708     if nmatch > 1, or this dfa has "multibyte node", which is a
709     back-reference or a node which can accept multibyte character or
710     multi character collating element.  */
711  if (nmatch > 1 || dfa->has_mb_node)
712    {
713      /* Avoid overflow.  */
714      if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
715               <= mctx.input.bufs_len), 0))
716	{
717	  err = REG_ESPACE;
718	  goto free_return;
719	}
720
721      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
722      if (BE (mctx.state_log == NULL, 0))
723	{
724	  err = REG_ESPACE;
725	  goto free_return;
726	}
727    }
728  else
729    mctx.state_log = NULL;
730
731  match_first = start;
732  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
733			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
734
735  /* Check incrementally whether the input string matches.  */
736  incr = (last_start < start) ? -1 : 1;
737  left_lim = (last_start < start) ? last_start : start;
738  right_lim = (last_start < start) ? start : last_start;
739  sb = dfa->mb_cur_max == 1;
740  match_kind =
741    (fastmap
742     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
743	| (start <= last_start ? 2 : 0)
744	| (t != NULL ? 1 : 0))
745     : 8);
746
747  for (;; match_first += incr)
748    {
749      err = REG_NOMATCH;
750      if (match_first < left_lim || right_lim < match_first)
751	goto free_return;
752
753      /* Advance as rapidly as possible through the string, until we
754	 find a plausible place to start matching.  This may be done
755	 with varying efficiency, so there are various possibilities:
756	 only the most common of them are specialized, in order to
757	 save on code size.  We use a switch statement for speed.  */
758      switch (match_kind)
759	{
760	case 8:
761	  /* No fastmap.  */
762	  break;
763
764	case 7:
765	  /* Fastmap with single-byte translation, match forward.  */
766	  while (BE (match_first < right_lim, 1)
767		 && !fastmap[t[(unsigned char) string[match_first]]])
768	    ++match_first;
769	  goto forward_match_found_start_or_reached_end;
770
771	case 6:
772	  /* Fastmap without translation, match forward.  */
773	  while (BE (match_first < right_lim, 1)
774		 && !fastmap[(unsigned char) string[match_first]])
775	    ++match_first;
776
777	forward_match_found_start_or_reached_end:
778	  if (BE (match_first == right_lim, 0))
779	    {
780	      ch = match_first >= length
781		       ? 0 : (unsigned char) string[match_first];
782	      if (!fastmap[t ? t[ch] : ch])
783		goto free_return;
784	    }
785	  break;
786
787	case 4:
788	case 5:
789	  /* Fastmap without multi-byte translation, match backwards.  */
790	  while (match_first >= left_lim)
791	    {
792	      ch = match_first >= length
793		       ? 0 : (unsigned char) string[match_first];
794	      if (fastmap[t ? t[ch] : ch])
795		break;
796	      --match_first;
797	    }
798	  if (match_first < left_lim)
799	    goto free_return;
800	  break;
801
802	default:
803	  /* In this case, we can't determine easily the current byte,
804	     since it might be a component byte of a multibyte
805	     character.  Then we use the constructed buffer instead.  */
806	  for (;;)
807	    {
808	      /* If MATCH_FIRST is out of the valid range, reconstruct the
809		 buffers.  */
810	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
811	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
812		{
813		  err = re_string_reconstruct (&mctx.input, match_first,
814					       eflags);
815		  if (BE (err != REG_NOERROR, 0))
816		    goto free_return;
817
818		  offset = match_first - mctx.input.raw_mbs_idx;
819		}
820	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
821		 Note that MATCH_FIRST must not be smaller than 0.  */
822	      ch = (match_first >= length
823		    ? 0 : re_string_byte_at (&mctx.input, offset));
824	      if (fastmap[ch])
825		break;
826	      match_first += incr;
827	      if (match_first < left_lim || match_first > right_lim)
828		{
829		  err = REG_NOMATCH;
830		  goto free_return;
831		}
832	    }
833	  break;
834	}
835
836      /* Reconstruct the buffers so that the matcher can assume that
837	 the matching starts from the beginning of the buffer.  */
838      err = re_string_reconstruct (&mctx.input, match_first, eflags);
839      if (BE (err != REG_NOERROR, 0))
840	goto free_return;
841
842#ifdef RE_ENABLE_I18N
843     /* Don't consider this char as a possible match start if it part,
844	yet isn't the head, of a multibyte character.  */
845      if (!sb && !re_string_first_byte (&mctx.input, 0))
846	continue;
847#endif
848
849      /* It seems to be appropriate one, then use the matcher.  */
850      /* We assume that the matching starts from 0.  */
851      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
852      match_last = check_matching (&mctx, fl_longest_match,
853				   start <= last_start ? &match_first : NULL);
854      if (match_last != REG_MISSING)
855	{
856	  if (BE (match_last == REG_ERROR, 0))
857	    {
858	      err = REG_ESPACE;
859	      goto free_return;
860	    }
861	  else
862	    {
863	      mctx.match_last = match_last;
864	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
865		{
866		  re_dfastate_t *pstate = mctx.state_log[match_last];
867		  mctx.last_node = check_halt_state_context (&mctx, pstate,
868							     match_last);
869		}
870	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
871		  || dfa->nbackref)
872		{
873		  err = prune_impossible_nodes (&mctx);
874		  if (err == REG_NOERROR)
875		    break;
876		  if (BE (err != REG_NOMATCH, 0))
877		    goto free_return;
878		  match_last = REG_MISSING;
879		}
880	      else
881		break; /* We found a match.  */
882	    }
883	}
884
885      match_ctx_clean (&mctx);
886    }
887
888#ifdef DEBUG
889  assert (match_last != REG_MISSING);
890  assert (err == REG_NOERROR);
891#endif
892
893  /* Set pmatch[] if we need.  */
894  if (nmatch > 0)
895    {
896      Idx reg_idx;
897
898      /* Initialize registers.  */
899      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
900	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
901
902      /* Set the points where matching start/end.  */
903      pmatch[0].rm_so = 0;
904      pmatch[0].rm_eo = mctx.match_last;
905      /* FIXME: This function should fail if mctx.match_last exceeds
906	 the maximum possible regoff_t value.  We need a new error
907	 code REG_OVERFLOW.  */
908
909      if (!preg->no_sub && nmatch > 1)
910	{
911	  err = set_regs (preg, &mctx, nmatch, pmatch,
912			  dfa->has_plural_match && dfa->nbackref > 0);
913	  if (BE (err != REG_NOERROR, 0))
914	    goto free_return;
915	}
916
917      /* At last, add the offset to each register, since we slid
918	 the buffers so that we could assume that the matching starts
919	 from 0.  */
920      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
921	if (pmatch[reg_idx].rm_so != -1)
922	  {
923#ifdef RE_ENABLE_I18N
924	    if (BE (mctx.input.offsets_needed != 0, 0))
925	      {
926		pmatch[reg_idx].rm_so =
927		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
928		   ? mctx.input.valid_raw_len
929		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
930		pmatch[reg_idx].rm_eo =
931		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
932		   ? mctx.input.valid_raw_len
933		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
934	      }
935#else
936	    assert (mctx.input.offsets_needed == 0);
937#endif
938	    pmatch[reg_idx].rm_so += match_first;
939	    pmatch[reg_idx].rm_eo += match_first;
940	  }
941      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
942	{
943	  pmatch[nmatch + reg_idx].rm_so = -1;
944	  pmatch[nmatch + reg_idx].rm_eo = -1;
945	}
946
947      if (dfa->subexp_map)
948	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
949	  if (dfa->subexp_map[reg_idx] != reg_idx)
950	    {
951	      pmatch[reg_idx + 1].rm_so
952		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
953	      pmatch[reg_idx + 1].rm_eo
954		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
955	    }
956    }
957
958 free_return:
959  re_free (mctx.state_log);
960  if (dfa->nbackref)
961    match_ctx_free (&mctx);
962  re_string_destruct (&mctx.input);
963  return err;
964}
965
966static reg_errcode_t
967__attribute_warn_unused_result__
968prune_impossible_nodes (re_match_context_t *mctx)
969{
970  const re_dfa_t *const dfa = mctx->dfa;
971  Idx halt_node, match_last;
972  reg_errcode_t ret;
973  re_dfastate_t **sifted_states;
974  re_dfastate_t **lim_states = NULL;
975  re_sift_context_t sctx;
976#ifdef DEBUG
977  assert (mctx->state_log != NULL);
978#endif
979  match_last = mctx->match_last;
980  halt_node = mctx->last_node;
981
982  /* Avoid overflow.  */
983  if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
984    return REG_ESPACE;
985
986  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
987  if (BE (sifted_states == NULL, 0))
988    {
989      ret = REG_ESPACE;
990      goto free_return;
991    }
992  if (dfa->nbackref)
993    {
994      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
995      if (BE (lim_states == NULL, 0))
996	{
997	  ret = REG_ESPACE;
998	  goto free_return;
999	}
1000      while (1)
1001	{
1002	  memset (lim_states, '\0',
1003		  sizeof (re_dfastate_t *) * (match_last + 1));
1004	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1005			 match_last);
1006	  ret = sift_states_backward (mctx, &sctx);
1007	  re_node_set_free (&sctx.limits);
1008	  if (BE (ret != REG_NOERROR, 0))
1009	      goto free_return;
1010	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1011	    break;
1012	  do
1013	    {
1014	      --match_last;
1015	      if (! REG_VALID_INDEX (match_last))
1016		{
1017		  ret = REG_NOMATCH;
1018		  goto free_return;
1019		}
1020	    } while (mctx->state_log[match_last] == NULL
1021		     || !mctx->state_log[match_last]->halt);
1022	  halt_node = check_halt_state_context (mctx,
1023						mctx->state_log[match_last],
1024						match_last);
1025	}
1026      ret = merge_state_array (dfa, sifted_states, lim_states,
1027			       match_last + 1);
1028      re_free (lim_states);
1029      lim_states = NULL;
1030      if (BE (ret != REG_NOERROR, 0))
1031	goto free_return;
1032    }
1033  else
1034    {
1035      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1036      ret = sift_states_backward (mctx, &sctx);
1037      re_node_set_free (&sctx.limits);
1038      if (BE (ret != REG_NOERROR, 0))
1039	goto free_return;
1040      if (sifted_states[0] == NULL)
1041	{
1042	  ret = REG_NOMATCH;
1043	  goto free_return;
1044	}
1045    }
1046  re_free (mctx->state_log);
1047  mctx->state_log = sifted_states;
1048  sifted_states = NULL;
1049  mctx->last_node = halt_node;
1050  mctx->match_last = match_last;
1051  ret = REG_NOERROR;
1052 free_return:
1053  re_free (sifted_states);
1054  re_free (lim_states);
1055  return ret;
1056}
1057
1058/* Acquire an initial state and return it.
1059   We must select appropriate initial state depending on the context,
1060   since initial states may have constraints like "\<", "^", etc..  */
1061
1062static inline re_dfastate_t *
1063__attribute__ ((always_inline)) internal_function
1064acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1065			    Idx idx)
1066{
1067  const re_dfa_t *const dfa = mctx->dfa;
1068  if (dfa->init_state->has_constraint)
1069    {
1070      unsigned int context;
1071      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1072      if (IS_WORD_CONTEXT (context))
1073	return dfa->init_state_word;
1074      else if (IS_ORDINARY_CONTEXT (context))
1075	return dfa->init_state;
1076      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1077	return dfa->init_state_begbuf;
1078      else if (IS_NEWLINE_CONTEXT (context))
1079	return dfa->init_state_nl;
1080      else if (IS_BEGBUF_CONTEXT (context))
1081	{
1082	  /* It is relatively rare case, then calculate on demand.  */
1083	  return re_acquire_state_context (err, dfa,
1084					   dfa->init_state->entrance_nodes,
1085					   context);
1086	}
1087      else
1088	/* Must not happen?  */
1089	return dfa->init_state;
1090    }
1091  else
1092    return dfa->init_state;
1093}
1094
1095/* Check whether the regular expression match input string INPUT or not,
1096   and return the index where the matching end.  Return REG_MISSING if
1097   there is no match, and return REG_ERROR in case of an error.
1098   FL_LONGEST_MATCH means we want the POSIX longest matching.
1099   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1100   next place where we may want to try matching.
1101   Note that the matcher assumes that the matching starts from the current
1102   index of the buffer.  */
1103
1104static Idx
1105internal_function __attribute_warn_unused_result__
1106check_matching (re_match_context_t *mctx, bool fl_longest_match,
1107		Idx *p_match_first)
1108{
1109  const re_dfa_t *const dfa = mctx->dfa;
1110  reg_errcode_t err;
1111  Idx match = 0;
1112  Idx match_last = REG_MISSING;
1113  Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1114  re_dfastate_t *cur_state;
1115  bool at_init_state = p_match_first != NULL;
1116  Idx next_start_idx = cur_str_idx;
1117
1118  err = REG_NOERROR;
1119  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1120  /* An initial state must not be NULL (invalid).  */
1121  if (BE (cur_state == NULL, 0))
1122    {
1123      assert (err == REG_ESPACE);
1124      return REG_ERROR;
1125    }
1126
1127  if (mctx->state_log != NULL)
1128    {
1129      mctx->state_log[cur_str_idx] = cur_state;
1130
1131      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1132	 later.  E.g. Processing back references.  */
1133      if (BE (dfa->nbackref, 0))
1134	{
1135	  at_init_state = false;
1136	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1137	  if (BE (err != REG_NOERROR, 0))
1138	    return err;
1139
1140	  if (cur_state->has_backref)
1141	    {
1142	      err = transit_state_bkref (mctx, &cur_state->nodes);
1143	      if (BE (err != REG_NOERROR, 0))
1144		return err;
1145	    }
1146	}
1147    }
1148
1149  /* If the RE accepts NULL string.  */
1150  if (BE (cur_state->halt, 0))
1151    {
1152      if (!cur_state->has_constraint
1153	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1154	{
1155	  if (!fl_longest_match)
1156	    return cur_str_idx;
1157	  else
1158	    {
1159	      match_last = cur_str_idx;
1160	      match = 1;
1161	    }
1162	}
1163    }
1164
1165  while (!re_string_eoi (&mctx->input))
1166    {
1167      re_dfastate_t *old_state = cur_state;
1168      Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1169
1170      if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1171	   && mctx->input.bufs_len < mctx->input.len)
1172	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1173	      && mctx->input.valid_len < mctx->input.len))
1174	{
1175	  err = extend_buffers (mctx, next_char_idx + 1);
1176	  if (BE (err != REG_NOERROR, 0))
1177	    {
1178	      assert (err == REG_ESPACE);
1179	      return REG_ERROR;
1180	    }
1181	}
1182
1183      cur_state = transit_state (&err, mctx, cur_state);
1184      if (mctx->state_log != NULL)
1185	cur_state = merge_state_with_log (&err, mctx, cur_state);
1186
1187      if (cur_state == NULL)
1188	{
1189	  /* Reached the invalid state or an error.  Try to recover a valid
1190	     state using the state log, if available and if we have not
1191	     already found a valid (even if not the longest) match.  */
1192	  if (BE (err != REG_NOERROR, 0))
1193	    return REG_ERROR;
1194
1195	  if (mctx->state_log == NULL
1196	      || (match && !fl_longest_match)
1197	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1198	    break;
1199	}
1200
1201      if (BE (at_init_state, 0))
1202	{
1203	  if (old_state == cur_state)
1204	    next_start_idx = next_char_idx;
1205	  else
1206	    at_init_state = false;
1207	}
1208
1209      if (cur_state->halt)
1210	{
1211	  /* Reached a halt state.
1212	     Check the halt state can satisfy the current context.  */
1213	  if (!cur_state->has_constraint
1214	      || check_halt_state_context (mctx, cur_state,
1215					   re_string_cur_idx (&mctx->input)))
1216	    {
1217	      /* We found an appropriate halt state.  */
1218	      match_last = re_string_cur_idx (&mctx->input);
1219	      match = 1;
1220
1221	      /* We found a match, do not modify match_first below.  */
1222	      p_match_first = NULL;
1223	      if (!fl_longest_match)
1224		break;
1225	    }
1226	}
1227    }
1228
1229  if (p_match_first)
1230    *p_match_first += next_start_idx;
1231
1232  return match_last;
1233}
1234
1235/* Check NODE match the current context.  */
1236
1237static bool
1238internal_function
1239check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1240{
1241  re_token_type_t type = dfa->nodes[node].type;
1242  unsigned int constraint = dfa->nodes[node].constraint;
1243  if (type != END_OF_RE)
1244    return false;
1245  if (!constraint)
1246    return true;
1247  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1248    return false;
1249  return true;
1250}
1251
1252/* Check the halt state STATE match the current context.
1253   Return 0 if not match, if the node, STATE has, is a halt node and
1254   match the context, return the node.  */
1255
1256static Idx
1257internal_function
1258check_halt_state_context (const re_match_context_t *mctx,
1259			  const re_dfastate_t *state, Idx idx)
1260{
1261  Idx i;
1262  unsigned int context;
1263#ifdef DEBUG
1264  assert (state->halt);
1265#endif
1266  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1267  for (i = 0; i < state->nodes.nelem; ++i)
1268    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1269      return state->nodes.elems[i];
1270  return 0;
1271}
1272
1273/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1274   corresponding to the DFA).
1275   Return the destination node, and update EPS_VIA_NODES;
1276   return REG_MISSING in case of errors.  */
1277
1278static Idx
1279internal_function
1280proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1281		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1282		   struct re_fail_stack_t *fs)
1283{
1284  const re_dfa_t *const dfa = mctx->dfa;
1285  Idx i;
1286  bool ok;
1287  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1288    {
1289      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1290      re_node_set *edests = &dfa->edests[node];
1291      Idx dest_node;
1292      ok = re_node_set_insert (eps_via_nodes, node);
1293      if (BE (! ok, 0))
1294	return REG_ERROR;
1295      /* Pick up a valid destination, or return REG_MISSING if none
1296	 is found.  */
1297      for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1298	{
1299	  Idx candidate = edests->elems[i];
1300	  if (!re_node_set_contains (cur_nodes, candidate))
1301	    continue;
1302          if (dest_node == REG_MISSING)
1303	    dest_node = candidate;
1304
1305	  else
1306	    {
1307	      /* In order to avoid infinite loop like "(a*)*", return the second
1308		 epsilon-transition if the first was already considered.  */
1309	      if (re_node_set_contains (eps_via_nodes, dest_node))
1310		return candidate;
1311
1312	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1313	      else if (fs != NULL
1314		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1315					   eps_via_nodes))
1316		return REG_ERROR;
1317
1318	      /* We know we are going to exit.  */
1319	      break;
1320	    }
1321	}
1322      return dest_node;
1323    }
1324  else
1325    {
1326      Idx naccepted = 0;
1327      re_token_type_t type = dfa->nodes[node].type;
1328
1329#ifdef RE_ENABLE_I18N
1330      if (dfa->nodes[node].accept_mb)
1331	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1332      else
1333#endif /* RE_ENABLE_I18N */
1334      if (type == OP_BACK_REF)
1335	{
1336	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1337	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1338	  if (fs != NULL)
1339	    {
1340	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1341		return REG_MISSING;
1342	      else if (naccepted)
1343		{
1344		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1345		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1346			      naccepted) != 0)
1347		    return REG_MISSING;
1348		}
1349	    }
1350
1351	  if (naccepted == 0)
1352	    {
1353	      Idx dest_node;
1354	      ok = re_node_set_insert (eps_via_nodes, node);
1355	      if (BE (! ok, 0))
1356		return REG_ERROR;
1357	      dest_node = dfa->edests[node].elems[0];
1358	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1359					dest_node))
1360		return dest_node;
1361	    }
1362	}
1363
1364      if (naccepted != 0
1365	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1366	{
1367	  Idx dest_node = dfa->nexts[node];
1368	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1369	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1370		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1371					       dest_node)))
1372	    return REG_MISSING;
1373	  re_node_set_empty (eps_via_nodes);
1374	  return dest_node;
1375	}
1376    }
1377  return REG_MISSING;
1378}
1379
1380static reg_errcode_t
1381internal_function __attribute_warn_unused_result__
1382push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1383		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1384{
1385  reg_errcode_t err;
1386  Idx num = fs->num++;
1387  if (fs->num == fs->alloc)
1388    {
1389      struct re_fail_stack_ent_t *new_array;
1390      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1391				       * fs->alloc * 2));
1392      if (new_array == NULL)
1393	return REG_ESPACE;
1394      fs->alloc *= 2;
1395      fs->stack = new_array;
1396    }
1397  fs->stack[num].idx = str_idx;
1398  fs->stack[num].node = dest_node;
1399  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1400  if (fs->stack[num].regs == NULL)
1401    return REG_ESPACE;
1402  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1403  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1404  return err;
1405}
1406
1407static Idx
1408internal_function
1409pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1410		regmatch_t *regs, re_node_set *eps_via_nodes)
1411{
1412  Idx num = --fs->num;
1413  assert (REG_VALID_INDEX (num));
1414  *pidx = fs->stack[num].idx;
1415  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1416  re_node_set_free (eps_via_nodes);
1417  re_free (fs->stack[num].regs);
1418  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1419  return fs->stack[num].node;
1420}
1421
1422/* Set the positions where the subexpressions are starts/ends to registers
1423   PMATCH.
1424   Note: We assume that pmatch[0] is already set, and
1425   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1426
1427static reg_errcode_t
1428internal_function __attribute_warn_unused_result__
1429set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1430	  regmatch_t *pmatch, bool fl_backtrack)
1431{
1432  const re_dfa_t *dfa = preg->buffer;
1433  Idx idx, cur_node;
1434  re_node_set eps_via_nodes;
1435  struct re_fail_stack_t *fs;
1436  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1437  regmatch_t *prev_idx_match;
1438  bool prev_idx_match_malloced = false;
1439
1440#ifdef DEBUG
1441  assert (nmatch > 1);
1442  assert (mctx->state_log != NULL);
1443#endif
1444  if (fl_backtrack)
1445    {
1446      fs = &fs_body;
1447      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1448      if (fs->stack == NULL)
1449	return REG_ESPACE;
1450    }
1451  else
1452    fs = NULL;
1453
1454  cur_node = dfa->init_node;
1455  re_node_set_init_empty (&eps_via_nodes);
1456
1457  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1458    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1459  else
1460    {
1461      prev_idx_match = re_malloc (regmatch_t, nmatch);
1462      if (prev_idx_match == NULL)
1463	{
1464	  free_fail_stack_return (fs);
1465	  return REG_ESPACE;
1466	}
1467      prev_idx_match_malloced = true;
1468    }
1469  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1470
1471  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1472    {
1473      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1474
1475      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1476	{
1477	  Idx reg_idx;
1478	  if (fs)
1479	    {
1480	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1481		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1482		  break;
1483	      if (reg_idx == nmatch)
1484		{
1485		  re_node_set_free (&eps_via_nodes);
1486		  if (prev_idx_match_malloced)
1487		    re_free (prev_idx_match);
1488		  return free_fail_stack_return (fs);
1489		}
1490	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1491					 &eps_via_nodes);
1492	    }
1493	  else
1494	    {
1495	      re_node_set_free (&eps_via_nodes);
1496	      if (prev_idx_match_malloced)
1497		re_free (prev_idx_match);
1498	      return REG_NOERROR;
1499	    }
1500	}
1501
1502      /* Proceed to next node.  */
1503      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1504				    &eps_via_nodes, fs);
1505
1506      if (BE (! REG_VALID_INDEX (cur_node), 0))
1507	{
1508	  if (BE (cur_node == REG_ERROR, 0))
1509	    {
1510	      re_node_set_free (&eps_via_nodes);
1511	      if (prev_idx_match_malloced)
1512		re_free (prev_idx_match);
1513	      free_fail_stack_return (fs);
1514	      return REG_ESPACE;
1515	    }
1516	  if (fs)
1517	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1518				       &eps_via_nodes);
1519	  else
1520	    {
1521	      re_node_set_free (&eps_via_nodes);
1522	      if (prev_idx_match_malloced)
1523		re_free (prev_idx_match);
1524	      return REG_NOMATCH;
1525	    }
1526	}
1527    }
1528  re_node_set_free (&eps_via_nodes);
1529  if (prev_idx_match_malloced)
1530    re_free (prev_idx_match);
1531  return free_fail_stack_return (fs);
1532}
1533
1534static reg_errcode_t
1535internal_function
1536free_fail_stack_return (struct re_fail_stack_t *fs)
1537{
1538  if (fs)
1539    {
1540      Idx fs_idx;
1541      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1542	{
1543	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1544	  re_free (fs->stack[fs_idx].regs);
1545	}
1546      re_free (fs->stack);
1547    }
1548  return REG_NOERROR;
1549}
1550
1551static void
1552internal_function
1553update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1554	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1555{
1556  int type = dfa->nodes[cur_node].type;
1557  if (type == OP_OPEN_SUBEXP)
1558    {
1559      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1560
1561      /* We are at the first node of this sub expression.  */
1562      if (reg_num < nmatch)
1563	{
1564	  pmatch[reg_num].rm_so = cur_idx;
1565	  pmatch[reg_num].rm_eo = -1;
1566	}
1567    }
1568  else if (type == OP_CLOSE_SUBEXP)
1569    {
1570      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1571      if (reg_num < nmatch)
1572	{
1573	  /* We are at the last node of this sub expression.  */
1574	  if (pmatch[reg_num].rm_so < cur_idx)
1575	    {
1576	      pmatch[reg_num].rm_eo = cur_idx;
1577	      /* This is a non-empty match or we are not inside an optional
1578		 subexpression.  Accept this right away.  */
1579	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1580	    }
1581	  else
1582	    {
1583	      if (dfa->nodes[cur_node].opt_subexp
1584		  && prev_idx_match[reg_num].rm_so != -1)
1585		/* We transited through an empty match for an optional
1586		   subexpression, like (a?)*, and this is not the subexp's
1587		   first match.  Copy back the old content of the registers
1588		   so that matches of an inner subexpression are undone as
1589		   well, like in ((a?))*.  */
1590		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1591	      else
1592		/* We completed a subexpression, but it may be part of
1593		   an optional one, so do not update PREV_IDX_MATCH.  */
1594		pmatch[reg_num].rm_eo = cur_idx;
1595	    }
1596	}
1597    }
1598}
1599
1600/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1601   and sift the nodes in each states according to the following rules.
1602   Updated state_log will be wrote to STATE_LOG.
1603
1604   Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1605     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1606	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1607	the LAST_NODE, we throw away the node 'a'.
1608     2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1609	string 's' and transit to 'b':
1610	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1611	   away the node 'a'.
1612	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1613	    thrown away, we throw away the node 'a'.
1614     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1615	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1616	   node 'a'.
1617	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1618	    we throw away the node 'a'.  */
1619
1620#define STATE_NODE_CONTAINS(state,node) \
1621  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1622
1623static reg_errcode_t
1624internal_function
1625sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1626{
1627  reg_errcode_t err;
1628  int null_cnt = 0;
1629  Idx str_idx = sctx->last_str_idx;
1630  re_node_set cur_dest;
1631
1632#ifdef DEBUG
1633  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1634#endif
1635
1636  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1637     transit to the last_node and the last_node itself.  */
1638  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1639  if (BE (err != REG_NOERROR, 0))
1640    return err;
1641  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1642  if (BE (err != REG_NOERROR, 0))
1643    goto free_return;
1644
1645  /* Then check each states in the state_log.  */
1646  while (str_idx > 0)
1647    {
1648      /* Update counters.  */
1649      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1650      if (null_cnt > mctx->max_mb_elem_len)
1651	{
1652	  memset (sctx->sifted_states, '\0',
1653		  sizeof (re_dfastate_t *) * str_idx);
1654	  re_node_set_free (&cur_dest);
1655	  return REG_NOERROR;
1656	}
1657      re_node_set_empty (&cur_dest);
1658      --str_idx;
1659
1660      if (mctx->state_log[str_idx])
1661	{
1662	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1663	  if (BE (err != REG_NOERROR, 0))
1664	    goto free_return;
1665	}
1666
1667      /* Add all the nodes which satisfy the following conditions:
1668	 - It can epsilon transit to a node in CUR_DEST.
1669	 - It is in CUR_SRC.
1670	 And update state_log.  */
1671      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1672      if (BE (err != REG_NOERROR, 0))
1673	goto free_return;
1674    }
1675  err = REG_NOERROR;
1676 free_return:
1677  re_node_set_free (&cur_dest);
1678  return err;
1679}
1680
1681static reg_errcode_t
1682internal_function __attribute_warn_unused_result__
1683build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1684		     Idx str_idx, re_node_set *cur_dest)
1685{
1686  const re_dfa_t *const dfa = mctx->dfa;
1687  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1688  Idx i;
1689
1690  /* Then build the next sifted state.
1691     We build the next sifted state on 'cur_dest', and update
1692     'sifted_states[str_idx]' with 'cur_dest'.
1693     Note:
1694     'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1695     'cur_src' points the node_set of the old 'state_log[str_idx]'
1696     (with the epsilon nodes pre-filtered out).  */
1697  for (i = 0; i < cur_src->nelem; i++)
1698    {
1699      Idx prev_node = cur_src->elems[i];
1700      int naccepted = 0;
1701      bool ok;
1702
1703#ifdef DEBUG
1704      re_token_type_t type = dfa->nodes[prev_node].type;
1705      assert (!IS_EPSILON_NODE (type));
1706#endif
1707#ifdef RE_ENABLE_I18N
1708      /* If the node may accept "multi byte".  */
1709      if (dfa->nodes[prev_node].accept_mb)
1710	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1711					 str_idx, sctx->last_str_idx);
1712#endif /* RE_ENABLE_I18N */
1713
1714      /* We don't check backreferences here.
1715	 See update_cur_sifted_state().  */
1716      if (!naccepted
1717	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1718	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1719				  dfa->nexts[prev_node]))
1720	naccepted = 1;
1721
1722      if (naccepted == 0)
1723	continue;
1724
1725      if (sctx->limits.nelem)
1726	{
1727	  Idx to_idx = str_idx + naccepted;
1728	  if (check_dst_limits (mctx, &sctx->limits,
1729				dfa->nexts[prev_node], to_idx,
1730				prev_node, str_idx))
1731	    continue;
1732	}
1733      ok = re_node_set_insert (cur_dest, prev_node);
1734      if (BE (! ok, 0))
1735	return REG_ESPACE;
1736    }
1737
1738  return REG_NOERROR;
1739}
1740
1741/* Helper functions.  */
1742
1743static reg_errcode_t
1744internal_function
1745clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1746{
1747  Idx top = mctx->state_log_top;
1748
1749  if ((next_state_log_idx >= mctx->input.bufs_len
1750       && mctx->input.bufs_len < mctx->input.len)
1751      || (next_state_log_idx >= mctx->input.valid_len
1752	  && mctx->input.valid_len < mctx->input.len))
1753    {
1754      reg_errcode_t err;
1755      err = extend_buffers (mctx, next_state_log_idx + 1);
1756      if (BE (err != REG_NOERROR, 0))
1757	return err;
1758    }
1759
1760  if (top < next_state_log_idx)
1761    {
1762      memset (mctx->state_log + top + 1, '\0',
1763	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1764      mctx->state_log_top = next_state_log_idx;
1765    }
1766  return REG_NOERROR;
1767}
1768
1769static reg_errcode_t
1770internal_function
1771merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1772		   re_dfastate_t **src, Idx num)
1773{
1774  Idx st_idx;
1775  reg_errcode_t err;
1776  for (st_idx = 0; st_idx < num; ++st_idx)
1777    {
1778      if (dst[st_idx] == NULL)
1779	dst[st_idx] = src[st_idx];
1780      else if (src[st_idx] != NULL)
1781	{
1782	  re_node_set merged_set;
1783	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1784					&src[st_idx]->nodes);
1785	  if (BE (err != REG_NOERROR, 0))
1786	    return err;
1787	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1788	  re_node_set_free (&merged_set);
1789	  if (BE (err != REG_NOERROR, 0))
1790	    return err;
1791	}
1792    }
1793  return REG_NOERROR;
1794}
1795
1796static reg_errcode_t
1797internal_function
1798update_cur_sifted_state (const re_match_context_t *mctx,
1799			 re_sift_context_t *sctx, Idx str_idx,
1800			 re_node_set *dest_nodes)
1801{
1802  const re_dfa_t *const dfa = mctx->dfa;
1803  reg_errcode_t err = REG_NOERROR;
1804  const re_node_set *candidates;
1805  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1806		: &mctx->state_log[str_idx]->nodes);
1807
1808  if (dest_nodes->nelem == 0)
1809    sctx->sifted_states[str_idx] = NULL;
1810  else
1811    {
1812      if (candidates)
1813	{
1814	  /* At first, add the nodes which can epsilon transit to a node in
1815	     DEST_NODE.  */
1816	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1817	  if (BE (err != REG_NOERROR, 0))
1818	    return err;
1819
1820	  /* Then, check the limitations in the current sift_context.  */
1821	  if (sctx->limits.nelem)
1822	    {
1823	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1824					 mctx->bkref_ents, str_idx);
1825	      if (BE (err != REG_NOERROR, 0))
1826		return err;
1827	    }
1828	}
1829
1830      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1831      if (BE (err != REG_NOERROR, 0))
1832	return err;
1833    }
1834
1835  if (candidates && mctx->state_log[str_idx]->has_backref)
1836    {
1837      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1838      if (BE (err != REG_NOERROR, 0))
1839	return err;
1840    }
1841  return REG_NOERROR;
1842}
1843
1844static reg_errcode_t
1845internal_function __attribute_warn_unused_result__
1846add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1847		       const re_node_set *candidates)
1848{
1849  reg_errcode_t err = REG_NOERROR;
1850  Idx i;
1851
1852  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1853  if (BE (err != REG_NOERROR, 0))
1854    return err;
1855
1856  if (!state->inveclosure.alloc)
1857    {
1858      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1859      if (BE (err != REG_NOERROR, 0))
1860	return REG_ESPACE;
1861      for (i = 0; i < dest_nodes->nelem; i++)
1862	{
1863	  err = re_node_set_merge (&state->inveclosure,
1864				   dfa->inveclosures + dest_nodes->elems[i]);
1865	  if (BE (err != REG_NOERROR, 0))
1866	    return REG_ESPACE;
1867	}
1868    }
1869  return re_node_set_add_intersect (dest_nodes, candidates,
1870				    &state->inveclosure);
1871}
1872
1873static reg_errcode_t
1874internal_function
1875sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1876		       const re_node_set *candidates)
1877{
1878    Idx ecl_idx;
1879    reg_errcode_t err;
1880    re_node_set *inv_eclosure = dfa->inveclosures + node;
1881    re_node_set except_nodes;
1882    re_node_set_init_empty (&except_nodes);
1883    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1884      {
1885	Idx cur_node = inv_eclosure->elems[ecl_idx];
1886	if (cur_node == node)
1887	  continue;
1888	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1889	  {
1890	    Idx edst1 = dfa->edests[cur_node].elems[0];
1891	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1892			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1893	    if ((!re_node_set_contains (inv_eclosure, edst1)
1894		 && re_node_set_contains (dest_nodes, edst1))
1895		|| (REG_VALID_NONZERO_INDEX (edst2)
1896		    && !re_node_set_contains (inv_eclosure, edst2)
1897		    && re_node_set_contains (dest_nodes, edst2)))
1898	      {
1899		err = re_node_set_add_intersect (&except_nodes, candidates,
1900						 dfa->inveclosures + cur_node);
1901		if (BE (err != REG_NOERROR, 0))
1902		  {
1903		    re_node_set_free (&except_nodes);
1904		    return err;
1905		  }
1906	      }
1907	  }
1908      }
1909    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1910      {
1911	Idx cur_node = inv_eclosure->elems[ecl_idx];
1912	if (!re_node_set_contains (&except_nodes, cur_node))
1913	  {
1914	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1915	    re_node_set_remove_at (dest_nodes, idx);
1916	  }
1917      }
1918    re_node_set_free (&except_nodes);
1919    return REG_NOERROR;
1920}
1921
1922static bool
1923internal_function
1924check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1925		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1926{
1927  const re_dfa_t *const dfa = mctx->dfa;
1928  Idx lim_idx, src_pos, dst_pos;
1929
1930  Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1931  Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1932  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1933    {
1934      Idx subexp_idx;
1935      struct re_backref_cache_entry *ent;
1936      ent = mctx->bkref_ents + limits->elems[lim_idx];
1937      subexp_idx = dfa->nodes[ent->node].opr.idx;
1938
1939      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1940					   subexp_idx, dst_node, dst_idx,
1941					   dst_bkref_idx);
1942      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1943					   subexp_idx, src_node, src_idx,
1944					   src_bkref_idx);
1945
1946      /* In case of:
1947	 <src> <dst> ( <subexp> )
1948	 ( <subexp> ) <src> <dst>
1949	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1950      if (src_pos == dst_pos)
1951	continue; /* This is unrelated limitation.  */
1952      else
1953	return true;
1954    }
1955  return false;
1956}
1957
1958static int
1959internal_function
1960check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1961			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1962{
1963  const re_dfa_t *const dfa = mctx->dfa;
1964  const re_node_set *eclosures = dfa->eclosures + from_node;
1965  Idx node_idx;
1966
1967  /* Else, we are on the boundary: examine the nodes on the epsilon
1968     closure.  */
1969  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1970    {
1971      Idx node = eclosures->elems[node_idx];
1972      switch (dfa->nodes[node].type)
1973	{
1974	case OP_BACK_REF:
1975	  if (bkref_idx != REG_MISSING)
1976	    {
1977	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1978	      do
1979		{
1980		  Idx dst;
1981		  int cpos;
1982
1983		  if (ent->node != node)
1984		    continue;
1985
1986		  if (subexp_idx < BITSET_WORD_BITS
1987		      && !(ent->eps_reachable_subexps_map
1988			   & ((bitset_word_t) 1 << subexp_idx)))
1989		    continue;
1990
1991		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1992		     OP_CLOSE_SUBEXP cases below.  But, if the
1993		     destination node is the same node as the source
1994		     node, don't recurse because it would cause an
1995		     infinite loop: a regex that exhibits this behavior
1996		     is ()\1*\1*  */
1997		  dst = dfa->edests[node].elems[0];
1998		  if (dst == from_node)
1999		    {
2000		      if (boundaries & 1)
2001			return -1;
2002		      else /* if (boundaries & 2) */
2003			return 0;
2004		    }
2005
2006		  cpos =
2007		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2008						 dst, bkref_idx);
2009		  if (cpos == -1 /* && (boundaries & 1) */)
2010		    return -1;
2011		  if (cpos == 0 && (boundaries & 2))
2012		    return 0;
2013
2014		  if (subexp_idx < BITSET_WORD_BITS)
2015		    ent->eps_reachable_subexps_map
2016		      &= ~((bitset_word_t) 1 << subexp_idx);
2017		}
2018	      while (ent++->more);
2019	    }
2020	  break;
2021
2022	case OP_OPEN_SUBEXP:
2023	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2024	    return -1;
2025	  break;
2026
2027	case OP_CLOSE_SUBEXP:
2028	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2029	    return 0;
2030	  break;
2031
2032	default:
2033	    break;
2034	}
2035    }
2036
2037  return (boundaries & 2) ? 1 : 0;
2038}
2039
2040static int
2041internal_function
2042check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2043			   Idx subexp_idx, Idx from_node, Idx str_idx,
2044			   Idx bkref_idx)
2045{
2046  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2047  int boundaries;
2048
2049  /* If we are outside the range of the subexpression, return -1 or 1.  */
2050  if (str_idx < lim->subexp_from)
2051    return -1;
2052
2053  if (lim->subexp_to < str_idx)
2054    return 1;
2055
2056  /* If we are within the subexpression, return 0.  */
2057  boundaries = (str_idx == lim->subexp_from);
2058  boundaries |= (str_idx == lim->subexp_to) << 1;
2059  if (boundaries == 0)
2060    return 0;
2061
2062  /* Else, examine epsilon closure.  */
2063  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2064				      from_node, bkref_idx);
2065}
2066
2067/* Check the limitations of sub expressions LIMITS, and remove the nodes
2068   which are against limitations from DEST_NODES. */
2069
2070static reg_errcode_t
2071internal_function
2072check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2073		     const re_node_set *candidates, re_node_set *limits,
2074		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2075{
2076  reg_errcode_t err;
2077  Idx node_idx, lim_idx;
2078
2079  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2080    {
2081      Idx subexp_idx;
2082      struct re_backref_cache_entry *ent;
2083      ent = bkref_ents + limits->elems[lim_idx];
2084
2085      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2086	continue; /* This is unrelated limitation.  */
2087
2088      subexp_idx = dfa->nodes[ent->node].opr.idx;
2089      if (ent->subexp_to == str_idx)
2090	{
2091	  Idx ops_node = REG_MISSING;
2092	  Idx cls_node = REG_MISSING;
2093	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2094	    {
2095	      Idx node = dest_nodes->elems[node_idx];
2096	      re_token_type_t type = dfa->nodes[node].type;
2097	      if (type == OP_OPEN_SUBEXP
2098		  && subexp_idx == dfa->nodes[node].opr.idx)
2099		ops_node = node;
2100	      else if (type == OP_CLOSE_SUBEXP
2101		       && subexp_idx == dfa->nodes[node].opr.idx)
2102		cls_node = node;
2103	    }
2104
2105	  /* Check the limitation of the open subexpression.  */
2106	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2107	  if (REG_VALID_INDEX (ops_node))
2108	    {
2109	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2110					   candidates);
2111	      if (BE (err != REG_NOERROR, 0))
2112		return err;
2113	    }
2114
2115	  /* Check the limitation of the close subexpression.  */
2116	  if (REG_VALID_INDEX (cls_node))
2117	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2118	      {
2119		Idx node = dest_nodes->elems[node_idx];
2120		if (!re_node_set_contains (dfa->inveclosures + node,
2121					   cls_node)
2122		    && !re_node_set_contains (dfa->eclosures + node,
2123					      cls_node))
2124		  {
2125		    /* It is against this limitation.
2126		       Remove it form the current sifted state.  */
2127		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2128						 candidates);
2129		    if (BE (err != REG_NOERROR, 0))
2130		      return err;
2131		    --node_idx;
2132		  }
2133	      }
2134	}
2135      else /* (ent->subexp_to != str_idx)  */
2136	{
2137	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2138	    {
2139	      Idx node = dest_nodes->elems[node_idx];
2140	      re_token_type_t type = dfa->nodes[node].type;
2141	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2142		{
2143		  if (subexp_idx != dfa->nodes[node].opr.idx)
2144		    continue;
2145		  /* It is against this limitation.
2146		     Remove it form the current sifted state.  */
2147		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2148					       candidates);
2149		  if (BE (err != REG_NOERROR, 0))
2150		    return err;
2151		}
2152	    }
2153	}
2154    }
2155  return REG_NOERROR;
2156}
2157
2158static reg_errcode_t
2159internal_function __attribute_warn_unused_result__
2160sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2161		   Idx str_idx, const re_node_set *candidates)
2162{
2163  const re_dfa_t *const dfa = mctx->dfa;
2164  reg_errcode_t err;
2165  Idx node_idx, node;
2166  re_sift_context_t local_sctx;
2167  Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2168
2169  if (first_idx == REG_MISSING)
2170    return REG_NOERROR;
2171
2172  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2173
2174  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2175    {
2176      Idx enabled_idx;
2177      re_token_type_t type;
2178      struct re_backref_cache_entry *entry;
2179      node = candidates->elems[node_idx];
2180      type = dfa->nodes[node].type;
2181      /* Avoid infinite loop for the REs like "()\1+".  */
2182      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2183	continue;
2184      if (type != OP_BACK_REF)
2185	continue;
2186
2187      entry = mctx->bkref_ents + first_idx;
2188      enabled_idx = first_idx;
2189      do
2190	{
2191	  Idx subexp_len;
2192	  Idx to_idx;
2193	  Idx dst_node;
2194	  bool ok;
2195	  re_dfastate_t *cur_state;
2196
2197	  if (entry->node != node)
2198	    continue;
2199	  subexp_len = entry->subexp_to - entry->subexp_from;
2200	  to_idx = str_idx + subexp_len;
2201	  dst_node = (subexp_len ? dfa->nexts[node]
2202		      : dfa->edests[node].elems[0]);
2203
2204	  if (to_idx > sctx->last_str_idx
2205	      || sctx->sifted_states[to_idx] == NULL
2206	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2207	      || check_dst_limits (mctx, &sctx->limits, node,
2208				   str_idx, dst_node, to_idx))
2209	    continue;
2210
2211	  if (local_sctx.sifted_states == NULL)
2212	    {
2213	      local_sctx = *sctx;
2214	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2215	      if (BE (err != REG_NOERROR, 0))
2216		goto free_return;
2217	    }
2218	  local_sctx.last_node = node;
2219	  local_sctx.last_str_idx = str_idx;
2220	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2221	  if (BE (! ok, 0))
2222	    {
2223	      err = REG_ESPACE;
2224	      goto free_return;
2225	    }
2226	  cur_state = local_sctx.sifted_states[str_idx];
2227	  err = sift_states_backward (mctx, &local_sctx);
2228	  if (BE (err != REG_NOERROR, 0))
2229	    goto free_return;
2230	  if (sctx->limited_states != NULL)
2231	    {
2232	      err = merge_state_array (dfa, sctx->limited_states,
2233				       local_sctx.sifted_states,
2234				       str_idx + 1);
2235	      if (BE (err != REG_NOERROR, 0))
2236		goto free_return;
2237	    }
2238	  local_sctx.sifted_states[str_idx] = cur_state;
2239	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2240
2241	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2242	  entry = mctx->bkref_ents + enabled_idx;
2243	}
2244      while (enabled_idx++, entry++->more);
2245    }
2246  err = REG_NOERROR;
2247 free_return:
2248  if (local_sctx.sifted_states != NULL)
2249    {
2250      re_node_set_free (&local_sctx.limits);
2251    }
2252
2253  return err;
2254}
2255
2256
2257#ifdef RE_ENABLE_I18N
2258static int
2259internal_function
2260sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2261		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2262{
2263  const re_dfa_t *const dfa = mctx->dfa;
2264  int naccepted;
2265  /* Check the node can accept "multi byte".  */
2266  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2267  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2268      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2269			    dfa->nexts[node_idx]))
2270    /* The node can't accept the "multi byte", or the
2271       destination was already thrown away, then the node
2272       could't accept the current input "multi byte".   */
2273    naccepted = 0;
2274  /* Otherwise, it is sure that the node could accept
2275     'naccepted' bytes input.  */
2276  return naccepted;
2277}
2278#endif /* RE_ENABLE_I18N */
2279
2280
2281/* Functions for state transition.  */
2282
2283/* Return the next state to which the current state STATE will transit by
2284   accepting the current input byte, and update STATE_LOG if necessary.
2285   If STATE can accept a multibyte char/collating element/back reference
2286   update the destination of STATE_LOG.  */
2287
2288static re_dfastate_t *
2289internal_function __attribute_warn_unused_result__
2290transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2291	       re_dfastate_t *state)
2292{
2293  re_dfastate_t **trtable;
2294  unsigned char ch;
2295
2296#ifdef RE_ENABLE_I18N
2297  /* If the current state can accept multibyte.  */
2298  if (BE (state->accept_mb, 0))
2299    {
2300      *err = transit_state_mb (mctx, state);
2301      if (BE (*err != REG_NOERROR, 0))
2302	return NULL;
2303    }
2304#endif /* RE_ENABLE_I18N */
2305
2306  /* Then decide the next state with the single byte.  */
2307#if 0
2308  if (0)
2309    /* don't use transition table  */
2310    return transit_state_sb (err, mctx, state);
2311#endif
2312
2313  /* Use transition table  */
2314  ch = re_string_fetch_byte (&mctx->input);
2315  for (;;)
2316    {
2317      trtable = state->trtable;
2318      if (BE (trtable != NULL, 1))
2319	return trtable[ch];
2320
2321      trtable = state->word_trtable;
2322      if (BE (trtable != NULL, 1))
2323	{
2324	  unsigned int context;
2325	  context
2326	    = re_string_context_at (&mctx->input,
2327				    re_string_cur_idx (&mctx->input) - 1,
2328				    mctx->eflags);
2329	  if (IS_WORD_CONTEXT (context))
2330	    return trtable[ch + SBC_MAX];
2331	  else
2332	    return trtable[ch];
2333	}
2334
2335      if (!build_trtable (mctx->dfa, state))
2336	{
2337	  *err = REG_ESPACE;
2338	  return NULL;
2339	}
2340
2341      /* Retry, we now have a transition table.  */
2342    }
2343}
2344
2345/* Update the state_log if we need */
2346static re_dfastate_t *
2347internal_function
2348merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2349		      re_dfastate_t *next_state)
2350{
2351  const re_dfa_t *const dfa = mctx->dfa;
2352  Idx cur_idx = re_string_cur_idx (&mctx->input);
2353
2354  if (cur_idx > mctx->state_log_top)
2355    {
2356      mctx->state_log[cur_idx] = next_state;
2357      mctx->state_log_top = cur_idx;
2358    }
2359  else if (mctx->state_log[cur_idx] == 0)
2360    {
2361      mctx->state_log[cur_idx] = next_state;
2362    }
2363  else
2364    {
2365      re_dfastate_t *pstate;
2366      unsigned int context;
2367      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2368      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2369	 the destination of a multibyte char/collating element/
2370	 back reference.  Then the next state is the union set of
2371	 these destinations and the results of the transition table.  */
2372      pstate = mctx->state_log[cur_idx];
2373      log_nodes = pstate->entrance_nodes;
2374      if (next_state != NULL)
2375	{
2376	  table_nodes = next_state->entrance_nodes;
2377	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2378					     log_nodes);
2379	  if (BE (*err != REG_NOERROR, 0))
2380	    return NULL;
2381	}
2382      else
2383	next_nodes = *log_nodes;
2384      /* Note: We already add the nodes of the initial state,
2385	 then we don't need to add them here.  */
2386
2387      context = re_string_context_at (&mctx->input,
2388				      re_string_cur_idx (&mctx->input) - 1,
2389				      mctx->eflags);
2390      next_state = mctx->state_log[cur_idx]
2391	= re_acquire_state_context (err, dfa, &next_nodes, context);
2392      /* We don't need to check errors here, since the return value of
2393	 this function is next_state and ERR is already set.  */
2394
2395      if (table_nodes != NULL)
2396	re_node_set_free (&next_nodes);
2397    }
2398
2399  if (BE (dfa->nbackref, 0) && next_state != NULL)
2400    {
2401      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2402	 later.  We must check them here, since the back references in the
2403	 next state might use them.  */
2404      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2405					cur_idx);
2406      if (BE (*err != REG_NOERROR, 0))
2407	return NULL;
2408
2409      /* If the next state has back references.  */
2410      if (next_state->has_backref)
2411	{
2412	  *err = transit_state_bkref (mctx, &next_state->nodes);
2413	  if (BE (*err != REG_NOERROR, 0))
2414	    return NULL;
2415	  next_state = mctx->state_log[cur_idx];
2416	}
2417    }
2418
2419  return next_state;
2420}
2421
2422/* Skip bytes in the input that correspond to part of a
2423   multi-byte match, then look in the log for a state
2424   from which to restart matching.  */
2425static re_dfastate_t *
2426internal_function
2427find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2428{
2429  re_dfastate_t *cur_state;
2430  do
2431    {
2432      Idx max = mctx->state_log_top;
2433      Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2434
2435      do
2436	{
2437	  if (++cur_str_idx > max)
2438	    return NULL;
2439	  re_string_skip_bytes (&mctx->input, 1);
2440	}
2441      while (mctx->state_log[cur_str_idx] == NULL);
2442
2443      cur_state = merge_state_with_log (err, mctx, NULL);
2444    }
2445  while (*err == REG_NOERROR && cur_state == NULL);
2446  return cur_state;
2447}
2448
2449/* Helper functions for transit_state.  */
2450
2451/* From the node set CUR_NODES, pick up the nodes whose types are
2452   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2453   expression. And register them to use them later for evaluating the
2454   corresponding back references.  */
2455
2456static reg_errcode_t
2457internal_function
2458check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2459			   Idx str_idx)
2460{
2461  const re_dfa_t *const dfa = mctx->dfa;
2462  Idx node_idx;
2463  reg_errcode_t err;
2464
2465  /* TODO: This isn't efficient.
2466	   Because there might be more than one nodes whose types are
2467	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2468	   nodes.
2469	   E.g. RE: (a){2}  */
2470  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2471    {
2472      Idx node = cur_nodes->elems[node_idx];
2473      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2474	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2475	  && (dfa->used_bkref_map
2476	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2477	{
2478	  err = match_ctx_add_subtop (mctx, node, str_idx);
2479	  if (BE (err != REG_NOERROR, 0))
2480	    return err;
2481	}
2482    }
2483  return REG_NOERROR;
2484}
2485
2486#if 0
2487/* Return the next state to which the current state STATE will transit by
2488   accepting the current input byte.  */
2489
2490static re_dfastate_t *
2491transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2492		  re_dfastate_t *state)
2493{
2494  const re_dfa_t *const dfa = mctx->dfa;
2495  re_node_set next_nodes;
2496  re_dfastate_t *next_state;
2497  Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2498  unsigned int context;
2499
2500  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2501  if (BE (*err != REG_NOERROR, 0))
2502    return NULL;
2503  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2504    {
2505      Idx cur_node = state->nodes.elems[node_cnt];
2506      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2507	{
2508	  *err = re_node_set_merge (&next_nodes,
2509				    dfa->eclosures + dfa->nexts[cur_node]);
2510	  if (BE (*err != REG_NOERROR, 0))
2511	    {
2512	      re_node_set_free (&next_nodes);
2513	      return NULL;
2514	    }
2515	}
2516    }
2517  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2518  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2519  /* We don't need to check errors here, since the return value of
2520     this function is next_state and ERR is already set.  */
2521
2522  re_node_set_free (&next_nodes);
2523  re_string_skip_bytes (&mctx->input, 1);
2524  return next_state;
2525}
2526#endif
2527
2528#ifdef RE_ENABLE_I18N
2529static reg_errcode_t
2530internal_function
2531transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2532{
2533  const re_dfa_t *const dfa = mctx->dfa;
2534  reg_errcode_t err;
2535  Idx i;
2536
2537  for (i = 0; i < pstate->nodes.nelem; ++i)
2538    {
2539      re_node_set dest_nodes, *new_nodes;
2540      Idx cur_node_idx = pstate->nodes.elems[i];
2541      int naccepted;
2542      Idx dest_idx;
2543      unsigned int context;
2544      re_dfastate_t *dest_state;
2545
2546      if (!dfa->nodes[cur_node_idx].accept_mb)
2547	continue;
2548
2549      if (dfa->nodes[cur_node_idx].constraint)
2550	{
2551	  context = re_string_context_at (&mctx->input,
2552					  re_string_cur_idx (&mctx->input),
2553					  mctx->eflags);
2554	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2555					   context))
2556	    continue;
2557	}
2558
2559      /* How many bytes the node can accept?  */
2560      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2561					   re_string_cur_idx (&mctx->input));
2562      if (naccepted == 0)
2563	continue;
2564
2565      /* The node can accepts 'naccepted' bytes.  */
2566      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2567      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2568			       : mctx->max_mb_elem_len);
2569      err = clean_state_log_if_needed (mctx, dest_idx);
2570      if (BE (err != REG_NOERROR, 0))
2571	return err;
2572#ifdef DEBUG
2573      assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2574#endif
2575      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2576
2577      dest_state = mctx->state_log[dest_idx];
2578      if (dest_state == NULL)
2579	dest_nodes = *new_nodes;
2580      else
2581	{
2582	  err = re_node_set_init_union (&dest_nodes,
2583					dest_state->entrance_nodes, new_nodes);
2584	  if (BE (err != REG_NOERROR, 0))
2585	    return err;
2586	}
2587      context = re_string_context_at (&mctx->input, dest_idx - 1,
2588				      mctx->eflags);
2589      mctx->state_log[dest_idx]
2590	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2591      if (dest_state != NULL)
2592	re_node_set_free (&dest_nodes);
2593      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2594	return err;
2595    }
2596  return REG_NOERROR;
2597}
2598#endif /* RE_ENABLE_I18N */
2599
2600static reg_errcode_t
2601internal_function
2602transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2603{
2604  const re_dfa_t *const dfa = mctx->dfa;
2605  reg_errcode_t err;
2606  Idx i;
2607  Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2608
2609  for (i = 0; i < nodes->nelem; ++i)
2610    {
2611      Idx dest_str_idx, prev_nelem, bkc_idx;
2612      Idx node_idx = nodes->elems[i];
2613      unsigned int context;
2614      const re_token_t *node = dfa->nodes + node_idx;
2615      re_node_set *new_dest_nodes;
2616
2617      /* Check whether 'node' is a backreference or not.  */
2618      if (node->type != OP_BACK_REF)
2619	continue;
2620
2621      if (node->constraint)
2622	{
2623	  context = re_string_context_at (&mctx->input, cur_str_idx,
2624					  mctx->eflags);
2625	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2626	    continue;
2627	}
2628
2629      /* 'node' is a backreference.
2630	 Check the substring which the substring matched.  */
2631      bkc_idx = mctx->nbkref_ents;
2632      err = get_subexp (mctx, node_idx, cur_str_idx);
2633      if (BE (err != REG_NOERROR, 0))
2634	goto free_return;
2635
2636      /* And add the epsilon closures (which is 'new_dest_nodes') of
2637	 the backreference to appropriate state_log.  */
2638#ifdef DEBUG
2639      assert (dfa->nexts[node_idx] != REG_MISSING);
2640#endif
2641      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2642	{
2643	  Idx subexp_len;
2644	  re_dfastate_t *dest_state;
2645	  struct re_backref_cache_entry *bkref_ent;
2646	  bkref_ent = mctx->bkref_ents + bkc_idx;
2647	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2648	    continue;
2649	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2650	  new_dest_nodes = (subexp_len == 0
2651			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2652			    : dfa->eclosures + dfa->nexts[node_idx]);
2653	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2654			  - bkref_ent->subexp_from);
2655	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2656					  mctx->eflags);
2657	  dest_state = mctx->state_log[dest_str_idx];
2658	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2659			: mctx->state_log[cur_str_idx]->nodes.nelem);
2660	  /* Add 'new_dest_node' to state_log.  */
2661	  if (dest_state == NULL)
2662	    {
2663	      mctx->state_log[dest_str_idx]
2664		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2665					    context);
2666	      if (BE (mctx->state_log[dest_str_idx] == NULL
2667		      && err != REG_NOERROR, 0))
2668		goto free_return;
2669	    }
2670	  else
2671	    {
2672	      re_node_set dest_nodes;
2673	      err = re_node_set_init_union (&dest_nodes,
2674					    dest_state->entrance_nodes,
2675					    new_dest_nodes);
2676	      if (BE (err != REG_NOERROR, 0))
2677		{
2678		  re_node_set_free (&dest_nodes);
2679		  goto free_return;
2680		}
2681	      mctx->state_log[dest_str_idx]
2682		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2683	      re_node_set_free (&dest_nodes);
2684	      if (BE (mctx->state_log[dest_str_idx] == NULL
2685		      && err != REG_NOERROR, 0))
2686		goto free_return;
2687	    }
2688	  /* We need to check recursively if the backreference can epsilon
2689	     transit.  */
2690	  if (subexp_len == 0
2691	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2692	    {
2693	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2694					       cur_str_idx);
2695	      if (BE (err != REG_NOERROR, 0))
2696		goto free_return;
2697	      err = transit_state_bkref (mctx, new_dest_nodes);
2698	      if (BE (err != REG_NOERROR, 0))
2699		goto free_return;
2700	    }
2701	}
2702    }
2703  err = REG_NOERROR;
2704 free_return:
2705  return err;
2706}
2707
2708/* Enumerate all the candidates which the backreference BKREF_NODE can match
2709   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2710   Note that we might collect inappropriate candidates here.
2711   However, the cost of checking them strictly here is too high, then we
2712   delay these checking for prune_impossible_nodes().  */
2713
2714static reg_errcode_t
2715internal_function __attribute_warn_unused_result__
2716get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2717{
2718  const re_dfa_t *const dfa = mctx->dfa;
2719  Idx subexp_num, sub_top_idx;
2720  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2721  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2722  Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2723  if (cache_idx != REG_MISSING)
2724    {
2725      const struct re_backref_cache_entry *entry
2726	= mctx->bkref_ents + cache_idx;
2727      do
2728	if (entry->node == bkref_node)
2729	  return REG_NOERROR; /* We already checked it.  */
2730      while (entry++->more);
2731    }
2732
2733  subexp_num = dfa->nodes[bkref_node].opr.idx;
2734
2735  /* For each sub expression  */
2736  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2737    {
2738      reg_errcode_t err;
2739      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2740      re_sub_match_last_t *sub_last;
2741      Idx sub_last_idx, sl_str, bkref_str_off;
2742
2743      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2744	continue; /* It isn't related.  */
2745
2746      sl_str = sub_top->str_idx;
2747      bkref_str_off = bkref_str_idx;
2748      /* At first, check the last node of sub expressions we already
2749	 evaluated.  */
2750      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2751	{
2752	  regoff_t sl_str_diff;
2753	  sub_last = sub_top->lasts[sub_last_idx];
2754	  sl_str_diff = sub_last->str_idx - sl_str;
2755	  /* The matched string by the sub expression match with the substring
2756	     at the back reference?  */
2757	  if (sl_str_diff > 0)
2758	    {
2759	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2760		{
2761		  /* Not enough chars for a successful match.  */
2762		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2763		    break;
2764
2765		  err = clean_state_log_if_needed (mctx,
2766						   bkref_str_off
2767						   + sl_str_diff);
2768		  if (BE (err != REG_NOERROR, 0))
2769		    return err;
2770		  buf = (const char *) re_string_get_buffer (&mctx->input);
2771		}
2772	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2773		/* We don't need to search this sub expression any more.  */
2774		break;
2775	    }
2776	  bkref_str_off += sl_str_diff;
2777	  sl_str += sl_str_diff;
2778	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2779				bkref_str_idx);
2780
2781	  /* Reload buf, since the preceding call might have reallocated
2782	     the buffer.  */
2783	  buf = (const char *) re_string_get_buffer (&mctx->input);
2784
2785	  if (err == REG_NOMATCH)
2786	    continue;
2787	  if (BE (err != REG_NOERROR, 0))
2788	    return err;
2789	}
2790
2791      if (sub_last_idx < sub_top->nlasts)
2792	continue;
2793      if (sub_last_idx > 0)
2794	++sl_str;
2795      /* Then, search for the other last nodes of the sub expression.  */
2796      for (; sl_str <= bkref_str_idx; ++sl_str)
2797	{
2798	  Idx cls_node;
2799	  regoff_t sl_str_off;
2800	  const re_node_set *nodes;
2801	  sl_str_off = sl_str - sub_top->str_idx;
2802	  /* The matched string by the sub expression match with the substring
2803	     at the back reference?  */
2804	  if (sl_str_off > 0)
2805	    {
2806	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2807		{
2808		  /* If we are at the end of the input, we cannot match.  */
2809		  if (bkref_str_off >= mctx->input.len)
2810		    break;
2811
2812		  err = extend_buffers (mctx, bkref_str_off + 1);
2813		  if (BE (err != REG_NOERROR, 0))
2814		    return err;
2815
2816		  buf = (const char *) re_string_get_buffer (&mctx->input);
2817		}
2818	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2819		break; /* We don't need to search this sub expression
2820			  any more.  */
2821	    }
2822	  if (mctx->state_log[sl_str] == NULL)
2823	    continue;
2824	  /* Does this state have a ')' of the sub expression?  */
2825	  nodes = &mctx->state_log[sl_str]->nodes;
2826	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2827				       OP_CLOSE_SUBEXP);
2828	  if (cls_node == REG_MISSING)
2829	    continue; /* No.  */
2830	  if (sub_top->path == NULL)
2831	    {
2832	      sub_top->path = calloc (sizeof (state_array_t),
2833				      sl_str - sub_top->str_idx + 1);
2834	      if (sub_top->path == NULL)
2835		return REG_ESPACE;
2836	    }
2837	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2838	     in the current context?  */
2839	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2840			       sub_top->str_idx, cls_node, sl_str,
2841			       OP_CLOSE_SUBEXP);
2842	  if (err == REG_NOMATCH)
2843	      continue;
2844	  if (BE (err != REG_NOERROR, 0))
2845	      return err;
2846	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2847	  if (BE (sub_last == NULL, 0))
2848	    return REG_ESPACE;
2849	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2850				bkref_str_idx);
2851	  if (err == REG_NOMATCH)
2852	    continue;
2853	}
2854    }
2855  return REG_NOERROR;
2856}
2857
2858/* Helper functions for get_subexp().  */
2859
2860/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2861   If it can arrive, register the sub expression expressed with SUB_TOP
2862   and SUB_LAST.  */
2863
2864static reg_errcode_t
2865internal_function
2866get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2867		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2868{
2869  reg_errcode_t err;
2870  Idx to_idx;
2871  /* Can the subexpression arrive the back reference?  */
2872  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2873		       sub_last->str_idx, bkref_node, bkref_str,
2874		       OP_OPEN_SUBEXP);
2875  if (err != REG_NOERROR)
2876    return err;
2877  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2878			     sub_last->str_idx);
2879  if (BE (err != REG_NOERROR, 0))
2880    return err;
2881  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2882  return clean_state_log_if_needed (mctx, to_idx);
2883}
2884
2885/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2886   Search '(' if FL_OPEN, or search ')' otherwise.
2887   TODO: This function isn't efficient...
2888	 Because there might be more than one nodes whose types are
2889	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2890	 nodes.
2891	 E.g. RE: (a){2}  */
2892
2893static Idx
2894internal_function
2895find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2896		  Idx subexp_idx, int type)
2897{
2898  Idx cls_idx;
2899  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2900    {
2901      Idx cls_node = nodes->elems[cls_idx];
2902      const re_token_t *node = dfa->nodes + cls_node;
2903      if (node->type == type
2904	  && node->opr.idx == subexp_idx)
2905	return cls_node;
2906    }
2907  return REG_MISSING;
2908}
2909
2910/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2911   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2912   heavily reused.
2913   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2914
2915static reg_errcode_t
2916internal_function __attribute_warn_unused_result__
2917check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2918	       Idx top_str, Idx last_node, Idx last_str, int type)
2919{
2920  const re_dfa_t *const dfa = mctx->dfa;
2921  reg_errcode_t err = REG_NOERROR;
2922  Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2923  re_dfastate_t *cur_state = NULL;
2924  re_node_set *cur_nodes, next_nodes;
2925  re_dfastate_t **backup_state_log;
2926  unsigned int context;
2927
2928  subexp_num = dfa->nodes[top_node].opr.idx;
2929  /* Extend the buffer if we need.  */
2930  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2931    {
2932      re_dfastate_t **new_array;
2933      Idx old_alloc = path->alloc;
2934      Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2935      Idx new_alloc;
2936      if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2937	return REG_ESPACE;
2938      new_alloc = old_alloc + incr_alloc;
2939      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2940	return REG_ESPACE;
2941      new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2942      if (BE (new_array == NULL, 0))
2943	return REG_ESPACE;
2944      path->array = new_array;
2945      path->alloc = new_alloc;
2946      memset (new_array + old_alloc, '\0',
2947	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2948    }
2949
2950  str_idx = path->next_idx ? path->next_idx : top_str;
2951
2952  /* Temporary modify MCTX.  */
2953  backup_state_log = mctx->state_log;
2954  backup_cur_idx = mctx->input.cur_idx;
2955  mctx->state_log = path->array;
2956  mctx->input.cur_idx = str_idx;
2957
2958  /* Setup initial node set.  */
2959  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2960  if (str_idx == top_str)
2961    {
2962      err = re_node_set_init_1 (&next_nodes, top_node);
2963      if (BE (err != REG_NOERROR, 0))
2964	return err;
2965      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2966      if (BE (err != REG_NOERROR, 0))
2967	{
2968	  re_node_set_free (&next_nodes);
2969	  return err;
2970	}
2971    }
2972  else
2973    {
2974      cur_state = mctx->state_log[str_idx];
2975      if (cur_state && cur_state->has_backref)
2976	{
2977	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2978	  if (BE (err != REG_NOERROR, 0))
2979	    return err;
2980	}
2981      else
2982	re_node_set_init_empty (&next_nodes);
2983    }
2984  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2985    {
2986      if (next_nodes.nelem)
2987	{
2988	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2989				    subexp_num, type);
2990	  if (BE (err != REG_NOERROR, 0))
2991	    {
2992	      re_node_set_free (&next_nodes);
2993	      return err;
2994	    }
2995	}
2996      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2997      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2998	{
2999	  re_node_set_free (&next_nodes);
3000	  return err;
3001	}
3002      mctx->state_log[str_idx] = cur_state;
3003    }
3004
3005  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3006    {
3007      re_node_set_empty (&next_nodes);
3008      if (mctx->state_log[str_idx + 1])
3009	{
3010	  err = re_node_set_merge (&next_nodes,
3011				   &mctx->state_log[str_idx + 1]->nodes);
3012	  if (BE (err != REG_NOERROR, 0))
3013	    {
3014	      re_node_set_free (&next_nodes);
3015	      return err;
3016	    }
3017	}
3018      if (cur_state)
3019	{
3020	  err = check_arrival_add_next_nodes (mctx, str_idx,
3021					      &cur_state->non_eps_nodes,
3022					      &next_nodes);
3023	  if (BE (err != REG_NOERROR, 0))
3024	    {
3025	      re_node_set_free (&next_nodes);
3026	      return err;
3027	    }
3028	}
3029      ++str_idx;
3030      if (next_nodes.nelem)
3031	{
3032	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3033	  if (BE (err != REG_NOERROR, 0))
3034	    {
3035	      re_node_set_free (&next_nodes);
3036	      return err;
3037	    }
3038	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3039				    subexp_num, type);
3040	  if (BE (err != REG_NOERROR, 0))
3041	    {
3042	      re_node_set_free (&next_nodes);
3043	      return err;
3044	    }
3045	}
3046      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3047      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3048      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3049	{
3050	  re_node_set_free (&next_nodes);
3051	  return err;
3052	}
3053      mctx->state_log[str_idx] = cur_state;
3054      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3055    }
3056  re_node_set_free (&next_nodes);
3057  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3058	       : &mctx->state_log[last_str]->nodes);
3059  path->next_idx = str_idx;
3060
3061  /* Fix MCTX.  */
3062  mctx->state_log = backup_state_log;
3063  mctx->input.cur_idx = backup_cur_idx;
3064
3065  /* Then check the current node set has the node LAST_NODE.  */
3066  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3067    return REG_NOERROR;
3068
3069  return REG_NOMATCH;
3070}
3071
3072/* Helper functions for check_arrival.  */
3073
3074/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3075   to NEXT_NODES.
3076   TODO: This function is similar to the functions transit_state*(),
3077	 however this function has many additional works.
3078	 Can't we unify them?  */
3079
3080static reg_errcode_t
3081internal_function __attribute_warn_unused_result__
3082check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3083			      re_node_set *cur_nodes, re_node_set *next_nodes)
3084{
3085  const re_dfa_t *const dfa = mctx->dfa;
3086  bool ok;
3087  Idx cur_idx;
3088#ifdef RE_ENABLE_I18N
3089  reg_errcode_t err = REG_NOERROR;
3090#endif
3091  re_node_set union_set;
3092  re_node_set_init_empty (&union_set);
3093  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3094    {
3095      int naccepted = 0;
3096      Idx cur_node = cur_nodes->elems[cur_idx];
3097#ifdef DEBUG
3098      re_token_type_t type = dfa->nodes[cur_node].type;
3099      assert (!IS_EPSILON_NODE (type));
3100#endif
3101#ifdef RE_ENABLE_I18N
3102      /* If the node may accept "multi byte".  */
3103      if (dfa->nodes[cur_node].accept_mb)
3104	{
3105	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3106					       str_idx);
3107	  if (naccepted > 1)
3108	    {
3109	      re_dfastate_t *dest_state;
3110	      Idx next_node = dfa->nexts[cur_node];
3111	      Idx next_idx = str_idx + naccepted;
3112	      dest_state = mctx->state_log[next_idx];
3113	      re_node_set_empty (&union_set);
3114	      if (dest_state)
3115		{
3116		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3117		  if (BE (err != REG_NOERROR, 0))
3118		    {
3119		      re_node_set_free (&union_set);
3120		      return err;
3121		    }
3122		}
3123	      ok = re_node_set_insert (&union_set, next_node);
3124	      if (BE (! ok, 0))
3125		{
3126		  re_node_set_free (&union_set);
3127		  return REG_ESPACE;
3128		}
3129	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3130							    &union_set);
3131	      if (BE (mctx->state_log[next_idx] == NULL
3132		      && err != REG_NOERROR, 0))
3133		{
3134		  re_node_set_free (&union_set);
3135		  return err;
3136		}
3137	    }
3138	}
3139#endif /* RE_ENABLE_I18N */
3140      if (naccepted
3141	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3142	{
3143	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3144	  if (BE (! ok, 0))
3145	    {
3146	      re_node_set_free (&union_set);
3147	      return REG_ESPACE;
3148	    }
3149	}
3150    }
3151  re_node_set_free (&union_set);
3152  return REG_NOERROR;
3153}
3154
3155/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3156   CUR_NODES, however exclude the nodes which are:
3157    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3158    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3159*/
3160
3161static reg_errcode_t
3162internal_function
3163check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3164			  Idx ex_subexp, int type)
3165{
3166  reg_errcode_t err;
3167  Idx idx, outside_node;
3168  re_node_set new_nodes;
3169#ifdef DEBUG
3170  assert (cur_nodes->nelem);
3171#endif
3172  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3173  if (BE (err != REG_NOERROR, 0))
3174    return err;
3175  /* Create a new node set NEW_NODES with the nodes which are epsilon
3176     closures of the node in CUR_NODES.  */
3177
3178  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3179    {
3180      Idx cur_node = cur_nodes->elems[idx];
3181      const re_node_set *eclosure = dfa->eclosures + cur_node;
3182      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3183      if (outside_node == REG_MISSING)
3184	{
3185	  /* There are no problematic nodes, just merge them.  */
3186	  err = re_node_set_merge (&new_nodes, eclosure);
3187	  if (BE (err != REG_NOERROR, 0))
3188	    {
3189	      re_node_set_free (&new_nodes);
3190	      return err;
3191	    }
3192	}
3193      else
3194	{
3195	  /* There are problematic nodes, re-calculate incrementally.  */
3196	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3197					      ex_subexp, type);
3198	  if (BE (err != REG_NOERROR, 0))
3199	    {
3200	      re_node_set_free (&new_nodes);
3201	      return err;
3202	    }
3203	}
3204    }
3205  re_node_set_free (cur_nodes);
3206  *cur_nodes = new_nodes;
3207  return REG_NOERROR;
3208}
3209
3210/* Helper function for check_arrival_expand_ecl.
3211   Check incrementally the epsilon closure of TARGET, and if it isn't
3212   problematic append it to DST_NODES.  */
3213
3214static reg_errcode_t
3215internal_function __attribute_warn_unused_result__
3216check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3217			      Idx target, Idx ex_subexp, int type)
3218{
3219  Idx cur_node;
3220  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3221    {
3222      bool ok;
3223
3224      if (dfa->nodes[cur_node].type == type
3225	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3226	{
3227	  if (type == OP_CLOSE_SUBEXP)
3228	    {
3229	      ok = re_node_set_insert (dst_nodes, cur_node);
3230	      if (BE (! ok, 0))
3231		return REG_ESPACE;
3232	    }
3233	  break;
3234	}
3235      ok = re_node_set_insert (dst_nodes, cur_node);
3236      if (BE (! ok, 0))
3237	return REG_ESPACE;
3238      if (dfa->edests[cur_node].nelem == 0)
3239	break;
3240      if (dfa->edests[cur_node].nelem == 2)
3241	{
3242	  reg_errcode_t err;
3243	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3244					      dfa->edests[cur_node].elems[1],
3245					      ex_subexp, type);
3246	  if (BE (err != REG_NOERROR, 0))
3247	    return err;
3248	}
3249      cur_node = dfa->edests[cur_node].elems[0];
3250    }
3251  return REG_NOERROR;
3252}
3253
3254
3255/* For all the back references in the current state, calculate the
3256   destination of the back references by the appropriate entry
3257   in MCTX->BKREF_ENTS.  */
3258
3259static reg_errcode_t
3260internal_function __attribute_warn_unused_result__
3261expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3262		    Idx cur_str, Idx subexp_num, int type)
3263{
3264  const re_dfa_t *const dfa = mctx->dfa;
3265  reg_errcode_t err;
3266  Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3267  struct re_backref_cache_entry *ent;
3268
3269  if (cache_idx_start == REG_MISSING)
3270    return REG_NOERROR;
3271
3272 restart:
3273  ent = mctx->bkref_ents + cache_idx_start;
3274  do
3275    {
3276      Idx to_idx, next_node;
3277
3278      /* Is this entry ENT is appropriate?  */
3279      if (!re_node_set_contains (cur_nodes, ent->node))
3280	continue; /* No.  */
3281
3282      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3283      /* Calculate the destination of the back reference, and append it
3284	 to MCTX->STATE_LOG.  */
3285      if (to_idx == cur_str)
3286	{
3287	  /* The backreference did epsilon transit, we must re-check all the
3288	     node in the current state.  */
3289	  re_node_set new_dests;
3290	  reg_errcode_t err2, err3;
3291	  next_node = dfa->edests[ent->node].elems[0];
3292	  if (re_node_set_contains (cur_nodes, next_node))
3293	    continue;
3294	  err = re_node_set_init_1 (&new_dests, next_node);
3295	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3296	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3297	  re_node_set_free (&new_dests);
3298	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3299		  || err3 != REG_NOERROR, 0))
3300	    {
3301	      err = (err != REG_NOERROR ? err
3302		     : (err2 != REG_NOERROR ? err2 : err3));
3303	      return err;
3304	    }
3305	  /* TODO: It is still inefficient...  */
3306	  goto restart;
3307	}
3308      else
3309	{
3310	  re_node_set union_set;
3311	  next_node = dfa->nexts[ent->node];
3312	  if (mctx->state_log[to_idx])
3313	    {
3314	      bool ok;
3315	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3316					next_node))
3317		continue;
3318	      err = re_node_set_init_copy (&union_set,
3319					   &mctx->state_log[to_idx]->nodes);
3320	      ok = re_node_set_insert (&union_set, next_node);
3321	      if (BE (err != REG_NOERROR || ! ok, 0))
3322		{
3323		  re_node_set_free (&union_set);
3324		  err = err != REG_NOERROR ? err : REG_ESPACE;
3325		  return err;
3326		}
3327	    }
3328	  else
3329	    {
3330	      err = re_node_set_init_1 (&union_set, next_node);
3331	      if (BE (err != REG_NOERROR, 0))
3332		return err;
3333	    }
3334	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3335	  re_node_set_free (&union_set);
3336	  if (BE (mctx->state_log[to_idx] == NULL
3337		  && err != REG_NOERROR, 0))
3338	    return err;
3339	}
3340    }
3341  while (ent++->more);
3342  return REG_NOERROR;
3343}
3344
3345/* Build transition table for the state.
3346   Return true if successful.  */
3347
3348static bool
3349internal_function
3350build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3351{
3352  reg_errcode_t err;
3353  Idx i, j;
3354  int ch;
3355  bool need_word_trtable = false;
3356  bitset_word_t elem, mask;
3357  bool dests_node_malloced = false;
3358  bool dest_states_malloced = false;
3359  Idx ndests; /* Number of the destination states from 'state'.  */
3360  re_dfastate_t **trtable;
3361  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3362  re_node_set follows, *dests_node;
3363  bitset_t *dests_ch;
3364  bitset_t acceptable;
3365
3366  struct dests_alloc
3367  {
3368    re_node_set dests_node[SBC_MAX];
3369    bitset_t dests_ch[SBC_MAX];
3370  } *dests_alloc;
3371
3372  /* We build DFA states which corresponds to the destination nodes
3373     from 'state'.  'dests_node[i]' represents the nodes which i-th
3374     destination state contains, and 'dests_ch[i]' represents the
3375     characters which i-th destination state accepts.  */
3376  if (__libc_use_alloca (sizeof (struct dests_alloc)))
3377    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3378  else
3379    {
3380      dests_alloc = re_malloc (struct dests_alloc, 1);
3381      if (BE (dests_alloc == NULL, 0))
3382	return false;
3383      dests_node_malloced = true;
3384    }
3385  dests_node = dests_alloc->dests_node;
3386  dests_ch = dests_alloc->dests_ch;
3387
3388  /* Initialize transition table.  */
3389  state->word_trtable = state->trtable = NULL;
3390
3391  /* At first, group all nodes belonging to 'state' into several
3392     destinations.  */
3393  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3394  if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3395    {
3396      if (dests_node_malloced)
3397	free (dests_alloc);
3398      /* Return false in case of an error, true otherwise.  */
3399      if (ndests == 0)
3400	{
3401	  state->trtable = (re_dfastate_t **)
3402	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3403          if (BE (state->trtable == NULL, 0))
3404            return false;
3405	  return true;
3406	}
3407      return false;
3408    }
3409
3410  err = re_node_set_alloc (&follows, ndests + 1);
3411  if (BE (err != REG_NOERROR, 0))
3412    goto out_free;
3413
3414  /* Avoid arithmetic overflow in size calculation.  */
3415  if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3416	    / (3 * sizeof (re_dfastate_t *)))
3417	   < ndests),
3418	  0))
3419    goto out_free;
3420
3421  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3422			 + ndests * 3 * sizeof (re_dfastate_t *)))
3423    dest_states = (re_dfastate_t **)
3424      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3425  else
3426    {
3427      dest_states = (re_dfastate_t **)
3428	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3429      if (BE (dest_states == NULL, 0))
3430	{
3431out_free:
3432	  if (dest_states_malloced)
3433	    free (dest_states);
3434	  re_node_set_free (&follows);
3435	  for (i = 0; i < ndests; ++i)
3436	    re_node_set_free (dests_node + i);
3437	  if (dests_node_malloced)
3438	    free (dests_alloc);
3439	  return false;
3440	}
3441      dest_states_malloced = true;
3442    }
3443  dest_states_word = dest_states + ndests;
3444  dest_states_nl = dest_states_word + ndests;
3445  bitset_empty (acceptable);
3446
3447  /* Then build the states for all destinations.  */
3448  for (i = 0; i < ndests; ++i)
3449    {
3450      Idx next_node;
3451      re_node_set_empty (&follows);
3452      /* Merge the follows of this destination states.  */
3453      for (j = 0; j < dests_node[i].nelem; ++j)
3454	{
3455	  next_node = dfa->nexts[dests_node[i].elems[j]];
3456	  if (next_node != REG_MISSING)
3457	    {
3458	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3459	      if (BE (err != REG_NOERROR, 0))
3460		goto out_free;
3461	    }
3462	}
3463      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3464      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3465	goto out_free;
3466      /* If the new state has context constraint,
3467	 build appropriate states for these contexts.  */
3468      if (dest_states[i]->has_constraint)
3469	{
3470	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3471							  CONTEXT_WORD);
3472	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3473	    goto out_free;
3474
3475	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3476	    need_word_trtable = true;
3477
3478	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3479							CONTEXT_NEWLINE);
3480	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3481	    goto out_free;
3482	}
3483      else
3484	{
3485	  dest_states_word[i] = dest_states[i];
3486	  dest_states_nl[i] = dest_states[i];
3487	}
3488      bitset_merge (acceptable, dests_ch[i]);
3489    }
3490
3491  if (!BE (need_word_trtable, 0))
3492    {
3493      /* We don't care about whether the following character is a word
3494	 character, or we are in a single-byte character set so we can
3495	 discern by looking at the character code: allocate a
3496	 256-entry transition table.  */
3497      trtable = state->trtable =
3498	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3499      if (BE (trtable == NULL, 0))
3500	goto out_free;
3501
3502      /* For all characters ch...:  */
3503      for (i = 0; i < BITSET_WORDS; ++i)
3504	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3505	     elem;
3506	     mask <<= 1, elem >>= 1, ++ch)
3507	  if (BE (elem & 1, 0))
3508	    {
3509	      /* There must be exactly one destination which accepts
3510		 character ch.  See group_nodes_into_DFAstates.  */
3511	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3512		;
3513
3514	      /* j-th destination accepts the word character ch.  */
3515	      if (dfa->word_char[i] & mask)
3516		trtable[ch] = dest_states_word[j];
3517	      else
3518		trtable[ch] = dest_states[j];
3519	    }
3520    }
3521  else
3522    {
3523      /* We care about whether the following character is a word
3524	 character, and we are in a multi-byte character set: discern
3525	 by looking at the character code: build two 256-entry
3526	 transition tables, one starting at trtable[0] and one
3527	 starting at trtable[SBC_MAX].  */
3528      trtable = state->word_trtable =
3529	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3530      if (BE (trtable == NULL, 0))
3531	goto out_free;
3532
3533      /* For all characters ch...:  */
3534      for (i = 0; i < BITSET_WORDS; ++i)
3535	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3536	     elem;
3537	     mask <<= 1, elem >>= 1, ++ch)
3538	  if (BE (elem & 1, 0))
3539	    {
3540	      /* There must be exactly one destination which accepts
3541		 character ch.  See group_nodes_into_DFAstates.  */
3542	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3543		;
3544
3545	      /* j-th destination accepts the word character ch.  */
3546	      trtable[ch] = dest_states[j];
3547	      trtable[ch + SBC_MAX] = dest_states_word[j];
3548	    }
3549    }
3550
3551  /* new line */
3552  if (bitset_contain (acceptable, NEWLINE_CHAR))
3553    {
3554      /* The current state accepts newline character.  */
3555      for (j = 0; j < ndests; ++j)
3556	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3557	  {
3558	    /* k-th destination accepts newline character.  */
3559	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3560	    if (need_word_trtable)
3561	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3562	    /* There must be only one destination which accepts
3563	       newline.  See group_nodes_into_DFAstates.  */
3564	    break;
3565	  }
3566    }
3567
3568  if (dest_states_malloced)
3569    free (dest_states);
3570
3571  re_node_set_free (&follows);
3572  for (i = 0; i < ndests; ++i)
3573    re_node_set_free (dests_node + i);
3574
3575  if (dests_node_malloced)
3576    free (dests_alloc);
3577
3578  return true;
3579}
3580
3581/* Group all nodes belonging to STATE into several destinations.
3582   Then for all destinations, set the nodes belonging to the destination
3583   to DESTS_NODE[i] and set the characters accepted by the destination
3584   to DEST_CH[i].  This function return the number of destinations.  */
3585
3586static Idx
3587internal_function
3588group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3589			    re_node_set *dests_node, bitset_t *dests_ch)
3590{
3591  reg_errcode_t err;
3592  bool ok;
3593  Idx i, j, k;
3594  Idx ndests; /* Number of the destinations from 'state'.  */
3595  bitset_t accepts; /* Characters a node can accept.  */
3596  const re_node_set *cur_nodes = &state->nodes;
3597  bitset_empty (accepts);
3598  ndests = 0;
3599
3600  /* For all the nodes belonging to 'state',  */
3601  for (i = 0; i < cur_nodes->nelem; ++i)
3602    {
3603      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3604      re_token_type_t type = node->type;
3605      unsigned int constraint = node->constraint;
3606
3607      /* Enumerate all single byte character this node can accept.  */
3608      if (type == CHARACTER)
3609	bitset_set (accepts, node->opr.c);
3610      else if (type == SIMPLE_BRACKET)
3611	{
3612	  bitset_merge (accepts, node->opr.sbcset);
3613	}
3614      else if (type == OP_PERIOD)
3615	{
3616#ifdef RE_ENABLE_I18N
3617	  if (dfa->mb_cur_max > 1)
3618	    bitset_merge (accepts, dfa->sb_char);
3619	  else
3620#endif
3621	    bitset_set_all (accepts);
3622	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3623	    bitset_clear (accepts, '\n');
3624	  if (dfa->syntax & RE_DOT_NOT_NULL)
3625	    bitset_clear (accepts, '\0');
3626	}
3627#ifdef RE_ENABLE_I18N
3628      else if (type == OP_UTF8_PERIOD)
3629	{
3630	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3631	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3632	  else
3633	    bitset_merge (accepts, utf8_sb_map);
3634	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3635	    bitset_clear (accepts, '\n');
3636	  if (dfa->syntax & RE_DOT_NOT_NULL)
3637	    bitset_clear (accepts, '\0');
3638	}
3639#endif
3640      else
3641	continue;
3642
3643      /* Check the 'accepts' and sift the characters which are not
3644	 match it the context.  */
3645      if (constraint)
3646	{
3647	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3648	    {
3649	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3650	      bitset_empty (accepts);
3651	      if (accepts_newline)
3652		bitset_set (accepts, NEWLINE_CHAR);
3653	      else
3654		continue;
3655	    }
3656	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3657	    {
3658	      bitset_empty (accepts);
3659	      continue;
3660	    }
3661
3662	  if (constraint & NEXT_WORD_CONSTRAINT)
3663	    {
3664	      bitset_word_t any_set = 0;
3665	      if (type == CHARACTER && !node->word_char)
3666		{
3667		  bitset_empty (accepts);
3668		  continue;
3669		}
3670#ifdef RE_ENABLE_I18N
3671	      if (dfa->mb_cur_max > 1)
3672		for (j = 0; j < BITSET_WORDS; ++j)
3673		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3674	      else
3675#endif
3676		for (j = 0; j < BITSET_WORDS; ++j)
3677		  any_set |= (accepts[j] &= dfa->word_char[j]);
3678	      if (!any_set)
3679		continue;
3680	    }
3681	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3682	    {
3683	      bitset_word_t any_set = 0;
3684	      if (type == CHARACTER && node->word_char)
3685		{
3686		  bitset_empty (accepts);
3687		  continue;
3688		}
3689#ifdef RE_ENABLE_I18N
3690	      if (dfa->mb_cur_max > 1)
3691		for (j = 0; j < BITSET_WORDS; ++j)
3692		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3693	      else
3694#endif
3695		for (j = 0; j < BITSET_WORDS; ++j)
3696		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3697	      if (!any_set)
3698		continue;
3699	    }
3700	}
3701
3702      /* Then divide 'accepts' into DFA states, or create a new
3703	 state.  Above, we make sure that accepts is not empty.  */
3704      for (j = 0; j < ndests; ++j)
3705	{
3706	  bitset_t intersec; /* Intersection sets, see below.  */
3707	  bitset_t remains;
3708	  /* Flags, see below.  */
3709	  bitset_word_t has_intersec, not_subset, not_consumed;
3710
3711	  /* Optimization, skip if this state doesn't accept the character.  */
3712	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3713	    continue;
3714
3715	  /* Enumerate the intersection set of this state and 'accepts'.  */
3716	  has_intersec = 0;
3717	  for (k = 0; k < BITSET_WORDS; ++k)
3718	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3719	  /* And skip if the intersection set is empty.  */
3720	  if (!has_intersec)
3721	    continue;
3722
3723	  /* Then check if this state is a subset of 'accepts'.  */
3724	  not_subset = not_consumed = 0;
3725	  for (k = 0; k < BITSET_WORDS; ++k)
3726	    {
3727	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3728	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3729	    }
3730
3731	  /* If this state isn't a subset of 'accepts', create a
3732	     new group state, which has the 'remains'. */
3733	  if (not_subset)
3734	    {
3735	      bitset_copy (dests_ch[ndests], remains);
3736	      bitset_copy (dests_ch[j], intersec);
3737	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3738	      if (BE (err != REG_NOERROR, 0))
3739		goto error_return;
3740	      ++ndests;
3741	    }
3742
3743	  /* Put the position in the current group. */
3744	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3745	  if (BE (! ok, 0))
3746	    goto error_return;
3747
3748	  /* If all characters are consumed, go to next node. */
3749	  if (!not_consumed)
3750	    break;
3751	}
3752      /* Some characters remain, create a new group. */
3753      if (j == ndests)
3754	{
3755	  bitset_copy (dests_ch[ndests], accepts);
3756	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3757	  if (BE (err != REG_NOERROR, 0))
3758	    goto error_return;
3759	  ++ndests;
3760	  bitset_empty (accepts);
3761	}
3762    }
3763  return ndests;
3764 error_return:
3765  for (j = 0; j < ndests; ++j)
3766    re_node_set_free (dests_node + j);
3767  return REG_MISSING;
3768}
3769
3770#ifdef RE_ENABLE_I18N
3771/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3772   Return the number of the bytes the node accepts.
3773   STR_IDX is the current index of the input string.
3774
3775   This function handles the nodes which can accept one character, or
3776   one collating element like '.', '[a-z]', opposite to the other nodes
3777   can only accept one byte.  */
3778
3779static int
3780internal_function
3781check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3782			 const re_string_t *input, Idx str_idx)
3783{
3784  const re_token_t *node = dfa->nodes + node_idx;
3785  int char_len, elem_len;
3786  Idx i;
3787
3788  if (BE (node->type == OP_UTF8_PERIOD, 0))
3789    {
3790      unsigned char c = re_string_byte_at (input, str_idx), d;
3791      if (BE (c < 0xc2, 1))
3792	return 0;
3793
3794      if (str_idx + 2 > input->len)
3795	return 0;
3796
3797      d = re_string_byte_at (input, str_idx + 1);
3798      if (c < 0xe0)
3799	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3800      else if (c < 0xf0)
3801	{
3802	  char_len = 3;
3803	  if (c == 0xe0 && d < 0xa0)
3804	    return 0;
3805	}
3806      else if (c < 0xf8)
3807	{
3808	  char_len = 4;
3809	  if (c == 0xf0 && d < 0x90)
3810	    return 0;
3811	}
3812      else if (c < 0xfc)
3813	{
3814	  char_len = 5;
3815	  if (c == 0xf8 && d < 0x88)
3816	    return 0;
3817	}
3818      else if (c < 0xfe)
3819	{
3820	  char_len = 6;
3821	  if (c == 0xfc && d < 0x84)
3822	    return 0;
3823	}
3824      else
3825	return 0;
3826
3827      if (str_idx + char_len > input->len)
3828	return 0;
3829
3830      for (i = 1; i < char_len; ++i)
3831	{
3832	  d = re_string_byte_at (input, str_idx + i);
3833	  if (d < 0x80 || d > 0xbf)
3834	    return 0;
3835	}
3836      return char_len;
3837    }
3838
3839  char_len = re_string_char_size_at (input, str_idx);
3840  if (node->type == OP_PERIOD)
3841    {
3842      if (char_len <= 1)
3843	return 0;
3844      /* FIXME: I don't think this if is needed, as both '\n'
3845	 and '\0' are char_len == 1.  */
3846      /* '.' accepts any one character except the following two cases.  */
3847      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3848	   re_string_byte_at (input, str_idx) == '\n') ||
3849	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3850	   re_string_byte_at (input, str_idx) == '\0'))
3851	return 0;
3852      return char_len;
3853    }
3854
3855  elem_len = re_string_elem_size_at (input, str_idx);
3856  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3857    return 0;
3858
3859  if (node->type == COMPLEX_BRACKET)
3860    {
3861      const re_charset_t *cset = node->opr.mbcset;
3862# ifdef _LIBC
3863      const unsigned char *pin
3864	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3865      Idx j;
3866      uint32_t nrules;
3867# endif /* _LIBC */
3868      int match_len = 0;
3869      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3870		    ? re_string_wchar_at (input, str_idx) : 0);
3871
3872      /* match with multibyte character?  */
3873      for (i = 0; i < cset->nmbchars; ++i)
3874	if (wc == cset->mbchars[i])
3875	  {
3876	    match_len = char_len;
3877	    goto check_node_accept_bytes_match;
3878	  }
3879      /* match with character_class?  */
3880      for (i = 0; i < cset->nchar_classes; ++i)
3881	{
3882	  wctype_t wt = cset->char_classes[i];
3883	  if (__iswctype (wc, wt))
3884	    {
3885	      match_len = char_len;
3886	      goto check_node_accept_bytes_match;
3887	    }
3888	}
3889
3890# ifdef _LIBC
3891      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3892      if (nrules != 0)
3893	{
3894	  unsigned int in_collseq = 0;
3895	  const int32_t *table, *indirect;
3896	  const unsigned char *weights, *extra;
3897	  const char *collseqwc;
3898	  /* This #include defines a local function!  */
3899#  include <locale/weight.h>
3900
3901	  /* match with collating_symbol?  */
3902	  if (cset->ncoll_syms)
3903	    extra = (const unsigned char *)
3904	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3905	  for (i = 0; i < cset->ncoll_syms; ++i)
3906	    {
3907	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3908	      /* Compare the length of input collating element and
3909		 the length of current collating element.  */
3910	      if (*coll_sym != elem_len)
3911		continue;
3912	      /* Compare each bytes.  */
3913	      for (j = 0; j < *coll_sym; j++)
3914		if (pin[j] != coll_sym[1 + j])
3915		  break;
3916	      if (j == *coll_sym)
3917		{
3918		  /* Match if every bytes is equal.  */
3919		  match_len = j;
3920		  goto check_node_accept_bytes_match;
3921		}
3922	    }
3923
3924	  if (cset->nranges)
3925	    {
3926	      if (elem_len <= char_len)
3927		{
3928		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3929		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3930		}
3931	      else
3932		in_collseq = find_collation_sequence_value (pin, elem_len);
3933	    }
3934	  /* match with range expression?  */
3935	  /* FIXME: Implement rational ranges here, too.  */
3936	  for (i = 0; i < cset->nranges; ++i)
3937	    if (cset->range_starts[i] <= in_collseq
3938		&& in_collseq <= cset->range_ends[i])
3939	      {
3940		match_len = elem_len;
3941		goto check_node_accept_bytes_match;
3942	      }
3943
3944	  /* match with equivalence_class?  */
3945	  if (cset->nequiv_classes)
3946	    {
3947	      const unsigned char *cp = pin;
3948	      table = (const int32_t *)
3949		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3950	      weights = (const unsigned char *)
3951		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3952	      extra = (const unsigned char *)
3953		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3954	      indirect = (const int32_t *)
3955		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3956	      int32_t idx = findidx (&cp, elem_len);
3957	      if (idx > 0)
3958		for (i = 0; i < cset->nequiv_classes; ++i)
3959		  {
3960		    int32_t equiv_class_idx = cset->equiv_classes[i];
3961		    size_t weight_len = weights[idx & 0xffffff];
3962		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3963			&& (idx >> 24) == (equiv_class_idx >> 24))
3964		      {
3965			Idx cnt = 0;
3966
3967			idx &= 0xffffff;
3968			equiv_class_idx &= 0xffffff;
3969
3970			while (cnt <= weight_len
3971			       && (weights[equiv_class_idx + 1 + cnt]
3972				   == weights[idx + 1 + cnt]))
3973			  ++cnt;
3974			if (cnt > weight_len)
3975			  {
3976			    match_len = elem_len;
3977			    goto check_node_accept_bytes_match;
3978			  }
3979		      }
3980		  }
3981	    }
3982	}
3983      else
3984# endif /* _LIBC */
3985	{
3986	  /* match with range expression?  */
3987	  for (i = 0; i < cset->nranges; ++i)
3988	    {
3989	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3990		{
3991		  match_len = char_len;
3992		  goto check_node_accept_bytes_match;
3993		}
3994	    }
3995	}
3996    check_node_accept_bytes_match:
3997      if (!cset->non_match)
3998	return match_len;
3999      else
4000	{
4001	  if (match_len > 0)
4002	    return 0;
4003	  else
4004	    return (elem_len > char_len) ? elem_len : char_len;
4005	}
4006    }
4007  return 0;
4008}
4009
4010# ifdef _LIBC
4011static unsigned int
4012internal_function
4013find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4014{
4015  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4016  if (nrules == 0)
4017    {
4018      if (mbs_len == 1)
4019	{
4020	  /* No valid character.  Match it as a single byte character.  */
4021	  const unsigned char *collseq = (const unsigned char *)
4022	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4023	  return collseq[mbs[0]];
4024	}
4025      return UINT_MAX;
4026    }
4027  else
4028    {
4029      int32_t idx;
4030      const unsigned char *extra = (const unsigned char *)
4031	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4032      int32_t extrasize = (const unsigned char *)
4033	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4034
4035      for (idx = 0; idx < extrasize;)
4036	{
4037	  int mbs_cnt;
4038	  bool found = false;
4039	  int32_t elem_mbs_len;
4040	  /* Skip the name of collating element name.  */
4041	  idx = idx + extra[idx] + 1;
4042	  elem_mbs_len = extra[idx++];
4043	  if (mbs_len == elem_mbs_len)
4044	    {
4045	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4046		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4047		  break;
4048	      if (mbs_cnt == elem_mbs_len)
4049		/* Found the entry.  */
4050		found = true;
4051	    }
4052	  /* Skip the byte sequence of the collating element.  */
4053	  idx += elem_mbs_len;
4054	  /* Adjust for the alignment.  */
4055	  idx = (idx + 3) & ~3;
4056	  /* Skip the collation sequence value.  */
4057	  idx += sizeof (uint32_t);
4058	  /* Skip the wide char sequence of the collating element.  */
4059	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4060	  /* If we found the entry, return the sequence value.  */
4061	  if (found)
4062	    return *(uint32_t *) (extra + idx);
4063	  /* Skip the collation sequence value.  */
4064	  idx += sizeof (uint32_t);
4065	}
4066      return UINT_MAX;
4067    }
4068}
4069# endif /* _LIBC */
4070#endif /* RE_ENABLE_I18N */
4071
4072/* Check whether the node accepts the byte which is IDX-th
4073   byte of the INPUT.  */
4074
4075static bool
4076internal_function
4077check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4078		   Idx idx)
4079{
4080  unsigned char ch;
4081  ch = re_string_byte_at (&mctx->input, idx);
4082  switch (node->type)
4083    {
4084    case CHARACTER:
4085      if (node->opr.c != ch)
4086        return false;
4087      break;
4088
4089    case SIMPLE_BRACKET:
4090      if (!bitset_contain (node->opr.sbcset, ch))
4091        return false;
4092      break;
4093
4094#ifdef RE_ENABLE_I18N
4095    case OP_UTF8_PERIOD:
4096      if (ch >= ASCII_CHARS)
4097        return false;
4098      /* FALLTHROUGH */
4099#endif
4100    case OP_PERIOD:
4101      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4102	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4103	return false;
4104      break;
4105
4106    default:
4107      return false;
4108    }
4109
4110  if (node->constraint)
4111    {
4112      /* The node has constraints.  Check whether the current context
4113	 satisfies the constraints.  */
4114      unsigned int context = re_string_context_at (&mctx->input, idx,
4115						   mctx->eflags);
4116      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4117	return false;
4118    }
4119
4120  return true;
4121}
4122
4123/* Extend the buffers, if the buffers have run out.  */
4124
4125static reg_errcode_t
4126internal_function __attribute_warn_unused_result__
4127extend_buffers (re_match_context_t *mctx, int min_len)
4128{
4129  reg_errcode_t ret;
4130  re_string_t *pstr = &mctx->input;
4131
4132  /* Avoid overflow.  */
4133  if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4134          <= pstr->bufs_len, 0))
4135    return REG_ESPACE;
4136
4137  /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4138  ret = re_string_realloc_buffers (pstr,
4139				   MAX (min_len,
4140					MIN (pstr->len, pstr->bufs_len * 2)));
4141  if (BE (ret != REG_NOERROR, 0))
4142    return ret;
4143
4144  if (mctx->state_log != NULL)
4145    {
4146      /* And double the length of state_log.  */
4147      /* XXX We have no indication of the size of this buffer.  If this
4148	 allocation fail we have no indication that the state_log array
4149	 does not have the right size.  */
4150      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4151					      pstr->bufs_len + 1);
4152      if (BE (new_array == NULL, 0))
4153	return REG_ESPACE;
4154      mctx->state_log = new_array;
4155    }
4156
4157  /* Then reconstruct the buffers.  */
4158  if (pstr->icase)
4159    {
4160#ifdef RE_ENABLE_I18N
4161      if (pstr->mb_cur_max > 1)
4162	{
4163	  ret = build_wcs_upper_buffer (pstr);
4164	  if (BE (ret != REG_NOERROR, 0))
4165	    return ret;
4166	}
4167      else
4168#endif /* RE_ENABLE_I18N  */
4169	build_upper_buffer (pstr);
4170    }
4171  else
4172    {
4173#ifdef RE_ENABLE_I18N
4174      if (pstr->mb_cur_max > 1)
4175	build_wcs_buffer (pstr);
4176      else
4177#endif /* RE_ENABLE_I18N  */
4178	{
4179	  if (pstr->trans != NULL)
4180	    re_string_translate_buffer (pstr);
4181	}
4182    }
4183  return REG_NOERROR;
4184}
4185
4186
4187/* Functions for matching context.  */
4188
4189/* Initialize MCTX.  */
4190
4191static reg_errcode_t
4192internal_function __attribute_warn_unused_result__
4193match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4194{
4195  mctx->eflags = eflags;
4196  mctx->match_last = REG_MISSING;
4197  if (n > 0)
4198    {
4199      /* Avoid overflow.  */
4200      size_t max_object_size =
4201	MAX (sizeof (struct re_backref_cache_entry),
4202	     sizeof (re_sub_match_top_t *));
4203      if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4204	return REG_ESPACE;
4205
4206      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4207      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4208      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4209	return REG_ESPACE;
4210    }
4211  /* Already zero-ed by the caller.
4212     else
4213       mctx->bkref_ents = NULL;
4214     mctx->nbkref_ents = 0;
4215     mctx->nsub_tops = 0;  */
4216  mctx->abkref_ents = n;
4217  mctx->max_mb_elem_len = 1;
4218  mctx->asub_tops = n;
4219  return REG_NOERROR;
4220}
4221
4222/* Clean the entries which depend on the current input in MCTX.
4223   This function must be invoked when the matcher changes the start index
4224   of the input, or changes the input string.  */
4225
4226static void
4227internal_function
4228match_ctx_clean (re_match_context_t *mctx)
4229{
4230  Idx st_idx;
4231  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4232    {
4233      Idx sl_idx;
4234      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4235      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4236	{
4237	  re_sub_match_last_t *last = top->lasts[sl_idx];
4238	  re_free (last->path.array);
4239	  re_free (last);
4240	}
4241      re_free (top->lasts);
4242      if (top->path)
4243	{
4244	  re_free (top->path->array);
4245	  re_free (top->path);
4246	}
4247      free (top);
4248    }
4249
4250  mctx->nsub_tops = 0;
4251  mctx->nbkref_ents = 0;
4252}
4253
4254/* Free all the memory associated with MCTX.  */
4255
4256static void
4257internal_function
4258match_ctx_free (re_match_context_t *mctx)
4259{
4260  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4261  match_ctx_clean (mctx);
4262  re_free (mctx->sub_tops);
4263  re_free (mctx->bkref_ents);
4264}
4265
4266/* Add a new backreference entry to MCTX.
4267   Note that we assume that caller never call this function with duplicate
4268   entry, and call with STR_IDX which isn't smaller than any existing entry.
4269*/
4270
4271static reg_errcode_t
4272internal_function __attribute_warn_unused_result__
4273match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4274		     Idx to)
4275{
4276  if (mctx->nbkref_ents >= mctx->abkref_ents)
4277    {
4278      struct re_backref_cache_entry* new_entry;
4279      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4280			      mctx->abkref_ents * 2);
4281      if (BE (new_entry == NULL, 0))
4282	{
4283	  re_free (mctx->bkref_ents);
4284	  return REG_ESPACE;
4285	}
4286      mctx->bkref_ents = new_entry;
4287      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4288	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4289      mctx->abkref_ents *= 2;
4290    }
4291  if (mctx->nbkref_ents > 0
4292      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4293    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4294
4295  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4296  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4297  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4298  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4299
4300  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4301     If bit N is clear, means that this entry won't epsilon-transition to
4302     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4303     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4304     such node.
4305
4306     A backreference does not epsilon-transition unless it is empty, so set
4307     to all zeros if FROM != TO.  */
4308  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4309    = (from == to ? -1 : 0);
4310
4311  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4312  if (mctx->max_mb_elem_len < to - from)
4313    mctx->max_mb_elem_len = to - from;
4314  return REG_NOERROR;
4315}
4316
4317/* Return the first entry with the same str_idx, or REG_MISSING if none is
4318   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4319
4320static Idx
4321internal_function
4322search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4323{
4324  Idx left, right, mid, last;
4325  last = right = mctx->nbkref_ents;
4326  for (left = 0; left < right;)
4327    {
4328      mid = (left + right) / 2;
4329      if (mctx->bkref_ents[mid].str_idx < str_idx)
4330	left = mid + 1;
4331      else
4332	right = mid;
4333    }
4334  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4335    return left;
4336  else
4337    return REG_MISSING;
4338}
4339
4340/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4341   at STR_IDX.  */
4342
4343static reg_errcode_t
4344internal_function __attribute_warn_unused_result__
4345match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4346{
4347#ifdef DEBUG
4348  assert (mctx->sub_tops != NULL);
4349  assert (mctx->asub_tops > 0);
4350#endif
4351  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4352    {
4353      Idx new_asub_tops = mctx->asub_tops * 2;
4354      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4355						   re_sub_match_top_t *,
4356						   new_asub_tops);
4357      if (BE (new_array == NULL, 0))
4358	return REG_ESPACE;
4359      mctx->sub_tops = new_array;
4360      mctx->asub_tops = new_asub_tops;
4361    }
4362  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4363  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4364    return REG_ESPACE;
4365  mctx->sub_tops[mctx->nsub_tops]->node = node;
4366  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4367  return REG_NOERROR;
4368}
4369
4370/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4371   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4372
4373static re_sub_match_last_t *
4374internal_function
4375match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4376{
4377  re_sub_match_last_t *new_entry;
4378  if (BE (subtop->nlasts == subtop->alasts, 0))
4379    {
4380      Idx new_alasts = 2 * subtop->alasts + 1;
4381      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4382						    re_sub_match_last_t *,
4383						    new_alasts);
4384      if (BE (new_array == NULL, 0))
4385	return NULL;
4386      subtop->lasts = new_array;
4387      subtop->alasts = new_alasts;
4388    }
4389  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4390  if (BE (new_entry != NULL, 1))
4391    {
4392      subtop->lasts[subtop->nlasts] = new_entry;
4393      new_entry->node = node;
4394      new_entry->str_idx = str_idx;
4395      ++subtop->nlasts;
4396    }
4397  return new_entry;
4398}
4399
4400static void
4401internal_function
4402sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4403	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4404{
4405  sctx->sifted_states = sifted_sts;
4406  sctx->limited_states = limited_sts;
4407  sctx->last_node = last_node;
4408  sctx->last_str_idx = last_str_idx;
4409  re_node_set_init_empty (&sctx->limits);
4410}
4411