1/* -*- buffer-read-only: t -*- vi: set ro: */
2/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3#line 1
4/* Extended regular expression matching and search library.
5   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
6   Software Foundation, Inc.
7   This file is part of the GNU C Library.
8   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
9
10   This program is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   This program is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License along
21   with this program; if not, write to the Free Software Foundation,
22   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
23
24#include "verify.h"
25#include "intprops.h"
26static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
27				     Idx n) internal_function;
28static void match_ctx_clean (re_match_context_t *mctx) internal_function;
29static void match_ctx_free (re_match_context_t *cache) internal_function;
30static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
31					  Idx str_idx, Idx from, Idx to)
32     internal_function;
33static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
34     internal_function;
35static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
36					   Idx str_idx) internal_function;
37static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
38						    Idx node, Idx str_idx)
39     internal_function;
40static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
41			   re_dfastate_t **limited_sts, Idx last_node,
42			   Idx last_str_idx)
43     internal_function;
44static reg_errcode_t re_search_internal (const regex_t *preg,
45					 const char *string, Idx length,
46					 Idx start, Idx last_start, Idx stop,
47					 size_t nmatch, regmatch_t pmatch[],
48					 int eflags) internal_function;
49static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
50				  const char *string1, Idx length1,
51				  const char *string2, Idx length2,
52				  Idx start, regoff_t range,
53				  struct re_registers *regs,
54				  Idx stop, bool ret_len) internal_function;
55static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
56				const char *string, Idx length, Idx start,
57				regoff_t range, Idx stop,
58				struct re_registers *regs,
59				bool ret_len) internal_function;
60static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
61				  Idx nregs, int regs_allocated)
62     internal_function;
63static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
64     internal_function;
65static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
66			   Idx *p_match_first) internal_function;
67static Idx check_halt_state_context (const re_match_context_t *mctx,
68				     const re_dfastate_t *state, Idx idx)
69     internal_function;
70static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
71			 regmatch_t *prev_idx_match, Idx cur_node,
72			 Idx cur_idx, Idx nmatch) internal_function;
73static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
74				      Idx str_idx, Idx dest_node, Idx nregs,
75				      regmatch_t *regs,
76				      re_node_set *eps_via_nodes)
77     internal_function;
78static reg_errcode_t set_regs (const regex_t *preg,
79			       const re_match_context_t *mctx,
80			       size_t nmatch, regmatch_t *pmatch,
81			       bool fl_backtrack) internal_function;
82static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
83     internal_function;
84
85#ifdef RE_ENABLE_I18N
86static int sift_states_iter_mb (const re_match_context_t *mctx,
87				re_sift_context_t *sctx,
88				Idx node_idx, Idx str_idx, Idx max_str_idx)
89     internal_function;
90#endif /* RE_ENABLE_I18N */
91static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
92					   re_sift_context_t *sctx)
93     internal_function;
94static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
95					  re_sift_context_t *sctx, Idx str_idx,
96					  re_node_set *cur_dest)
97     internal_function;
98static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
99					      re_sift_context_t *sctx,
100					      Idx str_idx,
101					      re_node_set *dest_nodes)
102     internal_function;
103static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
104					    re_node_set *dest_nodes,
105					    const re_node_set *candidates)
106     internal_function;
107static bool check_dst_limits (const re_match_context_t *mctx,
108			      const re_node_set *limits,
109			      Idx dst_node, Idx dst_idx, Idx src_node,
110			      Idx src_idx) internal_function;
111static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
112					int boundaries, Idx subexp_idx,
113					Idx from_node, Idx bkref_idx)
114     internal_function;
115static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
116				      Idx limit, Idx subexp_idx,
117				      Idx node, Idx str_idx,
118				      Idx bkref_idx) internal_function;
119static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
120					  re_node_set *dest_nodes,
121					  const re_node_set *candidates,
122					  re_node_set *limits,
123					  struct re_backref_cache_entry *bkref_ents,
124					  Idx str_idx) internal_function;
125static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
126					re_sift_context_t *sctx,
127					Idx str_idx, const re_node_set *candidates)
128     internal_function;
129static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
130					re_dfastate_t **dst,
131					re_dfastate_t **src, Idx num)
132     internal_function;
133static re_dfastate_t *find_recover_state (reg_errcode_t *err,
134					 re_match_context_t *mctx) internal_function;
135static re_dfastate_t *transit_state (reg_errcode_t *err,
136				     re_match_context_t *mctx,
137				     re_dfastate_t *state) internal_function;
138static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
139					    re_match_context_t *mctx,
140					    re_dfastate_t *next_state)
141     internal_function;
142static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
143						re_node_set *cur_nodes,
144						Idx str_idx) internal_function;
145#if 0
146static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
147					re_match_context_t *mctx,
148					re_dfastate_t *pstate)
149     internal_function;
150#endif
151#ifdef RE_ENABLE_I18N
152static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
153				       re_dfastate_t *pstate)
154     internal_function;
155#endif /* RE_ENABLE_I18N */
156static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
157					  const re_node_set *nodes)
158     internal_function;
159static reg_errcode_t get_subexp (re_match_context_t *mctx,
160				 Idx bkref_node, Idx bkref_str_idx)
161     internal_function;
162static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
163				     const re_sub_match_top_t *sub_top,
164				     re_sub_match_last_t *sub_last,
165				     Idx bkref_node, Idx bkref_str)
166     internal_function;
167static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
168			     Idx subexp_idx, int type) internal_function;
169static reg_errcode_t check_arrival (re_match_context_t *mctx,
170				    state_array_t *path, Idx top_node,
171				    Idx top_str, Idx last_node, Idx last_str,
172				    int type) internal_function;
173static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
174						   Idx str_idx,
175						   re_node_set *cur_nodes,
176						   re_node_set *next_nodes)
177     internal_function;
178static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
179					       re_node_set *cur_nodes,
180					       Idx ex_subexp, int type)
181     internal_function;
182static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
183						   re_node_set *dst_nodes,
184						   Idx target, Idx ex_subexp,
185						   int type) internal_function;
186static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
187					 re_node_set *cur_nodes, Idx cur_str,
188					 Idx subexp_num, int type)
189     internal_function;
190static bool build_trtable (const re_dfa_t *dfa,
191			   re_dfastate_t *state) internal_function;
192#ifdef RE_ENABLE_I18N
193static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
194				    const re_string_t *input, Idx idx)
195     internal_function;
196# ifdef _LIBC
197static unsigned int find_collation_sequence_value (const unsigned char *mbs,
198						   size_t name_len)
199     internal_function;
200# endif /* _LIBC */
201#endif /* RE_ENABLE_I18N */
202static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
203				       const re_dfastate_t *state,
204				       re_node_set *states_node,
205				       bitset_t *states_ch) internal_function;
206static bool check_node_accept (const re_match_context_t *mctx,
207			       const re_token_t *node, Idx idx)
208     internal_function;
209static reg_errcode_t extend_buffers (re_match_context_t *mctx)
210     internal_function;
211
212/* Entry point for POSIX code.  */
213
214/* regexec searches for a given pattern, specified by PREG, in the
215   string STRING.
216
217   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
218   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
219   least NMATCH elements, and we set them to the offsets of the
220   corresponding matched substrings.
221
222   EFLAGS specifies `execution flags' which affect matching: if
223   REG_NOTBOL is set, then ^ does not match at the beginning of the
224   string; if REG_NOTEOL is set, then $ does not match at the end.
225
226   We return 0 if we find a match and REG_NOMATCH if not.  */
227
228int
229regexec (preg, string, nmatch, pmatch, eflags)
230    const regex_t *_Restrict_ preg;
231    const char *_Restrict_ string;
232    size_t nmatch;
233    regmatch_t pmatch[_Restrict_arr_];
234    int eflags;
235{
236  reg_errcode_t err;
237  Idx start, length;
238#ifdef _LIBC
239  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
240#endif
241
242  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
243    return REG_BADPAT;
244
245  if (eflags & REG_STARTEND)
246    {
247      start = pmatch[0].rm_so;
248      length = pmatch[0].rm_eo;
249    }
250  else
251    {
252      start = 0;
253      length = strlen (string);
254    }
255
256  __libc_lock_lock (dfa->lock);
257  if (preg->no_sub)
258    err = re_search_internal (preg, string, length, start, length,
259			      length, 0, NULL, eflags);
260  else
261    err = re_search_internal (preg, string, length, start, length,
262			      length, nmatch, pmatch, eflags);
263  __libc_lock_unlock (dfa->lock);
264  return err != REG_NOERROR;
265}
266
267#ifdef _LIBC
268# include <shlib-compat.h>
269versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
270
271# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
272__typeof__ (__regexec) __compat_regexec;
273
274int
275attribute_compat_text_section
276__compat_regexec (const regex_t *_Restrict_ preg,
277		  const char *_Restrict_ string, size_t nmatch,
278		  regmatch_t pmatch[], int eflags)
279{
280  return regexec (preg, string, nmatch, pmatch,
281		  eflags & (REG_NOTBOL | REG_NOTEOL));
282}
283compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
284# endif
285#endif
286
287/* Entry points for GNU code.  */
288
289/* re_match, re_search, re_match_2, re_search_2
290
291   The former two functions operate on STRING with length LENGTH,
292   while the later two operate on concatenation of STRING1 and STRING2
293   with lengths LENGTH1 and LENGTH2, respectively.
294
295   re_match() matches the compiled pattern in BUFP against the string,
296   starting at index START.
297
298   re_search() first tries matching at index START, then it tries to match
299   starting from index START + 1, and so on.  The last start position tried
300   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
301   way as re_match().)
302
303   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
304   the first STOP characters of the concatenation of the strings should be
305   concerned.
306
307   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
308   and all groups is stored in REGS.  (For the "_2" variants, the offsets are
309   computed relative to the concatenation, not relative to the individual
310   strings.)
311
312   On success, re_match* functions return the length of the match, re_search*
313   return the position of the start of the match.  Return value -1 means no
314   match was found and -2 indicates an internal error.  */
315
316regoff_t
317re_match (bufp, string, length, start, regs)
318    struct re_pattern_buffer *bufp;
319    const char *string;
320    Idx length, start;
321    struct re_registers *regs;
322{
323  return re_search_stub (bufp, string, length, start, 0, length, regs, true);
324}
325#ifdef _LIBC
326weak_alias (__re_match, re_match)
327#endif
328
329regoff_t
330re_search (bufp, string, length, start, range, regs)
331    struct re_pattern_buffer *bufp;
332    const char *string;
333    Idx length, start;
334    regoff_t range;
335    struct re_registers *regs;
336{
337  return re_search_stub (bufp, string, length, start, range, length, regs,
338			 false);
339}
340#ifdef _LIBC
341weak_alias (__re_search, re_search)
342#endif
343
344regoff_t
345re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
346    struct re_pattern_buffer *bufp;
347    const char *string1, *string2;
348    Idx length1, length2, start, stop;
349    struct re_registers *regs;
350{
351  return re_search_2_stub (bufp, string1, length1, string2, length2,
352			   start, 0, regs, stop, true);
353}
354#ifdef _LIBC
355weak_alias (__re_match_2, re_match_2)
356#endif
357
358regoff_t
359re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
360    struct re_pattern_buffer *bufp;
361    const char *string1, *string2;
362    Idx length1, length2, start, stop;
363    regoff_t range;
364    struct re_registers *regs;
365{
366  return re_search_2_stub (bufp, string1, length1, string2, length2,
367			   start, range, regs, stop, false);
368}
369#ifdef _LIBC
370weak_alias (__re_search_2, re_search_2)
371#endif
372
373static regoff_t
374internal_function
375re_search_2_stub (struct re_pattern_buffer *bufp,
376		  const char *string1, Idx length1,
377		  const char *string2, Idx length2,
378		  Idx start, regoff_t range, struct re_registers *regs,
379		  Idx stop, bool ret_len)
380{
381  const char *str;
382  regoff_t rval;
383  Idx len = length1 + length2;
384  char *s = NULL;
385
386  verify (! TYPE_SIGNED (Idx));
387  if (BE (len < length1, 0))
388     return -2;
389  /* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
390     return -2; */
391
392  /* Concatenate the strings.  */
393  if (length2 > 0)
394    if (length1 > 0)
395      {
396	s = re_malloc (char, len);
397
398	if (BE (s == NULL, 0))
399	  return -2;
400#ifdef _LIBC
401	memcpy (__mempcpy (s, string1, length1), string2, length2);
402#else
403	memcpy (s, string1, length1);
404	memcpy (s + length1, string2, length2);
405#endif
406	str = s;
407      }
408    else
409      str = string2;
410  else
411    str = string1;
412
413  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
414			 ret_len);
415  re_free (s);
416  return rval;
417}
418
419/* The parameters have the same meaning as those of re_search.
420   Additional parameters:
421   If RET_LEN is true the length of the match is returned (re_match style);
422   otherwise the position of the match is returned.  */
423
424static regoff_t
425internal_function
426re_search_stub (struct re_pattern_buffer *bufp,
427		const char *string, Idx length,
428		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
429		bool ret_len)
430{
431  reg_errcode_t result;
432  regmatch_t *pmatch;
433  Idx nregs;
434  regoff_t rval;
435  int eflags = 0;
436#ifdef _LIBC
437  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
438#endif
439  Idx last_start = start + range;
440
441  /* Check for out-of-range.  */
442  verify (! TYPE_SIGNED (Idx));
443  /* if (BE (start < 0, 0))
444     return -1; */
445  if (BE (start > length, 0))
446     return -1;
447  if (BE (length < last_start || (0 <= range && last_start < start), 0))
448    last_start = length;
449  else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0))
450    last_start = 0;
451
452  __libc_lock_lock (dfa->lock);
453
454  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
455  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
456
457  /* Compile fastmap if we haven't yet.  */
458  if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
459    re_compile_fastmap (bufp);
460
461  if (BE (bufp->no_sub, 0))
462    regs = NULL;
463
464  /* We need at least 1 register.  */
465  if (regs == NULL)
466    nregs = 1;
467  else if (BE (bufp->regs_allocated == REGS_FIXED
468	       && regs->num_regs <= bufp->re_nsub, 0))
469    {
470      nregs = regs->num_regs;
471      if (BE (nregs < 1, 0))
472	{
473	  /* Nothing can be copied to regs.  */
474	  regs = NULL;
475	  nregs = 1;
476	}
477    }
478  else
479    nregs = bufp->re_nsub + 1;
480  pmatch = re_malloc (regmatch_t, nregs);
481  if (BE (pmatch == NULL, 0))
482    {
483      rval = -2;
484      goto out;
485    }
486
487  result = re_search_internal (bufp, string, length, start, last_start, stop,
488			       nregs, pmatch, eflags);
489
490  rval = 0;
491
492  /* I hope we needn't fill ther regs with -1's when no match was found.  */
493  if (result != REG_NOERROR)
494    rval = -1;
495  else if (regs != NULL)
496    {
497      /* If caller wants register contents data back, copy them.  */
498      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
499					   bufp->regs_allocated);
500      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
501	rval = -2;
502    }
503
504  if (BE (rval == 0, 1))
505    {
506      if (ret_len)
507	{
508	  assert (pmatch[0].rm_so == start);
509	  rval = pmatch[0].rm_eo - start;
510	}
511      else
512	rval = pmatch[0].rm_so;
513    }
514  re_free (pmatch);
515 out:
516  __libc_lock_unlock (dfa->lock);
517  return rval;
518}
519
520static unsigned int
521internal_function
522re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
523	      int regs_allocated)
524{
525  int rval = REGS_REALLOCATE;
526  Idx i;
527  Idx need_regs = nregs + 1;
528  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
529     uses.  */
530
531  /* Have the register data arrays been allocated?  */
532  if (regs_allocated == REGS_UNALLOCATED)
533    { /* No.  So allocate them with malloc.  */
534      regs->start = re_malloc (regoff_t, need_regs);
535      if (BE (regs->start == NULL, 0))
536	return REGS_UNALLOCATED;
537      regs->end = re_malloc (regoff_t, need_regs);
538      if (BE (regs->end == NULL, 0))
539	{
540	  re_free (regs->start);
541	  return REGS_UNALLOCATED;
542	}
543      regs->num_regs = need_regs;
544    }
545  else if (regs_allocated == REGS_REALLOCATE)
546    { /* Yes.  If we need more elements than were already
547	 allocated, reallocate them.  If we need fewer, just
548	 leave it alone.  */
549      if (BE (need_regs > regs->num_regs, 0))
550	{
551	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
552	  regoff_t *new_end;
553	  if (BE (new_start == NULL, 0))
554	    return REGS_UNALLOCATED;
555	  new_end = re_realloc (regs->end, regoff_t, need_regs);
556	  if (BE (new_end == NULL, 0))
557	    {
558	      re_free (new_start);
559	      return REGS_UNALLOCATED;
560	    }
561	  regs->start = new_start;
562	  regs->end = new_end;
563	  regs->num_regs = need_regs;
564	}
565    }
566  else
567    {
568      assert (regs_allocated == REGS_FIXED);
569      /* This function may not be called with REGS_FIXED and nregs too big.  */
570      assert (regs->num_regs >= nregs);
571      rval = REGS_FIXED;
572    }
573
574  /* Copy the regs.  */
575  for (i = 0; i < nregs; ++i)
576    {
577      regs->start[i] = pmatch[i].rm_so;
578      regs->end[i] = pmatch[i].rm_eo;
579    }
580  for ( ; i < regs->num_regs; ++i)
581    regs->start[i] = regs->end[i] = -1;
582
583  return rval;
584}
585
586/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
587   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
588   this memory for recording register information.  STARTS and ENDS
589   must be allocated using the malloc library routine, and must each
590   be at least NUM_REGS * sizeof (regoff_t) bytes long.
591
592   If NUM_REGS == 0, then subsequent matches should allocate their own
593   register data.
594
595   Unless this function is called, the first search or match using
596   PATTERN_BUFFER will allocate its own register data, without
597   freeing the old data.  */
598
599void
600re_set_registers (bufp, regs, num_regs, starts, ends)
601    struct re_pattern_buffer *bufp;
602    struct re_registers *regs;
603    __re_size_t num_regs;
604    regoff_t *starts, *ends;
605{
606  if (num_regs)
607    {
608      bufp->regs_allocated = REGS_REALLOCATE;
609      regs->num_regs = num_regs;
610      regs->start = starts;
611      regs->end = ends;
612    }
613  else
614    {
615      bufp->regs_allocated = REGS_UNALLOCATED;
616      regs->num_regs = 0;
617      regs->start = regs->end = NULL;
618    }
619}
620#ifdef _LIBC
621weak_alias (__re_set_registers, re_set_registers)
622#endif
623
624/* Entry points compatible with 4.2 BSD regex library.  We don't define
625   them unless specifically requested.  */
626
627#if defined _REGEX_RE_COMP || defined _LIBC
628int
629# ifdef _LIBC
630weak_function
631# endif
632re_exec (s)
633     const char *s;
634{
635  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
636}
637#endif /* _REGEX_RE_COMP */
638
639/* Internal entry point.  */
640
641/* Searches for a compiled pattern PREG in the string STRING, whose
642   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
643   meaning as with regexec.  LAST_START is START + RANGE, where
644   START and RANGE have the same meaning as with re_search.
645   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
646   otherwise return the error code.
647   Note: We assume front end functions already check ranges.
648   (0 <= LAST_START && LAST_START <= LENGTH)  */
649
650static reg_errcode_t
651internal_function
652re_search_internal (const regex_t *preg,
653		    const char *string, Idx length,
654		    Idx start, Idx last_start, Idx stop,
655		    size_t nmatch, regmatch_t pmatch[],
656		    int eflags)
657{
658  reg_errcode_t err;
659  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
660  Idx left_lim, right_lim;
661  int incr;
662  bool fl_longest_match;
663  int match_kind;
664  Idx match_first;
665  Idx match_last = REG_MISSING;
666  Idx extra_nmatch;
667  bool sb;
668  int ch;
669#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
670  re_match_context_t mctx = { .dfa = dfa };
671#else
672  re_match_context_t mctx;
673#endif
674  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
675		    && start != last_start && !preg->can_be_null)
676		   ? preg->fastmap : NULL);
677  RE_TRANSLATE_TYPE t = preg->translate;
678
679#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
680  memset (&mctx, '\0', sizeof (re_match_context_t));
681  mctx.dfa = dfa;
682#endif
683
684  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
685  nmatch -= extra_nmatch;
686
687  /* Check if the DFA haven't been compiled.  */
688  if (BE (preg->used == 0 || dfa->init_state == NULL
689	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
690	  || dfa->init_state_begbuf == NULL, 0))
691    return REG_NOMATCH;
692
693#ifdef DEBUG
694  /* We assume front-end functions already check them.  */
695  assert (0 <= last_start && last_start <= length);
696#endif
697
698  /* If initial states with non-begbuf contexts have no elements,
699     the regex must be anchored.  If preg->newline_anchor is set,
700     we'll never use init_state_nl, so do not check it.  */
701  if (dfa->init_state->nodes.nelem == 0
702      && dfa->init_state_word->nodes.nelem == 0
703      && (dfa->init_state_nl->nodes.nelem == 0
704	  || !preg->newline_anchor))
705    {
706      if (start != 0 && last_start != 0)
707        return REG_NOMATCH;
708      start = last_start = 0;
709    }
710
711  /* We must check the longest matching, if nmatch > 0.  */
712  fl_longest_match = (nmatch != 0 || dfa->nbackref);
713
714  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
715			    preg->translate, (preg->syntax & RE_ICASE) != 0,
716			    dfa);
717  if (BE (err != REG_NOERROR, 0))
718    goto free_return;
719  mctx.input.stop = stop;
720  mctx.input.raw_stop = stop;
721  mctx.input.newline_anchor = preg->newline_anchor;
722
723  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
724  if (BE (err != REG_NOERROR, 0))
725    goto free_return;
726
727  /* We will log all the DFA states through which the dfa pass,
728     if nmatch > 1, or this dfa has "multibyte node", which is a
729     back-reference or a node which can accept multibyte character or
730     multi character collating element.  */
731  if (nmatch > 1 || dfa->has_mb_node)
732    {
733      /* Avoid overflow.  */
734      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
735	{
736	  err = REG_ESPACE;
737	  goto free_return;
738	}
739
740      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
741      if (BE (mctx.state_log == NULL, 0))
742	{
743	  err = REG_ESPACE;
744	  goto free_return;
745	}
746    }
747  else
748    mctx.state_log = NULL;
749
750  match_first = start;
751  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
752			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
753
754  /* Check incrementally whether of not the input string match.  */
755  incr = (last_start < start) ? -1 : 1;
756  left_lim = (last_start < start) ? last_start : start;
757  right_lim = (last_start < start) ? start : last_start;
758  sb = dfa->mb_cur_max == 1;
759  match_kind =
760    (fastmap
761     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
762	| (start <= last_start ? 2 : 0)
763	| (t != NULL ? 1 : 0))
764     : 8);
765
766  for (;; match_first += incr)
767    {
768      err = REG_NOMATCH;
769      if (match_first < left_lim || right_lim < match_first)
770	goto free_return;
771
772      /* Advance as rapidly as possible through the string, until we
773	 find a plausible place to start matching.  This may be done
774	 with varying efficiency, so there are various possibilities:
775	 only the most common of them are specialized, in order to
776	 save on code size.  We use a switch statement for speed.  */
777      switch (match_kind)
778	{
779	case 8:
780	  /* No fastmap.  */
781	  break;
782
783	case 7:
784	  /* Fastmap with single-byte translation, match forward.  */
785	  while (BE (match_first < right_lim, 1)
786		 && !fastmap[t[(unsigned char) string[match_first]]])
787	    ++match_first;
788	  goto forward_match_found_start_or_reached_end;
789
790	case 6:
791	  /* Fastmap without translation, match forward.  */
792	  while (BE (match_first < right_lim, 1)
793		 && !fastmap[(unsigned char) string[match_first]])
794	    ++match_first;
795
796	forward_match_found_start_or_reached_end:
797	  if (BE (match_first == right_lim, 0))
798	    {
799	      ch = match_first >= length
800		       ? 0 : (unsigned char) string[match_first];
801	      if (!fastmap[t ? t[ch] : ch])
802		goto free_return;
803	    }
804	  break;
805
806	case 4:
807	case 5:
808	  /* Fastmap without multi-byte translation, match backwards.  */
809	  while (match_first >= left_lim)
810	    {
811	      ch = match_first >= length
812		       ? 0 : (unsigned char) string[match_first];
813	      if (fastmap[t ? t[ch] : ch])
814		break;
815	      --match_first;
816	    }
817	  if (match_first < left_lim)
818	    goto free_return;
819	  break;
820
821	default:
822	  /* In this case, we can't determine easily the current byte,
823	     since it might be a component byte of a multibyte
824	     character.  Then we use the constructed buffer instead.  */
825	  for (;;)
826	    {
827	      /* If MATCH_FIRST is out of the valid range, reconstruct the
828		 buffers.  */
829	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
830	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
831		{
832		  err = re_string_reconstruct (&mctx.input, match_first,
833					       eflags);
834		  if (BE (err != REG_NOERROR, 0))
835		    goto free_return;
836
837		  offset = match_first - mctx.input.raw_mbs_idx;
838		}
839	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
840		 Note that MATCH_FIRST must not be smaller than 0.  */
841	      ch = (match_first >= length
842		    ? 0 : re_string_byte_at (&mctx.input, offset));
843	      if (fastmap[ch])
844		break;
845	      match_first += incr;
846	      if (match_first < left_lim || match_first > right_lim)
847	        {
848	          err = REG_NOMATCH;
849	          goto free_return;
850	        }
851	    }
852	  break;
853	}
854
855      /* Reconstruct the buffers so that the matcher can assume that
856	 the matching starts from the beginning of the buffer.  */
857      err = re_string_reconstruct (&mctx.input, match_first, eflags);
858      if (BE (err != REG_NOERROR, 0))
859	goto free_return;
860
861#ifdef RE_ENABLE_I18N
862     /* Don't consider this char as a possible match start if it part,
863	yet isn't the head, of a multibyte character.  */
864      if (!sb && !re_string_first_byte (&mctx.input, 0))
865	continue;
866#endif
867
868      /* It seems to be appropriate one, then use the matcher.  */
869      /* We assume that the matching starts from 0.  */
870      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
871      match_last = check_matching (&mctx, fl_longest_match,
872				   start <= last_start ? &match_first : NULL);
873      if (match_last != REG_MISSING)
874	{
875	  if (BE (match_last == REG_ERROR, 0))
876	    {
877	      err = REG_ESPACE;
878	      goto free_return;
879	    }
880	  else
881	    {
882	      mctx.match_last = match_last;
883	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
884		{
885		  re_dfastate_t *pstate = mctx.state_log[match_last];
886		  mctx.last_node = check_halt_state_context (&mctx, pstate,
887							     match_last);
888		}
889	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
890		  || dfa->nbackref)
891		{
892		  err = prune_impossible_nodes (&mctx);
893		  if (err == REG_NOERROR)
894		    break;
895		  if (BE (err != REG_NOMATCH, 0))
896		    goto free_return;
897		  match_last = REG_MISSING;
898		}
899	      else
900		break; /* We found a match.  */
901	    }
902	}
903
904      match_ctx_clean (&mctx);
905    }
906
907#ifdef DEBUG
908  assert (match_last != REG_MISSING);
909  assert (err == REG_NOERROR);
910#endif
911
912  /* Set pmatch[] if we need.  */
913  if (nmatch > 0)
914    {
915      Idx reg_idx;
916
917      /* Initialize registers.  */
918      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
919	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
920
921      /* Set the points where matching start/end.  */
922      pmatch[0].rm_so = 0;
923      pmatch[0].rm_eo = mctx.match_last;
924      /* FIXME: This function should fail if mctx.match_last exceeds
925	 the maximum possible regoff_t value.  We need a new error
926	 code REG_OVERFLOW.  */
927
928      if (!preg->no_sub && nmatch > 1)
929	{
930	  err = set_regs (preg, &mctx, nmatch, pmatch,
931			  dfa->has_plural_match && dfa->nbackref > 0);
932	  if (BE (err != REG_NOERROR, 0))
933	    goto free_return;
934	}
935
936      /* At last, add the offset to the each registers, since we slided
937	 the buffers so that we could assume that the matching starts
938	 from 0.  */
939      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
940	if (pmatch[reg_idx].rm_so != -1)
941	  {
942#ifdef RE_ENABLE_I18N
943	    if (BE (mctx.input.offsets_needed != 0, 0))
944	      {
945		pmatch[reg_idx].rm_so =
946		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
947		   ? mctx.input.valid_raw_len
948		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
949		pmatch[reg_idx].rm_eo =
950		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
951		   ? mctx.input.valid_raw_len
952		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
953	      }
954#else
955	    assert (mctx.input.offsets_needed == 0);
956#endif
957	    pmatch[reg_idx].rm_so += match_first;
958	    pmatch[reg_idx].rm_eo += match_first;
959	  }
960      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
961	{
962	  pmatch[nmatch + reg_idx].rm_so = -1;
963	  pmatch[nmatch + reg_idx].rm_eo = -1;
964	}
965
966      if (dfa->subexp_map)
967        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
968          if (dfa->subexp_map[reg_idx] != reg_idx)
969            {
970              pmatch[reg_idx + 1].rm_so
971                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
972              pmatch[reg_idx + 1].rm_eo
973                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
974            }
975    }
976
977 free_return:
978  re_free (mctx.state_log);
979  if (dfa->nbackref)
980    match_ctx_free (&mctx);
981  re_string_destruct (&mctx.input);
982  return err;
983}
984
985static reg_errcode_t
986internal_function
987prune_impossible_nodes (re_match_context_t *mctx)
988{
989  const re_dfa_t *const dfa = mctx->dfa;
990  Idx halt_node, match_last;
991  reg_errcode_t ret;
992  re_dfastate_t **sifted_states;
993  re_dfastate_t **lim_states = NULL;
994  re_sift_context_t sctx;
995#ifdef DEBUG
996  assert (mctx->state_log != NULL);
997#endif
998  match_last = mctx->match_last;
999  halt_node = mctx->last_node;
1000
1001  /* Avoid overflow.  */
1002  if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
1003    return REG_ESPACE;
1004
1005  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
1006  if (BE (sifted_states == NULL, 0))
1007    {
1008      ret = REG_ESPACE;
1009      goto free_return;
1010    }
1011  if (dfa->nbackref)
1012    {
1013      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1014      if (BE (lim_states == NULL, 0))
1015	{
1016	  ret = REG_ESPACE;
1017	  goto free_return;
1018	}
1019      while (1)
1020	{
1021	  memset (lim_states, '\0',
1022		  sizeof (re_dfastate_t *) * (match_last + 1));
1023	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1024			 match_last);
1025	  ret = sift_states_backward (mctx, &sctx);
1026	  re_node_set_free (&sctx.limits);
1027	  if (BE (ret != REG_NOERROR, 0))
1028	      goto free_return;
1029	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1030	    break;
1031	  do
1032	    {
1033	      --match_last;
1034	      if (! REG_VALID_INDEX (match_last))
1035		{
1036		  ret = REG_NOMATCH;
1037		  goto free_return;
1038		}
1039	    } while (mctx->state_log[match_last] == NULL
1040		     || !mctx->state_log[match_last]->halt);
1041	  halt_node = check_halt_state_context (mctx,
1042						mctx->state_log[match_last],
1043						match_last);
1044	}
1045      ret = merge_state_array (dfa, sifted_states, lim_states,
1046			       match_last + 1);
1047      re_free (lim_states);
1048      lim_states = NULL;
1049      if (BE (ret != REG_NOERROR, 0))
1050	goto free_return;
1051    }
1052  else
1053    {
1054      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1055      ret = sift_states_backward (mctx, &sctx);
1056      re_node_set_free (&sctx.limits);
1057      if (BE (ret != REG_NOERROR, 0))
1058	goto free_return;
1059      if (sifted_states[0] == NULL)
1060	{
1061	  ret = REG_NOMATCH;
1062	  goto free_return;
1063	}
1064    }
1065  re_free (mctx->state_log);
1066  mctx->state_log = sifted_states;
1067  sifted_states = NULL;
1068  mctx->last_node = halt_node;
1069  mctx->match_last = match_last;
1070  ret = REG_NOERROR;
1071 free_return:
1072  re_free (sifted_states);
1073  re_free (lim_states);
1074  return ret;
1075}
1076
1077/* Acquire an initial state and return it.
1078   We must select appropriate initial state depending on the context,
1079   since initial states may have constraints like "\<", "^", etc..  */
1080
1081static inline re_dfastate_t *
1082__attribute ((always_inline)) internal_function
1083acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1084			    Idx idx)
1085{
1086  const re_dfa_t *const dfa = mctx->dfa;
1087  if (dfa->init_state->has_constraint)
1088    {
1089      unsigned int context;
1090      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1091      if (IS_WORD_CONTEXT (context))
1092	return dfa->init_state_word;
1093      else if (IS_ORDINARY_CONTEXT (context))
1094	return dfa->init_state;
1095      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1096	return dfa->init_state_begbuf;
1097      else if (IS_NEWLINE_CONTEXT (context))
1098	return dfa->init_state_nl;
1099      else if (IS_BEGBUF_CONTEXT (context))
1100	{
1101	  /* It is relatively rare case, then calculate on demand.  */
1102	  return re_acquire_state_context (err, dfa,
1103					   dfa->init_state->entrance_nodes,
1104					   context);
1105	}
1106      else
1107	/* Must not happen?  */
1108	return dfa->init_state;
1109    }
1110  else
1111    return dfa->init_state;
1112}
1113
1114/* Check whether the regular expression match input string INPUT or not,
1115   and return the index where the matching end.  Return REG_MISSING if
1116   there is no match, and return REG_ERROR in case of an error.
1117   FL_LONGEST_MATCH means we want the POSIX longest matching.
1118   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1119   next place where we may want to try matching.
1120   Note that the matcher assume that the maching starts from the current
1121   index of the buffer.  */
1122
1123static Idx
1124internal_function
1125check_matching (re_match_context_t *mctx, bool fl_longest_match,
1126		Idx *p_match_first)
1127{
1128  const re_dfa_t *const dfa = mctx->dfa;
1129  reg_errcode_t err;
1130  Idx match = 0;
1131  Idx match_last = REG_MISSING;
1132  Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1133  re_dfastate_t *cur_state;
1134  bool at_init_state = p_match_first != NULL;
1135  Idx next_start_idx = cur_str_idx;
1136
1137  err = REG_NOERROR;
1138  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1139  /* An initial state must not be NULL (invalid).  */
1140  if (BE (cur_state == NULL, 0))
1141    {
1142      assert (err == REG_ESPACE);
1143      return REG_ERROR;
1144    }
1145
1146  if (mctx->state_log != NULL)
1147    {
1148      mctx->state_log[cur_str_idx] = cur_state;
1149
1150      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1151	 later.  E.g. Processing back references.  */
1152      if (BE (dfa->nbackref, 0))
1153	{
1154	  at_init_state = false;
1155	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1156	  if (BE (err != REG_NOERROR, 0))
1157	    return err;
1158
1159	  if (cur_state->has_backref)
1160	    {
1161	      err = transit_state_bkref (mctx, &cur_state->nodes);
1162	      if (BE (err != REG_NOERROR, 0))
1163	        return err;
1164	    }
1165	}
1166    }
1167
1168  /* If the RE accepts NULL string.  */
1169  if (BE (cur_state->halt, 0))
1170    {
1171      if (!cur_state->has_constraint
1172	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1173	{
1174	  if (!fl_longest_match)
1175	    return cur_str_idx;
1176	  else
1177	    {
1178	      match_last = cur_str_idx;
1179	      match = 1;
1180	    }
1181	}
1182    }
1183
1184  while (!re_string_eoi (&mctx->input))
1185    {
1186      re_dfastate_t *old_state = cur_state;
1187      Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1188
1189      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1190          || (BE (next_char_idx >= mctx->input.valid_len, 0)
1191              && mctx->input.valid_len < mctx->input.len))
1192        {
1193          err = extend_buffers (mctx);
1194          if (BE (err != REG_NOERROR, 0))
1195	    {
1196	      assert (err == REG_ESPACE);
1197	      return REG_ERROR;
1198	    }
1199        }
1200
1201      cur_state = transit_state (&err, mctx, cur_state);
1202      if (mctx->state_log != NULL)
1203	cur_state = merge_state_with_log (&err, mctx, cur_state);
1204
1205      if (cur_state == NULL)
1206	{
1207	  /* Reached the invalid state or an error.  Try to recover a valid
1208	     state using the state log, if available and if we have not
1209	     already found a valid (even if not the longest) match.  */
1210	  if (BE (err != REG_NOERROR, 0))
1211	    return REG_ERROR;
1212
1213	  if (mctx->state_log == NULL
1214	      || (match && !fl_longest_match)
1215	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1216	    break;
1217	}
1218
1219      if (BE (at_init_state, 0))
1220	{
1221	  if (old_state == cur_state)
1222	    next_start_idx = next_char_idx;
1223	  else
1224	    at_init_state = false;
1225	}
1226
1227      if (cur_state->halt)
1228	{
1229	  /* Reached a halt state.
1230	     Check the halt state can satisfy the current context.  */
1231	  if (!cur_state->has_constraint
1232	      || check_halt_state_context (mctx, cur_state,
1233					   re_string_cur_idx (&mctx->input)))
1234	    {
1235	      /* We found an appropriate halt state.  */
1236	      match_last = re_string_cur_idx (&mctx->input);
1237	      match = 1;
1238
1239	      /* We found a match, do not modify match_first below.  */
1240	      p_match_first = NULL;
1241	      if (!fl_longest_match)
1242		break;
1243	    }
1244	}
1245    }
1246
1247  if (p_match_first)
1248    *p_match_first += next_start_idx;
1249
1250  return match_last;
1251}
1252
1253/* Check NODE match the current context.  */
1254
1255static bool
1256internal_function
1257check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1258{
1259  re_token_type_t type = dfa->nodes[node].type;
1260  unsigned int constraint = dfa->nodes[node].constraint;
1261  if (type != END_OF_RE)
1262    return false;
1263  if (!constraint)
1264    return true;
1265  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1266    return false;
1267  return true;
1268}
1269
1270/* Check the halt state STATE match the current context.
1271   Return 0 if not match, if the node, STATE has, is a halt node and
1272   match the context, return the node.  */
1273
1274static Idx
1275internal_function
1276check_halt_state_context (const re_match_context_t *mctx,
1277			  const re_dfastate_t *state, Idx idx)
1278{
1279  Idx i;
1280  unsigned int context;
1281#ifdef DEBUG
1282  assert (state->halt);
1283#endif
1284  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1285  for (i = 0; i < state->nodes.nelem; ++i)
1286    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1287      return state->nodes.elems[i];
1288  return 0;
1289}
1290
1291/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1292   corresponding to the DFA).
1293   Return the destination node, and update EPS_VIA_NODES;
1294   return REG_MISSING in case of errors.  */
1295
1296static Idx
1297internal_function
1298proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1299		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1300		   struct re_fail_stack_t *fs)
1301{
1302  const re_dfa_t *const dfa = mctx->dfa;
1303  Idx i;
1304  bool ok;
1305  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1306    {
1307      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1308      re_node_set *edests = &dfa->edests[node];
1309      Idx dest_node;
1310      ok = re_node_set_insert (eps_via_nodes, node);
1311      if (BE (! ok, 0))
1312	return REG_ERROR;
1313      /* Pick up a valid destination, or return REG_MISSING if none
1314	 is found.  */
1315      for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1316	{
1317	  Idx candidate = edests->elems[i];
1318	  if (!re_node_set_contains (cur_nodes, candidate))
1319	    continue;
1320          if (dest_node == REG_MISSING)
1321	    dest_node = candidate;
1322
1323          else
1324	    {
1325	      /* In order to avoid infinite loop like "(a*)*", return the second
1326	         epsilon-transition if the first was already considered.  */
1327	      if (re_node_set_contains (eps_via_nodes, dest_node))
1328	        return candidate;
1329
1330	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1331	      else if (fs != NULL
1332		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1333				           eps_via_nodes))
1334		return REG_ERROR;
1335
1336	      /* We know we are going to exit.  */
1337	      break;
1338	    }
1339	}
1340      return dest_node;
1341    }
1342  else
1343    {
1344      Idx naccepted = 0;
1345      re_token_type_t type = dfa->nodes[node].type;
1346
1347#ifdef RE_ENABLE_I18N
1348      if (dfa->nodes[node].accept_mb)
1349	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1350      else
1351#endif /* RE_ENABLE_I18N */
1352      if (type == OP_BACK_REF)
1353	{
1354	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1355	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1356	  if (fs != NULL)
1357	    {
1358	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1359		return REG_MISSING;
1360	      else if (naccepted)
1361		{
1362		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1363		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1364			      naccepted) != 0)
1365		    return REG_MISSING;
1366		}
1367	    }
1368
1369	  if (naccepted == 0)
1370	    {
1371	      Idx dest_node;
1372	      ok = re_node_set_insert (eps_via_nodes, node);
1373	      if (BE (! ok, 0))
1374		return REG_ERROR;
1375	      dest_node = dfa->edests[node].elems[0];
1376	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1377					dest_node))
1378		return dest_node;
1379	    }
1380	}
1381
1382      if (naccepted != 0
1383	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1384	{
1385	  Idx dest_node = dfa->nexts[node];
1386	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1387	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1388		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1389					       dest_node)))
1390	    return REG_MISSING;
1391	  re_node_set_empty (eps_via_nodes);
1392	  return dest_node;
1393	}
1394    }
1395  return REG_MISSING;
1396}
1397
1398static reg_errcode_t
1399internal_function
1400push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1401		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1402{
1403  reg_errcode_t err;
1404  Idx num = fs->num++;
1405  if (fs->num == fs->alloc)
1406    {
1407      struct re_fail_stack_ent_t *new_array;
1408      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1409				       * fs->alloc * 2));
1410      if (new_array == NULL)
1411	return REG_ESPACE;
1412      fs->alloc *= 2;
1413      fs->stack = new_array;
1414    }
1415  fs->stack[num].idx = str_idx;
1416  fs->stack[num].node = dest_node;
1417  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1418  if (fs->stack[num].regs == NULL)
1419    return REG_ESPACE;
1420  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1421  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1422  return err;
1423}
1424
1425static Idx
1426internal_function
1427pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1428		regmatch_t *regs, re_node_set *eps_via_nodes)
1429{
1430  Idx num = --fs->num;
1431  assert (REG_VALID_INDEX (num));
1432  *pidx = fs->stack[num].idx;
1433  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1434  re_node_set_free (eps_via_nodes);
1435  re_free (fs->stack[num].regs);
1436  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1437  return fs->stack[num].node;
1438}
1439
1440/* Set the positions where the subexpressions are starts/ends to registers
1441   PMATCH.
1442   Note: We assume that pmatch[0] is already set, and
1443   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1444
1445static reg_errcode_t
1446internal_function
1447set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1448	  regmatch_t *pmatch, bool fl_backtrack)
1449{
1450  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1451  Idx idx, cur_node;
1452  re_node_set eps_via_nodes;
1453  struct re_fail_stack_t *fs;
1454  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1455  regmatch_t *prev_idx_match;
1456  bool prev_idx_match_malloced = false;
1457
1458#ifdef DEBUG
1459  assert (nmatch > 1);
1460  assert (mctx->state_log != NULL);
1461#endif
1462  if (fl_backtrack)
1463    {
1464      fs = &fs_body;
1465      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1466      if (fs->stack == NULL)
1467	return REG_ESPACE;
1468    }
1469  else
1470    fs = NULL;
1471
1472  cur_node = dfa->init_node;
1473  re_node_set_init_empty (&eps_via_nodes);
1474
1475  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1476    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1477  else
1478    {
1479      prev_idx_match = re_malloc (regmatch_t, nmatch);
1480      if (prev_idx_match == NULL)
1481	{
1482	  free_fail_stack_return (fs);
1483	  return REG_ESPACE;
1484	}
1485      prev_idx_match_malloced = true;
1486    }
1487  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1488
1489  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1490    {
1491      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1492
1493      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1494	{
1495	  Idx reg_idx;
1496	  if (fs)
1497	    {
1498	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1499		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1500		  break;
1501	      if (reg_idx == nmatch)
1502		{
1503		  re_node_set_free (&eps_via_nodes);
1504		  if (prev_idx_match_malloced)
1505		    re_free (prev_idx_match);
1506		  return free_fail_stack_return (fs);
1507		}
1508	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1509					 &eps_via_nodes);
1510	    }
1511	  else
1512	    {
1513	      re_node_set_free (&eps_via_nodes);
1514	      if (prev_idx_match_malloced)
1515		re_free (prev_idx_match);
1516	      return REG_NOERROR;
1517	    }
1518	}
1519
1520      /* Proceed to next node.  */
1521      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1522				    &eps_via_nodes, fs);
1523
1524      if (BE (! REG_VALID_INDEX (cur_node), 0))
1525	{
1526	  if (BE (cur_node == REG_ERROR, 0))
1527	    {
1528	      re_node_set_free (&eps_via_nodes);
1529	      if (prev_idx_match_malloced)
1530		re_free (prev_idx_match);
1531	      free_fail_stack_return (fs);
1532	      return REG_ESPACE;
1533	    }
1534	  if (fs)
1535	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1536				       &eps_via_nodes);
1537	  else
1538	    {
1539	      re_node_set_free (&eps_via_nodes);
1540	      if (prev_idx_match_malloced)
1541		re_free (prev_idx_match);
1542	      return REG_NOMATCH;
1543	    }
1544	}
1545    }
1546  re_node_set_free (&eps_via_nodes);
1547  if (prev_idx_match_malloced)
1548    re_free (prev_idx_match);
1549  return free_fail_stack_return (fs);
1550}
1551
1552static reg_errcode_t
1553internal_function
1554free_fail_stack_return (struct re_fail_stack_t *fs)
1555{
1556  if (fs)
1557    {
1558      Idx fs_idx;
1559      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1560	{
1561	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1562	  re_free (fs->stack[fs_idx].regs);
1563	}
1564      re_free (fs->stack);
1565    }
1566  return REG_NOERROR;
1567}
1568
1569static void
1570internal_function
1571update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1572	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1573{
1574  int type = dfa->nodes[cur_node].type;
1575  if (type == OP_OPEN_SUBEXP)
1576    {
1577      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1578
1579      /* We are at the first node of this sub expression.  */
1580      if (reg_num < nmatch)
1581	{
1582	  pmatch[reg_num].rm_so = cur_idx;
1583	  pmatch[reg_num].rm_eo = -1;
1584	}
1585    }
1586  else if (type == OP_CLOSE_SUBEXP)
1587    {
1588      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1589      if (reg_num < nmatch)
1590	{
1591	  /* We are at the last node of this sub expression.  */
1592	  if (pmatch[reg_num].rm_so < cur_idx)
1593	    {
1594	      pmatch[reg_num].rm_eo = cur_idx;
1595	      /* This is a non-empty match or we are not inside an optional
1596		 subexpression.  Accept this right away.  */
1597	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1598	    }
1599	  else
1600	    {
1601	      if (dfa->nodes[cur_node].opt_subexp
1602		  && prev_idx_match[reg_num].rm_so != -1)
1603		/* We transited through an empty match for an optional
1604		   subexpression, like (a?)*, and this is not the subexp's
1605		   first match.  Copy back the old content of the registers
1606		   so that matches of an inner subexpression are undone as
1607		   well, like in ((a?))*.  */
1608		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1609	      else
1610		/* We completed a subexpression, but it may be part of
1611		   an optional one, so do not update PREV_IDX_MATCH.  */
1612		pmatch[reg_num].rm_eo = cur_idx;
1613	    }
1614	}
1615    }
1616}
1617
1618/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1619   and sift the nodes in each states according to the following rules.
1620   Updated state_log will be wrote to STATE_LOG.
1621
1622   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1623     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1624	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1625	the LAST_NODE, we throw away the node `a'.
1626     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1627	string `s' and transit to `b':
1628	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1629	   away the node `a'.
1630	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1631	    thrown away, we throw away the node `a'.
1632     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1633	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1634	   node `a'.
1635	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1636	    we throw away the node `a'.  */
1637
1638#define STATE_NODE_CONTAINS(state,node) \
1639  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1640
1641static reg_errcode_t
1642internal_function
1643sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1644{
1645  reg_errcode_t err;
1646  int null_cnt = 0;
1647  Idx str_idx = sctx->last_str_idx;
1648  re_node_set cur_dest;
1649
1650#ifdef DEBUG
1651  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1652#endif
1653
1654  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1655     transit to the last_node and the last_node itself.  */
1656  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1657  if (BE (err != REG_NOERROR, 0))
1658    return err;
1659  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1660  if (BE (err != REG_NOERROR, 0))
1661    goto free_return;
1662
1663  /* Then check each states in the state_log.  */
1664  while (str_idx > 0)
1665    {
1666      /* Update counters.  */
1667      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1668      if (null_cnt > mctx->max_mb_elem_len)
1669	{
1670	  memset (sctx->sifted_states, '\0',
1671		  sizeof (re_dfastate_t *) * str_idx);
1672	  re_node_set_free (&cur_dest);
1673	  return REG_NOERROR;
1674	}
1675      re_node_set_empty (&cur_dest);
1676      --str_idx;
1677
1678      if (mctx->state_log[str_idx])
1679	{
1680	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1681          if (BE (err != REG_NOERROR, 0))
1682	    goto free_return;
1683	}
1684
1685      /* Add all the nodes which satisfy the following conditions:
1686	 - It can epsilon transit to a node in CUR_DEST.
1687	 - It is in CUR_SRC.
1688	 And update state_log.  */
1689      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1690      if (BE (err != REG_NOERROR, 0))
1691	goto free_return;
1692    }
1693  err = REG_NOERROR;
1694 free_return:
1695  re_node_set_free (&cur_dest);
1696  return err;
1697}
1698
1699static reg_errcode_t
1700internal_function
1701build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1702		     Idx str_idx, re_node_set *cur_dest)
1703{
1704  const re_dfa_t *const dfa = mctx->dfa;
1705  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1706  Idx i;
1707
1708  /* Then build the next sifted state.
1709     We build the next sifted state on `cur_dest', and update
1710     `sifted_states[str_idx]' with `cur_dest'.
1711     Note:
1712     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1713     `cur_src' points the node_set of the old `state_log[str_idx]'
1714     (with the epsilon nodes pre-filtered out).  */
1715  for (i = 0; i < cur_src->nelem; i++)
1716    {
1717      Idx prev_node = cur_src->elems[i];
1718      int naccepted = 0;
1719      bool ok;
1720
1721#ifdef DEBUG
1722      re_token_type_t type = dfa->nodes[prev_node].type;
1723      assert (!IS_EPSILON_NODE (type));
1724#endif
1725#ifdef RE_ENABLE_I18N
1726      /* If the node may accept `multi byte'.  */
1727      if (dfa->nodes[prev_node].accept_mb)
1728	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1729					 str_idx, sctx->last_str_idx);
1730#endif /* RE_ENABLE_I18N */
1731
1732      /* We don't check backreferences here.
1733	 See update_cur_sifted_state().  */
1734      if (!naccepted
1735	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1736	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1737				  dfa->nexts[prev_node]))
1738	naccepted = 1;
1739
1740      if (naccepted == 0)
1741	continue;
1742
1743      if (sctx->limits.nelem)
1744	{
1745	  Idx to_idx = str_idx + naccepted;
1746	  if (check_dst_limits (mctx, &sctx->limits,
1747				dfa->nexts[prev_node], to_idx,
1748				prev_node, str_idx))
1749	    continue;
1750	}
1751      ok = re_node_set_insert (cur_dest, prev_node);
1752      if (BE (! ok, 0))
1753	return REG_ESPACE;
1754    }
1755
1756  return REG_NOERROR;
1757}
1758
1759/* Helper functions.  */
1760
1761static reg_errcode_t
1762internal_function
1763clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1764{
1765  Idx top = mctx->state_log_top;
1766
1767  if (next_state_log_idx >= mctx->input.bufs_len
1768      || (next_state_log_idx >= mctx->input.valid_len
1769	  && mctx->input.valid_len < mctx->input.len))
1770    {
1771      reg_errcode_t err;
1772      err = extend_buffers (mctx);
1773      if (BE (err != REG_NOERROR, 0))
1774	return err;
1775    }
1776
1777  if (top < next_state_log_idx)
1778    {
1779      memset (mctx->state_log + top + 1, '\0',
1780	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1781      mctx->state_log_top = next_state_log_idx;
1782    }
1783  return REG_NOERROR;
1784}
1785
1786static reg_errcode_t
1787internal_function
1788merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1789		   re_dfastate_t **src, Idx num)
1790{
1791  Idx st_idx;
1792  reg_errcode_t err;
1793  for (st_idx = 0; st_idx < num; ++st_idx)
1794    {
1795      if (dst[st_idx] == NULL)
1796	dst[st_idx] = src[st_idx];
1797      else if (src[st_idx] != NULL)
1798	{
1799	  re_node_set merged_set;
1800	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1801					&src[st_idx]->nodes);
1802	  if (BE (err != REG_NOERROR, 0))
1803	    return err;
1804	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1805	  re_node_set_free (&merged_set);
1806	  if (BE (err != REG_NOERROR, 0))
1807	    return err;
1808	}
1809    }
1810  return REG_NOERROR;
1811}
1812
1813static reg_errcode_t
1814internal_function
1815update_cur_sifted_state (const re_match_context_t *mctx,
1816			 re_sift_context_t *sctx, Idx str_idx,
1817			 re_node_set *dest_nodes)
1818{
1819  const re_dfa_t *const dfa = mctx->dfa;
1820  reg_errcode_t err = REG_NOERROR;
1821  const re_node_set *candidates;
1822  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1823		: &mctx->state_log[str_idx]->nodes);
1824
1825  if (dest_nodes->nelem == 0)
1826    sctx->sifted_states[str_idx] = NULL;
1827  else
1828    {
1829      if (candidates)
1830	{
1831	  /* At first, add the nodes which can epsilon transit to a node in
1832	     DEST_NODE.  */
1833	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1834	  if (BE (err != REG_NOERROR, 0))
1835	    return err;
1836
1837	  /* Then, check the limitations in the current sift_context.  */
1838	  if (sctx->limits.nelem)
1839	    {
1840	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1841					 mctx->bkref_ents, str_idx);
1842	      if (BE (err != REG_NOERROR, 0))
1843		return err;
1844	    }
1845	}
1846
1847      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1848      if (BE (err != REG_NOERROR, 0))
1849	return err;
1850    }
1851
1852  if (candidates && mctx->state_log[str_idx]->has_backref)
1853    {
1854      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1855      if (BE (err != REG_NOERROR, 0))
1856	return err;
1857    }
1858  return REG_NOERROR;
1859}
1860
1861static reg_errcode_t
1862internal_function
1863add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1864		       const re_node_set *candidates)
1865{
1866  reg_errcode_t err = REG_NOERROR;
1867  Idx i;
1868
1869  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1870  if (BE (err != REG_NOERROR, 0))
1871    return err;
1872
1873  if (!state->inveclosure.alloc)
1874    {
1875      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1876      if (BE (err != REG_NOERROR, 0))
1877        return REG_ESPACE;
1878      for (i = 0; i < dest_nodes->nelem; i++)
1879        re_node_set_merge (&state->inveclosure,
1880			   dfa->inveclosures + dest_nodes->elems[i]);
1881    }
1882  return re_node_set_add_intersect (dest_nodes, candidates,
1883				    &state->inveclosure);
1884}
1885
1886static reg_errcode_t
1887internal_function
1888sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1889		       const re_node_set *candidates)
1890{
1891    Idx ecl_idx;
1892    reg_errcode_t err;
1893    re_node_set *inv_eclosure = dfa->inveclosures + node;
1894    re_node_set except_nodes;
1895    re_node_set_init_empty (&except_nodes);
1896    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1897      {
1898	Idx cur_node = inv_eclosure->elems[ecl_idx];
1899	if (cur_node == node)
1900	  continue;
1901	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1902	  {
1903	    Idx edst1 = dfa->edests[cur_node].elems[0];
1904	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1905			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1906	    if ((!re_node_set_contains (inv_eclosure, edst1)
1907		 && re_node_set_contains (dest_nodes, edst1))
1908		|| (REG_VALID_NONZERO_INDEX (edst2)
1909		    && !re_node_set_contains (inv_eclosure, edst2)
1910		    && re_node_set_contains (dest_nodes, edst2)))
1911	      {
1912		err = re_node_set_add_intersect (&except_nodes, candidates,
1913						 dfa->inveclosures + cur_node);
1914		if (BE (err != REG_NOERROR, 0))
1915		  {
1916		    re_node_set_free (&except_nodes);
1917		    return err;
1918		  }
1919	      }
1920	  }
1921      }
1922    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1923      {
1924	Idx cur_node = inv_eclosure->elems[ecl_idx];
1925	if (!re_node_set_contains (&except_nodes, cur_node))
1926	  {
1927	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1928	    re_node_set_remove_at (dest_nodes, idx);
1929	  }
1930      }
1931    re_node_set_free (&except_nodes);
1932    return REG_NOERROR;
1933}
1934
1935static bool
1936internal_function
1937check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1938		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1939{
1940  const re_dfa_t *const dfa = mctx->dfa;
1941  Idx lim_idx, src_pos, dst_pos;
1942
1943  Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1944  Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1945  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1946    {
1947      Idx subexp_idx;
1948      struct re_backref_cache_entry *ent;
1949      ent = mctx->bkref_ents + limits->elems[lim_idx];
1950      subexp_idx = dfa->nodes[ent->node].opr.idx;
1951
1952      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1953					   subexp_idx, dst_node, dst_idx,
1954					   dst_bkref_idx);
1955      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1956					   subexp_idx, src_node, src_idx,
1957					   src_bkref_idx);
1958
1959      /* In case of:
1960	 <src> <dst> ( <subexp> )
1961	 ( <subexp> ) <src> <dst>
1962	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1963      if (src_pos == dst_pos)
1964	continue; /* This is unrelated limitation.  */
1965      else
1966	return true;
1967    }
1968  return false;
1969}
1970
1971static int
1972internal_function
1973check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1974			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1975{
1976  const re_dfa_t *const dfa = mctx->dfa;
1977  const re_node_set *eclosures = dfa->eclosures + from_node;
1978  Idx node_idx;
1979
1980  /* Else, we are on the boundary: examine the nodes on the epsilon
1981     closure.  */
1982  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1983    {
1984      Idx node = eclosures->elems[node_idx];
1985      switch (dfa->nodes[node].type)
1986	{
1987	case OP_BACK_REF:
1988	  if (bkref_idx != REG_MISSING)
1989	    {
1990	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1991	      do
1992	        {
1993		  Idx dst;
1994		  int cpos;
1995
1996		  if (ent->node != node)
1997		    continue;
1998
1999		  if (subexp_idx < BITSET_WORD_BITS
2000		      && !(ent->eps_reachable_subexps_map
2001			   & ((bitset_word_t) 1 << subexp_idx)))
2002		    continue;
2003
2004		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
2005		     OP_CLOSE_SUBEXP cases below.  But, if the
2006		     destination node is the same node as the source
2007		     node, don't recurse because it would cause an
2008		     infinite loop: a regex that exhibits this behavior
2009		     is ()\1*\1*  */
2010		  dst = dfa->edests[node].elems[0];
2011		  if (dst == from_node)
2012		    {
2013		      if (boundaries & 1)
2014		        return -1;
2015		      else /* if (boundaries & 2) */
2016		        return 0;
2017		    }
2018
2019		  cpos =
2020		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2021						 dst, bkref_idx);
2022		  if (cpos == -1 /* && (boundaries & 1) */)
2023		    return -1;
2024		  if (cpos == 0 && (boundaries & 2))
2025		    return 0;
2026
2027		  if (subexp_idx < BITSET_WORD_BITS)
2028		    ent->eps_reachable_subexps_map
2029		      &= ~((bitset_word_t) 1 << subexp_idx);
2030	        }
2031	      while (ent++->more);
2032	    }
2033	  break;
2034
2035	case OP_OPEN_SUBEXP:
2036	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2037	    return -1;
2038	  break;
2039
2040	case OP_CLOSE_SUBEXP:
2041	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2042	    return 0;
2043	  break;
2044
2045	default:
2046	    break;
2047	}
2048    }
2049
2050  return (boundaries & 2) ? 1 : 0;
2051}
2052
2053static int
2054internal_function
2055check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2056			   Idx subexp_idx, Idx from_node, Idx str_idx,
2057			   Idx bkref_idx)
2058{
2059  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2060  int boundaries;
2061
2062  /* If we are outside the range of the subexpression, return -1 or 1.  */
2063  if (str_idx < lim->subexp_from)
2064    return -1;
2065
2066  if (lim->subexp_to < str_idx)
2067    return 1;
2068
2069  /* If we are within the subexpression, return 0.  */
2070  boundaries = (str_idx == lim->subexp_from);
2071  boundaries |= (str_idx == lim->subexp_to) << 1;
2072  if (boundaries == 0)
2073    return 0;
2074
2075  /* Else, examine epsilon closure.  */
2076  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2077				      from_node, bkref_idx);
2078}
2079
2080/* Check the limitations of sub expressions LIMITS, and remove the nodes
2081   which are against limitations from DEST_NODES. */
2082
2083static reg_errcode_t
2084internal_function
2085check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2086		     const re_node_set *candidates, re_node_set *limits,
2087		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2088{
2089  reg_errcode_t err;
2090  Idx node_idx, lim_idx;
2091
2092  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2093    {
2094      Idx subexp_idx;
2095      struct re_backref_cache_entry *ent;
2096      ent = bkref_ents + limits->elems[lim_idx];
2097
2098      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2099	continue; /* This is unrelated limitation.  */
2100
2101      subexp_idx = dfa->nodes[ent->node].opr.idx;
2102      if (ent->subexp_to == str_idx)
2103	{
2104	  Idx ops_node = REG_MISSING;
2105	  Idx cls_node = REG_MISSING;
2106	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2107	    {
2108	      Idx node = dest_nodes->elems[node_idx];
2109	      re_token_type_t type = dfa->nodes[node].type;
2110	      if (type == OP_OPEN_SUBEXP
2111		  && subexp_idx == dfa->nodes[node].opr.idx)
2112		ops_node = node;
2113	      else if (type == OP_CLOSE_SUBEXP
2114		       && subexp_idx == dfa->nodes[node].opr.idx)
2115		cls_node = node;
2116	    }
2117
2118	  /* Check the limitation of the open subexpression.  */
2119	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2120	  if (REG_VALID_INDEX (ops_node))
2121	    {
2122	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2123					   candidates);
2124	      if (BE (err != REG_NOERROR, 0))
2125		return err;
2126	    }
2127
2128	  /* Check the limitation of the close subexpression.  */
2129	  if (REG_VALID_INDEX (cls_node))
2130	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2131	      {
2132		Idx node = dest_nodes->elems[node_idx];
2133		if (!re_node_set_contains (dfa->inveclosures + node,
2134					   cls_node)
2135		    && !re_node_set_contains (dfa->eclosures + node,
2136					      cls_node))
2137		  {
2138		    /* It is against this limitation.
2139		       Remove it form the current sifted state.  */
2140		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2141						 candidates);
2142		    if (BE (err != REG_NOERROR, 0))
2143		      return err;
2144		    --node_idx;
2145		  }
2146	      }
2147	}
2148      else /* (ent->subexp_to != str_idx)  */
2149	{
2150	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2151	    {
2152	      Idx node = dest_nodes->elems[node_idx];
2153	      re_token_type_t type = dfa->nodes[node].type;
2154	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2155		{
2156		  if (subexp_idx != dfa->nodes[node].opr.idx)
2157		    continue;
2158		  /* It is against this limitation.
2159		     Remove it form the current sifted state.  */
2160		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2161					       candidates);
2162		  if (BE (err != REG_NOERROR, 0))
2163		    return err;
2164		}
2165	    }
2166	}
2167    }
2168  return REG_NOERROR;
2169}
2170
2171static reg_errcode_t
2172internal_function
2173sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2174		   Idx str_idx, const re_node_set *candidates)
2175{
2176  const re_dfa_t *const dfa = mctx->dfa;
2177  reg_errcode_t err;
2178  Idx node_idx, node;
2179  re_sift_context_t local_sctx;
2180  Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2181
2182  if (first_idx == REG_MISSING)
2183    return REG_NOERROR;
2184
2185  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2186
2187  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2188    {
2189      Idx enabled_idx;
2190      re_token_type_t type;
2191      struct re_backref_cache_entry *entry;
2192      node = candidates->elems[node_idx];
2193      type = dfa->nodes[node].type;
2194      /* Avoid infinite loop for the REs like "()\1+".  */
2195      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2196	continue;
2197      if (type != OP_BACK_REF)
2198	continue;
2199
2200      entry = mctx->bkref_ents + first_idx;
2201      enabled_idx = first_idx;
2202      do
2203	{
2204	  Idx subexp_len;
2205	  Idx to_idx;
2206	  Idx dst_node;
2207	  bool ok;
2208	  re_dfastate_t *cur_state;
2209
2210	  if (entry->node != node)
2211	    continue;
2212	  subexp_len = entry->subexp_to - entry->subexp_from;
2213	  to_idx = str_idx + subexp_len;
2214	  dst_node = (subexp_len ? dfa->nexts[node]
2215		      : dfa->edests[node].elems[0]);
2216
2217	  if (to_idx > sctx->last_str_idx
2218	      || sctx->sifted_states[to_idx] == NULL
2219	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2220	      || check_dst_limits (mctx, &sctx->limits, node,
2221				   str_idx, dst_node, to_idx))
2222	    continue;
2223
2224	  if (local_sctx.sifted_states == NULL)
2225	    {
2226	      local_sctx = *sctx;
2227	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2228	      if (BE (err != REG_NOERROR, 0))
2229		goto free_return;
2230	    }
2231	  local_sctx.last_node = node;
2232	  local_sctx.last_str_idx = str_idx;
2233	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2234	  if (BE (! ok, 0))
2235	    {
2236	      err = REG_ESPACE;
2237	      goto free_return;
2238	    }
2239	  cur_state = local_sctx.sifted_states[str_idx];
2240	  err = sift_states_backward (mctx, &local_sctx);
2241	  if (BE (err != REG_NOERROR, 0))
2242	    goto free_return;
2243	  if (sctx->limited_states != NULL)
2244	    {
2245	      err = merge_state_array (dfa, sctx->limited_states,
2246				       local_sctx.sifted_states,
2247				       str_idx + 1);
2248	      if (BE (err != REG_NOERROR, 0))
2249		goto free_return;
2250	    }
2251	  local_sctx.sifted_states[str_idx] = cur_state;
2252	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2253
2254	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2255          entry = mctx->bkref_ents + enabled_idx;
2256	}
2257      while (enabled_idx++, entry++->more);
2258    }
2259  err = REG_NOERROR;
2260 free_return:
2261  if (local_sctx.sifted_states != NULL)
2262    {
2263      re_node_set_free (&local_sctx.limits);
2264    }
2265
2266  return err;
2267}
2268
2269
2270#ifdef RE_ENABLE_I18N
2271static int
2272internal_function
2273sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2274		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2275{
2276  const re_dfa_t *const dfa = mctx->dfa;
2277  int naccepted;
2278  /* Check the node can accept `multi byte'.  */
2279  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2280  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2281      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2282			    dfa->nexts[node_idx]))
2283    /* The node can't accept the `multi byte', or the
2284       destination was already thrown away, then the node
2285       could't accept the current input `multi byte'.   */
2286    naccepted = 0;
2287  /* Otherwise, it is sure that the node could accept
2288     `naccepted' bytes input.  */
2289  return naccepted;
2290}
2291#endif /* RE_ENABLE_I18N */
2292
2293
2294/* Functions for state transition.  */
2295
2296/* Return the next state to which the current state STATE will transit by
2297   accepting the current input byte, and update STATE_LOG if necessary.
2298   If STATE can accept a multibyte char/collating element/back reference
2299   update the destination of STATE_LOG.  */
2300
2301static re_dfastate_t *
2302internal_function
2303transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2304	       re_dfastate_t *state)
2305{
2306  re_dfastate_t **trtable;
2307  unsigned char ch;
2308
2309#ifdef RE_ENABLE_I18N
2310  /* If the current state can accept multibyte.  */
2311  if (BE (state->accept_mb, 0))
2312    {
2313      *err = transit_state_mb (mctx, state);
2314      if (BE (*err != REG_NOERROR, 0))
2315	return NULL;
2316    }
2317#endif /* RE_ENABLE_I18N */
2318
2319  /* Then decide the next state with the single byte.  */
2320#if 0
2321  if (0)
2322    /* don't use transition table  */
2323    return transit_state_sb (err, mctx, state);
2324#endif
2325
2326  /* Use transition table  */
2327  ch = re_string_fetch_byte (&mctx->input);
2328  for (;;)
2329    {
2330      trtable = state->trtable;
2331      if (BE (trtable != NULL, 1))
2332	return trtable[ch];
2333
2334      trtable = state->word_trtable;
2335      if (BE (trtable != NULL, 1))
2336        {
2337	  unsigned int context;
2338	  context
2339	    = re_string_context_at (&mctx->input,
2340				    re_string_cur_idx (&mctx->input) - 1,
2341				    mctx->eflags);
2342	  if (IS_WORD_CONTEXT (context))
2343	    return trtable[ch + SBC_MAX];
2344	  else
2345	    return trtable[ch];
2346	}
2347
2348      if (!build_trtable (mctx->dfa, state))
2349	{
2350	  *err = REG_ESPACE;
2351	  return NULL;
2352	}
2353
2354      /* Retry, we now have a transition table.  */
2355    }
2356}
2357
2358/* Update the state_log if we need */
2359static re_dfastate_t *
2360internal_function
2361merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2362		      re_dfastate_t *next_state)
2363{
2364  const re_dfa_t *const dfa = mctx->dfa;
2365  Idx cur_idx = re_string_cur_idx (&mctx->input);
2366
2367  if (cur_idx > mctx->state_log_top)
2368    {
2369      mctx->state_log[cur_idx] = next_state;
2370      mctx->state_log_top = cur_idx;
2371    }
2372  else if (mctx->state_log[cur_idx] == 0)
2373    {
2374      mctx->state_log[cur_idx] = next_state;
2375    }
2376  else
2377    {
2378      re_dfastate_t *pstate;
2379      unsigned int context;
2380      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2381      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2382         the destination of a multibyte char/collating element/
2383         back reference.  Then the next state is the union set of
2384         these destinations and the results of the transition table.  */
2385      pstate = mctx->state_log[cur_idx];
2386      log_nodes = pstate->entrance_nodes;
2387      if (next_state != NULL)
2388        {
2389          table_nodes = next_state->entrance_nodes;
2390          *err = re_node_set_init_union (&next_nodes, table_nodes,
2391					     log_nodes);
2392          if (BE (*err != REG_NOERROR, 0))
2393	    return NULL;
2394        }
2395      else
2396        next_nodes = *log_nodes;
2397      /* Note: We already add the nodes of the initial state,
2398	 then we don't need to add them here.  */
2399
2400      context = re_string_context_at (&mctx->input,
2401				      re_string_cur_idx (&mctx->input) - 1,
2402				      mctx->eflags);
2403      next_state = mctx->state_log[cur_idx]
2404        = re_acquire_state_context (err, dfa, &next_nodes, context);
2405      /* We don't need to check errors here, since the return value of
2406         this function is next_state and ERR is already set.  */
2407
2408      if (table_nodes != NULL)
2409        re_node_set_free (&next_nodes);
2410    }
2411
2412  if (BE (dfa->nbackref, 0) && next_state != NULL)
2413    {
2414      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2415	 later.  We must check them here, since the back references in the
2416	 next state might use them.  */
2417      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2418					cur_idx);
2419      if (BE (*err != REG_NOERROR, 0))
2420	return NULL;
2421
2422      /* If the next state has back references.  */
2423      if (next_state->has_backref)
2424	{
2425	  *err = transit_state_bkref (mctx, &next_state->nodes);
2426	  if (BE (*err != REG_NOERROR, 0))
2427	    return NULL;
2428	  next_state = mctx->state_log[cur_idx];
2429	}
2430    }
2431
2432  return next_state;
2433}
2434
2435/* Skip bytes in the input that correspond to part of a
2436   multi-byte match, then look in the log for a state
2437   from which to restart matching.  */
2438static re_dfastate_t *
2439internal_function
2440find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2441{
2442  re_dfastate_t *cur_state;
2443  do
2444    {
2445      Idx max = mctx->state_log_top;
2446      Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2447
2448      do
2449	{
2450          if (++cur_str_idx > max)
2451            return NULL;
2452          re_string_skip_bytes (&mctx->input, 1);
2453	}
2454      while (mctx->state_log[cur_str_idx] == NULL);
2455
2456      cur_state = merge_state_with_log (err, mctx, NULL);
2457    }
2458  while (*err == REG_NOERROR && cur_state == NULL);
2459  return cur_state;
2460}
2461
2462/* Helper functions for transit_state.  */
2463
2464/* From the node set CUR_NODES, pick up the nodes whose types are
2465   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2466   expression. And register them to use them later for evaluating the
2467   correspoding back references.  */
2468
2469static reg_errcode_t
2470internal_function
2471check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2472			   Idx str_idx)
2473{
2474  const re_dfa_t *const dfa = mctx->dfa;
2475  Idx node_idx;
2476  reg_errcode_t err;
2477
2478  /* TODO: This isn't efficient.
2479	   Because there might be more than one nodes whose types are
2480	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2481	   nodes.
2482	   E.g. RE: (a){2}  */
2483  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2484    {
2485      Idx node = cur_nodes->elems[node_idx];
2486      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2487	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2488	  && (dfa->used_bkref_map
2489	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2490	{
2491	  err = match_ctx_add_subtop (mctx, node, str_idx);
2492	  if (BE (err != REG_NOERROR, 0))
2493	    return err;
2494	}
2495    }
2496  return REG_NOERROR;
2497}
2498
2499#if 0
2500/* Return the next state to which the current state STATE will transit by
2501   accepting the current input byte.  */
2502
2503static re_dfastate_t *
2504transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2505		  re_dfastate_t *state)
2506{
2507  const re_dfa_t *const dfa = mctx->dfa;
2508  re_node_set next_nodes;
2509  re_dfastate_t *next_state;
2510  Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2511  unsigned int context;
2512
2513  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2514  if (BE (*err != REG_NOERROR, 0))
2515    return NULL;
2516  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2517    {
2518      Idx cur_node = state->nodes.elems[node_cnt];
2519      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2520	{
2521	  *err = re_node_set_merge (&next_nodes,
2522				    dfa->eclosures + dfa->nexts[cur_node]);
2523	  if (BE (*err != REG_NOERROR, 0))
2524	    {
2525	      re_node_set_free (&next_nodes);
2526	      return NULL;
2527	    }
2528	}
2529    }
2530  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2531  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2532  /* We don't need to check errors here, since the return value of
2533     this function is next_state and ERR is already set.  */
2534
2535  re_node_set_free (&next_nodes);
2536  re_string_skip_bytes (&mctx->input, 1);
2537  return next_state;
2538}
2539#endif
2540
2541#ifdef RE_ENABLE_I18N
2542static reg_errcode_t
2543internal_function
2544transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2545{
2546  const re_dfa_t *const dfa = mctx->dfa;
2547  reg_errcode_t err;
2548  Idx i;
2549
2550  for (i = 0; i < pstate->nodes.nelem; ++i)
2551    {
2552      re_node_set dest_nodes, *new_nodes;
2553      Idx cur_node_idx = pstate->nodes.elems[i];
2554      int naccepted;
2555      Idx dest_idx;
2556      unsigned int context;
2557      re_dfastate_t *dest_state;
2558
2559      if (!dfa->nodes[cur_node_idx].accept_mb)
2560        continue;
2561
2562      if (dfa->nodes[cur_node_idx].constraint)
2563	{
2564	  context = re_string_context_at (&mctx->input,
2565					  re_string_cur_idx (&mctx->input),
2566					  mctx->eflags);
2567	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2568					   context))
2569	    continue;
2570	}
2571
2572      /* How many bytes the node can accept?  */
2573      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2574					   re_string_cur_idx (&mctx->input));
2575      if (naccepted == 0)
2576	continue;
2577
2578      /* The node can accepts `naccepted' bytes.  */
2579      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2580      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2581			       : mctx->max_mb_elem_len);
2582      err = clean_state_log_if_needed (mctx, dest_idx);
2583      if (BE (err != REG_NOERROR, 0))
2584	return err;
2585#ifdef DEBUG
2586      assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2587#endif
2588      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2589
2590      dest_state = mctx->state_log[dest_idx];
2591      if (dest_state == NULL)
2592	dest_nodes = *new_nodes;
2593      else
2594	{
2595	  err = re_node_set_init_union (&dest_nodes,
2596					dest_state->entrance_nodes, new_nodes);
2597	  if (BE (err != REG_NOERROR, 0))
2598	    return err;
2599	}
2600      context = re_string_context_at (&mctx->input, dest_idx - 1,
2601				      mctx->eflags);
2602      mctx->state_log[dest_idx]
2603	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2604      if (dest_state != NULL)
2605	re_node_set_free (&dest_nodes);
2606      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2607	return err;
2608    }
2609  return REG_NOERROR;
2610}
2611#endif /* RE_ENABLE_I18N */
2612
2613static reg_errcode_t
2614internal_function
2615transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2616{
2617  const re_dfa_t *const dfa = mctx->dfa;
2618  reg_errcode_t err;
2619  Idx i;
2620  Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2621
2622  for (i = 0; i < nodes->nelem; ++i)
2623    {
2624      Idx dest_str_idx, prev_nelem, bkc_idx;
2625      Idx node_idx = nodes->elems[i];
2626      unsigned int context;
2627      const re_token_t *node = dfa->nodes + node_idx;
2628      re_node_set *new_dest_nodes;
2629
2630      /* Check whether `node' is a backreference or not.  */
2631      if (node->type != OP_BACK_REF)
2632	continue;
2633
2634      if (node->constraint)
2635	{
2636	  context = re_string_context_at (&mctx->input, cur_str_idx,
2637					  mctx->eflags);
2638	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2639	    continue;
2640	}
2641
2642      /* `node' is a backreference.
2643	 Check the substring which the substring matched.  */
2644      bkc_idx = mctx->nbkref_ents;
2645      err = get_subexp (mctx, node_idx, cur_str_idx);
2646      if (BE (err != REG_NOERROR, 0))
2647	goto free_return;
2648
2649      /* And add the epsilon closures (which is `new_dest_nodes') of
2650	 the backreference to appropriate state_log.  */
2651#ifdef DEBUG
2652      assert (dfa->nexts[node_idx] != REG_MISSING);
2653#endif
2654      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2655	{
2656	  Idx subexp_len;
2657	  re_dfastate_t *dest_state;
2658	  struct re_backref_cache_entry *bkref_ent;
2659	  bkref_ent = mctx->bkref_ents + bkc_idx;
2660	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2661	    continue;
2662	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2663	  new_dest_nodes = (subexp_len == 0
2664			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2665			    : dfa->eclosures + dfa->nexts[node_idx]);
2666	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2667			  - bkref_ent->subexp_from);
2668	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2669					  mctx->eflags);
2670	  dest_state = mctx->state_log[dest_str_idx];
2671	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2672			: mctx->state_log[cur_str_idx]->nodes.nelem);
2673	  /* Add `new_dest_node' to state_log.  */
2674	  if (dest_state == NULL)
2675	    {
2676	      mctx->state_log[dest_str_idx]
2677		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2678					    context);
2679	      if (BE (mctx->state_log[dest_str_idx] == NULL
2680		      && err != REG_NOERROR, 0))
2681		goto free_return;
2682	    }
2683	  else
2684	    {
2685	      re_node_set dest_nodes;
2686	      err = re_node_set_init_union (&dest_nodes,
2687					    dest_state->entrance_nodes,
2688					    new_dest_nodes);
2689	      if (BE (err != REG_NOERROR, 0))
2690		{
2691		  re_node_set_free (&dest_nodes);
2692		  goto free_return;
2693		}
2694	      mctx->state_log[dest_str_idx]
2695		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2696	      re_node_set_free (&dest_nodes);
2697	      if (BE (mctx->state_log[dest_str_idx] == NULL
2698		      && err != REG_NOERROR, 0))
2699		goto free_return;
2700	    }
2701	  /* We need to check recursively if the backreference can epsilon
2702	     transit.  */
2703	  if (subexp_len == 0
2704	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2705	    {
2706	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2707					       cur_str_idx);
2708	      if (BE (err != REG_NOERROR, 0))
2709		goto free_return;
2710	      err = transit_state_bkref (mctx, new_dest_nodes);
2711	      if (BE (err != REG_NOERROR, 0))
2712		goto free_return;
2713	    }
2714	}
2715    }
2716  err = REG_NOERROR;
2717 free_return:
2718  return err;
2719}
2720
2721/* Enumerate all the candidates which the backreference BKREF_NODE can match
2722   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2723   Note that we might collect inappropriate candidates here.
2724   However, the cost of checking them strictly here is too high, then we
2725   delay these checking for prune_impossible_nodes().  */
2726
2727static reg_errcode_t
2728internal_function
2729get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2730{
2731  const re_dfa_t *const dfa = mctx->dfa;
2732  Idx subexp_num, sub_top_idx;
2733  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2734  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2735  Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2736  if (cache_idx != REG_MISSING)
2737    {
2738      const struct re_backref_cache_entry *entry
2739	= mctx->bkref_ents + cache_idx;
2740      do
2741        if (entry->node == bkref_node)
2742	  return REG_NOERROR; /* We already checked it.  */
2743      while (entry++->more);
2744    }
2745
2746  subexp_num = dfa->nodes[bkref_node].opr.idx;
2747
2748  /* For each sub expression  */
2749  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2750    {
2751      reg_errcode_t err;
2752      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2753      re_sub_match_last_t *sub_last;
2754      Idx sub_last_idx, sl_str, bkref_str_off;
2755
2756      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2757	continue; /* It isn't related.  */
2758
2759      sl_str = sub_top->str_idx;
2760      bkref_str_off = bkref_str_idx;
2761      /* At first, check the last node of sub expressions we already
2762	 evaluated.  */
2763      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2764	{
2765	  regoff_t sl_str_diff;
2766	  sub_last = sub_top->lasts[sub_last_idx];
2767	  sl_str_diff = sub_last->str_idx - sl_str;
2768	  /* The matched string by the sub expression match with the substring
2769	     at the back reference?  */
2770	  if (sl_str_diff > 0)
2771	    {
2772	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2773		{
2774		  /* Not enough chars for a successful match.  */
2775		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2776		    break;
2777
2778		  err = clean_state_log_if_needed (mctx,
2779						   bkref_str_off
2780						   + sl_str_diff);
2781		  if (BE (err != REG_NOERROR, 0))
2782		    return err;
2783		  buf = (const char *) re_string_get_buffer (&mctx->input);
2784		}
2785	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2786		/* We don't need to search this sub expression any more.  */
2787		break;
2788	    }
2789	  bkref_str_off += sl_str_diff;
2790	  sl_str += sl_str_diff;
2791	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2792				bkref_str_idx);
2793
2794	  /* Reload buf, since the preceding call might have reallocated
2795	     the buffer.  */
2796	  buf = (const char *) re_string_get_buffer (&mctx->input);
2797
2798	  if (err == REG_NOMATCH)
2799	    continue;
2800	  if (BE (err != REG_NOERROR, 0))
2801	    return err;
2802	}
2803
2804      if (sub_last_idx < sub_top->nlasts)
2805	continue;
2806      if (sub_last_idx > 0)
2807	++sl_str;
2808      /* Then, search for the other last nodes of the sub expression.  */
2809      for (; sl_str <= bkref_str_idx; ++sl_str)
2810	{
2811	  Idx cls_node;
2812	  regoff_t sl_str_off;
2813	  const re_node_set *nodes;
2814	  sl_str_off = sl_str - sub_top->str_idx;
2815	  /* The matched string by the sub expression match with the substring
2816	     at the back reference?  */
2817	  if (sl_str_off > 0)
2818	    {
2819	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2820		{
2821		  /* If we are at the end of the input, we cannot match.  */
2822		  if (bkref_str_off >= mctx->input.len)
2823		    break;
2824
2825		  err = extend_buffers (mctx);
2826		  if (BE (err != REG_NOERROR, 0))
2827		    return err;
2828
2829		  buf = (const char *) re_string_get_buffer (&mctx->input);
2830		}
2831	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2832		break; /* We don't need to search this sub expression
2833			  any more.  */
2834	    }
2835	  if (mctx->state_log[sl_str] == NULL)
2836	    continue;
2837	  /* Does this state have a ')' of the sub expression?  */
2838	  nodes = &mctx->state_log[sl_str]->nodes;
2839	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2840				       OP_CLOSE_SUBEXP);
2841	  if (cls_node == REG_MISSING)
2842	    continue; /* No.  */
2843	  if (sub_top->path == NULL)
2844	    {
2845	      sub_top->path = calloc (sizeof (state_array_t),
2846				      sl_str - sub_top->str_idx + 1);
2847	      if (sub_top->path == NULL)
2848		return REG_ESPACE;
2849	    }
2850	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2851	     in the current context?  */
2852	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2853			       sub_top->str_idx, cls_node, sl_str,
2854			       OP_CLOSE_SUBEXP);
2855	  if (err == REG_NOMATCH)
2856	      continue;
2857	  if (BE (err != REG_NOERROR, 0))
2858	      return err;
2859	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2860	  if (BE (sub_last == NULL, 0))
2861	    return REG_ESPACE;
2862	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2863				bkref_str_idx);
2864	  if (err == REG_NOMATCH)
2865	    continue;
2866	}
2867    }
2868  return REG_NOERROR;
2869}
2870
2871/* Helper functions for get_subexp().  */
2872
2873/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2874   If it can arrive, register the sub expression expressed with SUB_TOP
2875   and SUB_LAST.  */
2876
2877static reg_errcode_t
2878internal_function
2879get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2880		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2881{
2882  reg_errcode_t err;
2883  Idx to_idx;
2884  /* Can the subexpression arrive the back reference?  */
2885  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2886		       sub_last->str_idx, bkref_node, bkref_str,
2887		       OP_OPEN_SUBEXP);
2888  if (err != REG_NOERROR)
2889    return err;
2890  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2891			     sub_last->str_idx);
2892  if (BE (err != REG_NOERROR, 0))
2893    return err;
2894  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2895  return clean_state_log_if_needed (mctx, to_idx);
2896}
2897
2898/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2899   Search '(' if FL_OPEN, or search ')' otherwise.
2900   TODO: This function isn't efficient...
2901	 Because there might be more than one nodes whose types are
2902	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2903	 nodes.
2904	 E.g. RE: (a){2}  */
2905
2906static Idx
2907internal_function
2908find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2909		  Idx subexp_idx, int type)
2910{
2911  Idx cls_idx;
2912  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2913    {
2914      Idx cls_node = nodes->elems[cls_idx];
2915      const re_token_t *node = dfa->nodes + cls_node;
2916      if (node->type == type
2917	  && node->opr.idx == subexp_idx)
2918	return cls_node;
2919    }
2920  return REG_MISSING;
2921}
2922
2923/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2924   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2925   heavily reused.
2926   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2927
2928static reg_errcode_t
2929internal_function
2930check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2931	       Idx top_str, Idx last_node, Idx last_str, int type)
2932{
2933  const re_dfa_t *const dfa = mctx->dfa;
2934  reg_errcode_t err = REG_NOERROR;
2935  Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2936  re_dfastate_t *cur_state = NULL;
2937  re_node_set *cur_nodes, next_nodes;
2938  re_dfastate_t **backup_state_log;
2939  unsigned int context;
2940
2941  subexp_num = dfa->nodes[top_node].opr.idx;
2942  /* Extend the buffer if we need.  */
2943  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2944    {
2945      re_dfastate_t **new_array;
2946      Idx old_alloc = path->alloc;
2947      Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2948      if (BE (new_alloc < old_alloc, 0)
2949	  || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2950	return REG_ESPACE;
2951      new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2952      if (BE (new_array == NULL, 0))
2953	return REG_ESPACE;
2954      path->array = new_array;
2955      path->alloc = new_alloc;
2956      memset (new_array + old_alloc, '\0',
2957	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2958    }
2959
2960  str_idx = path->next_idx ? path->next_idx : top_str;
2961
2962  /* Temporary modify MCTX.  */
2963  backup_state_log = mctx->state_log;
2964  backup_cur_idx = mctx->input.cur_idx;
2965  mctx->state_log = path->array;
2966  mctx->input.cur_idx = str_idx;
2967
2968  /* Setup initial node set.  */
2969  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2970  if (str_idx == top_str)
2971    {
2972      err = re_node_set_init_1 (&next_nodes, top_node);
2973      if (BE (err != REG_NOERROR, 0))
2974	return err;
2975      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2976      if (BE (err != REG_NOERROR, 0))
2977	{
2978	  re_node_set_free (&next_nodes);
2979	  return err;
2980	}
2981    }
2982  else
2983    {
2984      cur_state = mctx->state_log[str_idx];
2985      if (cur_state && cur_state->has_backref)
2986	{
2987	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2988	  if (BE (err != REG_NOERROR, 0))
2989	    return err;
2990	}
2991      else
2992	re_node_set_init_empty (&next_nodes);
2993    }
2994  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2995    {
2996      if (next_nodes.nelem)
2997	{
2998	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2999				    subexp_num, type);
3000	  if (BE (err != REG_NOERROR, 0))
3001	    {
3002	      re_node_set_free (&next_nodes);
3003	      return err;
3004	    }
3005	}
3006      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3007      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3008	{
3009	  re_node_set_free (&next_nodes);
3010	  return err;
3011	}
3012      mctx->state_log[str_idx] = cur_state;
3013    }
3014
3015  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3016    {
3017      re_node_set_empty (&next_nodes);
3018      if (mctx->state_log[str_idx + 1])
3019	{
3020	  err = re_node_set_merge (&next_nodes,
3021				   &mctx->state_log[str_idx + 1]->nodes);
3022	  if (BE (err != REG_NOERROR, 0))
3023	    {
3024	      re_node_set_free (&next_nodes);
3025	      return err;
3026	    }
3027	}
3028      if (cur_state)
3029	{
3030	  err = check_arrival_add_next_nodes (mctx, str_idx,
3031					      &cur_state->non_eps_nodes,
3032					      &next_nodes);
3033	  if (BE (err != REG_NOERROR, 0))
3034	    {
3035	      re_node_set_free (&next_nodes);
3036	      return err;
3037	    }
3038	}
3039      ++str_idx;
3040      if (next_nodes.nelem)
3041	{
3042	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3043	  if (BE (err != REG_NOERROR, 0))
3044	    {
3045	      re_node_set_free (&next_nodes);
3046	      return err;
3047	    }
3048	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3049				    subexp_num, type);
3050	  if (BE (err != REG_NOERROR, 0))
3051	    {
3052	      re_node_set_free (&next_nodes);
3053	      return err;
3054	    }
3055	}
3056      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3057      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3058      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3059	{
3060	  re_node_set_free (&next_nodes);
3061	  return err;
3062	}
3063      mctx->state_log[str_idx] = cur_state;
3064      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3065    }
3066  re_node_set_free (&next_nodes);
3067  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3068	       : &mctx->state_log[last_str]->nodes);
3069  path->next_idx = str_idx;
3070
3071  /* Fix MCTX.  */
3072  mctx->state_log = backup_state_log;
3073  mctx->input.cur_idx = backup_cur_idx;
3074
3075  /* Then check the current node set has the node LAST_NODE.  */
3076  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3077    return REG_NOERROR;
3078
3079  return REG_NOMATCH;
3080}
3081
3082/* Helper functions for check_arrival.  */
3083
3084/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3085   to NEXT_NODES.
3086   TODO: This function is similar to the functions transit_state*(),
3087	 however this function has many additional works.
3088	 Can't we unify them?  */
3089
3090static reg_errcode_t
3091internal_function
3092check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3093			      re_node_set *cur_nodes, re_node_set *next_nodes)
3094{
3095  const re_dfa_t *const dfa = mctx->dfa;
3096  bool ok;
3097  Idx cur_idx;
3098#ifdef RE_ENABLE_I18N
3099  reg_errcode_t err = REG_NOERROR;
3100#endif
3101  re_node_set union_set;
3102  re_node_set_init_empty (&union_set);
3103  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3104    {
3105      int naccepted = 0;
3106      Idx cur_node = cur_nodes->elems[cur_idx];
3107#ifdef DEBUG
3108      re_token_type_t type = dfa->nodes[cur_node].type;
3109      assert (!IS_EPSILON_NODE (type));
3110#endif
3111#ifdef RE_ENABLE_I18N
3112      /* If the node may accept `multi byte'.  */
3113      if (dfa->nodes[cur_node].accept_mb)
3114	{
3115	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3116					       str_idx);
3117	  if (naccepted > 1)
3118	    {
3119	      re_dfastate_t *dest_state;
3120	      Idx next_node = dfa->nexts[cur_node];
3121	      Idx next_idx = str_idx + naccepted;
3122	      dest_state = mctx->state_log[next_idx];
3123	      re_node_set_empty (&union_set);
3124	      if (dest_state)
3125		{
3126		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3127		  if (BE (err != REG_NOERROR, 0))
3128		    {
3129		      re_node_set_free (&union_set);
3130		      return err;
3131		    }
3132		}
3133	      ok = re_node_set_insert (&union_set, next_node);
3134	      if (BE (! ok, 0))
3135		{
3136		  re_node_set_free (&union_set);
3137		  return REG_ESPACE;
3138		}
3139	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3140							    &union_set);
3141	      if (BE (mctx->state_log[next_idx] == NULL
3142		      && err != REG_NOERROR, 0))
3143		{
3144		  re_node_set_free (&union_set);
3145		  return err;
3146		}
3147	    }
3148	}
3149#endif /* RE_ENABLE_I18N */
3150      if (naccepted
3151	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3152	{
3153	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3154	  if (BE (! ok, 0))
3155	    {
3156	      re_node_set_free (&union_set);
3157	      return REG_ESPACE;
3158	    }
3159	}
3160    }
3161  re_node_set_free (&union_set);
3162  return REG_NOERROR;
3163}
3164
3165/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3166   CUR_NODES, however exclude the nodes which are:
3167    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3168    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3169*/
3170
3171static reg_errcode_t
3172internal_function
3173check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3174			  Idx ex_subexp, int type)
3175{
3176  reg_errcode_t err;
3177  Idx idx, outside_node;
3178  re_node_set new_nodes;
3179#ifdef DEBUG
3180  assert (cur_nodes->nelem);
3181#endif
3182  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3183  if (BE (err != REG_NOERROR, 0))
3184    return err;
3185  /* Create a new node set NEW_NODES with the nodes which are epsilon
3186     closures of the node in CUR_NODES.  */
3187
3188  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3189    {
3190      Idx cur_node = cur_nodes->elems[idx];
3191      const re_node_set *eclosure = dfa->eclosures + cur_node;
3192      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3193      if (outside_node == REG_MISSING)
3194	{
3195	  /* There are no problematic nodes, just merge them.  */
3196	  err = re_node_set_merge (&new_nodes, eclosure);
3197	  if (BE (err != REG_NOERROR, 0))
3198	    {
3199	      re_node_set_free (&new_nodes);
3200	      return err;
3201	    }
3202	}
3203      else
3204	{
3205	  /* There are problematic nodes, re-calculate incrementally.  */
3206	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3207					      ex_subexp, type);
3208	  if (BE (err != REG_NOERROR, 0))
3209	    {
3210	      re_node_set_free (&new_nodes);
3211	      return err;
3212	    }
3213	}
3214    }
3215  re_node_set_free (cur_nodes);
3216  *cur_nodes = new_nodes;
3217  return REG_NOERROR;
3218}
3219
3220/* Helper function for check_arrival_expand_ecl.
3221   Check incrementally the epsilon closure of TARGET, and if it isn't
3222   problematic append it to DST_NODES.  */
3223
3224static reg_errcode_t
3225internal_function
3226check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3227			      Idx target, Idx ex_subexp, int type)
3228{
3229  Idx cur_node;
3230  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3231    {
3232      bool ok;
3233
3234      if (dfa->nodes[cur_node].type == type
3235	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3236	{
3237	  if (type == OP_CLOSE_SUBEXP)
3238	    {
3239	      ok = re_node_set_insert (dst_nodes, cur_node);
3240	      if (BE (! ok, 0))
3241		return REG_ESPACE;
3242	    }
3243	  break;
3244	}
3245      ok = re_node_set_insert (dst_nodes, cur_node);
3246      if (BE (! ok, 0))
3247	return REG_ESPACE;
3248      if (dfa->edests[cur_node].nelem == 0)
3249	break;
3250      if (dfa->edests[cur_node].nelem == 2)
3251	{
3252	  reg_errcode_t err;
3253	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3254					      dfa->edests[cur_node].elems[1],
3255					      ex_subexp, type);
3256	  if (BE (err != REG_NOERROR, 0))
3257	    return err;
3258	}
3259      cur_node = dfa->edests[cur_node].elems[0];
3260    }
3261  return REG_NOERROR;
3262}
3263
3264
3265/* For all the back references in the current state, calculate the
3266   destination of the back references by the appropriate entry
3267   in MCTX->BKREF_ENTS.  */
3268
3269static reg_errcode_t
3270internal_function
3271expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3272		    Idx cur_str, Idx subexp_num, int type)
3273{
3274  const re_dfa_t *const dfa = mctx->dfa;
3275  reg_errcode_t err;
3276  Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3277  struct re_backref_cache_entry *ent;
3278
3279  if (cache_idx_start == REG_MISSING)
3280    return REG_NOERROR;
3281
3282 restart:
3283  ent = mctx->bkref_ents + cache_idx_start;
3284  do
3285    {
3286      Idx to_idx, next_node;
3287
3288      /* Is this entry ENT is appropriate?  */
3289      if (!re_node_set_contains (cur_nodes, ent->node))
3290	continue; /* No.  */
3291
3292      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3293      /* Calculate the destination of the back reference, and append it
3294	 to MCTX->STATE_LOG.  */
3295      if (to_idx == cur_str)
3296	{
3297	  /* The backreference did epsilon transit, we must re-check all the
3298	     node in the current state.  */
3299	  re_node_set new_dests;
3300	  reg_errcode_t err2, err3;
3301	  next_node = dfa->edests[ent->node].elems[0];
3302	  if (re_node_set_contains (cur_nodes, next_node))
3303	    continue;
3304	  err = re_node_set_init_1 (&new_dests, next_node);
3305	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3306	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3307	  re_node_set_free (&new_dests);
3308	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3309		  || err3 != REG_NOERROR, 0))
3310	    {
3311	      err = (err != REG_NOERROR ? err
3312		     : (err2 != REG_NOERROR ? err2 : err3));
3313	      return err;
3314	    }
3315	  /* TODO: It is still inefficient...  */
3316	  goto restart;
3317	}
3318      else
3319	{
3320	  re_node_set union_set;
3321	  next_node = dfa->nexts[ent->node];
3322	  if (mctx->state_log[to_idx])
3323	    {
3324	      bool ok;
3325	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3326					next_node))
3327		continue;
3328	      err = re_node_set_init_copy (&union_set,
3329					   &mctx->state_log[to_idx]->nodes);
3330	      ok = re_node_set_insert (&union_set, next_node);
3331	      if (BE (err != REG_NOERROR || ! ok, 0))
3332		{
3333		  re_node_set_free (&union_set);
3334		  err = err != REG_NOERROR ? err : REG_ESPACE;
3335		  return err;
3336		}
3337	    }
3338	  else
3339	    {
3340	      err = re_node_set_init_1 (&union_set, next_node);
3341	      if (BE (err != REG_NOERROR, 0))
3342		return err;
3343	    }
3344	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3345	  re_node_set_free (&union_set);
3346	  if (BE (mctx->state_log[to_idx] == NULL
3347		  && err != REG_NOERROR, 0))
3348	    return err;
3349	}
3350    }
3351  while (ent++->more);
3352  return REG_NOERROR;
3353}
3354
3355/* Build transition table for the state.
3356   Return true if successful.  */
3357
3358static bool
3359internal_function
3360build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3361{
3362  reg_errcode_t err;
3363  Idx i, j;
3364  int ch;
3365  bool need_word_trtable = false;
3366  bitset_word_t elem, mask;
3367  bool dests_node_malloced = false;
3368  bool dest_states_malloced = false;
3369  Idx ndests; /* Number of the destination states from `state'.  */
3370  re_dfastate_t **trtable;
3371  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3372  re_node_set follows, *dests_node;
3373  bitset_t *dests_ch;
3374  bitset_t acceptable;
3375
3376  struct dests_alloc
3377  {
3378    re_node_set dests_node[SBC_MAX];
3379    bitset_t dests_ch[SBC_MAX];
3380  } *dests_alloc;
3381
3382  /* We build DFA states which corresponds to the destination nodes
3383     from `state'.  `dests_node[i]' represents the nodes which i-th
3384     destination state contains, and `dests_ch[i]' represents the
3385     characters which i-th destination state accepts.  */
3386  if (__libc_use_alloca (sizeof (struct dests_alloc)))
3387    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3388  else
3389    {
3390      dests_alloc = re_malloc (struct dests_alloc, 1);
3391      if (BE (dests_alloc == NULL, 0))
3392	return false;
3393      dests_node_malloced = true;
3394    }
3395  dests_node = dests_alloc->dests_node;
3396  dests_ch = dests_alloc->dests_ch;
3397
3398  /* Initialize transiton table.  */
3399  state->word_trtable = state->trtable = NULL;
3400
3401  /* At first, group all nodes belonging to `state' into several
3402     destinations.  */
3403  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3404  if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3405    {
3406      if (dests_node_malloced)
3407	free (dests_alloc);
3408      if (ndests == 0)
3409	{
3410	  state->trtable = (re_dfastate_t **)
3411	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3412	  return true;
3413	}
3414      return false;
3415    }
3416
3417  err = re_node_set_alloc (&follows, ndests + 1);
3418  if (BE (err != REG_NOERROR, 0))
3419    goto out_free;
3420
3421  /* Avoid arithmetic overflow in size calculation.  */
3422  if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3423	    / (3 * sizeof (re_dfastate_t *)))
3424	   < ndests),
3425	  0))
3426    goto out_free;
3427
3428  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3429			 + ndests * 3 * sizeof (re_dfastate_t *)))
3430    dest_states = (re_dfastate_t **)
3431      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3432  else
3433    {
3434      dest_states = (re_dfastate_t **)
3435	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3436      if (BE (dest_states == NULL, 0))
3437	{
3438out_free:
3439	  if (dest_states_malloced)
3440	    free (dest_states);
3441	  re_node_set_free (&follows);
3442	  for (i = 0; i < ndests; ++i)
3443	    re_node_set_free (dests_node + i);
3444	  if (dests_node_malloced)
3445	    free (dests_alloc);
3446	  return false;
3447	}
3448      dest_states_malloced = true;
3449    }
3450  dest_states_word = dest_states + ndests;
3451  dest_states_nl = dest_states_word + ndests;
3452  bitset_empty (acceptable);
3453
3454  /* Then build the states for all destinations.  */
3455  for (i = 0; i < ndests; ++i)
3456    {
3457      Idx next_node;
3458      re_node_set_empty (&follows);
3459      /* Merge the follows of this destination states.  */
3460      for (j = 0; j < dests_node[i].nelem; ++j)
3461	{
3462	  next_node = dfa->nexts[dests_node[i].elems[j]];
3463	  if (next_node != REG_MISSING)
3464	    {
3465	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3466	      if (BE (err != REG_NOERROR, 0))
3467		goto out_free;
3468	    }
3469	}
3470      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3471      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3472	goto out_free;
3473      /* If the new state has context constraint,
3474	 build appropriate states for these contexts.  */
3475      if (dest_states[i]->has_constraint)
3476	{
3477	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3478							  CONTEXT_WORD);
3479	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3480	    goto out_free;
3481
3482	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3483	    need_word_trtable = true;
3484
3485	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3486							CONTEXT_NEWLINE);
3487	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3488	    goto out_free;
3489	}
3490      else
3491	{
3492	  dest_states_word[i] = dest_states[i];
3493	  dest_states_nl[i] = dest_states[i];
3494	}
3495      bitset_merge (acceptable, dests_ch[i]);
3496    }
3497
3498  if (!BE (need_word_trtable, 0))
3499    {
3500      /* We don't care about whether the following character is a word
3501	 character, or we are in a single-byte character set so we can
3502	 discern by looking at the character code: allocate a
3503	 256-entry transition table.  */
3504      trtable = state->trtable =
3505	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3506      if (BE (trtable == NULL, 0))
3507	goto out_free;
3508
3509      /* For all characters ch...:  */
3510      for (i = 0; i < BITSET_WORDS; ++i)
3511	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3512	     elem;
3513	     mask <<= 1, elem >>= 1, ++ch)
3514	  if (BE (elem & 1, 0))
3515	    {
3516	      /* There must be exactly one destination which accepts
3517		 character ch.  See group_nodes_into_DFAstates.  */
3518	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3519		;
3520
3521	      /* j-th destination accepts the word character ch.  */
3522	      if (dfa->word_char[i] & mask)
3523		trtable[ch] = dest_states_word[j];
3524	      else
3525		trtable[ch] = dest_states[j];
3526	    }
3527    }
3528  else
3529    {
3530      /* We care about whether the following character is a word
3531	 character, and we are in a multi-byte character set: discern
3532	 by looking at the character code: build two 256-entry
3533	 transition tables, one starting at trtable[0] and one
3534	 starting at trtable[SBC_MAX].  */
3535      trtable = state->word_trtable =
3536	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3537      if (BE (trtable == NULL, 0))
3538	goto out_free;
3539
3540      /* For all characters ch...:  */
3541      for (i = 0; i < BITSET_WORDS; ++i)
3542	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3543	     elem;
3544	     mask <<= 1, elem >>= 1, ++ch)
3545	  if (BE (elem & 1, 0))
3546	    {
3547	      /* There must be exactly one destination which accepts
3548		 character ch.  See group_nodes_into_DFAstates.  */
3549	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3550		;
3551
3552	      /* j-th destination accepts the word character ch.  */
3553	      trtable[ch] = dest_states[j];
3554	      trtable[ch + SBC_MAX] = dest_states_word[j];
3555	    }
3556    }
3557
3558  /* new line */
3559  if (bitset_contain (acceptable, NEWLINE_CHAR))
3560    {
3561      /* The current state accepts newline character.  */
3562      for (j = 0; j < ndests; ++j)
3563	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3564	  {
3565	    /* k-th destination accepts newline character.  */
3566	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3567	    if (need_word_trtable)
3568	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3569	    /* There must be only one destination which accepts
3570	       newline.  See group_nodes_into_DFAstates.  */
3571	    break;
3572	  }
3573    }
3574
3575  if (dest_states_malloced)
3576    free (dest_states);
3577
3578  re_node_set_free (&follows);
3579  for (i = 0; i < ndests; ++i)
3580    re_node_set_free (dests_node + i);
3581
3582  if (dests_node_malloced)
3583    free (dests_alloc);
3584
3585  return true;
3586}
3587
3588/* Group all nodes belonging to STATE into several destinations.
3589   Then for all destinations, set the nodes belonging to the destination
3590   to DESTS_NODE[i] and set the characters accepted by the destination
3591   to DEST_CH[i].  This function return the number of destinations.  */
3592
3593static Idx
3594internal_function
3595group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3596			    re_node_set *dests_node, bitset_t *dests_ch)
3597{
3598  reg_errcode_t err;
3599  bool ok;
3600  Idx i, j, k;
3601  Idx ndests; /* Number of the destinations from `state'.  */
3602  bitset_t accepts; /* Characters a node can accept.  */
3603  const re_node_set *cur_nodes = &state->nodes;
3604  bitset_empty (accepts);
3605  ndests = 0;
3606
3607  /* For all the nodes belonging to `state',  */
3608  for (i = 0; i < cur_nodes->nelem; ++i)
3609    {
3610      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3611      re_token_type_t type = node->type;
3612      unsigned int constraint = node->constraint;
3613
3614      /* Enumerate all single byte character this node can accept.  */
3615      if (type == CHARACTER)
3616	bitset_set (accepts, node->opr.c);
3617      else if (type == SIMPLE_BRACKET)
3618	{
3619	  bitset_merge (accepts, node->opr.sbcset);
3620	}
3621      else if (type == OP_PERIOD)
3622	{
3623#ifdef RE_ENABLE_I18N
3624	  if (dfa->mb_cur_max > 1)
3625	    bitset_merge (accepts, dfa->sb_char);
3626	  else
3627#endif
3628	    bitset_set_all (accepts);
3629	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3630	    bitset_clear (accepts, '\n');
3631	  if (dfa->syntax & RE_DOT_NOT_NULL)
3632	    bitset_clear (accepts, '\0');
3633	}
3634#ifdef RE_ENABLE_I18N
3635      else if (type == OP_UTF8_PERIOD)
3636        {
3637	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3638	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3639	  else
3640	    bitset_merge (accepts, utf8_sb_map);
3641	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3642	    bitset_clear (accepts, '\n');
3643	  if (dfa->syntax & RE_DOT_NOT_NULL)
3644	    bitset_clear (accepts, '\0');
3645        }
3646#endif
3647      else
3648	continue;
3649
3650      /* Check the `accepts' and sift the characters which are not
3651	 match it the context.  */
3652      if (constraint)
3653	{
3654	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3655	    {
3656	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3657	      bitset_empty (accepts);
3658	      if (accepts_newline)
3659		bitset_set (accepts, NEWLINE_CHAR);
3660	      else
3661		continue;
3662	    }
3663	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3664	    {
3665	      bitset_empty (accepts);
3666	      continue;
3667	    }
3668
3669	  if (constraint & NEXT_WORD_CONSTRAINT)
3670	    {
3671	      bitset_word_t any_set = 0;
3672	      if (type == CHARACTER && !node->word_char)
3673		{
3674		  bitset_empty (accepts);
3675		  continue;
3676		}
3677#ifdef RE_ENABLE_I18N
3678	      if (dfa->mb_cur_max > 1)
3679		for (j = 0; j < BITSET_WORDS; ++j)
3680		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3681	      else
3682#endif
3683		for (j = 0; j < BITSET_WORDS; ++j)
3684		  any_set |= (accepts[j] &= dfa->word_char[j]);
3685	      if (!any_set)
3686		continue;
3687	    }
3688	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3689	    {
3690	      bitset_word_t any_set = 0;
3691	      if (type == CHARACTER && node->word_char)
3692		{
3693		  bitset_empty (accepts);
3694		  continue;
3695		}
3696#ifdef RE_ENABLE_I18N
3697	      if (dfa->mb_cur_max > 1)
3698		for (j = 0; j < BITSET_WORDS; ++j)
3699		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3700	      else
3701#endif
3702		for (j = 0; j < BITSET_WORDS; ++j)
3703		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3704	      if (!any_set)
3705		continue;
3706	    }
3707	}
3708
3709      /* Then divide `accepts' into DFA states, or create a new
3710	 state.  Above, we make sure that accepts is not empty.  */
3711      for (j = 0; j < ndests; ++j)
3712	{
3713	  bitset_t intersec; /* Intersection sets, see below.  */
3714	  bitset_t remains;
3715	  /* Flags, see below.  */
3716	  bitset_word_t has_intersec, not_subset, not_consumed;
3717
3718	  /* Optimization, skip if this state doesn't accept the character.  */
3719	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3720	    continue;
3721
3722	  /* Enumerate the intersection set of this state and `accepts'.  */
3723	  has_intersec = 0;
3724	  for (k = 0; k < BITSET_WORDS; ++k)
3725	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3726	  /* And skip if the intersection set is empty.  */
3727	  if (!has_intersec)
3728	    continue;
3729
3730	  /* Then check if this state is a subset of `accepts'.  */
3731	  not_subset = not_consumed = 0;
3732	  for (k = 0; k < BITSET_WORDS; ++k)
3733	    {
3734	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3735	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3736	    }
3737
3738	  /* If this state isn't a subset of `accepts', create a
3739	     new group state, which has the `remains'. */
3740	  if (not_subset)
3741	    {
3742	      bitset_copy (dests_ch[ndests], remains);
3743	      bitset_copy (dests_ch[j], intersec);
3744	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3745	      if (BE (err != REG_NOERROR, 0))
3746		goto error_return;
3747	      ++ndests;
3748	    }
3749
3750	  /* Put the position in the current group. */
3751	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3752	  if (BE (! ok, 0))
3753	    goto error_return;
3754
3755	  /* If all characters are consumed, go to next node. */
3756	  if (!not_consumed)
3757	    break;
3758	}
3759      /* Some characters remain, create a new group. */
3760      if (j == ndests)
3761	{
3762	  bitset_copy (dests_ch[ndests], accepts);
3763	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3764	  if (BE (err != REG_NOERROR, 0))
3765	    goto error_return;
3766	  ++ndests;
3767	  bitset_empty (accepts);
3768	}
3769    }
3770  return ndests;
3771 error_return:
3772  for (j = 0; j < ndests; ++j)
3773    re_node_set_free (dests_node + j);
3774  return REG_MISSING;
3775}
3776
3777#ifdef RE_ENABLE_I18N
3778/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3779   Return the number of the bytes the node accepts.
3780   STR_IDX is the current index of the input string.
3781
3782   This function handles the nodes which can accept one character, or
3783   one collating element like '.', '[a-z]', opposite to the other nodes
3784   can only accept one byte.  */
3785
3786static int
3787internal_function
3788check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3789			 const re_string_t *input, Idx str_idx)
3790{
3791  const re_token_t *node = dfa->nodes + node_idx;
3792  int char_len, elem_len;
3793  Idx i;
3794
3795  if (BE (node->type == OP_UTF8_PERIOD, 0))
3796    {
3797      unsigned char c = re_string_byte_at (input, str_idx), d;
3798      if (BE (c < 0xc2, 1))
3799	return 0;
3800
3801      if (str_idx + 2 > input->len)
3802	return 0;
3803
3804      d = re_string_byte_at (input, str_idx + 1);
3805      if (c < 0xe0)
3806	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3807      else if (c < 0xf0)
3808	{
3809	  char_len = 3;
3810	  if (c == 0xe0 && d < 0xa0)
3811	    return 0;
3812	}
3813      else if (c < 0xf8)
3814	{
3815	  char_len = 4;
3816	  if (c == 0xf0 && d < 0x90)
3817	    return 0;
3818	}
3819      else if (c < 0xfc)
3820	{
3821	  char_len = 5;
3822	  if (c == 0xf8 && d < 0x88)
3823	    return 0;
3824	}
3825      else if (c < 0xfe)
3826	{
3827	  char_len = 6;
3828	  if (c == 0xfc && d < 0x84)
3829	    return 0;
3830	}
3831      else
3832	return 0;
3833
3834      if (str_idx + char_len > input->len)
3835	return 0;
3836
3837      for (i = 1; i < char_len; ++i)
3838	{
3839	  d = re_string_byte_at (input, str_idx + i);
3840	  if (d < 0x80 || d > 0xbf)
3841	    return 0;
3842	}
3843      return char_len;
3844    }
3845
3846  char_len = re_string_char_size_at (input, str_idx);
3847  if (node->type == OP_PERIOD)
3848    {
3849      if (char_len <= 1)
3850        return 0;
3851      /* FIXME: I don't think this if is needed, as both '\n'
3852	 and '\0' are char_len == 1.  */
3853      /* '.' accepts any one character except the following two cases.  */
3854      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3855	   re_string_byte_at (input, str_idx) == '\n') ||
3856	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3857	   re_string_byte_at (input, str_idx) == '\0'))
3858	return 0;
3859      return char_len;
3860    }
3861
3862  elem_len = re_string_elem_size_at (input, str_idx);
3863  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3864    return 0;
3865
3866  if (node->type == COMPLEX_BRACKET)
3867    {
3868      const re_charset_t *cset = node->opr.mbcset;
3869# ifdef _LIBC
3870      const unsigned char *pin
3871	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3872      Idx j;
3873      uint32_t nrules;
3874# endif /* _LIBC */
3875      int match_len = 0;
3876      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3877		    ? re_string_wchar_at (input, str_idx) : 0);
3878
3879      /* match with multibyte character?  */
3880      for (i = 0; i < cset->nmbchars; ++i)
3881	if (wc == cset->mbchars[i])
3882	  {
3883	    match_len = char_len;
3884	    goto check_node_accept_bytes_match;
3885	  }
3886      /* match with character_class?  */
3887      for (i = 0; i < cset->nchar_classes; ++i)
3888	{
3889	  wctype_t wt = cset->char_classes[i];
3890	  if (__iswctype (wc, wt))
3891	    {
3892	      match_len = char_len;
3893	      goto check_node_accept_bytes_match;
3894	    }
3895	}
3896
3897# ifdef _LIBC
3898      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3899      if (nrules != 0)
3900	{
3901	  unsigned int in_collseq = 0;
3902	  const int32_t *table, *indirect;
3903	  const unsigned char *weights, *extra;
3904	  const char *collseqwc;
3905	  int32_t idx;
3906	  /* This #include defines a local function!  */
3907#  include <locale/weight.h>
3908
3909	  /* match with collating_symbol?  */
3910	  if (cset->ncoll_syms)
3911	    extra = (const unsigned char *)
3912	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3913	  for (i = 0; i < cset->ncoll_syms; ++i)
3914	    {
3915	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3916	      /* Compare the length of input collating element and
3917		 the length of current collating element.  */
3918	      if (*coll_sym != elem_len)
3919		continue;
3920	      /* Compare each bytes.  */
3921	      for (j = 0; j < *coll_sym; j++)
3922		if (pin[j] != coll_sym[1 + j])
3923		  break;
3924	      if (j == *coll_sym)
3925		{
3926		  /* Match if every bytes is equal.  */
3927		  match_len = j;
3928		  goto check_node_accept_bytes_match;
3929		}
3930	    }
3931
3932	  if (cset->nranges)
3933	    {
3934	      if (elem_len <= char_len)
3935		{
3936		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3937		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3938		}
3939	      else
3940		in_collseq = find_collation_sequence_value (pin, elem_len);
3941	    }
3942	  /* match with range expression?  */
3943	  for (i = 0; i < cset->nranges; ++i)
3944	    if (cset->range_starts[i] <= in_collseq
3945		&& in_collseq <= cset->range_ends[i])
3946	      {
3947		match_len = elem_len;
3948		goto check_node_accept_bytes_match;
3949	      }
3950
3951	  /* match with equivalence_class?  */
3952	  if (cset->nequiv_classes)
3953	    {
3954	      const unsigned char *cp = pin;
3955	      table = (const int32_t *)
3956		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3957	      weights = (const unsigned char *)
3958		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3959	      extra = (const unsigned char *)
3960		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3961	      indirect = (const int32_t *)
3962		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3963	      int32_t idx = findidx (&cp);
3964	      if (idx > 0)
3965		for (i = 0; i < cset->nequiv_classes; ++i)
3966		  {
3967		    int32_t equiv_class_idx = cset->equiv_classes[i];
3968		    size_t weight_len = weights[idx & 0xffffff];
3969		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3970			&& (idx >> 24) == (equiv_class_idx >> 24))
3971		      {
3972			Idx cnt = 0;
3973
3974			idx &= 0xffffff;
3975			equiv_class_idx &= 0xffffff;
3976
3977			while (cnt <= weight_len
3978			       && (weights[equiv_class_idx + 1 + cnt]
3979				   == weights[idx + 1 + cnt]))
3980			  ++cnt;
3981			if (cnt > weight_len)
3982			  {
3983			    match_len = elem_len;
3984			    goto check_node_accept_bytes_match;
3985			  }
3986		      }
3987		  }
3988	    }
3989	}
3990      else
3991# endif /* _LIBC */
3992	{
3993	  /* match with range expression?  */
3994#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3995	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3996#else
3997	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3998	  cmp_buf[2] = wc;
3999#endif
4000	  for (i = 0; i < cset->nranges; ++i)
4001	    {
4002	      cmp_buf[0] = cset->range_starts[i];
4003	      cmp_buf[4] = cset->range_ends[i];
4004	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
4005		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
4006		{
4007		  match_len = char_len;
4008		  goto check_node_accept_bytes_match;
4009		}
4010	    }
4011	}
4012    check_node_accept_bytes_match:
4013      if (!cset->non_match)
4014	return match_len;
4015      else
4016	{
4017	  if (match_len > 0)
4018	    return 0;
4019	  else
4020	    return (elem_len > char_len) ? elem_len : char_len;
4021	}
4022    }
4023  return 0;
4024}
4025
4026# ifdef _LIBC
4027static unsigned int
4028internal_function
4029find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4030{
4031  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4032  if (nrules == 0)
4033    {
4034      if (mbs_len == 1)
4035	{
4036	  /* No valid character.  Match it as a single byte character.  */
4037	  const unsigned char *collseq = (const unsigned char *)
4038	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4039	  return collseq[mbs[0]];
4040	}
4041      return UINT_MAX;
4042    }
4043  else
4044    {
4045      int32_t idx;
4046      const unsigned char *extra = (const unsigned char *)
4047	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4048      int32_t extrasize = (const unsigned char *)
4049	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4050
4051      for (idx = 0; idx < extrasize;)
4052	{
4053	  int mbs_cnt;
4054	  bool found = false;
4055	  int32_t elem_mbs_len;
4056	  /* Skip the name of collating element name.  */
4057	  idx = idx + extra[idx] + 1;
4058	  elem_mbs_len = extra[idx++];
4059	  if (mbs_len == elem_mbs_len)
4060	    {
4061	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4062		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4063		  break;
4064	      if (mbs_cnt == elem_mbs_len)
4065		/* Found the entry.  */
4066		found = true;
4067	    }
4068	  /* Skip the byte sequence of the collating element.  */
4069	  idx += elem_mbs_len;
4070	  /* Adjust for the alignment.  */
4071	  idx = (idx + 3) & ~3;
4072	  /* Skip the collation sequence value.  */
4073	  idx += sizeof (uint32_t);
4074	  /* Skip the wide char sequence of the collating element.  */
4075	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4076	  /* If we found the entry, return the sequence value.  */
4077	  if (found)
4078	    return *(uint32_t *) (extra + idx);
4079	  /* Skip the collation sequence value.  */
4080	  idx += sizeof (uint32_t);
4081	}
4082      return UINT_MAX;
4083    }
4084}
4085# endif /* _LIBC */
4086#endif /* RE_ENABLE_I18N */
4087
4088/* Check whether the node accepts the byte which is IDX-th
4089   byte of the INPUT.  */
4090
4091static bool
4092internal_function
4093check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4094		   Idx idx)
4095{
4096  unsigned char ch;
4097  ch = re_string_byte_at (&mctx->input, idx);
4098  switch (node->type)
4099    {
4100    case CHARACTER:
4101      if (node->opr.c != ch)
4102        return false;
4103      break;
4104
4105    case SIMPLE_BRACKET:
4106      if (!bitset_contain (node->opr.sbcset, ch))
4107        return false;
4108      break;
4109
4110#ifdef RE_ENABLE_I18N
4111    case OP_UTF8_PERIOD:
4112      if (ch >= ASCII_CHARS)
4113        return false;
4114      /* FALLTHROUGH */
4115#endif
4116    case OP_PERIOD:
4117      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4118	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4119	return false;
4120      break;
4121
4122    default:
4123      return false;
4124    }
4125
4126  if (node->constraint)
4127    {
4128      /* The node has constraints.  Check whether the current context
4129	 satisfies the constraints.  */
4130      unsigned int context = re_string_context_at (&mctx->input, idx,
4131						   mctx->eflags);
4132      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4133	return false;
4134    }
4135
4136  return true;
4137}
4138
4139/* Extend the buffers, if the buffers have run out.  */
4140
4141static reg_errcode_t
4142internal_function
4143extend_buffers (re_match_context_t *mctx)
4144{
4145  reg_errcode_t ret;
4146  re_string_t *pstr = &mctx->input;
4147
4148  /* Avoid overflow.  */
4149  if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4150    return REG_ESPACE;
4151
4152  /* Double the lengthes of the buffers.  */
4153  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4154  if (BE (ret != REG_NOERROR, 0))
4155    return ret;
4156
4157  if (mctx->state_log != NULL)
4158    {
4159      /* And double the length of state_log.  */
4160      /* XXX We have no indication of the size of this buffer.  If this
4161	 allocation fail we have no indication that the state_log array
4162	 does not have the right size.  */
4163      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4164					      pstr->bufs_len + 1);
4165      if (BE (new_array == NULL, 0))
4166	return REG_ESPACE;
4167      mctx->state_log = new_array;
4168    }
4169
4170  /* Then reconstruct the buffers.  */
4171  if (pstr->icase)
4172    {
4173#ifdef RE_ENABLE_I18N
4174      if (pstr->mb_cur_max > 1)
4175	{
4176	  ret = build_wcs_upper_buffer (pstr);
4177	  if (BE (ret != REG_NOERROR, 0))
4178	    return ret;
4179	}
4180      else
4181#endif /* RE_ENABLE_I18N  */
4182	build_upper_buffer (pstr);
4183    }
4184  else
4185    {
4186#ifdef RE_ENABLE_I18N
4187      if (pstr->mb_cur_max > 1)
4188	build_wcs_buffer (pstr);
4189      else
4190#endif /* RE_ENABLE_I18N  */
4191	{
4192	  if (pstr->trans != NULL)
4193	    re_string_translate_buffer (pstr);
4194	}
4195    }
4196  return REG_NOERROR;
4197}
4198
4199
4200/* Functions for matching context.  */
4201
4202/* Initialize MCTX.  */
4203
4204static reg_errcode_t
4205internal_function
4206match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4207{
4208  mctx->eflags = eflags;
4209  mctx->match_last = REG_MISSING;
4210  if (n > 0)
4211    {
4212      /* Avoid overflow.  */
4213      size_t max_object_size =
4214	MAX (sizeof (struct re_backref_cache_entry),
4215	     sizeof (re_sub_match_top_t *));
4216      if (BE (SIZE_MAX / max_object_size < n, 0))
4217	return REG_ESPACE;
4218
4219      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4220      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4221      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4222	return REG_ESPACE;
4223    }
4224  /* Already zero-ed by the caller.
4225     else
4226       mctx->bkref_ents = NULL;
4227     mctx->nbkref_ents = 0;
4228     mctx->nsub_tops = 0;  */
4229  mctx->abkref_ents = n;
4230  mctx->max_mb_elem_len = 1;
4231  mctx->asub_tops = n;
4232  return REG_NOERROR;
4233}
4234
4235/* Clean the entries which depend on the current input in MCTX.
4236   This function must be invoked when the matcher changes the start index
4237   of the input, or changes the input string.  */
4238
4239static void
4240internal_function
4241match_ctx_clean (re_match_context_t *mctx)
4242{
4243  Idx st_idx;
4244  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4245    {
4246      Idx sl_idx;
4247      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4248      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4249	{
4250	  re_sub_match_last_t *last = top->lasts[sl_idx];
4251	  re_free (last->path.array);
4252	  re_free (last);
4253	}
4254      re_free (top->lasts);
4255      if (top->path)
4256	{
4257	  re_free (top->path->array);
4258	  re_free (top->path);
4259	}
4260      free (top);
4261    }
4262
4263  mctx->nsub_tops = 0;
4264  mctx->nbkref_ents = 0;
4265}
4266
4267/* Free all the memory associated with MCTX.  */
4268
4269static void
4270internal_function
4271match_ctx_free (re_match_context_t *mctx)
4272{
4273  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4274  match_ctx_clean (mctx);
4275  re_free (mctx->sub_tops);
4276  re_free (mctx->bkref_ents);
4277}
4278
4279/* Add a new backreference entry to MCTX.
4280   Note that we assume that caller never call this function with duplicate
4281   entry, and call with STR_IDX which isn't smaller than any existing entry.
4282*/
4283
4284static reg_errcode_t
4285internal_function
4286match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4287		     Idx to)
4288{
4289  if (mctx->nbkref_ents >= mctx->abkref_ents)
4290    {
4291      struct re_backref_cache_entry* new_entry;
4292      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4293			      mctx->abkref_ents * 2);
4294      if (BE (new_entry == NULL, 0))
4295	{
4296	  re_free (mctx->bkref_ents);
4297	  return REG_ESPACE;
4298	}
4299      mctx->bkref_ents = new_entry;
4300      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4301	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4302      mctx->abkref_ents *= 2;
4303    }
4304  if (mctx->nbkref_ents > 0
4305      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4306    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4307
4308  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4309  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4310  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4311  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4312
4313  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4314     If bit N is clear, means that this entry won't epsilon-transition to
4315     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4316     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4317     such node.
4318
4319     A backreference does not epsilon-transition unless it is empty, so set
4320     to all zeros if FROM != TO.  */
4321  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4322    = (from == to ? -1 : 0);
4323
4324  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4325  if (mctx->max_mb_elem_len < to - from)
4326    mctx->max_mb_elem_len = to - from;
4327  return REG_NOERROR;
4328}
4329
4330/* Return the first entry with the same str_idx, or REG_MISSING if none is
4331   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4332
4333static Idx
4334internal_function
4335search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4336{
4337  Idx left, right, mid, last;
4338  last = right = mctx->nbkref_ents;
4339  for (left = 0; left < right;)
4340    {
4341      mid = (left + right) / 2;
4342      if (mctx->bkref_ents[mid].str_idx < str_idx)
4343	left = mid + 1;
4344      else
4345	right = mid;
4346    }
4347  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4348    return left;
4349  else
4350    return REG_MISSING;
4351}
4352
4353/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4354   at STR_IDX.  */
4355
4356static reg_errcode_t
4357internal_function
4358match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4359{
4360#ifdef DEBUG
4361  assert (mctx->sub_tops != NULL);
4362  assert (mctx->asub_tops > 0);
4363#endif
4364  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4365    {
4366      Idx new_asub_tops = mctx->asub_tops * 2;
4367      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4368						   re_sub_match_top_t *,
4369						   new_asub_tops);
4370      if (BE (new_array == NULL, 0))
4371	return REG_ESPACE;
4372      mctx->sub_tops = new_array;
4373      mctx->asub_tops = new_asub_tops;
4374    }
4375  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4376  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4377    return REG_ESPACE;
4378  mctx->sub_tops[mctx->nsub_tops]->node = node;
4379  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4380  return REG_NOERROR;
4381}
4382
4383/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4384   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4385
4386static re_sub_match_last_t *
4387internal_function
4388match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4389{
4390  re_sub_match_last_t *new_entry;
4391  if (BE (subtop->nlasts == subtop->alasts, 0))
4392    {
4393      Idx new_alasts = 2 * subtop->alasts + 1;
4394      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4395						    re_sub_match_last_t *,
4396						    new_alasts);
4397      if (BE (new_array == NULL, 0))
4398	return NULL;
4399      subtop->lasts = new_array;
4400      subtop->alasts = new_alasts;
4401    }
4402  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4403  if (BE (new_entry != NULL, 1))
4404    {
4405      subtop->lasts[subtop->nlasts] = new_entry;
4406      new_entry->node = node;
4407      new_entry->str_idx = str_idx;
4408      ++subtop->nlasts;
4409    }
4410  return new_entry;
4411}
4412
4413static void
4414internal_function
4415sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4416	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4417{
4418  sctx->sifted_states = sifted_sts;
4419  sctx->limited_states = limited_sts;
4420  sctx->last_node = last_node;
4421  sctx->last_str_idx = last_str_idx;
4422  re_node_set_init_empty (&sctx->limits);
4423}
4424