1/* Extended regular expression matching and search library.
2   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation,
3   Inc.
4   This file is part of the GNU C Library.
5   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License along
18   with this program; if not, write to the Free Software Foundation,
19   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22				     Idx n) internal_function;
23static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24static void match_ctx_free (re_match_context_t *cache) internal_function;
25static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
26					  Idx str_idx, Idx from, Idx to)
27     internal_function;
28static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
29     internal_function;
30static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
31					   Idx str_idx) internal_function;
32static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33						    Idx node, Idx str_idx)
34     internal_function;
35static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36			   re_dfastate_t **limited_sts, Idx last_node,
37			   Idx last_str_idx)
38     internal_function;
39static reg_errcode_t re_search_internal (const regex_t *preg,
40					 const char *string, Idx length,
41					 Idx start, Idx last_start, Idx stop,
42					 size_t nmatch, regmatch_t pmatch[],
43					 int eflags) internal_function;
44static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
45				  const char *string1, Idx length1,
46				  const char *string2, Idx length2,
47				  Idx start, regoff_t range,
48				  struct re_registers *regs,
49				  Idx stop, bool ret_len) internal_function;
50static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
51				const char *string, Idx length, Idx start,
52				regoff_t range, Idx stop,
53				struct re_registers *regs,
54				bool ret_len) internal_function;
55static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
56				  Idx nregs, int regs_allocated)
57     internal_function;
58static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59     internal_function;
60static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
61			   Idx *p_match_first) internal_function;
62static Idx check_halt_state_context (const re_match_context_t *mctx,
63				     const re_dfastate_t *state, Idx idx)
64     internal_function;
65static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
66			 regmatch_t *prev_idx_match, Idx cur_node,
67			 Idx cur_idx, Idx nmatch) internal_function;
68static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
69				      Idx str_idx, Idx dest_node, Idx nregs,
70				      regmatch_t *regs,
71				      re_node_set *eps_via_nodes)
72     internal_function;
73static reg_errcode_t set_regs (const regex_t *preg,
74			       const re_match_context_t *mctx,
75			       size_t nmatch, regmatch_t *pmatch,
76			       bool fl_backtrack) internal_function;
77static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
78     internal_function;
79
80#ifdef RE_ENABLE_I18N
81static int sift_states_iter_mb (const re_match_context_t *mctx,
82				re_sift_context_t *sctx,
83				Idx node_idx, Idx str_idx, Idx max_str_idx)
84     internal_function;
85#endif /* RE_ENABLE_I18N */
86static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
87					   re_sift_context_t *sctx)
88     internal_function;
89static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
90					  re_sift_context_t *sctx, Idx str_idx,
91					  re_node_set *cur_dest)
92     internal_function;
93static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
94					      re_sift_context_t *sctx,
95					      Idx str_idx,
96					      re_node_set *dest_nodes)
97     internal_function;
98static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
99					    re_node_set *dest_nodes,
100					    const re_node_set *candidates)
101     internal_function;
102static bool check_dst_limits (const re_match_context_t *mctx,
103			      const re_node_set *limits,
104			      Idx dst_node, Idx dst_idx, Idx src_node,
105			      Idx src_idx) internal_function;
106static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
107					int boundaries, Idx subexp_idx,
108					Idx from_node, Idx bkref_idx)
109     internal_function;
110static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
111				      Idx limit, Idx subexp_idx,
112				      Idx node, Idx str_idx,
113				      Idx bkref_idx) internal_function;
114static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
115					  re_node_set *dest_nodes,
116					  const re_node_set *candidates,
117					  re_node_set *limits,
118					  struct re_backref_cache_entry *bkref_ents,
119					  Idx str_idx) internal_function;
120static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
121					re_sift_context_t *sctx,
122					Idx str_idx, const re_node_set *candidates)
123     internal_function;
124static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
125					re_dfastate_t **dst,
126					re_dfastate_t **src, Idx num)
127     internal_function;
128static re_dfastate_t *find_recover_state (reg_errcode_t *err,
129					 re_match_context_t *mctx) internal_function;
130static re_dfastate_t *transit_state (reg_errcode_t *err,
131				     re_match_context_t *mctx,
132				     re_dfastate_t *state) internal_function;
133static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
134					    re_match_context_t *mctx,
135					    re_dfastate_t *next_state)
136     internal_function;
137static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
138						re_node_set *cur_nodes,
139						Idx str_idx) internal_function;
140#if 0
141static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
142					re_match_context_t *mctx,
143					re_dfastate_t *pstate)
144     internal_function;
145#endif
146#ifdef RE_ENABLE_I18N
147static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
148				       re_dfastate_t *pstate)
149     internal_function;
150#endif /* RE_ENABLE_I18N */
151static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
152					  const re_node_set *nodes)
153     internal_function;
154static reg_errcode_t get_subexp (re_match_context_t *mctx,
155				 Idx bkref_node, Idx bkref_str_idx)
156     internal_function;
157static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
158				     const re_sub_match_top_t *sub_top,
159				     re_sub_match_last_t *sub_last,
160				     Idx bkref_node, Idx bkref_str)
161     internal_function;
162static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
163			     Idx subexp_idx, int type) internal_function;
164static reg_errcode_t check_arrival (re_match_context_t *mctx,
165				    state_array_t *path, Idx top_node,
166				    Idx top_str, Idx last_node, Idx last_str,
167				    int type) internal_function;
168static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
169						   Idx str_idx,
170						   re_node_set *cur_nodes,
171						   re_node_set *next_nodes)
172     internal_function;
173static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
174					       re_node_set *cur_nodes,
175					       Idx ex_subexp, int type)
176     internal_function;
177static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
178						   re_node_set *dst_nodes,
179						   Idx target, Idx ex_subexp,
180						   int type) internal_function;
181static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
182					 re_node_set *cur_nodes, Idx cur_str,
183					 Idx subexp_num, int type)
184     internal_function;
185static bool build_trtable (const re_dfa_t *dfa,
186			   re_dfastate_t *state) internal_function;
187#ifdef RE_ENABLE_I18N
188static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
189				    const re_string_t *input, Idx idx)
190     internal_function;
191# ifdef _LIBC
192static unsigned int find_collation_sequence_value (const unsigned char *mbs,
193						   size_t name_len)
194     internal_function;
195# endif /* _LIBC */
196#endif /* RE_ENABLE_I18N */
197static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
198				       const re_dfastate_t *state,
199				       re_node_set *states_node,
200				       bitset_t *states_ch) internal_function;
201static bool check_node_accept (const re_match_context_t *mctx,
202			       const re_token_t *node, Idx idx)
203     internal_function;
204static reg_errcode_t extend_buffers (re_match_context_t *mctx)
205     internal_function;
206
207/* Entry point for POSIX code.  */
208
209/* regexec searches for a given pattern, specified by PREG, in the
210   string STRING.
211
212   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
213   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
214   least NMATCH elements, and we set them to the offsets of the
215   corresponding matched substrings.
216
217   EFLAGS specifies `execution flags' which affect matching: if
218   REG_NOTBOL is set, then ^ does not match at the beginning of the
219   string; if REG_NOTEOL is set, then $ does not match at the end.
220
221   We return 0 if we find a match and REG_NOMATCH if not.  */
222
223int
224regexec (preg, string, nmatch, pmatch, eflags)
225    const regex_t *_Restrict_ preg;
226    const char *_Restrict_ string;
227    size_t nmatch;
228    regmatch_t pmatch[_Restrict_arr_];
229    int eflags;
230{
231  reg_errcode_t err;
232  Idx start, length;
233#ifdef _LIBC
234  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
235#endif
236
237  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
238    return REG_BADPAT;
239
240  if (eflags & REG_STARTEND)
241    {
242      start = pmatch[0].rm_so;
243      length = pmatch[0].rm_eo;
244    }
245  else
246    {
247      start = 0;
248      length = strlen (string);
249    }
250
251  __libc_lock_lock (dfa->lock);
252  if (preg->no_sub)
253    err = re_search_internal (preg, string, length, start, length,
254			      length, 0, NULL, eflags);
255  else
256    err = re_search_internal (preg, string, length, start, length,
257			      length, nmatch, pmatch, eflags);
258  __libc_lock_unlock (dfa->lock);
259  return err != REG_NOERROR;
260}
261
262#ifdef _LIBC
263# include <shlib-compat.h>
264versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
265
266# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
267__typeof__ (__regexec) __compat_regexec;
268
269int
270attribute_compat_text_section
271__compat_regexec (const regex_t *_Restrict_ preg,
272		  const char *_Restrict_ string, size_t nmatch,
273		  regmatch_t pmatch[], int eflags)
274{
275  return regexec (preg, string, nmatch, pmatch,
276		  eflags & (REG_NOTBOL | REG_NOTEOL));
277}
278compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
279# endif
280#endif
281
282/* Entry points for GNU code.  */
283
284/* re_match, re_search, re_match_2, re_search_2
285
286   The former two functions operate on STRING with length LENGTH,
287   while the later two operate on concatenation of STRING1 and STRING2
288   with lengths LENGTH1 and LENGTH2, respectively.
289
290   re_match() matches the compiled pattern in BUFP against the string,
291   starting at index START.
292
293   re_search() first tries matching at index START, then it tries to match
294   starting from index START + 1, and so on.  The last start position tried
295   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
296   way as re_match().)
297
298   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
299   the first STOP characters of the concatenation of the strings should be
300   concerned.
301
302   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
303   and all groups is stored in REGS.  (For the "_2" variants, the offsets are
304   computed relative to the concatenation, not relative to the individual
305   strings.)
306
307   On success, re_match* functions return the length of the match, re_search*
308   return the position of the start of the match.  Return value -1 means no
309   match was found and -2 indicates an internal error.  */
310
311regoff_t
312re_match (bufp, string, length, start, regs)
313    struct re_pattern_buffer *bufp;
314    const char *string;
315    Idx length, start;
316    struct re_registers *regs;
317{
318  return re_search_stub (bufp, string, length, start, 0, length, regs, true);
319}
320#ifdef _LIBC
321weak_alias (__re_match, re_match)
322#endif
323
324regoff_t
325re_search (bufp, string, length, start, range, regs)
326    struct re_pattern_buffer *bufp;
327    const char *string;
328    Idx length, start;
329    regoff_t range;
330    struct re_registers *regs;
331{
332  return re_search_stub (bufp, string, length, start, range, length, regs,
333			 false);
334}
335#ifdef _LIBC
336weak_alias (__re_search, re_search)
337#endif
338
339regoff_t
340re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
341    struct re_pattern_buffer *bufp;
342    const char *string1, *string2;
343    Idx length1, length2, start, stop;
344    struct re_registers *regs;
345{
346  return re_search_2_stub (bufp, string1, length1, string2, length2,
347			   start, 0, regs, stop, true);
348}
349#ifdef _LIBC
350weak_alias (__re_match_2, re_match_2)
351#endif
352
353regoff_t
354re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
355    struct re_pattern_buffer *bufp;
356    const char *string1, *string2;
357    Idx length1, length2, start, stop;
358    regoff_t range;
359    struct re_registers *regs;
360{
361  return re_search_2_stub (bufp, string1, length1, string2, length2,
362			   start, range, regs, stop, false);
363}
364#ifdef _LIBC
365weak_alias (__re_search_2, re_search_2)
366#endif
367
368static regoff_t
369internal_function
370re_search_2_stub (struct re_pattern_buffer *bufp,
371		  const char *string1, Idx length1,
372		  const char *string2, Idx length2,
373		  Idx start, regoff_t range, struct re_registers *regs,
374		  Idx stop, bool ret_len)
375{
376  const char *str;
377  regoff_t rval;
378  Idx len = length1 + length2;
379  char *s = NULL;
380
381  if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
382    return -2;
383
384  /* Concatenate the strings.  */
385  if (length2 > 0)
386    if (length1 > 0)
387      {
388	s = re_malloc (char, len);
389
390	if (BE (s == NULL, 0))
391	  return -2;
392#ifdef _LIBC
393	memcpy (__mempcpy (s, string1, length1), string2, length2);
394#else
395	memcpy (s, string1, length1);
396	memcpy (s + length1, string2, length2);
397#endif
398	str = s;
399      }
400    else
401      str = string2;
402  else
403    str = string1;
404
405  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
406			 ret_len);
407  re_free (s);
408  return rval;
409}
410
411/* The parameters have the same meaning as those of re_search.
412   Additional parameters:
413   If RET_LEN is true the length of the match is returned (re_match style);
414   otherwise the position of the match is returned.  */
415
416static regoff_t
417internal_function
418re_search_stub (struct re_pattern_buffer *bufp,
419		const char *string, Idx length,
420		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
421		bool ret_len)
422{
423  reg_errcode_t result;
424  regmatch_t *pmatch;
425  Idx nregs;
426  regoff_t rval;
427  int eflags = 0;
428#ifdef _LIBC
429  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
430#endif
431  Idx last_start = start + range;
432
433  /* Check for out-of-range.  */
434  if (BE (start < 0 || start > length, 0))
435    return -1;
436  if (BE (length < last_start || (0 <= range && last_start < start), 0))
437    last_start = length;
438  else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
439    last_start = 0;
440
441  __libc_lock_lock (dfa->lock);
442
443  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
444  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
445
446  /* Compile fastmap if we haven't yet.  */
447  if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
448    re_compile_fastmap (bufp);
449
450  if (BE (bufp->no_sub, 0))
451    regs = NULL;
452
453  /* We need at least 1 register.  */
454  if (regs == NULL)
455    nregs = 1;
456  else if (BE (bufp->regs_allocated == REGS_FIXED
457	       && regs->num_regs <= bufp->re_nsub, 0))
458    {
459      nregs = regs->num_regs;
460      if (BE (nregs < 1, 0))
461	{
462	  /* Nothing can be copied to regs.  */
463	  regs = NULL;
464	  nregs = 1;
465	}
466    }
467  else
468    nregs = bufp->re_nsub + 1;
469  pmatch = re_malloc (regmatch_t, nregs);
470  if (BE (pmatch == NULL, 0))
471    {
472      rval = -2;
473      goto out;
474    }
475
476  result = re_search_internal (bufp, string, length, start, last_start, stop,
477			       nregs, pmatch, eflags);
478
479  rval = 0;
480
481  /* I hope we needn't fill ther regs with -1's when no match was found.  */
482  if (result != REG_NOERROR)
483    rval = -1;
484  else if (regs != NULL)
485    {
486      /* If caller wants register contents data back, copy them.  */
487      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
488					   bufp->regs_allocated);
489      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
490	rval = -2;
491    }
492
493  if (BE (rval == 0, 1))
494    {
495      if (ret_len)
496	{
497	  assert (pmatch[0].rm_so == start);
498	  rval = pmatch[0].rm_eo - start;
499	}
500      else
501	rval = pmatch[0].rm_so;
502    }
503  re_free (pmatch);
504 out:
505  __libc_lock_unlock (dfa->lock);
506  return rval;
507}
508
509static unsigned int
510internal_function
511re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
512	      int regs_allocated)
513{
514  int rval = REGS_REALLOCATE;
515  Idx i;
516  Idx need_regs = nregs + 1;
517  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
518     uses.  */
519
520  /* Have the register data arrays been allocated?  */
521  if (regs_allocated == REGS_UNALLOCATED)
522    { /* No.  So allocate them with malloc.  */
523      regs->start = re_malloc (regoff_t, need_regs);
524      if (BE (regs->start == NULL, 0))
525	return REGS_UNALLOCATED;
526      regs->end = re_malloc (regoff_t, need_regs);
527      if (BE (regs->end == NULL, 0))
528	{
529	  re_free (regs->start);
530	  return REGS_UNALLOCATED;
531	}
532      regs->num_regs = need_regs;
533    }
534  else if (regs_allocated == REGS_REALLOCATE)
535    { /* Yes.  If we need more elements than were already
536	 allocated, reallocate them.  If we need fewer, just
537	 leave it alone.  */
538      if (BE (need_regs > regs->num_regs, 0))
539	{
540	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
541	  regoff_t *new_end;
542	  if (BE (new_start == NULL, 0))
543	    return REGS_UNALLOCATED;
544	  new_end = re_realloc (regs->end, regoff_t, need_regs);
545	  if (BE (new_end == NULL, 0))
546	    {
547	      re_free (new_start);
548	      return REGS_UNALLOCATED;
549	    }
550	  regs->start = new_start;
551	  regs->end = new_end;
552	  regs->num_regs = need_regs;
553	}
554    }
555  else
556    {
557      assert (regs_allocated == REGS_FIXED);
558      /* This function may not be called with REGS_FIXED and nregs too big.  */
559      assert (regs->num_regs >= nregs);
560      rval = REGS_FIXED;
561    }
562
563  /* Copy the regs.  */
564  for (i = 0; i < nregs; ++i)
565    {
566      regs->start[i] = pmatch[i].rm_so;
567      regs->end[i] = pmatch[i].rm_eo;
568    }
569  for ( ; i < regs->num_regs; ++i)
570    regs->start[i] = regs->end[i] = -1;
571
572  return rval;
573}
574
575/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
576   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
577   this memory for recording register information.  STARTS and ENDS
578   must be allocated using the malloc library routine, and must each
579   be at least NUM_REGS * sizeof (regoff_t) bytes long.
580
581   If NUM_REGS == 0, then subsequent matches should allocate their own
582   register data.
583
584   Unless this function is called, the first search or match using
585   PATTERN_BUFFER will allocate its own register data, without
586   freeing the old data.  */
587
588void
589re_set_registers (bufp, regs, num_regs, starts, ends)
590    struct re_pattern_buffer *bufp;
591    struct re_registers *regs;
592    __re_size_t num_regs;
593    regoff_t *starts, *ends;
594{
595  if (num_regs)
596    {
597      bufp->regs_allocated = REGS_REALLOCATE;
598      regs->num_regs = num_regs;
599      regs->start = starts;
600      regs->end = ends;
601    }
602  else
603    {
604      bufp->regs_allocated = REGS_UNALLOCATED;
605      regs->num_regs = 0;
606      regs->start = regs->end = NULL;
607    }
608}
609#ifdef _LIBC
610weak_alias (__re_set_registers, re_set_registers)
611#endif
612
613/* Entry points compatible with 4.2 BSD regex library.  We don't define
614   them unless specifically requested.  */
615
616#if defined _REGEX_RE_COMP || defined _LIBC
617int
618# ifdef _LIBC
619weak_function
620# endif
621re_exec (s)
622     const char *s;
623{
624  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
625}
626#endif /* _REGEX_RE_COMP */
627
628/* Internal entry point.  */
629
630/* Searches for a compiled pattern PREG in the string STRING, whose
631   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
632   meaning as with regexec.  LAST_START is START + RANGE, where
633   START and RANGE have the same meaning as with re_search.
634   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
635   otherwise return the error code.
636   Note: We assume front end functions already check ranges.
637   (0 <= LAST_START && LAST_START <= LENGTH)  */
638
639static reg_errcode_t
640internal_function
641re_search_internal (const regex_t *preg,
642		    const char *string, Idx length,
643		    Idx start, Idx last_start, Idx stop,
644		    size_t nmatch, regmatch_t pmatch[],
645		    int eflags)
646{
647  reg_errcode_t err;
648  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
649  Idx left_lim, right_lim;
650  int incr;
651  bool fl_longest_match;
652  int match_kind;
653  Idx match_first;
654  Idx match_last = REG_MISSING;
655  Idx extra_nmatch;
656  bool sb;
657  int ch;
658#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
659  re_match_context_t mctx = { .dfa = dfa };
660#else
661  re_match_context_t mctx;
662#endif
663  char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
664		    && start != last_start && !preg->can_be_null)
665		   ? preg->fastmap : NULL);
666  RE_TRANSLATE_TYPE t = preg->translate;
667
668#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
669  memset (&mctx, '\0', sizeof (re_match_context_t));
670  mctx.dfa = dfa;
671#endif
672
673  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
674  nmatch -= extra_nmatch;
675
676  /* Check if the DFA haven't been compiled.  */
677  if (BE (preg->used == 0 || dfa->init_state == NULL
678	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
679	  || dfa->init_state_begbuf == NULL, 0))
680    return REG_NOMATCH;
681
682#ifdef DEBUG
683  /* We assume front-end functions already check them.  */
684  assert (0 <= last_start && last_start <= length);
685#endif
686
687  /* If initial states with non-begbuf contexts have no elements,
688     the regex must be anchored.  If preg->newline_anchor is set,
689     we'll never use init_state_nl, so do not check it.  */
690  if (dfa->init_state->nodes.nelem == 0
691      && dfa->init_state_word->nodes.nelem == 0
692      && (dfa->init_state_nl->nodes.nelem == 0
693	  || !preg->newline_anchor))
694    {
695      if (start != 0 && last_start != 0)
696        return REG_NOMATCH;
697      start = last_start = 0;
698    }
699
700  /* We must check the longest matching, if nmatch > 0.  */
701  fl_longest_match = (nmatch != 0 || dfa->nbackref);
702
703  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
704			    preg->translate, preg->syntax & RE_ICASE, dfa);
705  if (BE (err != REG_NOERROR, 0))
706    goto free_return;
707  mctx.input.stop = stop;
708  mctx.input.raw_stop = stop;
709  mctx.input.newline_anchor = preg->newline_anchor;
710
711  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
712  if (BE (err != REG_NOERROR, 0))
713    goto free_return;
714
715  /* We will log all the DFA states through which the dfa pass,
716     if nmatch > 1, or this dfa has "multibyte node", which is a
717     back-reference or a node which can accept multibyte character or
718     multi character collating element.  */
719  if (nmatch > 1 || dfa->has_mb_node)
720    {
721      /* Avoid overflow.  */
722      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
723	{
724	  err = REG_ESPACE;
725	  goto free_return;
726	}
727
728      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
729      if (BE (mctx.state_log == NULL, 0))
730	{
731	  err = REG_ESPACE;
732	  goto free_return;
733	}
734    }
735  else
736    mctx.state_log = NULL;
737
738  match_first = start;
739  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
740			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
741
742  /* Check incrementally whether of not the input string match.  */
743  incr = (last_start < start) ? -1 : 1;
744  left_lim = (last_start < start) ? last_start : start;
745  right_lim = (last_start < start) ? start : last_start;
746  sb = dfa->mb_cur_max == 1;
747  match_kind =
748    (fastmap
749     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
750	| (start <= last_start ? 2 : 0)
751	| (t != NULL ? 1 : 0))
752     : 8);
753
754  for (;; match_first += incr)
755    {
756      err = REG_NOMATCH;
757      if (match_first < left_lim || right_lim < match_first)
758	goto free_return;
759
760      /* Advance as rapidly as possible through the string, until we
761	 find a plausible place to start matching.  This may be done
762	 with varying efficiency, so there are various possibilities:
763	 only the most common of them are specialized, in order to
764	 save on code size.  We use a switch statement for speed.  */
765      switch (match_kind)
766	{
767	case 8:
768	  /* No fastmap.  */
769	  break;
770
771	case 7:
772	  /* Fastmap with single-byte translation, match forward.  */
773	  while (BE (match_first < right_lim, 1)
774		 && !fastmap[t[(unsigned char) string[match_first]]])
775	    ++match_first;
776	  goto forward_match_found_start_or_reached_end;
777
778	case 6:
779	  /* Fastmap without translation, match forward.  */
780	  while (BE (match_first < right_lim, 1)
781		 && !fastmap[(unsigned char) string[match_first]])
782	    ++match_first;
783
784	forward_match_found_start_or_reached_end:
785	  if (BE (match_first == right_lim, 0))
786	    {
787	      ch = match_first >= length
788		       ? 0 : (unsigned char) string[match_first];
789	      if (!fastmap[t ? t[ch] : ch])
790		goto free_return;
791	    }
792	  break;
793
794	case 4:
795	case 5:
796	  /* Fastmap without multi-byte translation, match backwards.  */
797	  while (match_first >= left_lim)
798	    {
799	      ch = match_first >= length
800		       ? 0 : (unsigned char) string[match_first];
801	      if (fastmap[t ? t[ch] : ch])
802		break;
803	      --match_first;
804	    }
805	  if (match_first < left_lim)
806	    goto free_return;
807	  break;
808
809	default:
810	  /* In this case, we can't determine easily the current byte,
811	     since it might be a component byte of a multibyte
812	     character.  Then we use the constructed buffer instead.  */
813	  for (;;)
814	    {
815	      /* If MATCH_FIRST is out of the valid range, reconstruct the
816		 buffers.  */
817	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
818	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
819		{
820		  err = re_string_reconstruct (&mctx.input, match_first,
821					       eflags);
822		  if (BE (err != REG_NOERROR, 0))
823		    goto free_return;
824
825		  offset = match_first - mctx.input.raw_mbs_idx;
826		}
827	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
828		 Note that MATCH_FIRST must not be smaller than 0.  */
829	      ch = (match_first >= length
830		    ? 0 : re_string_byte_at (&mctx.input, offset));
831	      if (fastmap[ch])
832		break;
833	      match_first += incr;
834	      if (match_first < left_lim || match_first > right_lim)
835	        {
836	          err = REG_NOMATCH;
837	          goto free_return;
838	        }
839	    }
840	  break;
841	}
842
843      /* Reconstruct the buffers so that the matcher can assume that
844	 the matching starts from the beginning of the buffer.  */
845      err = re_string_reconstruct (&mctx.input, match_first, eflags);
846      if (BE (err != REG_NOERROR, 0))
847	goto free_return;
848
849#ifdef RE_ENABLE_I18N
850     /* Don't consider this char as a possible match start if it part,
851	yet isn't the head, of a multibyte character.  */
852      if (!sb && !re_string_first_byte (&mctx.input, 0))
853	continue;
854#endif
855
856      /* It seems to be appropriate one, then use the matcher.  */
857      /* We assume that the matching starts from 0.  */
858      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
859      match_last = check_matching (&mctx, fl_longest_match,
860				   start <= last_start ? &match_first : NULL);
861      if (match_last != REG_MISSING)
862	{
863	  if (BE (match_last == REG_ERROR, 0))
864	    {
865	      err = REG_ESPACE;
866	      goto free_return;
867	    }
868	  else
869	    {
870	      mctx.match_last = match_last;
871	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
872		{
873		  re_dfastate_t *pstate = mctx.state_log[match_last];
874		  mctx.last_node = check_halt_state_context (&mctx, pstate,
875							     match_last);
876		}
877	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
878		  || dfa->nbackref)
879		{
880		  err = prune_impossible_nodes (&mctx);
881		  if (err == REG_NOERROR)
882		    break;
883		  if (BE (err != REG_NOMATCH, 0))
884		    goto free_return;
885		  match_last = REG_MISSING;
886		}
887	      else
888		break; /* We found a match.  */
889	    }
890	}
891
892      match_ctx_clean (&mctx);
893    }
894
895#ifdef DEBUG
896  assert (match_last != REG_MISSING);
897  assert (err == REG_NOERROR);
898#endif
899
900  /* Set pmatch[] if we need.  */
901  if (nmatch > 0)
902    {
903      Idx reg_idx;
904
905      /* Initialize registers.  */
906      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
907	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
908
909      /* Set the points where matching start/end.  */
910      pmatch[0].rm_so = 0;
911      pmatch[0].rm_eo = mctx.match_last;
912      /* FIXME: This function should fail if mctx.match_last exceeds
913	 the maximum possible regoff_t value.  We need a new error
914	 code REG_OVERFLOW.  */
915
916      if (!preg->no_sub && nmatch > 1)
917	{
918	  err = set_regs (preg, &mctx, nmatch, pmatch,
919			  dfa->has_plural_match && dfa->nbackref > 0);
920	  if (BE (err != REG_NOERROR, 0))
921	    goto free_return;
922	}
923
924      /* At last, add the offset to the each registers, since we slided
925	 the buffers so that we could assume that the matching starts
926	 from 0.  */
927      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
928	if (pmatch[reg_idx].rm_so != -1)
929	  {
930#ifdef RE_ENABLE_I18N
931	    if (BE (mctx.input.offsets_needed != 0, 0))
932	      {
933		pmatch[reg_idx].rm_so =
934		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
935		   ? mctx.input.valid_raw_len
936		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
937		pmatch[reg_idx].rm_eo =
938		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
939		   ? mctx.input.valid_raw_len
940		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
941	      }
942#else
943	    assert (mctx.input.offsets_needed == 0);
944#endif
945	    pmatch[reg_idx].rm_so += match_first;
946	    pmatch[reg_idx].rm_eo += match_first;
947	  }
948      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
949	{
950	  pmatch[nmatch + reg_idx].rm_so = -1;
951	  pmatch[nmatch + reg_idx].rm_eo = -1;
952	}
953
954      if (dfa->subexp_map)
955        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
956          if (dfa->subexp_map[reg_idx] != reg_idx)
957            {
958              pmatch[reg_idx + 1].rm_so
959                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
960              pmatch[reg_idx + 1].rm_eo
961                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
962            }
963    }
964
965 free_return:
966  re_free (mctx.state_log);
967  if (dfa->nbackref)
968    match_ctx_free (&mctx);
969  re_string_destruct (&mctx.input);
970  return err;
971}
972
973static reg_errcode_t
974internal_function
975prune_impossible_nodes (re_match_context_t *mctx)
976{
977  const re_dfa_t *const dfa = mctx->dfa;
978  Idx halt_node, match_last;
979  reg_errcode_t ret;
980  re_dfastate_t **sifted_states;
981  re_dfastate_t **lim_states = NULL;
982  re_sift_context_t sctx;
983#ifdef DEBUG
984  assert (mctx->state_log != NULL);
985#endif
986  match_last = mctx->match_last;
987  halt_node = mctx->last_node;
988
989  /* Avoid overflow.  */
990  if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
991    return REG_ESPACE;
992
993  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
994  if (BE (sifted_states == NULL, 0))
995    {
996      ret = REG_ESPACE;
997      goto free_return;
998    }
999  if (dfa->nbackref)
1000    {
1001      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1002      if (BE (lim_states == NULL, 0))
1003	{
1004	  ret = REG_ESPACE;
1005	  goto free_return;
1006	}
1007      while (1)
1008	{
1009	  memset (lim_states, '\0',
1010		  sizeof (re_dfastate_t *) * (match_last + 1));
1011	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1012			 match_last);
1013	  ret = sift_states_backward (mctx, &sctx);
1014	  re_node_set_free (&sctx.limits);
1015	  if (BE (ret != REG_NOERROR, 0))
1016	      goto free_return;
1017	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1018	    break;
1019	  do
1020	    {
1021	      --match_last;
1022	      if (! REG_VALID_INDEX (match_last))
1023		{
1024		  ret = REG_NOMATCH;
1025		  goto free_return;
1026		}
1027	    } while (mctx->state_log[match_last] == NULL
1028		     || !mctx->state_log[match_last]->halt);
1029	  halt_node = check_halt_state_context (mctx,
1030						mctx->state_log[match_last],
1031						match_last);
1032	}
1033      ret = merge_state_array (dfa, sifted_states, lim_states,
1034			       match_last + 1);
1035      re_free (lim_states);
1036      lim_states = NULL;
1037      if (BE (ret != REG_NOERROR, 0))
1038	goto free_return;
1039    }
1040  else
1041    {
1042      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1043      ret = sift_states_backward (mctx, &sctx);
1044      re_node_set_free (&sctx.limits);
1045      if (BE (ret != REG_NOERROR, 0))
1046	goto free_return;
1047    }
1048  re_free (mctx->state_log);
1049  mctx->state_log = sifted_states;
1050  sifted_states = NULL;
1051  mctx->last_node = halt_node;
1052  mctx->match_last = match_last;
1053  ret = REG_NOERROR;
1054 free_return:
1055  re_free (sifted_states);
1056  re_free (lim_states);
1057  return ret;
1058}
1059
1060/* Acquire an initial state and return it.
1061   We must select appropriate initial state depending on the context,
1062   since initial states may have constraints like "\<", "^", etc..  */
1063
1064static inline re_dfastate_t *
1065__attribute ((always_inline)) internal_function
1066acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1067			    Idx idx)
1068{
1069  const re_dfa_t *const dfa = mctx->dfa;
1070  if (dfa->init_state->has_constraint)
1071    {
1072      unsigned int context;
1073      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1074      if (IS_WORD_CONTEXT (context))
1075	return dfa->init_state_word;
1076      else if (IS_ORDINARY_CONTEXT (context))
1077	return dfa->init_state;
1078      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1079	return dfa->init_state_begbuf;
1080      else if (IS_NEWLINE_CONTEXT (context))
1081	return dfa->init_state_nl;
1082      else if (IS_BEGBUF_CONTEXT (context))
1083	{
1084	  /* It is relatively rare case, then calculate on demand.  */
1085	  return re_acquire_state_context (err, dfa,
1086					   dfa->init_state->entrance_nodes,
1087					   context);
1088	}
1089      else
1090	/* Must not happen?  */
1091	return dfa->init_state;
1092    }
1093  else
1094    return dfa->init_state;
1095}
1096
1097/* Check whether the regular expression match input string INPUT or not,
1098   and return the index where the matching end.  Return REG_MISSING if
1099   there is no match, and return REG_ERROR in case of an error.
1100   FL_LONGEST_MATCH means we want the POSIX longest matching.
1101   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1102   next place where we may want to try matching.
1103   Note that the matcher assume that the maching starts from the current
1104   index of the buffer.  */
1105
1106static Idx
1107internal_function
1108check_matching (re_match_context_t *mctx, bool fl_longest_match,
1109		Idx *p_match_first)
1110{
1111  const re_dfa_t *const dfa = mctx->dfa;
1112  reg_errcode_t err;
1113  Idx match = 0;
1114  Idx match_last = REG_MISSING;
1115  Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1116  re_dfastate_t *cur_state;
1117  bool at_init_state = p_match_first != NULL;
1118  Idx next_start_idx = cur_str_idx;
1119
1120  err = REG_NOERROR;
1121  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1122  /* An initial state must not be NULL (invalid).  */
1123  if (BE (cur_state == NULL, 0))
1124    {
1125      assert (err == REG_ESPACE);
1126      return REG_ERROR;
1127    }
1128
1129  if (mctx->state_log != NULL)
1130    {
1131      mctx->state_log[cur_str_idx] = cur_state;
1132
1133      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1134	 later.  E.g. Processing back references.  */
1135      if (BE (dfa->nbackref, 0))
1136	{
1137	  at_init_state = false;
1138	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1139	  if (BE (err != REG_NOERROR, 0))
1140	    return err;
1141
1142	  if (cur_state->has_backref)
1143	    {
1144	      err = transit_state_bkref (mctx, &cur_state->nodes);
1145	      if (BE (err != REG_NOERROR, 0))
1146	        return err;
1147	    }
1148	}
1149    }
1150
1151  /* If the RE accepts NULL string.  */
1152  if (BE (cur_state->halt, 0))
1153    {
1154      if (!cur_state->has_constraint
1155	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1156	{
1157	  if (!fl_longest_match)
1158	    return cur_str_idx;
1159	  else
1160	    {
1161	      match_last = cur_str_idx;
1162	      match = 1;
1163	    }
1164	}
1165    }
1166
1167  while (!re_string_eoi (&mctx->input))
1168    {
1169      re_dfastate_t *old_state = cur_state;
1170      Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1171
1172      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1173          || (BE (next_char_idx >= mctx->input.valid_len, 0)
1174              && mctx->input.valid_len < mctx->input.len))
1175        {
1176          err = extend_buffers (mctx);
1177          if (BE (err != REG_NOERROR, 0))
1178	    {
1179	      assert (err == REG_ESPACE);
1180	      return REG_ERROR;
1181	    }
1182        }
1183
1184      cur_state = transit_state (&err, mctx, cur_state);
1185      if (mctx->state_log != NULL)
1186	cur_state = merge_state_with_log (&err, mctx, cur_state);
1187
1188      if (cur_state == NULL)
1189	{
1190	  /* Reached the invalid state or an error.  Try to recover a valid
1191	     state using the state log, if available and if we have not
1192	     already found a valid (even if not the longest) match.  */
1193	  if (BE (err != REG_NOERROR, 0))
1194	    return REG_ERROR;
1195
1196	  if (mctx->state_log == NULL
1197	      || (match && !fl_longest_match)
1198	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1199	    break;
1200	}
1201
1202      if (BE (at_init_state, 0))
1203	{
1204	  if (old_state == cur_state)
1205	    next_start_idx = next_char_idx;
1206	  else
1207	    at_init_state = false;
1208	}
1209
1210      if (cur_state->halt)
1211	{
1212	  /* Reached a halt state.
1213	     Check the halt state can satisfy the current context.  */
1214	  if (!cur_state->has_constraint
1215	      || check_halt_state_context (mctx, cur_state,
1216					   re_string_cur_idx (&mctx->input)))
1217	    {
1218	      /* We found an appropriate halt state.  */
1219	      match_last = re_string_cur_idx (&mctx->input);
1220	      match = 1;
1221
1222	      /* We found a match, do not modify match_first below.  */
1223	      p_match_first = NULL;
1224	      if (!fl_longest_match)
1225		break;
1226	    }
1227	}
1228    }
1229
1230  if (p_match_first)
1231    *p_match_first += next_start_idx;
1232
1233  return match_last;
1234}
1235
1236/* Check NODE match the current context.  */
1237
1238static bool
1239internal_function
1240check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1241{
1242  re_token_type_t type = dfa->nodes[node].type;
1243  unsigned int constraint = dfa->nodes[node].constraint;
1244  if (type != END_OF_RE)
1245    return false;
1246  if (!constraint)
1247    return true;
1248  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1249    return false;
1250  return true;
1251}
1252
1253/* Check the halt state STATE match the current context.
1254   Return 0 if not match, if the node, STATE has, is a halt node and
1255   match the context, return the node.  */
1256
1257static Idx
1258internal_function
1259check_halt_state_context (const re_match_context_t *mctx,
1260			  const re_dfastate_t *state, Idx idx)
1261{
1262  Idx i;
1263  unsigned int context;
1264#ifdef DEBUG
1265  assert (state->halt);
1266#endif
1267  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1268  for (i = 0; i < state->nodes.nelem; ++i)
1269    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1270      return state->nodes.elems[i];
1271  return 0;
1272}
1273
1274/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1275   corresponding to the DFA).
1276   Return the destination node, and update EPS_VIA_NODES;
1277   return REG_MISSING in case of errors.  */
1278
1279static Idx
1280internal_function
1281proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1282		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1283		   struct re_fail_stack_t *fs)
1284{
1285  const re_dfa_t *const dfa = mctx->dfa;
1286  Idx i;
1287  bool ok;
1288  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1289    {
1290      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1291      re_node_set *edests = &dfa->edests[node];
1292      Idx dest_node;
1293      ok = re_node_set_insert (eps_via_nodes, node);
1294      if (BE (! ok, 0))
1295	return REG_ERROR;
1296      /* Pick up a valid destination, or return REG_MISSING if none
1297	 is found.  */
1298      for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1299	{
1300	  Idx candidate = edests->elems[i];
1301	  if (!re_node_set_contains (cur_nodes, candidate))
1302	    continue;
1303          if (dest_node == REG_MISSING)
1304	    dest_node = candidate;
1305
1306          else
1307	    {
1308	      /* In order to avoid infinite loop like "(a*)*", return the second
1309	         epsilon-transition if the first was already considered.  */
1310	      if (re_node_set_contains (eps_via_nodes, dest_node))
1311	        return candidate;
1312
1313	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1314	      else if (fs != NULL
1315		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1316				           eps_via_nodes))
1317		return REG_ERROR;
1318
1319	      /* We know we are going to exit.  */
1320	      break;
1321	    }
1322	}
1323      return dest_node;
1324    }
1325  else
1326    {
1327      Idx naccepted = 0;
1328      re_token_type_t type = dfa->nodes[node].type;
1329
1330#ifdef RE_ENABLE_I18N
1331      if (dfa->nodes[node].accept_mb)
1332	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1333      else
1334#endif /* RE_ENABLE_I18N */
1335      if (type == OP_BACK_REF)
1336	{
1337	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1338	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1339	  if (fs != NULL)
1340	    {
1341	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1342		return REG_MISSING;
1343	      else if (naccepted)
1344		{
1345		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1346		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1347			      naccepted) != 0)
1348		    return REG_MISSING;
1349		}
1350	    }
1351
1352	  if (naccepted == 0)
1353	    {
1354	      Idx dest_node;
1355	      ok = re_node_set_insert (eps_via_nodes, node);
1356	      if (BE (! ok, 0))
1357		return REG_ERROR;
1358	      dest_node = dfa->edests[node].elems[0];
1359	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1360					dest_node))
1361		return dest_node;
1362	    }
1363	}
1364
1365      if (naccepted != 0
1366	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1367	{
1368	  Idx dest_node = dfa->nexts[node];
1369	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1370	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1371		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1372					       dest_node)))
1373	    return REG_MISSING;
1374	  re_node_set_empty (eps_via_nodes);
1375	  return dest_node;
1376	}
1377    }
1378  return REG_MISSING;
1379}
1380
1381static reg_errcode_t
1382internal_function
1383push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1384		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1385{
1386  reg_errcode_t err;
1387  Idx num = fs->num++;
1388  if (fs->num == fs->alloc)
1389    {
1390      struct re_fail_stack_ent_t *new_array;
1391      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1392				       * fs->alloc * 2));
1393      if (new_array == NULL)
1394	return REG_ESPACE;
1395      fs->alloc *= 2;
1396      fs->stack = new_array;
1397    }
1398  fs->stack[num].idx = str_idx;
1399  fs->stack[num].node = dest_node;
1400  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1401  if (fs->stack[num].regs == NULL)
1402    return REG_ESPACE;
1403  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1404  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1405  return err;
1406}
1407
1408static Idx
1409internal_function
1410pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1411		regmatch_t *regs, re_node_set *eps_via_nodes)
1412{
1413  Idx num = --fs->num;
1414  assert (REG_VALID_INDEX (num));
1415  *pidx = fs->stack[num].idx;
1416  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1417  re_node_set_free (eps_via_nodes);
1418  re_free (fs->stack[num].regs);
1419  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1420  return fs->stack[num].node;
1421}
1422
1423/* Set the positions where the subexpressions are starts/ends to registers
1424   PMATCH.
1425   Note: We assume that pmatch[0] is already set, and
1426   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1427
1428static reg_errcode_t
1429internal_function
1430set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1431	  regmatch_t *pmatch, bool fl_backtrack)
1432{
1433  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1434  Idx idx, cur_node;
1435  re_node_set eps_via_nodes;
1436  struct re_fail_stack_t *fs;
1437  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1438  regmatch_t *prev_idx_match;
1439  bool prev_idx_match_malloced = false;
1440
1441#ifdef DEBUG
1442  assert (nmatch > 1);
1443  assert (mctx->state_log != NULL);
1444#endif
1445  if (fl_backtrack)
1446    {
1447      fs = &fs_body;
1448      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1449      if (fs->stack == NULL)
1450	return REG_ESPACE;
1451    }
1452  else
1453    fs = NULL;
1454
1455  cur_node = dfa->init_node;
1456  re_node_set_init_empty (&eps_via_nodes);
1457
1458  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1459    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1460  else
1461    {
1462      prev_idx_match = re_malloc (regmatch_t, nmatch);
1463      if (prev_idx_match == NULL)
1464	{
1465	  free_fail_stack_return (fs);
1466	  return REG_ESPACE;
1467	}
1468      prev_idx_match_malloced = true;
1469    }
1470  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1471
1472  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1473    {
1474      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1475
1476      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1477	{
1478	  Idx reg_idx;
1479	  if (fs)
1480	    {
1481	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1482		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1483		  break;
1484	      if (reg_idx == nmatch)
1485		{
1486		  re_node_set_free (&eps_via_nodes);
1487		  if (prev_idx_match_malloced)
1488		    re_free (prev_idx_match);
1489		  return free_fail_stack_return (fs);
1490		}
1491	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1492					 &eps_via_nodes);
1493	    }
1494	  else
1495	    {
1496	      re_node_set_free (&eps_via_nodes);
1497	      if (prev_idx_match_malloced)
1498		re_free (prev_idx_match);
1499	      return REG_NOERROR;
1500	    }
1501	}
1502
1503      /* Proceed to next node.  */
1504      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1505				    &eps_via_nodes, fs);
1506
1507      if (BE (! REG_VALID_INDEX (cur_node), 0))
1508	{
1509	  if (BE (cur_node == REG_ERROR, 0))
1510	    {
1511	      re_node_set_free (&eps_via_nodes);
1512	      if (prev_idx_match_malloced)
1513		re_free (prev_idx_match);
1514	      free_fail_stack_return (fs);
1515	      return REG_ESPACE;
1516	    }
1517	  if (fs)
1518	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1519				       &eps_via_nodes);
1520	  else
1521	    {
1522	      re_node_set_free (&eps_via_nodes);
1523	      if (prev_idx_match_malloced)
1524		re_free (prev_idx_match);
1525	      return REG_NOMATCH;
1526	    }
1527	}
1528    }
1529  re_node_set_free (&eps_via_nodes);
1530  if (prev_idx_match_malloced)
1531    re_free (prev_idx_match);
1532  return free_fail_stack_return (fs);
1533}
1534
1535static reg_errcode_t
1536internal_function
1537free_fail_stack_return (struct re_fail_stack_t *fs)
1538{
1539  if (fs)
1540    {
1541      Idx fs_idx;
1542      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1543	{
1544	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1545	  re_free (fs->stack[fs_idx].regs);
1546	}
1547      re_free (fs->stack);
1548    }
1549  return REG_NOERROR;
1550}
1551
1552static void
1553internal_function
1554update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1555	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1556{
1557  int type = dfa->nodes[cur_node].type;
1558  if (type == OP_OPEN_SUBEXP)
1559    {
1560      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1561
1562      /* We are at the first node of this sub expression.  */
1563      if (reg_num < nmatch)
1564	{
1565	  pmatch[reg_num].rm_so = cur_idx;
1566	  pmatch[reg_num].rm_eo = -1;
1567	}
1568    }
1569  else if (type == OP_CLOSE_SUBEXP)
1570    {
1571      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1572      if (reg_num < nmatch)
1573	{
1574	  /* We are at the last node of this sub expression.  */
1575	  if (pmatch[reg_num].rm_so < cur_idx)
1576	    {
1577	      pmatch[reg_num].rm_eo = cur_idx;
1578	      /* This is a non-empty match or we are not inside an optional
1579		 subexpression.  Accept this right away.  */
1580	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1581	    }
1582	  else
1583	    {
1584	      if (dfa->nodes[cur_node].opt_subexp
1585		  && prev_idx_match[reg_num].rm_so != -1)
1586		/* We transited through an empty match for an optional
1587		   subexpression, like (a?)*, and this is not the subexp's
1588		   first match.  Copy back the old content of the registers
1589		   so that matches of an inner subexpression are undone as
1590		   well, like in ((a?))*.  */
1591		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1592	      else
1593		/* We completed a subexpression, but it may be part of
1594		   an optional one, so do not update PREV_IDX_MATCH.  */
1595		pmatch[reg_num].rm_eo = cur_idx;
1596	    }
1597	}
1598    }
1599}
1600
1601/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1602   and sift the nodes in each states according to the following rules.
1603   Updated state_log will be wrote to STATE_LOG.
1604
1605   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1606     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1607	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1608	the LAST_NODE, we throw away the node `a'.
1609     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1610	string `s' and transit to `b':
1611	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1612	   away the node `a'.
1613	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1614	    thrown away, we throw away the node `a'.
1615     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1616	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1617	   node `a'.
1618	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1619	    we throw away the node `a'.  */
1620
1621#define STATE_NODE_CONTAINS(state,node) \
1622  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1623
1624static reg_errcode_t
1625internal_function
1626sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1627{
1628  reg_errcode_t err;
1629  int null_cnt = 0;
1630  Idx str_idx = sctx->last_str_idx;
1631  re_node_set cur_dest;
1632
1633#ifdef DEBUG
1634  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1635#endif
1636
1637  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1638     transit to the last_node and the last_node itself.  */
1639  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1640  if (BE (err != REG_NOERROR, 0))
1641    return err;
1642  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1643  if (BE (err != REG_NOERROR, 0))
1644    goto free_return;
1645
1646  /* Then check each states in the state_log.  */
1647  while (str_idx > 0)
1648    {
1649      /* Update counters.  */
1650      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1651      if (null_cnt > mctx->max_mb_elem_len)
1652	{
1653	  memset (sctx->sifted_states, '\0',
1654		  sizeof (re_dfastate_t *) * str_idx);
1655	  re_node_set_free (&cur_dest);
1656	  return REG_NOERROR;
1657	}
1658      re_node_set_empty (&cur_dest);
1659      --str_idx;
1660
1661      if (mctx->state_log[str_idx])
1662	{
1663	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1664          if (BE (err != REG_NOERROR, 0))
1665	    goto free_return;
1666	}
1667
1668      /* Add all the nodes which satisfy the following conditions:
1669	 - It can epsilon transit to a node in CUR_DEST.
1670	 - It is in CUR_SRC.
1671	 And update state_log.  */
1672      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1673      if (BE (err != REG_NOERROR, 0))
1674	goto free_return;
1675    }
1676  err = REG_NOERROR;
1677 free_return:
1678  re_node_set_free (&cur_dest);
1679  return err;
1680}
1681
1682static reg_errcode_t
1683internal_function
1684build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1685		     Idx str_idx, re_node_set *cur_dest)
1686{
1687  const re_dfa_t *const dfa = mctx->dfa;
1688  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1689  Idx i;
1690
1691  /* Then build the next sifted state.
1692     We build the next sifted state on `cur_dest', and update
1693     `sifted_states[str_idx]' with `cur_dest'.
1694     Note:
1695     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1696     `cur_src' points the node_set of the old `state_log[str_idx]'
1697     (with the epsilon nodes pre-filtered out).  */
1698  for (i = 0; i < cur_src->nelem; i++)
1699    {
1700      Idx prev_node = cur_src->elems[i];
1701      int naccepted = 0;
1702      bool ok;
1703
1704#ifdef DEBUG
1705      re_token_type_t type = dfa->nodes[prev_node].type;
1706      assert (!IS_EPSILON_NODE (type));
1707#endif
1708#ifdef RE_ENABLE_I18N
1709      /* If the node may accept `multi byte'.  */
1710      if (dfa->nodes[prev_node].accept_mb)
1711	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1712					 str_idx, sctx->last_str_idx);
1713#endif /* RE_ENABLE_I18N */
1714
1715      /* We don't check backreferences here.
1716	 See update_cur_sifted_state().  */
1717      if (!naccepted
1718	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1719	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1720				  dfa->nexts[prev_node]))
1721	naccepted = 1;
1722
1723      if (naccepted == 0)
1724	continue;
1725
1726      if (sctx->limits.nelem)
1727	{
1728	  Idx to_idx = str_idx + naccepted;
1729	  if (check_dst_limits (mctx, &sctx->limits,
1730				dfa->nexts[prev_node], to_idx,
1731				prev_node, str_idx))
1732	    continue;
1733	}
1734      ok = re_node_set_insert (cur_dest, prev_node);
1735      if (BE (! ok, 0))
1736	return REG_ESPACE;
1737    }
1738
1739  return REG_NOERROR;
1740}
1741
1742/* Helper functions.  */
1743
1744static reg_errcode_t
1745internal_function
1746clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1747{
1748  Idx top = mctx->state_log_top;
1749
1750  if (next_state_log_idx >= mctx->input.bufs_len
1751      || (next_state_log_idx >= mctx->input.valid_len
1752	  && mctx->input.valid_len < mctx->input.len))
1753    {
1754      reg_errcode_t err;
1755      err = extend_buffers (mctx);
1756      if (BE (err != REG_NOERROR, 0))
1757	return err;
1758    }
1759
1760  if (top < next_state_log_idx)
1761    {
1762      memset (mctx->state_log + top + 1, '\0',
1763	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1764      mctx->state_log_top = next_state_log_idx;
1765    }
1766  return REG_NOERROR;
1767}
1768
1769static reg_errcode_t
1770internal_function
1771merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1772		   re_dfastate_t **src, Idx num)
1773{
1774  Idx st_idx;
1775  reg_errcode_t err;
1776  for (st_idx = 0; st_idx < num; ++st_idx)
1777    {
1778      if (dst[st_idx] == NULL)
1779	dst[st_idx] = src[st_idx];
1780      else if (src[st_idx] != NULL)
1781	{
1782	  re_node_set merged_set;
1783	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1784					&src[st_idx]->nodes);
1785	  if (BE (err != REG_NOERROR, 0))
1786	    return err;
1787	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1788	  re_node_set_free (&merged_set);
1789	  if (BE (err != REG_NOERROR, 0))
1790	    return err;
1791	}
1792    }
1793  return REG_NOERROR;
1794}
1795
1796static reg_errcode_t
1797internal_function
1798update_cur_sifted_state (const re_match_context_t *mctx,
1799			 re_sift_context_t *sctx, Idx str_idx,
1800			 re_node_set *dest_nodes)
1801{
1802  const re_dfa_t *const dfa = mctx->dfa;
1803  reg_errcode_t err = REG_NOERROR;
1804  const re_node_set *candidates;
1805  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1806		: &mctx->state_log[str_idx]->nodes);
1807
1808  if (dest_nodes->nelem == 0)
1809    sctx->sifted_states[str_idx] = NULL;
1810  else
1811    {
1812      if (candidates)
1813	{
1814	  /* At first, add the nodes which can epsilon transit to a node in
1815	     DEST_NODE.  */
1816	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1817	  if (BE (err != REG_NOERROR, 0))
1818	    return err;
1819
1820	  /* Then, check the limitations in the current sift_context.  */
1821	  if (sctx->limits.nelem)
1822	    {
1823	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1824					 mctx->bkref_ents, str_idx);
1825	      if (BE (err != REG_NOERROR, 0))
1826		return err;
1827	    }
1828	}
1829
1830      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1831      if (BE (err != REG_NOERROR, 0))
1832	return err;
1833    }
1834
1835  if (candidates && mctx->state_log[str_idx]->has_backref)
1836    {
1837      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1838      if (BE (err != REG_NOERROR, 0))
1839	return err;
1840    }
1841  return REG_NOERROR;
1842}
1843
1844static reg_errcode_t
1845internal_function
1846add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1847		       const re_node_set *candidates)
1848{
1849  reg_errcode_t err = REG_NOERROR;
1850  Idx i;
1851
1852  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1853  if (BE (err != REG_NOERROR, 0))
1854    return err;
1855
1856  if (!state->inveclosure.alloc)
1857    {
1858      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1859      if (BE (err != REG_NOERROR, 0))
1860        return REG_ESPACE;
1861      for (i = 0; i < dest_nodes->nelem; i++)
1862        re_node_set_merge (&state->inveclosure,
1863			   dfa->inveclosures + dest_nodes->elems[i]);
1864    }
1865  return re_node_set_add_intersect (dest_nodes, candidates,
1866				    &state->inveclosure);
1867}
1868
1869static reg_errcode_t
1870internal_function
1871sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1872		       const re_node_set *candidates)
1873{
1874    Idx ecl_idx;
1875    reg_errcode_t err;
1876    re_node_set *inv_eclosure = dfa->inveclosures + node;
1877    re_node_set except_nodes;
1878    re_node_set_init_empty (&except_nodes);
1879    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1880      {
1881	Idx cur_node = inv_eclosure->elems[ecl_idx];
1882	if (cur_node == node)
1883	  continue;
1884	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1885	  {
1886	    Idx edst1 = dfa->edests[cur_node].elems[0];
1887	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1888			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1889	    if ((!re_node_set_contains (inv_eclosure, edst1)
1890		 && re_node_set_contains (dest_nodes, edst1))
1891		|| (REG_VALID_NONZERO_INDEX (edst2)
1892		    && !re_node_set_contains (inv_eclosure, edst2)
1893		    && re_node_set_contains (dest_nodes, edst2)))
1894	      {
1895		err = re_node_set_add_intersect (&except_nodes, candidates,
1896						 dfa->inveclosures + cur_node);
1897		if (BE (err != REG_NOERROR, 0))
1898		  {
1899		    re_node_set_free (&except_nodes);
1900		    return err;
1901		  }
1902	      }
1903	  }
1904      }
1905    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1906      {
1907	Idx cur_node = inv_eclosure->elems[ecl_idx];
1908	if (!re_node_set_contains (&except_nodes, cur_node))
1909	  {
1910	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1911	    re_node_set_remove_at (dest_nodes, idx);
1912	  }
1913      }
1914    re_node_set_free (&except_nodes);
1915    return REG_NOERROR;
1916}
1917
1918static bool
1919internal_function
1920check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1921		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1922{
1923  const re_dfa_t *const dfa = mctx->dfa;
1924  Idx lim_idx, src_pos, dst_pos;
1925
1926  Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1927  Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1928  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1929    {
1930      Idx subexp_idx;
1931      struct re_backref_cache_entry *ent;
1932      ent = mctx->bkref_ents + limits->elems[lim_idx];
1933      subexp_idx = dfa->nodes[ent->node].opr.idx;
1934
1935      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1936					   subexp_idx, dst_node, dst_idx,
1937					   dst_bkref_idx);
1938      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1939					   subexp_idx, src_node, src_idx,
1940					   src_bkref_idx);
1941
1942      /* In case of:
1943	 <src> <dst> ( <subexp> )
1944	 ( <subexp> ) <src> <dst>
1945	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1946      if (src_pos == dst_pos)
1947	continue; /* This is unrelated limitation.  */
1948      else
1949	return true;
1950    }
1951  return false;
1952}
1953
1954static int
1955internal_function
1956check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1957			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1958{
1959  const re_dfa_t *const dfa = mctx->dfa;
1960  const re_node_set *eclosures = dfa->eclosures + from_node;
1961  Idx node_idx;
1962
1963  /* Else, we are on the boundary: examine the nodes on the epsilon
1964     closure.  */
1965  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1966    {
1967      Idx node = eclosures->elems[node_idx];
1968      switch (dfa->nodes[node].type)
1969	{
1970	case OP_BACK_REF:
1971	  if (bkref_idx != REG_MISSING)
1972	    {
1973	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1974	      do
1975	        {
1976		  Idx dst;
1977		  int cpos;
1978
1979		  if (ent->node != node)
1980		    continue;
1981
1982		  if (subexp_idx < BITSET_WORD_BITS
1983		      && !(ent->eps_reachable_subexps_map
1984			   & ((bitset_word_t) 1 << subexp_idx)))
1985		    continue;
1986
1987		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1988		     OP_CLOSE_SUBEXP cases below.  But, if the
1989		     destination node is the same node as the source
1990		     node, don't recurse because it would cause an
1991		     infinite loop: a regex that exhibits this behavior
1992		     is ()\1*\1*  */
1993		  dst = dfa->edests[node].elems[0];
1994		  if (dst == from_node)
1995		    {
1996		      if (boundaries & 1)
1997		        return -1;
1998		      else /* if (boundaries & 2) */
1999		        return 0;
2000		    }
2001
2002		  cpos =
2003		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2004						 dst, bkref_idx);
2005		  if (cpos == -1 /* && (boundaries & 1) */)
2006		    return -1;
2007		  if (cpos == 0 && (boundaries & 2))
2008		    return 0;
2009
2010		  if (subexp_idx < BITSET_WORD_BITS)
2011		    ent->eps_reachable_subexps_map
2012		      &= ~((bitset_word_t) 1 << subexp_idx);
2013	        }
2014	      while (ent++->more);
2015	    }
2016	  break;
2017
2018	case OP_OPEN_SUBEXP:
2019	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2020	    return -1;
2021	  break;
2022
2023	case OP_CLOSE_SUBEXP:
2024	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2025	    return 0;
2026	  break;
2027
2028	default:
2029	    break;
2030	}
2031    }
2032
2033  return (boundaries & 2) ? 1 : 0;
2034}
2035
2036static int
2037internal_function
2038check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2039			   Idx subexp_idx, Idx from_node, Idx str_idx,
2040			   Idx bkref_idx)
2041{
2042  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2043  int boundaries;
2044
2045  /* If we are outside the range of the subexpression, return -1 or 1.  */
2046  if (str_idx < lim->subexp_from)
2047    return -1;
2048
2049  if (lim->subexp_to < str_idx)
2050    return 1;
2051
2052  /* If we are within the subexpression, return 0.  */
2053  boundaries = (str_idx == lim->subexp_from);
2054  boundaries |= (str_idx == lim->subexp_to) << 1;
2055  if (boundaries == 0)
2056    return 0;
2057
2058  /* Else, examine epsilon closure.  */
2059  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2060				      from_node, bkref_idx);
2061}
2062
2063/* Check the limitations of sub expressions LIMITS, and remove the nodes
2064   which are against limitations from DEST_NODES. */
2065
2066static reg_errcode_t
2067internal_function
2068check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2069		     const re_node_set *candidates, re_node_set *limits,
2070		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2071{
2072  reg_errcode_t err;
2073  Idx node_idx, lim_idx;
2074
2075  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2076    {
2077      Idx subexp_idx;
2078      struct re_backref_cache_entry *ent;
2079      ent = bkref_ents + limits->elems[lim_idx];
2080
2081      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2082	continue; /* This is unrelated limitation.  */
2083
2084      subexp_idx = dfa->nodes[ent->node].opr.idx;
2085      if (ent->subexp_to == str_idx)
2086	{
2087	  Idx ops_node = REG_MISSING;
2088	  Idx cls_node = REG_MISSING;
2089	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2090	    {
2091	      Idx node = dest_nodes->elems[node_idx];
2092	      re_token_type_t type = dfa->nodes[node].type;
2093	      if (type == OP_OPEN_SUBEXP
2094		  && subexp_idx == dfa->nodes[node].opr.idx)
2095		ops_node = node;
2096	      else if (type == OP_CLOSE_SUBEXP
2097		       && subexp_idx == dfa->nodes[node].opr.idx)
2098		cls_node = node;
2099	    }
2100
2101	  /* Check the limitation of the open subexpression.  */
2102	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2103	  if (REG_VALID_INDEX (ops_node))
2104	    {
2105	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2106					   candidates);
2107	      if (BE (err != REG_NOERROR, 0))
2108		return err;
2109	    }
2110
2111	  /* Check the limitation of the close subexpression.  */
2112	  if (REG_VALID_INDEX (cls_node))
2113	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2114	      {
2115		Idx node = dest_nodes->elems[node_idx];
2116		if (!re_node_set_contains (dfa->inveclosures + node,
2117					   cls_node)
2118		    && !re_node_set_contains (dfa->eclosures + node,
2119					      cls_node))
2120		  {
2121		    /* It is against this limitation.
2122		       Remove it form the current sifted state.  */
2123		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2124						 candidates);
2125		    if (BE (err != REG_NOERROR, 0))
2126		      return err;
2127		    --node_idx;
2128		  }
2129	      }
2130	}
2131      else /* (ent->subexp_to != str_idx)  */
2132	{
2133	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2134	    {
2135	      Idx node = dest_nodes->elems[node_idx];
2136	      re_token_type_t type = dfa->nodes[node].type;
2137	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2138		{
2139		  if (subexp_idx != dfa->nodes[node].opr.idx)
2140		    continue;
2141		  /* It is against this limitation.
2142		     Remove it form the current sifted state.  */
2143		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2144					       candidates);
2145		  if (BE (err != REG_NOERROR, 0))
2146		    return err;
2147		}
2148	    }
2149	}
2150    }
2151  return REG_NOERROR;
2152}
2153
2154static reg_errcode_t
2155internal_function
2156sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2157		   Idx str_idx, const re_node_set *candidates)
2158{
2159  const re_dfa_t *const dfa = mctx->dfa;
2160  reg_errcode_t err;
2161  Idx node_idx, node;
2162  re_sift_context_t local_sctx;
2163  Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2164
2165  if (first_idx == REG_MISSING)
2166    return REG_NOERROR;
2167
2168  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2169
2170  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2171    {
2172      Idx enabled_idx;
2173      re_token_type_t type;
2174      struct re_backref_cache_entry *entry;
2175      node = candidates->elems[node_idx];
2176      type = dfa->nodes[node].type;
2177      /* Avoid infinite loop for the REs like "()\1+".  */
2178      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2179	continue;
2180      if (type != OP_BACK_REF)
2181	continue;
2182
2183      entry = mctx->bkref_ents + first_idx;
2184      enabled_idx = first_idx;
2185      do
2186	{
2187	  Idx subexp_len;
2188	  Idx to_idx;
2189	  Idx dst_node;
2190	  bool ok;
2191	  re_dfastate_t *cur_state;
2192
2193	  if (entry->node != node)
2194	    continue;
2195	  subexp_len = entry->subexp_to - entry->subexp_from;
2196	  to_idx = str_idx + subexp_len;
2197	  dst_node = (subexp_len ? dfa->nexts[node]
2198		      : dfa->edests[node].elems[0]);
2199
2200	  if (to_idx > sctx->last_str_idx
2201	      || sctx->sifted_states[to_idx] == NULL
2202	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2203	      || check_dst_limits (mctx, &sctx->limits, node,
2204				   str_idx, dst_node, to_idx))
2205	    continue;
2206
2207	  if (local_sctx.sifted_states == NULL)
2208	    {
2209	      local_sctx = *sctx;
2210	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2211	      if (BE (err != REG_NOERROR, 0))
2212		goto free_return;
2213	    }
2214	  local_sctx.last_node = node;
2215	  local_sctx.last_str_idx = str_idx;
2216	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2217	  if (BE (! ok, 0))
2218	    {
2219	      err = REG_ESPACE;
2220	      goto free_return;
2221	    }
2222	  cur_state = local_sctx.sifted_states[str_idx];
2223	  err = sift_states_backward (mctx, &local_sctx);
2224	  if (BE (err != REG_NOERROR, 0))
2225	    goto free_return;
2226	  if (sctx->limited_states != NULL)
2227	    {
2228	      err = merge_state_array (dfa, sctx->limited_states,
2229				       local_sctx.sifted_states,
2230				       str_idx + 1);
2231	      if (BE (err != REG_NOERROR, 0))
2232		goto free_return;
2233	    }
2234	  local_sctx.sifted_states[str_idx] = cur_state;
2235	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2236
2237	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2238          entry = mctx->bkref_ents + enabled_idx;
2239	}
2240      while (enabled_idx++, entry++->more);
2241    }
2242  err = REG_NOERROR;
2243 free_return:
2244  if (local_sctx.sifted_states != NULL)
2245    {
2246      re_node_set_free (&local_sctx.limits);
2247    }
2248
2249  return err;
2250}
2251
2252
2253#ifdef RE_ENABLE_I18N
2254static int
2255internal_function
2256sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2257		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2258{
2259  const re_dfa_t *const dfa = mctx->dfa;
2260  int naccepted;
2261  /* Check the node can accept `multi byte'.  */
2262  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2263  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2264      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2265			    dfa->nexts[node_idx]))
2266    /* The node can't accept the `multi byte', or the
2267       destination was already thrown away, then the node
2268       could't accept the current input `multi byte'.   */
2269    naccepted = 0;
2270  /* Otherwise, it is sure that the node could accept
2271     `naccepted' bytes input.  */
2272  return naccepted;
2273}
2274#endif /* RE_ENABLE_I18N */
2275
2276
2277/* Functions for state transition.  */
2278
2279/* Return the next state to which the current state STATE will transit by
2280   accepting the current input byte, and update STATE_LOG if necessary.
2281   If STATE can accept a multibyte char/collating element/back reference
2282   update the destination of STATE_LOG.  */
2283
2284static re_dfastate_t *
2285internal_function
2286transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2287	       re_dfastate_t *state)
2288{
2289  re_dfastate_t **trtable;
2290  unsigned char ch;
2291
2292#ifdef RE_ENABLE_I18N
2293  /* If the current state can accept multibyte.  */
2294  if (BE (state->accept_mb, 0))
2295    {
2296      *err = transit_state_mb (mctx, state);
2297      if (BE (*err != REG_NOERROR, 0))
2298	return NULL;
2299    }
2300#endif /* RE_ENABLE_I18N */
2301
2302  /* Then decide the next state with the single byte.  */
2303#if 0
2304  if (0)
2305    /* don't use transition table  */
2306    return transit_state_sb (err, mctx, state);
2307#endif
2308
2309  /* Use transition table  */
2310  ch = re_string_fetch_byte (&mctx->input);
2311  for (;;)
2312    {
2313      trtable = state->trtable;
2314      if (BE (trtable != NULL, 1))
2315	return trtable[ch];
2316
2317      trtable = state->word_trtable;
2318      if (BE (trtable != NULL, 1))
2319        {
2320	  unsigned int context;
2321	  context
2322	    = re_string_context_at (&mctx->input,
2323				    re_string_cur_idx (&mctx->input) - 1,
2324				    mctx->eflags);
2325	  if (IS_WORD_CONTEXT (context))
2326	    return trtable[ch + SBC_MAX];
2327	  else
2328	    return trtable[ch];
2329	}
2330
2331      if (!build_trtable (mctx->dfa, state))
2332	{
2333	  *err = REG_ESPACE;
2334	  return NULL;
2335	}
2336
2337      /* Retry, we now have a transition table.  */
2338    }
2339}
2340
2341/* Update the state_log if we need */
2342static re_dfastate_t *
2343internal_function
2344merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2345		      re_dfastate_t *next_state)
2346{
2347  const re_dfa_t *const dfa = mctx->dfa;
2348  Idx cur_idx = re_string_cur_idx (&mctx->input);
2349
2350  if (cur_idx > mctx->state_log_top)
2351    {
2352      mctx->state_log[cur_idx] = next_state;
2353      mctx->state_log_top = cur_idx;
2354    }
2355  else if (mctx->state_log[cur_idx] == 0)
2356    {
2357      mctx->state_log[cur_idx] = next_state;
2358    }
2359  else
2360    {
2361      re_dfastate_t *pstate;
2362      unsigned int context;
2363      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2364      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2365         the destination of a multibyte char/collating element/
2366         back reference.  Then the next state is the union set of
2367         these destinations and the results of the transition table.  */
2368      pstate = mctx->state_log[cur_idx];
2369      log_nodes = pstate->entrance_nodes;
2370      if (next_state != NULL)
2371        {
2372          table_nodes = next_state->entrance_nodes;
2373          *err = re_node_set_init_union (&next_nodes, table_nodes,
2374					     log_nodes);
2375          if (BE (*err != REG_NOERROR, 0))
2376	    return NULL;
2377        }
2378      else
2379        next_nodes = *log_nodes;
2380      /* Note: We already add the nodes of the initial state,
2381	 then we don't need to add them here.  */
2382
2383      context = re_string_context_at (&mctx->input,
2384				      re_string_cur_idx (&mctx->input) - 1,
2385				      mctx->eflags);
2386      next_state = mctx->state_log[cur_idx]
2387        = re_acquire_state_context (err, dfa, &next_nodes, context);
2388      /* We don't need to check errors here, since the return value of
2389         this function is next_state and ERR is already set.  */
2390
2391      if (table_nodes != NULL)
2392        re_node_set_free (&next_nodes);
2393    }
2394
2395  if (BE (dfa->nbackref, 0) && next_state != NULL)
2396    {
2397      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2398	 later.  We must check them here, since the back references in the
2399	 next state might use them.  */
2400      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2401					cur_idx);
2402      if (BE (*err != REG_NOERROR, 0))
2403	return NULL;
2404
2405      /* If the next state has back references.  */
2406      if (next_state->has_backref)
2407	{
2408	  *err = transit_state_bkref (mctx, &next_state->nodes);
2409	  if (BE (*err != REG_NOERROR, 0))
2410	    return NULL;
2411	  next_state = mctx->state_log[cur_idx];
2412	}
2413    }
2414
2415  return next_state;
2416}
2417
2418/* Skip bytes in the input that correspond to part of a
2419   multi-byte match, then look in the log for a state
2420   from which to restart matching.  */
2421static re_dfastate_t *
2422internal_function
2423find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2424{
2425  re_dfastate_t *cur_state;
2426  do
2427    {
2428      Idx max = mctx->state_log_top;
2429      Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2430
2431      do
2432	{
2433          if (++cur_str_idx > max)
2434            return NULL;
2435          re_string_skip_bytes (&mctx->input, 1);
2436	}
2437      while (mctx->state_log[cur_str_idx] == NULL);
2438
2439      cur_state = merge_state_with_log (err, mctx, NULL);
2440    }
2441  while (*err == REG_NOERROR && cur_state == NULL);
2442  return cur_state;
2443}
2444
2445/* Helper functions for transit_state.  */
2446
2447/* From the node set CUR_NODES, pick up the nodes whose types are
2448   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2449   expression. And register them to use them later for evaluating the
2450   correspoding back references.  */
2451
2452static reg_errcode_t
2453internal_function
2454check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2455			   Idx str_idx)
2456{
2457  const re_dfa_t *const dfa = mctx->dfa;
2458  Idx node_idx;
2459  reg_errcode_t err;
2460
2461  /* TODO: This isn't efficient.
2462	   Because there might be more than one nodes whose types are
2463	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2464	   nodes.
2465	   E.g. RE: (a){2}  */
2466  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2467    {
2468      Idx node = cur_nodes->elems[node_idx];
2469      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2470	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2471	  && (dfa->used_bkref_map
2472	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2473	{
2474	  err = match_ctx_add_subtop (mctx, node, str_idx);
2475	  if (BE (err != REG_NOERROR, 0))
2476	    return err;
2477	}
2478    }
2479  return REG_NOERROR;
2480}
2481
2482#if 0
2483/* Return the next state to which the current state STATE will transit by
2484   accepting the current input byte.  */
2485
2486static re_dfastate_t *
2487transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2488		  re_dfastate_t *state)
2489{
2490  const re_dfa_t *const dfa = mctx->dfa;
2491  re_node_set next_nodes;
2492  re_dfastate_t *next_state;
2493  Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2494  unsigned int context;
2495
2496  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2497  if (BE (*err != REG_NOERROR, 0))
2498    return NULL;
2499  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2500    {
2501      Idx cur_node = state->nodes.elems[node_cnt];
2502      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2503	{
2504	  *err = re_node_set_merge (&next_nodes,
2505				    dfa->eclosures + dfa->nexts[cur_node]);
2506	  if (BE (*err != REG_NOERROR, 0))
2507	    {
2508	      re_node_set_free (&next_nodes);
2509	      return NULL;
2510	    }
2511	}
2512    }
2513  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2514  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2515  /* We don't need to check errors here, since the return value of
2516     this function is next_state and ERR is already set.  */
2517
2518  re_node_set_free (&next_nodes);
2519  re_string_skip_bytes (&mctx->input, 1);
2520  return next_state;
2521}
2522#endif
2523
2524#ifdef RE_ENABLE_I18N
2525static reg_errcode_t
2526internal_function
2527transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2528{
2529  const re_dfa_t *const dfa = mctx->dfa;
2530  reg_errcode_t err;
2531  Idx i;
2532
2533  for (i = 0; i < pstate->nodes.nelem; ++i)
2534    {
2535      re_node_set dest_nodes, *new_nodes;
2536      Idx cur_node_idx = pstate->nodes.elems[i];
2537      int naccepted;
2538      Idx dest_idx;
2539      unsigned int context;
2540      re_dfastate_t *dest_state;
2541
2542      if (!dfa->nodes[cur_node_idx].accept_mb)
2543        continue;
2544
2545      if (dfa->nodes[cur_node_idx].constraint)
2546	{
2547	  context = re_string_context_at (&mctx->input,
2548					  re_string_cur_idx (&mctx->input),
2549					  mctx->eflags);
2550	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2551					   context))
2552	    continue;
2553	}
2554
2555      /* How many bytes the node can accept?  */
2556      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2557					   re_string_cur_idx (&mctx->input));
2558      if (naccepted == 0)
2559	continue;
2560
2561      /* The node can accepts `naccepted' bytes.  */
2562      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2563      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2564			       : mctx->max_mb_elem_len);
2565      err = clean_state_log_if_needed (mctx, dest_idx);
2566      if (BE (err != REG_NOERROR, 0))
2567	return err;
2568#ifdef DEBUG
2569      assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2570#endif
2571      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2572
2573      dest_state = mctx->state_log[dest_idx];
2574      if (dest_state == NULL)
2575	dest_nodes = *new_nodes;
2576      else
2577	{
2578	  err = re_node_set_init_union (&dest_nodes,
2579					dest_state->entrance_nodes, new_nodes);
2580	  if (BE (err != REG_NOERROR, 0))
2581	    return err;
2582	}
2583      context = re_string_context_at (&mctx->input, dest_idx - 1,
2584				      mctx->eflags);
2585      mctx->state_log[dest_idx]
2586	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2587      if (dest_state != NULL)
2588	re_node_set_free (&dest_nodes);
2589      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2590	return err;
2591    }
2592  return REG_NOERROR;
2593}
2594#endif /* RE_ENABLE_I18N */
2595
2596static reg_errcode_t
2597internal_function
2598transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2599{
2600  const re_dfa_t *const dfa = mctx->dfa;
2601  reg_errcode_t err;
2602  Idx i;
2603  Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2604
2605  for (i = 0; i < nodes->nelem; ++i)
2606    {
2607      Idx dest_str_idx, prev_nelem, bkc_idx;
2608      Idx node_idx = nodes->elems[i];
2609      unsigned int context;
2610      const re_token_t *node = dfa->nodes + node_idx;
2611      re_node_set *new_dest_nodes;
2612
2613      /* Check whether `node' is a backreference or not.  */
2614      if (node->type != OP_BACK_REF)
2615	continue;
2616
2617      if (node->constraint)
2618	{
2619	  context = re_string_context_at (&mctx->input, cur_str_idx,
2620					  mctx->eflags);
2621	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2622	    continue;
2623	}
2624
2625      /* `node' is a backreference.
2626	 Check the substring which the substring matched.  */
2627      bkc_idx = mctx->nbkref_ents;
2628      err = get_subexp (mctx, node_idx, cur_str_idx);
2629      if (BE (err != REG_NOERROR, 0))
2630	goto free_return;
2631
2632      /* And add the epsilon closures (which is `new_dest_nodes') of
2633	 the backreference to appropriate state_log.  */
2634#ifdef DEBUG
2635      assert (dfa->nexts[node_idx] != REG_MISSING);
2636#endif
2637      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2638	{
2639	  Idx subexp_len;
2640	  re_dfastate_t *dest_state;
2641	  struct re_backref_cache_entry *bkref_ent;
2642	  bkref_ent = mctx->bkref_ents + bkc_idx;
2643	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2644	    continue;
2645	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2646	  new_dest_nodes = (subexp_len == 0
2647			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2648			    : dfa->eclosures + dfa->nexts[node_idx]);
2649	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2650			  - bkref_ent->subexp_from);
2651	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2652					  mctx->eflags);
2653	  dest_state = mctx->state_log[dest_str_idx];
2654	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2655			: mctx->state_log[cur_str_idx]->nodes.nelem);
2656	  /* Add `new_dest_node' to state_log.  */
2657	  if (dest_state == NULL)
2658	    {
2659	      mctx->state_log[dest_str_idx]
2660		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2661					    context);
2662	      if (BE (mctx->state_log[dest_str_idx] == NULL
2663		      && err != REG_NOERROR, 0))
2664		goto free_return;
2665	    }
2666	  else
2667	    {
2668	      re_node_set dest_nodes;
2669	      err = re_node_set_init_union (&dest_nodes,
2670					    dest_state->entrance_nodes,
2671					    new_dest_nodes);
2672	      if (BE (err != REG_NOERROR, 0))
2673		{
2674		  re_node_set_free (&dest_nodes);
2675		  goto free_return;
2676		}
2677	      mctx->state_log[dest_str_idx]
2678		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2679	      re_node_set_free (&dest_nodes);
2680	      if (BE (mctx->state_log[dest_str_idx] == NULL
2681		      && err != REG_NOERROR, 0))
2682		goto free_return;
2683	    }
2684	  /* We need to check recursively if the backreference can epsilon
2685	     transit.  */
2686	  if (subexp_len == 0
2687	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2688	    {
2689	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2690					       cur_str_idx);
2691	      if (BE (err != REG_NOERROR, 0))
2692		goto free_return;
2693	      err = transit_state_bkref (mctx, new_dest_nodes);
2694	      if (BE (err != REG_NOERROR, 0))
2695		goto free_return;
2696	    }
2697	}
2698    }
2699  err = REG_NOERROR;
2700 free_return:
2701  return err;
2702}
2703
2704/* Enumerate all the candidates which the backreference BKREF_NODE can match
2705   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2706   Note that we might collect inappropriate candidates here.
2707   However, the cost of checking them strictly here is too high, then we
2708   delay these checking for prune_impossible_nodes().  */
2709
2710static reg_errcode_t
2711internal_function
2712get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2713{
2714  const re_dfa_t *const dfa = mctx->dfa;
2715  Idx subexp_num, sub_top_idx;
2716  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2717  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2718  Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2719  if (cache_idx != REG_MISSING)
2720    {
2721      const struct re_backref_cache_entry *entry
2722	= mctx->bkref_ents + cache_idx;
2723      do
2724        if (entry->node == bkref_node)
2725	  return REG_NOERROR; /* We already checked it.  */
2726      while (entry++->more);
2727    }
2728
2729  subexp_num = dfa->nodes[bkref_node].opr.idx;
2730
2731  /* For each sub expression  */
2732  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2733    {
2734      reg_errcode_t err;
2735      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2736      re_sub_match_last_t *sub_last;
2737      Idx sub_last_idx, sl_str, bkref_str_off;
2738
2739      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2740	continue; /* It isn't related.  */
2741
2742      sl_str = sub_top->str_idx;
2743      bkref_str_off = bkref_str_idx;
2744      /* At first, check the last node of sub expressions we already
2745	 evaluated.  */
2746      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2747	{
2748	  regoff_t sl_str_diff;
2749	  sub_last = sub_top->lasts[sub_last_idx];
2750	  sl_str_diff = sub_last->str_idx - sl_str;
2751	  /* The matched string by the sub expression match with the substring
2752	     at the back reference?  */
2753	  if (sl_str_diff > 0)
2754	    {
2755	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2756		{
2757		  /* Not enough chars for a successful match.  */
2758		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2759		    break;
2760
2761		  err = clean_state_log_if_needed (mctx,
2762						   bkref_str_off
2763						   + sl_str_diff);
2764		  if (BE (err != REG_NOERROR, 0))
2765		    return err;
2766		  buf = (const char *) re_string_get_buffer (&mctx->input);
2767		}
2768	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2769		/* We don't need to search this sub expression any more.  */
2770		break;
2771	    }
2772	  bkref_str_off += sl_str_diff;
2773	  sl_str += sl_str_diff;
2774	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2775				bkref_str_idx);
2776
2777	  /* Reload buf, since the preceding call might have reallocated
2778	     the buffer.  */
2779	  buf = (const char *) re_string_get_buffer (&mctx->input);
2780
2781	  if (err == REG_NOMATCH)
2782	    continue;
2783	  if (BE (err != REG_NOERROR, 0))
2784	    return err;
2785	}
2786
2787      if (sub_last_idx < sub_top->nlasts)
2788	continue;
2789      if (sub_last_idx > 0)
2790	++sl_str;
2791      /* Then, search for the other last nodes of the sub expression.  */
2792      for (; sl_str <= bkref_str_idx; ++sl_str)
2793	{
2794	  Idx cls_node;
2795	  regoff_t sl_str_off;
2796	  const re_node_set *nodes;
2797	  sl_str_off = sl_str - sub_top->str_idx;
2798	  /* The matched string by the sub expression match with the substring
2799	     at the back reference?  */
2800	  if (sl_str_off > 0)
2801	    {
2802	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2803		{
2804		  /* If we are at the end of the input, we cannot match.  */
2805		  if (bkref_str_off >= mctx->input.len)
2806		    break;
2807
2808		  err = extend_buffers (mctx);
2809		  if (BE (err != REG_NOERROR, 0))
2810		    return err;
2811
2812		  buf = (const char *) re_string_get_buffer (&mctx->input);
2813		}
2814	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2815		break; /* We don't need to search this sub expression
2816			  any more.  */
2817	    }
2818	  if (mctx->state_log[sl_str] == NULL)
2819	    continue;
2820	  /* Does this state have a ')' of the sub expression?  */
2821	  nodes = &mctx->state_log[sl_str]->nodes;
2822	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2823				       OP_CLOSE_SUBEXP);
2824	  if (cls_node == REG_MISSING)
2825	    continue; /* No.  */
2826	  if (sub_top->path == NULL)
2827	    {
2828	      sub_top->path = calloc (sizeof (state_array_t),
2829				      sl_str - sub_top->str_idx + 1);
2830	      if (sub_top->path == NULL)
2831		return REG_ESPACE;
2832	    }
2833	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2834	     in the current context?  */
2835	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2836			       sub_top->str_idx, cls_node, sl_str,
2837			       OP_CLOSE_SUBEXP);
2838	  if (err == REG_NOMATCH)
2839	      continue;
2840	  if (BE (err != REG_NOERROR, 0))
2841	      return err;
2842	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2843	  if (BE (sub_last == NULL, 0))
2844	    return REG_ESPACE;
2845	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2846				bkref_str_idx);
2847	  if (err == REG_NOMATCH)
2848	    continue;
2849	}
2850    }
2851  return REG_NOERROR;
2852}
2853
2854/* Helper functions for get_subexp().  */
2855
2856/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2857   If it can arrive, register the sub expression expressed with SUB_TOP
2858   and SUB_LAST.  */
2859
2860static reg_errcode_t
2861internal_function
2862get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2863		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2864{
2865  reg_errcode_t err;
2866  Idx to_idx;
2867  /* Can the subexpression arrive the back reference?  */
2868  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2869		       sub_last->str_idx, bkref_node, bkref_str,
2870		       OP_OPEN_SUBEXP);
2871  if (err != REG_NOERROR)
2872    return err;
2873  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2874			     sub_last->str_idx);
2875  if (BE (err != REG_NOERROR, 0))
2876    return err;
2877  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2878  return clean_state_log_if_needed (mctx, to_idx);
2879}
2880
2881/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2882   Search '(' if FL_OPEN, or search ')' otherwise.
2883   TODO: This function isn't efficient...
2884	 Because there might be more than one nodes whose types are
2885	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2886	 nodes.
2887	 E.g. RE: (a){2}  */
2888
2889static Idx
2890internal_function
2891find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2892		  Idx subexp_idx, int type)
2893{
2894  Idx cls_idx;
2895  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2896    {
2897      Idx cls_node = nodes->elems[cls_idx];
2898      const re_token_t *node = dfa->nodes + cls_node;
2899      if (node->type == type
2900	  && node->opr.idx == subexp_idx)
2901	return cls_node;
2902    }
2903  return REG_MISSING;
2904}
2905
2906/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2907   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2908   heavily reused.
2909   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2910
2911static reg_errcode_t
2912internal_function
2913check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2914	       Idx top_str, Idx last_node, Idx last_str, int type)
2915{
2916  const re_dfa_t *const dfa = mctx->dfa;
2917  reg_errcode_t err = REG_NOERROR;
2918  Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2919  re_dfastate_t *cur_state = NULL;
2920  re_node_set *cur_nodes, next_nodes;
2921  re_dfastate_t **backup_state_log;
2922  unsigned int context;
2923
2924  subexp_num = dfa->nodes[top_node].opr.idx;
2925  /* Extend the buffer if we need.  */
2926  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2927    {
2928      re_dfastate_t **new_array;
2929      Idx old_alloc = path->alloc;
2930      Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2931      if (BE (new_alloc < old_alloc, 0)
2932	  || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2933	return REG_ESPACE;
2934      new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2935      if (BE (new_array == NULL, 0))
2936	return REG_ESPACE;
2937      path->array = new_array;
2938      path->alloc = new_alloc;
2939      memset (new_array + old_alloc, '\0',
2940	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2941    }
2942
2943  str_idx = path->next_idx ? path->next_idx : top_str;
2944
2945  /* Temporary modify MCTX.  */
2946  backup_state_log = mctx->state_log;
2947  backup_cur_idx = mctx->input.cur_idx;
2948  mctx->state_log = path->array;
2949  mctx->input.cur_idx = str_idx;
2950
2951  /* Setup initial node set.  */
2952  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2953  if (str_idx == top_str)
2954    {
2955      err = re_node_set_init_1 (&next_nodes, top_node);
2956      if (BE (err != REG_NOERROR, 0))
2957	return err;
2958      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2959      if (BE (err != REG_NOERROR, 0))
2960	{
2961	  re_node_set_free (&next_nodes);
2962	  return err;
2963	}
2964    }
2965  else
2966    {
2967      cur_state = mctx->state_log[str_idx];
2968      if (cur_state && cur_state->has_backref)
2969	{
2970	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2971	  if (BE (err != REG_NOERROR, 0))
2972	    return err;
2973	}
2974      else
2975	re_node_set_init_empty (&next_nodes);
2976    }
2977  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2978    {
2979      if (next_nodes.nelem)
2980	{
2981	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2982				    subexp_num, type);
2983	  if (BE (err != REG_NOERROR, 0))
2984	    {
2985	      re_node_set_free (&next_nodes);
2986	      return err;
2987	    }
2988	}
2989      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2990      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2991	{
2992	  re_node_set_free (&next_nodes);
2993	  return err;
2994	}
2995      mctx->state_log[str_idx] = cur_state;
2996    }
2997
2998  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2999    {
3000      re_node_set_empty (&next_nodes);
3001      if (mctx->state_log[str_idx + 1])
3002	{
3003	  err = re_node_set_merge (&next_nodes,
3004				   &mctx->state_log[str_idx + 1]->nodes);
3005	  if (BE (err != REG_NOERROR, 0))
3006	    {
3007	      re_node_set_free (&next_nodes);
3008	      return err;
3009	    }
3010	}
3011      if (cur_state)
3012	{
3013	  err = check_arrival_add_next_nodes (mctx, str_idx,
3014					      &cur_state->non_eps_nodes,
3015					      &next_nodes);
3016	  if (BE (err != REG_NOERROR, 0))
3017	    {
3018	      re_node_set_free (&next_nodes);
3019	      return err;
3020	    }
3021	}
3022      ++str_idx;
3023      if (next_nodes.nelem)
3024	{
3025	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3026	  if (BE (err != REG_NOERROR, 0))
3027	    {
3028	      re_node_set_free (&next_nodes);
3029	      return err;
3030	    }
3031	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3032				    subexp_num, type);
3033	  if (BE (err != REG_NOERROR, 0))
3034	    {
3035	      re_node_set_free (&next_nodes);
3036	      return err;
3037	    }
3038	}
3039      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3040      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3041      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3042	{
3043	  re_node_set_free (&next_nodes);
3044	  return err;
3045	}
3046      mctx->state_log[str_idx] = cur_state;
3047      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3048    }
3049  re_node_set_free (&next_nodes);
3050  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3051	       : &mctx->state_log[last_str]->nodes);
3052  path->next_idx = str_idx;
3053
3054  /* Fix MCTX.  */
3055  mctx->state_log = backup_state_log;
3056  mctx->input.cur_idx = backup_cur_idx;
3057
3058  /* Then check the current node set has the node LAST_NODE.  */
3059  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3060    return REG_NOERROR;
3061
3062  return REG_NOMATCH;
3063}
3064
3065/* Helper functions for check_arrival.  */
3066
3067/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3068   to NEXT_NODES.
3069   TODO: This function is similar to the functions transit_state*(),
3070	 however this function has many additional works.
3071	 Can't we unify them?  */
3072
3073static reg_errcode_t
3074internal_function
3075check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3076			      re_node_set *cur_nodes, re_node_set *next_nodes)
3077{
3078  const re_dfa_t *const dfa = mctx->dfa;
3079  bool ok;
3080  Idx cur_idx;
3081  reg_errcode_t err = REG_NOERROR;
3082  re_node_set union_set;
3083  re_node_set_init_empty (&union_set);
3084  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3085    {
3086      int naccepted = 0;
3087      Idx cur_node = cur_nodes->elems[cur_idx];
3088#ifdef DEBUG
3089      re_token_type_t type = dfa->nodes[cur_node].type;
3090      assert (!IS_EPSILON_NODE (type));
3091#endif
3092#ifdef RE_ENABLE_I18N
3093      /* If the node may accept `multi byte'.  */
3094      if (dfa->nodes[cur_node].accept_mb)
3095	{
3096	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3097					       str_idx);
3098	  if (naccepted > 1)
3099	    {
3100	      re_dfastate_t *dest_state;
3101	      Idx next_node = dfa->nexts[cur_node];
3102	      Idx next_idx = str_idx + naccepted;
3103	      dest_state = mctx->state_log[next_idx];
3104	      re_node_set_empty (&union_set);
3105	      if (dest_state)
3106		{
3107		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3108		  if (BE (err != REG_NOERROR, 0))
3109		    {
3110		      re_node_set_free (&union_set);
3111		      return err;
3112		    }
3113		}
3114	      ok = re_node_set_insert (&union_set, next_node);
3115	      if (BE (! ok, 0))
3116		{
3117		  re_node_set_free (&union_set);
3118		  return REG_ESPACE;
3119		}
3120	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3121							    &union_set);
3122	      if (BE (mctx->state_log[next_idx] == NULL
3123		      && err != REG_NOERROR, 0))
3124		{
3125		  re_node_set_free (&union_set);
3126		  return err;
3127		}
3128	    }
3129	}
3130#endif /* RE_ENABLE_I18N */
3131      if (naccepted
3132	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3133	{
3134	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3135	  if (BE (! ok, 0))
3136	    {
3137	      re_node_set_free (&union_set);
3138	      return REG_ESPACE;
3139	    }
3140	}
3141    }
3142  re_node_set_free (&union_set);
3143  return REG_NOERROR;
3144}
3145
3146/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3147   CUR_NODES, however exclude the nodes which are:
3148    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3149    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3150*/
3151
3152static reg_errcode_t
3153internal_function
3154check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3155			  Idx ex_subexp, int type)
3156{
3157  reg_errcode_t err;
3158  Idx idx, outside_node;
3159  re_node_set new_nodes;
3160#ifdef DEBUG
3161  assert (cur_nodes->nelem);
3162#endif
3163  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3164  if (BE (err != REG_NOERROR, 0))
3165    return err;
3166  /* Create a new node set NEW_NODES with the nodes which are epsilon
3167     closures of the node in CUR_NODES.  */
3168
3169  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3170    {
3171      Idx cur_node = cur_nodes->elems[idx];
3172      const re_node_set *eclosure = dfa->eclosures + cur_node;
3173      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3174      if (outside_node == REG_MISSING)
3175	{
3176	  /* There are no problematic nodes, just merge them.  */
3177	  err = re_node_set_merge (&new_nodes, eclosure);
3178	  if (BE (err != REG_NOERROR, 0))
3179	    {
3180	      re_node_set_free (&new_nodes);
3181	      return err;
3182	    }
3183	}
3184      else
3185	{
3186	  /* There are problematic nodes, re-calculate incrementally.  */
3187	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3188					      ex_subexp, type);
3189	  if (BE (err != REG_NOERROR, 0))
3190	    {
3191	      re_node_set_free (&new_nodes);
3192	      return err;
3193	    }
3194	}
3195    }
3196  re_node_set_free (cur_nodes);
3197  *cur_nodes = new_nodes;
3198  return REG_NOERROR;
3199}
3200
3201/* Helper function for check_arrival_expand_ecl.
3202   Check incrementally the epsilon closure of TARGET, and if it isn't
3203   problematic append it to DST_NODES.  */
3204
3205static reg_errcode_t
3206internal_function
3207check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3208			      Idx target, Idx ex_subexp, int type)
3209{
3210  Idx cur_node;
3211  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3212    {
3213      bool ok;
3214
3215      if (dfa->nodes[cur_node].type == type
3216	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3217	{
3218	  if (type == OP_CLOSE_SUBEXP)
3219	    {
3220	      ok = re_node_set_insert (dst_nodes, cur_node);
3221	      if (BE (! ok, 0))
3222		return REG_ESPACE;
3223	    }
3224	  break;
3225	}
3226      ok = re_node_set_insert (dst_nodes, cur_node);
3227      if (BE (! ok, 0))
3228	return REG_ESPACE;
3229      if (dfa->edests[cur_node].nelem == 0)
3230	break;
3231      if (dfa->edests[cur_node].nelem == 2)
3232	{
3233	  reg_errcode_t err;
3234	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3235					      dfa->edests[cur_node].elems[1],
3236					      ex_subexp, type);
3237	  if (BE (err != REG_NOERROR, 0))
3238	    return err;
3239	}
3240      cur_node = dfa->edests[cur_node].elems[0];
3241    }
3242  return REG_NOERROR;
3243}
3244
3245
3246/* For all the back references in the current state, calculate the
3247   destination of the back references by the appropriate entry
3248   in MCTX->BKREF_ENTS.  */
3249
3250static reg_errcode_t
3251internal_function
3252expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3253		    Idx cur_str, Idx subexp_num, int type)
3254{
3255  const re_dfa_t *const dfa = mctx->dfa;
3256  reg_errcode_t err;
3257  Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3258  struct re_backref_cache_entry *ent;
3259
3260  if (cache_idx_start == REG_MISSING)
3261    return REG_NOERROR;
3262
3263 restart:
3264  ent = mctx->bkref_ents + cache_idx_start;
3265  do
3266    {
3267      Idx to_idx, next_node;
3268
3269      /* Is this entry ENT is appropriate?  */
3270      if (!re_node_set_contains (cur_nodes, ent->node))
3271	continue; /* No.  */
3272
3273      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3274      /* Calculate the destination of the back reference, and append it
3275	 to MCTX->STATE_LOG.  */
3276      if (to_idx == cur_str)
3277	{
3278	  /* The backreference did epsilon transit, we must re-check all the
3279	     node in the current state.  */
3280	  re_node_set new_dests;
3281	  reg_errcode_t err2, err3;
3282	  next_node = dfa->edests[ent->node].elems[0];
3283	  if (re_node_set_contains (cur_nodes, next_node))
3284	    continue;
3285	  err = re_node_set_init_1 (&new_dests, next_node);
3286	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3287	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3288	  re_node_set_free (&new_dests);
3289	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3290		  || err3 != REG_NOERROR, 0))
3291	    {
3292	      err = (err != REG_NOERROR ? err
3293		     : (err2 != REG_NOERROR ? err2 : err3));
3294	      return err;
3295	    }
3296	  /* TODO: It is still inefficient...  */
3297	  goto restart;
3298	}
3299      else
3300	{
3301	  re_node_set union_set;
3302	  next_node = dfa->nexts[ent->node];
3303	  if (mctx->state_log[to_idx])
3304	    {
3305	      bool ok;
3306	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3307					next_node))
3308		continue;
3309	      err = re_node_set_init_copy (&union_set,
3310					   &mctx->state_log[to_idx]->nodes);
3311	      ok = re_node_set_insert (&union_set, next_node);
3312	      if (BE (err != REG_NOERROR || ! ok, 0))
3313		{
3314		  re_node_set_free (&union_set);
3315		  err = err != REG_NOERROR ? err : REG_ESPACE;
3316		  return err;
3317		}
3318	    }
3319	  else
3320	    {
3321	      err = re_node_set_init_1 (&union_set, next_node);
3322	      if (BE (err != REG_NOERROR, 0))
3323		return err;
3324	    }
3325	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3326	  re_node_set_free (&union_set);
3327	  if (BE (mctx->state_log[to_idx] == NULL
3328		  && err != REG_NOERROR, 0))
3329	    return err;
3330	}
3331    }
3332  while (ent++->more);
3333  return REG_NOERROR;
3334}
3335
3336/* Build transition table for the state.
3337   Return true if successful.  */
3338
3339static bool
3340internal_function
3341build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3342{
3343  reg_errcode_t err;
3344  Idx i, j;
3345  int ch;
3346  bool need_word_trtable = false;
3347  bitset_word_t elem, mask;
3348  bool dests_node_malloced = false;
3349  bool dest_states_malloced = false;
3350  Idx ndests; /* Number of the destination states from `state'.  */
3351  re_dfastate_t **trtable;
3352  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3353  re_node_set follows, *dests_node;
3354  bitset_t *dests_ch;
3355  bitset_t acceptable;
3356
3357  struct dests_alloc
3358  {
3359    re_node_set dests_node[SBC_MAX];
3360    bitset_t dests_ch[SBC_MAX];
3361  } *dests_alloc;
3362
3363  /* We build DFA states which corresponds to the destination nodes
3364     from `state'.  `dests_node[i]' represents the nodes which i-th
3365     destination state contains, and `dests_ch[i]' represents the
3366     characters which i-th destination state accepts.  */
3367  if (__libc_use_alloca (sizeof (struct dests_alloc)))
3368    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3369  else
3370    {
3371      dests_alloc = re_malloc (struct dests_alloc, 1);
3372      if (BE (dests_alloc == NULL, 0))
3373	return false;
3374      dests_node_malloced = true;
3375    }
3376  dests_node = dests_alloc->dests_node;
3377  dests_ch = dests_alloc->dests_ch;
3378
3379  /* Initialize transiton table.  */
3380  state->word_trtable = state->trtable = NULL;
3381
3382  /* At first, group all nodes belonging to `state' into several
3383     destinations.  */
3384  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3385  if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3386    {
3387      if (dests_node_malloced)
3388	free (dests_alloc);
3389      if (ndests == 0)
3390	{
3391	  state->trtable = (re_dfastate_t **)
3392	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3393	  return true;
3394	}
3395      return false;
3396    }
3397
3398  err = re_node_set_alloc (&follows, ndests + 1);
3399  if (BE (err != REG_NOERROR, 0))
3400    goto out_free;
3401
3402  /* Avoid arithmetic overflow in size calculation.  */
3403  if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3404	    / (3 * sizeof (re_dfastate_t *)))
3405	   < ndests),
3406	  0))
3407    goto out_free;
3408
3409  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3410			 + ndests * 3 * sizeof (re_dfastate_t *)))
3411    dest_states = (re_dfastate_t **)
3412      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3413  else
3414    {
3415      dest_states = (re_dfastate_t **)
3416	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3417      if (BE (dest_states == NULL, 0))
3418	{
3419out_free:
3420	  if (dest_states_malloced)
3421	    free (dest_states);
3422	  re_node_set_free (&follows);
3423	  for (i = 0; i < ndests; ++i)
3424	    re_node_set_free (dests_node + i);
3425	  if (dests_node_malloced)
3426	    free (dests_alloc);
3427	  return false;
3428	}
3429      dest_states_malloced = true;
3430    }
3431  dest_states_word = dest_states + ndests;
3432  dest_states_nl = dest_states_word + ndests;
3433  bitset_empty (acceptable);
3434
3435  /* Then build the states for all destinations.  */
3436  for (i = 0; i < ndests; ++i)
3437    {
3438      Idx next_node;
3439      re_node_set_empty (&follows);
3440      /* Merge the follows of this destination states.  */
3441      for (j = 0; j < dests_node[i].nelem; ++j)
3442	{
3443	  next_node = dfa->nexts[dests_node[i].elems[j]];
3444	  if (next_node != REG_MISSING)
3445	    {
3446	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3447	      if (BE (err != REG_NOERROR, 0))
3448		goto out_free;
3449	    }
3450	}
3451      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3452      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3453	goto out_free;
3454      /* If the new state has context constraint,
3455	 build appropriate states for these contexts.  */
3456      if (dest_states[i]->has_constraint)
3457	{
3458	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3459							  CONTEXT_WORD);
3460	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3461	    goto out_free;
3462
3463	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3464	    need_word_trtable = true;
3465
3466	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3467							CONTEXT_NEWLINE);
3468	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3469	    goto out_free;
3470 	}
3471      else
3472	{
3473	  dest_states_word[i] = dest_states[i];
3474	  dest_states_nl[i] = dest_states[i];
3475	}
3476      bitset_merge (acceptable, dests_ch[i]);
3477    }
3478
3479  if (!BE (need_word_trtable, 0))
3480    {
3481      /* We don't care about whether the following character is a word
3482	 character, or we are in a single-byte character set so we can
3483	 discern by looking at the character code: allocate a
3484	 256-entry transition table.  */
3485      trtable = state->trtable =
3486	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3487      if (BE (trtable == NULL, 0))
3488	goto out_free;
3489
3490      /* For all characters ch...:  */
3491      for (i = 0; i < BITSET_WORDS; ++i)
3492	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3493	     elem;
3494	     mask <<= 1, elem >>= 1, ++ch)
3495	  if (BE (elem & 1, 0))
3496	    {
3497	      /* There must be exactly one destination which accepts
3498		 character ch.  See group_nodes_into_DFAstates.  */
3499	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3500		;
3501
3502	      /* j-th destination accepts the word character ch.  */
3503	      if (dfa->word_char[i] & mask)
3504		trtable[ch] = dest_states_word[j];
3505	      else
3506		trtable[ch] = dest_states[j];
3507	    }
3508    }
3509  else
3510    {
3511      /* We care about whether the following character is a word
3512	 character, and we are in a multi-byte character set: discern
3513	 by looking at the character code: build two 256-entry
3514	 transition tables, one starting at trtable[0] and one
3515	 starting at trtable[SBC_MAX].  */
3516      trtable = state->word_trtable =
3517	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3518      if (BE (trtable == NULL, 0))
3519	goto out_free;
3520
3521      /* For all characters ch...:  */
3522      for (i = 0; i < BITSET_WORDS; ++i)
3523	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3524	     elem;
3525	     mask <<= 1, elem >>= 1, ++ch)
3526	  if (BE (elem & 1, 0))
3527	    {
3528	      /* There must be exactly one destination which accepts
3529		 character ch.  See group_nodes_into_DFAstates.  */
3530	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3531		;
3532
3533	      /* j-th destination accepts the word character ch.  */
3534	      trtable[ch] = dest_states[j];
3535	      trtable[ch + SBC_MAX] = dest_states_word[j];
3536	    }
3537    }
3538
3539  /* new line */
3540  if (bitset_contain (acceptable, NEWLINE_CHAR))
3541    {
3542      /* The current state accepts newline character.  */
3543      for (j = 0; j < ndests; ++j)
3544	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3545	  {
3546	    /* k-th destination accepts newline character.  */
3547	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3548	    if (need_word_trtable)
3549	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3550	    /* There must be only one destination which accepts
3551	       newline.  See group_nodes_into_DFAstates.  */
3552	    break;
3553	  }
3554    }
3555
3556  if (dest_states_malloced)
3557    free (dest_states);
3558
3559  re_node_set_free (&follows);
3560  for (i = 0; i < ndests; ++i)
3561    re_node_set_free (dests_node + i);
3562
3563  if (dests_node_malloced)
3564    free (dests_alloc);
3565
3566  return true;
3567}
3568
3569/* Group all nodes belonging to STATE into several destinations.
3570   Then for all destinations, set the nodes belonging to the destination
3571   to DESTS_NODE[i] and set the characters accepted by the destination
3572   to DEST_CH[i].  This function return the number of destinations.  */
3573
3574static Idx
3575internal_function
3576group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3577			    re_node_set *dests_node, bitset_t *dests_ch)
3578{
3579  reg_errcode_t err;
3580  bool ok;
3581  Idx i, j, k;
3582  Idx ndests; /* Number of the destinations from `state'.  */
3583  bitset_t accepts; /* Characters a node can accept.  */
3584  const re_node_set *cur_nodes = &state->nodes;
3585  bitset_empty (accepts);
3586  ndests = 0;
3587
3588  /* For all the nodes belonging to `state',  */
3589  for (i = 0; i < cur_nodes->nelem; ++i)
3590    {
3591      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3592      re_token_type_t type = node->type;
3593      unsigned int constraint = node->constraint;
3594
3595      /* Enumerate all single byte character this node can accept.  */
3596      if (type == CHARACTER)
3597	bitset_set (accepts, node->opr.c);
3598      else if (type == SIMPLE_BRACKET)
3599	{
3600	  bitset_merge (accepts, node->opr.sbcset);
3601	}
3602      else if (type == OP_PERIOD)
3603	{
3604#ifdef RE_ENABLE_I18N
3605	  if (dfa->mb_cur_max > 1)
3606	    bitset_merge (accepts, dfa->sb_char);
3607	  else
3608#endif
3609	    bitset_set_all (accepts);
3610	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3611	    bitset_clear (accepts, '\n');
3612	  if (dfa->syntax & RE_DOT_NOT_NULL)
3613	    bitset_clear (accepts, '\0');
3614	}
3615#ifdef RE_ENABLE_I18N
3616      else if (type == OP_UTF8_PERIOD)
3617        {
3618	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3619	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3620	  else
3621	    bitset_merge (accepts, utf8_sb_map);
3622	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3623	    bitset_clear (accepts, '\n');
3624	  if (dfa->syntax & RE_DOT_NOT_NULL)
3625	    bitset_clear (accepts, '\0');
3626        }
3627#endif
3628      else
3629	continue;
3630
3631      /* Check the `accepts' and sift the characters which are not
3632	 match it the context.  */
3633      if (constraint)
3634	{
3635	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3636	    {
3637	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3638	      bitset_empty (accepts);
3639	      if (accepts_newline)
3640		bitset_set (accepts, NEWLINE_CHAR);
3641	      else
3642		continue;
3643	    }
3644	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3645	    {
3646	      bitset_empty (accepts);
3647	      continue;
3648	    }
3649
3650	  if (constraint & NEXT_WORD_CONSTRAINT)
3651	    {
3652	      bitset_word_t any_set = 0;
3653	      if (type == CHARACTER && !node->word_char)
3654		{
3655		  bitset_empty (accepts);
3656		  continue;
3657		}
3658#ifdef RE_ENABLE_I18N
3659	      if (dfa->mb_cur_max > 1)
3660		for (j = 0; j < BITSET_WORDS; ++j)
3661		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3662	      else
3663#endif
3664		for (j = 0; j < BITSET_WORDS; ++j)
3665		  any_set |= (accepts[j] &= dfa->word_char[j]);
3666	      if (!any_set)
3667		continue;
3668	    }
3669	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3670	    {
3671	      bitset_word_t any_set = 0;
3672	      if (type == CHARACTER && node->word_char)
3673		{
3674		  bitset_empty (accepts);
3675		  continue;
3676		}
3677#ifdef RE_ENABLE_I18N
3678	      if (dfa->mb_cur_max > 1)
3679		for (j = 0; j < BITSET_WORDS; ++j)
3680		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3681	      else
3682#endif
3683		for (j = 0; j < BITSET_WORDS; ++j)
3684		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3685	      if (!any_set)
3686		continue;
3687	    }
3688	}
3689
3690      /* Then divide `accepts' into DFA states, or create a new
3691	 state.  Above, we make sure that accepts is not empty.  */
3692      for (j = 0; j < ndests; ++j)
3693	{
3694	  bitset_t intersec; /* Intersection sets, see below.  */
3695	  bitset_t remains;
3696	  /* Flags, see below.  */
3697	  bitset_word_t has_intersec, not_subset, not_consumed;
3698
3699	  /* Optimization, skip if this state doesn't accept the character.  */
3700	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3701	    continue;
3702
3703	  /* Enumerate the intersection set of this state and `accepts'.  */
3704	  has_intersec = 0;
3705	  for (k = 0; k < BITSET_WORDS; ++k)
3706	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3707	  /* And skip if the intersection set is empty.  */
3708	  if (!has_intersec)
3709	    continue;
3710
3711	  /* Then check if this state is a subset of `accepts'.  */
3712	  not_subset = not_consumed = 0;
3713	  for (k = 0; k < BITSET_WORDS; ++k)
3714	    {
3715	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3716	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3717	    }
3718
3719	  /* If this state isn't a subset of `accepts', create a
3720	     new group state, which has the `remains'. */
3721	  if (not_subset)
3722	    {
3723	      bitset_copy (dests_ch[ndests], remains);
3724	      bitset_copy (dests_ch[j], intersec);
3725	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3726	      if (BE (err != REG_NOERROR, 0))
3727		goto error_return;
3728	      ++ndests;
3729	    }
3730
3731	  /* Put the position in the current group. */
3732	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3733	  if (BE (! ok, 0))
3734	    goto error_return;
3735
3736	  /* If all characters are consumed, go to next node. */
3737	  if (!not_consumed)
3738	    break;
3739	}
3740      /* Some characters remain, create a new group. */
3741      if (j == ndests)
3742	{
3743	  bitset_copy (dests_ch[ndests], accepts);
3744	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3745	  if (BE (err != REG_NOERROR, 0))
3746	    goto error_return;
3747	  ++ndests;
3748	  bitset_empty (accepts);
3749	}
3750    }
3751  return ndests;
3752 error_return:
3753  for (j = 0; j < ndests; ++j)
3754    re_node_set_free (dests_node + j);
3755  return REG_MISSING;
3756}
3757
3758#ifdef RE_ENABLE_I18N
3759/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3760   Return the number of the bytes the node accepts.
3761   STR_IDX is the current index of the input string.
3762
3763   This function handles the nodes which can accept one character, or
3764   one collating element like '.', '[a-z]', opposite to the other nodes
3765   can only accept one byte.  */
3766
3767static int
3768internal_function
3769check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3770			 const re_string_t *input, Idx str_idx)
3771{
3772  const re_token_t *node = dfa->nodes + node_idx;
3773  int char_len, elem_len;
3774  Idx i;
3775
3776  if (BE (node->type == OP_UTF8_PERIOD, 0))
3777    {
3778      unsigned char c = re_string_byte_at (input, str_idx), d;
3779      if (BE (c < 0xc2, 1))
3780	return 0;
3781
3782      if (str_idx + 2 > input->len)
3783	return 0;
3784
3785      d = re_string_byte_at (input, str_idx + 1);
3786      if (c < 0xe0)
3787	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3788      else if (c < 0xf0)
3789	{
3790	  char_len = 3;
3791	  if (c == 0xe0 && d < 0xa0)
3792	    return 0;
3793	}
3794      else if (c < 0xf8)
3795	{
3796	  char_len = 4;
3797	  if (c == 0xf0 && d < 0x90)
3798	    return 0;
3799	}
3800      else if (c < 0xfc)
3801	{
3802	  char_len = 5;
3803	  if (c == 0xf8 && d < 0x88)
3804	    return 0;
3805	}
3806      else if (c < 0xfe)
3807	{
3808	  char_len = 6;
3809	  if (c == 0xfc && d < 0x84)
3810	    return 0;
3811	}
3812      else
3813	return 0;
3814
3815      if (str_idx + char_len > input->len)
3816	return 0;
3817
3818      for (i = 1; i < char_len; ++i)
3819	{
3820	  d = re_string_byte_at (input, str_idx + i);
3821	  if (d < 0x80 || d > 0xbf)
3822	    return 0;
3823	}
3824      return char_len;
3825    }
3826
3827  char_len = re_string_char_size_at (input, str_idx);
3828  if (node->type == OP_PERIOD)
3829    {
3830      if (char_len <= 1)
3831        return 0;
3832      /* FIXME: I don't think this if is needed, as both '\n'
3833	 and '\0' are char_len == 1.  */
3834      /* '.' accepts any one character except the following two cases.  */
3835      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3836	   re_string_byte_at (input, str_idx) == '\n') ||
3837	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3838	   re_string_byte_at (input, str_idx) == '\0'))
3839	return 0;
3840      return char_len;
3841    }
3842
3843  elem_len = re_string_elem_size_at (input, str_idx);
3844  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3845    return 0;
3846
3847  if (node->type == COMPLEX_BRACKET)
3848    {
3849      const re_charset_t *cset = node->opr.mbcset;
3850# ifdef _LIBC
3851      const unsigned char *pin
3852	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3853      Idx j;
3854      uint32_t nrules;
3855# endif /* _LIBC */
3856      int match_len = 0;
3857      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3858		    ? re_string_wchar_at (input, str_idx) : 0);
3859
3860      /* match with multibyte character?  */
3861      for (i = 0; i < cset->nmbchars; ++i)
3862	if (wc == cset->mbchars[i])
3863	  {
3864	    match_len = char_len;
3865	    goto check_node_accept_bytes_match;
3866	  }
3867      /* match with character_class?  */
3868      for (i = 0; i < cset->nchar_classes; ++i)
3869	{
3870	  wctype_t wt = cset->char_classes[i];
3871	  if (__iswctype (wc, wt))
3872	    {
3873	      match_len = char_len;
3874	      goto check_node_accept_bytes_match;
3875	    }
3876	}
3877
3878# ifdef _LIBC
3879      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3880      if (nrules != 0)
3881	{
3882	  unsigned int in_collseq = 0;
3883	  const int32_t *table, *indirect;
3884	  const unsigned char *weights, *extra;
3885	  const char *collseqwc;
3886	  int32_t idx;
3887	  /* This #include defines a local function!  */
3888#  include <locale/weight.h>
3889
3890	  /* match with collating_symbol?  */
3891	  if (cset->ncoll_syms)
3892	    extra = (const unsigned char *)
3893	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3894	  for (i = 0; i < cset->ncoll_syms; ++i)
3895	    {
3896	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3897	      /* Compare the length of input collating element and
3898		 the length of current collating element.  */
3899	      if (*coll_sym != elem_len)
3900		continue;
3901	      /* Compare each bytes.  */
3902	      for (j = 0; j < *coll_sym; j++)
3903		if (pin[j] != coll_sym[1 + j])
3904		  break;
3905	      if (j == *coll_sym)
3906		{
3907		  /* Match if every bytes is equal.  */
3908		  match_len = j;
3909		  goto check_node_accept_bytes_match;
3910		}
3911	    }
3912
3913	  if (cset->nranges)
3914	    {
3915	      if (elem_len <= char_len)
3916		{
3917		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3918		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3919		}
3920	      else
3921		in_collseq = find_collation_sequence_value (pin, elem_len);
3922	    }
3923	  /* match with range expression?  */
3924	  for (i = 0; i < cset->nranges; ++i)
3925	    if (cset->range_starts[i] <= in_collseq
3926		&& in_collseq <= cset->range_ends[i])
3927	      {
3928		match_len = elem_len;
3929		goto check_node_accept_bytes_match;
3930	      }
3931
3932	  /* match with equivalence_class?  */
3933	  if (cset->nequiv_classes)
3934	    {
3935	      const unsigned char *cp = pin;
3936	      table = (const int32_t *)
3937		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3938	      weights = (const unsigned char *)
3939		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3940	      extra = (const unsigned char *)
3941		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3942	      indirect = (const int32_t *)
3943		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3944	      idx = findidx (&cp);
3945	      if (idx > 0)
3946		for (i = 0; i < cset->nequiv_classes; ++i)
3947		  {
3948		    int32_t equiv_class_idx = cset->equiv_classes[i];
3949		    size_t weight_len = weights[idx];
3950		    if (weight_len == weights[equiv_class_idx])
3951		      {
3952			Idx cnt = 0;
3953			while (cnt <= weight_len
3954			       && (weights[equiv_class_idx + 1 + cnt]
3955				   == weights[idx + 1 + cnt]))
3956			  ++cnt;
3957			if (cnt > weight_len)
3958			  {
3959			    match_len = elem_len;
3960			    goto check_node_accept_bytes_match;
3961			  }
3962		      }
3963		  }
3964	    }
3965	}
3966      else
3967# endif /* _LIBC */
3968	{
3969	  /* match with range expression?  */
3970#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3971	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3972#else
3973	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3974	  cmp_buf[2] = wc;
3975#endif
3976	  for (i = 0; i < cset->nranges; ++i)
3977	    {
3978	      cmp_buf[0] = cset->range_starts[i];
3979	      cmp_buf[4] = cset->range_ends[i];
3980	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3981		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3982		{
3983		  match_len = char_len;
3984		  goto check_node_accept_bytes_match;
3985		}
3986	    }
3987	}
3988    check_node_accept_bytes_match:
3989      if (!cset->non_match)
3990	return match_len;
3991      else
3992	{
3993	  if (match_len > 0)
3994	    return 0;
3995	  else
3996	    return (elem_len > char_len) ? elem_len : char_len;
3997	}
3998    }
3999  return 0;
4000}
4001
4002# ifdef _LIBC
4003static unsigned int
4004internal_function
4005find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4006{
4007  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4008  if (nrules == 0)
4009    {
4010      if (mbs_len == 1)
4011	{
4012	  /* No valid character.  Match it as a single byte character.  */
4013	  const unsigned char *collseq = (const unsigned char *)
4014	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4015	  return collseq[mbs[0]];
4016	}
4017      return UINT_MAX;
4018    }
4019  else
4020    {
4021      int32_t idx;
4022      const unsigned char *extra = (const unsigned char *)
4023	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4024      int32_t extrasize = (const unsigned char *)
4025	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4026
4027      for (idx = 0; idx < extrasize;)
4028	{
4029	  int mbs_cnt;
4030	  bool found = false;
4031	  int32_t elem_mbs_len;
4032	  /* Skip the name of collating element name.  */
4033	  idx = idx + extra[idx] + 1;
4034	  elem_mbs_len = extra[idx++];
4035	  if (mbs_len == elem_mbs_len)
4036	    {
4037	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4038		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4039		  break;
4040	      if (mbs_cnt == elem_mbs_len)
4041		/* Found the entry.  */
4042		found = true;
4043	    }
4044	  /* Skip the byte sequence of the collating element.  */
4045	  idx += elem_mbs_len;
4046	  /* Adjust for the alignment.  */
4047	  idx = (idx + 3) & ~3;
4048	  /* Skip the collation sequence value.  */
4049	  idx += sizeof (uint32_t);
4050	  /* Skip the wide char sequence of the collating element.  */
4051	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4052	  /* If we found the entry, return the sequence value.  */
4053	  if (found)
4054	    return *(uint32_t *) (extra + idx);
4055	  /* Skip the collation sequence value.  */
4056	  idx += sizeof (uint32_t);
4057	}
4058      return UINT_MAX;
4059    }
4060}
4061# endif /* _LIBC */
4062#endif /* RE_ENABLE_I18N */
4063
4064/* Check whether the node accepts the byte which is IDX-th
4065   byte of the INPUT.  */
4066
4067static bool
4068internal_function
4069check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4070		   Idx idx)
4071{
4072  unsigned char ch;
4073  ch = re_string_byte_at (&mctx->input, idx);
4074  switch (node->type)
4075    {
4076    case CHARACTER:
4077      if (node->opr.c != ch)
4078        return false;
4079      break;
4080
4081    case SIMPLE_BRACKET:
4082      if (!bitset_contain (node->opr.sbcset, ch))
4083        return false;
4084      break;
4085
4086#ifdef RE_ENABLE_I18N
4087    case OP_UTF8_PERIOD:
4088      if (ch >= ASCII_CHARS)
4089        return false;
4090      /* FALLTHROUGH */
4091#endif
4092    case OP_PERIOD:
4093      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4094	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4095	return false;
4096      break;
4097
4098    default:
4099      return false;
4100    }
4101
4102  if (node->constraint)
4103    {
4104      /* The node has constraints.  Check whether the current context
4105	 satisfies the constraints.  */
4106      unsigned int context = re_string_context_at (&mctx->input, idx,
4107						   mctx->eflags);
4108      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4109	return false;
4110    }
4111
4112  return true;
4113}
4114
4115/* Extend the buffers, if the buffers have run out.  */
4116
4117static reg_errcode_t
4118internal_function
4119extend_buffers (re_match_context_t *mctx)
4120{
4121  reg_errcode_t ret;
4122  re_string_t *pstr = &mctx->input;
4123
4124  /* Avoid overflow.  */
4125  if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4126    return REG_ESPACE;
4127
4128  /* Double the lengthes of the buffers.  */
4129  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4130  if (BE (ret != REG_NOERROR, 0))
4131    return ret;
4132
4133  if (mctx->state_log != NULL)
4134    {
4135      /* And double the length of state_log.  */
4136      /* XXX We have no indication of the size of this buffer.  If this
4137	 allocation fail we have no indication that the state_log array
4138	 does not have the right size.  */
4139      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4140					      pstr->bufs_len + 1);
4141      if (BE (new_array == NULL, 0))
4142	return REG_ESPACE;
4143      mctx->state_log = new_array;
4144    }
4145
4146  /* Then reconstruct the buffers.  */
4147  if (pstr->icase)
4148    {
4149#ifdef RE_ENABLE_I18N
4150      if (pstr->mb_cur_max > 1)
4151	{
4152	  ret = build_wcs_upper_buffer (pstr);
4153	  if (BE (ret != REG_NOERROR, 0))
4154	    return ret;
4155	}
4156      else
4157#endif /* RE_ENABLE_I18N  */
4158	build_upper_buffer (pstr);
4159    }
4160  else
4161    {
4162#ifdef RE_ENABLE_I18N
4163      if (pstr->mb_cur_max > 1)
4164	build_wcs_buffer (pstr);
4165      else
4166#endif /* RE_ENABLE_I18N  */
4167	{
4168	  if (pstr->trans != NULL)
4169	    re_string_translate_buffer (pstr);
4170	}
4171    }
4172  return REG_NOERROR;
4173}
4174
4175
4176/* Functions for matching context.  */
4177
4178/* Initialize MCTX.  */
4179
4180static reg_errcode_t
4181internal_function
4182match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4183{
4184  mctx->eflags = eflags;
4185  mctx->match_last = REG_MISSING;
4186  if (n > 0)
4187    {
4188      /* Avoid overflow.  */
4189      size_t max_object_size =
4190	MAX (sizeof (struct re_backref_cache_entry),
4191	     sizeof (re_sub_match_top_t *));
4192      if (BE (SIZE_MAX / max_object_size < n, 0))
4193	return REG_ESPACE;
4194
4195      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4196      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4197      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4198	return REG_ESPACE;
4199    }
4200  /* Already zero-ed by the caller.
4201     else
4202       mctx->bkref_ents = NULL;
4203     mctx->nbkref_ents = 0;
4204     mctx->nsub_tops = 0;  */
4205  mctx->abkref_ents = n;
4206  mctx->max_mb_elem_len = 1;
4207  mctx->asub_tops = n;
4208  return REG_NOERROR;
4209}
4210
4211/* Clean the entries which depend on the current input in MCTX.
4212   This function must be invoked when the matcher changes the start index
4213   of the input, or changes the input string.  */
4214
4215static void
4216internal_function
4217match_ctx_clean (re_match_context_t *mctx)
4218{
4219  Idx st_idx;
4220  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4221    {
4222      Idx sl_idx;
4223      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4224      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4225	{
4226	  re_sub_match_last_t *last = top->lasts[sl_idx];
4227	  re_free (last->path.array);
4228	  re_free (last);
4229	}
4230      re_free (top->lasts);
4231      if (top->path)
4232	{
4233	  re_free (top->path->array);
4234	  re_free (top->path);
4235	}
4236      free (top);
4237    }
4238
4239  mctx->nsub_tops = 0;
4240  mctx->nbkref_ents = 0;
4241}
4242
4243/* Free all the memory associated with MCTX.  */
4244
4245static void
4246internal_function
4247match_ctx_free (re_match_context_t *mctx)
4248{
4249  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4250  match_ctx_clean (mctx);
4251  re_free (mctx->sub_tops);
4252  re_free (mctx->bkref_ents);
4253}
4254
4255/* Add a new backreference entry to MCTX.
4256   Note that we assume that caller never call this function with duplicate
4257   entry, and call with STR_IDX which isn't smaller than any existing entry.
4258*/
4259
4260static reg_errcode_t
4261internal_function
4262match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4263		     Idx to)
4264{
4265  if (mctx->nbkref_ents >= mctx->abkref_ents)
4266    {
4267      struct re_backref_cache_entry* new_entry;
4268      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4269			      mctx->abkref_ents * 2);
4270      if (BE (new_entry == NULL, 0))
4271	{
4272	  re_free (mctx->bkref_ents);
4273	  return REG_ESPACE;
4274	}
4275      mctx->bkref_ents = new_entry;
4276      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4277	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4278      mctx->abkref_ents *= 2;
4279    }
4280  if (mctx->nbkref_ents > 0
4281      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4282    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4283
4284  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4285  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4286  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4287  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4288
4289  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4290     If bit N is clear, means that this entry won't epsilon-transition to
4291     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4292     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4293     such node.
4294
4295     A backreference does not epsilon-transition unless it is empty, so set
4296     to all zeros if FROM != TO.  */
4297  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4298    = (from == to ? -1 : 0);
4299
4300  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4301  if (mctx->max_mb_elem_len < to - from)
4302    mctx->max_mb_elem_len = to - from;
4303  return REG_NOERROR;
4304}
4305
4306/* Return the first entry with the same str_idx, or REG_MISSING if none is
4307   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4308
4309static Idx
4310internal_function
4311search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4312{
4313  Idx left, right, mid, last;
4314  last = right = mctx->nbkref_ents;
4315  for (left = 0; left < right;)
4316    {
4317      mid = (left + right) / 2;
4318      if (mctx->bkref_ents[mid].str_idx < str_idx)
4319	left = mid + 1;
4320      else
4321	right = mid;
4322    }
4323  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4324    return left;
4325  else
4326    return REG_MISSING;
4327}
4328
4329/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4330   at STR_IDX.  */
4331
4332static reg_errcode_t
4333internal_function
4334match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4335{
4336#ifdef DEBUG
4337  assert (mctx->sub_tops != NULL);
4338  assert (mctx->asub_tops > 0);
4339#endif
4340  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4341    {
4342      Idx new_asub_tops = mctx->asub_tops * 2;
4343      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4344						   re_sub_match_top_t *,
4345						   new_asub_tops);
4346      if (BE (new_array == NULL, 0))
4347	return REG_ESPACE;
4348      mctx->sub_tops = new_array;
4349      mctx->asub_tops = new_asub_tops;
4350    }
4351  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4352  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4353    return REG_ESPACE;
4354  mctx->sub_tops[mctx->nsub_tops]->node = node;
4355  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4356  return REG_NOERROR;
4357}
4358
4359/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4360   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4361
4362static re_sub_match_last_t *
4363internal_function
4364match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4365{
4366  re_sub_match_last_t *new_entry;
4367  if (BE (subtop->nlasts == subtop->alasts, 0))
4368    {
4369      Idx new_alasts = 2 * subtop->alasts + 1;
4370      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4371						    re_sub_match_last_t *,
4372						    new_alasts);
4373      if (BE (new_array == NULL, 0))
4374	return NULL;
4375      subtop->lasts = new_array;
4376      subtop->alasts = new_alasts;
4377    }
4378  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4379  if (BE (new_entry != NULL, 1))
4380    {
4381      subtop->lasts[subtop->nlasts] = new_entry;
4382      new_entry->node = node;
4383      new_entry->str_idx = str_idx;
4384      ++subtop->nlasts;
4385    }
4386  return new_entry;
4387}
4388
4389static void
4390internal_function
4391sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4392	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4393{
4394  sctx->sifted_states = sifted_sts;
4395  sctx->limited_states = limited_sts;
4396  sctx->last_node = last_node;
4397  sctx->last_str_idx = last_str_idx;
4398  re_node_set_init_empty (&sctx->limits);
4399}
4400