1/* Extended regular expression matching and search library.
2   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, write to the Free
18   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19   02111-1307 USA.  */
20
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22				     int n) internal_function;
23static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24static void match_ctx_free (re_match_context_t *cache) internal_function;
25static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26					  int str_idx, int from, int to)
27     internal_function;
28static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
29     internal_function;
30static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31					   int str_idx) internal_function;
32static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33						   int node, int str_idx)
34     internal_function;
35static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36			   re_dfastate_t **limited_sts, int last_node,
37			   int last_str_idx)
38     internal_function;
39static reg_errcode_t re_search_internal (const regex_t *preg,
40					 const char *string, int length,
41					 int start, int range, int stop,
42					 size_t nmatch, regmatch_t pmatch[],
43					 int eflags) internal_function;
44static int re_search_2_stub (struct re_pattern_buffer *bufp,
45			     const char *string1, int length1,
46			     const char *string2, int length2,
47			     int start, int range, struct re_registers *regs,
48			     int stop, int ret_len) internal_function;
49static int re_search_stub (struct re_pattern_buffer *bufp,
50			   const char *string, int length, int start,
51			   int range, int stop, struct re_registers *regs,
52			   int ret_len) internal_function;
53static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
54			      int nregs, int regs_allocated) internal_function;
55static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
56     internal_function;
57static int check_matching (re_match_context_t *mctx, int fl_longest_match,
58			   int *p_match_first) internal_function;
59static int check_halt_state_context (const re_match_context_t *mctx,
60				     const re_dfastate_t *state, int idx)
61     internal_function;
62static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
63			 regmatch_t *prev_idx_match, int cur_node,
64			 int cur_idx, int nmatch) internal_function;
65static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
66				      int str_idx, int dest_node, int nregs,
67				      regmatch_t *regs,
68				      re_node_set *eps_via_nodes)
69     internal_function;
70static reg_errcode_t set_regs (const regex_t *preg,
71			       const re_match_context_t *mctx,
72			       size_t nmatch, regmatch_t *pmatch,
73			       int fl_backtrack) internal_function;
74static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
75     internal_function;
76
77#ifdef RE_ENABLE_I18N
78static int sift_states_iter_mb (const re_match_context_t *mctx,
79				re_sift_context_t *sctx,
80				int node_idx, int str_idx, int max_str_idx)
81     internal_function;
82#endif /* RE_ENABLE_I18N */
83static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
84					   re_sift_context_t *sctx)
85     internal_function;
86static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
87					  re_sift_context_t *sctx, int str_idx,
88					  re_node_set *cur_dest)
89     internal_function;
90static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
91					      re_sift_context_t *sctx,
92					      int str_idx,
93					      re_node_set *dest_nodes)
94     internal_function;
95static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
96					    re_node_set *dest_nodes,
97					    const re_node_set *candidates)
98     internal_function;
99static int check_dst_limits (const re_match_context_t *mctx,
100			     re_node_set *limits,
101			     int dst_node, int dst_idx, int src_node,
102			     int src_idx) internal_function;
103static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
104					int boundaries, int subexp_idx,
105					int from_node, int bkref_idx)
106     internal_function;
107static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
108				      int limit, int subexp_idx,
109				      int node, int str_idx,
110				      int bkref_idx) internal_function;
111static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
112					  re_node_set *dest_nodes,
113					  const re_node_set *candidates,
114					  re_node_set *limits,
115					  struct re_backref_cache_entry *bkref_ents,
116					  int str_idx) internal_function;
117static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
118					re_sift_context_t *sctx,
119					int str_idx, const re_node_set *candidates)
120     internal_function;
121static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
122					re_dfastate_t **dst,
123					re_dfastate_t **src, int num)
124     internal_function;
125static re_dfastate_t *find_recover_state (reg_errcode_t *err,
126					 re_match_context_t *mctx) internal_function;
127static re_dfastate_t *transit_state (reg_errcode_t *err,
128				     re_match_context_t *mctx,
129				     re_dfastate_t *state) internal_function;
130static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
131					    re_match_context_t *mctx,
132					    re_dfastate_t *next_state)
133     internal_function;
134static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
135						re_node_set *cur_nodes,
136						int str_idx) internal_function;
137#if 0
138static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
139					re_match_context_t *mctx,
140					re_dfastate_t *pstate)
141     internal_function;
142#endif
143#ifdef RE_ENABLE_I18N
144static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
145				       re_dfastate_t *pstate)
146     internal_function;
147#endif /* RE_ENABLE_I18N */
148static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
149					  const re_node_set *nodes)
150     internal_function;
151static reg_errcode_t get_subexp (re_match_context_t *mctx,
152				 int bkref_node, int bkref_str_idx)
153     internal_function;
154static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155				     const re_sub_match_top_t *sub_top,
156				     re_sub_match_last_t *sub_last,
157				     int bkref_node, int bkref_str)
158     internal_function;
159static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
160			     int subexp_idx, int type) internal_function;
161static reg_errcode_t check_arrival (re_match_context_t *mctx,
162				    state_array_t *path, int top_node,
163				    int top_str, int last_node, int last_str,
164				    int type) internal_function;
165static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
166						   int str_idx,
167						   re_node_set *cur_nodes,
168						   re_node_set *next_nodes)
169     internal_function;
170static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
171					       re_node_set *cur_nodes,
172					       int ex_subexp, int type)
173     internal_function;
174static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
175						   re_node_set *dst_nodes,
176						   int target, int ex_subexp,
177						   int type) internal_function;
178static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
179					 re_node_set *cur_nodes, int cur_str,
180					 int subexp_num, int type)
181     internal_function;
182static int build_trtable (const re_dfa_t *dfa,
183			  re_dfastate_t *state) internal_function;
184#ifdef RE_ENABLE_I18N
185static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
186				    const re_string_t *input, int idx)
187     internal_function;
188# ifdef _LIBC
189static unsigned int find_collation_sequence_value (const unsigned char *mbs,
190						   size_t name_len)
191     internal_function;
192# endif /* _LIBC */
193#endif /* RE_ENABLE_I18N */
194static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
195				       const re_dfastate_t *state,
196				       re_node_set *states_node,
197				       bitset_t *states_ch) internal_function;
198static int check_node_accept (const re_match_context_t *mctx,
199			      const re_token_t *node, int idx)
200     internal_function;
201static reg_errcode_t extend_buffers (re_match_context_t *mctx)
202     internal_function;
203
204/* Entry point for POSIX code.  */
205
206/* regexec searches for a given pattern, specified by PREG, in the
207   string STRING.
208
209   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
210   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
211   least NMATCH elements, and we set them to the offsets of the
212   corresponding matched substrings.
213
214   EFLAGS specifies `execution flags' which affect matching: if
215   REG_NOTBOL is set, then ^ does not match at the beginning of the
216   string; if REG_NOTEOL is set, then $ does not match at the end.
217
218   We return 0 if we find a match and REG_NOMATCH if not.  */
219
220int
221regexec (preg, string, nmatch, pmatch, eflags)
222    const regex_t *__restrict preg;
223    const char *__restrict string;
224    size_t nmatch;
225    regmatch_t pmatch[];
226    int eflags;
227{
228  reg_errcode_t err;
229  int start, length;
230  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
231
232  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
233    return REG_BADPAT;
234
235  if (eflags & REG_STARTEND)
236    {
237      start = pmatch[0].rm_so;
238      length = pmatch[0].rm_eo;
239    }
240  else
241    {
242      start = 0;
243      length = strlen (string);
244    }
245
246  __libc_lock_lock (dfa->lock);
247  if (preg->no_sub)
248    err = re_search_internal (preg, string, length, start, length - start,
249			      length, 0, NULL, eflags);
250  else
251    err = re_search_internal (preg, string, length, start, length - start,
252			      length, nmatch, pmatch, eflags);
253  __libc_lock_unlock (dfa->lock);
254  return err != REG_NOERROR;
255}
256
257#ifdef _LIBC
258# include <shlib-compat.h>
259versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
260
261# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
262__typeof__ (__regexec) __compat_regexec;
263
264int
265attribute_compat_text_section
266__compat_regexec (const regex_t *__restrict preg,
267		  const char *__restrict string, size_t nmatch,
268		  regmatch_t pmatch[], int eflags)
269{
270  return regexec (preg, string, nmatch, pmatch,
271		  eflags & (REG_NOTBOL | REG_NOTEOL));
272}
273compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
274# endif
275#endif
276
277/* Entry points for GNU code.  */
278
279/* re_match, re_search, re_match_2, re_search_2
280
281   The former two functions operate on STRING with length LENGTH,
282   while the later two operate on concatenation of STRING1 and STRING2
283   with lengths LENGTH1 and LENGTH2, respectively.
284
285   re_match() matches the compiled pattern in BUFP against the string,
286   starting at index START.
287
288   re_search() first tries matching at index START, then it tries to match
289   starting from index START + 1, and so on.  The last start position tried
290   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
291   way as re_match().)
292
293   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
294   the first STOP characters of the concatenation of the strings should be
295   concerned.
296
297   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
298   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
299   computed relative to the concatenation, not relative to the individual
300   strings.)
301
302   On success, re_match* functions return the length of the match, re_search*
303   return the position of the start of the match.  Return value -1 means no
304   match was found and -2 indicates an internal error.  */
305
306int
307re_match (bufp, string, length, start, regs)
308    struct re_pattern_buffer *bufp;
309    const char *string;
310    int length, start;
311    struct re_registers *regs;
312{
313  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
314}
315#ifdef _LIBC
316weak_alias (__re_match, re_match)
317#endif
318
319int
320re_search (bufp, string, length, start, range, regs)
321    struct re_pattern_buffer *bufp;
322    const char *string;
323    int length, start, range;
324    struct re_registers *regs;
325{
326  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
327}
328#ifdef _LIBC
329weak_alias (__re_search, re_search)
330#endif
331
332int
333re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
334    struct re_pattern_buffer *bufp;
335    const char *string1, *string2;
336    int length1, length2, start, stop;
337    struct re_registers *regs;
338{
339  return re_search_2_stub (bufp, string1, length1, string2, length2,
340			   start, 0, regs, stop, 1);
341}
342#ifdef _LIBC
343weak_alias (__re_match_2, re_match_2)
344#endif
345
346int
347re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
348    struct re_pattern_buffer *bufp;
349    const char *string1, *string2;
350    int length1, length2, start, range, stop;
351    struct re_registers *regs;
352{
353  return re_search_2_stub (bufp, string1, length1, string2, length2,
354			   start, range, regs, stop, 0);
355}
356#ifdef _LIBC
357weak_alias (__re_search_2, re_search_2)
358#endif
359
360static int
361re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
362		  stop, ret_len)
363    struct re_pattern_buffer *bufp;
364    const char *string1, *string2;
365    int length1, length2, start, range, stop, ret_len;
366    struct re_registers *regs;
367{
368  const char *str;
369  int rval;
370  int len = length1 + length2;
371  int free_str = 0;
372
373  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
374    return -2;
375
376  /* Concatenate the strings.  */
377  if (length2 > 0)
378    if (length1 > 0)
379      {
380	char *s = re_malloc (char, len);
381
382	if (BE (s == NULL, 0))
383	  return -2;
384#ifdef _LIBC
385	memcpy (__mempcpy (s, string1, length1), string2, length2);
386#else
387	memcpy (s, string1, length1);
388	memcpy (s + length1, string2, length2);
389#endif
390	str = s;
391	free_str = 1;
392      }
393    else
394      str = string2;
395  else
396    str = string1;
397
398  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
399			 ret_len);
400  if (free_str)
401    re_free ((char *) str);
402  return rval;
403}
404
405/* The parameters have the same meaning as those of re_search.
406   Additional parameters:
407   If RET_LEN is nonzero the length of the match is returned (re_match style);
408   otherwise the position of the match is returned.  */
409
410static int
411re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
412    struct re_pattern_buffer *bufp;
413    const char *string;
414    int length, start, range, stop, ret_len;
415    struct re_registers *regs;
416{
417  reg_errcode_t result;
418  regmatch_t *pmatch;
419  int nregs, rval;
420  int eflags = 0;
421  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
422
423  /* Check for out-of-range.  */
424  if (BE (start < 0 || start > length, 0))
425    return -1;
426  if (BE (start + range > length, 0))
427    range = length - start;
428  else if (BE (start + range < 0, 0))
429    range = -start;
430
431  __libc_lock_lock (dfa->lock);
432
433  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
434  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
435
436  /* Compile fastmap if we haven't yet.  */
437  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
438    re_compile_fastmap (bufp);
439
440  if (BE (bufp->no_sub, 0))
441    regs = NULL;
442
443  /* We need at least 1 register.  */
444  if (regs == NULL)
445    nregs = 1;
446  else if (BE (bufp->regs_allocated == REGS_FIXED &&
447	       regs->num_regs < bufp->re_nsub + 1, 0))
448    {
449      nregs = regs->num_regs;
450      if (BE (nregs < 1, 0))
451	{
452	  /* Nothing can be copied to regs.  */
453	  regs = NULL;
454	  nregs = 1;
455	}
456    }
457  else
458    nregs = bufp->re_nsub + 1;
459  pmatch = re_malloc (regmatch_t, nregs);
460  if (BE (pmatch == NULL, 0))
461    {
462      rval = -2;
463      goto out;
464    }
465
466  result = re_search_internal (bufp, string, length, start, range, stop,
467			       nregs, pmatch, eflags);
468
469  rval = 0;
470
471  /* I hope we needn't fill ther regs with -1's when no match was found.  */
472  if (result != REG_NOERROR)
473    rval = -1;
474  else if (regs != NULL)
475    {
476      /* If caller wants register contents data back, copy them.  */
477      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
478					   bufp->regs_allocated);
479      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
480	rval = -2;
481    }
482
483  if (BE (rval == 0, 1))
484    {
485      if (ret_len)
486	{
487	  assert (pmatch[0].rm_so == start);
488	  rval = pmatch[0].rm_eo - start;
489	}
490      else
491	rval = pmatch[0].rm_so;
492    }
493  re_free (pmatch);
494 out:
495  __libc_lock_unlock (dfa->lock);
496  return rval;
497}
498
499static unsigned
500re_copy_regs (regs, pmatch, nregs, regs_allocated)
501    struct re_registers *regs;
502    regmatch_t *pmatch;
503    int nregs, regs_allocated;
504{
505  int rval = REGS_REALLOCATE;
506  int i;
507  int need_regs = nregs + 1;
508  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
509     uses.  */
510
511  /* Have the register data arrays been allocated?  */
512  if (regs_allocated == REGS_UNALLOCATED)
513    { /* No.  So allocate them with malloc.  */
514      regs->start = re_malloc (regoff_t, need_regs);
515      regs->end = re_malloc (regoff_t, need_regs);
516      if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
517	return REGS_UNALLOCATED;
518      regs->num_regs = need_regs;
519    }
520  else if (regs_allocated == REGS_REALLOCATE)
521    { /* Yes.  If we need more elements than were already
522	 allocated, reallocate them.  If we need fewer, just
523	 leave it alone.  */
524      if (BE (need_regs > regs->num_regs, 0))
525	{
526	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
527	  regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
528	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
529	    return REGS_UNALLOCATED;
530	  regs->start = new_start;
531	  regs->end = new_end;
532	  regs->num_regs = need_regs;
533	}
534    }
535  else
536    {
537      assert (regs_allocated == REGS_FIXED);
538      /* This function may not be called with REGS_FIXED and nregs too big.  */
539      assert (regs->num_regs >= nregs);
540      rval = REGS_FIXED;
541    }
542
543  /* Copy the regs.  */
544  for (i = 0; i < nregs; ++i)
545    {
546      regs->start[i] = pmatch[i].rm_so;
547      regs->end[i] = pmatch[i].rm_eo;
548    }
549  for ( ; i < regs->num_regs; ++i)
550    regs->start[i] = regs->end[i] = -1;
551
552  return rval;
553}
554
555/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
556   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
557   this memory for recording register information.  STARTS and ENDS
558   must be allocated using the malloc library routine, and must each
559   be at least NUM_REGS * sizeof (regoff_t) bytes long.
560
561   If NUM_REGS == 0, then subsequent matches should allocate their own
562   register data.
563
564   Unless this function is called, the first search or match using
565   PATTERN_BUFFER will allocate its own register data, without
566   freeing the old data.  */
567
568void
569re_set_registers (bufp, regs, num_regs, starts, ends)
570    struct re_pattern_buffer *bufp;
571    struct re_registers *regs;
572    unsigned num_regs;
573    regoff_t *starts, *ends;
574{
575  if (num_regs)
576    {
577      bufp->regs_allocated = REGS_REALLOCATE;
578      regs->num_regs = num_regs;
579      regs->start = starts;
580      regs->end = ends;
581    }
582  else
583    {
584      bufp->regs_allocated = REGS_UNALLOCATED;
585      regs->num_regs = 0;
586      regs->start = regs->end = (regoff_t *) 0;
587    }
588}
589#ifdef _LIBC
590weak_alias (__re_set_registers, re_set_registers)
591#endif
592
593/* Entry points compatible with 4.2 BSD regex library.  We don't define
594   them unless specifically requested.  */
595
596#if defined _REGEX_RE_COMP || defined _LIBC
597int
598# ifdef _LIBC
599weak_function
600# endif
601re_exec (s)
602     const char *s;
603{
604  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
605}
606#endif /* _REGEX_RE_COMP */
607
608/* Internal entry point.  */
609
610/* Searches for a compiled pattern PREG in the string STRING, whose
611   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
612   mingings with regexec.  START, and RANGE have the same meanings
613   with re_search.
614   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
615   otherwise return the error code.
616   Note: We assume front end functions already check ranges.
617   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
618
619static reg_errcode_t
620re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
621		    eflags)
622    const regex_t *preg;
623    const char *string;
624    int length, start, range, stop, eflags;
625    size_t nmatch;
626    regmatch_t pmatch[];
627{
628  reg_errcode_t err;
629  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
630  int left_lim, right_lim, incr;
631  int fl_longest_match, match_first, match_kind, match_last = -1;
632  int extra_nmatch;
633  int sb, ch;
634#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
635  re_match_context_t mctx = { .dfa = dfa };
636#else
637  re_match_context_t mctx;
638#endif
639  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
640		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
641  RE_TRANSLATE_TYPE t = preg->translate;
642
643#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
644  memset (&mctx, '\0', sizeof (re_match_context_t));
645  mctx.dfa = dfa;
646#endif
647
648  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
649  nmatch -= extra_nmatch;
650
651  /* Check if the DFA haven't been compiled.  */
652  if (BE (preg->used == 0 || dfa->init_state == NULL
653	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
654	  || dfa->init_state_begbuf == NULL, 0))
655    return REG_NOMATCH;
656
657#ifdef DEBUG
658  /* We assume front-end functions already check them.  */
659  assert (start + range >= 0 && start + range <= length);
660#endif
661
662  /* If initial states with non-begbuf contexts have no elements,
663     the regex must be anchored.  If preg->newline_anchor is set,
664     we'll never use init_state_nl, so do not check it.  */
665  if (dfa->init_state->nodes.nelem == 0
666      && dfa->init_state_word->nodes.nelem == 0
667      && (dfa->init_state_nl->nodes.nelem == 0
668	  || !preg->newline_anchor))
669    {
670      if (start != 0 && start + range != 0)
671        return REG_NOMATCH;
672      start = range = 0;
673    }
674
675  /* We must check the longest matching, if nmatch > 0.  */
676  fl_longest_match = (nmatch != 0 || dfa->nbackref);
677
678  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
679			    preg->translate, preg->syntax & RE_ICASE, dfa);
680  if (BE (err != REG_NOERROR, 0))
681    goto free_return;
682  mctx.input.stop = stop;
683  mctx.input.raw_stop = stop;
684  mctx.input.newline_anchor = preg->newline_anchor;
685
686  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
687  if (BE (err != REG_NOERROR, 0))
688    goto free_return;
689
690  /* We will log all the DFA states through which the dfa pass,
691     if nmatch > 1, or this dfa has "multibyte node", which is a
692     back-reference or a node which can accept multibyte character or
693     multi character collating element.  */
694  if (nmatch > 1 || dfa->has_mb_node)
695    {
696      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
697      if (BE (mctx.state_log == NULL, 0))
698	{
699	  err = REG_ESPACE;
700	  goto free_return;
701	}
702    }
703  else
704    mctx.state_log = NULL;
705
706  match_first = start;
707  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
708			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
709
710  /* Check incrementally whether of not the input string match.  */
711  incr = (range < 0) ? -1 : 1;
712  left_lim = (range < 0) ? start + range : start;
713  right_lim = (range < 0) ? start : start + range;
714  sb = dfa->mb_cur_max == 1;
715  match_kind =
716    (fastmap
717     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
718	| (range >= 0 ? 2 : 0)
719	| (t != NULL ? 1 : 0))
720     : 8);
721
722  for (;; match_first += incr)
723    {
724      err = REG_NOMATCH;
725      if (match_first < left_lim || right_lim < match_first)
726	goto free_return;
727
728      /* Advance as rapidly as possible through the string, until we
729	 find a plausible place to start matching.  This may be done
730	 with varying efficiency, so there are various possibilities:
731	 only the most common of them are specialized, in order to
732	 save on code size.  We use a switch statement for speed.  */
733      switch (match_kind)
734	{
735	case 8:
736	  /* No fastmap.  */
737	  break;
738
739	case 7:
740	  /* Fastmap with single-byte translation, match forward.  */
741	  while (BE (match_first < right_lim, 1)
742		 && !fastmap[t[(unsigned char) string[match_first]]])
743	    ++match_first;
744	  goto forward_match_found_start_or_reached_end;
745
746	case 6:
747	  /* Fastmap without translation, match forward.  */
748	  while (BE (match_first < right_lim, 1)
749		 && !fastmap[(unsigned char) string[match_first]])
750	    ++match_first;
751
752	forward_match_found_start_or_reached_end:
753	  if (BE (match_first == right_lim, 0))
754	    {
755	      ch = match_first >= length
756		       ? 0 : (unsigned char) string[match_first];
757	      if (!fastmap[t ? t[ch] : ch])
758		goto free_return;
759	    }
760	  break;
761
762	case 4:
763	case 5:
764	  /* Fastmap without multi-byte translation, match backwards.  */
765	  while (match_first >= left_lim)
766	    {
767	      ch = match_first >= length
768		       ? 0 : (unsigned char) string[match_first];
769	      if (fastmap[t ? t[ch] : ch])
770		break;
771	      --match_first;
772	    }
773	  if (match_first < left_lim)
774	    goto free_return;
775	  break;
776
777	default:
778	  /* In this case, we can't determine easily the current byte,
779	     since it might be a component byte of a multibyte
780	     character.  Then we use the constructed buffer instead.  */
781	  for (;;)
782	    {
783	      /* If MATCH_FIRST is out of the valid range, reconstruct the
784		 buffers.  */
785	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
786	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
787		{
788		  err = re_string_reconstruct (&mctx.input, match_first,
789					       eflags);
790		  if (BE (err != REG_NOERROR, 0))
791		    goto free_return;
792
793		  offset = match_first - mctx.input.raw_mbs_idx;
794		}
795	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
796		 Note that MATCH_FIRST must not be smaller than 0.  */
797	      ch = (match_first >= length
798		    ? 0 : re_string_byte_at (&mctx.input, offset));
799	      if (fastmap[ch])
800		break;
801	      match_first += incr;
802	      if (match_first < left_lim || match_first > right_lim)
803	        {
804	          err = REG_NOMATCH;
805	          goto free_return;
806	        }
807	    }
808	  break;
809	}
810
811      /* Reconstruct the buffers so that the matcher can assume that
812	 the matching starts from the beginning of the buffer.  */
813      err = re_string_reconstruct (&mctx.input, match_first, eflags);
814      if (BE (err != REG_NOERROR, 0))
815	goto free_return;
816
817#ifdef RE_ENABLE_I18N
818     /* Don't consider this char as a possible match start if it part,
819	yet isn't the head, of a multibyte character.  */
820      if (!sb && !re_string_first_byte (&mctx.input, 0))
821	continue;
822#endif
823
824      /* It seems to be appropriate one, then use the matcher.  */
825      /* We assume that the matching starts from 0.  */
826      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
827      match_last = check_matching (&mctx, fl_longest_match,
828				   range >= 0 ? &match_first : NULL);
829      if (match_last != -1)
830	{
831	  if (BE (match_last == -2, 0))
832	    {
833	      err = REG_ESPACE;
834	      goto free_return;
835	    }
836	  else
837	    {
838	      mctx.match_last = match_last;
839	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
840		{
841		  re_dfastate_t *pstate = mctx.state_log[match_last];
842		  mctx.last_node = check_halt_state_context (&mctx, pstate,
843							     match_last);
844		}
845	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
846		  || dfa->nbackref)
847		{
848		  err = prune_impossible_nodes (&mctx);
849		  if (err == REG_NOERROR)
850		    break;
851		  if (BE (err != REG_NOMATCH, 0))
852		    goto free_return;
853		  match_last = -1;
854		}
855	      else
856		break; /* We found a match.  */
857	    }
858	}
859
860      match_ctx_clean (&mctx);
861    }
862
863#ifdef DEBUG
864  assert (match_last != -1);
865  assert (err == REG_NOERROR);
866#endif
867
868  /* Set pmatch[] if we need.  */
869  if (nmatch > 0)
870    {
871      int reg_idx;
872
873      /* Initialize registers.  */
874      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
875	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
876
877      /* Set the points where matching start/end.  */
878      pmatch[0].rm_so = 0;
879      pmatch[0].rm_eo = mctx.match_last;
880
881      if (!preg->no_sub && nmatch > 1)
882	{
883	  err = set_regs (preg, &mctx, nmatch, pmatch,
884			  dfa->has_plural_match && dfa->nbackref > 0);
885	  if (BE (err != REG_NOERROR, 0))
886	    goto free_return;
887	}
888
889      /* At last, add the offset to the each registers, since we slided
890	 the buffers so that we could assume that the matching starts
891	 from 0.  */
892      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
893	if (pmatch[reg_idx].rm_so != -1)
894	  {
895#ifdef RE_ENABLE_I18N
896	    if (BE (mctx.input.offsets_needed != 0, 0))
897	      {
898		pmatch[reg_idx].rm_so =
899		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
900		   ? mctx.input.valid_raw_len
901		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
902		pmatch[reg_idx].rm_eo =
903		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
904		   ? mctx.input.valid_raw_len
905		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
906	      }
907#else
908	    assert (mctx.input.offsets_needed == 0);
909#endif
910	    pmatch[reg_idx].rm_so += match_first;
911	    pmatch[reg_idx].rm_eo += match_first;
912	  }
913      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
914	{
915	  pmatch[nmatch + reg_idx].rm_so = -1;
916	  pmatch[nmatch + reg_idx].rm_eo = -1;
917	}
918
919      if (dfa->subexp_map)
920        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
921          if (dfa->subexp_map[reg_idx] != reg_idx)
922            {
923              pmatch[reg_idx + 1].rm_so
924                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
925              pmatch[reg_idx + 1].rm_eo
926                = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
927            }
928    }
929
930 free_return:
931  re_free (mctx.state_log);
932  if (dfa->nbackref)
933    match_ctx_free (&mctx);
934  re_string_destruct (&mctx.input);
935  return err;
936}
937
938static reg_errcode_t
939prune_impossible_nodes (mctx)
940     re_match_context_t *mctx;
941{
942  const re_dfa_t *const dfa = mctx->dfa;
943  int halt_node, match_last;
944  reg_errcode_t ret;
945  re_dfastate_t **sifted_states;
946  re_dfastate_t **lim_states = NULL;
947  re_sift_context_t sctx;
948#ifdef DEBUG
949  assert (mctx->state_log != NULL);
950#endif
951  match_last = mctx->match_last;
952  halt_node = mctx->last_node;
953  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
954  if (BE (sifted_states == NULL, 0))
955    {
956      ret = REG_ESPACE;
957      goto free_return;
958    }
959  if (dfa->nbackref)
960    {
961      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
962      if (BE (lim_states == NULL, 0))
963	{
964	  ret = REG_ESPACE;
965	  goto free_return;
966	}
967      while (1)
968	{
969	  memset (lim_states, '\0',
970		  sizeof (re_dfastate_t *) * (match_last + 1));
971	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
972			 match_last);
973	  ret = sift_states_backward (mctx, &sctx);
974	  re_node_set_free (&sctx.limits);
975	  if (BE (ret != REG_NOERROR, 0))
976	      goto free_return;
977	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
978	    break;
979	  do
980	    {
981	      --match_last;
982	      if (match_last < 0)
983		{
984		  ret = REG_NOMATCH;
985		  goto free_return;
986		}
987	    } while (mctx->state_log[match_last] == NULL
988		     || !mctx->state_log[match_last]->halt);
989	  halt_node = check_halt_state_context (mctx,
990						mctx->state_log[match_last],
991						match_last);
992	}
993      ret = merge_state_array (dfa, sifted_states, lim_states,
994			       match_last + 1);
995      re_free (lim_states);
996      lim_states = NULL;
997      if (BE (ret != REG_NOERROR, 0))
998	goto free_return;
999    }
1000  else
1001    {
1002      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1003      ret = sift_states_backward (mctx, &sctx);
1004      re_node_set_free (&sctx.limits);
1005      if (BE (ret != REG_NOERROR, 0))
1006	goto free_return;
1007      if (sifted_states[0] == NULL)
1008	{
1009	  ret = REG_NOMATCH;
1010	  goto free_return;
1011	}
1012    }
1013  re_free (mctx->state_log);
1014  mctx->state_log = sifted_states;
1015  sifted_states = NULL;
1016  mctx->last_node = halt_node;
1017  mctx->match_last = match_last;
1018  ret = REG_NOERROR;
1019 free_return:
1020  re_free (sifted_states);
1021  re_free (lim_states);
1022  return ret;
1023}
1024
1025/* Acquire an initial state and return it.
1026   We must select appropriate initial state depending on the context,
1027   since initial states may have constraints like "\<", "^", etc..  */
1028
1029static inline re_dfastate_t *
1030__attribute ((always_inline)) internal_function
1031acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1032			    int idx)
1033{
1034  const re_dfa_t *const dfa = mctx->dfa;
1035  if (dfa->init_state->has_constraint)
1036    {
1037      unsigned int context;
1038      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1039      if (IS_WORD_CONTEXT (context))
1040	return dfa->init_state_word;
1041      else if (IS_ORDINARY_CONTEXT (context))
1042	return dfa->init_state;
1043      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1044	return dfa->init_state_begbuf;
1045      else if (IS_NEWLINE_CONTEXT (context))
1046	return dfa->init_state_nl;
1047      else if (IS_BEGBUF_CONTEXT (context))
1048	{
1049	  /* It is relatively rare case, then calculate on demand.  */
1050	  return re_acquire_state_context (err, dfa,
1051					   dfa->init_state->entrance_nodes,
1052					   context);
1053	}
1054      else
1055	/* Must not happen?  */
1056	return dfa->init_state;
1057    }
1058  else
1059    return dfa->init_state;
1060}
1061
1062/* Check whether the regular expression match input string INPUT or not,
1063   and return the index where the matching end, return -1 if not match,
1064   or return -2 in case of an error.
1065   FL_LONGEST_MATCH means we want the POSIX longest matching.
1066   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1067   next place where we may want to try matching.
1068   Note that the matcher assume that the maching starts from the current
1069   index of the buffer.  */
1070
1071static int
1072internal_function
1073check_matching (re_match_context_t *mctx, int fl_longest_match,
1074		int *p_match_first)
1075{
1076  const re_dfa_t *const dfa = mctx->dfa;
1077  reg_errcode_t err;
1078  int match = 0;
1079  int match_last = -1;
1080  int cur_str_idx = re_string_cur_idx (&mctx->input);
1081  re_dfastate_t *cur_state;
1082  int at_init_state = p_match_first != NULL;
1083  int next_start_idx = cur_str_idx;
1084
1085  err = REG_NOERROR;
1086  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1087  /* An initial state must not be NULL (invalid).  */
1088  if (BE (cur_state == NULL, 0))
1089    {
1090      assert (err == REG_ESPACE);
1091      return -2;
1092    }
1093
1094  if (mctx->state_log != NULL)
1095    {
1096      mctx->state_log[cur_str_idx] = cur_state;
1097
1098      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1099	 later.  E.g. Processing back references.  */
1100      if (BE (dfa->nbackref, 0))
1101	{
1102	  at_init_state = 0;
1103	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1104	  if (BE (err != REG_NOERROR, 0))
1105	    return err;
1106
1107	  if (cur_state->has_backref)
1108	    {
1109	      err = transit_state_bkref (mctx, &cur_state->nodes);
1110	      if (BE (err != REG_NOERROR, 0))
1111	        return err;
1112	    }
1113	}
1114    }
1115
1116  /* If the RE accepts NULL string.  */
1117  if (BE (cur_state->halt, 0))
1118    {
1119      if (!cur_state->has_constraint
1120	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1121	{
1122	  if (!fl_longest_match)
1123	    return cur_str_idx;
1124	  else
1125	    {
1126	      match_last = cur_str_idx;
1127	      match = 1;
1128	    }
1129	}
1130    }
1131
1132  while (!re_string_eoi (&mctx->input))
1133    {
1134      re_dfastate_t *old_state = cur_state;
1135      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1136
1137      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1138          || (BE (next_char_idx >= mctx->input.valid_len, 0)
1139              && mctx->input.valid_len < mctx->input.len))
1140        {
1141          err = extend_buffers (mctx);
1142          if (BE (err != REG_NOERROR, 0))
1143	    {
1144	      assert (err == REG_ESPACE);
1145	      return -2;
1146	    }
1147        }
1148
1149      cur_state = transit_state (&err, mctx, cur_state);
1150      if (mctx->state_log != NULL)
1151	cur_state = merge_state_with_log (&err, mctx, cur_state);
1152
1153      if (cur_state == NULL)
1154	{
1155	  /* Reached the invalid state or an error.  Try to recover a valid
1156	     state using the state log, if available and if we have not
1157	     already found a valid (even if not the longest) match.  */
1158	  if (BE (err != REG_NOERROR, 0))
1159	    return -2;
1160
1161	  if (mctx->state_log == NULL
1162	      || (match && !fl_longest_match)
1163	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1164	    break;
1165	}
1166
1167      if (BE (at_init_state, 0))
1168	{
1169	  if (old_state == cur_state)
1170	    next_start_idx = next_char_idx;
1171	  else
1172	    at_init_state = 0;
1173	}
1174
1175      if (cur_state->halt)
1176	{
1177	  /* Reached a halt state.
1178	     Check the halt state can satisfy the current context.  */
1179	  if (!cur_state->has_constraint
1180	      || check_halt_state_context (mctx, cur_state,
1181					   re_string_cur_idx (&mctx->input)))
1182	    {
1183	      /* We found an appropriate halt state.  */
1184	      match_last = re_string_cur_idx (&mctx->input);
1185	      match = 1;
1186
1187	      /* We found a match, do not modify match_first below.  */
1188	      p_match_first = NULL;
1189	      if (!fl_longest_match)
1190		break;
1191	    }
1192	}
1193    }
1194
1195  if (p_match_first)
1196    *p_match_first += next_start_idx;
1197
1198  return match_last;
1199}
1200
1201/* Check NODE match the current context.  */
1202
1203static int
1204internal_function
1205check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1206{
1207  re_token_type_t type = dfa->nodes[node].type;
1208  unsigned int constraint = dfa->nodes[node].constraint;
1209  if (type != END_OF_RE)
1210    return 0;
1211  if (!constraint)
1212    return 1;
1213  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1214    return 0;
1215  return 1;
1216}
1217
1218/* Check the halt state STATE match the current context.
1219   Return 0 if not match, if the node, STATE has, is a halt node and
1220   match the context, return the node.  */
1221
1222static int
1223internal_function
1224check_halt_state_context (const re_match_context_t *mctx,
1225			  const re_dfastate_t *state, int idx)
1226{
1227  int i;
1228  unsigned int context;
1229#ifdef DEBUG
1230  assert (state->halt);
1231#endif
1232  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1233  for (i = 0; i < state->nodes.nelem; ++i)
1234    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1235      return state->nodes.elems[i];
1236  return 0;
1237}
1238
1239/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1240   corresponding to the DFA).
1241   Return the destination node, and update EPS_VIA_NODES, return -1 in case
1242   of errors.  */
1243
1244static int
1245internal_function
1246proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1247		   int *pidx, int node, re_node_set *eps_via_nodes,
1248		   struct re_fail_stack_t *fs)
1249{
1250  const re_dfa_t *const dfa = mctx->dfa;
1251  int i, err;
1252  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1253    {
1254      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1255      re_node_set *edests = &dfa->edests[node];
1256      int dest_node;
1257      err = re_node_set_insert (eps_via_nodes, node);
1258      if (BE (err < 0, 0))
1259	return -2;
1260      /* Pick up a valid destination, or return -1 if none is found.  */
1261      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1262	{
1263	  int candidate = edests->elems[i];
1264	  if (!re_node_set_contains (cur_nodes, candidate))
1265	    continue;
1266          if (dest_node == -1)
1267	    dest_node = candidate;
1268
1269          else
1270	    {
1271	      /* In order to avoid infinite loop like "(a*)*", return the second
1272	         epsilon-transition if the first was already considered.  */
1273	      if (re_node_set_contains (eps_via_nodes, dest_node))
1274	        return candidate;
1275
1276	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1277	      else if (fs != NULL
1278		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1279				           eps_via_nodes))
1280		return -2;
1281
1282	      /* We know we are going to exit.  */
1283	      break;
1284	    }
1285	}
1286      return dest_node;
1287    }
1288  else
1289    {
1290      int naccepted = 0;
1291      re_token_type_t type = dfa->nodes[node].type;
1292
1293#ifdef RE_ENABLE_I18N
1294      if (dfa->nodes[node].accept_mb)
1295	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1296      else
1297#endif /* RE_ENABLE_I18N */
1298      if (type == OP_BACK_REF)
1299	{
1300	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
1301	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1302	  if (fs != NULL)
1303	    {
1304	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1305		return -1;
1306	      else if (naccepted)
1307		{
1308		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1309		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1310			      naccepted) != 0)
1311		    return -1;
1312		}
1313	    }
1314
1315	  if (naccepted == 0)
1316	    {
1317	      int dest_node;
1318	      err = re_node_set_insert (eps_via_nodes, node);
1319	      if (BE (err < 0, 0))
1320		return -2;
1321	      dest_node = dfa->edests[node].elems[0];
1322	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1323					dest_node))
1324		return dest_node;
1325	    }
1326	}
1327
1328      if (naccepted != 0
1329	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1330	{
1331	  int dest_node = dfa->nexts[node];
1332	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1333	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1334		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1335					       dest_node)))
1336	    return -1;
1337	  re_node_set_empty (eps_via_nodes);
1338	  return dest_node;
1339	}
1340    }
1341  return -1;
1342}
1343
1344static reg_errcode_t
1345internal_function
1346push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1347		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1348{
1349  reg_errcode_t err;
1350  int num = fs->num++;
1351  if (fs->num == fs->alloc)
1352    {
1353      struct re_fail_stack_ent_t *new_array;
1354      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1355				       * fs->alloc * 2));
1356      if (new_array == NULL)
1357	return REG_ESPACE;
1358      fs->alloc *= 2;
1359      fs->stack = new_array;
1360    }
1361  fs->stack[num].idx = str_idx;
1362  fs->stack[num].node = dest_node;
1363  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1364  if (fs->stack[num].regs == NULL)
1365    return REG_ESPACE;
1366  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1367  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1368  return err;
1369}
1370
1371static int
1372internal_function
1373pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1374		regmatch_t *regs, re_node_set *eps_via_nodes)
1375{
1376  int num = --fs->num;
1377  assert (num >= 0);
1378  *pidx = fs->stack[num].idx;
1379  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1380  re_node_set_free (eps_via_nodes);
1381  re_free (fs->stack[num].regs);
1382  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1383  return fs->stack[num].node;
1384}
1385
1386/* Set the positions where the subexpressions are starts/ends to registers
1387   PMATCH.
1388   Note: We assume that pmatch[0] is already set, and
1389   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1390
1391static reg_errcode_t
1392internal_function
1393set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1394	  regmatch_t *pmatch, int fl_backtrack)
1395{
1396  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1397  int idx, cur_node;
1398  re_node_set eps_via_nodes;
1399  struct re_fail_stack_t *fs;
1400  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1401  regmatch_t *prev_idx_match;
1402  int prev_idx_match_malloced = 0;
1403
1404#ifdef DEBUG
1405  assert (nmatch > 1);
1406  assert (mctx->state_log != NULL);
1407#endif
1408  if (fl_backtrack)
1409    {
1410      fs = &fs_body;
1411      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1412      if (fs->stack == NULL)
1413	return REG_ESPACE;
1414    }
1415  else
1416    fs = NULL;
1417
1418  cur_node = dfa->init_node;
1419  re_node_set_init_empty (&eps_via_nodes);
1420
1421  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1422    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1423  else
1424    {
1425      prev_idx_match = re_malloc (regmatch_t, nmatch);
1426      if (prev_idx_match == NULL)
1427	{
1428	  free_fail_stack_return (fs);
1429	  return REG_ESPACE;
1430	}
1431      prev_idx_match_malloced = 1;
1432    }
1433  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1434
1435  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1436    {
1437      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1438
1439      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1440	{
1441	  int reg_idx;
1442	  if (fs)
1443	    {
1444	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1445		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1446		  break;
1447	      if (reg_idx == nmatch)
1448		{
1449		  re_node_set_free (&eps_via_nodes);
1450		  if (prev_idx_match_malloced)
1451		    re_free (prev_idx_match);
1452		  return free_fail_stack_return (fs);
1453		}
1454	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1455					 &eps_via_nodes);
1456	    }
1457	  else
1458	    {
1459	      re_node_set_free (&eps_via_nodes);
1460	      if (prev_idx_match_malloced)
1461		re_free (prev_idx_match);
1462	      return REG_NOERROR;
1463	    }
1464	}
1465
1466      /* Proceed to next node.  */
1467      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1468				    &eps_via_nodes, fs);
1469
1470      if (BE (cur_node < 0, 0))
1471	{
1472	  if (BE (cur_node == -2, 0))
1473	    {
1474	      re_node_set_free (&eps_via_nodes);
1475	      if (prev_idx_match_malloced)
1476		re_free (prev_idx_match);
1477	      free_fail_stack_return (fs);
1478	      return REG_ESPACE;
1479	    }
1480	  if (fs)
1481	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1482				       &eps_via_nodes);
1483	  else
1484	    {
1485	      re_node_set_free (&eps_via_nodes);
1486	      if (prev_idx_match_malloced)
1487		re_free (prev_idx_match);
1488	      return REG_NOMATCH;
1489	    }
1490	}
1491    }
1492  re_node_set_free (&eps_via_nodes);
1493  if (prev_idx_match_malloced)
1494    re_free (prev_idx_match);
1495  return free_fail_stack_return (fs);
1496}
1497
1498static reg_errcode_t
1499internal_function
1500free_fail_stack_return (struct re_fail_stack_t *fs)
1501{
1502  if (fs)
1503    {
1504      int fs_idx;
1505      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1506	{
1507	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1508	  re_free (fs->stack[fs_idx].regs);
1509	}
1510      re_free (fs->stack);
1511    }
1512  return REG_NOERROR;
1513}
1514
1515static void
1516internal_function
1517update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1518	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1519{
1520  int type = dfa->nodes[cur_node].type;
1521  if (type == OP_OPEN_SUBEXP)
1522    {
1523      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1524
1525      /* We are at the first node of this sub expression.  */
1526      if (reg_num < nmatch)
1527	{
1528	  pmatch[reg_num].rm_so = cur_idx;
1529	  pmatch[reg_num].rm_eo = -1;
1530	}
1531    }
1532  else if (type == OP_CLOSE_SUBEXP)
1533    {
1534      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1535      if (reg_num < nmatch)
1536	{
1537	  /* We are at the last node of this sub expression.  */
1538	  if (pmatch[reg_num].rm_so < cur_idx)
1539	    {
1540	      pmatch[reg_num].rm_eo = cur_idx;
1541	      /* This is a non-empty match or we are not inside an optional
1542		 subexpression.  Accept this right away.  */
1543	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1544	    }
1545	  else
1546	    {
1547	      if (dfa->nodes[cur_node].opt_subexp
1548		  && prev_idx_match[reg_num].rm_so != -1)
1549		/* We transited through an empty match for an optional
1550		   subexpression, like (a?)*, and this is not the subexp's
1551		   first match.  Copy back the old content of the registers
1552		   so that matches of an inner subexpression are undone as
1553		   well, like in ((a?))*.  */
1554		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1555	      else
1556		/* We completed a subexpression, but it may be part of
1557		   an optional one, so do not update PREV_IDX_MATCH.  */
1558		pmatch[reg_num].rm_eo = cur_idx;
1559	    }
1560	}
1561    }
1562}
1563
1564/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1565   and sift the nodes in each states according to the following rules.
1566   Updated state_log will be wrote to STATE_LOG.
1567
1568   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1569     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1570	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1571	the LAST_NODE, we throw away the node `a'.
1572     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1573	string `s' and transit to `b':
1574	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1575	   away the node `a'.
1576	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1577	    thrown away, we throw away the node `a'.
1578     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1579	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1580	   node `a'.
1581	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1582	    we throw away the node `a'.  */
1583
1584#define STATE_NODE_CONTAINS(state,node) \
1585  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1586
1587static reg_errcode_t
1588internal_function
1589sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1590{
1591  reg_errcode_t err;
1592  int null_cnt = 0;
1593  int str_idx = sctx->last_str_idx;
1594  re_node_set cur_dest;
1595
1596#ifdef DEBUG
1597  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1598#endif
1599
1600  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1601     transit to the last_node and the last_node itself.  */
1602  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1603  if (BE (err != REG_NOERROR, 0))
1604    return err;
1605  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1606  if (BE (err != REG_NOERROR, 0))
1607    goto free_return;
1608
1609  /* Then check each states in the state_log.  */
1610  while (str_idx > 0)
1611    {
1612      /* Update counters.  */
1613      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1614      if (null_cnt > mctx->max_mb_elem_len)
1615	{
1616	  memset (sctx->sifted_states, '\0',
1617		  sizeof (re_dfastate_t *) * str_idx);
1618	  re_node_set_free (&cur_dest);
1619	  return REG_NOERROR;
1620	}
1621      re_node_set_empty (&cur_dest);
1622      --str_idx;
1623
1624      if (mctx->state_log[str_idx])
1625	{
1626	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1627          if (BE (err != REG_NOERROR, 0))
1628	    goto free_return;
1629	}
1630
1631      /* Add all the nodes which satisfy the following conditions:
1632	 - It can epsilon transit to a node in CUR_DEST.
1633	 - It is in CUR_SRC.
1634	 And update state_log.  */
1635      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1636      if (BE (err != REG_NOERROR, 0))
1637	goto free_return;
1638    }
1639  err = REG_NOERROR;
1640 free_return:
1641  re_node_set_free (&cur_dest);
1642  return err;
1643}
1644
1645static reg_errcode_t
1646internal_function
1647build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1648		     int str_idx, re_node_set *cur_dest)
1649{
1650  const re_dfa_t *const dfa = mctx->dfa;
1651  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1652  int i;
1653
1654  /* Then build the next sifted state.
1655     We build the next sifted state on `cur_dest', and update
1656     `sifted_states[str_idx]' with `cur_dest'.
1657     Note:
1658     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1659     `cur_src' points the node_set of the old `state_log[str_idx]'
1660     (with the epsilon nodes pre-filtered out).  */
1661  for (i = 0; i < cur_src->nelem; i++)
1662    {
1663      int prev_node = cur_src->elems[i];
1664      int naccepted = 0;
1665      int ret;
1666
1667#ifdef DEBUG
1668      re_token_type_t type = dfa->nodes[prev_node].type;
1669      assert (!IS_EPSILON_NODE (type));
1670#endif
1671#ifdef RE_ENABLE_I18N
1672      /* If the node may accept `multi byte'.  */
1673      if (dfa->nodes[prev_node].accept_mb)
1674	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1675					 str_idx, sctx->last_str_idx);
1676#endif /* RE_ENABLE_I18N */
1677
1678      /* We don't check backreferences here.
1679	 See update_cur_sifted_state().  */
1680      if (!naccepted
1681	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1682	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1683				  dfa->nexts[prev_node]))
1684	naccepted = 1;
1685
1686      if (naccepted == 0)
1687	continue;
1688
1689      if (sctx->limits.nelem)
1690	{
1691	  int to_idx = str_idx + naccepted;
1692	  if (check_dst_limits (mctx, &sctx->limits,
1693				dfa->nexts[prev_node], to_idx,
1694				prev_node, str_idx))
1695	    continue;
1696	}
1697      ret = re_node_set_insert (cur_dest, prev_node);
1698      if (BE (ret == -1, 0))
1699	return REG_ESPACE;
1700    }
1701
1702  return REG_NOERROR;
1703}
1704
1705/* Helper functions.  */
1706
1707static reg_errcode_t
1708internal_function
1709clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1710{
1711  int top = mctx->state_log_top;
1712
1713  if (next_state_log_idx >= mctx->input.bufs_len
1714      || (next_state_log_idx >= mctx->input.valid_len
1715	  && mctx->input.valid_len < mctx->input.len))
1716    {
1717      reg_errcode_t err;
1718      err = extend_buffers (mctx);
1719      if (BE (err != REG_NOERROR, 0))
1720	return err;
1721    }
1722
1723  if (top < next_state_log_idx)
1724    {
1725      memset (mctx->state_log + top + 1, '\0',
1726	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1727      mctx->state_log_top = next_state_log_idx;
1728    }
1729  return REG_NOERROR;
1730}
1731
1732static reg_errcode_t
1733internal_function
1734merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1735		   re_dfastate_t **src, int num)
1736{
1737  int st_idx;
1738  reg_errcode_t err;
1739  for (st_idx = 0; st_idx < num; ++st_idx)
1740    {
1741      if (dst[st_idx] == NULL)
1742	dst[st_idx] = src[st_idx];
1743      else if (src[st_idx] != NULL)
1744	{
1745	  re_node_set merged_set;
1746	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1747					&src[st_idx]->nodes);
1748	  if (BE (err != REG_NOERROR, 0))
1749	    return err;
1750	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1751	  re_node_set_free (&merged_set);
1752	  if (BE (err != REG_NOERROR, 0))
1753	    return err;
1754	}
1755    }
1756  return REG_NOERROR;
1757}
1758
1759static reg_errcode_t
1760internal_function
1761update_cur_sifted_state (const re_match_context_t *mctx,
1762			 re_sift_context_t *sctx, int str_idx,
1763			 re_node_set *dest_nodes)
1764{
1765  const re_dfa_t *const dfa = mctx->dfa;
1766  reg_errcode_t err = REG_NOERROR;
1767  const re_node_set *candidates;
1768  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1769		: &mctx->state_log[str_idx]->nodes);
1770
1771  if (dest_nodes->nelem == 0)
1772    sctx->sifted_states[str_idx] = NULL;
1773  else
1774    {
1775      if (candidates)
1776	{
1777	  /* At first, add the nodes which can epsilon transit to a node in
1778	     DEST_NODE.  */
1779	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1780	  if (BE (err != REG_NOERROR, 0))
1781	    return err;
1782
1783	  /* Then, check the limitations in the current sift_context.  */
1784	  if (sctx->limits.nelem)
1785	    {
1786	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1787					 mctx->bkref_ents, str_idx);
1788	      if (BE (err != REG_NOERROR, 0))
1789		return err;
1790	    }
1791	}
1792
1793      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1794      if (BE (err != REG_NOERROR, 0))
1795	return err;
1796    }
1797
1798  if (candidates && mctx->state_log[str_idx]->has_backref)
1799    {
1800      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1801      if (BE (err != REG_NOERROR, 0))
1802	return err;
1803    }
1804  return REG_NOERROR;
1805}
1806
1807static reg_errcode_t
1808internal_function
1809add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1810		       const re_node_set *candidates)
1811{
1812  reg_errcode_t err = REG_NOERROR;
1813  int i;
1814
1815  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1816  if (BE (err != REG_NOERROR, 0))
1817    return err;
1818
1819  if (!state->inveclosure.alloc)
1820    {
1821      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1822      if (BE (err != REG_NOERROR, 0))
1823        return REG_ESPACE;
1824      for (i = 0; i < dest_nodes->nelem; i++)
1825        re_node_set_merge (&state->inveclosure,
1826			   dfa->inveclosures + dest_nodes->elems[i]);
1827    }
1828  return re_node_set_add_intersect (dest_nodes, candidates,
1829				    &state->inveclosure);
1830}
1831
1832static reg_errcode_t
1833internal_function
1834sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1835		       const re_node_set *candidates)
1836{
1837    int ecl_idx;
1838    reg_errcode_t err;
1839    re_node_set *inv_eclosure = dfa->inveclosures + node;
1840    re_node_set except_nodes;
1841    re_node_set_init_empty (&except_nodes);
1842    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1843      {
1844	int cur_node = inv_eclosure->elems[ecl_idx];
1845	if (cur_node == node)
1846	  continue;
1847	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1848	  {
1849	    int edst1 = dfa->edests[cur_node].elems[0];
1850	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1851			 ? dfa->edests[cur_node].elems[1] : -1);
1852	    if ((!re_node_set_contains (inv_eclosure, edst1)
1853		 && re_node_set_contains (dest_nodes, edst1))
1854		|| (edst2 > 0
1855		    && !re_node_set_contains (inv_eclosure, edst2)
1856		    && re_node_set_contains (dest_nodes, edst2)))
1857	      {
1858		err = re_node_set_add_intersect (&except_nodes, candidates,
1859						 dfa->inveclosures + cur_node);
1860		if (BE (err != REG_NOERROR, 0))
1861		  {
1862		    re_node_set_free (&except_nodes);
1863		    return err;
1864		  }
1865	      }
1866	  }
1867      }
1868    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1869      {
1870	int cur_node = inv_eclosure->elems[ecl_idx];
1871	if (!re_node_set_contains (&except_nodes, cur_node))
1872	  {
1873	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1874	    re_node_set_remove_at (dest_nodes, idx);
1875	  }
1876      }
1877    re_node_set_free (&except_nodes);
1878    return REG_NOERROR;
1879}
1880
1881static int
1882internal_function
1883check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1884		  int dst_node, int dst_idx, int src_node, int src_idx)
1885{
1886  const re_dfa_t *const dfa = mctx->dfa;
1887  int lim_idx, src_pos, dst_pos;
1888
1889  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1890  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1891  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1892    {
1893      int subexp_idx;
1894      struct re_backref_cache_entry *ent;
1895      ent = mctx->bkref_ents + limits->elems[lim_idx];
1896      subexp_idx = dfa->nodes[ent->node].opr.idx;
1897
1898      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1899					   subexp_idx, dst_node, dst_idx,
1900					   dst_bkref_idx);
1901      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1902					   subexp_idx, src_node, src_idx,
1903					   src_bkref_idx);
1904
1905      /* In case of:
1906	 <src> <dst> ( <subexp> )
1907	 ( <subexp> ) <src> <dst>
1908	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1909      if (src_pos == dst_pos)
1910	continue; /* This is unrelated limitation.  */
1911      else
1912	return 1;
1913    }
1914  return 0;
1915}
1916
1917static int
1918internal_function
1919check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1920			     int subexp_idx, int from_node, int bkref_idx)
1921{
1922  const re_dfa_t *const dfa = mctx->dfa;
1923  const re_node_set *eclosures = dfa->eclosures + from_node;
1924  int node_idx;
1925
1926  /* Else, we are on the boundary: examine the nodes on the epsilon
1927     closure.  */
1928  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1929    {
1930      int node = eclosures->elems[node_idx];
1931      switch (dfa->nodes[node].type)
1932	{
1933	case OP_BACK_REF:
1934	  if (bkref_idx != -1)
1935	    {
1936	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1937	      do
1938	        {
1939		  int dst, cpos;
1940
1941		  if (ent->node != node)
1942		    continue;
1943
1944		  if (subexp_idx < BITSET_WORD_BITS
1945		      && !(ent->eps_reachable_subexps_map
1946			   & ((bitset_word_t) 1 << subexp_idx)))
1947		    continue;
1948
1949		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1950		     OP_CLOSE_SUBEXP cases below.  But, if the
1951		     destination node is the same node as the source
1952		     node, don't recurse because it would cause an
1953		     infinite loop: a regex that exhibits this behavior
1954		     is ()\1*\1*  */
1955		  dst = dfa->edests[node].elems[0];
1956		  if (dst == from_node)
1957		    {
1958		      if (boundaries & 1)
1959		        return -1;
1960		      else /* if (boundaries & 2) */
1961		        return 0;
1962		    }
1963
1964		  cpos =
1965		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1966						 dst, bkref_idx);
1967		  if (cpos == -1 /* && (boundaries & 1) */)
1968		    return -1;
1969		  if (cpos == 0 && (boundaries & 2))
1970		    return 0;
1971
1972		  if (subexp_idx < BITSET_WORD_BITS)
1973		    ent->eps_reachable_subexps_map
1974		      &= ~((bitset_word_t) 1 << subexp_idx);
1975	        }
1976	      while (ent++->more);
1977	    }
1978	  break;
1979
1980	case OP_OPEN_SUBEXP:
1981	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1982	    return -1;
1983	  break;
1984
1985	case OP_CLOSE_SUBEXP:
1986	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1987	    return 0;
1988	  break;
1989
1990	default:
1991	    break;
1992	}
1993    }
1994
1995  return (boundaries & 2) ? 1 : 0;
1996}
1997
1998static int
1999internal_function
2000check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2001			   int subexp_idx, int from_node, int str_idx,
2002			   int bkref_idx)
2003{
2004  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2005  int boundaries;
2006
2007  /* If we are outside the range of the subexpression, return -1 or 1.  */
2008  if (str_idx < lim->subexp_from)
2009    return -1;
2010
2011  if (lim->subexp_to < str_idx)
2012    return 1;
2013
2014  /* If we are within the subexpression, return 0.  */
2015  boundaries = (str_idx == lim->subexp_from);
2016  boundaries |= (str_idx == lim->subexp_to) << 1;
2017  if (boundaries == 0)
2018    return 0;
2019
2020  /* Else, examine epsilon closure.  */
2021  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2022				      from_node, bkref_idx);
2023}
2024
2025/* Check the limitations of sub expressions LIMITS, and remove the nodes
2026   which are against limitations from DEST_NODES. */
2027
2028static reg_errcode_t
2029internal_function
2030check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2031		     const re_node_set *candidates, re_node_set *limits,
2032		     struct re_backref_cache_entry *bkref_ents, int str_idx)
2033{
2034  reg_errcode_t err;
2035  int node_idx, lim_idx;
2036
2037  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2038    {
2039      int subexp_idx;
2040      struct re_backref_cache_entry *ent;
2041      ent = bkref_ents + limits->elems[lim_idx];
2042
2043      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2044	continue; /* This is unrelated limitation.  */
2045
2046      subexp_idx = dfa->nodes[ent->node].opr.idx;
2047      if (ent->subexp_to == str_idx)
2048	{
2049	  int ops_node = -1;
2050	  int cls_node = -1;
2051	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2052	    {
2053	      int node = dest_nodes->elems[node_idx];
2054	      re_token_type_t type = dfa->nodes[node].type;
2055	      if (type == OP_OPEN_SUBEXP
2056		  && subexp_idx == dfa->nodes[node].opr.idx)
2057		ops_node = node;
2058	      else if (type == OP_CLOSE_SUBEXP
2059		       && subexp_idx == dfa->nodes[node].opr.idx)
2060		cls_node = node;
2061	    }
2062
2063	  /* Check the limitation of the open subexpression.  */
2064	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2065	  if (ops_node >= 0)
2066	    {
2067	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2068					   candidates);
2069	      if (BE (err != REG_NOERROR, 0))
2070		return err;
2071	    }
2072
2073	  /* Check the limitation of the close subexpression.  */
2074	  if (cls_node >= 0)
2075	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2076	      {
2077		int node = dest_nodes->elems[node_idx];
2078		if (!re_node_set_contains (dfa->inveclosures + node,
2079					   cls_node)
2080		    && !re_node_set_contains (dfa->eclosures + node,
2081					      cls_node))
2082		  {
2083		    /* It is against this limitation.
2084		       Remove it form the current sifted state.  */
2085		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2086						 candidates);
2087		    if (BE (err != REG_NOERROR, 0))
2088		      return err;
2089		    --node_idx;
2090		  }
2091	      }
2092	}
2093      else /* (ent->subexp_to != str_idx)  */
2094	{
2095	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2096	    {
2097	      int node = dest_nodes->elems[node_idx];
2098	      re_token_type_t type = dfa->nodes[node].type;
2099	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2100		{
2101		  if (subexp_idx != dfa->nodes[node].opr.idx)
2102		    continue;
2103		  /* It is against this limitation.
2104		     Remove it form the current sifted state.  */
2105		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2106					       candidates);
2107		  if (BE (err != REG_NOERROR, 0))
2108		    return err;
2109		}
2110	    }
2111	}
2112    }
2113  return REG_NOERROR;
2114}
2115
2116static reg_errcode_t
2117internal_function
2118sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2119		   int str_idx, const re_node_set *candidates)
2120{
2121  const re_dfa_t *const dfa = mctx->dfa;
2122  reg_errcode_t err;
2123  int node_idx, node;
2124  re_sift_context_t local_sctx;
2125  int first_idx = search_cur_bkref_entry (mctx, str_idx);
2126
2127  if (first_idx == -1)
2128    return REG_NOERROR;
2129
2130  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2131
2132  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2133    {
2134      int enabled_idx;
2135      re_token_type_t type;
2136      struct re_backref_cache_entry *entry;
2137      node = candidates->elems[node_idx];
2138      type = dfa->nodes[node].type;
2139      /* Avoid infinite loop for the REs like "()\1+".  */
2140      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2141	continue;
2142      if (type != OP_BACK_REF)
2143	continue;
2144
2145      entry = mctx->bkref_ents + first_idx;
2146      enabled_idx = first_idx;
2147      do
2148	{
2149	  int subexp_len;
2150	  int to_idx;
2151	  int dst_node;
2152	  int ret;
2153	  re_dfastate_t *cur_state;
2154
2155	  if (entry->node != node)
2156	    continue;
2157	  subexp_len = entry->subexp_to - entry->subexp_from;
2158	  to_idx = str_idx + subexp_len;
2159	  dst_node = (subexp_len ? dfa->nexts[node]
2160		      : dfa->edests[node].elems[0]);
2161
2162	  if (to_idx > sctx->last_str_idx
2163	      || sctx->sifted_states[to_idx] == NULL
2164	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2165	      || check_dst_limits (mctx, &sctx->limits, node,
2166				   str_idx, dst_node, to_idx))
2167	    continue;
2168
2169	  if (local_sctx.sifted_states == NULL)
2170	    {
2171	      local_sctx = *sctx;
2172	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2173	      if (BE (err != REG_NOERROR, 0))
2174		goto free_return;
2175	    }
2176	  local_sctx.last_node = node;
2177	  local_sctx.last_str_idx = str_idx;
2178	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2179	  if (BE (ret < 0, 0))
2180	    {
2181	      err = REG_ESPACE;
2182	      goto free_return;
2183	    }
2184	  cur_state = local_sctx.sifted_states[str_idx];
2185	  err = sift_states_backward (mctx, &local_sctx);
2186	  if (BE (err != REG_NOERROR, 0))
2187	    goto free_return;
2188	  if (sctx->limited_states != NULL)
2189	    {
2190	      err = merge_state_array (dfa, sctx->limited_states,
2191				       local_sctx.sifted_states,
2192				       str_idx + 1);
2193	      if (BE (err != REG_NOERROR, 0))
2194		goto free_return;
2195	    }
2196	  local_sctx.sifted_states[str_idx] = cur_state;
2197	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2198
2199	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2200          entry = mctx->bkref_ents + enabled_idx;
2201	}
2202      while (enabled_idx++, entry++->more);
2203    }
2204  err = REG_NOERROR;
2205 free_return:
2206  if (local_sctx.sifted_states != NULL)
2207    {
2208      re_node_set_free (&local_sctx.limits);
2209    }
2210
2211  return err;
2212}
2213
2214
2215#ifdef RE_ENABLE_I18N
2216static int
2217internal_function
2218sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2219		     int node_idx, int str_idx, int max_str_idx)
2220{
2221  const re_dfa_t *const dfa = mctx->dfa;
2222  int naccepted;
2223  /* Check the node can accept `multi byte'.  */
2224  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2225  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2226      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2227			    dfa->nexts[node_idx]))
2228    /* The node can't accept the `multi byte', or the
2229       destination was already thrown away, then the node
2230       could't accept the current input `multi byte'.   */
2231    naccepted = 0;
2232  /* Otherwise, it is sure that the node could accept
2233     `naccepted' bytes input.  */
2234  return naccepted;
2235}
2236#endif /* RE_ENABLE_I18N */
2237
2238
2239/* Functions for state transition.  */
2240
2241/* Return the next state to which the current state STATE will transit by
2242   accepting the current input byte, and update STATE_LOG if necessary.
2243   If STATE can accept a multibyte char/collating element/back reference
2244   update the destination of STATE_LOG.  */
2245
2246static re_dfastate_t *
2247internal_function
2248transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2249	       re_dfastate_t *state)
2250{
2251  re_dfastate_t **trtable;
2252  unsigned char ch;
2253
2254#ifdef RE_ENABLE_I18N
2255  /* If the current state can accept multibyte.  */
2256  if (BE (state->accept_mb, 0))
2257    {
2258      *err = transit_state_mb (mctx, state);
2259      if (BE (*err != REG_NOERROR, 0))
2260	return NULL;
2261    }
2262#endif /* RE_ENABLE_I18N */
2263
2264  /* Then decide the next state with the single byte.  */
2265#if 0
2266  if (0)
2267    /* don't use transition table  */
2268    return transit_state_sb (err, mctx, state);
2269#endif
2270
2271  /* Use transition table  */
2272  ch = re_string_fetch_byte (&mctx->input);
2273  for (;;)
2274    {
2275      trtable = state->trtable;
2276      if (BE (trtable != NULL, 1))
2277	return trtable[ch];
2278
2279      trtable = state->word_trtable;
2280      if (BE (trtable != NULL, 1))
2281        {
2282	  unsigned int context;
2283	  context
2284	    = re_string_context_at (&mctx->input,
2285				    re_string_cur_idx (&mctx->input) - 1,
2286				    mctx->eflags);
2287	  if (IS_WORD_CONTEXT (context))
2288	    return trtable[ch + SBC_MAX];
2289	  else
2290	    return trtable[ch];
2291	}
2292
2293      if (!build_trtable (mctx->dfa, state))
2294	{
2295	  *err = REG_ESPACE;
2296	  return NULL;
2297	}
2298
2299      /* Retry, we now have a transition table.  */
2300    }
2301}
2302
2303/* Update the state_log if we need */
2304re_dfastate_t *
2305internal_function
2306merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2307		      re_dfastate_t *next_state)
2308{
2309  const re_dfa_t *const dfa = mctx->dfa;
2310  int cur_idx = re_string_cur_idx (&mctx->input);
2311
2312  if (cur_idx > mctx->state_log_top)
2313    {
2314      mctx->state_log[cur_idx] = next_state;
2315      mctx->state_log_top = cur_idx;
2316    }
2317  else if (mctx->state_log[cur_idx] == 0)
2318    {
2319      mctx->state_log[cur_idx] = next_state;
2320    }
2321  else
2322    {
2323      re_dfastate_t *pstate;
2324      unsigned int context;
2325      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2326      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2327         the destination of a multibyte char/collating element/
2328         back reference.  Then the next state is the union set of
2329         these destinations and the results of the transition table.  */
2330      pstate = mctx->state_log[cur_idx];
2331      log_nodes = pstate->entrance_nodes;
2332      if (next_state != NULL)
2333        {
2334          table_nodes = next_state->entrance_nodes;
2335          *err = re_node_set_init_union (&next_nodes, table_nodes,
2336					     log_nodes);
2337          if (BE (*err != REG_NOERROR, 0))
2338	    return NULL;
2339        }
2340      else
2341        next_nodes = *log_nodes;
2342      /* Note: We already add the nodes of the initial state,
2343	 then we don't need to add them here.  */
2344
2345      context = re_string_context_at (&mctx->input,
2346				      re_string_cur_idx (&mctx->input) - 1,
2347				      mctx->eflags);
2348      next_state = mctx->state_log[cur_idx]
2349        = re_acquire_state_context (err, dfa, &next_nodes, context);
2350      /* We don't need to check errors here, since the return value of
2351         this function is next_state and ERR is already set.  */
2352
2353      if (table_nodes != NULL)
2354        re_node_set_free (&next_nodes);
2355    }
2356
2357  if (BE (dfa->nbackref, 0) && next_state != NULL)
2358    {
2359      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2360	 later.  We must check them here, since the back references in the
2361	 next state might use them.  */
2362      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2363					cur_idx);
2364      if (BE (*err != REG_NOERROR, 0))
2365	return NULL;
2366
2367      /* If the next state has back references.  */
2368      if (next_state->has_backref)
2369	{
2370	  *err = transit_state_bkref (mctx, &next_state->nodes);
2371	  if (BE (*err != REG_NOERROR, 0))
2372	    return NULL;
2373	  next_state = mctx->state_log[cur_idx];
2374	}
2375    }
2376
2377  return next_state;
2378}
2379
2380/* Skip bytes in the input that correspond to part of a
2381   multi-byte match, then look in the log for a state
2382   from which to restart matching.  */
2383re_dfastate_t *
2384internal_function
2385find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2386{
2387  re_dfastate_t *cur_state;
2388  do
2389    {
2390      int max = mctx->state_log_top;
2391      int cur_str_idx = re_string_cur_idx (&mctx->input);
2392
2393      do
2394	{
2395          if (++cur_str_idx > max)
2396            return NULL;
2397          re_string_skip_bytes (&mctx->input, 1);
2398	}
2399      while (mctx->state_log[cur_str_idx] == NULL);
2400
2401      cur_state = merge_state_with_log (err, mctx, NULL);
2402    }
2403  while (*err == REG_NOERROR && cur_state == NULL);
2404  return cur_state;
2405}
2406
2407/* Helper functions for transit_state.  */
2408
2409/* From the node set CUR_NODES, pick up the nodes whose types are
2410   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2411   expression. And register them to use them later for evaluating the
2412   correspoding back references.  */
2413
2414static reg_errcode_t
2415internal_function
2416check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2417			   int str_idx)
2418{
2419  const re_dfa_t *const dfa = mctx->dfa;
2420  int node_idx;
2421  reg_errcode_t err;
2422
2423  /* TODO: This isn't efficient.
2424	   Because there might be more than one nodes whose types are
2425	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2426	   nodes.
2427	   E.g. RE: (a){2}  */
2428  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2429    {
2430      int node = cur_nodes->elems[node_idx];
2431      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2432	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2433	  && (dfa->used_bkref_map
2434	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2435	{
2436	  err = match_ctx_add_subtop (mctx, node, str_idx);
2437	  if (BE (err != REG_NOERROR, 0))
2438	    return err;
2439	}
2440    }
2441  return REG_NOERROR;
2442}
2443
2444#if 0
2445/* Return the next state to which the current state STATE will transit by
2446   accepting the current input byte.  */
2447
2448static re_dfastate_t *
2449transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2450		  re_dfastate_t *state)
2451{
2452  const re_dfa_t *const dfa = mctx->dfa;
2453  re_node_set next_nodes;
2454  re_dfastate_t *next_state;
2455  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2456  unsigned int context;
2457
2458  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2459  if (BE (*err != REG_NOERROR, 0))
2460    return NULL;
2461  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2462    {
2463      int cur_node = state->nodes.elems[node_cnt];
2464      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2465	{
2466	  *err = re_node_set_merge (&next_nodes,
2467				    dfa->eclosures + dfa->nexts[cur_node]);
2468	  if (BE (*err != REG_NOERROR, 0))
2469	    {
2470	      re_node_set_free (&next_nodes);
2471	      return NULL;
2472	    }
2473	}
2474    }
2475  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2476  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2477  /* We don't need to check errors here, since the return value of
2478     this function is next_state and ERR is already set.  */
2479
2480  re_node_set_free (&next_nodes);
2481  re_string_skip_bytes (&mctx->input, 1);
2482  return next_state;
2483}
2484#endif
2485
2486#ifdef RE_ENABLE_I18N
2487static reg_errcode_t
2488internal_function
2489transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2490{
2491  const re_dfa_t *const dfa = mctx->dfa;
2492  reg_errcode_t err;
2493  int i;
2494
2495  for (i = 0; i < pstate->nodes.nelem; ++i)
2496    {
2497      re_node_set dest_nodes, *new_nodes;
2498      int cur_node_idx = pstate->nodes.elems[i];
2499      int naccepted, dest_idx;
2500      unsigned int context;
2501      re_dfastate_t *dest_state;
2502
2503      if (!dfa->nodes[cur_node_idx].accept_mb)
2504        continue;
2505
2506      if (dfa->nodes[cur_node_idx].constraint)
2507	{
2508	  context = re_string_context_at (&mctx->input,
2509					  re_string_cur_idx (&mctx->input),
2510					  mctx->eflags);
2511	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2512					   context))
2513	    continue;
2514	}
2515
2516      /* How many bytes the node can accept?  */
2517      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2518					   re_string_cur_idx (&mctx->input));
2519      if (naccepted == 0)
2520	continue;
2521
2522      /* The node can accepts `naccepted' bytes.  */
2523      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2524      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2525			       : mctx->max_mb_elem_len);
2526      err = clean_state_log_if_needed (mctx, dest_idx);
2527      if (BE (err != REG_NOERROR, 0))
2528	return err;
2529#ifdef DEBUG
2530      assert (dfa->nexts[cur_node_idx] != -1);
2531#endif
2532      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2533
2534      dest_state = mctx->state_log[dest_idx];
2535      if (dest_state == NULL)
2536	dest_nodes = *new_nodes;
2537      else
2538	{
2539	  err = re_node_set_init_union (&dest_nodes,
2540					dest_state->entrance_nodes, new_nodes);
2541	  if (BE (err != REG_NOERROR, 0))
2542	    return err;
2543	}
2544      context = re_string_context_at (&mctx->input, dest_idx - 1,
2545				      mctx->eflags);
2546      mctx->state_log[dest_idx]
2547	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2548      if (dest_state != NULL)
2549	re_node_set_free (&dest_nodes);
2550      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2551	return err;
2552    }
2553  return REG_NOERROR;
2554}
2555#endif /* RE_ENABLE_I18N */
2556
2557static reg_errcode_t
2558internal_function
2559transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2560{
2561  const re_dfa_t *const dfa = mctx->dfa;
2562  reg_errcode_t err;
2563  int i;
2564  int cur_str_idx = re_string_cur_idx (&mctx->input);
2565
2566  for (i = 0; i < nodes->nelem; ++i)
2567    {
2568      int dest_str_idx, prev_nelem, bkc_idx;
2569      int node_idx = nodes->elems[i];
2570      unsigned int context;
2571      const re_token_t *node = dfa->nodes + node_idx;
2572      re_node_set *new_dest_nodes;
2573
2574      /* Check whether `node' is a backreference or not.  */
2575      if (node->type != OP_BACK_REF)
2576	continue;
2577
2578      if (node->constraint)
2579	{
2580	  context = re_string_context_at (&mctx->input, cur_str_idx,
2581					  mctx->eflags);
2582	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2583	    continue;
2584	}
2585
2586      /* `node' is a backreference.
2587	 Check the substring which the substring matched.  */
2588      bkc_idx = mctx->nbkref_ents;
2589      err = get_subexp (mctx, node_idx, cur_str_idx);
2590      if (BE (err != REG_NOERROR, 0))
2591	goto free_return;
2592
2593      /* And add the epsilon closures (which is `new_dest_nodes') of
2594	 the backreference to appropriate state_log.  */
2595#ifdef DEBUG
2596      assert (dfa->nexts[node_idx] != -1);
2597#endif
2598      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2599	{
2600	  int subexp_len;
2601	  re_dfastate_t *dest_state;
2602	  struct re_backref_cache_entry *bkref_ent;
2603	  bkref_ent = mctx->bkref_ents + bkc_idx;
2604	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2605	    continue;
2606	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2607	  new_dest_nodes = (subexp_len == 0
2608			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2609			    : dfa->eclosures + dfa->nexts[node_idx]);
2610	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2611			  - bkref_ent->subexp_from);
2612	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2613					  mctx->eflags);
2614	  dest_state = mctx->state_log[dest_str_idx];
2615	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2616			: mctx->state_log[cur_str_idx]->nodes.nelem);
2617	  /* Add `new_dest_node' to state_log.  */
2618	  if (dest_state == NULL)
2619	    {
2620	      mctx->state_log[dest_str_idx]
2621		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2622					    context);
2623	      if (BE (mctx->state_log[dest_str_idx] == NULL
2624		      && err != REG_NOERROR, 0))
2625		goto free_return;
2626	    }
2627	  else
2628	    {
2629	      re_node_set dest_nodes;
2630	      err = re_node_set_init_union (&dest_nodes,
2631					    dest_state->entrance_nodes,
2632					    new_dest_nodes);
2633	      if (BE (err != REG_NOERROR, 0))
2634		{
2635		  re_node_set_free (&dest_nodes);
2636		  goto free_return;
2637		}
2638	      mctx->state_log[dest_str_idx]
2639		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2640	      re_node_set_free (&dest_nodes);
2641	      if (BE (mctx->state_log[dest_str_idx] == NULL
2642		      && err != REG_NOERROR, 0))
2643		goto free_return;
2644	    }
2645	  /* We need to check recursively if the backreference can epsilon
2646	     transit.  */
2647	  if (subexp_len == 0
2648	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2649	    {
2650	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2651					       cur_str_idx);
2652	      if (BE (err != REG_NOERROR, 0))
2653		goto free_return;
2654	      err = transit_state_bkref (mctx, new_dest_nodes);
2655	      if (BE (err != REG_NOERROR, 0))
2656		goto free_return;
2657	    }
2658	}
2659    }
2660  err = REG_NOERROR;
2661 free_return:
2662  return err;
2663}
2664
2665/* Enumerate all the candidates which the backreference BKREF_NODE can match
2666   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2667   Note that we might collect inappropriate candidates here.
2668   However, the cost of checking them strictly here is too high, then we
2669   delay these checking for prune_impossible_nodes().  */
2670
2671static reg_errcode_t
2672internal_function
2673get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2674{
2675  const re_dfa_t *const dfa = mctx->dfa;
2676  int subexp_num, sub_top_idx;
2677  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2678  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2679  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2680  if (cache_idx != -1)
2681    {
2682      const struct re_backref_cache_entry *entry
2683	= mctx->bkref_ents + cache_idx;
2684      do
2685        if (entry->node == bkref_node)
2686	  return REG_NOERROR; /* We already checked it.  */
2687      while (entry++->more);
2688    }
2689
2690  subexp_num = dfa->nodes[bkref_node].opr.idx;
2691
2692  /* For each sub expression  */
2693  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2694    {
2695      reg_errcode_t err;
2696      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2697      re_sub_match_last_t *sub_last;
2698      int sub_last_idx, sl_str, bkref_str_off;
2699
2700      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2701	continue; /* It isn't related.  */
2702
2703      sl_str = sub_top->str_idx;
2704      bkref_str_off = bkref_str_idx;
2705      /* At first, check the last node of sub expressions we already
2706	 evaluated.  */
2707      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2708	{
2709	  int sl_str_diff;
2710	  sub_last = sub_top->lasts[sub_last_idx];
2711	  sl_str_diff = sub_last->str_idx - sl_str;
2712	  /* The matched string by the sub expression match with the substring
2713	     at the back reference?  */
2714	  if (sl_str_diff > 0)
2715	    {
2716	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2717		{
2718		  /* Not enough chars for a successful match.  */
2719		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2720		    break;
2721
2722		  err = clean_state_log_if_needed (mctx,
2723						   bkref_str_off
2724						   + sl_str_diff);
2725		  if (BE (err != REG_NOERROR, 0))
2726		    return err;
2727		  buf = (const char *) re_string_get_buffer (&mctx->input);
2728		}
2729	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2730		/* We don't need to search this sub expression any more.  */
2731		break;
2732	    }
2733	  bkref_str_off += sl_str_diff;
2734	  sl_str += sl_str_diff;
2735	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2736				bkref_str_idx);
2737
2738	  /* Reload buf, since the preceding call might have reallocated
2739	     the buffer.  */
2740	  buf = (const char *) re_string_get_buffer (&mctx->input);
2741
2742	  if (err == REG_NOMATCH)
2743	    continue;
2744	  if (BE (err != REG_NOERROR, 0))
2745	    return err;
2746	}
2747
2748      if (sub_last_idx < sub_top->nlasts)
2749	continue;
2750      if (sub_last_idx > 0)
2751	++sl_str;
2752      /* Then, search for the other last nodes of the sub expression.  */
2753      for (; sl_str <= bkref_str_idx; ++sl_str)
2754	{
2755	  int cls_node, sl_str_off;
2756	  const re_node_set *nodes;
2757	  sl_str_off = sl_str - sub_top->str_idx;
2758	  /* The matched string by the sub expression match with the substring
2759	     at the back reference?  */
2760	  if (sl_str_off > 0)
2761	    {
2762	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2763		{
2764		  /* If we are at the end of the input, we cannot match.  */
2765		  if (bkref_str_off >= mctx->input.len)
2766		    break;
2767
2768		  err = extend_buffers (mctx);
2769		  if (BE (err != REG_NOERROR, 0))
2770		    return err;
2771
2772		  buf = (const char *) re_string_get_buffer (&mctx->input);
2773		}
2774	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2775		break; /* We don't need to search this sub expression
2776			  any more.  */
2777	    }
2778	  if (mctx->state_log[sl_str] == NULL)
2779	    continue;
2780	  /* Does this state have a ')' of the sub expression?  */
2781	  nodes = &mctx->state_log[sl_str]->nodes;
2782	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2783				       OP_CLOSE_SUBEXP);
2784	  if (cls_node == -1)
2785	    continue; /* No.  */
2786	  if (sub_top->path == NULL)
2787	    {
2788	      sub_top->path = calloc (sizeof (state_array_t),
2789				      sl_str - sub_top->str_idx + 1);
2790	      if (sub_top->path == NULL)
2791		return REG_ESPACE;
2792	    }
2793	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2794	     in the current context?  */
2795	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2796			       sub_top->str_idx, cls_node, sl_str,
2797			       OP_CLOSE_SUBEXP);
2798	  if (err == REG_NOMATCH)
2799	      continue;
2800	  if (BE (err != REG_NOERROR, 0))
2801	      return err;
2802	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2803	  if (BE (sub_last == NULL, 0))
2804	    return REG_ESPACE;
2805	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2806				bkref_str_idx);
2807	  if (err == REG_NOMATCH)
2808	    continue;
2809	}
2810    }
2811  return REG_NOERROR;
2812}
2813
2814/* Helper functions for get_subexp().  */
2815
2816/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2817   If it can arrive, register the sub expression expressed with SUB_TOP
2818   and SUB_LAST.  */
2819
2820static reg_errcode_t
2821internal_function
2822get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2823		re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2824{
2825  reg_errcode_t err;
2826  int to_idx;
2827  /* Can the subexpression arrive the back reference?  */
2828  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2829		       sub_last->str_idx, bkref_node, bkref_str,
2830		       OP_OPEN_SUBEXP);
2831  if (err != REG_NOERROR)
2832    return err;
2833  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2834			     sub_last->str_idx);
2835  if (BE (err != REG_NOERROR, 0))
2836    return err;
2837  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2838  return clean_state_log_if_needed (mctx, to_idx);
2839}
2840
2841/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2842   Search '(' if FL_OPEN, or search ')' otherwise.
2843   TODO: This function isn't efficient...
2844	 Because there might be more than one nodes whose types are
2845	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2846	 nodes.
2847	 E.g. RE: (a){2}  */
2848
2849static int
2850internal_function
2851find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2852		  int subexp_idx, int type)
2853{
2854  int cls_idx;
2855  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2856    {
2857      int cls_node = nodes->elems[cls_idx];
2858      const re_token_t *node = dfa->nodes + cls_node;
2859      if (node->type == type
2860	  && node->opr.idx == subexp_idx)
2861	return cls_node;
2862    }
2863  return -1;
2864}
2865
2866/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2867   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2868   heavily reused.
2869   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2870
2871static reg_errcode_t
2872internal_function
2873check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2874	       int top_str, int last_node, int last_str, int type)
2875{
2876  const re_dfa_t *const dfa = mctx->dfa;
2877  reg_errcode_t err = REG_NOERROR;
2878  int subexp_num, backup_cur_idx, str_idx, null_cnt;
2879  re_dfastate_t *cur_state = NULL;
2880  re_node_set *cur_nodes, next_nodes;
2881  re_dfastate_t **backup_state_log;
2882  unsigned int context;
2883
2884  subexp_num = dfa->nodes[top_node].opr.idx;
2885  /* Extend the buffer if we need.  */
2886  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2887    {
2888      re_dfastate_t **new_array;
2889      int old_alloc = path->alloc;
2890      path->alloc += last_str + mctx->max_mb_elem_len + 1;
2891      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2892      if (BE (new_array == NULL, 0))
2893	{
2894	  path->alloc = old_alloc;
2895	  return REG_ESPACE;
2896	}
2897      path->array = new_array;
2898      memset (new_array + old_alloc, '\0',
2899	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2900    }
2901
2902  str_idx = path->next_idx ?: top_str;
2903
2904  /* Temporary modify MCTX.  */
2905  backup_state_log = mctx->state_log;
2906  backup_cur_idx = mctx->input.cur_idx;
2907  mctx->state_log = path->array;
2908  mctx->input.cur_idx = str_idx;
2909
2910  /* Setup initial node set.  */
2911  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2912  if (str_idx == top_str)
2913    {
2914      err = re_node_set_init_1 (&next_nodes, top_node);
2915      if (BE (err != REG_NOERROR, 0))
2916	return err;
2917      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2918      if (BE (err != REG_NOERROR, 0))
2919	{
2920	  re_node_set_free (&next_nodes);
2921	  return err;
2922	}
2923    }
2924  else
2925    {
2926      cur_state = mctx->state_log[str_idx];
2927      if (cur_state && cur_state->has_backref)
2928	{
2929	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2930	  if (BE (err != REG_NOERROR, 0))
2931	    return err;
2932	}
2933      else
2934	re_node_set_init_empty (&next_nodes);
2935    }
2936  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2937    {
2938      if (next_nodes.nelem)
2939	{
2940	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2941				    subexp_num, type);
2942	  if (BE (err != REG_NOERROR, 0))
2943	    {
2944	      re_node_set_free (&next_nodes);
2945	      return err;
2946	    }
2947	}
2948      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2949      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2950	{
2951	  re_node_set_free (&next_nodes);
2952	  return err;
2953	}
2954      mctx->state_log[str_idx] = cur_state;
2955    }
2956
2957  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2958    {
2959      re_node_set_empty (&next_nodes);
2960      if (mctx->state_log[str_idx + 1])
2961	{
2962	  err = re_node_set_merge (&next_nodes,
2963				   &mctx->state_log[str_idx + 1]->nodes);
2964	  if (BE (err != REG_NOERROR, 0))
2965	    {
2966	      re_node_set_free (&next_nodes);
2967	      return err;
2968	    }
2969	}
2970      if (cur_state)
2971	{
2972	  err = check_arrival_add_next_nodes (mctx, str_idx,
2973					      &cur_state->non_eps_nodes,
2974					      &next_nodes);
2975	  if (BE (err != REG_NOERROR, 0))
2976	    {
2977	      re_node_set_free (&next_nodes);
2978	      return err;
2979	    }
2980	}
2981      ++str_idx;
2982      if (next_nodes.nelem)
2983	{
2984	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2985	  if (BE (err != REG_NOERROR, 0))
2986	    {
2987	      re_node_set_free (&next_nodes);
2988	      return err;
2989	    }
2990	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2991				    subexp_num, type);
2992	  if (BE (err != REG_NOERROR, 0))
2993	    {
2994	      re_node_set_free (&next_nodes);
2995	      return err;
2996	    }
2997	}
2998      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2999      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3000      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3001	{
3002	  re_node_set_free (&next_nodes);
3003	  return err;
3004	}
3005      mctx->state_log[str_idx] = cur_state;
3006      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3007    }
3008  re_node_set_free (&next_nodes);
3009  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3010	       : &mctx->state_log[last_str]->nodes);
3011  path->next_idx = str_idx;
3012
3013  /* Fix MCTX.  */
3014  mctx->state_log = backup_state_log;
3015  mctx->input.cur_idx = backup_cur_idx;
3016
3017  /* Then check the current node set has the node LAST_NODE.  */
3018  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3019    return REG_NOERROR;
3020
3021  return REG_NOMATCH;
3022}
3023
3024/* Helper functions for check_arrival.  */
3025
3026/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3027   to NEXT_NODES.
3028   TODO: This function is similar to the functions transit_state*(),
3029	 however this function has many additional works.
3030	 Can't we unify them?  */
3031
3032static reg_errcode_t
3033internal_function
3034check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3035			      re_node_set *cur_nodes, re_node_set *next_nodes)
3036{
3037  const re_dfa_t *const dfa = mctx->dfa;
3038  int result;
3039  int cur_idx;
3040  reg_errcode_t err = REG_NOERROR;
3041  re_node_set union_set;
3042  re_node_set_init_empty (&union_set);
3043  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3044    {
3045      int naccepted = 0;
3046      int cur_node = cur_nodes->elems[cur_idx];
3047#ifdef DEBUG
3048      re_token_type_t type = dfa->nodes[cur_node].type;
3049      assert (!IS_EPSILON_NODE (type));
3050#endif
3051#ifdef RE_ENABLE_I18N
3052      /* If the node may accept `multi byte'.  */
3053      if (dfa->nodes[cur_node].accept_mb)
3054	{
3055	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3056					       str_idx);
3057	  if (naccepted > 1)
3058	    {
3059	      re_dfastate_t *dest_state;
3060	      int next_node = dfa->nexts[cur_node];
3061	      int next_idx = str_idx + naccepted;
3062	      dest_state = mctx->state_log[next_idx];
3063	      re_node_set_empty (&union_set);
3064	      if (dest_state)
3065		{
3066		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3067		  if (BE (err != REG_NOERROR, 0))
3068		    {
3069		      re_node_set_free (&union_set);
3070		      return err;
3071		    }
3072		}
3073	      result = re_node_set_insert (&union_set, next_node);
3074	      if (BE (result < 0, 0))
3075		{
3076		  re_node_set_free (&union_set);
3077		  return REG_ESPACE;
3078		}
3079	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3080							    &union_set);
3081	      if (BE (mctx->state_log[next_idx] == NULL
3082		      && err != REG_NOERROR, 0))
3083		{
3084		  re_node_set_free (&union_set);
3085		  return err;
3086		}
3087	    }
3088	}
3089#endif /* RE_ENABLE_I18N */
3090      if (naccepted
3091	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3092	{
3093	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3094	  if (BE (result < 0, 0))
3095	    {
3096	      re_node_set_free (&union_set);
3097	      return REG_ESPACE;
3098	    }
3099	}
3100    }
3101  re_node_set_free (&union_set);
3102  return REG_NOERROR;
3103}
3104
3105/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3106   CUR_NODES, however exclude the nodes which are:
3107    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3108    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3109*/
3110
3111static reg_errcode_t
3112internal_function
3113check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3114			  int ex_subexp, int type)
3115{
3116  reg_errcode_t err;
3117  int idx, outside_node;
3118  re_node_set new_nodes;
3119#ifdef DEBUG
3120  assert (cur_nodes->nelem);
3121#endif
3122  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3123  if (BE (err != REG_NOERROR, 0))
3124    return err;
3125  /* Create a new node set NEW_NODES with the nodes which are epsilon
3126     closures of the node in CUR_NODES.  */
3127
3128  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3129    {
3130      int cur_node = cur_nodes->elems[idx];
3131      const re_node_set *eclosure = dfa->eclosures + cur_node;
3132      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3133      if (outside_node == -1)
3134	{
3135	  /* There are no problematic nodes, just merge them.  */
3136	  err = re_node_set_merge (&new_nodes, eclosure);
3137	  if (BE (err != REG_NOERROR, 0))
3138	    {
3139	      re_node_set_free (&new_nodes);
3140	      return err;
3141	    }
3142	}
3143      else
3144	{
3145	  /* There are problematic nodes, re-calculate incrementally.  */
3146	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3147					      ex_subexp, type);
3148	  if (BE (err != REG_NOERROR, 0))
3149	    {
3150	      re_node_set_free (&new_nodes);
3151	      return err;
3152	    }
3153	}
3154    }
3155  re_node_set_free (cur_nodes);
3156  *cur_nodes = new_nodes;
3157  return REG_NOERROR;
3158}
3159
3160/* Helper function for check_arrival_expand_ecl.
3161   Check incrementally the epsilon closure of TARGET, and if it isn't
3162   problematic append it to DST_NODES.  */
3163
3164static reg_errcode_t
3165internal_function
3166check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3167			      int target, int ex_subexp, int type)
3168{
3169  int cur_node;
3170  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3171    {
3172      int err;
3173
3174      if (dfa->nodes[cur_node].type == type
3175	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3176	{
3177	  if (type == OP_CLOSE_SUBEXP)
3178	    {
3179	      err = re_node_set_insert (dst_nodes, cur_node);
3180	      if (BE (err == -1, 0))
3181		return REG_ESPACE;
3182	    }
3183	  break;
3184	}
3185      err = re_node_set_insert (dst_nodes, cur_node);
3186      if (BE (err == -1, 0))
3187	return REG_ESPACE;
3188      if (dfa->edests[cur_node].nelem == 0)
3189	break;
3190      if (dfa->edests[cur_node].nelem == 2)
3191	{
3192	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3193					      dfa->edests[cur_node].elems[1],
3194					      ex_subexp, type);
3195	  if (BE (err != REG_NOERROR, 0))
3196	    return err;
3197	}
3198      cur_node = dfa->edests[cur_node].elems[0];
3199    }
3200  return REG_NOERROR;
3201}
3202
3203
3204/* For all the back references in the current state, calculate the
3205   destination of the back references by the appropriate entry
3206   in MCTX->BKREF_ENTS.  */
3207
3208static reg_errcode_t
3209internal_function
3210expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3211		    int cur_str, int subexp_num, int type)
3212{
3213  const re_dfa_t *const dfa = mctx->dfa;
3214  reg_errcode_t err;
3215  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3216  struct re_backref_cache_entry *ent;
3217
3218  if (cache_idx_start == -1)
3219    return REG_NOERROR;
3220
3221 restart:
3222  ent = mctx->bkref_ents + cache_idx_start;
3223  do
3224    {
3225      int to_idx, next_node;
3226
3227      /* Is this entry ENT is appropriate?  */
3228      if (!re_node_set_contains (cur_nodes, ent->node))
3229	continue; /* No.  */
3230
3231      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3232      /* Calculate the destination of the back reference, and append it
3233	 to MCTX->STATE_LOG.  */
3234      if (to_idx == cur_str)
3235	{
3236	  /* The backreference did epsilon transit, we must re-check all the
3237	     node in the current state.  */
3238	  re_node_set new_dests;
3239	  reg_errcode_t err2, err3;
3240	  next_node = dfa->edests[ent->node].elems[0];
3241	  if (re_node_set_contains (cur_nodes, next_node))
3242	    continue;
3243	  err = re_node_set_init_1 (&new_dests, next_node);
3244	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3245	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3246	  re_node_set_free (&new_dests);
3247	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3248		  || err3 != REG_NOERROR, 0))
3249	    {
3250	      err = (err != REG_NOERROR ? err
3251		     : (err2 != REG_NOERROR ? err2 : err3));
3252	      return err;
3253	    }
3254	  /* TODO: It is still inefficient...  */
3255	  goto restart;
3256	}
3257      else
3258	{
3259	  re_node_set union_set;
3260	  next_node = dfa->nexts[ent->node];
3261	  if (mctx->state_log[to_idx])
3262	    {
3263	      int ret;
3264	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3265					next_node))
3266		continue;
3267	      err = re_node_set_init_copy (&union_set,
3268					   &mctx->state_log[to_idx]->nodes);
3269	      ret = re_node_set_insert (&union_set, next_node);
3270	      if (BE (err != REG_NOERROR || ret < 0, 0))
3271		{
3272		  re_node_set_free (&union_set);
3273		  err = err != REG_NOERROR ? err : REG_ESPACE;
3274		  return err;
3275		}
3276	    }
3277	  else
3278	    {
3279	      err = re_node_set_init_1 (&union_set, next_node);
3280	      if (BE (err != REG_NOERROR, 0))
3281		return err;
3282	    }
3283	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3284	  re_node_set_free (&union_set);
3285	  if (BE (mctx->state_log[to_idx] == NULL
3286		  && err != REG_NOERROR, 0))
3287	    return err;
3288	}
3289    }
3290  while (ent++->more);
3291  return REG_NOERROR;
3292}
3293
3294/* Build transition table for the state.
3295   Return 1 if succeeded, otherwise return NULL.  */
3296
3297static int
3298internal_function
3299build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3300{
3301  reg_errcode_t err;
3302  int i, j, ch, need_word_trtable = 0;
3303  bitset_word_t elem, mask;
3304  bool dests_node_malloced = false;
3305  bool dest_states_malloced = false;
3306  int ndests; /* Number of the destination states from `state'.  */
3307  re_dfastate_t **trtable;
3308  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3309  re_node_set follows, *dests_node;
3310  bitset_t *dests_ch;
3311  bitset_t acceptable;
3312
3313  struct dests_alloc
3314  {
3315    re_node_set dests_node[SBC_MAX];
3316    bitset_t dests_ch[SBC_MAX];
3317  } *dests_alloc;
3318
3319  /* We build DFA states which corresponds to the destination nodes
3320     from `state'.  `dests_node[i]' represents the nodes which i-th
3321     destination state contains, and `dests_ch[i]' represents the
3322     characters which i-th destination state accepts.  */
3323  if (__libc_use_alloca (sizeof (struct dests_alloc)))
3324    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3325  else
3326    {
3327      dests_alloc = re_malloc (struct dests_alloc, 1);
3328      if (BE (dests_alloc == NULL, 0))
3329	return 0;
3330      dests_node_malloced = true;
3331    }
3332  dests_node = dests_alloc->dests_node;
3333  dests_ch = dests_alloc->dests_ch;
3334
3335  /* Initialize transiton table.  */
3336  state->word_trtable = state->trtable = NULL;
3337
3338  /* At first, group all nodes belonging to `state' into several
3339     destinations.  */
3340  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3341  if (BE (ndests <= 0, 0))
3342    {
3343      if (dests_node_malloced)
3344	free (dests_alloc);
3345      /* Return 0 in case of an error, 1 otherwise.  */
3346      if (ndests == 0)
3347	{
3348	  state->trtable = (re_dfastate_t **)
3349	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3350	  return 1;
3351	}
3352      return 0;
3353    }
3354
3355  err = re_node_set_alloc (&follows, ndests + 1);
3356  if (BE (err != REG_NOERROR, 0))
3357    goto out_free;
3358
3359  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3360			 + ndests * 3 * sizeof (re_dfastate_t *)))
3361    dest_states = (re_dfastate_t **)
3362      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3363  else
3364    {
3365      dest_states = (re_dfastate_t **)
3366	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3367      if (BE (dest_states == NULL, 0))
3368	{
3369out_free:
3370	  if (dest_states_malloced)
3371	    free (dest_states);
3372	  re_node_set_free (&follows);
3373	  for (i = 0; i < ndests; ++i)
3374	    re_node_set_free (dests_node + i);
3375	  if (dests_node_malloced)
3376	    free (dests_alloc);
3377	  return 0;
3378	}
3379      dest_states_malloced = true;
3380    }
3381  dest_states_word = dest_states + ndests;
3382  dest_states_nl = dest_states_word + ndests;
3383  bitset_empty (acceptable);
3384
3385  /* Then build the states for all destinations.  */
3386  for (i = 0; i < ndests; ++i)
3387    {
3388      int next_node;
3389      re_node_set_empty (&follows);
3390      /* Merge the follows of this destination states.  */
3391      for (j = 0; j < dests_node[i].nelem; ++j)
3392	{
3393	  next_node = dfa->nexts[dests_node[i].elems[j]];
3394	  if (next_node != -1)
3395	    {
3396	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3397	      if (BE (err != REG_NOERROR, 0))
3398		goto out_free;
3399	    }
3400	}
3401      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3402      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3403	goto out_free;
3404      /* If the new state has context constraint,
3405	 build appropriate states for these contexts.  */
3406      if (dest_states[i]->has_constraint)
3407	{
3408	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3409							  CONTEXT_WORD);
3410	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3411	    goto out_free;
3412
3413	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3414	    need_word_trtable = 1;
3415
3416	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3417							CONTEXT_NEWLINE);
3418	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3419	    goto out_free;
3420 	}
3421      else
3422	{
3423	  dest_states_word[i] = dest_states[i];
3424	  dest_states_nl[i] = dest_states[i];
3425	}
3426      bitset_merge (acceptable, dests_ch[i]);
3427    }
3428
3429  if (!BE (need_word_trtable, 0))
3430    {
3431      /* We don't care about whether the following character is a word
3432	 character, or we are in a single-byte character set so we can
3433	 discern by looking at the character code: allocate a
3434	 256-entry transition table.  */
3435      trtable = state->trtable =
3436	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3437      if (BE (trtable == NULL, 0))
3438	goto out_free;
3439
3440      /* For all characters ch...:  */
3441      for (i = 0; i < BITSET_WORDS; ++i)
3442	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3443	     elem;
3444	     mask <<= 1, elem >>= 1, ++ch)
3445	  if (BE (elem & 1, 0))
3446	    {
3447	      /* There must be exactly one destination which accepts
3448		 character ch.  See group_nodes_into_DFAstates.  */
3449	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3450		;
3451
3452	      /* j-th destination accepts the word character ch.  */
3453	      if (dfa->word_char[i] & mask)
3454		trtable[ch] = dest_states_word[j];
3455	      else
3456		trtable[ch] = dest_states[j];
3457	    }
3458    }
3459  else
3460    {
3461      /* We care about whether the following character is a word
3462	 character, and we are in a multi-byte character set: discern
3463	 by looking at the character code: build two 256-entry
3464	 transition tables, one starting at trtable[0] and one
3465	 starting at trtable[SBC_MAX].  */
3466      trtable = state->word_trtable =
3467	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3468      if (BE (trtable == NULL, 0))
3469	goto out_free;
3470
3471      /* For all characters ch...:  */
3472      for (i = 0; i < BITSET_WORDS; ++i)
3473	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3474	     elem;
3475	     mask <<= 1, elem >>= 1, ++ch)
3476	  if (BE (elem & 1, 0))
3477	    {
3478	      /* There must be exactly one destination which accepts
3479		 character ch.  See group_nodes_into_DFAstates.  */
3480	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3481		;
3482
3483	      /* j-th destination accepts the word character ch.  */
3484	      trtable[ch] = dest_states[j];
3485	      trtable[ch + SBC_MAX] = dest_states_word[j];
3486	    }
3487    }
3488
3489  /* new line */
3490  if (bitset_contain (acceptable, NEWLINE_CHAR))
3491    {
3492      /* The current state accepts newline character.  */
3493      for (j = 0; j < ndests; ++j)
3494	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3495	  {
3496	    /* k-th destination accepts newline character.  */
3497	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3498	    if (need_word_trtable)
3499	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3500	    /* There must be only one destination which accepts
3501	       newline.  See group_nodes_into_DFAstates.  */
3502	    break;
3503	  }
3504    }
3505
3506  if (dest_states_malloced)
3507    free (dest_states);
3508
3509  re_node_set_free (&follows);
3510  for (i = 0; i < ndests; ++i)
3511    re_node_set_free (dests_node + i);
3512
3513  if (dests_node_malloced)
3514    free (dests_alloc);
3515
3516  return 1;
3517}
3518
3519/* Group all nodes belonging to STATE into several destinations.
3520   Then for all destinations, set the nodes belonging to the destination
3521   to DESTS_NODE[i] and set the characters accepted by the destination
3522   to DEST_CH[i].  This function return the number of destinations.  */
3523
3524static int
3525internal_function
3526group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3527			    re_node_set *dests_node, bitset_t *dests_ch)
3528{
3529  reg_errcode_t err;
3530  int result;
3531  int i, j, k;
3532  int ndests; /* Number of the destinations from `state'.  */
3533  bitset_t accepts; /* Characters a node can accept.  */
3534  const re_node_set *cur_nodes = &state->nodes;
3535  bitset_empty (accepts);
3536  ndests = 0;
3537
3538  /* For all the nodes belonging to `state',  */
3539  for (i = 0; i < cur_nodes->nelem; ++i)
3540    {
3541      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3542      re_token_type_t type = node->type;
3543      unsigned int constraint = node->constraint;
3544
3545      /* Enumerate all single byte character this node can accept.  */
3546      if (type == CHARACTER)
3547	bitset_set (accepts, node->opr.c);
3548      else if (type == SIMPLE_BRACKET)
3549	{
3550	  bitset_merge (accepts, node->opr.sbcset);
3551	}
3552      else if (type == OP_PERIOD)
3553	{
3554#ifdef RE_ENABLE_I18N
3555	  if (dfa->mb_cur_max > 1)
3556	    bitset_merge (accepts, dfa->sb_char);
3557	  else
3558#endif
3559	    bitset_set_all (accepts);
3560	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3561	    bitset_clear (accepts, '\n');
3562	  if (dfa->syntax & RE_DOT_NOT_NULL)
3563	    bitset_clear (accepts, '\0');
3564	}
3565#ifdef RE_ENABLE_I18N
3566      else if (type == OP_UTF8_PERIOD)
3567        {
3568	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
3569	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3570	    bitset_clear (accepts, '\n');
3571	  if (dfa->syntax & RE_DOT_NOT_NULL)
3572	    bitset_clear (accepts, '\0');
3573        }
3574#endif
3575      else
3576	continue;
3577
3578      /* Check the `accepts' and sift the characters which are not
3579	 match it the context.  */
3580      if (constraint)
3581	{
3582	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3583	    {
3584	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3585	      bitset_empty (accepts);
3586	      if (accepts_newline)
3587		bitset_set (accepts, NEWLINE_CHAR);
3588	      else
3589		continue;
3590	    }
3591	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3592	    {
3593	      bitset_empty (accepts);
3594	      continue;
3595	    }
3596
3597	  if (constraint & NEXT_WORD_CONSTRAINT)
3598	    {
3599	      bitset_word_t any_set = 0;
3600	      if (type == CHARACTER && !node->word_char)
3601		{
3602		  bitset_empty (accepts);
3603		  continue;
3604		}
3605#ifdef RE_ENABLE_I18N
3606	      if (dfa->mb_cur_max > 1)
3607		for (j = 0; j < BITSET_WORDS; ++j)
3608		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3609	      else
3610#endif
3611		for (j = 0; j < BITSET_WORDS; ++j)
3612		  any_set |= (accepts[j] &= dfa->word_char[j]);
3613	      if (!any_set)
3614		continue;
3615	    }
3616	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3617	    {
3618	      bitset_word_t any_set = 0;
3619	      if (type == CHARACTER && node->word_char)
3620		{
3621		  bitset_empty (accepts);
3622		  continue;
3623		}
3624#ifdef RE_ENABLE_I18N
3625	      if (dfa->mb_cur_max > 1)
3626		for (j = 0; j < BITSET_WORDS; ++j)
3627		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3628	      else
3629#endif
3630		for (j = 0; j < BITSET_WORDS; ++j)
3631		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3632	      if (!any_set)
3633		continue;
3634	    }
3635	}
3636
3637      /* Then divide `accepts' into DFA states, or create a new
3638	 state.  Above, we make sure that accepts is not empty.  */
3639      for (j = 0; j < ndests; ++j)
3640	{
3641	  bitset_t intersec; /* Intersection sets, see below.  */
3642	  bitset_t remains;
3643	  /* Flags, see below.  */
3644	  bitset_word_t has_intersec, not_subset, not_consumed;
3645
3646	  /* Optimization, skip if this state doesn't accept the character.  */
3647	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3648	    continue;
3649
3650	  /* Enumerate the intersection set of this state and `accepts'.  */
3651	  has_intersec = 0;
3652	  for (k = 0; k < BITSET_WORDS; ++k)
3653	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3654	  /* And skip if the intersection set is empty.  */
3655	  if (!has_intersec)
3656	    continue;
3657
3658	  /* Then check if this state is a subset of `accepts'.  */
3659	  not_subset = not_consumed = 0;
3660	  for (k = 0; k < BITSET_WORDS; ++k)
3661	    {
3662	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3663	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3664	    }
3665
3666	  /* If this state isn't a subset of `accepts', create a
3667	     new group state, which has the `remains'. */
3668	  if (not_subset)
3669	    {
3670	      bitset_copy (dests_ch[ndests], remains);
3671	      bitset_copy (dests_ch[j], intersec);
3672	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3673	      if (BE (err != REG_NOERROR, 0))
3674		goto error_return;
3675	      ++ndests;
3676	    }
3677
3678	  /* Put the position in the current group. */
3679	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3680	  if (BE (result < 0, 0))
3681	    goto error_return;
3682
3683	  /* If all characters are consumed, go to next node. */
3684	  if (!not_consumed)
3685	    break;
3686	}
3687      /* Some characters remain, create a new group. */
3688      if (j == ndests)
3689	{
3690	  bitset_copy (dests_ch[ndests], accepts);
3691	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3692	  if (BE (err != REG_NOERROR, 0))
3693	    goto error_return;
3694	  ++ndests;
3695	  bitset_empty (accepts);
3696	}
3697    }
3698  return ndests;
3699 error_return:
3700  for (j = 0; j < ndests; ++j)
3701    re_node_set_free (dests_node + j);
3702  return -1;
3703}
3704
3705#ifdef RE_ENABLE_I18N
3706/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3707   Return the number of the bytes the node accepts.
3708   STR_IDX is the current index of the input string.
3709
3710   This function handles the nodes which can accept one character, or
3711   one collating element like '.', '[a-z]', opposite to the other nodes
3712   can only accept one byte.  */
3713
3714static int
3715internal_function
3716check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3717			 const re_string_t *input, int str_idx)
3718{
3719  const re_token_t *node = dfa->nodes + node_idx;
3720  int char_len, elem_len;
3721  int i;
3722
3723  if (BE (node->type == OP_UTF8_PERIOD, 0))
3724    {
3725      unsigned char c = re_string_byte_at (input, str_idx), d;
3726      if (BE (c < 0xc2, 1))
3727	return 0;
3728
3729      if (str_idx + 2 > input->len)
3730	return 0;
3731
3732      d = re_string_byte_at (input, str_idx + 1);
3733      if (c < 0xe0)
3734	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3735      else if (c < 0xf0)
3736	{
3737	  char_len = 3;
3738	  if (c == 0xe0 && d < 0xa0)
3739	    return 0;
3740	}
3741      else if (c < 0xf8)
3742	{
3743	  char_len = 4;
3744	  if (c == 0xf0 && d < 0x90)
3745	    return 0;
3746	}
3747      else if (c < 0xfc)
3748	{
3749	  char_len = 5;
3750	  if (c == 0xf8 && d < 0x88)
3751	    return 0;
3752	}
3753      else if (c < 0xfe)
3754	{
3755	  char_len = 6;
3756	  if (c == 0xfc && d < 0x84)
3757	    return 0;
3758	}
3759      else
3760	return 0;
3761
3762      if (str_idx + char_len > input->len)
3763	return 0;
3764
3765      for (i = 1; i < char_len; ++i)
3766	{
3767	  d = re_string_byte_at (input, str_idx + i);
3768	  if (d < 0x80 || d > 0xbf)
3769	    return 0;
3770	}
3771      return char_len;
3772    }
3773
3774  char_len = re_string_char_size_at (input, str_idx);
3775  if (node->type == OP_PERIOD)
3776    {
3777      if (char_len <= 1)
3778        return 0;
3779      /* FIXME: I don't think this if is needed, as both '\n'
3780	 and '\0' are char_len == 1.  */
3781      /* '.' accepts any one character except the following two cases.  */
3782      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3783	   re_string_byte_at (input, str_idx) == '\n') ||
3784	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3785	   re_string_byte_at (input, str_idx) == '\0'))
3786	return 0;
3787      return char_len;
3788    }
3789
3790  elem_len = re_string_elem_size_at (input, str_idx);
3791  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3792    return 0;
3793
3794  if (node->type == COMPLEX_BRACKET)
3795    {
3796      const re_charset_t *cset = node->opr.mbcset;
3797# ifdef _LIBC
3798      const unsigned char *pin
3799	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3800      int j;
3801      uint32_t nrules;
3802# endif /* _LIBC */
3803      int match_len = 0;
3804      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3805		    ? re_string_wchar_at (input, str_idx) : 0);
3806
3807      /* match with multibyte character?  */
3808      for (i = 0; i < cset->nmbchars; ++i)
3809	if (wc == cset->mbchars[i])
3810	  {
3811	    match_len = char_len;
3812	    goto check_node_accept_bytes_match;
3813	  }
3814      /* match with character_class?  */
3815      for (i = 0; i < cset->nchar_classes; ++i)
3816	{
3817	  wctype_t wt = cset->char_classes[i];
3818	  if (__iswctype (wc, wt))
3819	    {
3820	      match_len = char_len;
3821	      goto check_node_accept_bytes_match;
3822	    }
3823	}
3824
3825# ifdef _LIBC
3826      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3827      if (nrules != 0)
3828	{
3829	  unsigned int in_collseq = 0;
3830	  const int32_t *table, *indirect;
3831	  const unsigned char *weights, *extra;
3832	  const char *collseqwc;
3833	  /* This #include defines a local function!  */
3834#  include <locale/weight.h>
3835
3836	  /* match with collating_symbol?  */
3837	  if (cset->ncoll_syms)
3838	    extra = (const unsigned char *)
3839	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3840	  for (i = 0; i < cset->ncoll_syms; ++i)
3841	    {
3842	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3843	      /* Compare the length of input collating element and
3844		 the length of current collating element.  */
3845	      if (*coll_sym != elem_len)
3846		continue;
3847	      /* Compare each bytes.  */
3848	      for (j = 0; j < *coll_sym; j++)
3849		if (pin[j] != coll_sym[1 + j])
3850		  break;
3851	      if (j == *coll_sym)
3852		{
3853		  /* Match if every bytes is equal.  */
3854		  match_len = j;
3855		  goto check_node_accept_bytes_match;
3856		}
3857	    }
3858
3859	  if (cset->nranges)
3860	    {
3861	      if (elem_len <= char_len)
3862		{
3863		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3864		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3865		}
3866	      else
3867		in_collseq = find_collation_sequence_value (pin, elem_len);
3868	    }
3869	  /* match with range expression?  */
3870	  for (i = 0; i < cset->nranges; ++i)
3871	    if (cset->range_starts[i] <= in_collseq
3872		&& in_collseq <= cset->range_ends[i])
3873	      {
3874		match_len = elem_len;
3875		goto check_node_accept_bytes_match;
3876	      }
3877
3878	  /* match with equivalence_class?  */
3879	  if (cset->nequiv_classes)
3880	    {
3881	      const unsigned char *cp = pin;
3882	      table = (const int32_t *)
3883		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3884	      weights = (const unsigned char *)
3885		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3886	      extra = (const unsigned char *)
3887		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3888	      indirect = (const int32_t *)
3889		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3890	      int32_t idx = findidx (&cp);
3891	      if (idx > 0)
3892		for (i = 0; i < cset->nequiv_classes; ++i)
3893		  {
3894		    int32_t equiv_class_idx = cset->equiv_classes[i];
3895		    size_t weight_len = weights[idx & 0xffffff];
3896		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3897			&& (idx >> 24) == (equiv_class_idx >> 24))
3898		      {
3899			int cnt = 0;
3900
3901			idx &= 0xffffff;
3902			equiv_class_idx &= 0xffffff;
3903
3904			while (cnt <= weight_len
3905			       && (weights[equiv_class_idx + 1 + cnt]
3906				   == weights[idx + 1 + cnt]))
3907			  ++cnt;
3908			if (cnt > weight_len)
3909			  {
3910			    match_len = elem_len;
3911			    goto check_node_accept_bytes_match;
3912			  }
3913		      }
3914		  }
3915	    }
3916	}
3917      else
3918# endif /* _LIBC */
3919	{
3920	  /* match with range expression?  */
3921#if __GNUC__ >= 2
3922	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3923#else
3924	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3925	  cmp_buf[2] = wc;
3926#endif
3927	  for (i = 0; i < cset->nranges; ++i)
3928	    {
3929	      cmp_buf[0] = cset->range_starts[i];
3930	      cmp_buf[4] = cset->range_ends[i];
3931	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3932		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3933		{
3934		  match_len = char_len;
3935		  goto check_node_accept_bytes_match;
3936		}
3937	    }
3938	}
3939    check_node_accept_bytes_match:
3940      if (!cset->non_match)
3941	return match_len;
3942      else
3943	{
3944	  if (match_len > 0)
3945	    return 0;
3946	  else
3947	    return (elem_len > char_len) ? elem_len : char_len;
3948	}
3949    }
3950  return 0;
3951}
3952
3953# ifdef _LIBC
3954static unsigned int
3955internal_function
3956find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3957{
3958  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3959  if (nrules == 0)
3960    {
3961      if (mbs_len == 1)
3962	{
3963	  /* No valid character.  Match it as a single byte character.  */
3964	  const unsigned char *collseq = (const unsigned char *)
3965	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3966	  return collseq[mbs[0]];
3967	}
3968      return UINT_MAX;
3969    }
3970  else
3971    {
3972      int32_t idx;
3973      const unsigned char *extra = (const unsigned char *)
3974	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3975      int32_t extrasize = (const unsigned char *)
3976	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3977
3978      for (idx = 0; idx < extrasize;)
3979	{
3980	  int mbs_cnt, found = 0;
3981	  int32_t elem_mbs_len;
3982	  /* Skip the name of collating element name.  */
3983	  idx = idx + extra[idx] + 1;
3984	  elem_mbs_len = extra[idx++];
3985	  if (mbs_len == elem_mbs_len)
3986	    {
3987	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3988		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3989		  break;
3990	      if (mbs_cnt == elem_mbs_len)
3991		/* Found the entry.  */
3992		found = 1;
3993	    }
3994	  /* Skip the byte sequence of the collating element.  */
3995	  idx += elem_mbs_len;
3996	  /* Adjust for the alignment.  */
3997	  idx = (idx + 3) & ~3;
3998	  /* Skip the collation sequence value.  */
3999	  idx += sizeof (uint32_t);
4000	  /* Skip the wide char sequence of the collating element.  */
4001	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4002	  /* If we found the entry, return the sequence value.  */
4003	  if (found)
4004	    return *(uint32_t *) (extra + idx);
4005	  /* Skip the collation sequence value.  */
4006	  idx += sizeof (uint32_t);
4007	}
4008      return UINT_MAX;
4009    }
4010}
4011# endif /* _LIBC */
4012#endif /* RE_ENABLE_I18N */
4013
4014/* Check whether the node accepts the byte which is IDX-th
4015   byte of the INPUT.  */
4016
4017static int
4018internal_function
4019check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4020		   int idx)
4021{
4022  unsigned char ch;
4023  ch = re_string_byte_at (&mctx->input, idx);
4024  switch (node->type)
4025    {
4026    case CHARACTER:
4027      if (node->opr.c != ch)
4028        return 0;
4029      break;
4030
4031    case SIMPLE_BRACKET:
4032      if (!bitset_contain (node->opr.sbcset, ch))
4033        return 0;
4034      break;
4035
4036#ifdef RE_ENABLE_I18N
4037    case OP_UTF8_PERIOD:
4038      if (ch >= 0x80)
4039        return 0;
4040      /* FALLTHROUGH */
4041#endif
4042    case OP_PERIOD:
4043      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4044	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4045	return 0;
4046      break;
4047
4048    default:
4049      return 0;
4050    }
4051
4052  if (node->constraint)
4053    {
4054      /* The node has constraints.  Check whether the current context
4055	 satisfies the constraints.  */
4056      unsigned int context = re_string_context_at (&mctx->input, idx,
4057						   mctx->eflags);
4058      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4059	return 0;
4060    }
4061
4062  return 1;
4063}
4064
4065/* Extend the buffers, if the buffers have run out.  */
4066
4067static reg_errcode_t
4068internal_function
4069extend_buffers (re_match_context_t *mctx)
4070{
4071  reg_errcode_t ret;
4072  re_string_t *pstr = &mctx->input;
4073
4074  /* Double the lengthes of the buffers.  */
4075  ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4076  if (BE (ret != REG_NOERROR, 0))
4077    return ret;
4078
4079  if (mctx->state_log != NULL)
4080    {
4081      /* And double the length of state_log.  */
4082      /* XXX We have no indication of the size of this buffer.  If this
4083	 allocation fail we have no indication that the state_log array
4084	 does not have the right size.  */
4085      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4086					      pstr->bufs_len + 1);
4087      if (BE (new_array == NULL, 0))
4088	return REG_ESPACE;
4089      mctx->state_log = new_array;
4090    }
4091
4092  /* Then reconstruct the buffers.  */
4093  if (pstr->icase)
4094    {
4095#ifdef RE_ENABLE_I18N
4096      if (pstr->mb_cur_max > 1)
4097	{
4098	  ret = build_wcs_upper_buffer (pstr);
4099	  if (BE (ret != REG_NOERROR, 0))
4100	    return ret;
4101	}
4102      else
4103#endif /* RE_ENABLE_I18N  */
4104	build_upper_buffer (pstr);
4105    }
4106  else
4107    {
4108#ifdef RE_ENABLE_I18N
4109      if (pstr->mb_cur_max > 1)
4110	build_wcs_buffer (pstr);
4111      else
4112#endif /* RE_ENABLE_I18N  */
4113	{
4114	  if (pstr->trans != NULL)
4115	    re_string_translate_buffer (pstr);
4116	}
4117    }
4118  return REG_NOERROR;
4119}
4120
4121
4122/* Functions for matching context.  */
4123
4124/* Initialize MCTX.  */
4125
4126static reg_errcode_t
4127internal_function
4128match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4129{
4130  mctx->eflags = eflags;
4131  mctx->match_last = -1;
4132  if (n > 0)
4133    {
4134      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4135      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4136      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4137	return REG_ESPACE;
4138    }
4139  /* Already zero-ed by the caller.
4140     else
4141       mctx->bkref_ents = NULL;
4142     mctx->nbkref_ents = 0;
4143     mctx->nsub_tops = 0;  */
4144  mctx->abkref_ents = n;
4145  mctx->max_mb_elem_len = 1;
4146  mctx->asub_tops = n;
4147  return REG_NOERROR;
4148}
4149
4150/* Clean the entries which depend on the current input in MCTX.
4151   This function must be invoked when the matcher changes the start index
4152   of the input, or changes the input string.  */
4153
4154static void
4155internal_function
4156match_ctx_clean (re_match_context_t *mctx)
4157{
4158  int st_idx;
4159  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4160    {
4161      int sl_idx;
4162      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4163      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4164	{
4165	  re_sub_match_last_t *last = top->lasts[sl_idx];
4166	  re_free (last->path.array);
4167	  re_free (last);
4168	}
4169      re_free (top->lasts);
4170      if (top->path)
4171	{
4172	  re_free (top->path->array);
4173	  re_free (top->path);
4174	}
4175      free (top);
4176    }
4177
4178  mctx->nsub_tops = 0;
4179  mctx->nbkref_ents = 0;
4180}
4181
4182/* Free all the memory associated with MCTX.  */
4183
4184static void
4185internal_function
4186match_ctx_free (re_match_context_t *mctx)
4187{
4188  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4189  match_ctx_clean (mctx);
4190  re_free (mctx->sub_tops);
4191  re_free (mctx->bkref_ents);
4192}
4193
4194/* Add a new backreference entry to MCTX.
4195   Note that we assume that caller never call this function with duplicate
4196   entry, and call with STR_IDX which isn't smaller than any existing entry.
4197*/
4198
4199static reg_errcode_t
4200internal_function
4201match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4202		     int to)
4203{
4204  if (mctx->nbkref_ents >= mctx->abkref_ents)
4205    {
4206      struct re_backref_cache_entry* new_entry;
4207      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4208			      mctx->abkref_ents * 2);
4209      if (BE (new_entry == NULL, 0))
4210	{
4211	  re_free (mctx->bkref_ents);
4212	  return REG_ESPACE;
4213	}
4214      mctx->bkref_ents = new_entry;
4215      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4216	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4217      mctx->abkref_ents *= 2;
4218    }
4219  if (mctx->nbkref_ents > 0
4220      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4221    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4222
4223  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4224  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4225  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4226  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4227
4228  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4229     If bit N is clear, means that this entry won't epsilon-transition to
4230     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4231     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4232     such node.
4233
4234     A backreference does not epsilon-transition unless it is empty, so set
4235     to all zeros if FROM != TO.  */
4236  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4237    = (from == to ? ~0 : 0);
4238
4239  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4240  if (mctx->max_mb_elem_len < to - from)
4241    mctx->max_mb_elem_len = to - from;
4242  return REG_NOERROR;
4243}
4244
4245/* Search for the first entry which has the same str_idx, or -1 if none is
4246   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4247
4248static int
4249internal_function
4250search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4251{
4252  int left, right, mid, last;
4253  last = right = mctx->nbkref_ents;
4254  for (left = 0; left < right;)
4255    {
4256      mid = (left + right) / 2;
4257      if (mctx->bkref_ents[mid].str_idx < str_idx)
4258	left = mid + 1;
4259      else
4260	right = mid;
4261    }
4262  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4263    return left;
4264  else
4265    return -1;
4266}
4267
4268/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4269   at STR_IDX.  */
4270
4271static reg_errcode_t
4272internal_function
4273match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4274{
4275#ifdef DEBUG
4276  assert (mctx->sub_tops != NULL);
4277  assert (mctx->asub_tops > 0);
4278#endif
4279  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4280    {
4281      int new_asub_tops = mctx->asub_tops * 2;
4282      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4283						   re_sub_match_top_t *,
4284						   new_asub_tops);
4285      if (BE (new_array == NULL, 0))
4286	return REG_ESPACE;
4287      mctx->sub_tops = new_array;
4288      mctx->asub_tops = new_asub_tops;
4289    }
4290  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4291  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4292    return REG_ESPACE;
4293  mctx->sub_tops[mctx->nsub_tops]->node = node;
4294  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4295  return REG_NOERROR;
4296}
4297
4298/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4299   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4300
4301static re_sub_match_last_t *
4302internal_function
4303match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4304{
4305  re_sub_match_last_t *new_entry;
4306  if (BE (subtop->nlasts == subtop->alasts, 0))
4307    {
4308      int new_alasts = 2 * subtop->alasts + 1;
4309      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4310						    re_sub_match_last_t *,
4311						    new_alasts);
4312      if (BE (new_array == NULL, 0))
4313	return NULL;
4314      subtop->lasts = new_array;
4315      subtop->alasts = new_alasts;
4316    }
4317  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4318  if (BE (new_entry != NULL, 1))
4319    {
4320      subtop->lasts[subtop->nlasts] = new_entry;
4321      new_entry->node = node;
4322      new_entry->str_idx = str_idx;
4323      ++subtop->nlasts;
4324    }
4325  return new_entry;
4326}
4327
4328static void
4329internal_function
4330sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4331	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4332{
4333  sctx->sifted_states = sifted_sts;
4334  sctx->limited_states = limited_sts;
4335  sctx->last_node = last_node;
4336  sctx->last_str_idx = last_str_idx;
4337  re_node_set_init_empty (&sctx->limits);
4338}
4339