1/* Extended regular expression matching and search library.
2   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, write to the Free
18   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19   02111-1307 USA.  */
20
21static reg_errcode_t re_compile_internal _RE_ARGS((regex_t *preg, const char * pattern,
22                                          int length, reg_syntax_t syntax));
23static void re_compile_fastmap_iter _RE_ARGS((regex_t *bufp,
24				     const re_dfastate_t *init_state,
25                                     char *fastmap));
26static reg_errcode_t init_dfa _RE_ARGS((re_dfa_t *dfa, int pat_len));
27static reg_errcode_t init_word_char _RE_ARGS((re_dfa_t *dfa));
28#ifdef RE_ENABLE_I18N
29static void free_charset _RE_ARGS((re_charset_t *cset));
30#endif /* RE_ENABLE_I18N */
31static void free_workarea_compile _RE_ARGS((regex_t *preg));
32static reg_errcode_t create_initial_state _RE_ARGS((re_dfa_t *dfa));
33static reg_errcode_t analyze _RE_ARGS((re_dfa_t *dfa));
34static reg_errcode_t analyze_tree _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
35static void calc_first _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
36static void calc_next _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
37static void calc_epsdest _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
38static reg_errcode_t duplicate_node_closure _RE_ARGS((re_dfa_t *dfa, int top_org_node,
39					     int top_clone_node, int root_node,
40                                             unsigned int constraint));
41static reg_errcode_t duplicate_node _RE_ARGS((int *new_idx, re_dfa_t *dfa, int org_idx,
42                                     unsigned int constraint));
43static int search_duplicated_node _RE_ARGS((re_dfa_t *dfa, int org_node,
44				   unsigned int constraint));
45static reg_errcode_t calc_eclosure _RE_ARGS((re_dfa_t *dfa));
46static reg_errcode_t calc_eclosure_iter _RE_ARGS((re_node_set *new_set, re_dfa_t *dfa,
47                                         int node, int root));
48static void calc_inveclosure _RE_ARGS((re_dfa_t *dfa));
49static int fetch_number _RE_ARGS((re_string_t *input, re_token_t *token,
50                         reg_syntax_t syntax));
51static re_token_t fetch_token _RE_ARGS((re_string_t *input, reg_syntax_t syntax));
52static int peek_token _RE_ARGS((re_token_t *token, re_string_t *input,
53                        reg_syntax_t syntax));
54static int peek_token_bracket _RE_ARGS((re_token_t *token, re_string_t *input,
55                               reg_syntax_t syntax));
56static bin_tree_t *parse _RE_ARGS((re_string_t *regexp, regex_t *preg,
57                          reg_syntax_t syntax, reg_errcode_t *err));
58static bin_tree_t *parse_reg_exp _RE_ARGS((re_string_t *regexp, regex_t *preg,
59				  re_token_t *token, reg_syntax_t syntax,
60                                  int nest, reg_errcode_t *err));
61static bin_tree_t *parse_branch _RE_ARGS((re_string_t *regexp, regex_t *preg,
62				 re_token_t *token, reg_syntax_t syntax,
63                                 int nest, reg_errcode_t *err));
64static bin_tree_t *parse_expression _RE_ARGS((re_string_t *regexp, regex_t *preg,
65				     re_token_t *token, reg_syntax_t syntax,
66                                     int nest, reg_errcode_t *err));
67static bin_tree_t *parse_sub_exp _RE_ARGS((re_string_t *regexp, regex_t *preg,
68				  re_token_t *token, reg_syntax_t syntax,
69                                  int nest, reg_errcode_t *err));
70static bin_tree_t *parse_dup_op _RE_ARGS((bin_tree_t *dup_elem, re_string_t *regexp,
71				 re_dfa_t *dfa, re_token_t *token,
72                                 reg_syntax_t syntax, reg_errcode_t *err));
73static bin_tree_t *parse_bracket_exp _RE_ARGS((re_string_t *regexp, re_dfa_t *dfa,
74				      re_token_t *token, reg_syntax_t syntax,
75                                      reg_errcode_t *err));
76static reg_errcode_t parse_bracket_element _RE_ARGS((bracket_elem_t *elem,
77					    re_string_t *regexp,
78					    re_token_t *token, int token_len,
79					    re_dfa_t *dfa,
80                                            reg_syntax_t syntax));
81static reg_errcode_t parse_bracket_symbol _RE_ARGS((bracket_elem_t *elem,
82					  re_string_t *regexp,
83                                          re_token_t *token));
84#ifndef _LIBC
85# ifdef RE_ENABLE_I18N
86static reg_errcode_t build_range_exp _RE_ARGS((re_bitset_ptr_t sbcset,
87				      re_charset_t *mbcset, int *range_alloc,
88				      bracket_elem_t *start_elem,
89                                      bracket_elem_t *end_elem));
90static reg_errcode_t build_collating_symbol _RE_ARGS((re_bitset_ptr_t sbcset,
91					     re_charset_t *mbcset,
92					     int *coll_sym_alloc,
93                                             const unsigned char *name));
94# else /* not RE_ENABLE_I18N */
95static reg_errcode_t build_range_exp _RE_ARGS((re_bitset_ptr_t sbcset,
96				      bracket_elem_t *start_elem,
97                                      bracket_elem_t *end_elem));
98static reg_errcode_t build_collating_symbol _RE_ARGS((re_bitset_ptr_t sbcset,
99                                             const unsigned char *name));
100# endif /* not RE_ENABLE_I18N */
101#endif /* not _LIBC */
102#ifdef RE_ENABLE_I18N
103static reg_errcode_t build_equiv_class _RE_ARGS((re_bitset_ptr_t sbcset,
104					re_charset_t *mbcset,
105					int *equiv_class_alloc,
106                                        const unsigned char *name));
107static reg_errcode_t build_charclass _RE_ARGS((RE_TRANSLATE_TYPE trans,
108				      re_bitset_ptr_t sbcset,
109				      re_charset_t *mbcset,
110				      int *char_class_alloc,
111				      const unsigned char *class_name,
112                                      reg_syntax_t syntax));
113#else  /* not RE_ENABLE_I18N */
114static reg_errcode_t build_equiv_class _RE_ARGS((re_bitset_ptr_t sbcset,
115                                        const unsigned char *name));
116static reg_errcode_t build_charclass _RE_ARGS((RE_TRANSLATE_TYPE trans,
117				      re_bitset_ptr_t sbcset,
118				      const unsigned char *class_name,
119                                      reg_syntax_t syntax));
120#endif /* not RE_ENABLE_I18N */
121static bin_tree_t *build_word_op _RE_ARGS((re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
122				  int not, reg_errcode_t *err));
123static void free_bin_tree _RE_ARGS((bin_tree_t *tree));
124static bin_tree_t *create_tree _RE_ARGS((bin_tree_t *left, bin_tree_t *right,
125                                re_token_type_t type, int index));
126static bin_tree_t *duplicate_tree _RE_ARGS((const bin_tree_t *src, re_dfa_t *dfa));
127
128/* This table gives an error message for each of the error codes listed
129   in regex.h.  Obviously the order here has to be same as there.
130   POSIX doesn't require that we do anything for REG_NOERROR,
131   but why not be nice?  */
132
133const char __re_error_msgid[] attribute_hidden =
134  {
135#define REG_NOERROR_IDX	0
136    gettext_noop ("Success")	/* REG_NOERROR */
137    "\0"
138#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
139    gettext_noop ("No match")	/* REG_NOMATCH */
140    "\0"
141#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
142    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
143    "\0"
144#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
145    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
146    "\0"
147#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
148    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
149    "\0"
150#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
151    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
152    "\0"
153#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
154    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
155    "\0"
156#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
157    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
158    "\0"
159#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
160    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
161    "\0"
162#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
163    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
164    "\0"
165#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
166    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
167    "\0"
168#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
169    gettext_noop ("Invalid range end")	/* REG_ERANGE */
170    "\0"
171#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
172    gettext_noop ("Memory exhausted") /* REG_ESPACE */
173    "\0"
174#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
175    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
176    "\0"
177#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
178    gettext_noop ("Premature end of regular expression") /* REG_EEND */
179    "\0"
180#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
181    gettext_noop ("Regular expression too big") /* REG_ESIZE */
182    "\0"
183#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
184    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
185  };
186
187const size_t __re_error_msgid_idx[] attribute_hidden =
188  {
189    REG_NOERROR_IDX,
190    REG_NOMATCH_IDX,
191    REG_BADPAT_IDX,
192    REG_ECOLLATE_IDX,
193    REG_ECTYPE_IDX,
194    REG_EESCAPE_IDX,
195    REG_ESUBREG_IDX,
196    REG_EBRACK_IDX,
197    REG_EPAREN_IDX,
198    REG_EBRACE_IDX,
199    REG_BADBR_IDX,
200    REG_ERANGE_IDX,
201    REG_ESPACE_IDX,
202    REG_BADRPT_IDX,
203    REG_EEND_IDX,
204    REG_ESIZE_IDX,
205    REG_ERPAREN_IDX
206  };
207
208/* Entry points for GNU code.  */
209
210/* re_compile_pattern is the GNU regular expression compiler: it
211   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
212   Returns 0 if the pattern was valid, otherwise an error string.
213
214   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
215   are set in BUFP on entry.  */
216
217const char *
218re_compile_pattern (pattern, length, bufp)
219    const char *pattern;
220    size_t length;
221    struct re_pattern_buffer *bufp;
222{
223  reg_errcode_t ret;
224
225  /* And GNU code determines whether or not to get register information
226     by passing null for the REGS argument to re_match, etc., not by
227     setting no_sub.  */
228  bufp->no_sub = 0;
229
230  /* Match anchors at newline.  */
231  bufp->newline_anchor = 1;
232
233  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
234
235  if (!ret)
236    return NULL;
237  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
238}
239#ifdef _LIBC
240weak_alias (__re_compile_pattern, re_compile_pattern)
241#endif
242
243/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
244   also be assigned to arbitrarily: each pattern buffer stores its own
245   syntax, so it can be changed between regex compilations.  */
246/* This has no initializer because initialized variables in Emacs
247   become read-only after dumping.  */
248reg_syntax_t re_syntax_options;
249
250
251/* Specify the precise syntax of regexps for compilation.  This provides
252   for compatibility for various utilities which historically have
253   different, incompatible syntaxes.
254
255   The argument SYNTAX is a bit mask comprised of the various bits
256   defined in regex.h.  We return the old syntax.  */
257
258reg_syntax_t
259re_set_syntax (syntax)
260    reg_syntax_t syntax;
261{
262  reg_syntax_t ret = re_syntax_options;
263
264  re_syntax_options = syntax;
265#ifdef RE_ENABLE_I18N
266  re_mb_cur_max = MB_CUR_MAX;
267#endif
268  return ret;
269}
270#ifdef _LIBC
271weak_alias (__re_set_syntax, re_set_syntax)
272#endif
273
274int
275re_compile_fastmap (bufp)
276    struct re_pattern_buffer *bufp;
277{
278  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
279  char *fastmap = bufp->fastmap;
280
281  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
282  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
283  if (dfa->init_state != dfa->init_state_word)
284    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
285  if (dfa->init_state != dfa->init_state_nl)
286    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
287  if (dfa->init_state != dfa->init_state_begbuf)
288    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
289  bufp->fastmap_accurate = 1;
290  return 0;
291}
292#ifdef _LIBC
293weak_alias (__re_compile_fastmap, re_compile_fastmap)
294#endif
295
296static inline void
297re_set_fastmap (char *fastmap, int icase, int ch)
298{
299  fastmap[ch] = 1;
300  if (icase)
301    fastmap[tolower (ch)] = 1;
302}
303
304/* Helper function for re_compile_fastmap.
305   Compile fastmap for the initial_state INIT_STATE.  */
306
307static void
308re_compile_fastmap_iter (bufp, init_state, fastmap)
309     regex_t *bufp;
310     const re_dfastate_t *init_state;
311     char *fastmap;
312{
313  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
314  int node_cnt;
315#ifdef RE_ENABLE_I18N
316  int icase = (re_mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
317#else
318  int icase = (bufp->syntax & RE_ICASE);
319#endif
320  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
321    {
322      int node = init_state->nodes.elems[node_cnt];
323      re_token_type_t type = dfa->nodes[node].type;
324
325      if (type == CHARACTER)
326	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
327      else if (type == SIMPLE_BRACKET)
328	{
329	  int i, j, ch;
330	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
331	    for (j = 0; j < UINT_BITS; ++j, ++ch)
332	      if (dfa->nodes[node].opr.sbcset[i] & (1UL << j))
333		re_set_fastmap (fastmap, icase, ch);
334	}
335#ifdef RE_ENABLE_I18N
336      else if (type == COMPLEX_BRACKET)
337	{
338	  int i;
339	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
340	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
341	      || cset->nranges || cset->nchar_classes)
342	    {
343# ifdef _LIBC
344	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
345		{
346		  /* In this case we want to catch the bytes which are
347		     the first byte of any collation elements.
348		     e.g. In da_DK, we want to catch 'a' since "aa"
349			  is a valid collation element, and don't catch
350			  'b' since 'b' is the only collation element
351			  which starts from 'b'.  */
352		  int j, ch;
353		  const int32_t *table = (const int32_t *)
354		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
355		  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
356		    for (j = 0; j < UINT_BITS; ++j, ++ch)
357		      if (table[ch] < 0)
358			re_set_fastmap (fastmap, icase, ch);
359		}
360# else
361	      if (re_mb_cur_max > 1)
362		for (i = 0; i < SBC_MAX; ++i)
363		  if (__btowc (i) == WEOF)
364		    re_set_fastmap (fastmap, icase, i);
365# endif /* not _LIBC */
366	    }
367	  for (i = 0; i < cset->nmbchars; ++i)
368	    {
369	      char buf[256];
370	      mbstate_t state;
371	      memset (&state, '\0', sizeof (state));
372	      __wcrtomb (buf, cset->mbchars[i], &state);
373	      re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
374	    }
375	}
376#endif /* RE_ENABLE_I18N */
377      else if (type == END_OF_RE || type == OP_PERIOD)
378	{
379	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
380	  if (type == END_OF_RE)
381	    bufp->can_be_null = 1;
382	  return;
383	}
384    }
385}
386
387/* Entry point for POSIX code.  */
388/* regcomp takes a regular expression as a string and compiles it.
389
390   PREG is a regex_t *.  We do not expect any fields to be initialized,
391   since POSIX says we shouldn't.  Thus, we set
392
393     `buffer' to the compiled pattern;
394     `used' to the length of the compiled pattern;
395     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
396       REG_EXTENDED bit in CFLAGS is set; otherwise, to
397       RE_SYNTAX_POSIX_BASIC;
398     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
399     `fastmap' to an allocated space for the fastmap;
400     `fastmap_accurate' to zero;
401     `re_nsub' to the number of subexpressions in PATTERN.
402
403   PATTERN is the address of the pattern string.
404
405   CFLAGS is a series of bits which affect compilation.
406
407     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
408     use POSIX basic syntax.
409
410     If REG_NEWLINE is set, then . and [^...] don't match newline.
411     Also, regexec will try a match beginning after every newline.
412
413     If REG_ICASE is set, then we considers upper- and lowercase
414     versions of letters to be equivalent when matching.
415
416     If REG_NOSUB is set, then when PREG is passed to regexec, that
417     routine will report only success or failure, and nothing about the
418     registers.
419
420   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
421   the return codes and their meanings.)  */
422
423int
424regcomp (preg, pattern, cflags)
425    regex_t *__restrict preg;
426    const char *__restrict pattern;
427    int cflags;
428{
429  reg_errcode_t ret;
430  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
431			 : RE_SYNTAX_POSIX_BASIC);
432
433  preg->buffer = NULL;
434  preg->allocated = 0;
435  preg->used = 0;
436
437  /* Try to allocate space for the fastmap.  */
438  preg->fastmap = re_malloc (char, SBC_MAX);
439  if (BE (preg->fastmap == NULL, 0))
440    return REG_ESPACE;
441
442  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
443
444  /* If REG_NEWLINE is set, newlines are treated differently.  */
445  if (cflags & REG_NEWLINE)
446    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
447      syntax &= ~RE_DOT_NEWLINE;
448      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
449      /* It also changes the matching behavior.  */
450      preg->newline_anchor = 1;
451    }
452  else
453    preg->newline_anchor = 0;
454  preg->no_sub = !!(cflags & REG_NOSUB);
455  preg->translate = NULL;
456
457  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
458
459  /* POSIX doesn't distinguish between an unmatched open-group and an
460     unmatched close-group: both are REG_EPAREN.  */
461  if (ret == REG_ERPAREN)
462    ret = REG_EPAREN;
463
464  /* We have already checked preg->fastmap != NULL.  */
465  if (BE (ret == REG_NOERROR, 1))
466    /* Compute the fastmap now, since regexec cannot modify the pattern
467       buffer.  This function nevers fails in this implementation.  */
468    (void) re_compile_fastmap (preg);
469  else
470    {
471      /* Some error occurred while compiling the expression.  */
472      re_free (preg->fastmap);
473      preg->fastmap = NULL;
474    }
475
476  return (int) ret;
477}
478#ifdef _LIBC
479weak_alias (__regcomp, regcomp)
480#endif
481
482/* Returns a message corresponding to an error code, ERRCODE, returned
483   from either regcomp or regexec.   We don't use PREG here.  */
484
485size_t
486regerror (errcode, preg, errbuf, errbuf_size)
487    int errcode;
488    const regex_t *preg;
489    char *errbuf;
490    size_t errbuf_size;
491{
492  const char *msg;
493  size_t msg_size;
494
495  if (BE (errcode < 0
496	  || errcode >= (int) (sizeof (__re_error_msgid_idx)
497			       / sizeof (__re_error_msgid_idx[0])), 0))
498    /* Only error codes returned by the rest of the code should be passed
499       to this routine.  If we are given anything else, or if other regex
500       code generates an invalid error code, then the program has a bug.
501       Dump core so we can fix it.  */
502    abort ();
503
504  msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
505
506  msg_size = strlen (msg) + 1; /* Includes the null.  */
507
508  if (BE (errbuf_size != 0, 1))
509    {
510      if (BE (msg_size > errbuf_size, 0))
511	{
512	  memcpy (errbuf, msg, errbuf_size - 1);
513	  errbuf[errbuf_size - 1] = 0;
514	}
515      else
516	memcpy (errbuf, msg, msg_size);
517    }
518
519  return msg_size;
520}
521#ifdef _LIBC
522weak_alias (__regerror, regerror)
523#endif
524
525
526static void
527free_dfa_content (re_dfa_t *dfa)
528{
529  int i, j;
530
531  re_free (dfa->subexps);
532
533  for (i = 0; i < dfa->nodes_len; ++i)
534    {
535      re_token_t *node = dfa->nodes + i;
536#ifdef RE_ENABLE_I18N
537      if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
538	free_charset (node->opr.mbcset);
539      else
540#endif /* RE_ENABLE_I18N */
541	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
542	  re_free (node->opr.sbcset);
543    }
544  re_free (dfa->nexts);
545  for (i = 0; i < dfa->nodes_len; ++i)
546    {
547      if (dfa->eclosures != NULL)
548	re_node_set_free (dfa->eclosures + i);
549      if (dfa->inveclosures != NULL)
550	re_node_set_free (dfa->inveclosures + i);
551      if (dfa->edests != NULL)
552	re_node_set_free (dfa->edests + i);
553    }
554  re_free (dfa->edests);
555  re_free (dfa->eclosures);
556  re_free (dfa->inveclosures);
557  re_free (dfa->nodes);
558
559  for (i = 0; i <= dfa->state_hash_mask; ++i)
560    {
561      struct re_state_table_entry *entry = dfa->state_table + i;
562      for (j = 0; j < entry->num; ++j)
563	{
564	  re_dfastate_t *state = entry->array[j];
565	  free_state (state);
566	}
567      re_free (entry->array);
568    }
569  re_free (dfa->state_table);
570
571  if (dfa->word_char != NULL)
572    re_free (dfa->word_char);
573#ifdef DEBUG
574  re_free (dfa->re_str);
575#endif
576
577  re_free (dfa);
578}
579
580
581/* Free dynamically allocated space used by PREG.  */
582
583void
584regfree (preg)
585    regex_t *preg;
586{
587  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
588  if (BE (dfa != NULL, 1))
589    free_dfa_content (dfa);
590
591  re_free (preg->fastmap);
592}
593#ifdef _LIBC
594weak_alias (__regfree, regfree)
595#endif
596
597/* Entry points compatible with 4.2 BSD regex library.  We don't define
598   them unless specifically requested.  */
599
600#if defined _REGEX_RE_COMP || defined _LIBC
601
602/* BSD has one and only one pattern buffer.  */
603static struct re_pattern_buffer re_comp_buf;
604
605char *
606# ifdef _LIBC
607/* Make these definitions weak in libc, so POSIX programs can redefine
608   these names if they don't use our functions, and still use
609   regcomp/regexec above without link errors.  */
610weak_function
611# endif
612re_comp (s)
613     const char *s;
614{
615  reg_errcode_t ret;
616  char *fastmap;
617
618  if (!s)
619    {
620      if (!re_comp_buf.buffer)
621	return gettext ("No previous regular expression");
622      return 0;
623    }
624
625  if (re_comp_buf.buffer)
626    {
627      fastmap = re_comp_buf.fastmap;
628      re_comp_buf.fastmap = NULL;
629      __regfree (&re_comp_buf);
630      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
631      re_comp_buf.fastmap = fastmap;
632    }
633
634  if (re_comp_buf.fastmap == NULL)
635    {
636      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
637      if (re_comp_buf.fastmap == NULL)
638	return (char *) gettext (__re_error_msgid
639				 + __re_error_msgid_idx[(int) REG_ESPACE]);
640    }
641
642  /* Since `re_exec' always passes NULL for the `regs' argument, we
643     don't need to initialize the pattern buffer fields which affect it.  */
644
645  /* Match anchors at newlines.  */
646  re_comp_buf.newline_anchor = 1;
647
648  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
649
650  if (!ret)
651    return NULL;
652
653  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
654  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
655}
656
657#ifdef _LIBC
658libc_freeres_fn (free_mem)
659{
660  __regfree (&re_comp_buf);
661}
662#endif
663
664#endif /* _REGEX_RE_COMP */
665
666/* Internal entry point.
667   Compile the regular expression PATTERN, whose length is LENGTH.
668   SYNTAX indicate regular expression's syntax.  */
669
670static reg_errcode_t
671re_compile_internal (preg, pattern, length, syntax)
672     regex_t *preg;
673     const char * pattern;
674     int length;
675     reg_syntax_t syntax;
676{
677  reg_errcode_t err = REG_NOERROR;
678  re_dfa_t *dfa;
679  re_string_t regexp;
680
681  /* Initialize the pattern buffer.  */
682  preg->fastmap_accurate = 0;
683  preg->syntax = syntax;
684  preg->not_bol = preg->not_eol = 0;
685  preg->used = 0;
686  preg->re_nsub = 0;
687  preg->can_be_null = 0;
688  preg->regs_allocated = REGS_UNALLOCATED;
689
690  /* Initialize the dfa.  */
691  dfa = (re_dfa_t *) preg->buffer;
692  if (preg->allocated < sizeof (re_dfa_t))
693    {
694      /* If zero allocated, but buffer is non-null, try to realloc
695	 enough space.  This loses if buffer's address is bogus, but
696	 that is the user's responsibility.  If ->buffer is NULL this
697	 is a simple allocation.  */
698      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
699      if (dfa == NULL)
700	return REG_ESPACE;
701      preg->allocated = sizeof (re_dfa_t);
702    }
703  preg->buffer = (unsigned char *) dfa;
704  preg->used = sizeof (re_dfa_t);
705
706  err = init_dfa (dfa, length);
707  if (BE (err != REG_NOERROR, 0))
708    {
709      re_free (dfa);
710      preg->buffer = NULL;
711      preg->allocated = 0;
712      return err;
713    }
714#ifdef DEBUG
715  dfa->re_str = re_malloc (char, length + 1);
716  strncpy (dfa->re_str, pattern, length + 1);
717#endif
718
719  err = re_string_construct (&regexp, pattern, length, preg->translate,
720			     syntax & RE_ICASE);
721  if (BE (err != REG_NOERROR, 0))
722    {
723      re_free (dfa);
724      preg->buffer = NULL;
725      preg->allocated = 0;
726      return err;
727    }
728
729  /* Parse the regular expression, and build a structure tree.  */
730  preg->re_nsub = 0;
731  dfa->str_tree = parse (&regexp, preg, syntax, &err);
732  if (BE (dfa->str_tree == NULL, 0))
733    goto re_compile_internal_free_return;
734
735  /* Analyze the tree and collect information which is necessary to
736     create the dfa.  */
737  err = analyze (dfa);
738  if (BE (err != REG_NOERROR, 0))
739    goto re_compile_internal_free_return;
740
741  /* Then create the initial state of the dfa.  */
742  err = create_initial_state (dfa);
743
744  /* Release work areas.  */
745  free_workarea_compile (preg);
746  re_string_destruct (&regexp);
747
748  if (BE (err != REG_NOERROR, 0))
749    {
750    re_compile_internal_free_return:
751      free_dfa_content (dfa);
752      preg->buffer = NULL;
753      preg->allocated = 0;
754    }
755
756  return err;
757}
758
759/* Initialize DFA.  We use the length of the regular expression PAT_LEN
760   as the initial length of some arrays.  */
761
762static reg_errcode_t
763init_dfa (dfa, pat_len)
764     re_dfa_t *dfa;
765     int pat_len;
766{
767  int table_size;
768
769  memset (dfa, '\0', sizeof (re_dfa_t));
770
771  dfa->nodes_alloc = pat_len + 1;
772  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
773
774  dfa->states_alloc = pat_len + 1;
775
776  /*  table_size = 2 ^ ceil(log pat_len) */
777  for (table_size = 1; table_size > 0; table_size <<= 1)
778    if (table_size > pat_len)
779      break;
780
781  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
782  dfa->state_hash_mask = table_size - 1;
783
784  dfa->subexps_alloc = 1;
785  dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
786  dfa->word_char = NULL;
787
788  if (BE (dfa->nodes == NULL || dfa->state_table == NULL
789	  || dfa->subexps == NULL, 0))
790    {
791      /* We don't bother to free anything which was allocated.  Very
792	 soon the process will go down anyway.  */
793      dfa->subexps = NULL;
794      dfa->state_table = NULL;
795      dfa->nodes = NULL;
796      return REG_ESPACE;
797    }
798  return REG_NOERROR;
799}
800
801/* Initialize WORD_CHAR table, which indicate which character is
802   "word".  In this case "word" means that it is the word construction
803   character used by some operators like "\<", "\>", etc.  */
804
805static reg_errcode_t
806init_word_char (dfa)
807     re_dfa_t *dfa;
808{
809  int i, j, ch;
810  dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
811  if (BE (dfa->word_char == NULL, 0))
812    return REG_ESPACE;
813  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
814    for (j = 0; j < UINT_BITS; ++j, ++ch)
815      if (isalnum (ch) || ch == '_')
816	dfa->word_char[i] |= 1UL << j;
817  return REG_NOERROR;
818}
819
820/* Free the work area which are only used while compiling.  */
821
822static void
823free_workarea_compile (preg)
824     regex_t *preg;
825{
826  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
827  free_bin_tree (dfa->str_tree);
828  dfa->str_tree = NULL;
829  re_free (dfa->org_indices);
830  dfa->org_indices = NULL;
831}
832
833/* Create initial states for all contexts.  */
834
835static reg_errcode_t
836create_initial_state (dfa)
837     re_dfa_t *dfa;
838{
839  int first, i;
840  reg_errcode_t err;
841  re_node_set init_nodes;
842
843  /* Initial states have the epsilon closure of the node which is
844     the first node of the regular expression.  */
845  first = dfa->str_tree->first;
846  dfa->init_node = first;
847  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
848  if (BE (err != REG_NOERROR, 0))
849    return err;
850
851  /* The back-references which are in initial states can epsilon transit,
852     since in this case all of the subexpressions can be null.
853     Then we add epsilon closures of the nodes which are the next nodes of
854     the back-references.  */
855  if (dfa->nbackref > 0)
856    for (i = 0; i < init_nodes.nelem; ++i)
857      {
858	int node_idx = init_nodes.elems[i];
859	re_token_type_t type = dfa->nodes[node_idx].type;
860
861	int clexp_idx;
862	if (type != OP_BACK_REF)
863	  continue;
864	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
865	  {
866	    re_token_t *clexp_node;
867	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
868	    if (clexp_node->type == OP_CLOSE_SUBEXP
869		&& clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
870	      break;
871	  }
872	if (clexp_idx == init_nodes.nelem)
873	  continue;
874
875	if (type == OP_BACK_REF)
876	  {
877	    int dest_idx = dfa->edests[node_idx].elems[0];
878	    if (!re_node_set_contains (&init_nodes, dest_idx))
879	      {
880		re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
881		i = 0;
882	      }
883	  }
884      }
885
886  /* It must be the first time to invoke acquire_state.  */
887  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
888  /* We don't check ERR here, since the initial state must not be NULL.  */
889  if (BE (dfa->init_state == NULL, 0))
890    return err;
891  if (dfa->init_state->has_constraint)
892    {
893      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
894						       CONTEXT_WORD);
895      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
896						     CONTEXT_NEWLINE);
897      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
898							 &init_nodes,
899							 CONTEXT_NEWLINE
900							 | CONTEXT_BEGBUF);
901      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
902	      || dfa->init_state_begbuf == NULL, 0))
903	return err;
904    }
905  else
906    dfa->init_state_word = dfa->init_state_nl
907      = dfa->init_state_begbuf = dfa->init_state;
908
909  re_node_set_free (&init_nodes);
910  return REG_NOERROR;
911}
912
913/* Analyze the structure tree, and calculate "first", "next", "edest",
914   "eclosure", and "inveclosure".  */
915
916static reg_errcode_t
917analyze (dfa)
918     re_dfa_t *dfa;
919{
920  int i;
921  reg_errcode_t ret;
922
923  /* Allocate arrays.  */
924  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
925  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
926  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
927  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
928  dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
929  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
930	  || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
931    return REG_ESPACE;
932  /* Initialize them.  */
933  for (i = 0; i < dfa->nodes_len; ++i)
934    {
935      dfa->nexts[i] = -1;
936      re_node_set_init_empty (dfa->edests + i);
937      re_node_set_init_empty (dfa->eclosures + i);
938      re_node_set_init_empty (dfa->inveclosures + i);
939    }
940
941  ret = analyze_tree (dfa, dfa->str_tree);
942  if (BE (ret == REG_NOERROR, 1))
943    {
944      ret = calc_eclosure (dfa);
945      if (ret == REG_NOERROR)
946	calc_inveclosure (dfa);
947    }
948  return ret;
949}
950
951/* Helper functions for analyze.
952   This function calculate "first", "next", and "edest" for the subtree
953   whose root is NODE.  */
954
955static reg_errcode_t
956analyze_tree (dfa, node)
957     re_dfa_t *dfa;
958     bin_tree_t *node;
959{
960  reg_errcode_t ret;
961  if (node->first == -1)
962    calc_first (dfa, node);
963  if (node->next == -1)
964    calc_next (dfa, node);
965  if (node->eclosure.nelem == 0)
966    calc_epsdest (dfa, node);
967  /* Calculate "first" etc. for the left child.  */
968  if (node->left != NULL)
969    {
970      ret = analyze_tree (dfa, node->left);
971      if (BE (ret != REG_NOERROR, 0))
972	return ret;
973    }
974  /* Calculate "first" etc. for the right child.  */
975  if (node->right != NULL)
976    {
977      ret = analyze_tree (dfa, node->right);
978      if (BE (ret != REG_NOERROR, 0))
979	return ret;
980    }
981  return REG_NOERROR;
982}
983
984/* Calculate "first" for the node NODE.  */
985static void
986calc_first (dfa, node)
987     re_dfa_t *dfa;
988     bin_tree_t *node;
989{
990  int idx, type;
991  idx = node->node_idx;
992  type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
993
994  switch (type)
995    {
996#ifdef DEBUG
997    case OP_OPEN_BRACKET:
998    case OP_CLOSE_BRACKET:
999    case OP_OPEN_DUP_NUM:
1000    case OP_CLOSE_DUP_NUM:
1001    case OP_NON_MATCH_LIST:
1002    case OP_OPEN_COLL_ELEM:
1003    case OP_CLOSE_COLL_ELEM:
1004    case OP_OPEN_EQUIV_CLASS:
1005    case OP_CLOSE_EQUIV_CLASS:
1006    case OP_OPEN_CHAR_CLASS:
1007    case OP_CLOSE_CHAR_CLASS:
1008      /* These must not be appeared here.  */
1009      assert (0);
1010#endif
1011    case END_OF_RE:
1012    case CHARACTER:
1013    case OP_PERIOD:
1014    case OP_DUP_ASTERISK:
1015    case OP_DUP_QUESTION:
1016#ifdef RE_ENABLE_I18N
1017    case COMPLEX_BRACKET:
1018#endif /* RE_ENABLE_I18N */
1019    case SIMPLE_BRACKET:
1020    case OP_BACK_REF:
1021    case ANCHOR:
1022    case OP_OPEN_SUBEXP:
1023    case OP_CLOSE_SUBEXP:
1024      node->first = idx;
1025      break;
1026    case OP_DUP_PLUS:
1027#ifdef DEBUG
1028      assert (node->left != NULL);
1029#endif
1030      if (node->left->first == -1)
1031	calc_first (dfa, node->left);
1032      node->first = node->left->first;
1033      break;
1034    case OP_ALT:
1035      node->first = idx;
1036      break;
1037      /* else fall through */
1038    default:
1039#ifdef DEBUG
1040      assert (node->left != NULL);
1041#endif
1042      if (node->left->first == -1)
1043	calc_first (dfa, node->left);
1044      node->first = node->left->first;
1045      break;
1046    }
1047}
1048
1049/* Calculate "next" for the node NODE.  */
1050
1051static void
1052calc_next (dfa, node)
1053     re_dfa_t *dfa;
1054     bin_tree_t *node;
1055{
1056  int idx, type;
1057  bin_tree_t *parent = node->parent;
1058  if (parent == NULL)
1059    {
1060      node->next = -1;
1061      idx = node->node_idx;
1062      if (node->type == 0)
1063	dfa->nexts[idx] = node->next;
1064      return;
1065    }
1066
1067  idx = parent->node_idx;
1068  type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type;
1069
1070  switch (type)
1071    {
1072    case OP_DUP_ASTERISK:
1073    case OP_DUP_PLUS:
1074      node->next = idx;
1075      break;
1076    case CONCAT:
1077      if (parent->left == node)
1078	{
1079	  if (parent->right->first == -1)
1080	    calc_first (dfa, parent->right);
1081	  node->next = parent->right->first;
1082	  break;
1083	}
1084      /* else fall through */
1085    default:
1086      if (parent->next == -1)
1087	calc_next (dfa, parent);
1088      node->next = parent->next;
1089      break;
1090    }
1091  idx = node->node_idx;
1092  if (node->type == 0)
1093    dfa->nexts[idx] = node->next;
1094}
1095
1096/* Calculate "edest" for the node NODE.  */
1097
1098static void
1099calc_epsdest (dfa, node)
1100     re_dfa_t *dfa;
1101     bin_tree_t *node;
1102{
1103  int idx;
1104  idx = node->node_idx;
1105  if (node->type == 0)
1106    {
1107      if (dfa->nodes[idx].type == OP_DUP_ASTERISK
1108	  || dfa->nodes[idx].type == OP_DUP_PLUS
1109	  || dfa->nodes[idx].type == OP_DUP_QUESTION)
1110	{
1111	  if (node->left->first == -1)
1112	    calc_first (dfa, node->left);
1113	  if (node->next == -1)
1114	    calc_next (dfa, node);
1115	  re_node_set_init_2 (dfa->edests + idx, node->left->first,
1116			      node->next);
1117	}
1118      else if (dfa->nodes[idx].type == OP_ALT)
1119	{
1120	  int left, right;
1121	  if (node->left != NULL)
1122	    {
1123	      if (node->left->first == -1)
1124		calc_first (dfa, node->left);
1125	      left = node->left->first;
1126	    }
1127	  else
1128	    {
1129	      if (node->next == -1)
1130		calc_next (dfa, node);
1131	      left = node->next;
1132	    }
1133	  if (node->right != NULL)
1134	    {
1135	      if (node->right->first == -1)
1136		calc_first (dfa, node->right);
1137	      right = node->right->first;
1138	    }
1139	  else
1140	    {
1141	      if (node->next == -1)
1142		calc_next (dfa, node);
1143	      right = node->next;
1144	    }
1145	  re_node_set_init_2 (dfa->edests + idx, left, right);
1146	}
1147      else if (dfa->nodes[idx].type == ANCHOR
1148	       || dfa->nodes[idx].type == OP_OPEN_SUBEXP
1149	       || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
1150	       || dfa->nodes[idx].type == OP_BACK_REF)
1151	re_node_set_init_1 (dfa->edests + idx, node->next);
1152    }
1153}
1154
1155/* Duplicate the epsilon closure of the node ROOT_NODE.
1156   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1157   to their own constraint.  */
1158
1159static reg_errcode_t
1160duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
1161			init_constraint)
1162     re_dfa_t *dfa;
1163     int top_org_node, top_clone_node, root_node;
1164     unsigned int init_constraint;
1165{
1166  reg_errcode_t err;
1167  int org_node, clone_node, ret;
1168  unsigned int constraint = init_constraint;
1169  for (org_node = top_org_node, clone_node = top_clone_node;;)
1170    {
1171      int org_dest, clone_dest;
1172      if (dfa->nodes[org_node].type == OP_BACK_REF)
1173	{
1174	  /* If the back reference epsilon-transit, its destination must
1175	     also have the constraint.  Then duplicate the epsilon closure
1176	     of the destination of the back reference, and store it in
1177	     edests of the back reference.  */
1178	  org_dest = dfa->nexts[org_node];
1179	  re_node_set_empty (dfa->edests + clone_node);
1180	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
1181	  if (BE (err != REG_NOERROR, 0))
1182	    return err;
1183	  dfa->nexts[clone_node] = dfa->nexts[org_node];
1184	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1185	  if (BE (ret < 0, 0))
1186	    return REG_ESPACE;
1187	}
1188      else if (dfa->edests[org_node].nelem == 0)
1189	{
1190	  /* In case of the node can't epsilon-transit, don't duplicate the
1191	     destination and store the original destination as the
1192	     destination of the node.  */
1193	  dfa->nexts[clone_node] = dfa->nexts[org_node];
1194	  break;
1195	}
1196      else if (dfa->edests[org_node].nelem == 1)
1197	{
1198	  /* In case of the node can epsilon-transit, and it has only one
1199	     destination.  */
1200	  org_dest = dfa->edests[org_node].elems[0];
1201	  re_node_set_empty (dfa->edests + clone_node);
1202	  if (dfa->nodes[org_node].type == ANCHOR)
1203	    {
1204	      /* In case of the node has another constraint, append it.  */
1205	      if (org_node == root_node && clone_node != org_node)
1206		{
1207		  /* ...but if the node is root_node itself, it means the
1208		     epsilon closure have a loop, then tie it to the
1209		     destination of the root_node.  */
1210		  ret = re_node_set_insert (dfa->edests + clone_node,
1211					    org_dest);
1212		  if (BE (ret < 0, 0))
1213		    return REG_ESPACE;
1214		  break;
1215		}
1216	      constraint |= dfa->nodes[org_node].opr.ctx_type;
1217	    }
1218	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
1219	  if (BE (err != REG_NOERROR, 0))
1220	    return err;
1221	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1222	  if (BE (ret < 0, 0))
1223	    return REG_ESPACE;
1224	}
1225      else /* dfa->edests[org_node].nelem == 2 */
1226	{
1227	  /* In case of the node can epsilon-transit, and it has two
1228	     destinations. E.g. '|', '*', '+', '?'.   */
1229	  org_dest = dfa->edests[org_node].elems[0];
1230	  re_node_set_empty (dfa->edests + clone_node);
1231	  /* Search for a duplicated node which satisfies the constraint.  */
1232	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1233	  if (clone_dest == -1)
1234	    {
1235	      /* There are no such a duplicated node, create a new one.  */
1236	      err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
1237	      if (BE (err != REG_NOERROR, 0))
1238		return err;
1239	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1240	      if (BE (ret < 0, 0))
1241		return REG_ESPACE;
1242	      err = duplicate_node_closure (dfa, org_dest, clone_dest,
1243					    root_node, constraint);
1244	      if (BE (err != REG_NOERROR, 0))
1245		return err;
1246	    }
1247	  else
1248	    {
1249	      /* There are a duplicated node which satisfy the constraint,
1250		 use it to avoid infinite loop.  */
1251	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1252	      if (BE (ret < 0, 0))
1253		return REG_ESPACE;
1254	    }
1255
1256	  org_dest = dfa->edests[org_node].elems[1];
1257	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
1258	  if (BE (err != REG_NOERROR, 0))
1259	    return err;
1260	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1261	  if (BE (ret < 0, 0))
1262	    return REG_ESPACE;
1263	}
1264      org_node = org_dest;
1265      clone_node = clone_dest;
1266    }
1267  return REG_NOERROR;
1268}
1269
1270/* Search for a node which is duplicated from the node ORG_NODE, and
1271   satisfies the constraint CONSTRAINT.  */
1272
1273static int
1274search_duplicated_node (dfa, org_node, constraint)
1275     re_dfa_t *dfa;
1276     int org_node;
1277     unsigned int constraint;
1278{
1279  int idx;
1280  for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1281    {
1282      if (org_node == dfa->org_indices[idx]
1283	  && constraint == dfa->nodes[idx].constraint)
1284	return idx; /* Found.  */
1285    }
1286  return -1; /* Not found.  */
1287}
1288
1289/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1290   The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
1291   otherwise return the error code.  */
1292
1293static reg_errcode_t
1294duplicate_node (new_idx, dfa, org_idx, constraint)
1295     re_dfa_t *dfa;
1296     int *new_idx, org_idx;
1297     unsigned int constraint;
1298{
1299  re_token_t dup;
1300  int dup_idx;
1301
1302  dup = dfa->nodes[org_idx];
1303  dup_idx = re_dfa_add_node (dfa, dup, 1);
1304  if (BE (dup_idx == -1, 0))
1305    return REG_ESPACE;
1306  dfa->nodes[dup_idx].constraint = constraint;
1307  if (dfa->nodes[org_idx].type == ANCHOR)
1308    dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
1309  dfa->nodes[dup_idx].duplicated = 1;
1310  re_node_set_init_empty (dfa->edests + dup_idx);
1311  re_node_set_init_empty (dfa->eclosures + dup_idx);
1312  re_node_set_init_empty (dfa->inveclosures + dup_idx);
1313
1314  /* Store the index of the original node.  */
1315  dfa->org_indices[dup_idx] = org_idx;
1316  *new_idx = dup_idx;
1317  return REG_NOERROR;
1318}
1319
1320static void
1321calc_inveclosure (dfa)
1322     re_dfa_t *dfa;
1323{
1324  int src, idx, dest;
1325  for (src = 0; src < dfa->nodes_len; ++src)
1326    {
1327      for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1328	{
1329	  dest = dfa->eclosures[src].elems[idx];
1330	  re_node_set_insert (dfa->inveclosures + dest, src);
1331	}
1332    }
1333}
1334
1335/* Calculate "eclosure" for all the node in DFA.  */
1336
1337static reg_errcode_t
1338calc_eclosure (dfa)
1339     re_dfa_t *dfa;
1340{
1341  int node_idx, incomplete;
1342#ifdef DEBUG
1343  assert (dfa->nodes_len > 0);
1344#endif
1345  incomplete = 0;
1346  /* For each nodes, calculate epsilon closure.  */
1347  for (node_idx = 0; ; ++node_idx)
1348    {
1349      reg_errcode_t err;
1350      re_node_set eclosure_elem;
1351      if (node_idx == dfa->nodes_len)
1352	{
1353	  if (!incomplete)
1354	    break;
1355	  incomplete = 0;
1356	  node_idx = 0;
1357	}
1358
1359#ifdef DEBUG
1360      assert (dfa->eclosures[node_idx].nelem != -1);
1361#endif
1362      /* If we have already calculated, skip it.  */
1363      if (dfa->eclosures[node_idx].nelem != 0)
1364	continue;
1365      /* Calculate epsilon closure of `node_idx'.  */
1366      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1367      if (BE (err != REG_NOERROR, 0))
1368	return err;
1369
1370      if (dfa->eclosures[node_idx].nelem == 0)
1371	{
1372	  incomplete = 1;
1373	  re_node_set_free (&eclosure_elem);
1374	}
1375    }
1376  return REG_NOERROR;
1377}
1378
1379/* Calculate epsilon closure of NODE.  */
1380
1381static reg_errcode_t
1382calc_eclosure_iter (new_set, dfa, node, root)
1383     re_node_set *new_set;
1384     re_dfa_t *dfa;
1385     int node, root;
1386{
1387  reg_errcode_t err;
1388  unsigned int constraint;
1389  int i, incomplete;
1390  re_node_set eclosure;
1391  incomplete = 0;
1392  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1393  if (BE (err != REG_NOERROR, 0))
1394    return err;
1395
1396  /* This indicates that we are calculating this node now.
1397     We reference this value to avoid infinite loop.  */
1398  dfa->eclosures[node].nelem = -1;
1399
1400  constraint = ((dfa->nodes[node].type == ANCHOR)
1401		? dfa->nodes[node].opr.ctx_type : 0);
1402  /* If the current node has constraints, duplicate all nodes.
1403     Since they must inherit the constraints.  */
1404  if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1405    {
1406      int org_node, cur_node;
1407      org_node = cur_node = node;
1408      err = duplicate_node_closure (dfa, node, node, node, constraint);
1409      if (BE (err != REG_NOERROR, 0))
1410	return err;
1411    }
1412
1413  /* Expand each epsilon destination nodes.  */
1414  if (IS_EPSILON_NODE(dfa->nodes[node].type))
1415    for (i = 0; i < dfa->edests[node].nelem; ++i)
1416      {
1417	re_node_set eclosure_elem;
1418	int edest = dfa->edests[node].elems[i];
1419	/* If calculating the epsilon closure of `edest' is in progress,
1420	   return intermediate result.  */
1421	if (dfa->eclosures[edest].nelem == -1)
1422	  {
1423	    incomplete = 1;
1424	    continue;
1425	  }
1426	/* If we haven't calculated the epsilon closure of `edest' yet,
1427	   calculate now. Otherwise use calculated epsilon closure.  */
1428	if (dfa->eclosures[edest].nelem == 0)
1429	  {
1430	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1431	    if (BE (err != REG_NOERROR, 0))
1432	      return err;
1433	  }
1434	else
1435	  eclosure_elem = dfa->eclosures[edest];
1436	/* Merge the epsilon closure of `edest'.  */
1437	re_node_set_merge (&eclosure, &eclosure_elem);
1438	/* If the epsilon closure of `edest' is incomplete,
1439	   the epsilon closure of this node is also incomplete.  */
1440	if (dfa->eclosures[edest].nelem == 0)
1441	  {
1442	    incomplete = 1;
1443	    re_node_set_free (&eclosure_elem);
1444	  }
1445      }
1446
1447  /* Epsilon closures include itself.  */
1448  re_node_set_insert (&eclosure, node);
1449  if (incomplete && !root)
1450    dfa->eclosures[node].nelem = 0;
1451  else
1452    dfa->eclosures[node] = eclosure;
1453  *new_set = eclosure;
1454  return REG_NOERROR;
1455}
1456
1457/* Functions for token which are used in the parser.  */
1458
1459/* Fetch a token from INPUT.
1460   We must not use this function inside bracket expressions.  */
1461
1462static re_token_t
1463fetch_token (input, syntax)
1464     re_string_t *input;
1465     reg_syntax_t syntax;
1466{
1467  re_token_t token;
1468  int consumed_byte;
1469  consumed_byte = peek_token (&token, input, syntax);
1470  re_string_skip_bytes (input, consumed_byte);
1471  return token;
1472}
1473
1474/* Peek a token from INPUT, and return the length of the token.
1475   We must not use this function inside bracket expressions.  */
1476
1477static int
1478peek_token (token, input, syntax)
1479     re_token_t *token;
1480     re_string_t *input;
1481     reg_syntax_t syntax;
1482{
1483  unsigned char c;
1484
1485  if (re_string_eoi (input))
1486    {
1487      token->type = END_OF_RE;
1488      return 0;
1489    }
1490
1491  c = re_string_peek_byte (input, 0);
1492  token->opr.c = c;
1493
1494#ifdef RE_ENABLE_I18N
1495  token->mb_partial = 0;
1496  if (re_mb_cur_max > 1 &&
1497      !re_string_first_byte (input, re_string_cur_idx (input)))
1498    {
1499      token->type = CHARACTER;
1500      token->mb_partial = 1;
1501      return 1;
1502    }
1503#endif
1504  if (c == '\\')
1505    {
1506      unsigned char c2;
1507      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1508	{
1509	  token->type = BACK_SLASH;
1510	  return 1;
1511	}
1512
1513      c2 = re_string_peek_byte_case (input, 1);
1514      token->opr.c = c2;
1515      token->type = CHARACTER;
1516      switch (c2)
1517	{
1518	case '|':
1519	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1520	    token->type = OP_ALT;
1521	  break;
1522	case '1': case '2': case '3': case '4': case '5':
1523	case '6': case '7': case '8': case '9':
1524	  if (!(syntax & RE_NO_BK_REFS))
1525	    {
1526	      token->type = OP_BACK_REF;
1527	      token->opr.idx = c2 - '0';
1528	    }
1529	  break;
1530	case '<':
1531	  if (!(syntax & RE_NO_GNU_OPS))
1532	    {
1533	      token->type = ANCHOR;
1534	      token->opr.idx = WORD_FIRST;
1535	    }
1536	  break;
1537	case '>':
1538	  if (!(syntax & RE_NO_GNU_OPS))
1539	    {
1540	      token->type = ANCHOR;
1541	      token->opr.idx = WORD_LAST;
1542	    }
1543	  break;
1544	case 'b':
1545	  if (!(syntax & RE_NO_GNU_OPS))
1546	    {
1547	      token->type = ANCHOR;
1548	      token->opr.idx = WORD_DELIM;
1549	    }
1550	  break;
1551	case 'B':
1552	  if (!(syntax & RE_NO_GNU_OPS))
1553	    {
1554	      token->type = ANCHOR;
1555	      token->opr.idx = INSIDE_WORD;
1556	    }
1557	  break;
1558	case 'w':
1559	  if (!(syntax & RE_NO_GNU_OPS))
1560	    token->type = OP_WORD;
1561	  break;
1562	case 'W':
1563	  if (!(syntax & RE_NO_GNU_OPS))
1564	    token->type = OP_NOTWORD;
1565	  break;
1566	case '`':
1567	  if (!(syntax & RE_NO_GNU_OPS))
1568	    {
1569	      token->type = ANCHOR;
1570	      token->opr.idx = BUF_FIRST;
1571	    }
1572	  break;
1573	case '\'':
1574	  if (!(syntax & RE_NO_GNU_OPS))
1575	    {
1576	      token->type = ANCHOR;
1577	      token->opr.idx = BUF_LAST;
1578	    }
1579	  break;
1580	case '(':
1581	  if (!(syntax & RE_NO_BK_PARENS))
1582	    token->type = OP_OPEN_SUBEXP;
1583	  break;
1584	case ')':
1585	  if (!(syntax & RE_NO_BK_PARENS))
1586	    token->type = OP_CLOSE_SUBEXP;
1587	  break;
1588	case '+':
1589	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1590	    token->type = OP_DUP_PLUS;
1591	  break;
1592	case '?':
1593	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1594	    token->type = OP_DUP_QUESTION;
1595	  break;
1596	case '{':
1597	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1598	    token->type = OP_OPEN_DUP_NUM;
1599	  break;
1600	case '}':
1601	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1602	    token->type = OP_CLOSE_DUP_NUM;
1603	  break;
1604	default:
1605	  break;
1606	}
1607      return 2;
1608    }
1609
1610  token->type = CHARACTER;
1611  switch (c)
1612    {
1613    case '\n':
1614      if (syntax & RE_NEWLINE_ALT)
1615	token->type = OP_ALT;
1616      break;
1617    case '|':
1618      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1619	token->type = OP_ALT;
1620      break;
1621    case '*':
1622      token->type = OP_DUP_ASTERISK;
1623      break;
1624    case '+':
1625      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1626	token->type = OP_DUP_PLUS;
1627      break;
1628    case '?':
1629      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1630	token->type = OP_DUP_QUESTION;
1631      break;
1632    case '{':
1633      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1634	token->type = OP_OPEN_DUP_NUM;
1635      break;
1636    case '}':
1637      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1638	token->type = OP_CLOSE_DUP_NUM;
1639      break;
1640    case '(':
1641      if (syntax & RE_NO_BK_PARENS)
1642	token->type = OP_OPEN_SUBEXP;
1643      break;
1644    case ')':
1645      if (syntax & RE_NO_BK_PARENS)
1646	token->type = OP_CLOSE_SUBEXP;
1647      break;
1648    case '[':
1649      token->type = OP_OPEN_BRACKET;
1650      break;
1651    case '.':
1652      token->type = OP_PERIOD;
1653      break;
1654    case '^':
1655      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1656	  re_string_cur_idx (input) != 0)
1657	{
1658	  char prev = re_string_peek_byte (input, -1);
1659	  if (prev != '|' && prev != '(' &&
1660	      (!(syntax & RE_NEWLINE_ALT) || prev != '\n'))
1661	    break;
1662	}
1663      token->type = ANCHOR;
1664      token->opr.idx = LINE_FIRST;
1665      break;
1666    case '$':
1667      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1668	  re_string_cur_idx (input) + 1 != re_string_length (input))
1669	{
1670	  re_token_t next;
1671	  re_string_skip_bytes (input, 1);
1672	  peek_token (&next, input, syntax);
1673	  re_string_skip_bytes (input, -1);
1674	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1675	    break;
1676	}
1677      token->type = ANCHOR;
1678      token->opr.idx = LINE_LAST;
1679      break;
1680    default:
1681      break;
1682    }
1683  return 1;
1684}
1685
1686/* Peek a token from INPUT, and return the length of the token.
1687   We must not use this function out of bracket expressions.  */
1688
1689static int
1690peek_token_bracket (token, input, syntax)
1691     re_token_t *token;
1692     re_string_t *input;
1693     reg_syntax_t syntax;
1694{
1695  unsigned char c;
1696  if (re_string_eoi (input))
1697    {
1698      token->type = END_OF_RE;
1699      return 0;
1700    }
1701  c = re_string_peek_byte (input, 0);
1702  token->opr.c = c;
1703
1704#ifdef RE_ENABLE_I18N
1705  if (re_mb_cur_max > 1 &&
1706      !re_string_first_byte (input, re_string_cur_idx (input)))
1707    {
1708      token->type = CHARACTER;
1709      return 1;
1710    }
1711#endif /* RE_ENABLE_I18N */
1712
1713  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS))
1714    {
1715      /* In this case, '\' escape a character.  */
1716      unsigned char c2;
1717      re_string_skip_bytes (input, 1);
1718      c2 = re_string_peek_byte (input, 0);
1719      token->opr.c = c2;
1720      token->type = CHARACTER;
1721      return 1;
1722    }
1723  if (c == '[') /* '[' is a special char in a bracket exps.  */
1724    {
1725      unsigned char c2;
1726      int token_len;
1727      c2 = re_string_peek_byte (input, 1);
1728      token->opr.c = c2;
1729      token_len = 2;
1730      switch (c2)
1731	{
1732	case '.':
1733	  token->type = OP_OPEN_COLL_ELEM;
1734	  break;
1735	case '=':
1736	  token->type = OP_OPEN_EQUIV_CLASS;
1737	  break;
1738	case ':':
1739	  if (syntax & RE_CHAR_CLASSES)
1740	    {
1741	      token->type = OP_OPEN_CHAR_CLASS;
1742	      break;
1743	    }
1744	  /* else fall through.  */
1745	default:
1746	  token->type = CHARACTER;
1747	  token->opr.c = c;
1748	  token_len = 1;
1749	  break;
1750	}
1751      return token_len;
1752    }
1753  switch (c)
1754    {
1755    case '-':
1756      token->type = OP_CHARSET_RANGE;
1757      break;
1758    case ']':
1759      token->type = OP_CLOSE_BRACKET;
1760      break;
1761    case '^':
1762      token->type = OP_NON_MATCH_LIST;
1763      break;
1764    default:
1765      token->type = CHARACTER;
1766    }
1767  return 1;
1768}
1769
1770/* Functions for parser.  */
1771
1772/* Entry point of the parser.
1773   Parse the regular expression REGEXP and return the structure tree.
1774   If an error is occured, ERR is set by error code, and return NULL.
1775   This function build the following tree, from regular expression <reg_exp>:
1776	   CAT
1777	   / \
1778	  /   \
1779   <reg_exp>  EOR
1780
1781   CAT means concatenation.
1782   EOR means end of regular expression.  */
1783
1784static bin_tree_t *
1785parse (regexp, preg, syntax, err)
1786     re_string_t *regexp;
1787     regex_t *preg;
1788     reg_syntax_t syntax;
1789     reg_errcode_t *err;
1790{
1791  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1792  bin_tree_t *tree, *eor, *root;
1793  re_token_t current_token;
1794  int new_idx;
1795  current_token = fetch_token (regexp, syntax);
1796  tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
1797  if (BE (*err != REG_NOERROR && tree == NULL, 0))
1798    return NULL;
1799  new_idx = re_dfa_add_node (dfa, current_token, 0);
1800  eor = create_tree (NULL, NULL, 0, new_idx);
1801  if (tree != NULL)
1802    root = create_tree (tree, eor, CONCAT, 0);
1803  else
1804    root = eor;
1805  if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
1806    {
1807      *err = REG_ESPACE;
1808      return NULL;
1809    }
1810  return root;
1811}
1812
1813/* This function build the following tree, from regular expression
1814   <branch1>|<branch2>:
1815	   ALT
1816	   / \
1817	  /   \
1818   <branch1> <branch2>
1819
1820   ALT means alternative, which represents the operator `|'.  */
1821
1822static bin_tree_t *
1823parse_reg_exp (regexp, preg, token, syntax, nest, err)
1824     re_string_t *regexp;
1825     regex_t *preg;
1826     re_token_t *token;
1827     reg_syntax_t syntax;
1828     int nest;
1829     reg_errcode_t *err;
1830{
1831  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1832  bin_tree_t *tree, *branch = NULL;
1833  int new_idx;
1834  tree = parse_branch (regexp, preg, token, syntax, nest, err);
1835  if (BE (*err != REG_NOERROR && tree == NULL, 0))
1836    return NULL;
1837
1838  while (token->type == OP_ALT)
1839    {
1840      re_token_t alt_token;
1841      alt_token = *token;
1842      new_idx = re_dfa_add_node (dfa, alt_token, 0);
1843      *token = fetch_token (regexp, syntax);
1844      if (token->type != OP_ALT && token->type != END_OF_RE
1845	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
1846	{
1847	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
1848	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
1849	    {
1850	      free_bin_tree (tree);
1851	      return NULL;
1852	    }
1853	}
1854      else
1855	branch = NULL;
1856      tree = create_tree (tree, branch, 0, new_idx);
1857      if (BE (new_idx == -1 || tree == NULL, 0))
1858	{
1859	  *err = REG_ESPACE;
1860	  return NULL;
1861	}
1862      dfa->has_plural_match = 1;
1863    }
1864  return tree;
1865}
1866
1867/* This function build the following tree, from regular expression
1868   <exp1><exp2>:
1869	CAT
1870	/ \
1871       /   \
1872   <exp1> <exp2>
1873
1874   CAT means concatenation.  */
1875
1876static bin_tree_t *
1877parse_branch (regexp, preg, token, syntax, nest, err)
1878     re_string_t *regexp;
1879     regex_t *preg;
1880     re_token_t *token;
1881     reg_syntax_t syntax;
1882     int nest;
1883     reg_errcode_t *err;
1884{
1885  bin_tree_t *tree, *exp;
1886  tree = parse_expression (regexp, preg, token, syntax, nest, err);
1887  if (BE (*err != REG_NOERROR && tree == NULL, 0))
1888    return NULL;
1889
1890  while (token->type != OP_ALT && token->type != END_OF_RE
1891	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
1892    {
1893      exp = parse_expression (regexp, preg, token, syntax, nest, err);
1894      if (BE (*err != REG_NOERROR && exp == NULL, 0))
1895	{
1896	  free_bin_tree (tree);
1897	  return NULL;
1898	}
1899      if (tree != NULL && exp != NULL)
1900	{
1901	  tree = create_tree (tree, exp, CONCAT, 0);
1902	  if (tree == NULL)
1903	    {
1904	      *err = REG_ESPACE;
1905	      return NULL;
1906	    }
1907	}
1908      else if (tree == NULL)
1909	tree = exp;
1910      /* Otherwise exp == NULL, we don't need to create new tree.  */
1911    }
1912  return tree;
1913}
1914
1915/* This function build the following tree, from regular expression a*:
1916	 *
1917	 |
1918	 a
1919*/
1920
1921static bin_tree_t *
1922parse_expression (regexp, preg, token, syntax, nest, err)
1923     re_string_t *regexp;
1924     regex_t *preg;
1925     re_token_t *token;
1926     reg_syntax_t syntax;
1927     int nest;
1928     reg_errcode_t *err;
1929{
1930  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1931  bin_tree_t *tree;
1932  int new_idx;
1933  switch (token->type)
1934    {
1935    case CHARACTER:
1936      new_idx = re_dfa_add_node (dfa, *token, 0);
1937      tree = create_tree (NULL, NULL, 0, new_idx);
1938      if (BE (new_idx == -1 || tree == NULL, 0))
1939	{
1940	  *err = REG_ESPACE;
1941	  return NULL;
1942	}
1943#ifdef RE_ENABLE_I18N
1944      if (re_mb_cur_max > 1)
1945	{
1946	  while (!re_string_eoi (regexp)
1947		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
1948	    {
1949	      bin_tree_t *mbc_remain;
1950	      *token = fetch_token (regexp, syntax);
1951	      new_idx = re_dfa_add_node (dfa, *token, 0);
1952	      mbc_remain = create_tree (NULL, NULL, 0, new_idx);
1953	      tree = create_tree (tree, mbc_remain, CONCAT, 0);
1954	      if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0))
1955		{
1956		  *err = REG_ESPACE;
1957		  return NULL;
1958		}
1959	    }
1960	}
1961#endif
1962      break;
1963    case OP_OPEN_SUBEXP:
1964      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
1965      if (BE (*err != REG_NOERROR && tree == NULL, 0))
1966	return NULL;
1967      break;
1968    case OP_OPEN_BRACKET:
1969      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
1970      if (BE (*err != REG_NOERROR && tree == NULL, 0))
1971	return NULL;
1972      break;
1973    case OP_BACK_REF:
1974      if (BE (preg->re_nsub < token->opr.idx
1975	      || dfa->subexps[token->opr.idx - 1].end == -1, 0))
1976	{
1977	  *err = REG_ESUBREG;
1978	  return NULL;
1979	}
1980      dfa->used_bkref_map |= 1UL << (token->opr.idx - 1);
1981      new_idx = re_dfa_add_node (dfa, *token, 0);
1982      tree = create_tree (NULL, NULL, 0, new_idx);
1983      if (BE (new_idx == -1 || tree == NULL, 0))
1984	{
1985	  *err = REG_ESPACE;
1986	  return NULL;
1987	}
1988      ++dfa->nbackref;
1989      dfa->has_mb_node = 1;
1990      break;
1991    case OP_DUP_ASTERISK:
1992    case OP_DUP_PLUS:
1993    case OP_DUP_QUESTION:
1994    case OP_OPEN_DUP_NUM:
1995      if (syntax & RE_CONTEXT_INVALID_OPS)
1996	{
1997	  *err = REG_BADRPT;
1998	  return NULL;
1999	}
2000      else if (syntax & RE_CONTEXT_INDEP_OPS)
2001	{
2002	  *token = fetch_token (regexp, syntax);
2003	  return parse_expression (regexp, preg, token, syntax, nest, err);
2004	}
2005      /* else fall through  */
2006    case OP_CLOSE_SUBEXP:
2007      if ((token->type == OP_CLOSE_SUBEXP) &&
2008	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2009	{
2010	  *err = REG_ERPAREN;
2011	  return NULL;
2012	}
2013      /* else fall through  */
2014    case OP_CLOSE_DUP_NUM:
2015      /* We treat it as a normal character.  */
2016
2017      /* Then we can these characters as normal characters.  */
2018      token->type = CHARACTER;
2019      new_idx = re_dfa_add_node (dfa, *token, 0);
2020      tree = create_tree (NULL, NULL, 0, new_idx);
2021      if (BE (new_idx == -1 || tree == NULL, 0))
2022	{
2023	  *err = REG_ESPACE;
2024	  return NULL;
2025	}
2026      break;
2027    case ANCHOR:
2028      if (dfa->word_char == NULL)
2029	{
2030	  *err = init_word_char (dfa);
2031	  if (BE (*err != REG_NOERROR, 0))
2032	    return NULL;
2033	}
2034      if (token->opr.ctx_type == WORD_DELIM)
2035	{
2036	  bin_tree_t *tree_first, *tree_last;
2037	  int idx_first, idx_last;
2038	  token->opr.ctx_type = WORD_FIRST;
2039	  idx_first = re_dfa_add_node (dfa, *token, 0);
2040	  tree_first = create_tree (NULL, NULL, 0, idx_first);
2041	  token->opr.ctx_type = WORD_LAST;
2042	  idx_last = re_dfa_add_node (dfa, *token, 0);
2043	  tree_last = create_tree (NULL, NULL, 0, idx_last);
2044	  token->type = OP_ALT;
2045	  new_idx = re_dfa_add_node (dfa, *token, 0);
2046	  tree = create_tree (tree_first, tree_last, 0, new_idx);
2047	  if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
2048		  || tree_first == NULL || tree_last == NULL
2049		  || tree == NULL, 0))
2050	    {
2051	      *err = REG_ESPACE;
2052	      return NULL;
2053	    }
2054	}
2055      else
2056	{
2057	  new_idx = re_dfa_add_node (dfa, *token, 0);
2058	  tree = create_tree (NULL, NULL, 0, new_idx);
2059	  if (BE (new_idx == -1 || tree == NULL, 0))
2060	    {
2061	      *err = REG_ESPACE;
2062	      return NULL;
2063	    }
2064	}
2065      /* We must return here, since ANCHORs can't be followed
2066	 by repetition operators.
2067	 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2068	     it must not be "<ANCHOR(^)><REPEAT(*)>".  */
2069      *token = fetch_token (regexp, syntax);
2070      return tree;
2071    case OP_PERIOD:
2072      new_idx = re_dfa_add_node (dfa, *token, 0);
2073      tree = create_tree (NULL, NULL, 0, new_idx);
2074      if (BE (new_idx == -1 || tree == NULL, 0))
2075	{
2076	  *err = REG_ESPACE;
2077	  return NULL;
2078	}
2079#ifdef RE_ENABLE_I18N
2080      if (re_mb_cur_max > 1)
2081	dfa->has_mb_node = 1;
2082#endif
2083      break;
2084    case OP_WORD:
2085      tree = build_word_op (dfa, regexp->trans, 0, err);
2086      if (BE (*err != REG_NOERROR && tree == NULL, 0))
2087	return NULL;
2088      break;
2089    case OP_NOTWORD:
2090      tree = build_word_op (dfa, regexp->trans, 1, err);
2091      if (BE (*err != REG_NOERROR && tree == NULL, 0))
2092	return NULL;
2093      break;
2094    case OP_ALT:
2095    case END_OF_RE:
2096      return NULL;
2097    case BACK_SLASH:
2098      *err = REG_EESCAPE;
2099      return NULL;
2100    default:
2101      /* Must not happen?  */
2102#ifdef DEBUG
2103      assert (0);
2104#endif
2105      return NULL;
2106    }
2107  *token = fetch_token (regexp, syntax);
2108
2109  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
2110	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
2111    {
2112      tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2113      if (BE (*err != REG_NOERROR && tree == NULL, 0))
2114	return NULL;
2115      dfa->has_plural_match = 1;
2116    }
2117
2118  return tree;
2119}
2120
2121/* This function build the following tree, from regular expression
2122   (<reg_exp>):
2123	 SUBEXP
2124	    |
2125	<reg_exp>
2126*/
2127
2128static bin_tree_t *
2129parse_sub_exp (regexp, preg, token, syntax, nest, err)
2130     re_string_t *regexp;
2131     regex_t *preg;
2132     re_token_t *token;
2133     reg_syntax_t syntax;
2134     int nest;
2135     reg_errcode_t *err;
2136{
2137  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2138  bin_tree_t *tree, *left_par, *right_par;
2139  size_t cur_nsub;
2140  int new_idx;
2141  cur_nsub = preg->re_nsub++;
2142  if (dfa->subexps_alloc < preg->re_nsub)
2143    {
2144      re_subexp_t *new_array;
2145      dfa->subexps_alloc *= 2;
2146      new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc);
2147      if (BE (new_array == NULL, 0))
2148	{
2149	  dfa->subexps_alloc /= 2;
2150	  *err = REG_ESPACE;
2151	  return NULL;
2152	}
2153      dfa->subexps = new_array;
2154    }
2155  dfa->subexps[cur_nsub].start = dfa->nodes_len;
2156  dfa->subexps[cur_nsub].end = -1;
2157
2158  new_idx = re_dfa_add_node (dfa, *token, 0);
2159  left_par = create_tree (NULL, NULL, 0, new_idx);
2160  if (BE (new_idx == -1 || left_par == NULL, 0))
2161    {
2162      *err = REG_ESPACE;
2163      return NULL;
2164    }
2165  dfa->nodes[new_idx].opr.idx = cur_nsub;
2166  *token = fetch_token (regexp, syntax);
2167
2168  /* The subexpression may be a null string.  */
2169  if (token->type == OP_CLOSE_SUBEXP)
2170    tree = NULL;
2171  else
2172    {
2173      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2174      if (BE (*err != REG_NOERROR && tree == NULL, 0))
2175	return NULL;
2176    }
2177  if (BE (token->type != OP_CLOSE_SUBEXP, 0))
2178    {
2179      free_bin_tree (tree);
2180      *err = REG_BADPAT;
2181      return NULL;
2182    }
2183  new_idx = re_dfa_add_node (dfa, *token, 0);
2184  dfa->subexps[cur_nsub].end = dfa->nodes_len;
2185  right_par = create_tree (NULL, NULL, 0, new_idx);
2186  tree = ((tree == NULL) ? right_par
2187	  : create_tree (tree, right_par, CONCAT, 0));
2188  tree = create_tree (left_par, tree, CONCAT, 0);
2189  if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
2190    {
2191      *err = REG_ESPACE;
2192      return NULL;
2193    }
2194  dfa->nodes[new_idx].opr.idx = cur_nsub;
2195
2196  return tree;
2197}
2198
2199/* This function parse repetition operators like "*", "+", "{1,3}" etc.  */
2200
2201static bin_tree_t *
2202parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
2203     bin_tree_t *dup_elem;
2204     re_string_t *regexp;
2205     re_dfa_t *dfa;
2206     re_token_t *token;
2207     reg_syntax_t syntax;
2208     reg_errcode_t *err;
2209{
2210  re_token_t dup_token;
2211  bin_tree_t *tree = dup_elem, *work_tree;
2212  int new_idx, start_idx = re_string_cur_idx (regexp);
2213  re_token_t start_token;
2214  start_token = *token;
2215  if (token->type == OP_OPEN_DUP_NUM)
2216    {
2217      int i;
2218      int end = 0;
2219      int start = fetch_number (regexp, token, syntax);
2220      bin_tree_t *elem;
2221      if (start == -1)
2222	{
2223	  if (token->type == CHARACTER && token->opr.c == ',')
2224	    start = 0; /* We treat "{,m}" as "{0,m}".  */
2225	  else
2226	    {
2227	      *err = REG_BADBR; /* <re>{} is invalid.  */
2228	      return NULL;
2229	    }
2230	}
2231      if (BE (start != -2, 1))
2232	{
2233	  /* We treat "{n}" as "{n,n}".  */
2234	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2235		 : ((token->type == CHARACTER && token->opr.c == ',')
2236		    ? fetch_number (regexp, token, syntax) : -2));
2237	}
2238      if (BE (start == -2 || end == -2, 0))
2239	{
2240	  /* Invalid sequence.  */
2241	  if (token->type == OP_CLOSE_DUP_NUM)
2242	    goto parse_dup_op_invalid_interval;
2243	  else
2244	    goto parse_dup_op_ebrace;
2245	}
2246      if (BE (start == 0 && end == 0, 0))
2247	{
2248	  /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
2249	  *token = fetch_token (regexp, syntax);
2250	  free_bin_tree (dup_elem);
2251	  return NULL;
2252	}
2253
2254      /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
2255      elem = tree;
2256      for (i = 0; i < start; ++i)
2257	if (i != 0)
2258	  {
2259	    work_tree = duplicate_tree (elem, dfa);
2260	    tree = create_tree (tree, work_tree, CONCAT, 0);
2261	    if (BE (work_tree == NULL || tree == NULL, 0))
2262	      goto parse_dup_op_espace;
2263	  }
2264
2265      if (end == -1)
2266	{
2267	  /* We treat "<re>{0,}" as "<re>*".  */
2268	  dup_token.type = OP_DUP_ASTERISK;
2269	  if (start > 0)
2270	    {
2271	      elem = duplicate_tree (elem, dfa);
2272	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
2273	      work_tree = create_tree (elem, NULL, 0, new_idx);
2274	      tree = create_tree (tree, work_tree, CONCAT, 0);
2275	      if (BE (elem == NULL || new_idx == -1 || work_tree == NULL
2276		      || tree == NULL, 0))
2277		goto parse_dup_op_espace;
2278	    }
2279	  else
2280	    {
2281	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
2282	      tree = create_tree (elem, NULL, 0, new_idx);
2283	      if (BE (new_idx == -1 || tree == NULL, 0))
2284		goto parse_dup_op_espace;
2285	    }
2286	}
2287      else if (end - start > 0)
2288	{
2289	  /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?".  */
2290	  dup_token.type = OP_DUP_QUESTION;
2291	  if (start > 0)
2292	    {
2293	      elem = duplicate_tree (elem, dfa);
2294	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
2295	      elem = create_tree (elem, NULL, 0, new_idx);
2296	      tree = create_tree (tree, elem, CONCAT, 0);
2297	      if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0))
2298		goto parse_dup_op_espace;
2299	    }
2300	  else
2301	    {
2302	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
2303	      tree = elem = create_tree (elem, NULL, 0, new_idx);
2304	      if (BE (new_idx == -1 || tree == NULL, 0))
2305		goto parse_dup_op_espace;
2306	    }
2307	  for (i = 1; i < end - start; ++i)
2308	    {
2309	      work_tree = duplicate_tree (elem, dfa);
2310	      tree = create_tree (tree, work_tree, CONCAT, 0);
2311	      if (BE (work_tree == NULL || tree == NULL, 0))
2312		{
2313		  *err = REG_ESPACE;
2314		  return NULL;
2315		}
2316	    }
2317	}
2318    }
2319  else
2320    {
2321      new_idx = re_dfa_add_node (dfa, *token, 0);
2322      tree = create_tree (tree, NULL, 0, new_idx);
2323      if (BE (new_idx == -1 || tree == NULL, 0))
2324	{
2325	  *err = REG_ESPACE;
2326	  return NULL;
2327	}
2328    }
2329  *token = fetch_token (regexp, syntax);
2330  return tree;
2331
2332 parse_dup_op_espace:
2333  free_bin_tree (tree);
2334  *err = REG_ESPACE;
2335  return NULL;
2336
2337 parse_dup_op_ebrace:
2338  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2339    {
2340      *err = REG_EBRACE;
2341      return NULL;
2342    }
2343  goto parse_dup_op_rollback;
2344 parse_dup_op_invalid_interval:
2345  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2346    {
2347      *err = REG_BADBR;
2348      return NULL;
2349    }
2350 parse_dup_op_rollback:
2351  re_string_set_index (regexp, start_idx);
2352  *token = start_token;
2353  token->type = CHARACTER;
2354  return dup_elem;
2355}
2356
2357/* Size of the names for collating symbol/equivalence_class/character_class.
2358   I'm not sure, but maybe enough.  */
2359#define BRACKET_NAME_BUF_SIZE 32
2360
2361#ifndef _LIBC
2362  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2363     Build the range expression which starts from START_ELEM, and ends
2364     at END_ELEM.  The result are written to MBCSET and SBCSET.
2365     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2366     mbcset->range_ends, is a pointer argument sinse we may
2367     update it.  */
2368
2369static reg_errcode_t
2370# ifdef RE_ENABLE_I18N
2371build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2372     re_charset_t *mbcset;
2373     int *range_alloc;
2374# else /* not RE_ENABLE_I18N */
2375build_range_exp (sbcset, start_elem, end_elem)
2376# endif /* not RE_ENABLE_I18N */
2377     re_bitset_ptr_t sbcset;
2378     bracket_elem_t *start_elem, *end_elem;
2379{
2380  unsigned int start_ch, end_ch;
2381  /* Equivalence Classes and Character Classes can't be a range start/end.  */
2382  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2383	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2384	  0))
2385    return REG_ERANGE;
2386
2387  /* We can handle no multi character collating elements without libc
2388     support.  */
2389  if (BE ((start_elem->type == COLL_SYM
2390	   && strlen ((char *) start_elem->opr.name) > 1)
2391	  || (end_elem->type == COLL_SYM
2392	      && strlen ((char *) end_elem->opr.name) > 1), 0))
2393    return REG_ECOLLATE;
2394
2395# ifdef RE_ENABLE_I18N
2396  {
2397    wchar_t wc, start_wc, end_wc;
2398    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2399
2400    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2401		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2402		   : 0));
2403    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2404	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2405		 : 0));
2406    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2407		? __btowc (start_ch) : start_elem->opr.wch);
2408    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2409	      ? __btowc (end_ch) : end_elem->opr.wch);
2410    cmp_buf[0] = start_wc;
2411    cmp_buf[4] = end_wc;
2412    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2413      return REG_ERANGE;
2414
2415    /* Check the space of the arrays.  */
2416    if (*range_alloc == mbcset->nranges)
2417      {
2418	/* There are not enough space, need realloc.  */
2419	wchar_t *new_array_start, *new_array_end;
2420	int new_nranges;
2421
2422	/* +1 in case of mbcset->nranges is 0.  */
2423	new_nranges = 2 * mbcset->nranges + 1;
2424	/* Use realloc since mbcset->range_starts and mbcset->range_ends
2425	   are NULL if *range_alloc == 0.  */
2426	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2427				      new_nranges);
2428	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2429				    new_nranges);
2430
2431	if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2432	  return REG_ESPACE;
2433
2434	mbcset->range_starts = new_array_start;
2435	mbcset->range_ends = new_array_end;
2436	*range_alloc = new_nranges;
2437      }
2438
2439    mbcset->range_starts[mbcset->nranges] = start_wc;
2440    mbcset->range_ends[mbcset->nranges++] = end_wc;
2441
2442    /* Build the table for single byte characters.  */
2443    for (wc = 0; wc <= SBC_MAX; ++wc)
2444      {
2445	cmp_buf[2] = wc;
2446	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2447	    && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2448	  bitset_set (sbcset, wc);
2449      }
2450  }
2451# else /* not RE_ENABLE_I18N */
2452  {
2453    unsigned int ch;
2454    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2455		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2456		   : 0));
2457    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2458	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2459		 : 0));
2460    if (start_ch > end_ch)
2461      return REG_ERANGE;
2462    /* Build the table for single byte characters.  */
2463    for (ch = 0; ch <= SBC_MAX; ++ch)
2464      if (start_ch <= ch  && ch <= end_ch)
2465	bitset_set (sbcset, ch);
2466  }
2467# endif /* not RE_ENABLE_I18N */
2468  return REG_NOERROR;
2469}
2470#endif /* not _LIBC */
2471
2472#ifndef _LIBC
2473/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2474   Build the collating element which is represented by NAME.
2475   The result are written to MBCSET and SBCSET.
2476   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2477   pointer argument since we may update it.  */
2478
2479static reg_errcode_t
2480# ifdef RE_ENABLE_I18N
2481build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2482     re_charset_t *mbcset;
2483     int *coll_sym_alloc;
2484# else /* not RE_ENABLE_I18N */
2485build_collating_symbol (sbcset, name)
2486# endif /* not RE_ENABLE_I18N */
2487     re_bitset_ptr_t sbcset;
2488     const unsigned char *name;
2489{
2490  size_t name_len = strlen ((const char *) name);
2491  if (BE (name_len != 1, 0))
2492    return REG_ECOLLATE;
2493  else
2494    {
2495      bitset_set (sbcset, name[0]);
2496      return REG_NOERROR;
2497    }
2498}
2499#endif /* not _LIBC */
2500
2501/* This function parse bracket expression like "[abc]", "[a-c]",
2502   "[[.a-a.]]" etc.  */
2503
2504static bin_tree_t *
2505parse_bracket_exp (regexp, dfa, token, syntax, err)
2506     re_string_t *regexp;
2507     re_dfa_t *dfa;
2508     re_token_t *token;
2509     reg_syntax_t syntax;
2510     reg_errcode_t *err;
2511{
2512#ifdef _LIBC
2513  const unsigned char *collseqmb;
2514  const char *collseqwc;
2515  uint32_t nrules;
2516  int32_t table_size;
2517  const int32_t *symb_table;
2518  const unsigned char *extra;
2519
2520  /* Local function for parse_bracket_exp used in _LIBC environement.
2521     Seek the collating symbol entry correspondings to NAME.
2522     Return the index of the symbol in the SYMB_TABLE.  */
2523
2524  static inline int32_t
2525  seek_collating_symbol_entry (name, name_len)
2526	 const unsigned char *name;
2527	 size_t name_len;
2528    {
2529      int32_t hash = elem_hash ((const char *) name, name_len);
2530      int32_t elem = hash % table_size;
2531      int32_t second = hash % (table_size - 2);
2532      while (symb_table[2 * elem] != 0)
2533	{
2534	  /* First compare the hashing value.  */
2535	  if (symb_table[2 * elem] == hash
2536	      /* Compare the length of the name.  */
2537	      && name_len == extra[symb_table[2 * elem + 1]]
2538	      /* Compare the name.  */
2539	      && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2540			 name_len) == 0)
2541	    {
2542	      /* Yep, this is the entry.  */
2543	      break;
2544	    }
2545
2546	  /* Next entry.  */
2547	  elem += second;
2548	}
2549      return elem;
2550    }
2551
2552  /* Local function for parse_bracket_exp used in _LIBC environement.
2553     Look up the collation sequence value of BR_ELEM.
2554     Return the value if succeeded, UINT_MAX otherwise.  */
2555
2556  static inline unsigned int
2557  lookup_collation_sequence_value (br_elem)
2558	 bracket_elem_t *br_elem;
2559    {
2560      if (br_elem->type == SB_CHAR)
2561	{
2562	  /*
2563	  if (re_mb_cur_max == 1)
2564	  */
2565	  if (nrules == 0)
2566	    return collseqmb[br_elem->opr.ch];
2567	  else
2568	    {
2569	      wint_t wc = __btowc (br_elem->opr.ch);
2570	      return collseq_table_lookup (collseqwc, wc);
2571	    }
2572	}
2573      else if (br_elem->type == MB_CHAR)
2574	{
2575	  return collseq_table_lookup (collseqwc, br_elem->opr.wch);
2576	}
2577      else if (br_elem->type == COLL_SYM)
2578	{
2579	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2580	  if (nrules != 0)
2581	    {
2582	      int32_t elem, idx;
2583	      elem = seek_collating_symbol_entry (br_elem->opr.name,
2584						  sym_name_len);
2585	      if (symb_table[2 * elem] != 0)
2586		{
2587		  /* We found the entry.  */
2588		  idx = symb_table[2 * elem + 1];
2589		  /* Skip the name of collating element name.  */
2590		  idx += 1 + extra[idx];
2591		  /* Skip the byte sequence of the collating element.  */
2592		  idx += 1 + extra[idx];
2593		  /* Adjust for the alignment.  */
2594		  idx = (idx + 3) & ~3;
2595		  /* Skip the multibyte collation sequence value.  */
2596		  idx += sizeof (unsigned int);
2597		  /* Skip the wide char sequence of the collating element.  */
2598		  idx += sizeof (unsigned int) *
2599		    (1 + *(unsigned int *) (extra + idx));
2600		  /* Return the collation sequence value.  */
2601		  return *(unsigned int *) (extra + idx);
2602		}
2603	      else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2604		{
2605		  /* No valid character.  Match it as a single byte
2606		     character.  */
2607		  return collseqmb[br_elem->opr.name[0]];
2608		}
2609	    }
2610	  else if (sym_name_len == 1)
2611	    return collseqmb[br_elem->opr.name[0]];
2612	}
2613      return UINT_MAX;
2614    }
2615
2616  /* Local function for parse_bracket_exp used in _LIBC environement.
2617     Build the range expression which starts from START_ELEM, and ends
2618     at END_ELEM.  The result are written to MBCSET and SBCSET.
2619     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2620     mbcset->range_ends, is a pointer argument sinse we may
2621     update it.  */
2622
2623  static inline reg_errcode_t
2624# ifdef RE_ENABLE_I18N
2625  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2626	 re_charset_t *mbcset;
2627	 int *range_alloc;
2628# else /* not RE_ENABLE_I18N */
2629  build_range_exp (sbcset, start_elem, end_elem)
2630# endif /* not RE_ENABLE_I18N */
2631	 re_bitset_ptr_t sbcset;
2632	 bracket_elem_t *start_elem, *end_elem;
2633    {
2634      unsigned int ch;
2635      uint32_t start_collseq;
2636      uint32_t end_collseq;
2637
2638# ifdef RE_ENABLE_I18N
2639      /* Check the space of the arrays.  */
2640      if (*range_alloc == mbcset->nranges)
2641	{
2642	  /* There are not enough space, need realloc.  */
2643	  uint32_t *new_array_start;
2644	  uint32_t *new_array_end;
2645	  int new_nranges;
2646
2647	  /* +1 in case of mbcset->nranges is 0.  */
2648	  new_nranges = 2 * mbcset->nranges + 1;
2649	  /* Use realloc since mbcset->range_starts and mbcset->range_ends
2650	     are NULL if *range_alloc == 0.  */
2651	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2652					new_nranges);
2653	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2654				      new_nranges);
2655
2656	  if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2657	    return REG_ESPACE;
2658
2659	  mbcset->range_starts = new_array_start;
2660	  mbcset->range_ends = new_array_end;
2661	  *range_alloc = new_nranges;
2662	}
2663# endif /* RE_ENABLE_I18N */
2664
2665      /* Equivalence Classes and Character Classes can't be a range
2666	 start/end.  */
2667      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
2668	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
2669	      0))
2670	return REG_ERANGE;
2671
2672      start_collseq = lookup_collation_sequence_value (start_elem);
2673      end_collseq = lookup_collation_sequence_value (end_elem);
2674      /* Check start/end collation sequence values.  */
2675      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
2676	return REG_ECOLLATE;
2677      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2678	return REG_ERANGE;
2679
2680# ifdef RE_ENABLE_I18N
2681      /* Got valid collation sequence values, add them as a new entry.  */
2682      mbcset->range_starts[mbcset->nranges] = start_collseq;
2683      mbcset->range_ends[mbcset->nranges++] = end_collseq;
2684# endif /* RE_ENABLE_I18N */
2685
2686      /* Build the table for single byte characters.  */
2687      for (ch = 0; ch <= SBC_MAX; ch++)
2688	{
2689	  uint32_t ch_collseq;
2690	  /*
2691	  if (re_mb_cur_max == 1)
2692	  */
2693	  if (nrules == 0)
2694	    ch_collseq = collseqmb[ch];
2695	  else
2696	    ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch));
2697	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2698	    bitset_set (sbcset, ch);
2699	}
2700      return REG_NOERROR;
2701    }
2702
2703  /* Local function for parse_bracket_exp used in _LIBC environement.
2704     Build the collating element which is represented by NAME.
2705     The result are written to MBCSET and SBCSET.
2706     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2707     pointer argument sinse we may update it.  */
2708
2709  static inline reg_errcode_t
2710# ifdef RE_ENABLE_I18N
2711  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2712	 re_charset_t *mbcset;
2713	 int *coll_sym_alloc;
2714# else /* not RE_ENABLE_I18N */
2715  build_collating_symbol (sbcset, name)
2716# endif /* not RE_ENABLE_I18N */
2717	 re_bitset_ptr_t sbcset;
2718	 const unsigned char *name;
2719    {
2720      int32_t elem, idx;
2721      size_t name_len = strlen ((const char *) name);
2722      if (nrules != 0)
2723	{
2724	  elem = seek_collating_symbol_entry (name, name_len);
2725	  if (symb_table[2 * elem] != 0)
2726	    {
2727	      /* We found the entry.  */
2728	      idx = symb_table[2 * elem + 1];
2729	      /* Skip the name of collating element name.  */
2730	      idx += 1 + extra[idx];
2731	    }
2732	  else if (symb_table[2 * elem] == 0 && name_len == 1)
2733	    {
2734	      /* No valid character, treat it as a normal
2735		 character.  */
2736	      bitset_set (sbcset, name[0]);
2737	      return REG_NOERROR;
2738	    }
2739	  else
2740	    return REG_ECOLLATE;
2741
2742# ifdef RE_ENABLE_I18N
2743	  /* Got valid collation sequence, add it as a new entry.  */
2744	  /* Check the space of the arrays.  */
2745	  if (*coll_sym_alloc == mbcset->ncoll_syms)
2746	    {
2747	      /* Not enough, realloc it.  */
2748	      /* +1 in case of mbcset->ncoll_syms is 0.  */
2749	      *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
2750	      /* Use realloc since mbcset->coll_syms is NULL
2751		 if *alloc == 0.  */
2752	      mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
2753					      *coll_sym_alloc);
2754	      if (BE (mbcset->coll_syms == NULL, 0))
2755		return REG_ESPACE;
2756	    }
2757	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
2758# endif /* RE_ENABLE_I18N */
2759	  return REG_NOERROR;
2760	}
2761      else
2762	{
2763	  if (BE (name_len != 1, 0))
2764	    return REG_ECOLLATE;
2765	  else
2766	    {
2767	      bitset_set (sbcset, name[0]);
2768	      return REG_NOERROR;
2769	    }
2770	}
2771    }
2772#endif
2773
2774  re_token_t br_token;
2775  re_bitset_ptr_t sbcset;
2776#ifdef RE_ENABLE_I18N
2777  re_charset_t *mbcset;
2778  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
2779  int equiv_class_alloc = 0, char_class_alloc = 0;
2780#else /* not RE_ENABLE_I18N */
2781  int non_match = 0;
2782#endif /* not RE_ENABLE_I18N */
2783  bin_tree_t *work_tree;
2784  int token_len, new_idx;
2785#ifdef _LIBC
2786  collseqmb = (const unsigned char *)
2787    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
2788  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2789  if (nrules)
2790    {
2791      /*
2792      if (re_mb_cur_max > 1)
2793      */
2794	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2795      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
2796      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
2797						  _NL_COLLATE_SYMB_TABLEMB);
2798      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
2799						   _NL_COLLATE_SYMB_EXTRAMB);
2800    }
2801#endif
2802  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
2803#ifdef RE_ENABLE_I18N
2804  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
2805#endif /* RE_ENABLE_I18N */
2806#ifdef RE_ENABLE_I18N
2807  if (BE (sbcset == NULL || mbcset == NULL, 0))
2808#else
2809  if (BE (sbcset == NULL, 0))
2810#endif /* RE_ENABLE_I18N */
2811    {
2812      *err = REG_ESPACE;
2813      return NULL;
2814    }
2815
2816  token_len = peek_token_bracket (token, regexp, syntax);
2817  if (BE (token->type == END_OF_RE, 0))
2818    {
2819      *err = REG_BADPAT;
2820      goto parse_bracket_exp_free_return;
2821    }
2822  if (token->type == OP_NON_MATCH_LIST)
2823    {
2824#ifdef RE_ENABLE_I18N
2825      int i;
2826      mbcset->non_match = 1;
2827#else /* not RE_ENABLE_I18N */
2828      non_match = 1;
2829#endif /* not RE_ENABLE_I18N */
2830      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
2831	bitset_set (sbcset, '\0');
2832      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
2833      token_len = peek_token_bracket (token, regexp, syntax);
2834      if (BE (token->type == END_OF_RE, 0))
2835	{
2836	  *err = REG_BADPAT;
2837	  goto parse_bracket_exp_free_return;
2838	}
2839#ifdef RE_ENABLE_I18N
2840      if (re_mb_cur_max > 1)
2841	for (i = 0; i < SBC_MAX; ++i)
2842	  if (__btowc (i) == WEOF)
2843	    bitset_set (sbcset, i);
2844#endif /* RE_ENABLE_I18N */
2845    }
2846
2847  /* We treat the first ']' as a normal character.  */
2848  if (token->type == OP_CLOSE_BRACKET)
2849    token->type = CHARACTER;
2850
2851  while (1)
2852    {
2853      bracket_elem_t start_elem, end_elem;
2854      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
2855      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
2856      reg_errcode_t ret;
2857      int token_len2 = 0, is_range_exp = 0;
2858      re_token_t token2;
2859
2860      start_elem.opr.name = start_name_buf;
2861      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
2862				   syntax);
2863      if (BE (ret != REG_NOERROR, 0))
2864	{
2865	  *err = ret;
2866	  goto parse_bracket_exp_free_return;
2867	}
2868
2869      token_len = peek_token_bracket (token, regexp, syntax);
2870      if (BE (token->type == END_OF_RE, 0))
2871	{
2872	  *err = REG_BADPAT;
2873	  goto parse_bracket_exp_free_return;
2874	}
2875      if (token->type == OP_CHARSET_RANGE)
2876	{
2877	  re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
2878	  token_len2 = peek_token_bracket (&token2, regexp, syntax);
2879	  if (BE (token->type == END_OF_RE, 0))
2880	    {
2881	      *err = REG_BADPAT;
2882	      goto parse_bracket_exp_free_return;
2883	    }
2884	  if (token2.type == OP_CLOSE_BRACKET)
2885	    {
2886	      /* We treat the last '-' as a normal character.  */
2887	      re_string_skip_bytes (regexp, -token_len);
2888	      token->type = CHARACTER;
2889	    }
2890	  else
2891	    is_range_exp = 1;
2892	}
2893
2894      if (is_range_exp == 1)
2895	{
2896	  end_elem.opr.name = end_name_buf;
2897	  ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
2898				       dfa, syntax);
2899	  if (BE (ret != REG_NOERROR, 0))
2900	    {
2901	      *err = ret;
2902	      goto parse_bracket_exp_free_return;
2903	    }
2904
2905	  token_len = peek_token_bracket (token, regexp, syntax);
2906	  if (BE (token->type == END_OF_RE, 0))
2907	    {
2908	      *err = REG_BADPAT;
2909	      goto parse_bracket_exp_free_return;
2910	    }
2911	  *err = build_range_exp (sbcset,
2912#ifdef RE_ENABLE_I18N
2913				  mbcset, &range_alloc,
2914#endif /* RE_ENABLE_I18N */
2915				  &start_elem, &end_elem);
2916	  if (BE (*err != REG_NOERROR, 0))
2917	    goto parse_bracket_exp_free_return;
2918	}
2919      else
2920	{
2921	  switch (start_elem.type)
2922	    {
2923	    case SB_CHAR:
2924	      bitset_set (sbcset, start_elem.opr.ch);
2925	      break;
2926#ifdef RE_ENABLE_I18N
2927	    case MB_CHAR:
2928	      /* Check whether the array has enough space.  */
2929	      if (mbchar_alloc == mbcset->nmbchars)
2930		{
2931		  /* Not enough, realloc it.  */
2932		  /* +1 in case of mbcset->nmbchars is 0.  */
2933		  mbchar_alloc = 2 * mbcset->nmbchars + 1;
2934		  /* Use realloc since array is NULL if *alloc == 0.  */
2935		  mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
2936						mbchar_alloc);
2937		  if (BE (mbcset->mbchars == NULL, 0))
2938		    goto parse_bracket_exp_espace;
2939		}
2940	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
2941	      break;
2942#endif /* RE_ENABLE_I18N */
2943	    case EQUIV_CLASS:
2944	      *err = build_equiv_class (sbcset,
2945#ifdef RE_ENABLE_I18N
2946					mbcset, &equiv_class_alloc,
2947#endif /* RE_ENABLE_I18N */
2948					start_elem.opr.name);
2949	      if (BE (*err != REG_NOERROR, 0))
2950		goto parse_bracket_exp_free_return;
2951	      break;
2952	    case COLL_SYM:
2953	      *err = build_collating_symbol (sbcset,
2954#ifdef RE_ENABLE_I18N
2955					     mbcset, &coll_sym_alloc,
2956#endif /* RE_ENABLE_I18N */
2957					     start_elem.opr.name);
2958	      if (BE (*err != REG_NOERROR, 0))
2959		goto parse_bracket_exp_free_return;
2960	      break;
2961	    case CHAR_CLASS:
2962              ret = build_charclass (regexp->trans, sbcset,
2963#ifdef RE_ENABLE_I18N
2964				      mbcset, &char_class_alloc,
2965#endif /* RE_ENABLE_I18N */
2966				      start_elem.opr.name, syntax);
2967              if (BE (ret != REG_NOERROR, 0))
2968               goto parse_bracket_exp_espace;
2969	      break;
2970	    default:
2971	      assert (0);
2972	      break;
2973	    }
2974	}
2975      if (token->type == OP_CLOSE_BRACKET)
2976	break;
2977    }
2978
2979  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
2980
2981  /* If it is non-matching list.  */
2982#ifdef RE_ENABLE_I18N
2983  if (mbcset->non_match)
2984#else /* not RE_ENABLE_I18N */
2985  if (non_match)
2986#endif /* not RE_ENABLE_I18N */
2987    bitset_not (sbcset);
2988
2989  /* Build a tree for simple bracket.  */
2990  br_token.type = SIMPLE_BRACKET;
2991  br_token.opr.sbcset = sbcset;
2992  new_idx = re_dfa_add_node (dfa, br_token, 0);
2993  work_tree = create_tree (NULL, NULL, 0, new_idx);
2994  if (BE (new_idx == -1 || work_tree == NULL, 0))
2995    goto parse_bracket_exp_espace;
2996
2997#ifdef RE_ENABLE_I18N
2998  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
2999      || mbcset->nranges || (re_mb_cur_max > 1 && (mbcset->nchar_classes
3000						|| mbcset->non_match)))
3001    {
3002      re_token_t alt_token;
3003      bin_tree_t *mbc_tree;
3004      /* Build a tree for complex bracket.  */
3005      br_token.type = COMPLEX_BRACKET;
3006      br_token.opr.mbcset = mbcset;
3007      dfa->has_mb_node = 1;
3008      new_idx = re_dfa_add_node (dfa, br_token, 0);
3009      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
3010      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
3011	goto parse_bracket_exp_espace;
3012      /* Then join them by ALT node.  */
3013      dfa->has_plural_match = 1;
3014      alt_token.type = OP_ALT;
3015      new_idx = re_dfa_add_node (dfa, alt_token, 0);
3016      work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
3017      if (BE (new_idx != -1 && mbc_tree != NULL, 1))
3018	return work_tree;
3019    }
3020  else
3021    {
3022      free_charset (mbcset);
3023      return work_tree;
3024    }
3025#else /* not RE_ENABLE_I18N */
3026  return work_tree;
3027#endif /* not RE_ENABLE_I18N */
3028
3029 parse_bracket_exp_espace:
3030  *err = REG_ESPACE;
3031 parse_bracket_exp_free_return:
3032  re_free (sbcset);
3033#ifdef RE_ENABLE_I18N
3034  free_charset (mbcset);
3035#endif /* RE_ENABLE_I18N */
3036  return NULL;
3037}
3038
3039/* Parse an element in the bracket expression.  */
3040
3041static reg_errcode_t
3042parse_bracket_element (elem, regexp, token, token_len, dfa, syntax)
3043     bracket_elem_t *elem;
3044     re_string_t *regexp;
3045     re_token_t *token;
3046     int token_len;
3047     re_dfa_t *dfa;
3048     reg_syntax_t syntax;
3049{
3050#ifdef RE_ENABLE_I18N
3051  int cur_char_size;
3052  cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3053  if (cur_char_size > 1)
3054    {
3055      elem->type = MB_CHAR;
3056      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3057      re_string_skip_bytes (regexp, cur_char_size);
3058      return REG_NOERROR;
3059    }
3060#endif /* RE_ENABLE_I18N */
3061  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
3062  if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3063      || token->type == OP_OPEN_EQUIV_CLASS)
3064    return parse_bracket_symbol (elem, regexp, token);
3065  elem->type = SB_CHAR;
3066  elem->opr.ch = token->opr.c;
3067  return REG_NOERROR;
3068}
3069
3070/* Parse a bracket symbol in the bracket expression.  Bracket symbols are
3071   such as [:<character_class>:], [.<collating_element>.], and
3072   [=<equivalent_class>=].  */
3073
3074static reg_errcode_t
3075parse_bracket_symbol (elem, regexp, token)
3076     bracket_elem_t *elem;
3077     re_string_t *regexp;
3078     re_token_t *token;
3079{
3080  unsigned char ch, delim = token->opr.c;
3081  int i = 0;
3082  for (;; ++i)
3083    {
3084      if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE)
3085	return REG_EBRACK;
3086      if (token->type == OP_OPEN_CHAR_CLASS)
3087	ch = re_string_fetch_byte_case (regexp);
3088      else
3089	ch = re_string_fetch_byte (regexp);
3090      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3091	break;
3092      elem->opr.name[i] = ch;
3093    }
3094  re_string_skip_bytes (regexp, 1);
3095  elem->opr.name[i] = '\0';
3096  switch (token->type)
3097    {
3098    case OP_OPEN_COLL_ELEM:
3099      elem->type = COLL_SYM;
3100      break;
3101    case OP_OPEN_EQUIV_CLASS:
3102      elem->type = EQUIV_CLASS;
3103      break;
3104    case OP_OPEN_CHAR_CLASS:
3105      elem->type = CHAR_CLASS;
3106      break;
3107    default:
3108      break;
3109    }
3110  return REG_NOERROR;
3111}
3112
3113  /* Helper function for parse_bracket_exp.
3114     Build the equivalence class which is represented by NAME.
3115     The result are written to MBCSET and SBCSET.
3116     EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3117     is a pointer argument sinse we may update it.  */
3118
3119static reg_errcode_t
3120#ifdef RE_ENABLE_I18N
3121build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
3122     re_charset_t *mbcset;
3123     int *equiv_class_alloc;
3124#else /* not RE_ENABLE_I18N */
3125build_equiv_class (sbcset, name)
3126#endif /* not RE_ENABLE_I18N */
3127     re_bitset_ptr_t sbcset;
3128     const unsigned char *name;
3129{
3130#if defined _LIBC && defined RE_ENABLE_I18N
3131  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3132  if (nrules != 0)
3133    {
3134      const int32_t *table, *indirect;
3135      const unsigned char *weights, *extra, *cp;
3136      unsigned char char_buf[2];
3137      int32_t idx1, idx2;
3138      unsigned int ch;
3139      size_t len;
3140      /* This #include defines a local function!  */
3141# include <locale/weight.h>
3142      /* Calculate the index for equivalence class.  */
3143      cp = name;
3144      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3145      weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3146					       _NL_COLLATE_WEIGHTMB);
3147      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3148						   _NL_COLLATE_EXTRAMB);
3149      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3150						_NL_COLLATE_INDIRECTMB);
3151      idx1 = findidx (&cp);
3152      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
3153	/* This isn't a valid character.  */
3154	return REG_ECOLLATE;
3155
3156      /* Build single byte matcing table for this equivalence class.  */
3157      char_buf[1] = (unsigned char) '\0';
3158      len = weights[idx1];
3159      for (ch = 0; ch < SBC_MAX; ++ch)
3160	{
3161	  char_buf[0] = ch;
3162	  cp = char_buf;
3163	  idx2 = findidx (&cp);
3164/*
3165	  idx2 = table[ch];
3166*/
3167	  if (idx2 == 0)
3168	    /* This isn't a valid character.  */
3169	    continue;
3170	  if (len == weights[idx2])
3171	    {
3172	      int cnt = 0;
3173	      while (cnt <= len &&
3174		     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
3175		++cnt;
3176
3177	      if (cnt > len)
3178		bitset_set (sbcset, ch);
3179	    }
3180	}
3181      /* Check whether the array has enough space.  */
3182      if (*equiv_class_alloc == mbcset->nequiv_classes)
3183	{
3184	  /* Not enough, realloc it.  */
3185	  /* +1 in case of mbcset->nequiv_classes is 0.  */
3186	  *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3187	  /* Use realloc since the array is NULL if *alloc == 0.  */
3188	  mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
3189					      *equiv_class_alloc);
3190	  if (BE (mbcset->equiv_classes == NULL, 0))
3191	    return REG_ESPACE;
3192	}
3193      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3194    }
3195  else
3196#endif /* _LIBC && RE_ENABLE_I18N */
3197    {
3198      if (BE (strlen ((const char *) name) != 1, 0))
3199	return REG_ECOLLATE;
3200      bitset_set (sbcset, *name);
3201    }
3202  return REG_NOERROR;
3203}
3204
3205  /* Helper function for parse_bracket_exp.
3206     Build the character class which is represented by NAME.
3207     The result are written to MBCSET and SBCSET.
3208     CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3209     is a pointer argument sinse we may update it.  */
3210
3211static reg_errcode_t
3212#ifdef RE_ENABLE_I18N
3213build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax)
3214     re_charset_t *mbcset;
3215     int *char_class_alloc;
3216#else /* not RE_ENABLE_I18N */
3217build_charclass (trans, sbcset, class_name, syntax)
3218#endif /* not RE_ENABLE_I18N */
3219     RE_TRANSLATE_TYPE trans;
3220     re_bitset_ptr_t sbcset;
3221     const unsigned char *class_name;
3222     reg_syntax_t syntax;
3223{
3224  int i;
3225  const char *name = (const char *) class_name;
3226
3227  /* In case of REG_ICASE "upper" and "lower" match the both of
3228     upper and lower cases.  */
3229  if ((syntax & RE_ICASE)
3230      && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
3231    name = "alpha";
3232
3233#ifdef RE_ENABLE_I18N
3234  /* Check the space of the arrays.  */
3235  if (*char_class_alloc == mbcset->nchar_classes)
3236    {
3237      /* Not enough, realloc it.  */
3238      /* +1 in case of mbcset->nchar_classes is 0.  */
3239      *char_class_alloc = 2 * mbcset->nchar_classes + 1;
3240      /* Use realloc since array is NULL if *alloc == 0.  */
3241      mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
3242					 *char_class_alloc);
3243      if (BE (mbcset->char_classes == NULL, 0))
3244	return REG_ESPACE;
3245    }
3246  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3247#endif /* RE_ENABLE_I18N */
3248
3249#define BUILD_CHARCLASS_LOOP(ctype_func)\
3250    for (i = 0; i < SBC_MAX; ++i)	\
3251      {					\
3252	if (ctype_func (i))		\
3253	  {					\
3254	    int ch = trans ? trans[i] : i;	\
3255	    bitset_set (sbcset, ch);		\
3256	  }					\
3257      }
3258
3259  if (strcmp (name, "alnum") == 0)
3260    BUILD_CHARCLASS_LOOP (isalnum)
3261  else if (strcmp (name, "cntrl") == 0)
3262    BUILD_CHARCLASS_LOOP (iscntrl)
3263  else if (strcmp (name, "lower") == 0)
3264    BUILD_CHARCLASS_LOOP (islower)
3265  else if (strcmp (name, "space") == 0)
3266    BUILD_CHARCLASS_LOOP (isspace)
3267  else if (strcmp (name, "alpha") == 0)
3268    BUILD_CHARCLASS_LOOP (isalpha)
3269  else if (strcmp (name, "digit") == 0)
3270    BUILD_CHARCLASS_LOOP (isdigit)
3271  else if (strcmp (name, "print") == 0)
3272    BUILD_CHARCLASS_LOOP (isprint)
3273  else if (strcmp (name, "upper") == 0)
3274    BUILD_CHARCLASS_LOOP (isupper)
3275  else if (strcmp (name, "blank") == 0)
3276    BUILD_CHARCLASS_LOOP (isblank)
3277  else if (strcmp (name, "graph") == 0)
3278    BUILD_CHARCLASS_LOOP (isgraph)
3279  else if (strcmp (name, "punct") == 0)
3280    BUILD_CHARCLASS_LOOP (ispunct)
3281  else if (strcmp (name, "xdigit") == 0)
3282    BUILD_CHARCLASS_LOOP (isxdigit)
3283  else
3284    return REG_ECTYPE;
3285
3286  return REG_NOERROR;
3287}
3288
3289static bin_tree_t *
3290build_word_op (dfa, trans, not, err)
3291     re_dfa_t *dfa;
3292     RE_TRANSLATE_TYPE trans;
3293     int not;
3294     reg_errcode_t *err;
3295{
3296  re_bitset_ptr_t sbcset;
3297#ifdef RE_ENABLE_I18N
3298  re_charset_t *mbcset;
3299  int alloc = 0;
3300#else /* not RE_ENABLE_I18N */
3301  int non_match = 0;
3302#endif /* not RE_ENABLE_I18N */
3303  reg_errcode_t ret;
3304  re_token_t br_token;
3305  bin_tree_t *tree;
3306  int new_idx;
3307
3308  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
3309#ifdef RE_ENABLE_I18N
3310  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3311#endif /* RE_ENABLE_I18N */
3312
3313#ifdef RE_ENABLE_I18N
3314  if (BE (sbcset == NULL || mbcset == NULL, 0))
3315#else /* not RE_ENABLE_I18N */
3316  if (BE (sbcset == NULL, 0))
3317#endif /* not RE_ENABLE_I18N */
3318    {
3319      *err = REG_ESPACE;
3320      return NULL;
3321    }
3322
3323  if (not)
3324    {
3325#ifdef RE_ENABLE_I18N
3326      int i;
3327      /*
3328      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3329	bitset_set(cset->sbcset, '\0');
3330      */
3331      mbcset->non_match = 1;
3332      if (re_mb_cur_max > 1)
3333	for (i = 0; i < SBC_MAX; ++i)
3334	  if (__btowc (i) == WEOF)
3335	    bitset_set (sbcset, i);
3336#else /* not RE_ENABLE_I18N */
3337      non_match = 1;
3338#endif /* not RE_ENABLE_I18N */
3339    }
3340
3341  /* We don't care the syntax in this case.  */
3342  ret = build_charclass (trans, sbcset,
3343#ifdef RE_ENABLE_I18N
3344			 mbcset, &alloc,
3345#endif /* RE_ENABLE_I18N */
3346			 (const unsigned char *) "alpha", 0);
3347
3348  if (BE (ret != REG_NOERROR, 0))
3349    {
3350      re_free (sbcset);
3351#ifdef RE_ENABLE_I18N
3352      free_charset (mbcset);
3353#endif /* RE_ENABLE_I18N */
3354      *err = ret;
3355      return NULL;
3356    }
3357  /* \w match '_' also.  */
3358  bitset_set (sbcset, '_');
3359
3360  /* If it is non-matching list.  */
3361#ifdef RE_ENABLE_I18N
3362  if (mbcset->non_match)
3363#else /* not RE_ENABLE_I18N */
3364  if (non_match)
3365#endif /* not RE_ENABLE_I18N */
3366    bitset_not (sbcset);
3367
3368  /* Build a tree for simple bracket.  */
3369  br_token.type = SIMPLE_BRACKET;
3370  br_token.opr.sbcset = sbcset;
3371  new_idx = re_dfa_add_node (dfa, br_token, 0);
3372  tree = create_tree (NULL, NULL, 0, new_idx);
3373  if (BE (new_idx == -1 || tree == NULL, 0))
3374    goto build_word_op_espace;
3375
3376#ifdef RE_ENABLE_I18N
3377  if (re_mb_cur_max > 1)
3378    {
3379      re_token_t alt_token;
3380      bin_tree_t *mbc_tree;
3381      /* Build a tree for complex bracket.  */
3382      br_token.type = COMPLEX_BRACKET;
3383      br_token.opr.mbcset = mbcset;
3384      dfa->has_mb_node = 1;
3385      new_idx = re_dfa_add_node (dfa, br_token, 0);
3386      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
3387      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
3388	goto build_word_op_espace;
3389      /* Then join them by ALT node.  */
3390      alt_token.type = OP_ALT;
3391      new_idx = re_dfa_add_node (dfa, alt_token, 0);
3392      tree = create_tree (tree, mbc_tree, 0, new_idx);
3393      if (BE (new_idx != -1 && mbc_tree != NULL, 1))
3394	return tree;
3395    }
3396  else
3397    {
3398      free_charset (mbcset);
3399      return tree;
3400    }
3401#else /* not RE_ENABLE_I18N */
3402  return tree;
3403#endif /* not RE_ENABLE_I18N */
3404
3405 build_word_op_espace:
3406  re_free (sbcset);
3407#ifdef RE_ENABLE_I18N
3408  free_charset (mbcset);
3409#endif /* RE_ENABLE_I18N */
3410  *err = REG_ESPACE;
3411  return NULL;
3412}
3413
3414/* This is intended for the expressions like "a{1,3}".
3415   Fetch a number from `input', and return the number.
3416   Return -1, if the number field is empty like "{,1}".
3417   Return -2, If an error is occured.  */
3418
3419static int
3420fetch_number (input, token, syntax)
3421     re_string_t *input;
3422     re_token_t *token;
3423     reg_syntax_t syntax;
3424{
3425  int num = -1;
3426  unsigned char c;
3427  while (1)
3428    {
3429      *token = fetch_token (input, syntax);
3430      c = token->opr.c;
3431      if (BE (token->type == END_OF_RE, 0))
3432	return -2;
3433      if (token->type == OP_CLOSE_DUP_NUM || c == ',')
3434	break;
3435      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
3436	     ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3437      num = (num > RE_DUP_MAX) ? -2 : num;
3438    }
3439  return num;
3440}
3441
3442#ifdef RE_ENABLE_I18N
3443static void
3444free_charset (re_charset_t *cset)
3445{
3446  re_free (cset->mbchars);
3447# ifdef _LIBC
3448  re_free (cset->coll_syms);
3449  re_free (cset->equiv_classes);
3450  re_free (cset->range_starts);
3451  re_free (cset->range_ends);
3452# endif
3453  re_free (cset->char_classes);
3454  re_free (cset);
3455}
3456#endif /* RE_ENABLE_I18N */
3457
3458/* Functions for binary tree operation.  */
3459
3460/* Create a node of tree.
3461   Note: This function automatically free left and right if malloc fails.  */
3462
3463static bin_tree_t *
3464create_tree (left, right, type, index)
3465     bin_tree_t *left;
3466     bin_tree_t *right;
3467     re_token_type_t type;
3468     int index;
3469{
3470  bin_tree_t *tree;
3471  tree = re_malloc (bin_tree_t, 1);
3472  if (BE (tree == NULL, 0))
3473    {
3474      free_bin_tree (left);
3475      free_bin_tree (right);
3476      return NULL;
3477    }
3478  tree->parent = NULL;
3479  tree->left = left;
3480  tree->right = right;
3481  tree->type = type;
3482  tree->node_idx = index;
3483  tree->first = -1;
3484  tree->next = -1;
3485  re_node_set_init_empty (&tree->eclosure);
3486
3487  if (left != NULL)
3488    left->parent = tree;
3489  if (right != NULL)
3490    right->parent = tree;
3491  return tree;
3492}
3493
3494/* Free the sub tree pointed by TREE.  */
3495
3496static void
3497free_bin_tree (tree)
3498     bin_tree_t *tree;
3499{
3500  if (tree == NULL)
3501    return;
3502  /*re_node_set_free (&tree->eclosure);*/
3503  free_bin_tree (tree->left);
3504  free_bin_tree (tree->right);
3505  re_free (tree);
3506}
3507
3508/* Duplicate the node SRC, and return new node.  */
3509
3510static bin_tree_t *
3511duplicate_tree (src, dfa)
3512     const bin_tree_t *src;
3513     re_dfa_t *dfa;
3514{
3515  bin_tree_t *left = NULL, *right = NULL, *new_tree;
3516  int new_node_idx;
3517  /* Since node indies must be according to Post-order of the tree,
3518     we must duplicate the left at first.  */
3519  if (src->left != NULL)
3520    {
3521      left = duplicate_tree (src->left, dfa);
3522      if (left == NULL)
3523	return NULL;
3524    }
3525
3526  /* Secondaly, duplicate the right.  */
3527  if (src->right != NULL)
3528    {
3529      right = duplicate_tree (src->right, dfa);
3530      if (right == NULL)
3531	{
3532	  free_bin_tree (left);
3533	  return NULL;
3534	}
3535    }
3536
3537  /* At last, duplicate itself.  */
3538  if (src->type == NON_TYPE)
3539    {
3540      new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
3541      dfa->nodes[new_node_idx].duplicated = 1;
3542      if (BE (new_node_idx == -1, 0))
3543	{
3544	  free_bin_tree (left);
3545	  free_bin_tree (right);
3546	  return NULL;
3547	}
3548    }
3549  else
3550    new_node_idx = src->type;
3551
3552  new_tree = create_tree (left, right, src->type, new_node_idx);
3553  if (BE (new_tree == NULL, 0))
3554    {
3555      free_bin_tree (left);
3556      free_bin_tree (right);
3557    }
3558  return new_tree;
3559}
3560