1/* -*- buffer-read-only: t -*- vi: set ro: */
2/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3#line 1
4/* Extended regular expression matching and search library.
5   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
6   Software Foundation, Inc.
7   This file is part of the GNU C Library.
8   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
9
10   This program is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   This program is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License along
21   with this program; if not, write to the Free Software Foundation,
22   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
23
24#include "verify.h"
25#include "intprops.h"
26static void re_string_construct_common (const char *str, Idx len,
27					re_string_t *pstr,
28					RE_TRANSLATE_TYPE trans, bool icase,
29					const re_dfa_t *dfa) internal_function;
30static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
31					  const re_node_set *nodes,
32					  re_hashval_t hash) internal_function;
33static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
34					  const re_node_set *nodes,
35					  unsigned int context,
36					  re_hashval_t hash) internal_function;
37
38/* Functions for string operation.  */
39
40/* This function allocate the buffers.  It is necessary to call
41   re_string_reconstruct before using the object.  */
42
43static reg_errcode_t
44internal_function
45re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
46		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
47{
48  reg_errcode_t ret;
49  Idx init_buf_len;
50
51  /* Ensure at least one character fits into the buffers.  */
52  if (init_len < dfa->mb_cur_max)
53    init_len = dfa->mb_cur_max;
54  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
55  re_string_construct_common (str, len, pstr, trans, icase, dfa);
56
57  ret = re_string_realloc_buffers (pstr, init_buf_len);
58  if (BE (ret != REG_NOERROR, 0))
59    return ret;
60
61  pstr->word_char = dfa->word_char;
62  pstr->word_ops_used = dfa->word_ops_used;
63  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
64  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
65  pstr->valid_raw_len = pstr->valid_len;
66  return REG_NOERROR;
67}
68
69/* This function allocate the buffers, and initialize them.  */
70
71static reg_errcode_t
72internal_function
73re_string_construct (re_string_t *pstr, const char *str, Idx len,
74		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
75{
76  reg_errcode_t ret;
77  memset (pstr, '\0', sizeof (re_string_t));
78  re_string_construct_common (str, len, pstr, trans, icase, dfa);
79
80  if (len > 0)
81    {
82      ret = re_string_realloc_buffers (pstr, len + 1);
83      if (BE (ret != REG_NOERROR, 0))
84	return ret;
85    }
86  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
87
88  if (icase)
89    {
90#ifdef RE_ENABLE_I18N
91      if (dfa->mb_cur_max > 1)
92	{
93	  while (1)
94	    {
95	      ret = build_wcs_upper_buffer (pstr);
96	      if (BE (ret != REG_NOERROR, 0))
97		return ret;
98	      if (pstr->valid_raw_len >= len)
99		break;
100	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
101		break;
102	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
103	      if (BE (ret != REG_NOERROR, 0))
104		return ret;
105	    }
106	}
107      else
108#endif /* RE_ENABLE_I18N  */
109	build_upper_buffer (pstr);
110    }
111  else
112    {
113#ifdef RE_ENABLE_I18N
114      if (dfa->mb_cur_max > 1)
115	build_wcs_buffer (pstr);
116      else
117#endif /* RE_ENABLE_I18N  */
118	{
119	  if (trans != NULL)
120	    re_string_translate_buffer (pstr);
121	  else
122	    {
123	      pstr->valid_len = pstr->bufs_len;
124	      pstr->valid_raw_len = pstr->bufs_len;
125	    }
126	}
127    }
128
129  return REG_NOERROR;
130}
131
132/* Helper functions for re_string_allocate, and re_string_construct.  */
133
134static reg_errcode_t
135internal_function
136re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
137{
138#ifdef RE_ENABLE_I18N
139  if (pstr->mb_cur_max > 1)
140    {
141      wint_t *new_wcs;
142
143      /* Avoid overflow.  */
144      size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
145      if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
146	return REG_ESPACE;
147
148      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
149      if (BE (new_wcs == NULL, 0))
150	return REG_ESPACE;
151      pstr->wcs = new_wcs;
152      if (pstr->offsets != NULL)
153	{
154	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
155	  if (BE (new_offsets == NULL, 0))
156	    return REG_ESPACE;
157	  pstr->offsets = new_offsets;
158	}
159    }
160#endif /* RE_ENABLE_I18N  */
161  if (pstr->mbs_allocated)
162    {
163      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
164					   new_buf_len);
165      if (BE (new_mbs == NULL, 0))
166	return REG_ESPACE;
167      pstr->mbs = new_mbs;
168    }
169  pstr->bufs_len = new_buf_len;
170  return REG_NOERROR;
171}
172
173
174static void
175internal_function
176re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
177			    RE_TRANSLATE_TYPE trans, bool icase,
178			    const re_dfa_t *dfa)
179{
180  pstr->raw_mbs = (const unsigned char *) str;
181  pstr->len = len;
182  pstr->raw_len = len;
183  pstr->trans = trans;
184  pstr->icase = icase;
185  pstr->mbs_allocated = (trans != NULL || icase);
186  pstr->mb_cur_max = dfa->mb_cur_max;
187  pstr->is_utf8 = dfa->is_utf8;
188  pstr->map_notascii = dfa->map_notascii;
189  pstr->stop = pstr->len;
190  pstr->raw_stop = pstr->stop;
191}
192
193#ifdef RE_ENABLE_I18N
194
195/* Build wide character buffer PSTR->WCS.
196   If the byte sequence of the string are:
197     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
198   Then wide character buffer will be:
199     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
200   We use WEOF for padding, they indicate that the position isn't
201   a first byte of a multibyte character.
202
203   Note that this function assumes PSTR->VALID_LEN elements are already
204   built and starts from PSTR->VALID_LEN.  */
205
206static void
207internal_function
208build_wcs_buffer (re_string_t *pstr)
209{
210#ifdef _LIBC
211  unsigned char buf[MB_LEN_MAX];
212  assert (MB_LEN_MAX >= pstr->mb_cur_max);
213#else
214  unsigned char buf[64];
215#endif
216  mbstate_t prev_st;
217  Idx byte_idx, end_idx, remain_len;
218  size_t mbclen;
219
220  /* Build the buffers from pstr->valid_len to either pstr->len or
221     pstr->bufs_len.  */
222  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
223  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
224    {
225      wchar_t wc;
226      const char *p;
227
228      remain_len = end_idx - byte_idx;
229      prev_st = pstr->cur_state;
230      /* Apply the translation if we need.  */
231      if (BE (pstr->trans != NULL, 0))
232	{
233	  int i, ch;
234
235	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
236	    {
237	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
238	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
239	    }
240	  p = (const char *) buf;
241	}
242      else
243	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
244      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
245      if (BE (mbclen == (size_t) -2, 0))
246	{
247	  /* The buffer doesn't have enough space, finish to build.  */
248	  pstr->cur_state = prev_st;
249	  break;
250	}
251      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
252	{
253	  /* We treat these cases as a singlebyte character.  */
254	  mbclen = 1;
255	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
256	  if (BE (pstr->trans != NULL, 0))
257	    wc = pstr->trans[wc];
258	  pstr->cur_state = prev_st;
259	}
260
261      /* Write wide character and padding.  */
262      pstr->wcs[byte_idx++] = wc;
263      /* Write paddings.  */
264      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
265	pstr->wcs[byte_idx++] = WEOF;
266    }
267  pstr->valid_len = byte_idx;
268  pstr->valid_raw_len = byte_idx;
269}
270
271/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
272   but for REG_ICASE.  */
273
274static reg_errcode_t
275internal_function
276build_wcs_upper_buffer (re_string_t *pstr)
277{
278  mbstate_t prev_st;
279  Idx src_idx, byte_idx, end_idx, remain_len;
280  size_t mbclen;
281#ifdef _LIBC
282  char buf[MB_LEN_MAX];
283  assert (MB_LEN_MAX >= pstr->mb_cur_max);
284#else
285  char buf[64];
286#endif
287
288  byte_idx = pstr->valid_len;
289  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
290
291  /* The following optimization assumes that ASCII characters can be
292     mapped to wide characters with a simple cast.  */
293  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
294    {
295      while (byte_idx < end_idx)
296	{
297	  wchar_t wc;
298
299	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
300	      && mbsinit (&pstr->cur_state))
301	    {
302	      /* In case of a singlebyte character.  */
303	      pstr->mbs[byte_idx]
304		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
305	      /* The next step uses the assumption that wchar_t is encoded
306		 ASCII-safe: all ASCII values can be converted like this.  */
307	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
308	      ++byte_idx;
309	      continue;
310	    }
311
312	  remain_len = end_idx - byte_idx;
313	  prev_st = pstr->cur_state;
314	  mbclen = __mbrtowc (&wc,
315			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
316			       + byte_idx), remain_len, &pstr->cur_state);
317	  if (BE (mbclen < (size_t) -2, 1))
318	    {
319	      wchar_t wcu = wc;
320	      if (iswlower (wc))
321		{
322		  size_t mbcdlen;
323
324		  wcu = towupper (wc);
325		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
326		  if (BE (mbclen == mbcdlen, 1))
327		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
328		  else
329		    {
330		      src_idx = byte_idx;
331		      goto offsets_needed;
332		    }
333		}
334	      else
335		memcpy (pstr->mbs + byte_idx,
336			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
337	      pstr->wcs[byte_idx++] = wcu;
338	      /* Write paddings.  */
339	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
340		pstr->wcs[byte_idx++] = WEOF;
341	    }
342	  else if (mbclen == (size_t) -1 || mbclen == 0)
343	    {
344	      /* It is an invalid character or '\0'.  Just use the byte.  */
345	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
346	      pstr->mbs[byte_idx] = ch;
347	      /* And also cast it to wide char.  */
348	      pstr->wcs[byte_idx++] = (wchar_t) ch;
349	      if (BE (mbclen == (size_t) -1, 0))
350		pstr->cur_state = prev_st;
351	    }
352	  else
353	    {
354	      /* The buffer doesn't have enough space, finish to build.  */
355	      pstr->cur_state = prev_st;
356	      break;
357	    }
358	}
359      pstr->valid_len = byte_idx;
360      pstr->valid_raw_len = byte_idx;
361      return REG_NOERROR;
362    }
363  else
364    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
365      {
366	wchar_t wc;
367	const char *p;
368      offsets_needed:
369	remain_len = end_idx - byte_idx;
370	prev_st = pstr->cur_state;
371	if (BE (pstr->trans != NULL, 0))
372	  {
373	    int i, ch;
374
375	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
376	      {
377		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
378		buf[i] = pstr->trans[ch];
379	      }
380	    p = (const char *) buf;
381	  }
382	else
383	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
384	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
385	if (BE (mbclen < (size_t) -2, 1))
386	  {
387	    wchar_t wcu = wc;
388	    if (iswlower (wc))
389	      {
390		size_t mbcdlen;
391
392		wcu = towupper (wc);
393		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
394		if (BE (mbclen == mbcdlen, 1))
395		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
396		else if (mbcdlen != (size_t) -1)
397		  {
398		    size_t i;
399
400		    if (byte_idx + mbcdlen > pstr->bufs_len)
401		      {
402			pstr->cur_state = prev_st;
403			break;
404		      }
405
406		    if (pstr->offsets == NULL)
407		      {
408			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
409
410			if (pstr->offsets == NULL)
411			  return REG_ESPACE;
412		      }
413		    if (!pstr->offsets_needed)
414		      {
415			for (i = 0; i < (size_t) byte_idx; ++i)
416			  pstr->offsets[i] = i;
417			pstr->offsets_needed = 1;
418		      }
419
420		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
421		    pstr->wcs[byte_idx] = wcu;
422		    pstr->offsets[byte_idx] = src_idx;
423		    for (i = 1; i < mbcdlen; ++i)
424		      {
425			pstr->offsets[byte_idx + i]
426			  = src_idx + (i < mbclen ? i : mbclen - 1);
427			pstr->wcs[byte_idx + i] = WEOF;
428		      }
429		    pstr->len += mbcdlen - mbclen;
430		    if (pstr->raw_stop > src_idx)
431		      pstr->stop += mbcdlen - mbclen;
432		    end_idx = (pstr->bufs_len > pstr->len)
433			      ? pstr->len : pstr->bufs_len;
434		    byte_idx += mbcdlen;
435		    src_idx += mbclen;
436		    continue;
437		  }
438                else
439                  memcpy (pstr->mbs + byte_idx, p, mbclen);
440	      }
441	    else
442	      memcpy (pstr->mbs + byte_idx, p, mbclen);
443
444	    if (BE (pstr->offsets_needed != 0, 0))
445	      {
446		size_t i;
447		for (i = 0; i < mbclen; ++i)
448		  pstr->offsets[byte_idx + i] = src_idx + i;
449	      }
450	    src_idx += mbclen;
451
452	    pstr->wcs[byte_idx++] = wcu;
453	    /* Write paddings.  */
454	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
455	      pstr->wcs[byte_idx++] = WEOF;
456	  }
457	else if (mbclen == (size_t) -1 || mbclen == 0)
458	  {
459	    /* It is an invalid character or '\0'.  Just use the byte.  */
460	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
461
462	    if (BE (pstr->trans != NULL, 0))
463	      ch = pstr->trans [ch];
464	    pstr->mbs[byte_idx] = ch;
465
466	    if (BE (pstr->offsets_needed != 0, 0))
467	      pstr->offsets[byte_idx] = src_idx;
468	    ++src_idx;
469
470	    /* And also cast it to wide char.  */
471	    pstr->wcs[byte_idx++] = (wchar_t) ch;
472	    if (BE (mbclen == (size_t) -1, 0))
473	      pstr->cur_state = prev_st;
474	  }
475	else
476	  {
477	    /* The buffer doesn't have enough space, finish to build.  */
478	    pstr->cur_state = prev_st;
479	    break;
480	  }
481      }
482  pstr->valid_len = byte_idx;
483  pstr->valid_raw_len = src_idx;
484  return REG_NOERROR;
485}
486
487/* Skip characters until the index becomes greater than NEW_RAW_IDX.
488   Return the index.  */
489
490static Idx
491internal_function
492re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
493{
494  mbstate_t prev_st;
495  Idx rawbuf_idx;
496  size_t mbclen;
497  wint_t wc = WEOF;
498
499  /* Skip the characters which are not necessary to check.  */
500  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
501       rawbuf_idx < new_raw_idx;)
502    {
503      wchar_t wc2;
504      Idx remain_len;
505      remain_len = pstr->len - rawbuf_idx;
506      prev_st = pstr->cur_state;
507      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
508			  remain_len, &pstr->cur_state);
509      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
510	{
511	  /* We treat these cases as a single byte character.  */
512	  if (mbclen == 0 || remain_len == 0)
513	    wc = L'\0';
514	  else
515	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
516	  mbclen = 1;
517	  pstr->cur_state = prev_st;
518	}
519      else
520	wc = wc2;
521      /* Then proceed the next character.  */
522      rawbuf_idx += mbclen;
523    }
524  *last_wc = wc;
525  return rawbuf_idx;
526}
527#endif /* RE_ENABLE_I18N  */
528
529/* Build the buffer PSTR->MBS, and apply the translation if we need.
530   This function is used in case of REG_ICASE.  */
531
532static void
533internal_function
534build_upper_buffer (re_string_t *pstr)
535{
536  Idx char_idx, end_idx;
537  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
538
539  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
540    {
541      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
542      if (BE (pstr->trans != NULL, 0))
543	ch = pstr->trans[ch];
544      if (islower (ch))
545	pstr->mbs[char_idx] = toupper (ch);
546      else
547	pstr->mbs[char_idx] = ch;
548    }
549  pstr->valid_len = char_idx;
550  pstr->valid_raw_len = char_idx;
551}
552
553/* Apply TRANS to the buffer in PSTR.  */
554
555static void
556internal_function
557re_string_translate_buffer (re_string_t *pstr)
558{
559  Idx buf_idx, end_idx;
560  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
561
562  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
563    {
564      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
565      pstr->mbs[buf_idx] = pstr->trans[ch];
566    }
567
568  pstr->valid_len = buf_idx;
569  pstr->valid_raw_len = buf_idx;
570}
571
572/* This function re-construct the buffers.
573   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
574   convert to upper case in case of REG_ICASE, apply translation.  */
575
576static reg_errcode_t
577internal_function
578re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
579{
580  Idx offset;
581
582  if (BE (pstr->raw_mbs_idx <= idx, 0))
583    offset = idx - pstr->raw_mbs_idx;
584  else
585    {
586      /* Reset buffer.  */
587#ifdef RE_ENABLE_I18N
588      if (pstr->mb_cur_max > 1)
589	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
590#endif /* RE_ENABLE_I18N */
591      pstr->len = pstr->raw_len;
592      pstr->stop = pstr->raw_stop;
593      pstr->valid_len = 0;
594      pstr->raw_mbs_idx = 0;
595      pstr->valid_raw_len = 0;
596      pstr->offsets_needed = 0;
597      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
598			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
599      if (!pstr->mbs_allocated)
600	pstr->mbs = (unsigned char *) pstr->raw_mbs;
601      offset = idx;
602    }
603
604  if (BE (offset != 0, 1))
605    {
606      /* Should the already checked characters be kept?  */
607      if (BE (offset < pstr->valid_raw_len, 1))
608	{
609	  /* Yes, move them to the front of the buffer.  */
610#ifdef RE_ENABLE_I18N
611	  if (BE (pstr->offsets_needed, 0))
612	    {
613	      Idx low = 0, high = pstr->valid_len, mid;
614	      do
615		{
616		  mid = (high + low) / 2;
617		  if (pstr->offsets[mid] > offset)
618		    high = mid;
619		  else if (pstr->offsets[mid] < offset)
620		    low = mid + 1;
621		  else
622		    break;
623		}
624	      while (low < high);
625	      if (pstr->offsets[mid] < offset)
626		++mid;
627	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
628							eflags);
629	      /* This can be quite complicated, so handle specially
630		 only the common and easy case where the character with
631		 different length representation of lower and upper
632		 case is present at or after offset.  */
633	      if (pstr->valid_len > offset
634		  && mid == offset && pstr->offsets[mid] == offset)
635		{
636		  memmove (pstr->wcs, pstr->wcs + offset,
637			   (pstr->valid_len - offset) * sizeof (wint_t));
638		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
639		  pstr->valid_len -= offset;
640		  pstr->valid_raw_len -= offset;
641		  for (low = 0; low < pstr->valid_len; low++)
642		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
643		}
644	      else
645		{
646		  /* Otherwise, just find out how long the partial multibyte
647		     character at offset is and fill it with WEOF/255.  */
648		  pstr->len = pstr->raw_len - idx + offset;
649		  pstr->stop = pstr->raw_stop - idx + offset;
650		  pstr->offsets_needed = 0;
651		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
652		    --mid;
653		  while (mid < pstr->valid_len)
654		    if (pstr->wcs[mid] != WEOF)
655		      break;
656		    else
657		      ++mid;
658		  if (mid == pstr->valid_len)
659		    pstr->valid_len = 0;
660		  else
661		    {
662		      pstr->valid_len = pstr->offsets[mid] - offset;
663		      if (pstr->valid_len)
664			{
665			  for (low = 0; low < pstr->valid_len; ++low)
666			    pstr->wcs[low] = WEOF;
667			  memset (pstr->mbs, 255, pstr->valid_len);
668			}
669		    }
670		  pstr->valid_raw_len = pstr->valid_len;
671		}
672	    }
673	  else
674#endif
675	    {
676	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
677							eflags);
678#ifdef RE_ENABLE_I18N
679	      if (pstr->mb_cur_max > 1)
680		memmove (pstr->wcs, pstr->wcs + offset,
681			 (pstr->valid_len - offset) * sizeof (wint_t));
682#endif /* RE_ENABLE_I18N */
683	      if (BE (pstr->mbs_allocated, 0))
684		memmove (pstr->mbs, pstr->mbs + offset,
685			 pstr->valid_len - offset);
686	      pstr->valid_len -= offset;
687	      pstr->valid_raw_len -= offset;
688#if DEBUG
689	      assert (pstr->valid_len > 0);
690#endif
691	    }
692	}
693      else
694	{
695#ifdef RE_ENABLE_I18N
696	  /* No, skip all characters until IDX.  */
697	  Idx prev_valid_len = pstr->valid_len;
698
699	  if (BE (pstr->offsets_needed, 0))
700	    {
701	      pstr->len = pstr->raw_len - idx + offset;
702	      pstr->stop = pstr->raw_stop - idx + offset;
703	      pstr->offsets_needed = 0;
704	    }
705#endif
706	  pstr->valid_len = 0;
707#ifdef RE_ENABLE_I18N
708	  if (pstr->mb_cur_max > 1)
709	    {
710	      Idx wcs_idx;
711	      wint_t wc = WEOF;
712
713	      if (pstr->is_utf8)
714		{
715		  const unsigned char *raw, *p, *end;
716
717		  /* Special case UTF-8.  Multi-byte chars start with any
718		     byte other than 0x80 - 0xbf.  */
719		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
720		  end = raw + (offset - pstr->mb_cur_max);
721		  if (end < pstr->raw_mbs)
722		    end = pstr->raw_mbs;
723		  p = raw + offset - 1;
724#ifdef _LIBC
725		  /* We know the wchar_t encoding is UCS4, so for the simple
726		     case, ASCII characters, skip the conversion step.  */
727		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
728		    {
729		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
730		      /* pstr->valid_len = 0; */
731		      wc = (wchar_t) *p;
732		    }
733		  else
734#endif
735		    for (; p >= end; --p)
736		      if ((*p & 0xc0) != 0x80)
737			{
738			  mbstate_t cur_state;
739			  wchar_t wc2;
740			  Idx mlen = raw + pstr->len - p;
741			  unsigned char buf[6];
742			  size_t mbclen;
743
744			  if (BE (pstr->trans != NULL, 0))
745			    {
746			      int i = mlen < 6 ? mlen : 6;
747			      while (--i >= 0)
748				buf[i] = pstr->trans[p[i]];
749			    }
750			  /* XXX Don't use mbrtowc, we know which conversion
751			     to use (UTF-8 -> UCS4).  */
752			  memset (&cur_state, 0, sizeof (cur_state));
753			  mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
754					      &cur_state);
755			  if (raw + offset - p <= mbclen
756			      && mbclen < (size_t) -2)
757			    {
758			      memset (&pstr->cur_state, '\0',
759				      sizeof (mbstate_t));
760			      pstr->valid_len = mbclen - (raw + offset - p);
761			      wc = wc2;
762			    }
763			  break;
764			}
765		}
766
767	      if (wc == WEOF)
768		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
769	      if (wc == WEOF)
770		pstr->tip_context
771		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
772	      else
773		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
774				      && IS_WIDE_WORD_CHAR (wc))
775				     ? CONTEXT_WORD
776				     : ((IS_WIDE_NEWLINE (wc)
777					 && pstr->newline_anchor)
778					? CONTEXT_NEWLINE : 0));
779	      if (BE (pstr->valid_len, 0))
780		{
781		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
782		    pstr->wcs[wcs_idx] = WEOF;
783		  if (pstr->mbs_allocated)
784		    memset (pstr->mbs, 255, pstr->valid_len);
785		}
786	      pstr->valid_raw_len = pstr->valid_len;
787	    }
788	  else
789#endif /* RE_ENABLE_I18N */
790	    {
791	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
792	      pstr->valid_raw_len = 0;
793	      if (pstr->trans)
794		c = pstr->trans[c];
795	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
796				   ? CONTEXT_WORD
797				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
798				      ? CONTEXT_NEWLINE : 0));
799	    }
800	}
801      if (!BE (pstr->mbs_allocated, 0))
802	pstr->mbs += offset;
803    }
804  pstr->raw_mbs_idx = idx;
805  pstr->len -= offset;
806  pstr->stop -= offset;
807
808  /* Then build the buffers.  */
809#ifdef RE_ENABLE_I18N
810  if (pstr->mb_cur_max > 1)
811    {
812      if (pstr->icase)
813	{
814	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
815	  if (BE (ret != REG_NOERROR, 0))
816	    return ret;
817	}
818      else
819	build_wcs_buffer (pstr);
820    }
821  else
822#endif /* RE_ENABLE_I18N */
823    if (BE (pstr->mbs_allocated, 0))
824      {
825	if (pstr->icase)
826	  build_upper_buffer (pstr);
827	else if (pstr->trans != NULL)
828	  re_string_translate_buffer (pstr);
829      }
830    else
831      pstr->valid_len = pstr->len;
832
833  pstr->cur_idx = 0;
834  return REG_NOERROR;
835}
836
837static unsigned char
838internal_function __attribute ((pure))
839re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
840{
841  int ch;
842  Idx off;
843
844  /* Handle the common (easiest) cases first.  */
845  if (BE (!pstr->mbs_allocated, 1))
846    return re_string_peek_byte (pstr, idx);
847
848#ifdef RE_ENABLE_I18N
849  if (pstr->mb_cur_max > 1
850      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
851    return re_string_peek_byte (pstr, idx);
852#endif
853
854  off = pstr->cur_idx + idx;
855#ifdef RE_ENABLE_I18N
856  if (pstr->offsets_needed)
857    off = pstr->offsets[off];
858#endif
859
860  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
861
862#ifdef RE_ENABLE_I18N
863  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
864     this function returns CAPITAL LETTER I instead of first byte of
865     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
866     since peek_byte_case doesn't advance cur_idx in any way.  */
867  if (pstr->offsets_needed && !isascii (ch))
868    return re_string_peek_byte (pstr, idx);
869#endif
870
871  return ch;
872}
873
874static unsigned char
875internal_function __attribute ((pure))
876re_string_fetch_byte_case (re_string_t *pstr)
877{
878  if (BE (!pstr->mbs_allocated, 1))
879    return re_string_fetch_byte (pstr);
880
881#ifdef RE_ENABLE_I18N
882  if (pstr->offsets_needed)
883    {
884      Idx off;
885      int ch;
886
887      /* For tr_TR.UTF-8 [[:islower:]] there is
888	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
889	 in that case the whole multi-byte character and return
890	 the original letter.  On the other side, with
891	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
892	 anything else would complicate things too much.  */
893
894      if (!re_string_first_byte (pstr, pstr->cur_idx))
895	return re_string_fetch_byte (pstr);
896
897      off = pstr->offsets[pstr->cur_idx];
898      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
899
900      if (! isascii (ch))
901	return re_string_fetch_byte (pstr);
902
903      re_string_skip_bytes (pstr,
904			    re_string_char_size_at (pstr, pstr->cur_idx));
905      return ch;
906    }
907#endif
908
909  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
910}
911
912static void
913internal_function
914re_string_destruct (re_string_t *pstr)
915{
916#ifdef RE_ENABLE_I18N
917  re_free (pstr->wcs);
918  re_free (pstr->offsets);
919#endif /* RE_ENABLE_I18N  */
920  if (pstr->mbs_allocated)
921    re_free (pstr->mbs);
922}
923
924/* Return the context at IDX in INPUT.  */
925
926static unsigned int
927internal_function
928re_string_context_at (const re_string_t *input, Idx idx, int eflags)
929{
930  int c;
931  if (BE (! REG_VALID_INDEX (idx), 0))
932    /* In this case, we use the value stored in input->tip_context,
933       since we can't know the character in input->mbs[-1] here.  */
934    return input->tip_context;
935  if (BE (idx == input->len, 0))
936    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
937	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
938#ifdef RE_ENABLE_I18N
939  if (input->mb_cur_max > 1)
940    {
941      wint_t wc;
942      Idx wc_idx = idx;
943      while(input->wcs[wc_idx] == WEOF)
944	{
945#ifdef DEBUG
946	  /* It must not happen.  */
947	  assert (REG_VALID_INDEX (wc_idx));
948#endif
949	  --wc_idx;
950	  if (! REG_VALID_INDEX (wc_idx))
951	    return input->tip_context;
952	}
953      wc = input->wcs[wc_idx];
954      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
955	return CONTEXT_WORD;
956      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
957	      ? CONTEXT_NEWLINE : 0);
958    }
959  else
960#endif
961    {
962      c = re_string_byte_at (input, idx);
963      if (bitset_contain (input->word_char, c))
964	return CONTEXT_WORD;
965      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
966    }
967}
968
969/* Functions for set operation.  */
970
971static reg_errcode_t
972internal_function
973re_node_set_alloc (re_node_set *set, Idx size)
974{
975  set->alloc = size;
976  set->nelem = 0;
977  set->elems = re_malloc (Idx, size);
978  if (BE (set->elems == NULL, 0))
979    return REG_ESPACE;
980  return REG_NOERROR;
981}
982
983static reg_errcode_t
984internal_function
985re_node_set_init_1 (re_node_set *set, Idx elem)
986{
987  set->alloc = 1;
988  set->nelem = 1;
989  set->elems = re_malloc (Idx, 1);
990  if (BE (set->elems == NULL, 0))
991    {
992      set->alloc = set->nelem = 0;
993      return REG_ESPACE;
994    }
995  set->elems[0] = elem;
996  return REG_NOERROR;
997}
998
999static reg_errcode_t
1000internal_function
1001re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1002{
1003  set->alloc = 2;
1004  set->elems = re_malloc (Idx, 2);
1005  if (BE (set->elems == NULL, 0))
1006    return REG_ESPACE;
1007  if (elem1 == elem2)
1008    {
1009      set->nelem = 1;
1010      set->elems[0] = elem1;
1011    }
1012  else
1013    {
1014      set->nelem = 2;
1015      if (elem1 < elem2)
1016	{
1017	  set->elems[0] = elem1;
1018	  set->elems[1] = elem2;
1019	}
1020      else
1021	{
1022	  set->elems[0] = elem2;
1023	  set->elems[1] = elem1;
1024	}
1025    }
1026  return REG_NOERROR;
1027}
1028
1029static reg_errcode_t
1030internal_function
1031re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1032{
1033  dest->nelem = src->nelem;
1034  if (src->nelem > 0)
1035    {
1036      dest->alloc = dest->nelem;
1037      dest->elems = re_malloc (Idx, dest->alloc);
1038      if (BE (dest->elems == NULL, 0))
1039	{
1040	  dest->alloc = dest->nelem = 0;
1041	  return REG_ESPACE;
1042	}
1043      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1044    }
1045  else
1046    re_node_set_init_empty (dest);
1047  return REG_NOERROR;
1048}
1049
1050/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1051   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1052   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
1053
1054static reg_errcode_t
1055internal_function
1056re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1057			   const re_node_set *src2)
1058{
1059  Idx i1, i2, is, id, delta, sbase;
1060  if (src1->nelem == 0 || src2->nelem == 0)
1061    return REG_NOERROR;
1062
1063  /* We need dest->nelem + 2 * elems_in_intersection; this is a
1064     conservative estimate.  */
1065  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1066    {
1067      Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1068      Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1069      if (BE (new_elems == NULL, 0))
1070        return REG_ESPACE;
1071      dest->elems = new_elems;
1072      dest->alloc = new_alloc;
1073    }
1074
1075  /* Find the items in the intersection of SRC1 and SRC2, and copy
1076     into the top of DEST those that are not already in DEST itself.  */
1077  sbase = dest->nelem + src1->nelem + src2->nelem;
1078  i1 = src1->nelem - 1;
1079  i2 = src2->nelem - 1;
1080  id = dest->nelem - 1;
1081  for (;;)
1082    {
1083      if (src1->elems[i1] == src2->elems[i2])
1084	{
1085	  /* Try to find the item in DEST.  Maybe we could binary search?  */
1086	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1087	    --id;
1088
1089          if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1090            dest->elems[--sbase] = src1->elems[i1];
1091
1092	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1093	    break;
1094	}
1095
1096      /* Lower the highest of the two items.  */
1097      else if (src1->elems[i1] < src2->elems[i2])
1098	{
1099	  if (! REG_VALID_INDEX (--i2))
1100	    break;
1101	}
1102      else
1103	{
1104	  if (! REG_VALID_INDEX (--i1))
1105	    break;
1106	}
1107    }
1108
1109  id = dest->nelem - 1;
1110  is = dest->nelem + src1->nelem + src2->nelem - 1;
1111  delta = is - sbase + 1;
1112
1113  /* Now copy.  When DELTA becomes zero, the remaining
1114     DEST elements are already in place; this is more or
1115     less the same loop that is in re_node_set_merge.  */
1116  dest->nelem += delta;
1117  if (delta > 0 && REG_VALID_INDEX (id))
1118    for (;;)
1119      {
1120        if (dest->elems[is] > dest->elems[id])
1121          {
1122            /* Copy from the top.  */
1123            dest->elems[id + delta--] = dest->elems[is--];
1124            if (delta == 0)
1125              break;
1126          }
1127        else
1128          {
1129            /* Slide from the bottom.  */
1130            dest->elems[id + delta] = dest->elems[id];
1131            if (! REG_VALID_INDEX (--id))
1132              break;
1133          }
1134      }
1135
1136  /* Copy remaining SRC elements.  */
1137  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1138
1139  return REG_NOERROR;
1140}
1141
1142/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1143   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1144
1145static reg_errcode_t
1146internal_function
1147re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1148			const re_node_set *src2)
1149{
1150  Idx i1, i2, id;
1151  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1152    {
1153      dest->alloc = src1->nelem + src2->nelem;
1154      dest->elems = re_malloc (Idx, dest->alloc);
1155      if (BE (dest->elems == NULL, 0))
1156	return REG_ESPACE;
1157    }
1158  else
1159    {
1160      if (src1 != NULL && src1->nelem > 0)
1161	return re_node_set_init_copy (dest, src1);
1162      else if (src2 != NULL && src2->nelem > 0)
1163	return re_node_set_init_copy (dest, src2);
1164      else
1165	re_node_set_init_empty (dest);
1166      return REG_NOERROR;
1167    }
1168  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1169    {
1170      if (src1->elems[i1] > src2->elems[i2])
1171	{
1172	  dest->elems[id++] = src2->elems[i2++];
1173	  continue;
1174	}
1175      if (src1->elems[i1] == src2->elems[i2])
1176	++i2;
1177      dest->elems[id++] = src1->elems[i1++];
1178    }
1179  if (i1 < src1->nelem)
1180    {
1181      memcpy (dest->elems + id, src1->elems + i1,
1182	     (src1->nelem - i1) * sizeof (Idx));
1183      id += src1->nelem - i1;
1184    }
1185  else if (i2 < src2->nelem)
1186    {
1187      memcpy (dest->elems + id, src2->elems + i2,
1188	     (src2->nelem - i2) * sizeof (Idx));
1189      id += src2->nelem - i2;
1190    }
1191  dest->nelem = id;
1192  return REG_NOERROR;
1193}
1194
1195/* Calculate the union set of the sets DEST and SRC. And store it to
1196   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1197
1198static reg_errcode_t
1199internal_function
1200re_node_set_merge (re_node_set *dest, const re_node_set *src)
1201{
1202  Idx is, id, sbase, delta;
1203  if (src == NULL || src->nelem == 0)
1204    return REG_NOERROR;
1205  if (dest->alloc < 2 * src->nelem + dest->nelem)
1206    {
1207      Idx new_alloc = 2 * (src->nelem + dest->alloc);
1208      Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1209      if (BE (new_buffer == NULL, 0))
1210	return REG_ESPACE;
1211      dest->elems = new_buffer;
1212      dest->alloc = new_alloc;
1213    }
1214
1215  if (BE (dest->nelem == 0, 0))
1216    {
1217      dest->nelem = src->nelem;
1218      memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1219      return REG_NOERROR;
1220    }
1221
1222  /* Copy into the top of DEST the items of SRC that are not
1223     found in DEST.  Maybe we could binary search in DEST?  */
1224  for (sbase = dest->nelem + 2 * src->nelem,
1225       is = src->nelem - 1, id = dest->nelem - 1;
1226       REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1227    {
1228      if (dest->elems[id] == src->elems[is])
1229        is--, id--;
1230      else if (dest->elems[id] < src->elems[is])
1231        dest->elems[--sbase] = src->elems[is--];
1232      else /* if (dest->elems[id] > src->elems[is]) */
1233        --id;
1234    }
1235
1236  if (REG_VALID_INDEX (is))
1237    {
1238      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
1239      sbase -= is + 1;
1240      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1241    }
1242
1243  id = dest->nelem - 1;
1244  is = dest->nelem + 2 * src->nelem - 1;
1245  delta = is - sbase + 1;
1246  if (delta == 0)
1247    return REG_NOERROR;
1248
1249  /* Now copy.  When DELTA becomes zero, the remaining
1250     DEST elements are already in place.  */
1251  dest->nelem += delta;
1252  for (;;)
1253    {
1254      if (dest->elems[is] > dest->elems[id])
1255        {
1256	  /* Copy from the top.  */
1257          dest->elems[id + delta--] = dest->elems[is--];
1258	  if (delta == 0)
1259	    break;
1260	}
1261      else
1262        {
1263          /* Slide from the bottom.  */
1264          dest->elems[id + delta] = dest->elems[id];
1265	  if (! REG_VALID_INDEX (--id))
1266	    {
1267	      /* Copy remaining SRC elements.  */
1268	      memcpy (dest->elems, dest->elems + sbase,
1269	              delta * sizeof (Idx));
1270	      break;
1271	    }
1272	}
1273    }
1274
1275  return REG_NOERROR;
1276}
1277
1278/* Insert the new element ELEM to the re_node_set* SET.
1279   SET should not already have ELEM.
1280   Return true if successful.  */
1281
1282static bool
1283internal_function
1284re_node_set_insert (re_node_set *set, Idx elem)
1285{
1286  Idx idx;
1287  /* In case the set is empty.  */
1288  if (set->alloc == 0)
1289    return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1290
1291  if (BE (set->nelem, 0) == 0)
1292    {
1293      /* We already guaranteed above that set->alloc != 0.  */
1294      set->elems[0] = elem;
1295      ++set->nelem;
1296      return true;
1297    }
1298
1299  /* Realloc if we need.  */
1300  if (set->alloc == set->nelem)
1301    {
1302      Idx *new_elems;
1303      set->alloc = set->alloc * 2;
1304      new_elems = re_realloc (set->elems, Idx, set->alloc);
1305      if (BE (new_elems == NULL, 0))
1306	return false;
1307      set->elems = new_elems;
1308    }
1309
1310  /* Move the elements which follows the new element.  Test the
1311     first element separately to skip a check in the inner loop.  */
1312  if (elem < set->elems[0])
1313    {
1314      idx = 0;
1315      for (idx = set->nelem; idx > 0; idx--)
1316        set->elems[idx] = set->elems[idx - 1];
1317    }
1318  else
1319    {
1320      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1321        set->elems[idx] = set->elems[idx - 1];
1322    }
1323
1324  /* Insert the new element.  */
1325  set->elems[idx] = elem;
1326  ++set->nelem;
1327  return true;
1328}
1329
1330/* Insert the new element ELEM to the re_node_set* SET.
1331   SET should not already have any element greater than or equal to ELEM.
1332   Return true if successful.  */
1333
1334static bool
1335internal_function
1336re_node_set_insert_last (re_node_set *set, Idx elem)
1337{
1338  /* Realloc if we need.  */
1339  if (set->alloc == set->nelem)
1340    {
1341      Idx *new_elems;
1342      set->alloc = (set->alloc + 1) * 2;
1343      new_elems = re_realloc (set->elems, Idx, set->alloc);
1344      if (BE (new_elems == NULL, 0))
1345	return false;
1346      set->elems = new_elems;
1347    }
1348
1349  /* Insert the new element.  */
1350  set->elems[set->nelem++] = elem;
1351  return true;
1352}
1353
1354/* Compare two node sets SET1 and SET2.
1355   Return true if SET1 and SET2 are equivalent.  */
1356
1357static bool
1358internal_function __attribute ((pure))
1359re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1360{
1361  Idx i;
1362  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1363    return false;
1364  for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1365    if (set1->elems[i] != set2->elems[i])
1366      return false;
1367  return true;
1368}
1369
1370/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
1371
1372static Idx
1373internal_function __attribute ((pure))
1374re_node_set_contains (const re_node_set *set, Idx elem)
1375{
1376  __re_size_t idx, right, mid;
1377  if (! REG_VALID_NONZERO_INDEX (set->nelem))
1378    return 0;
1379
1380  /* Binary search the element.  */
1381  idx = 0;
1382  right = set->nelem - 1;
1383  while (idx < right)
1384    {
1385      mid = (idx + right) / 2;
1386      if (set->elems[mid] < elem)
1387	idx = mid + 1;
1388      else
1389	right = mid;
1390    }
1391  return set->elems[idx] == elem ? idx + 1 : 0;
1392}
1393
1394static void
1395internal_function
1396re_node_set_remove_at (re_node_set *set, Idx idx)
1397{
1398  verify (! TYPE_SIGNED (Idx));
1399  /* if (idx < 0)
1400     return; */
1401  if (idx >= set->nelem)
1402    return;
1403  --set->nelem;
1404  for (; idx < set->nelem; idx++)
1405    set->elems[idx] = set->elems[idx + 1];
1406}
1407
1408
1409/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1410   Or return REG_MISSING if an error occurred.  */
1411
1412static Idx
1413internal_function
1414re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1415{
1416  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1417    {
1418      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1419      Idx *new_nexts, *new_indices;
1420      re_node_set *new_edests, *new_eclosures;
1421      re_token_t *new_nodes;
1422      size_t max_object_size =
1423	MAX (sizeof (re_token_t),
1424	     MAX (sizeof (re_node_set),
1425		  sizeof (Idx)));
1426
1427      /* Avoid overflows.  */
1428      if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1429	return REG_MISSING;
1430
1431      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1432      if (BE (new_nodes == NULL, 0))
1433	return REG_MISSING;
1434      dfa->nodes = new_nodes;
1435      new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1436      new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1437      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1438      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1439      if (BE (new_nexts == NULL || new_indices == NULL
1440	      || new_edests == NULL || new_eclosures == NULL, 0))
1441	return REG_MISSING;
1442      dfa->nexts = new_nexts;
1443      dfa->org_indices = new_indices;
1444      dfa->edests = new_edests;
1445      dfa->eclosures = new_eclosures;
1446      dfa->nodes_alloc = new_nodes_alloc;
1447    }
1448  dfa->nodes[dfa->nodes_len] = token;
1449  dfa->nodes[dfa->nodes_len].constraint = 0;
1450#ifdef RE_ENABLE_I18N
1451  {
1452  int type = token.type;
1453  dfa->nodes[dfa->nodes_len].accept_mb =
1454    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1455  }
1456#endif
1457  dfa->nexts[dfa->nodes_len] = REG_MISSING;
1458  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1459  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1460  return dfa->nodes_len++;
1461}
1462
1463static inline re_hashval_t
1464internal_function
1465calc_state_hash (const re_node_set *nodes, unsigned int context)
1466{
1467  re_hashval_t hash = nodes->nelem + context;
1468  Idx i;
1469  for (i = 0 ; i < nodes->nelem ; i++)
1470    hash += nodes->elems[i];
1471  return hash;
1472}
1473
1474/* Search for the state whose node_set is equivalent to NODES.
1475   Return the pointer to the state, if we found it in the DFA.
1476   Otherwise create the new one and return it.  In case of an error
1477   return NULL and set the error code in ERR.
1478   Note: - We assume NULL as the invalid state, then it is possible that
1479	   return value is NULL and ERR is REG_NOERROR.
1480	 - We never return non-NULL value in case of any errors, it is for
1481	   optimization.  */
1482
1483static re_dfastate_t *
1484internal_function
1485re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1486		  const re_node_set *nodes)
1487{
1488  re_hashval_t hash;
1489  re_dfastate_t *new_state;
1490  struct re_state_table_entry *spot;
1491  Idx i;
1492#ifdef lint
1493  /* Suppress bogus uninitialized-variable warnings.  */
1494  *err = REG_NOERROR;
1495#endif
1496  if (BE (nodes->nelem == 0, 0))
1497    {
1498      *err = REG_NOERROR;
1499      return NULL;
1500    }
1501  hash = calc_state_hash (nodes, 0);
1502  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1503
1504  for (i = 0 ; i < spot->num ; i++)
1505    {
1506      re_dfastate_t *state = spot->array[i];
1507      if (hash != state->hash)
1508	continue;
1509      if (re_node_set_compare (&state->nodes, nodes))
1510	return state;
1511    }
1512
1513  /* There are no appropriate state in the dfa, create the new one.  */
1514  new_state = create_ci_newstate (dfa, nodes, hash);
1515  if (BE (new_state == NULL, 0))
1516    *err = REG_ESPACE;
1517
1518  return new_state;
1519}
1520
1521/* Search for the state whose node_set is equivalent to NODES and
1522   whose context is equivalent to CONTEXT.
1523   Return the pointer to the state, if we found it in the DFA.
1524   Otherwise create the new one and return it.  In case of an error
1525   return NULL and set the error code in ERR.
1526   Note: - We assume NULL as the invalid state, then it is possible that
1527	   return value is NULL and ERR is REG_NOERROR.
1528	 - We never return non-NULL value in case of any errors, it is for
1529	   optimization.  */
1530
1531static re_dfastate_t *
1532internal_function
1533re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1534			  const re_node_set *nodes, unsigned int context)
1535{
1536  re_hashval_t hash;
1537  re_dfastate_t *new_state;
1538  struct re_state_table_entry *spot;
1539  Idx i;
1540#ifdef lint
1541  /* Suppress bogus uninitialized-variable warnings.  */
1542  *err = REG_NOERROR;
1543#endif
1544  if (nodes->nelem == 0)
1545    {
1546      *err = REG_NOERROR;
1547      return NULL;
1548    }
1549  hash = calc_state_hash (nodes, context);
1550  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1551
1552  for (i = 0 ; i < spot->num ; i++)
1553    {
1554      re_dfastate_t *state = spot->array[i];
1555      if (state->hash == hash
1556	  && state->context == context
1557	  && re_node_set_compare (state->entrance_nodes, nodes))
1558	return state;
1559    }
1560  /* There are no appropriate state in `dfa', create the new one.  */
1561  new_state = create_cd_newstate (dfa, nodes, context, hash);
1562  if (BE (new_state == NULL, 0))
1563    *err = REG_ESPACE;
1564
1565  return new_state;
1566}
1567
1568/* Finish initialization of the new state NEWSTATE, and using its hash value
1569   HASH put in the appropriate bucket of DFA's state table.  Return value
1570   indicates the error code if failed.  */
1571
1572static reg_errcode_t
1573register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1574		re_hashval_t hash)
1575{
1576  struct re_state_table_entry *spot;
1577  reg_errcode_t err;
1578  Idx i;
1579
1580  newstate->hash = hash;
1581  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1582  if (BE (err != REG_NOERROR, 0))
1583    return REG_ESPACE;
1584  for (i = 0; i < newstate->nodes.nelem; i++)
1585    {
1586      Idx elem = newstate->nodes.elems[i];
1587      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1588	if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1589	  return REG_ESPACE;
1590    }
1591
1592  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1593  if (BE (spot->alloc <= spot->num, 0))
1594    {
1595      Idx new_alloc = 2 * spot->num + 2;
1596      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1597					      new_alloc);
1598      if (BE (new_array == NULL, 0))
1599	return REG_ESPACE;
1600      spot->array = new_array;
1601      spot->alloc = new_alloc;
1602    }
1603  spot->array[spot->num++] = newstate;
1604  return REG_NOERROR;
1605}
1606
1607static void
1608free_state (re_dfastate_t *state)
1609{
1610  re_node_set_free (&state->non_eps_nodes);
1611  re_node_set_free (&state->inveclosure);
1612  if (state->entrance_nodes != &state->nodes)
1613    {
1614      re_node_set_free (state->entrance_nodes);
1615      re_free (state->entrance_nodes);
1616    }
1617  re_node_set_free (&state->nodes);
1618  re_free (state->word_trtable);
1619  re_free (state->trtable);
1620  re_free (state);
1621}
1622
1623/* Create the new state which is independ of contexts.
1624   Return the new state if succeeded, otherwise return NULL.  */
1625
1626static re_dfastate_t *
1627internal_function
1628create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1629		    re_hashval_t hash)
1630{
1631  Idx i;
1632  reg_errcode_t err;
1633  re_dfastate_t *newstate;
1634
1635  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1636  if (BE (newstate == NULL, 0))
1637    return NULL;
1638  err = re_node_set_init_copy (&newstate->nodes, nodes);
1639  if (BE (err != REG_NOERROR, 0))
1640    {
1641      re_free (newstate);
1642      return NULL;
1643    }
1644
1645  newstate->entrance_nodes = &newstate->nodes;
1646  for (i = 0 ; i < nodes->nelem ; i++)
1647    {
1648      re_token_t *node = dfa->nodes + nodes->elems[i];
1649      re_token_type_t type = node->type;
1650      if (type == CHARACTER && !node->constraint)
1651	continue;
1652#ifdef RE_ENABLE_I18N
1653      newstate->accept_mb |= node->accept_mb;
1654#endif /* RE_ENABLE_I18N */
1655
1656      /* If the state has the halt node, the state is a halt state.  */
1657      if (type == END_OF_RE)
1658	newstate->halt = 1;
1659      else if (type == OP_BACK_REF)
1660	newstate->has_backref = 1;
1661      else if (type == ANCHOR || node->constraint)
1662	newstate->has_constraint = 1;
1663    }
1664  err = register_state (dfa, newstate, hash);
1665  if (BE (err != REG_NOERROR, 0))
1666    {
1667      free_state (newstate);
1668      newstate = NULL;
1669    }
1670  return newstate;
1671}
1672
1673/* Create the new state which is depend on the context CONTEXT.
1674   Return the new state if succeeded, otherwise return NULL.  */
1675
1676static re_dfastate_t *
1677internal_function
1678create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1679		    unsigned int context, re_hashval_t hash)
1680{
1681  Idx i, nctx_nodes = 0;
1682  reg_errcode_t err;
1683  re_dfastate_t *newstate;
1684
1685  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1686  if (BE (newstate == NULL, 0))
1687    return NULL;
1688  err = re_node_set_init_copy (&newstate->nodes, nodes);
1689  if (BE (err != REG_NOERROR, 0))
1690    {
1691      re_free (newstate);
1692      return NULL;
1693    }
1694
1695  newstate->context = context;
1696  newstate->entrance_nodes = &newstate->nodes;
1697
1698  for (i = 0 ; i < nodes->nelem ; i++)
1699    {
1700      re_token_t *node = dfa->nodes + nodes->elems[i];
1701      re_token_type_t type = node->type;
1702      unsigned int constraint = node->constraint;
1703
1704      if (type == CHARACTER && !constraint)
1705	continue;
1706#ifdef RE_ENABLE_I18N
1707      newstate->accept_mb |= node->accept_mb;
1708#endif /* RE_ENABLE_I18N */
1709
1710      /* If the state has the halt node, the state is a halt state.  */
1711      if (type == END_OF_RE)
1712	newstate->halt = 1;
1713      else if (type == OP_BACK_REF)
1714	newstate->has_backref = 1;
1715
1716      if (constraint)
1717	{
1718	  if (newstate->entrance_nodes == &newstate->nodes)
1719	    {
1720	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
1721	      if (BE (newstate->entrance_nodes == NULL, 0))
1722		{
1723		  free_state (newstate);
1724		  return NULL;
1725		}
1726	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
1727	      nctx_nodes = 0;
1728	      newstate->has_constraint = 1;
1729	    }
1730
1731	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1732	    {
1733	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1734	      ++nctx_nodes;
1735	    }
1736	}
1737    }
1738  err = register_state (dfa, newstate, hash);
1739  if (BE (err != REG_NOERROR, 0))
1740    {
1741      free_state (newstate);
1742      newstate = NULL;
1743    }
1744  return  newstate;
1745}
1746