1226035Sgabor/*      $FreeBSD: releng/10.3/usr.bin/grep/regex/tre-compile.c 226035 2011-10-05 09:56:43Z gabor $       */
2226035Sgabor
3226035Sgabor#include "glue.h"
4226035Sgabor
5226035Sgabor#include <stdio.h>
6226035Sgabor#include <assert.h>
7226035Sgabor#include <errno.h>
8226035Sgabor#include <regex.h>
9226035Sgabor#include <string.h>
10226035Sgabor#include <wchar.h>
11226035Sgabor
12226035Sgabor#include "xmalloc.h"
13226035Sgabor
14226035Sgaborint
15226035Sgabortre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
16226035Sgabor		    size_t *wn)
17226035Sgabor{
18226035Sgabor#if TRE_WCHAR
19226035Sgabor  tre_char_t *wregex;
20226035Sgabor  size_t wlen;
21226035Sgabor
22226035Sgabor  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
23226035Sgabor  if (wregex == NULL)
24226035Sgabor    return REG_ESPACE;
25226035Sgabor
26226035Sgabor  /* If the current locale uses the standard single byte encoding of
27226035Sgabor     characters, we don't do a multibyte string conversion.  If we did,
28226035Sgabor     many applications which use the default locale would break since
29226035Sgabor     the default "C" locale uses the 7-bit ASCII character set, and
30226035Sgabor     all characters with the eighth bit set would be considered invalid. */
31226035Sgabor#if TRE_MULTIBYTE
32226035Sgabor  if (TRE_MB_CUR_MAX == 1)
33226035Sgabor#endif /* TRE_MULTIBYTE */
34226035Sgabor    {
35226035Sgabor      unsigned int i;
36226035Sgabor      const unsigned char *str = (const unsigned char *)regex;
37226035Sgabor      tre_char_t *wstr = wregex;
38226035Sgabor
39226035Sgabor      for (i = 0; i < n; i++)
40226035Sgabor	*(wstr++) = *(str++);
41226035Sgabor      wlen = n;
42226035Sgabor    }
43226035Sgabor#if TRE_MULTIBYTE
44226035Sgabor  else
45226035Sgabor    {
46226035Sgabor      int consumed;
47226035Sgabor      tre_char_t *wcptr = wregex;
48226035Sgabor#ifdef HAVE_MBSTATE_T
49226035Sgabor      mbstate_t state;
50226035Sgabor      memset(&state, '\0', sizeof(state));
51226035Sgabor#endif /* HAVE_MBSTATE_T */
52226035Sgabor      while (n > 0)
53226035Sgabor	{
54226035Sgabor	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
55226035Sgabor
56226035Sgabor	  switch (consumed)
57226035Sgabor	    {
58226035Sgabor	    case 0:
59226035Sgabor	      if (*regex == '\0')
60226035Sgabor		consumed = 1;
61226035Sgabor	      else
62226035Sgabor		{
63226035Sgabor		  xfree(wregex);
64226035Sgabor		  return REG_BADPAT;
65226035Sgabor		}
66226035Sgabor	      break;
67226035Sgabor	    case -1:
68226035Sgabor	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
69226035Sgabor	      xfree(wregex);
70226035Sgabor	      return REG_BADPAT;
71226035Sgabor	    case -2:
72226035Sgabor	      /* The last character wasn't complete.  Let's not call it a
73226035Sgabor		 fatal error. */
74226035Sgabor	      consumed = n;
75226035Sgabor	      break;
76226035Sgabor	    }
77226035Sgabor	  regex += consumed;
78226035Sgabor	  n -= consumed;
79226035Sgabor	  wcptr++;
80226035Sgabor	}
81226035Sgabor      wlen = wcptr - wregex;
82226035Sgabor    }
83226035Sgabor#endif /* TRE_MULTIBYTE */
84226035Sgabor  wregex[wlen] = L'\0';
85226035Sgabor  *w = wregex;
86226035Sgabor  *wn = wlen;
87226035Sgabor  return REG_OK;
88226035Sgabor#else /* !TRE_WCHAR */
89226035Sgabor  {
90226035Sgabor    *w = (tre_char_t * const *)regex;
91226035Sgabor    *wn = n;
92226035Sgabor    return REG_OK;
93226035Sgabor  }
94226035Sgabor#endif /* !TRE_WCHAR */
95226035Sgabor}
96226035Sgabor
97226035Sgaborvoid
98226035Sgabortre_free_pattern(tre_char_t *wregex)
99226035Sgabor{
100226035Sgabor#if TRE_WCHAR
101226035Sgabor  xfree(wregex);
102226035Sgabor#endif
103226035Sgabor}
104