1226035Sgabor/*      $FreeBSD: stable/11/usr.bin/grep/regex/tre-compile.c 322557 2017-08-16 00:23:59Z kevans $       */
2226035Sgabor
3226035Sgabor#include "glue.h"
4226035Sgabor
5226035Sgabor#include <stdio.h>
6226035Sgabor#include <assert.h>
7226035Sgabor#include <errno.h>
8226035Sgabor#include <regex.h>
9226035Sgabor#include <string.h>
10226035Sgabor#include <wchar.h>
11226035Sgabor
12226035Sgaborint
13226035Sgabortre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
14226035Sgabor		    size_t *wn)
15226035Sgabor{
16226035Sgabor#if TRE_WCHAR
17226035Sgabor  tre_char_t *wregex;
18226035Sgabor  size_t wlen;
19226035Sgabor
20322557Skevans  wregex = malloc(sizeof(tre_char_t) * (n + 1));
21226035Sgabor  if (wregex == NULL)
22226035Sgabor    return REG_ESPACE;
23226035Sgabor
24226035Sgabor  /* If the current locale uses the standard single byte encoding of
25226035Sgabor     characters, we don't do a multibyte string conversion.  If we did,
26226035Sgabor     many applications which use the default locale would break since
27226035Sgabor     the default "C" locale uses the 7-bit ASCII character set, and
28226035Sgabor     all characters with the eighth bit set would be considered invalid. */
29226035Sgabor#if TRE_MULTIBYTE
30226035Sgabor  if (TRE_MB_CUR_MAX == 1)
31226035Sgabor#endif /* TRE_MULTIBYTE */
32226035Sgabor    {
33226035Sgabor      unsigned int i;
34226035Sgabor      const unsigned char *str = (const unsigned char *)regex;
35226035Sgabor      tre_char_t *wstr = wregex;
36226035Sgabor
37226035Sgabor      for (i = 0; i < n; i++)
38226035Sgabor	*(wstr++) = *(str++);
39226035Sgabor      wlen = n;
40226035Sgabor    }
41226035Sgabor#if TRE_MULTIBYTE
42226035Sgabor  else
43226035Sgabor    {
44226035Sgabor      int consumed;
45226035Sgabor      tre_char_t *wcptr = wregex;
46226035Sgabor#ifdef HAVE_MBSTATE_T
47226035Sgabor      mbstate_t state;
48226035Sgabor      memset(&state, '\0', sizeof(state));
49226035Sgabor#endif /* HAVE_MBSTATE_T */
50226035Sgabor      while (n > 0)
51226035Sgabor	{
52226035Sgabor	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
53226035Sgabor
54226035Sgabor	  switch (consumed)
55226035Sgabor	    {
56226035Sgabor	    case 0:
57226035Sgabor	      if (*regex == '\0')
58226035Sgabor		consumed = 1;
59226035Sgabor	      else
60226035Sgabor		{
61322557Skevans		  free(wregex);
62226035Sgabor		  return REG_BADPAT;
63226035Sgabor		}
64226035Sgabor	      break;
65226035Sgabor	    case -1:
66226035Sgabor	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
67322557Skevans	      free(wregex);
68226035Sgabor	      return REG_BADPAT;
69226035Sgabor	    case -2:
70226035Sgabor	      /* The last character wasn't complete.  Let's not call it a
71226035Sgabor		 fatal error. */
72226035Sgabor	      consumed = n;
73226035Sgabor	      break;
74226035Sgabor	    }
75226035Sgabor	  regex += consumed;
76226035Sgabor	  n -= consumed;
77226035Sgabor	  wcptr++;
78226035Sgabor	}
79226035Sgabor      wlen = wcptr - wregex;
80226035Sgabor    }
81226035Sgabor#endif /* TRE_MULTIBYTE */
82226035Sgabor  wregex[wlen] = L'\0';
83226035Sgabor  *w = wregex;
84226035Sgabor  *wn = wlen;
85226035Sgabor  return REG_OK;
86226035Sgabor#else /* !TRE_WCHAR */
87226035Sgabor  {
88226035Sgabor    *w = (tre_char_t * const *)regex;
89226035Sgabor    *wn = n;
90226035Sgabor    return REG_OK;
91226035Sgabor  }
92226035Sgabor#endif /* !TRE_WCHAR */
93226035Sgabor}
94226035Sgabor
95226035Sgaborvoid
96226035Sgabortre_free_pattern(tre_char_t *wregex)
97226035Sgabor{
98226035Sgabor#if TRE_WCHAR
99322557Skevans  free(wregex);
100226035Sgabor#endif
101226035Sgabor}
102