1/*      $FreeBSD: stable/11/usr.bin/grep/regex/tre-compile.c 322557 2017-08-16 00:23:59Z kevans $       */
2
3#include "glue.h"
4
5#include <stdio.h>
6#include <assert.h>
7#include <errno.h>
8#include <regex.h>
9#include <string.h>
10#include <wchar.h>
11
12int
13tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
14		    size_t *wn)
15{
16#if TRE_WCHAR
17  tre_char_t *wregex;
18  size_t wlen;
19
20  wregex = malloc(sizeof(tre_char_t) * (n + 1));
21  if (wregex == NULL)
22    return REG_ESPACE;
23
24  /* If the current locale uses the standard single byte encoding of
25     characters, we don't do a multibyte string conversion.  If we did,
26     many applications which use the default locale would break since
27     the default "C" locale uses the 7-bit ASCII character set, and
28     all characters with the eighth bit set would be considered invalid. */
29#if TRE_MULTIBYTE
30  if (TRE_MB_CUR_MAX == 1)
31#endif /* TRE_MULTIBYTE */
32    {
33      unsigned int i;
34      const unsigned char *str = (const unsigned char *)regex;
35      tre_char_t *wstr = wregex;
36
37      for (i = 0; i < n; i++)
38	*(wstr++) = *(str++);
39      wlen = n;
40    }
41#if TRE_MULTIBYTE
42  else
43    {
44      int consumed;
45      tre_char_t *wcptr = wregex;
46#ifdef HAVE_MBSTATE_T
47      mbstate_t state;
48      memset(&state, '\0', sizeof(state));
49#endif /* HAVE_MBSTATE_T */
50      while (n > 0)
51	{
52	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
53
54	  switch (consumed)
55	    {
56	    case 0:
57	      if (*regex == '\0')
58		consumed = 1;
59	      else
60		{
61		  free(wregex);
62		  return REG_BADPAT;
63		}
64	      break;
65	    case -1:
66	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
67	      free(wregex);
68	      return REG_BADPAT;
69	    case -2:
70	      /* The last character wasn't complete.  Let's not call it a
71		 fatal error. */
72	      consumed = n;
73	      break;
74	    }
75	  regex += consumed;
76	  n -= consumed;
77	  wcptr++;
78	}
79      wlen = wcptr - wregex;
80    }
81#endif /* TRE_MULTIBYTE */
82  wregex[wlen] = L'\0';
83  *w = wregex;
84  *wn = wlen;
85  return REG_OK;
86#else /* !TRE_WCHAR */
87  {
88    *w = (tre_char_t * const *)regex;
89    *wn = n;
90    return REG_OK;
91  }
92#endif /* !TRE_WCHAR */
93}
94
95void
96tre_free_pattern(tre_char_t *wregex)
97{
98#if TRE_WCHAR
99  free(wregex);
100#endif
101}
102