tre-compile.c revision 226035
150477Speter/*      $FreeBSD: head/usr.bin/grep/regex/tre-compile.c 226035 2011-10-05 09:56:43Z gabor $       */
244344Smckusick
34Srgrimes#include "glue.h"
4185515Skensmith
54Srgrimes#include <stdio.h>
644344Smckusick#include <assert.h>
7185515Skensmith#include <errno.h>
844344Smckusick#include <regex.h>
9185515Skensmith#include <string.h>
1044344Smckusick#include <wchar.h>
1144344Smckusick
1244344Smckusick#include "xmalloc.h"
134Srgrimes
1444344Smckusickint
154Srgrimestre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
16185515Skensmith		    size_t *wn)
17185515Skensmith{
184Srgrimes#if TRE_WCHAR
1944344Smckusick  tre_char_t *wregex;
2044344Smckusick  size_t wlen;
2144344Smckusick
2244344Smckusick  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
2344344Smckusick  if (wregex == NULL)
24185515Skensmith    return REG_ESPACE;
2544344Smckusick
2644344Smckusick  /* If the current locale uses the standard single byte encoding of
27185515Skensmith     characters, we don't do a multibyte string conversion.  If we did,
28185515Skensmith     many applications which use the default locale would break since
29185515Skensmith     the default "C" locale uses the 7-bit ASCII character set, and
30185515Skensmith     all characters with the eighth bit set would be considered invalid. */
31185515Skensmith#if TRE_MULTIBYTE
3244344Smckusick  if (TRE_MB_CUR_MAX == 1)
3344344Smckusick#endif /* TRE_MULTIBYTE */
34185515Skensmith    {
35185515Skensmith      unsigned int i;
364Srgrimes      const unsigned char *str = (const unsigned char *)regex;
3744344Smckusick      tre_char_t *wstr = wregex;
3844344Smckusick
3944344Smckusick      for (i = 0; i < n; i++)
40	*(wstr++) = *(str++);
41      wlen = n;
42    }
43#if TRE_MULTIBYTE
44  else
45    {
46      int consumed;
47      tre_char_t *wcptr = wregex;
48#ifdef HAVE_MBSTATE_T
49      mbstate_t state;
50      memset(&state, '\0', sizeof(state));
51#endif /* HAVE_MBSTATE_T */
52      while (n > 0)
53	{
54	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
55
56	  switch (consumed)
57	    {
58	    case 0:
59	      if (*regex == '\0')
60		consumed = 1;
61	      else
62		{
63		  xfree(wregex);
64		  return REG_BADPAT;
65		}
66	      break;
67	    case -1:
68	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
69	      xfree(wregex);
70	      return REG_BADPAT;
71	    case -2:
72	      /* The last character wasn't complete.  Let's not call it a
73		 fatal error. */
74	      consumed = n;
75	      break;
76	    }
77	  regex += consumed;
78	  n -= consumed;
79	  wcptr++;
80	}
81      wlen = wcptr - wregex;
82    }
83#endif /* TRE_MULTIBYTE */
84  wregex[wlen] = L'\0';
85  *w = wregex;
86  *wn = wlen;
87  return REG_OK;
88#else /* !TRE_WCHAR */
89  {
90    *w = (tre_char_t * const *)regex;
91    *wn = n;
92    return REG_OK;
93  }
94#endif /* !TRE_WCHAR */
95}
96
97void
98tre_free_pattern(tre_char_t *wregex)
99{
100#if TRE_WCHAR
101  xfree(wregex);
102#endif
103}
104