1/*
2  tre_regcomp.c - TRE POSIX compatible regex compilation functions.
3
4  This software is released under a BSD-style license.
5  See the file LICENSE for details and copyright.
6
7*/
8
9#ifdef HAVE_CONFIG_H
10#include <config.h>
11#endif /* HAVE_CONFIG_H */
12
13#include <string.h>
14#include <errno.h>
15#include <stdlib.h>
16
17#include "tre.h"
18#include "tre-internal.h"
19#include "xmalloc.h"
20
21int
22tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
23{
24  int ret;
25#if TRE_WCHAR
26  tre_char_t *wregex;
27  size_t wlen;
28
29  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
30  if (wregex == NULL)
31    return REG_ESPACE;
32
33  /* If the current locale uses the standard single byte encoding of
34     characters, we don't do a multibyte string conversion.  If we did,
35     many applications which use the default locale would break since
36     the default "C" locale uses the 7-bit ASCII character set, and
37     all characters with the eighth bit set would be considered invalid. */
38#if TRE_MULTIBYTE
39  if (TRE_MB_CUR_MAX == 1)
40#endif /* TRE_MULTIBYTE */
41    {
42      unsigned int i;
43      const unsigned char *str = (const unsigned char *)regex;
44      tre_char_t *wstr = wregex;
45
46      for (i = 0; i < n; i++)
47	*(wstr++) = *(str++);
48      wlen = n;
49    }
50#if TRE_MULTIBYTE
51  else
52    {
53      size_t consumed;
54      tre_char_t *wcptr = wregex;
55#ifdef HAVE_MBSTATE_T
56      mbstate_t state;
57      memset(&state, '\0', sizeof(state));
58#endif /* HAVE_MBSTATE_T */
59      while (n > 0)
60	{
61	  consumed = tre_mbrtowc(wcptr, regex, n, &state);
62
63	  switch (consumed)
64	    {
65	    case 0:
66	      if (*regex == '\0')
67		consumed = 1;
68	      else
69		{
70		  xfree(wregex);
71		  return REG_BADPAT;
72		}
73	      break;
74	    case (size_t)-1:
75	      DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
76	      xfree(wregex);
77	      return REG_BADPAT;
78	    case (size_t)-2:
79	      /* The last character wasn't complete.  Let's not call it a
80		 fatal error. */
81	      consumed = n;
82	      break;
83	    }
84	  regex += consumed;
85	  n -= consumed;
86	  wcptr++;
87	}
88      wlen = wcptr - wregex;
89    }
90#endif /* TRE_MULTIBYTE */
91
92  wregex[wlen] = L'\0';
93  ret = tre_compile(preg, wregex, wlen, cflags);
94  xfree(wregex);
95#else /* !TRE_WCHAR */
96  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
97#endif /* !TRE_WCHAR */
98
99  return ret;
100}
101
102#ifdef REG_USEBYTES
103/* this version takes bytes literally, to be used with raw vectors */
104int
105tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags)
106{
107  int ret;
108#if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */
109  tre_char_t *wregex;
110  size_t i;
111
112  wregex = xmalloc(sizeof(tre_char_t) * n);
113  if (wregex == NULL)
114    return REG_ESPACE;
115
116  for (i = 0; i < n; i++)
117    wregex[i] = (tre_char_t) ((unsigned char) regex[i]);
118
119  ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES);
120  xfree(wregex);
121#else /* !TRE_WCHAR */
122  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES);
123#endif /* !TRE_WCHAR */
124
125  return ret;
126}
127#endif /* REG_USEBYTES */
128
129int
130tre_regcomp(regex_t *preg, const char *regex, int cflags)
131{
132  return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
133}
134
135#ifdef REG_USEBYTES
136int
137tre_regcompb(regex_t *preg, const char *regex, int cflags)
138{
139  int ret;
140  tre_char_t *wregex;
141  size_t wlen, n = strlen(regex);
142  unsigned int i;
143  const unsigned char *str = (const unsigned char *)regex;
144  tre_char_t *wstr;
145
146  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
147  if (wregex == NULL) return REG_ESPACE;
148  wstr = wregex;
149
150  for (i = 0; i < n; i++) *(wstr++) = *(str++);
151  wlen = n;
152  wregex[wlen] = L'\0';
153  ret = tre_compile(preg, wregex, wlen, cflags | REG_USEBYTES);
154  xfree(wregex);
155  return ret;
156}
157#endif /* REG_USEBYTES */
158
159
160#ifdef TRE_WCHAR
161int
162tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
163{
164  return tre_compile(preg, regex, n, cflags);
165}
166
167int
168tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
169{
170  return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
171}
172#endif /* TRE_WCHAR */
173
174void
175tre_regfree(regex_t *preg)
176{
177  tre_free(preg);
178}
179
180/* EOF */
181