1/* $FreeBSD: releng/10.2/usr.bin/grep/regex/tre-compile.c 226035 2011-10-05 09:56:43Z gabor $ */ 2 3#include "glue.h" 4 5#include <stdio.h> 6#include <assert.h> 7#include <errno.h> 8#include <regex.h> 9#include <string.h> 10#include <wchar.h> 11 12#include "xmalloc.h" 13 14int 15tre_convert_pattern(const char *regex, size_t n, tre_char_t **w, 16 size_t *wn) 17{ 18#if TRE_WCHAR 19 tre_char_t *wregex; 20 size_t wlen; 21 22 wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); 23 if (wregex == NULL) 24 return REG_ESPACE; 25 26 /* If the current locale uses the standard single byte encoding of 27 characters, we don't do a multibyte string conversion. If we did, 28 many applications which use the default locale would break since 29 the default "C" locale uses the 7-bit ASCII character set, and 30 all characters with the eighth bit set would be considered invalid. */ 31#if TRE_MULTIBYTE 32 if (TRE_MB_CUR_MAX == 1) 33#endif /* TRE_MULTIBYTE */ 34 { 35 unsigned int i; 36 const unsigned char *str = (const unsigned char *)regex; 37 tre_char_t *wstr = wregex; 38 39 for (i = 0; i < n; i++) 40 *(wstr++) = *(str++); 41 wlen = n; 42 } 43#if TRE_MULTIBYTE 44 else 45 { 46 int consumed; 47 tre_char_t *wcptr = wregex; 48#ifdef HAVE_MBSTATE_T 49 mbstate_t state; 50 memset(&state, '\0', sizeof(state)); 51#endif /* HAVE_MBSTATE_T */ 52 while (n > 0) 53 { 54 consumed = tre_mbrtowc(wcptr, regex, n, &state); 55 56 switch (consumed) 57 { 58 case 0: 59 if (*regex == '\0') 60 consumed = 1; 61 else 62 { 63 xfree(wregex); 64 return REG_BADPAT; 65 } 66 break; 67 case -1: 68 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 69 xfree(wregex); 70 return REG_BADPAT; 71 case -2: 72 /* The last character wasn't complete. Let's not call it a 73 fatal error. */ 74 consumed = n; 75 break; 76 } 77 regex += consumed; 78 n -= consumed; 79 wcptr++; 80 } 81 wlen = wcptr - wregex; 82 } 83#endif /* TRE_MULTIBYTE */ 84 wregex[wlen] = L'\0'; 85 *w = wregex; 86 *wn = wlen; 87 return REG_OK; 88#else /* !TRE_WCHAR */ 89 { 90 *w = (tre_char_t * const *)regex; 91 *wn = n; 92 return REG_OK; 93 } 94#endif /* !TRE_WCHAR */ 95} 96 97void 98tre_free_pattern(tre_char_t *wregex) 99{ 100#if TRE_WCHAR 101 xfree(wregex); 102#endif 103} 104