1226035Sgabor/* $FreeBSD: releng/10.3/usr.bin/grep/regex/tre-compile.c 226035 2011-10-05 09:56:43Z gabor $ */ 2226035Sgabor 3226035Sgabor#include "glue.h" 4226035Sgabor 5226035Sgabor#include <stdio.h> 6226035Sgabor#include <assert.h> 7226035Sgabor#include <errno.h> 8226035Sgabor#include <regex.h> 9226035Sgabor#include <string.h> 10226035Sgabor#include <wchar.h> 11226035Sgabor 12226035Sgabor#include "xmalloc.h" 13226035Sgabor 14226035Sgaborint 15226035Sgabortre_convert_pattern(const char *regex, size_t n, tre_char_t **w, 16226035Sgabor size_t *wn) 17226035Sgabor{ 18226035Sgabor#if TRE_WCHAR 19226035Sgabor tre_char_t *wregex; 20226035Sgabor size_t wlen; 21226035Sgabor 22226035Sgabor wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); 23226035Sgabor if (wregex == NULL) 24226035Sgabor return REG_ESPACE; 25226035Sgabor 26226035Sgabor /* If the current locale uses the standard single byte encoding of 27226035Sgabor characters, we don't do a multibyte string conversion. If we did, 28226035Sgabor many applications which use the default locale would break since 29226035Sgabor the default "C" locale uses the 7-bit ASCII character set, and 30226035Sgabor all characters with the eighth bit set would be considered invalid. */ 31226035Sgabor#if TRE_MULTIBYTE 32226035Sgabor if (TRE_MB_CUR_MAX == 1) 33226035Sgabor#endif /* TRE_MULTIBYTE */ 34226035Sgabor { 35226035Sgabor unsigned int i; 36226035Sgabor const unsigned char *str = (const unsigned char *)regex; 37226035Sgabor tre_char_t *wstr = wregex; 38226035Sgabor 39226035Sgabor for (i = 0; i < n; i++) 40226035Sgabor *(wstr++) = *(str++); 41226035Sgabor wlen = n; 42226035Sgabor } 43226035Sgabor#if TRE_MULTIBYTE 44226035Sgabor else 45226035Sgabor { 46226035Sgabor int consumed; 47226035Sgabor tre_char_t *wcptr = wregex; 48226035Sgabor#ifdef HAVE_MBSTATE_T 49226035Sgabor mbstate_t state; 50226035Sgabor memset(&state, '\0', sizeof(state)); 51226035Sgabor#endif /* HAVE_MBSTATE_T */ 52226035Sgabor while (n > 0) 53226035Sgabor { 54226035Sgabor consumed = tre_mbrtowc(wcptr, regex, n, &state); 55226035Sgabor 56226035Sgabor switch (consumed) 57226035Sgabor { 58226035Sgabor case 0: 59226035Sgabor if (*regex == '\0') 60226035Sgabor consumed = 1; 61226035Sgabor else 62226035Sgabor { 63226035Sgabor xfree(wregex); 64226035Sgabor return REG_BADPAT; 65226035Sgabor } 66226035Sgabor break; 67226035Sgabor case -1: 68226035Sgabor DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 69226035Sgabor xfree(wregex); 70226035Sgabor return REG_BADPAT; 71226035Sgabor case -2: 72226035Sgabor /* The last character wasn't complete. Let's not call it a 73226035Sgabor fatal error. */ 74226035Sgabor consumed = n; 75226035Sgabor break; 76226035Sgabor } 77226035Sgabor regex += consumed; 78226035Sgabor n -= consumed; 79226035Sgabor wcptr++; 80226035Sgabor } 81226035Sgabor wlen = wcptr - wregex; 82226035Sgabor } 83226035Sgabor#endif /* TRE_MULTIBYTE */ 84226035Sgabor wregex[wlen] = L'\0'; 85226035Sgabor *w = wregex; 86226035Sgabor *wn = wlen; 87226035Sgabor return REG_OK; 88226035Sgabor#else /* !TRE_WCHAR */ 89226035Sgabor { 90226035Sgabor *w = (tre_char_t * const *)regex; 91226035Sgabor *wn = n; 92226035Sgabor return REG_OK; 93226035Sgabor } 94226035Sgabor#endif /* !TRE_WCHAR */ 95226035Sgabor} 96226035Sgabor 97226035Sgaborvoid 98226035Sgabortre_free_pattern(tre_char_t *wregex) 99226035Sgabor{ 100226035Sgabor#if TRE_WCHAR 101226035Sgabor xfree(wregex); 102226035Sgabor#endif 103226035Sgabor} 104