1226035Sgabor/* $FreeBSD: stable/11/usr.bin/grep/regex/tre-compile.c 322557 2017-08-16 00:23:59Z kevans $ */ 2226035Sgabor 3226035Sgabor#include "glue.h" 4226035Sgabor 5226035Sgabor#include <stdio.h> 6226035Sgabor#include <assert.h> 7226035Sgabor#include <errno.h> 8226035Sgabor#include <regex.h> 9226035Sgabor#include <string.h> 10226035Sgabor#include <wchar.h> 11226035Sgabor 12226035Sgaborint 13226035Sgabortre_convert_pattern(const char *regex, size_t n, tre_char_t **w, 14226035Sgabor size_t *wn) 15226035Sgabor{ 16226035Sgabor#if TRE_WCHAR 17226035Sgabor tre_char_t *wregex; 18226035Sgabor size_t wlen; 19226035Sgabor 20322557Skevans wregex = malloc(sizeof(tre_char_t) * (n + 1)); 21226035Sgabor if (wregex == NULL) 22226035Sgabor return REG_ESPACE; 23226035Sgabor 24226035Sgabor /* If the current locale uses the standard single byte encoding of 25226035Sgabor characters, we don't do a multibyte string conversion. If we did, 26226035Sgabor many applications which use the default locale would break since 27226035Sgabor the default "C" locale uses the 7-bit ASCII character set, and 28226035Sgabor all characters with the eighth bit set would be considered invalid. */ 29226035Sgabor#if TRE_MULTIBYTE 30226035Sgabor if (TRE_MB_CUR_MAX == 1) 31226035Sgabor#endif /* TRE_MULTIBYTE */ 32226035Sgabor { 33226035Sgabor unsigned int i; 34226035Sgabor const unsigned char *str = (const unsigned char *)regex; 35226035Sgabor tre_char_t *wstr = wregex; 36226035Sgabor 37226035Sgabor for (i = 0; i < n; i++) 38226035Sgabor *(wstr++) = *(str++); 39226035Sgabor wlen = n; 40226035Sgabor } 41226035Sgabor#if TRE_MULTIBYTE 42226035Sgabor else 43226035Sgabor { 44226035Sgabor int consumed; 45226035Sgabor tre_char_t *wcptr = wregex; 46226035Sgabor#ifdef HAVE_MBSTATE_T 47226035Sgabor mbstate_t state; 48226035Sgabor memset(&state, '\0', sizeof(state)); 49226035Sgabor#endif /* HAVE_MBSTATE_T */ 50226035Sgabor while (n > 0) 51226035Sgabor { 52226035Sgabor consumed = tre_mbrtowc(wcptr, regex, n, &state); 53226035Sgabor 54226035Sgabor switch (consumed) 55226035Sgabor { 56226035Sgabor case 0: 57226035Sgabor if (*regex == '\0') 58226035Sgabor consumed = 1; 59226035Sgabor else 60226035Sgabor { 61322557Skevans free(wregex); 62226035Sgabor return REG_BADPAT; 63226035Sgabor } 64226035Sgabor break; 65226035Sgabor case -1: 66226035Sgabor DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 67322557Skevans free(wregex); 68226035Sgabor return REG_BADPAT; 69226035Sgabor case -2: 70226035Sgabor /* The last character wasn't complete. Let's not call it a 71226035Sgabor fatal error. */ 72226035Sgabor consumed = n; 73226035Sgabor break; 74226035Sgabor } 75226035Sgabor regex += consumed; 76226035Sgabor n -= consumed; 77226035Sgabor wcptr++; 78226035Sgabor } 79226035Sgabor wlen = wcptr - wregex; 80226035Sgabor } 81226035Sgabor#endif /* TRE_MULTIBYTE */ 82226035Sgabor wregex[wlen] = L'\0'; 83226035Sgabor *w = wregex; 84226035Sgabor *wn = wlen; 85226035Sgabor return REG_OK; 86226035Sgabor#else /* !TRE_WCHAR */ 87226035Sgabor { 88226035Sgabor *w = (tre_char_t * const *)regex; 89226035Sgabor *wn = n; 90226035Sgabor return REG_OK; 91226035Sgabor } 92226035Sgabor#endif /* !TRE_WCHAR */ 93226035Sgabor} 94226035Sgabor 95226035Sgaborvoid 96226035Sgabortre_free_pattern(tre_char_t *wregex) 97226035Sgabor{ 98226035Sgabor#if TRE_WCHAR 99322557Skevans free(wregex); 100226035Sgabor#endif 101226035Sgabor} 102