1/* $FreeBSD: stable/11/usr.bin/grep/regex/tre-compile.c 322557 2017-08-16 00:23:59Z kevans $ */ 2 3#include "glue.h" 4 5#include <stdio.h> 6#include <assert.h> 7#include <errno.h> 8#include <regex.h> 9#include <string.h> 10#include <wchar.h> 11 12int 13tre_convert_pattern(const char *regex, size_t n, tre_char_t **w, 14 size_t *wn) 15{ 16#if TRE_WCHAR 17 tre_char_t *wregex; 18 size_t wlen; 19 20 wregex = malloc(sizeof(tre_char_t) * (n + 1)); 21 if (wregex == NULL) 22 return REG_ESPACE; 23 24 /* If the current locale uses the standard single byte encoding of 25 characters, we don't do a multibyte string conversion. If we did, 26 many applications which use the default locale would break since 27 the default "C" locale uses the 7-bit ASCII character set, and 28 all characters with the eighth bit set would be considered invalid. */ 29#if TRE_MULTIBYTE 30 if (TRE_MB_CUR_MAX == 1) 31#endif /* TRE_MULTIBYTE */ 32 { 33 unsigned int i; 34 const unsigned char *str = (const unsigned char *)regex; 35 tre_char_t *wstr = wregex; 36 37 for (i = 0; i < n; i++) 38 *(wstr++) = *(str++); 39 wlen = n; 40 } 41#if TRE_MULTIBYTE 42 else 43 { 44 int consumed; 45 tre_char_t *wcptr = wregex; 46#ifdef HAVE_MBSTATE_T 47 mbstate_t state; 48 memset(&state, '\0', sizeof(state)); 49#endif /* HAVE_MBSTATE_T */ 50 while (n > 0) 51 { 52 consumed = tre_mbrtowc(wcptr, regex, n, &state); 53 54 switch (consumed) 55 { 56 case 0: 57 if (*regex == '\0') 58 consumed = 1; 59 else 60 { 61 free(wregex); 62 return REG_BADPAT; 63 } 64 break; 65 case -1: 66 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 67 free(wregex); 68 return REG_BADPAT; 69 case -2: 70 /* The last character wasn't complete. Let's not call it a 71 fatal error. */ 72 consumed = n; 73 break; 74 } 75 regex += consumed; 76 n -= consumed; 77 wcptr++; 78 } 79 wlen = wcptr - wregex; 80 } 81#endif /* TRE_MULTIBYTE */ 82 wregex[wlen] = L'\0'; 83 *w = wregex; 84 *wn = wlen; 85 return REG_OK; 86#else /* !TRE_WCHAR */ 87 { 88 *w = (tre_char_t * const *)regex; 89 *wn = n; 90 return REG_OK; 91 } 92#endif /* !TRE_WCHAR */ 93} 94 95void 96tre_free_pattern(tre_char_t *wregex) 97{ 98#if TRE_WCHAR 99 free(wregex); 100#endif 101} 102