tre-compile.c revision 226035
150477Speter/* $FreeBSD: head/usr.bin/grep/regex/tre-compile.c 226035 2011-10-05 09:56:43Z gabor $ */ 244344Smckusick 34Srgrimes#include "glue.h" 4185515Skensmith 54Srgrimes#include <stdio.h> 644344Smckusick#include <assert.h> 7185515Skensmith#include <errno.h> 844344Smckusick#include <regex.h> 9185515Skensmith#include <string.h> 1044344Smckusick#include <wchar.h> 1144344Smckusick 1244344Smckusick#include "xmalloc.h" 134Srgrimes 1444344Smckusickint 154Srgrimestre_convert_pattern(const char *regex, size_t n, tre_char_t **w, 16185515Skensmith size_t *wn) 17185515Skensmith{ 184Srgrimes#if TRE_WCHAR 1944344Smckusick tre_char_t *wregex; 2044344Smckusick size_t wlen; 2144344Smckusick 2244344Smckusick wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); 2344344Smckusick if (wregex == NULL) 24185515Skensmith return REG_ESPACE; 2544344Smckusick 2644344Smckusick /* If the current locale uses the standard single byte encoding of 27185515Skensmith characters, we don't do a multibyte string conversion. If we did, 28185515Skensmith many applications which use the default locale would break since 29185515Skensmith the default "C" locale uses the 7-bit ASCII character set, and 30185515Skensmith all characters with the eighth bit set would be considered invalid. */ 31185515Skensmith#if TRE_MULTIBYTE 3244344Smckusick if (TRE_MB_CUR_MAX == 1) 3344344Smckusick#endif /* TRE_MULTIBYTE */ 34185515Skensmith { 35185515Skensmith unsigned int i; 364Srgrimes const unsigned char *str = (const unsigned char *)regex; 3744344Smckusick tre_char_t *wstr = wregex; 3844344Smckusick 3944344Smckusick for (i = 0; i < n; i++) 40 *(wstr++) = *(str++); 41 wlen = n; 42 } 43#if TRE_MULTIBYTE 44 else 45 { 46 int consumed; 47 tre_char_t *wcptr = wregex; 48#ifdef HAVE_MBSTATE_T 49 mbstate_t state; 50 memset(&state, '\0', sizeof(state)); 51#endif /* HAVE_MBSTATE_T */ 52 while (n > 0) 53 { 54 consumed = tre_mbrtowc(wcptr, regex, n, &state); 55 56 switch (consumed) 57 { 58 case 0: 59 if (*regex == '\0') 60 consumed = 1; 61 else 62 { 63 xfree(wregex); 64 return REG_BADPAT; 65 } 66 break; 67 case -1: 68 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 69 xfree(wregex); 70 return REG_BADPAT; 71 case -2: 72 /* The last character wasn't complete. Let's not call it a 73 fatal error. */ 74 consumed = n; 75 break; 76 } 77 regex += consumed; 78 n -= consumed; 79 wcptr++; 80 } 81 wlen = wcptr - wregex; 82 } 83#endif /* TRE_MULTIBYTE */ 84 wregex[wlen] = L'\0'; 85 *w = wregex; 86 *wn = wlen; 87 return REG_OK; 88#else /* !TRE_WCHAR */ 89 { 90 *w = (tre_char_t * const *)regex; 91 *wn = n; 92 return REG_OK; 93 } 94#endif /* !TRE_WCHAR */ 95} 96 97void 98tre_free_pattern(tre_char_t *wregex) 99{ 100#if TRE_WCHAR 101 xfree(wregex); 102#endif 103} 104