1/* 2 * Copyright (C) 2000-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place - 18 * Suite 330, Boston, MA 02111-1307, USA. 19 */ 20 21/* This file defines three conversion loops: 22 - from wchar_t to anything else, 23 - from anything else to wchar_t, 24 - from wchar_t to wchar_t. 25 */ 26 27#if HAVE_WCRTOMB || HAVE_MBRTOWC 28# include <wchar.h> 29# define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */ 30 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 31 extern size_t mbrtowc (); 32# ifdef mbstate_t 33# define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0) 34# define mbsinit(ps) 1 35# endif 36# ifndef mbsinit 37# if !HAVE_MBSINIT 38# define mbsinit(ps) 1 39# endif 40# endif 41#else 42# ifndef mbstate_t 43 typedef int mbstate_t; 44# endif 45#endif 46 47/* 48 * The first two conversion loops have an extended conversion descriptor. 49 */ 50struct wchar_conv_struct { 51 struct conv_struct parent; 52 mbstate_t state; 53}; 54 55 56#if HAVE_WCRTOMB 57 58/* From wchar_t to anything else. */ 59 60static size_t wchar_from_loop_convert (iconv_t icd, 61 const char* * inbuf, size_t *inbytesleft, 62 char* * outbuf, size_t *outbytesleft) 63{ 64 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 65 size_t result = 0; 66 while (*inbytesleft >= sizeof(wchar_t)) { 67 const wchar_t * inptr = (const wchar_t *) *inbuf; 68 size_t inleft = *inbytesleft; 69 char buf[BUF_SIZE]; 70 mbstate_t state = wcd->state; 71 size_t bufcount = 0; 72 while (inleft >= sizeof(wchar_t)) { 73 /* Convert one wchar_t to multibyte representation. */ 74 size_t count = wcrtomb(buf+bufcount,*inptr,&state); 75 if (count == (size_t)(-1)) { 76 /* Invalid input. */ 77 if (!wcd->parent.discard_ilseq) { 78 errno = EILSEQ; 79 return -1; 80 } 81 count = 0; 82 } 83 inptr++; 84 inleft -= sizeof(wchar_t); 85 bufcount += count; 86 if (count == 0) { 87 /* Continue, append next wchar_t. */ 88 } else { 89 /* Attempt to convert the accumulated multibyte representations 90 to the target encoding. */ 91 const char* bufptr = buf; 92 size_t bufleft = bufcount; 93 char* outptr = *outbuf; 94 size_t outleft = *outbytesleft; 95 size_t res = unicode_loop_convert(&wcd->parent, 96 &bufptr,&bufleft, 97 &outptr,&outleft); 98 if (res == (size_t)(-1)) { 99 if (errno == EILSEQ) 100 /* Invalid input. */ 101 return -1; 102 else if (errno == E2BIG) 103 /* Output buffer too small. */ 104 return -1; 105 else if (errno == EINVAL) { 106 /* Continue, append next wchar_t, but avoid buffer overrun. */ 107 if (bufcount + MB_CUR_MAX > BUF_SIZE) 108 abort(); 109 } else 110 abort(); 111 } else { 112 /* Successful conversion. */ 113 wcd->state = state; 114 *inbuf = (const char *) inptr; 115 *inbytesleft = inleft; 116 *outbuf = outptr; 117 *outbytesleft = outleft; 118 result += res; 119 break; 120 } 121 } 122 } 123 } 124 return result; 125} 126 127static size_t wchar_from_loop_reset (iconv_t icd, 128 char* * outbuf, size_t *outbytesleft) 129{ 130 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 131 if (outbuf == NULL || *outbuf == NULL) { 132 /* Reset the states. */ 133 memset(&wcd->state,'\0',sizeof(mbstate_t)); 134 return unicode_loop_reset(&wcd->parent,NULL,NULL); 135 } else { 136 if (!mbsinit(&wcd->state)) { 137 mbstate_t state = wcd->state; 138 char buf[BUF_SIZE]; 139 size_t bufcount = wcrtomb(buf,(wchar_t)0,&state); 140 if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0') 141 abort(); 142 else { 143 const char* bufptr = buf; 144 size_t bufleft = bufcount-1; 145 char* outptr = *outbuf; 146 size_t outleft = *outbytesleft; 147 size_t res = unicode_loop_convert(&wcd->parent, 148 &bufptr,&bufleft, 149 &outptr,&outleft); 150 if (res == (size_t)(-1)) { 151 if (errno == E2BIG) 152 return -1; 153 else 154 abort(); 155 } else { 156 res = unicode_loop_reset(&wcd->parent,&outptr,&outleft); 157 if (res == (size_t)(-1)) 158 return res; 159 else { 160 /* Successful. */ 161 wcd->state = state; 162 *outbuf = outptr; 163 *outbytesleft = outleft; 164 return 0; 165 } 166 } 167 } 168 } else 169 return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft); 170 } 171} 172 173#endif 174 175 176#if HAVE_MBRTOWC 177 178/* From anything else to wchar_t. */ 179 180static size_t wchar_to_loop_convert (iconv_t icd, 181 const char* * inbuf, size_t *inbytesleft, 182 char* * outbuf, size_t *outbytesleft) 183{ 184 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 185 size_t result = 0; 186 while (*inbytesleft > 0) { 187 size_t incount; 188 for (incount = 1; incount <= *inbytesleft; incount++) { 189 char buf[BUF_SIZE]; 190 const char* inptr = *inbuf; 191 size_t inleft = incount; 192 char* bufptr = buf; 193 size_t bufleft = BUF_SIZE; 194 size_t res = unicode_loop_convert(&wcd->parent, 195 &inptr,&inleft, 196 &bufptr,&bufleft); 197 if (res == (size_t)(-1)) { 198 if (errno == EILSEQ) 199 /* Invalid input. */ 200 return -1; 201 else if (errno == EINVAL) { 202 /* Incomplete input. Next try with one more input byte. */ 203 } else 204 /* E2BIG shouldn't occur. */ 205 abort(); 206 } else { 207 /* Successful conversion. */ 208 size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */ 209 mbstate_t state = wcd->state; 210 wchar_t wc; 211 res = mbrtowc(&wc,buf,bufcount,&state); 212 if (res == (size_t)(-2)) { 213 /* Next try with one more input byte. */ 214 } else { 215 if (res == (size_t)(-1)) { 216 /* Invalid input. */ 217 if (!wcd->parent.discard_ilseq) 218 return -1; 219 } else { 220 if (*outbytesleft < sizeof(wchar_t)) { 221 errno = E2BIG; 222 return -1; 223 } 224 *(wchar_t*) *outbuf = wc; 225 /* Restoring the state is not needed because it is the initial 226 state anyway: For all known locale encodings, the multibyte 227 to wchar_t conversion doesn't have shift state, and we have 228 excluded partial accumulated characters. */ 229 /* wcd->state = state; */ 230 *outbuf += sizeof(wchar_t); 231 *outbytesleft -= sizeof(wchar_t); 232 } 233 *inbuf += incount; 234 *inbytesleft -= incount; 235 result += res; 236 break; 237 } 238 } 239 } 240 } 241 return result; 242} 243 244static size_t wchar_to_loop_reset (iconv_t icd, 245 char* * outbuf, size_t *outbytesleft) 246{ 247 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 248 size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft); 249 if (res == (size_t)(-1)) 250 return res; 251 memset(&wcd->state,0,sizeof(mbstate_t)); 252 return 0; 253} 254 255#endif 256 257 258/* From wchar_t to wchar_t. */ 259 260static size_t wchar_id_loop_convert (iconv_t icd, 261 const char* * inbuf, size_t *inbytesleft, 262 char* * outbuf, size_t *outbytesleft) 263{ 264 const wchar_t* inptr = (const wchar_t*) *inbuf; 265 size_t inleft = *inbytesleft / sizeof(wchar_t); 266 wchar_t* outptr = (wchar_t*) *outbuf; 267 size_t outleft = *outbytesleft / sizeof(wchar_t); 268 size_t count = (inleft <= outleft ? inleft : outleft); 269 if (count > 0) { 270 *inbytesleft -= count * sizeof(wchar_t); 271 *outbytesleft -= count * sizeof(wchar_t); 272 do 273 *outptr++ = *inptr++; 274 while (--count > 0); 275 *inbuf = (const char*) inptr; 276 *outbuf = (char*) outptr; 277 } 278 return 0; 279} 280 281static size_t wchar_id_loop_reset (iconv_t icd, 282 char* * outbuf, size_t *outbytesleft) 283{ 284 return 0; 285} 286