1/* 2 * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* This file defines three conversion loops: 22 - from wchar_t to anything else, 23 - from anything else to wchar_t, 24 - from wchar_t to wchar_t. 25 */ 26 27#if HAVE_WCRTOMB || HAVE_MBRTOWC 28/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before 29 <wchar.h>. 30 BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be 31 included before <wchar.h>. 32 In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined 33 by <stddef.h>. */ 34# include <stddef.h> 35# include <stdio.h> 36# include <time.h> 37# include <wchar.h> 38# define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */ 39 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 40 extern size_t mbrtowc (); 41# ifdef mbstate_t 42# define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0) 43# define mbsinit(ps) 1 44# endif 45# ifndef mbsinit 46# if !HAVE_MBSINIT 47# define mbsinit(ps) 1 48# endif 49# endif 50#endif 51 52/* 53 * The first two conversion loops have an extended conversion descriptor. 54 */ 55struct wchar_conv_struct { 56 struct conv_struct parent; 57#if HAVE_WCRTOMB || HAVE_MBRTOWC 58 mbstate_t state; 59#endif 60}; 61 62 63#if HAVE_WCRTOMB 64 65/* From wchar_t to anything else. */ 66 67#ifndef LIBICONV_PLUG 68 69#if 0 70 71struct wc_to_mb_fallback_locals { 72 struct wchar_conv_struct * l_wcd; 73 char* l_outbuf; 74 size_t l_outbytesleft; 75 int l_errno; 76}; 77 78/* A callback that writes a string given in the locale encoding. */ 79static void wc_to_mb_write_replacement (const char *buf, size_t buflen, 80 void* callback_arg) 81{ 82 struct wc_to_mb_fallback_locals * plocals = 83 (struct wc_to_mb_fallback_locals *) callback_arg; 84 /* Do nothing if already encountered an error in a previous call. */ 85 if (plocals->l_errno == 0) { 86 /* Attempt to convert the passed buffer to the target encoding. 87 Here we don't support characters split across multiple calls. */ 88 const char* bufptr = buf; 89 size_t bufleft = buflen; 90 size_t res = unicode_loop_convert(&plocals->l_wcd->parent, 91 &bufptr,&bufleft, 92 &plocals->l_outbuf,&plocals->l_outbytesleft); 93 if (res == (size_t)(-1)) { 94 if (errno == EILSEQ || errno == EINVAL) 95 /* Invalid buf contents. */ 96 plocals->l_errno = EILSEQ; 97 else if (errno == E2BIG) 98 /* Output buffer too small. */ 99 plocals->l_errno = E2BIG; 100 else 101 abort(); 102 } else { 103 /* Successful conversion. */ 104 if (bufleft > 0) 105 abort(); 106 } 107 } 108} 109 110#else 111 112struct wc_to_mb_fallback_locals { 113 char* l_outbuf; 114 size_t l_outbytesleft; 115 int l_errno; 116}; 117 118/* A callback that writes a string given in the target encoding. */ 119static void wc_to_mb_write_replacement (const char *buf, size_t buflen, 120 void* callback_arg) 121{ 122 struct wc_to_mb_fallback_locals * plocals = 123 (struct wc_to_mb_fallback_locals *) callback_arg; 124 /* Do nothing if already encountered an error in a previous call. */ 125 if (plocals->l_errno == 0) { 126 /* Attempt to copy the passed buffer to the output buffer. */ 127 if (plocals->l_outbytesleft < buflen) 128 plocals->l_errno = E2BIG; 129 else { 130 memcpy(plocals->l_outbuf, buf, buflen); 131 plocals->l_outbuf += buflen; 132 plocals->l_outbytesleft -= buflen; 133 } 134 } 135} 136 137#endif 138 139#endif /* !LIBICONV_PLUG */ 140 141static size_t wchar_from_loop_convert (iconv_t icd, 142 const char* * inbuf, size_t *inbytesleft, 143 char* * outbuf, size_t *outbytesleft) 144{ 145 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 146 size_t result = 0; 147 while (*inbytesleft >= sizeof(wchar_t)) { 148 const wchar_t * inptr = (const wchar_t *) *inbuf; 149 size_t inleft = *inbytesleft; 150 char buf[BUF_SIZE]; 151 mbstate_t state = wcd->state; 152 size_t bufcount = 0; 153 while (inleft >= sizeof(wchar_t)) { 154 /* Convert one wchar_t to multibyte representation. */ 155 size_t count = wcrtomb(buf+bufcount,*inptr,&state); 156 if (count == (size_t)(-1)) { 157 /* Invalid input. */ 158 if (wcd->parent.discard_ilseq) { 159 count = 0; 160 } 161 #ifndef LIBICONV_PLUG 162 else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) { 163 /* Drop the contents of buf[] accumulated so far, and instead 164 pass all queued wide characters to the fallback handler. */ 165 struct wc_to_mb_fallback_locals locals; 166 const wchar_t * fallback_inptr; 167 #if 0 168 locals.l_wcd = wcd; 169 #endif 170 locals.l_outbuf = *outbuf; 171 locals.l_outbytesleft = *outbytesleft; 172 locals.l_errno = 0; 173 for (fallback_inptr = (const wchar_t *) *inbuf; 174 fallback_inptr <= inptr; 175 fallback_inptr++) 176 wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr, 177 wc_to_mb_write_replacement, 178 &locals, 179 wcd->parent.fallbacks.data); 180 if (locals.l_errno != 0) { 181 errno = locals.l_errno; 182 return -1; 183 } 184 wcd->state = state; 185 *inbuf = (const char *) (inptr + 1); 186 *inbytesleft = inleft - sizeof(wchar_t); 187 *outbuf = locals.l_outbuf; 188 *outbytesleft = locals.l_outbytesleft; 189 result += 1; 190 break; 191 } 192 #endif 193 else { 194 errno = EILSEQ; 195 return -1; 196 } 197 } 198 inptr++; 199 inleft -= sizeof(wchar_t); 200 bufcount += count; 201 if (count == 0) { 202 /* Continue, append next wchar_t. */ 203 } else { 204 /* Attempt to convert the accumulated multibyte representations 205 to the target encoding. */ 206 const char* bufptr = buf; 207 size_t bufleft = bufcount; 208 char* outptr = *outbuf; 209 size_t outleft = *outbytesleft; 210 size_t res = unicode_loop_convert(&wcd->parent, 211 &bufptr,&bufleft, 212 &outptr,&outleft); 213 if (res == (size_t)(-1)) { 214 if (errno == EILSEQ) 215 /* Invalid input. */ 216 return -1; 217 else if (errno == E2BIG) 218 /* Output buffer too small. */ 219 return -1; 220 else if (errno == EINVAL) { 221 /* Continue, append next wchar_t, but avoid buffer overrun. */ 222 if (bufcount + MB_CUR_MAX > BUF_SIZE) 223 abort(); 224 } else 225 abort(); 226 } else { 227 /* Successful conversion. */ 228 wcd->state = state; 229 *inbuf = (const char *) inptr; 230 *inbytesleft = inleft; 231 *outbuf = outptr; 232 *outbytesleft = outleft; 233 result += res; 234 break; 235 } 236 } 237 } 238 } 239 return result; 240} 241 242static size_t wchar_from_loop_reset (iconv_t icd, 243 char* * outbuf, size_t *outbytesleft) 244{ 245 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 246 if (outbuf == NULL || *outbuf == NULL) { 247 /* Reset the states. */ 248 memset(&wcd->state,'\0',sizeof(mbstate_t)); 249 return unicode_loop_reset(&wcd->parent,NULL,NULL); 250 } else { 251 if (!mbsinit(&wcd->state)) { 252 mbstate_t state = wcd->state; 253 char buf[BUF_SIZE]; 254 size_t bufcount = wcrtomb(buf,(wchar_t)0,&state); 255 if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0') 256 abort(); 257 else { 258 const char* bufptr = buf; 259 size_t bufleft = bufcount-1; 260 char* outptr = *outbuf; 261 size_t outleft = *outbytesleft; 262 size_t res = unicode_loop_convert(&wcd->parent, 263 &bufptr,&bufleft, 264 &outptr,&outleft); 265 if (res == (size_t)(-1)) { 266 if (errno == E2BIG) 267 return -1; 268 else 269 abort(); 270 } else { 271 res = unicode_loop_reset(&wcd->parent,&outptr,&outleft); 272 if (res == (size_t)(-1)) 273 return res; 274 else { 275 /* Successful. */ 276 wcd->state = state; 277 *outbuf = outptr; 278 *outbytesleft = outleft; 279 return 0; 280 } 281 } 282 } 283 } else 284 return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft); 285 } 286} 287 288#endif 289 290 291#if HAVE_MBRTOWC 292 293/* From anything else to wchar_t. */ 294 295#ifndef LIBICONV_PLUG 296 297struct mb_to_wc_fallback_locals { 298 char* l_outbuf; 299 size_t l_outbytesleft; 300 int l_errno; 301}; 302 303static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen, 304 void* callback_arg) 305{ 306 struct mb_to_wc_fallback_locals * plocals = 307 (struct mb_to_wc_fallback_locals *) callback_arg; 308 /* Do nothing if already encountered an error in a previous call. */ 309 if (plocals->l_errno == 0) { 310 /* Attempt to copy the passed buffer to the output buffer. */ 311 if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen) 312 plocals->l_errno = E2BIG; 313 else { 314 for (; buflen > 0; buf++, buflen--) { 315 *(wchar_t*) plocals->l_outbuf = *buf; 316 plocals->l_outbuf += sizeof(wchar_t); 317 plocals->l_outbytesleft -= sizeof(wchar_t); 318 } 319 } 320 } 321} 322 323#endif /* !LIBICONV_PLUG */ 324 325static size_t wchar_to_loop_convert (iconv_t icd, 326 const char* * inbuf, size_t *inbytesleft, 327 char* * outbuf, size_t *outbytesleft) 328{ 329 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 330 size_t result = 0; 331 while (*inbytesleft > 0) { 332 size_t incount; 333 for (incount = 1; ; ) { 334 /* Here incount <= *inbytesleft. */ 335 char buf[BUF_SIZE]; 336 const char* inptr = *inbuf; 337 size_t inleft = incount; 338 char* bufptr = buf; 339 size_t bufleft = BUF_SIZE; 340 size_t res = unicode_loop_convert(&wcd->parent, 341 &inptr,&inleft, 342 &bufptr,&bufleft); 343 if (res == (size_t)(-1)) { 344 if (errno == EILSEQ) 345 /* Invalid input. */ 346 return -1; 347 else if (errno == EINVAL) { 348 /* Incomplete input. Next try with one more input byte. */ 349 } else 350 /* E2BIG shouldn't occur. */ 351 abort(); 352 } else { 353 /* Successful conversion. */ 354 size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */ 355 mbstate_t state = wcd->state; 356 wchar_t wc; 357 res = mbrtowc(&wc,buf,bufcount,&state); 358 if (res == (size_t)(-2)) { 359 /* Next try with one more input byte. */ 360 } else { 361 if (res == (size_t)(-1)) { 362 /* Invalid input. */ 363 if (wcd->parent.discard_ilseq) { 364 } 365 #ifndef LIBICONV_PLUG 366 else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) { 367 /* Drop the contents of buf[] accumulated so far, and instead 368 pass all queued chars to the fallback handler. */ 369 struct mb_to_wc_fallback_locals locals; 370 locals.l_outbuf = *outbuf; 371 locals.l_outbytesleft = *outbytesleft; 372 locals.l_errno = 0; 373 wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount, 374 mb_to_wc_write_replacement, 375 &locals, 376 wcd->parent.fallbacks.data); 377 if (locals.l_errno != 0) { 378 errno = locals.l_errno; 379 return -1; 380 } 381 /* Restoring the state is not needed because it is the initial 382 state anyway: For all known locale encodings, the multibyte 383 to wchar_t conversion doesn't have shift state, and we have 384 excluded partial accumulated characters. */ 385 /* wcd->state = state; */ 386 *inbuf += incount; 387 *inbytesleft -= incount; 388 *outbuf = locals.l_outbuf; 389 *outbytesleft = locals.l_outbytesleft; 390 result += 1; 391 break; 392 } 393 #endif 394 else 395 return -1; 396 } else { 397 if (*outbytesleft < sizeof(wchar_t)) { 398 errno = E2BIG; 399 return -1; 400 } 401 *(wchar_t*) *outbuf = wc; 402 /* Restoring the state is not needed because it is the initial 403 state anyway: For all known locale encodings, the multibyte 404 to wchar_t conversion doesn't have shift state, and we have 405 excluded partial accumulated characters. */ 406 /* wcd->state = state; */ 407 *outbuf += sizeof(wchar_t); 408 *outbytesleft -= sizeof(wchar_t); 409 } 410 *inbuf += incount; 411 *inbytesleft -= incount; 412 result += res; 413 break; 414 } 415 } 416 incount++; 417 if (incount > *inbytesleft) { 418 /* Incomplete input. */ 419 errno = EINVAL; 420 return -1; 421 } 422 } 423 } 424 return result; 425} 426 427static size_t wchar_to_loop_reset (iconv_t icd, 428 char* * outbuf, size_t *outbytesleft) 429{ 430 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 431 size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft); 432 if (res == (size_t)(-1)) 433 return res; 434 memset(&wcd->state,0,sizeof(mbstate_t)); 435 return 0; 436} 437 438#endif 439 440 441/* From wchar_t to wchar_t. */ 442 443static size_t wchar_id_loop_convert (iconv_t icd, 444 const char* * inbuf, size_t *inbytesleft, 445 char* * outbuf, size_t *outbytesleft) 446{ 447 struct conv_struct * cd = (struct conv_struct *) icd; 448 const wchar_t* inptr = (const wchar_t*) *inbuf; 449 size_t inleft = *inbytesleft / sizeof(wchar_t); 450 wchar_t* outptr = (wchar_t*) *outbuf; 451 size_t outleft = *outbytesleft / sizeof(wchar_t); 452 size_t count = (inleft <= outleft ? inleft : outleft); 453 if (count > 0) { 454 *inbytesleft -= count * sizeof(wchar_t); 455 *outbytesleft -= count * sizeof(wchar_t); 456 do { 457 wchar_t wc = *inptr++; 458 *outptr++ = wc; 459 #ifndef LIBICONV_PLUG 460 if (cd->hooks.wc_hook) 461 (*cd->hooks.wc_hook)(wc, cd->hooks.data); 462 #endif 463 } while (--count > 0); 464 *inbuf = (const char*) inptr; 465 *outbuf = (char*) outptr; 466 } 467 return 0; 468} 469 470static size_t wchar_id_loop_reset (iconv_t icd, 471 char* * outbuf, size_t *outbytesleft) 472{ 473 return 0; 474} 475