1/* 2 * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* This file defines the conversion loop via Unicode as a pivot encoding. */ 22 23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */ 24static int unicode_transliterate (conv_t cd, ucs4_t wc, 25 unsigned char* outptr, size_t outleft) 26{ 27 if (cd->oflags & HAVE_HANGUL_JAMO) { 28 /* Decompose Hangul into Jamo. Use double-width Jamo (contained 29 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo 30 (contained in Unicode only). */ 31 ucs4_t buf[3]; 32 int ret = johab_hangul_decompose(cd,buf,wc); 33 if (ret != RET_ILUNI) { 34 /* we know 1 <= ret <= 3 */ 35 state_t backup_state = cd->ostate; 36 unsigned char* backup_outptr = outptr; 37 size_t backup_outleft = outleft; 38 int i, sub_outcount; 39 for (i = 0; i < ret; i++) { 40 if (outleft == 0) { 41 sub_outcount = RET_TOOSMALL; 42 goto johab_hangul_failed; 43 } 44 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); 45 if (sub_outcount <= RET_ILUNI) 46 goto johab_hangul_failed; 47 if (!(sub_outcount <= outleft)) abort(); 48 outptr += sub_outcount; outleft -= sub_outcount; 49 } 50 return outptr-backup_outptr; 51 johab_hangul_failed: 52 cd->ostate = backup_state; 53 outptr = backup_outptr; 54 outleft = backup_outleft; 55 if (sub_outcount != RET_ILUNI) 56 return RET_TOOSMALL; 57 } 58 } 59 { 60 /* Try to use a variant, but postfix it with 61 U+303E IDEOGRAPHIC VARIATION INDICATOR 62 (cf. Ken Lunde's "CJKV information processing", p. 188). */ 63 int indx = -1; 64 if (wc == 0x3006) 65 indx = 0; 66 else if (wc == 0x30f6) 67 indx = 1; 68 else if (wc >= 0x4e00 && wc < 0xa000) 69 indx = cjk_variants_indx[wc-0x4e00]; 70 if (indx >= 0) { 71 for (;; indx++) { 72 ucs4_t buf[2]; 73 unsigned short variant = cjk_variants[indx]; 74 unsigned short last = variant & 0x8000; 75 variant &= 0x7fff; 76 variant += 0x3000; 77 buf[0] = variant; buf[1] = 0x303e; 78 { 79 state_t backup_state = cd->ostate; 80 unsigned char* backup_outptr = outptr; 81 size_t backup_outleft = outleft; 82 int i, sub_outcount; 83 for (i = 0; i < 2; i++) { 84 if (outleft == 0) { 85 sub_outcount = RET_TOOSMALL; 86 goto variant_failed; 87 } 88 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); 89 if (sub_outcount <= RET_ILUNI) 90 goto variant_failed; 91 if (!(sub_outcount <= outleft)) abort(); 92 outptr += sub_outcount; outleft -= sub_outcount; 93 } 94 return outptr-backup_outptr; 95 variant_failed: 96 cd->ostate = backup_state; 97 outptr = backup_outptr; 98 outleft = backup_outleft; 99 if (sub_outcount != RET_ILUNI) 100 return RET_TOOSMALL; 101 } 102 if (last) 103 break; 104 } 105 } 106 } 107 if (wc >= 0x2018 && wc <= 0x201a) { 108 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */ 109 ucs4_t substitute = 110 (cd->oflags & HAVE_QUOTATION_MARKS 111 ? (wc == 0x201a ? 0x2018 : wc) 112 : (cd->oflags & HAVE_ACCENTS 113 ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */ 114 : 0x0027 /* use apostrophe */ 115 ) ); 116 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft); 117 if (outcount != RET_ILUNI) 118 return outcount; 119 } 120 { 121 /* Use the transliteration table. */ 122 int indx = translit_index(wc); 123 if (indx >= 0) { 124 const unsigned int * cp = &translit_data[indx]; 125 unsigned int num = *cp++; 126 state_t backup_state = cd->ostate; 127 unsigned char* backup_outptr = outptr; 128 size_t backup_outleft = outleft; 129 unsigned int i; 130 int sub_outcount; 131 for (i = 0; i < num; i++) { 132 if (outleft == 0) { 133 sub_outcount = RET_TOOSMALL; 134 goto translit_failed; 135 } 136 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft); 137 if (sub_outcount == RET_ILUNI) 138 /* Recursive transliteration. */ 139 sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft); 140 if (sub_outcount <= RET_ILUNI) 141 goto translit_failed; 142 if (!(sub_outcount <= outleft)) abort(); 143 outptr += sub_outcount; outleft -= sub_outcount; 144 } 145 return outptr-backup_outptr; 146 translit_failed: 147 cd->ostate = backup_state; 148 outptr = backup_outptr; 149 outleft = backup_outleft; 150 if (sub_outcount != RET_ILUNI) 151 return RET_TOOSMALL; 152 } 153 } 154 return RET_ILUNI; 155} 156 157#ifndef LIBICONV_PLUG 158 159struct uc_to_mb_fallback_locals { 160 unsigned char* l_outbuf; 161 size_t l_outbytesleft; 162 int l_errno; 163}; 164 165static void uc_to_mb_write_replacement (const char *buf, size_t buflen, 166 void* callback_arg) 167{ 168 struct uc_to_mb_fallback_locals * plocals = 169 (struct uc_to_mb_fallback_locals *) callback_arg; 170 /* Do nothing if already encountered an error in a previous call. */ 171 if (plocals->l_errno == 0) { 172 /* Attempt to copy the passed buffer to the output buffer. */ 173 if (plocals->l_outbytesleft < buflen) 174 plocals->l_errno = E2BIG; 175 else { 176 memcpy(plocals->l_outbuf, buf, buflen); 177 plocals->l_outbuf += buflen; 178 plocals->l_outbytesleft -= buflen; 179 } 180 } 181} 182 183struct mb_to_uc_fallback_locals { 184 conv_t l_cd; 185 unsigned char* l_outbuf; 186 size_t l_outbytesleft; 187 int l_errno; 188}; 189 190static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen, 191 void* callback_arg) 192{ 193 struct mb_to_uc_fallback_locals * plocals = 194 (struct mb_to_uc_fallback_locals *) callback_arg; 195 /* Do nothing if already encountered an error in a previous call. */ 196 if (plocals->l_errno == 0) { 197 /* Attempt to convert the passed buffer to the target encoding. */ 198 conv_t cd = plocals->l_cd; 199 unsigned char* outptr = plocals->l_outbuf; 200 size_t outleft = plocals->l_outbytesleft; 201 for (; buflen > 0; buf++, buflen--) { 202 ucs4_t wc = *buf; 203 int outcount; 204 if (outleft == 0) { 205 plocals->l_errno = E2BIG; 206 break; 207 } 208 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 209 if (outcount != RET_ILUNI) 210 goto outcount_ok; 211 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 212 if ((wc >> 7) == (0xe0000 >> 7)) 213 goto outcount_zero; 214 /* Try transliteration. */ 215 if (cd->transliterate) { 216 outcount = unicode_transliterate(cd,wc,outptr,outleft); 217 if (outcount != RET_ILUNI) 218 goto outcount_ok; 219 } 220 if (cd->discard_ilseq) { 221 outcount = 0; 222 goto outcount_ok; 223 } 224 #ifndef LIBICONV_PLUG 225 else if (cd->fallbacks.uc_to_mb_fallback != NULL) { 226 struct uc_to_mb_fallback_locals locals; 227 locals.l_outbuf = outptr; 228 locals.l_outbytesleft = outleft; 229 locals.l_errno = 0; 230 cd->fallbacks.uc_to_mb_fallback(wc, 231 uc_to_mb_write_replacement, 232 &locals, 233 cd->fallbacks.data); 234 if (locals.l_errno != 0) { 235 plocals->l_errno = locals.l_errno; 236 break; 237 } 238 outptr = locals.l_outbuf; 239 outleft = locals.l_outbytesleft; 240 outcount = 0; 241 goto outcount_ok; 242 } 243 #endif 244 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 245 if (outcount != RET_ILUNI) 246 goto outcount_ok; 247 plocals->l_errno = EILSEQ; 248 break; 249 outcount_ok: 250 if (outcount < 0) { 251 plocals->l_errno = E2BIG; 252 break; 253 } 254 #ifndef LIBICONV_PLUG 255 if (cd->hooks.uc_hook) 256 (*cd->hooks.uc_hook)(wc, cd->hooks.data); 257 #endif 258 if (!(outcount <= outleft)) abort(); 259 outptr += outcount; outleft -= outcount; 260 outcount_zero: ; 261 } 262 plocals->l_outbuf = outptr; 263 plocals->l_outbytesleft = outleft; 264 } 265} 266 267#endif /* !LIBICONV_PLUG */ 268 269static size_t unicode_loop_convert (iconv_t icd, 270 const char* * inbuf, size_t *inbytesleft, 271 char* * outbuf, size_t *outbytesleft) 272{ 273 conv_t cd = (conv_t) icd; 274 size_t result = 0; 275 const unsigned char* inptr = (const unsigned char*) *inbuf; 276 size_t inleft = *inbytesleft; 277 unsigned char* outptr = (unsigned char*) *outbuf; 278 size_t outleft = *outbytesleft; 279 while (inleft > 0) { 280 state_t last_istate = cd->istate; 281 ucs4_t wc; 282 int incount; 283 int outcount; 284 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft); 285 if (incount < 0) { 286 if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) { 287 /* Case 1: invalid input, possibly after a shift sequence */ 288 incount = DECODE_SHIFT_ILSEQ(incount); 289 if (cd->discard_ilseq) { 290 switch (cd->iindex) { 291 case ei_ucs4: case ei_ucs4be: case ei_ucs4le: 292 case ei_utf32: case ei_utf32be: case ei_utf32le: 293 case ei_ucs4internal: case ei_ucs4swapped: 294 incount += 4; break; 295 case ei_ucs2: case ei_ucs2be: case ei_ucs2le: 296 case ei_utf16: case ei_utf16be: case ei_utf16le: 297 case ei_ucs2internal: case ei_ucs2swapped: 298 incount += 2; break; 299 default: 300 incount += 1; break; 301 } 302 goto outcount_zero; 303 } 304 #ifndef LIBICONV_PLUG 305 else if (cd->fallbacks.mb_to_uc_fallback != NULL) { 306 unsigned int incount2; 307 struct mb_to_uc_fallback_locals locals; 308 switch (cd->iindex) { 309 case ei_ucs4: case ei_ucs4be: case ei_ucs4le: 310 case ei_utf32: case ei_utf32be: case ei_utf32le: 311 case ei_ucs4internal: case ei_ucs4swapped: 312 incount2 = 4; break; 313 case ei_ucs2: case ei_ucs2be: case ei_ucs2le: 314 case ei_utf16: case ei_utf16be: case ei_utf16le: 315 case ei_ucs2internal: case ei_ucs2swapped: 316 incount2 = 2; break; 317 default: 318 incount2 = 1; break; 319 } 320 locals.l_cd = cd; 321 locals.l_outbuf = outptr; 322 locals.l_outbytesleft = outleft; 323 locals.l_errno = 0; 324 cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2, 325 mb_to_uc_write_replacement, 326 &locals, 327 cd->fallbacks.data); 328 if (locals.l_errno != 0) { 329 inptr += incount; inleft -= incount; 330 errno = locals.l_errno; 331 result = -1; 332 break; 333 } 334 incount += incount2; 335 outptr = locals.l_outbuf; 336 outleft = locals.l_outbytesleft; 337 result += 1; 338 goto outcount_zero; 339 } 340 #endif 341 inptr += incount; inleft -= incount; 342 errno = EILSEQ; 343 result = -1; 344 break; 345 } 346 if (incount == RET_TOOFEW(0)) { 347 /* Case 2: not enough bytes available to detect anything */ 348 errno = EINVAL; 349 result = -1; 350 break; 351 } 352 /* Case 3: k bytes read, but only a shift sequence */ 353 incount = DECODE_TOOFEW(incount); 354 } else { 355 /* Case 4: k bytes read, making up a wide character */ 356 if (outleft == 0) { 357 cd->istate = last_istate; 358 errno = E2BIG; 359 result = -1; 360 break; 361 } 362 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 363 if (outcount != RET_ILUNI) 364 goto outcount_ok; 365 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 366 if ((wc >> 7) == (0xe0000 >> 7)) 367 goto outcount_zero; 368 /* Try transliteration. */ 369 result++; 370 if (cd->transliterate) { 371 outcount = unicode_transliterate(cd,wc,outptr,outleft); 372 if (outcount != RET_ILUNI) 373 goto outcount_ok; 374 } 375 if (cd->discard_ilseq) { 376 outcount = 0; 377 goto outcount_ok; 378 } 379 #ifndef LIBICONV_PLUG 380 else if (cd->fallbacks.uc_to_mb_fallback != NULL) { 381 struct uc_to_mb_fallback_locals locals; 382 locals.l_outbuf = outptr; 383 locals.l_outbytesleft = outleft; 384 locals.l_errno = 0; 385 cd->fallbacks.uc_to_mb_fallback(wc, 386 uc_to_mb_write_replacement, 387 &locals, 388 cd->fallbacks.data); 389 if (locals.l_errno != 0) { 390 cd->istate = last_istate; 391 errno = locals.l_errno; 392 return -1; 393 } 394 outptr = locals.l_outbuf; 395 outleft = locals.l_outbytesleft; 396 outcount = 0; 397 goto outcount_ok; 398 } 399 #endif 400 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 401 if (outcount != RET_ILUNI) 402 goto outcount_ok; 403 cd->istate = last_istate; 404 errno = EILSEQ; 405 result = -1; 406 break; 407 outcount_ok: 408 if (outcount < 0) { 409 cd->istate = last_istate; 410 errno = E2BIG; 411 result = -1; 412 break; 413 } 414 #ifndef LIBICONV_PLUG 415 if (cd->hooks.uc_hook) 416 (*cd->hooks.uc_hook)(wc, cd->hooks.data); 417 #endif 418 if (!(outcount <= outleft)) abort(); 419 outptr += outcount; outleft -= outcount; 420 } 421 outcount_zero: 422 if (!(incount <= inleft)) abort(); 423 inptr += incount; inleft -= incount; 424 } 425 *inbuf = (const char*) inptr; 426 *inbytesleft = inleft; 427 *outbuf = (char*) outptr; 428 *outbytesleft = outleft; 429 return result; 430} 431 432static size_t unicode_loop_reset (iconv_t icd, 433 char* * outbuf, size_t *outbytesleft) 434{ 435 conv_t cd = (conv_t) icd; 436 if (outbuf == NULL || *outbuf == NULL) { 437 /* Reset the states. */ 438 memset(&cd->istate,'\0',sizeof(state_t)); 439 memset(&cd->ostate,'\0',sizeof(state_t)); 440 return 0; 441 } else { 442 size_t result = 0; 443 if (cd->ifuncs.xxx_flushwc) { 444 state_t last_istate = cd->istate; 445 ucs4_t wc; 446 if (cd->ifuncs.xxx_flushwc(cd, &wc)) { 447 unsigned char* outptr = (unsigned char*) *outbuf; 448 size_t outleft = *outbytesleft; 449 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 450 if (outcount != RET_ILUNI) 451 goto outcount_ok; 452 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 453 if ((wc >> 7) == (0xe0000 >> 7)) 454 goto outcount_zero; 455 /* Try transliteration. */ 456 result++; 457 if (cd->transliterate) { 458 outcount = unicode_transliterate(cd,wc,outptr,outleft); 459 if (outcount != RET_ILUNI) 460 goto outcount_ok; 461 } 462 if (cd->discard_ilseq) { 463 outcount = 0; 464 goto outcount_ok; 465 } 466 #ifndef LIBICONV_PLUG 467 else if (cd->fallbacks.uc_to_mb_fallback != NULL) { 468 struct uc_to_mb_fallback_locals locals; 469 locals.l_outbuf = outptr; 470 locals.l_outbytesleft = outleft; 471 locals.l_errno = 0; 472 cd->fallbacks.uc_to_mb_fallback(wc, 473 uc_to_mb_write_replacement, 474 &locals, 475 cd->fallbacks.data); 476 if (locals.l_errno != 0) { 477 cd->istate = last_istate; 478 errno = locals.l_errno; 479 return -1; 480 } 481 outptr = locals.l_outbuf; 482 outleft = locals.l_outbytesleft; 483 outcount = 0; 484 goto outcount_ok; 485 } 486 #endif 487 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 488 if (outcount != RET_ILUNI) 489 goto outcount_ok; 490 cd->istate = last_istate; 491 errno = EILSEQ; 492 return -1; 493 outcount_ok: 494 if (outcount < 0) { 495 cd->istate = last_istate; 496 errno = E2BIG; 497 return -1; 498 } 499 #ifndef LIBICONV_PLUG 500 if (cd->hooks.uc_hook) 501 (*cd->hooks.uc_hook)(wc, cd->hooks.data); 502 #endif 503 if (!(outcount <= outleft)) abort(); 504 outptr += outcount; 505 outleft -= outcount; 506 outcount_zero: 507 *outbuf = (char*) outptr; 508 *outbytesleft = outleft; 509 } 510 } 511 if (cd->ofuncs.xxx_reset) { 512 unsigned char* outptr = (unsigned char*) *outbuf; 513 size_t outleft = *outbytesleft; 514 int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft); 515 if (outcount < 0) { 516 errno = E2BIG; 517 return -1; 518 } 519 if (!(outcount <= outleft)) abort(); 520 *outbuf = (char*) (outptr + outcount); 521 *outbytesleft = outleft - outcount; 522 } 523 memset(&cd->istate,'\0',sizeof(state_t)); 524 memset(&cd->ostate,'\0',sizeof(state_t)); 525 return result; 526 } 527} 528