1/* 2 * Copyright (C) 1999-2003, 2005-2006 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* This file defines the conversion loop via Unicode as a pivot encoding. */ 22 23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */ 24static int unicode_transliterate (conv_t cd, ucs4_t wc, 25 unsigned char* outptr, size_t outleft) 26{ 27/* 28 if (cd->oflags & HAVE_HANGUL_JAMO) { 29 /-* Decompose Hangul into Jamo. Use double-width Jamo (contained 30 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo 31 (contained in Unicode only). *-/ 32 ucs4_t buf[3]; 33 int ret = johab_hangul_decompose(cd,buf,wc); 34 if (ret != RET_ILUNI) { 35 /-* we know 1 <= ret <= 3 *-/ 36 state_t backup_state = cd->ostate; 37 unsigned char* backup_outptr = outptr; 38 size_t backup_outleft = outleft; 39 int i, sub_outcount; 40 for (i = 0; i < ret; i++) { 41 if (outleft == 0) { 42 sub_outcount = RET_TOOSMALL; 43 goto johab_hangul_failed; 44 } 45 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); 46 if (sub_outcount <= RET_ILUNI) 47 goto johab_hangul_failed; 48 if (!(sub_outcount <= outleft)) abort(); 49 outptr += sub_outcount; outleft -= sub_outcount; 50 } 51 return outptr-backup_outptr; 52 johab_hangul_failed: 53 cd->ostate = backup_state; 54 outptr = backup_outptr; 55 outleft = backup_outleft; 56 if (sub_outcount != RET_ILUNI) 57 return RET_TOOSMALL; 58 } 59 } 60 { 61 /-* Try to use a variant, but postfix it with 62 U+303E IDEOGRAPHIC VARIATION INDICATOR 63 (cf. Ken Lunde's "CJKV information processing", p. 188). *-/ 64 int indx = -1; 65 if (wc == 0x3006) 66 indx = 0; 67 else if (wc == 0x30f6) 68 indx = 1; 69 else if (wc >= 0x4e00 && wc < 0xa000) 70 indx = cjk_variants_indx[wc-0x4e00]; 71 if (indx >= 0) { 72 for (;; indx++) { 73 ucs4_t buf[2]; 74 unsigned short variant = cjk_variants[indx]; 75 unsigned short last = variant & 0x8000; 76 variant &= 0x7fff; 77 variant += 0x3000; 78 buf[0] = variant; buf[1] = 0x303e; 79 { 80 state_t backup_state = cd->ostate; 81 unsigned char* backup_outptr = outptr; 82 size_t backup_outleft = outleft; 83 int i, sub_outcount; 84 for (i = 0; i < 2; i++) { 85 if (outleft == 0) { 86 sub_outcount = RET_TOOSMALL; 87 goto variant_failed; 88 } 89 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); 90 if (sub_outcount <= RET_ILUNI) 91 goto variant_failed; 92 if (!(sub_outcount <= outleft)) abort(); 93 outptr += sub_outcount; outleft -= sub_outcount; 94 } 95 return outptr-backup_outptr; 96 variant_failed: 97 cd->ostate = backup_state; 98 outptr = backup_outptr; 99 outleft = backup_outleft; 100 if (sub_outcount != RET_ILUNI) 101 return RET_TOOSMALL; 102 } 103 if (last) 104 break; 105 } 106 } 107 } 108 if (wc >= 0x2018 && wc <= 0x201a) { 109 /-* Special case for quotation marks 0x2018, 0x2019, 0x201a *-/ 110 ucs4_t substitute = 111 (cd->oflags & HAVE_QUOTATION_MARKS 112 ? (wc == 0x201a ? 0x2018 : wc) 113 : (cd->oflags & HAVE_ACCENTS 114 ? (wc==0x2019 ? 0x00b4 : 0x0060) /-* use accents *-/ 115 : 0x0027 /-* use apostrophe *-/ 116 ) ); 117 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft); 118 if (outcount != RET_ILUNI) 119 return outcount; 120 } 121 { 122 /-* Use the transliteration table. *-/ 123 int indx = translit_index(wc); 124 if (indx >= 0) { 125 const unsigned int * cp = &translit_data[indx]; 126 unsigned int num = *cp++; 127 state_t backup_state = cd->ostate; 128 unsigned char* backup_outptr = outptr; 129 size_t backup_outleft = outleft; 130 unsigned int i; 131 int sub_outcount; 132 for (i = 0; i < num; i++) { 133 if (outleft == 0) { 134 sub_outcount = RET_TOOSMALL; 135 goto translit_failed; 136 } 137 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft); 138 if (sub_outcount == RET_ILUNI) 139 /-* Recursive transliteration. *-/ 140 sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft); 141 if (sub_outcount <= RET_ILUNI) 142 goto translit_failed; 143 if (!(sub_outcount <= outleft)) abort(); 144 outptr += sub_outcount; outleft -= sub_outcount; 145 } 146 return outptr-backup_outptr; 147 translit_failed: 148 cd->ostate = backup_state; 149 outptr = backup_outptr; 150 outleft = backup_outleft; 151 if (sub_outcount != RET_ILUNI) 152 return RET_TOOSMALL; 153 } 154 } 155*/ 156 return RET_ILUNI; 157} 158 159#ifndef LIBICONV_PLUG 160 161struct uc_to_mb_fallback_locals { 162 unsigned char* l_outbuf; 163 size_t l_outbytesleft; 164 int l_errno; 165}; 166 167static void uc_to_mb_write_replacement (const char *buf, size_t buflen, 168 void* callback_arg) 169{ 170 struct uc_to_mb_fallback_locals * plocals = 171 (struct uc_to_mb_fallback_locals *) callback_arg; 172 /* Do nothing if already encountered an error in a previous call. */ 173 if (plocals->l_errno == 0) { 174 /* Attempt to copy the passed buffer to the output buffer. */ 175 if (plocals->l_outbytesleft < buflen) 176 plocals->l_errno = E2BIG; 177 else { 178 memcpy(plocals->l_outbuf, buf, buflen); 179 plocals->l_outbuf += buflen; 180 plocals->l_outbytesleft -= buflen; 181 } 182 } 183} 184 185struct mb_to_uc_fallback_locals { 186 conv_t l_cd; 187 unsigned char* l_outbuf; 188 size_t l_outbytesleft; 189 int l_errno; 190}; 191 192static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen, 193 void* callback_arg) 194{ 195 struct mb_to_uc_fallback_locals * plocals = 196 (struct mb_to_uc_fallback_locals *) callback_arg; 197 /* Do nothing if already encountered an error in a previous call. */ 198 if (plocals->l_errno == 0) { 199 /* Attempt to convert the passed buffer to the target encoding. */ 200 conv_t cd = plocals->l_cd; 201 unsigned char* outptr = plocals->l_outbuf; 202 size_t outleft = plocals->l_outbytesleft; 203 for (; buflen > 0; buf++, buflen--) { 204 ucs4_t wc = *buf; 205 int outcount; 206 if (outleft == 0) { 207 plocals->l_errno = E2BIG; 208 break; 209 } 210 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 211 if (outcount != RET_ILUNI) 212 goto outcount_ok; 213 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 214 if ((wc >> 7) == (0xe0000 >> 7)) 215 goto outcount_zero; 216 /* Try transliteration. */ 217 if (cd->transliterate) { 218 outcount = unicode_transliterate(cd,wc,outptr,outleft); 219 if (outcount != RET_ILUNI) 220 goto outcount_ok; 221 } 222 if (cd->discard_ilseq) { 223 outcount = 0; 224 goto outcount_ok; 225 } 226 #ifndef LIBICONV_PLUG 227 else if (cd->fallbacks.uc_to_mb_fallback != NULL) { 228 struct uc_to_mb_fallback_locals locals; 229 locals.l_outbuf = outptr; 230 locals.l_outbytesleft = outleft; 231 locals.l_errno = 0; 232 cd->fallbacks.uc_to_mb_fallback(wc, 233 uc_to_mb_write_replacement, 234 &locals, 235 cd->fallbacks.data); 236 if (locals.l_errno != 0) { 237 plocals->l_errno = locals.l_errno; 238 break; 239 } 240 outptr = locals.l_outbuf; 241 outleft = locals.l_outbytesleft; 242 outcount = 0; 243 goto outcount_ok; 244 } 245 #endif 246 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 247 if (outcount != RET_ILUNI) 248 goto outcount_ok; 249 plocals->l_errno = EILSEQ; 250 break; 251 outcount_ok: 252 if (outcount < 0) { 253 plocals->l_errno = E2BIG; 254 break; 255 } 256 #ifndef LIBICONV_PLUG 257 if (cd->hooks.uc_hook) 258 (*cd->hooks.uc_hook)(wc, cd->hooks.data); 259 #endif 260 if (!(outcount <= outleft)) abort(); 261 outptr += outcount; outleft -= outcount; 262 outcount_zero: ; 263 } 264 plocals->l_outbuf = outptr; 265 plocals->l_outbytesleft = outleft; 266 } 267} 268 269#endif /* !LIBICONV_PLUG */ 270 271static size_t unicode_loop_convert (iconv_t icd, 272 const char* * inbuf, size_t *inbytesleft, 273 char* * outbuf, size_t *outbytesleft) 274{ 275 conv_t cd = (conv_t) icd; 276 size_t result = 0; 277 const unsigned char* inptr = (const unsigned char*) *inbuf; 278 size_t inleft = *inbytesleft; 279 unsigned char* outptr = (unsigned char*) *outbuf; 280 size_t outleft = *outbytesleft; 281 while (inleft > 0) { 282 state_t last_istate = cd->istate; 283 ucs4_t wc; 284 int incount; 285 int outcount; 286 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft); 287 if (incount < 0) { 288 if (incount == RET_ILSEQ) { 289 /* Case 1: invalid input */ 290 if (cd->discard_ilseq) { 291 switch (cd->iindex) { 292 case ei_ucs4: case ei_ucs4be: case ei_ucs4le: 293 case ei_utf32: case ei_utf32be: case ei_utf32le: 294 case ei_ucs4internal: case ei_ucs4swapped: 295 incount = 4; break; 296 case ei_ucs2: case ei_ucs2be: case ei_ucs2le: 297 case ei_utf16: case ei_utf16be: case ei_utf16le: 298 case ei_ucs2internal: case ei_ucs2swapped: 299 incount = 2; break; 300 default: 301 incount = 1; break; 302 } 303 goto outcount_zero; 304 } 305 #ifndef LIBICONV_PLUG 306 else if (cd->fallbacks.mb_to_uc_fallback != NULL) { 307 struct mb_to_uc_fallback_locals locals; 308 switch (cd->iindex) { 309 case ei_ucs4: case ei_ucs4be: case ei_ucs4le: 310 case ei_utf32: case ei_utf32be: case ei_utf32le: 311 case ei_ucs4internal: case ei_ucs4swapped: 312 incount = 4; break; 313 case ei_ucs2: case ei_ucs2be: case ei_ucs2le: 314 case ei_utf16: case ei_utf16be: case ei_utf16le: 315 case ei_ucs2internal: case ei_ucs2swapped: 316 incount = 2; break; 317 default: 318 incount = 1; break; 319 } 320 locals.l_cd = cd; 321 locals.l_outbuf = outptr; 322 locals.l_outbytesleft = outleft; 323 locals.l_errno = 0; 324 cd->fallbacks.mb_to_uc_fallback(inptr, incount, 325 mb_to_uc_write_replacement, 326 &locals, 327 cd->fallbacks.data); 328 if (locals.l_errno != 0) { 329 errno = locals.l_errno; 330 result = -1; 331 break; 332 } 333 outptr = locals.l_outbuf; 334 outleft = locals.l_outbytesleft; 335 result += 1; 336 goto outcount_zero; 337 } 338 #endif 339 errno = EILSEQ; 340 result = -1; 341 break; 342 } 343 if (incount == RET_TOOFEW(0)) { 344 /* Case 2: not enough bytes available to detect anything */ 345 errno = EINVAL; 346 result = -1; 347 break; 348 } 349 /* Case 3: k bytes read, but only a shift sequence */ 350 incount = -2-incount; 351 } else { 352 /* Case 4: k bytes read, making up a wide character */ 353 if (outleft == 0) { 354 cd->istate = last_istate; 355 errno = E2BIG; 356 result = -1; 357 break; 358 } 359 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 360 if (outcount != RET_ILUNI) 361 goto outcount_ok; 362 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 363 if ((wc >> 7) == (0xe0000 >> 7)) 364 goto outcount_zero; 365 /* Try transliteration. */ 366 result++; 367 if (cd->transliterate) { 368 outcount = unicode_transliterate(cd,wc,outptr,outleft); 369 if (outcount != RET_ILUNI) 370 goto outcount_ok; 371 } 372 if (cd->discard_ilseq) { 373 outcount = 0; 374 goto outcount_ok; 375 } 376 #ifndef LIBICONV_PLUG 377 else if (cd->fallbacks.uc_to_mb_fallback != NULL) { 378 struct uc_to_mb_fallback_locals locals; 379 locals.l_outbuf = outptr; 380 locals.l_outbytesleft = outleft; 381 locals.l_errno = 0; 382 cd->fallbacks.uc_to_mb_fallback(wc, 383 uc_to_mb_write_replacement, 384 &locals, 385 cd->fallbacks.data); 386 if (locals.l_errno != 0) { 387 cd->istate = last_istate; 388 errno = locals.l_errno; 389 return -1; 390 } 391 outptr = locals.l_outbuf; 392 outleft = locals.l_outbytesleft; 393 outcount = 0; 394 goto outcount_ok; 395 } 396 #endif 397 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 398 if (outcount != RET_ILUNI) 399 goto outcount_ok; 400 cd->istate = last_istate; 401 errno = EILSEQ; 402 result = -1; 403 break; 404 outcount_ok: 405 if (outcount < 0) { 406 cd->istate = last_istate; 407 errno = E2BIG; 408 result = -1; 409 break; 410 } 411 #ifndef LIBICONV_PLUG 412 if (cd->hooks.uc_hook) 413 (*cd->hooks.uc_hook)(wc, cd->hooks.data); 414 #endif 415 if (!(outcount <= outleft)) abort(); 416 outptr += outcount; outleft -= outcount; 417 } 418 outcount_zero: 419 if (!(incount <= inleft)) abort(); 420 inptr += incount; inleft -= incount; 421 } 422 *inbuf = (const char*) inptr; 423 *inbytesleft = inleft; 424 *outbuf = (char*) outptr; 425 *outbytesleft = outleft; 426 return result; 427} 428 429static size_t unicode_loop_reset (iconv_t icd, 430 char* * outbuf, size_t *outbytesleft) 431{ 432 conv_t cd = (conv_t) icd; 433 if (outbuf == NULL || *outbuf == NULL) { 434 /* Reset the states. */ 435 memset(&cd->istate,'\0',sizeof(state_t)); 436 memset(&cd->ostate,'\0',sizeof(state_t)); 437 return 0; 438 } else { 439 size_t result = 0; 440 if (cd->ifuncs.xxx_flushwc) { 441 state_t last_istate = cd->istate; 442 ucs4_t wc; 443 if (cd->ifuncs.xxx_flushwc(cd, &wc)) { 444 unsigned char* outptr = (unsigned char*) *outbuf; 445 size_t outleft = *outbytesleft; 446 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 447 if (outcount != RET_ILUNI) 448 goto outcount_ok; 449 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 450 if ((wc >> 7) == (0xe0000 >> 7)) 451 goto outcount_zero; 452 /* Try transliteration. */ 453 result++; 454 if (cd->transliterate) { 455 outcount = unicode_transliterate(cd,wc,outptr,outleft); 456 if (outcount != RET_ILUNI) 457 goto outcount_ok; 458 } 459 if (cd->discard_ilseq) { 460 outcount = 0; 461 goto outcount_ok; 462 } 463 #ifndef LIBICONV_PLUG 464 else if (cd->fallbacks.uc_to_mb_fallback != NULL) { 465 struct uc_to_mb_fallback_locals locals; 466 locals.l_outbuf = outptr; 467 locals.l_outbytesleft = outleft; 468 locals.l_errno = 0; 469 cd->fallbacks.uc_to_mb_fallback(wc, 470 uc_to_mb_write_replacement, 471 &locals, 472 cd->fallbacks.data); 473 if (locals.l_errno != 0) { 474 cd->istate = last_istate; 475 errno = locals.l_errno; 476 return -1; 477 } 478 outptr = locals.l_outbuf; 479 outleft = locals.l_outbytesleft; 480 outcount = 0; 481 goto outcount_ok; 482 } 483 #endif 484 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 485 if (outcount != RET_ILUNI) 486 goto outcount_ok; 487 cd->istate = last_istate; 488 errno = EILSEQ; 489 return -1; 490 outcount_ok: 491 if (outcount < 0) { 492 cd->istate = last_istate; 493 errno = E2BIG; 494 return -1; 495 } 496 #ifndef LIBICONV_PLUG 497 if (cd->hooks.uc_hook) 498 (*cd->hooks.uc_hook)(wc, cd->hooks.data); 499 #endif 500 if (!(outcount <= outleft)) abort(); 501 outptr += outcount; 502 outleft -= outcount; 503 outcount_zero: 504 *outbuf = (char*) outptr; 505 *outbytesleft = outleft; 506 } 507 } 508 if (cd->ofuncs.xxx_reset) { 509 unsigned char* outptr = (unsigned char*) *outbuf; 510 size_t outleft = *outbytesleft; 511 int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft); 512 if (outcount < 0) { 513 errno = E2BIG; 514 return -1; 515 } 516 if (!(outcount <= outleft)) abort(); 517 *outbuf = (char*) (outptr + outcount); 518 *outbytesleft = outleft - outcount; 519 } 520 memset(&cd->istate,'\0',sizeof(state_t)); 521 memset(&cd->ostate,'\0',sizeof(state_t)); 522 return result; 523 } 524} 525