1/* 2 * Copyright (C) 1999-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place - 18 * Suite 330, Boston, MA 02111-1307, USA. 19 */ 20 21/* This file defines the conversion loop via Unicode as a pivot encoding. */ 22 23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */ 24#include <errno.h> 25static int unicode_transliterate (conv_t cd, ucs4_t wc, 26 unsigned char* outptr, size_t outleft) 27{ 28 if (cd->oflags & HAVE_HANGUL_JAMO) { 29 /* Decompose Hangul into Jamo. Use double-width Jamo (contained 30 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo 31 (contained in Unicode only). */ 32 ucs4_t buf[3]; 33 int ret = johab_hangul_decompose(cd,buf,wc); 34 if (ret != RET_ILUNI) { 35 /* we know 1 <= ret <= 3 */ 36 state_t backup_state = cd->ostate; 37 unsigned char* backup_outptr = outptr; 38 size_t backup_outleft = outleft; 39 int i, sub_outcount; 40 for (i = 0; i < ret; i++) { 41 if (outleft == 0) { 42 sub_outcount = RET_TOOSMALL; 43 goto johab_hangul_failed; 44 } 45 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); 46 if (sub_outcount <= RET_ILUNI) 47 goto johab_hangul_failed; 48 if (!(sub_outcount <= outleft)) abort(); 49 outptr += sub_outcount; outleft -= sub_outcount; 50 } 51 return outptr-backup_outptr; 52 johab_hangul_failed: 53 cd->ostate = backup_state; 54 outptr = backup_outptr; 55 outleft = backup_outleft; 56 if (sub_outcount < 0) 57 return RET_TOOSMALL; 58 } 59 } 60 { 61 /* Try to use a variant, but postfix it with 62 U+303E IDEOGRAPHIC VARIATION INDICATOR 63 (cf. Ken Lunde's "CJKV information processing", p. 188). */ 64 int indx = -1; 65 if (wc == 0x3006) 66 indx = 0; 67 else if (wc == 0x30f6) 68 indx = 1; 69 else if (wc >= 0x4e00 && wc < 0xa000) 70 indx = cjk_variants_indx[wc-0x4e00]; 71 if (indx >= 0) { 72 for (;; indx++) { 73 ucs4_t buf[2]; 74 unsigned short variant = cjk_variants[indx]; 75 unsigned short last = variant & 0x8000; 76 variant &= 0x7fff; 77 variant += 0x3000; 78 buf[0] = variant; buf[1] = 0x303e; 79 { 80 state_t backup_state = cd->ostate; 81 unsigned char* backup_outptr = outptr; 82 size_t backup_outleft = outleft; 83 int i, sub_outcount; 84 for (i = 0; i < 2; i++) { 85 if (outleft == 0) { 86 sub_outcount = RET_TOOSMALL; 87 goto variant_failed; 88 } 89 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft); 90 if (sub_outcount <= RET_ILUNI) 91 goto variant_failed; 92 if (!(sub_outcount <= outleft)) abort(); 93 outptr += sub_outcount; outleft -= sub_outcount; 94 } 95 return outptr-backup_outptr; 96 variant_failed: 97 cd->ostate = backup_state; 98 outptr = backup_outptr; 99 outleft = backup_outleft; 100 if (sub_outcount < 0) 101 return RET_TOOSMALL; 102 } 103 if (last) 104 break; 105 } 106 } 107 } 108 if (wc >= 0x2018 && wc <= 0x201a) { 109 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */ 110 ucs4_t substitute = 111 (cd->oflags & HAVE_QUOTATION_MARKS 112 ? (wc == 0x201a ? 0x2018 : wc) 113 : (cd->oflags & HAVE_ACCENTS 114 ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */ 115 : 0x0027 /* use apostrophe */ 116 ) ); 117 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft); 118 if (outcount != RET_ILUNI) 119 return outcount; 120 } 121 { 122 /* Use the transliteration table. */ 123 int indx = translit_index(wc); 124 if (indx >= 0) { 125 const unsigned short * cp = &translit_data[indx]; 126 unsigned int num = *cp++; 127 state_t backup_state = cd->ostate; 128 unsigned char* backup_outptr = outptr; 129 size_t backup_outleft = outleft; 130 unsigned int i; 131 int sub_outcount; 132 for (i = 0; i < num; i++) { 133 if (outleft == 0) { 134 sub_outcount = RET_TOOSMALL; 135 goto translit_failed; 136 } 137 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft); 138 if (sub_outcount <= RET_ILUNI) 139 goto translit_failed; 140 if (!(sub_outcount <= outleft)) abort(); 141 outptr += sub_outcount; outleft -= sub_outcount; 142 } 143 return outptr-backup_outptr; 144 translit_failed: 145 cd->ostate = backup_state; 146 outptr = backup_outptr; 147 outleft = backup_outleft; 148 if (sub_outcount != RET_ILUNI) 149 return RET_TOOSMALL; 150 } 151 } 152 return RET_ILUNI; 153} 154 155static size_t unicode_loop_convert (iconv_t icd, 156 const char* * inbuf, size_t *inbytesleft, 157 char* * outbuf, size_t *outbytesleft) 158{ 159 conv_t cd = (conv_t) icd; 160 size_t result = 0; 161 const unsigned char* inptr = (const unsigned char*) *inbuf; 162 size_t inleft = *inbytesleft; 163 unsigned char* outptr = (unsigned char*) *outbuf; 164 size_t outleft = *outbytesleft; 165 while (inleft > 0) { 166 state_t last_istate = cd->istate; 167 ucs4_t wc; 168 int incount; 169 int outcount; 170 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft); 171 if (incount < 0) { 172 if (incount == RET_ILSEQ) { 173 /* Case 1: invalid input */ 174 if (cd->discard_ilseq) { 175 switch (cd->iindex) { 176 case ei_ucs4: case ei_ucs4be: case ei_ucs4le: 177 case ei_utf32: case ei_utf32be: case ei_utf32le: 178 case ei_ucs4internal: case ei_ucs4swapped: 179 incount = 4; break; 180 case ei_ucs2: case ei_ucs2be: case ei_ucs2le: 181 case ei_utf16: case ei_utf16be: case ei_utf16le: 182 case ei_ucs2internal: case ei_ucs2swapped: 183 incount = 2; break; 184 default: 185 incount = 1; break; 186 } 187 goto outcount_zero; 188 } 189 errno = EILSEQ; 190 result = -1; 191 break; 192 } 193 if (incount == RET_TOOFEW(0)) { 194 /* Case 2: not enough bytes available to detect anything */ 195 errno = EINVAL; 196 result = -1; 197 break; 198 } 199 /* Case 3: k bytes read, but only a shift sequence */ 200 incount = -2-incount; 201 } else { 202 /* Case 4: k bytes read, making up a wide character */ 203 if (outleft == 0) { 204 cd->istate = last_istate; 205 errno = E2BIG; 206 result = -1; 207 break; 208 } 209 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 210 if (outcount != RET_ILUNI) 211 goto outcount_ok; 212 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 213 if ((wc >> 7) == (0xe0000 >> 7)) 214 goto outcount_zero; 215 /* Try transliteration. */ 216 result++; 217 if (cd->transliterate) { 218 outcount = unicode_transliterate(cd,wc,outptr,outleft); 219 if (outcount != RET_ILUNI) 220 goto outcount_ok; 221 } 222 if (cd->discard_ilseq) 223 goto outcount_zero; 224 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 225 if (outcount != RET_ILUNI) 226 goto outcount_ok; 227 cd->istate = last_istate; 228 errno = EILSEQ; 229 result = -1; 230 break; 231 outcount_ok: 232 if (outcount < 0) { 233 cd->istate = last_istate; 234 errno = E2BIG; 235 result = -1; 236 break; 237 } 238 if (!(outcount <= outleft)) abort(); 239 outptr += outcount; outleft -= outcount; 240 } 241 outcount_zero: 242 if (!(incount <= inleft)) abort(); 243 inptr += incount; inleft -= incount; 244 } 245 *inbuf = (const char*) inptr; 246 *inbytesleft = inleft; 247 *outbuf = (char*) outptr; 248 *outbytesleft = outleft; 249 return result; 250} 251 252static size_t unicode_loop_reset (iconv_t icd, 253 char* * outbuf, size_t *outbytesleft) 254{ 255 conv_t cd = (conv_t) icd; 256 if (outbuf == NULL || *outbuf == NULL) { 257 /* Reset the states. */ 258 memset(&cd->istate,'\0',sizeof(state_t)); 259 memset(&cd->ostate,'\0',sizeof(state_t)); 260 return 0; 261 } else { 262 size_t result = 0; 263 if (cd->ifuncs.xxx_flushwc) { 264 state_t last_istate = cd->istate; 265 ucs4_t wc; 266 if (cd->ifuncs.xxx_flushwc(cd, &wc)) { 267 unsigned char* outptr = (unsigned char*) *outbuf; 268 size_t outleft = *outbytesleft; 269 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft); 270 if (outcount != RET_ILUNI) 271 goto outcount_ok; 272 /* Handle Unicode tag characters (range U+E0000..U+E007F). */ 273 if ((wc >> 7) == (0xe0000 >> 7)) 274 goto outcount_zero; 275 /* Try transliteration. */ 276 result++; 277 if (cd->transliterate) { 278 outcount = unicode_transliterate(cd,wc,outptr,outleft); 279 if (outcount != RET_ILUNI) 280 goto outcount_ok; 281 } 282 if (cd->discard_ilseq) 283 goto outcount_zero; 284 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft); 285 if (outcount != RET_ILUNI) 286 goto outcount_ok; 287 cd->istate = last_istate; 288 errno = EILSEQ; 289 return -1; 290 outcount_ok: 291 if (outcount < 0) { 292 cd->istate = last_istate; 293 errno = E2BIG; 294 return -1; 295 } 296 if (!(outcount <= outleft)) abort(); 297 outptr += outcount; 298 outleft -= outcount; 299 outcount_zero: 300 *outbuf = (char*) outptr; 301 *outbytesleft = outleft; 302 } 303 } 304 if (cd->ofuncs.xxx_reset) { 305 unsigned char* outptr = (unsigned char*) *outbuf; 306 size_t outleft = *outbytesleft; 307 int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft); 308 if (outcount < 0) { 309 errno = E2BIG; 310 return -1; 311 } 312 if (!(outcount <= outleft)) abort(); 313 *outbuf = (char*) (outptr + outcount); 314 *outbytesleft = outleft - outcount; 315 } 316 memset(&cd->istate,'\0',sizeof(state_t)); 317 memset(&cd->ostate,'\0',sizeof(state_t)); 318 return result; 319 } 320} 321