1/*
2 * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/* This file defines the conversion loop via Unicode as a pivot encoding. */
22
23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24static int unicode_transliterate (conv_t cd, ucs4_t wc,
25                                  unsigned char* outptr, size_t outleft)
26{
27  if (cd->oflags & HAVE_HANGUL_JAMO) {
28    /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29       in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30       (contained in Unicode only). */
31    ucs4_t buf[3];
32    int ret = johab_hangul_decompose(cd,buf,wc);
33    if (ret != RET_ILUNI) {
34      /* we know 1 <= ret <= 3 */
35      state_t backup_state = cd->ostate;
36      unsigned char* backup_outptr = outptr;
37      size_t backup_outleft = outleft;
38      int i, sub_outcount;
39      for (i = 0; i < ret; i++) {
40        if (outleft == 0) {
41          sub_outcount = RET_TOOSMALL;
42          goto johab_hangul_failed;
43        }
44        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
45        if (sub_outcount <= RET_ILUNI)
46          goto johab_hangul_failed;
47        if (!(sub_outcount <= outleft)) abort();
48        outptr += sub_outcount; outleft -= sub_outcount;
49      }
50      return outptr-backup_outptr;
51    johab_hangul_failed:
52      cd->ostate = backup_state;
53      outptr = backup_outptr;
54      outleft = backup_outleft;
55      if (sub_outcount != RET_ILUNI)
56        return RET_TOOSMALL;
57    }
58  }
59  {
60    /* Try to use a variant, but postfix it with
61       U+303E IDEOGRAPHIC VARIATION INDICATOR
62       (cf. Ken Lunde's "CJKV information processing", p. 188). */
63    int indx = -1;
64    if (wc == 0x3006)
65      indx = 0;
66    else if (wc == 0x30f6)
67      indx = 1;
68    else if (wc >= 0x4e00 && wc < 0xa000)
69      indx = cjk_variants_indx[wc-0x4e00];
70    if (indx >= 0) {
71      for (;; indx++) {
72        ucs4_t buf[2];
73        unsigned short variant = cjk_variants[indx];
74        unsigned short last = variant & 0x8000;
75        variant &= 0x7fff;
76        variant += 0x3000;
77        buf[0] = variant; buf[1] = 0x303e;
78        {
79          state_t backup_state = cd->ostate;
80          unsigned char* backup_outptr = outptr;
81          size_t backup_outleft = outleft;
82          int i, sub_outcount;
83          for (i = 0; i < 2; i++) {
84            if (outleft == 0) {
85              sub_outcount = RET_TOOSMALL;
86              goto variant_failed;
87            }
88            sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
89            if (sub_outcount <= RET_ILUNI)
90              goto variant_failed;
91            if (!(sub_outcount <= outleft)) abort();
92            outptr += sub_outcount; outleft -= sub_outcount;
93          }
94          return outptr-backup_outptr;
95        variant_failed:
96          cd->ostate = backup_state;
97          outptr = backup_outptr;
98          outleft = backup_outleft;
99          if (sub_outcount != RET_ILUNI)
100            return RET_TOOSMALL;
101        }
102        if (last)
103          break;
104      }
105    }
106  }
107  if (wc >= 0x2018 && wc <= 0x201a) {
108    /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
109    ucs4_t substitute =
110      (cd->oflags & HAVE_QUOTATION_MARKS
111       ? (wc == 0x201a ? 0x2018 : wc)
112       : (cd->oflags & HAVE_ACCENTS
113          ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114          : 0x0027 /* use apostrophe */
115      )  );
116    int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
117    if (outcount != RET_ILUNI)
118      return outcount;
119  }
120  {
121    /* Use the transliteration table. */
122    int indx = translit_index(wc);
123    if (indx >= 0) {
124      const unsigned int * cp = &translit_data[indx];
125      unsigned int num = *cp++;
126      state_t backup_state = cd->ostate;
127      unsigned char* backup_outptr = outptr;
128      size_t backup_outleft = outleft;
129      unsigned int i;
130      int sub_outcount;
131      for (i = 0; i < num; i++) {
132        if (outleft == 0) {
133          sub_outcount = RET_TOOSMALL;
134          goto translit_failed;
135        }
136        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
137        if (sub_outcount == RET_ILUNI)
138          /* Recursive transliteration. */
139          sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
140        if (sub_outcount <= RET_ILUNI)
141          goto translit_failed;
142        if (!(sub_outcount <= outleft)) abort();
143        outptr += sub_outcount; outleft -= sub_outcount;
144      }
145      return outptr-backup_outptr;
146    translit_failed:
147      cd->ostate = backup_state;
148      outptr = backup_outptr;
149      outleft = backup_outleft;
150      if (sub_outcount != RET_ILUNI)
151        return RET_TOOSMALL;
152    }
153  }
154  return RET_ILUNI;
155}
156
157#ifndef LIBICONV_PLUG
158
/* Bookkeeping passed (as callback_arg) to uc_to_mb_write_replacement. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;   /* where the next replacement byte goes */
  size_t l_outbytesleft;     /* number of bytes still free at l_outbuf */
  int l_errno;               /* 0, or the error code of the first failure */
};

/* Append the buflen bytes at buf to the output area described by
   callback_arg (a struct uc_to_mb_fallback_locals *).
   If they do not fit, nothing is copied and l_errno is set to E2BIG.
   Once l_errno is nonzero, all further calls are no-ops. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * const st =
    (struct uc_to_mb_fallback_locals *) callback_arg;

  /* Do nothing if already encountered an error in a previous call. */
  if (st->l_errno != 0)
    return;

  /* Refuse a replacement that does not fit entirely. */
  if (buflen > st->l_outbytesleft) {
    st->l_errno = E2BIG;
    return;
  }

  /* Copy the passed buffer to the output buffer and account for it. */
  memcpy(st->l_outbuf, buf, buflen);
  st->l_outbuf += buflen;
  st->l_outbytesleft -= buflen;
}
182
183struct mb_to_uc_fallback_locals {
184  conv_t l_cd;
185  unsigned char* l_outbuf;
186  size_t l_outbytesleft;
187  int l_errno;
188};
189
190static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
191                                        void* callback_arg)
192{
193  struct mb_to_uc_fallback_locals * plocals =
194    (struct mb_to_uc_fallback_locals *) callback_arg;
195  /* Do nothing if already encountered an error in a previous call. */
196  if (plocals->l_errno == 0) {
197    /* Attempt to convert the passed buffer to the target encoding. */
198    conv_t cd = plocals->l_cd;
199    unsigned char* outptr = plocals->l_outbuf;
200    size_t outleft = plocals->l_outbytesleft;
201    for (; buflen > 0; buf++, buflen--) {
202      ucs4_t wc = *buf;
203      int outcount;
204      if (outleft == 0) {
205        plocals->l_errno = E2BIG;
206        break;
207      }
208      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
209      if (outcount != RET_ILUNI)
210        goto outcount_ok;
211      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
212      if ((wc >> 7) == (0xe0000 >> 7))
213        goto outcount_zero;
214      /* Try transliteration. */
215      if (cd->transliterate) {
216        outcount = unicode_transliterate(cd,wc,outptr,outleft);
217        if (outcount != RET_ILUNI)
218          goto outcount_ok;
219      }
220      if (cd->discard_ilseq) {
221        outcount = 0;
222        goto outcount_ok;
223      }
224      #ifndef LIBICONV_PLUG
225      else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
226        struct uc_to_mb_fallback_locals locals;
227        locals.l_outbuf = outptr;
228        locals.l_outbytesleft = outleft;
229        locals.l_errno = 0;
230        cd->fallbacks.uc_to_mb_fallback(wc,
231                                        uc_to_mb_write_replacement,
232                                        &locals,
233                                        cd->fallbacks.data);
234        if (locals.l_errno != 0) {
235          plocals->l_errno = locals.l_errno;
236          break;
237        }
238        outptr = locals.l_outbuf;
239        outleft = locals.l_outbytesleft;
240        outcount = 0;
241        goto outcount_ok;
242      }
243      #endif
244      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
245      if (outcount != RET_ILUNI)
246        goto outcount_ok;
247      plocals->l_errno = EILSEQ;
248      break;
249    outcount_ok:
250      if (outcount < 0) {
251        plocals->l_errno = E2BIG;
252        break;
253      }
254      #ifndef LIBICONV_PLUG
255      if (cd->hooks.uc_hook)
256        (*cd->hooks.uc_hook)(wc, cd->hooks.data);
257      #endif
258      if (!(outcount <= outleft)) abort();
259      outptr += outcount; outleft -= outcount;
260    outcount_zero: ;
261    }
262    plocals->l_outbuf = outptr;
263    plocals->l_outbytesleft = outleft;
264  }
265}
266
267#endif /* !LIBICONV_PLUG */
268
269static size_t unicode_loop_convert (iconv_t icd,
270                                    const char* * inbuf, size_t *inbytesleft,
271                                    char* * outbuf, size_t *outbytesleft)
272{
273  conv_t cd = (conv_t) icd;
274  size_t result = 0;
275  const unsigned char* inptr = (const unsigned char*) *inbuf;
276  size_t inleft = *inbytesleft;
277  unsigned char* outptr = (unsigned char*) *outbuf;
278  size_t outleft = *outbytesleft;
279  while (inleft > 0) {
280    state_t last_istate = cd->istate;
281    ucs4_t wc;
282    int incount;
283    int outcount;
284    incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
285    if (incount < 0) {
286      if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
287        /* Case 1: invalid input, possibly after a shift sequence */
288        incount = DECODE_SHIFT_ILSEQ(incount);
289        if (cd->discard_ilseq) {
290          switch (cd->iindex) {
291            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
292            case ei_utf32: case ei_utf32be: case ei_utf32le:
293            case ei_ucs4internal: case ei_ucs4swapped:
294              incount += 4; break;
295            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
296            case ei_utf16: case ei_utf16be: case ei_utf16le:
297            case ei_ucs2internal: case ei_ucs2swapped:
298              incount += 2; break;
299            default:
300              incount += 1; break;
301          }
302          goto outcount_zero;
303        }
304        #ifndef LIBICONV_PLUG
305        else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
306          unsigned int incount2;
307          struct mb_to_uc_fallback_locals locals;
308          switch (cd->iindex) {
309            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
310            case ei_utf32: case ei_utf32be: case ei_utf32le:
311            case ei_ucs4internal: case ei_ucs4swapped:
312              incount2 = 4; break;
313            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
314            case ei_utf16: case ei_utf16be: case ei_utf16le:
315            case ei_ucs2internal: case ei_ucs2swapped:
316              incount2 = 2; break;
317            default:
318              incount2 = 1; break;
319          }
320          locals.l_cd = cd;
321          locals.l_outbuf = outptr;
322          locals.l_outbytesleft = outleft;
323          locals.l_errno = 0;
324          cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
325                                          mb_to_uc_write_replacement,
326                                          &locals,
327                                          cd->fallbacks.data);
328          if (locals.l_errno != 0) {
329            inptr += incount; inleft -= incount;
330            errno = locals.l_errno;
331            result = -1;
332            break;
333          }
334          incount += incount2;
335          outptr = locals.l_outbuf;
336          outleft = locals.l_outbytesleft;
337          result += 1;
338          goto outcount_zero;
339        }
340        #endif
341        inptr += incount; inleft -= incount;
342        errno = EILSEQ;
343        result = -1;
344        break;
345      }
346      if (incount == RET_TOOFEW(0)) {
347        /* Case 2: not enough bytes available to detect anything */
348        errno = EINVAL;
349        result = -1;
350        break;
351      }
352      /* Case 3: k bytes read, but only a shift sequence */
353      incount = DECODE_TOOFEW(incount);
354    } else {
355      /* Case 4: k bytes read, making up a wide character */
356      if (outleft == 0) {
357        cd->istate = last_istate;
358        errno = E2BIG;
359        result = -1;
360        break;
361      }
362      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
363      if (outcount != RET_ILUNI)
364        goto outcount_ok;
365      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
366      if ((wc >> 7) == (0xe0000 >> 7))
367        goto outcount_zero;
368      /* Try transliteration. */
369      result++;
370      if (cd->transliterate) {
371        outcount = unicode_transliterate(cd,wc,outptr,outleft);
372        if (outcount != RET_ILUNI)
373          goto outcount_ok;
374      }
375      if (cd->discard_ilseq) {
376        outcount = 0;
377        goto outcount_ok;
378      }
379      #ifndef LIBICONV_PLUG
380      else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
381        struct uc_to_mb_fallback_locals locals;
382        locals.l_outbuf = outptr;
383        locals.l_outbytesleft = outleft;
384        locals.l_errno = 0;
385        cd->fallbacks.uc_to_mb_fallback(wc,
386                                        uc_to_mb_write_replacement,
387                                        &locals,
388                                        cd->fallbacks.data);
389        if (locals.l_errno != 0) {
390          cd->istate = last_istate;
391          errno = locals.l_errno;
392          return -1;
393        }
394        outptr = locals.l_outbuf;
395        outleft = locals.l_outbytesleft;
396        outcount = 0;
397        goto outcount_ok;
398      }
399      #endif
400      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
401      if (outcount != RET_ILUNI)
402        goto outcount_ok;
403      cd->istate = last_istate;
404      errno = EILSEQ;
405      result = -1;
406      break;
407    outcount_ok:
408      if (outcount < 0) {
409        cd->istate = last_istate;
410        errno = E2BIG;
411        result = -1;
412        break;
413      }
414      #ifndef LIBICONV_PLUG
415      if (cd->hooks.uc_hook)
416        (*cd->hooks.uc_hook)(wc, cd->hooks.data);
417      #endif
418      if (!(outcount <= outleft)) abort();
419      outptr += outcount; outleft -= outcount;
420    }
421  outcount_zero:
422    if (!(incount <= inleft)) abort();
423    inptr += incount; inleft -= incount;
424  }
425  *inbuf = (const char*) inptr;
426  *inbytesleft = inleft;
427  *outbuf = (char*) outptr;
428  *outbytesleft = outleft;
429  return result;
430}
431
432static size_t unicode_loop_reset (iconv_t icd,
433                                  char* * outbuf, size_t *outbytesleft)
434{
435  conv_t cd = (conv_t) icd;
436  if (outbuf == NULL || *outbuf == NULL) {
437    /* Reset the states. */
438    memset(&cd->istate,'\0',sizeof(state_t));
439    memset(&cd->ostate,'\0',sizeof(state_t));
440    return 0;
441  } else {
442    size_t result = 0;
443    if (cd->ifuncs.xxx_flushwc) {
444      state_t last_istate = cd->istate;
445      ucs4_t wc;
446      if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
447        unsigned char* outptr = (unsigned char*) *outbuf;
448        size_t outleft = *outbytesleft;
449        int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
450        if (outcount != RET_ILUNI)
451          goto outcount_ok;
452        /* Handle Unicode tag characters (range U+E0000..U+E007F). */
453        if ((wc >> 7) == (0xe0000 >> 7))
454          goto outcount_zero;
455        /* Try transliteration. */
456        result++;
457        if (cd->transliterate) {
458          outcount = unicode_transliterate(cd,wc,outptr,outleft);
459          if (outcount != RET_ILUNI)
460            goto outcount_ok;
461        }
462        if (cd->discard_ilseq) {
463          outcount = 0;
464          goto outcount_ok;
465        }
466        #ifndef LIBICONV_PLUG
467        else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
468          struct uc_to_mb_fallback_locals locals;
469          locals.l_outbuf = outptr;
470          locals.l_outbytesleft = outleft;
471          locals.l_errno = 0;
472          cd->fallbacks.uc_to_mb_fallback(wc,
473                                          uc_to_mb_write_replacement,
474                                          &locals,
475                                          cd->fallbacks.data);
476          if (locals.l_errno != 0) {
477            cd->istate = last_istate;
478            errno = locals.l_errno;
479            return -1;
480          }
481          outptr = locals.l_outbuf;
482          outleft = locals.l_outbytesleft;
483          outcount = 0;
484          goto outcount_ok;
485        }
486        #endif
487        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
488        if (outcount != RET_ILUNI)
489          goto outcount_ok;
490        cd->istate = last_istate;
491        errno = EILSEQ;
492        return -1;
493      outcount_ok:
494        if (outcount < 0) {
495          cd->istate = last_istate;
496          errno = E2BIG;
497          return -1;
498        }
499        #ifndef LIBICONV_PLUG
500        if (cd->hooks.uc_hook)
501          (*cd->hooks.uc_hook)(wc, cd->hooks.data);
502        #endif
503        if (!(outcount <= outleft)) abort();
504        outptr += outcount;
505        outleft -= outcount;
506      outcount_zero:
507        *outbuf = (char*) outptr;
508        *outbytesleft = outleft;
509      }
510    }
511    if (cd->ofuncs.xxx_reset) {
512      unsigned char* outptr = (unsigned char*) *outbuf;
513      size_t outleft = *outbytesleft;
514      int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
515      if (outcount < 0) {
516        errno = E2BIG;
517        return -1;
518      }
519      if (!(outcount <= outleft)) abort();
520      *outbuf = (char*) (outptr + outcount);
521      *outbytesleft = outleft - outcount;
522    }
523    memset(&cd->istate,'\0',sizeof(state_t));
524    memset(&cd->ostate,'\0',sizeof(state_t));
525    return result;
526  }
527}
528