1/*
2 * Copyright (C) 1999-2003, 2005-2006 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/* This file defines the conversion loop via Unicode as a pivot encoding. */
22
23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24static int unicode_transliterate (conv_t cd, ucs4_t wc,
25                                  unsigned char* outptr, size_t outleft)
26{
27/*
28  if (cd->oflags & HAVE_HANGUL_JAMO) {
29    /-* Decompose Hangul into Jamo. Use double-width Jamo (contained
30       in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
31       (contained in Unicode only). *-/
32    ucs4_t buf[3];
33    int ret = johab_hangul_decompose(cd,buf,wc);
34    if (ret != RET_ILUNI) {
35      /-* we know 1 <= ret <= 3 *-/
36      state_t backup_state = cd->ostate;
37      unsigned char* backup_outptr = outptr;
38      size_t backup_outleft = outleft;
39      int i, sub_outcount;
40      for (i = 0; i < ret; i++) {
41        if (outleft == 0) {
42          sub_outcount = RET_TOOSMALL;
43          goto johab_hangul_failed;
44        }
45        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
46        if (sub_outcount <= RET_ILUNI)
47          goto johab_hangul_failed;
48        if (!(sub_outcount <= outleft)) abort();
49        outptr += sub_outcount; outleft -= sub_outcount;
50      }
51      return outptr-backup_outptr;
52    johab_hangul_failed:
53      cd->ostate = backup_state;
54      outptr = backup_outptr;
55      outleft = backup_outleft;
56      if (sub_outcount != RET_ILUNI)
57        return RET_TOOSMALL;
58    }
59  }
60  {
61    /-* Try to use a variant, but postfix it with
62       U+303E IDEOGRAPHIC VARIATION INDICATOR
63       (cf. Ken Lunde's "CJKV information processing", p. 188). *-/
64    int indx = -1;
65    if (wc == 0x3006)
66      indx = 0;
67    else if (wc == 0x30f6)
68      indx = 1;
69    else if (wc >= 0x4e00 && wc < 0xa000)
70      indx = cjk_variants_indx[wc-0x4e00];
71    if (indx >= 0) {
72      for (;; indx++) {
73        ucs4_t buf[2];
74        unsigned short variant = cjk_variants[indx];
75        unsigned short last = variant & 0x8000;
76        variant &= 0x7fff;
77        variant += 0x3000;
78        buf[0] = variant; buf[1] = 0x303e;
79        {
80          state_t backup_state = cd->ostate;
81          unsigned char* backup_outptr = outptr;
82          size_t backup_outleft = outleft;
83          int i, sub_outcount;
84          for (i = 0; i < 2; i++) {
85            if (outleft == 0) {
86              sub_outcount = RET_TOOSMALL;
87              goto variant_failed;
88            }
89            sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
90            if (sub_outcount <= RET_ILUNI)
91              goto variant_failed;
92            if (!(sub_outcount <= outleft)) abort();
93            outptr += sub_outcount; outleft -= sub_outcount;
94          }
95          return outptr-backup_outptr;
96        variant_failed:
97          cd->ostate = backup_state;
98          outptr = backup_outptr;
99          outleft = backup_outleft;
100          if (sub_outcount != RET_ILUNI)
101            return RET_TOOSMALL;
102        }
103        if (last)
104          break;
105      }
106    }
107  }
108  if (wc >= 0x2018 && wc <= 0x201a) {
109    /-* Special case for quotation marks 0x2018, 0x2019, 0x201a *-/
110    ucs4_t substitute =
111      (cd->oflags & HAVE_QUOTATION_MARKS
112       ? (wc == 0x201a ? 0x2018 : wc)
113       : (cd->oflags & HAVE_ACCENTS
114          ? (wc==0x2019 ? 0x00b4 : 0x0060) /-* use accents *-/
115          : 0x0027 /-* use apostrophe *-/
116      )  );
117    int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
118    if (outcount != RET_ILUNI)
119      return outcount;
120  }
121  {
122    /-* Use the transliteration table. *-/
123    int indx = translit_index(wc);
124    if (indx >= 0) {
125      const unsigned int * cp = &translit_data[indx];
126      unsigned int num = *cp++;
127      state_t backup_state = cd->ostate;
128      unsigned char* backup_outptr = outptr;
129      size_t backup_outleft = outleft;
130      unsigned int i;
131      int sub_outcount;
132      for (i = 0; i < num; i++) {
133        if (outleft == 0) {
134          sub_outcount = RET_TOOSMALL;
135          goto translit_failed;
136        }
137        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
138        if (sub_outcount == RET_ILUNI)
139          /-* Recursive transliteration. *-/
140          sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
141        if (sub_outcount <= RET_ILUNI)
142          goto translit_failed;
143        if (!(sub_outcount <= outleft)) abort();
144        outptr += sub_outcount; outleft -= sub_outcount;
145      }
146      return outptr-backup_outptr;
147    translit_failed:
148      cd->ostate = backup_state;
149      outptr = backup_outptr;
150      outleft = backup_outleft;
151      if (sub_outcount != RET_ILUNI)
152        return RET_TOOSMALL;
153    }
154  }
155*/
156  return RET_ILUNI;
157}
158
159#ifndef LIBICONV_PLUG
160
/* Bookkeeping shared between the code that invokes a uc_to_mb fallback
   and uc_to_mb_write_replacement, which the fallback calls back. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;   /* where the next replacement byte goes */
  size_t l_outbytesleft;     /* room remaining at l_outbuf */
  int l_errno;               /* 0, or the first error encountered */
};

/*
 * Callback that appends buflen bytes from buf to the output buffer
 * described by callback_arg (a struct uc_to_mb_fallback_locals *).
 *
 * If the bytes do not fit, nothing is copied and l_errno is set to E2BIG.
 * Once l_errno is nonzero, every further call is a no-op.
 * A zero-length replacement is accepted without touching buf, so a
 * NULL buf with buflen == 0 is harmless.
 */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * plocals =
    (struct uc_to_mb_fallback_locals *) callback_arg;

  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno != 0)
    return;

  /* The replacement is written entirely or not at all. */
  if (plocals->l_outbytesleft < buflen) {
    plocals->l_errno = E2BIG;
    return;
  }

  /* memcpy must not be handed a null pointer, even for a length of 0. */
  if (buflen > 0) {
    memcpy(plocals->l_outbuf, buf, buflen);
    plocals->l_outbuf += buflen;
    plocals->l_outbytesleft -= buflen;
  }
}
184
185struct mb_to_uc_fallback_locals {
186  conv_t l_cd;
187  unsigned char* l_outbuf;
188  size_t l_outbytesleft;
189  int l_errno;
190};
191
192static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
193                                        void* callback_arg)
194{
195  struct mb_to_uc_fallback_locals * plocals =
196    (struct mb_to_uc_fallback_locals *) callback_arg;
197  /* Do nothing if already encountered an error in a previous call. */
198  if (plocals->l_errno == 0) {
199    /* Attempt to convert the passed buffer to the target encoding. */
200    conv_t cd = plocals->l_cd;
201    unsigned char* outptr = plocals->l_outbuf;
202    size_t outleft = plocals->l_outbytesleft;
203    for (; buflen > 0; buf++, buflen--) {
204      ucs4_t wc = *buf;
205      int outcount;
206      if (outleft == 0) {
207        plocals->l_errno = E2BIG;
208        break;
209      }
210      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
211      if (outcount != RET_ILUNI)
212        goto outcount_ok;
213      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
214      if ((wc >> 7) == (0xe0000 >> 7))
215        goto outcount_zero;
216      /* Try transliteration. */
217      if (cd->transliterate) {
218        outcount = unicode_transliterate(cd,wc,outptr,outleft);
219        if (outcount != RET_ILUNI)
220          goto outcount_ok;
221      }
222      if (cd->discard_ilseq) {
223        outcount = 0;
224        goto outcount_ok;
225      }
226      #ifndef LIBICONV_PLUG
227      else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
228        struct uc_to_mb_fallback_locals locals;
229        locals.l_outbuf = outptr;
230        locals.l_outbytesleft = outleft;
231        locals.l_errno = 0;
232        cd->fallbacks.uc_to_mb_fallback(wc,
233                                        uc_to_mb_write_replacement,
234                                        &locals,
235                                        cd->fallbacks.data);
236        if (locals.l_errno != 0) {
237          plocals->l_errno = locals.l_errno;
238          break;
239        }
240        outptr = locals.l_outbuf;
241        outleft = locals.l_outbytesleft;
242        outcount = 0;
243        goto outcount_ok;
244      }
245      #endif
246      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
247      if (outcount != RET_ILUNI)
248        goto outcount_ok;
249      plocals->l_errno = EILSEQ;
250      break;
251    outcount_ok:
252      if (outcount < 0) {
253        plocals->l_errno = E2BIG;
254        break;
255      }
256      #ifndef LIBICONV_PLUG
257      if (cd->hooks.uc_hook)
258        (*cd->hooks.uc_hook)(wc, cd->hooks.data);
259      #endif
260      if (!(outcount <= outleft)) abort();
261      outptr += outcount; outleft -= outcount;
262    outcount_zero: ;
263    }
264    plocals->l_outbuf = outptr;
265    plocals->l_outbytesleft = outleft;
266  }
267}
268
269#endif /* !LIBICONV_PLUG */
270
271static size_t unicode_loop_convert (iconv_t icd,
272                                    const char* * inbuf, size_t *inbytesleft,
273                                    char* * outbuf, size_t *outbytesleft)
274{
275  conv_t cd = (conv_t) icd;
276  size_t result = 0;
277  const unsigned char* inptr = (const unsigned char*) *inbuf;
278  size_t inleft = *inbytesleft;
279  unsigned char* outptr = (unsigned char*) *outbuf;
280  size_t outleft = *outbytesleft;
281  while (inleft > 0) {
282    state_t last_istate = cd->istate;
283    ucs4_t wc;
284    int incount;
285    int outcount;
286    incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
287    if (incount < 0) {
288      if (incount == RET_ILSEQ) {
289        /* Case 1: invalid input */
290        if (cd->discard_ilseq) {
291          switch (cd->iindex) {
292            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
293            case ei_utf32: case ei_utf32be: case ei_utf32le:
294            case ei_ucs4internal: case ei_ucs4swapped:
295              incount = 4; break;
296            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
297            case ei_utf16: case ei_utf16be: case ei_utf16le:
298            case ei_ucs2internal: case ei_ucs2swapped:
299              incount = 2; break;
300            default:
301              incount = 1; break;
302          }
303          goto outcount_zero;
304        }
305        #ifndef LIBICONV_PLUG
306        else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
307          struct mb_to_uc_fallback_locals locals;
308          switch (cd->iindex) {
309            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
310            case ei_utf32: case ei_utf32be: case ei_utf32le:
311            case ei_ucs4internal: case ei_ucs4swapped:
312              incount = 4; break;
313            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
314            case ei_utf16: case ei_utf16be: case ei_utf16le:
315            case ei_ucs2internal: case ei_ucs2swapped:
316              incount = 2; break;
317            default:
318              incount = 1; break;
319          }
320          locals.l_cd = cd;
321          locals.l_outbuf = outptr;
322          locals.l_outbytesleft = outleft;
323          locals.l_errno = 0;
324          cd->fallbacks.mb_to_uc_fallback(inptr, incount,
325                                          mb_to_uc_write_replacement,
326                                          &locals,
327                                          cd->fallbacks.data);
328          if (locals.l_errno != 0) {
329            errno = locals.l_errno;
330            result = -1;
331            break;
332          }
333          outptr = locals.l_outbuf;
334          outleft = locals.l_outbytesleft;
335          result += 1;
336          goto outcount_zero;
337        }
338        #endif
339        errno = EILSEQ;
340        result = -1;
341        break;
342      }
343      if (incount == RET_TOOFEW(0)) {
344        /* Case 2: not enough bytes available to detect anything */
345        errno = EINVAL;
346        result = -1;
347        break;
348      }
349      /* Case 3: k bytes read, but only a shift sequence */
350      incount = -2-incount;
351    } else {
352      /* Case 4: k bytes read, making up a wide character */
353      if (outleft == 0) {
354        cd->istate = last_istate;
355        errno = E2BIG;
356        result = -1;
357        break;
358      }
359      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
360      if (outcount != RET_ILUNI)
361        goto outcount_ok;
362      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
363      if ((wc >> 7) == (0xe0000 >> 7))
364        goto outcount_zero;
365      /* Try transliteration. */
366      result++;
367      if (cd->transliterate) {
368        outcount = unicode_transliterate(cd,wc,outptr,outleft);
369        if (outcount != RET_ILUNI)
370          goto outcount_ok;
371      }
372      if (cd->discard_ilseq) {
373        outcount = 0;
374        goto outcount_ok;
375      }
376      #ifndef LIBICONV_PLUG
377      else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
378        struct uc_to_mb_fallback_locals locals;
379        locals.l_outbuf = outptr;
380        locals.l_outbytesleft = outleft;
381        locals.l_errno = 0;
382        cd->fallbacks.uc_to_mb_fallback(wc,
383                                        uc_to_mb_write_replacement,
384                                        &locals,
385                                        cd->fallbacks.data);
386        if (locals.l_errno != 0) {
387          cd->istate = last_istate;
388          errno = locals.l_errno;
389          return -1;
390        }
391        outptr = locals.l_outbuf;
392        outleft = locals.l_outbytesleft;
393        outcount = 0;
394        goto outcount_ok;
395      }
396      #endif
397      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
398      if (outcount != RET_ILUNI)
399        goto outcount_ok;
400      cd->istate = last_istate;
401      errno = EILSEQ;
402      result = -1;
403      break;
404    outcount_ok:
405      if (outcount < 0) {
406        cd->istate = last_istate;
407        errno = E2BIG;
408        result = -1;
409        break;
410      }
411      #ifndef LIBICONV_PLUG
412      if (cd->hooks.uc_hook)
413        (*cd->hooks.uc_hook)(wc, cd->hooks.data);
414      #endif
415      if (!(outcount <= outleft)) abort();
416      outptr += outcount; outleft -= outcount;
417    }
418  outcount_zero:
419    if (!(incount <= inleft)) abort();
420    inptr += incount; inleft -= incount;
421  }
422  *inbuf = (const char*) inptr;
423  *inbytesleft = inleft;
424  *outbuf = (char*) outptr;
425  *outbytesleft = outleft;
426  return result;
427}
428
429static size_t unicode_loop_reset (iconv_t icd,
430                                  char* * outbuf, size_t *outbytesleft)
431{
432  conv_t cd = (conv_t) icd;
433  if (outbuf == NULL || *outbuf == NULL) {
434    /* Reset the states. */
435    memset(&cd->istate,'\0',sizeof(state_t));
436    memset(&cd->ostate,'\0',sizeof(state_t));
437    return 0;
438  } else {
439    size_t result = 0;
440    if (cd->ifuncs.xxx_flushwc) {
441      state_t last_istate = cd->istate;
442      ucs4_t wc;
443      if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
444        unsigned char* outptr = (unsigned char*) *outbuf;
445        size_t outleft = *outbytesleft;
446        int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
447        if (outcount != RET_ILUNI)
448          goto outcount_ok;
449        /* Handle Unicode tag characters (range U+E0000..U+E007F). */
450        if ((wc >> 7) == (0xe0000 >> 7))
451          goto outcount_zero;
452        /* Try transliteration. */
453        result++;
454        if (cd->transliterate) {
455          outcount = unicode_transliterate(cd,wc,outptr,outleft);
456          if (outcount != RET_ILUNI)
457            goto outcount_ok;
458        }
459        if (cd->discard_ilseq) {
460          outcount = 0;
461          goto outcount_ok;
462        }
463        #ifndef LIBICONV_PLUG
464        else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
465          struct uc_to_mb_fallback_locals locals;
466          locals.l_outbuf = outptr;
467          locals.l_outbytesleft = outleft;
468          locals.l_errno = 0;
469          cd->fallbacks.uc_to_mb_fallback(wc,
470                                          uc_to_mb_write_replacement,
471                                          &locals,
472                                          cd->fallbacks.data);
473          if (locals.l_errno != 0) {
474            cd->istate = last_istate;
475            errno = locals.l_errno;
476            return -1;
477          }
478          outptr = locals.l_outbuf;
479          outleft = locals.l_outbytesleft;
480          outcount = 0;
481          goto outcount_ok;
482        }
483        #endif
484        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
485        if (outcount != RET_ILUNI)
486          goto outcount_ok;
487        cd->istate = last_istate;
488        errno = EILSEQ;
489        return -1;
490      outcount_ok:
491        if (outcount < 0) {
492          cd->istate = last_istate;
493          errno = E2BIG;
494          return -1;
495        }
496        #ifndef LIBICONV_PLUG
497        if (cd->hooks.uc_hook)
498          (*cd->hooks.uc_hook)(wc, cd->hooks.data);
499        #endif
500        if (!(outcount <= outleft)) abort();
501        outptr += outcount;
502        outleft -= outcount;
503      outcount_zero:
504        *outbuf = (char*) outptr;
505        *outbytesleft = outleft;
506      }
507    }
508    if (cd->ofuncs.xxx_reset) {
509      unsigned char* outptr = (unsigned char*) *outbuf;
510      size_t outleft = *outbytesleft;
511      int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
512      if (outcount < 0) {
513        errno = E2BIG;
514        return -1;
515      }
516      if (!(outcount <= outleft)) abort();
517      *outbuf = (char*) (outptr + outcount);
518      *outbytesleft = outleft - outcount;
519    }
520    memset(&cd->istate,'\0',sizeof(state_t));
521    memset(&cd->ostate,'\0',sizeof(state_t));
522    return result;
523  }
524}
525