1/*
2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21/* This file defines the conversion loop via Unicode as a pivot encoding. */
22
23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24#include <errno.h>
25static int unicode_transliterate (conv_t cd, ucs4_t wc,
26                                  unsigned char* outptr, size_t outleft)
27{
28  if (cd->oflags & HAVE_HANGUL_JAMO) {
29    /* Decompose Hangul into Jamo. Use double-width Jamo (contained
30       in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
31       (contained in Unicode only). */
32    ucs4_t buf[3];
33    int ret = johab_hangul_decompose(cd,buf,wc);
34    if (ret != RET_ILUNI) {
35      /* we know 1 <= ret <= 3 */
36      state_t backup_state = cd->ostate;
37      unsigned char* backup_outptr = outptr;
38      size_t backup_outleft = outleft;
39      int i, sub_outcount;
40      for (i = 0; i < ret; i++) {
41        if (outleft == 0) {
42          sub_outcount = RET_TOOSMALL;
43          goto johab_hangul_failed;
44        }
45        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
46        if (sub_outcount <= RET_ILUNI)
47          goto johab_hangul_failed;
48        if (!(sub_outcount <= outleft)) abort();
49        outptr += sub_outcount; outleft -= sub_outcount;
50      }
51      return outptr-backup_outptr;
52    johab_hangul_failed:
53      cd->ostate = backup_state;
54      outptr = backup_outptr;
55      outleft = backup_outleft;
56      if (sub_outcount < 0)
57        return RET_TOOSMALL;
58    }
59  }
60  {
61    /* Try to use a variant, but postfix it with
62       U+303E IDEOGRAPHIC VARIATION INDICATOR
63       (cf. Ken Lunde's "CJKV information processing", p. 188). */
64    int indx = -1;
65    if (wc == 0x3006)
66      indx = 0;
67    else if (wc == 0x30f6)
68      indx = 1;
69    else if (wc >= 0x4e00 && wc < 0xa000)
70      indx = cjk_variants_indx[wc-0x4e00];
71    if (indx >= 0) {
72      for (;; indx++) {
73        ucs4_t buf[2];
74        unsigned short variant = cjk_variants[indx];
75        unsigned short last = variant & 0x8000;
76        variant &= 0x7fff;
77        variant += 0x3000;
78        buf[0] = variant; buf[1] = 0x303e;
79        {
80          state_t backup_state = cd->ostate;
81          unsigned char* backup_outptr = outptr;
82          size_t backup_outleft = outleft;
83          int i, sub_outcount;
84          for (i = 0; i < 2; i++) {
85            if (outleft == 0) {
86              sub_outcount = RET_TOOSMALL;
87              goto variant_failed;
88            }
89            sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
90            if (sub_outcount <= RET_ILUNI)
91              goto variant_failed;
92            if (!(sub_outcount <= outleft)) abort();
93            outptr += sub_outcount; outleft -= sub_outcount;
94          }
95          return outptr-backup_outptr;
96        variant_failed:
97          cd->ostate = backup_state;
98          outptr = backup_outptr;
99          outleft = backup_outleft;
100          if (sub_outcount < 0)
101            return RET_TOOSMALL;
102        }
103        if (last)
104          break;
105      }
106    }
107  }
108  if (wc >= 0x2018 && wc <= 0x201a) {
109    /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
110    ucs4_t substitute =
111      (cd->oflags & HAVE_QUOTATION_MARKS
112       ? (wc == 0x201a ? 0x2018 : wc)
113       : (cd->oflags & HAVE_ACCENTS
114          ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
115          : 0x0027 /* use apostrophe */
116      )  );
117    int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
118    if (outcount != RET_ILUNI)
119      return outcount;
120  }
121  {
122    /* Use the transliteration table. */
123    int indx = translit_index(wc);
124    if (indx >= 0) {
125      const unsigned short * cp = &translit_data[indx];
126      unsigned int num = *cp++;
127      state_t backup_state = cd->ostate;
128      unsigned char* backup_outptr = outptr;
129      size_t backup_outleft = outleft;
130      unsigned int i;
131      int sub_outcount;
132      for (i = 0; i < num; i++) {
133        if (outleft == 0) {
134          sub_outcount = RET_TOOSMALL;
135          goto translit_failed;
136        }
137        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
138        if (sub_outcount <= RET_ILUNI)
139          goto translit_failed;
140        if (!(sub_outcount <= outleft)) abort();
141        outptr += sub_outcount; outleft -= sub_outcount;
142      }
143      return outptr-backup_outptr;
144    translit_failed:
145      cd->ostate = backup_state;
146      outptr = backup_outptr;
147      outleft = backup_outleft;
148      if (sub_outcount != RET_ILUNI)
149        return RET_TOOSMALL;
150    }
151  }
152  return RET_ILUNI;
153}
154
155static size_t unicode_loop_convert (iconv_t icd,
156                                    const char* * inbuf, size_t *inbytesleft,
157                                    char* * outbuf, size_t *outbytesleft)
158{
159  conv_t cd = (conv_t) icd;
160  size_t result = 0;
161  const unsigned char* inptr = (const unsigned char*) *inbuf;
162  size_t inleft = *inbytesleft;
163  unsigned char* outptr = (unsigned char*) *outbuf;
164  size_t outleft = *outbytesleft;
165  while (inleft > 0) {
166    state_t last_istate = cd->istate;
167    ucs4_t wc;
168    int incount;
169    int outcount;
170    incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
171    if (incount < 0) {
172      if (incount == RET_ILSEQ) {
173        /* Case 1: invalid input */
174        if (cd->discard_ilseq) {
175          switch (cd->iindex) {
176            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
177            case ei_utf32: case ei_utf32be: case ei_utf32le:
178            case ei_ucs4internal: case ei_ucs4swapped:
179              incount = 4; break;
180            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
181            case ei_utf16: case ei_utf16be: case ei_utf16le:
182            case ei_ucs2internal: case ei_ucs2swapped:
183              incount = 2; break;
184            default:
185              incount = 1; break;
186          }
187          goto outcount_zero;
188        }
189        errno = EILSEQ;
190        result = -1;
191        break;
192      }
193      if (incount == RET_TOOFEW(0)) {
194        /* Case 2: not enough bytes available to detect anything */
195        errno = EINVAL;
196        result = -1;
197        break;
198      }
199      /* Case 3: k bytes read, but only a shift sequence */
200      incount = -2-incount;
201    } else {
202      /* Case 4: k bytes read, making up a wide character */
203      if (outleft == 0) {
204        cd->istate = last_istate;
205        errno = E2BIG;
206        result = -1;
207        break;
208      }
209      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
210      if (outcount != RET_ILUNI)
211        goto outcount_ok;
212      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
213      if ((wc >> 7) == (0xe0000 >> 7))
214        goto outcount_zero;
215      /* Try transliteration. */
216      result++;
217      if (cd->transliterate) {
218        outcount = unicode_transliterate(cd,wc,outptr,outleft);
219        if (outcount != RET_ILUNI)
220          goto outcount_ok;
221      }
222      if (cd->discard_ilseq)
223        goto outcount_zero;
224      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
225      if (outcount != RET_ILUNI)
226        goto outcount_ok;
227      cd->istate = last_istate;
228      errno = EILSEQ;
229      result = -1;
230      break;
231    outcount_ok:
232      if (outcount < 0) {
233        cd->istate = last_istate;
234        errno = E2BIG;
235        result = -1;
236        break;
237      }
238      if (!(outcount <= outleft)) abort();
239      outptr += outcount; outleft -= outcount;
240    }
241  outcount_zero:
242    if (!(incount <= inleft)) abort();
243    inptr += incount; inleft -= incount;
244  }
245  *inbuf = (const char*) inptr;
246  *inbytesleft = inleft;
247  *outbuf = (char*) outptr;
248  *outbytesleft = outleft;
249  return result;
250}
251
252static size_t unicode_loop_reset (iconv_t icd,
253                                  char* * outbuf, size_t *outbytesleft)
254{
255  conv_t cd = (conv_t) icd;
256  if (outbuf == NULL || *outbuf == NULL) {
257    /* Reset the states. */
258    memset(&cd->istate,'\0',sizeof(state_t));
259    memset(&cd->ostate,'\0',sizeof(state_t));
260    return 0;
261  } else {
262    size_t result = 0;
263    if (cd->ifuncs.xxx_flushwc) {
264      state_t last_istate = cd->istate;
265      ucs4_t wc;
266      if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
267        unsigned char* outptr = (unsigned char*) *outbuf;
268        size_t outleft = *outbytesleft;
269        int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
270        if (outcount != RET_ILUNI)
271          goto outcount_ok;
272        /* Handle Unicode tag characters (range U+E0000..U+E007F). */
273        if ((wc >> 7) == (0xe0000 >> 7))
274          goto outcount_zero;
275        /* Try transliteration. */
276        result++;
277        if (cd->transliterate) {
278          outcount = unicode_transliterate(cd,wc,outptr,outleft);
279          if (outcount != RET_ILUNI)
280            goto outcount_ok;
281        }
282        if (cd->discard_ilseq)
283          goto outcount_zero;
284        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
285        if (outcount != RET_ILUNI)
286          goto outcount_ok;
287        cd->istate = last_istate;
288        errno = EILSEQ;
289        return -1;
290      outcount_ok:
291        if (outcount < 0) {
292          cd->istate = last_istate;
293          errno = E2BIG;
294          return -1;
295        }
296        if (!(outcount <= outleft)) abort();
297        outptr += outcount;
298        outleft -= outcount;
299      outcount_zero:
300        *outbuf = (char*) outptr;
301        *outbytesleft = outleft;
302      }
303    }
304    if (cd->ofuncs.xxx_reset) {
305      unsigned char* outptr = (unsigned char*) *outbuf;
306      size_t outleft = *outbytesleft;
307      int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
308      if (outcount < 0) {
309        errno = E2BIG;
310        return -1;
311      }
312      if (!(outcount <= outleft)) abort();
313      *outbuf = (char*) (outptr + outcount);
314      *outbytesleft = outleft - outcount;
315    }
316    memset(&cd->istate,'\0',sizeof(state_t));
317    memset(&cd->ostate,'\0',sizeof(state_t));
318    return result;
319  }
320}
321