1/*
2 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/*
22 * ISO-2022-CN-EXT
23 */
24
25/* Specification: RFC 1922 */
26
/* Control characters recognised and emitted by this converter. */
#define ESC 0x1b  /* starts designation and single-shift sequences */
#define SO  0x0e  /* switches to the two-byte set chosen by "ESC $ )" */
#define SI  0x0f  /* switches back to ASCII */

/*
 * The conversion state is a single word holding four independent fields.
 *
 * Field 1 (bits 0..7): the current shift state.
 */
#define STATE_ASCII          0
#define STATE_TWOBYTE        1
/*
 * Field 2 (bits 8..15): the set designated by "ESC $ )" and used after SO.
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2
#define STATE2_DESIGNATED_ISO_IR_165  3
/*
 * Field 3 (bits 16..23): the set designated by "ESC $ *" and used by ESC N.
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1
/*
 * Field 4 (bits 24 and up): the set designated by "ESC $ +" and used by
 * ESC O.
 */
#define STATE4_NONE                   0
#define STATE4_DESIGNATED_CNS11643_3  1
#define STATE4_DESIGNATED_CNS11643_4  2
#define STATE4_DESIGNATED_CNS11643_5  3
#define STATE4_DESIGNATED_CNS11643_6  4
#define STATE4_DESIGNATED_CNS11643_7  5

/*
 * SPLIT_STATE declares state1..state4 and fills them from a variable named
 * `state` that must be in scope; COMBINE_STATE packs them back into it.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff; \
  unsigned int state2 = (state >> 8) & 0xff; \
  unsigned int state3 = (state >> 16) & 0xff; \
  unsigned int state4 = state >> 24
#define COMBINE_STATE \
  state = state1 | (state2 << 8) | (state3 << 16) | (state4 << 24)
62
63static int
64iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
65{
66  state_t state = conv->istate;
67  SPLIT_STATE;
68  int count = 0;
69  unsigned char c;
70  for (;;) {
71    c = *s;
72    if (c == ESC) {
73      if (n < count+4)
74        goto none;
75      if (s[1] == '$') {
76        if (s[2] == ')') {
77          if (s[3] == 'A') {
78            state2 = STATE2_DESIGNATED_GB2312;
79            s += 4; count += 4;
80            if (n < count+1)
81              goto none;
82            continue;
83          }
84          if (s[3] == 'G') {
85            state2 = STATE2_DESIGNATED_CNS11643_1;
86            s += 4; count += 4;
87            if (n < count+1)
88              goto none;
89            continue;
90          }
91          if (s[3] == 'E') {
92            state2 = STATE2_DESIGNATED_ISO_IR_165;
93            s += 4; count += 4;
94            if (n < count+1)
95              goto none;
96            continue;
97          }
98        }
99        if (s[2] == '*') {
100          if (s[3] == 'H') {
101            state3 = STATE3_DESIGNATED_CNS11643_2;
102            s += 4; count += 4;
103            if (n < count+1)
104              goto none;
105            continue;
106          }
107        }
108        if (s[2] == '+') {
109          if (s[3] == 'I') {
110            state4 = STATE4_DESIGNATED_CNS11643_3;
111            s += 4; count += 4;
112            if (n < count+1)
113              goto none;
114            continue;
115          }
116          if (s[3] == 'J') {
117            state4 = STATE4_DESIGNATED_CNS11643_4;
118            s += 4; count += 4;
119            if (n < count+1)
120              goto none;
121            continue;
122          }
123          if (s[3] == 'K') {
124            state4 = STATE4_DESIGNATED_CNS11643_5;
125            s += 4; count += 4;
126            if (n < count+1)
127              goto none;
128            continue;
129          }
130          if (s[3] == 'L') {
131            state4 = STATE4_DESIGNATED_CNS11643_6;
132            s += 4; count += 4;
133            if (n < count+1)
134              goto none;
135            continue;
136          }
137          if (s[3] == 'M') {
138            state4 = STATE4_DESIGNATED_CNS11643_7;
139            s += 4; count += 4;
140            if (n < count+1)
141              goto none;
142            continue;
143          }
144        }
145      }
146      if (s[1] == 'N') {
147        switch (state3) {
148          case STATE3_NONE:
149            return RET_ILSEQ;
150          case STATE3_DESIGNATED_CNS11643_2:
151            if (s[2] < 0x80 && s[3] < 0x80) {
152              int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
153              if (ret == RET_ILSEQ)
154                return RET_ILSEQ;
155              if (ret != 2) abort();
156              COMBINE_STATE;
157              conv->istate = state;
158              return count+4;
159            } else
160              return RET_ILSEQ;
161          default: abort();
162        }
163      }
164      if (s[1] == 'O') {
165        switch (state4) {
166          case STATE4_NONE:
167            return RET_ILSEQ;
168          case STATE4_DESIGNATED_CNS11643_3:
169            if (s[2] < 0x80 && s[3] < 0x80) {
170              int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
171              if (ret == RET_ILSEQ)
172                return RET_ILSEQ;
173              if (ret != 2) abort();
174              COMBINE_STATE;
175              conv->istate = state;
176              return count+4;
177            } else
178              return RET_ILSEQ;
179          case STATE4_DESIGNATED_CNS11643_4:
180            if (s[2] < 0x80 && s[3] < 0x80) {
181              int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
182              if (ret == RET_ILSEQ)
183                return RET_ILSEQ;
184              if (ret != 2) abort();
185              COMBINE_STATE;
186              conv->istate = state;
187              return count+4;
188            } else
189              return RET_ILSEQ;
190          case STATE4_DESIGNATED_CNS11643_5:
191            if (s[2] < 0x80 && s[3] < 0x80) {
192              int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
193              if (ret == RET_ILSEQ)
194                return RET_ILSEQ;
195              if (ret != 2) abort();
196              COMBINE_STATE;
197              conv->istate = state;
198              return count+4;
199            } else
200              return RET_ILSEQ;
201          case STATE4_DESIGNATED_CNS11643_6:
202            if (s[2] < 0x80 && s[3] < 0x80) {
203              int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
204              if (ret == RET_ILSEQ)
205                return RET_ILSEQ;
206              if (ret != 2) abort();
207              COMBINE_STATE;
208              conv->istate = state;
209              return count+4;
210            } else
211              return RET_ILSEQ;
212          case STATE4_DESIGNATED_CNS11643_7:
213            if (s[2] < 0x80 && s[3] < 0x80) {
214              int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
215              if (ret == RET_ILSEQ)
216                return RET_ILSEQ;
217              if (ret != 2) abort();
218              COMBINE_STATE;
219              conv->istate = state;
220              return count+4;
221            } else
222              return RET_ILSEQ;
223          default: abort();
224        }
225      }
226      return RET_ILSEQ;
227    }
228    if (c == SO) {
229      if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
230        return RET_ILSEQ;
231      state1 = STATE_TWOBYTE;
232      s++; count++;
233      if (n < count+1)
234        goto none;
235      continue;
236    }
237    if (c == SI) {
238      state1 = STATE_ASCII;
239      s++; count++;
240      if (n < count+1)
241        goto none;
242      continue;
243    }
244    break;
245  }
246  switch (state1) {
247    case STATE_ASCII:
248      if (c < 0x80) {
249        int ret = ascii_mbtowc(conv,pwc,s,1);
250        if (ret == RET_ILSEQ)
251          return RET_ILSEQ;
252        if (ret != 1) abort();
253        if (*pwc == 0x000a || *pwc == 0x000d) {
254          state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
255        }
256        COMBINE_STATE;
257        conv->istate = state;
258        return count+1;
259      } else
260        return RET_ILSEQ;
261    case STATE_TWOBYTE:
262      if (n < count+2)
263        goto none;
264      if (s[0] < 0x80 && s[1] < 0x80) {
265        int ret;
266        switch (state2) {
267          case STATE2_NONE:
268            return RET_ILSEQ;
269          case STATE2_DESIGNATED_GB2312:
270            ret = gb2312_mbtowc(conv,pwc,s,2); break;
271          case STATE2_DESIGNATED_CNS11643_1:
272            ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
273          case STATE2_DESIGNATED_ISO_IR_165:
274            ret = isoir165_mbtowc(conv,pwc,s,2); break;
275          default: abort();
276        }
277        if (ret == RET_ILSEQ)
278          return RET_ILSEQ;
279        if (ret != 2) abort();
280        COMBINE_STATE;
281        conv->istate = state;
282        return count+2;
283      } else
284        return RET_ILSEQ;
285    default: abort();
286  }
287
288none:
289  COMBINE_STATE;
290  conv->istate = state;
291  return RET_TOOFEW(count);
292}
293
294static int
295iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
296{
297  state_t state = conv->ostate;
298  SPLIT_STATE;
299  unsigned char buf[3];
300  int ret;
301
302  /* There is no need to handle Unicode 3.1 tag characters and to look for
303     "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
304
305  /* Try ASCII. */
306  ret = ascii_wctomb(conv,buf,wc,1);
307  if (ret != RET_ILUNI) {
308    if (ret != 1) abort();
309    if (buf[0] < 0x80) {
310      int count = (state1 == STATE_ASCII ? 1 : 2);
311      if (n < count)
312        return RET_TOOSMALL;
313      if (state1 != STATE_ASCII) {
314        r[0] = SI;
315        r += 1;
316        state1 = STATE_ASCII;
317      }
318      r[0] = buf[0];
319      if (wc == 0x000a || wc == 0x000d) {
320        state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
321      }
322      COMBINE_STATE;
323      conv->ostate = state;
324      return count;
325    }
326  }
327
328  /* Try GB 2312-1980. */
329  ret = gb2312_wctomb(conv,buf,wc,2);
330  if (ret != RET_ILUNI) {
331    if (ret != 2) abort();
332    if (buf[0] < 0x80 && buf[1] < 0x80) {
333      int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
334      if (n < count)
335        return RET_TOOSMALL;
336      if (state2 != STATE2_DESIGNATED_GB2312) {
337        r[0] = ESC;
338        r[1] = '$';
339        r[2] = ')';
340        r[3] = 'A';
341        r += 4;
342        state2 = STATE2_DESIGNATED_GB2312;
343      }
344      if (state1 != STATE_TWOBYTE) {
345        r[0] = SO;
346        r += 1;
347        state1 = STATE_TWOBYTE;
348      }
349      r[0] = buf[0];
350      r[1] = buf[1];
351      COMBINE_STATE;
352      conv->ostate = state;
353      return count;
354    }
355  }
356
357  ret = cns11643_wctomb(conv,buf,wc,3);
358  if (ret != RET_ILUNI) {
359    if (ret != 3) abort();
360
361    /* Try CNS 11643-1992 Plane 1. */
362    if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
363      int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
364      if (n < count)
365        return RET_TOOSMALL;
366      if (state2 != STATE2_DESIGNATED_CNS11643_1) {
367        r[0] = ESC;
368        r[1] = '$';
369        r[2] = ')';
370        r[3] = 'G';
371        r += 4;
372        state2 = STATE2_DESIGNATED_CNS11643_1;
373      }
374      if (state1 != STATE_TWOBYTE) {
375        r[0] = SO;
376        r += 1;
377        state1 = STATE_TWOBYTE;
378      }
379      r[0] = buf[1];
380      r[1] = buf[2];
381      COMBINE_STATE;
382      conv->ostate = state;
383      return count;
384    }
385
386    /* Try CNS 11643-1992 Plane 2. */
387    if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
388      int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
389      if (n < count)
390        return RET_TOOSMALL;
391      if (state3 != STATE3_DESIGNATED_CNS11643_2) {
392        r[0] = ESC;
393        r[1] = '$';
394        r[2] = '*';
395        r[3] = 'H';
396        r += 4;
397        state3 = STATE3_DESIGNATED_CNS11643_2;
398      }
399      r[0] = ESC;
400      r[1] = 'N';
401      r[2] = buf[1];
402      r[3] = buf[2];
403      COMBINE_STATE;
404      conv->ostate = state;
405      return count;
406    }
407
408    /* Try CNS 11643-1992 Plane 3. */
409    if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
410      int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
411      if (n < count)
412        return RET_TOOSMALL;
413      if (state4 != STATE4_DESIGNATED_CNS11643_3) {
414        r[0] = ESC;
415        r[1] = '$';
416        r[2] = '+';
417        r[3] = 'I';
418        r += 4;
419        state4 = STATE4_DESIGNATED_CNS11643_3;
420      }
421      r[0] = ESC;
422      r[1] = 'O';
423      r[2] = buf[1];
424      r[3] = buf[2];
425      COMBINE_STATE;
426      conv->ostate = state;
427      return count;
428    }
429
430    /* Try CNS 11643-1992 Plane 4. */
431    if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
432      int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
433      if (n < count)
434        return RET_TOOSMALL;
435      if (state4 != STATE4_DESIGNATED_CNS11643_4) {
436        r[0] = ESC;
437        r[1] = '$';
438        r[2] = '+';
439        r[3] = 'J';
440        r += 4;
441        state4 = STATE4_DESIGNATED_CNS11643_4;
442      }
443      r[0] = ESC;
444      r[1] = 'O';
445      r[2] = buf[1];
446      r[3] = buf[2];
447      COMBINE_STATE;
448      conv->ostate = state;
449      return count;
450    }
451
452    /* Try CNS 11643-1992 Plane 5. */
453    if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
454      int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
455      if (n < count)
456        return RET_TOOSMALL;
457      if (state4 != STATE4_DESIGNATED_CNS11643_5) {
458        r[0] = ESC;
459        r[1] = '$';
460        r[2] = '+';
461        r[3] = 'K';
462        r += 4;
463        state4 = STATE4_DESIGNATED_CNS11643_5;
464      }
465      r[0] = ESC;
466      r[1] = 'O';
467      r[2] = buf[1];
468      r[3] = buf[2];
469      COMBINE_STATE;
470      conv->ostate = state;
471      return count;
472    }
473
474    /* Try CNS 11643-1992 Plane 6. */
475    if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
476      int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
477      if (n < count)
478        return RET_TOOSMALL;
479      if (state4 != STATE4_DESIGNATED_CNS11643_6) {
480        r[0] = ESC;
481        r[1] = '$';
482        r[2] = '+';
483        r[3] = 'L';
484        r += 4;
485        state4 = STATE4_DESIGNATED_CNS11643_6;
486      }
487      r[0] = ESC;
488      r[1] = 'O';
489      r[2] = buf[1];
490      r[3] = buf[2];
491      COMBINE_STATE;
492      conv->ostate = state;
493      return count;
494    }
495
496    /* Try CNS 11643-1992 Plane 7. */
497    if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
498      int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
499      if (n < count)
500        return RET_TOOSMALL;
501      if (state4 != STATE4_DESIGNATED_CNS11643_7) {
502        r[0] = ESC;
503        r[1] = '$';
504        r[2] = '+';
505        r[3] = 'M';
506        r += 4;
507        state4 = STATE4_DESIGNATED_CNS11643_7;
508      }
509      r[0] = ESC;
510      r[1] = 'O';
511      r[2] = buf[1];
512      r[3] = buf[2];
513      COMBINE_STATE;
514      conv->ostate = state;
515      return count;
516    }
517
518  }
519
520  /* Try ISO-IR-165. */
521  ret = isoir165_wctomb(conv,buf,wc,2);
522  if (ret != RET_ILUNI) {
523    if (ret != 2) abort();
524    if (buf[0] < 0x80 && buf[1] < 0x80) {
525      int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
526      if (n < count)
527        return RET_TOOSMALL;
528      if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
529        r[0] = ESC;
530        r[1] = '$';
531        r[2] = ')';
532        r[3] = 'E';
533        r += 4;
534        state2 = STATE2_DESIGNATED_ISO_IR_165;
535      }
536      if (state1 != STATE_TWOBYTE) {
537        r[0] = SO;
538        r += 1;
539        state1 = STATE_TWOBYTE;
540      }
541      r[0] = buf[0];
542      r[1] = buf[1];
543      COMBINE_STATE;
544      conv->ostate = state;
545      return count;
546    }
547  }
548
549  return RET_ILUNI;
550}
551
552static int
553iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
554{
555  state_t state = conv->ostate;
556  SPLIT_STATE;
557  (void)state2;
558  (void)state3;
559  (void)state4;
560  if (state1 != STATE_ASCII) {
561    if (n < 1)
562      return RET_TOOSMALL;
563    r[0] = SI;
564    /* conv->ostate = 0; will be done by the caller */
565    return 1;
566  } else
567    return 0;
568}
569
/* Remove the state macros defined at the top of this file, listed in the
   order in which they were defined. */
#undef STATE_ASCII
#undef STATE_TWOBYTE
#undef STATE2_NONE
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE3_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE4_NONE
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_7
#undef SPLIT_STATE
#undef COMBINE_STATE
586