1/*
2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/*
22 * SHIFT_JISX0213
23 */
24
25/* The structure of Shift_JISX0213 is as follows:
26
27   0x00..0x7F: ISO646-JP, an ASCII variant
28
29   0x{A1..DF}: JISX0201 Katakana.
30
31   0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.
32
33   0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.
34
35   Note that some JISX0213 characters are not contained in Unicode 3.2
36   and are therefore best represented as sequences of Unicode characters.
37*/
38
39#include "jisx0213.h"
40#include "flushwc.h"
41
42static int
43shift_jisx0213_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
44{
45  ucs4_t last_wc = conv->istate;
46  if (last_wc) {
47    /* Output the buffered character. */
48    conv->istate = 0;
49    *pwc = last_wc;
50    return 0; /* Don't advance the input pointer. */
51  } else {
52    unsigned char c = *s;
53    if (c < 0x80) {
54      /* Plain ISO646-JP character. */
55      if (c == 0x5c)
56        *pwc = (ucs4_t) 0x00a5;
57      else if (c == 0x7e)
58        *pwc = (ucs4_t) 0x203e;
59      else
60        *pwc = (ucs4_t) c;
61      return 1;
62    } else if (c >= 0xa1 && c <= 0xdf) {
63      *pwc = c + 0xfec0;
64      return 1;
65    } else {
66      if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
67        /* Two byte character. */
68        if (n >= 2) {
69          unsigned char c2 = s[1];
70          if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) {
71            unsigned int c1;
72            ucs4_t wc;
73            /* Convert to row and column. */
74            if (c < 0xe0)
75              c -= 0x81;
76            else
77              c -= 0xc1;
78            if (c2 < 0x80)
79              c2 -= 0x40;
80            else
81              c2 -= 0x41;
82            /* Now 0 <= c <= 0x3b, 0 <= c2 <= 0xbb. */
83            c1 = 2 * c;
84            if (c2 >= 0x5e)
85              c2 -= 0x5e, c1++;
86            c2 += 0x21;
87            if (c1 >= 0x5e) {
88              /* Handling of JISX 0213 plane 2 rows. */
89              if (c1 >= 0x67)
90                c1 += 230;
91              else if (c1 >= 0x63 || c1 == 0x5f)
92                c1 += 168;
93              else
94                c1 += 162;
95            }
96            wc = jisx0213_to_ucs4(0x121+c1,c2);
97            if (wc) {
98              if (wc < 0x80) {
99                /* It's a combining character. */
100                ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0];
101                ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1];
102                /* We cannot output two Unicode characters at once. So,
103                   output the first character and buffer the second one. */
104                *pwc = wc1;
105                conv->istate = wc2;
106              } else
107                *pwc = wc;
108              return 2;
109            }
110          }
111        } else
112          return RET_TOOFEW(0);
113      }
114      return RET_ILSEQ;
115    }
116  }
117}
118
/* This converter defines no flush routine of its own: the name
   shift_jisx0213_flushwc is an alias for normal_flushwc (presumably
   declared in "flushwc.h", which is included above -- confirm). */
#define shift_jisx0213_flushwc normal_flushwc
120
/*
 * Composition table used by shift_jisx0213_wctomb.
 *
 * Each entry pairs the two-byte Shift_JISX0213 code of a base character
 * with the two-byte code that is output instead when that base character
 * is immediately followed by a particular Unicode combining character.
 * Entries are grouped by combining character; for each group the macros
 * below give the index of its first entry (_idx) and its entry count
 * (_len).  Every _idx is derived from the preceding group, so the order
 * and size of the groups in the array must match the macros.
 *
 * The comment after each entry appears to read
 * "composed JISX0213 code = base JISX0213 code + combining character".
 */
#define shift_jisx0213_comp_table02e5_idx 0
#define shift_jisx0213_comp_table02e5_len 1
#define shift_jisx0213_comp_table02e9_idx (shift_jisx0213_comp_table02e5_idx+shift_jisx0213_comp_table02e5_len)
#define shift_jisx0213_comp_table02e9_len 1
#define shift_jisx0213_comp_table0300_idx (shift_jisx0213_comp_table02e9_idx+shift_jisx0213_comp_table02e9_len)
#define shift_jisx0213_comp_table0300_len 5
#define shift_jisx0213_comp_table0301_idx (shift_jisx0213_comp_table0300_idx+shift_jisx0213_comp_table0300_len)
#define shift_jisx0213_comp_table0301_len 4
#define shift_jisx0213_comp_table309a_idx (shift_jisx0213_comp_table0301_idx+shift_jisx0213_comp_table0301_len)
#define shift_jisx0213_comp_table309a_len 14

static const struct {
  unsigned short base;
  unsigned short composed;
} shift_jisx0213_comp_table_data[] = {
  /* Group U+02E5: 1 entry. */
  { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */

  /* Group U+02E9: 1 entry. */
  { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */

  /* Group U+0300: 5 entries. */
  { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
  { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
  { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
  { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
  { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */

  /* Group U+0301: 4 entries. */
  { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
  { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
  { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
  { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */

  /* Group U+309A: 14 entries. */
  { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
  { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
  { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
  { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
  { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
  { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
  { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
  { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
  { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
  { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
  { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
  { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
  { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
  { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
};
159
160static int
161shift_jisx0213_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
162{
163  int count = 0;
164  unsigned short lasttwo = conv->ostate;
165
166  if (lasttwo) {
167    /* Attempt to combine the last character with this one. */
168    unsigned int idx;
169    unsigned int len;
170
171    if (wc == 0x02e5)
172      idx = shift_jisx0213_comp_table02e5_idx,
173      len = shift_jisx0213_comp_table02e5_len;
174    else if (wc == 0x02e9)
175      idx = shift_jisx0213_comp_table02e9_idx,
176      len = shift_jisx0213_comp_table02e9_len;
177    else if (wc == 0x0300)
178      idx = shift_jisx0213_comp_table0300_idx,
179      len = shift_jisx0213_comp_table0300_len;
180    else if (wc == 0x0301)
181      idx = shift_jisx0213_comp_table0301_idx,
182      len = shift_jisx0213_comp_table0301_len;
183    else if (wc == 0x309a)
184      idx = shift_jisx0213_comp_table309a_idx,
185      len = shift_jisx0213_comp_table309a_len;
186    else
187      goto not_combining;
188
189    do
190      if (shift_jisx0213_comp_table_data[idx].base == lasttwo)
191        break;
192    while (++idx, --len > 0);
193
194    if (len > 0) {
195      /* Output the combined character. */
196      if (n >= 2) {
197        lasttwo = shift_jisx0213_comp_table_data[idx].composed;
198        r[0] = (lasttwo >> 8) & 0xff;
199        r[1] = lasttwo & 0xff;
200        conv->ostate = 0;
201        return 2;
202      } else
203        return RET_TOOSMALL;
204    }
205
206  not_combining:
207    /* Output the buffered character. */
208    if (n < 2)
209      return RET_TOOSMALL;
210    r[0] = (lasttwo >> 8) & 0xff;
211    r[1] = lasttwo & 0xff;
212    r += 2;
213    count = 2;
214  }
215
216  if (wc < 0x80 && wc != 0x5c && wc != 0x7e) {
217    /* Plain ISO646-JP character. */
218    if (n > count) {
219      r[0] = (unsigned char) wc;
220      conv->ostate = 0;
221      return count+1;
222    } else
223      return RET_TOOSMALL;
224  } else if (wc == 0x00a5) {
225    if (n > count) {
226      r[0] = 0x5c;
227      conv->ostate = 0;
228      return count+1;
229    } else
230      return RET_TOOSMALL;
231  } else if (wc == 0x203e) {
232    if (n > count) {
233      r[0] = 0x7e;
234      conv->ostate = 0;
235      return count+1;
236    } else
237      return RET_TOOSMALL;
238  } else if (wc >= 0xff61 && wc <= 0xff9f) {
239    /* Half-width katakana. */
240    if (n > count) {
241      r[0] = wc - 0xfec0;
242      conv->ostate = 0;
243      return count+1;
244    } else
245      return RET_TOOSMALL;
246  } else {
247    unsigned int s1, s2;
248    unsigned short jch = ucs4_to_jisx0213(wc);
249    if (jch != 0) {
250      /* Convert it to shifted representation. */
251      s1 = jch >> 8;
252      s2 = jch & 0x7f;
253      s1 -= 0x21;
254      s2 -= 0x21;
255      if (s1 >= 0x5e) {
256        /* Handling of JISX 0213 plane 2 rows. */
257        if (s1 >= 0xcd) /* rows 0x26E..0x27E */
258          s1 -= 102;
259        else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */
260          s1 -= 40;
261        else /* rows 0x221, 0x223..0x225 */
262          s1 -= 34;
263        /* Now 0x5e <= s1 <= 0x77. */
264      }
265      if (s1 & 1)
266        s2 += 0x5e;
267      s1 = s1 >> 1;
268      if (s1 < 0x1f)
269        s1 += 0x81;
270      else
271        s1 += 0xc1;
272      if (s2 < 0x3f)
273        s2 += 0x40;
274      else
275        s2 += 0x41;
276      if (jch & 0x0080) {
277        /* A possible match in comp_table_data. We have to buffer it. */
278        /* We know it's a JISX 0213 plane 1 character. */
279        if (jch & 0x8000) abort();
280        conv->ostate = (s1 << 8) | s2;
281        return count+0;
282      }
283      /* Output the shifted representation. */
284      if (n >= count+2) {
285        r[0] = s1;
286        r[1] = s2;
287        conv->ostate = 0;
288        return count+2;
289      } else
290        return RET_TOOSMALL;
291    }
292    return RET_ILUNI;
293  }
294}
295
296static int
297shift_jisx0213_reset (conv_t conv, unsigned char *r, int n)
298{
299  state_t lasttwo = conv->ostate;
300
301  if (lasttwo) {
302    if (n < 2)
303      return RET_TOOSMALL;
304    r[0] = (lasttwo >> 8) & 0xff;
305    r[1] = lasttwo & 0xff;
306    /* conv->ostate = 0; will be done by the caller */
307    return 2;
308  } else
309    return 0;
310}
311