1/*
2 * Copyright (C) 1999-2002, 2004 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/*
22 * TCVN-5712
23 */
24
25#include "flushwc.h"
26#include "vietcomb.h"
27
/* TCVN bytes of the five combining tone marks, listed in increasing order
   of the Unicode code point they encode.  tcvn_wctomb indexes this table
   with the comb1 field of a viet_decomp_table entry. */
static const unsigned char tcvn_comb_table[] = {
  0xb0, /* U+0300 */
  0xb3, /* U+0301 */
  0xb2, /* U+0303 */
  0xb1, /* U+0309 */
  0xb4, /* U+0323 */
};
31
/* Bitmap of the characters that may occur as a base in
   viet_comp_table_data.  Word number (wc - 0x0040) >> 5 describes 32
   consecutive code points; inside a word, bit (wc & 0x1f) is set when wc
   is a possible base.  The possible bases are:
   0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
   0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
   0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00C2, 0x00CA, 0x00D3..0x00D6,
   0x00DA, 0x00E2, 0x00EA, 0x00F3..0x00F6, 0x00FA, 0x0102..0x0103,
   0x0168..0x0169, 0x01A0..0x01A1, 0x01AF..0x01B0. */
static const unsigned int tcvn_comp_bases[] = {
  0x06fdfbbe, /* 0x0040..0x005F */
  0x06fdfbbe, /* 0x0060..0x007F */
  0x00000000, /* 0x0080..0x009F */
  0x00000020, /* 0x00A0..0x00BF */
  0x04780404, /* 0x00C0..0x00DF */
  0x04780404, /* 0x00E0..0x00FF */
  0x0000000c, /* 0x0100..0x011F */
  0x00000000, /* 0x0120..0x013F */
  0x00000000, /* 0x0140..0x015F */
  0x00000300, /* 0x0160..0x017F */
  0x00000000, /* 0x0180..0x019F */
  0x00018003  /* 0x01A0..0x01BF */
};
42
/* Unicode values of the TCVN bytes 0x00..0x17, indexed by the byte itself.
   Entries equal to their index pass the byte through unchanged; the
   remaining entries map the byte to an accented Latin letter. */
static const unsigned short tcvn_2uni_1[24] = {
  0x0000, 0x00da, 0x1ee4, 0x0003, /* 0x00-0x03 */
  0x1eea, 0x1eec, 0x1eee, 0x0007, /* 0x04-0x07 */
  0x0008, 0x0009, 0x000a, 0x000b, /* 0x08-0x0b */
  0x000c, 0x000d, 0x000e, 0x000f, /* 0x0c-0x0f */
  0x0010, 0x1ee8, 0x1ef0, 0x1ef2, /* 0x10-0x13 */
  0x1ef6, 0x1ef8, 0x00dd, 0x1ef4, /* 0x14-0x17 */
};
/* Unicode values of the TCVN bytes 0x80..0xff, indexed by (byte - 0x80).
   Bytes 0xb0..0xb4 hold the combining marks 0x0300, 0x0309, 0x0303,
   0x0301 and 0x0323; tcvn_mbtowc tries to merge these with a buffered
   base character. */
static const unsigned short tcvn_2uni_2[128] = {
  /* 0x80 */
  0x00c0, 0x1ea2, 0x00c3, 0x00c1, 0x1ea0, 0x1eb6, 0x1eac, 0x00c8,
  0x1eba, 0x1ebc, 0x00c9, 0x1eb8, 0x1ec6, 0x00cc, 0x1ec8, 0x0128,
  /* 0x90 */
  0x00cd, 0x1eca, 0x00d2, 0x1ece, 0x00d5, 0x00d3, 0x1ecc, 0x1ed8,
  0x1edc, 0x1ede, 0x1ee0, 0x1eda, 0x1ee2, 0x00d9, 0x1ee6, 0x0168,
  /* 0xa0 */
  0x00a0, 0x0102, 0x00c2, 0x00ca, 0x00d4, 0x01a0, 0x01af, 0x0110,
  0x0103, 0x00e2, 0x00ea, 0x00f4, 0x01a1, 0x01b0, 0x0111, 0x1eb0,
  /* 0xb0 */
  0x0300, 0x0309, 0x0303, 0x0301, 0x0323, 0x00e0, 0x1ea3, 0x00e3,
  0x00e1, 0x1ea1, 0x1eb2, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eaf, 0x1eb4,
  /* 0xc0 */
  0x1eae, 0x1ea6, 0x1ea8, 0x1eaa, 0x1ea4, 0x1ec0, 0x1eb7, 0x1ea7,
  0x1ea9, 0x1eab, 0x1ea5, 0x1ead, 0x00e8, 0x1ec2, 0x1ebb, 0x1ebd,
  /* 0xd0 */
  0x00e9, 0x1eb9, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ebf, 0x1ec7, 0x00ec,
  0x1ec9, 0x1ec4, 0x1ebe, 0x1ed2, 0x0129, 0x00ed, 0x1ecb, 0x00f2,
  /* 0xe0 */
  0x1ed4, 0x1ecf, 0x00f5, 0x00f3, 0x1ecd, 0x1ed3, 0x1ed5, 0x1ed7,
  0x1ed1, 0x1ed9, 0x1edd, 0x1edf, 0x1ee1, 0x1edb, 0x1ee3, 0x00f9,
  /* 0xf0 */
  0x1ed6, 0x1ee7, 0x0169, 0x00fa, 0x1ee5, 0x1eeb, 0x1eed, 0x1eef,
  0x1ee9, 0x1ef1, 0x1ef3, 0x1ef7, 0x1ef9, 0x00fd, 0x1ef5, 0x1ed0,
};
76
77/* In the TCVN to Unicode direction, the state contains a buffered
78   character, or 0 if none. */
79
80static int
81tcvn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
82{
83  unsigned char c = *s;
84  unsigned short wc;
85  unsigned short last_wc;
86  if (c < 0x18)
87    wc = tcvn_2uni_1[c];
88  else if (c < 0x80)
89    wc = c;
90  else
91    wc = tcvn_2uni_2[c-0x80];
92  last_wc = conv->istate;
93  if (last_wc) {
94    if (wc >= 0x0300 && wc < 0x0340) {
95      /* See whether last_wc and wc can be combined. */
96      unsigned int k;
97      unsigned int i1, i2;
98      switch (wc) {
99        case 0x0300: k = 0; break;
100        case 0x0301: k = 1; break;
101        case 0x0303: k = 2; break;
102        case 0x0309: k = 3; break;
103        case 0x0323: k = 4; break;
104        default: abort();
105      }
106      i1 = viet_comp_table[k].idx;
107      i2 = i1 + viet_comp_table[k].len-1;
108      if (last_wc >= viet_comp_table_data[i1].base
109          && last_wc <= viet_comp_table_data[i2].base) {
110        unsigned int i;
111        for (;;) {
112          i = (i1+i2)>>1;
113          if (last_wc == viet_comp_table_data[i].base)
114            break;
115          if (last_wc < viet_comp_table_data[i].base) {
116            if (i1 == i)
117              goto not_combining;
118            i2 = i;
119          } else {
120            if (i1 != i)
121              i1 = i;
122            else {
123              i = i2;
124              if (last_wc == viet_comp_table_data[i].base)
125                break;
126              goto not_combining;
127            }
128          }
129        }
130        last_wc = viet_comp_table_data[i].composed;
131        /* Output the combined character. */
132        conv->istate = 0;
133        *pwc = (ucs4_t) last_wc;
134        return 1;
135      }
136    }
137  not_combining:
138    /* Output the buffered character. */
139    conv->istate = 0;
140    *pwc = (ucs4_t) last_wc;
141    return 0; /* Don't advance the input pointer. */
142  }
143  if (wc >= 0x0041 && wc <= 0x01b0
144      && ((tcvn_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) {
145    /* wc is a possible match in viet_comp_table_data. Buffer it. */
146    conv->istate = wc;
147    return RET_TOOFEW(1);
148  } else {
149    /* Output wc immediately. */
150    *pwc = (ucs4_t) wc;
151    return 1;
152  }
153}
154
/* The TCVN flush routine is an alias for normal_flushwc, which is not
   defined in this file.
   NOTE(review): presumably it comes from "flushwc.h" and outputs the
   character still buffered in the conversion state at the end of the
   input -- confirm against that header. */
#define tcvn_flushwc normal_flushwc
156
/* Unicode to TCVN table for the code points 0x00a0..0x01b7, indexed by
   (wc - 0x00a0).  An entry of 0x00 means that the code point has no
   single-byte TCVN encoding; tcvn_wctomb treats a zero result as "not
   found".  The first 96 entries cover 0x00a0..0x00ff, the remaining 184
   entries cover 0x0100..0x01b7. */
static const unsigned char tcvn_page00[96+184] = {
  0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */
  0x80, 0x83, 0xa2, 0x82, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x87, 0x8a, 0xa3, 0x00, 0x8d, 0x90, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x92, 0x95, 0xa4, 0x94, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x9d, 0x01, 0x00, 0x00, 0x16, 0x00, 0x00, /* 0xd8-0xdf */
  0xb5, 0xb8, 0xa9, 0xb7, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
  0xcc, 0xd0, 0xaa, 0x00, 0xd7, 0xdd, 0x00, 0x00, /* 0xe8-0xef */
  0x00, 0x00, 0xdf, 0xe3, 0xab, 0xe2, 0x00, 0x00, /* 0xf0-0xf7 */
  0x00, 0xef, 0xf3, 0x00, 0x00, 0xfd, 0x00, 0x00, /* 0xf8-0xff */
  /* 0x0100 */
  0x00, 0x00, 0xa1, 0xa8, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0xa7, 0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
  0x8f, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
  0x9f, 0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
  0xa5, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, /* 0xa8-0xaf */
  0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
};
/* Unicode to TCVN table for the combining marks 0x0300..0x0327, indexed
   by (wc - 0x0300).  A zero entry means "no TCVN encoding".  tcvn_wctomb
   also uses the first two entries for 0x0340 and 0x0341, indexing with
   (wc - 0x0340). */
static const unsigned char tcvn_page03[40] = {
  0xb0, 0xb3, 0x00, 0xb2, /* 0x00-0x03 */
  0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
  0x00, 0xb1, 0x00, 0x00, /* 0x08-0x0b */
  0x00, 0x00, 0x00, 0x00, /* 0x0c-0x0f */
  0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */
  0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */
  0x00, 0x00, 0x00, 0x00, /* 0x18-0x1b */
  0x00, 0x00, 0x00, 0x00, /* 0x1c-0x1f */
  0x00, 0x00, 0x00, 0xb4, /* 0x20-0x23 */
  0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */
};
/* Unicode to TCVN table for the code points 0x1ea0..0x1eff, indexed by
   (wc - 0x1ea0).  An entry of 0x00 means that the code point has no
   single-byte TCVN encoding; tcvn_wctomb treats a zero result as "not
   found". */
static const unsigned char tcvn_page1e[96] = {
  0x84, 0xb9, 0x81, 0xb6, 0xc4, 0xca, 0xc1, 0xc7, /* 0xa0-0xa7 */
  0xc2, 0xc8, 0xc3, 0xc9, 0x86, 0xcb, 0xc0, 0xbe, /* 0xa8-0xaf */
  0xaf, 0xbb, 0xba, 0xbc, 0xbf, 0xbd, 0x85, 0xc6, /* 0xb0-0xb7 */
  0x8b, 0xd1, 0x88, 0xce, 0x89, 0xcf, 0xda, 0xd5, /* 0xb8-0xbf */
  0xc5, 0xd2, 0xcd, 0xd3, 0xd9, 0xd4, 0x8c, 0xd6, /* 0xc0-0xc7 */
  0x8e, 0xd8, 0x91, 0xde, 0x96, 0xe4, 0x93, 0xe1, /* 0xc8-0xcf */
  0xff, 0xe8, 0xdb, 0xe5, 0xe0, 0xe6, 0xf0, 0xe7, /* 0xd0-0xd7 */
  0x97, 0xe9, 0x9b, 0xed, 0x98, 0xea, 0x99, 0xeb, /* 0xd8-0xdf */
  0x9a, 0xec, 0x9c, 0xee, 0x02, 0xf4, 0x9e, 0xf1, /* 0xe0-0xe7 */
  0x11, 0xf8, 0x04, 0xf5, 0x05, 0xf6, 0x06, 0xf7, /* 0xe8-0xef */
  0x12, 0xf9, 0x13, 0xfa, 0x17, 0xfe, 0x14, 0xfb, /* 0xf0-0xf7 */
  0x15, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */
};
216
217static int
218tcvn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
219{
220  unsigned char c = 0;
221  if (wc < 0x0080 && (wc >= 0x0020 || (0x00fe0076 & (1 << wc)) == 0)) {
222    *r = wc;
223    return 1;
224  }
225  else if (wc >= 0x00a0 && wc < 0x01b8)
226    c = tcvn_page00[wc-0x00a0];
227  else if (wc >= 0x0300 && wc < 0x0328)
228    c = tcvn_page03[wc-0x0300];
229  else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */
230    c = tcvn_page03[wc-0x0340];
231  else if (wc >= 0x1ea0 && wc < 0x1f00)
232    c = tcvn_page1e[wc-0x1ea0];
233  if (c != 0) {
234    *r = c;
235    return 1;
236  }
237  /* Try compatibility or canonical decomposition. */
238  {
239    /* Binary search through viet_decomp_table. */
240    unsigned int i1 = 0;
241    unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1;
242    if (wc >= viet_decomp_table[i1].composed
243        && wc <= viet_decomp_table[i2].composed) {
244      unsigned int i;
245      for (;;) {
246        /* Here i2 - i1 > 0. */
247        i = (i1+i2)>>1;
248        if (wc == viet_decomp_table[i].composed)
249          break;
250        if (wc < viet_decomp_table[i].composed) {
251          if (i1 == i)
252            return RET_ILUNI;
253          /* Here i1 < i < i2. */
254          i2 = i;
255        } else {
256          /* Here i1 <= i < i2. */
257          if (i1 != i)
258            i1 = i;
259          else {
260            /* Here i2 - i1 = 1. */
261            i = i2;
262            if (wc == viet_decomp_table[i].composed)
263              break;
264            else
265              return RET_ILUNI;
266          }
267        }
268      }
269      /* Found a compatibility or canonical decomposition. */
270      wc = viet_decomp_table[i].base;
271      /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8,
272         0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6,
273         0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef,
274         0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0,
275         0x01a1, 0x01af, 0x01b0. */
276      if (wc < 0x0080)
277        c = wc;
278      else {
279        c = tcvn_page00[wc-0x00a0];
280        if (c == 0)
281          return RET_ILUNI;
282      }
283      if (n < 2)
284        return RET_TOOSMALL;
285      r[0] = c;
286      r[1] = tcvn_comb_table[viet_decomp_table[i].comb1];
287      return 2;
288    }
289  }
290  return RET_ILUNI;
291}
292