/*
 * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * GB18030 four-byte extension
 */

/*
 * Four-byte GB18030 -> Unicode range table.
 *
 * Holds 206 pairs { first, last } of linear four-byte indices, where the
 * linear index of the byte sequence b1 b2 b3 b4 is
 *   (((b1 - 0x81) * 10 + (b2 - 0x30)) * 126 + (b3 - 0x81)) * 10 + (b4 - 0x30).
 * The pairs are sorted in ascending order and are contiguous (every
 * "first" equals the preceding "last" + 1); together they cover
 * 0x0000..0x99fb.
 *
 * gb18030uni_mbtowc() binary-searches this table.  An index that falls
 * into pair k is turned into a Unicode code point by adding
 * gb18030uni_ranges[k].
 */
static const unsigned short gb18030uni_charset2uni_ranges[412] = {
  0x0000, 0x0023,  0x0024, 0x0025,  0x0026, 0x002c,  0x002d, 0x0031,
  0x0032, 0x0050,  0x0051, 0x0058,  0x0059, 0x005e,  0x005f, 0x005f,
  0x0060, 0x0063,  0x0064, 0x0066,  0x0067, 0x0067,  0x0068, 0x0068,
  0x0069, 0x006c,  0x006d, 0x007d,  0x007e, 0x0084,  0x0085, 0x0093,
  0x0094, 0x00ab,  0x00ac, 0x00ae,  0x00af, 0x00b2,  0x00b3, 0x00cf,
  0x00d0, 0x0131,  0x0132, 0x0132,  0x0133, 0x0133,  0x0134, 0x0134,
  0x0135, 0x0135,  0x0136, 0x0136,  0x0137, 0x0137,  0x0138, 0x0138,
  0x0139, 0x0154,  0x0155, 0x01ab,  0x01ac, 0x01ba,  0x01bb, 0x021f,
  0x0220, 0x0220,  0x0221, 0x022d,  0x022e, 0x02e4,  0x02e5, 0x02e5,
  0x02e6, 0x02ec,  0x02ed, 0x02ed,  0x02ee, 0x0324,  0x0325, 0x0332,
  0x0333, 0x0333,  0x0334, 0x1ef1,  0x1ef2, 0x1ef3,  0x1ef4, 0x1ef4,
  0x1ef5, 0x1ef6,  0x1ef7, 0x1efd,  0x1efe, 0x1f06,  0x1f07, 0x1f07,
  0x1f08, 0x1f08,  0x1f09, 0x1f0d,  0x1f0e, 0x1f7d,  0x1f7e, 0x1fd3,
  0x1fd4, 0x1fd4,  0x1fd5, 0x1fd7,  0x1fd8, 0x1fe3,  0x1fe4, 0x1fed,
  0x1fee, 0x202b,  0x202c, 0x202f,  0x2030, 0x2045,  0x2046, 0x2047,
  0x2048, 0x20b5,  0x20b6, 0x20bb,  0x20bc, 0x20bc,  0x20bd, 0x20bf,
  0x20c0, 0x20c3,  0x20c4, 0x20c5,  0x20c6, 0x20c7,  0x20c8, 0x20c8,
  0x20c9, 0x20c9,  0x20ca, 0x20cb,  0x20cc, 0x20d0,  0x20d1, 0x20d5,
  0x20d6, 0x20df,  0x20e0, 0x20e2,  0x20e3, 0x20e7,  0x20e8, 0x20f4,
  0x20f5, 0x20f6,  0x20f7, 0x20fc,  0x20fd, 0x2121,  0x2122, 0x2124,
  0x2125, 0x212f,  0x2130, 0x2148,  0x2149, 0x219a,  0x219b, 0x22e7,
  0x22e8, 0x22f1,  0x22f2, 0x2355,  0x2356, 0x2359,  0x235a, 0x2366,
  0x2367, 0x2369,  0x236a, 0x2373,  0x2374, 0x2383,  0x2384, 0x238b,
  0x238c, 0x2393,  0x2394, 0x2396,  0x2397, 0x2398,  0x2399, 0x23aa,
  0x23ab, 0x23c9,  0x23ca, 0x23cb,  0x23cc, 0x2401,  0x2402, 0x2402,
  0x2403, 0x2c40,  0x2c41, 0x2c42,  0x2c43, 0x2c45,  0x2c46, 0x2c47,
  0x2c48, 0x2c51,  0x2c52, 0x2c60,  0x2c61, 0x2c62,  0x2c63, 0x2c65,
  0x2c66, 0x2c69,  0x2c6a, 0x2c6b,  0x2c6c, 0x2c6e,  0x2c6f, 0x2c7c,
  0x2c7d, 0x2da1,  0x2da2, 0x2da5,  0x2da6, 0x2da6,  0x2da7, 0x2dab,
  0x2dac, 0x2dad,  0x2dae, 0x2dc1,  0x2dc2, 0x2dc3,  0x2dc4, 0x2dca,
  0x2dcb, 0x2dcc,  0x2dcd, 0x2dd1,  0x2dd2, 0x2dd7,  0x2dd8, 0x2ecd,
  0x2ece, 0x2ed4,  0x2ed5, 0x2f45,  0x2f46, 0x302f,  0x3030, 0x303b,
  0x303c, 0x303d,  0x303e, 0x305f,  0x3060, 0x3068,  0x3069, 0x306a,
  0x306b, 0x306c,  0x306d, 0x30dd,  0x30de, 0x3108,  0x3109, 0x3232,
  0x3233, 0x32a1,  0x32a2, 0x32ac,  0x32ad, 0x35a9,  0x35aa, 0x35fe,
  0x35ff, 0x365e,  0x365f, 0x366c,  0x366d, 0x36ff,  0x3700, 0x37d9,
  0x37da, 0x38f8,  0x38f9, 0x3969,  0x396a, 0x3cde,  0x3cdf, 0x3de6,
  0x3de7, 0x3fbd,  0x3fbe, 0x4031,  0x4032, 0x4035,  0x4036, 0x4060,
  0x4061, 0x4158,  0x4159, 0x42cd,  0x42ce, 0x42e1,  0x42e2, 0x43a2,
  0x43a3, 0x43a7,  0x43a8, 0x43f9,  0x43fa, 0x4409,  0x440a, 0x45c2,
  0x45c3, 0x45f4,  0x45f5, 0x45f6,  0x45f7, 0x45fa,  0x45fb, 0x45fb,
  0x45fc, 0x460f,  0x4610, 0x4612,  0x4613, 0x4628,  0x4629, 0x48e7,
  0x48e8, 0x490e,  0x490f, 0x497d,  0x497e, 0x4a11,  0x4a12, 0x4a62,
  0x4a63, 0x82bc,
                  0x82bd, 0x82bd,  0x82be, 0x82be,  0x82bf, 0x82cb,
  0x82cc, 0x82cc,  0x82cd, 0x82d1,  0x82d2, 0x82d8,  0x82d9, 0x82dc,
  0x82dd, 0x82e0,  0x82e1, 0x82e8,  0x82e9, 0x82ef,  0x82f0, 0x82ff,
  0x8300, 0x830d,
                  0x830e, 0x93d4,  0x93d5, 0x9420,  0x9421, 0x943b,
  0x943c, 0x948c,  0x948d, 0x9495,  0x9496, 0x94af,  0x94b0, 0x94b0,
  0x94b1, 0x94b1,  0x94b2, 0x94b4,  0x94b5, 0x94ba,  0x94bb, 0x94bb,
  0x94bc, 0x94bd,  0x94be, 0x98c3,  0x98c4, 0x98c4,  0x98c5, 0x98c8,
  0x98c9, 0x98c9,  0x98ca, 0x98ca,  0x98cb, 0x98cb,  0x98cc, 0x9960,
  0x9961, 0x99e1,  0x99e2, 0x99fb
};

/*
 * Unicode -> four-byte GB18030 range table.
 *
 * Holds 206 pairs { first, last } of Unicode code points, sorted in
 * ascending order and non-overlapping, spanning 0x0080..0xffff.  Unlike
 * the charset-side table, these ranges have gaps between them.
 *
 * gb18030uni_wctomb() binary-searches this table.  A code point that
 * falls into pair k is turned into a linear four-byte index by
 * subtracting gb18030uni_ranges[k]; a code point that falls into a gap
 * between two pairs is rejected with RET_ILUNI.
 */
static const unsigned short gb18030uni_uni2charset_ranges[412] = {
  0x0080, 0x00a3,  0x00a5, 0x00a6,  0x00a9, 0x00af,  0x00b2, 0x00b6,
  0x00b8, 0x00d6,  0x00d8, 0x00df,  0x00e2, 0x00e7,  0x00eb, 0x00eb,
  0x00ee, 0x00f1,  0x00f4, 0x00f6,  0x00f8, 0x00f8,  0x00fb, 0x00fb,
  0x00fd, 0x0100,  0x0102, 0x0112,  0x0114, 0x011a,  0x011c, 0x012a,
  0x012c, 0x0143,  0x0145, 0x0147,  0x0149, 0x014c,  0x014e, 0x016a,
  0x016c, 0x01cd,  0x01cf, 0x01cf,  0x01d1, 0x01d1,  0x01d3, 0x01d3,
  0x01d5, 0x01d5,  0x01d7, 0x01d7,  0x01d9, 0x01d9,  0x01db, 0x01db,
  0x01dd, 0x01f8,  0x01fa, 0x0250,  0x0252, 0x0260,  0x0262, 0x02c6,
  0x02c8, 0x02c8,  0x02cc, 0x02d8,  0x02da, 0x0390,  0x03a2, 0x03a2,
  0x03aa, 0x03b0,  0x03c2, 0x03c2,  0x03ca, 0x0400,  0x0402, 0x040f,
  0x0450, 0x0450,  0x0452, 0x200f,  0x2011, 0x2012,  0x2017, 0x2017,
  0x201a, 0x201b,  0x201e, 0x2024,  0x2027, 0x202f,  0x2031, 0x2031,
  0x2034, 0x2034,  0x2036, 0x203a,  0x203c, 0x20ab,  0x20ad, 0x2102,
  0x2104, 0x2104,  0x2106, 0x2108,  0x210a, 0x2115,  0x2117, 0x2120,
  0x2122, 0x215f,  0x216c, 0x216f,  0x217a, 0x218f,  0x2194, 0x2195,
  0x219a, 0x2207,  0x2209, 0x220e,  0x2210, 0x2210,  0x2212, 0x2214,
  0x2216, 0x2219,  0x221b, 0x221c,  0x2221, 0x2222,  0x2224, 0x2224,
  0x2226, 0x2226,  0x222c, 0x222d,  0x222f, 0x2233,  0x2238, 0x223c,
  0x223e, 0x2247,  0x2249, 0x224b,  0x224d, 0x2251,  0x2253, 0x225f,
  0x2262, 0x2263,  0x2268, 0x226d,  0x2270, 0x2294,  0x2296, 0x2298,
  0x229a, 0x22a4,  0x22a6, 0x22be,  0x22c0, 0x2311,  0x2313, 0x245f,
  0x246a, 0x2473,  0x249c, 0x24ff,  0x254c, 0x254f,  0x2574, 0x2580,
  0x2590, 0x2592,  0x2596, 0x259f,  0x25a2, 0x25b1,  0x25b4, 0x25bb,
  0x25be, 0x25c5,  0x25c8, 0x25ca,  0x25cc, 0x25cd,  0x25d0, 0x25e1,
  0x25e6, 0x2604,  0x2607, 0x2608,  0x260a, 0x263f,  0x2641, 0x2641,
  0x2643, 0x2e80,  0x2e82, 0x2e83,  0x2e85, 0x2e87,  0x2e89, 0x2e8a,
  0x2e8d, 0x2e96,  0x2e98, 0x2ea6,  0x2ea8, 0x2ea9,  0x2eab, 0x2ead,
  0x2eaf, 0x2eb2,  0x2eb4, 0x2eb5,  0x2eb8, 0x2eba,  0x2ebc, 0x2ec9,
  0x2ecb, 0x2fef,  0x2ffc, 0x2fff,  0x3004, 0x3004,  0x3018, 0x301c,
  0x301f, 0x3020,  0x302a, 0x303d,  0x303f, 0x3040,  0x3094, 0x309a,
  0x309f, 0x30a0,  0x30f7, 0x30fb,  0x30ff, 0x3104,  0x312a, 0x321f,
  0x322a, 0x3230,  0x3232, 0x32a2,  0x32a4, 0x338d,  0x3390, 0x339b,
  0x339f, 0x33a0,  0x33a2, 0x33c3,  0x33c5, 0x33cd,  0x33cf, 0x33d0,
  0x33d3, 0x33d4,  0x33d6, 0x3446,  0x3448, 0x3472,  0x3474, 0x359d,
  0x359f, 0x360d,  0x360f, 0x3619,  0x361b, 0x3917,  0x3919, 0x396d,
  0x396f, 0x39ce,  0x39d1, 0x39de,  0x39e0, 0x3a72,  0x3a74, 0x3b4d,
  0x3b4f, 0x3c6d,  0x3c6f, 0x3cdf,  0x3ce1, 0x4055,  0x4057, 0x415e,
  0x4160, 0x4336,  0x4338, 0x43ab,  0x43ad, 0x43b0,  0x43b2, 0x43dc,
  0x43de, 0x44d5,  0x44d7, 0x464b,  0x464d, 0x4660,  0x4662, 0x4722,
  0x4724, 0x4728,  0x472a, 0x477b,  0x477d, 0x478c,  0x478e, 0x4946,
  0x4948, 0x4979,  0x497b, 0x497c,  0x497e, 0x4981,  0x4984, 0x4984,
  0x4987, 0x499a,  0x499c, 0x499e,  0x49a0, 0x49b5,  0x49b8, 0x4c76,
  0x4c78, 0x4c9e,  0x4ca4, 0x4d12,  0x4d1a, 0x4dad,  0x4daf, 0x4dff,
  0x9fa6, 0xd7ff,
                  0xe76c, 0xe76c,  0xe7c8, 0xe7c8,  0xe7e7, 0xe7f3,
  0xe815, 0xe815,  0xe819, 0xe81d,  0xe81f, 0xe825,  0xe827, 0xe82a,
  0xe82d, 0xe830,  0xe833, 0xe83a,  0xe83c, 0xe842,  0xe844, 0xe853,
  0xe856, 0xe863,
                  0xe865, 0xf92b,  0xf92d, 0xf978,  0xf97a, 0xf994,
  0xf996, 0xf9e6,  0xf9e8, 0xf9f0,  0xf9f2, 0xfa0b,  0xfa10, 0xfa10,
  0xfa12, 0xfa12,  0xfa15, 0xfa17,  0xfa19, 0xfa1e,  0xfa22, 0xfa22,
  0xfa25, 0xfa26,  0xfa2a, 0xfe2f,  0xfe32, 0xfe32,  0xfe45, 0xfe48,
  0xfe53, 0xfe53,  0xfe58, 0xfe58,  0xfe67, 0xfe67,  0xfe6c, 0xff00,
  0xff5f, 0xffdf,  0xffe6, 0xffff
};

/*
 * Per-range offsets shared by both conversion directions.
 *
 * gb18030uni_ranges[k] is the difference between a Unicode code point
 * and its linear four-byte index for the k-th pair of
 * gb18030uni_charset2uni_ranges / gb18030uni_uni2charset_ranges:
 * gb18030uni_mbtowc() adds it, gb18030uni_wctomb() subtracts it.
 * The values are strictly increasing.
 */
static const unsigned short gb18030uni_ranges[206] = {
    128,   129,   131,   133,   134,   135,   137,   140,
    142,   144,   145,   147,   148,   149,   150,   151,
    152,   153,   154,   155,   156,   157,   158,   159,
    160,   161,   162,   163,   164,   165,   166,   167,
    168,   171,   172,   189,   196,   213,   220,   221,
    285,   286,   287,   291,   293,   295,   297,   298,
    300,   301,   302,   303,   304,   305,   306,   307,
    308,   320,   330,   334,   338,   339,   340,   341,
    342,   343,   347,   348,   349,   354,   355,   359,
    360,   361,   362,   363,   365,   369,   371,   372,
    373,   374,   375,   376,   386,   426,   502,   538,
    553,   556,   558,   560,   562,   564,   565,   567,
    571,   573,   574,   575,   576,   577,   578,   579,
    581,   582,   583,   584,   585,   586,   588,   589,
    590,   602,   606,   625,   627,   636,   637,   720,
    724,   810,   813,   850,   860,   861,   862,   864,
    867,   868,   869,   870,   872,   873,   874,   875,
    876,   877,   878,   879,   880,   882,   883,   884,
    885,   886,   887,   888,   889,   890,   891,   892,
    893,   894,   895,   896,   897,   898,   899,   900,
    901,   902,   903,   905,   907,   908,   909,   911,
    912,   917,   924,   925, 21827,
                                     25775, 25866, 25896,
  25929, 25932, 25933, 25934, 25936, 25938, 25939, 25940,
  25942,
         25943, 25944, 25945, 25946, 25947, 25948, 25952,
  25953, 25955, 25956, 25959, 25961, 25964, 25966, 25984,
  25994, 25998, 26012, 26016, 26110, 26116
};

170static int
171gb18030uni_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
172{
173  unsigned char c1 = s[0];
174  if (c1 >= 0x81 && c1 <= 0x84) {
175    if (n >= 2) {
176      unsigned char c2 = s[1];
177      if (c2 >= 0x30 && c2 <= 0x39) {
178        if (n >= 3) {
179          unsigned char c3 = s[2];
180          if (c3 >= 0x81 && c3 <= 0xfe) {
181            if (n >= 4) {
182              unsigned char c4 = s[3];
183              if (c4 >= 0x30 && c4 <= 0x39) {
184                unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);
185                if (i >= 0 && i <= 39419) {
186                  unsigned int k1 = 0;
187                  unsigned int k2 = 205;
188                  while (k1 < k2) {
189                    unsigned int k = (k1 + k2) / 2;
190                    if (i <= gb18030uni_charset2uni_ranges[2*k+1])
191                      k2 = k;
192                    else if (i >= gb18030uni_charset2uni_ranges[2*k+2])
193                      k1 = k + 1;
194                    else
195                      return RET_ILSEQ;
196                  }
197                  {
198                    unsigned int diff = gb18030uni_ranges[k1];
199                    *pwc = (ucs4_t) (i + diff);
200                    return 4;
201                  }
202                }
203              }
204              return RET_ILSEQ;
205            }
206            return RET_TOOFEW(0);
207          }
208          return RET_ILSEQ;
209        }
210        return RET_TOOFEW(0);
211      }
212      return RET_ILSEQ;
213    }
214    return RET_TOOFEW(0);
215  }
216  return RET_ILSEQ;
217}
218
219static int
220gb18030uni_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
221{
222  if (n >= 4) {
223    unsigned int i = wc;
224    if (i >= 0x0080 && i <= 0xffff) {
225      unsigned int k1 = 0;
226      unsigned int k2 = 205;
227      while (k1 < k2) {
228        unsigned int k = (k1 + k2) / 2;
229        if (i <= gb18030uni_uni2charset_ranges[2*k+1])
230          k2 = k;
231        else if (i >= gb18030uni_uni2charset_ranges[2*k+2])
232          k1 = k + 1;
233        else
234          return RET_ILUNI;
235      }
236      {
237        unsigned int diff = gb18030uni_ranges[k1];
238        i -= diff;
239        r[3] = (i % 10) + 0x30; i = i / 10;
240        r[2] = (i % 126) + 0x81; i = i / 126;
241        r[1] = (i % 10) + 0x30; i = i / 10;
242        r[0] = i + 0x81;
243        return 4;
244      }
245    }
246    return RET_ILUNI;
247  }
248  return RET_TOOSMALL;
249}
250