/* autogenerated. */
/* src="transcode-tblgen.rb", len=28460, checksum=51276 */
/* src="utf_16_32.trans", len=15308, checksum=28538 */

#include "transcode_data.h"



/*
 * Byte-level lookup data shared by every transcoder in this file.
 *
 * The array is a concatenation of sub-tables.  Each sub-table starts at the
 * index given by its *_offsets macro and is laid out as
 *     min, max, entry[min], ..., entry[max]
 * i.e. two range bytes followed by (max - min + 1) small indices.  Each
 * index selects a slot in the matching *_infos run of utf_16_32_word_array.
 * How bytes outside min..max are treated is decided by the transcoding
 * engine, which is not part of this file.
 */
static const unsigned char
utf_16_32_byte_array[1288] = {
#define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0
    220, 223,
      1,  1,  1,  1,

#define from_UTF_16LE_00toFF_D8toDB_offsets 6
    0, 255,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,

#define from_UTF_16LE_00toFF_offsets 264
    0, 255,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    1,  1,  1,  1,  2,  2,  2,  2,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,

#define from_UTF_32LE_00toFF_00toD7_00_offsets 522
    0, 0,
      0,

#define from_UTF_32LE_00toFF_00toD7_offsets 525
    0, 16,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,

#define from_UTF_32LE_00toFF_D8toDF_offsets 544
    1, 16,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,

#define from_UTF_32LE_00toFF_offsets 562
    0, 255,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    1,  1,  1,  1,  1,  1,  1,  1,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,

#define from_UTF_32BE_00_offsets 820
    0, 16,
      0,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,

#define from_UTF_8_C2toDF_offsets 839
    128, 191,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,

#define from_UTF_8_E0_offsets 905
    160, 191,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,

#define from_UTF_8_ED_offsets 939
    128, 159,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,

#define from_UTF_8_F0_offsets 973
    144, 191,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,

#define from_UTF_8_F4_offsets 1023
    128, 143,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,

#define from_UTF_8_offsets 1041
    0, 244,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
      1,  1,  2,  2,  2,  2,  2,  2,    2,  2,  2,  2,  2,  2,  2,  2,
      2,  2,  2,  2,  2,  2,  2,  2,    2,  2,  2,  2,  2,  2,  2,  2,
      3,  4,  4,  4,  4,  4,  4,  4,    4,  4,  4,  4,  4,  5,  4,  4,
      6,  7,  7,  7,  8,

};
137static const unsigned int
138utf_16_32_word_array[106] = {
139#define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0)
140     INVALID,   FUNso,
141
142#define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2)
143    from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
144    from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
145
146#define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4)
147     from_UTF_16LE_00toFF_D8toDB_00toFF,
148
149#define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5)
150    from_UTF_16LE_00toFF_D8toDB_offsets,
151    from_UTF_16LE_00toFF_D8toDB_infos,
152
153#define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7)
154                           FUNso, from_UTF_16LE_00toFF_D8toDB,
155                         INVALID,
156
157#define from_UTF_16LE_00toFF WORDINDEX2INFO(10)
158    from_UTF_16LE_00toFF_offsets,
159    from_UTF_16LE_00toFF_infos,
160
161#define from_UTF_16LE_infos WORDINDEX2INFO(12)
162     from_UTF_16LE_00toFF,
163
164#define from_UTF_16LE WORDINDEX2INFO(13)
165    from_UTF_16LE_00toFF_D8toDB_offsets,
166    from_UTF_16LE_infos,
167
168#define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15)
169       FUNso, INVALID,
170
171#define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17)
172    from_UTF_32LE_00toFF_00toD7_00_offsets,
173    from_UTF_32LE_00toFF_00toD7_00_infos,
174
175#define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19)
176     from_UTF_32LE_00toFF_00toD7_00,                        INVALID,
177
178#define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21)
179    from_UTF_32LE_00toFF_00toD7_offsets,
180    from_UTF_32LE_00toFF_00toD7_infos,
181
182#define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23)
183                            INVALID, from_UTF_32LE_00toFF_00toD7_00,
184
185#define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25)
186    from_UTF_32LE_00toFF_D8toDF_offsets,
187    from_UTF_32LE_00toFF_D8toDF_infos,
188
189#define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27)
190     from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF,
191
192#define from_UTF_32LE_00toFF WORDINDEX2INFO(29)
193    from_UTF_32LE_00toFF_offsets,
194    from_UTF_32LE_00toFF_infos,
195
196#define from_UTF_32LE_infos WORDINDEX2INFO(31)
197     from_UTF_32LE_00toFF,
198
199#define from_UTF_32LE WORDINDEX2INFO(32)
200    from_UTF_16LE_00toFF_D8toDB_offsets,
201    from_UTF_32LE_infos,
202
203#define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34)
204     FUNso,
205
206#define from_UTF_16BE_00toD7 WORDINDEX2INFO(35)
207    from_UTF_16LE_00toFF_D8toDB_offsets,
208    from_UTF_16BE_00toD7_infos,
209
210#define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37)
211                  INVALID, from_UTF_16BE_00toD7,
212
213#define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39)
214    from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
215    from_UTF_16BE_D8toDB_00toFF_infos,
216
217#define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41)
218     from_UTF_16BE_D8toDB_00toFF,
219
220#define from_UTF_16BE_D8toDB WORDINDEX2INFO(42)
221    from_UTF_16LE_00toFF_D8toDB_offsets,
222    from_UTF_16BE_D8toDB_infos,
223
224#define from_UTF_16BE_infos WORDINDEX2INFO(44)
225     from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB,
226                  INVALID,
227
228#define from_UTF_16BE WORDINDEX2INFO(47)
229    from_UTF_16LE_00toFF_offsets,
230    from_UTF_16BE_infos,
231
232#define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49)
233     from_UTF_16BE_00toD7,              INVALID,
234
235#define from_UTF_32BE_00_00 WORDINDEX2INFO(51)
236    from_UTF_32LE_00toFF_offsets,
237    from_UTF_32BE_00_00_infos,
238
239#define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53)
240     from_UTF_16BE_00toD7,
241
242#define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54)
243    from_UTF_16LE_00toFF_D8toDB_offsets,
244    from_UTF_32BE_00_01to10_infos,
245
246#define from_UTF_32BE_00_infos WORDINDEX2INFO(56)
247         from_UTF_32BE_00_00, from_UTF_32BE_00_01to10,
248                     INVALID,
249
250#define from_UTF_32BE_00 WORDINDEX2INFO(59)
251    from_UTF_32BE_00_offsets,
252    from_UTF_32BE_00_infos,
253
254#define from_UTF_32BE_infos WORDINDEX2INFO(61)
255     from_UTF_32BE_00,          INVALID,
256
257#define from_UTF_32BE WORDINDEX2INFO(63)
258    from_UTF_32LE_00toFF_00toD7_00_offsets,
259    from_UTF_32BE_infos,
260
261#define from_UTF_16_00toFF_infos WORDINDEX2INFO(65)
262     FUNsi,
263
264#define from_UTF_16_00toFF WORDINDEX2INFO(66)
265    from_UTF_16LE_00toFF_D8toDB_offsets,
266    from_UTF_16_00toFF_infos,
267
268#define from_UTF_16_infos WORDINDEX2INFO(68)
269     from_UTF_16_00toFF,
270
271#define from_UTF_16 WORDINDEX2INFO(69)
272    from_UTF_16LE_00toFF_D8toDB_offsets,
273    from_UTF_16_infos,
274
275#define from_UTF_32_00toFF_infos WORDINDEX2INFO(71)
276     from_UTF_16,
277
278#define from_UTF_32_00toFF WORDINDEX2INFO(72)
279    from_UTF_16LE_00toFF_D8toDB_offsets,
280    from_UTF_32_00toFF_infos,
281
282#define from_UTF_32_infos WORDINDEX2INFO(74)
283     from_UTF_32_00toFF,
284
285#define from_UTF_32 WORDINDEX2INFO(75)
286    from_UTF_16LE_00toFF_D8toDB_offsets,
287    from_UTF_32_infos,
288
289#define from_UTF_8_C2toDF WORDINDEX2INFO(77)
290    from_UTF_8_C2toDF_offsets,
291    from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
292
293#define from_UTF_8_E0_infos WORDINDEX2INFO(79)
294               INVALID, from_UTF_8_C2toDF,
295
296#define from_UTF_8_E0 WORDINDEX2INFO(81)
297    from_UTF_8_E0_offsets,
298    from_UTF_8_E0_infos,
299
300#define from_UTF_8_E1toEC WORDINDEX2INFO(83)
301    from_UTF_8_C2toDF_offsets,
302    from_UTF_8_E0_infos,
303
304#define from_UTF_8_ED WORDINDEX2INFO(85)
305    from_UTF_8_ED_offsets,
306    from_UTF_8_E0_infos,
307
308#define from_UTF_8_F0_infos WORDINDEX2INFO(87)
309               INVALID, from_UTF_8_E1toEC,
310
311#define from_UTF_8_F0 WORDINDEX2INFO(89)
312    from_UTF_8_F0_offsets,
313    from_UTF_8_F0_infos,
314
315#define from_UTF_8_F1toF3 WORDINDEX2INFO(91)
316    from_UTF_8_C2toDF_offsets,
317    from_UTF_8_F0_infos,
318
319#define from_UTF_8_F4 WORDINDEX2INFO(93)
320    from_UTF_8_F4_offsets,
321    from_UTF_8_F0_infos,
322
323#define from_UTF_8_infos WORDINDEX2INFO(95)
324                 FUNso,           INVALID,
325     from_UTF_8_C2toDF,     from_UTF_8_E0,
326     from_UTF_8_E1toEC,     from_UTF_8_ED,
327         from_UTF_8_F0, from_UTF_8_F1toF3,
328         from_UTF_8_F4,
329
330#define from_UTF_8 WORDINDEX2INFO(104)
331    from_UTF_8_offsets,
332    from_UTF_8_infos,
333
334};
335#define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int))
336
337
/*
 * Re-encode one UTF-16BE character as UTF-8.
 *
 * s      - input bytes, high byte first.  s[0] and s[1] are always read;
 *          s[2] and s[3] are read only when s[0] is in 0xD8..0xDF.
 * o      - output buffer; between 1 and 4 bytes are stored.
 * statep, l, osize - not used by this function.
 *
 * Returns the number of bytes stored in o.
 * The function performs no validation; it presumably relies on the lookup
 * tables having accepted the input first (verify against the caller).
 */
static ssize_t
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char hi = s[0];
    const unsigned char lo = s[1];

    if (hi == 0 && lo < 0x80) {
        /* U+0000..U+007F: a single byte, copied unchanged. */
        o[0] = lo;
        return 1;
    }
    if (hi < 0x08) {
        /* U+0080..U+07FF: 110xxxxx 10xxxxxx. */
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }
    if ((hi & 0xF8) != 0xD8) {
        /* Any other unit outside 0xD800..0xDFFF: three bytes. */
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }
    /* Surrogate pair: the four bits after the 110110 prefix hold
       (plane - 1), so add 1 to recover the top bits of the code point. */
    {
        unsigned int u = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (u >> 2);
        o[1] = 0x80 | ((u & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((s[2] & 0x03) << 2) | (s[3] >> 6);
        o[3] = 0x80 | (s[3] & 0x3F);
        return 4;
    }
}

/*
 * Re-encode one UTF-8 character as UTF-16BE.
 *
 * s      - UTF-8 bytes; the lead byte s[0] decides how many of s[1..3]
 *          are read (0, 1, 2 or 3).
 * o      - output buffer; 2 bytes are stored, or 4 for a surrogate pair.
 * statep, l, osize - not used by this function.
 *
 * Returns the number of bytes stored in o (2 or 4).
 * Any lead byte that is not of the 1-, 2- or 3-byte form is treated as a
 * 4-byte lead; the input is assumed to be well-formed (TODO confirm that
 * the tables guarantee this).
 */
static ssize_t
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    if ((lead & 0x80) == 0) {
        /* Single byte: zero-extend to 16 bits. */
        o[0] = 0x00;
        o[1] = lead;
        return 2;
    }
    if ((lead & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[0] = (lead >> 2) & 0x07;
        o[1] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker bits,
           and bits shifted above bit 7 vanish when stored in a byte. */
        o[0] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[1] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }
    /* Four-byte form -> surrogate pair.  w is (plane - 1), the value the
       high surrogate carries in the bits after its 110110 prefix. */
    {
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[0] = 0xD8 | (w >> 2);
        o[1] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[2] = 0xDC | ((s[2] >> 2) & 0x03);
        o[3] = (s[2] << 6) | (s[3] & 0x7F);
        return 4;
    }
}

/*
 * Re-encode one UTF-16LE character as UTF-8.
 *
 * s      - input bytes, low byte first.  s[0] and s[1] are always read;
 *          s[2] and s[3] are read only when s[1] is in 0xD8..0xDF.
 * o      - output buffer; between 1 and 4 bytes are stored.
 * statep, l, osize - not used by this function.
 *
 * Returns the number of bytes stored in o.
 * The function performs no validation; it presumably relies on the lookup
 * tables having accepted the input first (verify against the caller).
 */
static ssize_t
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];

    if (hi == 0 && lo < 0x80) {
        /* U+0000..U+007F: a single byte, copied unchanged. */
        o[0] = lo;
        return 1;
    }
    if (hi < 0x08) {
        /* U+0080..U+07FF: 110xxxxx 10xxxxxx. */
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }
    if ((hi & 0xF8) != 0xD8) {
        /* Any other unit outside 0xD800..0xDFFF: three bytes. */
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }
    /* Surrogate pair: the four bits after the 110110 prefix hold
       (plane - 1), so add 1 to recover the top bits of the code point. */
    {
        unsigned int u = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (u >> 2);
        o[1] = 0x80 | ((u & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((s[3] & 0x03) << 2) | (s[2] >> 6);
        o[3] = 0x80 | (s[2] & 0x3F);
        return 4;
    }
}

/*
 * Re-encode one UTF-8 character as UTF-16LE.
 *
 * s      - UTF-8 bytes; the lead byte s[0] decides how many of s[1..3]
 *          are read (0, 1, 2 or 3).
 * o      - output buffer; 2 bytes are stored, or 4 for a surrogate pair,
 *          low byte of each 16-bit unit first.
 * statep, l, osize - not used by this function.
 *
 * Returns the number of bytes stored in o (2 or 4).
 * Any lead byte that is not of the 1-, 2- or 3-byte form is treated as a
 * 4-byte lead; the input is assumed to be well-formed (TODO confirm that
 * the tables guarantee this).
 */
static ssize_t
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    if ((lead & 0x80) == 0) {
        /* Single byte: zero-extend to 16 bits. */
        o[1] = 0x00;
        o[0] = lead;
        return 2;
    }
    if ((lead & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[1] = (lead >> 2) & 0x07;
        o[0] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker bits,
           and bits shifted above bit 7 vanish when stored in a byte. */
        o[1] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[0] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }
    /* Four-byte form -> surrogate pair.  w is (plane - 1), the value the
       high surrogate carries in the bits after its 110110 prefix. */
    {
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[1] = 0xD8 | (w >> 2);
        o[0] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[3] = 0xDC | ((s[2] >> 2) & 0x03);
        o[2] = (s[2] << 6) | (s[3] & 0x7F);
        return 4;
    }
}

/*
 * Re-encode one UTF-32BE character as UTF-8.
 *
 * s      - four input bytes, most significant first.  Only s[1], s[2] and
 *          s[3] are read; s[0] is never inspected here.
 * o      - output buffer; between 1 and 4 bytes are stored.
 * statep, l, osize - not used by this function.
 *
 * Returns the number of bytes stored in o.
 * No range or surrogate check is made in this function; that is presumably
 * left to the lookup tables (verify against the caller).
 */
static ssize_t
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    if (s[1] != 0) {
        /* Code point above U+FFFF: four bytes. */
        o[0] = 0xF0 | (s[1] >> 2);
        o[1] = 0x80 | ((s[1] & 0x03) << 4) | (s[2] >> 4);
        o[2] = 0x80 | ((s[2] & 0x0F) << 2) | (s[3] >> 6);
        o[3] = 0x80 | (s[3] & 0x3F);
        return 4;
    }
    if (s[2] == 0 && s[3] < 0x80) {
        /* U+0000..U+007F: a single byte, copied unchanged. */
        o[0] = s[3];
        return 1;
    }
    if (s[2] < 0x08) {
        /* U+0080..U+07FF: two bytes. */
        o[0] = 0xC0 | (s[2] << 2) | (s[3] >> 6);
        o[1] = 0x80 | (s[3] & 0x3F);
        return 2;
    }
    /* U+0800..U+FFFF: three bytes. */
    o[0] = 0xE0 | (s[2] >> 4);
    o[1] = 0x80 | ((s[2] & 0x0F) << 2) | (s[3] >> 6);
    o[2] = 0x80 | (s[3] & 0x3F);
    return 3;
}

/*
 * Re-encode one UTF-8 character as UTF-32BE.
 *
 * s      - UTF-8 bytes; the lead byte s[0] decides how many of s[1..3]
 *          are read (0, 1, 2 or 3).
 * o      - output buffer; exactly 4 bytes are stored, most significant
 *          first.  o[0] is always 0.
 * statep, l, osize - not used by this function.
 *
 * Always returns 4.
 * Any lead byte that is not of the 1-, 2- or 3-byte form is treated as a
 * 4-byte lead; the input is assumed to be well-formed (TODO confirm).
 */
static ssize_t
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    o[0] = 0;
    if ((lead & 0x80) == 0) {
        /* Single byte. */
        o[1] = 0x00;
        o[2] = 0x00;
        o[3] = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[1] = 0x00;
        o[2] = (lead >> 2) & 0x07;
        o[3] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker bits,
           and bits shifted above bit 7 vanish when stored in a byte. */
        o[1] = 0x00;
        o[2] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[3] = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[1] = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        o[2] = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        o[3] = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }
    return 4;
}

/*
 * Re-encode one UTF-32LE character as UTF-8.
 *
 * s      - four input bytes, least significant first.  Only s[0], s[1] and
 *          s[2] are read; s[3] is never inspected here.
 * o      - output buffer; between 1 and 4 bytes are stored.
 * statep, l, osize - not used by this function.
 *
 * Returns the number of bytes stored in o.
 * No range or surrogate check is made in this function; that is presumably
 * left to the lookup tables (verify against the caller).
 */
static ssize_t
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    if (s[2] != 0) {
        /* Code point above U+FFFF: four bytes. */
        o[0] = 0xF0 | (s[2] >> 2);
        o[1] = 0x80 | ((s[2] & 0x03) << 4) | (s[1] >> 4);
        o[2] = 0x80 | ((s[1] & 0x0F) << 2) | (s[0] >> 6);
        o[3] = 0x80 | (s[0] & 0x3F);
        return 4;
    }
    if (s[1] == 0 && s[0] < 0x80) {
        /* U+0000..U+007F: a single byte, copied unchanged. */
        o[0] = s[0];
        return 1;
    }
    if (s[1] < 0x08) {
        /* U+0080..U+07FF: two bytes. */
        o[0] = 0xC0 | (s[1] << 2) | (s[0] >> 6);
        o[1] = 0x80 | (s[0] & 0x3F);
        return 2;
    }
    /* U+0800..U+FFFF: three bytes. */
    o[0] = 0xE0 | (s[1] >> 4);
    o[1] = 0x80 | ((s[1] & 0x0F) << 2) | (s[0] >> 6);
    o[2] = 0x80 | (s[0] & 0x3F);
    return 3;
}

/*
 * Re-encode one UTF-8 character as UTF-32LE.
 *
 * s      - UTF-8 bytes; the lead byte s[0] decides how many of s[1..3]
 *          are read (0, 1, 2 or 3).
 * o      - output buffer; exactly 4 bytes are stored, least significant
 *          first.  o[3] is always 0.
 * statep, l, osize - not used by this function.
 *
 * Always returns 4.
 * Any lead byte that is not of the 1-, 2- or 3-byte form is treated as a
 * 4-byte lead; the input is assumed to be well-formed (TODO confirm).
 */
static ssize_t
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    o[3] = 0;
    if ((lead & 0x80) == 0) {
        /* Single byte. */
        o[2] = 0x00;
        o[1] = 0x00;
        o[0] = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 110xxxxx 10xxxxxx */
        o[2] = 0x00;
        o[1] = (lead >> 2) & 0x07;
        o[0] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx; the XORs clear the 10 marker bits,
           and bits shifted above bit 7 vanish when stored in a byte. */
        o[2] = 0x00;
        o[1] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[0] = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[2] = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        o[1] = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        o[0] = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }
    return 4;
}

/*
 * Reset the one-byte transcoder state to 0.
 *
 * statep - points at the state byte; the stateful transcoders below all
 *          declare a state_size of 1.
 *
 * Always returns 0.
 */
static int
state_init(void *statep)
{
    unsigned char *state = statep;

    state[0] = 0;
    return 0;
}

568static VALUE
569fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
570{
571    #define BE 1
572    #define LE 2
573    unsigned char *sp = statep;
574    switch (*sp) {
575    case 0:
576        if (s[0] == 0xFE && s[1] == 0xFF) {
577            *sp = BE;
578            return ZERObt;
579        }
580        else if (s[0] == 0xFF && s[1] == 0xFE) {
581            *sp = LE;
582            return ZERObt;
583        }
584        break;
585    case BE:
586        if (s[0] < 0xD8 || 0xDF < s[0]) {
587            return (VALUE)FUNso;
588        }
589        else if (s[0] <= 0xDB) {
590            return (VALUE)from_UTF_16BE_D8toDB_00toFF;
591        }
592        break;
593    case LE:
594        if (s[1] < 0xD8 || 0xDF < s[1]) {
595            return (VALUE)FUNso;
596        }
597        else if (s[1] <= 0xDB) {
598            return (VALUE)from_UTF_16LE_00toFF_D8toDB;
599        }
600        break;
601    }
602    return (VALUE)INVALID;
603}
604
605static ssize_t
606fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
607{
608    unsigned char *sp = statep;
609    switch (*sp) {
610    case BE:
611        return fun_so_from_utf_16be(statep, s, l, o, osize);
612    case LE:
613        return fun_so_from_utf_16le(statep, s, l, o, osize);
614    }
615    return 0;
616}
617
618static VALUE
619fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
620{
621    unsigned char *sp = statep;
622    switch (*sp) {
623    case 0:
624        if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) {
625            *sp = BE;
626            return ZERObt;
627        }
628        else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) {
629            *sp = LE;
630            return ZERObt;
631        }
632        break;
633    case BE:
634        if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) ||
635              (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2]))))
636            return (VALUE)FUNso;
637        break;
638    case LE:
639        if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) ||
640              (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1]))))
641            return (VALUE)FUNso;
642        break;
643    }
644    return (VALUE)INVALID;
645}
646
647static ssize_t
648fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
649{
650    unsigned char *sp = statep;
651    switch (*sp) {
652    case BE:
653        return fun_so_from_utf_32be(statep, s, l, o, osize);
654    case LE:
655        return fun_so_from_utf_32le(statep, s, l, o, osize);
656    }
657    return 0;
658}
659
/*
 * Encode one UTF-8 character as big-endian "UTF-16", preceded by a
 * byte-order mark on the first call.
 *
 * statep - points at the state byte: 0 until the mark FE FF has been
 *          written, 1 afterwards.
 * s, l, osize - passed through to fun_so_to_utf_16be.
 * o      - output buffer.  The first call stores 2 mark bytes plus the
 *          character, so up to 6 bytes in total; later calls store up
 *          to 4.
 *
 * Returns the number of bytes stored in o.
 */
static ssize_t
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *sp = statep;
    ssize_t bom_len = 0;

    if (*sp == 0) {
        o[0] = 0xFE;
        o[1] = 0xFF;
        *sp = 1;
        bom_len = 2;
    }
    /* osize is forwarded untouched; the BE encoder does not consult it. */
    return bom_len + fun_so_to_utf_16be(statep, s, l, o + bom_len, osize);
}

/*
 * Encode one UTF-8 character as big-endian "UTF-32", preceded by a
 * byte-order mark on the first call.
 *
 * statep - points at the state byte: 0 until the mark 00 00 FE FF has
 *          been written, 1 afterwards.
 * s, l, osize - passed through to fun_so_to_utf_32be.
 * o      - output buffer.  The first call stores 4 mark bytes plus the
 *          character, 8 bytes in total; later calls store 4.
 *
 * Returns the number of bytes stored in o.
 */
static ssize_t
fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *sp = statep;
    ssize_t bom_len = 0;

    if (*sp == 0) {
        o[0] = 0x00;
        o[1] = 0x00;
        o[2] = 0xFE;
        o[3] = 0xFF;
        *sp = 1;
        bom_len = 4;
    }
    /* osize is forwarded untouched; the BE encoder does not consult it. */
    return bom_len + fun_so_to_utf_32be(statep, s, l, o + bom_len, osize);
}

688static const rb_transcoder
689rb_from_UTF_16BE = {
690    "UTF-16BE", "UTF-8", from_UTF_16BE,
691    TRANSCODE_TABLE_INFO,
692    2, /* input_unit_length */
693    4, /* max_input */
694    4, /* max_output */
695    asciicompat_decoder, /* asciicompat_type */
696    0, NULL, NULL, /* state_size, state_init, state_fini */
697    NULL, NULL, NULL, fun_so_from_utf_16be
698};
699
700static const rb_transcoder
701rb_to_UTF_16BE = {
702    "UTF-8", "UTF-16BE", from_UTF_8,
703    TRANSCODE_TABLE_INFO,
704    1, /* input_unit_length */
705    4, /* max_input */
706    4, /* max_output */
707    asciicompat_encoder, /* asciicompat_type */
708    0, NULL, NULL, /* state_size, state_init, state_fini */
709    NULL, NULL, NULL, fun_so_to_utf_16be
710};
711
712static const rb_transcoder
713rb_from_UTF_16LE = {
714    "UTF-16LE", "UTF-8", from_UTF_16LE,
715    TRANSCODE_TABLE_INFO,
716    2, /* input_unit_length */
717    4, /* max_input */
718    4, /* max_output */
719    asciicompat_decoder, /* asciicompat_type */
720    0, NULL, NULL, /* state_size, state_init, state_fini */
721    NULL, NULL, NULL, fun_so_from_utf_16le
722};
723
724static const rb_transcoder
725rb_to_UTF_16LE = {
726    "UTF-8", "UTF-16LE", from_UTF_8,
727    TRANSCODE_TABLE_INFO,
728    1, /* input_unit_length */
729    4, /* max_input */
730    4, /* max_output */
731    asciicompat_encoder, /* asciicompat_type */
732    0, NULL, NULL, /* state_size, state_init, state_fini */
733    NULL, NULL, NULL, fun_so_to_utf_16le
734};
735
736static const rb_transcoder
737rb_from_UTF_32BE = {
738    "UTF-32BE", "UTF-8", from_UTF_32BE,
739    TRANSCODE_TABLE_INFO,
740    4, /* input_unit_length */
741    4, /* max_input */
742    4, /* max_output */
743    asciicompat_decoder, /* asciicompat_type */
744    0, NULL, NULL, /* state_size, state_init, state_fini */
745    NULL, NULL, NULL, fun_so_from_utf_32be
746};
747
748static const rb_transcoder
749rb_to_UTF_32BE = {
750    "UTF-8", "UTF-32BE", from_UTF_8,
751    TRANSCODE_TABLE_INFO,
752    1, /* input_unit_length */
753    4, /* max_input */
754    4, /* max_output */
755    asciicompat_encoder, /* asciicompat_type */
756    0, NULL, NULL, /* state_size, state_init, state_fini */
757    NULL, NULL, NULL, fun_so_to_utf_32be
758};
759
760static const rb_transcoder
761rb_from_UTF_32LE = {
762    "UTF-32LE", "UTF-8", from_UTF_32LE,
763    TRANSCODE_TABLE_INFO,
764    4, /* input_unit_length */
765    4, /* max_input */
766    4, /* max_output */
767    asciicompat_decoder, /* asciicompat_type */
768    0, NULL, NULL, /* state_size, state_init, state_fini */
769    NULL, NULL, NULL, fun_so_from_utf_32le
770};
771
772static const rb_transcoder
773rb_to_UTF_32LE = {
774    "UTF-8", "UTF-32LE", from_UTF_8,
775    TRANSCODE_TABLE_INFO,
776    1, /* input_unit_length */
777    4, /* max_input */
778    4, /* max_output */
779    asciicompat_encoder, /* asciicompat_type */
780    0, NULL, NULL, /* state_size, state_init, state_fini */
781    NULL, NULL, NULL, fun_so_to_utf_32le
782};
783
784static const rb_transcoder
785rb_from_UTF_16 = {
786    "UTF-16", "UTF-8", from_UTF_16,
787    TRANSCODE_TABLE_INFO,
788    2, /* input_unit_length */
789    4, /* max_input */
790    4, /* max_output */
791    asciicompat_decoder, /* asciicompat_type */
792    1, state_init, NULL, /* state_size, state_init, state_fini */
793    NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
794};
795
796static const rb_transcoder
797rb_from_UTF_32 = {
798    "UTF-32", "UTF-8", from_UTF_32,
799    TRANSCODE_TABLE_INFO,
800    4, /* input_unit_length */
801    4, /* max_input */
802    4, /* max_output */
803    asciicompat_decoder, /* asciicompat_type */
804    1, state_init, NULL, /* state_size, state_init, state_fini */
805    NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32
806};
807
808static const rb_transcoder
809rb_to_UTF_16 = {
810    "UTF-8", "UTF-16", from_UTF_8,
811    TRANSCODE_TABLE_INFO,
812    1, /* input_unit_length */
813    4, /* max_input */
814    4, /* max_output */
815    asciicompat_encoder, /* asciicompat_type */
816    1, state_init, NULL, /* state_size, state_init, state_fini */
817    NULL, NULL, NULL, fun_so_to_utf_16
818};
819
820static const rb_transcoder
821rb_to_UTF_32 = {
822    "UTF-8", "UTF-32", from_UTF_8,
823    TRANSCODE_TABLE_INFO,
824    1, /* input_unit_length */
825    4, /* max_input */
826    4, /* max_output */
827    asciicompat_encoder, /* asciicompat_type */
828    1, state_init, NULL, /* state_size, state_init, state_fini */
829    NULL, NULL, NULL, fun_so_to_utf_32
830};
831
832TRANS_INIT(utf_16_32)
833{
834    rb_register_transcoder(&rb_from_UTF_16BE);
835    rb_register_transcoder(&rb_to_UTF_16BE);
836    rb_register_transcoder(&rb_from_UTF_16LE);
837    rb_register_transcoder(&rb_to_UTF_16LE);
838    rb_register_transcoder(&rb_from_UTF_32BE);
839    rb_register_transcoder(&rb_to_UTF_32BE);
840    rb_register_transcoder(&rb_from_UTF_32LE);
841    rb_register_transcoder(&rb_to_UTF_32LE);
842    rb_register_transcoder(&rb_from_UTF_16);
843    rb_register_transcoder(&rb_to_UTF_16);
844    rb_register_transcoder(&rb_from_UTF_32);
845    rb_register_transcoder(&rb_to_UTF_32);
846}
847
848