1/* autogenerated. */ 2/* src="transcode-tblgen.rb", len=28460, checksum=51276 */ 3/* src="utf_16_32.trans", len=15308, checksum=28538 */ 4 5#include "transcode_data.h" 6 7 8 9static const unsigned char 10utf_16_32_byte_array[1288] = { 11#define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0 12220, 223, 13 1, 1, 1, 1, 14 15#define from_UTF_16LE_00toFF_D8toDB_offsets 6 160, 255, 17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 34#define from_UTF_16LE_00toFF_offsets 264 350, 255, 36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52 53#define from_UTF_32LE_00toFF_00toD7_00_offsets 522 540, 0, 55 0, 56 57#define from_UTF_32LE_00toFF_00toD7_offsets 525 580, 16, 59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60 0, 61 62#define from_UTF_32LE_00toFF_D8toDF_offsets 544 631, 16, 64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 65 66#define from_UTF_32LE_00toFF_offsets 562 670, 255, 68 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 82 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 85#define from_UTF_32BE_00_offsets 820 860, 16, 87 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 88 1, 89 90#define from_UTF_8_C2toDF_offsets 839 91128, 191, 92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 96 97#define from_UTF_8_E0_offsets 905 98160, 191, 99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 101 102#define from_UTF_8_ED_offsets 939 103128, 159, 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 106 107#define from_UTF_8_F0_offsets 973 108144, 191, 109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 112 113#define from_UTF_8_F4_offsets 1023 114128, 143, 115 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 116 117#define from_UTF_8_offsets 1041 1180, 244, 119 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 131 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 132 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 133 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 134 6, 7, 7, 7, 8, 135 136}; 137static const unsigned int 138utf_16_32_word_array[106] = { 139#define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0) 140 INVALID, FUNso, 141 142#define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2) 143 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets, 144 from_UTF_16LE_00toFF_D8toDB_00toFF_infos, 145 146#define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4) 147 from_UTF_16LE_00toFF_D8toDB_00toFF, 148 149#define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5) 150 from_UTF_16LE_00toFF_D8toDB_offsets, 151 from_UTF_16LE_00toFF_D8toDB_infos, 152 153#define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7) 154 FUNso, from_UTF_16LE_00toFF_D8toDB, 155 INVALID, 156 157#define from_UTF_16LE_00toFF WORDINDEX2INFO(10) 158 from_UTF_16LE_00toFF_offsets, 159 from_UTF_16LE_00toFF_infos, 160 161#define from_UTF_16LE_infos WORDINDEX2INFO(12) 162 from_UTF_16LE_00toFF, 163 164#define from_UTF_16LE WORDINDEX2INFO(13) 165 from_UTF_16LE_00toFF_D8toDB_offsets, 166 from_UTF_16LE_infos, 167 168#define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15) 169 FUNso, INVALID, 170 171#define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17) 172 from_UTF_32LE_00toFF_00toD7_00_offsets, 173 from_UTF_32LE_00toFF_00toD7_00_infos, 174 175#define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19) 176 from_UTF_32LE_00toFF_00toD7_00, INVALID, 177 178#define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21) 179 from_UTF_32LE_00toFF_00toD7_offsets, 180 from_UTF_32LE_00toFF_00toD7_infos, 181 182#define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23) 183 INVALID, from_UTF_32LE_00toFF_00toD7_00, 184 185#define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25) 186 from_UTF_32LE_00toFF_D8toDF_offsets, 187 from_UTF_32LE_00toFF_D8toDF_infos, 188 189#define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27) 190 from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF, 191 192#define from_UTF_32LE_00toFF WORDINDEX2INFO(29) 193 from_UTF_32LE_00toFF_offsets, 194 from_UTF_32LE_00toFF_infos, 195 196#define from_UTF_32LE_infos WORDINDEX2INFO(31) 197 from_UTF_32LE_00toFF, 198 199#define from_UTF_32LE WORDINDEX2INFO(32) 200 from_UTF_16LE_00toFF_D8toDB_offsets, 201 from_UTF_32LE_infos, 202 203#define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34) 204 FUNso, 205 206#define from_UTF_16BE_00toD7 WORDINDEX2INFO(35) 207 from_UTF_16LE_00toFF_D8toDB_offsets, 208 from_UTF_16BE_00toD7_infos, 209 210#define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37) 211 INVALID, from_UTF_16BE_00toD7, 212 213#define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39) 214 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets, 215 from_UTF_16BE_D8toDB_00toFF_infos, 216 217#define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41) 218 from_UTF_16BE_D8toDB_00toFF, 219 220#define from_UTF_16BE_D8toDB WORDINDEX2INFO(42) 221 from_UTF_16LE_00toFF_D8toDB_offsets, 222 from_UTF_16BE_D8toDB_infos, 223 224#define from_UTF_16BE_infos WORDINDEX2INFO(44) 225 from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB, 226 INVALID, 227 228#define from_UTF_16BE WORDINDEX2INFO(47) 229 from_UTF_16LE_00toFF_offsets, 230 from_UTF_16BE_infos, 231 232#define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49) 233 from_UTF_16BE_00toD7, INVALID, 234 235#define from_UTF_32BE_00_00 WORDINDEX2INFO(51) 236 from_UTF_32LE_00toFF_offsets, 237 from_UTF_32BE_00_00_infos, 238 239#define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53) 240 from_UTF_16BE_00toD7, 241 242#define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54) 243 from_UTF_16LE_00toFF_D8toDB_offsets, 244 from_UTF_32BE_00_01to10_infos, 245 246#define from_UTF_32BE_00_infos WORDINDEX2INFO(56) 247 from_UTF_32BE_00_00, from_UTF_32BE_00_01to10, 248 INVALID, 249 250#define from_UTF_32BE_00 WORDINDEX2INFO(59) 251 from_UTF_32BE_00_offsets, 252 from_UTF_32BE_00_infos, 253 254#define from_UTF_32BE_infos WORDINDEX2INFO(61) 255 from_UTF_32BE_00, INVALID, 256 257#define from_UTF_32BE WORDINDEX2INFO(63) 258 from_UTF_32LE_00toFF_00toD7_00_offsets, 259 from_UTF_32BE_infos, 260 261#define from_UTF_16_00toFF_infos WORDINDEX2INFO(65) 262 FUNsi, 263 264#define from_UTF_16_00toFF WORDINDEX2INFO(66) 265 from_UTF_16LE_00toFF_D8toDB_offsets, 266 from_UTF_16_00toFF_infos, 267 268#define from_UTF_16_infos WORDINDEX2INFO(68) 269 from_UTF_16_00toFF, 270 271#define from_UTF_16 WORDINDEX2INFO(69) 272 from_UTF_16LE_00toFF_D8toDB_offsets, 273 from_UTF_16_infos, 274 275#define from_UTF_32_00toFF_infos WORDINDEX2INFO(71) 276 from_UTF_16, 277 278#define from_UTF_32_00toFF WORDINDEX2INFO(72) 279 from_UTF_16LE_00toFF_D8toDB_offsets, 280 from_UTF_32_00toFF_infos, 281 282#define from_UTF_32_infos WORDINDEX2INFO(74) 283 from_UTF_32_00toFF, 284 285#define from_UTF_32 WORDINDEX2INFO(75) 286 from_UTF_16LE_00toFF_D8toDB_offsets, 287 from_UTF_32_infos, 288 289#define from_UTF_8_C2toDF WORDINDEX2INFO(77) 290 from_UTF_8_C2toDF_offsets, 291 from_UTF_16LE_00toFF_D8toDB_00toFF_infos, 292 293#define from_UTF_8_E0_infos WORDINDEX2INFO(79) 294 INVALID, from_UTF_8_C2toDF, 295 296#define from_UTF_8_E0 WORDINDEX2INFO(81) 297 from_UTF_8_E0_offsets, 298 from_UTF_8_E0_infos, 299 300#define from_UTF_8_E1toEC WORDINDEX2INFO(83) 301 from_UTF_8_C2toDF_offsets, 302 from_UTF_8_E0_infos, 303 304#define from_UTF_8_ED WORDINDEX2INFO(85) 305 from_UTF_8_ED_offsets, 306 from_UTF_8_E0_infos, 307 308#define from_UTF_8_F0_infos WORDINDEX2INFO(87) 309 INVALID, from_UTF_8_E1toEC, 310 311#define from_UTF_8_F0 WORDINDEX2INFO(89) 312 from_UTF_8_F0_offsets, 313 from_UTF_8_F0_infos, 314 315#define from_UTF_8_F1toF3 WORDINDEX2INFO(91) 316 from_UTF_8_C2toDF_offsets, 317 from_UTF_8_F0_infos, 318 319#define from_UTF_8_F4 WORDINDEX2INFO(93) 320 from_UTF_8_F4_offsets, 321 from_UTF_8_F0_infos, 322 323#define from_UTF_8_infos WORDINDEX2INFO(95) 324 FUNso, INVALID, 325 from_UTF_8_C2toDF, from_UTF_8_E0, 326 from_UTF_8_E1toEC, from_UTF_8_ED, 327 from_UTF_8_F0, from_UTF_8_F1toF3, 328 from_UTF_8_F4, 329 330#define from_UTF_8 WORDINDEX2INFO(104) 331 from_UTF_8_offsets, 332 from_UTF_8_infos, 333 334}; 335#define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int)) 336 337 338static ssize_t 339fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 340{ 341 if (!s[0] && s[1]<0x80) { 342 o[0] = s[1]; 343 return 1; 344 } 345 else if (s[0]<0x08) { 346 o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6); 347 o[1] = 0x80 | (s[1]&0x3F); 348 return 2; 349 } 350 else if ((s[0]&0xF8)!=0xD8) { 351 o[0] = 0xE0 | (s[0]>>4); 352 o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6); 353 o[2] = 0x80 | (s[1]&0x3F); 354 return 3; 355 } 356 else { 357 unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1; 358 o[0] = 0xF0 | (u>>2); 359 o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F); 360 o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6); 361 o[3] = 0x80 | (s[3]&0x3F); 362 return 4; 363 } 364} 365 366static ssize_t 367fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 368{ 369 if (!(s[0]&0x80)) { 370 o[0] = 0x00; 371 o[1] = s[0]; 372 return 2; 373 } 374 else if ((s[0]&0xE0)==0xC0) { 375 o[0] = (s[0]>>2)&0x07; 376 o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 377 return 2; 378 } 379 else if ((s[0]&0xF0)==0xE0) { 380 o[0] = (s[0]<<4) | ((s[1]>>2)^0x20); 381 o[1] = (s[1]<<6) | (s[2]^0x80); 382 return 2; 383 } 384 else { 385 int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; 386 o[0] = 0xD8 | (w>>2); 387 o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); 388 o[2] = 0xDC | ((s[2]>>2)&0x03); 389 o[3] = (s[2]<<6) | (s[3]&~0x80); 390 return 4; 391 } 392} 393 394static ssize_t 395fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 396{ 397 if (!s[1] && s[0]<0x80) { 398 o[0] = s[0]; 399 return 1; 400 } 401 else if (s[1]<0x08) { 402 o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); 403 o[1] = 0x80 | (s[0]&0x3F); 404 return 2; 405 } 406 else if ((s[1]&0xF8)!=0xD8) { 407 o[0] = 0xE0 | (s[1]>>4); 408 o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); 409 o[2] = 0x80 | (s[0]&0x3F); 410 return 3; 411 } 412 else { 413 unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1; 414 o[0] = 0xF0 | u>>2; 415 o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F); 416 o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6); 417 o[3] = 0x80 | (s[2]&0x3F); 418 return 4; 419 } 420} 421 422static ssize_t 423fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 424{ 425 if (!(s[0]&0x80)) { 426 o[1] = 0x00; 427 o[0] = s[0]; 428 return 2; 429 } 430 else if ((s[0]&0xE0)==0xC0) { 431 o[1] = (s[0]>>2)&0x07; 432 o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 433 return 2; 434 } 435 else if ((s[0]&0xF0)==0xE0) { 436 o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); 437 o[0] = (s[1]<<6) | (s[2]^0x80); 438 return 2; 439 } 440 else { 441 int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; 442 o[1] = 0xD8 | (w>>2); 443 o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); 444 o[3] = 0xDC | ((s[2]>>2)&0x03); 445 o[2] = (s[2]<<6) | (s[3]&~0x80); 446 return 4; 447 } 448} 449 450static ssize_t 451fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 452{ 453 if (!s[1]) { 454 if (s[2]==0 && s[3]<0x80) { 455 o[0] = s[3]; 456 return 1; 457 } 458 else if (s[2]<0x08) { 459 o[0] = 0xC0 | (s[2]<<2) | (s[3]>>6); 460 o[1] = 0x80 | (s[3]&0x3F); 461 return 2; 462 } 463 else { 464 o[0] = 0xE0 | (s[2]>>4); 465 o[1] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); 466 o[2] = 0x80 | (s[3]&0x3F); 467 return 3; 468 } 469 } 470 else { 471 o[0] = 0xF0 | (s[1]>>2); 472 o[1] = 0x80 | ((s[1]&0x03)<<4) | (s[2]>>4); 473 o[2] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); 474 o[3] = 0x80 | (s[3]&0x3F); 475 return 4; 476 } 477} 478 479static ssize_t 480fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 481{ 482 o[0] = 0; 483 if (!(s[0]&0x80)) { 484 o[1] = o[2] = 0x00; 485 o[3] = s[0]; 486 } 487 else if ((s[0]&0xE0)==0xC0) { 488 o[1] = 0x00; 489 o[2] = (s[0]>>2)&0x07; 490 o[3] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 491 } 492 else if ((s[0]&0xF0)==0xE0) { 493 o[1] = 0x00; 494 o[2] = (s[0]<<4) | ((s[1]>>2)^0x20); 495 o[3] = (s[1]<<6) | (s[2]^0x80); 496 } 497 else { 498 o[1] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); 499 o[2] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); 500 o[3] = ((s[2]&0x03)<<6) | (s[3]&0x3F); 501 } 502 return 4; 503} 504 505static ssize_t 506fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 507{ 508 if (!s[2]) { 509 if (s[1]==0 && s[0]<0x80) { 510 o[0] = s[0]; 511 return 1; 512 } 513 else if (s[1]<0x08) { 514 o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); 515 o[1] = 0x80 | (s[0]&0x3F); 516 return 2; 517 } 518 else { 519 o[0] = 0xE0 | (s[1]>>4); 520 o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); 521 o[2] = 0x80 | (s[0]&0x3F); 522 return 3; 523 } 524 } 525 else { 526 o[0] = 0xF0 | (s[2]>>2); 527 o[1] = 0x80 | ((s[2]&0x03)<<4) | (s[1]>>4); 528 o[2] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); 529 o[3] = 0x80 | (s[0]&0x3F); 530 return 4; 531 } 532} 533 534static ssize_t 535fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 536{ 537 o[3] = 0; 538 if (!(s[0]&0x80)) { 539 o[2] = o[1] = 0x00; 540 o[0] = s[0]; 541 } 542 else if ((s[0]&0xE0)==0xC0) { 543 o[2] = 0x00; 544 o[1] = (s[0]>>2)&0x07; 545 o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 546 } 547 else if ((s[0]&0xF0)==0xE0) { 548 o[2] = 0x00; 549 o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); 550 o[0] = (s[1]<<6) | (s[2]^0x80); 551 } 552 else { 553 o[2] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); 554 o[1] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); 555 o[0] = ((s[2]&0x03)<<6) | (s[3]&0x3F); 556 } 557 return 4; 558} 559 560static int 561state_init(void *statep) 562{ 563 unsigned char *sp = statep; 564 *sp = 0; 565 return 0; 566} 567 568static VALUE 569fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l) 570{ 571 #define BE 1 572 #define LE 2 573 unsigned char *sp = statep; 574 switch (*sp) { 575 case 0: 576 if (s[0] == 0xFE && s[1] == 0xFF) { 577 *sp = BE; 578 return ZERObt; 579 } 580 else if (s[0] == 0xFF && s[1] == 0xFE) { 581 *sp = LE; 582 return ZERObt; 583 } 584 break; 585 case BE: 586 if (s[0] < 0xD8 || 0xDF < s[0]) { 587 return (VALUE)FUNso; 588 } 589 else if (s[0] <= 0xDB) { 590 return (VALUE)from_UTF_16BE_D8toDB_00toFF; 591 } 592 break; 593 case LE: 594 if (s[1] < 0xD8 || 0xDF < s[1]) { 595 return (VALUE)FUNso; 596 } 597 else if (s[1] <= 0xDB) { 598 return (VALUE)from_UTF_16LE_00toFF_D8toDB; 599 } 600 break; 601 } 602 return (VALUE)INVALID; 603} 604 605static ssize_t 606fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 607{ 608 unsigned char *sp = statep; 609 switch (*sp) { 610 case BE: 611 return fun_so_from_utf_16be(statep, s, l, o, osize); 612 case LE: 613 return fun_so_from_utf_16le(statep, s, l, o, osize); 614 } 615 return 0; 616} 617 618static VALUE 619fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l) 620{ 621 unsigned char *sp = statep; 622 switch (*sp) { 623 case 0: 624 if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) { 625 *sp = BE; 626 return ZERObt; 627 } 628 else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) { 629 *sp = LE; 630 return ZERObt; 631 } 632 break; 633 case BE: 634 if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) || 635 (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2])))) 636 return (VALUE)FUNso; 637 break; 638 case LE: 639 if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) || 640 (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1])))) 641 return (VALUE)FUNso; 642 break; 643 } 644 return (VALUE)INVALID; 645} 646 647static ssize_t 648fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 649{ 650 unsigned char *sp = statep; 651 switch (*sp) { 652 case BE: 653 return fun_so_from_utf_32be(statep, s, l, o, osize); 654 case LE: 655 return fun_so_from_utf_32le(statep, s, l, o, osize); 656 } 657 return 0; 658} 659 660static ssize_t 661fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 662{ 663 unsigned char *sp = statep; 664 if (*sp == 0) { 665 *o++ = 0xFE; 666 *o++ = 0xFF; 667 *sp = 1; 668 return 2 + fun_so_to_utf_16be(statep, s, l, o, osize); 669 } 670 return fun_so_to_utf_16be(statep, s, l, o, osize); 671} 672 673static ssize_t 674fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 675{ 676 unsigned char *sp = statep; 677 if (*sp == 0) { 678 *o++ = 0x00; 679 *o++ = 0x00; 680 *o++ = 0xFE; 681 *o++ = 0xFF; 682 *sp = 1; 683 return 4 + fun_so_to_utf_32be(statep, s, l, o, osize); 684 } 685 return fun_so_to_utf_32be(statep, s, l, o, osize); 686} 687 688static const rb_transcoder 689rb_from_UTF_16BE = { 690 "UTF-16BE", "UTF-8", from_UTF_16BE, 691 TRANSCODE_TABLE_INFO, 692 2, /* input_unit_length */ 693 4, /* max_input */ 694 4, /* max_output */ 695 asciicompat_decoder, /* asciicompat_type */ 696 0, NULL, NULL, /* state_size, state_init, state_fini */ 697 NULL, NULL, NULL, fun_so_from_utf_16be 698}; 699 700static const rb_transcoder 701rb_to_UTF_16BE = { 702 "UTF-8", "UTF-16BE", from_UTF_8, 703 TRANSCODE_TABLE_INFO, 704 1, /* input_unit_length */ 705 4, /* max_input */ 706 4, /* max_output */ 707 asciicompat_encoder, /* asciicompat_type */ 708 0, NULL, NULL, /* state_size, state_init, state_fini */ 709 NULL, NULL, NULL, fun_so_to_utf_16be 710}; 711 712static const rb_transcoder 713rb_from_UTF_16LE = { 714 "UTF-16LE", "UTF-8", from_UTF_16LE, 715 TRANSCODE_TABLE_INFO, 716 2, /* input_unit_length */ 717 4, /* max_input */ 718 4, /* max_output */ 719 asciicompat_decoder, /* asciicompat_type */ 720 0, NULL, NULL, /* state_size, state_init, state_fini */ 721 NULL, NULL, NULL, fun_so_from_utf_16le 722}; 723 724static const rb_transcoder 725rb_to_UTF_16LE = { 726 "UTF-8", "UTF-16LE", from_UTF_8, 727 TRANSCODE_TABLE_INFO, 728 1, /* input_unit_length */ 729 4, /* max_input */ 730 4, /* max_output */ 731 asciicompat_encoder, /* asciicompat_type */ 732 0, NULL, NULL, /* state_size, state_init, state_fini */ 733 NULL, NULL, NULL, fun_so_to_utf_16le 734}; 735 736static const rb_transcoder 737rb_from_UTF_32BE = { 738 "UTF-32BE", "UTF-8", from_UTF_32BE, 739 TRANSCODE_TABLE_INFO, 740 4, /* input_unit_length */ 741 4, /* max_input */ 742 4, /* max_output */ 743 asciicompat_decoder, /* asciicompat_type */ 744 0, NULL, NULL, /* state_size, state_init, state_fini */ 745 NULL, NULL, NULL, fun_so_from_utf_32be 746}; 747 748static const rb_transcoder 749rb_to_UTF_32BE = { 750 "UTF-8", "UTF-32BE", from_UTF_8, 751 TRANSCODE_TABLE_INFO, 752 1, /* input_unit_length */ 753 4, /* max_input */ 754 4, /* max_output */ 755 asciicompat_encoder, /* asciicompat_type */ 756 0, NULL, NULL, /* state_size, state_init, state_fini */ 757 NULL, NULL, NULL, fun_so_to_utf_32be 758}; 759 760static const rb_transcoder 761rb_from_UTF_32LE = { 762 "UTF-32LE", "UTF-8", from_UTF_32LE, 763 TRANSCODE_TABLE_INFO, 764 4, /* input_unit_length */ 765 4, /* max_input */ 766 4, /* max_output */ 767 asciicompat_decoder, /* asciicompat_type */ 768 0, NULL, NULL, /* state_size, state_init, state_fini */ 769 NULL, NULL, NULL, fun_so_from_utf_32le 770}; 771 772static const rb_transcoder 773rb_to_UTF_32LE = { 774 "UTF-8", "UTF-32LE", from_UTF_8, 775 TRANSCODE_TABLE_INFO, 776 1, /* input_unit_length */ 777 4, /* max_input */ 778 4, /* max_output */ 779 asciicompat_encoder, /* asciicompat_type */ 780 0, NULL, NULL, /* state_size, state_init, state_fini */ 781 NULL, NULL, NULL, fun_so_to_utf_32le 782}; 783 784static const rb_transcoder 785rb_from_UTF_16 = { 786 "UTF-16", "UTF-8", from_UTF_16, 787 TRANSCODE_TABLE_INFO, 788 2, /* input_unit_length */ 789 4, /* max_input */ 790 4, /* max_output */ 791 asciicompat_decoder, /* asciicompat_type */ 792 1, state_init, NULL, /* state_size, state_init, state_fini */ 793 NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16 794}; 795 796static const rb_transcoder 797rb_from_UTF_32 = { 798 "UTF-32", "UTF-8", from_UTF_32, 799 TRANSCODE_TABLE_INFO, 800 4, /* input_unit_length */ 801 4, /* max_input */ 802 4, /* max_output */ 803 asciicompat_decoder, /* asciicompat_type */ 804 1, state_init, NULL, /* state_size, state_init, state_fini */ 805 NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32 806}; 807 808static const rb_transcoder 809rb_to_UTF_16 = { 810 "UTF-8", "UTF-16", from_UTF_8, 811 TRANSCODE_TABLE_INFO, 812 1, /* input_unit_length */ 813 4, /* max_input */ 814 4, /* max_output */ 815 asciicompat_encoder, /* asciicompat_type */ 816 1, state_init, NULL, /* state_size, state_init, state_fini */ 817 NULL, NULL, NULL, fun_so_to_utf_16 818}; 819 820static const rb_transcoder 821rb_to_UTF_32 = { 822 "UTF-8", "UTF-32", from_UTF_8, 823 TRANSCODE_TABLE_INFO, 824 1, /* input_unit_length */ 825 4, /* max_input */ 826 4, /* max_output */ 827 asciicompat_encoder, /* asciicompat_type */ 828 1, state_init, NULL, /* state_size, state_init, state_fini */ 829 NULL, NULL, NULL, fun_so_to_utf_32 830}; 831 832TRANS_INIT(utf_16_32) 833{ 834 rb_register_transcoder(&rb_from_UTF_16BE); 835 rb_register_transcoder(&rb_to_UTF_16BE); 836 rb_register_transcoder(&rb_from_UTF_16LE); 837 rb_register_transcoder(&rb_to_UTF_16LE); 838 rb_register_transcoder(&rb_from_UTF_32BE); 839 rb_register_transcoder(&rb_to_UTF_32BE); 840 rb_register_transcoder(&rb_from_UTF_32LE); 841 rb_register_transcoder(&rb_to_UTF_32LE); 842 rb_register_transcoder(&rb_from_UTF_16); 843 rb_register_transcoder(&rb_to_UTF_16); 844 rb_register_transcoder(&rb_from_UTF_32); 845 rb_register_transcoder(&rb_to_UTF_32); 846} 847 848