1/* 2 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* 22 * ISO-2022-CN-EXT 23 */ 24 25/* Specification: RFC 1922 */ 26 27#define ESC 0x1b 28#define SO 0x0e 29#define SI 0x0f 30 31/* 32 * The state is composed of one of the following values 33 */ 34#define STATE_ASCII 0 35#define STATE_TWOBYTE 1 36/* 37 * and one of the following values, << 8 38 */ 39#define STATE2_NONE 0 40#define STATE2_DESIGNATED_GB2312 1 41#define STATE2_DESIGNATED_CNS11643_1 2 42#define STATE2_DESIGNATED_ISO_IR_165 3 43/* 44 * and one of the following values, << 16 45 */ 46#define STATE3_NONE 0 47#define STATE3_DESIGNATED_CNS11643_2 1 48/* 49 * and one of the following values, << 24 50 */ 51#define STATE4_NONE 0 52#define STATE4_DESIGNATED_CNS11643_3 1 53#define STATE4_DESIGNATED_CNS11643_4 2 54#define STATE4_DESIGNATED_CNS11643_5 3 55#define STATE4_DESIGNATED_CNS11643_6 4 56#define STATE4_DESIGNATED_CNS11643_7 5 57 58#define SPLIT_STATE \ 59 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24 60#define COMBINE_STATE \ 61 state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1 62 63static int 64iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 65{ 66 state_t state = conv->istate; 67 SPLIT_STATE; 68 int count = 0; 69 unsigned char c; 70 for (;;) { 71 c = *s; 72 if (c == ESC) { 73 if (n < count+4) 74 goto none; 75 if (s[1] == '$') { 76 if (s[2] == ')') { 77 if (s[3] == 'A') { 78 state2 = STATE2_DESIGNATED_GB2312; 79 s += 4; count += 4; 80 if (n < count+1) 81 goto none; 82 continue; 83 } 84 if (s[3] == 'G') { 85 state2 = STATE2_DESIGNATED_CNS11643_1; 86 s += 4; count += 4; 87 if (n < count+1) 88 goto none; 89 continue; 90 } 91 if (s[3] == 'E') { 92 state2 = STATE2_DESIGNATED_ISO_IR_165; 93 s += 4; count += 4; 94 if (n < count+1) 95 goto none; 96 continue; 97 } 98 } 99 if (s[2] == '*') { 100 if (s[3] == 'H') { 101 state3 = STATE3_DESIGNATED_CNS11643_2; 102 s += 4; count += 4; 103 if (n < count+1) 104 goto none; 105 continue; 106 } 107 } 108 if (s[2] == '+') { 109 if (s[3] == 'I') { 110 state4 = STATE4_DESIGNATED_CNS11643_3; 111 s += 4; count += 4; 112 if (n < count+1) 113 goto none; 114 continue; 115 } 116 if (s[3] == 'J') { 117 state4 = STATE4_DESIGNATED_CNS11643_4; 118 s += 4; count += 4; 119 if (n < count+1) 120 goto none; 121 continue; 122 } 123 if (s[3] == 'K') { 124 state4 = STATE4_DESIGNATED_CNS11643_5; 125 s += 4; count += 4; 126 if (n < count+1) 127 goto none; 128 continue; 129 } 130 if (s[3] == 'L') { 131 state4 = STATE4_DESIGNATED_CNS11643_6; 132 s += 4; count += 4; 133 if (n < count+1) 134 goto none; 135 continue; 136 } 137 if (s[3] == 'M') { 138 state4 = STATE4_DESIGNATED_CNS11643_7; 139 s += 4; count += 4; 140 if (n < count+1) 141 goto none; 142 continue; 143 } 144 } 145 } 146 if (s[1] == 'N') { 147 switch (state3) { 148 case STATE3_NONE: 149 goto ilseq; 150 case STATE3_DESIGNATED_CNS11643_2: 151 if (s[2] < 0x80 && s[3] < 0x80) { 152 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2); 153 if (ret == RET_ILSEQ) 154 goto ilseq; 155 if (ret != 2) abort(); 156 COMBINE_STATE; 157 conv->istate = state; 158 return count+4; 159 } else 160 goto ilseq; 161 default: abort(); 162 } 163 } 164 if (s[1] == 'O') { 165 switch (state4) { 166 case STATE4_NONE: 167 goto ilseq; 168 case STATE4_DESIGNATED_CNS11643_3: 169 if (s[2] < 0x80 && s[3] < 0x80) { 170 int ret = cns11643_3_mbtowc(conv,pwc,s+2,2); 171 if (ret == RET_ILSEQ) 172 goto ilseq; 173 if (ret != 2) abort(); 174 COMBINE_STATE; 175 conv->istate = state; 176 return count+4; 177 } else 178 goto ilseq; 179 case STATE4_DESIGNATED_CNS11643_4: 180 if (s[2] < 0x80 && s[3] < 0x80) { 181 int ret = cns11643_4_mbtowc(conv,pwc,s+2,2); 182 if (ret == RET_ILSEQ) 183 goto ilseq; 184 if (ret != 2) abort(); 185 COMBINE_STATE; 186 conv->istate = state; 187 return count+4; 188 } else 189 goto ilseq; 190 case STATE4_DESIGNATED_CNS11643_5: 191 if (s[2] < 0x80 && s[3] < 0x80) { 192 int ret = cns11643_5_mbtowc(conv,pwc,s+2,2); 193 if (ret == RET_ILSEQ) 194 goto ilseq; 195 if (ret != 2) abort(); 196 COMBINE_STATE; 197 conv->istate = state; 198 return count+4; 199 } else 200 goto ilseq; 201 case STATE4_DESIGNATED_CNS11643_6: 202 if (s[2] < 0x80 && s[3] < 0x80) { 203 int ret = cns11643_6_mbtowc(conv,pwc,s+2,2); 204 if (ret == RET_ILSEQ) 205 goto ilseq; 206 if (ret != 2) abort(); 207 COMBINE_STATE; 208 conv->istate = state; 209 return count+4; 210 } else 211 goto ilseq; 212 case STATE4_DESIGNATED_CNS11643_7: 213 if (s[2] < 0x80 && s[3] < 0x80) { 214 int ret = cns11643_7_mbtowc(conv,pwc,s+2,2); 215 if (ret == RET_ILSEQ) 216 goto ilseq; 217 if (ret != 2) abort(); 218 COMBINE_STATE; 219 conv->istate = state; 220 return count+4; 221 } else 222 goto ilseq; 223 default: abort(); 224 } 225 } 226 goto ilseq; 227 } 228 if (c == SO) { 229 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165) 230 goto ilseq; 231 state1 = STATE_TWOBYTE; 232 s++; count++; 233 if (n < count+1) 234 goto none; 235 continue; 236 } 237 if (c == SI) { 238 state1 = STATE_ASCII; 239 s++; count++; 240 if (n < count+1) 241 goto none; 242 continue; 243 } 244 break; 245 } 246 switch (state1) { 247 case STATE_ASCII: 248 if (c < 0x80) { 249 int ret = ascii_mbtowc(conv,pwc,s,1); 250 if (ret == RET_ILSEQ) 251 goto ilseq; 252 if (ret != 1) abort(); 253 if (*pwc == 0x000a || *pwc == 0x000d) { 254 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE; 255 } 256 COMBINE_STATE; 257 conv->istate = state; 258 return count+1; 259 } else 260 goto ilseq; 261 case STATE_TWOBYTE: 262 if (n < count+2) 263 goto none; 264 if (s[0] < 0x80 && s[1] < 0x80) { 265 int ret; 266 switch (state2) { 267 case STATE2_NONE: 268 goto ilseq; 269 case STATE2_DESIGNATED_GB2312: 270 ret = gb2312_mbtowc(conv,pwc,s,2); break; 271 case STATE2_DESIGNATED_CNS11643_1: 272 ret = cns11643_1_mbtowc(conv,pwc,s,2); break; 273 case STATE2_DESIGNATED_ISO_IR_165: 274 ret = isoir165_mbtowc(conv,pwc,s,2); break; 275 default: abort(); 276 } 277 if (ret == RET_ILSEQ) 278 goto ilseq; 279 if (ret != 2) abort(); 280 COMBINE_STATE; 281 conv->istate = state; 282 return count+2; 283 } else 284 goto ilseq; 285 default: abort(); 286 } 287 288none: 289 COMBINE_STATE; 290 conv->istate = state; 291 return RET_TOOFEW(count); 292 293ilseq: 294 COMBINE_STATE; 295 conv->istate = state; 296 return RET_SHIFT_ILSEQ(count); 297} 298 299static int 300iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 301{ 302 state_t state = conv->ostate; 303 SPLIT_STATE; 304 unsigned char buf[3]; 305 int ret; 306 307 /* There is no need to handle Unicode 3.1 tag characters and to look for 308 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */ 309 310 /* Try ASCII. */ 311 ret = ascii_wctomb(conv,buf,wc,1); 312 if (ret != RET_ILUNI) { 313 if (ret != 1) abort(); 314 if (buf[0] < 0x80) { 315 int count = (state1 == STATE_ASCII ? 1 : 2); 316 if (n < count) 317 return RET_TOOSMALL; 318 if (state1 != STATE_ASCII) { 319 r[0] = SI; 320 r += 1; 321 state1 = STATE_ASCII; 322 } 323 r[0] = buf[0]; 324 if (wc == 0x000a || wc == 0x000d) { 325 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE; 326 } 327 COMBINE_STATE; 328 conv->ostate = state; 329 return count; 330 } 331 } 332 333 /* Try GB 2312-1980. */ 334 ret = gb2312_wctomb(conv,buf,wc,2); 335 if (ret != RET_ILUNI) { 336 if (ret != 2) abort(); 337 if (buf[0] < 0x80 && buf[1] < 0x80) { 338 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 339 if (n < count) 340 return RET_TOOSMALL; 341 if (state2 != STATE2_DESIGNATED_GB2312) { 342 r[0] = ESC; 343 r[1] = '$'; 344 r[2] = ')'; 345 r[3] = 'A'; 346 r += 4; 347 state2 = STATE2_DESIGNATED_GB2312; 348 } 349 if (state1 != STATE_TWOBYTE) { 350 r[0] = SO; 351 r += 1; 352 state1 = STATE_TWOBYTE; 353 } 354 r[0] = buf[0]; 355 r[1] = buf[1]; 356 COMBINE_STATE; 357 conv->ostate = state; 358 return count; 359 } 360 } 361 362 ret = cns11643_wctomb(conv,buf,wc,3); 363 if (ret != RET_ILUNI) { 364 if (ret != 3) abort(); 365 366 /* Try CNS 11643-1992 Plane 1. */ 367 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) { 368 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 369 if (n < count) 370 return RET_TOOSMALL; 371 if (state2 != STATE2_DESIGNATED_CNS11643_1) { 372 r[0] = ESC; 373 r[1] = '$'; 374 r[2] = ')'; 375 r[3] = 'G'; 376 r += 4; 377 state2 = STATE2_DESIGNATED_CNS11643_1; 378 } 379 if (state1 != STATE_TWOBYTE) { 380 r[0] = SO; 381 r += 1; 382 state1 = STATE_TWOBYTE; 383 } 384 r[0] = buf[1]; 385 r[1] = buf[2]; 386 COMBINE_STATE; 387 conv->ostate = state; 388 return count; 389 } 390 391 /* Try CNS 11643-1992 Plane 2. */ 392 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) { 393 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4; 394 if (n < count) 395 return RET_TOOSMALL; 396 if (state3 != STATE3_DESIGNATED_CNS11643_2) { 397 r[0] = ESC; 398 r[1] = '$'; 399 r[2] = '*'; 400 r[3] = 'H'; 401 r += 4; 402 state3 = STATE3_DESIGNATED_CNS11643_2; 403 } 404 r[0] = ESC; 405 r[1] = 'N'; 406 r[2] = buf[1]; 407 r[3] = buf[2]; 408 COMBINE_STATE; 409 conv->ostate = state; 410 return count; 411 } 412 413 /* Try CNS 11643-1992 Plane 3. */ 414 if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) { 415 int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4; 416 if (n < count) 417 return RET_TOOSMALL; 418 if (state4 != STATE4_DESIGNATED_CNS11643_3) { 419 r[0] = ESC; 420 r[1] = '$'; 421 r[2] = '+'; 422 r[3] = 'I'; 423 r += 4; 424 state4 = STATE4_DESIGNATED_CNS11643_3; 425 } 426 r[0] = ESC; 427 r[1] = 'O'; 428 r[2] = buf[1]; 429 r[3] = buf[2]; 430 COMBINE_STATE; 431 conv->ostate = state; 432 return count; 433 } 434 435 /* Try CNS 11643-1992 Plane 4. */ 436 if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) { 437 int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4; 438 if (n < count) 439 return RET_TOOSMALL; 440 if (state4 != STATE4_DESIGNATED_CNS11643_4) { 441 r[0] = ESC; 442 r[1] = '$'; 443 r[2] = '+'; 444 r[3] = 'J'; 445 r += 4; 446 state4 = STATE4_DESIGNATED_CNS11643_4; 447 } 448 r[0] = ESC; 449 r[1] = 'O'; 450 r[2] = buf[1]; 451 r[3] = buf[2]; 452 COMBINE_STATE; 453 conv->ostate = state; 454 return count; 455 } 456 457 /* Try CNS 11643-1992 Plane 5. */ 458 if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) { 459 int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4; 460 if (n < count) 461 return RET_TOOSMALL; 462 if (state4 != STATE4_DESIGNATED_CNS11643_5) { 463 r[0] = ESC; 464 r[1] = '$'; 465 r[2] = '+'; 466 r[3] = 'K'; 467 r += 4; 468 state4 = STATE4_DESIGNATED_CNS11643_5; 469 } 470 r[0] = ESC; 471 r[1] = 'O'; 472 r[2] = buf[1]; 473 r[3] = buf[2]; 474 COMBINE_STATE; 475 conv->ostate = state; 476 return count; 477 } 478 479 /* Try CNS 11643-1992 Plane 6. */ 480 if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) { 481 int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4; 482 if (n < count) 483 return RET_TOOSMALL; 484 if (state4 != STATE4_DESIGNATED_CNS11643_6) { 485 r[0] = ESC; 486 r[1] = '$'; 487 r[2] = '+'; 488 r[3] = 'L'; 489 r += 4; 490 state4 = STATE4_DESIGNATED_CNS11643_6; 491 } 492 r[0] = ESC; 493 r[1] = 'O'; 494 r[2] = buf[1]; 495 r[3] = buf[2]; 496 COMBINE_STATE; 497 conv->ostate = state; 498 return count; 499 } 500 501 /* Try CNS 11643-1992 Plane 7. */ 502 if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) { 503 int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4; 504 if (n < count) 505 return RET_TOOSMALL; 506 if (state4 != STATE4_DESIGNATED_CNS11643_7) { 507 r[0] = ESC; 508 r[1] = '$'; 509 r[2] = '+'; 510 r[3] = 'M'; 511 r += 4; 512 state4 = STATE4_DESIGNATED_CNS11643_7; 513 } 514 r[0] = ESC; 515 r[1] = 'O'; 516 r[2] = buf[1]; 517 r[3] = buf[2]; 518 COMBINE_STATE; 519 conv->ostate = state; 520 return count; 521 } 522 523 } 524 525 /* Try ISO-IR-165. */ 526 ret = isoir165_wctomb(conv,buf,wc,2); 527 if (ret != RET_ILUNI) { 528 if (ret != 2) abort(); 529 if (buf[0] < 0x80 && buf[1] < 0x80) { 530 int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 531 if (n < count) 532 return RET_TOOSMALL; 533 if (state2 != STATE2_DESIGNATED_ISO_IR_165) { 534 r[0] = ESC; 535 r[1] = '$'; 536 r[2] = ')'; 537 r[3] = 'E'; 538 r += 4; 539 state2 = STATE2_DESIGNATED_ISO_IR_165; 540 } 541 if (state1 != STATE_TWOBYTE) { 542 r[0] = SO; 543 r += 1; 544 state1 = STATE_TWOBYTE; 545 } 546 r[0] = buf[0]; 547 r[1] = buf[1]; 548 COMBINE_STATE; 549 conv->ostate = state; 550 return count; 551 } 552 } 553 554 return RET_ILUNI; 555} 556 557static int 558iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n) 559{ 560 state_t state = conv->ostate; 561 SPLIT_STATE; 562 (void)state2; 563 (void)state3; 564 (void)state4; 565 if (state1 != STATE_ASCII) { 566 if (n < 1) 567 return RET_TOOSMALL; 568 r[0] = SI; 569 /* conv->ostate = 0; will be done by the caller */ 570 return 1; 571 } else 572 return 0; 573} 574 575#undef COMBINE_STATE 576#undef SPLIT_STATE 577#undef STATE4_DESIGNATED_CNS11643_7 578#undef STATE4_DESIGNATED_CNS11643_6 579#undef STATE4_DESIGNATED_CNS11643_5 580#undef STATE4_DESIGNATED_CNS11643_4 581#undef STATE4_DESIGNATED_CNS11643_3 582#undef STATE4_NONE 583#undef STATE3_DESIGNATED_CNS11643_2 584#undef STATE3_NONE 585#undef STATE2_DESIGNATED_ISO_IR_165 586#undef STATE2_DESIGNATED_CNS11643_1 587#undef STATE2_DESIGNATED_GB2312 588#undef STATE2_NONE 589#undef STATE_TWOBYTE 590#undef STATE_ASCII 591