1/* 2 * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* 22 * ISO-2022-CN 23 */ 24 25/* Specification: RFC 1922 */ 26 27#define ESC 0x1b 28#define SO 0x0e 29#define SI 0x0f 30 31/* 32 * The state is composed of one of the following values 33 */ 34#define STATE_ASCII 0 35#define STATE_TWOBYTE 1 36/* 37 * and one of the following values, << 8 38 */ 39#define STATE2_NONE 0 40#define STATE2_DESIGNATED_GB2312 1 41#define STATE2_DESIGNATED_CNS11643_1 2 42/* 43 * and one of the following values, << 16 44 */ 45#define STATE3_NONE 0 46#define STATE3_DESIGNATED_CNS11643_2 1 47 48#define SPLIT_STATE \ 49 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16 50#define COMBINE_STATE \ 51 state = (state3 << 16) | (state2 << 8) | state1 52 53static int 54iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 55{ 56 state_t state = conv->istate; 57 SPLIT_STATE; 58 int count = 0; 59 unsigned char c; 60 for (;;) { 61 c = *s; 62 if (c == ESC) { 63 if (n < count+4) 64 goto none; 65 if (s[1] == '$') { 66 if (s[2] == ')') { 67 if (s[3] == 'A') { 68 state2 = STATE2_DESIGNATED_GB2312; 69 s += 4; count += 4; 70 if (n < count+1) 71 goto none; 72 continue; 73 } 74 if (s[3] == 'G') { 75 state2 = STATE2_DESIGNATED_CNS11643_1; 76 s += 4; count += 4; 77 if (n < count+1) 78 goto none; 79 continue; 80 } 81 } 82 if (s[2] == '*') { 83 if (s[3] == 'H') { 84 state3 = STATE3_DESIGNATED_CNS11643_2; 85 s += 4; count += 4; 86 if (n < count+1) 87 goto none; 88 continue; 89 } 90 } 91 } 92 if (s[1] == 'N') { 93 switch (state3) { 94 case STATE3_NONE: 95 goto ilseq; 96 case STATE3_DESIGNATED_CNS11643_2: 97 if (s[2] < 0x80 && s[3] < 0x80) { 98 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2); 99 if (ret == RET_ILSEQ) 100 goto ilseq; 101 if (ret != 2) abort(); 102 COMBINE_STATE; 103 conv->istate = state; 104 return count+4; 105 } else 106 goto ilseq; 107 default: abort(); 108 } 109 } 110 goto ilseq; 111 } 112 if (c == SO) { 113 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1) 114 goto ilseq; 115 state1 = STATE_TWOBYTE; 116 s++; count++; 117 if (n < count+1) 118 goto none; 119 continue; 120 } 121 if (c == SI) { 122 state1 = STATE_ASCII; 123 s++; count++; 124 if (n < count+1) 125 goto none; 126 continue; 127 } 128 break; 129 } 130 switch (state1) { 131 case STATE_ASCII: 132 if (c < 0x80) { 133 int ret = ascii_mbtowc(conv,pwc,s,1); 134 if (ret == RET_ILSEQ) 135 goto ilseq; 136 if (ret != 1) abort(); 137 if (*pwc == 0x000a || *pwc == 0x000d) { 138 state2 = STATE2_NONE; state3 = STATE3_NONE; 139 } 140 COMBINE_STATE; 141 conv->istate = state; 142 return count+1; 143 } else 144 goto ilseq; 145 case STATE_TWOBYTE: 146 if (n < count+2) 147 goto none; 148 if (s[0] < 0x80 && s[1] < 0x80) { 149 int ret; 150 switch (state2) { 151 case STATE2_NONE: 152 goto ilseq; 153 case STATE2_DESIGNATED_GB2312: 154 ret = gb2312_mbtowc(conv,pwc,s,2); break; 155 case STATE2_DESIGNATED_CNS11643_1: 156 ret = cns11643_1_mbtowc(conv,pwc,s,2); break; 157 default: abort(); 158 } 159 if (ret == RET_ILSEQ) 160 goto ilseq; 161 if (ret != 2) abort(); 162 COMBINE_STATE; 163 conv->istate = state; 164 return count+2; 165 } else 166 goto ilseq; 167 default: abort(); 168 } 169 170none: 171 COMBINE_STATE; 172 conv->istate = state; 173 return RET_TOOFEW(count); 174 175ilseq: 176 COMBINE_STATE; 177 conv->istate = state; 178 return RET_SHIFT_ILSEQ(count); 179} 180 181static int 182iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 183{ 184 state_t state = conv->ostate; 185 SPLIT_STATE; 186 unsigned char buf[3]; 187 int ret; 188 189 /* There is no need to handle Unicode 3.1 tag characters and to look for 190 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */ 191 192 /* Try ASCII. */ 193 ret = ascii_wctomb(conv,buf,wc,1); 194 if (ret != RET_ILUNI) { 195 if (ret != 1) abort(); 196 if (buf[0] < 0x80) { 197 int count = (state1 == STATE_ASCII ? 1 : 2); 198 if (n < count) 199 return RET_TOOSMALL; 200 if (state1 != STATE_ASCII) { 201 r[0] = SI; 202 r += 1; 203 state1 = STATE_ASCII; 204 } 205 r[0] = buf[0]; 206 if (wc == 0x000a || wc == 0x000d) { 207 state2 = STATE2_NONE; state3 = STATE3_NONE; 208 } 209 COMBINE_STATE; 210 conv->ostate = state; 211 return count; 212 } 213 } 214 215 /* Try GB 2312-1980. */ 216 ret = gb2312_wctomb(conv,buf,wc,2); 217 if (ret != RET_ILUNI) { 218 if (ret != 2) abort(); 219 if (buf[0] < 0x80 && buf[1] < 0x80) { 220 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 221 if (n < count) 222 return RET_TOOSMALL; 223 if (state2 != STATE2_DESIGNATED_GB2312) { 224 r[0] = ESC; 225 r[1] = '$'; 226 r[2] = ')'; 227 r[3] = 'A'; 228 r += 4; 229 state2 = STATE2_DESIGNATED_GB2312; 230 } 231 if (state1 != STATE_TWOBYTE) { 232 r[0] = SO; 233 r += 1; 234 state1 = STATE_TWOBYTE; 235 } 236 r[0] = buf[0]; 237 r[1] = buf[1]; 238 COMBINE_STATE; 239 conv->ostate = state; 240 return count; 241 } 242 } 243 244 ret = cns11643_wctomb(conv,buf,wc,3); 245 if (ret != RET_ILUNI) { 246 if (ret != 3) abort(); 247 248 /* Try CNS 11643-1992 Plane 1. */ 249 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) { 250 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 251 if (n < count) 252 return RET_TOOSMALL; 253 if (state2 != STATE2_DESIGNATED_CNS11643_1) { 254 r[0] = ESC; 255 r[1] = '$'; 256 r[2] = ')'; 257 r[3] = 'G'; 258 r += 4; 259 state2 = STATE2_DESIGNATED_CNS11643_1; 260 } 261 if (state1 != STATE_TWOBYTE) { 262 r[0] = SO; 263 r += 1; 264 state1 = STATE_TWOBYTE; 265 } 266 r[0] = buf[1]; 267 r[1] = buf[2]; 268 COMBINE_STATE; 269 conv->ostate = state; 270 return count; 271 } 272 273 /* Try CNS 11643-1992 Plane 2. */ 274 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) { 275 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4; 276 if (n < count) 277 return RET_TOOSMALL; 278 if (state3 != STATE3_DESIGNATED_CNS11643_2) { 279 r[0] = ESC; 280 r[1] = '$'; 281 r[2] = '*'; 282 r[3] = 'H'; 283 r += 4; 284 state3 = STATE3_DESIGNATED_CNS11643_2; 285 } 286 r[0] = ESC; 287 r[1] = 'N'; 288 r[2] = buf[1]; 289 r[3] = buf[2]; 290 COMBINE_STATE; 291 conv->ostate = state; 292 return count; 293 } 294 } 295 296 return RET_ILUNI; 297} 298 299static int 300iso2022_cn_reset (conv_t conv, unsigned char *r, int n) 301{ 302 state_t state = conv->ostate; 303 SPLIT_STATE; 304 (void)state2; 305 (void)state3; 306 if (state1 != STATE_ASCII) { 307 if (n < 1) 308 return RET_TOOSMALL; 309 r[0] = SI; 310 /* conv->ostate = 0; will be done by the caller */ 311 return 1; 312 } else 313 return 0; 314} 315 316#undef COMBINE_STATE 317#undef SPLIT_STATE 318#undef STATE3_DESIGNATED_CNS11643_2 319#undef STATE3_NONE 320#undef STATE2_DESIGNATED_CNS11643_1 321#undef STATE2_DESIGNATED_GB2312 322#undef STATE2_NONE 323#undef STATE_TWOBYTE 324#undef STATE_ASCII 325