1/* 2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26/* 27 */ 28 29package sun.nio.cs.ext; 30 31import java.nio.ByteBuffer; 32import java.nio.CharBuffer; 33import java.nio.charset.Charset; 34import java.nio.charset.CharsetDecoder; 35import java.nio.charset.CharsetEncoder; 36import java.nio.charset.CoderResult; 37import java.nio.charset.CharacterCodingException; 38import sun.nio.cs.DoubleByte; 39import sun.nio.cs.HistoricallyNamedCharset; 40import sun.nio.cs.US_ASCII; 41import sun.nio.cs.*; 42 43public class ISO2022_CN 44 extends Charset 45 implements HistoricallyNamedCharset 46{ 47 private static final byte ISO_ESC = 0x1b; 48 private static final byte ISO_SI = 0x0f; 49 private static final byte ISO_SO = 0x0e; 50 private static final byte ISO_SS2_7 = 0x4e; 51 private static final byte ISO_SS3_7 = 0x4f; 52 private static final byte MSB = (byte)0x80; 53 private static final char REPLACE_CHAR = '\uFFFD'; 54 55 private static final byte SODesigGB = 0; 56 private static final byte SODesigCNS = 1; 57 58 public ISO2022_CN() { 59 super("ISO-2022-CN", ExtendedCharsets.aliasesFor("ISO-2022-CN")); 60 } 61 62 public String historicalName() { 63 return "ISO2022CN"; 64 } 65 66 public boolean contains(Charset cs) { 67 return ((cs instanceof EUC_CN) // GB2312-80 repertoire 68 || (cs instanceof US_ASCII) 69 || (cs instanceof EUC_TW) // CNS11643 repertoire 70 || (cs instanceof ISO2022_CN)); 71 } 72 73 public CharsetDecoder newDecoder() { 74 return new Decoder(this); 75 } 76 77 public CharsetEncoder newEncoder() { 78 throw new UnsupportedOperationException(); 79 } 80 81 public boolean canEncode() { 82 return false; 83 } 84 85 static class Decoder extends CharsetDecoder { 86 private boolean shiftOut; 87 private byte currentSODesig; 88 89 private static final Charset gb2312 = new EUC_CN(); 90 private static final Charset cns = new EUC_TW(); 91 private final DoubleByte.Decoder gb2312Decoder; 92 private final EUC_TW.Decoder cnsDecoder; 93 94 Decoder(Charset cs) { 95 super(cs, 1.0f, 1.0f); 96 shiftOut = false; 97 currentSODesig = SODesigGB; 98 gb2312Decoder = (DoubleByte.Decoder)gb2312.newDecoder(); 99 cnsDecoder = (EUC_TW.Decoder)cns.newDecoder(); 100 } 101 102 protected void implReset() { 103 shiftOut= false; 104 currentSODesig = SODesigGB; 105 } 106 107 private char cnsDecode(byte byte1, byte byte2, byte SS) { 108 byte1 |= MSB; 109 byte2 |= MSB; 110 int p = 0; 111 if (SS == ISO_SS2_7) 112 p = 1; //plane 2, index -- 1 113 else if (SS == ISO_SS3_7) 114 p = 2; //plane 3, index -- 2 115 else 116 return REPLACE_CHAR; //never happen. 117 char[] ret = cnsDecoder.toUnicode(byte1 & 0xff, 118 byte2 & 0xff, 119 p); 120 if (ret == null || ret.length == 2) 121 return REPLACE_CHAR; 122 return ret[0]; 123 } 124 125 private char SODecode(byte byte1, byte byte2, byte SOD) { 126 byte1 |= MSB; 127 byte2 |= MSB; 128 if (SOD == SODesigGB) { 129 return gb2312Decoder.decodeDouble(byte1 & 0xff, 130 byte2 & 0xff); 131 } else { // SOD == SODesigCNS 132 char[] ret = cnsDecoder.toUnicode(byte1 & 0xff, 133 byte2 & 0xff, 134 0); 135 if (ret == null) 136 return REPLACE_CHAR; 137 return ret[0]; 138 } 139 } 140 141 private CoderResult decodeBufferLoop(ByteBuffer src, 142 CharBuffer dst) 143 { 144 int mark = src.position(); 145 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; 146 int inputSize = 0; 147 char c = REPLACE_CHAR; 148 try { 149 while (src.hasRemaining()) { 150 b1 = src.get(); 151 inputSize = 1; 152 153 while (b1 == ISO_ESC || 154 b1 == ISO_SO || 155 b1 == ISO_SI) { 156 if (b1 == ISO_ESC) { // ESC 157 currentSODesig = SODesigGB; 158 159 if (src.remaining() < 1) 160 return CoderResult.UNDERFLOW; 161 162 b2 = src.get(); 163 inputSize++; 164 165 if ((b2 & (byte)0x80) != 0) 166 return CoderResult.malformedForLength(inputSize); 167 168 if (b2 == (byte)0x24) { 169 if (src.remaining() < 1) 170 return CoderResult.UNDERFLOW; 171 172 b3 = src.get(); 173 inputSize++; 174 175 if ((b3 & (byte)0x80) != 0) 176 return CoderResult.malformedForLength(inputSize); 177 if (b3 == 'A'){ // "$A" 178 currentSODesig = SODesigGB; 179 } else if (b3 == ')') { 180 if (src.remaining() < 1) 181 return CoderResult.UNDERFLOW; 182 b4 = src.get(); 183 inputSize++; 184 if (b4 == 'A'){ // "$)A" 185 currentSODesig = SODesigGB; 186 } else if (b4 == 'G'){ // "$)G" 187 currentSODesig = SODesigCNS; 188 } else { 189 return CoderResult.malformedForLength(inputSize); 190 } 191 } else if (b3 == '*') { 192 if (src.remaining() < 1) 193 return CoderResult.UNDERFLOW; 194 b4 = src.get(); 195 inputSize++; 196 if (b4 != 'H') { // "$*H" 197 //SS2Desig -> CNS-P1 198 return CoderResult.malformedForLength(inputSize); 199 } 200 } else if (b3 == '+') { 201 if (src.remaining() < 1) 202 return CoderResult.UNDERFLOW; 203 b4 = src.get(); 204 inputSize++; 205 if (b4 != 'I'){ // "$+I" 206 //SS3Desig -> CNS-P2. 207 return CoderResult.malformedForLength(inputSize); 208 } 209 } else { 210 return CoderResult.malformedForLength(inputSize); 211 } 212 } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { 213 if (src.remaining() < 2) 214 return CoderResult.UNDERFLOW; 215 b3 = src.get(); 216 b4 = src.get(); 217 inputSize += 2; 218 if (dst.remaining() < 1) 219 return CoderResult.OVERFLOW; 220 //SS2->CNS-P2, SS3->CNS-P3 221 c = cnsDecode(b3, b4, b2); 222 if (c == REPLACE_CHAR) 223 return CoderResult.unmappableForLength(inputSize); 224 dst.put(c); 225 } else { 226 return CoderResult.malformedForLength(inputSize); 227 } 228 } else if (b1 == ISO_SO) { 229 shiftOut = true; 230 } else if (b1 == ISO_SI) { // shift back in 231 shiftOut = false; 232 } 233 mark += inputSize; 234 if (src.remaining() < 1) 235 return CoderResult.UNDERFLOW; 236 b1 = src.get(); 237 inputSize = 1; 238 } 239 240 if (dst.remaining() < 1) 241 return CoderResult.OVERFLOW; 242 243 if (!shiftOut) { 244 dst.put((char)(b1 & 0xff)); //clear the upper byte 245 mark += inputSize; 246 } else { 247 if (src.remaining() < 1) 248 return CoderResult.UNDERFLOW; 249 b2 = src.get(); 250 inputSize++; 251 c = SODecode(b1, b2, currentSODesig); 252 if (c == REPLACE_CHAR) 253 return CoderResult.unmappableForLength(inputSize); 254 dst.put(c); 255 mark += inputSize; 256 } 257 } 258 return CoderResult.UNDERFLOW; 259 } finally { 260 src.position(mark); 261 } 262 } 263 264 private CoderResult decodeArrayLoop(ByteBuffer src, 265 CharBuffer dst) 266 { 267 int inputSize = 0; 268 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; 269 char c = REPLACE_CHAR; 270 271 byte[] sa = src.array(); 272 int sp = src.arrayOffset() + src.position(); 273 int sl = src.arrayOffset() + src.limit(); 274 assert (sp <= sl); 275 sp = (sp <= sl ? sp : sl); 276 277 char[] da = dst.array(); 278 int dp = dst.arrayOffset() + dst.position(); 279 int dl = dst.arrayOffset() + dst.limit(); 280 assert (dp <= dl); 281 dp = (dp <= dl ? dp : dl); 282 283 try { 284 while (sp < sl) { 285 b1 = sa[sp]; 286 inputSize = 1; 287 288 while (b1 == ISO_ESC || b1 == ISO_SO || b1 == ISO_SI) { 289 if (b1 == ISO_ESC) { // ESC 290 currentSODesig = SODesigGB; 291 292 if (sp + 2 > sl) 293 return CoderResult.UNDERFLOW; 294 295 b2 = sa[sp + 1]; 296 inputSize++; 297 298 if ((b2 & (byte)0x80) != 0) 299 return CoderResult.malformedForLength(inputSize); 300 if (b2 == (byte)0x24) { 301 if (sp + 3 > sl) 302 return CoderResult.UNDERFLOW; 303 304 b3 = sa[sp + 2]; 305 inputSize++; 306 307 if ((b3 & (byte)0x80) != 0) 308 return CoderResult.malformedForLength(inputSize); 309 if (b3 == 'A'){ // "$A" 310 /* <ESC>$A is not a legal designator sequence for 311 ISO2022_CN, it is listed as an escape sequence 312 for GB2312 in ISO2022-JP-2. Keep it here just for 313 the sake of "compatibility". 314 */ 315 currentSODesig = SODesigGB; 316 } else if (b3 == ')') { 317 if (sp + 4 > sl) 318 return CoderResult.UNDERFLOW; 319 b4 = sa[sp + 3]; 320 inputSize++; 321 322 if (b4 == 'A'){ // "$)A" 323 currentSODesig = SODesigGB; 324 } else if (b4 == 'G'){ // "$)G" 325 currentSODesig = SODesigCNS; 326 } else { 327 return CoderResult.malformedForLength(inputSize); 328 } 329 } else if (b3 == '*') { 330 if (sp + 4 > sl) 331 return CoderResult.UNDERFLOW; 332 b4 = sa[sp + 3]; 333 inputSize++; 334 if (b4 != 'H'){ // "$*H" 335 return CoderResult.malformedForLength(inputSize); 336 } 337 } else if (b3 == '+') { 338 if (sp + 4 > sl) 339 return CoderResult.UNDERFLOW; 340 b4 = sa[sp + 3]; 341 inputSize++; 342 if (b4 != 'I'){ // "$+I" 343 return CoderResult.malformedForLength(inputSize); 344 } 345 } else { 346 return CoderResult.malformedForLength(inputSize); 347 } 348 } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { 349 if (sp + 4 > sl) { 350 return CoderResult.UNDERFLOW; 351 } 352 b3 = sa[sp + 2]; 353 b4 = sa[sp + 3]; 354 if (dl - dp < 1) { 355 return CoderResult.OVERFLOW; 356 } 357 inputSize += 2; 358 c = cnsDecode(b3, b4, b2); 359 if (c == REPLACE_CHAR) 360 return CoderResult.unmappableForLength(inputSize); 361 da[dp++] = c; 362 } else { 363 return CoderResult.malformedForLength(inputSize); 364 } 365 } else if (b1 == ISO_SO) { 366 shiftOut = true; 367 } else if (b1 == ISO_SI) { // shift back in 368 shiftOut = false; 369 } 370 sp += inputSize; 371 if (sp + 1 > sl) 372 return CoderResult.UNDERFLOW; 373 b1 = sa[sp]; 374 inputSize = 1; 375 } 376 377 if (dl - dp < 1) { 378 return CoderResult.OVERFLOW; 379 } 380 381 if (!shiftOut) { 382 da[dp++] = (char)(b1 & 0xff); //clear the upper byte 383 } else { 384 if (sp + 2 > sl) 385 return CoderResult.UNDERFLOW; 386 b2 = sa[sp + 1]; 387 inputSize++; 388 c = SODecode(b1, b2, currentSODesig); 389 if (c == REPLACE_CHAR) 390 return CoderResult.unmappableForLength(inputSize); 391 da[dp++] = c; 392 } 393 sp += inputSize; 394 } 395 return CoderResult.UNDERFLOW; 396 } finally { 397 src.position(sp - src.arrayOffset()); 398 dst.position(dp - dst.arrayOffset()); 399 } 400 } 401 402 protected CoderResult decodeLoop(ByteBuffer src, 403 CharBuffer dst) 404 { 405 if (src.hasArray() && dst.hasArray()) 406 return decodeArrayLoop(src, dst); 407 else 408 return decodeBufferLoop(src, dst); 409 } 410 } 411} 412