TestEUC_TW.java revision 1245:914c33c7de3e
1/* 2 * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 20 * CA 95054 USA or visit www.sun.com if you need additional information or 21 * have any questions. 22 */ 23 24/* 25 * @test 26 * @bug 6831794 6229811 27 * @summary Test EUC_TW charset 28 */ 29 30import java.nio.charset.*; 31import java.nio.*; 32import java.util.*; 33 34public class TestEUC_TW { 35 36 static class Time { 37 long t; 38 } 39 static int iteration = 100; 40 41 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) 42 throws Exception { 43 String csn = cs.name(); 44 CharsetDecoder dec = cs.newDecoder(); 45 ByteBuffer bbf; 46 CharBuffer cbf; 47 if (testDirect) { 48 bbf = ByteBuffer.allocateDirect(bb.length); 49 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); 50 bbf.put(bb); 51 } else { 52 bbf = ByteBuffer.wrap(bb); 53 cbf = CharBuffer.allocate(bb.length); 54 } 55 CoderResult cr = null; 56 long t1 = System.nanoTime()/1000; 57 for (int i = 0; i < iteration; i++) { 58 bbf.rewind(); 59 cbf.clear(); 60 dec.reset(); 61 cr = dec.decode(bbf, cbf, true); 62 } 63 long t2 = System.nanoTime()/1000; 64 if (t != null) 65 t.t = (t2 - t1)/iteration; 66 if (cr != CoderResult.UNDERFLOW) { 67 System.out.println("DEC-----------------"); 68 int pos = bbf.position(); 69 System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n", 70 cr.toString(), pos, 71 bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff); 72 throw new RuntimeException("Decoding err: " + csn); 73 } 74 char[] cc = new char[cbf.position()]; 75 cbf.flip(); cbf.get(cc); 76 return cc; 77 78 } 79 80 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) 81 throws Exception { 82 CharsetDecoder dec = cs.newDecoder(); 83 ByteBuffer bbf; 84 CharBuffer cbf; 85 if (testDirect) { 86 bbf = ByteBuffer.allocateDirect(bb.length); 87 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); 88 bbf.put(bb).flip(); 89 } else { 90 bbf = ByteBuffer.wrap(bb); 91 cbf = CharBuffer.allocate(bb.length); 92 } 93 return dec.decode(bbf, cbf, true); 94 } 95 96 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) 97 throws Exception { 98 ByteBuffer bbf; 99 CharBuffer cbf; 100 CharsetEncoder enc = cs.newEncoder(); 101 String csn = cs.name(); 102 if (testDirect) { 103 bbf = ByteBuffer.allocateDirect(cc.length * 4); 104 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); 105 cbf.put(cc).flip(); 106 } else { 107 bbf = ByteBuffer.allocate(cc.length * 4); 108 cbf = CharBuffer.wrap(cc); 109 } 110 CoderResult cr = null; 111 long t1 = System.nanoTime()/1000; 112 for (int i = 0; i < iteration; i++) { 113 cbf.rewind(); 114 bbf.clear(); 115 enc.reset(); 116 cr = enc.encode(cbf, bbf, true); 117 } 118 long t2 = System.nanoTime()/1000; 119 if (t != null) 120 t.t = (t2 - t1)/iteration; 121 if (cr != CoderResult.UNDERFLOW) { 122 System.out.println("ENC-----------------"); 123 int pos = cbf.position(); 124 System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", 125 cr.toString(), pos, cc[pos]&0xffff); 126 throw new RuntimeException("Encoding err: " + csn); 127 } 128 byte[] bb = new byte[bbf.position()]; 129 bbf.flip(); bbf.get(bb); 130 return bb; 131 } 132 133 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) 134 throws Exception { 135 ByteBuffer bbf; 136 CharBuffer cbf; 137 CharsetEncoder enc = cs.newEncoder(); 138 if (testDirect) { 139 bbf = ByteBuffer.allocateDirect(cc.length * 4); 140 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); 141 cbf.put(cc).flip(); 142 } else { 143 bbf = ByteBuffer.allocate(cc.length * 4); 144 cbf = CharBuffer.wrap(cc); 145 } 146 return enc.encode(cbf, bbf, true); 147 } 148 149 static char[] getEUC_TWChars(boolean skipNR) { 150 //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder(); 151 CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder(); 152 CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder(); 153 char[] cc = new char[0x20000]; 154 char[] c2 = new char[2]; 155 int pos = 0; 156 int i = 0; 157 //bmp 158 for (i = 0; i < 0x10000; i++) { 159 //SKIP these 3 NR codepoints if compared to EUC_TW 160 if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9)) 161 continue; 162 if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) { 163 System.out.printf(" Err i=%x: old=%b new=%b%n", i, 164 encOLD.canEncode((char)i), 165 enc.canEncode((char)i)); 166 throw new RuntimeException("canEncode() err!"); 167 } 168 169 if (enc.canEncode((char)i)) { 170 cc[pos++] = (char)i; 171 } 172 } 173 174 //supp 175 CharBuffer cb = CharBuffer.wrap(new char[2]); 176 for (i = 0x20000; i < 0x30000; i++) { 177 Character.toChars(i, c2, 0); 178 cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip(); 179 180 if (encOLD.canEncode(cb) != enc.canEncode(cb)) { 181 throw new RuntimeException("canEncode() err!"); 182 } 183 184 if (enc.canEncode(cb)) { 185 //System.out.printf("cp=%x, (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff); 186 cc[pos++] = c2[0]; 187 cc[pos++] = c2[1]; 188 } 189 } 190 191 return Arrays.copyOf(cc, pos); 192 } 193 194 static void checkRoundtrip(Charset cs) throws Exception { 195 char[] cc = getEUC_TWChars(false); 196 System.out.printf("Check roundtrip <%s>...", cs.name()); 197 byte[] bb = encode(cc, cs, false, null); 198 char[] ccO = decode(bb, cs, false, null); 199 200 if (!Arrays.equals(cc, ccO)) { 201 System.out.printf(" non-direct failed"); 202 } 203 bb = encode(cc, cs, true, null); 204 ccO = decode(bb, cs, true, null); 205 if (!Arrays.equals(cc, ccO)) { 206 System.out.printf(" (direct) failed"); 207 } 208 System.out.println(); 209 } 210 211 static void checkInit(String csn) throws Exception { 212 System.out.printf("Check init <%s>...%n", csn); 213 Charset.forName("Big5"); // load in the ExtendedCharsets 214 long t1 = System.nanoTime()/1000; 215 Charset cs = Charset.forName(csn); 216 long t2 = System.nanoTime()/1000; 217 System.out.printf(" charset :%d%n", t2 - t1); 218 t1 = System.nanoTime()/1000; 219 cs.newDecoder(); 220 t2 = System.nanoTime()/1000; 221 System.out.printf(" new Decoder :%d%n", t2 - t1); 222 223 t1 = System.nanoTime()/1000; 224 cs.newEncoder(); 225 t2 = System.nanoTime()/1000; 226 System.out.printf(" new Encoder :%d%n", t2 - t1); 227 } 228 229 static void compare(Charset cs1, Charset cs2) throws Exception { 230 char[] cc = getEUC_TWChars(true); 231 232 String csn1 = cs1.name(); 233 String csn2 = cs2.name(); 234 System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); 235 236 Time t1 = new Time(); 237 Time t2 = new Time(); 238 239 byte[] bb1 = encode(cc, cs1, false, t1); 240 byte[] bb2 = encode(cc, cs2, false, t2); 241 242 System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n", 243 csn2, csn1, 244 t2.t, t1.t, 245 (double)(t2.t)/(t1.t)); 246 if (!Arrays.equals(bb1, bb2)) { 247 System.out.printf(" encoding failed%n"); 248 } 249 250 char[] cc2 = decode(bb1, cs2, false, t2); 251 char[] cc1 = decode(bb1, cs1, false, t1); 252 System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n", 253 csn2, csn1, 254 t2.t, t1.t, 255 (double)(t2.t)/(t1.t)); 256 if (!Arrays.equals(cc1, cc2)) { 257 System.out.printf(" decoding failed%n"); 258 } 259 260 bb1 = encode(cc, cs1, true, t1); 261 bb2 = encode(cc, cs2, true, t2); 262 263 System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", 264 csn2, csn1, 265 t2.t, t1.t, 266 (double)(t2.t)/(t1.t)); 267 268 if (!Arrays.equals(bb1, bb2)) 269 System.out.printf(" encoding (direct) failed%n"); 270 271 cc1 = decode(bb1, cs1, true, t1); 272 cc2 = decode(bb1, cs2, true, t2); 273 System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", 274 csn2, csn1, 275 t2.t, t1.t, 276 (double)(t2.t)/(t1.t)); 277 if (!Arrays.equals(cc1, cc2)) { 278 System.out.printf(" decoding (direct) failed%n"); 279 } 280 } 281 282 // The first byte is the length of malformed bytes 283 static byte[][] malformed = { 284 //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, 285 }; 286 287 static void checkMalformed(Charset cs) throws Exception { 288 boolean failed = false; 289 String csn = cs.name(); 290 System.out.printf("Check malformed <%s>...%n", csn); 291 for (boolean direct: new boolean[] {false, true}) { 292 for (byte[] bins : malformed) { 293 int mlen = bins[0]; 294 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); 295 CoderResult cr = decodeCR(bin, cs, direct); 296 String ashex = ""; 297 for (int i = 0; i < bin.length; i++) { 298 if (i > 0) ashex += " "; 299 ashex += Integer.toBinaryString((int)bin[i] & 0xff); 300 } 301 if (!cr.isMalformed()) { 302 System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex); 303 failed = true; 304 } else if (cr.length() != mlen) { 305 System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); 306 failed = true; 307 } 308 } 309 } 310 if (failed) 311 throw new RuntimeException("Check malformed failed " + csn); 312 } 313 314 static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) { 315 int inPos = flow[0]; 316 int inLen = flow[1]; 317 int outPos = flow[2]; 318 int outLen = flow[3]; 319 int expedInPos = flow[4]; 320 int expedOutPos = flow[5]; 321 CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW 322 :CoderResult.OVERFLOW; 323 ByteBuffer bbf; 324 CharBuffer cbf; 325 if (direct) { 326 bbf = ByteBuffer.allocateDirect(inPos + bytes.length); 327 cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer(); 328 } else { 329 bbf = ByteBuffer.allocate(inPos + bytes.length); 330 cbf = CharBuffer.allocate(outPos + outLen); 331 } 332 bbf.position(inPos); 333 bbf.put(bytes).flip().position(inPos).limit(inPos + inLen); 334 cbf.position(outPos); 335 dec.reset(); 336 CoderResult cr = dec.decode(bbf, cbf, false); 337 if (cr != expedCR || 338 bbf.position() != expedInPos || 339 cbf.position() != expedOutPos) { 340 System.out.printf("Expected(direct=%5b): [", direct); 341 for (int i:flow) System.out.print(" " + i); 342 System.out.println("] CR=" + cr + 343 ", inPos=" + bbf.position() + 344 ", outPos=" + cbf.position()); 345 return false; 346 } 347 return true; 348 } 349 350 static void checkUnderOverflow(Charset cs) throws Exception { 351 String csn = cs.name(); 352 System.out.printf("Check under/overflow <%s>...%n", csn); 353 CharsetDecoder dec = cs.newDecoder(); 354 boolean failed = false; 355 //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1 356 //0 1 2 3 7 11 357 byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW"); 358 int inlen = bytes.length; 359 360 int MAXOFF = 20; 361 for (int inoff = 0; inoff < MAXOFF; inoff++) { 362 for (int outoff = 0; outoff < MAXOFF; outoff++) { 363 int[][] Flows = { 364 //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) 365 //overflow 366 {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, 367 {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, 368 {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1}, 369 {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1}, 370 {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1}, 371 {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0}, 372 //underflow 373 {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0}, 374 {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0}, 375 {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0}, 376 {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0}, 377 {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0}, 378 {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0}, 379 {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0}, 380 {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0}, 381 {inoff, 11, outoff, 6, inoff +11, outoff + 4, 0}, 382 {inoff, 12, outoff, 6, inoff +11, outoff + 4, 0}, 383 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, 384 // 2-byte under/overflow 385 {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, 386 {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, 387 {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0}, 388 // 4-byte under/overflow 389 {inoff, 4, outoff, 2, inoff + 3, outoff + 2, 0}, 390 {inoff, 5, outoff, 2, inoff + 3, outoff + 2, 0}, 391 {inoff, 6, outoff, 2, inoff + 3, outoff + 2, 0}, 392 {inoff, 7, outoff, 2, inoff + 3, outoff + 2, 1}, 393 {inoff, 7, outoff, 3, inoff + 7, outoff + 3, 0}, 394 // 4-byte under/overflow 395 {inoff, 8, outoff, 3, inoff + 7, outoff + 3, 0}, 396 {inoff, 9, outoff, 3, inoff + 7, outoff + 3, 0}, 397 {inoff, 10, outoff, 3, inoff + 7, outoff + 3, 0}, 398 {inoff, 11, outoff, 3, inoff + 7, outoff + 3, 1}, 399 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, 400 // 4-byte/supp under/overflow 401 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, 402 {inoff, 12, outoff, 4, inoff +11, outoff + 4, 0}, 403 {inoff, 13, outoff, 4, inoff +11, outoff + 4, 0}, 404 {inoff, 14, outoff, 4, inoff +11, outoff + 4, 0}, 405 {inoff, 15, outoff, 4, inoff +11, outoff + 4, 1}, 406 {inoff, 15, outoff, 5, inoff +11, outoff + 4, 1}, 407 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, 408 }; 409 for (boolean direct: new boolean[] {false, true}) { 410 for (int[] flow: Flows) { 411 if (!check(dec, bytes, direct, flow)) 412 failed = true; 413 } 414 }}} 415 if (failed) 416 throw new RuntimeException("Check under/overflow failed " + csn); 417 } 418 419 public static void main(String[] args) throws Exception { 420 // be the first one 421 //checkInit("EUC_TW_OLD"); 422 checkInit("EUC_TW"); 423 Charset euctw = Charset.forName("EUC_TW"); 424 checkRoundtrip(euctw); 425 compare(euctw, new EUC_TW_OLD()); 426 checkMalformed(euctw); 427 checkUnderOverflow(euctw); 428 } 429} 430