/*
 * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 6843578
 * @summary Test old and new implementation of db charsets
 * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD SJIS_OLD PCK_OLD EUC_JP_OLD EUC_JP_LINUX_OLD EUC_JP_Open_OLD
 * @modules java.base/sun.nio.cs jdk.charsets/sun.nio.cs.ext
 * @run main TestIBMDB
 */

import java.nio.charset.*;
import java.nio.*;
import java.util.*;

/**
 * Compares each charset listed in {@link #csnames} with a reference
 * implementation loaded reflectively from a class named
 * {@code <name>_OLD}: encodability, encoded bytes, decoded chars and rough
 * encode/decode timing, for both heap and direct buffers.
 *
 * <p>Most differences are only printed; the helpers throw only where the
 * individual method documentation says so.
 */
public class TestIBMDB {

    /** Mutable out-parameter: average time of one coding pass, in microseconds. */
    static class Time {
        long t;
    }

    /** Number of times every timed or repeated encode/decode pass is run. */
    static int iteration = 200;

    /**
     * Returns the first char of {@code s} as an int, or -1 when {@code s} is
     * empty, so that comparing or printing a decoded result never throws.
     */
    private static int firstChar(String s) {
        return s.isEmpty() ? -1 : s.charAt(0);
    }

    /**
     * Formats at most {@code max} bytes of {@code bytes}, starting at
     * {@code from}, as comma-separated lower-case hex without padding.
     * Never reads outside the array.
     */
    private static String hexBytes(byte[] bytes, int from, int max) {
        StringBuilder sb = new StringBuilder();
        int end = Math.min(bytes.length, from + max);
        for (int i = Math.max(from, 0); i < end; i++) {
            if (sb.length() > 0) {
                sb.append(',');
            }
            sb.append(Integer.toHexString(bytes[i] & 0xff));
        }
        return sb.toString();
    }

    /**
     * Decodes {@code bb} with {@code cs} {@link #iteration} times and returns
     * the chars produced by the last pass.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @param t          receives the average time per pass in microseconds
     * @return the decoded chars
     * @throws RuntimeException if the last pass did not end in UNDERFLOW
     */
    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
        throws Exception {
        String csn = cs.name();
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
            // flip for consistency with decodeCR; the rewind below would
            // also work because capacity == bb.length
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime() / 1000;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("DEC-----------------");
            int pos = bbf.position();
            // Dump up to four bytes from the failure position. Bounded so a
            // failure near the end of the input reports the decoding error
            // instead of dying with ArrayIndexOutOfBoundsException.
            System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%s%n",
                              cr.toString(), pos, hexBytes(bb, pos, 4));
            throw new RuntimeException("Decoding err: " + csn);
        }
        char[] cc = new char[cbf.position()];
        cbf.flip();
        cbf.get(cc);
        return cc;
    }

    /**
     * Decodes {@code bb} with {@code cs} {@link #iteration} times and returns
     * the {@link CoderResult} of the last pass without interpreting it.
     *
     * @param bb         bytes to decode
     * @param cs         charset supplying the decoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @return result of the last decode pass
     */
    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
        throws Exception {
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        return cr;
    }

    /**
     * Encodes {@code cc} with {@code cs} {@link #iteration} times and returns
     * the bytes produced by the last pass.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @param t          receives the average time per pass in microseconds
     * @return the encoded bytes
     * @throws RuntimeException if the last pass did not end in UNDERFLOW
     */
    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        String csn = cs.name();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime() / 1000;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("ENC-----------------");
            int pos = cbf.position();
            System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n",
                              cr.toString(), pos, cc[pos] & 0xffff);
            throw new RuntimeException("Encoding err: " + csn);
        }
        byte[] bb = new byte[bbf.position()];
        bbf.flip();
        bbf.get(bb);
        return bb;
    }

    /**
     * Encodes {@code cc} with {@code cs} {@link #iteration} times and returns
     * the {@link CoderResult} of the last pass without interpreting it.
     *
     * @param cc         chars to encode
     * @param cs         charset supplying the encoder
     * @param testDirect use direct buffers when true, heap buffers otherwise
     * @return result of the last encode pass
     */
    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        return cr;
    }

    /**
     * Prints one mapping line for {@code c} in {@code cs}: the encoded bytes
     * in hex, the char itself, and the first char obtained by decoding those
     * bytes again.
     */
    static void printEntry(char c, Charset cs) {
        byte[] bb = new String(new char[] {c}).getBytes(cs);
        for (byte b : bb) {
            System.out.printf("%x", b & 0xff);
        }
        System.out.printf(" %x", c & 0xffff);
        String s2 = new String(bb, cs);
        System.out.printf(" %x%n", s2.charAt(0) & 0xffff);
    }

    /**
     * Compares {@code canEncode} and the encoded bytes of both charsets for
     * every char from 0 up to (but not including) 0xffff, printing each
     * difference.
     *
     * @param oldCS reference implementation
     * @param newCS implementation under test
     * @return the chars both encoders report as encodable, in ascending order
     */
    static char[] checkEncoding(Charset oldCS, Charset newCS)
        throws Exception {
        System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        CharsetEncoder encOLD = oldCS.newEncoder();
        CharsetEncoder encNew = newCS.newEncoder();
        char[] cc = new char[0x10000];
        int pos = 0;
        boolean is970 = "x-IBM970-Old".equals(oldCS.name());

        // stops before 0xffff; a char counter could not terminate a
        // "<= 0xffff" loop because it wraps around to 0
        for (char c = 0; c < 0xffff; c++) {
            boolean canOld = encOLD.canEncode(c);
            boolean canNew = encNew.canEncode(c);

            if (is970 && c == 0x2299) {
                continue;
            }

            if (canOld != canNew) {
                if (canNew) {
                    System.out.printf(" NEW(only): ");
                    printEntry(c, newCS);
                } else {
                    if (is970) {
                        byte[] bb = new String(new char[] {c}).getBytes(oldCS);
                        if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
                            // we know 970 has bogus nnnn -> a2c1 -> 2299
                            continue;
                        }
                    }
                    System.out.printf(" OLD(only): ");
                    printEntry(c, oldCS);
                }
            } else if (canNew) {
                byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
                byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
                if (!Arrays.equals(bbNew, bbOld)) {
                    System.out.printf(" c->b NEW: ");
                    printEntry(c, newCS);
                    System.out.printf(" c->b OLD: ");
                    printEntry(c, oldCS);
                } else {
                    String sNew = new String(bbNew, newCS);
                    String sOld = new String(bbOld, oldCS);
                    if (!sNew.equals(sOld)) {
                        System.out.printf(" b2c NEW (c=%x):", c & 0xffff);
                        printEntry(sNew.charAt(0), newCS);
                        System.out.printf(" b2c OLD:");
                        printEntry(sOld.charAt(0), oldCS);
                    }
                }
            }
            if (canNew & canOld) {  // added only both for now
                cc[pos++] = c;
            }
        }
        return Arrays.copyOf(cc, pos);
    }

    /**
     * Decodes every single byte value and every two-byte combination with
     * both bytes in 0x40..0xfe using both charsets, printing each difference.
     * When the old charset's name starts with "x-IBM93" each pair is wrapped
     * as {@code 0x0e b1 b2 0x0f}. Nothing is thrown on a difference.
     *
     * @param oldCS reference implementation
     * @param newCS implementation under test
     */
    static void checkDecoding(Charset oldCS, Charset newCS)
        throws Exception
    {
        System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

        // Try singlebyte first
        byte[] bb = new byte[1];
        System.out.printf(" trying SB...%n");
        for (int b = 0; b < 0x100; b++) {
            bb[0] = (byte)b;
            String sOld = new String(bb, oldCS);
            String sNew = new String(bb, newCS);
            if (!sOld.equals(sNew)) {
                // firstChar: one side may have decoded to an empty string
                System.out.printf(" b=%x: %x/%d(old) %x/%d(new)%n",
                                  b & 0xff,
                                  firstChar(sOld) & 0xffff, sOld.length(),
                                  firstChar(sNew) & 0xffff, sNew.length());
            }
        }

        System.out.printf(" trying DB...%n");
        bb = new byte[isEBCDIC ? 4 : 2];
        final int bMin = 0x40;   // inclusive bounds used for both bytes
        final int bMax = 0xfe;
        for (int b1 = bMin; b1 <= bMax; b1++) {
            if (!isEBCDIC) {
                // decodable singlebyte b1
                // NOTE(review): bb has two elements here and bb[1] still holds
                // whatever the previous pass left (0 on the first pass), so
                // this decodes b1 plus a stale byte. Behaviour kept as is;
                // confirm the intent.
                bb[0] = (byte)b1;
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                if (!sOld.equals(sNew)) {
                    if (sOld.length() != 2 && firstChar(sOld) != 0) {
                        // only prints we are NOT expected. above two are known issue
                        System.out.printf(" b1=%x: %x/%d(old) %x/%d(new)%n",
                                          b1 & 0xff,
                                          firstChar(sOld) & 0xffff, sOld.length(),
                                          firstChar(sNew) & 0xffff, sNew.length());
                        continue;
                    }
                }
            }
            for (int b2 = bMin; b2 <= bMax; b2++) {
                if (isEBCDIC) {
                    bb[0] = 0x0e;
                    bb[1] = (byte)b1;
                    bb[2] = (byte)b2;
                    bb[3] = 0x0f;
                } else {
                    bb[0] = (byte)b1;
                    bb[1] = (byte)b2;
                }
                int cOld = firstChar(new String(bb, oldCS));
                int cNew = firstChar(new String(bb, newCS));
                // only the first decoded char is compared
                if (cOld != cNew) {
                    if (cOld == 0 && cNew == 0xfffd) {
                        continue;   // known issue in old implementation
                    }
                    System.out.printf(" bb=<%x,%x> c(old)=%x, c(new)=%x%n",
                                      b1, b2, cOld & 0xffff, cNew & 0xffff);
                }
            }
        }
    }

    /**
     * Prints, in microseconds, how long it takes to look up the charset
     * {@code csn} and to create one decoder and one encoder from it.
     */
    static void checkInit(String csn) throws Exception {
        System.out.printf("Check init <%s>...%n", csn);
        Charset.forName("Big5");    // load in the ExtendedCharsets
        long t1 = System.nanoTime() / 1000;
        Charset cs = Charset.forName(csn);
        long t2 = System.nanoTime() / 1000;
        System.out.printf(" charset :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newDecoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf(" new Decoder :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newEncoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf(" new Encoder :%d%n", t2 - t1);
    }

    /**
     * Encodes {@code cc} and decodes the bytes produced by {@code cs1} with
     * both charsets, using heap and then direct buffers. Prints the timing
     * ratio {@code cs2/cs1} for each step and a "failed" line whenever the
     * two results differ; a difference does not throw.
     *
     * @param cs1 first charset (the ratio's denominator)
     * @param cs2 second charset (the ratio's numerator)
     * @param cc  chars to encode
     */
    static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
        System.gc();    // enqueue finalizable objects
        Thread.sleep(1000);
        System.gc();    // enqueue finalizable objects

        String csn1 = cs1.name();
        String csn2 = cs2.name();
        System.out.printf("Diff <%s> <%s>...%n", csn1, csn2);

        Time t1 = new Time();
        Time t2 = new Time();

        byte[] bb1 = encode(cc, cs1, false, t1);
        byte[] bb2 = encode(cc, cs2, false, t2);

        System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding failed%n");
        }

        char[] cc2 = decode(bb1, cs2, false, t2);
        char[] cc1 = decode(bb1, cs1, false, t1);
        System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding failed%n");
        }

        bb1 = encode(cc, cs1, true, t1);
        bb2 = encode(cc, cs2, true, t2);

        System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));

        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf(" encoding (direct) failed%n");
        }

        cc1 = decode(bb1, cs1, true, t1);
        cc2 = decode(bb1, cs2, true, t2);
        System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf(" decoding (direct) failed%n");
        }
    }

    /* The first byte is the length of malformed bytes
        byte[][] malformed = {
            {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
        };
    */

    /**
     * Verifies that decoding each entry of {@code malformed} with {@code cs}
     * ends in a malformed-input result of the expected length, for heap and
     * direct buffers.
     *
     * @param cs        charset under test
     * @param malformed one row per case: element 0 is the expected
     *                  malformed length, the remaining elements are the input
     * @throws RuntimeException if any case is not reported as malformed or is
     *                          reported with a different length
     */
    static void checkMalformed(Charset cs, byte[][] malformed)
        throws Exception
    {
        boolean failed = false;
        String csn = cs.name();
        System.out.printf("Check malformed <%s>...%n", csn);
        for (boolean direct : new boolean[] {false, true}) {
            for (byte[] bins : malformed) {
                int mlen = bins[0];
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
                CoderResult cr = decodeCR(bin, cs, direct);
                StringBuilder hex = new StringBuilder();
                for (int i = 0; i < bin.length; i++) {
                    if (i > 0) {
                        hex.append(' ');
                    }
                    hex.append(Integer.toString(bin[i] & 0xff, 16));
                }
                String ashex = hex.toString();
                if (!cr.isMalformed()) {
                    System.out.printf(" FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString());
                    failed = true;
                } else if (cr.length() != mlen) {
                    System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
                    failed = true;
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check malformed failed " + csn);
        }
    }

    /**
     * Runs one non-final decode step and compares the outcome with
     * {@code flow}.
     *
     * @param dec    decoder to use; it is reset first
     * @param bytes  input bytes, copied into the input buffer at {@code flow[0]}
     * @param direct use direct buffers when true, heap buffers otherwise
     * @param flow   {@code {inPos, inLen, outPos, outLen, expectedInPos,
     *               expectedOutPos, expectedResult}} where expectedResult 0
     *               means UNDERFLOW and anything else means OVERFLOW
     * @return true when result, input position and output position all match;
     *         otherwise prints the mismatch and returns false
     */
    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
        int inPos = flow[0];
        int inLen = flow[1];
        int outPos = flow[2];
        int outLen = flow[3];
        int expedInPos = flow[4];
        int expedOutPos = flow[5];
        CoderResult expedCR = (flow[6] == 0) ? CoderResult.UNDERFLOW
                                             : CoderResult.OVERFLOW;
        ByteBuffer bbf;
        CharBuffer cbf;
        if (direct) {
            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
            cbf = ByteBuffer.allocateDirect((outPos + outLen) * 2).asCharBuffer();
        } else {
            bbf = ByteBuffer.allocate(inPos + bytes.length);
            cbf = CharBuffer.allocate(outPos + outLen);
        }
        // place the input at offset inPos and expose only inLen bytes of it
        bbf.position(inPos);
        bbf.put(bytes);
        bbf.flip();
        bbf.position(inPos);
        bbf.limit(inPos + inLen);
        cbf.position(outPos);
        dec.reset();
        CoderResult cr = dec.decode(bbf, cbf, false);
        if (cr != expedCR ||
            bbf.position() != expedInPos ||
            cbf.position() != expedOutPos) {
            System.out.printf("Expected(direct=%5b): [", direct);
            for (int i : flow) {
                System.out.print(" " + i);
            }
            System.out.println("] CR=" + cr +
                               ", inPos=" + bbf.position() +
                               ", outPos=" + cbf.position());
            return false;
        }
        return true;
    }

    /**
     * Exercises {@link #check} over a table of under/overflow expectations at
     * every input and output offset below 20. The input bytes are obtained by
     * encoding a fixed string with "EUC_TW".
     *
     * <p>This method is not invoked from {@link #main} in this file.
     *
     * @param cs charset whose decoder is checked
     * @throws RuntimeException if any expectation is not met
     */
    static void checkUnderOverflow(Charset cs) throws Exception {
        String csn = cs.name();
        System.out.printf("Check under/overflow <%s>...%n", csn);
        CharsetDecoder dec = cs.newDecoder();
        boolean failed = false;

        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
        //0   1     2         3         7         11
        byte[] bytes = "\u007f\u3000\u4e42\u4e28\ud840\udc55".getBytes("EUC_TW");
        int inlen = bytes.length;

        int MAXOFF = 20;
        for (int inoff = 0; inoff < MAXOFF; inoff++) {
            for (int outoff = 0; outoff < MAXOFF; outoff++) {
                int[][] flows = {
                    //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1)
                    //overflow
                    {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1},
                    {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1},
                    {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1},
                    {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0},
                    //underflow
                    {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0},
                    {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0},
                    {inoff, 11, outoff, 6, inoff + 11, outoff + 4, 0},
                    {inoff, 12, outoff, 6, inoff + 11, outoff + 4, 0},
                    {inoff, 15, outoff, 6, inoff + 15, outoff + 6, 0},
                    // 2-byte under/overflow
                    {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0},
                    {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1},
                    {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0},
                };
                for (boolean direct : new boolean[] {false, true}) {
                    for (int[] flow : flows) {
                        if (!check(dec, bytes, direct, flow)) {
                            failed = true;
                        }
                    }
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check under/overflow failed " + csn);
        }
    }

    /**
     * Charsets under test. For each entry {@link #main} loads the reference
     * implementation from the class named {@code <entry>_OLD}.
     */
    static String[] csnames = new String[] {

        "IBM930",
        "IBM933",
        "IBM935",
        "IBM937",
        "IBM939",
        "IBM942",
        "IBM943",
        "IBM948",
        "IBM949",
        "IBM950",
        "IBM970",
        "IBM942C",
        "IBM943C",
        "IBM949C",
        "IBM1381",
        "IBM1383",

        "EUC_CN",
        "EUC_KR",
        "GBK",
        "Johab",
        "MS932",
        "MS936",
        "MS949",
        "MS950",

        "EUC_JP",
        "EUC_JP_LINUX",
        "EUC_JP_Open",
        "SJIS",
        "PCK",
    };

    /**
     * Runs the init, encoding, decoding and comparison checks for every entry
     * of {@link #csnames}, plus malformed-input checks for the charsets whose
     * canonical name is matched below.
     */
    public static void main(String[] args) throws Exception {
        for (String csname : csnames) {
            System.out.printf("-----------------------------------%n");
            String oldname = csname + "_OLD";
            String lookupName = "EUC_JP_Open".equals(csname) ? "eucjp-open" : csname;
            checkInit(lookupName);
            // Class.newInstance() is deprecated; go through the constructor.
            Charset csOld = (Charset)Class.forName(oldname)
                                          .getDeclaredConstructor()
                                          .newInstance();
            Charset csNew = Charset.forName(lookupName);
            char[] cc = checkEncoding(csOld, csNew);
            checkDecoding(csOld, csNew);
            compare(csNew, csOld, cc);

            // Match on the charset's reported name. The entries of csnames
            // ("IBM930", "IBM970", ...) never carry the "x-" prefix tested
            // here, so matching on them left both branches unreachable.
            String canonicalName = csNew.name();
            if (canonicalName.startsWith("x-IBM93")) {
                // ebcdic: 0x0e/0x0f are the bytes checkDecoding uses to
                // bracket a double-byte pair
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, 0x0f, 0x27},       // in SBCS, no SI
                    {1, 0x0e, 0x41, 0x41, 0xe},  // in DBCS, no SO
                    {2, 0x0e, 0x40, 0x41, 0xe},  // illegal DB
                });
            } else if (canonicalName.equals("x-IBM970") ||
                       canonicalName.equals("x-IBM1383")) {
                // euc_simple
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, (byte)0x8f, 0x27},                    // 0x8f (SS3)
                    {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51},  // 0x8e (SS2)
                });
            }
        }
    }
}