FindEncoderBugs.java revision 11822:110f7f35760f
/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 6233345 6381699 6381702 6381705 6381706
 * @summary Encode many char sequences in many ways
 * @run main/timeout=1200 FindEncoderBugs
 * @author Martin Buchholz
 * @key randomness
 */

import java.util.*;
import java.util.regex.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * Encodes many char sequences with the encoder of every available charset,
 * using both heap and direct buffers and a range of output buffer sizes,
 * and reports results that are inconsistent with each other or with the
 * expectations coded in {@link CharsetTester}.
 */
public class FindEncoderBugs {

    /**
     * Tells whether the named charset is skipped by this test.
     *
     * @param csn canonical charset name
     * @return {@code true} only for {@code x-COMPOUND_TEXT}
     */
    static boolean isBroken(String csn) {
        return csn.equals("x-COMPOUND_TEXT");
    }

    /**
     * Returns a new list holding the elements of {@code c} in natural order.
     * The argument collection is not modified.
     */
    static <T extends Comparable<? super T>> List<T> sort(Collection<T> c) {
        List<T> list = new ArrayList<T>(c);
        Collections.sort(list);
        return list;
    }

    /** Thrown by {@link CharsetTester#test} to abandon a charset that fails too often. */
    static class TooManyFailures extends RuntimeException {
        private static final long serialVersionUID = 0L;
    }

    /** Formats bytes as space-separated two-digit lower-case hex, e.g. {@code "00 fe 7f"}. */
    static String string(byte[] a) {
        final StringBuilder sb = new StringBuilder();
        for (byte b : a) {
            if (sb.length() != 0) sb.append(' ');
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /** Formats chars as space-separated {@code \}{@code uXXXX} escapes. */
    static String string(char[] a) {
        final StringBuilder sb = new StringBuilder();
        for (char c : a) {
            if (sb.length() != 0) sb.append(' ');
            sb.append(String.format("\\u%04x", (int) c));
        }
        return sb.toString();
    }

    /** Counts failures and limits how often an identical message is printed. */
    static class Reporter {
        // Some machinery to make sure only a small number of errors
        // that are "too similar" are reported.
        static class Counts extends HashMap<String, Long> {
            private static final long serialVersionUID = -1;

            /** Increments the count stored for {@code signature} and returns the new count. */
            long inc(String signature) {
                Long count = get(signature);
                if (count == null) count = 0L;
                put(signature, count + 1);
                return count + 1;
            }
        }

        final Counts failureCounts = new Counts();

        /** Maximum number of times a message with the same signature is printed. */
        static final long maxFailures = 2;

        // Only referenced by the disabled signature normalization in bug().
        static final Pattern hideBytes = Pattern.compile("\"[0-9a-f ]+\"");
        static final Pattern hideChars = Pattern.compile("\\\\u[0-9a-f]{4}");

        /**
         * Records one failure. The formatted message is the signature under
         * which the failure is counted; it is printed only while that count
         * does not exceed {@link #maxFailures}.
         *
         * @return {@code true} if the message was printed
         */
        boolean bug(String format, Object... args) {
            String signature = String.format(format, args);
            // signature = hideBytes.matcher(signature).replaceAll("\"??\"");
            // signature = hideChars.matcher(signature).replaceAll("\\u????");
            failed++;
            if (failureCounts.inc(signature) <= maxFailures) {
                System.out.printf(format, args);
                System.out.println();
                return true;
            }
            return false;
        }

        /** Prints every recorded signature, in sorted order, with its failure count. */
        void summarize() {
            for (String key : sort(failureCounts.keySet()))
                System.out.printf("-----%n%s%nfailures=%d%n",
                                  key, failureCounts.get(key));
        }
    }

    static final Reporter reporter = new Reporter();

    /** Snapshot of the buffers and the CoderResult after one encode call. */
    static class Result {
        final int limit;        // limit of the output buffer
        final int ipos;         // position reached in the input buffer
        final boolean direct;   // whether the input buffer was direct
        final char[] ia;        // input chars, from index 0 up to the input limit
        final byte[] oa;        // output bytes, from index 0 up to the output position
        final CoderResult cr;   // may be null (see CharsetTester.recode)

        Result(CharBuffer ib, ByteBuffer ob, CoderResult cr) {
            ipos = ib.position();
            ia = toArray(ib);
            oa = toArray(ob);
            direct = ib.isDirect();
            limit = ob.limit();
            this.cr = cr;
        }

        /** Copies chars [0, limit) out of {@code b}; the buffer's position is restored. */
        static char[] toArray(CharBuffer b) {
            int pos = b.position();
            char[] a = new char[b.limit()];
            b.position(0);
            b.get(a);
            b.position(pos);
            return a;
        }

        /**
         * Copies bytes [0, position) out of {@code b}. The bulk get advances
         * the position back to where it was on entry.
         */
        static byte[] toArray(ByteBuffer b) {
            byte[] a = new byte[b.position()];
            b.position(0);
            b.get(a);
            return a;
        }

        /**
         * Two results are equal when both are null, or both are non-null and
         * agree on output bytes, input position and CoderResult identity.
         */
        static boolean eq(Result x, Result y) {
            return x == y ||
                (x != null && y != null &&
                 (Arrays.equals(x.oa, y.oa) &&
                  x.ipos == y.ipos &&
                  x.cr == y.cr));
        }

        @Override
        public String toString() {
            return String.format("\"%s\"[%d/%d] => %s \"%s\"[%d/%d]%s",
                                 string(ia), ipos, ia.length,
                                 cr, string(oa), oa.length, limit,
                                 (direct ? " (direct)" : ""));
        }
    }

    /** Runs the encoder checks for a single charset. */
    static class CharsetTester {
        private final Charset cs;
        private final boolean hasBom;

        /** Bound used by testSurrogates to stop after repeated failures. */
        private static final int maxFailures = 5;

        // private static final long maxCharsetFailures = Long.MAX_VALUE;
        private static final long maxCharsetFailures = 10000L;

        /** Global failure count at the time this tester was created. */
        private final long failed0 = failed;

        // legend: r=regular d=direct In=Input Ou=Output
        // Buffer at index i has capacity i, so callers pick a buffer by size.
        static final int maxBufSize = 20;
        static final CharBuffer[] rInBuffers = new CharBuffer[maxBufSize];
        static final CharBuffer[] dInBuffers = new CharBuffer[maxBufSize];

        static final ByteBuffer[] rOuBuffers = new ByteBuffer[maxBufSize];
        static final ByteBuffer[] dOuBuffers = new ByteBuffer[maxBufSize];
        static {
            for (int i = 0; i < maxBufSize; i++) {
                rInBuffers[i] = CharBuffer.allocate(i);
                dInBuffers[i] = ByteBuffer.allocateDirect(i*2).asCharBuffer();
                rOuBuffers[i] = ByteBuffer.allocate(i);
                dOuBuffers[i] = ByteBuffer.allocateDirect(i);
            }
        }

        CharsetTester(Charset cs) {
            this.cs = cs;
            this.hasBom =
                cs.name().matches(".*BOM.*") ||
                cs.name().equals("UTF-16");
        }

        static boolean bug(String format, Object... args) {
            return reporter.bug(format, args);
        }

        /**
         * Heuristic check for a byte-order mark: the array has length 2 or 4
         * and its bytes sum to the same value as 0xfe plus 0xff.
         */
        static boolean hasBom(byte[] a) {
            switch (a.length) {
            case 2: case 4:
                int sum = 0;
                for (byte x : a)
                    sum += x;
                return sum == (byte) 0xfe + (byte) 0xff;
            default: return false;
            }
        }

        /**
         * Expected outcome for an unpaired surrogate at index 0: MALFORMED of
         * length 1, with either no input consumed or, for a BOM-writing
         * charset, output that looks like a BOM.
         */
        private boolean isMalformedAtStart(Result r) {
            return r.cr.isMalformed() &&
                r.cr.length() == 1 &&
                (r.ipos == 0 || (hasBom && hasBom(r.oa)));
        }

        /**
         * Checks the encoder's handling of lone, unpaired and correctly
         * paired surrogates. Each loop stops early if test() returns null;
         * the last three loops abandon the method once the local failure
         * counter has passed maxFailures.
         */
        void testSurrogates() {
            int failures = 0;
            for (int i = 0; i < 10; i++) {
                Result r = test(new char[] { randomHighSurrogate() });
                if (r == null) break;
                if (! (r.cr.isUnderflow() &&
                       r.ipos == 0))
                    bug("Lone high surrogate not UNDERFLOW: %s %s",
                        cs, r);
            }
            for (int i = 0; i < 10; i++) {
                Result r = test(new char[] { randomLowSurrogate() });
                if (r == null) break;
                if (! (r.cr.isMalformed() && r.cr.length() == 1))
                    bug("Lone low surrogate not MALFORMED[1]: %s %s",
                        cs, r);
            }
            char[] chars = new char[2];
            for (int i = 0; i < 10; i++) {
                chars[0] = randomLowSurrogate(); // Always illegal
                chars[1] = randomChar();
                Result r = test(chars);
                if (r == null) break;
                if (! isMalformedAtStart(r)) {
                    if (failures++ > maxFailures) return;
                    bug("Unpaired low surrogate not MALFORMED[1]: %s %s",
                        cs, r);
                }
            }
            for (int i = 0; i < 10; i++) {
                chars[0] = randomHighSurrogate();
                do {
                    chars[1] = randomChar();
                } while (Character.isLowSurrogate(chars[1]));
                Result r = test(chars);
                if (r == null) break;
                if (! isMalformedAtStart(r)) {
                    if (failures++ > maxFailures) return;
                    bug("Unpaired high surrogate not MALFORMED[1]: %s %s",
                        cs, r);
                }
            }
            for (int i = 0; i < 1000; i++) {
                chars[0] = randomHighSurrogate();
                chars[1] = randomLowSurrogate();
                Result r = test(chars);
                if (r == null) break;
                // Either the pair is unmappable and nothing was written,
                // or it was fully consumed and produced some output.
                if (! ((r.cr.isUnmappable() &&
                        r.cr.length() == 2 &&
                        r.oa.length == 0)
                       ||
                       (r.cr.isUnderflow() &&
                        r.oa.length > 0 &&
                        r.ipos == 2))) {
                    if (failures++ > maxFailures) return;
                    bug("Legal supplementary character bug: %s %s",
                        cs, r);
                }
            }
        }

        /**
         * Encodes all of {@code ib} into {@code ob} with a fresh encoder and
         * sanity-checks the outcome. Both buffers are cleared first, so the
         * whole capacity of each is used.
         *
         * @return a snapshot of the first encode call, or {@code null} if
         *         anything was thrown (the exception is reported via bug())
         */
        Result recode(CharBuffer ib, ByteBuffer ob) {
            try {
                byte canary = 22;
                ib.clear();     // Prepare to read
                ob.clear();     // Prepare to write
                // Fill the output with a known value to detect stray writes.
                for (int i = 0; i < ob.limit(); i++)
                    ob.put(i, canary);
                CharsetEncoder coder = cs.newEncoder();
                CoderResult cr = coder.encode(ib, ob, false);
                equal(ib.limit(), ib.capacity());
                equal(ob.limit(), ob.capacity());
                Result r = new Result(ib, ob, cr);
                if (cr.isError())
                    check(cr.length() > 0);
                if (cr.isOverflow() && ob.remaining() > 10)
                    bug("OVERFLOW, but there's lots of room: %s %s",
                        cs, r);
//              if (cr.isOverflow() && ib.remaining() == 0 && ! hasBom)
//                  bug("OVERFLOW, yet remaining() == 0: %s %s",
//                      cs, r);
                if (cr.isError() && ib.remaining() < cr.length())
                    bug("remaining() < CoderResult.length(): %s %s",
                        cs, r);
//              if (ib.position() == 0
//                  && ob.position() > 0
//                  && ! hasBom(r.oa))
//                  bug("output only if input consumed: %s %s",
//                      cs, r);
                // A second call on the same state must change nothing.
                CoderResult cr2 = coder.encode(ib, ob, false);
                if (ib.position() != r.ipos ||
                    ob.position() != r.oa.length ||
                    cr != cr2)
                    bug("Coding operation not idempotent: %s%n %s%n %s",
                        cs, r, new Result(ib, ob, cr2));
                // The byte just past the output must still be the canary.
                if (ob.position() < ob.limit() &&
                    ob.get(ob.position()) != canary)
                    bug("Buffer overrun: %s %s %s",
                        cs, r, ob.get(ob.position()));
                return r;
            } catch (Throwable t) {
                if (bug("Unexpected exception: %s %s %s",
                        cs, t.getClass().getSimpleName(),
                        new Result(ib, ob, null)))
                    t.printStackTrace();
                return null;
            }
        }

        /**
         * Encodes {@code ia} into an {@code n}-byte output buffer twice, once
         * with heap buffers and once with direct buffers, and reports any
         * difference between the two results. Both {@code ia.length} and
         * {@code n} must be less than {@link #maxBufSize}.
         *
         * @return the heap-buffer result, which may be {@code null}
         */
        Result recode2(char[] ia, int n) {
            int len = ia.length;
            CharBuffer rib = CharBuffer.wrap(ia);
            CharBuffer dib = dInBuffers[len];
            dib.clear(); dib.put(ia); dib.clear();
            ByteBuffer rob = rOuBuffers[n];
            ByteBuffer dob = dOuBuffers[n];
            equal(rob.limit(), n);
            equal(dob.limit(), n);
            check(dib.isDirect());
            check(dob.isDirect());
            Result r1 = recode(rib, rob);
            Result r2 = recode(dib, dob);
            if (r1 != null && r2 != null && ! Result.eq(r1, r2))
                bug("Results differ for direct buffers: %s%n %s%n %s",
                    cs, r1, r2);
            return r1;
        }

        /**
         * Encodes {@code ia} with a roomy output buffer, then again with an
         * output buffer of exactly the produced size (when all input was
         * consumed) and with every smaller size, reporting discrepancies.
         *
         * @return the roomy result, or {@code null} if the roomy encode threw
         * @throws TooManyFailures once maxCharsetFailures failures have been
         *         counted since this tester was created
         */
        Result test(char[] ia) {
            if (failed - failed0 >= maxCharsetFailures)
                throw new TooManyFailures();

            Result roomy = recode2(ia, maxBufSize - 1);
            if (roomy == null) return roomy;
            int olen = roomy.oa.length;
            if (olen > 0) {
                if (roomy.ipos == roomy.ia.length) {
                    Result perfectFit = recode2(ia, olen);
                    // null means recode already reported an exception;
                    // do not pile a misleading "Results differ" on top.
                    if (perfectFit == null) return roomy;
                    if (! Result.eq(roomy, perfectFit))
                        bug("Results differ: %s%n %s%n %s",
                            cs, roomy, perfectFit);
                }
                for (int i = 0; i < olen; i++) {
                    Result claustrophobic = recode2(ia, i);
                    if (claustrophobic == null) return roomy;
                    if (roomy.cr.isUnderflow() &&
                        ! claustrophobic.cr.isOverflow())
                        bug("Expected OVERFLOW: %s%n %s%n %s",
                            cs, roomy, claustrophobic);
                }
            }
            return roomy;
        }

        /** Tests {@code prefix} followed by every possible sequence of {@code n} chars. */
        void testExhaustively(char[] prefix, int n) {
            int len = prefix.length;
            char[] ia = Arrays.copyOf(prefix, len + 1);
            for (int i = 0; i < 0x10000; i++) {
                ia[len] = (char) i;
                if (n == 1)
                    test(ia);
                else
                    testExhaustively(ia, n - 1);
            }
        }

        /** Tests {@code prefix} followed by {@code n} random chars, 10000 times. */
        void testRandomly(char[] prefix, int n) {
            int len = prefix.length;
            char[] ia = Arrays.copyOf(prefix, len + n);
            for (int i = 0; i < 10000; i++) {
                for (int j = 0; j < n; j++)
                    ia[len + j] = randomChar();
                test(ia);
            }
        }

        /** Runs the exhaustive one-char and random two- and three-char tests after {@code prefix}. */
        void testPrefix(char[] prefix) {
            if (prefix.length > 0)
                System.out.printf("Testing prefix %s%n", string(prefix));

            test(prefix);

            testExhaustively(prefix, 1);
            // Can you spare a year of CPU time?
            //testExhaustively(prefix, 2);

            testRandomly(prefix, 2);
            testRandomly(prefix, 3);
        }
    }

    // The seed is printed by realMain so that a failing run can be
    // reproduced by passing -Dseed=<value>.
    private static final long seed =
        Long.getLong("seed", new Random().nextLong());
    private static final Random rnd = new Random(seed);

    /** Returns a random char in the full range '\u0000' to '\uffff' inclusive. */
    private static char randomChar() {
        // nextInt's bound is exclusive, hence the + 1.
        return (char) rnd.nextInt(Character.MAX_VALUE + 1);
    }

    private static char randomHighSurrogate() {
        return (char) (Character.MIN_HIGH_SURROGATE + rnd.nextInt(1024));
    }

    private static char randomLowSurrogate() {
        return (char) (Character.MIN_LOW_SURROGATE + rnd.nextInt(1024));
    }

    /**
     * Runs all tests for one charset. Charsets that cannot encode, and those
     * for which {@link #isBroken} is true, are skipped.
     */
    private static void testCharset(Charset cs) throws Throwable {
        if (! cs.canEncode())
            return;

        final String csn = cs.name();

        if (isBroken(csn)) {
            System.out.printf("Skipping possibly broken charset %s%n", csn);
            return;
        }
        System.out.println(csn);

        CharsetTester tester = new CharsetTester(cs);

        tester.testSurrogates();

        tester.testPrefix(new char[] {});

        if (csn.equals("x-ISCII91")) {
            System.out.println("More ISCII testing...");
            new CharsetTester(cs).testPrefix(new char[]{'\u094d'}); // Halant
            new CharsetTester(cs).testPrefix(new char[]{'\u093c'}); // Nukta
        }
    }

    private static void realMain(String[] args) {
        System.out.printf("Random seed: %d%n", seed);
        for (Charset cs : sort(Charset.availableCharsets().values())) {
            try {
                testCharset(cs);
            } catch (TooManyFailures e) {
                System.out.printf("Too many failures for %s%n", cs);
            } catch (Throwable t) {
                unexpected(t);
            }
        }
        reporter.summarize();
    }

    //--------------------- Infrastructure ---------------------------
    static volatile long passed = 0, failed = 0;

    static void pass() { passed++; }

    static void fail() { failed++; Thread.dumpStack(); }

    static void fail(String format, Object... args) {
        System.out.println(String.format(format, args));
        failed++;
    }

    static void fail(String msg) { System.out.println(msg); fail(); }

    static void unexpected(Throwable t) { failed++; t.printStackTrace(); }

    static void check(boolean cond) { if (cond) pass(); else fail(); }

    static void equal(Object x, Object y) {
        if (x == null ? y == null : x.equals(y)) pass();
        else fail(x + " not equal to " + y);
    }

    public static void main(String[] args) throws Throwable {
        try { realMain(args); } catch (Throwable t) { unexpected(t); }
        System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed);
        if (failed > 0) throw new AssertionError("Some tests failed");
    }
}