/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4831163 5053096 5056440 8022224
 * @summary NIO charset basic verification of JISAutodetect decoder
 * @modules jdk.charsets
 * @author Martin Buchholz
 */

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import static java.lang.System.*;

/**
 * Basic verification of the JISAutoDetect charset decoder.
 *
 * <p>Individual checks do not abort the run: each failed check is printed
 * and counted, and {@link #main} throws once at the very end if the count
 * is non-zero.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far. */
    private static int failures = 0;

    /** Prints {@code failureMsg} to standard output and counts one failure. */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure, together with a stack trace identifying the call
     * site, when {@code cond} is false.
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            new Exception().printStackTrace();
        }
    }

    /** Formats {@code c} as {@code U+XXXX} for use in failure messages. */
    private static String codePoint(char c) {
        return String.format("U+%04X", (int) c);
    }

    /** Name of the charset the auto-detecting decoder reports for the SJIS sample. */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
                                            (byte)0xcf, (byte)0xb2});
    }

    /** Name of the charset the auto-detecting decoder reports for the EUC-JP sample. */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
                                            (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes {@code bytes} with a fresh auto-detecting decoder and returns
     * the name of the charset it detected.
     *
     * @param bytes input handed to the decoder after an initial one-byte
     *              {@code 'A'} input
     * @return the name of the detected charset
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        //----------------------------------------------------------------
        // Test special public methods of CharsetDecoder while we're here
        //----------------------------------------------------------------
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutodetecting()");
        check(!cd.isCharsetDetected(), "isCharsetDetected");
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        // The test expects no charset to be detected after the single 'A'.
        check(!cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // Expected: detectedCharset() must throw while nothing is detected.
        }
        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        check(!cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every check in order and throws if any of them failed.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one check failed
     */
    public static void main(String[] argv) throws Exception {
        checkSingleByteDecode();
        checkReaderTerminates();
        checkAllJapaneseChars();
        checkSpecificSequences();
        checkAmbiguousEndOfInput();
        checkBug8022224();

        if (failures > 0) {
            throw new RuntimeException(failures + " tests failed");
        }
    }

    /** Decodes a single ASCII byte through {@code new String(byte[], String)}. */
    private static void checkSingleByteDecode() throws Exception {
        //----------------------------------------------------------------
        // Used to throw BufferOverflowException
        //----------------------------------------------------------------
        out.println(new String(new byte[] {0x61}, "JISAutoDetect"));
    }

    /** Reads one line and then end-of-stream through an auto-detecting reader. */
    private static void checkReaderTerminates() throws Exception {
        //----------------------------------------------------------------
        // InputStreamReader(...JISAutoDetect) used to infloop
        //----------------------------------------------------------------
        // Encode explicitly so the fixture does not depend on the platform
        // default charset.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        try (BufferedReader reader = new BufferedReader(
                 new InputStreamReader(new ByteArrayInputStream(bytes),
                                       "JISAutoDetect"))) {
            // Constant-first equals: a null line is reported as a failed
            // check rather than a NullPointerException.
            check("ABCD".equals(reader.readLine()), "first read gets text");
            // used to return "ABCD" on second and subsequent reads
            check(reader.readLine() == null, "second read gets null");
        }
    }

    /**
     * Round-trips every char that EUC-JP can encode through JISAutoDetect,
     * using the EUC-JP, ISO-2022-JP and SJIS encodings of that char, and
     * prints summary counts.
     */
    private static void checkAllJapaneseChars() throws Exception {
        //----------------------------------------------------------------
        // Check all Japanese chars for sanity
        //----------------------------------------------------------------
        String SJIS = SJISName();
        String EUCJ = EUCJName();
        out.printf("SJIS charset is %s%n", SJIS);
        out.printf("EUCJ charset is %s%n", EUCJ);

        int cnt2022 = 0;
        int cnteucj = 0;
        int cntsjis = 0;
        int cntBAD  = 0;
        // The strict bound leaves out U+FFFF; a non-strict bound could never
        // terminate because char wraps around to zero.
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' ||  // ESC
                c == '\u2014') {  // Em-Dash?
                continue;
            }
            String s = new String(new char[] {c});

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //----------------------------------------------------------------
            byte[] beucj = s.getBytes(EUCJ);
            String seucj = new String(beucj, EUCJ);
            if (!seucj.equals(s)) {
                continue; // Optimization
            }
            cnteucj++;
            String sauto = new String(beucj, "JISAutoDetect");
            if (!sauto.equals(seucj)) {
                cntBAD++;
                String ssjis = new String(beucj, SJIS);
                if (!sauto.equals(ssjis)) {
                    fail("Autodetection agrees with neither EUC nor SJIS: "
                         + codePoint(c));
                }
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //----------------------------------------------------------------
            byte[] b2022 = s.getBytes("ISO-2022-JP");
            if (new String(b2022, "ISO-2022-JP").equals(s)) {
                cnt2022++;
                check(new String(b2022, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection: " + codePoint(c));
            }

            //----------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //----------------------------------------------------------------
            byte[] bsjis = s.getBytes(SJIS);
            if (new String(bsjis, SJIS).equals(s)) {
                cntsjis++;
                check(new String(bsjis, "JISAutoDetect").equals(s),
                      "SJIS autodetection: " + codePoint(c));
            }
        }
        out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
        out.printf("There are %d SJIS-encodable characters.%n",        cntsjis);
        out.printf("There are %d EUC-JP-encodable characters.%n",      cnteucj);
        out.printf("There are %d characters that are " +
                   "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
    }

    /** Compares auto-detected decoding with a named charset for fixed inputs. */
    private static void checkSpecificSequences() throws Exception {
        //----------------------------------------------------------------
        // tests for specific byte sequences
        //----------------------------------------------------------------
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9', '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Feeds the decoder {@code 'A'} followed by a lone {@code 0x8f}, then
     * supplies {@code 0xa1} and signals end of input, checking buffer
     * positions after every step.
     */
    private static void checkAmbiguousEndOfInput() throws Exception {
        //----------------------------------------------------------------
        // Check handling of ambiguous end-of-input in middle of first char
        //----------------------------------------------------------------
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();
        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");
        // A second call with no new input is expected to consume nothing more.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /**
     * Decodes the same input twice with fresh decoders, first into a
     * ten-char output buffer and then into one limited to a single char,
     * and checks that both calls return the identical {@link CoderResult}.
     */
    private static void checkBug8022224() throws Exception {
        // test #8022224
        Charset cs = Charset.forName("x-JISAutoDetect");
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { 'a', 0x1b, 0x24, 0x40 });
        CharBuffer cb = CharBuffer.wrap(new char[10]);
        CoderResult cr = cs.newDecoder().decode(bb, cb, false);
        bb.rewind();
        cb.clear().limit(1);
        check(cr == cs.newDecoder().decode(bb, cb, false), "#8022224");
    }

    /** Records a failure unless {@code result} is an underflow. */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with the auto-detecting decoder and once
     * with {@code expectedCharset}, and records a failure if the two decoded
     * strings differ or either decode does not end in underflow.
     *
     * @param expectedCharset name of the charset whose decoding is the
     *                        reference result
     * @param input           bytes to decode; copied into a 128-byte buffer
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        CharsetDecoder autoDetect = cs.newDecoder();

        Charset cs2 = Charset.forName(expectedCharset);
        CharsetDecoder decoder = cs2.newDecoder();

        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        bb.put(input);
        bb.flip();
        bb.mark();

        CoderResult result = autoDetect.decode(bb, charOutput, true);
        checkCoderResult(result);
        charOutput.flip();
        String actual = charOutput.toString();

        // Return to the marked start so the reference decoder sees the
        // same bytes.
        bb.reset();

        result = decoder.decode(bb, charExpected, true);
        checkCoderResult(result);
        charExpected.flip();
        String expected = charExpected.toString();

        check(actual.equals(expected),
              String.format("actual=%s expected=%s", actual, expected));
    }
}