1/*
2 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/*
25 * @test
26 * @bug 4831163 5053096 5056440 8022224
27 * @summary NIO charset basic verification of JISAutodetect decoder
28 * @modules jdk.charsets
29 * @author Martin Buchholz
30 */
31
32import java.io.*;
33import java.nio.ByteBuffer;
34import java.nio.CharBuffer;
35import java.nio.charset.Charset;
36import java.nio.charset.CharsetDecoder;
37import java.nio.charset.CoderResult;
38import static java.lang.System.*;
39
/**
 * Basic verification of the {@code JISAutoDetect} charset decoder.
 *
 * <p>The test decodes byte sequences through the auto-detecting charset and
 * compares the result with what the explicitly named charset (ISO-2022-JP,
 * EUC-JP or SJIS) produces for the same bytes. Every failed check is printed
 * to {@code System.out} and counted; {@link #main} throws at the very end if
 * at least one check failed, so a single run reports all problems.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far. */
    private static int failures = 0;

    /**
     * Records one failure: prints the message and bumps the failure counter.
     *
     * @param failureMsg text printed to {@code System.out}
     */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure (plus a stack trace identifying the call site) when
     * the condition does not hold.
     *
     * @param cond condition that is expected to be {@code true}
     * @param msg  description appended to {@code "test failed: "}
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            new Exception().printStackTrace();
        }
    }

    /**
     * Returns the name of the charset the auto-detecting decoder settles on
     * for a fixed four-byte sample (the same bytes that open the SJIS
     * sequences in {@link #verifyKnownByteSequences}).
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
                                            (byte)0xcf, (byte)0xb2});
    }

    /**
     * Returns the name of the charset the auto-detecting decoder settles on
     * for a fixed four-byte sample (the same bytes that open the first EUC-JP
     * sequence in {@link #verifyKnownByteSequences}).
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
                                            (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes {@code bytes} with a fresh {@code JISAutodetect} decoder and
     * returns the name of the charset it detected. Along the way it exercises
     * the auto-detection related public methods of {@link CharsetDecoder}.
     *
     * @param bytes input that is expected to trigger detection
     * @return {@code name()} of the detected charset
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutoDetecting()");
        check(! cd.isCharsetDetected(), "isCharsetDetected");

        // Decoding a lone ASCII byte must not settle the detection.
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(! cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // Expected: nothing has been detected yet.
        }

        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        // The detected charset must be a concrete one, not another detector.
        check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every check in a fixed order and throws if any of them failed.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one check recorded a failure
     */
    public static void main(String[] argv) throws Exception {
        //----------------------------------------------------------------
        // Used to throw BufferOverflowException
        //----------------------------------------------------------------
        System.out.println(new String(new byte[] {0x61}, "JISAutoDetect"));

        verifyReaderReachesEof();
        verifyAllCharacters();
        verifyKnownByteSequences();
        verifySplitFirstCharacter();
        verifyBug8022224();

        if (failures > 0) {
            throw new RuntimeException(failures + " tests failed");
        }
    }

    /**
     * InputStreamReader(...JISAutoDetect) used to infloop: reads one line of
     * ASCII through the auto-detecting charset and expects end-of-stream on
     * the following read.
     */
    private static void verifyReaderReachesEof() throws Exception {
        // Encode explicitly so the input bytes do not depend on the
        // platform default charset.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        InputStreamReader isr = new InputStreamReader(bais, "JISAutoDetect");
        BufferedReader reader = new BufferedReader(isr);
        // Constant-first equals: an unexpected null line is reported as a
        // failed check instead of a NullPointerException.
        check("ABCD".equals(reader.readLine()), "first read gets text");
        // used to return "ABCD" on second and subsequent reads
        check(reader.readLine() == null, "second read gets null");
    }

    /**
     * Checks all Japanese chars for sanity: every char that round-trips
     * through EUC-JP is also fed to the auto-detecting decoder as EUC-JP,
     * ISO-2022-JP and SJIS bytes, and summary counts are printed at the end.
     */
    private static void verifyAllCharacters() throws Exception {
        String sjisName = SJISName();
        String eucjName = EUCJName();
        System.out.printf("SJIS charset is %s%n", sjisName);
        System.out.printf("EUCJ charset is %s%n", eucjName);

        int cnt2022 = 0;
        int cnteucj = 0;
        int cntsjis = 0;
        int cntBAD  = 0;
        // The bound is exclusive: a char counter can never exceed U+FFFF, so
        // an inclusive bound would make the loop wrap around forever.
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' || // ESC
                c == '\u2014') { // Em-Dash?
                continue;
            }
            String s = new String(new char[] {c});

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //------------------------------------------------------------
            byte[] beucj = s.getBytes(eucjName);
            String seucj = new String(beucj, eucjName);
            if (! seucj.equals(s)) {
                // Optimization: chars that do not round-trip through EUC-JP
                // are not checked against the other encodings either.
                continue;
            }
            cnteucj++;
            String sauto = new String(beucj, "JISAutoDetect");
            if (! sauto.equals(seucj)) {
                // Tolerated only when the bytes were taken for SJIS.
                cntBAD++;
                String ssjis = new String(beucj, sjisName);
                if (! sauto.equals(ssjis)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //------------------------------------------------------------
            byte[] b2022 = s.getBytes("ISO-2022-JP");
            if (new String(b2022, "ISO-2022-JP").equals(s)) {
                cnt2022++;
                check(new String(b2022, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //------------------------------------------------------------
            byte[] bsjis = s.getBytes(sjisName);
            if (new String(bsjis, sjisName).equals(s)) {
                cntsjis++;
                check(new String(bsjis, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }
        System.out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
        System.out.printf("There are %d SJIS-encodable characters.%n",        cntsjis);
        System.out.printf("There are %d EUC-JP-encodable characters.%n",      cnteucj);
        System.out.printf("There are %d characters that are " +
                          "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
    }

    /**
     * Tests for specific byte sequences: each one must decode through the
     * auto-detecting charset exactly as it does through the named charset.
     */
    private static void verifyKnownByteSequences() throws Exception {
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p',  'y',  'r', 'i', 'g',  'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9',  '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Checks handling of ambiguous end-of-input in the middle of the first
     * non-ASCII char: while more input may follow, the decoder must consume
     * only the leading ASCII byte and leave the lone 0x8f byte pending.
     */
    private static void verifySplitFirstCharacter() throws Exception {
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();

        // First pass: only 'A' is consumed and produced.
        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Decoding again without new input must not change anything.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // compact() moves the pending 0x8f to the front; with 0xa1 appended
        // and end-of-input signalled, both bytes become one more char.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /**
     * Test for bug 8022224: decoding the same input must yield the same
     * {@link CoderResult} whether the output buffer has room for ten chars
     * or for one char only.
     */
    private static void verifyBug8022224() throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { 'a', 0x1b, 0x24, 0x40 });
        CharBuffer cb = CharBuffer.wrap(new char[10]);
        CoderResult cr = cs.newDecoder().decode(bb, cb, false);
        bb.rewind();
        cb.clear().limit(1);
        check(cr == cs.newDecoder().decode(bb, cb, false), "#8022224");
    }

    /**
     * Records a failure unless the given result is an underflow, i.e. the
     * decoder consumed its input without error or overflow.
     *
     * @param result result of a decode call
     */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with the auto-detecting charset and once
     * with {@code expectedCharset}, and records a failure if the two decoded
     * strings differ or either decode does not end in underflow.
     *
     * @param expectedCharset name of the charset the input is encoded in
     * @param input           bytes to decode; must fit the 128-byte work buffer
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        CharsetDecoder autoDetect =
            Charset.forName("x-JISAutoDetect").newDecoder();
        CharsetDecoder decoder = Charset.forName(expectedCharset).newDecoder();

        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        bb.put(input);
        bb.flip();
        // Remember the start so the same bytes can be decoded a second time.
        bb.mark();

        CoderResult result = autoDetect.decode(bb, charOutput, true);
        checkCoderResult(result);
        charOutput.flip();
        String actual = charOutput.toString();

        bb.reset();

        result = decoder.decode(bb, charExpected, true);
        checkCoderResult(result);
        charExpected.flip();
        String expected = charExpected.toString();

        // Name the charset in the message: many calls share this check.
        check(actual.equals(expected),
              String.format("charset=%s actual=%s expected=%s",
                            expectedCharset, actual, expected));
    }
}
292