1/*
2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/*
25 * @test
26 * @bug 6831794 6229811
27 * @summary Test EUC_TW charset
28 * @modules java.base/sun.nio.cs
29 */
30
31import java.nio.charset.*;
32import java.nio.*;
33import java.util.*;
34
35public class TestEUC_TW {
36
37    static class Time {
38        long t;
39    }
40    static int iteration = 100;
41
42    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
43        throws Exception {
44        String csn = cs.name();
45        CharsetDecoder dec = cs.newDecoder();
46        ByteBuffer bbf;
47        CharBuffer cbf;
48        if (testDirect) {
49            bbf = ByteBuffer.allocateDirect(bb.length);
50            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
51            bbf.put(bb);
52        } else {
53            bbf = ByteBuffer.wrap(bb);
54            cbf = CharBuffer.allocate(bb.length);
55        }
56        CoderResult cr = null;
57        long t1 = System.nanoTime()/1000;
58        for (int i = 0; i < iteration; i++) {
59            bbf.rewind();
60            cbf.clear();
61            dec.reset();
62            cr = dec.decode(bbf, cbf, true);
63        }
64        long t2 = System.nanoTime()/1000;
65        if (t != null)
66        t.t = (t2 - t1)/iteration;
67        if (cr != CoderResult.UNDERFLOW) {
68            System.out.println("DEC-----------------");
69            int pos = bbf.position();
70            System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
71                              cr.toString(), pos,
72                              bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
73            throw new RuntimeException("Decoding err: " + csn);
74        }
75        char[] cc = new char[cbf.position()];
76        cbf.flip(); cbf.get(cc);
77        return cc;
78
79    }
80
81    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
82        throws Exception {
83        CharsetDecoder dec = cs.newDecoder();
84        ByteBuffer bbf;
85        CharBuffer cbf;
86        if (testDirect) {
87            bbf = ByteBuffer.allocateDirect(bb.length);
88            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
89            bbf.put(bb).flip();
90        } else {
91            bbf = ByteBuffer.wrap(bb);
92            cbf = CharBuffer.allocate(bb.length);
93        }
94        return dec.decode(bbf, cbf, true);
95    }
96
97    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
98        throws Exception {
99        ByteBuffer bbf;
100        CharBuffer cbf;
101        CharsetEncoder enc = cs.newEncoder();
102        String csn = cs.name();
103        if (testDirect) {
104            bbf = ByteBuffer.allocateDirect(cc.length * 4);
105            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
106            cbf.put(cc).flip();
107        } else {
108            bbf = ByteBuffer.allocate(cc.length * 4);
109            cbf = CharBuffer.wrap(cc);
110        }
111        CoderResult cr = null;
112        long t1 = System.nanoTime()/1000;
113        for (int i = 0; i < iteration; i++) {
114            cbf.rewind();
115            bbf.clear();
116            enc.reset();
117            cr = enc.encode(cbf, bbf, true);
118        }
119        long t2 = System.nanoTime()/1000;
120        if (t != null)
121        t.t = (t2 - t1)/iteration;
122        if (cr != CoderResult.UNDERFLOW) {
123            System.out.println("ENC-----------------");
124            int pos = cbf.position();
125            System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
126                              cr.toString(), pos, cc[pos]&0xffff);
127            throw new RuntimeException("Encoding err: " + csn);
128        }
129        byte[] bb = new byte[bbf.position()];
130        bbf.flip(); bbf.get(bb);
131        return bb;
132    }
133
134    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
135        throws Exception {
136        ByteBuffer bbf;
137        CharBuffer cbf;
138        CharsetEncoder enc = cs.newEncoder();
139        if (testDirect) {
140            bbf = ByteBuffer.allocateDirect(cc.length * 4);
141            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
142            cbf.put(cc).flip();
143        } else {
144            bbf = ByteBuffer.allocate(cc.length * 4);
145            cbf = CharBuffer.wrap(cc);
146        }
147        return enc.encode(cbf, bbf, true);
148    }
149
150    static char[] getEUC_TWChars(boolean skipNR) {
151        //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder();
152        CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
153        CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
154        char[] cc = new char[0x20000];
155        char[] c2 = new char[2];
156        int pos = 0;
157        int i = 0;
158        //bmp
159        for (i = 0; i < 0x10000; i++) {
160            //SKIP these 3 NR codepoints if compared to EUC_TW
161            if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
162                continue;
163            if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
164                System.out.printf("  Err i=%x:  old=%b new=%b%n", i,
165                                  encOLD.canEncode((char)i),
166                                  enc.canEncode((char)i));
167                throw new RuntimeException("canEncode() err!");
168            }
169
170            if (enc.canEncode((char)i)) {
171                cc[pos++] = (char)i;
172            }
173        }
174
175        //supp
176        CharBuffer cb = CharBuffer.wrap(new char[2]);
177        for (i = 0x20000; i < 0x30000; i++) {
178            Character.toChars(i, c2, 0);
179            cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
180
181            if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
182                throw new RuntimeException("canEncode() err!");
183            }
184
185            if (enc.canEncode(cb)) {
186                //System.out.printf("cp=%x,  (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff);
187                cc[pos++] = c2[0];
188                cc[pos++] = c2[1];
189            }
190        }
191
192        return Arrays.copyOf(cc, pos);
193    }
194
195    static void checkRoundtrip(Charset cs) throws Exception {
196        char[] cc = getEUC_TWChars(false);
197        System.out.printf("Check roundtrip <%s>...", cs.name());
198        byte[] bb = encode(cc, cs, false, null);
199        char[] ccO = decode(bb, cs, false, null);
200
201        if (!Arrays.equals(cc, ccO)) {
202            System.out.printf("    non-direct failed");
203        }
204        bb = encode(cc, cs, true, null);
205        ccO = decode(bb, cs, true, null);
206        if (!Arrays.equals(cc, ccO)) {
207            System.out.printf("    (direct) failed");
208        }
209        System.out.println();
210    }
211
212    static void checkInit(String csn) throws Exception {
213        System.out.printf("Check init <%s>...%n", csn);
214        Charset.forName("Big5");    // load in the ExtendedCharsets
215        long t1 = System.nanoTime()/1000;
216        Charset cs = Charset.forName(csn);
217        long t2 = System.nanoTime()/1000;
218        System.out.printf("    charset     :%d%n", t2 - t1);
219        t1 = System.nanoTime()/1000;
220            cs.newDecoder();
221        t2 = System.nanoTime()/1000;
222        System.out.printf("    new Decoder :%d%n", t2 - t1);
223
224        t1 = System.nanoTime()/1000;
225            cs.newEncoder();
226        t2 = System.nanoTime()/1000;
227        System.out.printf("    new Encoder :%d%n", t2 - t1);
228    }
229
230    static void compare(Charset cs1, Charset cs2) throws Exception {
231        char[] cc = getEUC_TWChars(true);
232
233        String csn1 = cs1.name();
234        String csn2 = cs2.name();
235        System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
236
237        Time t1 = new Time();
238        Time t2 = new Time();
239
240        byte[] bb1 = encode(cc, cs1, false, t1);
241        byte[] bb2 = encode(cc, cs2, false, t2);
242
243        System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
244                          csn2, csn1,
245                          t2.t, t1.t,
246                          (double)(t2.t)/(t1.t));
247        if (!Arrays.equals(bb1, bb2)) {
248            System.out.printf("        encoding failed%n");
249        }
250
251        char[] cc2 = decode(bb1, cs2, false, t2);
252        char[] cc1 = decode(bb1, cs1, false, t1);
253        System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
254                          csn2, csn1,
255                          t2.t, t1.t,
256                          (double)(t2.t)/(t1.t));
257        if (!Arrays.equals(cc1, cc2)) {
258            System.out.printf("        decoding failed%n");
259        }
260
261        bb1 = encode(cc, cs1, true, t1);
262        bb2 = encode(cc, cs2, true, t2);
263
264        System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
265                          csn2, csn1,
266                          t2.t, t1.t,
267                          (double)(t2.t)/(t1.t));
268
269        if (!Arrays.equals(bb1, bb2))
270            System.out.printf("        encoding (direct) failed%n");
271
272        cc1 = decode(bb1, cs1, true, t1);
273        cc2 = decode(bb1, cs2, true, t2);
274        System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
275                          csn2, csn1,
276                          t2.t, t1.t,
277                          (double)(t2.t)/(t1.t));
278        if (!Arrays.equals(cc1, cc2)) {
279            System.out.printf("        decoding (direct) failed%n");
280        }
281    }
282
283    // The first byte is the length of malformed bytes
284    static byte[][] malformed = {
285        //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
286    };
287
288    static void checkMalformed(Charset cs) throws Exception {
289        boolean failed = false;
290        String csn = cs.name();
291        System.out.printf("Check malformed <%s>...%n", csn);
292        for (boolean direct: new boolean[] {false, true}) {
293            for (byte[] bins : malformed) {
294                int mlen = bins[0];
295                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
296                CoderResult cr = decodeCR(bin, cs, direct);
297                String ashex = "";
298                for (int i = 0; i < bin.length; i++) {
299                    if (i > 0) ashex += " ";
300                        ashex += Integer.toBinaryString((int)bin[i] & 0xff);
301                }
302                if (!cr.isMalformed()) {
303                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
304                    failed = true;
305                } else if (cr.length() != mlen) {
306                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
307                    failed = true;
308                }
309            }
310        }
311        if (failed)
312            throw new RuntimeException("Check malformed failed " + csn);
313    }
314
315    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
316        int inPos = flow[0];
317        int inLen = flow[1];
318        int outPos = flow[2];
319        int outLen = flow[3];
320        int expedInPos = flow[4];
321        int expedOutPos = flow[5];
322        CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
323                                          :CoderResult.OVERFLOW;
324        ByteBuffer bbf;
325        CharBuffer cbf;
326        if (direct) {
327            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
328            cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
329        } else {
330            bbf = ByteBuffer.allocate(inPos + bytes.length);
331            cbf = CharBuffer.allocate(outPos + outLen);
332        }
333        bbf.position(inPos);
334        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
335        cbf.position(outPos);
336        dec.reset();
337        CoderResult cr = dec.decode(bbf, cbf, false);
338        if (cr != expedCR ||
339            bbf.position() != expedInPos ||
340            cbf.position() != expedOutPos) {
341            System.out.printf("Expected(direct=%5b): [", direct);
342            for (int i:flow) System.out.print(" " + i);
343            System.out.println("]  CR=" + cr +
344                               ", inPos=" + bbf.position() +
345                               ", outPos=" + cbf.position());
346            return false;
347        }
348        return true;
349    }
350
351    static void checkUnderOverflow(Charset cs) throws Exception {
352        String csn = cs.name();
353        System.out.printf("Check under/overflow <%s>...%n", csn);
354        CharsetDecoder dec = cs.newDecoder();
355        boolean failed = false;
356        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
357        //0   1 2   3         7         11
358        byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
359        int    inlen = bytes.length;
360
361        int MAXOFF = 20;
362        for (int inoff = 0; inoff < MAXOFF; inoff++) {
363            for (int outoff = 0; outoff < MAXOFF; outoff++) {
364        int[][] Flows = {
365            //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
366            //overflow
367            {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
368            {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
369            {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
370            {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
371            {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
372            {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
373            //underflow
374            {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
375            {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
376            {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
377            {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
378            {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
379            {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
380            {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
381            {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
382            {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
383            {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
384            {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
385            // 2-byte under/overflow
386            {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
387            {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
388            {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
389            // 4-byte  under/overflow
390            {inoff,  4,     outoff,  2,      inoff + 3,  outoff + 2, 0},
391            {inoff,  5,     outoff,  2,      inoff + 3,  outoff + 2, 0},
392            {inoff,  6,     outoff,  2,      inoff + 3,  outoff + 2, 0},
393            {inoff,  7,     outoff,  2,      inoff + 3,  outoff + 2, 1},
394            {inoff,  7,     outoff,  3,      inoff + 7,  outoff + 3, 0},
395            // 4-byte  under/overflow
396            {inoff,  8,     outoff,  3,      inoff + 7,  outoff + 3, 0},
397            {inoff,  9,     outoff,  3,      inoff + 7,  outoff + 3, 0},
398            {inoff, 10,     outoff,  3,      inoff + 7,  outoff + 3, 0},
399            {inoff, 11,     outoff,  3,      inoff + 7,  outoff + 3, 1},
400            {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
401            // 4-byte/supp  under/overflow
402            {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
403            {inoff, 12,     outoff,  4,      inoff +11,  outoff + 4, 0},
404            {inoff, 13,     outoff,  4,      inoff +11,  outoff + 4, 0},
405            {inoff, 14,     outoff,  4,      inoff +11,  outoff + 4, 0},
406            {inoff, 15,     outoff,  4,      inoff +11,  outoff + 4, 1},
407            {inoff, 15,     outoff,  5,      inoff +11,  outoff + 4, 1},
408            {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
409        };
410        for (boolean direct: new boolean[] {false, true}) {
411            for (int[] flow: Flows) {
412                if (!check(dec, bytes, direct, flow))
413                    failed = true;
414            }
415        }}}
416        if (failed)
417            throw new RuntimeException("Check under/overflow failed " + csn);
418    }
419
420    public static void main(String[] args) throws Exception {
421        // be the first one
422        //checkInit("EUC_TW_OLD");
423        checkInit("EUC_TW");
424        Charset euctw = Charset.forName("EUC_TW");
425        checkRoundtrip(euctw);
426        compare(euctw, new EUC_TW_OLD());
427        checkMalformed(euctw);
428        checkUnderOverflow(euctw);
429    }
430}
431