TestEUC_TW.java revision 1245:914c33c7de3e
1/*
2 * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 */
23
24/*
25 * @test
26 * @bug 6831794 6229811
27 * @summary Test EUC_TW charset
28 */
29
30import java.nio.charset.*;
31import java.nio.*;
32import java.util.*;
33
34public class TestEUC_TW {
35
36    static class Time {
37        long t;
38    }
39    static int iteration = 100;
40
41    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
42        throws Exception {
43        String csn = cs.name();
44        CharsetDecoder dec = cs.newDecoder();
45        ByteBuffer bbf;
46        CharBuffer cbf;
47        if (testDirect) {
48            bbf = ByteBuffer.allocateDirect(bb.length);
49            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
50            bbf.put(bb);
51        } else {
52            bbf = ByteBuffer.wrap(bb);
53            cbf = CharBuffer.allocate(bb.length);
54        }
55        CoderResult cr = null;
56        long t1 = System.nanoTime()/1000;
57        for (int i = 0; i < iteration; i++) {
58            bbf.rewind();
59            cbf.clear();
60            dec.reset();
61            cr = dec.decode(bbf, cbf, true);
62        }
63        long t2 = System.nanoTime()/1000;
64        if (t != null)
65        t.t = (t2 - t1)/iteration;
66        if (cr != CoderResult.UNDERFLOW) {
67            System.out.println("DEC-----------------");
68            int pos = bbf.position();
69            System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
70                              cr.toString(), pos,
71                              bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
72            throw new RuntimeException("Decoding err: " + csn);
73        }
74        char[] cc = new char[cbf.position()];
75        cbf.flip(); cbf.get(cc);
76        return cc;
77
78    }
79
80    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
81        throws Exception {
82        CharsetDecoder dec = cs.newDecoder();
83        ByteBuffer bbf;
84        CharBuffer cbf;
85        if (testDirect) {
86            bbf = ByteBuffer.allocateDirect(bb.length);
87            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
88            bbf.put(bb).flip();
89        } else {
90            bbf = ByteBuffer.wrap(bb);
91            cbf = CharBuffer.allocate(bb.length);
92        }
93        return dec.decode(bbf, cbf, true);
94    }
95
96    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
97        throws Exception {
98        ByteBuffer bbf;
99        CharBuffer cbf;
100        CharsetEncoder enc = cs.newEncoder();
101        String csn = cs.name();
102        if (testDirect) {
103            bbf = ByteBuffer.allocateDirect(cc.length * 4);
104            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
105            cbf.put(cc).flip();
106        } else {
107            bbf = ByteBuffer.allocate(cc.length * 4);
108            cbf = CharBuffer.wrap(cc);
109        }
110        CoderResult cr = null;
111        long t1 = System.nanoTime()/1000;
112        for (int i = 0; i < iteration; i++) {
113            cbf.rewind();
114            bbf.clear();
115            enc.reset();
116            cr = enc.encode(cbf, bbf, true);
117        }
118        long t2 = System.nanoTime()/1000;
119        if (t != null)
120        t.t = (t2 - t1)/iteration;
121        if (cr != CoderResult.UNDERFLOW) {
122            System.out.println("ENC-----------------");
123            int pos = cbf.position();
124            System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
125                              cr.toString(), pos, cc[pos]&0xffff);
126            throw new RuntimeException("Encoding err: " + csn);
127        }
128        byte[] bb = new byte[bbf.position()];
129        bbf.flip(); bbf.get(bb);
130        return bb;
131    }
132
133    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
134        throws Exception {
135        ByteBuffer bbf;
136        CharBuffer cbf;
137        CharsetEncoder enc = cs.newEncoder();
138        if (testDirect) {
139            bbf = ByteBuffer.allocateDirect(cc.length * 4);
140            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
141            cbf.put(cc).flip();
142        } else {
143            bbf = ByteBuffer.allocate(cc.length * 4);
144            cbf = CharBuffer.wrap(cc);
145        }
146        return enc.encode(cbf, bbf, true);
147    }
148
149    static char[] getEUC_TWChars(boolean skipNR) {
150        //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder();
151        CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
152        CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
153        char[] cc = new char[0x20000];
154        char[] c2 = new char[2];
155        int pos = 0;
156        int i = 0;
157        //bmp
158        for (i = 0; i < 0x10000; i++) {
159            //SKIP these 3 NR codepoints if compared to EUC_TW
160            if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
161                continue;
162            if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
163                System.out.printf("  Err i=%x:  old=%b new=%b%n", i,
164                                  encOLD.canEncode((char)i),
165                                  enc.canEncode((char)i));
166                throw new RuntimeException("canEncode() err!");
167            }
168
169            if (enc.canEncode((char)i)) {
170                cc[pos++] = (char)i;
171            }
172        }
173
174        //supp
175        CharBuffer cb = CharBuffer.wrap(new char[2]);
176        for (i = 0x20000; i < 0x30000; i++) {
177            Character.toChars(i, c2, 0);
178            cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
179
180            if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
181                throw new RuntimeException("canEncode() err!");
182            }
183
184            if (enc.canEncode(cb)) {
185                //System.out.printf("cp=%x,  (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff);
186                cc[pos++] = c2[0];
187                cc[pos++] = c2[1];
188            }
189        }
190
191        return Arrays.copyOf(cc, pos);
192    }
193
194    static void checkRoundtrip(Charset cs) throws Exception {
195        char[] cc = getEUC_TWChars(false);
196        System.out.printf("Check roundtrip <%s>...", cs.name());
197        byte[] bb = encode(cc, cs, false, null);
198        char[] ccO = decode(bb, cs, false, null);
199
200        if (!Arrays.equals(cc, ccO)) {
201            System.out.printf("    non-direct failed");
202        }
203        bb = encode(cc, cs, true, null);
204        ccO = decode(bb, cs, true, null);
205        if (!Arrays.equals(cc, ccO)) {
206            System.out.printf("    (direct) failed");
207        }
208        System.out.println();
209    }
210
211    static void checkInit(String csn) throws Exception {
212        System.out.printf("Check init <%s>...%n", csn);
213        Charset.forName("Big5");    // load in the ExtendedCharsets
214        long t1 = System.nanoTime()/1000;
215        Charset cs = Charset.forName(csn);
216        long t2 = System.nanoTime()/1000;
217        System.out.printf("    charset     :%d%n", t2 - t1);
218        t1 = System.nanoTime()/1000;
219            cs.newDecoder();
220        t2 = System.nanoTime()/1000;
221        System.out.printf("    new Decoder :%d%n", t2 - t1);
222
223        t1 = System.nanoTime()/1000;
224            cs.newEncoder();
225        t2 = System.nanoTime()/1000;
226        System.out.printf("    new Encoder :%d%n", t2 - t1);
227    }
228
229    static void compare(Charset cs1, Charset cs2) throws Exception {
230        char[] cc = getEUC_TWChars(true);
231
232        String csn1 = cs1.name();
233        String csn2 = cs2.name();
234        System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
235
236        Time t1 = new Time();
237        Time t2 = new Time();
238
239        byte[] bb1 = encode(cc, cs1, false, t1);
240        byte[] bb2 = encode(cc, cs2, false, t2);
241
242        System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
243                          csn2, csn1,
244                          t2.t, t1.t,
245                          (double)(t2.t)/(t1.t));
246        if (!Arrays.equals(bb1, bb2)) {
247            System.out.printf("        encoding failed%n");
248        }
249
250        char[] cc2 = decode(bb1, cs2, false, t2);
251        char[] cc1 = decode(bb1, cs1, false, t1);
252        System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
253                          csn2, csn1,
254                          t2.t, t1.t,
255                          (double)(t2.t)/(t1.t));
256        if (!Arrays.equals(cc1, cc2)) {
257            System.out.printf("        decoding failed%n");
258        }
259
260        bb1 = encode(cc, cs1, true, t1);
261        bb2 = encode(cc, cs2, true, t2);
262
263        System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
264                          csn2, csn1,
265                          t2.t, t1.t,
266                          (double)(t2.t)/(t1.t));
267
268        if (!Arrays.equals(bb1, bb2))
269            System.out.printf("        encoding (direct) failed%n");
270
271        cc1 = decode(bb1, cs1, true, t1);
272        cc2 = decode(bb1, cs2, true, t2);
273        System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
274                          csn2, csn1,
275                          t2.t, t1.t,
276                          (double)(t2.t)/(t1.t));
277        if (!Arrays.equals(cc1, cc2)) {
278            System.out.printf("        decoding (direct) failed%n");
279        }
280    }
281
282    // The first byte is the length of malformed bytes
283    static byte[][] malformed = {
284        //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
285    };
286
287    static void checkMalformed(Charset cs) throws Exception {
288        boolean failed = false;
289        String csn = cs.name();
290        System.out.printf("Check malformed <%s>...%n", csn);
291        for (boolean direct: new boolean[] {false, true}) {
292            for (byte[] bins : malformed) {
293                int mlen = bins[0];
294                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
295                CoderResult cr = decodeCR(bin, cs, direct);
296                String ashex = "";
297                for (int i = 0; i < bin.length; i++) {
298                    if (i > 0) ashex += " ";
299                        ashex += Integer.toBinaryString((int)bin[i] & 0xff);
300                }
301                if (!cr.isMalformed()) {
302                    System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
303                    failed = true;
304                } else if (cr.length() != mlen) {
305                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
306                    failed = true;
307                }
308            }
309        }
310        if (failed)
311            throw new RuntimeException("Check malformed failed " + csn);
312    }
313
314    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
315        int inPos = flow[0];
316        int inLen = flow[1];
317        int outPos = flow[2];
318        int outLen = flow[3];
319        int expedInPos = flow[4];
320        int expedOutPos = flow[5];
321        CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
322                                          :CoderResult.OVERFLOW;
323        ByteBuffer bbf;
324        CharBuffer cbf;
325        if (direct) {
326            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
327            cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
328        } else {
329            bbf = ByteBuffer.allocate(inPos + bytes.length);
330            cbf = CharBuffer.allocate(outPos + outLen);
331        }
332        bbf.position(inPos);
333        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
334        cbf.position(outPos);
335        dec.reset();
336        CoderResult cr = dec.decode(bbf, cbf, false);
337        if (cr != expedCR ||
338            bbf.position() != expedInPos ||
339            cbf.position() != expedOutPos) {
340            System.out.printf("Expected(direct=%5b): [", direct);
341            for (int i:flow) System.out.print(" " + i);
342            System.out.println("]  CR=" + cr +
343                               ", inPos=" + bbf.position() +
344                               ", outPos=" + cbf.position());
345            return false;
346        }
347        return true;
348    }
349
350    static void checkUnderOverflow(Charset cs) throws Exception {
351        String csn = cs.name();
352        System.out.printf("Check under/overflow <%s>...%n", csn);
353        CharsetDecoder dec = cs.newDecoder();
354        boolean failed = false;
355        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
356        //0   1 2   3         7         11
357        byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
358        int    inlen = bytes.length;
359
360        int MAXOFF = 20;
361        for (int inoff = 0; inoff < MAXOFF; inoff++) {
362            for (int outoff = 0; outoff < MAXOFF; outoff++) {
363        int[][] Flows = {
364            //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
365            //overflow
366            {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
367            {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
368            {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
369            {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
370            {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
371            {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
372            //underflow
373            {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
374            {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
375            {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
376            {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
377            {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
378            {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
379            {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
380            {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
381            {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
382            {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
383            {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
384            // 2-byte under/overflow
385            {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
386            {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
387            {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
388            // 4-byte  under/overflow
389            {inoff,  4,     outoff,  2,      inoff + 3,  outoff + 2, 0},
390            {inoff,  5,     outoff,  2,      inoff + 3,  outoff + 2, 0},
391            {inoff,  6,     outoff,  2,      inoff + 3,  outoff + 2, 0},
392            {inoff,  7,     outoff,  2,      inoff + 3,  outoff + 2, 1},
393            {inoff,  7,     outoff,  3,      inoff + 7,  outoff + 3, 0},
394            // 4-byte  under/overflow
395            {inoff,  8,     outoff,  3,      inoff + 7,  outoff + 3, 0},
396            {inoff,  9,     outoff,  3,      inoff + 7,  outoff + 3, 0},
397            {inoff, 10,     outoff,  3,      inoff + 7,  outoff + 3, 0},
398            {inoff, 11,     outoff,  3,      inoff + 7,  outoff + 3, 1},
399            {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
400            // 4-byte/supp  under/overflow
401            {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
402            {inoff, 12,     outoff,  4,      inoff +11,  outoff + 4, 0},
403            {inoff, 13,     outoff,  4,      inoff +11,  outoff + 4, 0},
404            {inoff, 14,     outoff,  4,      inoff +11,  outoff + 4, 0},
405            {inoff, 15,     outoff,  4,      inoff +11,  outoff + 4, 1},
406            {inoff, 15,     outoff,  5,      inoff +11,  outoff + 4, 1},
407            {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
408        };
409        for (boolean direct: new boolean[] {false, true}) {
410            for (int[] flow: Flows) {
411                if (!check(dec, bytes, direct, flow))
412                    failed = true;
413            }
414        }}}
415        if (failed)
416            throw new RuntimeException("Check under/overflow failed " + csn);
417    }
418
419    public static void main(String[] args) throws Exception {
420        // be the first one
421        //checkInit("EUC_TW_OLD");
422        checkInit("EUC_TW");
423        Charset euctw = Charset.forName("EUC_TW");
424        checkRoundtrip(euctw);
425        compare(euctw, new EUC_TW_OLD());
426        checkMalformed(euctw);
427        checkUnderOverflow(euctw);
428    }
429}
430