1/*
2 * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/*
25 * @test
26 * @bug 6843578
27 * @summary Test old and new implementation of db charsets
28 * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD SJIS_OLD PCK_OLD EUC_JP_OLD EUC_JP_LINUX_OLD EUC_JP_Open_OLD
29 * @modules java.base/sun.nio.cs jdk.charsets/sun.nio.cs.ext
30 * @run main TestIBMDB
31 */
32
33import java.nio.charset.*;
34import java.nio.*;
35import java.util.*;
36
public class TestIBMDB {
    /**
     * Holder through which {@link #encode} and {@link #decode} hand back the
     * average time of one loop pass, in microseconds.
     */
    static class Time {
        long t;
    }

    /** Number of passes made by every encode/decode loop in this test. */
    static int iteration = 200;

    /**
     * Formats at most {@code max} bytes of {@code bb}, starting at index
     * {@code from}, as unpadded lowercase hex values joined by {@code sep}.
     * Never reads past the end of the array.
     */
    private static String hex(byte[] bb, int from, int max, String sep) {
        StringBuilder sb = new StringBuilder();
        int end = Math.min(bb.length, from + max);
        for (int i = from; i < end; i++) {
            if (i > from) {
                sb.append(sep);
            }
            sb.append(Integer.toHexString(bb[i] & 0xff));
        }
        return sb.toString();
    }

    /**
     * Decodes {@code bb} with a fresh decoder of {@code cs}, {@link #iteration}
     * times, and returns the chars produced by the last pass.
     *
     * @param bb         bytes to decode
     * @param cs         charset whose decoder is exercised
     * @param testDirect use direct buffers when true, array-backed ones otherwise
     * @param t          receives the average time per pass in microseconds
     * @return the decoded chars
     * @throws RuntimeException if the last pass does not end in UNDERFLOW
     */
    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
        throws Exception {
        String csn = cs.name();
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
            // flip after filling, the same way decodeCR prepares its input
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime()/1000;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        long t2 = System.nanoTime()/1000;
        t.t = (t2 - t1)/iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("DEC-----------------");
            int pos = bbf.position();
            // Dump at most four bytes from the failure position; the dump is
            // bounded so that a failure close to the end of the input still
            // reaches the RuntimeException below instead of an index error.
            System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%s%n",
                              cr, pos, hex(bb, pos, 4, ","));
            throw new RuntimeException("Decoding err: " + csn);
        }
        char[] cc = new char[cbf.position()];
        cbf.flip();
        cbf.get(cc);
        return cc;
    }

    /**
     * Decodes {@code bb} with a fresh decoder of {@code cs}, {@link #iteration}
     * times, and returns the CoderResult of the last pass without judging it.
     *
     * @param bb         bytes to decode
     * @param cs         charset whose decoder is exercised
     * @param testDirect use direct buffers when true, array-backed ones otherwise
     * @return the result of the last decode call
     */
    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
        throws Exception {
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf;
        CharBuffer cbf;
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(bb.length);
            cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
            bbf.put(bb).flip();
        } else {
            bbf = ByteBuffer.wrap(bb);
            cbf = CharBuffer.allocate(bb.length);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            bbf.rewind();
            cbf.clear();
            dec.reset();
            cr = dec.decode(bbf, cbf, true);
        }
        return cr;
    }

    /**
     * Encodes {@code cc} with a fresh encoder of {@code cs}, {@link #iteration}
     * times, and returns the bytes produced by the last pass.
     *
     * @param cc         chars to encode
     * @param cs         charset whose encoder is exercised
     * @param testDirect use direct buffers when true, array-backed ones otherwise
     * @param t          receives the average time per pass in microseconds
     * @return the encoded bytes
     * @throws RuntimeException if the last pass does not end in UNDERFLOW
     */
    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        String csn = cs.name();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        long t1 = System.nanoTime()/1000;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        long t2 = System.nanoTime()/1000;
        t.t = (t2 - t1)/iteration;
        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("ENC-----------------");
            int pos = cbf.position();
            if (pos < cc.length) {
                System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
                                  cr, pos, cc[pos]&0xffff);
            } else {
                // nothing left to show at pos; still report the result
                System.out.printf("  cr=%s, cbf.pos=%d%n", cr, pos);
            }
            throw new RuntimeException("Encoding err: " + csn);
        }
        byte[] bb = new byte[bbf.position()];
        bbf.flip();
        bbf.get(bb);
        return bb;
    }

    /**
     * Encodes {@code cc} with a fresh encoder of {@code cs}, {@link #iteration}
     * times, and returns the CoderResult of the last pass without judging it.
     *
     * @param cc         chars to encode
     * @param cs         charset whose encoder is exercised
     * @param testDirect use direct buffers when true, array-backed ones otherwise
     * @return the result of the last encode call
     */
    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
        throws Exception {
        ByteBuffer bbf;
        CharBuffer cbf;
        CharsetEncoder enc = cs.newEncoder();
        if (testDirect) {
            bbf = ByteBuffer.allocateDirect(cc.length * 4);
            cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
            cbf.put(cc).flip();
        } else {
            bbf = ByteBuffer.allocate(cc.length * 4);
            cbf = CharBuffer.wrap(cc);
        }
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            cbf.rewind();
            bbf.clear();
            enc.reset();
            cr = enc.encode(cbf, bbf, true);
        }
        return cr;
    }

    /**
     * Prints one mapping line for {@code c} in {@code cs}: the encoded bytes in
     * hex, the char itself, and the first char obtained by decoding those
     * bytes again.
     */
    static void printEntry(char c, Charset cs) {
        byte[] bb = new String(new char[] {c}).getBytes(cs);
        for (byte b : bb) {
            System.out.printf("%x", b&0xff);
        }
        System.out.printf("    %x", c & 0xffff);
        String s2 = new String(bb, cs);
        System.out.printf("    %x%n", s2.charAt(0) & 0xffff);
    }

    /**
     * Compares canEncode() and the encoded form of every char below U+FFFF
     * between the old and the new charset, printing each difference found.
     *
     * @return the chars that both charsets report as encodable, in ascending order
     */
    static char[] checkEncoding(Charset oldCS, Charset newCS)
        throws Exception {
        System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        CharsetEncoder encOLD = oldCS.newEncoder();
        CharsetEncoder encNew = newCS.newEncoder();
        char[] cc = new char[0x10000];
        int pos = 0;
        boolean is970 = "x-IBM970-Old".equals(oldCS.name());

        // The bound is exclusive, so U+FFFF itself is not visited; this also
        // keeps the char counter from wrapping around.
        for (char c = 0; c < 0xffff; c++) {
            boolean canOld = encOLD.canEncode(c);
            boolean canNew = encNew.canEncode(c);

            if (is970 && c == 0x2299) {
                continue;
            }

            if (canOld != canNew) {
                if (canNew) {
                    System.out.printf("      NEW(only): ");
                    printEntry(c, newCS);
                } else {
                    if (is970) {
                        byte[] bb = new String(new char[] {c}).getBytes(oldCS);
                        if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
                            // we know 970 has bogus nnnn -> a2c1 -> 2299
                            continue;
                        }
                    }
                    System.out.printf("      OLD(only): ");
                    printEntry(c, oldCS);
                }
            } else if (canNew) {
                byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
                byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
                if (!Arrays.equals(bbNew, bbOld)) {
                    System.out.printf("      c->b NEW: ");
                    printEntry(c, newCS);
                    System.out.printf("      c->b OLD: ");
                    printEntry(c, oldCS);
                } else {
                    String sNew = new String(bbNew, newCS);
                    String sOld = new String(bbOld, oldCS);
                    if (!sNew.equals(sOld)) {
                        System.out.printf("      b2c NEW (c=%x):", c&0xffff);
                        printEntry(sNew.charAt(0), newCS);
                        System.out.printf("      b2c OLD:");
                        printEntry(sOld.charAt(0), oldCS);
                    }
                }
            }
            if (canNew && canOld) {  // added only both for now
                cc[pos++] = c;
            }
        }
        return Arrays.copyOf(cc, pos);
    }


    /**
     * Compares the decoding of every single byte, and of every double-byte
     * sequence whose bytes lie in 0x40..0xfe, between the old and the new
     * charset, printing each difference found.  For charsets whose name starts
     * with "x-IBM93" the double-byte pair is wrapped in 0x0e ... 0x0f.
     */
    static void checkDecoding(Charset oldCS, Charset newCS)
        throws Exception
    {
        System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

        //Try singlebyte first
        byte[] bb = new byte[1];
        System.out.printf("       trying SB...%n");
        for (int b = 0; b < 0x100; b++) {
            bb[0] = (byte)b;
            String sOld = new String(bb, oldCS);
            String sNew = new String(bb, newCS);
            if (!sOld.equals(sNew)) {
                System.out.printf("        b=%x:  %x/%d(old)  %x/%d(new)%n",
                                  b& 0xff,
                                  sOld.charAt(0) & 0xffff, sOld.length(),
                                  sNew.charAt(0) & 0xffff, sNew.length());
            }
        }

        System.out.printf("       trying DB...%n");
        bb = new byte[isEBCDIC?4:2];
        for (int b1 = 0x40; b1 < 0xff; b1++) {
            if (!isEBCDIC) {
                // decodable singlebyte b1
                // NOTE(review): only bb[0] is set here; bb[1] still holds what
                // the previous b2 loop left behind (0 on the first pass).
                // Confirm that this is intended.
                bb[0] = (byte)b1;
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                if (!sOld.equals(sNew)) {
                    if (sOld.length() != 2 && sOld.charAt(0) != 0) {
                        // Print only the unexpected differences; the two
                        // conditions excluded above are known issues.
                        System.out.printf("        b1=%x:  %x/%d(old)  %x/%d(new)%n",
                                          b1 & 0xff,
                                          sOld.charAt(0) & 0xffff, sOld.length(),
                                          sNew.charAt(0) & 0xffff, sNew.length());
                        continue;
                    }
                }
            }
            for (int b2 = 0x40; b2 < 0xff; b2++) {
                if (isEBCDIC) {
                    bb[0] = 0x0e;
                    bb[1] = (byte)b1;
                    bb[2] = (byte)b2;
                    bb[3] = 0x0f;
                } else {
                    bb[0] = (byte)b1;
                    bb[1] = (byte)b2;
                }
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                //if (!sOld.equals(sNew)) {
                if (sOld.charAt(0) != sNew.charAt(0)) {
                    if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd) {
                        continue; // known issue in old implementation
                    }
                    System.out.printf("        bb=<%x,%x>  c(old)=%x,  c(new)=%x%n",
                        b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
                }
            }
        }
    }

    /**
     * Prints how long, in microseconds, it takes to look up charset
     * {@code csn} and to create one decoder and one encoder from it.
     */
    static void checkInit(String csn) throws Exception {
        System.out.printf("Check init <%s>...%n", csn);
        Charset.forName("Big5");    // load in the ExtendedCharsets
        long t1 = System.nanoTime()/1000;
        Charset cs = Charset.forName(csn);
        long t2 = System.nanoTime()/1000;
        System.out.printf("    charset     :%d%n", t2 - t1);

        t1 = System.nanoTime()/1000;
        cs.newDecoder();
        t2 = System.nanoTime()/1000;
        System.out.printf("    new Decoder :%d%n", t2 - t1);

        t1 = System.nanoTime()/1000;
        cs.newEncoder();
        t2 = System.nanoTime()/1000;
        System.out.printf("    new Encoder :%d%n", t2 - t1);
    }

    /**
     * Encodes {@code cc} and decodes the result with both charsets, first with
     * array-backed and then with direct buffers, printing the time ratio
     * cs2/cs1 for each step and a "failed" line whenever the two outputs
     * differ.  A mismatch is only reported; it does not throw.
     */
    static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
        System.gc();    // enqueue finalizable objects
        Thread.sleep(1000);
        System.gc();    // enqueue finalizable objects

        String csn1 = cs1.name();
        String csn2 = cs2.name();
        System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);

        Time t1 = new Time();
        Time t2 = new Time();

        byte[] bb1 = encode(cc, cs1, false, t1);
        byte[] bb2 = encode(cc, cs2, false, t2);

        System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf("        encoding failed%n");
        }

        char[] cc2 = decode(bb1, cs2, false, t2);
        char[] cc1 = decode(bb1, cs1, false, t1);
        System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf("        decoding failed%n");
        }

        bb1 = encode(cc, cs1, true, t1);
        bb2 = encode(cc, cs2, true, t2);

        System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));

        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf("        encoding (direct) failed%n");
        }

        cc1 = decode(bb1, cs1, true, t1);
        cc2 = decode(bb1, cs2, true, t2);
        System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t)/(t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf("        decoding (direct) failed%n");
        }
    }

    /**
     * Verifies that decoding each entry of {@code malformed} with {@code cs}
     * ends in a malformed-input result of the expected length, using both
     * array-backed and direct buffers.
     *
     * <p>In every entry the first byte is the expected length of the malformed
     * input and the remaining bytes are the input itself, for example
     * <pre>
     *     {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }
     * </pre>
     *
     * @throws RuntimeException if any entry is not reported as expected
     */
    static void checkMalformed(Charset cs, byte[][] malformed)
        throws Exception
    {
        boolean failed = false;
        String csn = cs.name();
        System.out.printf("Check malformed <%s>...%n", csn);
        for (boolean direct: new boolean[] {false, true}) {
            for (byte[] bins : malformed) {
                int mlen = bins[0];
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
                CoderResult cr = decodeCR(bin, cs, direct);
                String ashex = hex(bin, 0, bin.length, " ");
                if (!cr.isMalformed()) {
                    System.out.printf("        FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString());
                    failed = true;
                } else if (cr.length() != mlen) {
                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
                    failed = true;
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check malformed failed " + csn);
        }
    }

    /**
     * Runs one decode call (endOfInput = false) described by {@code flow} and
     * compares the outcome with the expectation stored in the same array.
     *
     * @param dec    decoder to use; it is reset first
     * @param bytes  input placed in the source buffer starting at flow[0]
     * @param direct use direct buffers when true, heap buffers otherwise
     * @param flow   {inPos, inLen, outPos, outLen, expected inPos,
     *               expected outPos, 0 for UNDERFLOW / anything else for OVERFLOW}
     * @return true if result and both positions match; otherwise the actual
     *         values are printed and false is returned
     */
    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
        int inPos = flow[0];
        int inLen = flow[1];
        int outPos = flow[2];
        int outLen = flow[3];
        int expedInPos = flow[4];
        int expedOutPos = flow[5];
        CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
                                          :CoderResult.OVERFLOW;
        ByteBuffer bbf;
        CharBuffer cbf;
        if (direct) {
            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
            cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
        } else {
            bbf = ByteBuffer.allocate(inPos + bytes.length);
            cbf = CharBuffer.allocate(outPos + outLen);
        }
        bbf.position(inPos);
        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
        cbf.position(outPos);
        dec.reset();
        CoderResult cr = dec.decode(bbf, cbf, false);
        if (cr != expedCR ||
            bbf.position() != expedInPos ||
            cbf.position() != expedOutPos) {
            System.out.printf("Expected(direct=%5b): [", direct);
            for (int i : flow) {
                System.out.print(" " + i);
            }
            System.out.println("]  CR=" + cr +
                               ", inPos=" + bbf.position() +
                               ", outPos=" + cbf.position());
            return false;
        }
        return true;
    }

    /**
     * Drives {@link #check} over a table of under/overflow scenarios for the
     * decoder of {@code cs}, for every input and output offset below 20 and
     * for both buffer kinds.  The input bytes are produced with "EUC_TW".
     * This method is not invoked by {@link #main}.
     *
     * @throws RuntimeException if any scenario fails
     */
    static void checkUnderOverflow(Charset cs) throws Exception {
        String csn = cs.name();
        System.out.printf("Check under/overflow <%s>...%n", csn);
        CharsetDecoder dec = cs.newDecoder();
        boolean failed = false;

        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
        //0   1 2   3         7         11
        byte[] bytes = "\u007f\u3000\u4e42\u4e28\ud840\udc55".getBytes("EUC_TW");
        int    inlen = bytes.length;

        int MAXOFF = 20;
        for (int inoff = 0; inoff < MAXOFF; inoff++) {
            for (int outoff = 0; outoff < MAXOFF; outoff++) {
                int[][] flows = {
                    //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
                    //overflow
                    {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
                    {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
                    {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
                    {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
                    {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
                    {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
                    //underflow
                    {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
                    {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
                    {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
                    {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
                    {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
                    {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
                    {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
                    {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
                    {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
                    {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
                    {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
                    // 2-byte under/overflow
                    {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
                    {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
                    {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
                };
                for (boolean direct: new boolean[] {false, true}) {
                    for (int[] flow: flows) {
                        if (!check(dec, bytes, direct, flow)) {
                            failed = true;
                        }
                    }
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check under/overflow failed " + csn);
        }
    }

    /**
     * Charsets under test.  For every entry a class named
     * {@code <entry>_OLD} supplies the old implementation.
     */
    static String[] csnames = new String[] {

        "IBM930",
        "IBM933",
        "IBM935",
        "IBM937",
        "IBM939",
        "IBM942",
        "IBM943",
        "IBM948",
        "IBM949",
        "IBM950",
        "IBM970",
        "IBM942C",
        "IBM943C",
        "IBM949C",
        "IBM1381",
        "IBM1383",

        "EUC_CN",
        "EUC_KR",
        "GBK",
        "Johab",
        "MS932",
        "MS936",
        "MS949",
        "MS950",

        "EUC_JP",
        "EUC_JP_LINUX",
        "EUC_JP_Open",
        "SJIS",
        "PCK",
    };

    /**
     * For every entry of {@link #csnames}: times the charset initialisation,
     * compares encoding and decoding between the old and the new
     * implementation, and runs the malformed-input checks that apply to the
     * charset.
     */
    public static void main(String[] args) throws Exception {
        for (String csname: csnames) {
            System.out.printf("-----------------------------------%n");
            String oldname = csname + "_OLD";
            // the new implementation of EUC_JP_Open is looked up as "eucjp-open"
            String newname = "EUC_JP_Open".equals(csname) ? "eucjp-open" : csname;
            checkInit(newname);
            Charset csOld = (Charset)Class.forName(oldname)
                                          .getDeclaredConstructor().newInstance();
            Charset csNew = Charset.forName(newname);
            char[] cc = checkEncoding(csOld, csNew);
            checkDecoding(csOld, csNew);
            compare(csNew, csOld, cc);

            // Select on the charset's own name(): the lookup names in csnames
            // carry no "x-" prefix, so testing them against "x-..." could
            // never match and the malformed checks would never run.
            String canonical = csNew.name();
            if (canonical.startsWith("x-IBM93")) {
                //ebcdic
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, 0x0f, 0x27},         // in SBSC, no SI
                    {1, 0x0e, 0x41, 0x41, 0xe},    // in DBSC, no SO
                    {2, 0x0e, 0x40, 0x41, 0xe},    // illegal DB
                });
            } else if (canonical.equals("x-IBM970") ||
                       canonical.equals("x-IBM1383")) {
                //euc_simple
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, (byte)0x8f, 0x27},                   // SS2
                    {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51}, // SS3
                });
            }
        }
    }
}
559