HKSCS.java revision 8845:4be14673b9bf
1/*
2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package build.tools.charsetmapping;
27
28import java.io.*;
29import java.util.Arrays;
30import java.util.ArrayList;
31import java.util.Scanner;
32import java.util.Formatter;
33import java.util.regex.*;
34import java.nio.charset.*;
35import static build.tools.charsetmapping.Utils.*;
36
37public class HKSCS {
38
39    // HKSCS2001.map has the third column for "UnicodeAlternate", which
40    // is for c->b non-roundtrip mapping.
41    // For HKSCS2008, those non-roundtrip mappings are in .nr file
42    private static Pattern hkscs =
43        Pattern.compile("(?:0x)?+(\\p{XDigit}++)\\s++(?:0x|U\\+)?+(\\p{XDigit}++)?\\s*+(?:0x|U\\+)?(\\p{XDigit}++)?\\s*+.*");
44
45    static void genClass(String args[]) throws Exception {
46
47        // hkscs2008
48        genClass0(new FileInputStream(new File(args[0], "HKSCS2008.map")),
49                  new FileInputStream(new File(args[0], "HKSCS2008.c2b")),
50                  new PrintStream(new File(args[1], "HKSCSMapping.java"),
51                                  "ISO-8859-1"),
52                  "HKSCSMapping",
53                  getCopyright(new File(args[3])));
54
55
56        // xp2001
57        genClass0(new FileInputStream(new File(args[0], "HKSCS_XP.map")),
58                  null,
59                  new PrintStream(new File(args[1], "HKSCS_XPMapping.java"),
60                                  "ISO-8859-1"),
61                  "HKSCS_XPMapping",
62                  getCopyright(new File(args[3])));
63
64        // hkscs2001
65        genClass0(new FileInputStream(new File(args[0], "HKSCS2001.map")),
66                  new FileInputStream(new File(args[0], "HKSCS2001.c2b")),
67                  new PrintStream(new File(args[1], "HKSCS2001Mapping.java"),
68                                  "ISO-8859-1"),
69                  "HKSCS2001Mapping",
70                  getCopyright(new File(args[3])));
71    }
72
73    static void genClass0(InputStream isB2C,
74                          InputStream isC2B,
75                          PrintStream ps,
76                          String clzName,
77                          String copyright)
78        throws Exception
79    {
80        // ranges of byte1 and byte2, something should come from a "config" file
81        int b1Min = 0x87;
82        int b1Max = 0xfe;
83        int b2Min = 0x40;
84        int b2Max = 0xfe;
85
86        try {
87            char[] bmp = new char[0x10000];
88            char[] supp = new char[0x10000];
89
90            boolean[] b2cBmp = new boolean[0x100];
91            boolean[] b2cSupp = new boolean[0x100];
92            // pua should be in range of e000-f8ff. Expand
93            // it to 0xf93b becase the hkscs2001.c2b has
94            // the f920-f93b filled
95            //char[] pua = new char[0xF8FF - 0xE000 + 1];
96            char[] pua = new char[0xF93b - 0xE000 + 1];
97            boolean hasSupp = false;
98            boolean hasPua = false;
99
100            Arrays.fill(bmp, UNMAPPABLE_DECODING);
101            Arrays.fill(supp, UNMAPPABLE_DECODING);
102            Arrays.fill(pua, UNMAPPABLE_DECODING);
103
104            Parser p = new Parser(isB2C, hkscs);
105            Entry  e = null;
106            while ((e = p.next()) != null) {
107                if (e.cp >= 0x10000) {
108                    supp[e.bs] = (char)e.cp;
109                    b2cSupp[e.bs>>8] = true;
110                    hasSupp = true;
111                } else {
112                    bmp[e.bs] = (char)e.cp;
113                    b2cBmp[e.bs>>8] = true;
114                }
115                if (e.cp2 != 0 && e.cp2 >= 0xe000 && e.cp2 <= 0xf8ff) {
116                    hasPua = true;
117                    pua[e.cp2 - 0xE000] = (char)e.bs;
118                }
119            }
120
121            if (isC2B != null) {
122                p = new Parser(isC2B, hkscs);
123                e = null;
124                while ((e = p.next()) != null) {
125                    pua[e.cp - 0xE000] = (char)e.bs;
126                }
127                hasPua = true;
128            }
129
130            StringBuilder sb = new StringBuilder();
131            Output out = new Output(new Formatter(sb));
132
133            out.format(copyright);
134            out.format("%n// -- This file was mechanically generated: Do not edit! -- //%n");
135            out.format("package sun.nio.cs.ext;%n%n");
136            out.format("class %s {%n%n", clzName);
137
138            /* hardcoded in sun.nio.cs.ext.HKSCS.java
139            out.format("    final static int b1Min = 0x%x;%n", b1Min);
140            out.format("    final static int b1Max = 0x%x;%n", b1Max);
141            out.format("    final static int b2Min = 0x%x;%n", b2Min);
142            out.format("    final static int b2Max = 0x%x;%n", b2Max);
143            */
144
145            // bmp tables
146            out.format("%n    static final String[] b2cBmpStr = new String[] {%n");
147            for (int i = 0; i < 0x100; i++) {
148                if (b2cBmp[i])
149                    out.format(bmp, i, b2Min, b2Max, ",");
150                else
151                    out.format("        null,%n");  //unmappable segments
152            }
153            out.format("        };%n");
154
155            // supp tables
156            out.format("%n    static final String[] b2cSuppStr =");
157            if (hasSupp) {
158                out.format(" new String[] {%n");
159                for (int i = 0; i < 0x100; i++) {
160                    if (b2cSupp[i])
161                        out.format(supp, i, b2Min, b2Max, ",");
162                    else
163                        out.format("        null,%n");  //unmappable segments
164                }
165                out.format("        };%n");
166            } else {
167                out.format(" null;%n");
168            }
169
170            // private area tables
171            out.format("%n    final static String pua =");
172            if (hasPua) {
173                out.format("%n");
174                out.format(pua, 0, pua.length, ";");
175            } else {
176                out.format(" null;%n");
177            }
178            out.format("%n");
179            out.format("}");
180
181            out.close();
182
183            ps.println(sb.toString());
184            ps.close();
185
186        } catch (Exception x) {
187            x.printStackTrace();
188        }
189    }
190}
191