// SBCS.java revision 9330:8b1f1c2a400f
/*
 * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.charsetmapping;

import java.io.*;
import java.nio.charset.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Formatter;
import java.util.List;
import java.util.Scanner;
import java.util.regex.*;
import static build.tools.charsetmapping.Utils.*;

37public class SBCS {
38
39    public static void genClass(String args[]) throws Exception {
40
41        Scanner s = new Scanner(new File(args[0], args[2]));
42        while (s.hasNextLine()) {
43            String line = s.nextLine();
44            if (line.startsWith("#") || line.length() == 0)
45                continue;
46            String[] fields = line.split("\\s+");
47            if (fields.length < 5) {
48                System.err.println("Misconfiged sbcs line <" + line + ">?");
49                continue;
50            }
51            String clzName = fields[0];
52            String csName  = fields[1];
53            String hisName = fields[2];
54            boolean isASCII = Boolean.valueOf(fields[3]);
55            String pkgName  = fields[4];
56            System.out.printf("%s,%s,%s,%b,%s%n", clzName, csName, hisName, isASCII, pkgName);
57
58            genClass0(args[0], args[1], "SingleByte-X.java.template",
59                      clzName, csName, hisName, pkgName, isASCII);
60        }
61    }
62
63    private static void toString(char[] sb, int off, int end,
64                                 Formatter out, String closure,
65                                 boolean comment) {
66        while (off < end) {
67            out.format("        \"");
68            for (int j = 0; j < 8; j++) {
69                if (off == end)
70                    break;
71                char c = sb[off++];
72                switch (c) {
73                case '\b':
74                    out.format("\\b"); break;
75                case '\t':
76                    out.format("\\t"); break;
77                case '\n':
78                    out.format("\\n"); break;
79                case '\f':
80                    out.format("\\f"); break;
81                case '\r':
82                    out.format("\\r"); break;
83                case '\"':
84                    out.format("\\\""); break;
85                case '\'':
86                    out.format("\\'"); break;
87                case '\\':
88                    out.format("\\\\"); break;
89                default:
90                    out.format("\\u%04X", c & 0xffff);
91                }
92            }
93            if (comment) {
94                if (off == end)
95                    out.format("\" %s      // 0x%02x - 0x%02x%n",
96                               closure, off-8, off-1);
97                else
98                    out.format("\" +      // 0x%02x - 0x%02x%n",
99                               off-8, off-1);
100            } else {
101                if (off == end)
102                    out.format("\"%s%n", closure);
103                else
104                    out.format("\" +%n");
105            }
106        }
107    }
108
109    static Pattern sbmap = Pattern.compile("0x(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
110
111    private static void genClass0(String srcDir, String dstDir,
112                                  String template,
113                                  String clzName,
114                                  String csName,
115                                  String hisName,
116                                  String pkgName,
117                                  boolean isASCII)
118        throws Exception
119    {
120        StringBuilder b2cSB = new StringBuilder();
121        StringBuilder b2cNRSB = new StringBuilder();
122        StringBuilder c2bNRSB = new StringBuilder();
123
124        char[] sb = new char[0x100];
125        char[] c2bIndex = new char[0x100];
126        int    c2bOff = 0;
127        Arrays.fill(sb, UNMAPPABLE_DECODING);
128        Arrays.fill(c2bIndex, UNMAPPABLE_DECODING);
129
130        // (1)read in .map to parse all b->c entries
131        FileInputStream in = new FileInputStream(
132                                 new File(srcDir, clzName + ".map"));
133        Parser p = new Parser(in, sbmap);
134        Entry  e = null;
135
136        while ((e = p.next()) != null) {
137            sb[e.bs] = (char)e.cp;
138            if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
139                c2bOff += 0x100;
140                c2bIndex[e.cp>>8] = 1;
141            }
142        }
143
144        Formatter fm = new Formatter(b2cSB);
145        fm.format("%n");
146
147        // vm -server shows cc[byte + 128] access is much faster than
148        // cc[byte&0xff] so we output the upper segment first
149        toString(sb, 0x80, 0x100, fm, "+", true);
150        toString(sb, 0x00, 0x80,  fm, ";", true);
151        fm.close();
152
153        // (2)now the .nr file which includes "b->c" non-roundtrip entries
154        File f = new File(srcDir, clzName + ".nr");
155        if (f.exists()) {
156            in = new FileInputStream(f);
157            fm = new Formatter(b2cNRSB);
158            p = new Parser(in, sbmap);
159            e = null;
160
161            fm.format("// remove non-roundtrip entries%n");
162            fm.format("        b2cMap = b2cTable.toCharArray();%n");
163            while ((e = p.next()) != null) {
164                fm.format("        b2cMap[%d] = UNMAPPABLE_DECODING;%n",
165                          (e.bs>=0x80)?(e.bs-0x80):(e.bs+0x80));
166            }
167            fm.close();
168        }
169
170        // (3)finally the .c2b file which includes c->b non-roundtrip entries
171        f = new File(srcDir, clzName + ".c2b");
172        if (f.exists()) {
173            in = new FileInputStream(f);
174            fm = new Formatter(c2bNRSB);
175            p = new Parser(in, sbmap);
176            e = null;
177            ArrayList<Entry> es = new ArrayList<Entry>();
178            while ((e = p.next()) != null) {
179                if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) {
180                    c2bOff += 0x100;
181                    c2bIndex[e.cp>>8] = 1;
182                }
183                es.add(e);
184            }
185            fm.format("// non-roundtrip c2b only entries%n");
186            if (es.size() < 100) {
187                fm.format("        c2bNR = new char[%d];%n", es.size() * 2);
188                int i = 0;
189                for (Entry entry: es) {
190                    fm.format("        c2bNR[%d] = 0x%x; c2bNR[%d] = 0x%x;%n",
191                              i++, entry.bs, i++, entry.cp);
192                }
193            } else {
194                char[] cc = new char[es.size() * 2];
195                int i = 0;
196                for (Entry entry: es) {
197                    cc[i++] = (char)entry.bs;
198                    cc[i++] = (char)entry.cp;
199                }
200                fm.format("        c2bNR = (%n");
201                toString(cc, 0, i,  fm, ").toCharArray();", false);
202            }
203            fm.close();
204        }
205
206        // (4)it's time to generate the source file
207        String b2c = b2cSB.toString();
208        String b2cNR = b2cNRSB.toString();
209        String c2bNR = c2bNRSB.toString();
210
211        Scanner s = new Scanner(new File(srcDir, template));
212        PrintStream out = new PrintStream(new FileOutputStream(
213                              new File(dstDir, clzName + ".java")));
214
215        while (s.hasNextLine()) {
216            String line = s.nextLine();
217            int i = line.indexOf("$");
218            if (i == -1) {
219                out.println(line);
220                continue;
221            }
222            if (line.indexOf("$PACKAGE$", i) != -1) {
223                line = line.replace("$PACKAGE$", pkgName);
224            }
225            if (line.indexOf("$NAME_CLZ$", i) != -1) {
226                line = line.replace("$NAME_CLZ$", clzName);
227            }
228            if (line.indexOf("$NAME_CS$", i) != -1) {
229                line = line.replace("$NAME_CS$", csName);
230            }
231            if (line.indexOf("$NAME_ALIASES$", i) != -1) {
232                if ("sun.nio.cs".equals(pkgName))
233                    line = line.replace("$NAME_ALIASES$",
234                                        "StandardCharsets.aliases_" + clzName);
235                else
236                    line = line.replace("$NAME_ALIASES$",
237                                        "ExtendedCharsets.aliasesFor(\"" + csName + "\")");
238            }
239            if (line.indexOf("$NAME_HIS$", i) != -1) {
240                line = line.replace("$NAME_HIS$", hisName);
241            }
242            if (line.indexOf("$CONTAINS$", i) != -1) {
243                if (isASCII)
244                    line = "        return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));";
245                else
246                    line = "        return (cs instanceof " + clzName + ");";
247            }
248            if (line.indexOf("$B2CTABLE$") != -1) {
249                line = line.replace("$B2CTABLE$", b2c);
250            }
251            if (line.indexOf("$C2BLENGTH$") != -1) {
252                line = line.replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16));
253            }
254            if (line.indexOf("$NONROUNDTRIP_B2C$") != -1) {
255                if (b2cNR.length() == 0)
256                    continue;
257                line = line.replace("$NONROUNDTRIP_B2C$", b2cNR);
258            }
259
260            if (line.indexOf("$NONROUNDTRIP_C2B$") != -1) {
261                if (c2bNR.length() == 0)
262                    continue;
263                line = line.replace("$NONROUNDTRIP_C2B$", c2bNR);
264            }
265            out.println(line);
266        }
267        out.close();
268    }
269}
270