1/* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package build.tools.charsetmapping; 27 28import java.io.*; 29import java.util.Arrays; 30import java.util.ArrayList; 31import java.util.Scanner; 32import java.util.Formatter; 33import java.util.regex.Pattern; 34import static build.tools.charsetmapping.Utils.*; 35 36public class DBCS { 37 // pattern used by this class to read in mapping table 38 static Pattern mPattern = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:0x)?(\\p{XDigit}++)(?:\\s++#.*)?"); 39 40 public static void genClass(String type, Charset cs, 41 String srcDir, String dstDir, String template) 42 throws Exception 43 { 44 String clzName = cs.clzName; 45 String csName = cs.csName; 46 String hisName = cs.hisName; 47 String pkgName = cs.pkgName; 48 boolean isASCII = cs.isASCII; 49 int b1Min = cs.b1Min; 50 int b1Max = cs.b1Max; 51 int b2Min = cs.b2Min; 52 int b2Max = cs.b2Max; 53 54 StringBuilder b2cSB = new StringBuilder(); 55 StringBuilder b2cNRSB = new StringBuilder(); 56 StringBuilder c2bNRSB = new StringBuilder(); 57 58 char[] db = new char[0x10000]; 59 char[] c2bIndex = new char[0x100]; 60 int c2bOff = 0x100; // first 0x100 for unmappable segs 61 62 Arrays.fill(db, UNMAPPABLE_DECODING); 63 Arrays.fill(c2bIndex, UNMAPPABLE_DECODING); 64 65 char[] b2cIndex = new char[0x100]; 66 Arrays.fill(b2cIndex, UNMAPPABLE_DECODING); 67 68 // (1)read in .map to parse all b->c entries 69 FileInputStream in = new FileInputStream(new File(srcDir, clzName + ".map")); 70 Parser p = new Parser(in, mPattern); 71 Entry e = null; 72 while ((e = p.next()) != null) { 73 db[e.bs] = (char)e.cp; 74 75 if (e.bs > 0x100 && // db 76 b2cIndex[e.bs>>8] == UNMAPPABLE_DECODING) { 77 b2cIndex[e.bs>>8] = 1; 78 } 79 80 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) { 81 c2bOff += 0x100; 82 c2bIndex[e.cp>>8] = 1; 83 } 84 } 85 Output out = new Output(new Formatter(b2cSB)); 86 out.format("%n static final String b2cSBStr =%n"); 87 out.format(db, 0x00, 0x100, ";"); 88 89 out.format("%n static final String[] b2cStr = {%n"); 90 for (int i = 0; i < 0x100; i++) { 91 if (b2cIndex[i] == UNMAPPABLE_DECODING) { 92 out.format(" null,%n"); //unmappable segments 93 } else { 94 out.format(db, i, b2Min, b2Max, ","); 95 } 96 } 97 98 out.format(" };%n"); 99 out.close(); 100 101 // (2)now parse the .nr file which includes "b->c" non-roundtrip entries 102 File f = new File(srcDir, clzName + ".nr"); 103 if (f.exists()) { 104 StringBuilder sb = new StringBuilder(); 105 in = new FileInputStream(f); 106 p = new Parser(in, mPattern); 107 e = null; 108 while ((e = p.next()) != null) { 109 // A <b,c> pair 110 sb.append((char)e.bs); 111 sb.append((char)e.cp); 112 } 113 char[] nr = sb.toString().toCharArray(); 114 out = new Output(new Formatter(b2cNRSB)); 115 out.format("String b2cNR =%n"); 116 out.format(nr, 0, nr.length, ";"); 117 out.close(); 118 } else { 119 b2cNRSB.append("String b2cNR = null;"); 120 } 121 122 // (3)finally the .c2b file which includes c->b non-roundtrip entries 123 f = new File(srcDir, clzName + ".c2b"); 124 if (f.exists()) { 125 StringBuilder sb = new StringBuilder(); 126 in = new FileInputStream(f); 127 p = new Parser(in, mPattern); 128 e = null; 129 while ((e = p.next()) != null) { 130 // A <b,c> pair 131 if (c2bIndex[e.cp>>8] == UNMAPPABLE_DECODING) { 132 c2bOff += 0x100; 133 c2bIndex[e.cp>>8] = 1; 134 } 135 sb.append((char)e.bs); 136 sb.append((char)e.cp); 137 } 138 char[] nr = sb.toString().toCharArray(); 139 out = new Output(new Formatter(c2bNRSB)); 140 out.format("String c2bNR =%n"); 141 out.format(nr, 0, nr.length, ";"); 142 out.close(); 143 } else { 144 c2bNRSB.append("String c2bNR = null;"); 145 } 146 147 // (4)it's time to generate the source file 148 String b2c = b2cSB.toString(); 149 String b2cNR = b2cNRSB.toString(); 150 String c2bNR = c2bNRSB.toString(); 151 152 Scanner s = new Scanner(new File(srcDir, template)); 153 PrintStream ops = new PrintStream(new FileOutputStream( 154 new File(dstDir, clzName + ".java"))); 155 if (hisName == null) 156 hisName = ""; 157 158 // (5) c2b replacement, only used for JIs0208/0212, which 159 // are two pure db charsets so default '3f' does not work 160 // TBD: move this into configuration file 161 String c2bRepl = ""; 162 if (clzName.startsWith("JIS_X_0208")) { 163 c2bRepl = "new byte[]{ (byte)0x21, (byte)0x29 },"; 164 } else if (clzName.startsWith("JIS_X_0212")) { 165 c2bRepl = "new byte[]{ (byte)0x22, (byte)0x44 },"; 166 } else if (clzName.startsWith("IBM300")) { 167 c2bRepl = "new byte[]{ (byte)0x42, (byte)0x6f },"; 168 } 169 170 while (s.hasNextLine()) { 171 String line = s.nextLine(); 172 if (line.indexOf("$") == -1) { 173 ops.println(line); 174 continue; 175 } 176 line = line.replace("$PACKAGE$" , pkgName) 177 .replace("$IMPLEMENTS$", (hisName == null)? 178 "" : "implements HistoricallyNamedCharset") 179 .replace("$NAME_CLZ$", clzName) 180 .replace("$NAME_ALIASES$", 181 "sun.nio.cs".equals(pkgName) ? 182 "StandardCharsets.aliases_" + clzName : 183 "ExtendedCharsets.aliasesFor(\"" + csName + "\")") 184 .replace("$NAME_CS$" , csName) 185 .replace("$CONTAINS$", 186 "MS932".equals(clzName)? 187 "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof JIS_X_0201) || (cs instanceof " + clzName + "));": 188 (isASCII ? 189 "return ((cs.name().equals(\"US-ASCII\")) || (cs instanceof " + clzName + "));": 190 "return (cs instanceof " + clzName + ");")) 191 .replace("$HISTORICALNAME$", 192 (hisName == null)? "" : 193 " public String historicalName() { return \"" + hisName + "\"; }") 194 .replace("$DECTYPE$", type) 195 .replace("$ENCTYPE$", type) 196 .replace("$B1MIN$" , "0x" + Integer.toString(b1Min, 16)) 197 .replace("$B1MAX$" , "0x" + Integer.toString(b1Max, 16)) 198 .replace("$B2MIN$" , "0x" + Integer.toString(b2Min, 16)) 199 .replace("$B2MAX$" , "0x" + Integer.toString(b2Max, 16)) 200 .replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false") 201 .replace("$B2C$", b2c) 202 .replace("$C2BLENGTH$", "0x" + Integer.toString(c2bOff, 16)) 203 .replace("$NONROUNDTRIP_B2C$", b2cNR) 204 .replace("$NONROUNDTRIP_C2B$", c2bNR) 205 .replace("$ENC_REPLACEMENT$", c2bRepl); 206 207 ops.println(line); 208 } 209 ops.close(); 210 } 211} 212