1/* 2 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package sun.nio.cs; 27 28import java.io.InputStream; 29import java.io.InputStreamReader; 30import java.io.OutputStream; 31import java.io.BufferedReader; 32import java.io.IOException; 33import java.util.regex.Matcher; 34import java.util.regex.Pattern; 35import java.util.*; 36import java.security.*; 37 38public class CharsetMapping { 39 public static final char UNMAPPABLE_DECODING = '\uFFFD'; 40 public static final int UNMAPPABLE_ENCODING = 0xFFFD; 41 42 char[] b2cSB; //singlebyte b->c 43 char[] b2cDB1; //dobulebyte b->c /db1 44 char[] b2cDB2; //dobulebyte b->c /db2 45 46 int b2Min, b2Max; //min/max(start/end) value of 2nd byte 47 int b1MinDB1, b1MaxDB1; //min/Max(start/end) value of 1st byte/db1 48 int b1MinDB2, b1MaxDB2; //min/Max(start/end) value of 1st byte/db2 49 int dbSegSize; 50 51 char[] c2b; 52 char[] c2bIndex; 53 54 // Supplementary 55 char[] b2cSupp; 56 char[] c2bSupp; 57 58 // Composite 59 Entry[] b2cComp; 60 Entry[] c2bComp; 61 62 public char decodeSingle(int b) { 63 return b2cSB[b]; 64 } 65 66 public char decodeDouble(int b1, int b2) { 67 if (b2 >= b2Min && b2 < b2Max) { 68 b2 -= b2Min; 69 if (b1 >= b1MinDB1 && b1 <= b1MaxDB1) { 70 b1 -= b1MinDB1; 71 return b2cDB1[b1 * dbSegSize + b2]; 72 } 73 if (b1 >= b1MinDB2 && b1 <= b1MaxDB2) { 74 b1 -= b1MinDB2; 75 return b2cDB2[b1 * dbSegSize + b2]; 76 } 77 } 78 return UNMAPPABLE_DECODING; 79 } 80 81 // for jis0213 all supplementary characters are in 0x2xxxx range, 82 // so only the xxxx part is now stored, should actually store the 83 // codepoint value instead. 84 public char[] decodeSurrogate(int db, char[] cc) { 85 int end = b2cSupp.length / 2; 86 int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db); 87 if (i >= 0) { 88 Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0); 89 return cc; 90 } 91 return null; 92 } 93 94 public char[] decodeComposite(Entry comp, char[] cc) { 95 int i = findBytes(b2cComp, comp); 96 if (i >= 0) { 97 cc[0] = (char)b2cComp[i].cp; 98 cc[1] = (char)b2cComp[i].cp2; 99 return cc; 100 } 101 return null; 102 } 103 104 public int encodeChar(char ch) { 105 int index = c2bIndex[ch >> 8]; 106 if (index == 0xffff) 107 return UNMAPPABLE_ENCODING; 108 return c2b[index + (ch & 0xff)]; 109 } 110 111 public int encodeSurrogate(char hi, char lo) { 112 int cp = Character.toCodePoint(hi, lo); 113 if (cp < 0x20000 || cp >= 0x30000) 114 return UNMAPPABLE_ENCODING; 115 int end = c2bSupp.length / 2; 116 int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp); 117 if (i >= 0) 118 return c2bSupp[end + i]; 119 return UNMAPPABLE_ENCODING; 120 } 121 122 public boolean isCompositeBase(Entry comp) { 123 if (comp.cp <= 0x31f7 && comp.cp >= 0xe6) { 124 return (findCP(c2bComp, comp) >= 0); 125 } 126 return false; 127 } 128 129 public int encodeComposite(Entry comp) { 130 int i = findComp(c2bComp, comp); 131 if (i >= 0) 132 return c2bComp[i].bs; 133 return UNMAPPABLE_ENCODING; 134 } 135 136 // init the CharsetMapping object from the .dat binary file 137 public static CharsetMapping get(final InputStream is) { 138 return AccessController.doPrivileged(new PrivilegedAction<>() { 139 public CharsetMapping run() { 140 return new CharsetMapping().load(is); 141 } 142 }); 143 } 144 145 public static class Entry { 146 public int bs; //byte sequence reps 147 public int cp; //Unicode codepoint 148 public int cp2; //CC of composite 149 } 150 151 static Comparator<Entry> comparatorBytes = 152 new Comparator<Entry>() { 153 public int compare(Entry m1, Entry m2) { 154 return m1.bs - m2.bs; 155 } 156 public boolean equals(Object obj) { 157 return this == obj; 158 } 159 }; 160 161 static Comparator<Entry> comparatorCP = 162 new Comparator<Entry>() { 163 public int compare(Entry m1, Entry m2) { 164 return m1.cp - m2.cp; 165 } 166 public boolean equals(Object obj) { 167 return this == obj; 168 } 169 }; 170 171 static Comparator<Entry> comparatorComp = 172 new Comparator<Entry>() { 173 public int compare(Entry m1, Entry m2) { 174 int v = m1.cp - m2.cp; 175 if (v == 0) 176 v = m1.cp2 - m2.cp2; 177 return v; 178 } 179 public boolean equals(Object obj) { 180 return this == obj; 181 } 182 }; 183 184 static int findBytes(Entry[] a, Entry k) { 185 return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes); 186 } 187 188 static int findCP(Entry[] a, Entry k) { 189 return Arrays.binarySearch(a, 0, a.length, k, comparatorCP); 190 } 191 192 static int findComp(Entry[] a, Entry k) { 193 return Arrays.binarySearch(a, 0, a.length, k, comparatorComp); 194 } 195 196 /*****************************************************************************/ 197 // tags of different charset mapping tables 198 private static final int MAP_SINGLEBYTE = 0x1; // 0..256 : c 199 private static final int MAP_DOUBLEBYTE1 = 0x2; // min..max: c 200 private static final int MAP_DOUBLEBYTE2 = 0x3; // min..max: c [DB2] 201 private static final int MAP_SUPPLEMENT = 0x5; // db,c 202 private static final int MAP_SUPPLEMENT_C2B = 0x6; // c,db 203 private static final int MAP_COMPOSITE = 0x7; // db,base,cc 204 private static final int MAP_INDEXC2B = 0x8; // index table of c->bb 205 206 private static final boolean readNBytes(InputStream in, byte[] bb, int N) 207 throws IOException 208 { 209 int off = 0; 210 while (N > 0) { 211 int n = in.read(bb, off, N); 212 if (n == -1) 213 return false; 214 N = N - n; 215 off += n; 216 } 217 return true; 218 } 219 220 int off = 0; 221 byte[] bb; 222 private char[] readCharArray() { 223 // first 2 bytes are the number of "chars" stored in this table 224 int size = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 225 char [] cc = new char[size]; 226 for (int i = 0; i < size; i++) { 227 cc[i] = (char)(((bb[off++]&0xff)<<8) | (bb[off++]&0xff)); 228 } 229 return cc; 230 } 231 232 void readSINGLEBYTE() { 233 char[] map = readCharArray(); 234 for (int i = 0; i < map.length; i++) { 235 char c = map[i]; 236 if (c != UNMAPPABLE_DECODING) { 237 c2b[c2bIndex[c >> 8] + (c&0xff)] = (char)i; 238 } 239 } 240 b2cSB = map; 241 } 242 243 void readINDEXC2B() { 244 char[] map = readCharArray(); 245 for (int i = map.length - 1; i >= 0; i--) { 246 if (c2b == null && map[i] != -1) { 247 c2b = new char[map[i] + 256]; 248 Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING); 249 break; 250 } 251 } 252 c2bIndex = map; 253 } 254 255 char[] readDB(int b1Min, int b2Min, int segSize) { 256 char[] map = readCharArray(); 257 for (int i = 0; i < map.length; i++) { 258 char c = map[i]; 259 if (c != UNMAPPABLE_DECODING) { 260 int b1 = i / segSize; 261 int b2 = i % segSize; 262 int b = (b1 + b1Min)* 256 + (b2 + b2Min); 263 //System.out.printf(" DB %x\t%x%n", b, c & 0xffff); 264 c2b[c2bIndex[c >> 8] + (c&0xff)] = (char)(b); 265 } 266 } 267 return map; 268 } 269 270 void readDOUBLEBYTE1() { 271 b1MinDB1 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 272 b1MaxDB1 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 273 b2Min = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 274 b2Max = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 275 dbSegSize = b2Max - b2Min + 1; 276 b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize); 277 } 278 279 void readDOUBLEBYTE2() { 280 b1MinDB2 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 281 b1MaxDB2 = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 282 b2Min = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 283 b2Max = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 284 dbSegSize = b2Max - b2Min + 1; 285 b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize); 286 } 287 288 void readCOMPOSITE() { 289 char[] map = readCharArray(); 290 int mLen = map.length/3; 291 b2cComp = new Entry[mLen]; 292 c2bComp = new Entry[mLen]; 293 for (int i = 0, j= 0; i < mLen; i++) { 294 Entry m = new Entry(); 295 m.bs = map[j++]; 296 m.cp = map[j++]; 297 m.cp2 = map[j++]; 298 b2cComp[i] = m; 299 c2bComp[i] = m; 300 } 301 Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp); 302 } 303 304 CharsetMapping load(InputStream in) { 305 try { 306 // The first 4 bytes are the size of the total data followed in 307 // this .dat file. 308 int len = ((in.read()&0xff) << 24) | ((in.read()&0xff) << 16) | 309 ((in.read()&0xff) << 8) | (in.read()&0xff); 310 bb = new byte[len]; 311 off = 0; 312 //System.out.printf("In : Total=%d%n", len); 313 // Read in all bytes 314 if (!readNBytes(in, bb, len)) 315 throw new RuntimeException("Corrupted data file"); 316 in.close(); 317 318 while (off < len) { 319 int type = ((bb[off++]&0xff)<<8) | (bb[off++]&0xff); 320 switch(type) { 321 case MAP_INDEXC2B: 322 readINDEXC2B(); 323 break; 324 case MAP_SINGLEBYTE: 325 readSINGLEBYTE(); 326 break; 327 case MAP_DOUBLEBYTE1: 328 readDOUBLEBYTE1(); 329 break; 330 case MAP_DOUBLEBYTE2: 331 readDOUBLEBYTE2(); 332 break; 333 case MAP_SUPPLEMENT: 334 b2cSupp = readCharArray(); 335 break; 336 case MAP_SUPPLEMENT_C2B: 337 c2bSupp = readCharArray(); 338 break; 339 case MAP_COMPOSITE: 340 readCOMPOSITE(); 341 break; 342 default: 343 throw new RuntimeException("Corrupted data file"); 344 } 345 } 346 bb = null; 347 return this; 348 } catch (IOException x) { 349 x.printStackTrace(); 350 return null; 351 } 352 } 353} 354