1/*
2 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package sun.nio.cs;
27
28import java.io.InputStream;
29import java.io.InputStreamReader;
30import java.io.OutputStream;
31import java.io.BufferedReader;
32import java.io.IOException;
33import java.util.regex.Matcher;
34import java.util.regex.Pattern;
35import java.util.*;
36import java.security.*;
37
/**
 * Byte-to-char and char-to-byte mapping tables of a charset, loaded from a
 * binary {@code .dat} stream via {@link #get(InputStream)}.
 *
 * <p>The data may contain a single-byte table, up to two double-byte tables,
 * a paged char-to-byte table, and optional supplementary and composite tables.
 * Lookups against a table that was not present in the data report the
 * respective "unmappable" result.
 */
public class CharsetMapping {
    public static final char UNMAPPABLE_DECODING = '\uFFFD';
    public static final int  UNMAPPABLE_ENCODING = 0xFFFD;

    // c2bIndex value marking a 256-char page that has no slots in c2b
    private static final int UNMAPPED_PAGE = 0xffff;

    char[] b2cSB;                //singlebyte b->c
    char[] b2cDB1;               //doublebyte b->c /db1
    char[] b2cDB2;               //doublebyte b->c /db2

    // NOTE: b2Min, b2Max and dbSegSize are shared by db1 and db2; whichever
    // double-byte section is read last determines their values.
    int    b2Min, b2Max;         //min/max(start/end) value of 2nd byte
    int    b1MinDB1, b1MaxDB1;   //min/Max(start/end) value of 1st byte/db1
    int    b1MinDB2, b1MaxDB2;   //min/Max(start/end) value of 1st byte/db2
    int    dbSegSize;            //number of 2nd-byte slots per 1st byte

    char[] c2b;                  //c->b values, addressed through c2bIndex
    char[] c2bIndex;             //offset into c2b for each (c >> 8) page

    // Supplementary
    char[] b2cSupp;              //first half: sorted keys, second half: values
    char[] c2bSupp;              //first half: sorted keys, second half: values

    // Composite
    Entry[] b2cComp;             //sorted by byte sequence
    Entry[] c2bComp;             //sorted by (cp, cp2)

    /**
     * Decodes a single byte.
     *
     * @param b index into the single-byte table
     * @return the mapped char as stored in the table
     */
    public char decodeSingle(int b) {
        return b2cSB[b];
    }

    /**
     * Decodes a double-byte sequence using the db1 table and then the db2 table.
     *
     * @param b1 first (lead) byte
     * @param b2 second (trail) byte
     * @return the mapped char, or {@link #UNMAPPABLE_DECODING} if the bytes
     *         are outside the loaded ranges or no table covers them
     */
    public char decodeDouble(int b1, int b2) {
        // b2Max is inclusive: each lead byte owns dbSegSize = b2Max - b2Min + 1
        // consecutive slots, exactly as readDB() lays them out.
        if (b2 < b2Min || b2 > b2Max) {
            return UNMAPPABLE_DECODING;
        }
        int trail = b2 - b2Min;
        // The null checks matter when a table was never loaded: its range
        // fields are still 0..0 and would otherwise match b1 == 0.
        if (b2cDB1 != null && b1 >= b1MinDB1 && b1 <= b1MaxDB1) {
            return b2cDB1[(b1 - b1MinDB1) * dbSegSize + trail];
        }
        if (b2cDB2 != null && b1 >= b1MinDB2 && b1 <= b1MaxDB2) {
            return b2cDB2[(b1 - b1MinDB2) * dbSegSize + trail];
        }
        return UNMAPPABLE_DECODING;
    }

    // for jis0213 all supplementary characters are in 0x2xxxx range,
    // so only the xxxx part is now stored, should actually store the
    // codepoint value instead.
    /**
     * Decodes a double-byte value that maps to a supplementary character.
     *
     * @param db the double-byte value to look up (only its low 16 bits are used)
     * @param cc destination for the surrogate pair, written at index 0 and 1
     * @return {@code cc} on success, {@code null} if there is no mapping
     */
    public char[] decodeSurrogate(int db, char[] cc) {
        if (b2cSupp == null) {
            return null;
        }
        int end = b2cSupp.length / 2;
        int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
        if (i < 0) {
            return null;
        }
        Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);
        return cc;
    }

    /**
     * Decodes a byte sequence that maps to a base char plus a combining char.
     *
     * @param comp lookup key; only {@code comp.bs} is compared
     * @param cc   destination; receives the base char at 0 and the second
     *             char at 1
     * @return {@code cc} on success, {@code null} if there is no mapping
     */
    public char[] decodeComposite(Entry comp, char[] cc) {
        if (b2cComp == null) {
            return null;
        }
        int i = findBytes(b2cComp, comp);
        if (i < 0) {
            return null;
        }
        cc[0] = (char)b2cComp[i].cp;
        cc[1] = (char)b2cComp[i].cp2;
        return cc;
    }

    /**
     * Encodes a BMP char through the paged c2b table.
     *
     * @param ch the char to encode
     * @return the stored byte value, or {@link #UNMAPPABLE_ENCODING} if the
     *         char's page is unmapped or its slot was never filled
     */
    public int encodeChar(char ch) {
        int index = c2bIndex[ch >> 8];
        if (index == UNMAPPED_PAGE)
            return UNMAPPABLE_ENCODING;
        return c2b[index + (ch & 0xff)];
    }

    /**
     * Encodes a surrogate pair. Only code points in 0x20000..0x2FFFF can be
     * mapped because the table stores just the low 16 bits of the code point.
     *
     * @param hi high surrogate
     * @param lo low surrogate
     * @return the stored byte value, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeSurrogate(char hi, char lo) {
        int cp = Character.toCodePoint(hi, lo);
        if (cp < 0x20000 || cp >= 0x30000 || c2bSupp == null)
            return UNMAPPABLE_ENCODING;
        int end = c2bSupp.length / 2;
        int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);
        if (i >= 0)
            return c2bSupp[end + i];
        return UNMAPPABLE_ENCODING;
    }

    /**
     * Tells whether {@code comp.cp} is the base char of some composite entry.
     *
     * @param comp lookup key; only {@code comp.cp} is compared
     * @return {@code true} if a composite entry with that base char exists
     */
    public boolean isCompositeBase(Entry comp) {
        // cheap range pre-check before the binary search
        if (c2bComp != null && comp.cp <= 0x31f7 && comp.cp >= 0xe6) {
            return (findCP(c2bComp, comp) >= 0);
        }
        return false;
    }

    /**
     * Encodes a (base char, second char) composite.
     *
     * @param comp lookup key; {@code comp.cp} and {@code comp.cp2} are compared
     * @return the entry's byte sequence, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeComposite(Entry comp) {
        if (c2bComp == null)
            return UNMAPPABLE_ENCODING;
        int i = findComp(c2bComp, comp);
        if (i >= 0)
            return c2bComp[i].bs;
        return UNMAPPABLE_ENCODING;
    }

    /**
     * Creates a CharsetMapping from the .dat binary data, reading it inside a
     * privileged action.
     *
     * @param is stream positioned at the start of the data; it is closed
     * @return the loaded mapping, or {@code null} if an I/O error occurred
     * @throws RuntimeException if the data is truncated or has an unknown tag
     */
    public static CharsetMapping get(final InputStream is) {
        return AccessController.doPrivileged(new PrivilegedAction<>() {
            @Override
            public CharsetMapping run() {
                return new CharsetMapping().load(is);
            }
        });
    }

    /** One composite mapping; also used as the lookup key. */
    public static class Entry {
        public int bs;   //byte sequence reps
        public int cp;   //Unicode codepoint
        public int cp2;  //CC of composite
    }

    // orders entries by byte sequence
    static Comparator<Entry> comparatorBytes =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return Integer.compare(m1.bs, m2.bs);
            }
    };

    // orders entries by base code point only
    static Comparator<Entry> comparatorCP =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return Integer.compare(m1.cp, m2.cp);
            }
    };

    // orders entries by base code point, then by second code point
    static Comparator<Entry> comparatorComp =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                int v = Integer.compare(m1.cp, m2.cp);
                if (v == 0)
                    v = Integer.compare(m1.cp2, m2.cp2);
                return v;
            }
    };

    // binary search by byte sequence; a must be sorted by comparatorBytes
    static int findBytes(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
    }

    // binary search by base code point; a must be sorted by cp
    static int findCP(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
    }

    // binary search by (cp, cp2); a must be sorted by comparatorComp
    static int findComp(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
    }

    /*****************************************************************************/
    // tags of different charset mapping tables
    private static final int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
    private static final int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
    private static final int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
    private static final int MAP_SUPPLEMENT      = 0x5; //           db,c
    private static final int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
    private static final int MAP_COMPOSITE       = 0x7; //           db,base,cc
    private static final int MAP_INDEXC2B        = 0x8; // index table of c->bb

    // Fills buf[0..count) from in; returns false if the stream ends first.
    private static boolean readNBytes(InputStream in, byte[] buf, int count)
        throws IOException
    {
        int pos = 0;
        while (count > 0) {
            int n = in.read(buf, pos, count);
            if (n == -1)
                return false;
            count -= n;
            pos += n;
        }
        return true;
    }

    // Parse state, only meaningful while load() is running.
    int off = 0;                 // read position in bb
    byte[] bb;                   // raw data being parsed

    // Reads the next big-endian unsigned 16-bit value from bb.
    private int readU16() {
        int hi = bb[off++] & 0xff;
        int lo = bb[off++] & 0xff;
        return (hi << 8) | lo;
    }

    private char[] readCharArray() {
        // first 2 bytes are the number of "chars" stored in this table
        int size = readU16();
        char[] cc = new char[size];
        for (int i = 0; i < size; i++) {
            cc[i] = (char)readU16();
        }
        return cc;
    }

    // Records ch -> bytes in c2b. A char whose page has no slots in c2b
    // cannot be looked up by encodeChar(), so nothing is stored for it.
    private void putC2B(char ch, int bytes) {
        int page = c2bIndex[ch >> 8];
        if (page != UNMAPPED_PAGE) {
            c2b[page + (ch & 0xff)] = (char)bytes;
        }
    }

    // Reads the single-byte b->c table and fills in the reverse mappings.
    void readSINGLEBYTE() {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                putC2B(c, i);
            }
        }
        b2cSB = map;
    }

    // Reads the c2b page index and, unless c2b already exists, allocates c2b
    // large enough for the highest mapped page (256 slots per page).
    void readINDEXC2B() {
        char[] map = readCharArray();
        if (c2b == null) {
            int maxOffset = -1;
            for (int i = 0; i < map.length; i++) {
                // unmapped pages (0xffff) must not influence the size
                if (map[i] != UNMAPPED_PAGE && map[i] > maxOffset) {
                    maxOffset = map[i];
                }
            }
            if (maxOffset >= 0) {
                c2b = new char[maxOffset + 256];
                Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            }
        }
        c2bIndex = map;
    }

    // Reads one double-byte b->c table and fills in the reverse mappings.
    // Slot i of the table belongs to lead byte (i / segSize + b1Min) and
    // trail byte (i % segSize + b2Min).
    char[] readDB(int b1Min, int b2Min, int segSize) {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                int b1 = i / segSize;
                int b2 = i % segSize;
                int b = (b1 + b1Min) * 256 + (b2 + b2Min);
                putC2B(c, b);
            }
        }
        return map;
    }

    // Reads the db1 section: lead/trail byte ranges followed by the table.
    void readDOUBLEBYTE1() {
        b1MinDB1 = readU16();
        b1MaxDB1 = readU16();
        b2Min = readU16();
        b2Max = readU16();
        dbSegSize = b2Max - b2Min + 1;
        b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);
    }

    // Reads the db2 section: lead/trail byte ranges followed by the table.
    void readDOUBLEBYTE2() {
        b1MinDB2 = readU16();
        b1MaxDB2 = readU16();
        b2Min = readU16();
        b2Max = readU16();
        dbSegSize = b2Max - b2Min + 1;
        b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);
    }

    // Reads the composite section: a flat list of (bs, cp, cp2) triples.
    void readCOMPOSITE() {
        char[] map = readCharArray();
        int mLen = map.length / 3;
        b2cComp = new Entry[mLen];
        c2bComp = new Entry[mLen];
        for (int i = 0, j = 0; i < mLen; i++) {
            Entry m = new Entry();
            m.bs = map[j++];
            m.cp = map[j++];
            m.cp2 = map[j++];
            b2cComp[i] = m;
            c2bComp[i] = m;
        }
        // Both arrays are binary-searched, so each is sorted by its own key
        // instead of relying on the order of the triples in the data.
        Arrays.sort(b2cComp, 0, b2cComp.length, comparatorBytes);
        Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp);
    }

    /**
     * Populates this object from the .dat binary data and closes the stream.
     *
     * @param in stream positioned at the 4-byte length header
     * @return this object, or {@code null} if an IOException occurred (its
     *         stack trace is printed)
     * @throws RuntimeException if the header or body is truncated, the length
     *         is negative, or an unknown section tag is found
     */
    CharsetMapping load(InputStream in) {
        try (InputStream data = in) {
            // The first 4 bytes are the size of the total data followed in
            // this .dat file.
            int len = 0;
            for (int i = 0; i < 4; i++) {
                int b = data.read();
                if (b < 0)     // end of stream inside the header
                    throw new RuntimeException("Corrupted data file");
                len = (len << 8) | b;
            }
            if (len < 0)
                throw new RuntimeException("Corrupted data file");
            bb = new byte[len];
            off = 0;
            // Read in all bytes
            if (!readNBytes(data, bb, len))
                throw new RuntimeException("Corrupted data file");

            while (off < len) {
                int type = readU16();
                switch (type) {
                case MAP_INDEXC2B:
                    readINDEXC2B();
                    break;
                case MAP_SINGLEBYTE:
                    readSINGLEBYTE();
                    break;
                case MAP_DOUBLEBYTE1:
                    readDOUBLEBYTE1();
                    break;
                case MAP_DOUBLEBYTE2:
                    readDOUBLEBYTE2();
                    break;
                case MAP_SUPPLEMENT:
                    b2cSupp = readCharArray();
                    break;
                case MAP_SUPPLEMENT_C2B:
                    c2bSupp = readCharArray();
                    break;
                case MAP_COMPOSITE:
                    readCOMPOSITE();
                    break;
                default:
                    throw new RuntimeException("Corrupted data file");
                }
            }
            return this;
        } catch (IOException x) {
            x.printStackTrace();
            return null;
        } finally {
            // release the raw data on every exit path
            bb = null;
        }
    }
}
354