1/*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package sun.font;
27
28import java.nio.ByteBuffer;
29import java.nio.CharBuffer;
30import java.nio.IntBuffer;
31import java.util.Locale;
32import java.nio.charset.*;
33
34/*
35 * A tt font has a CMAP table which is in turn made up of sub-tables which
36 * describe the char to glyph mapping in (possibly) multiple ways.
37 * CMAP subtables are described by 3 values.
38 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
39 * 2. Encoding (eg 0=symbol, 1=unicode)
40 * 3. TrueType subtable format (how the char->glyph mapping for the encoding
41 * is stored in the subtable). See the TrueType spec. Format 4 is required
 * by MS in fonts for windows. It uses segmented mapping to delta values.
 * The combination most typically seen is (3,1,4) :
44 * CMAP Platform ID=3 is what we use.
45 * Encodings that are used in practice by JDK on Solaris are
46 *  symbol (3,0)
47 *  unicode (3,1)
48 *  GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
49 * The format for almost all subtables is 4. However the solaris (3,5)
50 * encodings are typically in format 2.
51 */
52abstract class CMap {
53
54//     static char WingDings_b2c[] = {
55//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
56//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
57//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
58//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
59//         0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
60//         0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
61//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
62//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
63//         0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
64//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
65//         0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
66//         0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
67//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
68//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
69//         0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
70//         0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
71//         0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
72//         0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
73//         0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
74//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
75//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
76//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
77//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
78//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
79//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
80//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
81//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
82//         0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
83//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
84//         0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
85//         0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
86//         0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
87//    };
88
89//     static char Symbols_b2c[] = {
90//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
91//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
92//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
93//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
94//         0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
95//         0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
96//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
97//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
98//         0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
99//         0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
100//         0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
101//         0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
102//         0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
103//         0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
104//         0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
105//         0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
106//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
107//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
108//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
109//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
110//         0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
111//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
112//         0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
113//         0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
114//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
115//         0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
116//         0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
117//         0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
118//         0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
119//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
120//         0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
121//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
122//     };
123
    /* Encoding IDs of Microsoft platform (platform ID 3) cmap subtables.
     * IDs 2..6 are the non-unicode encodings for which a translation
     * array is obtained from getConverterMap().
     */
    static final short ShiftJISEncoding = 2;
    static final short GBKEncoding      = 3;
    static final short Big5Encoding     = 4;
    static final short WansungEncoding  = 5;
    static final short JohabEncoding    = 6;
    /* Encoding ID of the (3,10) "MS Unicode surrogates" subtable. */
    static final short MSUnicodeSurrogateEncoding = 10;

    /* Marker used by getConverter() for codes which have no mapping. */
    static final char noSuchChar = (char)0xfffd;
    static final int SHORTMASK = 0x0000ffff;
    /* NOTE(review): as an int this constant is -1 (all bits set), so
     * "x & INTMASK" on an int value leaves x unchanged and does NOT yield
     * an unsigned 32 bit value when the result is widened to long.
     */
    static final int INTMASK   = 0xffffffff;

    /* Cache of translation arrays, indexed by encoding ID and filled on
     * demand by getConverterMap(). Only indices 0..6 exist.
     */
    static final char[][] converterMaps = new char[7][];
136
    /*
     * Unicode->other encoding translation array. A pre-computed look up
     * which can be shared across all fonts using that encoding.
     * Using this saves running character converters repeatedly.
     */
142    char[] xlat;
143
144    static CMap initialize(TrueTypeFont font) {
145
146        CMap cmap = null;
147
148        int offset, platformID, encodingID=-1;
149
150        int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
151            three6=0, three10=0;
152        boolean threeStar = false;
153
154        ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
155        int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
156        short numberSubTables = cmapBuffer.getShort(2);
157
158        /* locate the offsets of all 3,*  (ie Microsoft platform) encodings */
159        for (int i=0; i<numberSubTables; i++) {
160            cmapBuffer.position(i * 8 + 4);
161            platformID = cmapBuffer.getShort();
162            if (platformID == 3) {
163                threeStar = true;
164                encodingID = cmapBuffer.getShort();
165                offset     = cmapBuffer.getInt();
166                switch (encodingID) {
167                case 0:  three0  = offset; break; // MS Symbol encoding
168                case 1:  three1  = offset; break; // MS Unicode cmap
169                case 2:  three2  = offset; break; // ShiftJIS cmap.
170                case 3:  three3  = offset; break; // GBK cmap
171                case 4:  three4  = offset; break; // Big 5 cmap
172                case 5:  three5  = offset; break; // Wansung
173                case 6:  three6  = offset; break; // Johab
174                case 10: three10 = offset; break; // MS Unicode surrogates
175                }
176            }
177        }
178
179        /* This defines the preference order for cmap subtables */
180        if (threeStar) {
181            if (three10 != 0) {
182                cmap = createCMap(cmapBuffer, three10, null);
183            }
184            else if  (three0 != 0) {
185                /* The special case treatment of these fonts leads to
186                 * anomalies where a user can view "wingdings" and "wingdings2"
187                 * and the latter shows all its code points in the unicode
188                 * private use area at 0xF000->0XF0FF and the former shows
189                 * a scattered subset of its glyphs that are known mappings to
190                 * unicode code points.
191                 * The primary purpose of these mappings was to facilitate
192                 * display of symbol chars etc in composite fonts, however
193                 * this is not needed as all these code points are covered
194                 * by Lucida Sans Regular.
195                 * Commenting this out reduces the role of these two files
196                 * (assuming that they continue to be used in font.properties)
197                 * to just one of contributing to the overall composite
198                 * font metrics, and also AWT can still access the fonts.
199                 * Clients which explicitly accessed these fonts as names
200                 * "Symbol" and "Wingdings" (ie as physical fonts) and
201                 * expected to see a scattering of these characters will
202                 * see them now as missing. How much of a problem is this?
203                 * Perhaps we could still support this mapping just for
204                 * "Symbol.ttf" but I suspect some users would prefer it
205                 * to be mapped in to the Latin range as that is how
206                 * the "symbol" font is used in native apps.
207                 */
208//              String name = font.platName.toLowerCase(Locale.ENGLISH);
209//              if (name.endsWith("symbol.ttf")) {
210//                  cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
211//              } else if (name.endsWith("wingding.ttf")) {
212//                  cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
213//              } else {
214                    cmap = createCMap(cmapBuffer, three0, null);
215//              }
216            }
217            else if (three1 != 0) {
218                cmap = createCMap(cmapBuffer, three1, null);
219            }
220            else if (three2 != 0) {
221                cmap = createCMap(cmapBuffer, three2,
222                                  getConverterMap(ShiftJISEncoding));
223            }
224            else if (three3 != 0) {
225                cmap = createCMap(cmapBuffer, three3,
226                                  getConverterMap(GBKEncoding));
227            }
228            else if (three4 != 0) {
229                /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
230                 * cmap table, these fonts have EncodingID 4 which is Big5
231                 * encoding according the TrueType spec, but actually the
232                 * fonts are using gb2312 encoding, have to use this
233                 * workaround to make Solaris zh_CN locale work.  -sherman
234                 */
235                if (FontUtilities.isSolaris && font.platName != null &&
236                    (font.platName.startsWith(
237                     "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
238                     font.platName.startsWith(
239                     "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
240                     font.platName.startsWith(
241                     "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
242                    cmap = createCMap(cmapBuffer, three4,
243                                       getConverterMap(GBKEncoding));
244                }
245                else {
246                    cmap = createCMap(cmapBuffer, three4,
247                                      getConverterMap(Big5Encoding));
248                }
249            }
250            else if (three5 != 0) {
251                cmap = createCMap(cmapBuffer, three5,
252                                  getConverterMap(WansungEncoding));
253            }
254            else if (three6 != 0) {
255                cmap = createCMap(cmapBuffer, three6,
256                                  getConverterMap(JohabEncoding));
257            }
258        } else {
259            /* No 3,* subtable was found. Just use whatever is the first
260             * table listed. Not very useful but maybe better than
261             * rejecting the font entirely?
262             */
263            cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
264        }
265        return cmap;
266    }
267
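    /* Builds the Unicode -> other encoding look up array for the given
     * encoding ID. The conversion is sped up by decoding only the range
     * of double byte characters which that encoding uses.
     */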
271    static char[] getConverter(short encodingID) {
272        int dBegin = 0x8000;
273        int dEnd   = 0xffff;
274        String encoding;
275
276        switch (encodingID) {
277        case ShiftJISEncoding:
278            dBegin = 0x8140;
279            dEnd   = 0xfcfc;
280            encoding = "SJIS";
281            break;
282        case GBKEncoding:
283            dBegin = 0x8140;
284            dEnd   = 0xfea0;
285            encoding = "GBK";
286            break;
287        case Big5Encoding:
288            dBegin = 0xa140;
289            dEnd   = 0xfefe;
290            encoding = "Big5";
291            break;
292        case WansungEncoding:
293            dBegin = 0xa1a1;
294            dEnd   = 0xfede;
295            encoding = "EUC_KR";
296            break;
297        case JohabEncoding:
298            dBegin = 0x8141;
299            dEnd   = 0xfdfe;
300            encoding = "Johab";
301            break;
302        default:
303            return null;
304        }
305
306        try {
307            char[] convertedChars = new char[65536];
308            for (int i=0; i<65536; i++) {
309                convertedChars[i] = noSuchChar;
310            }
311
312            byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
313            char[] outputChars = new char[(dEnd-dBegin+1)];
314
315            int j = 0;
316            int firstByte;
317            if (encodingID == ShiftJISEncoding) {
318                for (int i = dBegin; i <= dEnd; i++) {
319                    firstByte = (i >> 8 & 0xff);
320                    if (firstByte >= 0xa1 && firstByte <= 0xdf) {
321                        //sjis halfwidth katakana
322                        inputBytes[j++] = (byte)0xff;
323                        inputBytes[j++] = (byte)0xff;
324                    } else {
325                        inputBytes[j++] = (byte)firstByte;
326                        inputBytes[j++] = (byte)(i & 0xff);
327                    }
328                }
329            } else {
330                for (int i = dBegin; i <= dEnd; i++) {
331                    inputBytes[j++] = (byte)(i>>8 & 0xff);
332                    inputBytes[j++] = (byte)(i & 0xff);
333                }
334            }
335
336            Charset.forName(encoding).newDecoder()
337            .onMalformedInput(CodingErrorAction.REPLACE)
338            .onUnmappableCharacter(CodingErrorAction.REPLACE)
339            .replaceWith("\u0000")
340            .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
341                    CharBuffer.wrap(outputChars, 0, outputChars.length),
342                    true);
343
344            // ensure single byte ascii
345            for (int i = 0x20; i <= 0x7e; i++) {
346                convertedChars[i] = (char)i;
347            }
348
349            //sjis halfwidth katakana
350            if (encodingID == ShiftJISEncoding) {
351                for (int i = 0xa1; i <= 0xdf; i++) {
352                    convertedChars[i] = (char)(i - 0xa1 + 0xff61);
353                }
354            }
355
356            /* It would save heap space (approx 60Kbytes for each of these
357             * converters) if stored only valid ranges (ie returned
358             * outputChars directly. But this is tricky since want to
359             * include the ASCII range too.
360             */
361//          System.err.println("oc.len="+outputChars.length);
362//          System.err.println("cc.len="+convertedChars.length);
363//          System.err.println("dbegin="+dBegin);
364            System.arraycopy(outputChars, 0, convertedChars, dBegin,
365                             outputChars.length);
366
367            //return convertedChars;
368            /* invert this map as now want it to map from Unicode
369             * to other encoding.
370             */
371            char [] invertedChars = new char[65536];
372            for (int i=0;i<65536;i++) {
373                if (convertedChars[i] != noSuchChar) {
374                    invertedChars[convertedChars[i]] = (char)i;
375                }
376            }
377            return invertedChars;
378
379        } catch (Exception e) {
380            e.printStackTrace();
381        }
382        return null;
383    }
384
    /*
     * Returns the shared translation array for the given encoding, creating
     * it on first use. The array is indexed by unicode char and the indexed
     * value is the corresponding code in the other 2 byte encoding,
     * eg for ShiftJISEncoding it is the SJIS code of that unicode char.
     */
390    static char[] getConverterMap(short encodingID) {
391        if (converterMaps[encodingID] == null) {
392           converterMaps[encodingID] = getConverter(encodingID);
393        }
394        return converterMaps[encodingID];
395    }
396
397
398    static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
399        /* First do a sanity check that this cmap subtable is contained
400         * within the cmap table.
401         */
402        int subtableFormat = buffer.getChar(offset);
403        long subtableLength;
404        if (subtableFormat < 8) {
405            subtableLength = buffer.getChar(offset+2);
406        } else {
407            subtableLength = buffer.getInt(offset+4) & INTMASK;
408        }
409        if (offset+subtableLength > buffer.capacity()) {
410            if (FontUtilities.isLogging()) {
411                FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
412            }
413        }
414        switch (subtableFormat) {
415        case 0:  return new CMapFormat0(buffer, offset);
416        case 2:  return new CMapFormat2(buffer, offset, xlat);
417        case 4:  return new CMapFormat4(buffer, offset, xlat);
418        case 6:  return new CMapFormat6(buffer, offset, xlat);
419        case 8:  return new CMapFormat8(buffer, offset, xlat);
420        case 10: return new CMapFormat10(buffer, offset, xlat);
421        case 12: return new CMapFormat12(buffer, offset, xlat);
422        default: throw new RuntimeException("Cmap format unimplemented: " +
423                                            (int)buffer.getChar(offset));
424        }
425    }
426
427/*
428    final char charVal(byte[] cmap, int index) {
429        return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
430    }
431
432    final short shortVal(byte[] cmap, int index) {
433        return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
434    }
435*/
    /*
     * Maps a character code to a glyph code using this subtable.
     * The implementations visible in this file return 0 for a character
     * code that has no mapping.
     */
    abstract char getGlyph(int charCode);
437
438    /* Format 4 Header is
439     * ushort format (off=0)
440     * ushort length (off=2)
441     * ushort language (off=4)
442     * ushort segCountX2 (off=6)
443     * ushort searchRange (off=8)
444     * ushort entrySelector (off=10)
445     * ushort rangeShift (off=12)
446     * ushort endCount[segCount] (off=14)
447     * ushort reservedPad
448     * ushort startCount[segCount]
449     * short idDelta[segCount]
     * ushort idRangeOffset[segCount]
451     * ushort glyphIdArray[]
452     */
453    static class CMapFormat4 extends CMap {
454        int segCount;
455        int entrySelector;
456        int rangeShift;
457        char[] endCount;
458        char[] startCount;
459        short[] idDelta;
460        char[] idRangeOffset;
461        char[] glyphIds;
462
463        CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
464
465            this.xlat = xlat;
466
467            bbuffer.position(offset);
468            CharBuffer buffer = bbuffer.asCharBuffer();
469            buffer.get(); // skip, we already know format=4
470            int subtableLength = buffer.get();
471            /* Try to recover from some bad fonts which specify a subtable
472             * length that would overflow the byte buffer holding the whole
473             * cmap table. If this isn't a recoverable situation an exception
474             * may be thrown which is caught higher up the call stack.
475             * Whilst this may seem lenient, in practice, unless the "bad"
476             * subtable we are using is the last one in the cmap table we
477             * would have no way of knowing about this problem anyway.
478             */
479            if (offset+subtableLength > bbuffer.capacity()) {
480                subtableLength = bbuffer.capacity() - offset;
481            }
482            buffer.get(); // skip language
483            segCount = buffer.get()/2;
484            int searchRange = buffer.get();
485            entrySelector = buffer.get();
486            rangeShift    = buffer.get()/2;
487            startCount = new char[segCount];
488            endCount = new char[segCount];
489            idDelta = new short[segCount];
490            idRangeOffset = new char[segCount];
491
492            for (int i=0; i<segCount; i++) {
493                endCount[i] = buffer.get();
494            }
495            buffer.get(); // 2 bytes for reserved pad
496            for (int i=0; i<segCount; i++) {
497                startCount[i] = buffer.get();
498            }
499
500            for (int i=0; i<segCount; i++) {
501                idDelta[i] = (short)buffer.get();
502            }
503
504            for (int i=0; i<segCount; i++) {
505                char ctmp = buffer.get();
506                idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
507            }
508            /* Can calculate the number of glyph IDs by subtracting
509             * "pos" from the length of the cmap
510             */
511            int pos = (segCount*8+16)/2;
512            buffer.position(pos);
513            int numGlyphIds = (subtableLength/2 - pos);
514            glyphIds = new char[numGlyphIds];
515            for (int i=0;i<numGlyphIds;i++) {
516                glyphIds[i] = buffer.get();
517            }
518/*
519            System.err.println("segcount="+segCount);
520            System.err.println("entrySelector="+entrySelector);
521            System.err.println("rangeShift="+rangeShift);
522            for (int j=0;j<segCount;j++) {
523              System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
524                                 " ec="+(int)(endCount[j]&0xffff)+
525                                 " delta="+idDelta[j] +
526                                 " ro="+(int)idRangeOffset[j]);
527            }
528
529            //System.err.println("numglyphs="+glyphIds.length);
530            for (int i=0;i<numGlyphIds;i++) {
531                  System.err.println("gid["+i+"]="+(int)glyphIds[i]);
532            }
533*/
534        }
535
536        char getGlyph(int charCode) {
537
538            int index = 0;
539            char glyphCode = 0;
540
541            int controlGlyph = getControlCodeGlyph(charCode, true);
542            if (controlGlyph >= 0) {
543                return (char)controlGlyph;
544            }
545
546            /* presence of translation array indicates that this
547             * cmap is in some other (non-unicode encoding).
548             * In order to look-up a char->glyph mapping we need to
549             * translate the unicode code point to the encoding of
550             * the cmap.
551             * REMIND: VALID CHARCODES??
552             */
553            if (xlat != null) {
554                charCode = xlat[charCode];
555            }
556
557            /*
558             * Citation from the TrueType (and OpenType) spec:
559             *   The segments are sorted in order of increasing endCode
560             *   values, and the segment values are specified in four parallel
561             *   arrays. You search for the first endCode that is greater than
562             *   or equal to the character code you want to map. If the
563             *   corresponding startCode is less than or equal to the
564             *   character code, then you use the corresponding idDelta and
565             *   idRangeOffset to map the character code to a glyph index
566             *   (otherwise, the missingGlyph is returned).
567             */
568
569            /*
570             * CMAP format4 defines several fields for optimized search of
571             * the segment list (entrySelector, searchRange, rangeShift).
572             * However, benefits are neglible and some fonts have incorrect
573             * data - so we use straightforward binary search (see bug 6247425)
574             */
575            int left = 0, right = startCount.length;
576            index = startCount.length >> 1;
577            while (left < right) {
578                if (endCount[index] < charCode) {
579                    left = index + 1;
580                } else {
581                    right = index;
582                }
583                index = (left + right) >> 1;
584            }
585
586            if (charCode >= startCount[index] && charCode <= endCount[index]) {
587                int rangeOffset = idRangeOffset[index];
588
589                if (rangeOffset == 0) {
590                    glyphCode = (char)(charCode + idDelta[index]);
591                } else {
592                    /* Calculate an index into the glyphIds array */
593
594/*
595                    System.err.println("rangeoffset="+rangeOffset+
596                                       " charCode=" + charCode +
597                                       " scnt["+index+"]="+(int)startCount[index] +
598                                       " segCnt="+segCount);
599*/
600
601                    int glyphIDIndex = rangeOffset - segCount + index
602                                         + (charCode - startCount[index]);
603                    glyphCode = glyphIds[glyphIDIndex];
604                    if (glyphCode != 0) {
605                        glyphCode = (char)(glyphCode + idDelta[index]);
606                    }
607                }
608            }
609            if (glyphCode != 0) {
610            //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
611            }
612            return glyphCode;
613        }
614    }
615
616    // Format 0: Byte Encoding table
617    static class CMapFormat0 extends CMap {
618        byte [] cmap;
619
620        CMapFormat0(ByteBuffer buffer, int offset) {
621
622            /* skip 6 bytes of format, length, and version */
623            int len = buffer.getChar(offset+2);
624            cmap = new byte[len-6];
625            buffer.position(offset+6);
626            buffer.get(cmap);
627        }
628
629        char getGlyph(int charCode) {
630            if (charCode < 256) {
631                if (charCode < 0x0010) {
632                    switch (charCode) {
633                    case 0x0009:
634                    case 0x000a:
635                    case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
636                    }
637                }
638                return (char)(0xff & cmap[charCode]);
639            } else {
640                return 0;
641            }
642        }
643    }
644
645//     static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
646
647//      CMap cmap = createCMap(buffer, offset, null);
648//      if (cmap == null) {
649//          return null;
650//      } else {
651//          return new CMapFormatSymbol(cmap, syms);
652//      }
653//     }
654
655//     static class CMapFormatSymbol extends CMap {
656
657//      CMap cmap;
658//      static final int NUM_BUCKETS = 128;
659//      Bucket[] buckets = new Bucket[NUM_BUCKETS];
660
661//      class Bucket {
662//          char unicode;
663//          char glyph;
664//          Bucket next;
665
666//          Bucket(char u, char g) {
667//              unicode = u;
668//              glyph = g;
669//          }
670//      }
671
672//      CMapFormatSymbol(CMap cmap, char[] syms) {
673
674//          this.cmap = cmap;
675
676//          for (int i=0;i<syms.length;i++) {
677//              char unicode = syms[i];
678//              if (unicode != noSuchChar) {
679//                  char glyph = cmap.getGlyph(i + 0xf000);
680//                  int hash = unicode % NUM_BUCKETS;
681//                  Bucket bucket = new Bucket(unicode, glyph);
682//                  if (buckets[hash] == null) {
683//                      buckets[hash] = bucket;
684//                  } else {
685//                      Bucket b = buckets[hash];
686//                      while (b.next != null) {
687//                          b = b.next;
688//                      }
689//                      b.next = bucket;
690//                  }
691//              }
692//          }
693//      }
694
695//      char getGlyph(int unicode) {
696//          if (unicode >= 0x1000) {
697//              return 0;
698//          }
699//          else if (unicode >=0xf000 && unicode < 0xf100) {
700//              return cmap.getGlyph(unicode);
701//          } else {
702//              Bucket b = buckets[unicode % NUM_BUCKETS];
703//              while (b != null) {
704//                  if (b.unicode == unicode) {
705//                      return b.glyph;
706//                  } else {
707//                      b = b.next;
708//                  }
709//              }
710//              return 0;
711//          }
712//      }
713//     }
714
715    // Format 2: High-byte mapping through table
716    static class CMapFormat2 extends CMap {
717
718        char[] subHeaderKey = new char[256];
719         /* Store subheaders in individual arrays
720          * A SubHeader entry theortically looks like {
721          *   char firstCode;
722          *   char entryCount;
723          *   short idDelta;
724          *   char idRangeOffset;
725          * }
726          */
727        char[] firstCodeArray;
728        char[] entryCountArray;
729        short[] idDeltaArray;
730        char[] idRangeOffSetArray;
731
732        char[] glyphIndexArray;
733
734        CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
735
736            this.xlat = xlat;
737
738            int tableLen = buffer.getChar(offset+2);
739            buffer.position(offset+6);
740            CharBuffer cBuffer = buffer.asCharBuffer();
741            char maxSubHeader = 0;
742            for (int i=0;i<256;i++) {
743                subHeaderKey[i] = cBuffer.get();
744                if (subHeaderKey[i] > maxSubHeader) {
745                    maxSubHeader = subHeaderKey[i];
746                }
747            }
748            /* The value of the subHeaderKey is 8 * the subHeader index,
749             * so the number of subHeaders can be obtained by dividing
750             * this value bv 8 and adding 1.
751             */
752            int numSubHeaders = (maxSubHeader >> 3) +1;
753            firstCodeArray = new char[numSubHeaders];
754            entryCountArray = new char[numSubHeaders];
755            idDeltaArray  = new short[numSubHeaders];
756            idRangeOffSetArray  = new char[numSubHeaders];
757            for (int i=0; i<numSubHeaders; i++) {
758                firstCodeArray[i] = cBuffer.get();
759                entryCountArray[i] = cBuffer.get();
760                idDeltaArray[i] = (short)cBuffer.get();
761                idRangeOffSetArray[i] = cBuffer.get();
762//              System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
763//                                 " ec="+(int)entryCountArray[i]+
764//                                 " delta="+(int)idDeltaArray[i]+
765//                                 " offset="+(int)idRangeOffSetArray[i]);
766            }
767
768            int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
769            glyphIndexArray = new char[glyphIndexArrSize];
770            for (int i=0; i<glyphIndexArrSize;i++) {
771                glyphIndexArray[i] = cBuffer.get();
772            }
773        }
774
775        char getGlyph(int charCode) {
776            int controlGlyph = getControlCodeGlyph(charCode, true);
777            if (controlGlyph >= 0) {
778                return (char)controlGlyph;
779            }
780
781            if (xlat != null) {
782                charCode = xlat[charCode];
783            }
784
785            char highByte = (char)(charCode >> 8);
786            char lowByte = (char)(charCode & 0xff);
787            int key = subHeaderKey[highByte]>>3; // index into subHeaders
788            char mapMe;
789
790            if (key != 0) {
791                mapMe = lowByte;
792            } else {
793                mapMe = highByte;
794                if (mapMe == 0) {
795                    mapMe = lowByte;
796                }
797            }
798
799//          System.err.println("charCode="+Integer.toHexString(charCode)+
800//                             " key="+key+ " mapMe="+Integer.toHexString(mapMe));
801            char firstCode = firstCodeArray[key];
802            if (mapMe < firstCode) {
803                return 0;
804            } else {
805                mapMe -= firstCode;
806            }
807
808            if (mapMe < entryCountArray[key]) {
809                /* "address" arithmetic is needed to calculate the offset
810                 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
811                 * the number of bytes from that location in the table where
812                 * the subarray of glyphIndexes starting at "firstCode" begins.
813                 * Each entry in the subHeader table is 8 bytes, and the
814                 * idRangeOffSetArray field is at offset 6 in the entry.
815                 * The glyphIndexArray immediately follows the subHeaders.
816                 * So if there are "N" entries then the number of bytes to the
817                 * start of glyphIndexArray is (N-key)*8-6.
818                 * Subtract this from the idRangeOffSetArray value to get
819                 * the number of bytes into glyphIndexArray and divide by 2 to
820                 * get the (char) array index.
821                 */
822                int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
823                int glyphSubArrayStart =
824                        (idRangeOffSetArray[key] - glyphArrayOffset)/2;
825                char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
826                if (glyphCode != 0) {
827                    glyphCode += idDeltaArray[key]; //idDelta
828                    return glyphCode;
829                }
830            }
831            return 0;
832        }
833    }
834
835    // Format 6: Trimmed table mapping
836    static class CMapFormat6 extends CMap {
837
838        char firstCode;
839        char entryCount;
840        char[] glyphIdArray;
841
842        CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
843
844             bbuffer.position(offset+6);
845             CharBuffer buffer = bbuffer.asCharBuffer();
846             firstCode = buffer.get();
847             entryCount = buffer.get();
848             glyphIdArray = new char[entryCount];
849             for (int i=0; i< entryCount; i++) {
850                 glyphIdArray[i] = buffer.get();
851             }
852         }
853
854         char getGlyph(int charCode) {
855            int controlGlyph = getControlCodeGlyph(charCode, true);
856            if (controlGlyph >= 0) {
857                return (char)controlGlyph;
858            }
859
860             if (xlat != null) {
861                 charCode = xlat[charCode];
862             }
863
864             charCode -= firstCode;
865             if (charCode < 0 || charCode >= entryCount) {
866                  return 0;
867             } else {
868                  return glyphIdArray[charCode];
869             }
870         }
871    }
872
873    // Format 8: mixed 16-bit and 32-bit coverage
874    // Seems unlikely this code will ever get tested as we look for
875    // MS platform Cmaps and MS states (in the Opentype spec on their website)
876    // that MS doesn't support this format
877    static class CMapFormat8 extends CMap {
878         byte[] is32 = new byte[8192];
879         int nGroups;
880         int[] startCharCode;
881         int[] endCharCode;
882         int[] startGlyphID;
883
884         CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
885
886             bbuffer.position(12);
887             bbuffer.get(is32);
888             nGroups = bbuffer.getInt();
889             startCharCode = new int[nGroups];
890             endCharCode   = new int[nGroups];
891             startGlyphID  = new int[nGroups];
892         }
893
894        char getGlyph(int charCode) {
895            if (xlat != null) {
896                throw new RuntimeException("xlat array for cmap fmt=8");
897            }
898            return 0;
899        }
900
901    }
902
903
904    // Format 4-byte 10: Trimmed table mapping
905    // Seems unlikely this code will ever get tested as we look for
906    // MS platform Cmaps and MS states (in the Opentype spec on their website)
907    // that MS doesn't support this format
908    static class CMapFormat10 extends CMap {
909
910         long firstCode;
911         int entryCount;
912         char[] glyphIdArray;
913
914         CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
915
916             firstCode = bbuffer.getInt() & INTMASK;
917             entryCount = bbuffer.getInt() & INTMASK;
918             bbuffer.position(offset+20);
919             CharBuffer buffer = bbuffer.asCharBuffer();
920             glyphIdArray = new char[entryCount];
921             for (int i=0; i< entryCount; i++) {
922                 glyphIdArray[i] = buffer.get();
923             }
924         }
925
926         char getGlyph(int charCode) {
927
928             if (xlat != null) {
929                 throw new RuntimeException("xlat array for cmap fmt=10");
930             }
931
932             int code = (int)(charCode - firstCode);
933             if (code < 0 || code >= entryCount) {
934                 return 0;
935             } else {
936                 return glyphIdArray[code];
937             }
938         }
939    }
940
941    // Format 12: Segmented coverage for UCS-4 (fonts supporting
942    // surrogate pairs)
943    static class CMapFormat12 extends CMap {
944
945        int numGroups;
946        int highBit =0;
947        int power;
948        int extra;
949        long[] startCharCode;
950        long[] endCharCode;
951        int[] startGlyphID;
952
953        CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
954            if (xlat != null) {
955                throw new RuntimeException("xlat array for cmap fmt=12");
956            }
957
958            numGroups = buffer.getInt(offset+12);
959            startCharCode = new long[numGroups];
960            endCharCode = new long[numGroups];
961            startGlyphID = new int[numGroups];
962            buffer.position(offset+16);
963            buffer = buffer.slice();
964            IntBuffer ibuffer = buffer.asIntBuffer();
965            for (int i=0; i<numGroups; i++) {
966                startCharCode[i] = ibuffer.get() & INTMASK;
967                endCharCode[i] = ibuffer.get() & INTMASK;
968                startGlyphID[i] = ibuffer.get() & INTMASK;
969            }
970
971            /* Finds the high bit by binary searching through the bits */
972            int value = numGroups;
973
974            if (value >= 1 << 16) {
975                value >>= 16;
976                highBit += 16;
977            }
978
979            if (value >= 1 << 8) {
980                value >>= 8;
981                highBit += 8;
982            }
983
984            if (value >= 1 << 4) {
985                value >>= 4;
986                highBit += 4;
987            }
988
989            if (value >= 1 << 2) {
990                value >>= 2;
991                highBit += 2;
992            }
993
994            if (value >= 1 << 1) {
995                value >>= 1;
996                highBit += 1;
997            }
998
999            power = 1 << highBit;
1000            extra = numGroups - power;
1001        }
1002
1003        char getGlyph(int charCode) {
1004            int controlGlyph = getControlCodeGlyph(charCode, false);
1005            if (controlGlyph >= 0) {
1006                return (char)controlGlyph;
1007            }
1008            int probe = power;
1009            int range = 0;
1010
1011            if (startCharCode[extra] <= charCode) {
1012                range = extra;
1013            }
1014
1015            while (probe > 1) {
1016                probe >>= 1;
1017
1018                if (startCharCode[range+probe] <= charCode) {
1019                    range += probe;
1020                }
1021            }
1022
1023            if (startCharCode[range] <= charCode &&
1024                  endCharCode[range] >= charCode) {
1025                return (char)
1026                    (startGlyphID[range] + (charCode - startCharCode[range]));
1027            }
1028
1029            return 0;
1030        }
1031
1032    }
1033
1034    /* Used to substitute for bad Cmaps. */
1035    static class NullCMapClass extends CMap {
1036
1037        char getGlyph(int charCode) {
1038            return 0;
1039        }
1040    }
1041
1042    public static final NullCMapClass theNullCmap = new NullCMapClass();
1043
1044    final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1045        if (charCode < 0x0010) {
1046            switch (charCode) {
1047            case 0x0009:
1048            case 0x000a:
1049            case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1050            }
1051        } else if (charCode >= 0x200c) {
1052            if ((charCode <= 0x200f) ||
1053                (charCode >= 0x2028 && charCode <= 0x202e) ||
1054                (charCode >= 0x206a && charCode <= 0x206f)) {
1055                return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1056            } else if (noSurrogates && charCode >= 0xFFFF) {
1057                return 0;
1058            }
1059        }
1060        return -1;
1061    }
1062}
1063