1/*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 *******************************************************************************
28 * Copyright (C) 2009-2014, International Business Machines Corporation and
29 * others. All Rights Reserved.
30 *******************************************************************************
31 */
32
33package sun.text.normalizer;
34
35import java.io.IOException;
36import java.nio.ByteBuffer;
37
38
39/**
40 * @author aheninger
41 *
42 * A read-only Trie2, holding 16 bit data values.
43 *
44 * A Trie2 is a highly optimized data structure for mapping from Unicode
45 * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
46 *
47 * See class Trie2 for descriptions of the API for accessing the contents of a trie.
48 *
49 * The fundamental data access methods are declared final in this class, with
50 * the intent that applications might gain a little extra performance, when compared
51 * with calling the same methods via the abstract UTrie2 base class.
52 */
53public final class Trie2_16 extends Trie2 {
54
55    /**
56     *  Internal constructor, not for general use.
57     */
58    Trie2_16() {
59    }
60
61
62    /**
63     * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
64     * The serialized format is identical between ICU4C and ICU4J, so this function
65     * will work with serialized Trie2s from either.
66     *
67     * The serialized Trie2 in the bytes may be in either little or big endian byte order.
68     * This allows using serialized Tries from ICU4C without needing to consider the
69     * byte order of the system that created them.
70     *
71     * @param bytes a byte buffer to the serialized form of a UTrie2.
72     * @return An unserialized Trie2_16, ready for use.
73     * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2.
74     * @throws IOException if a read error occurs in the buffer.
75     * @throws ClassCastException if the bytes contain a serialized Trie2_32
76     */
77    public static Trie2_16  createFromSerialized(ByteBuffer bytes) throws IOException {
78        return (Trie2_16) Trie2.createFromSerialized(bytes);
79    }
80
81    /**
82     * Get the value for a code point as stored in the Trie2.
83     *
84     * @param codePoint the code point
85     * @return the value
86     */
87    @Override
88    public final int get(int codePoint) {
89        int value;
90        int ix;
91
92        if (codePoint >= 0) {
93            if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
94                // Ordinary BMP code point, excluding leading surrogates.
95                // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
96                // 16 bit data is stored in the index array itself.
97                ix = index[codePoint >> UTRIE2_SHIFT_2];
98                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
99                value = index[ix];
100                return value;
101            }
102            if (codePoint <= 0xffff) {
103                // Lead Surrogate Code Point.  A Separate index section is stored for
104                // lead surrogate code units and code points.
105                //   The main index has the code unit data.
106                //   For this function, we need the code point data.
107                // Note: this expression could be refactored for slightly improved efficiency, but
108                //       surrogate code points will be so rare in practice that it's not worth it.
109                ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
110                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
111                value = index[ix];
112                return value;
113            }
114            if (codePoint < highStart) {
115                // Supplemental code point, use two-level lookup.
116                ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
117                ix = index[ix];
118                ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
119                ix = index[ix];
120                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
121                value = index[ix];
122                return value;
123            }
124            if (codePoint <= 0x10ffff) {
125                value = index[highValueIndex];
126                return value;
127            }
128        }
129
130        // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
131        return errorValue;
132    }
133
134
135    /**
136     * Get a Trie2 value for a UTF-16 code unit.
137     *
138     * This function returns the same value as get() if the input
139     * character is outside of the lead surrogate range
140     *
141     * There are two values stored in a Trie2 for inputs in the lead
142     * surrogate range.  This function returns the alternate value,
143     * while Trie2.get() returns the main value.
144     *
145     * @param codeUnit a 16 bit code unit or lead surrogate value.
146     * @return the value
147     */
148    @Override
149    public int getFromU16SingleLead(char codeUnit) {
150        int value;
151        int ix;
152
153        // Because the input is a 16 bit char, we can skip the tests for it being in
154        // the BMP range.  It is.
155        ix = index[codeUnit >> UTRIE2_SHIFT_2];
156        ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
157        value = index[ix];
158        return value;
159    }
160
161    /**
162     * @return the number of bytes of the serialized trie
163     */
164    public int getSerializedLength() {
165        return 16+(header.indexLength+dataLength)*2;
166    }
167}
168