1/*
2 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 ******************************************************************************
28 * Copyright (C) 1996-2014, International Business Machines Corporation and
29 * others. All Rights Reserved.
30 ******************************************************************************
31 */
32
33package sun.text.normalizer;
34
35import java.io.DataInputStream;
36import java.io.InputStream;
37import java.io.IOException;
38
39/**
40 * Trie implementation which stores data in char, 16 bits.
41 * @author synwee
42 * @see com.ibm.icu.impl.Trie
43 * @since release 2.1, Jan 01 2002
44 */
45
 // Note that I need to handle the block calculations later, since CharTrie
 // in ICU4C uses the same index array.
48public class CharTrie extends Trie
49{
50    // public constructors ---------------------------------------------
51
52    /**
53    * <p>Creates a new Trie with the settings for the trie data.</p>
54    * <p>Unserialize the 32-bit-aligned input stream and use the data for the
55    * trie.</p>
56    * @param inputStream file input stream to a ICU data file, containing
57    *                    the trie
58    * @param dataManipulate object which provides methods to parse the char
59    *                        data
60    * @throws IOException thrown when data reading fails
61    * @draft 2.1
62    */
63    public CharTrie(InputStream inputStream,
64                    DataManipulate dataManipulate) throws IOException
65    {
66        super(inputStream, dataManipulate);
67
68        if (!isCharTrie()) {
69            throw new IllegalArgumentException(
70                               "Data given does not belong to a char trie.");
71        }
72    }
73
74    // public methods --------------------------------------------------
75
76    /**
77     * Gets the value associated with the codepoint.
78     * If no value is associated with the codepoint, a default value will be
79     * returned.
80     * @param ch codepoint
81     * @return offset to data
82     */
83    public final char getCodePointValue(int ch)
84    {
85        int offset;
86
87        // fastpath for U+0000..U+D7FF
88        if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
89            // copy of getRawOffset()
90            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
91                    + (ch & INDEX_STAGE_3_MASK_);
92            return m_data_[offset];
93        }
94
95        // handle U+D800..U+10FFFF
96        offset = getCodePointOffset(ch);
97
98        // return -1 if there is an error, in this case we return the default
99        // value: m_initialValue_
100        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
101    }
102
103    /**
104    * Gets the value to the data which this lead surrogate character points
105    * to.
106    * Returned data may contain folding offset information for the next
107    * trailing surrogate character.
108    * This method does not guarantee correct results for trail surrogates.
109    * @param ch lead surrogate character
110    * @return data value
111    */
112    public final char getLeadValue(char ch)
113    {
114       return m_data_[getLeadOffset(ch)];
115    }
116
117    // protected methods -----------------------------------------------
118
119    /**
120    * <p>Parses the input stream and stores its trie content into a index and
121    * data array</p>
122    * @param inputStream data input stream containing trie data
123    * @exception IOException thrown when data reading fails
124    */
125    protected final void unserialize(InputStream inputStream)
126                                                throws IOException
127    {
128        DataInputStream input = new DataInputStream(inputStream);
129        int indexDataLength = m_dataOffset_ + m_dataLength_;
130        m_index_ = new char[indexDataLength];
131        for (int i = 0; i < indexDataLength; i ++) {
132            m_index_[i] = input.readChar();
133        }
134        m_data_           = m_index_;
135        m_initialValue_   = m_data_[m_dataOffset_];
136    }
137
138    /**
139    * Gets the offset to the data which the surrogate pair points to.
140    * @param lead lead surrogate
141    * @param trail trailing surrogate
142    * @return offset to data
143    * @draft 2.1
144    */
145    protected final int getSurrogateOffset(char lead, char trail)
146    {
147        if (m_dataManipulate_ == null) {
148            throw new NullPointerException(
149                             "The field DataManipulate in this Trie is null");
150        }
151
152        // get fold position for the next trail surrogate
153        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
154
155        // get the real data from the folded lead/trail units
156        if (offset > 0) {
157            return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
158        }
159
160        // return -1 if there is an error, in this case we return the default
161        // value: m_initialValue_
162        return -1;
163    }
164
165    // private data members --------------------------------------------
166
167    /**
168     * Default value
169     */
170    private char m_initialValue_;
171    /**
172     * Array of char data
173     */
174    private char m_data_[];
175}
176