1/* 2 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26/* 27 ****************************************************************************** 28 * Copyright (C) 1996-2014, International Business Machines Corporation and 29 * others. All Rights Reserved. 30 ****************************************************************************** 31 */ 32 33package sun.text.normalizer; 34 35import java.io.DataInputStream; 36import java.io.InputStream; 37import java.io.IOException; 38 39/** 40 * Trie implementation which stores data in char, 16 bits. 41 * @author synwee 42 * @see com.ibm.icu.impl.Trie 43 * @since release 2.1, Jan 01 2002 44 */ 45 46 // note that i need to handle the block calculations later, since chartrie 47 // in icu4c uses the same index array. 48public class CharTrie extends Trie 49{ 50 // public constructors --------------------------------------------- 51 52 /** 53 * <p>Creates a new Trie with the settings for the trie data.</p> 54 * <p>Unserialize the 32-bit-aligned input stream and use the data for the 55 * trie.</p> 56 * @param inputStream file input stream to a ICU data file, containing 57 * the trie 58 * @param dataManipulate object which provides methods to parse the char 59 * data 60 * @throws IOException thrown when data reading fails 61 * @draft 2.1 62 */ 63 public CharTrie(InputStream inputStream, 64 DataManipulate dataManipulate) throws IOException 65 { 66 super(inputStream, dataManipulate); 67 68 if (!isCharTrie()) { 69 throw new IllegalArgumentException( 70 "Data given does not belong to a char trie."); 71 } 72 } 73 74 // public methods -------------------------------------------------- 75 76 /** 77 * Gets the value associated with the codepoint. 78 * If no value is associated with the codepoint, a default value will be 79 * returned. 80 * @param ch codepoint 81 * @return offset to data 82 */ 83 public final char getCodePointValue(int ch) 84 { 85 int offset; 86 87 // fastpath for U+0000..U+D7FF 88 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { 89 // copy of getRawOffset() 90 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) 91 + (ch & INDEX_STAGE_3_MASK_); 92 return m_data_[offset]; 93 } 94 95 // handle U+D800..U+10FFFF 96 offset = getCodePointOffset(ch); 97 98 // return -1 if there is an error, in this case we return the default 99 // value: m_initialValue_ 100 return (offset >= 0) ? m_data_[offset] : m_initialValue_; 101 } 102 103 /** 104 * Gets the value to the data which this lead surrogate character points 105 * to. 106 * Returned data may contain folding offset information for the next 107 * trailing surrogate character. 108 * This method does not guarantee correct results for trail surrogates. 109 * @param ch lead surrogate character 110 * @return data value 111 */ 112 public final char getLeadValue(char ch) 113 { 114 return m_data_[getLeadOffset(ch)]; 115 } 116 117 // protected methods ----------------------------------------------- 118 119 /** 120 * <p>Parses the input stream and stores its trie content into a index and 121 * data array</p> 122 * @param inputStream data input stream containing trie data 123 * @exception IOException thrown when data reading fails 124 */ 125 protected final void unserialize(InputStream inputStream) 126 throws IOException 127 { 128 DataInputStream input = new DataInputStream(inputStream); 129 int indexDataLength = m_dataOffset_ + m_dataLength_; 130 m_index_ = new char[indexDataLength]; 131 for (int i = 0; i < indexDataLength; i ++) { 132 m_index_[i] = input.readChar(); 133 } 134 m_data_ = m_index_; 135 m_initialValue_ = m_data_[m_dataOffset_]; 136 } 137 138 /** 139 * Gets the offset to the data which the surrogate pair points to. 140 * @param lead lead surrogate 141 * @param trail trailing surrogate 142 * @return offset to data 143 * @draft 2.1 144 */ 145 protected final int getSurrogateOffset(char lead, char trail) 146 { 147 if (m_dataManipulate_ == null) { 148 throw new NullPointerException( 149 "The field DataManipulate in this Trie is null"); 150 } 151 152 // get fold position for the next trail surrogate 153 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); 154 155 // get the real data from the folded lead/trail units 156 if (offset > 0) { 157 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); 158 } 159 160 // return -1 if there is an error, in this case we return the default 161 // value: m_initialValue_ 162 return -1; 163 } 164 165 // private data members -------------------------------------------- 166 167 /** 168 * Default value 169 */ 170 private char m_initialValue_; 171 /** 172 * Array of char data 173 */ 174 private char m_data_[]; 175} 176