RBCollationTables.java revision 12745:f068a4ffddd2
/*
 * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
 *
 * The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.util.Vector;
import sun.text.UCompactIntArray;
import sun.text.IntHashtable;

/**
 * This class contains the static state of a RuleBasedCollator: the various
 * tables that are used by the collation routines. Several RuleBasedCollators
 * can share a single RBCollationTables object, easing memory requirements and
 * improving performance.
 */
final class RBCollationTables {
    //===========================================================================================
    //  The following diagram shows the data structure of the RBCollationTables object.
    //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
    //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
    //  What the rule says is: sort the 'ch' ligatures and 'c' with only a tertiary difference,
    //  and sort 'o-umlaut' as if it were always expanded with 'e'.
    //
    // mapping table                     contracting list        expanding list
    // (contains all unicode char
    //  entries)                   ___    ____________      _________________________
    //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
    // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
    // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |             :           |
    // |____:___|               | |_:_|  |------------|  | |-------------------------|
    // |____:___|               |        |'cH'|v('cH')|  | |             :           |
    // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
    // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |             :           |
    // |____:___|               |        |------------|  | |-------------------------|
    // |____:___|               |        |'CH'|v('CH')|  | |             :           |
    // |___'c'__|----------------         ------------   | |-------------------------|
    // |____:___|                                        | |             :           |
    // |o-umlaut|----------------------------------------- |_________________________|
    // |____:___|
    //
    // Noted by Helena Shih on 6/23/97
    //============================================================================================

    // ==============================================================
    // constants
    // ==============================================================
    //sherman/Todo: is the value big enough?????
    static final int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
    static final int CONTRACTCHARINDEX = 0x7F000000;  // contract indexes follow
    static final int UNMAPPED = 0xFFFFFFFF;

    static final int PRIMARYORDERMASK = 0xffff0000;
    static final int SECONDARYORDERMASK = 0x0000ff00;
    static final int TERTIARYORDERMASK = 0x000000ff;
    static final int PRIMARYDIFFERENCEONLY = 0xffff0000;
    static final int SECONDARYDIFFERENCEONLY = 0xffffff00;
    static final int PRIMARYORDERSHIFT = 16;
    static final int SECONDARYORDERSHIFT = 8;

    // ==============================================================
    // instance variables
    // ==============================================================

    // The rule string handed to the constructor; never reassigned afterwards.
    private final String rules;

    // Everything below is installed by BuildAPI.fillInTables(); until that
    // call the fields hold their Java default values (false / null / 0).
    private boolean frenchSec;
    private boolean seAsianSwapping;

    private UCompactIntArray mapping;
    private Vector<Vector<EntryPair>> contractTable;
    private Vector<int[]> expandTable;
    private IntHashtable contractFlags;

    private short maxSecOrder;
    private short maxTerOrder;

    /**
     * Creates the tables for the given collation rules.
     *
     * @param rules the collation rule string; also retained for {@link #getRules()}
     * @param decmp passed through unchanged to {@code RBTableBuilder.build}
     * @throws ParseException declared because the table build may throw it
     */
    public RBCollationTables(String rules, int decmp) throws ParseException {
        this.rules = rules;

        // This object is filled in by the builder through the BuildAPI object.
        new RBTableBuilder(new BuildAPI()).build(rules, decmp);
    }

    /**
     * Write-access handle given to RBTableBuilder so that it can install the
     * finished tables into the enclosing RBCollationTables instance.
     */
    final class BuildAPI {
        /**
         * Private constructor: the only instance is created by the enclosing
         * RBCollationTables constructor, which hands it to RBTableBuilder.
         * This keeps direct access to the internals of the tables object
         * away from everyone else.
         */
        private BuildAPI() {
        }

        /**
         * This function is used by RBTableBuilder to fill in all the members of
         * the enclosing tables object. (Effectively, the builder class functions
         * as a "friend" of this class, but to avoid changing too much of the
         * logic, it carries around "shadow" copies of all these variables until
         * the end of the build process and then copies them en masse into the
         * actual tables object once all the construction logic is complete.)
         * This function does that "copying en masse".
         *
         * @param f2ary The value for frenchSec (the French-secondary flag)
         * @param swap The value for SE Asian swapping rule
         * @param map The collator's character-mapping table (the value for mapping)
         * @param cTbl The collator's contracting-character table (the value for contractTable)
         * @param eTbl The collator's expanding-character table (the value for expandTable)
         * @param cFlgs The hash table of characters that participate in contracting-
         *              character sequences (the value for contractFlags)
         * @param mso The value for maxSecOrder
         * @param mto The value for maxTerOrder
         */
        void fillInTables(boolean f2ary,
                          boolean swap,
                          UCompactIntArray map,
                          Vector<Vector<EntryPair>> cTbl,
                          Vector<int[]> eTbl,
                          IntHashtable cFlgs,
                          short mso,
                          short mto) {
            // lookup tables
            mapping = map;
            contractTable = cTbl;
            expandTable = eTbl;
            contractFlags = cFlgs;

            // flags
            frenchSec = f2ary;
            seAsianSwapping = swap;

            // order limits
            maxSecOrder = mso;
            maxTerOrder = mto;
        }
    }

    /**
     * Gets the table-based rules for the collation object.
     *
     * @return the collation rules that this table collation object
     *         was created from.
     */
    public String getRules() {
        return rules;
    }

    /**
     * @return the French-secondary flag installed by the table builder.
     */
    public boolean isFrenchSec() {
        return frenchSec;
    }

    /**
     * @return the SE Asian swapping flag installed by the table builder.
     */
    public boolean isSEAsianSwapping() {
        return seAsianSwapping;
    }

    // ==============================================================
    // internal (for use by CollationElementIterator)
    // ==============================================================

    /**
     * Get the contracting-string entries for a character from the collation
     * table.
     *
     * @param ch the starting character of the contracting string
     * @return the entry list stored in the contracting table for {@code ch},
     *         or {@code null} when the mapped value of {@code ch} lies below
     *         {@code CONTRACTCHARINDEX} (not found)
     */
    Vector<EntryPair> getContractValues(int ch) {
        // The mapped value, rebased on CONTRACTCHARINDEX, is the position in
        // contractTable; a negative result means there is no entry.
        int tableIndex = mapping.elementAt(ch) - CONTRACTCHARINDEX;
        if (tableIndex < 0) {
            return null;
        }
        return contractTable.elementAt(tableIndex);
    }

    /**
     * Returns true if this character appears anywhere in a contracting
     * character sequence. (Used by CollationElementIterator.setOffset().)
     */
    boolean usedInContractSeq(int c) {
        return 1 == contractFlags.get(c);
    }

    /**
     * Return the maximum length of any expansion sequences that end
     * with the specified comparison order.
     *
     * @param order a collation order returned by previous or next.
     * @return the maximum length of any expansion sequences ending
     *         with the specified order; 1 when there is no expansion
     *         table or no sequence ends with {@code order}.
     *
     * @see CollationElementIterator#getMaxExpansion
     */
    int getMaxExpansion(int order) {
        if (expandTable == null) {
            return 1;
        }

        int longest = 1;

        // Right now this does a linear search through the entire
        // expansion table. If a collator had a large number of expansions,
        // this could cause a performance problem, but in practice that
        // rarely happens.
        for (int i = 0; i < expandTable.size(); i++) {
            int[] expansion = expandTable.elementAt(i);

            // Only sequences longer than the best so far can improve the
            // result; checking this first also skips empty arrays.
            if (expansion.length <= longest) {
                continue;
            }
            if (expansion[expansion.length - 1] == order) {
                longest = expansion.length;
            }
        }

        return longest;
    }

    /**
     * Get the value list of an expanding string from the collation table.
     *
     * @param idx the index of the expanding string value list; it is
     *            rebased by {@code EXPANDCHARINDEX} before the lookup
     * @return the value list stored at that position of the expanding table
     */
    final int[] getExpandValueList(int idx) {
        int tableIndex = idx - EXPANDCHARINDEX;
        return expandTable.elementAt(tableIndex);
    }

    /**
     * Get the comparison order of a character from the collation table.
     *
     * @param ch the character to look up in the mapping table
     * @return the comparison order of a character.
     */
    int getUnicodeOrder(int ch) {
        return mapping.elementAt(ch);
    }

    /**
     * @return the maxSecOrder value installed by the table builder.
     */
    short getMaxSecOrder() {
        return maxSecOrder;
    }

    /**
     * @return the maxTerOrder value installed by the table builder.
     */
    short getMaxTerOrder() {
        return maxTerOrder;
    }

    /**
     * Reverse, in place, the chars of {@code result} in the range
     * {@code from} (inclusive) to {@code to} (exclusive).
     */
    //sherman/Note: this is used for secondary order value reverse, no
    //              need to consider supplementary pair.
    static void reverse (StringBuffer result, int from, int to) {
        // Walk inwards from both ends, exchanging one pair per step.
        for (int lo = from, hi = to - 1; lo < hi; lo++, hi--) {
            char held = result.charAt(lo);
            result.setCharAt(lo, result.charAt(hi));
            result.setCharAt(hi, held);
        }
    }

    /**
     * Finds the first entry in {@code list} whose direction flag equals
     * {@code fwd} and whose entry name equals {@code name}.
     *
     * @param list the entries to search, in order
     * @param name the entry name to match
     * @param fwd  the direction flag to match
     * @return the position of the first matching entry, or {@code UNMAPPED}
     *         when no entry matches
     */
    static final int getEntry(Vector<EntryPair> list, String name, boolean fwd) {
        for (int pos = 0; pos < list.size(); pos++) {
            EntryPair candidate = list.elementAt(pos);
            boolean sameDirection = (candidate.fwd == fwd);
            if (sameDirection && candidate.entryName.equals(name)) {
                return pos;
            }
        }
        return UNMAPPED;
    }
}