1/* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4/* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21package com.sun.org.apache.xerces.internal.util; 22 23/** 24 * This class is a symbol table implementation that guarantees that 25 * strings used as identifiers are unique references. Multiple calls 26 * to <code>addSymbol</code> will always return the same string 27 * reference. 28 * <p> 29 * The symbol table performs the same task as <code>String.intern()</code> 30 * with the following differences: 31 * <ul> 32 * <li> 33 * A new string object does not need to be created in order to 34 * retrieve a unique reference. Symbols can be added by using 35 * a series of characters in a character array. 36 * </li> 37 * <li> 38 * Users of the symbol table can provide their own symbol hashing 39 * implementation. For example, a simple string hashing algorithm 40 * may fail to produce a balanced set of hashcodes for symbols 41 * that are <em>mostly</em> unique. Strings with similar leading 42 * characters are especially prone to this poor hashing behavior. 43 * </li> 44 * </ul> 45 * 46 * @see SymbolHash 47 * 48 * @author Andy Clark 49 * 50 */ 51public class SymbolTable { 52 53 // 54 // Constants 55 // 56 57 /** Default table size. */ 58 protected static final int TABLE_SIZE = 101; 59 60 /** Maximum hash collisions per bucket for a table with load factor == 1. */ 61 protected static final int MAX_HASH_COLLISIONS = 40; 62 63 protected static final int MULTIPLIERS_SIZE = 1 << 5; 64 protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1; 65 66 // 67 // Data 68 // 69 70 /** Buckets. */ 71 protected Entry[] fBuckets = null; 72 73 /** actual table size **/ 74 protected int fTableSize; 75 76 /** The total number of entries in the hash table. */ 77 protected transient int fCount; 78 79 /** The table is rehashed when its size exceeds this threshold. (The 80 * value of this field is (int)(capacity * loadFactor).) */ 81 protected int fThreshold; 82 83 /** The load factor for the SymbolTable. */ 84 protected float fLoadFactor; 85 86 /** 87 * A new hash function is selected and the table is rehashed when 88 * the number of keys in the bucket exceeds this threshold. 89 */ 90 protected final int fCollisionThreshold; 91 92 /** 93 * Array of randomly selected hash function multipliers or <code>null</code> 94 * if the default String.hashCode() function should be used. 95 */ 96 protected int[] fHashMultipliers; 97 98 // 99 // Constructors 100 // 101 102 /** 103 * Constructs a new, empty SymbolTable with the specified initial 104 * capacity and the specified load factor. 105 * 106 * @param initialCapacity the initial capacity of the SymbolTable. 107 * @param loadFactor the load factor of the SymbolTable. 108 * @throws IllegalArgumentException if the initial capacity is less 109 * than zero, or if the load factor is nonpositive. 110 */ 111 public SymbolTable(int initialCapacity, float loadFactor) { 112 113 if (initialCapacity < 0) { 114 throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity); 115 } 116 117 if (loadFactor <= 0 || Float.isNaN(loadFactor)) { 118 throw new IllegalArgumentException("Illegal Load: " + loadFactor); 119 } 120 121 if (initialCapacity == 0) { 122 initialCapacity = 1; 123 } 124 125 fLoadFactor = loadFactor; 126 fTableSize = initialCapacity; 127 fBuckets = new Entry[fTableSize]; 128 fThreshold = (int)(fTableSize * loadFactor); 129 fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor); 130 fCount = 0; 131 } 132 133 /** 134 * Constructs a new, empty SymbolTable with the specified initial capacity 135 * and default load factor, which is <tt>0.75</tt>. 136 * 137 * @param initialCapacity the initial capacity of the hashtable. 138 * @throws IllegalArgumentException if the initial capacity is less 139 * than zero. 140 */ 141 public SymbolTable(int initialCapacity) { 142 this(initialCapacity, 0.75f); 143 } 144 145 /** 146 * Constructs a new, empty SymbolTable with a default initial capacity (101) 147 * and load factor, which is <tt>0.75</tt>. 148 */ 149 public SymbolTable() { 150 this(TABLE_SIZE, 0.75f); 151 } 152 153 // 154 // Public methods 155 // 156 157 /** 158 * Adds the specified symbol to the symbol table and returns a 159 * reference to the unique symbol. If the symbol already exists, 160 * the previous symbol reference is returned instead, in order 161 * guarantee that symbol references remain unique. 162 * 163 * @param symbol The new symbol. 164 */ 165 public String addSymbol(String symbol) { 166 167 // search for identical symbol 168 int collisionCount = 0; 169 int bucket = hash(symbol) % fTableSize; 170 for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 171 if (entry.symbol.equals(symbol)) { 172 return entry.symbol; 173 } 174 ++collisionCount; 175 } 176 return addSymbol0(symbol, bucket, collisionCount); 177 178 } // addSymbol(String):String 179 180 private String addSymbol0(String symbol, int bucket, int collisionCount) { 181 182 if (fCount >= fThreshold) { 183 // Rehash the table if the threshold is exceeded 184 rehash(); 185 bucket = hash(symbol) % fTableSize; 186 } 187 else if (collisionCount >= fCollisionThreshold) { 188 // Select a new hash function and rehash the table if 189 // the collision threshold is exceeded. 190 rebalance(); 191 bucket = hash(symbol) % fTableSize; 192 } 193 194 // create new entry 195 Entry entry = new Entry(symbol, fBuckets[bucket]); 196 fBuckets[bucket] = entry; 197 ++fCount; 198 return entry.symbol; 199 200 } // addSymbol0(String,int,int):String 201 202 /** 203 * Adds the specified symbol to the symbol table and returns a 204 * reference to the unique symbol. If the symbol already exists, 205 * the previous symbol reference is returned instead, in order 206 * guarantee that symbol references remain unique. 207 * 208 * @param buffer The buffer containing the new symbol. 209 * @param offset The offset into the buffer of the new symbol. 210 * @param length The length of the new symbol in the buffer. 211 */ 212 public String addSymbol(char[] buffer, int offset, int length) { 213 214 // search for identical symbol 215 int collisionCount = 0; 216 int bucket = hash(buffer, offset, length) % fTableSize; 217 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 218 if (length == entry.characters.length) { 219 for (int i = 0; i < length; i++) { 220 if (buffer[offset + i] != entry.characters[i]) { 221 ++collisionCount; 222 continue OUTER; 223 } 224 } 225 return entry.symbol; 226 } 227 ++collisionCount; 228 } 229 return addSymbol0(buffer, offset, length, bucket, collisionCount); 230 231 } // addSymbol(char[],int,int):String 232 233 private String addSymbol0(char[] buffer, int offset, int length, int bucket, int collisionCount) { 234 235 if (fCount >= fThreshold) { 236 // Rehash the table if the threshold is exceeded 237 rehash(); 238 bucket = hash(buffer, offset, length) % fTableSize; 239 } 240 else if (collisionCount >= fCollisionThreshold) { 241 // Select a new hash function and rehash the table if 242 // the collision threshold is exceeded. 243 rebalance(); 244 bucket = hash(buffer, offset, length) % fTableSize; 245 } 246 247 // add new entry 248 Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]); 249 fBuckets[bucket] = entry; 250 ++fCount; 251 return entry.symbol; 252 253 } // addSymbol0(char[],int,int,int,int):String 254 255 /** 256 * Returns a hashcode value for the specified symbol. The value 257 * returned by this method must be identical to the value returned 258 * by the <code>hash(char[],int,int)</code> method when called 259 * with the character array that comprises the symbol string. 260 * 261 * @param symbol The symbol to hash. 262 */ 263 public int hash(String symbol) { 264 if (fHashMultipliers == null) { 265 return symbol.hashCode() & 0x7FFFFFFF; 266 } 267 return hash0(symbol); 268 } // hash(String):int 269 270 private int hash0(String symbol) { 271 int code = 0; 272 final int length = symbol.length(); 273 final int[] multipliers = fHashMultipliers; 274 for (int i = 0; i < length; ++i) { 275 code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i); 276 } 277 return code & 0x7FFFFFFF; 278 } // hash0(String):int 279 280 /** 281 * Returns a hashcode value for the specified symbol information. 282 * The value returned by this method must be identical to the value 283 * returned by the <code>hash(String)</code> method when called 284 * with the string object created from the symbol information. 285 * 286 * @param buffer The character buffer containing the symbol. 287 * @param offset The offset into the character buffer of the start 288 * of the symbol. 289 * @param length The length of the symbol. 290 */ 291 public int hash(char[] buffer, int offset, int length) { 292 if (fHashMultipliers == null) { 293 int code = 0; 294 for (int i = 0; i < length; ++i) { 295 code = code * 31 + buffer[offset + i]; 296 } 297 return code & 0x7FFFFFFF; 298 } 299 return hash0(buffer, offset, length); 300 301 } // hash(char[],int,int):int 302 303 private int hash0(char[] buffer, int offset, int length) { 304 int code = 0; 305 final int[] multipliers = fHashMultipliers; 306 for (int i = 0; i < length; ++i) { 307 code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i]; 308 } 309 return code & 0x7FFFFFFF; 310 } // hash0(char[],int,int):int 311 312 /** 313 * Increases the capacity of and internally reorganizes this 314 * SymbolTable, in order to accommodate and access its entries more 315 * efficiently. This method is called automatically when the 316 * number of keys in the SymbolTable exceeds this hashtable's capacity 317 * and load factor. 318 */ 319 protected void rehash() { 320 rehashCommon(fBuckets.length * 2 + 1); 321 } 322 323 /** 324 * Randomly selects a new hash function and reorganizes this SymbolTable 325 * in order to more evenly distribute its entries across the table. This 326 * method is called automatically when the number keys in one of the 327 * SymbolTable's buckets exceeds the given collision threshold. 328 */ 329 protected void rebalance() { 330 if (fHashMultipliers == null) { 331 fHashMultipliers = new int[MULTIPLIERS_SIZE]; 332 } 333 PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); 334 rehashCommon(fBuckets.length); 335 } 336 337 private void rehashCommon(final int newCapacity) { 338 339 int oldCapacity = fBuckets.length; 340 Entry[] oldTable = fBuckets; 341 342 Entry[] newTable = new Entry[newCapacity]; 343 344 fThreshold = (int)(newCapacity * fLoadFactor); 345 fBuckets = newTable; 346 fTableSize = fBuckets.length; 347 348 for (int i = oldCapacity ; i-- > 0 ;) { 349 for (Entry old = oldTable[i] ; old != null ; ) { 350 Entry e = old; 351 old = old.next; 352 353 int index = hash(e.symbol) % newCapacity; 354 e.next = newTable[index]; 355 newTable[index] = e; 356 } 357 } 358 } 359 360 /** 361 * Returns true if the symbol table already contains the specified 362 * symbol. 363 * 364 * @param symbol The symbol to look for. 365 */ 366 public boolean containsSymbol(String symbol) { 367 368 // search for identical symbol 369 int bucket = hash(symbol) % fTableSize; 370 int length = symbol.length(); 371 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 372 if (length == entry.characters.length) { 373 for (int i = 0; i < length; i++) { 374 if (symbol.charAt(i) != entry.characters[i]) { 375 continue OUTER; 376 } 377 } 378 return true; 379 } 380 } 381 382 return false; 383 384 } // containsSymbol(String):boolean 385 386 /** 387 * Returns true if the symbol table already contains the specified 388 * symbol. 389 * 390 * @param buffer The buffer containing the symbol to look for. 391 * @param offset The offset into the buffer. 392 * @param length The length of the symbol in the buffer. 393 */ 394 public boolean containsSymbol(char[] buffer, int offset, int length) { 395 396 // search for identical symbol 397 int bucket = hash(buffer, offset, length) % fTableSize; 398 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 399 if (length == entry.characters.length) { 400 for (int i = 0; i < length; i++) { 401 if (buffer[offset + i] != entry.characters[i]) { 402 continue OUTER; 403 } 404 } 405 return true; 406 } 407 } 408 409 return false; 410 411 } // containsSymbol(char[],int,int):boolean 412 413 // 414 // Classes 415 // 416 417 /** 418 * This class is a symbol table entry. Each entry acts as a node 419 * in a linked list. 420 */ 421 protected static final class Entry { 422 423 // 424 // Data 425 // 426 427 /** Symbol. */ 428 public final String symbol; 429 430 /** 431 * Symbol characters. This information is duplicated here for 432 * comparison performance. 433 */ 434 public final char[] characters; 435 436 /** The next entry. */ 437 public Entry next; 438 439 // 440 // Constructors 441 // 442 443 /** 444 * Constructs a new entry from the specified symbol and next entry 445 * reference. 446 */ 447 public Entry(String symbol, Entry next) { 448 this.symbol = symbol.intern(); 449 characters = new char[symbol.length()]; 450 symbol.getChars(0, characters.length, characters, 0); 451 this.next = next; 452 } 453 454 /** 455 * Constructs a new entry from the specified symbol information and 456 * next entry reference. 457 */ 458 public Entry(char[] ch, int offset, int length, Entry next) { 459 characters = new char[length]; 460 System.arraycopy(ch, offset, characters, 0, length); 461 symbol = new String(characters).intern(); 462 this.next = next; 463 } 464 465 } // class Entry 466 467} // class SymbolTable 468