1/*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 */
4/*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements.  See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21package com.sun.org.apache.xerces.internal.util;
22
23/**
24 * This class is a symbol table implementation that guarantees that
25 * strings used as identifiers are unique references. Multiple calls
26 * to <code>addSymbol</code> will always return the same string
27 * reference.
28 * <p>
29 * The symbol table performs the same task as <code>String.intern()</code>
30 * with the following differences:
31 * <ul>
32 *  <li>
33 *   A new string object does not need to be created in order to
34 *   retrieve a unique reference. Symbols can be added by using
35 *   a series of characters in a character array.
36 *  </li>
37 *  <li>
38 *   Users of the symbol table can provide their own symbol hashing
39 *   implementation. For example, a simple string hashing algorithm
40 *   may fail to produce a balanced set of hashcodes for symbols
41 *   that are <em>mostly</em> unique. Strings with similar leading
42 *   characters are especially prone to this poor hashing behavior.
43 *  </li>
44 * </ul>
45 *
46 * @see SymbolHash
47 *
48 * @author Andy Clark
49 *
50 */
51public class SymbolTable {
52
53    //
54    // Constants
55    //
56
57    /** Default table size. */
58    protected static final int TABLE_SIZE = 101;
59
60    /** Maximum hash collisions per bucket for a table with load factor == 1. */
61    protected static final int MAX_HASH_COLLISIONS = 40;
62
63    protected static final int MULTIPLIERS_SIZE = 1 << 5;
64    protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
65
66    //
67    // Data
68    //
69
70    /** Buckets. */
71    protected Entry[] fBuckets = null;
72
73    /** actual table size **/
74    protected int fTableSize;
75
76    /** The total number of entries in the hash table. */
77    protected transient int fCount;
78
79    /** The table is rehashed when its size exceeds this threshold.  (The
80     * value of this field is (int)(capacity * loadFactor).) */
81    protected int fThreshold;
82
83    /** The load factor for the SymbolTable. */
84    protected float fLoadFactor;
85
86    /**
87     * A new hash function is selected and the table is rehashed when
88     * the number of keys in the bucket exceeds this threshold.
89     */
90    protected final int fCollisionThreshold;
91
92    /**
93     * Array of randomly selected hash function multipliers or <code>null</code>
94     * if the default String.hashCode() function should be used.
95     */
96    protected int[] fHashMultipliers;
97
98    //
99    // Constructors
100    //
101
102    /**
103     * Constructs a new, empty SymbolTable with the specified initial
104     * capacity and the specified load factor.
105     *
106     * @param      initialCapacity   the initial capacity of the SymbolTable.
107     * @param      loadFactor        the load factor of the SymbolTable.
108     * @throws     IllegalArgumentException  if the initial capacity is less
109     *             than zero, or if the load factor is nonpositive.
110     */
111    public SymbolTable(int initialCapacity, float loadFactor) {
112
113        if (initialCapacity < 0) {
114            throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity);
115        }
116
117        if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
118            throw new IllegalArgumentException("Illegal Load: " + loadFactor);
119        }
120
121        if (initialCapacity == 0) {
122            initialCapacity = 1;
123        }
124
125        fLoadFactor = loadFactor;
126        fTableSize = initialCapacity;
127        fBuckets = new Entry[fTableSize];
128        fThreshold = (int)(fTableSize * loadFactor);
129        fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor);
130        fCount = 0;
131    }
132
133    /**
134     * Constructs a new, empty SymbolTable with the specified initial capacity
135     * and default load factor, which is <tt>0.75</tt>.
136     *
137     * @param     initialCapacity   the initial capacity of the hashtable.
138     * @throws    IllegalArgumentException if the initial capacity is less
139     *            than zero.
140     */
141    public SymbolTable(int initialCapacity) {
142        this(initialCapacity, 0.75f);
143    }
144
145    /**
146     * Constructs a new, empty SymbolTable with a default initial capacity (101)
147     * and load factor, which is <tt>0.75</tt>.
148     */
149    public SymbolTable() {
150        this(TABLE_SIZE, 0.75f);
151    }
152
153    //
154    // Public methods
155    //
156
157    /**
158     * Adds the specified symbol to the symbol table and returns a
159     * reference to the unique symbol. If the symbol already exists,
160     * the previous symbol reference is returned instead, in order
161     * guarantee that symbol references remain unique.
162     *
163     * @param symbol The new symbol.
164     */
165    public String addSymbol(String symbol) {
166
167        // search for identical symbol
168        int collisionCount = 0;
169        int bucket = hash(symbol) % fTableSize;
170        for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
171            if (entry.symbol.equals(symbol)) {
172                return entry.symbol;
173            }
174            ++collisionCount;
175        }
176        return addSymbol0(symbol, bucket, collisionCount);
177
178    } // addSymbol(String):String
179
180    private String addSymbol0(String symbol, int bucket, int collisionCount) {
181
182        if (fCount >= fThreshold) {
183            // Rehash the table if the threshold is exceeded
184            rehash();
185            bucket = hash(symbol) % fTableSize;
186        }
187        else if (collisionCount >= fCollisionThreshold) {
188            // Select a new hash function and rehash the table if
189            // the collision threshold is exceeded.
190            rebalance();
191            bucket = hash(symbol) % fTableSize;
192        }
193
194        // create new entry
195        Entry entry = new Entry(symbol, fBuckets[bucket]);
196        fBuckets[bucket] = entry;
197        ++fCount;
198        return entry.symbol;
199
200    } // addSymbol0(String,int,int):String
201
202    /**
203     * Adds the specified symbol to the symbol table and returns a
204     * reference to the unique symbol. If the symbol already exists,
205     * the previous symbol reference is returned instead, in order
206     * guarantee that symbol references remain unique.
207     *
208     * @param buffer The buffer containing the new symbol.
209     * @param offset The offset into the buffer of the new symbol.
210     * @param length The length of the new symbol in the buffer.
211     */
212    public String addSymbol(char[] buffer, int offset, int length) {
213
214        // search for identical symbol
215        int collisionCount = 0;
216        int bucket = hash(buffer, offset, length) % fTableSize;
217        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
218            if (length == entry.characters.length) {
219                for (int i = 0; i < length; i++) {
220                    if (buffer[offset + i] != entry.characters[i]) {
221                        ++collisionCount;
222                        continue OUTER;
223                    }
224                }
225                return entry.symbol;
226            }
227            ++collisionCount;
228        }
229        return addSymbol0(buffer, offset, length, bucket, collisionCount);
230
231    } // addSymbol(char[],int,int):String
232
233    private String addSymbol0(char[] buffer, int offset, int length, int bucket, int collisionCount) {
234
235        if (fCount >= fThreshold) {
236            // Rehash the table if the threshold is exceeded
237            rehash();
238            bucket = hash(buffer, offset, length) % fTableSize;
239        }
240        else if (collisionCount >= fCollisionThreshold) {
241            // Select a new hash function and rehash the table if
242            // the collision threshold is exceeded.
243            rebalance();
244            bucket = hash(buffer, offset, length) % fTableSize;
245        }
246
247        // add new entry
248        Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
249        fBuckets[bucket] = entry;
250        ++fCount;
251        return entry.symbol;
252
253    } // addSymbol0(char[],int,int,int,int):String
254
255    /**
256     * Returns a hashcode value for the specified symbol. The value
257     * returned by this method must be identical to the value returned
258     * by the <code>hash(char[],int,int)</code> method when called
259     * with the character array that comprises the symbol string.
260     *
261     * @param symbol The symbol to hash.
262     */
263    public int hash(String symbol) {
264        if (fHashMultipliers == null) {
265            return symbol.hashCode() & 0x7FFFFFFF;
266        }
267        return hash0(symbol);
268    } // hash(String):int
269
270    private int hash0(String symbol) {
271        int code = 0;
272        final int length = symbol.length();
273        final int[] multipliers = fHashMultipliers;
274        for (int i = 0; i < length; ++i) {
275            code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
276        }
277        return code & 0x7FFFFFFF;
278    } // hash0(String):int
279
280    /**
281     * Returns a hashcode value for the specified symbol information.
282     * The value returned by this method must be identical to the value
283     * returned by the <code>hash(String)</code> method when called
284     * with the string object created from the symbol information.
285     *
286     * @param buffer The character buffer containing the symbol.
287     * @param offset The offset into the character buffer of the start
288     *               of the symbol.
289     * @param length The length of the symbol.
290     */
291    public int hash(char[] buffer, int offset, int length) {
292        if (fHashMultipliers == null) {
293            int code = 0;
294            for (int i = 0; i < length; ++i) {
295                code = code * 31 + buffer[offset + i];
296            }
297            return code & 0x7FFFFFFF;
298        }
299        return hash0(buffer, offset, length);
300
301    } // hash(char[],int,int):int
302
303    private int hash0(char[] buffer, int offset, int length) {
304        int code = 0;
305        final int[] multipliers = fHashMultipliers;
306        for (int i = 0; i < length; ++i) {
307            code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i];
308        }
309        return code & 0x7FFFFFFF;
310    } // hash0(char[],int,int):int
311
312    /**
313     * Increases the capacity of and internally reorganizes this
314     * SymbolTable, in order to accommodate and access its entries more
315     * efficiently.  This method is called automatically when the
316     * number of keys in the SymbolTable exceeds this hashtable's capacity
317     * and load factor.
318     */
319    protected void rehash() {
320        rehashCommon(fBuckets.length * 2 + 1);
321    }
322
323    /**
324     * Randomly selects a new hash function and reorganizes this SymbolTable
325     * in order to more evenly distribute its entries across the table. This
326     * method is called automatically when the number keys in one of the
327     * SymbolTable's buckets exceeds the given collision threshold.
328     */
329    protected void rebalance() {
330        if (fHashMultipliers == null) {
331            fHashMultipliers = new int[MULTIPLIERS_SIZE];
332        }
333        PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
334        rehashCommon(fBuckets.length);
335    }
336
337    private void rehashCommon(final int newCapacity) {
338
339        int oldCapacity = fBuckets.length;
340        Entry[] oldTable = fBuckets;
341
342        Entry[] newTable = new Entry[newCapacity];
343
344        fThreshold = (int)(newCapacity * fLoadFactor);
345        fBuckets = newTable;
346        fTableSize = fBuckets.length;
347
348        for (int i = oldCapacity ; i-- > 0 ;) {
349            for (Entry old = oldTable[i] ; old != null ; ) {
350                Entry e = old;
351                old = old.next;
352
353                int index = hash(e.symbol) % newCapacity;
354                e.next = newTable[index];
355                newTable[index] = e;
356            }
357        }
358    }
359
360    /**
361     * Returns true if the symbol table already contains the specified
362     * symbol.
363     *
364     * @param symbol The symbol to look for.
365     */
366    public boolean containsSymbol(String symbol) {
367
368        // search for identical symbol
369        int bucket = hash(symbol) % fTableSize;
370        int length = symbol.length();
371        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
372            if (length == entry.characters.length) {
373                for (int i = 0; i < length; i++) {
374                    if (symbol.charAt(i) != entry.characters[i]) {
375                        continue OUTER;
376                    }
377                }
378                return true;
379            }
380        }
381
382        return false;
383
384    } // containsSymbol(String):boolean
385
386    /**
387     * Returns true if the symbol table already contains the specified
388     * symbol.
389     *
390     * @param buffer The buffer containing the symbol to look for.
391     * @param offset The offset into the buffer.
392     * @param length The length of the symbol in the buffer.
393     */
394    public boolean containsSymbol(char[] buffer, int offset, int length) {
395
396        // search for identical symbol
397        int bucket = hash(buffer, offset, length) % fTableSize;
398        OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
399            if (length == entry.characters.length) {
400                for (int i = 0; i < length; i++) {
401                    if (buffer[offset + i] != entry.characters[i]) {
402                        continue OUTER;
403                    }
404                }
405                return true;
406            }
407        }
408
409        return false;
410
411    } // containsSymbol(char[],int,int):boolean
412
413    //
414    // Classes
415    //
416
417    /**
418     * This class is a symbol table entry. Each entry acts as a node
419     * in a linked list.
420     */
421    protected static final class Entry {
422
423        //
424        // Data
425        //
426
427        /** Symbol. */
428        public final String symbol;
429
430        /**
431         * Symbol characters. This information is duplicated here for
432         * comparison performance.
433         */
434        public final char[] characters;
435
436        /** The next entry. */
437        public Entry next;
438
439        //
440        // Constructors
441        //
442
443        /**
444         * Constructs a new entry from the specified symbol and next entry
445         * reference.
446         */
447        public Entry(String symbol, Entry next) {
448            this.symbol = symbol.intern();
449            characters = new char[symbol.length()];
450            symbol.getChars(0, characters.length, characters, 0);
451            this.next = next;
452        }
453
454        /**
455         * Constructs a new entry from the specified symbol information and
456         * next entry reference.
457         */
458        public Entry(char[] ch, int offset, int length, Entry next) {
459            characters = new char[length];
460            System.arraycopy(ch, offset, characters, 0, length);
461            symbol = new String(characters).intern();
462            this.next = next;
463        }
464
465    } // class Entry
466
467} // class SymbolTable
468