StaticStringsHash.java revision 608:7e06bf1dcb09
1/*
2 * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25/*
26 * Licensed Materials - Property of IBM
27 * RMI-IIOP v1.0
28 * Copyright IBM Corp. 1998 1999  All Rights Reserved
29 *
30 */
31
32package sun.rmi.rmic.iiop;
33
34/**
35 * StaticStringsHash takes an array of constant strings and
36 * uses several different hash methods to try to find the
37 * 'best' one for that set. The set of methods is currently
38 * fixed, but with a little work could be made extensible thru
39 * subclassing.
40 * <p>
41 * The current set of methods is:
42 * <ol>
43 * <li> length() - works well when all strings are different length.</li>
44 * <li> charAt(n) - works well when one offset into all strings is different.</li>
45 * <li> hashCode() - works well with larger arrays.</li>
46 * </ol>
47 * After constructing an instance over the set of strings, the
48 * <code>getKey(String)</code> method can be used to use the selected hash
49 * method to produce a key.  The <code>method</code> string will contain
50 * "length()", "charAt(n)", or "hashCode()", and is intended for use by
51 * code generators.
52 * <p>
53 * The <code>keys</code> array will contain the full set of unique keys.
54 * <p>
55 * The <code>buckets</code> array will contain a set of arrays, one for
56 * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
57 * is an index into the <code>strings</code> array.
58 * @author      Bryan Atsatt
59 */
60public class StaticStringsHash {
61
62    /** The set of strings upon which the hash info is created */
63    public String[] strings = null;
64
65    /** Unique hash keys */
66    public int[] keys = null;
67
68    /** Buckets for each key, where buckets[x][y] is an index
69     * into the strings[] array. */
70    public int[][] buckets = null;
71
72    /** The method to invoke on String to produce the hash key */
73    public String method = null;
74
75    /** Get a key for the given string using the
76     * selected hash method.
77     * @param str the string to return a key for.
78     * @return the key.
79     */
80    public int getKey(String str) {
81        switch (keyKind) {
82        case LENGTH: return str.length();
83        case CHAR_AT: return str.charAt(charAt);
84        case HASH_CODE: return str.hashCode();
85        }
86        throw new Error("Bad keyKind");
87    }
88
89    /** Constructor
90     * @param strings the set of strings upon which to
91     * find an optimal hash method. Must not contain
92     * duplicates.
93     */
94    public StaticStringsHash(String[] strings) {
95        this.strings = strings;
96        length = strings.length;
97        tempKeys = new int[length];
98        bucketSizes = new int[length];
99        setMinStringLength();
100
101        // Decide on the best algorithm based on
102        // which one has the smallest maximum
103        // bucket depth. First, try length()...
104
105        int currentMaxDepth = getKeys(LENGTH);
106        int useCharAt = -1;
107        boolean useHashCode = false;
108
109        if (currentMaxDepth > 1) {
110
111            // At least one bucket had more than one
112            // entry, so try charAt(i).  If there
113            // are a lot of strings in the array,
114            // and minStringLength is large, limit
115            // the search to a smaller number of
116            // characters to avoid spending a lot
117            // of time here that is most likely to
118            // be pointless...
119
120            int minLength = minStringLength;
121            if (length > CHAR_AT_MAX_LINES &&
122                length * minLength > CHAR_AT_MAX_CHARS) {
123                minLength = length/CHAR_AT_MAX_CHARS;
124            }
125
126            charAt = 0;
127            for (int i = 0; i < minLength; i++) {
128                int charAtDepth = getKeys(CHAR_AT);
129                if (charAtDepth < currentMaxDepth) {
130                    currentMaxDepth = charAtDepth;
131                    useCharAt = i;
132                    if (currentMaxDepth == 1) {
133                        break;
134                    }
135                }
136                charAt++;
137            }
138            charAt = useCharAt;
139
140
141            if (currentMaxDepth > 1) {
142
143                // At least one bucket had more than one
144                // entry, try hashCode().
145                //
146                // Since the cost of computing a full hashCode
147                // (for the runtime target string) is much higher
148                // than the previous methods, use it only if it is
149                // substantially better. The definition of 'substantial'
150                // here is not very well founded, and could be improved
151                // with some further analysis ;^)
152
153                int hashCodeDepth = getKeys(HASH_CODE);
154                if (hashCodeDepth < currentMaxDepth-3) {
155
156                    // Using the full hashCode results in at least
157                    // 3 fewer entries in the worst bucket, so will
158                    // therefore avoid at least 3 calls to equals()
159                    // in the worst case.
160                    //
161                    // Note that using a number smaller than 3 could
162                    // result in using a hashCode when there are only
163                    // 2 strings in the array, and that would surely
164                    // be a poor performance choice.
165
166                    useHashCode = true;
167                }
168            }
169
170            // Reset keys if needed...
171
172            if (!useHashCode) {
173                if (useCharAt >= 0) {
174
175                    // Use the charAt(i) method...
176
177                    getKeys(CHAR_AT);
178
179                } else {
180
181                    // Use length method...
182
183                    getKeys(LENGTH);
184                }
185            }
186        }
187
188        // Now allocate and fill our real hashKeys array...
189
190        keys = new int[bucketCount];
191        System.arraycopy(tempKeys,0,keys,0,bucketCount);
192
193        // Sort keys and bucketSizes arrays...
194
195        boolean didSwap;
196        do {
197            didSwap = false;
198            for (int i = 0; i < bucketCount - 1; i++) {
199                if (keys[i] > keys[i+1]) {
200                    int temp = keys[i];
201                    keys[i] = keys[i+1];
202                    keys[i+1] = temp;
203                    temp = bucketSizes[i];
204                    bucketSizes[i] = bucketSizes[i+1];
205                    bucketSizes[i+1] = temp;
206                    didSwap = true;
207                }
208            }
209        }
210        while (didSwap == true);
211
212        // Allocate our buckets array. Fill the string
213        // index slot with an unused key so we can
214        // determine which are free...
215
216        int unused = findUnusedKey();
217        buckets = new int[bucketCount][];
218        for (int i = 0; i < bucketCount; i++) {
219            buckets[i] = new int[bucketSizes[i]];
220            for (int j = 0; j < bucketSizes[i]; j++) {
221                buckets[i][j] = unused;
222            }
223        }
224
225        // And fill it in...
226
227        for(int i = 0; i < strings.length; i++) {
228            int key = getKey(strings[i]);
229            for (int j = 0; j < bucketCount; j++) {
230                if (keys[j] == key) {
231                    int k = 0;
232                    while (buckets[j][k] != unused) {
233                        k++;
234                    }
235                    buckets[j][k] = i;
236                    break;
237                }
238            }
239        }
240    }
241
242    /** Print an optimized 'contains' method for the
243     * argument strings
244     */
245    public static void main (String[] args) {
246        StaticStringsHash hash = new StaticStringsHash(args);
247        System.out.println();
248        System.out.println("    public boolean contains(String key) {");
249        System.out.println("        switch (key."+hash.method+") {");
250        for (int i = 0; i < hash.buckets.length; i++) {
251            System.out.println("            case "+hash.keys[i]+": ");
252            for (int j = 0; j < hash.buckets[i].length; j++) {
253                if (j > 0) {
254                    System.out.print("                } else ");
255                } else {
256                    System.out.print("                ");
257                }
258                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
259                System.out.println("                    return true;");
260            }
261            System.out.println("                }");
262        }
263        System.out.println("        }");
264        System.out.println("        return false;");
265        System.out.println("    }");
266    }
267
268    private int length;
269    private int[] tempKeys;
270    private int[] bucketSizes;
271    private int bucketCount;
272    private int maxDepth;
273    private int minStringLength = Integer.MAX_VALUE;
274    private int keyKind;
275    private int charAt;
276
277    private static final int LENGTH = 0;
278    private static final int CHAR_AT = 1;
279    private static final int HASH_CODE = 2;
280
281    /* Determines the maximum number of charAt(i)
282     * tests that will be done. The search is
283     * limited because if the number of characters
284     * is large enough, the likelyhood of finding
285     * a good hash key  based on this method is
286     * low. The CHAR_AT_MAX_CHARS limit only
287     * applies f there are more strings than
288     * CHAR_AT_MAX_LINES.
289     */
290    private static final int CHAR_AT_MAX_LINES = 50;
291    private static final int CHAR_AT_MAX_CHARS = 1000;
292
293    private void resetKeys(int keyKind) {
294        this.keyKind = keyKind;
295        switch (keyKind) {
296        case LENGTH: method = "length()"; break;
297        case CHAR_AT: method = "charAt("+charAt+")"; break;
298        case HASH_CODE: method = "hashCode()"; break;
299        }
300        maxDepth = 1;
301        bucketCount = 0;
302        for (int i = 0; i < length; i++) {
303            tempKeys[i] = 0;
304            bucketSizes[i] = 0;
305        }
306    }
307
308    private void setMinStringLength() {
309        for (int i = 0; i < length; i++) {
310            if (strings[i].length() < minStringLength) {
311                minStringLength = strings[i].length();
312            }
313        }
314    }
315
316    private int findUnusedKey() {
317        int unused = 0;
318        int keysLength = keys.length;
319
320        // Note that we just assume that resource
321        // exhaustion will occur rather than an
322        // infinite loop here if the set of keys
323        // is very large.
324
325        while (true) {
326            boolean match = false;
327            for (int i = 0; i < keysLength; i++) {
328                if (keys[i] == unused) {
329                    match = true;
330                    break;
331                }
332            }
333            if (match) {
334                unused--;
335            } else {
336                break;
337            }
338        }
339        return unused;
340    }
341
342    private int getKeys(int methodKind) {
343        resetKeys(methodKind);
344        for(int i = 0; i < strings.length; i++) {
345            addKey(getKey(strings[i]));
346        }
347        return maxDepth;
348    }
349
350    private void addKey(int key) {
351
352        // Have we seen this one before?
353
354        boolean addIt = true;
355        for (int j = 0; j < bucketCount; j++) {
356            if (tempKeys[j] == key) {
357                addIt = false;
358                bucketSizes[j]++;
359                if (bucketSizes[j] > maxDepth) {
360                    maxDepth = bucketSizes[j];
361                }
362                break;
363            }
364        }
365
366        if (addIt) {
367            tempKeys[bucketCount] = key;
368            bucketSizes[bucketCount] = 1;
369            bucketCount++;
370        }
371    }
372}
373