StaticStringsHash.java revision 608:7e06bf1dcb09
1/* 2 * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25/* 26 * Licensed Materials - Property of IBM 27 * RMI-IIOP v1.0 28 * Copyright IBM Corp. 1998 1999 All Rights Reserved 29 * 30 */ 31 32package sun.rmi.rmic.iiop; 33 34/** 35 * StaticStringsHash takes an array of constant strings and 36 * uses several different hash methods to try to find the 37 * 'best' one for that set. The set of methods is currently 38 * fixed, but with a little work could be made extensible thru 39 * subclassing. 40 * <p> 41 * The current set of methods is: 42 * <ol> 43 * <li> length() - works well when all strings are different length.</li> 44 * <li> charAt(n) - works well when one offset into all strings is different.</li> 45 * <li> hashCode() - works well with larger arrays.</li> 46 * </ol> 47 * After constructing an instance over the set of strings, the 48 * <code>getKey(String)</code> method can be used to use the selected hash 49 * method to produce a key. The <code>method</code> string will contain 50 * "length()", "charAt(n)", or "hashCode()", and is intended for use by 51 * code generators. 52 * <p> 53 * The <code>keys</code> array will contain the full set of unique keys. 54 * <p> 55 * The <code>buckets</code> array will contain a set of arrays, one for 56 * each key in the <code>keys</code>, where <code>buckets[x][y]</code> 57 * is an index into the <code>strings</code> array. 58 * @author Bryan Atsatt 59 */ 60public class StaticStringsHash { 61 62 /** The set of strings upon which the hash info is created */ 63 public String[] strings = null; 64 65 /** Unique hash keys */ 66 public int[] keys = null; 67 68 /** Buckets for each key, where buckets[x][y] is an index 69 * into the strings[] array. */ 70 public int[][] buckets = null; 71 72 /** The method to invoke on String to produce the hash key */ 73 public String method = null; 74 75 /** Get a key for the given string using the 76 * selected hash method. 77 * @param str the string to return a key for. 78 * @return the key. 79 */ 80 public int getKey(String str) { 81 switch (keyKind) { 82 case LENGTH: return str.length(); 83 case CHAR_AT: return str.charAt(charAt); 84 case HASH_CODE: return str.hashCode(); 85 } 86 throw new Error("Bad keyKind"); 87 } 88 89 /** Constructor 90 * @param strings the set of strings upon which to 91 * find an optimal hash method. Must not contain 92 * duplicates. 93 */ 94 public StaticStringsHash(String[] strings) { 95 this.strings = strings; 96 length = strings.length; 97 tempKeys = new int[length]; 98 bucketSizes = new int[length]; 99 setMinStringLength(); 100 101 // Decide on the best algorithm based on 102 // which one has the smallest maximum 103 // bucket depth. First, try length()... 104 105 int currentMaxDepth = getKeys(LENGTH); 106 int useCharAt = -1; 107 boolean useHashCode = false; 108 109 if (currentMaxDepth > 1) { 110 111 // At least one bucket had more than one 112 // entry, so try charAt(i). If there 113 // are a lot of strings in the array, 114 // and minStringLength is large, limit 115 // the search to a smaller number of 116 // characters to avoid spending a lot 117 // of time here that is most likely to 118 // be pointless... 119 120 int minLength = minStringLength; 121 if (length > CHAR_AT_MAX_LINES && 122 length * minLength > CHAR_AT_MAX_CHARS) { 123 minLength = length/CHAR_AT_MAX_CHARS; 124 } 125 126 charAt = 0; 127 for (int i = 0; i < minLength; i++) { 128 int charAtDepth = getKeys(CHAR_AT); 129 if (charAtDepth < currentMaxDepth) { 130 currentMaxDepth = charAtDepth; 131 useCharAt = i; 132 if (currentMaxDepth == 1) { 133 break; 134 } 135 } 136 charAt++; 137 } 138 charAt = useCharAt; 139 140 141 if (currentMaxDepth > 1) { 142 143 // At least one bucket had more than one 144 // entry, try hashCode(). 145 // 146 // Since the cost of computing a full hashCode 147 // (for the runtime target string) is much higher 148 // than the previous methods, use it only if it is 149 // substantially better. The definition of 'substantial' 150 // here is not very well founded, and could be improved 151 // with some further analysis ;^) 152 153 int hashCodeDepth = getKeys(HASH_CODE); 154 if (hashCodeDepth < currentMaxDepth-3) { 155 156 // Using the full hashCode results in at least 157 // 3 fewer entries in the worst bucket, so will 158 // therefore avoid at least 3 calls to equals() 159 // in the worst case. 160 // 161 // Note that using a number smaller than 3 could 162 // result in using a hashCode when there are only 163 // 2 strings in the array, and that would surely 164 // be a poor performance choice. 165 166 useHashCode = true; 167 } 168 } 169 170 // Reset keys if needed... 171 172 if (!useHashCode) { 173 if (useCharAt >= 0) { 174 175 // Use the charAt(i) method... 176 177 getKeys(CHAR_AT); 178 179 } else { 180 181 // Use length method... 182 183 getKeys(LENGTH); 184 } 185 } 186 } 187 188 // Now allocate and fill our real hashKeys array... 189 190 keys = new int[bucketCount]; 191 System.arraycopy(tempKeys,0,keys,0,bucketCount); 192 193 // Sort keys and bucketSizes arrays... 194 195 boolean didSwap; 196 do { 197 didSwap = false; 198 for (int i = 0; i < bucketCount - 1; i++) { 199 if (keys[i] > keys[i+1]) { 200 int temp = keys[i]; 201 keys[i] = keys[i+1]; 202 keys[i+1] = temp; 203 temp = bucketSizes[i]; 204 bucketSizes[i] = bucketSizes[i+1]; 205 bucketSizes[i+1] = temp; 206 didSwap = true; 207 } 208 } 209 } 210 while (didSwap == true); 211 212 // Allocate our buckets array. Fill the string 213 // index slot with an unused key so we can 214 // determine which are free... 215 216 int unused = findUnusedKey(); 217 buckets = new int[bucketCount][]; 218 for (int i = 0; i < bucketCount; i++) { 219 buckets[i] = new int[bucketSizes[i]]; 220 for (int j = 0; j < bucketSizes[i]; j++) { 221 buckets[i][j] = unused; 222 } 223 } 224 225 // And fill it in... 226 227 for(int i = 0; i < strings.length; i++) { 228 int key = getKey(strings[i]); 229 for (int j = 0; j < bucketCount; j++) { 230 if (keys[j] == key) { 231 int k = 0; 232 while (buckets[j][k] != unused) { 233 k++; 234 } 235 buckets[j][k] = i; 236 break; 237 } 238 } 239 } 240 } 241 242 /** Print an optimized 'contains' method for the 243 * argument strings 244 */ 245 public static void main (String[] args) { 246 StaticStringsHash hash = new StaticStringsHash(args); 247 System.out.println(); 248 System.out.println(" public boolean contains(String key) {"); 249 System.out.println(" switch (key."+hash.method+") {"); 250 for (int i = 0; i < hash.buckets.length; i++) { 251 System.out.println(" case "+hash.keys[i]+": "); 252 for (int j = 0; j < hash.buckets[i].length; j++) { 253 if (j > 0) { 254 System.out.print(" } else "); 255 } else { 256 System.out.print(" "); 257 } 258 System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {"); 259 System.out.println(" return true;"); 260 } 261 System.out.println(" }"); 262 } 263 System.out.println(" }"); 264 System.out.println(" return false;"); 265 System.out.println(" }"); 266 } 267 268 private int length; 269 private int[] tempKeys; 270 private int[] bucketSizes; 271 private int bucketCount; 272 private int maxDepth; 273 private int minStringLength = Integer.MAX_VALUE; 274 private int keyKind; 275 private int charAt; 276 277 private static final int LENGTH = 0; 278 private static final int CHAR_AT = 1; 279 private static final int HASH_CODE = 2; 280 281 /* Determines the maximum number of charAt(i) 282 * tests that will be done. The search is 283 * limited because if the number of characters 284 * is large enough, the likelyhood of finding 285 * a good hash key based on this method is 286 * low. The CHAR_AT_MAX_CHARS limit only 287 * applies f there are more strings than 288 * CHAR_AT_MAX_LINES. 289 */ 290 private static final int CHAR_AT_MAX_LINES = 50; 291 private static final int CHAR_AT_MAX_CHARS = 1000; 292 293 private void resetKeys(int keyKind) { 294 this.keyKind = keyKind; 295 switch (keyKind) { 296 case LENGTH: method = "length()"; break; 297 case CHAR_AT: method = "charAt("+charAt+")"; break; 298 case HASH_CODE: method = "hashCode()"; break; 299 } 300 maxDepth = 1; 301 bucketCount = 0; 302 for (int i = 0; i < length; i++) { 303 tempKeys[i] = 0; 304 bucketSizes[i] = 0; 305 } 306 } 307 308 private void setMinStringLength() { 309 for (int i = 0; i < length; i++) { 310 if (strings[i].length() < minStringLength) { 311 minStringLength = strings[i].length(); 312 } 313 } 314 } 315 316 private int findUnusedKey() { 317 int unused = 0; 318 int keysLength = keys.length; 319 320 // Note that we just assume that resource 321 // exhaustion will occur rather than an 322 // infinite loop here if the set of keys 323 // is very large. 324 325 while (true) { 326 boolean match = false; 327 for (int i = 0; i < keysLength; i++) { 328 if (keys[i] == unused) { 329 match = true; 330 break; 331 } 332 } 333 if (match) { 334 unused--; 335 } else { 336 break; 337 } 338 } 339 return unused; 340 } 341 342 private int getKeys(int methodKind) { 343 resetKeys(methodKind); 344 for(int i = 0; i < strings.length; i++) { 345 addKey(getKey(strings[i])); 346 } 347 return maxDepth; 348 } 349 350 private void addKey(int key) { 351 352 // Have we seen this one before? 353 354 boolean addIt = true; 355 for (int j = 0; j < bucketCount; j++) { 356 if (tempKeys[j] == key) { 357 addIt = false; 358 bucketSizes[j]++; 359 if (bucketSizes[j] > maxDepth) { 360 maxDepth = bucketSizes[j]; 361 } 362 break; 363 } 364 } 365 366 if (addIt) { 367 tempKeys[bucketCount] = key; 368 bucketSizes[bucketCount] = 1; 369 bucketCount++; 370 } 371 } 372} 373