1/* 2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package jdk.tools.jlink.internal; 27 28import java.lang.reflect.Array; 29import java.util.ArrayList; 30import java.util.Arrays; 31import java.util.LinkedHashMap; 32import java.util.List; 33import java.util.Map; 34import jdk.internal.jimage.ImageStringsReader; 35 36/* 37 * The algorithm used here is outlined in Applications of Finite Automata 38 * Representing Large Vocabularies - Claudio L Lucchesi and Tomasz Kowaltowski, 39 * 1992, and A Practical Minimal Perfect Hashing Method - Fabiano C. Botelho1, 40 * Yoshiharu Kohayakawa, and Nivio Ziviani, 2005. 41 * 42 * The primary JDK use of this algorithm is managing the jimage location index. 43 * 44 * The goal of PerfectHashBuilder is to construct an automaton which maps a 45 * string key to a unique index 0..N-1, where N is the number of key-value pairs. 46 * What makes MPHM effective is that the size of the lookup table is N or very 47 * near N, and the minimum lookup is O(1) maximum lookup is O(2). 48 * 49 * The result of PerfectHashBuilder is two integer arrays, redirect and order. 50 * The redirect table provides a 1-1 mapping to the order table, using the 51 * reader algorithm described further on. The order table provides a mapping 52 * to entries. If entries are fixed size and can be put in a direct table, then 53 * the order table can be used to construct the direct table and then discarded. 54 * 55 * The steps for constructing the lookup tables are as follows; 56 * 57 * - Compute an MPHM hash for each key, based on a fixed base value modulo N. 58 * Note, the hash is based on the modified UTF-8 of the key, simplifying 59 * computation in native code. 60 * 61 * - Combine keys that map to the same hash code (collisions) into bucket 62 * chains. 63 * 64 * - Sort bucket chains by length of chains, longest first (most collisions.) 65 * Sorting is done to pack the redirect table with the worst collision 66 * offenders first. 67 * 68 * - For each chain, recompute the hash of each key using a new base value. 69 * Recomputation should give a different key distribution. A tally is kept 70 * of where the key maps, using the order table. The tally is used to detect 71 * new collisions. If there are further collisions, then restart 72 * redistribution using a different hash base value. If a chain is 73 * successfully distributed, then the base value used to compute the hash 74 * is recorded in the redirect table. 75 * 76 * - Once all colliding chains are resolved (length > 1), then the chains with 77 * only one entry are used to fill in the empty slots in the order table. 78 * These keys are recorded in the redirect table using the twos complement 79 * of the order index. 80 * 81 * - It is possible that a given set of keys cannot be packed into a table of 82 * size N. If this situation occurs then the size of the table is 83 * adjusted so that keys distribute differently. 84 * 85 * Readers algoritm; 86 * 87 * - Compute the hash for the key using the fixed base value modulo N. This 88 * will provide an index into the redirect table. The integer value in the 89 * redirect table will determine the next step. 90 * 91 * - If the value in the redirect table is positive, then that value is used 92 * to rehash the key to get the index into the order table. 93 * 94 * - If the value in the redirect table is negative, then that value is the 95 * twos complement of the index into the order table. 96 * 97 * - If the value in the redirect table is zero, then there is no matching 98 * entry. 99 * 100 * - Note that the resulting entry needs to be validated to ensure a match. 101 * This is typically done by comparing the key with the key in entry. 102 */ 103public class PerfectHashBuilder<E> { 104 private static final int RETRY_LIMIT = 1000; 105 106 private Class<?> entryComponent; 107 private Class<?> bucketComponent; 108 109 private final Map<String, Entry<E>> map = new LinkedHashMap<>(); 110 private int[] redirect; 111 private Entry<E>[] order; 112 private int count = 0; 113 114 @SuppressWarnings("EqualsAndHashcode") 115 public static class Entry<E> { 116 private final String key; 117 private final E value; 118 119 Entry() { 120 this("", null); 121 } 122 123 Entry(String key, E value) { 124 this.key = key; 125 this.value = value; 126 } 127 128 String getKey() { 129 return key; 130 } 131 132 E getValue() { 133 return value; 134 } 135 136 int hashCode(int seed) { 137 return ImageStringsReader.hashCode(key, seed); 138 } 139 140 @Override 141 public int hashCode() { 142 return ImageStringsReader.hashCode(key); 143 } 144 145 @Override 146 public boolean equals(Object other) { 147 if (other == this) { 148 return true; 149 } 150 if (!(other instanceof Entry)) { 151 return false; 152 } 153 Entry<?> entry = (Entry<?>) other; 154 return entry.key.equals(key); 155 } 156 } 157 158 static class Bucket<E> implements Comparable<Bucket<E>> { 159 final List<Entry<E>> list = new ArrayList<>(); 160 161 void add(Entry<E> entry) { 162 list.add(entry); 163 } 164 165 int getSize() { 166 return list.size(); 167 } 168 169 List<Entry<E>> getList() { 170 return list; 171 } 172 173 Entry<E> getFirst() { 174 assert !list.isEmpty() : "bucket should never be empty"; 175 return list.get(0); 176 } 177 178 @Override 179 public int hashCode() { 180 return getFirst().hashCode(); 181 } 182 183 @Override 184 @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") 185 public boolean equals(Object obj) { 186 return this == obj; 187 } 188 189 @Override 190 public int compareTo(Bucket<E> o) { 191 return o.getSize() - getSize(); 192 } 193 } 194 195 public PerfectHashBuilder(Class<?> entryComponent, Class<?> bucketComponent) { 196 this.entryComponent = entryComponent; 197 this.bucketComponent = bucketComponent; 198 } 199 200 public int getCount() { 201 return map.size(); 202 } 203 204 public int[] getRedirect() { 205 return redirect.clone(); 206 } 207 208 public Entry<E>[] getOrder() { 209 return order.clone(); 210 } 211 212 public Entry<E> put(String key, E value) { 213 return put(new Entry<>(key, value)); 214 } 215 216 public Entry<E> put(Entry<E> entry) { 217 Entry<E> old = map.put(entry.key, entry); 218 219 if (old == null) { 220 count++; 221 } 222 223 return old; 224 } 225 226 @SuppressWarnings("unchecked") 227 public void generate() { 228 // If the table is empty then exit early. 229 boolean redo = count != 0; 230 231 // Repeat until a valid packing is achieved. 232 while (redo) { 233 redo = false; 234 235 // Allocate the resulting redirect and order tables. 236 redirect = new int[count]; 237 order = (Entry<E>[])Array.newInstance(entryComponent, count); 238 239 // Place all the entries in bucket chains based on hash. Sort by 240 // length of chain. 241 Bucket<E>[] sorted = createBuckets(); 242 int free = 0; 243 244 // Iterate through the chains, longest first. 245 for (Bucket<E> bucket : sorted) { 246 if (bucket.getSize() != 1) { 247 // Attempt to pack entries until no collisions occur. 248 if (!collidedEntries(bucket, count)) { 249 // Failed to pack. Meed to grow table. 250 redo = true; 251 break; 252 } 253 } else { 254 // A no collision entry (bucket.getSize() == 1). Find a free 255 // spot in the order table. 256 for ( ; free < count && order[free] != null; free++) {} 257 258 // If none found, then grow table. 259 if (free >= count) { 260 redo = true; 261 break; 262 } 263 264 // Store entry in order table. 265 order[free] = bucket.getFirst(); 266 // Twos complement of order index stired in the redirect table. 267 redirect[(bucket.hashCode() & 0x7FFFFFFF) % count] = -1 - free; 268 // Update free slot index. 269 free++; 270 } 271 } 272 273 // If packing failed, then bump table size. Make odd to increase 274 // chances of being relatively prime. 275 if (redo) { 276 count = (count + 1) | 1; 277 } 278 } 279 } 280 281 @SuppressWarnings("unchecked") 282 private Bucket<E>[] createBuckets() { 283 // Build bucket chains based on key hash. Collisions end up in same chain. 284 Bucket<E>[] buckets = (Bucket<E>[])Array.newInstance(bucketComponent, count); 285 286 map.values().stream().forEach((entry) -> { 287 int index = (entry.hashCode() & 0x7FFFFFFF) % count; 288 Bucket<E> bucket = buckets[index]; 289 290 if (bucket == null) { 291 buckets[index] = bucket = new Bucket<>(); 292 } 293 294 bucket.add(entry); 295 }); 296 297 // Sort chains, longest first. 298 Bucket<E>[] sorted = Arrays.asList(buckets).stream() 299 .filter((bucket) -> (bucket != null)) 300 .sorted() 301 .toArray((length) -> { 302 return (Bucket<E>[])Array.newInstance(bucketComponent, length); 303 }); 304 305 return sorted; 306 } 307 308 private boolean collidedEntries(Bucket<E> bucket, int count) { 309 // Track packing attempts. 310 List<Integer> undo = new ArrayList<>(); 311 // Start with a new hash seed. 312 int seed = ImageStringsReader.HASH_MULTIPLIER + 1; 313 int retry = 0; 314 315 // Attempt to pack all the entries in a single chain. 316 redo: 317 while (true) { 318 for (Entry<E> entry : bucket.getList()) { 319 // Compute new hash. 320 int index = entry.hashCode(seed) % count; 321 322 // If a collision is detected. 323 if (order[index] != null) { 324 // Only retry so many times with current table size. 325 if (++retry > RETRY_LIMIT) { 326 return false; 327 } 328 329 // Undo the attempted packing. 330 undo.stream().forEach((i) -> { 331 order[i] = null; 332 }); 333 334 // Reset the undo list and bump up the hash seed. 335 undo.clear(); 336 seed++; 337 338 // Zero seed is not valid. 339 if (seed == 0) { 340 seed = 1; 341 } 342 343 // Try again. 344 continue redo; 345 } 346 347 // No collision. 348 order[index] = entry; 349 undo.add(index); 350 } 351 352 // Entire chain packed. Record hash seed used. 353 redirect[(bucket.hashCode() & 0x7FFFFFFF) % count] = seed; 354 355 break; 356 } 357 358 return true; 359 } 360 } 361