1/*
2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.tools.jlink.internal;
27
28import java.lang.reflect.Array;
29import java.util.ArrayList;
30import java.util.Arrays;
31import java.util.LinkedHashMap;
32import java.util.List;
33import java.util.Map;
34import jdk.internal.jimage.ImageStringsReader;
35
36/*
37 * The algorithm used here is outlined in Applications of Finite Automata
38 * Representing Large Vocabularies - Claudio L Lucchesi and Tomasz Kowaltowski,
39 * 1992, and A Practical Minimal Perfect Hashing Method - Fabiano C. Botelho1,
40 * Yoshiharu Kohayakawa, and Nivio Ziviani, 2005.
41 *
42 * The primary JDK use of this algorithm is managing the jimage location index.
43 *
44 * The goal of PerfectHashBuilder is to construct an automaton which maps a
45 * string key to a unique index 0..N-1, where N is the number of key-value pairs.
46 * What makes MPHM effective is that the size of the lookup table is N or very
47 * near N, and the minimum lookup is O(1) maximum lookup is O(2).
48 *
49 * The result of PerfectHashBuilder is two integer arrays, redirect and order.
50 * The redirect table provides a 1-1 mapping to the order table, using the
51 * reader algorithm described further on.  The order table provides a mapping
52 * to entries.  If entries are fixed size and can be put in a direct table, then
53 * the order table can be used to construct the direct table and then discarded.
54 *
55 * The steps for constructing the lookup tables are as follows;
56 *
57 *   - Compute an MPHM hash for each key, based on a fixed base value modulo N.
58 *     Note, the hash is based on the modified UTF-8 of the key, simplifying
59 *     computation in native code.
60 *
61 *   - Combine keys that map to the same hash code (collisions) into bucket
62 *     chains.
63 *
64 *   - Sort bucket chains by length of chains, longest first (most collisions.)
65 *     Sorting is done to pack the redirect table with the worst collision
66 *     offenders first.
67 *
68 *   - For each chain, recompute the hash of each key using a new base value.
69 *     Recomputation should give a different key distribution. A tally is kept
70 *     of where the key maps, using the order table. The tally is used to detect
71 *     new collisions. If there are further collisions, then restart
72 *     redistribution using a different hash base value.  If a chain is
73 *     successfully distributed, then the base value used to compute the hash
74 *     is recorded in the redirect table.
75 *
76 *   - Once all colliding chains are resolved (length > 1), then the chains with
77 *     only one entry are used to fill in the empty slots in the order table.
78 *     These keys are recorded in the redirect table using the twos complement
79 *     of the order index.
80 *
81 *   - It is possible that a given set of keys cannot be packed into a table of
82 *     size N.  If this situation occurs then the size of the table is
83 *     adjusted so that keys distribute differently.
84 *
85 * Readers algoritm;
86 *
87 *   - Compute the hash for the key using the fixed base value modulo N.  This
88 *     will provide an index into the redirect table. The integer value in the
89 *     redirect table will determine the next step.
90 *
91 *   - If the value in the redirect table is positive, then that value is used
92 *     to rehash the key to get the index into the order table.
93 *
94 *   - If the value in the redirect table is negative, then that value is the
95 *     twos complement of the index into the order table.
96 *
97 *   - If the value in the redirect table is zero, then there is no matching
98 *     entry.
99 *
100 *   - Note that the resulting entry needs to be validated to ensure a match.
101 *     This is typically done by comparing the key with the key in entry.
102 */
103public class PerfectHashBuilder<E> {
104    private static final int RETRY_LIMIT = 1000;
105
106    private Class<?> entryComponent;
107    private Class<?> bucketComponent;
108
109    private final Map<String, Entry<E>> map = new LinkedHashMap<>();
110    private int[] redirect;
111    private Entry<E>[] order;
112    private int count = 0;
113
114    @SuppressWarnings("EqualsAndHashcode")
115    public static class Entry<E> {
116        private final String key;
117        private final E value;
118
119        Entry() {
120            this("", null);
121        }
122
123        Entry(String key, E value) {
124            this.key = key;
125            this.value = value;
126        }
127
128        String getKey() {
129            return key;
130        }
131
132        E getValue() {
133            return value;
134        }
135
136        int hashCode(int seed) {
137            return ImageStringsReader.hashCode(key, seed);
138        }
139
140        @Override
141        public int hashCode() {
142            return ImageStringsReader.hashCode(key);
143        }
144
145        @Override
146        public boolean equals(Object other) {
147            if (other == this) {
148                return true;
149            }
150            if (!(other instanceof Entry)) {
151                return false;
152            }
153            Entry<?> entry = (Entry<?>) other;
154            return entry.key.equals(key);
155        }
156    }
157
158    static class Bucket<E> implements Comparable<Bucket<E>> {
159        final List<Entry<E>> list = new ArrayList<>();
160
161        void add(Entry<E> entry) {
162            list.add(entry);
163        }
164
165        int getSize() {
166            return list.size();
167        }
168
169        List<Entry<E>> getList() {
170            return list;
171        }
172
173        Entry<E> getFirst() {
174            assert !list.isEmpty() : "bucket should never be empty";
175            return list.get(0);
176        }
177
178        @Override
179        public int hashCode() {
180            return getFirst().hashCode();
181        }
182
183        @Override
184        @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
185        public boolean equals(Object obj) {
186            return this == obj;
187        }
188
189        @Override
190        public int compareTo(Bucket<E> o) {
191            return o.getSize() - getSize();
192        }
193    }
194
195    public PerfectHashBuilder(Class<?> entryComponent, Class<?> bucketComponent) {
196        this.entryComponent = entryComponent;
197        this.bucketComponent = bucketComponent;
198    }
199
200    public int getCount() {
201        return map.size();
202    }
203
204    public int[] getRedirect() {
205        return redirect.clone();
206    }
207
208    public Entry<E>[] getOrder() {
209        return order.clone();
210    }
211
212    public Entry<E> put(String key, E value) {
213        return put(new Entry<>(key, value));
214    }
215
216    public Entry<E> put(Entry<E> entry) {
217        Entry<E> old = map.put(entry.key, entry);
218
219        if (old == null) {
220            count++;
221        }
222
223        return old;
224    }
225
226    @SuppressWarnings("unchecked")
227    public void generate() {
228        // If the table is empty then exit early.
229        boolean redo = count != 0;
230
231        // Repeat until a valid packing is achieved.
232        while (redo) {
233            redo = false;
234
235            // Allocate the resulting redirect and order tables.
236            redirect = new int[count];
237            order = (Entry<E>[])Array.newInstance(entryComponent, count);
238
239            // Place all the entries in bucket chains based on hash. Sort by
240            // length of chain.
241            Bucket<E>[] sorted = createBuckets();
242            int free = 0;
243
244            // Iterate through the chains, longest first.
245            for (Bucket<E> bucket : sorted) {
246                if (bucket.getSize() != 1) {
247                    // Attempt to pack entries until no collisions occur.
248                    if (!collidedEntries(bucket, count)) {
249                        // Failed to pack. Meed to grow table.
250                        redo = true;
251                        break;
252                    }
253                } else {
254                    // A no collision entry (bucket.getSize() == 1). Find a free
255                    // spot in the order table.
256                    for ( ; free < count && order[free] != null; free++) {}
257
258                    // If none found, then grow table.
259                    if (free >= count) {
260                        redo = true;
261                        break;
262                    }
263
264                    // Store entry in order table.
265                    order[free] = bucket.getFirst();
266                    // Twos complement of order index stired in the redirect table.
267                    redirect[(bucket.hashCode() & 0x7FFFFFFF) % count] = -1 - free;
268                    // Update free slot index.
269                    free++;
270                }
271            }
272
273            // If packing failed, then bump table size. Make odd to increase
274            // chances of being relatively prime.
275            if (redo) {
276                count = (count + 1) | 1;
277            }
278        }
279    }
280
281    @SuppressWarnings("unchecked")
282    private Bucket<E>[] createBuckets() {
283        // Build bucket chains based on key hash.  Collisions end up in same chain.
284        Bucket<E>[] buckets = (Bucket<E>[])Array.newInstance(bucketComponent, count);
285
286        map.values().stream().forEach((entry) -> {
287            int index = (entry.hashCode() & 0x7FFFFFFF) % count;
288            Bucket<E> bucket = buckets[index];
289
290            if (bucket == null) {
291                buckets[index] = bucket = new Bucket<>();
292            }
293
294            bucket.add(entry);
295        });
296
297        // Sort chains, longest first.
298        Bucket<E>[] sorted = Arrays.asList(buckets).stream()
299                .filter((bucket) -> (bucket != null))
300                .sorted()
301                .toArray((length) -> {
302                    return (Bucket<E>[])Array.newInstance(bucketComponent, length);
303                });
304
305        return sorted;
306    }
307
308    private boolean collidedEntries(Bucket<E> bucket, int count) {
309        // Track packing attempts.
310        List<Integer> undo = new ArrayList<>();
311        // Start with a new hash seed.
312        int seed = ImageStringsReader.HASH_MULTIPLIER + 1;
313        int retry = 0;
314
315        // Attempt to pack all the entries in a single chain.
316        redo:
317        while (true) {
318            for (Entry<E> entry : bucket.getList()) {
319                // Compute new hash.
320                int index = entry.hashCode(seed) % count;
321
322                // If a collision is detected.
323                if (order[index] != null) {
324                    // Only retry so many times with current table size.
325                    if (++retry > RETRY_LIMIT) {
326                        return false;
327                    }
328
329                    // Undo the attempted packing.
330                    undo.stream().forEach((i) -> {
331                        order[i] = null;
332                    });
333
334                    // Reset the undo list and bump up the hash seed.
335                    undo.clear();
336                    seed++;
337
338                    // Zero seed is not valid.
339                    if (seed == 0) {
340                        seed = 1;
341                    }
342
343                    // Try again.
344                    continue redo;
345                }
346
347                // No collision.
348                order[index] = entry;
349                undo.add(index);
350            }
351
352            // Entire chain packed. Record hash seed used.
353            redirect[(bucket.hashCode() & 0x7FFFFFFF) % count] = seed;
354
355            break;
356        }
357
358        return true;
359    }
360 }
361