nbperf-bdz.c revision 1.4
1/*	$NetBSD: nbperf-bdz.c,v 1.4 2011/10/21 23:47:11 joerg Exp $	*/
2/*-
3 * Copyright (c) 2009 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Joerg Sonnenberger.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in
17 *    the documentation and/or other materials provided with the
18 *    distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if HAVE_NBTOOL_CONFIG_H
35#include "nbtool_config.h"
36#endif
37
38#include <sys/cdefs.h>
39__RCSID("$NetBSD: nbperf-bdz.c,v 1.4 2011/10/21 23:47:11 joerg Exp $");
40
41#include <err.h>
42#include <inttypes.h>
43#include <stdlib.h>
44#include <stdio.h>
45#include <string.h>
46
47#include "nbperf.h"
48
49/*
50 * A full description of the algorithm can be found in:
51 * "Simple and Space-Efficient Minimal Perfect Hash Functions"
52 * by Botelho, Pagh and Ziviani, proceeedings of WADS 2007.
53 */
54
55/*
56 * The algorithm is based on random, acyclic 3-graphs.
57 *
58 * Each edge in the represents a key.  The vertices are the reminder of
59 * the hash function mod n.  n = cm with c > 1.23.  This ensures that
60 * an acyclic graph can be found with a very high probality.
61 *
62 * An acyclic graph has an edge order, where at least one vertex of
63 * each edge hasn't been seen before.   It is declares the first unvisited
64 * vertex as authoritive for the edge and assigns a 2bit value to unvisited
65 * vertices, so that the sum of all vertices of the edge modulo 4 is
66 * the index of the authoritive vertex.
67 */
68
69#include "graph3.h"
70
71struct state {
72	struct graph3 graph;
73	uint32_t *visited;
74	uint32_t *holes64k;
75	uint16_t *holes256;
76	uint8_t *holes256_64;
77	uint8_t *holes256_128;
78	uint8_t *holes256_192;
79	uint8_t *g;
80	uint32_t *result_map;
81};
82
83static void
84assign_nodes(struct state *state)
85{
86	struct edge3 *e;
87	size_t i, j;
88	uint32_t t, r, holes;
89
90	for (i = 0; i < state->graph.v; ++i)
91		state->g[i] = 3;
92
93	for (i = 0; i < state->graph.e; ++i) {
94		j = state->graph.output_order[i];
95		e = &state->graph.edges[j];
96		if (!state->visited[e->left]) {
97			r = 0;
98			t = e->left;
99		} else if (!state->visited[e->middle]) {
100			r = 1;
101			t = e->middle;
102		} else {
103			if (state->visited[e->right])
104				abort();
105			r = 2;
106			t = e->right;
107		}
108
109		state->visited[t] = 2 + j;
110		if (state->visited[e->left] == 0)
111			state->visited[e->left] = 1;
112		if (state->visited[e->middle] == 0)
113			state->visited[e->middle] = 1;
114		if (state->visited[e->right] == 0)
115			state->visited[e->right] = 1;
116
117		state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle]
118		    - state->g[e->right]) % 3;
119	}
120
121	holes = 0;
122	for (i = 0; i < state->graph.v; ++i) {
123		if (i % 65536 == 0)
124			state->holes64k[i >> 16] = holes;
125
126		if (i % 256 == 0)
127			state->holes256[i >> 8] = holes - state->holes64k[i >> 16];
128
129		if (i % 256 == 64)
130			state->holes256_64[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
131
132		if (i % 256 == 128)
133			state->holes256_128[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
134
135		if (i % 256 == 192)
136			state->holes256_192[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
137
138		if (state->visited[i] > 1) {
139			j = state->visited[i] - 2;
140			state->result_map[j] = i - holes;
141		}
142
143		if (state->g[i] == 3)
144			++holes;
145	}
146
147	if (i % 65536 != 0)
148		state->holes64k[(i >> 16) + 1] = holes;
149
150	if (i % 256 != 0)
151		state->holes256[(i >> 8) + 1] = holes - state->holes64k[((i >> 8) + 1) >> 8];
152
153	if (i % 256 != 64)
154		state->holes256_64[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
155
156	if (i % 256 != 128)
157		state->holes256_128[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
158
159	if (i % 256 != 192)
160		state->holes256_192[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
161}
162
163static void
164print_hash(struct nbperf *nbperf, struct state *state)
165{
166	size_t i, j;
167	uint32_t sum;
168
169	fprintf(nbperf->output, "#include <stdlib.h>\n");
170	fprintf(nbperf->output, "#include <strings.h>\n\n");
171
172	fprintf(nbperf->output, "%suint32_t\n",
173	    nbperf->static_hash ? "static " : "");
174	fprintf(nbperf->output,
175	    "%s(const void * __restrict key, size_t keylen)\n",
176	    nbperf->hash_name);
177	fprintf(nbperf->output, "{\n");
178	fprintf(nbperf->output,
179	    "\tstatic const uint32_t g[%" PRId32 "] = {\n",
180	    (state->graph.v + 15) / 16);
181	for (i = 0; i < state->graph.v; i += 16) {
182		for (j = 0, sum = 0; j < 16; ++j)
183			sum |= (uint32_t)state->g[i + j] << (2 * j);
184
185		fprintf(nbperf->output, "%s0x%08" PRIx32 "ULL,%s",
186		    (i / 16 % 4 == 0 ? "\t    " : " "),
187		    sum,
188		    (i / 16 % 4 == 3 ? "\n" : ""));
189	}
190	fprintf(nbperf->output, "%s\t};\n", (i / 16 % 4 ? "\n" : ""));
191
192	fprintf(nbperf->output,
193	    "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
194	    (state->graph.v + 65535) / 65536);
195	for (i = 0; i < state->graph.v; i += 65536)
196		fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
197		    (i / 65536 % 4 == 0 ? "\t    " : " "),
198		    state->holes64k[i >> 16],
199		    (i / 65536 % 4 == 3 ? "\n" : ""));
200	fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
201
202	fprintf(nbperf->output,
203	    "\tstatic const uint16_t holes256[%" PRId32 "] = {\n",
204	    (state->graph.v + 255) / 256);
205	for (i = 0; i < state->graph.v; i += 256)
206		fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
207		    (i / 256 % 4 == 0 ? "\t    " : " "),
208		    state->holes256[i >> 8],
209		    (i / 256 % 4 == 3 ? "\n" : ""));
210	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
211
212	fprintf(nbperf->output,
213	    "\tstatic const uint8_t holes256_64[%" PRId32 "] = {\n",
214	    (state->graph.v + 255) / 256);
215	for (i = 64; i < state->graph.v; i += 256)
216		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
217		    (i / 256 % 4 == 0 ? "\t    " : " "),
218		    state->holes256_64[i >> 8],
219		    (i / 256 % 4 == 3 ? "\n" : ""));
220	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
221
222	fprintf(nbperf->output,
223	    "\tstatic const uint8_t holes256_128[%" PRId32 "] = {\n",
224	    (state->graph.v + 255) / 256);
225	for (i = 128; i < state->graph.v; i += 256)
226		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
227		    (i / 256 % 4 == 0 ? "\t    " : " "),
228		    state->holes256_128[i >> 8],
229		    (i / 256 % 4 == 3 ? "\n" : ""));
230	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
231
232	fprintf(nbperf->output,
233	    "\tstatic const uint8_t holes256_192[%" PRId32 "] = {\n",
234	    (state->graph.v + 255) / 256);
235	for (i = 192; i < state->graph.v; i += 256)
236		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
237		    (i / 256 % 4 == 0 ? "\t    " : " "),
238		    state->holes256_192[i >> 8],
239		    (i / 256 % 4 == 3 ? "\n" : ""));
240	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
241
242	fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
243	fprintf(nbperf->output, "\tuint32_t m;\n");
244	fprintf(nbperf->output, "\tuint32_t a1, a2, b1, b2, c1, c2, idx, idx2;\n\n");
245
246	(*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
247
248	fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", state->graph.v);
249	fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", state->graph.v);
250	fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", state->graph.v);
251
252	fprintf(nbperf->output, "\n\ta1 = h[0] >> 4;\n");
253	fprintf(nbperf->output, "\ta2 = 2 * (h[0] & 15);\n");
254	fprintf(nbperf->output, "\tb1 = h[1] >> 4;\n");
255	fprintf(nbperf->output, "\tb2 = 2 * (h[1] & 15);\n");
256	fprintf(nbperf->output, "\tc1 = h[2] >> 4;\n");
257	fprintf(nbperf->output, "\tc2 = 2 * (h[2] & 15);\n");
258
259	fprintf(nbperf->output,
260	    "\tidx = h[(((g[a1] >> a2) & 3) + ((g[b1] >> b2) & 3) +\n"
261	    "\t    ((g[c1] >> c2) & 3)) %% 3];\n\n");
262
263	fprintf(nbperf->output,
264	    "\tswitch ((idx >> 5) & 7) {\n"
265	    "\tcase 0:\n"
266	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8];\n"
267	    "\t\tbreak;\n"
268	    "\tcase 1: case 2:\n"
269	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
270	    "\t\t    - holes256_64[idx >> 8];\n"
271	    "\t\tbreak;\n"
272	    "\tcase 3: case 4:\n"
273	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
274	    "\t\t    - holes256_128[idx >> 8];\n"
275	    "\t\tbreak;\n"
276	    "\tcase 5: case 6:\n"
277	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
278	    "\t\t    - holes256_192[idx >> 8];\n"
279	    "\t\tbreak;\n"
280	    "\tcase 7:\n"
281	    "\t\tidx2 = idx - holes64k[(idx + 32) >> 16] -\n"
282	    "\t\t    holes256[(idx + 32) >> 8];\n"
283	    "\t\tbreak;\n"
284	    "\tdefault:\n"
285	    "\t\tabort();\n"
286	    "\t}\n"
287	    "\tswitch ((idx >> 4) & 3) {\n"
288	    "\tcase 1:\n"
289	    "\t\tm = (g[(idx >> 4) - 1] & (g[(idx >> 4) - 1] >> 1) & 0x55555555U);\n"
290	    "\t\tidx2 -= popcount32(m);\n"
291	    "\tcase 0:\n"
292	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
293	    "\t\tm &= ((2U << (2 * (idx & 15))) - 1);\n"
294	    "\t\tidx2 -= popcount32(m);\n"
295	    "\t\tbreak;\n"
296	    "\tcase 2:\n"
297	    "\t\tm = (g[(idx >> 4) + 1] & (g[(idx >> 4) + 1] >> 1) & 0x55555555U);\n"
298	    "\t\tidx2 += popcount32(m);\n"
299	    "\tcase 3:\n"
300	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
301	    "\t\tm &= ~((2U << (2 * (idx & 15))) - 1);\n"
302	    "\t\tidx2 += popcount32(m);\n"
303	    "\t\tbreak;\n"
304	    "\t}\n\n");
305
306	fprintf(nbperf->output,
307	    "\treturn idx2;\n");
308	fprintf(nbperf->output, "}\n");
309
310	if (nbperf->map_output != NULL) {
311		for (i = 0; i < state->graph.e; ++i)
312			fprintf(nbperf->map_output, "%" PRIu32 "\n",
313			    state->result_map[i]);
314	}
315}
316
317int
318bdz_compute(struct nbperf *nbperf)
319{
320	struct state state;
321	int retval = -1;
322	uint32_t v, e;
323
324	if (nbperf->c == 0)
325		nbperf->c = 1.24;
326	if (nbperf->c < 1.24)
327		errx(1, "The argument for option -c must be at least 1.24");
328	if (nbperf->hash_size < 3)
329		errx(1, "The hash function must generate at least 3 values");
330
331	(*nbperf->seed_hash)(nbperf);
332	e = nbperf->n;
333	v = nbperf->c * nbperf->n;
334	if (1.24 * nbperf->n > v)
335		++v;
336	if (v < 10)
337		v = 10;
338
339	graph3_setup(&state.graph, v, e);
340
341	state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536 + 1);
342	state.holes256 = calloc(sizeof(uint16_t), (v + 255) / 256 + 1);
343	state.holes256_64 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
344	state.holes256_128 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
345	state.holes256_192 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
346	state.g = calloc(sizeof(uint32_t), v);
347	state.visited = calloc(sizeof(uint32_t), v);
348	state.result_map = calloc(sizeof(uint32_t), e);
349
350	if (state.holes64k == NULL || state.holes256 == NULL ||
351	    state.holes256_64 == NULL || state.holes256_128 == NULL ||
352	    state.holes256_192 == NULL || state.g == NULL ||
353	    state.visited == NULL || state.result_map == NULL)
354		err(1, "malloc failed");
355
356	if (graph3_hash(nbperf, &state.graph))
357		goto failed;
358	if (graph3_output_order(&state.graph))
359		goto failed;
360	assign_nodes(&state);
361	print_hash(nbperf, &state);
362
363	retval = 0;
364
365failed:
366	graph3_free(&state.graph);
367	free(state.visited);
368	free(state.g);
369	free(state.holes64k);
370	free(state.holes256);
371	free(state.holes256_64);
372	free(state.holes256_128);
373	free(state.holes256_192);
374	free(state.result_map);
375	return retval;
376}
377