nbperf-bdz.c revision 1.3
1/*	$NetBSD: nbperf-bdz.c,v 1.3 2010/03/01 21:46:58 joerg Exp $	*/
2/*-
3 * Copyright (c) 2009 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Joerg Sonnenberger.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in
17 *    the documentation and/or other materials provided with the
18 *    distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__RCSID("$NetBSD: nbperf-bdz.c,v 1.3 2010/03/01 21:46:58 joerg Exp $");
36
37#include <err.h>
38#include <inttypes.h>
39#include <stdlib.h>
40#include <stdio.h>
41#include <string.h>
42
43#include "nbperf.h"
44
45/*
46 * A full description of the algorithm can be found in:
47 * "Simple and Space-Efficient Minimal Perfect Hash Functions"
48 * by Botelho, Pagh and Ziviani, proceeedings of WADS 2007.
49 */
50
51/*
52 * The algorithm is based on random, acyclic 3-graphs.
53 *
54 * Each edge in the represents a key.  The vertices are the reminder of
55 * the hash function mod n.  n = cm with c > 1.23.  This ensures that
56 * an acyclic graph can be found with a very high probality.
57 *
58 * An acyclic graph has an edge order, where at least one vertex of
59 * each edge hasn't been seen before.   It is declares the first unvisited
60 * vertex as authoritive for the edge and assigns a 2bit value to unvisited
61 * vertices, so that the sum of all vertices of the edge modulo 4 is
62 * the index of the authoritive vertex.
63 */
64
65#include "graph3.h"
66
67struct state {
68	struct graph3 graph;
69	uint32_t *visited;
70	uint32_t *holes64k;
71	uint16_t *holes256;
72	uint8_t *holes256_64;
73	uint8_t *holes256_128;
74	uint8_t *holes256_192;
75	uint8_t *g;
76	uint32_t *result_map;
77};
78
79static void
80assign_nodes(struct state *state)
81{
82	struct edge3 *e;
83	size_t i, j;
84	uint32_t t, r, holes;
85
86	for (i = 0; i < state->graph.v; ++i)
87		state->g[i] = 3;
88
89	for (i = 0; i < state->graph.e; ++i) {
90		j = state->graph.output_order[i];
91		e = &state->graph.edges[j];
92		if (!state->visited[e->left]) {
93			r = 0;
94			t = e->left;
95		} else if (!state->visited[e->middle]) {
96			r = 1;
97			t = e->middle;
98		} else {
99			if (state->visited[e->right])
100				abort();
101			r = 2;
102			t = e->right;
103		}
104
105		state->visited[t] = 2 + j;
106		if (state->visited[e->left] == 0)
107			state->visited[e->left] = 1;
108		if (state->visited[e->middle] == 0)
109			state->visited[e->middle] = 1;
110		if (state->visited[e->right] == 0)
111			state->visited[e->right] = 1;
112
113		state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle]
114		    - state->g[e->right]) % 3;
115	}
116
117	holes = 0;
118	for (i = 0; i < state->graph.v; ++i) {
119		if (i % 65536 == 0)
120			state->holes64k[i >> 16] = holes;
121
122		if (i % 256 == 0)
123			state->holes256[i >> 8] = holes - state->holes64k[i >> 16];
124
125		if (i % 256 == 64)
126			state->holes256_64[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
127
128		if (i % 256 == 128)
129			state->holes256_128[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
130
131		if (i % 256 == 192)
132			state->holes256_192[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16];
133
134		if (state->visited[i] > 1) {
135			j = state->visited[i] - 2;
136			state->result_map[j] = i - holes;
137		}
138
139		if (state->g[i] == 3)
140			++holes;
141	}
142
143	if (i % 65536 != 0)
144		state->holes64k[(i >> 16) + 1] = holes;
145
146	if (i % 256 != 0)
147		state->holes256[(i >> 8) + 1] = holes - state->holes64k[((i >> 8) + 1) >> 8];
148
149	if (i % 256 != 64)
150		state->holes256_64[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
151
152	if (i % 256 != 128)
153		state->holes256_128[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
154
155	if (i % 256 != 192)
156		state->holes256_192[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8];
157}
158
159static void
160print_hash(struct nbperf *nbperf, struct state *state)
161{
162	size_t i, j;
163	uint32_t sum;
164
165	fprintf(nbperf->output, "#include <stdlib.h>\n");
166	fprintf(nbperf->output, "#include <strings.h>\n\n");
167
168	fprintf(nbperf->output, "%suint32_t\n",
169	    nbperf->static_hash ? "static " : "");
170	fprintf(nbperf->output,
171	    "%s(const void * __restrict key, size_t keylen)\n",
172	    nbperf->hash_name);
173	fprintf(nbperf->output, "{\n");
174	fprintf(nbperf->output,
175	    "\tstatic const uint32_t g[%" PRId32 "] = {\n",
176	    (state->graph.v + 15) / 16);
177	for (i = 0; i < state->graph.v; i += 16) {
178		for (j = 0, sum = 0; j < 16; ++j)
179			sum |= (uint32_t)state->g[i + j] << (2 * j);
180
181		fprintf(nbperf->output, "%s0x%08" PRIx32 "ULL,%s",
182		    (i / 16 % 4 == 0 ? "\t    " : " "),
183		    sum,
184		    (i / 16 % 4 == 3 ? "\n" : ""));
185	}
186	fprintf(nbperf->output, "%s\t};\n", (i / 16 % 4 ? "\n" : ""));
187
188	fprintf(nbperf->output,
189	    "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n",
190	    (state->graph.v + 65535) / 65536);
191	for (i = 0; i < state->graph.v; i += 65536)
192		fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s",
193		    (i / 65536 % 4 == 0 ? "\t    " : " "),
194		    state->holes64k[i >> 16],
195		    (i / 65536 % 4 == 3 ? "\n" : ""));
196	fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : ""));
197
198	fprintf(nbperf->output,
199	    "\tstatic const uint16_t holes256[%" PRId32 "] = {\n",
200	    (state->graph.v + 255) / 256);
201	for (i = 0; i < state->graph.v; i += 256)
202		fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s",
203		    (i / 256 % 4 == 0 ? "\t    " : " "),
204		    state->holes256[i >> 8],
205		    (i / 256 % 4 == 3 ? "\n" : ""));
206	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
207
208	fprintf(nbperf->output,
209	    "\tstatic const uint8_t holes256_64[%" PRId32 "] = {\n",
210	    (state->graph.v + 255) / 256);
211	for (i = 64; i < state->graph.v; i += 256)
212		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
213		    (i / 256 % 4 == 0 ? "\t    " : " "),
214		    state->holes256_64[i >> 8],
215		    (i / 256 % 4 == 3 ? "\n" : ""));
216	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
217
218	fprintf(nbperf->output,
219	    "\tstatic const uint8_t holes256_128[%" PRId32 "] = {\n",
220	    (state->graph.v + 255) / 256);
221	for (i = 128; i < state->graph.v; i += 256)
222		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
223		    (i / 256 % 4 == 0 ? "\t    " : " "),
224		    state->holes256_128[i >> 8],
225		    (i / 256 % 4 == 3 ? "\n" : ""));
226	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
227
228	fprintf(nbperf->output,
229	    "\tstatic const uint8_t holes256_192[%" PRId32 "] = {\n",
230	    (state->graph.v + 255) / 256);
231	for (i = 192; i < state->graph.v; i += 256)
232		fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s",
233		    (i / 256 % 4 == 0 ? "\t    " : " "),
234		    state->holes256_192[i >> 8],
235		    (i / 256 % 4 == 3 ? "\n" : ""));
236	fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : ""));
237
238	fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size);
239	fprintf(nbperf->output, "\tuint32_t m;\n");
240	fprintf(nbperf->output, "\tuint32_t a1, a2, b1, b2, c1, c2, idx, idx2;\n\n");
241
242	(*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h");
243
244	fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", state->graph.v);
245	fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", state->graph.v);
246	fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", state->graph.v);
247
248	fprintf(nbperf->output, "\n\ta1 = h[0] >> 4;\n");
249	fprintf(nbperf->output, "\ta2 = 2 * (h[0] & 15);\n");
250	fprintf(nbperf->output, "\tb1 = h[1] >> 4;\n");
251	fprintf(nbperf->output, "\tb2 = 2 * (h[1] & 15);\n");
252	fprintf(nbperf->output, "\tc1 = h[2] >> 4;\n");
253	fprintf(nbperf->output, "\tc2 = 2 * (h[2] & 15);\n");
254
255	fprintf(nbperf->output,
256	    "\tidx = h[(((g[a1] >> a2) & 3) + ((g[b1] >> b2) & 3) +\n"
257	    "\t    ((g[c1] >> c2) & 3)) %% 3];\n\n");
258
259	fprintf(nbperf->output,
260	    "\tswitch ((idx >> 5) & 7) {\n"
261	    "\tcase 0:\n"
262	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8];\n"
263	    "\t\tbreak;\n"
264	    "\tcase 1: case 2:\n"
265	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
266	    "\t\t    - holes256_64[idx >> 8];\n"
267	    "\t\tbreak;\n"
268	    "\tcase 3: case 4:\n"
269	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
270	    "\t\t    - holes256_128[idx >> 8];\n"
271	    "\t\tbreak;\n"
272	    "\tcase 5: case 6:\n"
273	    "\t\tidx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n"
274	    "\t\t    - holes256_192[idx >> 8];\n"
275	    "\t\tbreak;\n"
276	    "\tcase 7:\n"
277	    "\t\tidx2 = idx - holes64k[(idx + 32) >> 16] -\n"
278	    "\t\t    holes256[(idx + 32) >> 8];\n"
279	    "\t\tbreak;\n"
280	    "\tdefault:\n"
281	    "\t\tabort();\n"
282	    "\t}\n"
283	    "\tswitch ((idx >> 4) & 3) {\n"
284	    "\tcase 1:\n"
285	    "\t\tm = (g[(idx >> 4) - 1] & (g[(idx >> 4) - 1] >> 1) & 0x55555555U);\n"
286	    "\t\tidx2 -= popcount32(m);\n"
287	    "\tcase 0:\n"
288	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
289	    "\t\tm &= ((2U << (2 * (idx & 15))) - 1);\n"
290	    "\t\tidx2 -= popcount32(m);\n"
291	    "\t\tbreak;\n"
292	    "\tcase 2:\n"
293	    "\t\tm = (g[(idx >> 4) + 1] & (g[(idx >> 4) + 1] >> 1) & 0x55555555U);\n"
294	    "\t\tidx2 += popcount32(m);\n"
295	    "\tcase 3:\n"
296	    "\t\tm = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n"
297	    "\t\tm &= ~((2U << (2 * (idx & 15))) - 1);\n"
298	    "\t\tidx2 += popcount32(m);\n"
299	    "\t\tbreak;\n"
300	    "\t}\n\n");
301
302	fprintf(nbperf->output,
303	    "\treturn idx2;\n");
304	fprintf(nbperf->output, "}\n");
305
306	if (nbperf->map_output != NULL) {
307		for (i = 0; i < state->graph.e; ++i)
308			fprintf(nbperf->map_output, "%" PRIu32 "\n",
309			    state->result_map[i]);
310	}
311}
312
313int
314bdz_compute(struct nbperf *nbperf)
315{
316	struct state state;
317	int retval = -1;
318	uint32_t v, e;
319
320	if (nbperf->c == 0)
321		nbperf->c = 1.24;
322	if (nbperf->c < 1.24)
323		errx(1, "The argument for option -c must be at least 1.24");
324	if (nbperf->hash_size < 3)
325		errx(1, "The hash function must generate at least 3 values");
326
327	(*nbperf->seed_hash)(nbperf);
328	e = nbperf->n;
329	v = nbperf->c * nbperf->n;
330	if (1.24 * nbperf->n > v)
331		++v;
332	if (v < 10)
333		v = 10;
334
335	graph3_setup(&state.graph, v, e);
336
337	state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536 + 1);
338	state.holes256 = calloc(sizeof(uint16_t), (v + 255) / 256 + 1);
339	state.holes256_64 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
340	state.holes256_128 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
341	state.holes256_192 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1);
342	state.g = calloc(sizeof(uint32_t), v);
343	state.visited = calloc(sizeof(uint32_t), v);
344	state.result_map = calloc(sizeof(uint32_t), e);
345
346	if (state.holes64k == NULL || state.holes256 == NULL ||
347	    state.holes256_64 == NULL || state.holes256_128 == NULL ||
348	    state.holes256_192 == NULL || state.g == NULL ||
349	    state.visited == NULL || state.result_map == NULL)
350		err(1, "malloc failed");
351
352	if (graph3_hash(nbperf, &state.graph))
353		goto failed;
354	if (graph3_output_order(&state.graph))
355		goto failed;
356	assign_nodes(&state);
357	print_hash(nbperf, &state);
358
359	retval = 0;
360
361failed:
362	graph3_free(&state.graph);
363	free(state.visited);
364	free(state.g);
365	free(state.holes64k);
366	free(state.holes256);
367	free(state.holes256_64);
368	free(state.holes256_128);
369	free(state.holes256_192);
370	free(state.result_map);
371	return retval;
372}
373