1/* $OpenBSD: icdb.c,v 1.8 2016/09/04 16:56:02 nicm Exp $ */
2/*
3 * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <errno.h>
19#include <fcntl.h>
20#include <icdb.h>
21#include <stddef.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <unistd.h>
27
28#include <sys/mman.h>
29#include <sys/stat.h>
30
31#include <siphash.h>
32
33/*
34 * Creating a new icdb: icdb_new
35 * Opening existing icdb: icdb_open
36 *
37 * Adding new entries: icdb_add
38 * Adding entries does not update the disk or indices.
39 *
40 * Save to disk: icdb_save
41 * Update indices: icdb_rehash
42 * icdb_save will call rehash.
43 *
44 * Change an existing entry: icdb_update
45 * Changing entries does write to disk.
46 *
47 * Find an entry: icdb_lookup
48 * Looking up an entry is only defined when the indices are synced.
49 *
50 * Close and free resources: icdb_close
51 */
52
53/*
54 * There are two major modes of operation.
55 *
56 * Existing databases use the mmap codepath. The entire database is mapped
57 * into the address space for quick access. Individual entries may be updated,
58 * but no new entries added.
59 *
60 * New databases use malloc backed memory instead. The database may be saved
61 * with icdb_save. It should be saved to a new file to avoid corrupting any
62 * open databases in other processes.
63 */
64
65/*
66 * An icdb has the following format:
67 *   struct icbinfo header
68 *   indexes [ uint32_t * indexsize * nkeys ]
69 *   entries [ entrysize * nentries ]
70 *
71 * To find an entry in the file, the user specifies which key to use.
72 * The key is hashed and looked up in the index. The index contains the
73 * position of the entry in the entries array. -1 identifies not found.
74 * Chaining is done by rehashing the hash. All keys are fixed size byte arrays.
75 */
76
77/*
78 * Header info for icdb. This struct is stored on disk.
79 */
80struct icdbinfo {
81	uint32_t magic;		/* magic */
82	uint32_t version;	/* user specified version */
83	uint32_t nentries;	/* number of entries stored */
84	uint32_t entrysize;	/* size of each entry */
85	uint32_t indexsize;	/* number of entries in hash index */
86	uint32_t nkeys;		/* number of keys defined */
87	uint32_t keysize[8];	/* size of each key */
88	uint32_t keyoffset[8];	/* offset of each key in entry */
89	SIPHASH_KEY siphashkey;	/* random hash key */
90};
91
92/*
93 * In memory representation with auxiliary data.
94 * idxdata and entries will be written to disk after info.
95 */
96struct icdb {
97	struct icdbinfo *info;
98	void *idxdata[8];
99	void *entries;
100	size_t maplen;
101	uint32_t allocated;
102	int fd;
103};
104
105static const uint32_t magic = 0x1ca9d0b7;
106
107static uint32_t
108roundup(uint32_t num)
109{
110	uint32_t r = 2;
111
112	while (r < num * 3 / 2)
113		r *= 2;
114	return r;
115}
116
117struct icdb *
118icdb_new(uint32_t version, uint32_t nentries, uint32_t entrysize,
119    uint32_t nkeys, const uint32_t *keysizes, const uint32_t *keyoffsets)
120{
121	struct icdb *db;
122	struct icdbinfo *info;
123	int i;
124
125	if (entrysize == 0 || entrysize > 1048576 || nkeys > 8) {
126		errno = EINVAL;
127		return NULL;
128	}
129
130	if (!(db = calloc(1, sizeof(*db))))
131		return NULL;
132	if (!(info = calloc(1, sizeof(*info)))) {
133		free(db);
134		return NULL;
135	}
136	db->info = info;
137	db->fd = -1;
138	info->magic = magic;
139	info->version = version;
140	if (nentries)
141		if ((db->entries = reallocarray(NULL, nentries, entrysize)))
142			db->allocated = nentries;
143	info->entrysize = entrysize;
144	info->nkeys = nkeys;
145	for (i = 0; i < nkeys; i++) {
146		info->keysize[i] = keysizes[i];
147		info->keyoffset[i] = keyoffsets[i];
148	}
149	return db;
150}
151DEF_WEAK(icdb_new);
152
153struct icdb *
154icdb_open(const char *name, int flags, uint32_t version)
155{
156	struct icdb *db = NULL;
157	struct icdbinfo *info;
158	struct stat sb;
159	uint8_t *ptr = MAP_FAILED;
160	uint32_t baseoff, indexsize, idxmask, idxlen;
161	int fd, i, saved_errno;
162
163	if ((fd = open(name, flags | O_CLOEXEC)) == -1)
164		return NULL;
165	if (fstat(fd, &sb) != 0)
166		goto fail;
167	if (sb.st_size < sizeof(struct icdbinfo))
168		goto fail;
169	ptr = mmap(NULL, sb.st_size, PROT_READ |
170	    ((flags & O_RDWR) ? PROT_WRITE : 0), MAP_SHARED, fd, 0);
171	if (ptr == MAP_FAILED)
172		goto fail;
173	info = (struct icdbinfo *)ptr;
174	if (info->magic != magic || info->version != version) {
175		errno = ENOENT;
176		goto fail;
177	}
178
179	if (!(db = calloc(1, sizeof(*db))))
180		goto fail;
181	db->info = info;
182
183	indexsize = info->indexsize;
184	idxmask = indexsize - 1;
185	idxlen = indexsize * sizeof(uint32_t);
186	baseoff = sizeof(*info) + idxlen * info->nkeys;
187
188	for (i = 0; i < info->nkeys; i++)
189		db->idxdata[i] = ptr + sizeof(*info) + i * idxlen;
190	db->entries = ptr + baseoff;
191	db->maplen = sb.st_size;
192	db->fd = fd;
193	return db;
194
195fail:
196	saved_errno = errno;
197	if (ptr != MAP_FAILED)
198		munmap(ptr, sb.st_size);
199	if (fd != -1)
200		close(fd);
201	free(db);
202	errno = saved_errno;
203	return NULL;
204}
205DEF_WEAK(icdb_open);
206
207int
208icdb_get(struct icdb *db, void *entry, uint32_t idx)
209{
210	uint32_t entrysize = db->info->entrysize;
211
212	memcpy(entry, (uint8_t *)db->entries + idx * entrysize, entrysize);
213	return 0;
214}
215DEF_WEAK(icdb_get);
216
217int
218icdb_lookup(struct icdb *db, int keynum, const void *key, void *entry,
219    uint32_t *idxp)
220{
221	struct icdbinfo *info = db->info;
222	uint32_t offset;
223	uint64_t hash;
224	uint32_t indexsize, idxmask, idxlen;
225	uint32_t *idxdata;
226
227	indexsize = info->indexsize;
228	idxmask = indexsize - 1;
229	idxlen = indexsize * sizeof(uint32_t);
230
231	idxdata = db->idxdata[keynum];
232
233	hash = SipHash24(&info->siphashkey, key, info->keysize[keynum]);
234	while ((offset = idxdata[hash & idxmask]) != -1) {
235		if (icdb_get(db, entry, offset) != 0) {
236			errno = ENOENT;
237			return -1;
238		}
239		if (memcmp((uint8_t *)entry + info->keyoffset[keynum], key,
240		    info->keysize[keynum]) == 0) {
241			if (idxp)
242				*idxp = offset;
243			return 0;
244		}
245		hash = SipHash24(&info->siphashkey, &hash, sizeof(hash));
246	}
247	return 1;
248}
249DEF_WEAK(icdb_lookup);
250
251int
252icdb_nentries(struct icdb *db)
253{
254	return db->info->nentries;
255}
256DEF_WEAK(icdb_nentries);
257
258const void *
259icdb_entries(struct icdb *db)
260{
261	return db->entries;
262}
263DEF_WEAK(icdb_entries);
264
265int
266icdb_update(struct icdb *db, const void *entry, int offset)
267{
268	struct icdbinfo *info = db->info;
269	uint32_t entrysize = info->entrysize;
270	uint32_t baseoff;
271	uint32_t indexsize, idxmask, idxlen;
272
273	indexsize = info->indexsize;
274	idxmask = indexsize - 1;
275	idxlen = indexsize * sizeof(uint32_t);
276	baseoff = sizeof(*info) + idxlen * info->nkeys;
277
278	memcpy((uint8_t *)db->entries + offset * entrysize, entry, entrysize);
279	if (db->fd != -1) {
280		msync((uint8_t *)db->entries + offset * entrysize, entrysize,
281		    MS_SYNC);
282	}
283	return 0;
284}
285DEF_WEAK(icdb_update);
286
287int
288icdb_add(struct icdb *db, const void *entry)
289{
290	struct icdbinfo *info = db->info;
291	size_t entrysize = info->entrysize;
292
293	if (db->allocated == info->nentries) {
294		void *p;
295		size_t amt = db->allocated ? db->allocated * 2 : 63;
296		if (!(p = reallocarray(db->entries, amt, entrysize)))
297			return -1;
298		db->allocated = amt;
299		db->entries = p;
300	}
301	memcpy((uint8_t *)db->entries + info->nentries * entrysize,
302	    entry, entrysize);
303	info->nentries++;
304	return 0;
305}
306DEF_WEAK(icdb_add);
307
308int
309icdb_rehash(struct icdb *db)
310{
311	struct icdbinfo *info = db->info;
312	uint32_t entrysize = info->entrysize;
313	uint32_t indexsize, idxmask, idxlen;
314	int i, j;
315
316	indexsize = info->indexsize = roundup(info->nentries);
317	idxmask = indexsize - 1;
318	idxlen = sizeof(uint32_t) * indexsize;
319
320	arc4random_buf(&info->siphashkey, sizeof(info->siphashkey));
321
322	for (i = 0; i < info->nkeys; i++) {
323		uint32_t *idxdata = reallocarray(db->idxdata[i],
324		    indexsize, sizeof(uint32_t));
325		if (!idxdata)
326			return -1;
327		memset(idxdata, 0xff, idxlen);
328		db->idxdata[i] = idxdata;
329	}
330	for (j = 0; j < info->nentries; j++) {
331		for (i = 0; i < info->nkeys; i++) {
332			uint32_t *idxdata = db->idxdata[i];
333			uint64_t hash = SipHash24(&info->siphashkey,
334			    (uint8_t *)db->entries + j * entrysize +
335			    info->keyoffset[i], info->keysize[i]);
336			while (idxdata[hash & idxmask] != -1)
337				hash = SipHash24(&info->siphashkey, &hash, sizeof(hash));
338			idxdata[hash & idxmask] = j;
339		}
340	}
341	return 0;
342}
343DEF_WEAK(icdb_rehash);
344
345int
346icdb_save(struct icdb *db, int fd)
347{
348	struct icdbinfo *info = db->info;
349	uint32_t entrysize = info->entrysize;
350	uint32_t indexsize, idxlen;
351	int i;
352
353	if (icdb_rehash(db) != 0)
354		return -1;
355
356	indexsize = info->indexsize;
357	idxlen = sizeof(uint32_t) * indexsize;
358
359	if (ftruncate(fd, 0) != 0)
360		return -1;
361	if (write(fd, info, sizeof(*info)) != sizeof(*info))
362		return -1;
363	for (i = 0; i < info->nkeys; i++) {
364		if (write(fd, db->idxdata[i], idxlen) != idxlen)
365			return -1;
366	}
367	if (write(fd, db->entries, info->nentries * entrysize) !=
368	    info->nentries * entrysize)
369		return -1;
370	return 0;
371}
372DEF_WEAK(icdb_save);
373
374int
375icdb_close(struct icdb *db)
376{
377	int i;
378
379	if (db->fd == -1) {
380		for (i = 0; i < db->info->nkeys; i++)
381			free(db->idxdata[i]);
382		free(db->entries);
383		free(db->info);
384	} else {
385		munmap(db->info, db->maplen);
386		close(db->fd);
387	}
388	free(db);
389	return 0;
390}
391DEF_WEAK(icdb_close);
392