/*	$OpenBSD: dba.c,v 1.7 2017/02/09 18:26:17 schwarze Exp $ */
/*
 * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Allocation-based version of the mandoc database, for read-write access.
 * The interface is defined in "dba.h".
 */
#include <sys/types.h>
#include <endian.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "mansearch.h"
#include "dba_write.h"
#include "dba_array.h"
#include "dba.h"

/*
 * One value of one macro key.
 * Entries are stored in the per-macro ohash tables set up by dba_new(),
 * which use the "value" member as the hash key.
 */
struct macro_entry {
	struct dba_array	*pages;   /* pages associated with this value */
	char			 value[]; /* NUL-terminated value string */
};

/* Helpers for handling pages. */
static void	*prepend(const char *, char);
static void	 dba_pages_write(struct dba_array *);
static int	 compare_names(const void *, const void *);
static int	 compare_strings(const void *, const void *);

/* Helpers for handling macros. */
static struct macro_entry
		*get_macro_entry(struct ohash *, const char *, int32_t);
static void	 dba_macros_write(struct dba_array *);
static void	 dba_macro_write(struct ohash *);
static int	 compare_entries(const void *, const void *);


53/*** top-level functions **********************************************/
54
55struct dba *
56dba_new(int32_t npages)
57{
58	struct dba	*dba;
59	struct ohash	*macro;
60	int32_t		 im;
61
62	dba = mandoc_malloc(sizeof(*dba));
63	dba->pages = dba_array_new(npages, DBA_GROW);
64	dba->macros = dba_array_new(MACRO_MAX, 0);
65	for (im = 0; im < MACRO_MAX; im++) {
66		macro = mandoc_malloc(sizeof(*macro));
67		mandoc_ohash_init(macro, 4,
68		    offsetof(struct macro_entry, value));
69		dba_array_set(dba->macros, im, macro);
70	}
71	return dba;
72}
73
74void
75dba_free(struct dba *dba)
76{
77	struct dba_array	*page;
78	struct ohash		*macro;
79	struct macro_entry	*entry;
80	unsigned int		 slot;
81
82	dba_array_FOREACH(dba->macros, macro) {
83		for (entry = ohash_first(macro, &slot); entry != NULL;
84		     entry = ohash_next(macro, &slot)) {
85			dba_array_free(entry->pages);
86			free(entry);
87		}
88		ohash_delete(macro);
89		free(macro);
90	}
91	dba_array_free(dba->macros);
92
93	dba_array_undel(dba->pages);
94	dba_array_FOREACH(dba->pages, page) {
95		dba_array_free(dba_array_get(page, DBP_NAME));
96		dba_array_free(dba_array_get(page, DBP_SECT));
97		dba_array_free(dba_array_get(page, DBP_ARCH));
98		free(dba_array_get(page, DBP_DESC));
99		dba_array_free(dba_array_get(page, DBP_FILE));
100		dba_array_free(page);
101	}
102	dba_array_free(dba->pages);
103
104	free(dba);
105}
106
107/*
108 * Write the complete mandoc database to disk; the format is:
109 * - One integer each for magic and version.
110 * - One pointer each to the macros table and to the final magic.
111 * - The pages table.
112 * - The macros table.
113 * - And at the very end, the magic integer again.
114 */
115int
116dba_write(const char *fname, struct dba *dba)
117{
118	int	 save_errno;
119	int32_t	 pos_end, pos_macros, pos_macros_ptr;
120
121	if (dba_open(fname) == -1)
122		return -1;
123	dba_int_write(MANDOCDB_MAGIC);
124	dba_int_write(MANDOCDB_VERSION);
125	pos_macros_ptr = dba_skip(1, 2);
126	dba_pages_write(dba->pages);
127	pos_macros = dba_tell();
128	dba_macros_write(dba->macros);
129	pos_end = dba_tell();
130	dba_int_write(MANDOCDB_MAGIC);
131	dba_seek(pos_macros_ptr);
132	dba_int_write(pos_macros);
133	dba_int_write(pos_end);
134	if (dba_close() == -1) {
135		save_errno = errno;
136		unlink(fname);
137		errno = save_errno;
138		return -1;
139	}
140	return 0;
141}
142
143
144/*** functions for handling pages *************************************/
145
146/*
147 * Create a new page and append it to the pages table.
148 */
149struct dba_array *
150dba_page_new(struct dba_array *pages, const char *arch,
151    const char *desc, const char *file, enum form form)
152{
153	struct dba_array *page, *entry;
154
155	page = dba_array_new(DBP_MAX, 0);
156	entry = dba_array_new(1, DBA_STR | DBA_GROW);
157	dba_array_add(page, entry);
158	entry = dba_array_new(1, DBA_STR | DBA_GROW);
159	dba_array_add(page, entry);
160	if (arch != NULL && *arch != '\0') {
161		entry = dba_array_new(1, DBA_STR | DBA_GROW);
162		dba_array_add(entry, (void *)arch);
163	} else
164		entry = NULL;
165	dba_array_add(page, entry);
166	dba_array_add(page, mandoc_strdup(desc));
167	entry = dba_array_new(1, DBA_STR | DBA_GROW);
168	dba_array_add(entry, prepend(file, form));
169	dba_array_add(page, entry);
170	dba_array_add(pages, page);
171	return page;
172}
173
174/*
175 * Add a section, architecture, or file name to an existing page.
176 * Passing the NULL pointer for the architecture makes the page MI.
177 * In that case, any earlier or later architectures are ignored.
178 */
179void
180dba_page_add(struct dba_array *page, int32_t ie, const char *str)
181{
182	struct dba_array	*entries;
183	char			*entry;
184
185	entries = dba_array_get(page, ie);
186	if (ie == DBP_ARCH) {
187		if (entries == NULL)
188			return;
189		if (str == NULL || *str == '\0') {
190			dba_array_free(entries);
191			dba_array_set(page, DBP_ARCH, NULL);
192			return;
193		}
194	}
195	if (*str == '\0')
196		return;
197	dba_array_FOREACH(entries, entry) {
198		if (ie == DBP_FILE && *entry < ' ')
199			entry++;
200		if (strcmp(entry, str) == 0)
201			return;
202	}
203	dba_array_add(entries, (void *)str);
204}
205
206/*
207 * Add an additional name to an existing page.
208 */
209void
210dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
211{
212	struct dba_array	*entries;
213	char			*entry;
214	char			 maskbyte;
215
216	if (*name == '\0')
217		return;
218	maskbyte = mask & NAME_MASK;
219	entries = dba_array_get(page, DBP_NAME);
220	dba_array_FOREACH(entries, entry) {
221		if (strcmp(entry + 1, name) == 0) {
222			*entry |= maskbyte;
223			return;
224		}
225	}
226	dba_array_add(entries, prepend(name, maskbyte));
227}
228
/*
 * Return a pointer to a temporary copy of instr with inbyte prepended.
 * The copy lives in a static buffer that is reused and possibly
 * reallocated by the next call, so the caller must not free it and
 * must finish using it before calling prepend() again.
 */
static void *
prepend(const char *instr, char inbyte)
{
	static char	*outstr = NULL;
	static size_t	 outlen = 0;
	size_t		 newlen;

	/* newlen counts the string including its terminating NUL byte. */
	newlen = strlen(instr) + 1;
	if (newlen > outlen) {
		/* One more byte is needed for the prepended inbyte. */
		outstr = mandoc_realloc(outstr, newlen + 1);
		outlen = newlen;
	}
	*outstr = inbyte;
	memcpy(outstr + 1, instr, newlen);
	return outstr;
}

249/*
250 * Write the pages table to disk; the format is:
251 * - One integer containing the number of pages.
252 * - For each page, five pointers to the names, sections,
253 *   architectures, description, and file names of the page.
254 *   MI pages write 0 instead of the architecture pointer.
255 * - One list each for names, sections, architectures, descriptions and
256 *   file names.  The description for each page ends with a NUL byte.
257 *   For all the other lists, each string ends with a NUL byte,
258 *   and the last string for a page ends with two NUL bytes.
259 * - To assure alignment of following integers,
260 *   the end is padded with NUL bytes up to a multiple of four bytes.
261 */
262static void
263dba_pages_write(struct dba_array *pages)
264{
265	struct dba_array	*page, *entry;
266	int32_t			 pos_pages, pos_end;
267
268	pos_pages = dba_array_writelen(pages, 5);
269	dba_array_FOREACH(pages, page) {
270		dba_array_setpos(page, DBP_NAME, dba_tell());
271		entry = dba_array_get(page, DBP_NAME);
272		dba_array_sort(entry, compare_names);
273		dba_array_writelst(entry);
274	}
275	dba_array_FOREACH(pages, page) {
276		dba_array_setpos(page, DBP_SECT, dba_tell());
277		entry = dba_array_get(page, DBP_SECT);
278		dba_array_sort(entry, compare_strings);
279		dba_array_writelst(entry);
280	}
281	dba_array_FOREACH(pages, page) {
282		if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
283			dba_array_setpos(page, DBP_ARCH, dba_tell());
284			dba_array_sort(entry, compare_strings);
285			dba_array_writelst(entry);
286		} else
287			dba_array_setpos(page, DBP_ARCH, 0);
288	}
289	dba_array_FOREACH(pages, page) {
290		dba_array_setpos(page, DBP_DESC, dba_tell());
291		dba_str_write(dba_array_get(page, DBP_DESC));
292	}
293	dba_array_FOREACH(pages, page) {
294		dba_array_setpos(page, DBP_FILE, dba_tell());
295		dba_array_writelst(dba_array_get(page, DBP_FILE));
296	}
297	pos_end = dba_align();
298	dba_seek(pos_pages);
299	dba_array_FOREACH(pages, page)
300		dba_array_writepos(page);
301	dba_seek(pos_end);
302}
303
/*
 * Sorting callback for the names of a page.
 * Both arguments point to pointers to strings starting with one
 * type byte, see dba_page_alias().  Names with a larger type byte
 * sort first; names with equal type bytes are ordered by
 * case-insensitive comparison of the text following that byte.
 */
static int
compare_names(const void *vp1, const void *vp2)
{
	const char	*cp1, *cp2;
	int		 diff;

	cp1 = *(const char * const *)vp1;
	cp2 = *(const char * const *)vp2;
	return (diff = *cp2 - *cp1) ? diff :
	    strcasecmp(cp1 + 1, cp2 + 1);
}

/*
 * Sorting callback for plain string lists.
 * Both arguments point to pointers to NUL-terminated strings,
 * which are compared with strcmp(3).
 */
static int
compare_strings(const void *vp1, const void *vp2)
{
	const char	*cp1, *cp2;

	cp1 = *(const char * const *)vp1;
	cp2 = *(const char * const *)vp2;
	return strcmp(cp1, cp2);
}

326/*** functions for handling macros ************************************/
327
328/*
329 * In the hash table for a single macro, look up an entry by
330 * the macro value or add an empty one if it doesn't exist yet.
331 */
332static struct macro_entry *
333get_macro_entry(struct ohash *macro, const char *value, int32_t np)
334{
335	struct macro_entry	*entry;
336	size_t			 len;
337	unsigned int		 slot;
338
339	slot = ohash_qlookup(macro, value);
340	if ((entry = ohash_find(macro, slot)) == NULL) {
341		len = strlen(value) + 1;
342		entry = mandoc_malloc(sizeof(*entry) + len);
343		memcpy(&entry->value, value, len);
344		entry->pages = dba_array_new(np, DBA_GROW);
345		ohash_insert(macro, slot, entry);
346	}
347	return entry;
348}
349
350/*
351 * In addition to get_macro_entry(), add multiple page references,
352 * converting them from the on-disk format (byte offsets in the file)
353 * to page pointers in memory.
354 */
355void
356dba_macro_new(struct dba *dba, int32_t im, const char *value,
357    const int32_t *pp)
358{
359	struct macro_entry	*entry;
360	const int32_t		*ip;
361	int32_t			 np;
362
363	np = 0;
364	for (ip = pp; *ip; ip++)
365		np++;
366
367	entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
368	for (ip = pp; *ip; ip++)
369		dba_array_add(entry->pages, dba_array_get(dba->pages,
370		    be32toh(*ip) / 5 / sizeof(*ip) - 1));
371}
372
373/*
374 * In addition to get_macro_entry(), add one page reference,
375 * directly taking the in-memory page pointer as an argument.
376 */
377void
378dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
379    struct dba_array *page)
380{
381	struct macro_entry	*entry;
382
383	if (*value == '\0')
384		return;
385	entry = get_macro_entry(dba_array_get(macros, im), value, 1);
386	dba_array_add(entry->pages, page);
387}
388
389/*
390 * Write the macros table to disk; the format is:
391 * - The number of macro tables (actually, MACRO_MAX).
392 * - That number of pointers to the individual macro tables.
393 * - The individual macro tables.
394 */
395static void
396dba_macros_write(struct dba_array *macros)
397{
398	struct ohash		*macro;
399	int32_t			 im, pos_macros, pos_end;
400
401	pos_macros = dba_array_writelen(macros, 1);
402	im = 0;
403	dba_array_FOREACH(macros, macro) {
404		dba_array_setpos(macros, im++, dba_tell());
405		dba_macro_write(macro);
406	}
407	pos_end = dba_tell();
408	dba_seek(pos_macros);
409	dba_array_writepos(macros);
410	dba_seek(pos_end);
411}
412
413/*
414 * Write one individual macro table to disk; the format is:
415 * - The number of entries in the table.
416 * - For each entry, two pointers, the first one to the value
417 *   and the second one to the list of pages.
418 * - A list of values, each ending in a NUL byte.
419 * - To assure alignment of following integers,
420 *   padding with NUL bytes up to a multiple of four bytes.
421 * - A list of pointers to pages, each list ending in a 0 integer.
422 */
423static void
424dba_macro_write(struct ohash *macro)
425{
426	struct macro_entry	**entries, *entry;
427	struct dba_array	 *page;
428	int32_t			 *kpos, *dpos;
429	unsigned int		  ie, ne, slot;
430	int			  use;
431	int32_t			  addr, pos_macro, pos_end;
432
433	/* Temporary storage for filtering and sorting. */
434
435	ne = ohash_entries(macro);
436	entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
437	kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
438	dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
439
440	/* Build a list of non-empty entries and sort it. */
441
442	ne = 0;
443	for (entry = ohash_first(macro, &slot); entry != NULL;
444	     entry = ohash_next(macro, &slot)) {
445		use = 0;
446		dba_array_FOREACH(entry->pages, page)
447			if (dba_array_getpos(page))
448				use = 1;
449		if (use)
450			entries[ne++] = entry;
451	}
452	qsort(entries, ne, sizeof(*entries), compare_entries);
453
454	/* Number of entries, and space for the pointer pairs. */
455
456	dba_int_write(ne);
457	pos_macro = dba_skip(2, ne);
458
459	/* String table. */
460
461	for (ie = 0; ie < ne; ie++) {
462		kpos[ie] = dba_tell();
463		dba_str_write(entries[ie]->value);
464	}
465	dba_align();
466
467	/* Pages table. */
468
469	for (ie = 0; ie < ne; ie++) {
470		dpos[ie] = dba_tell();
471		dba_array_FOREACH(entries[ie]->pages, page)
472			if ((addr = dba_array_getpos(page)))
473				dba_int_write(addr);
474		dba_int_write(0);
475	}
476	pos_end = dba_tell();
477
478	/* Fill in the pointer pairs. */
479
480	dba_seek(pos_macro);
481	for (ie = 0; ie < ne; ie++) {
482		dba_int_write(kpos[ie]);
483		dba_int_write(dpos[ie]);
484	}
485	dba_seek(pos_end);
486
487	free(entries);
488	free(kpos);
489	free(dpos);
490}
491
492static int
493compare_entries(const void *vp1, const void *vp2)
494{
495	const struct macro_entry *ep1, *ep2;
496
497	ep1 = *(const struct macro_entry * const *)vp1;
498	ep2 = *(const struct macro_entry * const *)vp2;
499	return strcmp(ep1->value, ep2->value);
500}
501