1/*	$Id: dba.c,v 1.10 2017/02/17 14:43:54 schwarze Exp $ */
2/*
3 * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * Allocation-based version of the mandoc database, for read-write access.
18 * The interface is defined in "dba.h".
19 */
20#include "config.h"
21
22#include <sys/types.h>
23#if HAVE_ENDIAN
24#include <endian.h>
25#elif HAVE_SYS_ENDIAN
26#include <sys/endian.h>
27#elif HAVE_NTOHL
28#include <arpa/inet.h>
29#endif
30#include <errno.h>
31#include <stddef.h>
32#include <stdint.h>
33#include <stdlib.h>
34#include <string.h>
35#include <unistd.h>
36
37#include "mandoc_aux.h"
38#include "mandoc_ohash.h"
39#include "mansearch.h"
40#include "dba_write.h"
41#include "dba_array.h"
42#include "dba.h"
43
44struct macro_entry {
45	struct dba_array	*pages;
46	char			 value[];
47};
48
49static void	*prepend(const char *, char);
50static void	 dba_pages_write(struct dba_array *);
51static int	 compare_names(const void *, const void *);
52static int	 compare_strings(const void *, const void *);
53
54static struct macro_entry
55		*get_macro_entry(struct ohash *, const char *, int32_t);
56static void	 dba_macros_write(struct dba_array *);
57static void	 dba_macro_write(struct ohash *);
58static int	 compare_entries(const void *, const void *);
59
60
61/*** top-level functions **********************************************/
62
63struct dba *
64dba_new(int32_t npages)
65{
66	struct dba	*dba;
67	struct ohash	*macro;
68	int32_t		 im;
69
70	dba = mandoc_malloc(sizeof(*dba));
71	dba->pages = dba_array_new(npages, DBA_GROW);
72	dba->macros = dba_array_new(MACRO_MAX, 0);
73	for (im = 0; im < MACRO_MAX; im++) {
74		macro = mandoc_malloc(sizeof(*macro));
75		mandoc_ohash_init(macro, 4,
76		    offsetof(struct macro_entry, value));
77		dba_array_set(dba->macros, im, macro);
78	}
79	return dba;
80}
81
82void
83dba_free(struct dba *dba)
84{
85	struct dba_array	*page;
86	struct ohash		*macro;
87	struct macro_entry	*entry;
88	unsigned int		 slot;
89
90	dba_array_FOREACH(dba->macros, macro) {
91		for (entry = ohash_first(macro, &slot); entry != NULL;
92		     entry = ohash_next(macro, &slot)) {
93			dba_array_free(entry->pages);
94			free(entry);
95		}
96		ohash_delete(macro);
97		free(macro);
98	}
99	dba_array_free(dba->macros);
100
101	dba_array_undel(dba->pages);
102	dba_array_FOREACH(dba->pages, page) {
103		dba_array_free(dba_array_get(page, DBP_NAME));
104		dba_array_free(dba_array_get(page, DBP_SECT));
105		dba_array_free(dba_array_get(page, DBP_ARCH));
106		free(dba_array_get(page, DBP_DESC));
107		dba_array_free(dba_array_get(page, DBP_FILE));
108		dba_array_free(page);
109	}
110	dba_array_free(dba->pages);
111
112	free(dba);
113}
114
115/*
116 * Write the complete mandoc database to disk; the format is:
117 * - One integer each for magic and version.
118 * - One pointer each to the macros table and to the final magic.
119 * - The pages table.
120 * - The macros table.
121 * - And at the very end, the magic integer again.
122 */
123int
124dba_write(const char *fname, struct dba *dba)
125{
126	int	 save_errno;
127	int32_t	 pos_end, pos_macros, pos_macros_ptr;
128
129	if (dba_open(fname) == -1)
130		return -1;
131	dba_int_write(MANDOCDB_MAGIC);
132	dba_int_write(MANDOCDB_VERSION);
133	pos_macros_ptr = dba_skip(1, 2);
134	dba_pages_write(dba->pages);
135	pos_macros = dba_tell();
136	dba_macros_write(dba->macros);
137	pos_end = dba_tell();
138	dba_int_write(MANDOCDB_MAGIC);
139	dba_seek(pos_macros_ptr);
140	dba_int_write(pos_macros);
141	dba_int_write(pos_end);
142	if (dba_close() == -1) {
143		save_errno = errno;
144		unlink(fname);
145		errno = save_errno;
146		return -1;
147	}
148	return 0;
149}
150
151
152/*** functions for handling pages *************************************/
153
154/*
155 * Create a new page and append it to the pages table.
156 */
157struct dba_array *
158dba_page_new(struct dba_array *pages, const char *arch,
159    const char *desc, const char *file, enum form form)
160{
161	struct dba_array *page, *entry;
162
163	page = dba_array_new(DBP_MAX, 0);
164	entry = dba_array_new(1, DBA_STR | DBA_GROW);
165	dba_array_add(page, entry);
166	entry = dba_array_new(1, DBA_STR | DBA_GROW);
167	dba_array_add(page, entry);
168	if (arch != NULL && *arch != '\0') {
169		entry = dba_array_new(1, DBA_STR | DBA_GROW);
170		dba_array_add(entry, (void *)arch);
171	} else
172		entry = NULL;
173	dba_array_add(page, entry);
174	dba_array_add(page, mandoc_strdup(desc));
175	entry = dba_array_new(1, DBA_STR | DBA_GROW);
176	dba_array_add(entry, prepend(file, form));
177	dba_array_add(page, entry);
178	dba_array_add(pages, page);
179	return page;
180}
181
182/*
183 * Add a section, architecture, or file name to an existing page.
184 * Passing the NULL pointer for the architecture makes the page MI.
185 * In that case, any earlier or later architectures are ignored.
186 */
187void
188dba_page_add(struct dba_array *page, int32_t ie, const char *str)
189{
190	struct dba_array	*entries;
191	char			*entry;
192
193	entries = dba_array_get(page, ie);
194	if (ie == DBP_ARCH) {
195		if (entries == NULL)
196			return;
197		if (str == NULL || *str == '\0') {
198			dba_array_free(entries);
199			dba_array_set(page, DBP_ARCH, NULL);
200			return;
201		}
202	}
203	if (*str == '\0')
204		return;
205	dba_array_FOREACH(entries, entry) {
206		if (ie == DBP_FILE && *entry < ' ')
207			entry++;
208		if (strcmp(entry, str) == 0)
209			return;
210	}
211	dba_array_add(entries, (void *)str);
212}
213
214/*
215 * Add an additional name to an existing page.
216 */
217void
218dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
219{
220	struct dba_array	*entries;
221	char			*entry;
222	char			 maskbyte;
223
224	if (*name == '\0')
225		return;
226	maskbyte = mask & NAME_MASK;
227	entries = dba_array_get(page, DBP_NAME);
228	dba_array_FOREACH(entries, entry) {
229		if (strcmp(entry + 1, name) == 0) {
230			*entry |= maskbyte;
231			return;
232		}
233	}
234	dba_array_add(entries, prepend(name, maskbyte));
235}
236
237/*
238 * Return a pointer to a temporary copy of instr with inbyte prepended.
239 */
240static void *
241prepend(const char *instr, char inbyte)
242{
243	static char	*outstr = NULL;
244	static size_t	 outlen = 0;
245	size_t		 newlen;
246
247	newlen = strlen(instr) + 1;
248	if (newlen > outlen) {
249		outstr = mandoc_realloc(outstr, newlen + 1);
250		outlen = newlen;
251	}
252	*outstr = inbyte;
253	memcpy(outstr + 1, instr, newlen);
254	return outstr;
255}
256
257/*
258 * Write the pages table to disk; the format is:
259 * - One integer containing the number of pages.
260 * - For each page, five pointers to the names, sections,
261 *   architectures, description, and file names of the page.
262 *   MI pages write 0 instead of the architecture pointer.
263 * - One list each for names, sections, architectures, descriptions and
264 *   file names.  The description for each page ends with a NUL byte.
265 *   For all the other lists, each string ends with a NUL byte,
266 *   and the last string for a page ends with two NUL bytes.
267 * - To assure alignment of following integers,
268 *   the end is padded with NUL bytes up to a multiple of four bytes.
269 */
270static void
271dba_pages_write(struct dba_array *pages)
272{
273	struct dba_array	*page, *entry;
274	int32_t			 pos_pages, pos_end;
275
276	pos_pages = dba_array_writelen(pages, 5);
277	dba_array_FOREACH(pages, page) {
278		dba_array_setpos(page, DBP_NAME, dba_tell());
279		entry = dba_array_get(page, DBP_NAME);
280		dba_array_sort(entry, compare_names);
281		dba_array_writelst(entry);
282	}
283	dba_array_FOREACH(pages, page) {
284		dba_array_setpos(page, DBP_SECT, dba_tell());
285		entry = dba_array_get(page, DBP_SECT);
286		dba_array_sort(entry, compare_strings);
287		dba_array_writelst(entry);
288	}
289	dba_array_FOREACH(pages, page) {
290		if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
291			dba_array_setpos(page, DBP_ARCH, dba_tell());
292			dba_array_sort(entry, compare_strings);
293			dba_array_writelst(entry);
294		} else
295			dba_array_setpos(page, DBP_ARCH, 0);
296	}
297	dba_array_FOREACH(pages, page) {
298		dba_array_setpos(page, DBP_DESC, dba_tell());
299		dba_str_write(dba_array_get(page, DBP_DESC));
300	}
301	dba_array_FOREACH(pages, page) {
302		dba_array_setpos(page, DBP_FILE, dba_tell());
303		dba_array_writelst(dba_array_get(page, DBP_FILE));
304	}
305	pos_end = dba_align();
306	dba_seek(pos_pages);
307	dba_array_FOREACH(pages, page)
308		dba_array_writepos(page);
309	dba_seek(pos_end);
310}
311
312static int
313compare_names(const void *vp1, const void *vp2)
314{
315	const char	*cp1, *cp2;
316	int		 diff;
317
318	cp1 = *(const char * const *)vp1;
319	cp2 = *(const char * const *)vp2;
320	return (diff = *cp2 - *cp1) ? diff :
321	    strcasecmp(cp1 + 1, cp2 + 1);
322}
323
324static int
325compare_strings(const void *vp1, const void *vp2)
326{
327	const char	*cp1, *cp2;
328
329	cp1 = *(const char * const *)vp1;
330	cp2 = *(const char * const *)vp2;
331	return strcmp(cp1, cp2);
332}
333
334/*** functions for handling macros ************************************/
335
336/*
337 * In the hash table for a single macro, look up an entry by
338 * the macro value or add an empty one if it doesn't exist yet.
339 */
340static struct macro_entry *
341get_macro_entry(struct ohash *macro, const char *value, int32_t np)
342{
343	struct macro_entry	*entry;
344	size_t			 len;
345	unsigned int		 slot;
346
347	slot = ohash_qlookup(macro, value);
348	if ((entry = ohash_find(macro, slot)) == NULL) {
349		len = strlen(value) + 1;
350		entry = mandoc_malloc(sizeof(*entry) + len);
351		memcpy(&entry->value, value, len);
352		entry->pages = dba_array_new(np, DBA_GROW);
353		ohash_insert(macro, slot, entry);
354	}
355	return entry;
356}
357
358/*
359 * In addition to get_macro_entry(), add multiple page references,
360 * converting them from the on-disk format (byte offsets in the file)
361 * to page pointers in memory.
362 */
363void
364dba_macro_new(struct dba *dba, int32_t im, const char *value,
365    const int32_t *pp)
366{
367	struct macro_entry	*entry;
368	const int32_t		*ip;
369	int32_t			 np;
370
371	np = 0;
372	for (ip = pp; *ip; ip++)
373		np++;
374
375	entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
376	for (ip = pp; *ip; ip++)
377		dba_array_add(entry->pages, dba_array_get(dba->pages,
378		    be32toh(*ip) / 5 / sizeof(*ip) - 1));
379}
380
381/*
382 * In addition to get_macro_entry(), add one page reference,
383 * directly taking the in-memory page pointer as an argument.
384 */
385void
386dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
387    struct dba_array *page)
388{
389	struct macro_entry	*entry;
390
391	if (*value == '\0')
392		return;
393	entry = get_macro_entry(dba_array_get(macros, im), value, 1);
394	dba_array_add(entry->pages, page);
395}
396
397/*
398 * Write the macros table to disk; the format is:
399 * - The number of macro tables (actually, MACRO_MAX).
400 * - That number of pointers to the individual macro tables.
401 * - The individual macro tables.
402 */
403static void
404dba_macros_write(struct dba_array *macros)
405{
406	struct ohash		*macro;
407	int32_t			 im, pos_macros, pos_end;
408
409	pos_macros = dba_array_writelen(macros, 1);
410	im = 0;
411	dba_array_FOREACH(macros, macro) {
412		dba_array_setpos(macros, im++, dba_tell());
413		dba_macro_write(macro);
414	}
415	pos_end = dba_tell();
416	dba_seek(pos_macros);
417	dba_array_writepos(macros);
418	dba_seek(pos_end);
419}
420
421/*
422 * Write one individual macro table to disk; the format is:
423 * - The number of entries in the table.
424 * - For each entry, two pointers, the first one to the value
425 *   and the second one to the list of pages.
426 * - A list of values, each ending in a NUL byte.
427 * - To assure alignment of following integers,
428 *   padding with NUL bytes up to a multiple of four bytes.
429 * - A list of pointers to pages, each list ending in a 0 integer.
430 */
431static void
432dba_macro_write(struct ohash *macro)
433{
434	struct macro_entry	**entries, *entry;
435	struct dba_array	 *page;
436	int32_t			 *kpos, *dpos;
437	unsigned int		  ie, ne, slot;
438	int			  use;
439	int32_t			  addr, pos_macro, pos_end;
440
441	/* Temporary storage for filtering and sorting. */
442
443	ne = ohash_entries(macro);
444	entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
445	kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
446	dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
447
448	/* Build a list of non-empty entries and sort it. */
449
450	ne = 0;
451	for (entry = ohash_first(macro, &slot); entry != NULL;
452	     entry = ohash_next(macro, &slot)) {
453		use = 0;
454		dba_array_FOREACH(entry->pages, page)
455			if (dba_array_getpos(page))
456				use = 1;
457		if (use)
458			entries[ne++] = entry;
459	}
460	qsort(entries, ne, sizeof(*entries), compare_entries);
461
462	/* Number of entries, and space for the pointer pairs. */
463
464	dba_int_write(ne);
465	pos_macro = dba_skip(2, ne);
466
467	/* String table. */
468
469	for (ie = 0; ie < ne; ie++) {
470		kpos[ie] = dba_tell();
471		dba_str_write(entries[ie]->value);
472	}
473	dba_align();
474
475	/* Pages table. */
476
477	for (ie = 0; ie < ne; ie++) {
478		dpos[ie] = dba_tell();
479		dba_array_FOREACH(entries[ie]->pages, page)
480			if ((addr = dba_array_getpos(page)))
481				dba_int_write(addr);
482		dba_int_write(0);
483	}
484	pos_end = dba_tell();
485
486	/* Fill in the pointer pairs. */
487
488	dba_seek(pos_macro);
489	for (ie = 0; ie < ne; ie++) {
490		dba_int_write(kpos[ie]);
491		dba_int_write(dpos[ie]);
492	}
493	dba_seek(pos_end);
494
495	free(entries);
496	free(kpos);
497	free(dpos);
498}
499
500static int
501compare_entries(const void *vp1, const void *vp2)
502{
503	const struct macro_entry *ep1, *ep2;
504
505	ep1 = *(const struct macro_entry * const *)vp1;
506	ep2 = *(const struct macro_entry * const *)vp2;
507	return strcmp(ep1->value, ep2->value);
508}
509