1/* $Id: dbm.c,v 1.7 2019/07/01 22:56:24 schwarze Exp $ */ 2/* 3 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * 17 * Map-based version of the mandoc database, for read-only access. 18 * The interface is defined in "dbm.h". 19 */ 20#include "config.h" 21 22#include <assert.h> 23#if HAVE_ENDIAN 24#include <endian.h> 25#elif HAVE_SYS_ENDIAN 26#include <sys/endian.h> 27#elif HAVE_NTOHL 28#include <arpa/inet.h> 29#endif 30#if HAVE_ERR 31#include <err.h> 32#endif 33#include <errno.h> 34#include <regex.h> 35#include <stdint.h> 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39 40#include "mansearch.h" 41#include "dbm_map.h" 42#include "dbm.h" 43 44struct macro { 45 int32_t value; 46 int32_t pages; 47}; 48 49struct page { 50 int32_t name; 51 int32_t sect; 52 int32_t arch; 53 int32_t desc; 54 int32_t file; 55}; 56 57enum iter { 58 ITER_NONE = 0, 59 ITER_NAME, 60 ITER_SECT, 61 ITER_ARCH, 62 ITER_DESC, 63 ITER_MACRO 64}; 65 66static struct macro *macros[MACRO_MAX]; 67static int32_t nvals[MACRO_MAX]; 68static struct page *pages; 69static int32_t npages; 70static enum iter iteration; 71 72static struct dbm_res page_bytitle(enum iter, const struct dbm_match *); 73static struct dbm_res page_byarch(const struct dbm_match *); 74static struct dbm_res page_bymacro(int32_t, const struct dbm_match *); 75static char *macro_bypage(int32_t, int32_t); 76 77 78/*** top level functions **********************************************/ 79 80/* 81 * Open a disk-based mandoc database for read-only access. 82 * Map the pages and macros[] arrays. 83 * Return 0 on success. Return -1 and set errno on failure. 84 */ 85int 86dbm_open(const char *fname) 87{ 88 const int32_t *mp, *ep; 89 int32_t im; 90 91 if (dbm_map(fname) == -1) 92 return -1; 93 94 if ((npages = be32toh(*dbm_getint(4))) < 0) { 95 warnx("dbm_open(%s): Invalid number of pages: %d", 96 fname, npages); 97 goto fail; 98 } 99 pages = (struct page *)dbm_getint(5); 100 101 if ((mp = dbm_get(*dbm_getint(2))) == NULL) { 102 warnx("dbm_open(%s): Invalid offset of macros array", fname); 103 goto fail; 104 } 105 if (be32toh(*mp) != MACRO_MAX) { 106 warnx("dbm_open(%s): Invalid number of macros: %d", 107 fname, be32toh(*mp)); 108 goto fail; 109 } 110 for (im = 0; im < MACRO_MAX; im++) { 111 if ((ep = dbm_get(*++mp)) == NULL) { 112 warnx("dbm_open(%s): Invalid offset of macro %d", 113 fname, im); 114 goto fail; 115 } 116 nvals[im] = be32toh(*ep); 117 macros[im] = (struct macro *)++ep; 118 } 119 return 0; 120 121fail: 122 dbm_unmap(); 123 errno = EFTYPE; 124 return -1; 125} 126 127void 128dbm_close(void) 129{ 130 dbm_unmap(); 131} 132 133 134/*** functions for handling pages *************************************/ 135 136int32_t 137dbm_page_count(void) 138{ 139 return npages; 140} 141 142/* 143 * Give the caller pointers to the data for one manual page. 144 */ 145struct dbm_page * 146dbm_page_get(int32_t ip) 147{ 148 static struct dbm_page res; 149 150 assert(ip >= 0); 151 assert(ip < npages); 152 res.name = dbm_get(pages[ip].name); 153 if (res.name == NULL) 154 res.name = "(NULL)\0"; 155 res.sect = dbm_get(pages[ip].sect); 156 if (res.sect == NULL) 157 res.sect = "(NULL)\0"; 158 res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL; 159 res.desc = dbm_get(pages[ip].desc); 160 if (res.desc == NULL) 161 res.desc = "(NULL)"; 162 res.file = dbm_get(pages[ip].file); 163 if (res.file == NULL) 164 res.file = " (NULL)\0"; 165 res.addr = dbm_addr(pages + ip); 166 return &res; 167} 168 169/* 170 * Functions to start filtered iterations over manual pages. 171 */ 172void 173dbm_page_byname(const struct dbm_match *match) 174{ 175 assert(match != NULL); 176 page_bytitle(ITER_NAME, match); 177} 178 179void 180dbm_page_bysect(const struct dbm_match *match) 181{ 182 assert(match != NULL); 183 page_bytitle(ITER_SECT, match); 184} 185 186void 187dbm_page_byarch(const struct dbm_match *match) 188{ 189 assert(match != NULL); 190 page_byarch(match); 191} 192 193void 194dbm_page_bydesc(const struct dbm_match *match) 195{ 196 assert(match != NULL); 197 page_bytitle(ITER_DESC, match); 198} 199 200void 201dbm_page_bymacro(int32_t im, const struct dbm_match *match) 202{ 203 assert(im >= 0); 204 assert(im < MACRO_MAX); 205 assert(match != NULL); 206 page_bymacro(im, match); 207} 208 209/* 210 * Return the number of the next manual page in the current iteration. 211 */ 212struct dbm_res 213dbm_page_next(void) 214{ 215 struct dbm_res res = {-1, 0}; 216 217 switch(iteration) { 218 case ITER_NONE: 219 return res; 220 case ITER_ARCH: 221 return page_byarch(NULL); 222 case ITER_MACRO: 223 return page_bymacro(0, NULL); 224 default: 225 return page_bytitle(iteration, NULL); 226 } 227} 228 229/* 230 * Functions implementing the iteration over manual pages. 231 */ 232static struct dbm_res 233page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match) 234{ 235 static const struct dbm_match *match; 236 static const char *cp; 237 static int32_t ip; 238 struct dbm_res res = {-1, 0}; 239 240 assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC || 241 arg_iter == ITER_SECT); 242 243 /* Initialize for a new iteration. */ 244 245 if (arg_match != NULL) { 246 iteration = arg_iter; 247 match = arg_match; 248 switch (iteration) { 249 case ITER_NAME: 250 cp = dbm_get(pages[0].name); 251 break; 252 case ITER_SECT: 253 cp = dbm_get(pages[0].sect); 254 break; 255 case ITER_DESC: 256 cp = dbm_get(pages[0].desc); 257 break; 258 default: 259 abort(); 260 } 261 if (cp == NULL) { 262 iteration = ITER_NONE; 263 match = NULL; 264 cp = NULL; 265 ip = npages; 266 } else 267 ip = 0; 268 return res; 269 } 270 271 /* Search for a name. */ 272 273 while (ip < npages) { 274 if (iteration == ITER_NAME) 275 cp++; 276 if (dbm_match(match, cp)) 277 break; 278 cp = strchr(cp, '\0') + 1; 279 if (iteration == ITER_DESC) 280 ip++; 281 else if (*cp == '\0') { 282 cp++; 283 ip++; 284 } 285 } 286 287 /* Reached the end without a match. */ 288 289 if (ip == npages) { 290 iteration = ITER_NONE; 291 match = NULL; 292 cp = NULL; 293 return res; 294 } 295 296 /* Found a match; save the quality for later retrieval. */ 297 298 res.page = ip; 299 res.bits = iteration == ITER_NAME ? cp[-1] : 0; 300 301 /* Skip the remaining names of this page. */ 302 303 if (++ip < npages) { 304 do { 305 cp++; 306 } while (cp[-1] != '\0' || 307 (iteration != ITER_DESC && cp[-2] != '\0')); 308 } 309 return res; 310} 311 312static struct dbm_res 313page_byarch(const struct dbm_match *arg_match) 314{ 315 static const struct dbm_match *match; 316 struct dbm_res res = {-1, 0}; 317 static int32_t ip; 318 const char *cp; 319 320 /* Initialize for a new iteration. */ 321 322 if (arg_match != NULL) { 323 iteration = ITER_ARCH; 324 match = arg_match; 325 ip = 0; 326 return res; 327 } 328 329 /* Search for an architecture. */ 330 331 for ( ; ip < npages; ip++) 332 if (pages[ip].arch) 333 for (cp = dbm_get(pages[ip].arch); 334 *cp != '\0'; 335 cp = strchr(cp, '\0') + 1) 336 if (dbm_match(match, cp)) { 337 res.page = ip++; 338 return res; 339 } 340 341 /* Reached the end without a match. */ 342 343 iteration = ITER_NONE; 344 match = NULL; 345 return res; 346} 347 348static struct dbm_res 349page_bymacro(int32_t arg_im, const struct dbm_match *arg_match) 350{ 351 static const struct dbm_match *match; 352 static const int32_t *pp; 353 static const char *cp; 354 static int32_t im, iv; 355 struct dbm_res res = {-1, 0}; 356 357 assert(im >= 0); 358 assert(im < MACRO_MAX); 359 360 /* Initialize for a new iteration. */ 361 362 if (arg_match != NULL) { 363 iteration = ITER_MACRO; 364 match = arg_match; 365 im = arg_im; 366 cp = nvals[im] ? dbm_get(macros[im]->value) : NULL; 367 pp = NULL; 368 iv = -1; 369 return res; 370 } 371 if (iteration != ITER_MACRO) 372 return res; 373 374 /* Find the next matching macro value. */ 375 376 while (pp == NULL || *pp == 0) { 377 if (++iv == nvals[im]) { 378 iteration = ITER_NONE; 379 return res; 380 } 381 if (iv) 382 cp = strchr(cp, '\0') + 1; 383 if (dbm_match(match, cp)) 384 pp = dbm_get(macros[im][iv].pages); 385 } 386 387 /* Found a matching page. */ 388 389 res.page = (struct page *)dbm_get(*pp++) - pages; 390 return res; 391} 392 393 394/*** functions for handling macros ************************************/ 395 396int32_t 397dbm_macro_count(int32_t im) 398{ 399 assert(im >= 0); 400 assert(im < MACRO_MAX); 401 return nvals[im]; 402} 403 404struct dbm_macro * 405dbm_macro_get(int32_t im, int32_t iv) 406{ 407 static struct dbm_macro macro; 408 409 assert(im >= 0); 410 assert(im < MACRO_MAX); 411 assert(iv >= 0); 412 assert(iv < nvals[im]); 413 macro.value = dbm_get(macros[im][iv].value); 414 macro.pp = dbm_get(macros[im][iv].pages); 415 return ¯o; 416} 417 418/* 419 * Filtered iteration over macro entries. 420 */ 421void 422dbm_macro_bypage(int32_t im, int32_t ip) 423{ 424 assert(im >= 0); 425 assert(im < MACRO_MAX); 426 assert(ip != 0); 427 macro_bypage(im, ip); 428} 429 430char * 431dbm_macro_next(void) 432{ 433 return macro_bypage(MACRO_MAX, 0); 434} 435 436static char * 437macro_bypage(int32_t arg_im, int32_t arg_ip) 438{ 439 static const int32_t *pp; 440 static int32_t im, ip, iv; 441 442 /* Initialize for a new iteration. */ 443 444 if (arg_im < MACRO_MAX && arg_ip != 0) { 445 im = arg_im; 446 ip = arg_ip; 447 pp = dbm_get(macros[im]->pages); 448 iv = 0; 449 return NULL; 450 } 451 if (im >= MACRO_MAX) 452 return NULL; 453 454 /* Search for the next value. */ 455 456 while (iv < nvals[im]) { 457 if (*pp == ip) 458 break; 459 if (*pp == 0) 460 iv++; 461 pp++; 462 } 463 464 /* Reached the end without a match. */ 465 466 if (iv == nvals[im]) { 467 im = MACRO_MAX; 468 ip = 0; 469 pp = NULL; 470 return NULL; 471 } 472 473 /* Found a match; skip the remaining pages of this entry. */ 474 475 if (++iv < nvals[im]) 476 while (*pp++ != 0) 477 continue; 478 479 return dbm_get(macros[im][iv - 1].value); 480} 481