mansearch.c revision 1.6
1/* $Id: mansearch.c,v 1.6 2014/01/05 03:06:36 schwarze Exp $ */ 2/* 3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <assert.h> 19#include <fcntl.h> 20#include <getopt.h> 21#include <limits.h> 22#include <regex.h> 23#include <stdio.h> 24#include <stdint.h> 25#include <stddef.h> 26#include <stdlib.h> 27#include <string.h> 28#include <unistd.h> 29 30#include <ohash.h> 31#include <sqlite3.h> 32 33#include "mandoc.h" 34#include "manpath.h" 35#include "mansearch.h" 36 37#define SQL_BIND_TEXT(_db, _s, _i, _v) \ 38 do { if (SQLITE_OK != sqlite3_bind_text \ 39 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ 40 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 41 } while (0) 42#define SQL_BIND_INT64(_db, _s, _i, _v) \ 43 do { if (SQLITE_OK != sqlite3_bind_int64 \ 44 ((_s), (_i)++, (_v))) \ 45 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 46 } while (0) 47#define SQL_BIND_BLOB(_db, _s, _i, _v) \ 48 do { if (SQLITE_OK != sqlite3_bind_blob \ 49 ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \ 50 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 51 } while (0) 52 53struct expr { 54 uint64_t bits; /* type-mask */ 55 const char *substr; /* to search for, if applicable */ 56 regex_t regexp; /* compiled regexp, if applicable */ 57 int open; /* opening parentheses before */ 58 int and; /* logical AND before */ 59 int close; /* closing parentheses after */ 60 struct expr *next; /* next in sequence */ 61}; 62 63struct match { 64 uint64_t id; /* identifier in database */ 65 char *file; /* relative filepath of manpage */ 66 char *desc; /* description of manpage */ 67 int form; /* 0 == catpage */ 68}; 69 70struct type { 71 uint64_t bits; 72 const char *name; 73}; 74 75static const struct type types[] = { 76 { TYPE_An, "An" }, 77 { TYPE_Ar, "Ar" }, 78 { TYPE_At, "At" }, 79 { TYPE_Bsx, "Bsx" }, 80 { TYPE_Bx, "Bx" }, 81 { TYPE_Cd, "Cd" }, 82 { TYPE_Cm, "Cm" }, 83 { TYPE_Dv, "Dv" }, 84 { TYPE_Dx, "Dx" }, 85 { TYPE_Em, "Em" }, 86 { TYPE_Er, "Er" }, 87 { TYPE_Ev, "Ev" }, 88 { TYPE_Fa, "Fa" }, 89 { TYPE_Fl, "Fl" }, 90 { TYPE_Fn, "Fn" }, 91 { TYPE_Fn, "Fo" }, 92 { TYPE_Ft, "Ft" }, 93 { TYPE_Fx, "Fx" }, 94 { TYPE_Ic, "Ic" }, 95 { TYPE_In, "In" }, 96 { TYPE_Lb, "Lb" }, 97 { TYPE_Li, "Li" }, 98 { TYPE_Lk, "Lk" }, 99 { TYPE_Ms, "Ms" }, 100 { TYPE_Mt, "Mt" }, 101 { TYPE_Nd, "Nd" }, 102 { TYPE_Nm, "Nm" }, 103 { TYPE_Nx, "Nx" }, 104 { TYPE_Ox, "Ox" }, 105 { TYPE_Pa, "Pa" }, 106 { TYPE_Rs, "Rs" }, 107 { TYPE_Sh, "Sh" }, 108 { TYPE_Ss, "Ss" }, 109 { TYPE_St, "St" }, 110 { TYPE_Sy, "Sy" }, 111 { TYPE_Tn, "Tn" }, 112 { TYPE_Va, "Va" }, 113 { TYPE_Va, "Vt" }, 114 { TYPE_Xr, "Xr" }, 115 { TYPE_sec, "sec" }, 116 { TYPE_arch,"arch" }, 117 { ~0ULL, "any" }, 118 { 0ULL, NULL } 119}; 120 121static char *buildnames(sqlite3 *, sqlite3_stmt *, uint64_t); 122static char *buildoutput(sqlite3 *, sqlite3_stmt *, 123 uint64_t, uint64_t); 124static void *hash_alloc(size_t, void *); 125static void hash_free(void *, size_t, void *); 126static void *hash_halloc(size_t, void *); 127static struct expr *exprcomp(const struct mansearch *, 128 int, char *[]); 129static void exprfree(struct expr *); 130static struct expr *exprspec(struct expr *, uint64_t, 131 const char *, const char *); 132static struct expr *exprterm(const struct mansearch *, char *, int); 133static void sql_append(char **sql, size_t *sz, 134 const char *newstr, int count); 135static void sql_match(sqlite3_context *context, 136 int argc, sqlite3_value **argv); 137static void sql_regexp(sqlite3_context *context, 138 int argc, sqlite3_value **argv); 139static char *sql_statement(const struct expr *); 140 141int 142mansearch(const struct mansearch *search, 143 const struct manpaths *paths, 144 int argc, char *argv[], 145 const char *outkey, 146 struct manpage **res, size_t *sz) 147{ 148 int fd, rc, c, ibit; 149 int64_t id; 150 uint64_t outbit; 151 char buf[PATH_MAX]; 152 char *sql; 153 struct manpage *mpage; 154 struct expr *e, *ep; 155 sqlite3 *db; 156 sqlite3_stmt *s, *s2; 157 struct match *mp; 158 struct ohash_info info; 159 struct ohash htab; 160 unsigned int idx; 161 size_t i, j, cur, maxres; 162 163 memset(&info, 0, sizeof(struct ohash_info)); 164 165 info.halloc = hash_halloc; 166 info.alloc = hash_alloc; 167 info.hfree = hash_free; 168 info.key_offset = offsetof(struct match, id); 169 170 *sz = cur = maxres = 0; 171 sql = NULL; 172 *res = NULL; 173 fd = -1; 174 e = NULL; 175 rc = 0; 176 177 if (0 == argc) 178 goto out; 179 if (NULL == (e = exprcomp(search, argc, argv))) 180 goto out; 181 182 outbit = 0; 183 if (NULL != outkey) { 184 for (ibit = 0; types[ibit].bits; ibit++) { 185 if (0 == strcasecmp(types[ibit].name, outkey)) { 186 outbit = types[ibit].bits; 187 break; 188 } 189 } 190 } 191 192 /* 193 * Save a descriptor to the current working directory. 194 * Since pathnames in the "paths" variable might be relative, 195 * and we'll be chdir()ing into them, we need to keep a handle 196 * on our current directory from which to start the chdir(). 197 */ 198 199 if (NULL == getcwd(buf, PATH_MAX)) { 200 perror(NULL); 201 goto out; 202 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) { 203 perror(buf); 204 goto out; 205 } 206 207 sql = sql_statement(e); 208 209 /* 210 * Loop over the directories (containing databases) for us to 211 * search. 212 * Don't let missing/bad databases/directories phase us. 213 * In each, try to open the resident database and, if it opens, 214 * scan it for our match expression. 215 */ 216 217 for (i = 0; i < paths->sz; i++) { 218 if (-1 == fchdir(fd)) { 219 perror(buf); 220 free(*res); 221 break; 222 } else if (-1 == chdir(paths->paths[i])) { 223 perror(paths->paths[i]); 224 continue; 225 } 226 227 c = sqlite3_open_v2 228 (MANDOC_DB, &db, 229 SQLITE_OPEN_READONLY, NULL); 230 231 if (SQLITE_OK != c) { 232 perror(MANDOC_DB); 233 sqlite3_close(db); 234 continue; 235 } 236 237 /* 238 * Define the SQL functions for substring 239 * and regular expression matching. 240 */ 241 242 c = sqlite3_create_function(db, "match", 2, 243 SQLITE_ANY, NULL, sql_match, NULL, NULL); 244 assert(SQLITE_OK == c); 245 c = sqlite3_create_function(db, "regexp", 2, 246 SQLITE_ANY, NULL, sql_regexp, NULL, NULL); 247 assert(SQLITE_OK == c); 248 249 j = 1; 250 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL); 251 if (SQLITE_OK != c) 252 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 253 254 for (ep = e; NULL != ep; ep = ep->next) { 255 if (NULL == ep->substr) { 256 SQL_BIND_BLOB(db, s, j, ep->regexp); 257 } else 258 SQL_BIND_TEXT(db, s, j, ep->substr); 259 SQL_BIND_INT64(db, s, j, ep->bits); 260 } 261 262 memset(&htab, 0, sizeof(struct ohash)); 263 ohash_init(&htab, 4, &info); 264 265 /* 266 * Hash each entry on its [unique] document identifier. 267 * This is a uint64_t. 268 * Instead of using a hash function, simply convert the 269 * uint64_t to a uint32_t, the hash value's type. 270 * This gives good performance and preserves the 271 * distribution of buckets in the table. 272 */ 273 while (SQLITE_ROW == (c = sqlite3_step(s))) { 274 id = sqlite3_column_int64(s, 5); 275 idx = ohash_lookup_memory 276 (&htab, (char *)&id, 277 sizeof(uint64_t), (uint32_t)id); 278 279 if (NULL != ohash_find(&htab, idx)) 280 continue; 281 282 mp = mandoc_calloc(1, sizeof(struct match)); 283 mp->id = id; 284 mp->file = mandoc_strdup 285 ((char *)sqlite3_column_text(s, 0)); 286 mp->desc = mandoc_strdup 287 ((char *)sqlite3_column_text(s, 3)); 288 mp->form = sqlite3_column_int(s, 4); 289 ohash_insert(&htab, idx, mp); 290 } 291 292 if (SQLITE_DONE != c) 293 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 294 295 sqlite3_finalize(s); 296 297 c = sqlite3_prepare_v2(db, 298 "SELECT * FROM mlinks WHERE pageid=?", 299 -1, &s, NULL); 300 if (SQLITE_OK != c) 301 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 302 303 c = sqlite3_prepare_v2(db, 304 "SELECT * FROM keys WHERE pageid=? AND bits & ?", 305 -1, &s2, NULL); 306 if (SQLITE_OK != c) 307 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 308 309 for (mp = ohash_first(&htab, &idx); 310 NULL != mp; 311 mp = ohash_next(&htab, &idx)) { 312 if (cur + 1 > maxres) { 313 maxres += 1024; 314 *res = mandoc_realloc 315 (*res, maxres * sizeof(struct manpage)); 316 } 317 mpage = *res + cur; 318 if (-1 == asprintf(&mpage->file, "%s/%s", 319 paths->paths[i], mp->file)) { 320 perror(0); 321 exit((int)MANDOCLEVEL_SYSERR); 322 } 323 mpage->desc = mp->desc; 324 mpage->form = mp->form; 325 mpage->names = buildnames(db, s, mp->id); 326 mpage->output = outbit ? 327 buildoutput(db, s2, mp->id, outbit) : NULL; 328 329 free(mp->file); 330 free(mp); 331 cur++; 332 } 333 334 sqlite3_finalize(s); 335 sqlite3_finalize(s2); 336 sqlite3_close(db); 337 ohash_delete(&htab); 338 } 339 rc = 1; 340out: 341 exprfree(e); 342 if (-1 != fd) 343 close(fd); 344 free(sql); 345 *sz = cur; 346 return(rc); 347} 348 349static char * 350buildnames(sqlite3 *db, sqlite3_stmt *s, uint64_t id) 351{ 352 char *names, *newnames; 353 const char *oldnames, *sep1, *name, *sec, *sep2, *arch; 354 size_t i; 355 int c; 356 357 names = NULL; 358 i = 1; 359 SQL_BIND_INT64(db, s, i, id); 360 while (SQLITE_ROW == (c = sqlite3_step(s))) { 361 if (NULL == names) { 362 oldnames = ""; 363 sep1 = ""; 364 } else { 365 oldnames = names; 366 sep1 = ", "; 367 } 368 sec = sqlite3_column_text(s, 1); 369 arch = sqlite3_column_text(s, 2); 370 name = sqlite3_column_text(s, 3); 371 sep2 = '\0' == *arch ? "" : "/"; 372 if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)", 373 oldnames, sep1, name, sec, sep2, arch)) { 374 perror(0); 375 exit((int)MANDOCLEVEL_SYSERR); 376 } 377 free(names); 378 names = newnames; 379 } 380 if (SQLITE_DONE != c) 381 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 382 sqlite3_reset(s); 383 return(names); 384} 385 386static char * 387buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit) 388{ 389 char *output, *newoutput; 390 const char *oldoutput, *sep1, *data; 391 size_t i; 392 int c; 393 394 output = NULL; 395 i = 1; 396 SQL_BIND_INT64(db, s, i, id); 397 SQL_BIND_INT64(db, s, i, outbit); 398 while (SQLITE_ROW == (c = sqlite3_step(s))) { 399 if (NULL == output) { 400 oldoutput = ""; 401 sep1 = ""; 402 } else { 403 oldoutput = output; 404 sep1 = " # "; 405 } 406 data = sqlite3_column_text(s, 1); 407 if (-1 == asprintf(&newoutput, "%s%s%s", 408 oldoutput, sep1, data)) { 409 perror(0); 410 exit((int)MANDOCLEVEL_SYSERR); 411 } 412 free(output); 413 output = newoutput; 414 } 415 if (SQLITE_DONE != c) 416 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 417 sqlite3_reset(s); 418 return(output); 419} 420 421/* 422 * Implement substring match as an application-defined SQL function. 423 * Using the SQL LIKE or GLOB operators instead would be a bad idea 424 * because that would require escaping metacharacters in the string 425 * being searched for. 426 */ 427static void 428sql_match(sqlite3_context *context, int argc, sqlite3_value **argv) 429{ 430 431 assert(2 == argc); 432 sqlite3_result_int(context, NULL != strcasestr( 433 (const char *)sqlite3_value_text(argv[1]), 434 (const char *)sqlite3_value_text(argv[0]))); 435} 436 437/* 438 * Implement regular expression match 439 * as an application-defined SQL function. 440 */ 441static void 442sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv) 443{ 444 445 assert(2 == argc); 446 sqlite3_result_int(context, !regexec( 447 (regex_t *)sqlite3_value_blob(argv[0]), 448 (const char *)sqlite3_value_text(argv[1]), 449 0, NULL, 0)); 450} 451 452static void 453sql_append(char **sql, size_t *sz, const char *newstr, int count) 454{ 455 size_t newsz; 456 457 newsz = 1 < count ? (size_t)count : strlen(newstr); 458 *sql = mandoc_realloc(*sql, *sz + newsz + 1); 459 if (1 < count) 460 memset(*sql + *sz, *newstr, (size_t)count); 461 else 462 memcpy(*sql + *sz, newstr, newsz); 463 *sz += newsz; 464 (*sql)[*sz] = '\0'; 465} 466 467/* 468 * Prepare the search SQL statement. 469 */ 470static char * 471sql_statement(const struct expr *e) 472{ 473 char *sql; 474 size_t sz; 475 int needop; 476 477 sql = mandoc_strdup("SELECT * FROM mpages WHERE "); 478 sz = strlen(sql); 479 480 for (needop = 0; NULL != e; e = e->next) { 481 if (e->and) 482 sql_append(&sql, &sz, " AND ", 1); 483 else if (needop) 484 sql_append(&sql, &sz, " OR ", 1); 485 if (e->open) 486 sql_append(&sql, &sz, "(", e->open); 487 sql_append(&sql, &sz, NULL == e->substr ? 488 "id IN (SELECT pageid FROM keys " 489 "WHERE key REGEXP ? AND bits & ?)" : 490 "id IN (SELECT pageid FROM keys " 491 "WHERE key MATCH ? AND bits & ?)", 1); 492 if (e->close) 493 sql_append(&sql, &sz, ")", e->close); 494 needop = 1; 495 } 496 497 return(sql); 498} 499 500/* 501 * Compile a set of string tokens into an expression. 502 * Tokens in "argv" are assumed to be individual expression atoms (e.g., 503 * "(", "foo=bar", etc.). 504 */ 505static struct expr * 506exprcomp(const struct mansearch *search, int argc, char *argv[]) 507{ 508 int i, toopen, logic, igncase, toclose; 509 struct expr *first, *next, *cur; 510 511 first = cur = NULL; 512 logic = igncase = toclose = 0; 513 toopen = 1; 514 515 for (i = 0; i < argc; i++) { 516 if (0 == strcmp("(", argv[i])) { 517 if (igncase) 518 goto fail; 519 toopen++; 520 toclose++; 521 continue; 522 } else if (0 == strcmp(")", argv[i])) { 523 if (toopen || logic || igncase || NULL == cur) 524 goto fail; 525 cur->close++; 526 if (0 > --toclose) 527 goto fail; 528 continue; 529 } else if (0 == strcmp("-a", argv[i])) { 530 if (toopen || logic || igncase || NULL == cur) 531 goto fail; 532 logic = 1; 533 continue; 534 } else if (0 == strcmp("-o", argv[i])) { 535 if (toopen || logic || igncase || NULL == cur) 536 goto fail; 537 logic = 2; 538 continue; 539 } else if (0 == strcmp("-i", argv[i])) { 540 if (igncase) 541 goto fail; 542 igncase = 1; 543 continue; 544 } 545 next = exprterm(search, argv[i], !igncase); 546 if (NULL == next) 547 goto fail; 548 next->open = toopen; 549 next->and = (1 == logic); 550 if (NULL != first) { 551 cur->next = next; 552 cur = next; 553 } else 554 cur = first = next; 555 toopen = logic = igncase = 0; 556 } 557 if (toopen || logic || igncase || toclose) 558 goto fail; 559 560 cur->close++; 561 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$"); 562 exprspec(cur, TYPE_sec, search->sec, "^%s$"); 563 564 return(first); 565 566fail: 567 if (NULL != first) 568 exprfree(first); 569 return(NULL); 570} 571 572static struct expr * 573exprspec(struct expr *cur, uint64_t key, const char *value, 574 const char *format) 575{ 576 char errbuf[BUFSIZ]; 577 char *cp; 578 int irc; 579 580 if (NULL == value) 581 return(cur); 582 583 if (-1 == asprintf(&cp, format, value)) { 584 perror(0); 585 exit((int)MANDOCLEVEL_SYSERR); 586 } 587 cur->next = mandoc_calloc(1, sizeof(struct expr)); 588 cur = cur->next; 589 cur->and = 1; 590 cur->bits = key; 591 if (0 != (irc = regcomp(&cur->regexp, cp, 592 REG_EXTENDED | REG_NOSUB | REG_ICASE))) { 593 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf)); 594 fprintf(stderr, "regcomp: %s\n", errbuf); 595 cur->substr = value; 596 } 597 free(cp); 598 return(cur); 599} 600 601static struct expr * 602exprterm(const struct mansearch *search, char *buf, int cs) 603{ 604 char errbuf[BUFSIZ]; 605 struct expr *e; 606 char *key, *v; 607 size_t i; 608 int irc; 609 610 if ('\0' == *buf) 611 return(NULL); 612 613 e = mandoc_calloc(1, sizeof(struct expr)); 614 615 /*"whatis" mode uses an opaque string and default fields. */ 616 617 if (MANSEARCH_WHATIS & search->flags) { 618 e->substr = buf; 619 e->bits = search->deftype; 620 return(e); 621 } 622 623 /* 624 * If no =~ is specified, search with equality over names and 625 * descriptions. 626 * If =~ begins the phrase, use name and description fields. 627 */ 628 629 if (NULL == (v = strpbrk(buf, "=~"))) { 630 e->substr = buf; 631 e->bits = search->deftype; 632 return(e); 633 } else if (v == buf) 634 e->bits = search->deftype; 635 636 if ('~' == *v++) { 637 if (0 != (irc = regcomp(&e->regexp, v, 638 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) { 639 regerror(irc, &e->regexp, errbuf, sizeof(errbuf)); 640 fprintf(stderr, "regcomp: %s\n", errbuf); 641 free(e); 642 return(NULL); 643 } 644 } else 645 e->substr = v; 646 v[-1] = '\0'; 647 648 /* 649 * Parse out all possible fields. 650 * If the field doesn't resolve, bail. 651 */ 652 653 while (NULL != (key = strsep(&buf, ","))) { 654 if ('\0' == *key) 655 continue; 656 i = 0; 657 while (types[i].bits && 658 strcasecmp(types[i].name, key)) 659 i++; 660 if (0 == types[i].bits) { 661 free(e); 662 return(NULL); 663 } 664 e->bits |= types[i].bits; 665 } 666 667 return(e); 668} 669 670static void 671exprfree(struct expr *p) 672{ 673 struct expr *pp; 674 675 while (NULL != p) { 676 pp = p->next; 677 free(p); 678 p = pp; 679 } 680} 681 682static void * 683hash_halloc(size_t sz, void *arg) 684{ 685 686 return(mandoc_calloc(sz, 1)); 687} 688 689static void * 690hash_alloc(size_t sz, void *arg) 691{ 692 693 return(mandoc_malloc(sz)); 694} 695 696static void 697hash_free(void *p, size_t sz, void *arg) 698{ 699 700 free(p); 701} 702