mansearch.c revision 1.12
1/* $Id: mansearch.c,v 1.12 2014/01/19 23:09:13 schwarze Exp $ */ 2/* 3 * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <assert.h> 19#include <fcntl.h> 20#include <getopt.h> 21#include <limits.h> 22#include <regex.h> 23#include <stdio.h> 24#include <stdint.h> 25#include <stddef.h> 26#include <stdlib.h> 27#include <string.h> 28#include <unistd.h> 29 30#include <ohash.h> 31#include <sqlite3.h> 32 33#include "mandoc.h" 34#include "manpath.h" 35#include "mansearch.h" 36 37extern int mansearch_keymax; 38extern const char *const mansearch_keynames[]; 39 40#define SQL_BIND_TEXT(_db, _s, _i, _v) \ 41 do { if (SQLITE_OK != sqlite3_bind_text \ 42 ((_s), (_i)++, (_v), -1, SQLITE_STATIC)) \ 43 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 44 } while (0) 45#define SQL_BIND_INT64(_db, _s, _i, _v) \ 46 do { if (SQLITE_OK != sqlite3_bind_int64 \ 47 ((_s), (_i)++, (_v))) \ 48 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 49 } while (0) 50#define SQL_BIND_BLOB(_db, _s, _i, _v) \ 51 do { if (SQLITE_OK != sqlite3_bind_blob \ 52 ((_s), (_i)++, (&_v), sizeof(_v), SQLITE_STATIC)) \ 53 fprintf(stderr, "%s\n", sqlite3_errmsg((_db))); \ 54 } while (0) 55 56struct expr { 57 uint64_t bits; /* type-mask */ 58 const char *substr; /* to search for, if applicable */ 59 regex_t regexp; /* compiled regexp, if applicable */ 60 int open; /* opening parentheses before */ 61 int and; /* logical AND before */ 62 int close; /* closing parentheses after */ 63 struct expr *next; /* next in sequence */ 64}; 65 66struct match { 67 uint64_t id; /* identifier in database */ 68 int form; /* 0 == catpage */ 69}; 70 71static void buildnames(struct manpage *, sqlite3 *, 72 sqlite3_stmt *, uint64_t, 73 const char *, int form); 74static char *buildoutput(sqlite3 *, sqlite3_stmt *, 75 uint64_t, uint64_t); 76static void *hash_alloc(size_t, void *); 77static void hash_free(void *, size_t, void *); 78static void *hash_halloc(size_t, void *); 79static struct expr *exprcomp(const struct mansearch *, 80 int, char *[]); 81static void exprfree(struct expr *); 82static struct expr *exprspec(struct expr *, uint64_t, 83 const char *, const char *); 84static struct expr *exprterm(const struct mansearch *, char *, int); 85static void sql_append(char **sql, size_t *sz, 86 const char *newstr, int count); 87static void sql_match(sqlite3_context *context, 88 int argc, sqlite3_value **argv); 89static void sql_regexp(sqlite3_context *context, 90 int argc, sqlite3_value **argv); 91static char *sql_statement(const struct expr *); 92 93int 94mansearch(const struct mansearch *search, 95 const struct manpaths *paths, 96 int argc, char *argv[], 97 const char *outkey, 98 struct manpage **res, size_t *sz) 99{ 100 int fd, rc, c, indexbit; 101 int64_t id; 102 uint64_t outbit, iterbit; 103 char buf[PATH_MAX]; 104 char *sql; 105 struct manpage *mpage; 106 struct expr *e, *ep; 107 sqlite3 *db; 108 sqlite3_stmt *s, *s2; 109 struct match *mp; 110 struct ohash_info info; 111 struct ohash htab; 112 unsigned int idx; 113 size_t i, j, cur, maxres; 114 115 memset(&info, 0, sizeof(struct ohash_info)); 116 117 info.halloc = hash_halloc; 118 info.alloc = hash_alloc; 119 info.hfree = hash_free; 120 info.key_offset = offsetof(struct match, id); 121 122 *sz = cur = maxres = 0; 123 sql = NULL; 124 *res = NULL; 125 fd = -1; 126 e = NULL; 127 rc = 0; 128 129 if (0 == argc) 130 goto out; 131 if (NULL == (e = exprcomp(search, argc, argv))) 132 goto out; 133 134 outbit = 0; 135 if (NULL != outkey) { 136 for (indexbit = 0, iterbit = 1; 137 indexbit < mansearch_keymax; 138 indexbit++, iterbit <<= 1) { 139 if (0 == strcasecmp(outkey, 140 mansearch_keynames[indexbit])) { 141 outbit = iterbit; 142 break; 143 } 144 } 145 } 146 147 /* 148 * Save a descriptor to the current working directory. 149 * Since pathnames in the "paths" variable might be relative, 150 * and we'll be chdir()ing into them, we need to keep a handle 151 * on our current directory from which to start the chdir(). 152 */ 153 154 if (NULL == getcwd(buf, PATH_MAX)) { 155 perror(NULL); 156 goto out; 157 } else if (-1 == (fd = open(buf, O_RDONLY, 0))) { 158 perror(buf); 159 goto out; 160 } 161 162 sql = sql_statement(e); 163 164 /* 165 * Loop over the directories (containing databases) for us to 166 * search. 167 * Don't let missing/bad databases/directories phase us. 168 * In each, try to open the resident database and, if it opens, 169 * scan it for our match expression. 170 */ 171 172 for (i = 0; i < paths->sz; i++) { 173 if (-1 == fchdir(fd)) { 174 perror(buf); 175 free(*res); 176 break; 177 } else if (-1 == chdir(paths->paths[i])) { 178 perror(paths->paths[i]); 179 continue; 180 } 181 182 c = sqlite3_open_v2 183 (MANDOC_DB, &db, 184 SQLITE_OPEN_READONLY, NULL); 185 186 if (SQLITE_OK != c) { 187 perror(MANDOC_DB); 188 sqlite3_close(db); 189 continue; 190 } 191 192 /* 193 * Define the SQL functions for substring 194 * and regular expression matching. 195 */ 196 197 c = sqlite3_create_function(db, "match", 2, 198 SQLITE_ANY, NULL, sql_match, NULL, NULL); 199 assert(SQLITE_OK == c); 200 c = sqlite3_create_function(db, "regexp", 2, 201 SQLITE_ANY, NULL, sql_regexp, NULL, NULL); 202 assert(SQLITE_OK == c); 203 204 j = 1; 205 c = sqlite3_prepare_v2(db, sql, -1, &s, NULL); 206 if (SQLITE_OK != c) 207 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 208 209 for (ep = e; NULL != ep; ep = ep->next) { 210 if (NULL == ep->substr) { 211 SQL_BIND_BLOB(db, s, j, ep->regexp); 212 } else 213 SQL_BIND_TEXT(db, s, j, ep->substr); 214 SQL_BIND_INT64(db, s, j, ep->bits); 215 } 216 217 memset(&htab, 0, sizeof(struct ohash)); 218 ohash_init(&htab, 4, &info); 219 220 /* 221 * Hash each entry on its [unique] document identifier. 222 * This is a uint64_t. 223 * Instead of using a hash function, simply convert the 224 * uint64_t to a uint32_t, the hash value's type. 225 * This gives good performance and preserves the 226 * distribution of buckets in the table. 227 */ 228 while (SQLITE_ROW == (c = sqlite3_step(s))) { 229 id = sqlite3_column_int64(s, 1); 230 idx = ohash_lookup_memory 231 (&htab, (char *)&id, 232 sizeof(uint64_t), (uint32_t)id); 233 234 if (NULL != ohash_find(&htab, idx)) 235 continue; 236 237 mp = mandoc_calloc(1, sizeof(struct match)); 238 mp->id = id; 239 mp->form = sqlite3_column_int(s, 0); 240 ohash_insert(&htab, idx, mp); 241 } 242 243 if (SQLITE_DONE != c) 244 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 245 246 sqlite3_finalize(s); 247 248 c = sqlite3_prepare_v2(db, 249 "SELECT * FROM mlinks WHERE pageid=?", 250 -1, &s, NULL); 251 if (SQLITE_OK != c) 252 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 253 254 c = sqlite3_prepare_v2(db, 255 "SELECT * FROM keys WHERE pageid=? AND bits & ?", 256 -1, &s2, NULL); 257 if (SQLITE_OK != c) 258 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 259 260 for (mp = ohash_first(&htab, &idx); 261 NULL != mp; 262 mp = ohash_next(&htab, &idx)) { 263 if (cur + 1 > maxres) { 264 maxres += 1024; 265 *res = mandoc_realloc 266 (*res, maxres * sizeof(struct manpage)); 267 } 268 mpage = *res + cur; 269 mpage->form = mp->form; 270 buildnames(mpage, db, s, mp->id, 271 paths->paths[i], mp->form); 272 mpage->output = outbit ? 273 buildoutput(db, s2, mp->id, outbit) : NULL; 274 275 free(mp); 276 cur++; 277 } 278 279 sqlite3_finalize(s); 280 sqlite3_finalize(s2); 281 sqlite3_close(db); 282 ohash_delete(&htab); 283 } 284 rc = 1; 285out: 286 exprfree(e); 287 if (-1 != fd) 288 close(fd); 289 free(sql); 290 *sz = cur; 291 return(rc); 292} 293 294static void 295buildnames(struct manpage *mpage, sqlite3 *db, sqlite3_stmt *s, 296 uint64_t id, const char *path, int form) 297{ 298 char *newnames; 299 const char *oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec; 300 size_t i; 301 int c; 302 303 mpage->names = NULL; 304 i = 1; 305 SQL_BIND_INT64(db, s, i, id); 306 while (SQLITE_ROW == (c = sqlite3_step(s))) { 307 308 /* Assemble the list of names. */ 309 310 if (NULL == mpage->names) { 311 oldnames = ""; 312 sep1 = ""; 313 } else { 314 oldnames = mpage->names; 315 sep1 = ", "; 316 } 317 sec = sqlite3_column_text(s, 0); 318 arch = sqlite3_column_text(s, 1); 319 name = sqlite3_column_text(s, 2); 320 sep2 = '\0' == *arch ? "" : "/"; 321 if (-1 == asprintf(&newnames, "%s%s%s(%s%s%s)", 322 oldnames, sep1, name, sec, sep2, arch)) { 323 perror(0); 324 exit((int)MANDOCLEVEL_SYSERR); 325 } 326 free(mpage->names); 327 mpage->names = newnames; 328 329 /* Also save the first file name encountered. */ 330 331 if (NULL != mpage->file) 332 continue; 333 334 if (form) { 335 sep1 = "man"; 336 fsec = sec; 337 } else { 338 sep1 = "cat"; 339 fsec = "0"; 340 } 341 if (-1 == asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s", 342 path, sep1, sec, sep2, arch, name, fsec)) { 343 perror(0); 344 exit((int)MANDOCLEVEL_SYSERR); 345 } 346 } 347 if (SQLITE_DONE != c) 348 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 349 sqlite3_reset(s); 350} 351 352static char * 353buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t id, uint64_t outbit) 354{ 355 char *output, *newoutput; 356 const char *oldoutput, *sep1, *data; 357 size_t i; 358 int c; 359 360 output = NULL; 361 i = 1; 362 SQL_BIND_INT64(db, s, i, id); 363 SQL_BIND_INT64(db, s, i, outbit); 364 while (SQLITE_ROW == (c = sqlite3_step(s))) { 365 if (NULL == output) { 366 oldoutput = ""; 367 sep1 = ""; 368 } else { 369 oldoutput = output; 370 sep1 = " # "; 371 } 372 data = sqlite3_column_text(s, 1); 373 if (-1 == asprintf(&newoutput, "%s%s%s", 374 oldoutput, sep1, data)) { 375 perror(0); 376 exit((int)MANDOCLEVEL_SYSERR); 377 } 378 free(output); 379 output = newoutput; 380 } 381 if (SQLITE_DONE != c) 382 fprintf(stderr, "%s\n", sqlite3_errmsg(db)); 383 sqlite3_reset(s); 384 return(output); 385} 386 387/* 388 * Implement substring match as an application-defined SQL function. 389 * Using the SQL LIKE or GLOB operators instead would be a bad idea 390 * because that would require escaping metacharacters in the string 391 * being searched for. 392 */ 393static void 394sql_match(sqlite3_context *context, int argc, sqlite3_value **argv) 395{ 396 397 assert(2 == argc); 398 sqlite3_result_int(context, NULL != strcasestr( 399 (const char *)sqlite3_value_text(argv[1]), 400 (const char *)sqlite3_value_text(argv[0]))); 401} 402 403/* 404 * Implement regular expression match 405 * as an application-defined SQL function. 406 */ 407static void 408sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv) 409{ 410 411 assert(2 == argc); 412 sqlite3_result_int(context, !regexec( 413 (regex_t *)sqlite3_value_blob(argv[0]), 414 (const char *)sqlite3_value_text(argv[1]), 415 0, NULL, 0)); 416} 417 418static void 419sql_append(char **sql, size_t *sz, const char *newstr, int count) 420{ 421 size_t newsz; 422 423 newsz = 1 < count ? (size_t)count : strlen(newstr); 424 *sql = mandoc_realloc(*sql, *sz + newsz + 1); 425 if (1 < count) 426 memset(*sql + *sz, *newstr, (size_t)count); 427 else 428 memcpy(*sql + *sz, newstr, newsz); 429 *sz += newsz; 430 (*sql)[*sz] = '\0'; 431} 432 433/* 434 * Prepare the search SQL statement. 435 */ 436static char * 437sql_statement(const struct expr *e) 438{ 439 char *sql; 440 size_t sz; 441 int needop; 442 443 sql = mandoc_strdup("SELECT * FROM mpages WHERE "); 444 sz = strlen(sql); 445 446 for (needop = 0; NULL != e; e = e->next) { 447 if (e->and) 448 sql_append(&sql, &sz, " AND ", 1); 449 else if (needop) 450 sql_append(&sql, &sz, " OR ", 1); 451 if (e->open) 452 sql_append(&sql, &sz, "(", e->open); 453 sql_append(&sql, &sz, NULL == e->substr ? 454 "id IN (SELECT pageid FROM keys " 455 "WHERE key REGEXP ? AND bits & ?)" : 456 "id IN (SELECT pageid FROM keys " 457 "WHERE key MATCH ? AND bits & ?)", 1); 458 if (e->close) 459 sql_append(&sql, &sz, ")", e->close); 460 needop = 1; 461 } 462 463 return(sql); 464} 465 466/* 467 * Compile a set of string tokens into an expression. 468 * Tokens in "argv" are assumed to be individual expression atoms (e.g., 469 * "(", "foo=bar", etc.). 470 */ 471static struct expr * 472exprcomp(const struct mansearch *search, int argc, char *argv[]) 473{ 474 int i, toopen, logic, igncase, toclose; 475 struct expr *first, *next, *cur; 476 477 first = cur = NULL; 478 logic = igncase = toclose = 0; 479 toopen = 1; 480 481 for (i = 0; i < argc; i++) { 482 if (0 == strcmp("(", argv[i])) { 483 if (igncase) 484 goto fail; 485 toopen++; 486 toclose++; 487 continue; 488 } else if (0 == strcmp(")", argv[i])) { 489 if (toopen || logic || igncase || NULL == cur) 490 goto fail; 491 cur->close++; 492 if (0 > --toclose) 493 goto fail; 494 continue; 495 } else if (0 == strcmp("-a", argv[i])) { 496 if (toopen || logic || igncase || NULL == cur) 497 goto fail; 498 logic = 1; 499 continue; 500 } else if (0 == strcmp("-o", argv[i])) { 501 if (toopen || logic || igncase || NULL == cur) 502 goto fail; 503 logic = 2; 504 continue; 505 } else if (0 == strcmp("-i", argv[i])) { 506 if (igncase) 507 goto fail; 508 igncase = 1; 509 continue; 510 } 511 next = exprterm(search, argv[i], !igncase); 512 if (NULL == next) 513 goto fail; 514 next->open = toopen; 515 next->and = (1 == logic); 516 if (NULL != first) { 517 cur->next = next; 518 cur = next; 519 } else 520 cur = first = next; 521 toopen = logic = igncase = 0; 522 } 523 if (toopen || logic || igncase || toclose) 524 goto fail; 525 526 cur->close++; 527 cur = exprspec(cur, TYPE_arch, search->arch, "^(%s|any)$"); 528 exprspec(cur, TYPE_sec, search->sec, "^%s$"); 529 530 return(first); 531 532fail: 533 if (NULL != first) 534 exprfree(first); 535 return(NULL); 536} 537 538static struct expr * 539exprspec(struct expr *cur, uint64_t key, const char *value, 540 const char *format) 541{ 542 char errbuf[BUFSIZ]; 543 char *cp; 544 int irc; 545 546 if (NULL == value) 547 return(cur); 548 549 if (-1 == asprintf(&cp, format, value)) { 550 perror(0); 551 exit((int)MANDOCLEVEL_SYSERR); 552 } 553 cur->next = mandoc_calloc(1, sizeof(struct expr)); 554 cur = cur->next; 555 cur->and = 1; 556 cur->bits = key; 557 if (0 != (irc = regcomp(&cur->regexp, cp, 558 REG_EXTENDED | REG_NOSUB | REG_ICASE))) { 559 regerror(irc, &cur->regexp, errbuf, sizeof(errbuf)); 560 fprintf(stderr, "regcomp: %s\n", errbuf); 561 cur->substr = value; 562 } 563 free(cp); 564 return(cur); 565} 566 567static struct expr * 568exprterm(const struct mansearch *search, char *buf, int cs) 569{ 570 char errbuf[BUFSIZ]; 571 struct expr *e; 572 char *key, *v; 573 uint64_t iterbit; 574 int i, irc; 575 576 if ('\0' == *buf) 577 return(NULL); 578 579 e = mandoc_calloc(1, sizeof(struct expr)); 580 581 /*"whatis" mode uses an opaque string and default fields. */ 582 583 if (MANSEARCH_WHATIS & search->flags) { 584 e->substr = buf; 585 e->bits = search->deftype; 586 return(e); 587 } 588 589 /* 590 * If no =~ is specified, search with equality over names and 591 * descriptions. 592 * If =~ begins the phrase, use name and description fields. 593 */ 594 595 if (NULL == (v = strpbrk(buf, "=~"))) { 596 e->substr = buf; 597 e->bits = search->deftype; 598 return(e); 599 } else if (v == buf) 600 e->bits = search->deftype; 601 602 if ('~' == *v++) { 603 if (NULL != strstr(buf, "arch")) 604 cs = 0; 605 if (0 != (irc = regcomp(&e->regexp, v, 606 REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE)))) { 607 regerror(irc, &e->regexp, errbuf, sizeof(errbuf)); 608 fprintf(stderr, "regcomp: %s\n", errbuf); 609 free(e); 610 return(NULL); 611 } 612 } else 613 e->substr = v; 614 v[-1] = '\0'; 615 616 /* 617 * Parse out all possible fields. 618 * If the field doesn't resolve, bail. 619 */ 620 621 while (NULL != (key = strsep(&buf, ","))) { 622 if ('\0' == *key) 623 continue; 624 for (i = 0, iterbit = 1; 625 i < mansearch_keymax; 626 i++, iterbit <<= 1) { 627 if (0 == strcasecmp(key, 628 mansearch_keynames[i])) { 629 e->bits |= iterbit; 630 break; 631 } 632 } 633 if (i == mansearch_keymax) { 634 if (strcasecmp(key, "any")) { 635 free(e); 636 return(NULL); 637 } 638 e->bits |= ~0ULL; 639 } 640 } 641 642 return(e); 643} 644 645static void 646exprfree(struct expr *p) 647{ 648 struct expr *pp; 649 650 while (NULL != p) { 651 pp = p->next; 652 free(p); 653 p = pp; 654 } 655} 656 657static void * 658hash_halloc(size_t sz, void *arg) 659{ 660 661 return(mandoc_calloc(sz, 1)); 662} 663 664static void * 665hash_alloc(size_t sz, void *arg) 666{ 667 668 return(mandoc_malloc(sz)); 669} 670 671static void 672hash_free(void *p, size_t sz, void *arg) 673{ 674 675 free(p); 676} 677