1/* $Vendor-Id: apropos_db.c,v 1.28 2011/12/25 14:58:39 schwarze Exp $ */ 2/* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#ifdef HAVE_CONFIG_H 19#include "config.h" 20#endif 21 22#include <assert.h> 23#include <fcntl.h> 24#include <regex.h> 25#include <stdarg.h> 26#include <stdint.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30 31#if defined(__linux__) 32# include <endian.h> 33# include <db_185.h> 34#elif defined(__APPLE__) 35# include <libkern/OSByteOrder.h> 36# include <db.h> 37#else 38# include <db.h> 39#endif 40 41#include "mandocdb.h" 42#include "apropos_db.h" 43#include "mandoc.h" 44 45struct rec { 46 struct res res; /* resulting record info */ 47 /* 48 * Maintain a binary tree for checking the uniqueness of `rec' 49 * when adding elements to the results array. 50 * Since the results array is dynamic, use offset in the array 51 * instead of a pointer to the structure. 52 */ 53 int lhs; 54 int rhs; 55 int matched; /* expression is true */ 56 int *matches; /* partial truth evaluations */ 57}; 58 59struct expr { 60 int regex; /* is regex? */ 61 int index; /* index in match array */ 62 uint64_t mask; /* type-mask */ 63 int and; /* is rhs of logical AND? */ 64 char *v; /* search value */ 65 regex_t re; /* compiled re, if regex */ 66 struct expr *next; /* next in sequence */ 67 struct expr *subexpr; 68}; 69 70struct type { 71 uint64_t mask; 72 const char *name; 73}; 74 75struct rectree { 76 struct rec *node; /* record array for dir tree */ 77 int len; /* length of record array */ 78}; 79 80static const struct type types[] = { 81 { TYPE_An, "An" }, 82 { TYPE_Ar, "Ar" }, 83 { TYPE_At, "At" }, 84 { TYPE_Bsx, "Bsx" }, 85 { TYPE_Bx, "Bx" }, 86 { TYPE_Cd, "Cd" }, 87 { TYPE_Cm, "Cm" }, 88 { TYPE_Dv, "Dv" }, 89 { TYPE_Dx, "Dx" }, 90 { TYPE_Em, "Em" }, 91 { TYPE_Er, "Er" }, 92 { TYPE_Ev, "Ev" }, 93 { TYPE_Fa, "Fa" }, 94 { TYPE_Fl, "Fl" }, 95 { TYPE_Fn, "Fn" }, 96 { TYPE_Fn, "Fo" }, 97 { TYPE_Ft, "Ft" }, 98 { TYPE_Fx, "Fx" }, 99 { TYPE_Ic, "Ic" }, 100 { TYPE_In, "In" }, 101 { TYPE_Lb, "Lb" }, 102 { TYPE_Li, "Li" }, 103 { TYPE_Lk, "Lk" }, 104 { TYPE_Ms, "Ms" }, 105 { TYPE_Mt, "Mt" }, 106 { TYPE_Nd, "Nd" }, 107 { TYPE_Nm, "Nm" }, 108 { TYPE_Nx, "Nx" }, 109 { TYPE_Ox, "Ox" }, 110 { TYPE_Pa, "Pa" }, 111 { TYPE_Rs, "Rs" }, 112 { TYPE_Sh, "Sh" }, 113 { TYPE_Ss, "Ss" }, 114 { TYPE_St, "St" }, 115 { TYPE_Sy, "Sy" }, 116 { TYPE_Tn, "Tn" }, 117 { TYPE_Va, "Va" }, 118 { TYPE_Va, "Vt" }, 119 { TYPE_Xr, "Xr" }, 120 { UINT64_MAX, "any" }, 121 { 0, NULL } 122}; 123 124static DB *btree_open(void); 125static int btree_read(const DBT *, const DBT *, 126 const struct mchars *, 127 uint64_t *, recno_t *, char **); 128static int expreval(const struct expr *, int *); 129static void exprexec(const struct expr *, 130 const char *, uint64_t, struct rec *); 131static int exprmark(const struct expr *, 132 const char *, uint64_t, int *); 133static struct expr *exprexpr(int, char *[], int *, int *, size_t *); 134static struct expr *exprterm(char *, int); 135static DB *index_open(void); 136static int index_read(const DBT *, const DBT *, int, 137 const struct mchars *, struct rec *); 138static void norm_string(const char *, 139 const struct mchars *, char **); 140static size_t norm_utf8(unsigned int, char[7]); 141static void recfree(struct rec *); 142static int single_search(struct rectree *, const struct opts *, 143 const struct expr *, size_t terms, 144 struct mchars *, int); 145 146/* 147 * Open the keyword mandoc-db database. 148 */ 149static DB * 150btree_open(void) 151{ 152 BTREEINFO info; 153 DB *db; 154 155 memset(&info, 0, sizeof(BTREEINFO)); 156 info.flags = R_DUP; 157 158 db = dbopen(MANDOC_DB, O_RDONLY, 0, DB_BTREE, &info); 159 if (NULL != db) 160 return(db); 161 162 return(NULL); 163} 164 165/* 166 * Read a keyword from the database and normalise it. 167 * Return 0 if the database is insane, else 1. 168 */ 169static int 170btree_read(const DBT *k, const DBT *v, const struct mchars *mc, 171 uint64_t *mask, recno_t *rec, char **buf) 172{ 173 uint64_t vbuf[2]; 174 175 /* Are our sizes sane? */ 176 if (k->size < 2 || sizeof(vbuf) != v->size) 177 return(0); 178 179 /* Is our string nil-terminated? */ 180 if ('\0' != ((const char *)k->data)[(int)k->size - 1]) 181 return(0); 182 183 norm_string((const char *)k->data, mc, buf); 184 memcpy(vbuf, v->data, v->size); 185 *mask = betoh64(vbuf[0]); 186 *rec = betoh64(vbuf[1]); 187 return(1); 188} 189 190/* 191 * Take a Unicode codepoint and produce its UTF-8 encoding. 192 * This isn't the best way to do this, but it works. 193 * The magic numbers are from the UTF-8 packaging. 194 * They're not as scary as they seem: read the UTF-8 spec for details. 195 */ 196static size_t 197norm_utf8(unsigned int cp, char out[7]) 198{ 199 int rc; 200 201 rc = 0; 202 203 if (cp <= 0x0000007F) { 204 rc = 1; 205 out[0] = (char)cp; 206 } else if (cp <= 0x000007FF) { 207 rc = 2; 208 out[0] = (cp >> 6 & 31) | 192; 209 out[1] = (cp & 63) | 128; 210 } else if (cp <= 0x0000FFFF) { 211 rc = 3; 212 out[0] = (cp >> 12 & 15) | 224; 213 out[1] = (cp >> 6 & 63) | 128; 214 out[2] = (cp & 63) | 128; 215 } else if (cp <= 0x001FFFFF) { 216 rc = 4; 217 out[0] = (cp >> 18 & 7) | 240; 218 out[1] = (cp >> 12 & 63) | 128; 219 out[2] = (cp >> 6 & 63) | 128; 220 out[3] = (cp & 63) | 128; 221 } else if (cp <= 0x03FFFFFF) { 222 rc = 5; 223 out[0] = (cp >> 24 & 3) | 248; 224 out[1] = (cp >> 18 & 63) | 128; 225 out[2] = (cp >> 12 & 63) | 128; 226 out[3] = (cp >> 6 & 63) | 128; 227 out[4] = (cp & 63) | 128; 228 } else if (cp <= 0x7FFFFFFF) { 229 rc = 6; 230 out[0] = (cp >> 30 & 1) | 252; 231 out[1] = (cp >> 24 & 63) | 128; 232 out[2] = (cp >> 18 & 63) | 128; 233 out[3] = (cp >> 12 & 63) | 128; 234 out[4] = (cp >> 6 & 63) | 128; 235 out[5] = (cp & 63) | 128; 236 } else 237 return(0); 238 239 out[rc] = '\0'; 240 return((size_t)rc); 241} 242 243/* 244 * Normalise strings from the index and database. 245 * These strings are escaped as defined by mandoc_char(7) along with 246 * other goop in mandoc.h (e.g., soft hyphens). 247 * This function normalises these into a nice UTF-8 string. 248 * Returns 0 if the database is fucked. 249 */ 250static void 251norm_string(const char *val, const struct mchars *mc, char **buf) 252{ 253 size_t sz, bsz; 254 char utfbuf[7]; 255 const char *seq, *cpp; 256 int len, u, pos; 257 enum mandoc_esc esc; 258 static const char res[] = { '\\', '\t', 259 ASCII_NBRSP, ASCII_HYPH, '\0' }; 260 261 /* Pre-allocate by the length of the input */ 262 263 bsz = strlen(val) + 1; 264 *buf = mandoc_realloc(*buf, bsz); 265 pos = 0; 266 267 while ('\0' != *val) { 268 /* 269 * Halt on the first escape sequence. 270 * This also halts on the end of string, in which case 271 * we just copy, fallthrough, and exit the loop. 272 */ 273 if ((sz = strcspn(val, res)) > 0) { 274 memcpy(&(*buf)[pos], val, sz); 275 pos += (int)sz; 276 val += (int)sz; 277 } 278 279 if (ASCII_HYPH == *val) { 280 (*buf)[pos++] = '-'; 281 val++; 282 continue; 283 } else if ('\t' == *val || ASCII_NBRSP == *val) { 284 (*buf)[pos++] = ' '; 285 val++; 286 continue; 287 } else if ('\\' != *val) 288 break; 289 290 /* Read past the slash. */ 291 292 val++; 293 u = 0; 294 295 /* 296 * Parse the escape sequence and see if it's a 297 * predefined character or special character. 298 */ 299 300 esc = mandoc_escape(&val, &seq, &len); 301 if (ESCAPE_ERROR == esc) 302 break; 303 304 /* 305 * XXX - this just does UTF-8, but we need to know 306 * beforehand whether we should do text substitution. 307 */ 308 309 switch (esc) { 310 case (ESCAPE_SPECIAL): 311 if (0 != (u = mchars_spec2cp(mc, seq, len))) 312 break; 313 /* FALLTHROUGH */ 314 default: 315 continue; 316 } 317 318 /* 319 * If we have a Unicode codepoint, try to convert that 320 * to a UTF-8 byte string. 321 */ 322 323 cpp = utfbuf; 324 if (0 == (sz = norm_utf8(u, utfbuf))) 325 continue; 326 327 /* Copy the rendered glyph into the stream. */ 328 329 sz = strlen(cpp); 330 bsz += sz; 331 332 *buf = mandoc_realloc(*buf, bsz); 333 334 memcpy(&(*buf)[pos], cpp, sz); 335 pos += (int)sz; 336 } 337 338 (*buf)[pos] = '\0'; 339} 340 341/* 342 * Open the filename-index mandoc-db database. 343 * Returns NULL if opening failed. 344 */ 345static DB * 346index_open(void) 347{ 348 DB *db; 349 350 db = dbopen(MANDOC_IDX, O_RDONLY, 0, DB_RECNO, NULL); 351 if (NULL != db) 352 return(db); 353 354 return(NULL); 355} 356 357/* 358 * Safely unpack from an index file record into the structure. 359 * Returns 1 if an entry was unpacked, 0 if the database is insane. 360 */ 361static int 362index_read(const DBT *key, const DBT *val, int index, 363 const struct mchars *mc, struct rec *rec) 364{ 365 size_t left; 366 char *np, *cp; 367 char type; 368 369#define INDEX_BREAD(_dst) \ 370 do { \ 371 if (NULL == (np = memchr(cp, '\0', left))) \ 372 return(0); \ 373 norm_string(cp, mc, &(_dst)); \ 374 left -= (np - cp) + 1; \ 375 cp = np + 1; \ 376 } while (/* CONSTCOND */ 0) 377 378 if (0 == (left = val->size)) 379 return(0); 380 381 cp = val->data; 382 assert(sizeof(recno_t) == key->size); 383 memcpy(&rec->res.rec, key->data, key->size); 384 rec->res.volume = index; 385 386 if ('d' == (type = *cp++)) 387 rec->res.type = RESTYPE_MDOC; 388 else if ('a' == type) 389 rec->res.type = RESTYPE_MAN; 390 else if ('c' == type) 391 rec->res.type = RESTYPE_CAT; 392 else 393 return(0); 394 395 left--; 396 INDEX_BREAD(rec->res.file); 397 INDEX_BREAD(rec->res.cat); 398 INDEX_BREAD(rec->res.title); 399 INDEX_BREAD(rec->res.arch); 400 INDEX_BREAD(rec->res.desc); 401 return(1); 402} 403 404/* 405 * Search mandocdb databases in paths for expression "expr". 406 * Filter out by "opts". 407 * Call "res" with the results, which may be zero. 408 * Return 0 if there was a database error, else return 1. 409 */ 410int 411apropos_search(int pathsz, char **paths, const struct opts *opts, 412 const struct expr *expr, size_t terms, void *arg, 413 void (*res)(struct res *, size_t, void *)) 414{ 415 struct rectree tree; 416 struct mchars *mc; 417 struct res *ress; 418 int i, mlen, rc; 419 420 memset(&tree, 0, sizeof(struct rectree)); 421 422 rc = 0; 423 mc = mchars_alloc(); 424 425 /* 426 * Main loop. Change into the directory containing manpage 427 * databases. Run our expession over each database in the set. 428 */ 429 430 for (i = 0; i < pathsz; i++) { 431 if (chdir(paths[i])) 432 continue; 433 if ( ! single_search(&tree, opts, expr, terms, mc, i)) 434 goto out; 435 } 436 437 /* 438 * Count matching files, transfer to a "clean" array, then feed 439 * them to the output handler. 440 */ 441 442 for (mlen = i = 0; i < tree.len; i++) 443 if (tree.node[i].matched) 444 mlen++; 445 446 ress = mandoc_malloc(mlen * sizeof(struct res)); 447 448 for (mlen = i = 0; i < tree.len; i++) 449 if (tree.node[i].matched) 450 memcpy(&ress[mlen++], &tree.node[i].res, 451 sizeof(struct res)); 452 453 (*res)(ress, mlen, arg); 454 free(ress); 455 456 rc = 1; 457out: 458 for (i = 0; i < tree.len; i++) 459 recfree(&tree.node[i]); 460 461 free(tree.node); 462 mchars_free(mc); 463 return(rc); 464} 465 466static int 467single_search(struct rectree *tree, const struct opts *opts, 468 const struct expr *expr, size_t terms, 469 struct mchars *mc, int vol) 470{ 471 int root, leaf, ch; 472 DBT key, val; 473 DB *btree, *idx; 474 char *buf; 475 struct rec *rs; 476 struct rec r; 477 uint64_t mask; 478 recno_t rec; 479 480 root = -1; 481 leaf = -1; 482 btree = NULL; 483 idx = NULL; 484 buf = NULL; 485 rs = tree->node; 486 487 memset(&r, 0, sizeof(struct rec)); 488 489 if (NULL == (btree = btree_open())) 490 return(1); 491 492 if (NULL == (idx = index_open())) { 493 (*btree->close)(btree); 494 return(1); 495 } 496 497 while (0 == (ch = (*btree->seq)(btree, &key, &val, R_NEXT))) { 498 if ( ! btree_read(&key, &val, mc, &mask, &rec, &buf)) 499 break; 500 501 /* 502 * See if this keyword record matches any of the 503 * expressions we have stored. 504 */ 505 if ( ! exprmark(expr, buf, mask, NULL)) 506 continue; 507 508 /* 509 * O(log n) scan for prior records. Since a record 510 * number is unbounded, this has decent performance over 511 * a complex hash function. 512 */ 513 514 for (leaf = root; leaf >= 0; ) 515 if (rec > rs[leaf].res.rec && 516 rs[leaf].rhs >= 0) 517 leaf = rs[leaf].rhs; 518 else if (rec < rs[leaf].res.rec && 519 rs[leaf].lhs >= 0) 520 leaf = rs[leaf].lhs; 521 else 522 break; 523 524 /* 525 * If we find a record, see if it has already evaluated 526 * to true. If it has, great, just keep going. If not, 527 * try to evaluate it now and continue anyway. 528 */ 529 530 if (leaf >= 0 && rs[leaf].res.rec == rec) { 531 if (0 == rs[leaf].matched) 532 exprexec(expr, buf, mask, &rs[leaf]); 533 continue; 534 } 535 536 /* 537 * We have a new file to examine. 538 * Extract the manpage's metadata from the index 539 * database, then begin partial evaluation. 540 */ 541 542 key.data = &rec; 543 key.size = sizeof(recno_t); 544 545 if (0 != (*idx->get)(idx, &key, &val, 0)) 546 break; 547 548 r.lhs = r.rhs = -1; 549 if ( ! index_read(&key, &val, vol, mc, &r)) 550 break; 551 552 /* XXX: this should be elsewhere, I guess? */ 553 554 if (opts->cat && strcasecmp(opts->cat, r.res.cat)) 555 continue; 556 557 if (opts->arch && *r.res.arch) 558 if (strcasecmp(opts->arch, r.res.arch)) 559 continue; 560 561 tree->node = rs = mandoc_realloc 562 (rs, (tree->len + 1) * sizeof(struct rec)); 563 564 memcpy(&rs[tree->len], &r, sizeof(struct rec)); 565 memset(&r, 0, sizeof(struct rec)); 566 rs[tree->len].matches = 567 mandoc_calloc(terms, sizeof(int)); 568 569 exprexec(expr, buf, mask, &rs[tree->len]); 570 571 /* Append to our tree. */ 572 573 if (leaf >= 0) { 574 if (rec > rs[leaf].res.rec) 575 rs[leaf].rhs = tree->len; 576 else 577 rs[leaf].lhs = tree->len; 578 } else 579 root = tree->len; 580 581 tree->len++; 582 } 583 584 (*btree->close)(btree); 585 (*idx->close)(idx); 586 587 free(buf); 588 recfree(&r); 589 return(1 == ch); 590} 591 592static void 593recfree(struct rec *rec) 594{ 595 596 free(rec->res.file); 597 free(rec->res.cat); 598 free(rec->res.title); 599 free(rec->res.arch); 600 free(rec->res.desc); 601 602 free(rec->matches); 603} 604 605/* 606 * Compile a list of straight-up terms. 607 * The arguments are re-written into ~[[:<:]]term[[:>:]], or "term" 608 * surrounded by word boundaries, then pumped through exprterm(). 609 * Terms are case-insensitive. 610 * This emulates whatis(1) behaviour. 611 */ 612struct expr * 613termcomp(int argc, char *argv[], size_t *tt) 614{ 615 char *buf; 616 int pos; 617 struct expr *e, *next; 618 size_t sz; 619 620 buf = NULL; 621 e = NULL; 622 *tt = 0; 623 624 for (pos = argc - 1; pos >= 0; pos--) { 625 sz = strlen(argv[pos]) + 18; 626 buf = mandoc_realloc(buf, sz); 627 strlcpy(buf, "Nm~[[:<:]]", sz); 628 strlcat(buf, argv[pos], sz); 629 strlcat(buf, "[[:>:]]", sz); 630 if (NULL == (next = exprterm(buf, 0))) { 631 free(buf); 632 exprfree(e); 633 return(NULL); 634 } 635 next->next = e; 636 e = next; 637 (*tt)++; 638 } 639 640 free(buf); 641 return(e); 642} 643 644/* 645 * Compile a sequence of logical expressions. 646 * See apropos.1 for a grammar of this sequence. 647 */ 648struct expr * 649exprcomp(int argc, char *argv[], size_t *tt) 650{ 651 int pos, lvl; 652 struct expr *e; 653 654 pos = lvl = 0; 655 *tt = 0; 656 657 e = exprexpr(argc, argv, &pos, &lvl, tt); 658 659 if (0 == lvl && pos >= argc) 660 return(e); 661 662 exprfree(e); 663 return(NULL); 664} 665 666/* 667 * Compile an array of tokens into an expression. 668 * An informal expression grammar is defined in apropos(1). 669 * Return NULL if we fail doing so. All memory will be cleaned up. 670 * Return the root of the expression sequence if alright. 671 */ 672static struct expr * 673exprexpr(int argc, char *argv[], int *pos, int *lvl, size_t *tt) 674{ 675 struct expr *e, *first, *next; 676 int log; 677 678 first = next = NULL; 679 680 for ( ; *pos < argc; (*pos)++) { 681 e = next; 682 683 /* 684 * Close out a subexpression. 685 */ 686 687 if (NULL != e && 0 == strcmp(")", argv[*pos])) { 688 if (--(*lvl) < 0) 689 goto err; 690 break; 691 } 692 693 /* 694 * Small note: if we're just starting, don't let "-a" 695 * and "-o" be considered logical operators: they're 696 * just tokens unless pairwise joining, in which case we 697 * record their existence (or assume "OR"). 698 */ 699 log = 0; 700 701 if (NULL != e && 0 == strcmp("-a", argv[*pos])) 702 log = 1; 703 else if (NULL != e && 0 == strcmp("-o", argv[*pos])) 704 log = 2; 705 706 if (log > 0 && ++(*pos) >= argc) 707 goto err; 708 709 /* 710 * Now we parse the term part. This can begin with 711 * "-i", in which case the expression is case 712 * insensitive. 713 */ 714 715 if (0 == strcmp("(", argv[*pos])) { 716 ++(*pos); 717 ++(*lvl); 718 next = mandoc_calloc(1, sizeof(struct expr)); 719 next->subexpr = exprexpr(argc, argv, pos, lvl, tt); 720 if (NULL == next->subexpr) { 721 free(next); 722 next = NULL; 723 } 724 } else if (0 == strcmp("-i", argv[*pos])) { 725 if (++(*pos) >= argc) 726 goto err; 727 next = exprterm(argv[*pos], 0); 728 } else 729 next = exprterm(argv[*pos], 1); 730 731 if (NULL == next) 732 goto err; 733 734 next->and = log == 1; 735 next->index = (int)(*tt)++; 736 737 /* Append to our chain of expressions. */ 738 739 if (NULL == first) { 740 assert(NULL == e); 741 first = next; 742 } else { 743 assert(NULL != e); 744 e->next = next; 745 } 746 } 747 748 return(first); 749err: 750 exprfree(first); 751 return(NULL); 752} 753 754/* 755 * Parse a terminal expression with the grammar as defined in 756 * apropos(1). 757 * Return NULL if we fail the parse. 758 */ 759static struct expr * 760exprterm(char *buf, int cs) 761{ 762 struct expr e; 763 struct expr *p; 764 char *key; 765 int i; 766 767 memset(&e, 0, sizeof(struct expr)); 768 769 /* Choose regex or substring match. */ 770 771 if (NULL == (e.v = strpbrk(buf, "=~"))) { 772 e.regex = 0; 773 e.v = buf; 774 } else { 775 e.regex = '~' == *e.v; 776 *e.v++ = '\0'; 777 } 778 779 /* Determine the record types to search for. */ 780 781 e.mask = 0; 782 if (buf < e.v) { 783 while (NULL != (key = strsep(&buf, ","))) { 784 i = 0; 785 while (types[i].mask && 786 strcmp(types[i].name, key)) 787 i++; 788 e.mask |= types[i].mask; 789 } 790 } 791 if (0 == e.mask) 792 e.mask = TYPE_Nm | TYPE_Nd; 793 794 if (e.regex) { 795 i = REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE); 796 if (regcomp(&e.re, e.v, i)) 797 return(NULL); 798 } 799 800 e.v = mandoc_strdup(e.v); 801 802 p = mandoc_calloc(1, sizeof(struct expr)); 803 memcpy(p, &e, sizeof(struct expr)); 804 return(p); 805} 806 807void 808exprfree(struct expr *p) 809{ 810 struct expr *pp; 811 812 while (NULL != p) { 813 if (p->subexpr) 814 exprfree(p->subexpr); 815 if (p->regex) 816 regfree(&p->re); 817 free(p->v); 818 pp = p->next; 819 free(p); 820 p = pp; 821 } 822} 823 824static int 825exprmark(const struct expr *p, const char *cp, 826 uint64_t mask, int *ms) 827{ 828 829 for ( ; p; p = p->next) { 830 if (p->subexpr) { 831 if (exprmark(p->subexpr, cp, mask, ms)) 832 return(1); 833 continue; 834 } else if ( ! (mask & p->mask)) 835 continue; 836 837 if (p->regex) { 838 if (regexec(&p->re, cp, 0, NULL, 0)) 839 continue; 840 } else if (NULL == strcasestr(cp, p->v)) 841 continue; 842 843 if (NULL == ms) 844 return(1); 845 else 846 ms[p->index] = 1; 847 } 848 849 return(0); 850} 851 852static int 853expreval(const struct expr *p, int *ms) 854{ 855 int match; 856 857 /* 858 * AND has precedence over OR. Analysis is left-right, though 859 * it doesn't matter because there are no side-effects. 860 * Thus, step through pairwise ANDs and accumulate their Boolean 861 * evaluation. If we encounter a single true AND collection or 862 * standalone term, the whole expression is true (by definition 863 * of OR). 864 */ 865 866 for (match = 0; p && ! match; p = p->next) { 867 /* Evaluate a subexpression, if applicable. */ 868 if (p->subexpr && ! ms[p->index]) 869 ms[p->index] = expreval(p->subexpr, ms); 870 871 match = ms[p->index]; 872 for ( ; p->next && p->next->and; p = p->next) { 873 /* Evaluate a subexpression, if applicable. */ 874 if (p->next->subexpr && ! ms[p->next->index]) 875 ms[p->next->index] = 876 expreval(p->next->subexpr, ms); 877 match = match && ms[p->next->index]; 878 } 879 } 880 881 return(match); 882} 883 884/* 885 * First, update the array of terms for which this expression evaluates 886 * to true. 887 * Second, logically evaluate all terms over the updated array of truth 888 * values. 889 * If this evaluates to true, mark the expression as satisfied. 890 */ 891static void 892exprexec(const struct expr *e, const char *cp, 893 uint64_t mask, struct rec *r) 894{ 895 896 assert(0 == r->matched); 897 exprmark(e, cp, mask, r->matches); 898 r->matched = expreval(e, r->matches); 899} 900