makemandb.c revision 1.1
1/* $NetBSD */ 2/* 3 * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com> 4 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/cdefs.h> 20__RCSID("$NetBSD: makemandb.c,v 1.1 2012/02/07 19:13:32 joerg Exp $"); 21 22#include <sys/stat.h> 23#include <sys/types.h> 24 25#include <assert.h> 26#include <ctype.h> 27#include <dirent.h> 28#include <err.h> 29#include <md5.h> 30#include <stdio.h> 31#include <stdlib.h> 32#include <string.h> 33#include <unistd.h> 34#include <util.h> 35 36#include "apropos-utils.h" 37#include "man.h" 38#include "mandoc.h" 39#include "mdoc.h" 40#include "sqlite3.h" 41 42#define BUFLEN 1024 43#define MDOC 0 //If the page is of mdoc(7) type 44#define MAN 1 //If the page is of man(7) type 45 46/* 47 * A data structure for holding section specific data. 48 */ 49typedef struct secbuff { 50 char *data; 51 size_t buflen; //Total length of buffer allocated initially 52 size_t offset; // Current offset in the buffer. 53} secbuff; 54 55typedef struct makemandb_flags { 56 int optimize; 57 int limit; // limit the indexing to only NAME section 58 int recreate; // Database was created from scratch 59 int verbosity; // 0: quiet, 1: default, 2: verbose 60} makemandb_flags; 61 62typedef struct mandb_rec { 63 /* Fields for mandb table */ 64 char *name; // for storing the name of the man page 65 char *name_desc; // for storing the one line description (.Nd) 66 secbuff desc; // for storing the DESCRIPTION section 67 secbuff lib; // for the LIBRARY section 68 secbuff return_vals; // RETURN VALUES 69 secbuff env; // ENVIRONMENT 70 secbuff files; // FILES 71 secbuff exit_status; // EXIT STATUS 72 secbuff diagnostics; // DIAGNOSTICS 73 secbuff errors; // ERRORS 74 char section[2]; 75 76 int xr_found; 77 78 /* Fields for mandb_meta table */ 79 char *md5_hash; 80 dev_t device; 81 ino_t inode; 82 time_t mtime; 83 84 /* Fields for mandb_links table */ 85 char *machine; 86 char *links; //all the links to a page in a space separated form 87 char *file_path; 88 89 /* Non-db fields */ 90 int page_type; //Indicates the type of page: mdoc or man 91} mandb_rec; 92 93static void append(secbuff *sbuff, const char *src); 94static void init_secbuffs(mandb_rec *); 95static void free_secbuffs(mandb_rec *); 96static int check_md5(const char *, sqlite3 *, const char *, char **); 97static void cleanup(mandb_rec *); 98static void set_section(const struct mdoc *, const struct man *, mandb_rec *); 99static void set_machine(const struct mdoc *, mandb_rec *); 100static int insert_into_db(sqlite3 *, mandb_rec *); 101static void begin_parse(const char *, struct mparse *, mandb_rec *); 102static void pmdoc_node(const struct mdoc_node *, mandb_rec *); 103static void pmdoc_Nm(const struct mdoc_node *, mandb_rec *); 104static void pmdoc_Nd(const struct mdoc_node *, mandb_rec *); 105static void pmdoc_Sh(const struct mdoc_node *, mandb_rec *); 106static void pmdoc_Xr(const struct mdoc_node *, mandb_rec *); 107static void pmdoc_Pp(const struct mdoc_node *, mandb_rec *); 108static void pmdoc_macro_handler(const struct mdoc_node *, mandb_rec *, 109 enum mdoct); 110static void pman_node(const struct man_node *n, mandb_rec *); 111static void pman_parse_node(const struct man_node *, secbuff *); 112static void pman_parse_name(const struct man_node *, mandb_rec *); 113static void pman_sh(const struct man_node *, mandb_rec *); 114static void pman_block(const struct man_node *, mandb_rec *); 115static void traversedir(const char *, sqlite3 *, struct mparse *); 116static void mdoc_parse_section(enum mdoc_sec, const char *, mandb_rec *); 117static void man_parse_section(enum man_sec, const struct man_node *, mandb_rec *); 118static void build_file_cache(sqlite3 *, const char *, struct stat *); 119static void update_db(sqlite3 *, struct mparse *, mandb_rec *); 120__dead static void usage(void); 121static void optimize(sqlite3 *); 122static char *parse_escape(const char *); 123static makemandb_flags mflags = { .verbosity = 1 }; 124 125typedef void (*pman_nf)(const struct man_node *n, mandb_rec *); 126typedef void (*pmdoc_nf)(const struct mdoc_node *n, mandb_rec *); 127static const pmdoc_nf mdocs[MDOC_MAX] = { 128 NULL, /* Ap */ 129 NULL, /* Dd */ 130 NULL, /* Dt */ 131 NULL, /* Os */ 132 pmdoc_Sh, /* Sh */ 133 NULL, /* Ss */ 134 pmdoc_Pp, /* Pp */ 135 NULL, /* D1 */ 136 NULL, /* Dl */ 137 NULL, /* Bd */ 138 NULL, /* Ed */ 139 NULL, /* Bl */ 140 NULL, /* El */ 141 NULL, /* It */ 142 NULL, /* Ad */ 143 NULL, /* An */ 144 NULL, /* Ar */ 145 NULL, /* Cd */ 146 NULL, /* Cm */ 147 NULL, /* Dv */ 148 NULL, /* Er */ 149 NULL, /* Ev */ 150 NULL, /* Ex */ 151 NULL, /* Fa */ 152 NULL, /* Fd */ 153 NULL, /* Fl */ 154 NULL, /* Fn */ 155 NULL, /* Ft */ 156 NULL, /* Ic */ 157 NULL, /* In */ 158 NULL, /* Li */ 159 pmdoc_Nd, /* Nd */ 160 pmdoc_Nm, /* Nm */ 161 NULL, /* Op */ 162 NULL, /* Ot */ 163 NULL, /* Pa */ 164 NULL, /* Rv */ 165 NULL, /* St */ 166 NULL, /* Va */ 167 NULL, /* Vt */ 168 pmdoc_Xr, /* Xr */ 169 NULL, /* %A */ 170 NULL, /* %B */ 171 NULL, /* %D */ 172 NULL, /* %I */ 173 NULL, /* %J */ 174 NULL, /* %N */ 175 NULL, /* %O */ 176 NULL, /* %P */ 177 NULL, /* %R */ 178 NULL, /* %T */ 179 NULL, /* %V */ 180 NULL, /* Ac */ 181 NULL, /* Ao */ 182 NULL, /* Aq */ 183 NULL, /* At */ 184 NULL, /* Bc */ 185 NULL, /* Bf */ 186 NULL, /* Bo */ 187 NULL, /* Bq */ 188 NULL, /* Bsx */ 189 NULL, /* Bx */ 190 NULL, /* Db */ 191 NULL, /* Dc */ 192 NULL, /* Do */ 193 NULL, /* Dq */ 194 NULL, /* Ec */ 195 NULL, /* Ef */ 196 NULL, /* Em */ 197 NULL, /* Eo */ 198 NULL, /* Fx */ 199 NULL, /* Ms */ 200 NULL, /* No */ 201 NULL, /* Ns */ 202 NULL, /* Nx */ 203 NULL, /* Ox */ 204 NULL, /* Pc */ 205 NULL, /* Pf */ 206 NULL, /* Po */ 207 NULL, /* Pq */ 208 NULL, /* Qc */ 209 NULL, /* Ql */ 210 NULL, /* Qo */ 211 NULL, /* Qq */ 212 NULL, /* Re */ 213 NULL, /* Rs */ 214 NULL, /* Sc */ 215 NULL, /* So */ 216 NULL, /* Sq */ 217 NULL, /* Sm */ 218 NULL, /* Sx */ 219 NULL, /* Sy */ 220 NULL, /* Tn */ 221 NULL, /* Ux */ 222 NULL, /* Xc */ 223 NULL, /* Xo */ 224 NULL, /* Fo */ 225 NULL, /* Fc */ 226 NULL, /* Oo */ 227 NULL, /* Oc */ 228 NULL, /* Bk */ 229 NULL, /* Ek */ 230 NULL, /* Bt */ 231 NULL, /* Hf */ 232 NULL, /* Fr */ 233 NULL, /* Ud */ 234 NULL, /* Lb */ 235 NULL, /* Lp */ 236 NULL, /* Lk */ 237 NULL, /* Mt */ 238 NULL, /* Brq */ 239 NULL, /* Bro */ 240 NULL, /* Brc */ 241 NULL, /* %C */ 242 NULL, /* Es */ 243 NULL, /* En */ 244 NULL, /* Dx */ 245 NULL, /* %Q */ 246 NULL, /* br */ 247 NULL, /* sp */ 248 NULL, /* %U */ 249 NULL, /* Ta */ 250}; 251 252static const pman_nf mans[MAN_MAX] = { 253 NULL, //br 254 NULL, //TH 255 pman_sh, //SH 256 NULL, //SS 257 NULL, //TP 258 NULL, //LP 259 NULL, //PP 260 NULL, //P 261 NULL, //IP 262 NULL, //HP 263 NULL, //SM 264 NULL, //SB 265 NULL, //BI 266 NULL, //IB 267 NULL, //BR 268 NULL, //RB 269 NULL, //R 270 pman_block, //B 271 NULL, //I 272 NULL, //IR 273 NULL, //RI 274 NULL, //na 275 NULL, //sp 276 NULL, //nf 277 NULL, //fi 278 NULL, //RE 279 NULL, //RS 280 NULL, //DT 281 NULL, //UC 282 NULL, //PD 283 NULL, //AT 284 NULL, //in 285 NULL, //ft 286}; 287 288 289int 290main(int argc, char *argv[]) 291{ 292 FILE *file; 293 const char *sqlstr, *manconf = NULL; 294 char *line, *command; 295 char *errmsg; 296 int ch; 297 struct mparse *mp; 298 sqlite3 *db; 299 ssize_t len; 300 size_t linesize; 301 struct mandb_rec rec; 302 303 while ((ch = getopt(argc, argv, "C:floqv")) != -1) { 304 switch (ch) { 305 case 'C': 306 manconf = optarg; 307 break; 308 case 'f': 309 remove(DBPATH); 310 mflags.recreate = 1; 311 break; 312 case 'l': 313 mflags.limit = 1; 314 break; 315 case 'o': 316 mflags.optimize = 1; 317 break; 318 case 'q': 319 mflags.verbosity = 0; 320 break; 321 case 'v': 322 mflags.verbosity = 2; 323 break; 324 default: 325 usage(); 326 } 327 } 328 329 memset(&rec, 0, sizeof(rec)); 330 331 init_secbuffs(&rec); 332 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 333 334 if ((db = init_db(MANDB_CREATE)) == NULL) 335 exit(EXIT_FAILURE); 336 337 sqlite3_exec(db, "PRAGMA synchronous = 0", NULL, NULL, &errmsg); 338 if (errmsg != NULL) { 339 warnx("%s", errmsg); 340 free(errmsg); 341 close_db(db); 342 exit(EXIT_FAILURE); 343 } 344 345 sqlite3_exec(db, "ATTACH DATABASE \':memory:\' AS metadb", NULL, NULL, 346 &errmsg); 347 if (errmsg != NULL) { 348 warnx("%s", errmsg); 349 free(errmsg); 350 close_db(db); 351 exit(EXIT_FAILURE); 352 } 353 354 if (manconf) { 355 char *arg; 356 size_t command_len = shquote(manconf, NULL, 0) + 1; 357 arg = malloc(command_len ); 358 shquote(manconf, arg, command_len); 359 easprintf(&command, "man -p -C %s", arg); 360 free(arg); 361 } else { 362 command = estrdup("man -p"); 363 } 364 365 /* Call man -p to get the list of man page dirs */ 366 if ((file = popen(command, "r")) == NULL) { 367 close_db(db); 368 err(EXIT_FAILURE, "fopen failed"); 369 } 370 free(command); 371 372 /* Begin the transaction for indexing the pages */ 373 sqlite3_exec(db, "BEGIN", NULL, NULL, &errmsg); 374 if (errmsg != NULL) { 375 warnx("%s", errmsg); 376 free(errmsg); 377 exit(EXIT_FAILURE); 378 } 379 380 sqlstr = "CREATE TABLE IF NOT EXISTS metadb.file_cache(device, inode," 381 " mtime, file PRIMARY KEY);" 382 "CREATE UNIQUE INDEX IF NOT EXISTS metadb.index_file_cache_dev" 383 " ON file_cache (device, inode)"; 384 385 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 386 if (errmsg != NULL) { 387 warnx("%s", errmsg); 388 free(errmsg); 389 close_db(db); 390 exit(EXIT_FAILURE); 391 } 392 393 if (mflags.verbosity) 394 printf("Building temporary file cache\n"); 395 line = NULL; 396 linesize = 0; 397 while ((len = getline(&line, &linesize, file)) != -1) { 398 /* Replace the new line character at the end of string with '\0' */ 399 line[len - 1] = '\0'; 400 /* Traverse the man page directories and parse the pages */ 401 traversedir(line, db, mp); 402 } 403 free(line); 404 405 if (pclose(file) == -1) { 406 close_db(db); 407 cleanup(&rec); 408 free_secbuffs(&rec); 409 err(EXIT_FAILURE, "pclose error"); 410 } 411 412 update_db(db, mp, &rec); 413 mparse_free(mp); 414 free_secbuffs(&rec); 415 416 /* Commit the transaction */ 417 sqlite3_exec(db, "COMMIT", NULL, NULL, &errmsg); 418 if (errmsg != NULL) { 419 warnx("%s", errmsg); 420 free(errmsg); 421 exit(EXIT_FAILURE); 422 } 423 424 if (mflags.optimize) 425 optimize(db); 426 427 close_db(db); 428 return 0; 429} 430 431/* 432 * traversedir -- 433 * Traverses the given directory recursively and passes all the man page files 434 * in the way to build_file_cache() 435 */ 436static void 437traversedir(const char *file, sqlite3 *db, struct mparse *mp) 438{ 439 struct stat sb; 440 struct dirent *dirp; 441 DIR *dp; 442 char *buf; 443 444 if (stat(file, &sb) < 0) { 445 warn("stat failed: %s", file); 446 return; 447 } 448 449 /* If it is a regular file or a symlink, pass it to build_cache() */ 450 if (S_ISREG(sb.st_mode) || S_ISLNK(sb.st_mode)) { 451 build_file_cache(db, file, &sb); 452 return; 453 } 454 455 /* If it is a directory, traverse it recursively */ 456 if (S_ISDIR(sb.st_mode)) { 457 if ((dp = opendir(file)) == NULL) { 458 warn("opendir error: %s", file); 459 return; 460 } 461 462 while ((dirp = readdir(dp)) != NULL) { 463 /* Avoid . and .. entries in a directory */ 464 if (strncmp(dirp->d_name, ".", 1)) { 465 easprintf(&buf, "%s/%s", file, dirp->d_name); 466 traversedir(buf, db, mp); 467 free(buf); 468 } 469 } 470 closedir(dp); 471 } 472} 473 474/* build_file_cache -- 475 * This function generates an md5 hash of the file passed as it's 2nd parameter 476 * and stores it in a temporary table file_cache along with the full file path. 477 * This is done to support incremental updation of the database. 478 * The temporary table file_cache is dropped thereafter in the function 479 * update_db(), once the database has been updated. 480 */ 481static void 482build_file_cache(sqlite3 *db, const char *file, struct stat *sb) 483{ 484 const char *sqlstr; 485 sqlite3_stmt *stmt = NULL; 486 int rc, idx; 487 assert(file != NULL); 488 dev_t device_cache = sb->st_dev; 489 ino_t inode_cache = sb->st_ino; 490 time_t mtime_cache = sb->st_mtime; 491 492 sqlstr = "INSERT INTO metadb.file_cache VALUES (:device, :inode," 493 " :mtime, :file)"; 494 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL); 495 if (rc != SQLITE_OK) { 496 warnx("%s", sqlite3_errmsg(db)); 497 return; 498 } 499 500 idx = sqlite3_bind_parameter_index(stmt, ":device"); 501 rc = sqlite3_bind_int64(stmt, idx, device_cache); 502 if (rc != SQLITE_OK) { 503 warnx("%s", sqlite3_errmsg(db)); 504 sqlite3_finalize(stmt); 505 return; 506 } 507 508 idx = sqlite3_bind_parameter_index(stmt, ":inode"); 509 rc = sqlite3_bind_int64(stmt, idx, inode_cache); 510 if (rc != SQLITE_OK) { 511 warnx("%s", sqlite3_errmsg(db)); 512 sqlite3_finalize(stmt); 513 return; 514 } 515 516 idx = sqlite3_bind_parameter_index(stmt, ":mtime"); 517 rc = sqlite3_bind_int64(stmt, idx, mtime_cache); 518 if (rc != SQLITE_OK) { 519 warnx("%s", sqlite3_errmsg(db)); 520 sqlite3_finalize(stmt); 521 return; 522 } 523 524 idx = sqlite3_bind_parameter_index(stmt, ":file"); 525 rc = sqlite3_bind_text(stmt, idx, file, -1, NULL); 526 if (rc != SQLITE_OK) { 527 warnx("%s", sqlite3_errmsg(db)); 528 sqlite3_finalize(stmt); 529 return; 530 } 531 532 sqlite3_step(stmt); 533 sqlite3_finalize(stmt); 534} 535 536static void 537update_existing_entry(sqlite3 *db, const char *file, const char *hash, 538 mandb_rec *rec, int *new_count, int *link_count, int *err_count) 539{ 540 int update_count, rc, idx; 541 const char *inner_sqlstr; 542 sqlite3_stmt *inner_stmt; 543 544 update_count = sqlite3_total_changes(db); 545 inner_sqlstr = "UPDATE mandb_meta SET device = :device," 546 " inode = :inode, mtime = :mtime WHERE" 547 " md5_hash = :md5 AND file = :file AND" 548 " (device <> :device2 OR inode <> " 549 " :inode2 OR mtime <> :mtime2)"; 550 rc = sqlite3_prepare_v2(db, inner_sqlstr, -1, &inner_stmt, NULL); 551 if (rc != SQLITE_OK) { 552 warnx("%s", sqlite3_errmsg(db)); 553 return; 554 } 555 idx = sqlite3_bind_parameter_index(inner_stmt, ":device"); 556 sqlite3_bind_int64(inner_stmt, idx, rec->device); 557 idx = sqlite3_bind_parameter_index(inner_stmt, ":inode"); 558 sqlite3_bind_int64(inner_stmt, idx, rec->inode); 559 idx = sqlite3_bind_parameter_index(inner_stmt, ":mtime"); 560 sqlite3_bind_int64(inner_stmt, idx, rec->mtime); 561 idx = sqlite3_bind_parameter_index(inner_stmt, ":md5"); 562 sqlite3_bind_text(inner_stmt, idx, hash, -1, NULL); 563 idx = sqlite3_bind_parameter_index(inner_stmt, ":file"); 564 sqlite3_bind_text(inner_stmt, idx, file, -1, NULL); 565 idx = sqlite3_bind_parameter_index(inner_stmt, ":device2"); 566 sqlite3_bind_int64(inner_stmt, idx, rec->device); 567 idx = sqlite3_bind_parameter_index(inner_stmt, ":inode2"); 568 sqlite3_bind_int64(inner_stmt, idx, rec->inode); 569 idx = sqlite3_bind_parameter_index(inner_stmt, ":mtime2"); 570 sqlite3_bind_int64(inner_stmt, idx, rec->mtime); 571 572 rc = sqlite3_step(inner_stmt); 573 if (rc == SQLITE_DONE) { 574 /* Check if an update has been performed. */ 575 if (update_count != sqlite3_total_changes(db)) { 576 if (mflags.verbosity) 577 printf("Updated %s\n", file); 578 (*new_count)++; 579 } else { 580 /* Otherwise it was a hardlink. */ 581 (*link_count)++; 582 } 583 } else { 584 warnx("Could not update the meta data for %s", file); 585 (*err_count)++; 586 } 587 sqlite3_finalize(inner_stmt); 588} 589 590/* update_db -- 591 * Does an incremental updation of the database by checking the file_cache. 592 * It parses and adds the pages which are present in file_cache, 593 * but not in the database. 594 * It also removes the pages which are present in the databse, 595 * but not in the file_cache. 596 */ 597static void 598update_db(sqlite3 *db, struct mparse *mp, mandb_rec *rec) 599{ 600 const char *sqlstr; 601 sqlite3_stmt *stmt = NULL; 602 const char *file; 603 char *errmsg = NULL; 604 char *buf = NULL; 605 int new_count = 0; /* Counter for newly indexed/updated pages */ 606 int total_count = 0; /* Counter for total number of pages */ 607 int err_count = 0; /* Counter for number of failed pages */ 608 int link_count = 0; /* Counter for number of hard/sym links */ 609 int md5_status; 610 int rc; 611 612 sqlstr = "SELECT device, inode, mtime, file FROM metadb.file_cache" 613 " EXCEPT SELECT device, inode, mtime, file from mandb_meta"; 614 615 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL); 616 if (rc != SQLITE_OK) { 617 warnx("%s", sqlite3_errmsg(db)); 618 close_db(db); 619 errx(EXIT_FAILURE, "Could not query file cache"); 620 } 621 622 while (sqlite3_step(stmt) == SQLITE_ROW) { 623 total_count++; 624 rec->device = sqlite3_column_int64(stmt, 0); 625 rec->inode = sqlite3_column_int64(stmt, 1); 626 rec->mtime = sqlite3_column_int64(stmt, 2); 627 file = (const char *) sqlite3_column_text(stmt, 3); 628 md5_status = check_md5(file, db, "mandb_meta", &buf); 629 assert(buf != NULL); 630 if (md5_status == -1) { 631 warnx("An error occurred in checking md5 value" 632 " for file %s", file); 633 err_count++; 634 continue; 635 } 636 637 if (md5_status == 0) { 638 /* 639 * The MD5 hash is already present in the database, 640 * so simply update the metadata, ignoring symlinks. 641 */ 642 struct stat sb; 643 stat(file, &sb); 644 if (S_ISLNK(sb.st_mode)) { 645 free(buf); 646 link_count++; 647 continue; 648 } 649 update_existing_entry(db, file, buf, rec, 650 &new_count, &link_count, &err_count); 651 free(buf); 652 continue; 653 } 654 655 if (md5_status == 1) { 656 /* 657 * The MD5 hash was not present in the database. 658 * This means is either a new file or an updated file. 659 * We should go ahead with parsing. 660 */ 661 if (mflags.verbosity > 1) 662 printf("Parsing: %s\n", file); 663 rec->md5_hash = buf; 664 rec->file_path = estrdup(file); 665 // file_path is freed by insert_into_db itself. 666 begin_parse(file, mp, rec); 667 if (insert_into_db(db, rec) < 0) { 668 warnx("Error in indexing %s", file); 669 err_count++; 670 } else { 671 new_count++; 672 } 673 } 674 } 675 676 sqlite3_finalize(stmt); 677 678 if (mflags.verbosity) { 679 printf("Total Number of new or updated pages enountered = %d\n" 680 "Total number of pages that were successfully" 681 " indexed/updated = %d\n" 682 "Total number of (hard or symbolic) links found = %d\n" 683 "Total number of pages that could not be indexed" 684 " due to errors = %d\n", 685 total_count, new_count, link_count, err_count); 686 } 687 688 if (mflags.recreate == 0) 689 return; 690 691 if (mflags.verbosity) 692 printf("Deleting stale index entries\n"); 693 694 sqlstr = "DELETE FROM mandb_meta WHERE file NOT IN" 695 " (SELECT file FROM metadb.file_cache);" 696 "DROP TABLE metadb.file_cache;" 697 "DELETE FROM mandb WHERE rowid NOT IN" 698 " (SELECT id FROM mandb_meta);"; 699 700 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 701 if (errmsg != NULL) { 702 warnx("Removing old entries failed: %s", errmsg); 703 warnx("Please rebuild database from scratch with -f."); 704 free(errmsg); 705 return; 706 } 707} 708 709/* 710 * begin_parse -- 711 * parses the man page using libmandoc 712 */ 713static void 714begin_parse(const char *file, struct mparse *mp, mandb_rec *rec) 715{ 716 struct mdoc *mdoc; 717 struct man *man; 718 mparse_reset(mp); 719 720 rec->xr_found = 0; 721 722 if (mparse_readfd(mp, -1, file) >= MANDOCLEVEL_FATAL) { 723 warnx("%s: Parse failure", file); 724 return; 725 } 726 727 mparse_result(mp, &mdoc, &man); 728 if (mdoc == NULL && man == NULL) { 729 warnx("Not a man(7) or mdoc(7) page"); 730 return; 731 } 732 733 set_machine(mdoc, rec); 734 set_section(mdoc, man, rec); 735 if (mdoc) { 736 rec->page_type = MDOC; 737 pmdoc_node(mdoc_node(mdoc), rec); 738 } else { 739 rec->page_type = MAN; 740 pman_node(man_node(man), rec); 741 } 742} 743 744/* 745 * set_section -- 746 * Extracts the section number and normalizes it to only the numeric part 747 * (Which should be the first character of the string). 748 */ 749static void 750set_section(const struct mdoc *md, const struct man *m, mandb_rec *rec) 751{ 752 if (md) { 753 const struct mdoc_meta *md_meta = mdoc_meta(md); 754 rec->section[0] = md_meta->msec[0]; 755 } else if (m) { 756 const struct man_meta *m_meta = man_meta(m); 757 rec->section[0] = m_meta->msec[0]; 758 } 759} 760 761/* 762 * get_machine -- 763 * Extracts the machine architecture information if available. 764 */ 765static void 766set_machine(const struct mdoc *md, mandb_rec *rec) 767{ 768 if (md == NULL) 769 return; 770 const struct mdoc_meta *md_meta = mdoc_meta(md); 771 if (md_meta->arch) 772 rec->machine = estrdup(md_meta->arch); 773} 774 775static void 776pmdoc_node(const struct mdoc_node *n, mandb_rec *rec) 777{ 778 779 if (n == NULL) 780 return; 781 782 switch (n->type) { 783 case (MDOC_BODY): 784 /* FALLTHROUGH */ 785 case (MDOC_TAIL): 786 /* FALLTHROUGH */ 787 case (MDOC_ELEM): 788 if (mdocs[n->tok] == NULL) 789 break; 790 (*mdocs[n->tok])(n, rec); 791 break; 792 default: 793 break; 794 } 795 796 pmdoc_node(n->child, rec); 797 pmdoc_node(n->next, rec); 798} 799 800/* 801 * pmdoc_Nm -- 802 * Extracts the Name of the manual page from the .Nm macro 803 */ 804static void 805pmdoc_Nm(const struct mdoc_node *n, mandb_rec *rec) 806{ 807 if (n->sec != SEC_NAME) 808 return; 809 810 for (n = n->child; n; n = n->next) { 811 if (n->type == MDOC_TEXT) { 812 concat(&rec->name, n->string); 813 } 814 } 815} 816 817/* 818 * pmdoc_Nd -- 819 * Extracts the one line description of the man page from the .Nd macro 820 */ 821static void 822pmdoc_Nd(const struct mdoc_node *n, mandb_rec *rec) 823{ 824 /* 825 * A static variable for keeping track of whether a Xr macro was seen 826 * previously. 827 */ 828 char *buf = NULL; 829 char *temp; 830 831 if (n == NULL) 832 return; 833 834 if (n->type == MDOC_TEXT) { 835 if (rec->xr_found && n->next) { 836 /* 837 * An Xr macro was seen previously, so parse this 838 * and the next node. 839 */ 840 temp = estrdup(n->string); 841 n = n->next; 842 easprintf(&buf, "%s(%s)", temp, n->string); 843 concat(&rec->name_desc, buf); 844 free(buf); 845 free(temp); 846 } else { 847 concat(&rec->name_desc, n->string); 848 } 849 rec->xr_found = 0; 850 } else if (mdocs[n->tok] == pmdoc_Xr) { 851 /* Remember that we have encountered an Xr macro */ 852 rec->xr_found = 1; 853 } 854 855 if (n->child) 856 pmdoc_Nd(n->child, rec); 857 858 if(n->next) 859 pmdoc_Nd(n->next, rec); 860} 861 862/* 863 * pmdoc_macro_handler-- 864 * This function is a single point of handling all the special macros that we 865 * want to handle especially. For example the .Xr macro for properly parsing 866 * the referenced page name along with the section number, or the .Pp macro 867 * for adding a new line whenever we encounter it. 868 */ 869static void 870pmdoc_macro_handler(const struct mdoc_node *n, mandb_rec *rec, enum mdoct doct) 871{ 872 const struct mdoc_node *sn; 873 assert(n); 874 875 switch (doct) { 876 /* Parse the man page references. 877 * Basically the .Xr macros are used like: 878 * .Xr ls 1 879 * and formatted like this: 880 * ls(1) 881 * Prepare a buffer to format the data like the above example and call 882 * pmdoc_parse_section to append it. 883 */ 884 case MDOC_Xr: 885 n = n->child; 886 while (n->type != MDOC_TEXT && n->next) 887 n = n->next; 888 889 if (n && n->type != MDOC_TEXT) 890 return; 891 sn = n; 892 if (n->next) 893 n = n->next; 894 895 while (n->type != MDOC_TEXT && n->next) 896 n = n->next; 897 898 if (n && n->type == MDOC_TEXT) { 899 size_t len = strlen(sn->string); 900 char *buf = emalloc(len + 4); 901 memcpy(buf, sn->string, len); 902 buf[len] = '('; 903 buf[len + 1] = n->string[0]; 904 buf[len + 2] = ')'; 905 buf[len + 3] = 0; 906 mdoc_parse_section(n->sec, buf, rec); 907 free(buf); 908 } 909 910 break; 911 912 /* Parse the .Pp macro to add a new line */ 913 case MDOC_Pp: 914 if (n->type == MDOC_TEXT) 915 mdoc_parse_section(n->sec, "\n", rec); 916 break; 917 default: 918 break; 919 } 920 921} 922 923/* 924 * pmdoc_Xr, pmdoc_Pp-- 925 * Empty stubs. 926 * The parser calls these functions each time it encounters 927 * a .Xr or .Pp macro. We are parsing all the data from 928 * the pmdoc_Sh function, so don't do anything here. 929 * (See if else blocks in pmdoc_Sh.) 930 */ 931static void 932pmdoc_Xr(const struct mdoc_node *n, mandb_rec *rec) 933{ 934} 935 936static void 937pmdoc_Pp(const struct mdoc_node *n, mandb_rec *rec) 938{ 939} 940 941/* 942 * pmdoc_Sh -- 943 * Called when a .Sh macro is encountered and loops through its body, calling 944 * mdoc_parse_section to append the data to the section specific buffer. 945 * Two special macros which may occur inside the body of Sh are .Nm and .Xr and 946 * they need special handling, thus the separate if branches for them. 947 */ 948static void 949pmdoc_Sh(const struct mdoc_node *n, mandb_rec *rec) 950{ 951 if (n == NULL) 952 return; 953 int xr_found = 0; 954 955 if (n->type == MDOC_TEXT) { 956 mdoc_parse_section(n->sec, n->string, rec); 957 } else if (mdocs[n->tok] == pmdoc_Nm && rec->name != NULL) { 958 /* 959 * When encountering a .Nm macro, substitute it 960 * with its previously cached value of the argument. 961 */ 962 mdoc_parse_section(n->sec, rec->name, rec); 963 } else if (mdocs[n->tok] == pmdoc_Xr) { 964 /* 965 * When encountering other inline macros, 966 * call pmdoc_macro_handler. 967 */ 968 pmdoc_macro_handler(n, rec, MDOC_Xr); 969 xr_found = 1; 970 } else if (mdocs[n->tok] == pmdoc_Pp) { 971 pmdoc_macro_handler(n, rec, MDOC_Pp); 972 } 973 974 /* 975 * If an Xr macro was encountered then the child node has 976 * already been explored by pmdoc_macro_handler. 977 */ 978 if (xr_found == 0) 979 pmdoc_Sh(n->child, rec); 980 pmdoc_Sh(n->next, rec); 981} 982 983/* 984 * mdoc_parse_section-- 985 * Utility function for parsing sections of the mdoc type pages. 986 * Takes two params: 987 * 1. sec is an enum which indicates the section in which we are present 988 * 2. string is the string which we need to append to the secbuff for this 989 * particular section. 990 * The function appends string to the global section buffer and returns. 991 */ 992static void 993mdoc_parse_section(enum mdoc_sec sec, const char *string, mandb_rec *rec) 994{ 995 /* 996 * If the user specified the 'l' flag, then parse and store only the 997 * NAME section. Ignore the rest. 998 */ 999 if (mflags.limit) 1000 return; 1001 1002 switch (sec) { 1003 case SEC_LIBRARY: 1004 append(&rec->lib, string); 1005 break; 1006 case SEC_RETURN_VALUES: 1007 append(&rec->return_vals, string); 1008 break; 1009 case SEC_ENVIRONMENT: 1010 append(&rec->env, string); 1011 break; 1012 case SEC_FILES: 1013 append(&rec->files, string); 1014 break; 1015 case SEC_EXIT_STATUS: 1016 append(&rec->exit_status, string); 1017 break; 1018 case SEC_DIAGNOSTICS: 1019 append(&rec->diagnostics, string); 1020 break; 1021 case SEC_ERRORS: 1022 append(&rec->errors, string); 1023 break; 1024 case SEC_NAME: 1025 case SEC_SYNOPSIS: 1026 case SEC_EXAMPLES: 1027 case SEC_STANDARDS: 1028 case SEC_HISTORY: 1029 case SEC_AUTHORS: 1030 case SEC_BUGS: 1031 break; 1032 default: 1033 append(&rec->desc, string); 1034 break; 1035 } 1036} 1037 1038static void 1039pman_node(const struct man_node *n, mandb_rec *rec) 1040{ 1041 if (n == NULL) 1042 return; 1043 1044 switch (n->type) { 1045 case (MAN_BODY): 1046 /* FALLTHROUGH */ 1047 case (MAN_TAIL): 1048 /* FALLTHROUGH */ 1049 case (MAN_BLOCK): 1050 /* FALLTHROUGH */ 1051 case (MAN_ELEM): 1052 if (mans[n->tok] != NULL) 1053 (*mans[n->tok])(n, rec); 1054 break; 1055 default: 1056 break; 1057 } 1058 1059 pman_node(n->child, rec); 1060 pman_node(n->next, rec); 1061} 1062 1063/* 1064 * pman_parse_name -- 1065 * Parses the NAME section and puts the complete content in the name_desc 1066 * variable. 1067 */ 1068static void 1069pman_parse_name(const struct man_node *n, mandb_rec *rec) 1070{ 1071 if (n == NULL) 1072 return; 1073 1074 if (n->type == MAN_TEXT) { 1075 char *tmp = parse_escape(n->string); 1076 concat(&rec->name_desc, tmp); 1077 free(tmp); 1078 } 1079 1080 if (n->child) 1081 pman_parse_name(n->child, rec); 1082 1083 if(n->next) 1084 pman_parse_name(n->next, rec); 1085} 1086 1087/* 1088 * A stub function to be able to parse the macros like .B embedded inside 1089 * a section. 1090 */ 1091static void 1092pman_block(const struct man_node *n, mandb_rec *rec) 1093{ 1094} 1095 1096/* 1097 * pman_sh -- 1098 * This function does one of the two things: 1099 * 1. If the present section is NAME, then it will: 1100 * (a) Extract the name of the page (in case of multiple comma separated 1101 * names, it will pick up the first one). 1102 * (b) Build a space spearated list of all the symlinks/hardlinks to 1103 * this page and store in the buffer 'links'. These are extracted from 1104 * the comma separated list of names in the NAME section as well. 1105 * (c) Move on to the one line description section, which is after the list 1106 * of names in the NAME section. 1107 * 2. Otherwise, it will check the section name and call the man_parse_section 1108 * function, passing the enum corresponding that section. 1109 */ 1110static void 1111pman_sh(const struct man_node *n, mandb_rec *rec) 1112{ 1113 static const struct { 1114 enum man_sec section; 1115 const char *header; 1116 } mapping[] = { 1117 { MANSEC_DESCRIPTION, "DESCRIPTION" }, 1118 { MANSEC_SYNOPSIS, "SYNOPSIS" }, 1119 { MANSEC_LIBRARY, "LIBRARY" }, 1120 { MANSEC_ERRORS, "ERRORS" }, 1121 { MANSEC_FILES, "FILES" }, 1122 { MANSEC_RETURN_VALUES, "RETURN VALUE" }, 1123 { MANSEC_RETURN_VALUES, "RETURN VALUES" }, 1124 { MANSEC_EXIT_STATUS, "EXIT STATUS" }, 1125 { MANSEC_EXAMPLES, "EXAMPLES" }, 1126 { MANSEC_EXAMPLES, "EXAMPLE" }, 1127 { MANSEC_STANDARDS, "STANDARDS" }, 1128 { MANSEC_HISTORY, "HISTORY" }, 1129 { MANSEC_BUGS, "BUGS" }, 1130 { MANSEC_AUTHORS, "AUTHORS" }, 1131 { MANSEC_COPYRIGHT, "COPYRIGHT" }, 1132 }; 1133 const struct man_node *head; 1134 char *name_desc; 1135 int sz; 1136 size_t i; 1137 1138 if ((head = n->parent->head) == NULL || (head = head->child) == NULL || 1139 head->type != MAN_TEXT) 1140 return; 1141 1142 /* 1143 * Check if this section should be extracted and 1144 * where it should be stored. Handled the trival cases first. 1145 */ 1146 for (i = 0; i < sizeof(mapping) / sizeof(mapping[0]); ++i) { 1147 if (strcmp(head->string, mapping[i].header) == 0) { 1148 man_parse_section(mapping[i].section, n, rec); 1149 return; 1150 } 1151 } 1152 1153 if (strcmp(head->string, "NAME") == 0) { 1154 /* 1155 * We are in the NAME section. 1156 * pman_parse_name will put the complete content in name_desc. 1157 */ 1158 pman_parse_name(n, rec); 1159 1160 name_desc = rec->name_desc; 1161 1162 /* Remove any leading spaces. */ 1163 while (name_desc[0] == ' ') 1164 name_desc++; 1165 1166 /* If the line begins with a "\&", avoid those */ 1167 if (name_desc[0] == '\\' && name_desc[1] == '&') 1168 name_desc += 2; 1169 1170 /* Now name_desc should be left with a comma-space 1171 * separated list of names and the one line description 1172 * of the page: 1173 * "a, b, c \- sample description" 1174 * Take out the first name, before the first comma 1175 * (or space) and store it in rec->name. 1176 * If the page has aliases then they should be 1177 * in the form of a comma separated list. 1178 * Keep looping while there is a comma in name_desc, 1179 * extract the alias name and store in rec->links. 1180 * When there are no more commas left, break out. 1181 */ 1182 int has_alias = 0; // Any more aliases left? 1183 while (*name_desc) { 1184 /* Remove any leading spaces. */ 1185 if (name_desc[0] == ' ') { 1186 name_desc++; 1187 continue; 1188 } 1189 sz = strcspn(name_desc, ", "); 1190 1191 /* Extract the first term and store it in rec->name. */ 1192 if (rec->name == NULL) { 1193 if (name_desc[sz] == ',') 1194 has_alias = 1; 1195 name_desc[sz] = 0; 1196 rec->name = emalloc(sz + 1); 1197 memcpy(rec->name, name_desc, sz + 1); 1198 name_desc += sz + 1; 1199 continue; 1200 } 1201 1202 /* 1203 * Once rec->name is set, rest of the names 1204 * are to be treated as links or aliases. 1205 */ 1206 if (rec->name && has_alias) { 1207 if (name_desc[sz] != ',') { 1208 /* No more commas left --> 1209 * no more aliases to take out 1210 */ 1211 has_alias = 0; 1212 } 1213 name_desc[sz] = 0; 1214 concat2(&rec->links, name_desc, sz); 1215 name_desc += sz + 1; 1216 continue; 1217 } 1218 break; 1219 } 1220 1221 /* Parse any escape sequences that might be there */ 1222 char *temp = parse_escape(name_desc); 1223 free(rec->name_desc); 1224 rec->name_desc = temp; 1225 temp = parse_escape(rec->name); 1226 free(rec->name); 1227 rec->name = temp; 1228 return; 1229 } 1230 1231 /* The RETURN VALUE section might be specified in multiple ways */ 1232 if (strcmp(head->string, "RETURN") == 0 && 1233 head->next != NULL && head->next->type == MAN_TEXT && 1234 (strcmp(head->next->string, "VALUE") == 0 || 1235 strcmp(head->next->string, "VALUES") == 0)) { 1236 man_parse_section(MANSEC_RETURN_VALUES, n, rec); 1237 return; 1238 } 1239 1240 /* 1241 * EXIT STATUS section can also be specified all on one line or on two 1242 * separate lines. 1243 */ 1244 if (strcmp(head->string, "EXIT") == 0 && 1245 head->next != NULL && head->next->type == MAN_TEXT && 1246 strcmp(head->next->string, "STATUS") == 0) { 1247 man_parse_section(MANSEC_EXIT_STATUS, n, rec); 1248 return; 1249 } 1250 1251 /* Store the rest of the content in desc. */ 1252 man_parse_section(MANSEC_NONE, n, rec); 1253} 1254 1255/* 1256 * pman_parse_node -- 1257 * Generic function to iterate through a node. Usually called from 1258 * man_parse_section to parse a particular section of the man page. 1259 */ 1260static void 1261pman_parse_node(const struct man_node *n, secbuff *s) 1262{ 1263 if (n == NULL) 1264 return; 1265 1266 if (n->type == MAN_TEXT) 1267 append(s, n->string); 1268 1269 pman_parse_node(n->child, s); 1270 pman_parse_node(n->next, s); 1271} 1272 1273/* 1274 * man_parse_section -- 1275 * Takes two parameters: 1276 * sec: Tells which section we are present in 1277 * n: Is the present node of the AST. 1278 * Depending on the section, we call pman_parse_node to parse that section and 1279 * concatenate the content from that section into the buffer for that section. 1280 */ 1281static void 1282man_parse_section(enum man_sec sec, const struct man_node *n, mandb_rec *rec) 1283{ 1284 /* 1285 * If the user sepecified the 'l' flag then just parse 1286 * the NAME section, ignore the rest. 1287 */ 1288 if (mflags.limit) 1289 return; 1290 1291 switch (sec) { 1292 case MANSEC_LIBRARY: 1293 pman_parse_node(n, &rec->lib); 1294 break; 1295 case MANSEC_RETURN_VALUES: 1296 pman_parse_node(n, &rec->return_vals); 1297 break; 1298 case MANSEC_ENVIRONMENT: 1299 pman_parse_node(n, &rec->env); 1300 break; 1301 case MANSEC_FILES: 1302 pman_parse_node(n, &rec->files); 1303 break; 1304 case MANSEC_EXIT_STATUS: 1305 pman_parse_node(n, &rec->exit_status); 1306 break; 1307 case MANSEC_DIAGNOSTICS: 1308 pman_parse_node(n, &rec->diagnostics); 1309 break; 1310 case MANSEC_ERRORS: 1311 pman_parse_node(n, &rec->errors); 1312 break; 1313 case MANSEC_NAME: 1314 case MANSEC_SYNOPSIS: 1315 case MANSEC_EXAMPLES: 1316 case MANSEC_STANDARDS: 1317 case MANSEC_HISTORY: 1318 case MANSEC_BUGS: 1319 case MANSEC_AUTHORS: 1320 case MANSEC_COPYRIGHT: 1321 break; 1322 default: 1323 pman_parse_node(n, &rec->desc); 1324 break; 1325 } 1326 1327} 1328 1329/* 1330 * insert_into_db -- 1331 * Inserts the parsed data of the man page in the Sqlite databse. 1332 * If any of the values is NULL, then we cleanup and return -1 indicating 1333 * an error. 1334 * Otherwise, store the data in the database and return 0. 1335 */ 1336static int 1337insert_into_db(sqlite3 *db, mandb_rec *rec) 1338{ 1339 int rc = 0; 1340 int idx = -1; 1341 const char *sqlstr = NULL; 1342 sqlite3_stmt *stmt = NULL; 1343 char *ln = NULL; 1344 char *errmsg = NULL; 1345 long int mandb_rowid; 1346 1347 /* 1348 * At the very minimum we want to make sure that we store 1349 * the following data: 1350 * Name, one line description, and the MD5 hash 1351 */ 1352 if (rec->name == NULL || rec->name_desc == NULL || 1353 rec->md5_hash == NULL) { 1354 cleanup(rec); 1355 return -1; 1356 } 1357 1358 /* Write null byte at the end of all the sec_buffs */ 1359 rec->desc.data[rec->desc.offset] = 0; 1360 rec->lib.data[rec->lib.offset] = 0; 1361 rec->env.data[rec->env.offset] = 0; 1362 rec->return_vals.data[rec->return_vals.offset] = 0; 1363 rec->exit_status.data[rec->exit_status.offset] = 0; 1364 rec->files.data[rec->files.offset] = 0; 1365 rec->diagnostics.data[rec->diagnostics.offset] = 0; 1366 rec->errors.data[rec->errors.offset] = 0; 1367 1368 /* 1369 * In case of a mdoc page: (sorry, no better place to put this code) 1370 * parse the comma separated list of names of man pages, 1371 * the first name will be stored in the mandb table, rest will be 1372 * treated as links and put in the mandb_links table. 1373 */ 1374 if (rec->page_type == MDOC) { 1375 char *tmp; 1376 rec->links = estrdup(rec->name); 1377 free(rec->name); 1378 int sz = strcspn(rec->links, " \0"); 1379 rec->name = emalloc(sz + 1); 1380 memcpy(rec->name, rec->links, sz); 1381 if(rec->name[sz - 1] == ',') 1382 rec->name[sz - 1] = 0; 1383 else 1384 rec->name[sz] = 0; 1385 while (rec->links[sz] == ' ') 1386 ++sz; 1387 tmp = estrdup(rec->links + sz); 1388 free(rec->links); 1389 rec->links = tmp; 1390 } 1391 1392/*------------------------ Populate the mandb table---------------------------*/ 1393 sqlstr = "INSERT INTO mandb VALUES (:section, :name, :name_desc, :desc," 1394 " :lib, :return_vals, :env, :files, :exit_status," 1395 " :diagnostics, :errors, :md5_hash, :machine)"; 1396 1397 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL); 1398 if (rc != SQLITE_OK) 1399 goto Out; 1400 1401 idx = sqlite3_bind_parameter_index(stmt, ":name"); 1402 rc = sqlite3_bind_text(stmt, idx, rec->name, -1, NULL); 1403 if (rc != SQLITE_OK) { 1404 sqlite3_finalize(stmt); 1405 goto Out; 1406 } 1407 1408 idx = sqlite3_bind_parameter_index(stmt, ":section"); 1409 rc = sqlite3_bind_text(stmt, idx, rec->section, -1, NULL); 1410 if (rc != SQLITE_OK) { 1411 sqlite3_finalize(stmt); 1412 goto Out; 1413 } 1414 1415 idx = sqlite3_bind_parameter_index(stmt, ":name_desc"); 1416 rc = sqlite3_bind_text(stmt, idx, rec->name_desc, -1, NULL); 1417 if (rc != SQLITE_OK) { 1418 sqlite3_finalize(stmt); 1419 goto Out; 1420 } 1421 1422 idx = sqlite3_bind_parameter_index(stmt, ":desc"); 1423 rc = sqlite3_bind_text(stmt, idx, rec->desc.data, 1424 rec->desc.offset + 1, NULL); 1425 if (rc != SQLITE_OK) { 1426 sqlite3_finalize(stmt); 1427 goto Out; 1428 } 1429 1430 idx = sqlite3_bind_parameter_index(stmt, ":lib"); 1431 rc = sqlite3_bind_text(stmt, idx, rec->lib.data, rec->lib.offset + 1, NULL); 1432 if (rc != SQLITE_OK) { 1433 sqlite3_finalize(stmt); 1434 goto Out; 1435 } 1436 1437 idx = sqlite3_bind_parameter_index(stmt, ":return_vals"); 1438 rc = sqlite3_bind_text(stmt, idx, rec->return_vals.data, 1439 rec->return_vals.offset + 1, NULL); 1440 if (rc != SQLITE_OK) { 1441 sqlite3_finalize(stmt); 1442 goto Out; 1443 } 1444 1445 idx = sqlite3_bind_parameter_index(stmt, ":env"); 1446 rc = sqlite3_bind_text(stmt, idx, rec->env.data, rec->env.offset + 1, NULL); 1447 if (rc != SQLITE_OK) { 1448 sqlite3_finalize(stmt); 1449 goto Out; 1450 } 1451 1452 idx = sqlite3_bind_parameter_index(stmt, ":files"); 1453 rc = sqlite3_bind_text(stmt, idx, rec->files.data, 1454 rec->files.offset + 1, NULL); 1455 if (rc != SQLITE_OK) { 1456 sqlite3_finalize(stmt); 1457 goto Out; 1458 } 1459 1460 idx = sqlite3_bind_parameter_index(stmt, ":exit_status"); 1461 rc = sqlite3_bind_text(stmt, idx, rec->exit_status.data, 1462 rec->exit_status.offset + 1, NULL); 1463 if (rc != SQLITE_OK) { 1464 sqlite3_finalize(stmt); 1465 goto Out; 1466 } 1467 1468 idx = sqlite3_bind_parameter_index(stmt, ":diagnostics"); 1469 rc = sqlite3_bind_text(stmt, idx, rec->diagnostics.data, 1470 rec->diagnostics.offset + 1, NULL); 1471 if (rc != SQLITE_OK) { 1472 sqlite3_finalize(stmt); 1473 goto Out; 1474 } 1475 1476 idx = sqlite3_bind_parameter_index(stmt, ":errors"); 1477 rc = sqlite3_bind_text(stmt, idx, rec->errors.data, 1478 rec->errors.offset + 1, NULL); 1479 if (rc != SQLITE_OK) { 1480 sqlite3_finalize(stmt); 1481 goto Out; 1482 } 1483 1484 idx = sqlite3_bind_parameter_index(stmt, ":md5_hash"); 1485 rc = sqlite3_bind_text(stmt, idx, rec->md5_hash, -1, NULL); 1486 if (rc != SQLITE_OK) { 1487 sqlite3_finalize(stmt); 1488 goto Out; 1489 } 1490 1491 idx = sqlite3_bind_parameter_index(stmt, ":machine"); 1492 if (rec->machine) 1493 rc = sqlite3_bind_text(stmt, idx, rec->machine, -1, NULL); 1494 else 1495 rc = sqlite3_bind_null(stmt, idx); 1496 if (rc != SQLITE_OK) { 1497 sqlite3_finalize(stmt); 1498 goto Out; 1499 } 1500 1501 rc = sqlite3_step(stmt); 1502 if (rc != SQLITE_DONE) { 1503 sqlite3_finalize(stmt); 1504 goto Out; 1505 } 1506 1507 sqlite3_finalize(stmt); 1508 1509 /* Get the row id of the last inserted row */ 1510 mandb_rowid = sqlite3_last_insert_rowid(db); 1511 1512/*------------------------Populate the mandb_meta table-----------------------*/ 1513 sqlstr = "INSERT INTO mandb_meta VALUES (:device, :inode, :mtime," 1514 " :file, :md5_hash, :id)"; 1515 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL); 1516 if (rc != SQLITE_OK) 1517 goto Out; 1518 1519 idx = sqlite3_bind_parameter_index(stmt, ":device"); 1520 rc = sqlite3_bind_int64(stmt, idx, rec->device); 1521 if (rc != SQLITE_OK) { 1522 sqlite3_finalize(stmt); 1523 goto Out; 1524 } 1525 1526 idx = sqlite3_bind_parameter_index(stmt, ":inode"); 1527 rc = sqlite3_bind_int64(stmt, idx, rec->inode); 1528 if (rc != SQLITE_OK) { 1529 sqlite3_finalize(stmt); 1530 goto Out; 1531 } 1532 1533 idx = sqlite3_bind_parameter_index(stmt, ":mtime"); 1534 rc = sqlite3_bind_int64(stmt, idx, rec->mtime); 1535 if (rc != SQLITE_OK) { 1536 sqlite3_finalize(stmt); 1537 goto Out; 1538 } 1539 1540 idx = sqlite3_bind_parameter_index(stmt, ":file"); 1541 rc = sqlite3_bind_text(stmt, idx, rec->file_path, -1, NULL); 1542 if (rc != SQLITE_OK) { 1543 sqlite3_finalize(stmt); 1544 goto Out; 1545 } 1546 1547 idx = sqlite3_bind_parameter_index(stmt, ":md5_hash"); 1548 rc = sqlite3_bind_text(stmt, idx, rec->md5_hash, -1, NULL); 1549 if (rc != SQLITE_OK) { 1550 sqlite3_finalize(stmt); 1551 goto Out; 1552 } 1553 1554 idx = sqlite3_bind_parameter_index(stmt, ":id"); 1555 rc = sqlite3_bind_int64(stmt, idx, mandb_rowid); 1556 if (rc != SQLITE_OK) { 1557 sqlite3_finalize(stmt); 1558 goto Out; 1559 } 1560 1561 rc = sqlite3_step(stmt); 1562 sqlite3_finalize(stmt); 1563 if (rc == SQLITE_CONSTRAINT) { 1564 /* The *most* probable reason for reaching here is that 1565 * the UNIQUE contraint on the file column of the mandb_meta 1566 * table was violated. 1567 * This can happen when a file was updated/modified. 1568 * To fix this we need to do two things: 1569 * 1. Delete the row for the older version of this file 1570 * from mandb table. 1571 * 2. Run an UPDATE query to update the row for this file 1572 * in the mandb_meta table. 1573 */ 1574 warnx("Trying to update index for %s", rec->file_path); 1575 char *sql = sqlite3_mprintf("DELETE FROM mandb " 1576 "WHERE rowid = (SELECT id" 1577 " FROM mandb_meta" 1578 " WHERE file = %Q)", 1579 rec->file_path); 1580 sqlite3_exec(db, sql, NULL, NULL, &errmsg); 1581 sqlite3_free(sql); 1582 if (errmsg != NULL) { 1583 warnx("%s", errmsg); 1584 free(errmsg); 1585 } 1586 sqlstr = "UPDATE mandb_meta SET device = :device," 1587 " inode = :inode, mtime = :mtime, id = :id," 1588 " md5_hash = :md5 WHERE file = :file"; 1589 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL); 1590 if (rc != SQLITE_OK) { 1591 warnx("Update failed with error: %s", 1592 sqlite3_errmsg(db)); 1593 close_db(db); 1594 cleanup(rec); 1595 errx(EXIT_FAILURE, 1596 "Consider running makemandb with -f option"); 1597 } 1598 1599 idx = sqlite3_bind_parameter_index(stmt, ":device"); 1600 sqlite3_bind_int64(stmt, idx, rec->device); 1601 idx = sqlite3_bind_parameter_index(stmt, ":inode"); 1602 sqlite3_bind_int64(stmt, idx, rec->inode); 1603 idx = sqlite3_bind_parameter_index(stmt, ":mtime"); 1604 sqlite3_bind_int64(stmt, idx, rec->mtime); 1605 idx = sqlite3_bind_parameter_index(stmt, ":id"); 1606 sqlite3_bind_int64(stmt, idx, mandb_rowid); 1607 idx = sqlite3_bind_parameter_index(stmt, ":md5"); 1608 sqlite3_bind_text(stmt, idx, rec->md5_hash, -1, NULL); 1609 idx = sqlite3_bind_parameter_index(stmt, ":file"); 1610 sqlite3_bind_text(stmt, idx, rec->file_path, -1, NULL); 1611 rc = sqlite3_step(stmt); 1612 sqlite3_finalize(stmt); 1613 1614 if (rc != SQLITE_DONE) { 1615 warnx("%s", sqlite3_errmsg(db)); 1616 close_db(db); 1617 cleanup(rec); 1618 errx(EXIT_FAILURE, 1619 "Consider running makemandb with -f option"); 1620 } 1621 } else if (rc != SQLITE_DONE) { 1622 /* Otherwise make this error fatal */ 1623 warnx("Failed at %s\n%s", rec->file_path, sqlite3_errmsg(db)); 1624 cleanup(rec); 1625 close_db(db); 1626 exit(EXIT_FAILURE); 1627 } 1628 1629/*------------------------ Populate the mandb_links table---------------------*/ 1630 char *str = NULL; 1631 char *links; 1632 if (rec->links && strlen(rec->links)) { 1633 links = rec->links; 1634 for(ln = strtok(links, " "); ln; ln = strtok(NULL, " ")) { 1635 if (ln[0] == ',') 1636 ln++; 1637 if(ln[strlen(ln) - 1] == ',') 1638 ln[strlen(ln) - 1] = 0; 1639 1640 str = sqlite3_mprintf("INSERT INTO mandb_links" 1641 " VALUES (%Q, %Q, %Q, %Q)", 1642 ln, rec->name, rec->section, 1643 rec->machine); 1644 sqlite3_exec(db, str, NULL, NULL, &errmsg); 1645 sqlite3_free(str); 1646 if (errmsg != NULL) { 1647 warnx("%s", errmsg); 1648 cleanup(rec); 1649 free(errmsg); 1650 return -1; 1651 } 1652 } 1653 } 1654 1655 cleanup(rec); 1656 return 0; 1657 1658 Out: 1659 warnx("%s", sqlite3_errmsg(db)); 1660 cleanup(rec); 1661 return -1; 1662} 1663 1664/* 1665 * check_md5-- 1666 * Generates the md5 hash of the file and checks if it already doesn't exist 1667 * in the table (passed as the 3rd parameter). 1668 * This function is being used to avoid hardlinks. 1669 * On successful completion it will also set the value of the fourth parameter 1670 * to the md5 hash of the file (computed previously). It is the responsibility 1671 * of the caller to free this buffer. 1672 * Return values: 1673 * -1: If an error occurs somewhere and sets the md5 return buffer to NULL. 1674 * 0: If the md5 hash does not exist in the table. 1675 * 1: If the hash exists in the database. 1676 */ 1677static int 1678check_md5(const char *file, sqlite3 *db, const char *table, char **buf) 1679{ 1680 int rc = 0; 1681 int idx = -1; 1682 char *sqlstr = NULL; 1683 sqlite3_stmt *stmt = NULL; 1684 1685 assert(file != NULL); 1686 *buf = MD5File(file, NULL); 1687 if (*buf == NULL) { 1688 warn("md5 failed: %s", file); 1689 return -1; 1690 } 1691 1692 easprintf(&sqlstr, "SELECT * FROM %s WHERE md5_hash = :md5_hash", 1693 table); 1694 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL); 1695 if (rc != SQLITE_OK) { 1696 free(sqlstr); 1697 free(*buf); 1698 *buf = NULL; 1699 return -1; 1700 } 1701 1702 idx = sqlite3_bind_parameter_index(stmt, ":md5_hash"); 1703 rc = sqlite3_bind_text(stmt, idx, *buf, -1, NULL); 1704 if (rc != SQLITE_OK) { 1705 warnx("%s", sqlite3_errmsg(db)); 1706 sqlite3_finalize(stmt); 1707 free(sqlstr); 1708 free(*buf); 1709 *buf = NULL; 1710 return -1; 1711 } 1712 1713 if (sqlite3_step(stmt) == SQLITE_ROW) { 1714 sqlite3_finalize(stmt); 1715 free(sqlstr); 1716 return 0; 1717 } 1718 1719 sqlite3_finalize(stmt); 1720 free(sqlstr); 1721 return 1; 1722} 1723 1724/* Optimize the index for faster search */ 1725static void 1726optimize(sqlite3 *db) 1727{ 1728 const char *sqlstr; 1729 char *errmsg = NULL; 1730 1731 if (mflags.verbosity) 1732 printf("Optimizing the database index\n"); 1733 sqlstr = "INSERT INTO mandb(mandb) VALUES (\'optimize\');" 1734 "VACUUM"; 1735 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 1736 if (errmsg != NULL) { 1737 warnx("%s", errmsg); 1738 free(errmsg); 1739 return; 1740 } 1741} 1742 1743/* 1744 * cleanup -- 1745 * cleans up the global buffers 1746 */ 1747static void 1748cleanup(mandb_rec *rec) 1749{ 1750 rec->desc.offset = 0; 1751 rec->lib.offset = 0; 1752 rec->return_vals.offset = 0; 1753 rec->env.offset = 0; 1754 rec->exit_status.offset = 0; 1755 rec->diagnostics.offset = 0; 1756 rec->errors.offset = 0; 1757 rec->files.offset = 0; 1758 1759 free(rec->machine); 1760 rec->machine = NULL; 1761 1762 free(rec->links); 1763 rec->links = NULL; 1764 1765 free(rec->file_path); 1766 rec->file_path = NULL; 1767 1768 free(rec->name); 1769 rec->name = NULL; 1770 1771 free(rec->name_desc); 1772 rec->name_desc = NULL; 1773 1774 free(rec->md5_hash); 1775 rec->md5_hash = NULL; 1776} 1777 1778/* 1779 * init_secbuffs-- 1780 * Sets the value of buflen for all the sec_buff field of rec. And then 1781 * allocate memory to each sec_buff member of rec. 1782 */ 1783static void 1784init_secbuffs(mandb_rec *rec) 1785{ 1786 /* 1787 * Some sec_buff might need more memory, for example desc, 1788 * which stores the data of the DESCRIPTION section, 1789 * while some might need very small amount of memory. 1790 * Therefore explicitly setting the value of buflen field for 1791 * each sec_buff. 1792 */ 1793 rec->desc.buflen = 10 * BUFLEN; 1794 rec->desc.data = emalloc(rec->desc.buflen); 1795 rec->desc.offset = 0; 1796 1797 rec->lib.buflen = BUFLEN / 2; 1798 rec->lib.data = emalloc(rec->lib.buflen); 1799 rec->lib.offset = 0; 1800 1801 rec->return_vals.buflen = BUFLEN; 1802 rec->return_vals.data = emalloc(rec->return_vals.buflen); 1803 rec->return_vals.offset = 0; 1804 1805 rec->exit_status.buflen = BUFLEN; 1806 rec->exit_status.data = emalloc(rec->exit_status.buflen); 1807 rec->exit_status.offset = 0; 1808 1809 rec->env.buflen = BUFLEN; 1810 rec->env.data = emalloc(rec->env.buflen); 1811 rec->env.offset = 0; 1812 1813 rec->files.buflen = BUFLEN; 1814 rec->files.data = emalloc(rec->files.buflen); 1815 rec->files.offset = 0; 1816 1817 rec->diagnostics.buflen = BUFLEN; 1818 rec->diagnostics.data = emalloc(rec->diagnostics.buflen); 1819 rec->diagnostics.offset = 0; 1820 1821 rec->errors.buflen = BUFLEN; 1822 rec->errors.data = emalloc(rec->errors.buflen); 1823 rec->errors.offset = 0; 1824} 1825 1826/* 1827 * free_secbuffs-- 1828 * This function should be called at the end, when all the pages have been 1829 * parsed. 1830 * It frees the memory allocated to sec_buffs by init_secbuffs in the starting. 1831 */ 1832static void 1833free_secbuffs(mandb_rec *rec) 1834{ 1835 free(rec->desc.data); 1836 free(rec->lib.data); 1837 free(rec->return_vals.data); 1838 free(rec->exit_status.data); 1839 free(rec->env.data); 1840 free(rec->files.data); 1841 free(rec->diagnostics.data); 1842 free(rec->errors.data); 1843} 1844 1845static char * 1846parse_escape(const char *str) 1847{ 1848 const char *backslash, *last_backslash; 1849 char *result, *iter; 1850 size_t len; 1851 1852 assert(str); 1853 1854 last_backslash = str; 1855 backslash = strchr(str, '\\'); 1856 if (backslash == NULL) 1857 return estrdup(str); 1858 1859 result = emalloc(strlen(str) + 1); 1860 iter = result; 1861 1862 do { 1863 len = backslash - last_backslash; 1864 memcpy(iter, last_backslash, len); 1865 iter += len; 1866 if (backslash[1] == '-' || backslash[1] == ' ') { 1867 *iter++ = backslash[1]; 1868 last_backslash = backslash + 2; 1869 backslash = strchr(backslash + 2, '\\'); 1870 } else { 1871 ++backslash; 1872 mandoc_escape(&backslash, NULL, NULL); 1873 last_backslash = backslash; 1874 if (backslash == NULL) 1875 break; 1876 backslash = strchr(last_backslash, '\\'); 1877 } 1878 } while (backslash != NULL); 1879 if (last_backslash != NULL) 1880 strcpy(iter, last_backslash); 1881 iter = result; 1882 while ((iter = strchr(iter, ASCII_HYPH)) != NULL) 1883 *iter = '-'; 1884 return result; 1885} 1886 1887/* 1888 * append-- 1889 * Concatenates a space and src at the end of sbuff->data (much like concat in 1890 * apropos-utils.c). 1891 * Rather than reallocating space for writing data, it uses the value of the 1892 * offset field of sec_buff to write new data at the free space left in the 1893 * buffer. 1894 * In case the size of the data to be appended exceeds the number of bytes left 1895 * in the buffer, it reallocates buflen number of bytes and then continues. 1896 * Value of offset field should be adjusted as new data is written. 1897 * 1898 * NOTE: This function does not write the null byte at the end of the buffers, 1899 * write a null byte at the position pointed to by offset before inserting data 1900 * in the db. 1901 */ 1902static void 1903append(secbuff *sbuff, const char *src) 1904{ 1905 short flag = 0; 1906 size_t srclen, newlen; 1907 char *temp; 1908 1909 assert(src != NULL); 1910 temp = parse_escape(src); 1911 srclen = strlen(temp); 1912 1913 if (sbuff->data == NULL) { 1914 sbuff->data = emalloc(sbuff->buflen); 1915 sbuff->offset = 0; 1916 } 1917 1918 newlen = sbuff->offset + srclen + 2; 1919 if (newlen >= sbuff->buflen) { 1920 while (sbuff->buflen < newlen) 1921 sbuff->buflen += sbuff->buflen; 1922 sbuff->data = erealloc(sbuff->data, sbuff->buflen); 1923 flag = 1; 1924 } 1925 1926 /* Append a space at the end of the buffer. */ 1927 if (sbuff->offset || flag) 1928 sbuff->data[sbuff->offset++] = ' '; 1929 /* Now, copy src at the end of the buffer. */ 1930 memcpy(sbuff->data + sbuff->offset, temp, srclen); 1931 sbuff->offset += srclen; 1932 free(temp); 1933} 1934 1935static void 1936usage(void) 1937{ 1938 fprintf(stderr, "Usage: %s [-flo]\n", getprogname()); 1939 exit(1); 1940} 1941