1/* 2 * "$Id: help-index.c 11645 2014-02-27 16:35:53Z msweet $" 3 * 4 * Online help index routines for CUPS. 5 * 6 * Copyright 2007-2014 by Apple Inc. 7 * Copyright 1997-2007 by Easy Software Products. 8 * 9 * These coded instructions, statements, and computer programs are the 10 * property of Apple Inc. and are protected by Federal copyright 11 * law. Distribution and use rights are outlined in the file "LICENSE.txt" 12 * which should have been included with this file. If this file is 13 * file is missing or damaged, see the license at "http://www.cups.org/". 14 */ 15 16/* 17 * Include necessary headers... 18 */ 19 20#include "cgi-private.h" 21#include <cups/dir.h> 22 23 24/* 25 * List of common English words that should not be indexed... 26 */ 27 28static char help_common_words[][6] = 29 { 30 "about", 31 "all", 32 "an", 33 "and", 34 "are", 35 "as", 36 "at", 37 "be", 38 "been", 39 "but", 40 "by", 41 "call", 42 "can", 43 "come", 44 "could", 45 "day", 46 "did", 47 "do", 48 "down", 49 "each", 50 "find", 51 "first", 52 "for", 53 "from", 54 "go", 55 "had", 56 "has", 57 "have", 58 "he", 59 "her", 60 "him", 61 "his", 62 "hot", 63 "how", 64 "if", 65 "in", 66 "is", 67 "it", 68 "know", 69 "like", 70 "long", 71 "look", 72 "make", 73 "many", 74 "may", 75 "more", 76 "most", 77 "my", 78 "no", 79 "now", 80 "of", 81 "on", 82 "one", 83 "or", 84 "other", 85 "out", 86 "over", 87 "said", 88 "see", 89 "she", 90 "side", 91 "so", 92 "some", 93 "sound", 94 "than", 95 "that", 96 "the", 97 "their", 98 "them", 99 "then", 100 "there", 101 "these", 102 "they", 103 "thing", 104 "this", 105 "time", 106 "to", 107 "two", 108 "up", 109 "use", 110 "was", 111 "water", 112 "way", 113 "we", 114 "were", 115 "what", 116 "when", 117 "which", 118 "who", 119 "will", 120 "with", 121 "word", 122 "would", 123 "write", 124 "you", 125 "your" 126 }; 127 128 129/* 130 * Local functions... 131 */ 132 133static help_word_t *help_add_word(help_node_t *n, const char *text); 134static void help_delete_node(help_node_t *n); 135static void help_delete_word(help_word_t *w); 136static int help_load_directory(help_index_t *hi, 137 const char *directory, 138 const char *relative); 139static int help_load_file(help_index_t *hi, 140 const char *filename, 141 const char *relative, 142 time_t mtime); 143static help_node_t *help_new_node(const char *filename, const char *anchor, 144 const char *section, const char *text, 145 time_t mtime, off_t offset, 146 size_t length) 147 __attribute__((nonnull(1,3,4))); 148static int help_sort_by_name(help_node_t *p1, help_node_t *p2); 149static int help_sort_by_score(help_node_t *p1, help_node_t *p2); 150static int help_sort_words(help_word_t *w1, help_word_t *w2); 151 152 153/* 154 * 'helpDeleteIndex()' - Delete an index, freeing all memory used. 155 */ 156 157void 158helpDeleteIndex(help_index_t *hi) /* I - Help index */ 159{ 160 help_node_t *node; /* Current node */ 161 162 163 DEBUG_printf(("helpDeleteIndex(hi=%p)", hi)); 164 165 if (!hi) 166 return; 167 168 for (node = (help_node_t *)cupsArrayFirst(hi->nodes); 169 node; 170 node = (help_node_t *)cupsArrayNext(hi->nodes)) 171 { 172 if (!hi->search) 173 help_delete_node(node); 174 } 175 176 cupsArrayDelete(hi->nodes); 177 cupsArrayDelete(hi->sorted); 178 179 free(hi); 180} 181 182 183/* 184 * 'helpFindNode()' - Find a node in an index. 185 */ 186 187help_node_t * /* O - Node pointer or NULL */ 188helpFindNode(help_index_t *hi, /* I - Index */ 189 const char *filename, /* I - Filename */ 190 const char *anchor) /* I - Anchor */ 191{ 192 help_node_t key; /* Search key */ 193 194 195 DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")", 196 hi, filename, anchor)); 197 198 /* 199 * Range check input... 200 */ 201 202 if (!hi || !filename) 203 return (NULL); 204 205 /* 206 * Initialize the search key... 207 */ 208 209 key.filename = (char *)filename; 210 key.anchor = (char *)anchor; 211 212 /* 213 * Return any match... 214 */ 215 216 return ((help_node_t *)cupsArrayFind(hi->nodes, &key)); 217} 218 219 220/* 221 * 'helpLoadIndex()' - Load a help index from disk. 222 */ 223 224help_index_t * /* O - Index pointer or NULL */ 225helpLoadIndex(const char *hifile, /* I - Index filename */ 226 const char *directory) /* I - Directory that is indexed */ 227{ 228 help_index_t *hi; /* Help index */ 229 cups_file_t *fp; /* Current file */ 230 char line[2048], /* Line from file */ 231 *ptr, /* Pointer into line */ 232 *filename, /* Filename in line */ 233 *anchor, /* Anchor in line */ 234 *sectptr, /* Section pointer in line */ 235 section[1024], /* Section name */ 236 *text; /* Text in line */ 237 time_t mtime; /* Modification time */ 238 off_t offset; /* Offset into file */ 239 size_t length; /* Length in bytes */ 240 int update; /* Update? */ 241 help_node_t *node; /* Current node */ 242 help_word_t *word; /* Current word */ 243 244 245 DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")", 246 hifile, directory)); 247 248 /* 249 * Create a new, empty index. 250 */ 251 252 if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL) 253 return (NULL); 254 255 hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); 256 hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); 257 258 if (!hi->nodes || !hi->sorted) 259 { 260 cupsArrayDelete(hi->nodes); 261 cupsArrayDelete(hi->sorted); 262 free(hi); 263 return (NULL); 264 } 265 266 /* 267 * Try loading the existing index file... 268 */ 269 270 if ((fp = cupsFileOpen(hifile, "r")) != NULL) 271 { 272 /* 273 * Lock the file and then read the first line... 274 */ 275 276 cupsFileLock(fp, 1); 277 278 if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2")) 279 { 280 /* 281 * Got a valid header line, now read the data lines... 282 */ 283 284 node = NULL; 285 286 while (cupsFileGets(fp, line, sizeof(line))) 287 { 288 /* 289 * Each line looks like one of the following: 290 * 291 * filename mtime offset length "section" "text" 292 * filename#anchor offset length "text" 293 * SP count word 294 */ 295 296 if (line[0] == ' ') 297 { 298 /* 299 * Read a word in the current node... 300 */ 301 302 if (!node || (ptr = strrchr(line, ' ')) == NULL) 303 continue; 304 305 if ((word = help_add_word(node, ptr + 1)) != NULL) 306 word->count = atoi(line + 1); 307 } 308 else 309 { 310 /* 311 * Add a node... 312 */ 313 314 filename = line; 315 316 if ((ptr = strchr(line, ' ')) == NULL) 317 break; 318 319 while (isspace(*ptr & 255)) 320 *ptr++ = '\0'; 321 322 if ((anchor = strrchr(filename, '#')) != NULL) 323 { 324 *anchor++ = '\0'; 325 mtime = 0; 326 } 327 else 328 mtime = strtol(ptr, &ptr, 10); 329 330 offset = strtoll(ptr, &ptr, 10); 331 length = (size_t)strtoll(ptr, &ptr, 10); 332 333 while (isspace(*ptr & 255)) 334 ptr ++; 335 336 if (!anchor) 337 { 338 /* 339 * Get section... 340 */ 341 342 if (*ptr != '\"') 343 break; 344 345 ptr ++; 346 sectptr = ptr; 347 348 while (*ptr && *ptr != '\"') 349 ptr ++; 350 351 if (*ptr != '\"') 352 break; 353 354 *ptr++ = '\0'; 355 356 strlcpy(section, sectptr, sizeof(section)); 357 358 while (isspace(*ptr & 255)) 359 ptr ++; 360 } 361 362 if (*ptr != '\"') 363 break; 364 365 ptr ++; 366 text = ptr; 367 368 while (*ptr && *ptr != '\"') 369 ptr ++; 370 371 if (*ptr != '\"') 372 break; 373 374 *ptr++ = '\0'; 375 376 if ((node = help_new_node(filename, anchor, section, text, 377 mtime, offset, length)) == NULL) 378 break; 379 380 node->score = -1; 381 382 cupsArrayAdd(hi->nodes, node); 383 } 384 } 385 } 386 387 cupsFileClose(fp); 388 } 389 390 /* 391 * Scan for new/updated files... 392 */ 393 394 update = help_load_directory(hi, directory, NULL); 395 396 /* 397 * Remove any files that are no longer installed... 398 */ 399 400 for (node = (help_node_t *)cupsArrayFirst(hi->nodes); 401 node; 402 node = (help_node_t *)cupsArrayNext(hi->nodes)) 403 if (node->score < 0) 404 { 405 /* 406 * Delete this node... 407 */ 408 409 cupsArrayRemove(hi->nodes, node); 410 help_delete_node(node); 411 } 412 413 /* 414 * Add nodes to the sorted array... 415 */ 416 417 for (node = (help_node_t *)cupsArrayFirst(hi->nodes); 418 node; 419 node = (help_node_t *)cupsArrayNext(hi->nodes)) 420 cupsArrayAdd(hi->sorted, node); 421 422 /* 423 * Save the index if we updated it... 424 */ 425 426 if (update) 427 helpSaveIndex(hi, hifile); 428 429 /* 430 * Return the index... 431 */ 432 433 return (hi); 434} 435 436 437/* 438 * 'helpSaveIndex()' - Save a help index to disk. 439 */ 440 441int /* O - 0 on success, -1 on error */ 442helpSaveIndex(help_index_t *hi, /* I - Index */ 443 const char *hifile) /* I - Index filename */ 444{ 445 cups_file_t *fp; /* Index file */ 446 help_node_t *node; /* Current node */ 447 help_word_t *word; /* Current word */ 448 449 450 DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile)); 451 452 /* 453 * Try creating a new index file... 454 */ 455 456 if ((fp = cupsFileOpen(hifile, "w9")) == NULL) 457 return (-1); 458 459 /* 460 * Lock the file while we write it... 461 */ 462 463 cupsFileLock(fp, 1); 464 465 cupsFilePuts(fp, "HELPV2\n"); 466 467 for (node = (help_node_t *)cupsArrayFirst(hi->nodes); 468 node; 469 node = (help_node_t *)cupsArrayNext(hi->nodes)) 470 { 471 /* 472 * Write the current node with/without the anchor... 473 */ 474 475 if (node->anchor) 476 { 477 if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n", 478 node->filename, node->anchor, 479 CUPS_LLCAST node->offset, CUPS_LLCAST node->length, 480 node->text) < 0) 481 break; 482 } 483 else 484 { 485 if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n", 486 node->filename, (int)node->mtime, 487 CUPS_LLCAST node->offset, CUPS_LLCAST node->length, 488 node->section ? node->section : "", node->text) < 0) 489 break; 490 } 491 492 /* 493 * Then write the words associated with the node... 494 */ 495 496 for (word = (help_word_t *)cupsArrayFirst(node->words); 497 word; 498 word = (help_word_t *)cupsArrayNext(node->words)) 499 if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0) 500 break; 501 } 502 503 cupsFileFlush(fp); 504 505 if (cupsFileClose(fp) < 0) 506 return (-1); 507 else if (node) 508 return (-1); 509 else 510 return (0); 511} 512 513 514/* 515 * 'helpSearchIndex()' - Search an index. 516 */ 517 518help_index_t * /* O - Search index */ 519helpSearchIndex(help_index_t *hi, /* I - Index */ 520 const char *query, /* I - Query string */ 521 const char *section, /* I - Limit search to this section */ 522 const char *filename) /* I - Limit search to this file */ 523{ 524 help_index_t *search; /* Search index */ 525 help_node_t *node; /* Current node */ 526 help_word_t *word; /* Current word */ 527 void *sc; /* Search context */ 528 int matches; /* Number of matches */ 529 530 531 DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")", 532 hi, query, filename)); 533 534 /* 535 * Range check... 536 */ 537 538 if (!hi || !query) 539 return (NULL); 540 541 /* 542 * Reset the scores of all nodes to 0... 543 */ 544 545 for (node = (help_node_t *)cupsArrayFirst(hi->nodes); 546 node; 547 node = (help_node_t *)cupsArrayNext(hi->nodes)) 548 node->score = 0; 549 550 /* 551 * Find the first node to search in... 552 */ 553 554 if (filename) 555 { 556 node = helpFindNode(hi, filename, NULL); 557 if (!node) 558 return (NULL); 559 } 560 else 561 node = (help_node_t *)cupsArrayFirst(hi->nodes); 562 563 /* 564 * Convert the query into a regular expression... 565 */ 566 567 sc = cgiCompileSearch(query); 568 if (!sc) 569 return (NULL); 570 571 /* 572 * Allocate a search index... 573 */ 574 575 search = calloc(1, sizeof(help_index_t)); 576 if (!search) 577 { 578 cgiFreeSearch(sc); 579 return (NULL); 580 } 581 582 search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL); 583 search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL); 584 585 if (!search->nodes || !search->sorted) 586 { 587 cupsArrayDelete(search->nodes); 588 cupsArrayDelete(search->sorted); 589 free(search); 590 cgiFreeSearch(sc); 591 return (NULL); 592 } 593 594 search->search = 1; 595 596 /* 597 * Check each node in the index, adding matching nodes to the 598 * search index... 599 */ 600 601 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) 602 if (section && strcmp(node->section, section)) 603 continue; 604 else if (filename && strcmp(node->filename, filename)) 605 continue; 606 else 607 { 608 matches = cgiDoSearch(sc, node->text); 609 610 for (word = (help_word_t *)cupsArrayFirst(node->words); 611 word; 612 word = (help_word_t *)cupsArrayNext(node->words)) 613 if (cgiDoSearch(sc, word->text) > 0) 614 matches += word->count; 615 616 if (matches > 0) 617 { 618 /* 619 * Found a match, add the node to the search index... 620 */ 621 622 node->score = matches; 623 624 cupsArrayAdd(search->nodes, node); 625 cupsArrayAdd(search->sorted, node); 626 } 627 } 628 629 /* 630 * Free the search context... 631 */ 632 633 cgiFreeSearch(sc); 634 635 /* 636 * Return the results... 637 */ 638 639 return (search); 640} 641 642 643/* 644 * 'help_add_word()' - Add a word to a node. 645 */ 646 647static help_word_t * /* O - New word */ 648help_add_word(help_node_t *n, /* I - Node */ 649 const char *text) /* I - Word text */ 650{ 651 help_word_t *w, /* New word */ 652 key; /* Search key */ 653 654 655 DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text)); 656 657 /* 658 * Create the words array as needed... 659 */ 660 661 if (!n->words) 662 n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL); 663 664 /* 665 * See if the word is already added... 666 */ 667 668 key.text = (char *)text; 669 670 if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL) 671 { 672 /* 673 * Create a new word... 674 */ 675 676 if ((w = calloc(1, sizeof(help_word_t))) == NULL) 677 return (NULL); 678 679 if ((w->text = strdup(text)) == NULL) 680 { 681 free(w); 682 return (NULL); 683 } 684 685 cupsArrayAdd(n->words, w); 686 } 687 688 /* 689 * Bump the counter for this word and return it... 690 */ 691 692 w->count ++; 693 694 return (w); 695} 696 697 698/* 699 * 'help_delete_node()' - Free all memory used by a node. 700 */ 701 702static void 703help_delete_node(help_node_t *n) /* I - Node */ 704{ 705 help_word_t *w; /* Current word */ 706 707 708 DEBUG_printf(("2help_delete_node(n=%p)", n)); 709 710 if (!n) 711 return; 712 713 if (n->filename) 714 free(n->filename); 715 716 if (n->anchor) 717 free(n->anchor); 718 719 if (n->section) 720 free(n->section); 721 722 if (n->text) 723 free(n->text); 724 725 for (w = (help_word_t *)cupsArrayFirst(n->words); 726 w; 727 w = (help_word_t *)cupsArrayNext(n->words)) 728 help_delete_word(w); 729 730 cupsArrayDelete(n->words); 731 732 free(n); 733} 734 735 736/* 737 * 'help_delete_word()' - Free all memory used by a word. 738 */ 739 740static void 741help_delete_word(help_word_t *w) /* I - Word */ 742{ 743 DEBUG_printf(("2help_delete_word(w=%p)", w)); 744 745 if (!w) 746 return; 747 748 if (w->text) 749 free(w->text); 750 751 free(w); 752} 753 754 755/* 756 * 'help_load_directory()' - Load a directory of files into an index. 757 */ 758 759static int /* O - 0 = success, -1 = error, 1 = updated */ 760help_load_directory( 761 help_index_t *hi, /* I - Index */ 762 const char *directory, /* I - Directory */ 763 const char *relative) /* I - Relative path */ 764{ 765 cups_dir_t *dir; /* Directory file */ 766 cups_dentry_t *dent; /* Directory entry */ 767 char *ext, /* Pointer to extension */ 768 filename[1024], /* Full filename */ 769 relname[1024]; /* Relative filename */ 770 int update; /* Updated? */ 771 help_node_t *node; /* Current node */ 772 773 774 DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")", 775 hi, directory, relative)); 776 777 /* 778 * Open the directory and scan it... 779 */ 780 781 if ((dir = cupsDirOpen(directory)) == NULL) 782 return (0); 783 784 update = 0; 785 786 while ((dent = cupsDirRead(dir)) != NULL) 787 { 788 /* 789 * Skip "." files... 790 */ 791 792 if (dent->filename[0] == '.') 793 continue; 794 795 /* 796 * Get absolute and relative filenames... 797 */ 798 799 snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename); 800 if (relative) 801 snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename); 802 else 803 strlcpy(relname, dent->filename, sizeof(relname)); 804 805 /* 806 * Check if we have a HTML file... 807 */ 808 809 if ((ext = strstr(dent->filename, ".html")) != NULL && 810 (!ext[5] || !strcmp(ext + 5, ".gz"))) 811 { 812 /* 813 * HTML file, see if we have already indexed the file... 814 */ 815 816 if ((node = helpFindNode(hi, relname, NULL)) != NULL) 817 { 818 /* 819 * File already indexed - check dates to confirm that the 820 * index is up-to-date... 821 */ 822 823 if (node->mtime == dent->fileinfo.st_mtime) 824 { 825 /* 826 * Same modification time, so mark all of the nodes 827 * for this file as up-to-date... 828 */ 829 830 for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes)) 831 if (!strcmp(node->filename, relname)) 832 node->score = 0; 833 else 834 break; 835 836 continue; 837 } 838 } 839 840 update = 1; 841 842 help_load_file(hi, filename, relname, dent->fileinfo.st_mtime); 843 } 844 else if (S_ISDIR(dent->fileinfo.st_mode)) 845 { 846 /* 847 * Process sub-directory... 848 */ 849 850 if (help_load_directory(hi, filename, relname) == 1) 851 update = 1; 852 } 853 } 854 855 cupsDirClose(dir); 856 857 return (update); 858} 859 860 861/* 862 * 'help_load_file()' - Load a HTML files into an index. 863 */ 864 865static int /* O - 0 = success, -1 = error */ 866help_load_file( 867 help_index_t *hi, /* I - Index */ 868 const char *filename, /* I - Filename */ 869 const char *relative, /* I - Relative path */ 870 time_t mtime) /* I - Modification time */ 871{ 872 cups_file_t *fp; /* HTML file */ 873 help_node_t *node; /* Current node */ 874 char line[1024], /* Line from file */ 875 temp[1024], /* Temporary word */ 876 section[1024], /* Section */ 877 *ptr, /* Pointer into line */ 878 *anchor, /* Anchor name */ 879 *text; /* Text for anchor */ 880 off_t offset; /* File offset */ 881 char quote; /* Quote character */ 882 help_word_t *word; /* Current word */ 883 int wordlen; /* Length of word */ 884 885 886 DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", " 887 "mtime=%ld)", hi, filename, relative, mtime)); 888 889 if ((fp = cupsFileOpen(filename, "r")) == NULL) 890 return (-1); 891 892 node = NULL; 893 offset = 0; 894 895 strlcpy(section, "Other", sizeof(section)); 896 897 while (cupsFileGets(fp, line, sizeof(line))) 898 { 899 /* 900 * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix... 901 */ 902 903 if (!_cups_strncasecmp(line, "<!-- SECTION:", 13)) 904 { 905 /* 906 * Got section line, copy it! 907 */ 908 909 for (ptr = line + 13; isspace(*ptr & 255); ptr ++); 910 911 strlcpy(section, ptr, sizeof(section)); 912 if ((ptr = strstr(section, "-->")) != NULL) 913 { 914 /* 915 * Strip comment stuff from end of line... 916 */ 917 918 for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0'); 919 920 if (isspace(*ptr & 255)) 921 *ptr = '\0'; 922 } 923 continue; 924 } 925 926 for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;) 927 { 928 ptr ++; 929 930 if (!_cups_strncasecmp(ptr, "TITLE>", 6)) 931 { 932 /* 933 * Found the title... 934 */ 935 936 anchor = NULL; 937 ptr += 6; 938 } 939 else if (!_cups_strncasecmp(ptr, "A NAME=", 7)) 940 { 941 /* 942 * Found an anchor... 943 */ 944 945 ptr += 7; 946 947 if (*ptr == '\"' || *ptr == '\'') 948 { 949 /* 950 * Get quoted anchor... 951 */ 952 953 quote = *ptr; 954 anchor = ptr + 1; 955 if ((ptr = strchr(anchor, quote)) != NULL) 956 *ptr++ = '\0'; 957 else 958 break; 959 } 960 else 961 { 962 /* 963 * Get unquoted anchor... 964 */ 965 966 anchor = ptr + 1; 967 968 for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++); 969 970 if (*ptr) 971 *ptr++ = '\0'; 972 else 973 break; 974 } 975 976 /* 977 * Got the anchor, now lets find the end... 978 */ 979 980 while (*ptr && *ptr != '>') 981 ptr ++; 982 983 if (*ptr != '>') 984 break; 985 986 ptr ++; 987 } 988 else 989 continue; 990 991 /* 992 * Now collect text for the link... 993 */ 994 995 text = ptr; 996 while ((ptr = strchr(text, '<')) == NULL) 997 { 998 ptr = text + strlen(text); 999 if (ptr >= (line + sizeof(line) - 2)) 1000 break; 1001 1002 *ptr++ = ' '; 1003 1004 if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1)) 1005 break; 1006 } 1007 1008 *ptr = '\0'; 1009 1010 if (node) 1011 node->length = (size_t)(offset - node->offset); 1012 1013 if (!*text) 1014 { 1015 node = NULL; 1016 break; 1017 } 1018 1019 if ((node = helpFindNode(hi, relative, anchor)) != NULL) 1020 { 1021 /* 1022 * Node already in the index, so replace the text and other 1023 * data... 1024 */ 1025 1026 cupsArrayRemove(hi->nodes, node); 1027 1028 if (node->section) 1029 free(node->section); 1030 1031 if (node->text) 1032 free(node->text); 1033 1034 if (node->words) 1035 { 1036 for (word = (help_word_t *)cupsArrayFirst(node->words); 1037 word; 1038 word = (help_word_t *)cupsArrayNext(node->words)) 1039 help_delete_word(word); 1040 1041 cupsArrayDelete(node->words); 1042 node->words = NULL; 1043 } 1044 1045 node->section = section[0] ? strdup(section) : NULL; 1046 node->text = strdup(text); 1047 node->mtime = mtime; 1048 node->offset = offset; 1049 node->score = 0; 1050 } 1051 else 1052 { 1053 /* 1054 * New node... 1055 */ 1056 1057 node = help_new_node(relative, anchor, section, text, mtime, offset, 0); 1058 } 1059 1060 /* 1061 * Go through the text value and replace tabs and newlines with 1062 * whitespace and eliminate extra whitespace... 1063 */ 1064 1065 for (ptr = node->text, text = node->text; *ptr;) 1066 if (isspace(*ptr & 255)) 1067 { 1068 while (isspace(*ptr & 255)) 1069 ptr ++; 1070 1071 *text++ = ' '; 1072 } 1073 else if (text != ptr) 1074 *text++ = *ptr++; 1075 else 1076 { 1077 text ++; 1078 ptr ++; 1079 } 1080 1081 *text = '\0'; 1082 1083 /* 1084 * (Re)add the node to the array... 1085 */ 1086 1087 cupsArrayAdd(hi->nodes, node); 1088 1089 if (!anchor) 1090 node = NULL; 1091 break; 1092 } 1093 1094 if (node) 1095 { 1096 /* 1097 * Scan this line for words... 1098 */ 1099 1100 for (ptr = line; *ptr; ptr ++) 1101 { 1102 /* 1103 * Skip HTML stuff... 1104 */ 1105 1106 if (*ptr == '<') 1107 { 1108 if (!strncmp(ptr, "<!--", 4)) 1109 { 1110 /* 1111 * Skip HTML comment... 1112 */ 1113 1114 if ((text = strstr(ptr + 4, "-->")) == NULL) 1115 ptr += strlen(ptr) - 1; 1116 else 1117 ptr = text + 2; 1118 } 1119 else 1120 { 1121 /* 1122 * Skip HTML element... 1123 */ 1124 1125 for (ptr ++; *ptr && *ptr != '>'; ptr ++) 1126 { 1127 if (*ptr == '\"' || *ptr == '\'') 1128 { 1129 for (quote = *ptr++; *ptr && *ptr != quote; ptr ++); 1130 1131 if (!*ptr) 1132 ptr --; 1133 } 1134 } 1135 1136 if (!*ptr) 1137 ptr --; 1138 } 1139 1140 continue; 1141 } 1142 else if (*ptr == '&') 1143 { 1144 /* 1145 * Skip HTML entity... 1146 */ 1147 1148 for (ptr ++; *ptr && *ptr != ';'; ptr ++); 1149 1150 if (!*ptr) 1151 ptr --; 1152 1153 continue; 1154 } 1155 else if (!isalnum(*ptr & 255)) 1156 continue; 1157 1158 /* 1159 * Found the start of a word, search until we find the end... 1160 */ 1161 1162 for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++); 1163 1164 wordlen = (int)(ptr - text); 1165 1166 memcpy(temp, text, (size_t)wordlen); 1167 temp[wordlen] = '\0'; 1168 1169 ptr --; 1170 1171 if (wordlen > 1 && !bsearch(temp, help_common_words, 1172 (sizeof(help_common_words) / 1173 sizeof(help_common_words[0])), 1174 sizeof(help_common_words[0]), 1175 (int (*)(const void *, const void *)) 1176 _cups_strcasecmp)) 1177 help_add_word(node, temp); 1178 } 1179 } 1180 1181 /* 1182 * Get the offset of the next line... 1183 */ 1184 1185 offset = cupsFileTell(fp); 1186 } 1187 1188 cupsFileClose(fp); 1189 1190 if (node) 1191 node->length = (size_t)(offset - node->offset); 1192 1193 return (0); 1194} 1195 1196 1197/* 1198 * 'help_new_node()' - Create a new node and add it to an index. 1199 */ 1200 1201static help_node_t * /* O - Node pointer or NULL on error */ 1202help_new_node(const char *filename, /* I - Filename */ 1203 const char *anchor, /* I - Anchor */ 1204 const char *section, /* I - Section */ 1205 const char *text, /* I - Text */ 1206 time_t mtime, /* I - Modification time */ 1207 off_t offset, /* I - Offset in file */ 1208 size_t length) /* I - Length in bytes */ 1209{ 1210 help_node_t *n; /* Node */ 1211 1212 1213 DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", " 1214 "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text, 1215 (long)mtime, (long)offset, (long)length)); 1216 1217 n = (help_node_t *)calloc(1, sizeof(help_node_t)); 1218 if (!n) 1219 return (NULL); 1220 1221 n->filename = strdup(filename); 1222 n->anchor = anchor ? strdup(anchor) : NULL; 1223 n->section = (section && *section) ? strdup(section) : NULL; 1224 n->text = strdup(text); 1225 n->mtime = mtime; 1226 n->offset = offset; 1227 n->length = length; 1228 1229 return (n); 1230} 1231 1232 1233/* 1234 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor. 1235 */ 1236 1237static int /* O - Difference */ 1238help_sort_by_name(help_node_t *n1, /* I - First node */ 1239 help_node_t *n2) /* I - Second node */ 1240{ 1241 int diff; /* Difference */ 1242 1243 1244 DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)", 1245 n1, n1->filename, n1->anchor, 1246 n2, n2->filename, n2->anchor)); 1247 1248 if ((diff = strcmp(n1->filename, n2->filename)) != 0) 1249 return (diff); 1250 1251 if (!n1->anchor && !n2->anchor) 1252 return (0); 1253 else if (!n1->anchor) 1254 return (-1); 1255 else if (!n2->anchor) 1256 return (1); 1257 else 1258 return (strcmp(n1->anchor, n2->anchor)); 1259} 1260 1261 1262/* 1263 * 'help_sort_nodes_by_score()' - Sort nodes by score and text. 1264 */ 1265 1266static int /* O - Difference */ 1267help_sort_by_score(help_node_t *n1, /* I - First node */ 1268 help_node_t *n2) /* I - Second node */ 1269{ 1270 int diff; /* Difference */ 1271 1272 1273 DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), " 1274 "n2=%p(%d \"%s\" \"%s\")", 1275 n1, n1->score, n1->section, n1->text, 1276 n2, n2->score, n2->section, n2->text)); 1277 1278 if (n1->score != n2->score) 1279 return (n2->score - n1->score); 1280 1281 if (n1->section && !n2->section) 1282 return (1); 1283 else if (!n1->section && n2->section) 1284 return (-1); 1285 else if (n1->section && n2->section && 1286 (diff = strcmp(n1->section, n2->section)) != 0) 1287 return (diff); 1288 1289 return (_cups_strcasecmp(n1->text, n2->text)); 1290} 1291 1292 1293/* 1294 * 'help_sort_words()' - Sort words alphabetically. 1295 */ 1296 1297static int /* O - Difference */ 1298help_sort_words(help_word_t *w1, /* I - Second word */ 1299 help_word_t *w2) /* I - Second word */ 1300{ 1301 DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))", 1302 w1, w1->text, w2, w2->text)); 1303 1304 return (_cups_strcasecmp(w1->text, w2->text)); 1305} 1306 1307 1308/* 1309 * End of "$Id: help-index.c 11645 2014-02-27 16:35:53Z msweet $". 1310 */ 1311