1/*
2 * "$Id: help-index.c 11645 2014-02-27 16:35:53Z msweet $"
3 *
4 * Online help index routines for CUPS.
5 *
6 * Copyright 2007-2014 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law.  Distribution and use rights are outlined in the file "LICENSE.txt"
 * which should have been included with this file.  If this file is
 * missing or damaged, see the license at "http://www.cups.org/".
14 */
15
16/*
17 * Include necessary headers...
18 */
19
20#include "cgi-private.h"
21#include <cups/dir.h>
22
23
/*
 * List of common English words that should not be indexed...
 *
 * The table is searched with bsearch() using a case-insensitive string
 * comparison, so the entries MUST stay in ascending alphabetical order.
 * Each entry is stored in a 6-byte slot, so words are limited to 5
 * characters plus the terminating nul.
 */

static const char	help_common_words[][6] =
			{
			  "about",
			  "all",
			  "an",
			  "and",
			  "are",
			  "as",
			  "at",
			  "be",
			  "been",
			  "but",
			  "by",
			  "call",
			  "can",
			  "come",
			  "could",
			  "day",
			  "did",
			  "do",
			  "down",
			  "each",
			  "find",
			  "first",
			  "for",
			  "from",
			  "go",
			  "had",
			  "has",
			  "have",
			  "he",
			  "her",
			  "him",
			  "his",
			  "hot",
			  "how",
			  "if",
			  "in",
			  "is",
			  "it",
			  "know",
			  "like",
			  "long",
			  "look",
			  "make",
			  "many",
			  "may",
			  "more",
			  "most",
			  "my",
			  "no",
			  "now",
			  "of",
			  "on",
			  "one",
			  "or",
			  "other",
			  "out",
			  "over",
			  "said",
			  "see",
			  "she",
			  "side",
			  "so",
			  "some",
			  "sound",
			  "than",
			  "that",
			  "the",
			  "their",
			  "them",
			  "then",
			  "there",
			  "these",
			  "they",
			  "thing",
			  "this",
			  "time",
			  "to",
			  "two",
			  "up",
			  "use",
			  "was",
			  "water",
			  "way",
			  "we",
			  "were",
			  "what",
			  "when",
			  "which",
			  "who",
			  "will",
			  "with",
			  "word",
			  "would",
			  "write",
			  "you",
			  "your"
			};
127
128
129/*
130 * Local functions...
131 */
132
133static help_word_t	*help_add_word(help_node_t *n, const char *text);
134static void		help_delete_node(help_node_t *n);
135static void		help_delete_word(help_word_t *w);
136static int		help_load_directory(help_index_t *hi,
137			                    const char *directory,
138					    const char *relative);
139static int		help_load_file(help_index_t *hi,
140			               const char *filename,
141				       const char *relative,
142				       time_t     mtime);
143static help_node_t	*help_new_node(const char *filename, const char *anchor,
144			               const char *section, const char *text,
145				       time_t mtime, off_t offset,
146				       size_t length)
147				       __attribute__((nonnull(1,3,4)));
148static int		help_sort_by_name(help_node_t *p1, help_node_t *p2);
149static int		help_sort_by_score(help_node_t *p1, help_node_t *p2);
150static int		help_sort_words(help_word_t *w1, help_word_t *w2);
151
152
153/*
154 * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
155 */
156
157void
158helpDeleteIndex(help_index_t *hi)	/* I - Help index */
159{
160  help_node_t	*node;			/* Current node */
161
162
163  DEBUG_printf(("helpDeleteIndex(hi=%p)", hi));
164
165  if (!hi)
166    return;
167
168  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
169       node;
170       node = (help_node_t *)cupsArrayNext(hi->nodes))
171  {
172    if (!hi->search)
173      help_delete_node(node);
174  }
175
176  cupsArrayDelete(hi->nodes);
177  cupsArrayDelete(hi->sorted);
178
179  free(hi);
180}
181
182
183/*
184 * 'helpFindNode()' - Find a node in an index.
185 */
186
187help_node_t *				/* O - Node pointer or NULL */
188helpFindNode(help_index_t *hi,		/* I - Index */
189             const char   *filename,	/* I - Filename */
190             const char   *anchor)	/* I - Anchor */
191{
192  help_node_t	key;			/* Search key */
193
194
195  DEBUG_printf(("helpFindNode(hi=%p, filename=\"%s\", anchor=\"%s\")",
196                hi, filename, anchor));
197
198 /*
199  * Range check input...
200  */
201
202  if (!hi || !filename)
203    return (NULL);
204
205 /*
206  * Initialize the search key...
207  */
208
209  key.filename = (char *)filename;
210  key.anchor   = (char *)anchor;
211
212 /*
213  * Return any match...
214  */
215
216  return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
217}
218
219
220/*
221 * 'helpLoadIndex()' - Load a help index from disk.
222 */
223
224help_index_t *				/* O - Index pointer or NULL */
225helpLoadIndex(const char *hifile,	/* I - Index filename */
226              const char *directory)	/* I - Directory that is indexed */
227{
228  help_index_t	*hi;			/* Help index */
229  cups_file_t	*fp;			/* Current file */
230  char		line[2048],		/* Line from file */
231		*ptr,			/* Pointer into line */
232		*filename,		/* Filename in line */
233		*anchor,		/* Anchor in line */
234		*sectptr,		/* Section pointer in line */
235		section[1024],		/* Section name */
236		*text;			/* Text in line */
237  time_t	mtime;			/* Modification time */
238  off_t		offset;			/* Offset into file */
239  size_t	length;			/* Length in bytes */
240  int		update;			/* Update? */
241  help_node_t	*node;			/* Current node */
242  help_word_t	*word;			/* Current word */
243
244
245  DEBUG_printf(("helpLoadIndex(hifile=\"%s\", directory=\"%s\")",
246                hifile, directory));
247
248 /*
249  * Create a new, empty index.
250  */
251
252  if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
253    return (NULL);
254
255  hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
256  hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
257
258  if (!hi->nodes || !hi->sorted)
259  {
260    cupsArrayDelete(hi->nodes);
261    cupsArrayDelete(hi->sorted);
262    free(hi);
263    return (NULL);
264  }
265
266 /*
267  * Try loading the existing index file...
268  */
269
270  if ((fp = cupsFileOpen(hifile, "r")) != NULL)
271  {
272   /*
273    * Lock the file and then read the first line...
274    */
275
276    cupsFileLock(fp, 1);
277
278    if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
279    {
280     /*
281      * Got a valid header line, now read the data lines...
282      */
283
284      node = NULL;
285
286      while (cupsFileGets(fp, line, sizeof(line)))
287      {
288       /*
289	* Each line looks like one of the following:
290	*
291	*     filename mtime offset length "section" "text"
292	*     filename#anchor offset length "text"
293	*     SP count word
294	*/
295
296        if (line[0] == ' ')
297	{
298	 /*
299	  * Read a word in the current node...
300	  */
301
302          if (!node || (ptr = strrchr(line, ' ')) == NULL)
303	    continue;
304
305          if ((word = help_add_word(node, ptr + 1)) != NULL)
306	    word->count = atoi(line + 1);
307        }
308	else
309	{
310	 /*
311	  * Add a node...
312	  */
313
314	  filename = line;
315
316	  if ((ptr = strchr(line, ' ')) == NULL)
317            break;
318
319	  while (isspace(*ptr & 255))
320            *ptr++ = '\0';
321
322	  if ((anchor = strrchr(filename, '#')) != NULL)
323	  {
324            *anchor++ = '\0';
325	    mtime = 0;
326	  }
327	  else
328	    mtime = strtol(ptr, &ptr, 10);
329
330	  offset = strtoll(ptr, &ptr, 10);
331	  length = (size_t)strtoll(ptr, &ptr, 10);
332
333	  while (isspace(*ptr & 255))
334            ptr ++;
335
336          if (!anchor)
337	  {
338	   /*
339	    * Get section...
340	    */
341
342            if (*ptr != '\"')
343	      break;
344
345            ptr ++;
346	    sectptr = ptr;
347
348            while (*ptr && *ptr != '\"')
349	      ptr ++;
350
351            if (*ptr != '\"')
352	      break;
353
354            *ptr++ = '\0';
355
356            strlcpy(section, sectptr, sizeof(section));
357
358	    while (isspace(*ptr & 255))
359              ptr ++;
360          }
361
362          if (*ptr != '\"')
363	    break;
364
365          ptr ++;
366	  text = ptr;
367
368          while (*ptr && *ptr != '\"')
369	    ptr ++;
370
371          if (*ptr != '\"')
372	    break;
373
374          *ptr++ = '\0';
375
376	  if ((node = help_new_node(filename, anchor, section, text,
377				    mtime, offset, length)) == NULL)
378            break;
379
380	  node->score = -1;
381
382	  cupsArrayAdd(hi->nodes, node);
383        }
384      }
385    }
386
387    cupsFileClose(fp);
388  }
389
390 /*
391  * Scan for new/updated files...
392  */
393
394  update = help_load_directory(hi, directory, NULL);
395
396 /*
397  * Remove any files that are no longer installed...
398  */
399
400  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
401       node;
402       node = (help_node_t *)cupsArrayNext(hi->nodes))
403    if (node->score < 0)
404    {
405     /*
406      * Delete this node...
407      */
408
409      cupsArrayRemove(hi->nodes, node);
410      help_delete_node(node);
411    }
412
413 /*
414  * Add nodes to the sorted array...
415  */
416
417  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
418       node;
419       node = (help_node_t *)cupsArrayNext(hi->nodes))
420    cupsArrayAdd(hi->sorted, node);
421
422 /*
423  * Save the index if we updated it...
424  */
425
426  if (update)
427    helpSaveIndex(hi, hifile);
428
429 /*
430  * Return the index...
431  */
432
433  return (hi);
434}
435
436
437/*
438 * 'helpSaveIndex()' - Save a help index to disk.
439 */
440
441int					/* O - 0 on success, -1 on error */
442helpSaveIndex(help_index_t *hi,		/* I - Index */
443              const char   *hifile)	/* I - Index filename */
444{
445  cups_file_t	*fp;			/* Index file */
446  help_node_t	*node;			/* Current node */
447  help_word_t	*word;			/* Current word */
448
449
450  DEBUG_printf(("helpSaveIndex(hi=%p, hifile=\"%s\")", hi, hifile));
451
452 /*
453  * Try creating a new index file...
454  */
455
456  if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
457    return (-1);
458
459 /*
460  * Lock the file while we write it...
461  */
462
463  cupsFileLock(fp, 1);
464
465  cupsFilePuts(fp, "HELPV2\n");
466
467  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
468       node;
469       node = (help_node_t *)cupsArrayNext(hi->nodes))
470  {
471   /*
472    * Write the current node with/without the anchor...
473    */
474
475    if (node->anchor)
476    {
477      if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
478                         node->filename, node->anchor,
479                         CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
480			 node->text) < 0)
481        break;
482    }
483    else
484    {
485      if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
486                         node->filename, (int)node->mtime,
487                         CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
488			 node->section ? node->section : "", node->text) < 0)
489        break;
490    }
491
492   /*
493    * Then write the words associated with the node...
494    */
495
496    for (word = (help_word_t *)cupsArrayFirst(node->words);
497         word;
498	 word = (help_word_t *)cupsArrayNext(node->words))
499      if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
500        break;
501  }
502
503  cupsFileFlush(fp);
504
505  if (cupsFileClose(fp) < 0)
506    return (-1);
507  else if (node)
508    return (-1);
509  else
510    return (0);
511}
512
513
514/*
515 * 'helpSearchIndex()' - Search an index.
516 */
517
518help_index_t *				/* O - Search index */
519helpSearchIndex(help_index_t *hi,	/* I - Index */
520                const char   *query,	/* I - Query string */
521		const char   *section,	/* I - Limit search to this section */
522		const char   *filename)	/* I - Limit search to this file */
523{
524  help_index_t	*search;		/* Search index */
525  help_node_t	*node;			/* Current node */
526  help_word_t	*word;			/* Current word */
527  void		*sc;			/* Search context */
528  int		matches;		/* Number of matches */
529
530
531  DEBUG_printf(("helpSearchIndex(hi=%p, query=\"%s\", filename=\"%s\")",
532                hi, query, filename));
533
534 /*
535  * Range check...
536  */
537
538  if (!hi || !query)
539    return (NULL);
540
541 /*
542  * Reset the scores of all nodes to 0...
543  */
544
545  for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
546       node;
547       node = (help_node_t *)cupsArrayNext(hi->nodes))
548    node->score = 0;
549
550 /*
551  * Find the first node to search in...
552  */
553
554  if (filename)
555  {
556    node = helpFindNode(hi, filename, NULL);
557    if (!node)
558      return (NULL);
559  }
560  else
561    node = (help_node_t *)cupsArrayFirst(hi->nodes);
562
563 /*
564  * Convert the query into a regular expression...
565  */
566
567  sc = cgiCompileSearch(query);
568  if (!sc)
569    return (NULL);
570
571 /*
572  * Allocate a search index...
573  */
574
575  search = calloc(1, sizeof(help_index_t));
576  if (!search)
577  {
578    cgiFreeSearch(sc);
579    return (NULL);
580  }
581
582  search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
583  search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
584
585  if (!search->nodes || !search->sorted)
586  {
587    cupsArrayDelete(search->nodes);
588    cupsArrayDelete(search->sorted);
589    free(search);
590    cgiFreeSearch(sc);
591    return (NULL);
592  }
593
594  search->search = 1;
595
596 /*
597  * Check each node in the index, adding matching nodes to the
598  * search index...
599  */
600
601  for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
602    if (section && strcmp(node->section, section))
603      continue;
604    else if (filename && strcmp(node->filename, filename))
605      continue;
606    else
607    {
608      matches = cgiDoSearch(sc, node->text);
609
610      for (word = (help_word_t *)cupsArrayFirst(node->words);
611           word;
612	   word = (help_word_t *)cupsArrayNext(node->words))
613        if (cgiDoSearch(sc, word->text) > 0)
614          matches += word->count;
615
616      if (matches > 0)
617      {
618       /*
619	* Found a match, add the node to the search index...
620	*/
621
622	node->score = matches;
623
624	cupsArrayAdd(search->nodes, node);
625	cupsArrayAdd(search->sorted, node);
626      }
627    }
628
629 /*
630  * Free the search context...
631  */
632
633  cgiFreeSearch(sc);
634
635 /*
636  * Return the results...
637  */
638
639  return (search);
640}
641
642
643/*
644 * 'help_add_word()' - Add a word to a node.
645 */
646
647static help_word_t *			/* O - New word */
648help_add_word(help_node_t *n,		/* I - Node */
649              const char  *text)	/* I - Word text */
650{
651  help_word_t	*w,			/* New word */
652		key;			/* Search key */
653
654
655  DEBUG_printf(("2help_add_word(n=%p, text=\"%s\")", n, text));
656
657 /*
658  * Create the words array as needed...
659  */
660
661  if (!n->words)
662    n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
663
664 /*
665  * See if the word is already added...
666  */
667
668  key.text = (char *)text;
669
670  if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
671  {
672   /*
673    * Create a new word...
674    */
675
676    if ((w = calloc(1, sizeof(help_word_t))) == NULL)
677      return (NULL);
678
679    if ((w->text = strdup(text)) == NULL)
680    {
681      free(w);
682      return (NULL);
683    }
684
685    cupsArrayAdd(n->words, w);
686  }
687
688 /*
689  * Bump the counter for this word and return it...
690  */
691
692  w->count ++;
693
694  return (w);
695}
696
697
698/*
699 * 'help_delete_node()' - Free all memory used by a node.
700 */
701
702static void
703help_delete_node(help_node_t *n)	/* I - Node */
704{
705  help_word_t	*w;			/* Current word */
706
707
708  DEBUG_printf(("2help_delete_node(n=%p)", n));
709
710  if (!n)
711    return;
712
713  if (n->filename)
714    free(n->filename);
715
716  if (n->anchor)
717    free(n->anchor);
718
719  if (n->section)
720    free(n->section);
721
722  if (n->text)
723    free(n->text);
724
725  for (w = (help_word_t *)cupsArrayFirst(n->words);
726       w;
727       w = (help_word_t *)cupsArrayNext(n->words))
728    help_delete_word(w);
729
730  cupsArrayDelete(n->words);
731
732  free(n);
733}
734
735
736/*
737 * 'help_delete_word()' - Free all memory used by a word.
738 */
739
740static void
741help_delete_word(help_word_t *w)	/* I - Word */
742{
743  DEBUG_printf(("2help_delete_word(w=%p)", w));
744
745  if (!w)
746    return;
747
748  if (w->text)
749    free(w->text);
750
751  free(w);
752}
753
754
755/*
756 * 'help_load_directory()' - Load a directory of files into an index.
757 */
758
759static int				/* O - 0 = success, -1 = error, 1 = updated */
760help_load_directory(
761    help_index_t *hi,			/* I - Index */
762    const char   *directory,		/* I - Directory */
763    const char   *relative)		/* I - Relative path */
764{
765  cups_dir_t	*dir;			/* Directory file */
766  cups_dentry_t	*dent;			/* Directory entry */
767  char		*ext,			/* Pointer to extension */
768		filename[1024],		/* Full filename */
769		relname[1024];		/* Relative filename */
770  int		update;			/* Updated? */
771  help_node_t	*node;			/* Current node */
772
773
774  DEBUG_printf(("2help_load_directory(hi=%p, directory=\"%s\", relative=\"%s\")",
775                hi, directory, relative));
776
777 /*
778  * Open the directory and scan it...
779  */
780
781  if ((dir = cupsDirOpen(directory)) == NULL)
782    return (0);
783
784  update = 0;
785
786  while ((dent = cupsDirRead(dir)) != NULL)
787  {
788   /*
789    * Skip "." files...
790    */
791
792    if (dent->filename[0] == '.')
793      continue;
794
795   /*
796    * Get absolute and relative filenames...
797    */
798
799    snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
800    if (relative)
801      snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
802    else
803      strlcpy(relname, dent->filename, sizeof(relname));
804
805   /*
806    * Check if we have a HTML file...
807    */
808
809    if ((ext = strstr(dent->filename, ".html")) != NULL &&
810        (!ext[5] || !strcmp(ext + 5, ".gz")))
811    {
812     /*
813      * HTML file, see if we have already indexed the file...
814      */
815
816      if ((node = helpFindNode(hi, relname, NULL)) != NULL)
817      {
818       /*
819        * File already indexed - check dates to confirm that the
820	* index is up-to-date...
821	*/
822
823        if (node->mtime == dent->fileinfo.st_mtime)
824	{
825	 /*
826	  * Same modification time, so mark all of the nodes
827	  * for this file as up-to-date...
828	  */
829
830          for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
831	    if (!strcmp(node->filename, relname))
832	      node->score = 0;
833	    else
834	      break;
835
836          continue;
837	}
838      }
839
840      update = 1;
841
842      help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
843    }
844    else if (S_ISDIR(dent->fileinfo.st_mode))
845    {
846     /*
847      * Process sub-directory...
848      */
849
850      if (help_load_directory(hi, filename, relname) == 1)
851        update = 1;
852    }
853  }
854
855  cupsDirClose(dir);
856
857  return (update);
858}
859
860
861/*
862 * 'help_load_file()' - Load a HTML files into an index.
863 */
864
865static int				/* O - 0 = success, -1 = error */
866help_load_file(
867    help_index_t *hi,			/* I - Index */
868    const char   *filename,		/* I - Filename */
869    const char   *relative,		/* I - Relative path */
870    time_t       mtime)			/* I - Modification time */
871{
872  cups_file_t	*fp;			/* HTML file */
873  help_node_t	*node;			/* Current node */
874  char		line[1024],		/* Line from file */
875		temp[1024],		/* Temporary word */
876                section[1024],		/* Section */
877		*ptr,			/* Pointer into line */
878		*anchor,		/* Anchor name */
879		*text;			/* Text for anchor */
880  off_t		offset;			/* File offset */
881  char		quote;			/* Quote character */
882  help_word_t	*word;			/* Current word */
883  int		wordlen;		/* Length of word */
884
885
886  DEBUG_printf(("2help_load_file(hi=%p, filename=\"%s\", relative=\"%s\", "
887                "mtime=%ld)", hi, filename, relative, mtime));
888
889  if ((fp = cupsFileOpen(filename, "r")) == NULL)
890    return (-1);
891
892  node   = NULL;
893  offset = 0;
894
895  strlcpy(section, "Other", sizeof(section));
896
897  while (cupsFileGets(fp, line, sizeof(line)))
898  {
899   /*
900    * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
901    */
902
903    if (!_cups_strncasecmp(line, "<!-- SECTION:", 13))
904    {
905     /*
906      * Got section line, copy it!
907      */
908
909      for (ptr = line + 13; isspace(*ptr & 255); ptr ++);
910
911      strlcpy(section, ptr, sizeof(section));
912      if ((ptr = strstr(section, "-->")) != NULL)
913      {
914       /*
915        * Strip comment stuff from end of line...
916	*/
917
918        for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
919
920	if (isspace(*ptr & 255))
921	  *ptr = '\0';
922      }
923      continue;
924    }
925
926    for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
927    {
928      ptr ++;
929
930      if (!_cups_strncasecmp(ptr, "TITLE>", 6))
931      {
932       /*
933        * Found the title...
934	*/
935
936	anchor = NULL;
937	ptr += 6;
938      }
939      else if (!_cups_strncasecmp(ptr, "A NAME=", 7))
940      {
941       /*
942        * Found an anchor...
943	*/
944
945        ptr += 7;
946
947	if (*ptr == '\"' || *ptr == '\'')
948	{
949	 /*
950	  * Get quoted anchor...
951	  */
952
953	  quote  = *ptr;
954          anchor = ptr + 1;
955	  if ((ptr = strchr(anchor, quote)) != NULL)
956	    *ptr++ = '\0';
957	  else
958	    break;
959	}
960	else
961	{
962	 /*
963	  * Get unquoted anchor...
964	  */
965
966          anchor = ptr + 1;
967
968	  for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
969
970	  if (*ptr)
971	    *ptr++ = '\0';
972	  else
973	    break;
974	}
975
976       /*
977        * Got the anchor, now lets find the end...
978	*/
979
980        while (*ptr && *ptr != '>')
981	  ptr ++;
982
983        if (*ptr != '>')
984	  break;
985
986        ptr ++;
987      }
988      else
989        continue;
990
991     /*
992      * Now collect text for the link...
993      */
994
995      text = ptr;
996      while ((ptr = strchr(text, '<')) == NULL)
997      {
998	ptr = text + strlen(text);
999	if (ptr >= (line + sizeof(line) - 2))
1000	  break;
1001
1002        *ptr++ = ' ';
1003
1004        if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
1005	  break;
1006      }
1007
1008      *ptr = '\0';
1009
1010      if (node)
1011	node->length = (size_t)(offset - node->offset);
1012
1013      if (!*text)
1014      {
1015        node = NULL;
1016        break;
1017      }
1018
1019      if ((node = helpFindNode(hi, relative, anchor)) != NULL)
1020      {
1021       /*
1022	* Node already in the index, so replace the text and other
1023	* data...
1024	*/
1025
1026        cupsArrayRemove(hi->nodes, node);
1027
1028        if (node->section)
1029	  free(node->section);
1030
1031	if (node->text)
1032	  free(node->text);
1033
1034        if (node->words)
1035	{
1036	  for (word = (help_word_t *)cupsArrayFirst(node->words);
1037	       word;
1038	       word = (help_word_t *)cupsArrayNext(node->words))
1039	    help_delete_word(word);
1040
1041	  cupsArrayDelete(node->words);
1042	  node->words = NULL;
1043	}
1044
1045	node->section = section[0] ? strdup(section) : NULL;
1046	node->text    = strdup(text);
1047	node->mtime   = mtime;
1048	node->offset  = offset;
1049	node->score   = 0;
1050      }
1051      else
1052      {
1053       /*
1054	* New node...
1055	*/
1056
1057        node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1058      }
1059
1060     /*
1061      * Go through the text value and replace tabs and newlines with
1062      * whitespace and eliminate extra whitespace...
1063      */
1064
1065      for (ptr = node->text, text = node->text; *ptr;)
1066	if (isspace(*ptr & 255))
1067	{
1068	  while (isspace(*ptr & 255))
1069	    ptr ++;
1070
1071	  *text++ = ' ';
1072        }
1073	else if (text != ptr)
1074	  *text++ = *ptr++;
1075	else
1076	{
1077	  text ++;
1078	  ptr ++;
1079	}
1080
1081      *text = '\0';
1082
1083     /*
1084      * (Re)add the node to the array...
1085      */
1086
1087      cupsArrayAdd(hi->nodes, node);
1088
1089      if (!anchor)
1090        node = NULL;
1091      break;
1092    }
1093
1094    if (node)
1095    {
1096     /*
1097      * Scan this line for words...
1098      */
1099
1100      for (ptr = line; *ptr; ptr ++)
1101      {
1102       /*
1103	* Skip HTML stuff...
1104	*/
1105
1106	if (*ptr == '<')
1107	{
1108          if (!strncmp(ptr, "<!--", 4))
1109	  {
1110	   /*
1111	    * Skip HTML comment...
1112	    */
1113
1114            if ((text = strstr(ptr + 4, "-->")) == NULL)
1115	      ptr += strlen(ptr) - 1;
1116	    else
1117	      ptr = text + 2;
1118	  }
1119	  else
1120	  {
1121	   /*
1122            * Skip HTML element...
1123	    */
1124
1125            for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1126	    {
1127	      if (*ptr == '\"' || *ptr == '\'')
1128	      {
1129		for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1130
1131		if (!*ptr)
1132		  ptr --;
1133	      }
1134	    }
1135
1136	    if (!*ptr)
1137	      ptr --;
1138          }
1139
1140          continue;
1141	}
1142	else if (*ptr == '&')
1143	{
1144	 /*
1145	  * Skip HTML entity...
1146	  */
1147
1148	  for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1149
1150	  if (!*ptr)
1151	    ptr --;
1152
1153	  continue;
1154	}
1155	else if (!isalnum(*ptr & 255))
1156          continue;
1157
1158       /*
1159	* Found the start of a word, search until we find the end...
1160	*/
1161
1162	for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1163
1164	wordlen = (int)(ptr - text);
1165
1166        memcpy(temp, text, (size_t)wordlen);
1167	temp[wordlen] = '\0';
1168
1169        ptr --;
1170
1171	if (wordlen > 1 && !bsearch(temp, help_common_words,
1172	                            (sizeof(help_common_words) /
1173				     sizeof(help_common_words[0])),
1174				    sizeof(help_common_words[0]),
1175				    (int (*)(const void *, const void *))
1176				        _cups_strcasecmp))
1177          help_add_word(node, temp);
1178      }
1179    }
1180
1181   /*
1182    * Get the offset of the next line...
1183    */
1184
1185    offset = cupsFileTell(fp);
1186  }
1187
1188  cupsFileClose(fp);
1189
1190  if (node)
1191    node->length = (size_t)(offset - node->offset);
1192
1193  return (0);
1194}
1195
1196
1197/*
1198 * 'help_new_node()' - Create a new node and add it to an index.
1199 */
1200
1201static help_node_t *			/* O - Node pointer or NULL on error */
1202help_new_node(const char   *filename,	/* I - Filename */
1203              const char   *anchor,	/* I - Anchor */
1204	      const char   *section,	/* I - Section */
1205	      const char   *text,	/* I - Text */
1206	      time_t       mtime,	/* I - Modification time */
1207              off_t        offset,	/* I - Offset in file */
1208	      size_t       length)	/* I - Length in bytes */
1209{
1210  help_node_t	*n;			/* Node */
1211
1212
1213  DEBUG_printf(("2help_new_node(filename=\"%s\", anchor=\"%s\", text=\"%s\", "
1214                "mtime=%ld, offset=%ld, length=%ld)", filename, anchor, text,
1215                (long)mtime, (long)offset, (long)length));
1216
1217  n = (help_node_t *)calloc(1, sizeof(help_node_t));
1218  if (!n)
1219    return (NULL);
1220
1221  n->filename = strdup(filename);
1222  n->anchor   = anchor ? strdup(anchor) : NULL;
1223  n->section  = (section && *section) ? strdup(section) : NULL;
1224  n->text     = strdup(text);
1225  n->mtime    = mtime;
1226  n->offset   = offset;
1227  n->length   = length;
1228
1229  return (n);
1230}
1231
1232
1233/*
1234 * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1235 */
1236
1237static int				/* O - Difference */
1238help_sort_by_name(help_node_t *n1,	/* I - First node */
1239                  help_node_t *n2)	/* I - Second node */
1240{
1241  int		diff;			/* Difference */
1242
1243
1244  DEBUG_printf(("2help_sort_by_name(n1=%p(%s#%s), n2=%p(%s#%s)",
1245                n1, n1->filename, n1->anchor,
1246		n2, n2->filename, n2->anchor));
1247
1248  if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1249    return (diff);
1250
1251  if (!n1->anchor && !n2->anchor)
1252    return (0);
1253  else if (!n1->anchor)
1254    return (-1);
1255  else if (!n2->anchor)
1256    return (1);
1257  else
1258    return (strcmp(n1->anchor, n2->anchor));
1259}
1260
1261
1262/*
1263 * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1264 */
1265
1266static int				/* O - Difference */
1267help_sort_by_score(help_node_t *n1,	/* I - First node */
1268                   help_node_t *n2)	/* I - Second node */
1269{
1270  int		diff;			/* Difference */
1271
1272
1273  DEBUG_printf(("2help_sort_by_score(n1=%p(%d \"%s\" \"%s\"), "
1274                "n2=%p(%d \"%s\" \"%s\")",
1275                n1, n1->score, n1->section, n1->text,
1276                n2, n2->score, n2->section, n2->text));
1277
1278  if (n1->score != n2->score)
1279    return (n2->score - n1->score);
1280
1281  if (n1->section && !n2->section)
1282    return (1);
1283  else if (!n1->section && n2->section)
1284    return (-1);
1285  else if (n1->section && n2->section &&
1286           (diff = strcmp(n1->section, n2->section)) != 0)
1287    return (diff);
1288
1289  return (_cups_strcasecmp(n1->text, n2->text));
1290}
1291
1292
1293/*
1294 * 'help_sort_words()' - Sort words alphabetically.
1295 */
1296
1297static int				/* O - Difference */
1298help_sort_words(help_word_t *w1,	/* I - Second word */
1299                help_word_t *w2)	/* I - Second word */
1300{
1301  DEBUG_printf(("2help_sort_words(w1=%p(\"%s\"), w2=%p(\"%s\"))",
1302                w1, w1->text, w2, w2->text));
1303
1304  return (_cups_strcasecmp(w1->text, w2->text));
1305}
1306
1307
1308/*
1309 * End of "$Id: help-index.c 11645 2014-02-27 16:35:53Z msweet $".
1310 */
1311