man.c revision 93139
1/*  man.c: How to read and format man files.
2    $Id: man.c,v 1.16 2002/02/23 19:12:02 karl Exp $
3
4   Copyright (C) 1995, 97, 98, 99, 2000 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
20   Written by Brian Fox Thu May  4 09:17:52 1995 (bfox@ai.mit.edu). */
21
#include "info.h"
#include <errno.h>
#include <sys/ioctl.h>
#include "signals.h"
#if defined (HAVE_SYS_TIME_H)
#include <sys/time.h>
#endif
#if defined (HAVE_SYS_WAIT_H)
#include <sys/wait.h>
#endif

#include "tilde.h"
#include "man.h"
34
35#if !defined (_POSIX_VERSION)
36#define pid_t int
37#endif
38
39#if defined (FD_SET)
40#  if defined (hpux)
41#    define fd_set_cast(x) (int *)(x)
42#  else
43#    define fd_set_cast(x) (fd_set *)(x)
44#  endif /* !hpux */
45#endif /* FD_SET */
46
47#if STRIP_DOT_EXE
48static char const * const exec_extensions[] = {
49  ".exe", ".com", ".bat", ".btm", ".sh", ".ksh", ".pl", ".sed", "", NULL
50};
51#else
52static char const * const exec_extensions[] = { "", NULL };
53#endif
54
55static char *read_from_fd ();
56static void clean_manpage ();
57static NODE *manpage_node_of_file_buffer ();
58static char *get_manpage_contents ();
59
60NODE *
61make_manpage_node (pagename)
62     char *pagename;
63{
64  return (info_get_node (MANPAGE_FILE_BUFFER_NAME, pagename));
65}
66
67NODE *
68get_manpage_node (file_buffer, pagename)
69     FILE_BUFFER *file_buffer;
70     char *pagename;
71{
72  NODE *node;
73
74  node = manpage_node_of_file_buffer (file_buffer, pagename);
75
76  if (!node)
77    {
78      char *page;
79
80      page = get_manpage_contents (pagename);
81
82      if (page)
83        {
84          char header[1024];
85          long oldsize, newsize;
86          int hlen, plen;
87	  char *old_contents = file_buffer->contents;
88
89          sprintf (header, "\n\n%c\n%s %s,  %s %s,  %s (dir)\n\n",
90                   INFO_COOKIE,
91                   INFO_FILE_LABEL, file_buffer->filename,
92                   INFO_NODE_LABEL, pagename,
93                   INFO_UP_LABEL);
94          oldsize = file_buffer->filesize;
95          hlen = strlen (header);
96          plen = strlen (page);
97          newsize = (oldsize + hlen + plen);
98          file_buffer->contents =
99            (char *)xrealloc (file_buffer->contents, 1 + newsize);
100          memcpy (file_buffer->contents + oldsize, header, hlen);
101          memcpy (file_buffer->contents + oldsize + hlen, page, plen);
102          file_buffer->contents[newsize] = '\0';
103          file_buffer->filesize = newsize;
104          file_buffer->finfo.st_size = newsize;
105          build_tags_and_nodes (file_buffer);
106          free (page);
107	  /* We have just relocated file_buffer->contents from under
108	     the feet of info_windows[] array.  Therefore, all the
109	     nodes on that list which are showing man pages have their
110	     contents member pointing into the blue.  Undo that harm.  */
111	  if (old_contents && oldsize && old_contents != file_buffer->contents)
112	    {
113	      int iw;
114	      INFO_WINDOW *info_win;
115	      char *old_contents_end = old_contents + oldsize;
116
117	      for (iw = 0; (info_win = info_windows[iw]); iw++)
118		{
119		  int in;
120
121		  for (in = 0; in < info_win->nodes_index; in++)
122		    {
123		      NODE *node = info_win->nodes[in];
124
125		      /* It really only suffices to see that node->filename
126			 is "*manpages*".  But after several hours of
127			 debugging this, would you blame me for being a bit
128			 paranoid?  */
129		      if (node && node->filename && node->contents &&
130			  strcmp (node->filename,
131				  MANPAGE_FILE_BUFFER_NAME) == 0 &&
132			  node->contents >= old_contents &&
133			  node->contents + node->nodelen <= old_contents_end)
134			{
135			  info_win->nodes[in] =
136			    manpage_node_of_file_buffer (file_buffer,
137							 node->nodename);
138			  free (node->nodename);
139			  free (node);
140			}
141		    }
142		}
143	    }
144        }
145
146      node = manpage_node_of_file_buffer (file_buffer, pagename);
147    }
148
149  return (node);
150}
151
152FILE_BUFFER *
153create_manpage_file_buffer ()
154{
155  FILE_BUFFER *file_buffer = make_file_buffer ();
156  file_buffer->filename = xstrdup (MANPAGE_FILE_BUFFER_NAME);
157  file_buffer->fullpath = xstrdup (MANPAGE_FILE_BUFFER_NAME);
158  file_buffer->finfo.st_size = 0;
159  file_buffer->filesize = 0;
160  file_buffer->contents = (char *)NULL;
161  file_buffer->flags = (N_IsInternal | N_CannotGC | N_IsManPage);
162
163  return (file_buffer);
164}
165
166/* Scan the list of directories in PATH looking for FILENAME.  If we find
167   one that is an executable file, return it as a new string.  Otherwise,
168   return a NULL pointer. */
169static char *
170executable_file_in_path (filename, path)
171     char *filename, *path;
172{
173  struct stat finfo;
174  char *temp_dirname;
175  int statable, dirname_index;
176
177  dirname_index = 0;
178
179  while ((temp_dirname = extract_colon_unit (path, &dirname_index)))
180    {
181      char *temp;
182      char *temp_end;
183      int i;
184
185      /* Expand a leading tilde if one is present. */
186      if (*temp_dirname == '~')
187        {
188          char *expanded_dirname;
189
190          expanded_dirname = tilde_expand_word (temp_dirname);
191          free (temp_dirname);
192          temp_dirname = expanded_dirname;
193        }
194
195      temp = (char *)xmalloc (34 + strlen (temp_dirname) + strlen (filename));
196      strcpy (temp, temp_dirname);
197      if (!IS_SLASH (temp[(strlen (temp)) - 1]))
198        strcat (temp, "/");
199      strcat (temp, filename);
200      temp_end = temp + strlen (temp);
201
202      free (temp_dirname);
203
204      /* Look for FILENAME, possibly with any of the extensions
205	 in EXEC_EXTENSIONS[].  */
206      for (i = 0; exec_extensions[i]; i++)
207	{
208	  if (exec_extensions[i][0])
209	    strcpy (temp_end, exec_extensions[i]);
210	  statable = (stat (temp, &finfo) == 0);
211
212	  /* If we have found a regular executable file, then use it. */
213	  if ((statable) && (S_ISREG (finfo.st_mode)) &&
214	      (access (temp, X_OK) == 0))
215	    return (temp);
216	}
217
218      free (temp);
219    }
220  return ((char *)NULL);
221}
222
/* Search the directories named by the PATH environment variable for a
   program called "man" and return the result of that search (see
   executable_file_in_path). */
static char *
find_man_formatter ()
{
  char *search_path = (char *)getenv ("PATH");

  return executable_file_in_path ("man", search_path);
}
229
/* Results of the most recent get_page_and_section call.  Both strings
   are owned by this file; manpage_section stays NULL when the page name
   carried no "(section)" part. */
static char *manpage_pagename = (char *)NULL;
static char *manpage_section  = (char *)NULL;

/* Split PAGENAME, which looks like "name" or "name(section)", into
   manpage_pagename and manpage_section.  Whatever a previous call left
   in those two variables is freed first.  The name is everything before
   the first `('; the section is everything between that `(' and the
   next `)' (or the end of the string if there is no `)'). */
static void
get_page_and_section (pagename)
     char *pagename;
{
  char *name_end;
  int name_len;

  /* Drop the results of the previous call. */
  if (manpage_pagename)
    free (manpage_pagename);

  if (manpage_section)
    free (manpage_section);

  manpage_pagename = (char *)NULL;
  manpage_section  = (char *)NULL;

  /* Find where the bare page name stops. */
  name_end = pagename;
  while (*name_end != '\0' && *name_end != '(')
    name_end++;
  name_len = name_end - pagename;

  manpage_pagename = (char *)xmalloc (1 + name_len);
  memcpy (manpage_pagename, pagename, name_len);
  manpage_pagename[name_len] = '\0';

  if (*name_end == '(')
    {
      char *sect_start = name_end + 1;
      char *sect_end = sect_start;
      int sect_len;

      while (*sect_end != '\0' && *sect_end != ')')
        sect_end++;
      sect_len = sect_end - sect_start;

      manpage_section = (char *)xmalloc (1 + sect_len);
      memcpy (manpage_section, sect_start, sect_len);
      manpage_section[sect_len] = '\0';
    }
}
267
#if PIPE_USE_FORK
/* Signal handler that get_manpage_contents installs for SIGCHLD while
   it reads the formatter's output: collect one terminated child and
   discard its exit status.  SIG is not used. */
static void
reap_children (sig)
     int sig;
{
  (void) sig;
  wait (NULL);
}
#endif
276
277static char *
278get_manpage_contents (pagename)
279     char *pagename;
280{
281  static char *formatter_args[4] = { (char *)NULL };
282  int pipes[2];
283  pid_t child;
284  RETSIGTYPE (*sigsave) ();
285  char *formatted_page = NULL;
286  int arg_index = 1;
287
288  if (formatter_args[0] == (char *)NULL)
289    formatter_args[0] = find_man_formatter ();
290
291  if (formatter_args[0] == (char *)NULL)
292    return ((char *)NULL);
293
294  get_page_and_section (pagename);
295
296  if (manpage_section != (char *)NULL)
297    formatter_args[arg_index++] = manpage_section;
298
299  formatter_args[arg_index++] = manpage_pagename;
300  formatter_args[arg_index] = (char *)NULL;
301
302  /* Open a pipe to this program, read the output, and save it away
303     in FORMATTED_PAGE.  The reader end of the pipe is pipes[0]; the
304     writer end is pipes[1]. */
305#if PIPE_USE_FORK
306  pipe (pipes);
307
308  sigsave = signal (SIGCHLD, reap_children);
309
310  child = fork ();
311  if (child == -1)
312    return ((char *)NULL);
313
314  if (child != 0)
315    {
316      /* In the parent, close the writing end of the pipe, and read from
317         the exec'd child. */
318      close (pipes[1]);
319      formatted_page = read_from_fd (pipes[0]);
320      close (pipes[0]);
321      signal (SIGCHLD, sigsave);
322    }
323  else
324    { /* In the child, close the read end of the pipe, make the write end
325         of the pipe be stdout, and execute the man page formatter. */
326      close (pipes[0]);
327      freopen (NULL_DEVICE, "w", stderr);
328      freopen (NULL_DEVICE, "r", stdin);
329      dup2 (pipes[1], fileno (stdout));
330
331      execv (formatter_args[0], formatter_args);
332
333      /* If we get here, we couldn't exec, so close out the pipe and
334         exit. */
335      close (pipes[1]);
336      xexit (0);
337    }
338#else  /* !PIPE_USE_FORK */
339  /* Cannot fork/exec, but can popen/pclose.  */
340  {
341    FILE *fpipe;
342    char *cmdline = xmalloc (strlen (formatter_args[0])
343			     + strlen (manpage_pagename)
344			     + (arg_index > 2 ? strlen (manpage_section) : 0)
345 			     + 3);
346    int save_stderr = dup (fileno (stderr));
347    int fd_err = open (NULL_DEVICE, O_WRONLY, 0666);
348
349    if (fd_err > 2)
350      dup2 (fd_err, fileno (stderr)); /* Don't print errors. */
351    sprintf (cmdline, "%s %s %s", formatter_args[0], manpage_pagename,
352				  arg_index > 2 ? manpage_section : "");
353    fpipe = popen (cmdline, "r");
354    free (cmdline);
355    if (fd_err > 2)
356      close (fd_err);
357    dup2 (save_stderr, fileno (stderr));
358    if (fpipe == 0)
359      return ((char *)NULL);
360    formatted_page = read_from_fd (fileno (fpipe));
361    if (pclose (fpipe) == -1)
362      {
363	if (formatted_page)
364	  free (formatted_page);
365	return ((char *)NULL);
366      }
367  }
368#endif /* !PIPE_USE_FORK */
369
370  /* If we have the page, then clean it up. */
371  if (formatted_page)
372    clean_manpage (formatted_page);
373
374  return (formatted_page);
375}
376
377static void
378clean_manpage (manpage)
379     char *manpage;
380{
381  register int i, j;
382  int newline_count = 0;
383  char *newpage;
384
385  newpage = (char *)xmalloc (1 + strlen (manpage));
386
387  for (i = 0, j = 0; (newpage[j] = manpage[i]); i++, j++)
388    {
389      if (manpage[i] == '\n')
390        newline_count++;
391      else
392        newline_count = 0;
393
394      if (newline_count == 3)
395        {
396          j--;
397          newline_count--;
398        }
399
400      /* A malformed man page could have a \b as its first character,
401         in which case decrementing j by 2 will cause us to write into
402         newpage[-1], smashing the hidden info stored there by malloc.  */
403      if (manpage[i] == '\b' || manpage[i] == '\f' && j > 0)
404        j -= 2;
405      else if (!raw_escapes_p)
406	{
407	  /* Remove the ANSI escape sequences for color, boldface,
408	     underlining, and italics, generated by some versions of
409	     Groff.  */
410	  if (manpage[i] == '\033' && manpage[i + 1] == '['
411	      && isdigit (manpage[i + 2]))
412	    {
413	      if (isdigit (manpage[i + 3]) && manpage[i + 4] == 'm')
414		{
415		  i += 4;
416		  j--;
417		}
418	      else if (manpage[i + 3] == 'm')
419		{
420		  i += 3;
421		  j--;
422		}
423	      /* Else do nothing: it's some unknown escape sequence,
424		 so let's leave it alone.  */
425	    }
426	}
427    }
428
429  newpage[j++] = 0;
430
431  strcpy (manpage, newpage);
432  free (newpage);
433}
434
435static NODE *
436manpage_node_of_file_buffer (file_buffer, pagename)
437     FILE_BUFFER *file_buffer;
438     char *pagename;
439{
440  NODE *node = (NODE *)NULL;
441  TAG *tag = (TAG *)NULL;
442
443  if (file_buffer->contents)
444    {
445      register int i;
446
447      for (i = 0; (tag = file_buffer->tags[i]); i++)
448        {
449          if (strcasecmp (pagename, tag->nodename) == 0)
450            break;
451        }
452    }
453
454  if (tag)
455    {
456      node = (NODE *)xmalloc (sizeof (NODE));
457      node->filename = file_buffer->filename;
458      node->nodename = xstrdup (tag->nodename);
459      node->contents = file_buffer->contents + tag->nodestart;
460      node->nodelen = tag->nodelen;
461      node->flags    = 0;
462      node->display_pos = 0;
463      node->parent   = (char *)NULL;
464      node->flags = (N_HasTagsTable | N_IsManPage);
465      node->contents += skip_node_separator (node->contents);
466    }
467
468  return (node);
469}
470
/* Read everything available on descriptor FD and return it as a newly
   allocated, NUL-terminated string.  Return NULL if nothing could be
   read.

   When FD_SET is available we first wait, for at most 15 seconds, until
   FD becomes readable; a timeout or a select error also yields NULL.
   After that the descriptor is read until end of file or a read
   error. */
static char *
read_from_fd (fd)
     int fd;
{
  struct timeval timeout;
  char *buffer = (char *)NULL;
  int bsize = 0;
  int bindex = 0;
  int select_result;
#if defined (FD_SET)
  fd_set read_fds;

  timeout.tv_sec = 15;
  timeout.tv_usec = 0;

  FD_ZERO (&read_fds);
  FD_SET (fd, &read_fds);

  select_result = select (fd + 1, fd_set_cast (&read_fds), 0, 0, &timeout);
#else /* !FD_SET */
  select_result = 1;
#endif /* !FD_SET */

  switch (select_result)
    {
    case 0:
    case -1:
      break;

    default:
      {
        int amount_read;
        int done = 0;

        while (!done)
          {
            /* Grow in 1024-byte steps so that the next read of up to
               1023 bytes plus its terminating NUL always fits. */
            while ((bindex + 1024) > (bsize))
              buffer = (char *)xrealloc (buffer, (bsize += 1024));
            buffer[bindex] = '\0';

            amount_read = read (fd, buffer + bindex, 1023);

            if (amount_read < 0)
              {
#if defined (EINTR)
                /* A signal arriving while we wait (our caller installs
                   a SIGCHLD handler for the duration of the read) is
                   not an error; just try again instead of returning a
                   truncated page. */
                if (errno == EINTR)
                  continue;
#endif
                done = 1;
              }
            else
              {
                bindex += amount_read;
                buffer[bindex] = '\0';
                if (amount_read == 0)
                  done = 1;
              }
          }
      }
    }

  /* An empty result is reported the same way as no result. */
  if ((buffer != (char *)NULL) && (*buffer == '\0'))
    {
      free (buffer);
      buffer = (char *)NULL;
    }

  return (buffer);
}
536
537static char *reference_section_starters[] =
538{
539  "\nRELATED INFORMATION",
540  "\nRELATED\tINFORMATION",
541  "RELATED INFORMATION\n",
542  "RELATED\tINFORMATION\n",
543  "\nSEE ALSO",
544  "\nSEE\tALSO",
545  "SEE ALSO\n",
546  "SEE\tALSO\n",
547  (char *)NULL
548};
549
550static SEARCH_BINDING frs_binding;
551
552static SEARCH_BINDING *
553find_reference_section (node)
554     NODE *node;
555{
556  register int i;
557  long position = -1;
558
559  frs_binding.buffer = node->contents;
560  frs_binding.start = 0;
561  frs_binding.end = node->nodelen;
562  frs_binding.flags = S_SkipDest;
563
564  for (i = 0; reference_section_starters[i] != (char *)NULL; i++)
565    {
566      position = search_forward (reference_section_starters[i], &frs_binding);
567      if (position != -1)
568        break;
569    }
570
571  if (position == -1)
572    return ((SEARCH_BINDING *)NULL);
573
574  /* We found the start of the reference section, and point is right after
575     the string which starts it.  The text from here to the next header
576     (or end of buffer) contains the only references in this manpage. */
577  frs_binding.start = position;
578
579  for (i = frs_binding.start; i < frs_binding.end - 2; i++)
580    {
581      if ((frs_binding.buffer[i] == '\n') &&
582          (!whitespace (frs_binding.buffer[i + 1])))
583        {
584          frs_binding.end = i;
585          break;
586        }
587    }
588
589  return (&frs_binding);
590}
591
592REFERENCE **
593xrefs_of_manpage (node)
594     NODE *node;
595{
596  SEARCH_BINDING *reference_section;
597  REFERENCE **refs = (REFERENCE **)NULL;
598  int refs_index = 0;
599  int refs_slots = 0;
600  long position;
601
602  reference_section = find_reference_section (node);
603
604  if (reference_section == (SEARCH_BINDING *)NULL)
605    return ((REFERENCE **)NULL);
606
607  /* Grovel the reference section building a list of references found there.
608     A reference is alphabetic characters followed by non-whitespace text
609     within parenthesis. */
610  reference_section->flags = 0;
611
612  while ((position = search_forward ("(", reference_section)) != -1)
613    {
614      register int start, end;
615
616      for (start = position; start > reference_section->start; start--)
617        if (whitespace (reference_section->buffer[start]))
618          break;
619
620      start++;
621
622      for (end = position; end < reference_section->end; end++)
623        {
624          if (whitespace (reference_section->buffer[end]))
625            {
626              end = start;
627              break;
628            }
629
630          if (reference_section->buffer[end] == ')')
631            {
632              end++;
633              break;
634            }
635        }
636
637      if (end != start)
638        {
639          REFERENCE *entry;
640          int len = end - start;
641
642          entry = (REFERENCE *)xmalloc (sizeof (REFERENCE));
643          entry->label = (char *)xmalloc (1 + len);
644          strncpy (entry->label, (reference_section->buffer) + start, len);
645          entry->label[len] = '\0';
646          entry->filename = xstrdup (node->filename);
647          entry->nodename = xstrdup (entry->label);
648          entry->start = start;
649          entry->end = end;
650
651          add_pointer_to_array
652            (entry, refs_index, refs, refs_slots, 10, REFERENCE *);
653        }
654
655      reference_section->start = position + 1;
656    }
657
658  return (refs);
659}
660
661long
662locate_manpage_xref (node, start, dir)
663     NODE *node;
664     long start;
665     int dir;
666{
667  REFERENCE **refs;
668  long position = -1;
669
670  refs = xrefs_of_manpage (node);
671
672  if (refs)
673    {
674      register int i, count;
675      REFERENCE *entry;
676
677      for (i = 0; refs[i]; i++);
678      count = i;
679
680      if (dir > 0)
681        {
682          for (i = 0; (entry = refs[i]); i++)
683            if (entry->start > start)
684              {
685                position = entry->start;
686                break;
687              }
688        }
689      else
690        {
691          for (i = count - 1; i > -1; i--)
692            {
693              entry = refs[i];
694
695              if (entry->start < start)
696                {
697                  position = entry->start;
698                  break;
699                }
700            }
701        }
702
703      info_free_references (refs);
704    }
705  return (position);
706}
707
708/* This one was a little tricky.  The binding buffer that is passed in has
709   a START and END value of 0 -- strlen (window-line-containing-point).
710   The BUFFER is a pointer to the start of that line. */
711REFERENCE **
712manpage_xrefs_in_binding (node, binding)
713     NODE *node;
714     SEARCH_BINDING *binding;
715{
716  register int i;
717  REFERENCE **all_refs = xrefs_of_manpage (node);
718  REFERENCE **brefs = (REFERENCE **)NULL;
719  REFERENCE *entry;
720  int brefs_index = 0;
721  int brefs_slots = 0;
722  int start, end;
723
724  if (!all_refs)
725    return ((REFERENCE **)NULL);
726
727  start = binding->start + (binding->buffer - node->contents);
728  end = binding->end + (binding->buffer - node->contents);
729
730  for (i = 0; (entry = all_refs[i]); i++)
731    {
732      if ((entry->start > start) && (entry->end < end))
733        {
734          add_pointer_to_array
735            (entry, brefs_index, brefs, brefs_slots, 10, REFERENCE *);
736        }
737      else
738        {
739          maybe_free (entry->label);
740          maybe_free (entry->filename);
741          maybe_free (entry->nodename);
742          free (entry);
743        }
744    }
745
746  free (all_refs);
747  return (brefs);
748}
749