man.c revision 114472
1/*  man.c: How to read and format man files.
2    $Id: man.c,v 1.1 2002/08/25 23:38:38 karl Exp $
3
4   Copyright (C) 1995, 1997, 1998, 1999, 2000, 2002 Free Software
5   Foundation, Inc.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20
21   Written by Brian Fox Thu May  4 09:17:52 1995 (bfox@ai.mit.edu). */
22
23#include "info.h"
24#include <sys/ioctl.h>
25#include "signals.h"
26#if defined (HAVE_SYS_TIME_H)
27#include <sys/time.h>
28#endif
29#if defined (HAVE_SYS_WAIT_H)
30#include <sys/wait.h>
31#endif
32
33#include "tilde.h"
34#include "man.h"
35
36#if !defined (_POSIX_VERSION)
37#define pid_t int
38#endif
39
40#if defined (FD_SET)
41#  if defined (hpux)
42#    define fd_set_cast(x) (int *)(x)
43#  else
44#    define fd_set_cast(x) (fd_set *)(x)
45#  endif /* !hpux */
46#endif /* FD_SET */
47
/* File name suffixes tried, in this order, when looking for an
   executable along a search path.  The empty string stands for "no
   suffix"; each list ends with a NULL pointer.  */
#if STRIP_DOT_EXE
static char const * const exec_extensions[] = {
  ".exe",
  ".com",
  ".bat",
  ".btm",
  ".sh",
  ".ksh",
  ".pl",
  ".sed",
  "",
  NULL
};
#else
static char const * const exec_extensions[] = {
  "",
  NULL
};
#endif
55
56static char *read_from_fd ();
57static void clean_manpage ();
58static NODE *manpage_node_of_file_buffer ();
59static char *get_manpage_contents ();
60
61NODE *
62make_manpage_node (pagename)
63     char *pagename;
64{
65  return (info_get_node (MANPAGE_FILE_BUFFER_NAME, pagename));
66}
67
68NODE *
69get_manpage_node (file_buffer, pagename)
70     FILE_BUFFER *file_buffer;
71     char *pagename;
72{
73  NODE *node;
74
75  node = manpage_node_of_file_buffer (file_buffer, pagename);
76
77  if (!node)
78    {
79      char *page;
80
81      page = get_manpage_contents (pagename);
82
83      if (page)
84        {
85          char header[1024];
86          long oldsize, newsize;
87          int hlen, plen;
88	  char *old_contents = file_buffer->contents;
89
90          sprintf (header, "\n\n%c\n%s %s,  %s %s,  %s (dir)\n\n",
91                   INFO_COOKIE,
92                   INFO_FILE_LABEL, file_buffer->filename,
93                   INFO_NODE_LABEL, pagename,
94                   INFO_UP_LABEL);
95          oldsize = file_buffer->filesize;
96          hlen = strlen (header);
97          plen = strlen (page);
98          newsize = (oldsize + hlen + plen);
99          file_buffer->contents =
100            (char *)xrealloc (file_buffer->contents, 1 + newsize);
101          memcpy (file_buffer->contents + oldsize, header, hlen);
102          memcpy (file_buffer->contents + oldsize + hlen, page, plen);
103          file_buffer->contents[newsize] = '\0';
104          file_buffer->filesize = newsize;
105          file_buffer->finfo.st_size = newsize;
106          build_tags_and_nodes (file_buffer);
107          free (page);
108	  /* We have just relocated file_buffer->contents from under
109	     the feet of info_windows[] array.  Therefore, all the
110	     nodes on that list which are showing man pages have their
111	     contents member pointing into the blue.  Undo that harm.  */
112	  if (old_contents && oldsize && old_contents != file_buffer->contents)
113	    {
114	      int iw;
115	      INFO_WINDOW *info_win;
116	      char *old_contents_end = old_contents + oldsize;
117
118	      for (iw = 0; (info_win = info_windows[iw]); iw++)
119		{
120		  int in;
121
122		  for (in = 0; in < info_win->nodes_index; in++)
123		    {
124		      NODE *node = info_win->nodes[in];
125
126		      /* It really only suffices to see that node->filename
127			 is "*manpages*".  But after several hours of
128			 debugging this, would you blame me for being a bit
129			 paranoid?  */
130		      if (node && node->filename && node->contents &&
131			  strcmp (node->filename,
132				  MANPAGE_FILE_BUFFER_NAME) == 0 &&
133			  node->contents >= old_contents &&
134			  node->contents + node->nodelen <= old_contents_end)
135			{
136			  info_win->nodes[in] =
137			    manpage_node_of_file_buffer (file_buffer,
138							 node->nodename);
139			  free (node->nodename);
140			  free (node);
141			}
142		    }
143		}
144	    }
145        }
146
147      node = manpage_node_of_file_buffer (file_buffer, pagename);
148    }
149
150  return (node);
151}
152
153FILE_BUFFER *
154create_manpage_file_buffer ()
155{
156  FILE_BUFFER *file_buffer = make_file_buffer ();
157  file_buffer->filename = xstrdup (MANPAGE_FILE_BUFFER_NAME);
158  file_buffer->fullpath = xstrdup (MANPAGE_FILE_BUFFER_NAME);
159  file_buffer->finfo.st_size = 0;
160  file_buffer->filesize = 0;
161  file_buffer->contents = (char *)NULL;
162  file_buffer->flags = (N_IsInternal | N_CannotGC | N_IsManPage);
163
164  return (file_buffer);
165}
166
167/* Scan the list of directories in PATH looking for FILENAME.  If we find
168   one that is an executable file, return it as a new string.  Otherwise,
169   return a NULL pointer. */
170static char *
171executable_file_in_path (filename, path)
172     char *filename, *path;
173{
174  struct stat finfo;
175  char *temp_dirname;
176  int statable, dirname_index;
177
178  dirname_index = 0;
179
180  while ((temp_dirname = extract_colon_unit (path, &dirname_index)))
181    {
182      char *temp;
183      char *temp_end;
184      int i;
185
186      /* Expand a leading tilde if one is present. */
187      if (*temp_dirname == '~')
188        {
189          char *expanded_dirname;
190
191          expanded_dirname = tilde_expand_word (temp_dirname);
192          free (temp_dirname);
193          temp_dirname = expanded_dirname;
194        }
195
196      temp = (char *)xmalloc (34 + strlen (temp_dirname) + strlen (filename));
197      strcpy (temp, temp_dirname);
198      if (!IS_SLASH (temp[(strlen (temp)) - 1]))
199        strcat (temp, "/");
200      strcat (temp, filename);
201      temp_end = temp + strlen (temp);
202
203      free (temp_dirname);
204
205      /* Look for FILENAME, possibly with any of the extensions
206	 in EXEC_EXTENSIONS[].  */
207      for (i = 0; exec_extensions[i]; i++)
208	{
209	  if (exec_extensions[i][0])
210	    strcpy (temp_end, exec_extensions[i]);
211	  statable = (stat (temp, &finfo) == 0);
212
213	  /* If we have found a regular executable file, then use it. */
214	  if ((statable) && (S_ISREG (finfo.st_mode)) &&
215	      (access (temp, X_OK) == 0))
216	    return (temp);
217	}
218
219      free (temp);
220    }
221  return ((char *)NULL);
222}
223
/* Look for a program called "man" in the directories listed in the
   PATH environment variable, and return what executable_file_in_path
   finds: its full pathname, or NULL.  */
static char *
find_man_formatter ()
{
  char *search_path = (char *)getenv ("PATH");

  return executable_file_in_path ("man", search_path);
}
230
/* The section and the page name most recently split out of a
   "name(section)" string by get_page_and_section.  Both are NULL
   until that function has run; the section stays NULL when the string
   carried none.  */
static char *manpage_section  = NULL;
static char *manpage_pagename = NULL;
233
234static void
235get_page_and_section (pagename)
236     char *pagename;
237{
238  register int i;
239
240  if (manpage_pagename)
241    free (manpage_pagename);
242
243  if (manpage_section)
244    free (manpage_section);
245
246  manpage_pagename = (char *)NULL;
247  manpage_section  = (char *)NULL;
248
249  for (i = 0; pagename[i] != '\0' && pagename[i] != '('; i++);
250
251  manpage_pagename = (char *)xmalloc (1 + i);
252  strncpy (manpage_pagename, pagename, i);
253  manpage_pagename[i] = '\0';
254
255  if (pagename[i] == '(')
256    {
257      int start;
258
259      start = i + 1;
260
261      for (i = start; pagename[i] != '\0' && pagename[i] != ')'; i++);
262
263      manpage_section = (char *)xmalloc (1 + (i - start));
264      strncpy (manpage_section, pagename + start, (i - start));
265      manpage_section[i - start] = '\0';
266    }
267}
268
#if PIPE_USE_FORK
/* Signal handler, installed for SIGCHLD while the formatter runs:
   collect the status of one terminated child.  SIG is not used.  */
static void
reap_children (sig)
     int sig;
{
  (void) sig;
  (void) wait (NULL);
}
#endif
277
278static char *
279get_manpage_contents (pagename)
280     char *pagename;
281{
282  static char *formatter_args[4] = { (char *)NULL };
283  int pipes[2];
284  pid_t child;
285  RETSIGTYPE (*sigsave) ();
286  char *formatted_page = NULL;
287  int arg_index = 1;
288
289  if (formatter_args[0] == (char *)NULL)
290    formatter_args[0] = find_man_formatter ();
291
292  if (formatter_args[0] == (char *)NULL)
293    return ((char *)NULL);
294
295  get_page_and_section (pagename);
296
297  if (manpage_section != (char *)NULL)
298    formatter_args[arg_index++] = manpage_section;
299
300  formatter_args[arg_index++] = manpage_pagename;
301  formatter_args[arg_index] = (char *)NULL;
302
303  /* Open a pipe to this program, read the output, and save it away
304     in FORMATTED_PAGE.  The reader end of the pipe is pipes[0]; the
305     writer end is pipes[1]. */
306#if PIPE_USE_FORK
307  pipe (pipes);
308
309  sigsave = signal (SIGCHLD, reap_children);
310
311  child = fork ();
312  if (child == -1)
313    return ((char *)NULL);
314
315  if (child != 0)
316    {
317      /* In the parent, close the writing end of the pipe, and read from
318         the exec'd child. */
319      close (pipes[1]);
320      formatted_page = read_from_fd (pipes[0]);
321      close (pipes[0]);
322      signal (SIGCHLD, sigsave);
323    }
324  else
325    { /* In the child, close the read end of the pipe, make the write end
326         of the pipe be stdout, and execute the man page formatter. */
327      close (pipes[0]);
328      freopen (NULL_DEVICE, "w", stderr);
329      freopen (NULL_DEVICE, "r", stdin);
330      dup2 (pipes[1], fileno (stdout));
331
332      execv (formatter_args[0], formatter_args);
333
334      /* If we get here, we couldn't exec, so close out the pipe and
335         exit. */
336      close (pipes[1]);
337      xexit (0);
338    }
339#else  /* !PIPE_USE_FORK */
340  /* Cannot fork/exec, but can popen/pclose.  */
341  {
342    FILE *fpipe;
343    char *cmdline = xmalloc (strlen (formatter_args[0])
344			     + strlen (manpage_pagename)
345			     + (arg_index > 2 ? strlen (manpage_section) : 0)
346 			     + 3);
347    int save_stderr = dup (fileno (stderr));
348    int fd_err = open (NULL_DEVICE, O_WRONLY, 0666);
349
350    if (fd_err > 2)
351      dup2 (fd_err, fileno (stderr)); /* Don't print errors. */
352    sprintf (cmdline, "%s %s %s", formatter_args[0], manpage_pagename,
353				  arg_index > 2 ? manpage_section : "");
354    fpipe = popen (cmdline, "r");
355    free (cmdline);
356    if (fd_err > 2)
357      close (fd_err);
358    dup2 (save_stderr, fileno (stderr));
359    if (fpipe == 0)
360      return ((char *)NULL);
361    formatted_page = read_from_fd (fileno (fpipe));
362    if (pclose (fpipe) == -1)
363      {
364	if (formatted_page)
365	  free (formatted_page);
366	return ((char *)NULL);
367      }
368  }
369#endif /* !PIPE_USE_FORK */
370
371  /* If we have the page, then clean it up. */
372  if (formatted_page)
373    clean_manpage (formatted_page);
374
375  return (formatted_page);
376}
377
378static void
379clean_manpage (manpage)
380     char *manpage;
381{
382  register int i, j;
383  int newline_count = 0;
384  char *newpage;
385
386  newpage = (char *)xmalloc (1 + strlen (manpage));
387
388  for (i = 0, j = 0; (newpage[j] = manpage[i]); i++, j++)
389    {
390      if (manpage[i] == '\n')
391        newline_count++;
392      else
393        newline_count = 0;
394
395      if (newline_count == 3)
396        {
397          j--;
398          newline_count--;
399        }
400
401      /* A malformed man page could have a \b as its first character,
402         in which case decrementing j by 2 will cause us to write into
403         newpage[-1], smashing the hidden info stored there by malloc.  */
404      if (manpage[i] == '\b' || manpage[i] == '\f' && j > 0)
405        j -= 2;
406      else if (!raw_escapes_p)
407	{
408	  /* Remove the ANSI escape sequences for color, boldface,
409	     underlining, and italics, generated by some versions of
410	     Groff.  */
411	  if (manpage[i] == '\033' && manpage[i + 1] == '['
412	      && isdigit (manpage[i + 2]))
413	    {
414	      if (isdigit (manpage[i + 3]) && manpage[i + 4] == 'm')
415		{
416		  i += 4;
417		  j--;
418		}
419	      else if (manpage[i + 3] == 'm')
420		{
421		  i += 3;
422		  j--;
423		}
424	      /* Else do nothing: it's some unknown escape sequence,
425		 so let's leave it alone.  */
426	    }
427	}
428    }
429
430  newpage[j++] = 0;
431
432  strcpy (manpage, newpage);
433  free (newpage);
434}
435
436static NODE *
437manpage_node_of_file_buffer (file_buffer, pagename)
438     FILE_BUFFER *file_buffer;
439     char *pagename;
440{
441  NODE *node = (NODE *)NULL;
442  TAG *tag = (TAG *)NULL;
443
444  if (file_buffer->contents)
445    {
446      register int i;
447
448      for (i = 0; (tag = file_buffer->tags[i]); i++)
449        {
450          if (strcasecmp (pagename, tag->nodename) == 0)
451            break;
452        }
453    }
454
455  if (tag)
456    {
457      node = (NODE *)xmalloc (sizeof (NODE));
458      node->filename = file_buffer->filename;
459      node->nodename = xstrdup (tag->nodename);
460      node->contents = file_buffer->contents + tag->nodestart;
461      node->nodelen = tag->nodelen;
462      node->flags    = 0;
463      node->display_pos = 0;
464      node->parent   = (char *)NULL;
465      node->flags = (N_HasTagsTable | N_IsManPage);
466      node->contents += skip_node_separator (node->contents);
467    }
468
469  return (node);
470}
471
/* Read everything available from descriptor FD and return it as an
   allocated, null-terminated string.  Where FD_SET is available, first
   wait up to 15 seconds for FD to become readable, and read nothing if
   that wait times out or fails.  Return NULL if nothing was read, or
   if what was read starts with a null character.  */
static char *
read_from_fd (fd)
     int fd;
{
  struct timeval timeout;
  char *buffer = (char *)NULL;
  int bsize = 0;
  int bindex = 0;
  int select_result;
#if defined (FD_SET)
  fd_set read_fds;

  timeout.tv_sec = 15;
  timeout.tv_usec = 0;

  FD_ZERO (&read_fds);
  FD_SET (fd, &read_fds);

  select_result = select (fd + 1, fd_set_cast (&read_fds), 0, 0, &timeout);
#else /* !FD_SET */
  select_result = 1;
#endif /* !FD_SET */

  /* 0 means the wait timed out, -1 that it failed; read only when it
     reported something else.  */
  if (select_result != 0 && select_result != -1)
    {
      for (;;)
        {
          int amount_read;

          /* Keep at least 1024 bytes free past BINDEX. */
          while ((bindex + 1024) > bsize)
            {
              bsize += 1024;
              buffer = (char *)xrealloc (buffer, bsize);
            }
          buffer[bindex] = '\0';

          amount_read = read (fd, buffer + bindex, 1023);

          /* A read error ends the loop with what we have so far. */
          if (amount_read < 0)
            break;

          bindex += amount_read;
          buffer[bindex] = '\0';

          /* End of file. */
          if (amount_read == 0)
            break;
        }
    }

  if ((buffer != (char *)NULL) && (*buffer == '\0'))
    {
      free (buffer);
      buffer = (char *)NULL;
    }

  return (buffer);
}
537
/* Strings which mark the start of the cross reference section of a
   man page.  find_reference_section tries them in this order and
   stops at the first one it finds.  The list ends with a NULL
   pointer.  */
static char *reference_section_starters[] =
{
  "\nRELATED INFORMATION",
  "\nRELATED\tINFORMATION",
  "RELATED INFORMATION\n",
  "RELATED\tINFORMATION\n",

  "\nSEE ALSO",
  "\nSEE\tALSO",
  "SEE ALSO\n",
  "SEE\tALSO\n",

  NULL
};
550
551static SEARCH_BINDING frs_binding;
552
553static SEARCH_BINDING *
554find_reference_section (node)
555     NODE *node;
556{
557  register int i;
558  long position = -1;
559
560  frs_binding.buffer = node->contents;
561  frs_binding.start = 0;
562  frs_binding.end = node->nodelen;
563  frs_binding.flags = S_SkipDest;
564
565  for (i = 0; reference_section_starters[i] != (char *)NULL; i++)
566    {
567      position = search_forward (reference_section_starters[i], &frs_binding);
568      if (position != -1)
569        break;
570    }
571
572  if (position == -1)
573    return ((SEARCH_BINDING *)NULL);
574
575  /* We found the start of the reference section, and point is right after
576     the string which starts it.  The text from here to the next header
577     (or end of buffer) contains the only references in this manpage. */
578  frs_binding.start = position;
579
580  for (i = frs_binding.start; i < frs_binding.end - 2; i++)
581    {
582      if ((frs_binding.buffer[i] == '\n') &&
583          (!whitespace (frs_binding.buffer[i + 1])))
584        {
585          frs_binding.end = i;
586          break;
587        }
588    }
589
590  return (&frs_binding);
591}
592
593REFERENCE **
594xrefs_of_manpage (node)
595     NODE *node;
596{
597  SEARCH_BINDING *reference_section;
598  REFERENCE **refs = (REFERENCE **)NULL;
599  int refs_index = 0;
600  int refs_slots = 0;
601  long position;
602
603  reference_section = find_reference_section (node);
604
605  if (reference_section == (SEARCH_BINDING *)NULL)
606    return ((REFERENCE **)NULL);
607
608  /* Grovel the reference section building a list of references found there.
609     A reference is alphabetic characters followed by non-whitespace text
610     within parenthesis. */
611  reference_section->flags = 0;
612
613  while ((position = search_forward ("(", reference_section)) != -1)
614    {
615      register int start, end;
616
617      for (start = position; start > reference_section->start; start--)
618        if (whitespace (reference_section->buffer[start]))
619          break;
620
621      start++;
622
623      for (end = position; end < reference_section->end; end++)
624        {
625          if (whitespace (reference_section->buffer[end]))
626            {
627              end = start;
628              break;
629            }
630
631          if (reference_section->buffer[end] == ')')
632            {
633              end++;
634              break;
635            }
636        }
637
638      if (end != start)
639        {
640          REFERENCE *entry;
641          int len = end - start;
642
643          entry = (REFERENCE *)xmalloc (sizeof (REFERENCE));
644          entry->label = (char *)xmalloc (1 + len);
645          strncpy (entry->label, (reference_section->buffer) + start, len);
646          entry->label[len] = '\0';
647          entry->filename = xstrdup (node->filename);
648          entry->nodename = xstrdup (entry->label);
649          entry->start = start;
650          entry->end = end;
651
652          add_pointer_to_array
653            (entry, refs_index, refs, refs_slots, 10, REFERENCE *);
654        }
655
656      reference_section->start = position + 1;
657    }
658
659  return (refs);
660}
661
662long
663locate_manpage_xref (node, start, dir)
664     NODE *node;
665     long start;
666     int dir;
667{
668  REFERENCE **refs;
669  long position = -1;
670
671  refs = xrefs_of_manpage (node);
672
673  if (refs)
674    {
675      register int i, count;
676      REFERENCE *entry;
677
678      for (i = 0; refs[i]; i++);
679      count = i;
680
681      if (dir > 0)
682        {
683          for (i = 0; (entry = refs[i]); i++)
684            if (entry->start > start)
685              {
686                position = entry->start;
687                break;
688              }
689        }
690      else
691        {
692          for (i = count - 1; i > -1; i--)
693            {
694              entry = refs[i];
695
696              if (entry->start < start)
697                {
698                  position = entry->start;
699                  break;
700                }
701            }
702        }
703
704      info_free_references (refs);
705    }
706  return (position);
707}
708
709/* This one was a little tricky.  The binding buffer that is passed in has
710   a START and END value of 0 -- strlen (window-line-containing-point).
711   The BUFFER is a pointer to the start of that line. */
712REFERENCE **
713manpage_xrefs_in_binding (node, binding)
714     NODE *node;
715     SEARCH_BINDING *binding;
716{
717  register int i;
718  REFERENCE **all_refs = xrefs_of_manpage (node);
719  REFERENCE **brefs = (REFERENCE **)NULL;
720  REFERENCE *entry;
721  int brefs_index = 0;
722  int brefs_slots = 0;
723  int start, end;
724
725  if (!all_refs)
726    return ((REFERENCE **)NULL);
727
728  start = binding->start + (binding->buffer - node->contents);
729  end = binding->end + (binding->buffer - node->contents);
730
731  for (i = 0; (entry = all_refs[i]); i++)
732    {
733      if ((entry->start > start) && (entry->end < end))
734        {
735          add_pointer_to_array
736            (entry, brefs_index, brefs, brefs_slots, 10, REFERENCE *);
737        }
738      else
739        {
740          maybe_free (entry->label);
741          maybe_free (entry->filename);
742          maybe_free (entry->nodename);
743          free (entry);
744        }
745    }
746
747  free (all_refs);
748  return (brefs);
749}
750