1#!/bin/sh
2# makewhatis: create the whatis database
3# Created: Sun Jun 14 10:49:37 1992
4# Revised: Sat Jan  8 14:12:37 1994 by faith@cs.unc.edu
5# Revised: Sat Mar 23 17:56:18 1996 by micheal@actrix.gen.nz
6# Copyright 1992, 1993, 1994 Rickard E. Faith (faith@cs.unc.edu)
7# May be freely distributed and modified as long as copyright is retained.
8#
9# Wed Dec 23 13:27:50 1992: Rik Faith (faith@cs.unc.edu) applied changes
10# based on Mitchum DSouza (mitchum.dsouza@mrc-apu.cam.ac.uk) cat patches.
11# Also, cleaned up code and make it work with NET-2 doc pages.
12#
13# makewhatis-1.4: aeb 940802, 941007, 950417
14# Fixed so that the -c option works correctly for the cat pages
15# on my machine. Fix for -u by Nan Zou (nan@ksu.ksu.edu).
16# Many minor changes.
17# The -s option is undocumented, and may well disappear again.
18#
19# Sat Mar 23 1996: Michael Hamilton (michael@actrix.gen.nz).
20# I changed the script to invoke gawk only once for each directory tree.
21# This speeds things up considerably (from 30 minutes down to 1.5 minutes
22# on my 486DX66).
23# 960401 - aeb: slight adaptation to work correctly with cat pages.
24# 960510 - added fixes by brennan@raven.ca.boeing.com, author of mawk.
# 971012 - replaced "test -z" - it doesn't work on SunOS 4.1.3_U1.
26# 980710 - be more careful with TMPFILE
27# 000323 - do not change PATH, better treatment of catpages - Bryan Henderson
28# 011117 - avoid suspicious filenames
29# 030310 - find files only; fix LAPACK cruft; no /usr/man default;
30#	use /dev/stderr instead of /dev/tty; handle files with strange names;
31#	add support for chinese, hungarian, indonesian, japanese, korean,
32#	polish, russian (Thierry Vignaud);
33#
34# Note for Slackware users: "makewhatis -v -w -c" will work.
35#
36# makewhatis aeb 030801 (from %version%)
37
# Name this script was invoked under (shown by --version).
# $0 is quoted so that an install path containing whitespace still yields
# a single argument for basename.
program=`basename "$0"`
39
# Build the default search paths: each is the colon-joined list of those
# candidate directories that actually exist on this machine.
# When both /usr/man and /usr/share/man exist, the former is local
# and must come first.
# It is a bug to add /var/cache/man to DEFCATPATH.
dm=
for d in /usr/man /usr/share/man /usr/X11R6/man /usr/local/man
do
    [ -d $d ] || continue
    dm=${dm:+$dm:}$d
done
DEFMANPATH=$dm
dc=
for d in /usr/man/preformat /usr/man /usr/share/man/preformat /usr/share/man
do
    [ -d $d ] || continue
    dc=${dc:+$dc:}$d
done
DEFCATPATH=$dc
59
60# In case /usr is read-only, make /usr/foo/whatis (etc) a symlink to
61# something like /var/cache/man/foo-whatis.
62# Some distributions make a single big /var/cache/man/whatis file,
63# but that leads to problems and bugs.
64
65# AWK=/usr/bin/gawk
66AWK=%awk%
67
68# Find a place for our temporary files. If security is not a concern, use
69#	TMPFILE=/tmp/whatis$$; TMPFILEDIR=none
70# Of course makewhatis should only have the required permissions
71# (for reading and writing directories like /usr/man).
72# We try here to be careful (and avoid preconstructed symlinks)
73# in case makewhatis is run as root, by creating a subdirectory of /tmp.
74
# Private scratch directory under /tmp, named after our process id.
TMPFILEDIR=/tmp/whatis.tmp.dir.$$
# Clear away a stale leftover of the same name. The variable must be
# expanded here: a bare "TMPFILEDIR" would delete a file of that literal
# name in the current directory and leave the stale directory in place.
rm -rf "$TMPFILEDIR"
# mkdir fails when the name already exists (including as a planted
# symlink), and mode 0700 keeps other users out of the directory.
if ! mkdir -m 0700 "$TMPFILEDIR"; then
    echo "Could not create $TMPFILEDIR" >&2
    exit 1
fi
TMPFILE=$TMPFILEDIR/w

# make sure TMPFILEDIR is deleted if program is killed or terminates
# (just delete this line if your shell doesn't know about trap)
trap "rm -rf $TMPFILEDIR" 0 1 2 3 15
86
87# default find arg: no directories, no empty files
88findarg0="-type f -size +0"
89
90topath=manpath
91
92defmanpath=$DEFMANPATH
93defcatpath=
94
95sections="1 2 3 4 5 6 7 8 9 n l"
96
# parse_args ARG...
# Interpret the command line, setting the global option variables:
#   --version, -V   print the version banner and exit 0
#   -c              following directories are cat directories; use the
#                   default cat path instead of the default man path
#   -s LIST         replace the list of sections by LIST (next word)
#   -u              update mode: only look at recently changed files
#   -v              verbose
#   -w              take manpath (and catpath) from `man --path`
#   -anything else  print usage and exit
#   DIR             append DIR to manpath, or to catpath after -c
# A word that is not an existing directory is an error (exit status 1).
parse_args() {
    for name in "$@"
    do
        if [ -n "$setsections" ]; then
            # the previous word was -s: this word is the section list
            setsections=
            sections=$name
            continue
        fi
        case $name in
            --version|-V)
                echo "$program from %version%"
                exit 0;;
            -c) topath=catpath
                defmanpath=
                defcatpath=$DEFCATPATH
                continue;;
            -s) setsections=1
                continue;;
            -u) findarg="-ctime 0"
                update=1
                continue;;
            -v) verbose=1
                continue;;
            -w) manpath=`man --path`
                catpath=$manpath
                continue;;
            -*) echo "Usage: makewhatis [-u] [-v] [-w] [manpath] [-c [catpath]]"
                echo "       This will build the whatis database for the man pages"
                echo "       found in manpath and the cat pages found in catpath."
                echo "       -u: update database with new pages"
                echo "       -v: verbose"
                echo "       -w: use manpath obtained from \`man --path\`"
                echo "       [manpath]: man directories (default: $DEFMANPATH)"
                echo "       [catpath]: cat directories (default: the first existing"
                echo "           directory in $DEFCATPATH)"
                exit;;
             *) # Quote the operand: unquoted, an empty argument turns the
                # test into "[ -d ]", which is true.
                if [ -d "$name" ]
                then
                    # Append ":DIR" to the variable named by $topath. Only
                    # the variable name is expanded before eval runs; the
                    # directory name is expanded by eval itself, so it is
                    # never re-parsed as shell code.
                    eval "$topath=\"\${$topath}:\$name\""
                else
                    echo "No such directory $name" >&2
                    exit 1
                fi;;
        esac
    done
}

parse_args "$@"
141
# Turn the colon-separated man path into a blank-separated word list,
# falling back to the default when manpath was never set.
manpath=`echo ${manpath-$defmanpath} | tr ':' ' '`

# Without an explicit cat path, settle for the first default cat
# directory that exists.
case "$catpath" in
    '')
	for d in `echo $defcatpath | tr ':' ' '`
	do
	    [ -d $d ] || continue
	    catpath=$d
	    break
	done
	;;
esac
catpath=`echo $catpath | tr ':' ' '`
150
# A full rebuild (no -u) begins by emptying every whatis file that is about
# to be regenerated. The main pass afterwards only merges into them,
# because the same directory might be visited twice.
if [ x$update = x ]; then
   for pages in man cat
   do
      case $pages in
	 man) path=$manpath ;;
	 cat) path=$catpath ;;
      esac
      for mandir in $path
      do
	 cp /dev/null $mandir/whatis
      done
   done
fi
163
# Main pass: walk the man trees first, then the cat trees.
for pages in man cat
do
   export pages
   # path <- $manpath or $catpath, selected by name through eval
   eval path="\$$pages"path
   for mandir in $path
   do
     if [ x$verbose != x ]; then
	echo "about to enter $mandir" > /dev/stderr
     fi
     # On a full rebuild the whatis files were emptied beforehand, so a
     # non-empty whatis in a man tree means this directory was done already.
     if [ -s ${mandir}/whatis -a $pages = man -a x$update = x ]; then
	if [ x$verbose != x ]; then
	   echo skipping $mandir - we did it already > /dev/stderr
	fi
     else      
       # remember the starting directory; it is restored with "cd $here"
       # after the section loop
       here=`pwd`
       cd $mandir
       for i in $sections
       do
	 if [ -d ${pages}$i ]
	 then
	    cd ${pages}$i
	    section=$i
	    curdir=$mandir/${pages}$i
	    export section verbose curdir
	    # find prints one candidate page file per line; the awk program
	    # that follows reads that list and emits the whatis lines
	    find $mandir/${pages}$i/. -name '*' $findarg0 $findarg -print | $AWK '
189
190	    function readline() {
191	      if (use_zcat || use_bzcat) {
192		result = (pipe_cmd | getline);
193		if (result < 0) {
194		  print "Pipe error: " pipe_cmd " " ERRNO > "/dev/stderr";
195		}
196	      } else {
197		result = (getline < filename);
198		if (result < 0) {
199		  print "Read file error: " filename " " ERRNO > "/dev/stderr";
200		}
201	      }
202	      return result;
203	    }
204	    
205	    function closeline() {
206	      if (use_zcat || use_bzcat) {
207		return close(pipe_cmd);
208	      } else {
209		return close(filename);
210	      }
211	    }
212	    
213	    function do_one() {
214	      insh = 0; thisjoin = 1; done = 0;
215	      entire_line = "";
216
217	      if (verbose) {
218		print "adding " filename > "/dev/stderr"
219	      }
220	      
221	      use_zcat = match(filename,"\\.Z$") ||
222			 match(filename,"\\.z$") || match(filename,"\\.gz$");
223	      if (!use_zcat)
224		use_bzcat = match(filename,"\\.bz2");
225	      if (use_zcat || use_bzcat) {
226		filename_no_gz = substr(filename, 0, RSTART - 1);
227	      } else {
228		filename_no_gz = filename;
229	      }
230	      match(filename_no_gz, "/[^/]+$");
231	      progname = substr(filename, RSTART + 1, RLENGTH - 1);
232	      if (match(progname, "\\." section "[A-Za-z]+")) {
233		actual_section = substr(progname, RSTART + 1, RLENGTH - 1);
234	      } else {
235		actual_section = section;
236	      }
237	      sub(/\..*/, "", progname);
238	      if (use_zcat || use_bzcat) {
239		if (use_zcat) {
240		  pipe_cmd = "zcat \"" filename "\"";
241		} else {
242		  pipe_cmd = "bzcat \"" filename "\"";
243		}
244		# try to avoid suspicious stuff
245		if (filename ~ /[;&|`$(]/) {
246		  print "ignored strange file name " filename " in " curdir > "/dev/stderr";
247		  return;
248		}
249	      }
250	    
251	      while (!done && readline() > 0) {
252		gsub(/.\b/, "");
253		if (($1 ~ /^\.[Ss][Hh]/ &&
254		  ($2 ~ /[Nn][Aa][Mm][Ee]/ ||
255		   $2 ~ /^JM�NO/ || $2 ~ /^NAVN/ || $2 ~ /^NUME/ ||
256		   $2 ~ /^BEZEICHNUNG/ || $2 ~ /^NOMBRE/ ||
257		   $2 ~ /^NIMI/ || $2 ~ /^NOM/ || $2 ~ /^IME/ ||
258		   $2 ~ /^N[�E]V/ || $2 ~ /^NAMA/ || $2 ~ /^̾��/ ||
259		   $2 ~ /^̾��/ || $2 ~ /^�̸�/ || $2 ~ /^NAZWA/ ||
260		   $2 ~ /^��������/ || $2 ~ /^���/ || $2 ~ /^�W��/ ||
261		   $2 ~ /^NOME/ || $2 ~ /^NAAM/) || $2 ~ /^���/)) ||
262		  (pages == "cat" && $1 ~ /^NAME/)) {
263		    if (!insh) {
264		      insh = 1;
265		    } else {
266		      done = 1;
267		    }
268		} else if (insh) {
269		  if ($1 ~ /^\.[Ss][HhYS]/ ||
270		    (pages == "cat" &&
271		    ($1 ~ /^S[yYeE]/ || $1 ~ /^DESCRIPTION/ ||
272		     $1 ~ /^COMMAND/ || $1 ~ /^OVERVIEW/ ||
273		     $1 ~ /^STRUCTURES/ || $1 ~ /^INTRODUCTION/ ||
274		     $0 ~ /^[^ ]/))) {
275		      # end insh for Synopsis, Syntax, but also for
276		      # DESCRIPTION (e.g., XFree86.1x),
277		      # COMMAND (e.g., xspread.1)
278		      # OVERVIEW (e.g., TclCommandWriting.3)
279		      # STRUCTURES (e.g., XEvent.3x)
280		      # INTRODUCTION (e.g., TclX.n)
281		      # and anything at all that begins in Column 1, so 
282		      # is probably a section header.
283		    done = 1;
284		  } else {
285		    if ($0 ~ progname"-") {  # Fix old cat pages
286			sub(progname"-", progname" - ");
287		    }
288		    if ($0 ~ /[^ \\]-$/) {
289		      sub(/-$/, "");	  # Handle Hyphenations
290		      nextjoin = 1;
291		    } else if ($0 ~ /\\c$/) {
292		      sub(/\\c$/, "");	  # Handle Continuations
293		      nextjoin = 1;
294		    } else
295		      nextjoin = 0;
296
297		    sub(/^.[IB] /, "");       # Kill bold and italics
298		    sub(/^.BI /, "");         #
299		    sub(/^.SM /, "");         # Kill small
300		    sub(/^.Nm /, "");         # Kill bold
301		    sub(/^.Tn /, "");         # Kill normal
302	            sub(/^.Li /, "");         # Kill .Li
303	            sub(/^.Dq /, "");         # Kill .Dq
304	            sub(/^.Nd */, "- ");      # Convert .Nd to dash
305		    sub(/\\\".*/, "");        # Trim pending comments
306		    sub(/  *$/, "");          # Trim pending spaces
307		    sub(/^\.$/, "");          # Kill blank comments
308		    sub(/^'"'"'.*/, "");      # Kill comment/troff lines
309		    sub(/^.in .*/, "");       # Kill various macros
310		    sub(/^.ti .*/, "");
311		    sub(/^.ta .*/, "");
312		    sub(/^.Vb .*/, "");
313		    sub(/^.[PLTH]P$/, "");    # .PP/.LP/.TP/.HP
314		    sub(/^.Pp$/, "");
315		    sub(/^.[iI]X .*$/, "");
316		    sub(/^.nolinks$/, "");
317		    sub(/^.B$/, "");
318		    sub(/^.nf$/, "");
319
320		    if (($1 ~ /^\.../ || $1 == "") &&
321		        (entire_line ~ / - / || entire_line ~ / \\- /)) {
322		      # Assume that this ends the description of one line
323		      # Sometimes there are several descriptions in one page,
324		      # as in outb(2).
325		      handle_entire_line();
326		      entire_line = "";
327		      thisjoin = 1;
328		    } else {
329		      if (thisjoin) {
330			entire_line = entire_line $0;
331		      } else {
332			entire_line = entire_line " " $0;
333		      }
334		      thisjoin = nextjoin;
335		    }
336		  }
337		}
338	      }
339	      handle_entire_line();
340	      closeline();
341	    }
342
343	    function handle_entire_line() {
344	      x = entire_line;             # Keep it short
345
346	      gsub(/\015/, "", x);         # Kill DOS remains
347	      gsub(/	/, " ", x);        # Translate tabs to spaces
348	      gsub(/  +/, " ", x);         # Collapse spaces
349	      gsub(/ *, */, ", ", x);      # Fix comma spacings
350	      sub(/^ /, "", x);            # Kill initial spaces
351	      sub(/ $/, "", x);            # Kill trailing spaces
352	      sub(/__+/, "_", x);          # Collapse underscores
353
354	      gsub(/\\f\(../, "", x);         # Kill font changes
355	      gsub(/\\f[PRIB0123]/, "", x);   # Kill font changes
356	      gsub(/\\s[-+0-9]*/, "", x);     # Kill size changes
357	      gsub(/\\&/, "", x);             # Kill \&
358	      gsub(/\\\|/, "", x);            # Kill \|
359	      gsub(/\\\((ru|ul)/, "_", x);    # Translate
360	      gsub(/\\\((mi|hy|em)/, "-", x); # Translate
361	      gsub(/\\\*\(../, "", x);        # Kill troff strings
362	      gsub(/\\/, "", x);              # Kill all backslashes
363	      gsub(/"/, "", x);               # Kill quotes (from .Nd "foo bar")
364	      sub(/<h1 align=center>/, "", x);# Yuk! HTML cruft
365	      gsub(/\000.*/, "X", x);         # Binary cruft in LAPACK pages
366	      gsub(/  +/, " ", x);            # Collapse spaces (again)
367	      sub(/^ /, "", x);               # Kill initial spaces (again)
368	      sub(/ $/, "", x);               # Kill trailing spaces (again)
369	      sub(/\.$/, "", x);              # Kill trailing period
370
371	      if (!match(x, / - /))
372		return;
373
374	      after_dash = substr(x, RSTART);
375	      head = substr(x, 1, RSTART-1) ", ";
376	      while (match(head, /, /)) {
377		prog = substr(head, 1, RSTART-1);
378		head = substr(head, RSTART+2);
379		if (prog != progname)
380		  prog = prog " [" progname "]";
381		printf "%-*s (%s) %s\n", 20, prog, actual_section, after_dash;
382	      }
383	    }
384
385	    {			# Main action - process each filename read in.
386	      filename = $0;
387	      do_one();
388	    }
389	    ' pages=$pages section=$section verbose=$verbose curdir=$curdir
390	    cd ..
391	 fi
392       done > $TMPFILE
393
394       cd $here
395
396       # kludge for Slackware's /usr/man/preformat
397       if [ $mandir = /usr/man/preformat ]
398       then
399	 mandir1=/usr/man
400       else
401	 mandir1=$mandir
402       fi
403
404       if [ -f ${mandir1}/whatis ]
405       then
406	 cat ${mandir1}/whatis >> $TMPFILE
407       fi
408       tr -s '\n' < $TMPFILE | sort -u > ${mandir1}/whatis
409
410       chmod 644 ${mandir1}/whatis
411       rm $TMPFILE
412     fi
413   done
414done
415
416# remove tempdir
417rm -rf $TMPFILEDIR
418
419