makewhatis.c revision 96861
1/*-
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 96861 2002-05-18 15:15:30Z ru $");
31
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/param.h>
35#include <sys/queue.h>
36
#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
46
/* Man hierarchy processed when no directory is given and MANPATH is unset. */
#define DEFAULT_MANPATH		"/usr/share/man"
/* Initial sbuf allocation, and the size of the buffer used to re-read lines. */
#define LINE_ALLOC		4096

/* A writable empty string, used to blank out an already-emitted argument. */
static char blank[] = "";
51
/*
 * What is recorded for each candidate man page found in a section
 * directory.
 */
struct page_info {
	char	*filename;	/* path of the page: "<dir>/<entry name>" */
	char	*name;		/* entry name up to the final '.' */
	char	*suffix;	/* text after that '.', ".gz" excluded */
	int	 gzipped;	/* non-zero if the entry name ended in ".gz" */
	ino_t	 inode;		/* st_ino of the file, used to spot links */
};
62
/*
 * One list node per man directory already handled; a directory is
 * identified by its device and inode numbers.
 */
struct visited_dir {
	dev_t				device;
	ino_t				inode;
	SLIST_ENTRY(visited_dir)	next;
};
71
/*
 * A growable character buffer.
 */
struct sbuf {
	char	*content;	/* first character of the buffer */
	char	*end;		/* one past the last character in use */
	char	*last;		/* final allocated character of the buffer */
};
80
/*
 * Drops the final n characters from the sbuf by moving its end back.
 */
#define sbuf_retract(sb, n)	((sb)->end -= (n))
/*
 * Evaluates to the number of characters currently held by the sbuf.
 */
#define sbuf_length(sb)		((sb)->end - (sb)->content)
91
/*
 * A filtering copy routine: reads srclen characters starting at src,
 * stores the edited result at dst and returns a pointer just past the
 * last character stored.
 */
typedef char *edited_copy(char *src, char *dst, int srclen);
93
/* -a flag: append to the existing whatis file */
static int append;
/* -v flag: be verbose with warnings */
static int verbose;
/* -i option: column at which the description starts */
static int indent = 24;
/* -n option: name of the whatis file written in each man directory */
static const char *whatis_name = "whatis";
/* -o option: the single output file shared by all directories */
static char *common_output;
/* the user's locale, looked up when -L is given */
static char *locale;
/* short form of locale (the "_XX" territory part removed) */
static char *lang_locale;
/* machine name; a sub-directory of this name is scanned in each section */
static char *machine;
102
/* exit status for the process; set to 1 when an error has been reported */
static int exit_code;
/* the man directories processed so far */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
106
107/*
108 * While the whatis line is being formed, it is stored in whatis_proto.
109 * When finished, it is reformatted into whatis_final and then appended
110 * to whatis_lines.
111 */
112static struct sbuf *whatis_proto;
113static struct sbuf *whatis_final;
114static StringList *whatis_lines;	/* collected output lines */
115
/* path of the temporary output file; empty while there is none */
static char tmp_file[MAXPATHLEN];
117
/*
 * Section headings accepted as the start of the NAME section.
 * The list ends with a NULL entry.
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	/* 8-bit encoded titles; presumably EUC-JP and KOI8-R -- confirm */
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};
123
/*
 * Two-letter mdoc(7) macro names, run together, that are dropped from
 * a line instead of being copied into the description.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
126
127/*
128 * Frees a struct page_info and its content.
129 */
130static void
131free_page_info(struct page_info *info)
132{
133	free(info->filename);
134	free(info->name);
135	free(info->suffix);
136	free(info);
137}
138
139/*
140 * Allocates and fills in a new struct page_info given the
141 * name of the man section directory and the dirent of the file.
142 * If the file is not a man page, returns NULL.
143 */
144static struct page_info *
145new_page_info(char *dir, struct dirent *dirent)
146{
147	struct page_info *info;
148	int basename_length;
149	char *suffix;
150	struct stat st;
151
152	info = (struct page_info *) malloc(sizeof(struct page_info));
153	if (info == NULL)
154		err(1, "malloc");
155	basename_length = strlen(dirent->d_name);
156	suffix = &dirent->d_name[basename_length];
157	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
158	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
159		suffix -= 3;
160		*suffix = '\0';
161	}
162	for (;;) {
163		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
164			if (*suffix == '.')
165				break;
166			if (verbose)
167				warnx("%s: invalid man page name", info->filename);
168			free(info->filename);
169			free(info);
170			return NULL;
171		}
172	}
173	*suffix++ = '\0';
174	info->name = strdup(dirent->d_name);
175	info->suffix = strdup(suffix);
176	if (stat(info->filename, &st) < 0) {
177		warn("%s", info->filename);
178		free_page_info(info);
179		return NULL;
180	}
181	if (!S_ISREG(st.st_mode)) {
182		if (verbose && !S_ISDIR(st.st_mode))
183			warnx("%s: not a regular file", info->filename);
184		free_page_info(info);
185		return NULL;
186	}
187	info->inode = st.st_ino;
188	return info;
189}
190
191/*
192 * Reset an sbuf's length to 0.
193 */
194static void
195sbuf_clear(struct sbuf *sbuf)
196{
197	sbuf->end = sbuf->content;
198}
199
200/*
201 * Allocate a new sbuf.
202 */
203static struct sbuf *
204new_sbuf(void)
205{
206	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
207	sbuf->content = (char *) malloc(LINE_ALLOC);
208	sbuf->last = sbuf->content + LINE_ALLOC - 1;
209	sbuf_clear(sbuf);
210	return sbuf;
211}
212
213/*
214 * Ensure that there is enough room in the sbuf for chars more characters.
215 */
216static void
217sbuf_need(struct sbuf *sbuf, int nchars)
218{
219	/* let's assume we only need to double it, but check just in case */
220	while (sbuf->end + nchars > sbuf->last) {
221		int alloc;
222		char *new_content;
223
224		alloc = (sbuf->last - sbuf->content + 1) * 2;
225		new_content = (char *) malloc(alloc);
226		memcpy(new_content, sbuf->content, sbuf->end - sbuf->content);
227		sbuf->end = new_content + (sbuf->end - sbuf->content);
228		free(sbuf->content);
229		sbuf->content = new_content;
230	}
231}
232
233/*
234 * Appends a string of a given length to the sbuf.
235 */
236static void
237sbuf_append(struct sbuf *sbuf, const char *text, int length)
238{
239	if (length > 0) {
240		sbuf_need(sbuf, length);
241		memcpy(sbuf->end, text, length);
242		sbuf->end += length;
243	}
244}
245
/*
 * Adds a NUL-terminated string (without its terminator) to the end of
 * the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
254
255/*
256 * Appends an edited null-terminated string to the sbuf.
257 */
258static void
259sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
260{
261	int length = strlen(text);
262	if (length > 0) {
263		sbuf_need(sbuf, length);
264		sbuf->end = copy(text, sbuf->end, length);
265	}
266}
267
268/*
269 * Strips any of a set of chars from the end of the sbuf.
270 */
271static void
272sbuf_strip(struct sbuf *sbuf, const char *set)
273{
274	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
275		sbuf->end--;
276}
277
278/*
279 * Returns the null-terminated string built by the sbuf.
280 */
281static char *
282sbuf_content(struct sbuf *sbuf)
283{
284	*sbuf->end = '\0';
285	return sbuf->content;
286}
287
288/*
289 * Returns true if no man page exists in the directory with
290 * any of the names in the StringList.
291 */
292static int
293no_page_exists(char *dir, StringList *names, char *suffix)
294{
295	char path[MAXPATHLEN];
296	int i;
297
298	for (i = 0; i < names->sl_cur; i++) {
299		snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
300		if (access(path, F_OK) < 0) {
301			path[strlen(path) - 3] = '\0';
302			if (access(path, F_OK) < 0)
303				continue;
304		}
305		return 0;
306	}
307	return 1;
308}
309
310static void
311trap_signal(int sig __unused)
312{
313	if (tmp_file[0] != '\0')
314		unlink(tmp_file);
315	exit(1);
316}
317
318/*
319 * Attempts to open an output file.  Returns NULL if unsuccessful.
320 */
321static FILE *
322open_output(char *name)
323{
324	FILE *output;
325
326	whatis_lines = sl_init();
327	if (append) {
328		char line[LINE_ALLOC];
329
330		output = fopen(name, "r");
331		if (output == NULL) {
332			warn("%s", name);
333			exit_code = 1;
334			return NULL;
335		}
336		while (fgets(line, sizeof line, output) != NULL) {
337			line[strlen(line) - 1] = '\0';
338			sl_add(whatis_lines, strdup(line));
339		}
340	}
341	if (common_output == NULL) {
342		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
343		name = tmp_file;
344	}
345	output = fopen(name, "w");
346	if (output == NULL) {
347		warn("%s", name);
348		exit_code = 1;
349		return NULL;
350	}
351	return output;
352}
353
/*
 * qsort(3) comparison routine for an array of strings: a and b each
 * point at a char pointer, and the strings are ordered by strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
359
360/*
361 * Writes the unique sorted lines to the output file.
362 */
363static void
364finish_output(FILE *output, char *name)
365{
366	int i;
367	char *prev = NULL;
368
369	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
370	for (i = 0; i < whatis_lines->sl_cur; i++) {
371		char *line = whatis_lines->sl_str[i];
372		if (i > 0 && strcmp(line, prev) == 0)
373			continue;
374		prev = line;
375		fputs(line, output);
376		putc('\n', output);
377	}
378	fclose(output);
379	sl_free(whatis_lines, 1);
380	if (common_output == NULL) {
381		rename(tmp_file, name);
382		unlink(tmp_file);
383	}
384}
385
386static FILE *
387open_whatis(char *mandir)
388{
389	char filename[MAXPATHLEN];
390
391	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
392	return open_output(filename);
393}
394
395static void
396finish_whatis(FILE *output, char *mandir)
397{
398	char filename[MAXPATHLEN];
399
400	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
401	finish_output(output, filename);
402}
403
404/*
405 * Tests to see if the given directory has already been visited.
406 */
407static int
408already_visited(char *dir)
409{
410	struct stat st;
411	struct visited_dir *visit;
412
413	if (stat(dir, &st) < 0) {
414		warn("%s", dir);
415		exit_code = 1;
416		return 1;
417	}
418	SLIST_FOREACH(visit, &visited_dirs, next) {
419		if (visit->inode == st.st_ino &&
420		    visit->device == st.st_dev) {
421			warnx("already visited %s", dir);
422			return 1;
423		}
424	}
425	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
426	visit->device = st.st_dev;
427	visit->inode = st.st_ino;
428	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
429	return 0;
430}
431
/*
 * Removes trailing white space from a string in place, returning a
 * pointer to just beyond the new last character (that is, to the new
 * terminating NUL).  A string consisting only of white space becomes
 * empty.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = str + strlen(str);

	/* look at the character before rhs so that rhs never passes str */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return rhs;
}
445
/*
 * Returns a pointer to the first character of the string that is not
 * white space; this is the terminating NUL if there is none.
 */
static char *
skip_spaces(char *s)
{
	/* <ctype.h> functions need a value in the unsigned char range */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return s;
}
456
/*
 * Returns whether the string consists of one or more digits and
 * nothing else.  An empty string gives 0.
 */
static int
only_digits(char *line)
{
	/* <ctype.h> functions need a value in the unsigned char range */
	if (!isdigit((unsigned char)*line))
		return 0;
	while (isdigit((unsigned char)*line))
		line++;
	return *line == '\0';
}
469
470/*
471 * Returns whether the line is of one of the forms:
472 *	.Sh NAME
473 *	.Sh "NAME"
474 *	etc.
475 * assuming that section_start is ".Sh".
476 */
477static int
478name_section_line(char *line, const char *section_start)
479{
480	char *rhs;
481	const char **title;
482
483	if (strncmp(line, section_start, 3) != 0)
484		return 0;
485	line = skip_spaces(line + 3);
486	rhs = trim_rhs(line);
487	if (*line == '"') {
488		line++;
489		if (*--rhs == '"')
490			*rhs = '\0';
491	}
492	for (title = name_section_titles; *title != NULL; title++)
493		if (strcmp(*title, line) == 0)
494			return 1;
495	return 0;
496}
497
/*
 * Copies fromlen characters from "from" to "to" while removing the
 * most common nroff/troff markup:
 *	\(em, \(mi, \fR, \f(XX, \*p, \&
 * Any other escape loses its backslash and keeps the character that
 * follows it.  A backslash that is the very last input character is
 * dropped.
 *
 * Never reads beyond from[fromlen - 1] and never stores more than
 * fromlen characters.  Returns a pointer just past the last character
 * stored; the output is not NUL-terminated.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	int i = 0;

	while (i < fromlen) {
		if (from[i] != '\\') {
			*to++ = from[i++];
			continue;
		}
		/*
		 * Nothing follows a trailing backslash; stop here rather
		 * than copy whatever byte lies beyond the input.
		 */
		if (++i >= fromlen)
			break;
		switch (from[i]) {
		case '(':
			if (fromlen - i >= 3 &&
			    (strncmp(&from[i + 1], "em", 2) == 0 ||
			    strncmp(&from[i + 1], "mi", 2) == 0)) {
				i += 3;
				continue;
			}
			break;
		case 'f':
			/* "\f(XX" has four characters after the backslash, "\fX" two */
			if (i + 1 < fromlen && from[i + 1] == '(')
				i += 4;
			else
				i += 2;
			continue;
		case '*':
			if (i + 1 < fromlen && from[i + 1] == 'p') {
				i += 2;
				continue;
			}
			break;
		case '&':
			i++;
			continue;
		}
		/* unknown escape: only the backslash is removed */
		*to++ = from[i++];
	}
	return to;
}
540
541/*
542 * Appends a string with the nroff formatting removed.
543 */
544static void
545add_nroff(char *text)
546{
547	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
548}
549
550/*
551 * Appends "name(suffix), " to whatis_final.
552 */
553static void
554add_whatis_name(char *name, char *suffix)
555{
556	if (*name != '\0') {
557		sbuf_append_str(whatis_final, name);
558		sbuf_append(whatis_final, "(", 1);
559		sbuf_append_str(whatis_final, suffix);
560		sbuf_append(whatis_final, "), ", 3);
561	}
562}
563
564/*
565 * Processes an old-style man(7) line.  This ignores commands with only
566 * a single number argument.
567 */
568static void
569process_man_line(char *line)
570{
571	if (*line == '.') {
572		while (isalpha(*++line))
573			;
574		line = skip_spaces(line);
575		if (only_digits(line))
576			return;
577	} else
578		line = skip_spaces(line);
579	if (*line != '\0') {
580		add_nroff(line);
581		sbuf_append(whatis_proto, " ", 1);
582	}
583}
584
585/*
586 * Processes a new-style mdoc(7) line.
587 */
588static void
589process_mdoc_line(char *line)
590{
591	int xref;
592	int arg = 0;
593	char *line_end = &line[strlen(line)];
594	int orig_length = sbuf_length(whatis_proto);
595	char *next;
596
597	if (*line == '\0')
598		return;
599	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
600		add_nroff(skip_spaces(line));
601		sbuf_append(whatis_proto, " ", 1);
602		return;
603	}
604	xref = strncmp(line, ".Xr", 3) == 0;
605	line += 3;
606	while ((line = skip_spaces(line)) < line_end) {
607		if (*line == '"') {
608			next = ++line;
609			for (;;) {
610				next = strchr(next, '"');
611				if (next == NULL)
612					break;
613				strcpy(next, &next[1]);
614				line_end--;
615				if (*next != '"')
616					break;
617				next++;
618			}
619		} else
620			next = strpbrk(line, " \t");
621		if (next != NULL)
622			*next++ = '\0';
623		else
624			next = line_end;
625		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
626			if (strcmp(line, "Ns") == 0) {
627				arg = 0;
628				line = next;
629				continue;
630			}
631			if (strstr(mdoc_commands, line) != NULL) {
632				line = next;
633				continue;
634			}
635		}
636		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
637			if (xref) {
638				sbuf_append(whatis_proto, "(", 1);
639				add_nroff(line);
640				sbuf_append(whatis_proto, ")", 1);
641				xref = 0;
642				line = blank;
643			} else
644				sbuf_append(whatis_proto, " ", 1);
645		}
646		add_nroff(line);
647		arg++;
648		line = next;
649	}
650	if (sbuf_length(whatis_proto) > orig_length)
651		sbuf_append(whatis_proto, " ", 1);
652}
653
654/*
655 * Collects a list of comma-separated names from the text.
656 */
657static void
658collect_names(StringList *names, char *text)
659{
660	char *arg;
661
662	for (;;) {
663		arg = text;
664		text = strchr(text, ',');
665		if (text != NULL)
666			*text++ = '\0';
667		sl_add(names, arg);
668		if (text == NULL)
669			return;
670		if (*text == ' ')
671			text++;
672	}
673}
674
/* Where process_page() is within the page it is reading. */
enum {
	STATE_UNKNOWN,		/* NAME section not reached yet */
	STATE_MANSTYLE,		/* inside an old-style .SH NAME section */
	STATE_MDOCNAME,		/* inside .Sh NAME, reading the .Nm lines */
	STATE_MDOCDESC		/* inside .Sh NAME, after the .Nm lines */
};
676
677/*
678 * Processes a man page source into a single whatis line and adds it
679 * to whatis_lines.
680 */
681static void
682process_page(struct page_info *page, char *section_dir)
683{
684	gzFile *in;
685	char buffer[4096];
686	char *line;
687	StringList *names;
688	char *descr;
689	int state = STATE_UNKNOWN;
690	int i;
691
692	sbuf_clear(whatis_proto);
693	if ((in = gzopen(page->filename, "r")) == NULL) {
694		warn("%s", page->filename);
695		exit_code = 1;
696		return;
697	}
698	while (gzgets(in, buffer, sizeof buffer) != NULL) {
699		line = buffer;
700		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
701			continue;
702		switch (state) {
703		/*
704		 * haven't reached the NAME section yet.
705		 */
706		case STATE_UNKNOWN:
707			if (name_section_line(line, ".SH"))
708				state = STATE_MANSTYLE;
709			else if (name_section_line(line, ".Sh"))
710				state = STATE_MDOCNAME;
711			continue;
712		/*
713		 * Inside an old-style .SH NAME section.
714		 */
715		case STATE_MANSTYLE:
716			if (strncmp(line, ".SH", 3) == 0)
717				break;
718			trim_rhs(line);
719			if (strcmp(line, ".") == 0)
720				continue;
721			if (strncmp(line, ".IX", 3) == 0) {
722				line += 3;
723				line = skip_spaces(line);
724			}
725			process_man_line(line);
726			continue;
727		/*
728		 * Inside a new-style .Sh NAME section (the .Nm part).
729		 */
730		case STATE_MDOCNAME:
731			trim_rhs(line);
732			if (strncmp(line, ".Nm", 3) == 0) {
733				process_mdoc_line(line);
734				continue;
735			} else {
736				if (strcmp(line, ".") == 0)
737					continue;
738				sbuf_append(whatis_proto, "- ", 2);
739				state = STATE_MDOCDESC;
740			}
741			/* fall through */
742		/*
743		 * Inside a new-style .Sh NAME section (after the .Nm-s).
744		 */
745		case STATE_MDOCDESC:
746			if (strncmp(line, ".Sh", 3) == 0)
747				break;
748			trim_rhs(line);
749			if (strcmp(line, ".") == 0)
750				continue;
751			process_mdoc_line(line);
752			continue;
753		}
754		break;
755	}
756	gzclose(in);
757	sbuf_strip(whatis_proto, " \t.-");
758	line = sbuf_content(whatis_proto);
759	/*
760	 * line now contains the appropriate data, but without
761	 * the proper indentation or the section appended to each name.
762	 */
763	descr = strstr(line, " - ");
764	if (descr == NULL) {
765		descr = strchr(line, ' ');
766		if (descr == NULL) {
767			if (verbose)
768				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
769			return;
770		}
771		*descr++ = '\0';
772	} else {
773		*descr = '\0';
774		descr += 3;
775	}
776	names = sl_init();
777	collect_names(names, line);
778	sbuf_clear(whatis_final);
779	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
780		/*
781		 * Add the page name since that's the only thing that
782		 * man(1) will find.
783		 */
784		add_whatis_name(page->name, page->suffix);
785	}
786	for (i = 0; i < names->sl_cur; i++)
787		add_whatis_name(names->sl_str[i], page->suffix);
788	sl_free(names, 0);
789	sbuf_retract(whatis_final, 2);		/* remove last ", " */
790	while (sbuf_length(whatis_final) < indent)
791		sbuf_append(whatis_final, " ", 1);
792	sbuf_append(whatis_final, " - ", 3);
793	sbuf_append_str(whatis_final, skip_spaces(descr));
794	sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
795}
796
797/*
798 * Sorts pages first by inode number, then by name.
799 */
800static int
801pagesort(const void *a, const void *b)
802{
803	struct page_info *p1 = *(struct page_info **) a;
804	struct page_info *p2 = *(struct page_info **) b;
805	if (p1->inode == p2->inode)
806		return strcmp(p1->name, p2->name);
807	return p1->inode - p2->inode;
808}
809
810/*
811 * Processes a single man section.
812 */
813static void
814process_section(char *section_dir)
815{
816	struct dirent **entries;
817	int nentries;
818	struct page_info **pages;
819	int npages = 0;
820	int i;
821	int prev_inode = 0;
822
823	if (verbose)
824		fprintf(stderr, "  %s\n", section_dir);
825
826	/*
827	 * scan the man section directory for pages
828	 */
829	nentries = scandir(section_dir, &entries, NULL, alphasort);
830	if (nentries < 0) {
831		warn("%s", section_dir);
832		exit_code = 1;
833		return;
834	}
835	/*
836	 * collect information about man pages
837	 */
838	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
839	for (i = 0; i < nentries; i++) {
840		struct page_info *info = new_page_info(section_dir, entries[i]);
841		if (info != NULL)
842			pages[npages++] = info;
843		free(entries[i]);
844	}
845	free(entries);
846	qsort(pages, npages, sizeof(struct page_info *), pagesort);
847	/*
848	 * process each unique page
849	 */
850	for (i = 0; i < npages; i++) {
851		struct page_info *page = pages[i];
852		if (page->inode != prev_inode) {
853			prev_inode = page->inode;
854			if (verbose)
855				fprintf(stderr, "	reading %s\n", page->filename);
856			process_page(page, section_dir);
857		} else if (verbose)
858			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
859		free_page_info(page);
860	}
861	free(pages);
862}
863
/*
 * scandir(3) filter: returns 1 if the directory entry is named like a
 * man page section, i.e. "man" followed by nothing but letters and
 * digits (possibly none), and 0 otherwise.
 */
static int
select_sections(struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		/* <ctype.h> functions need a value in the unsigned char range */
		if (!isalnum((unsigned char)*p))
			return 0;
	}
	return 1;
}
880
881/*
882 * Processes a single top-level man directory by finding all the
883 * sub-directories named man* and processing each one in turn.
884 */
885static void
886process_mandir(char *dir_name)
887{
888	struct dirent **entries;
889	int nsections;
890	FILE *fp = NULL;
891	int i;
892	struct stat st;
893
894	if (already_visited(dir_name))
895		return;
896	if (verbose)
897		fprintf(stderr, "man directory %s\n", dir_name);
898	nsections = scandir(dir_name, &entries, select_sections, alphasort);
899	if (nsections < 0) {
900		warn("%s", dir_name);
901		exit_code = 1;
902		return;
903	}
904	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
905		return;
906	for (i = 0; i < nsections; i++) {
907		char section_dir[MAXPATHLEN];
908		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
909		process_section(section_dir);
910		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
911		    entries[i]->d_name, machine);
912		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
913			process_section(section_dir);
914		free(entries[i]);
915	}
916	free(entries);
917	if (common_output == NULL)
918		finish_whatis(fp, dir_name);
919}
920
921/*
922 * Processes one argument, which may be a colon-separated list of
923 * directories.
924 */
925static void
926process_argument(const char *arg)
927{
928	char *dir;
929	char *mandir;
930	char *parg;
931
932	parg = strdup(arg);
933	if (parg == NULL)
934		err(1, "out of memory");
935	while ((dir = strsep(&parg, ":")) != NULL) {
936		if (locale != NULL) {
937			asprintf(&mandir, "%s/%s", dir, locale);
938			process_mandir(mandir);
939			free(mandir);
940			if (lang_locale != NULL) {
941				asprintf(&mandir, "%s/%s", dir, lang_locale);
942				process_mandir(mandir);
943				free(mandir);
944			}
945		} else {
946			process_mandir(dir);
947		}
948	}
949	free(parg);
950}
951
952
953int
954main(int argc, char **argv)
955{
956	int opt;
957	extern int optind;
958	extern char *optarg;
959	FILE *fp = NULL;
960
961	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
962		switch (opt) {
963		case 'a':
964			append++;
965			break;
966		case 'i':
967			indent = atoi(optarg);
968			break;
969		case 'n':
970			whatis_name = optarg;
971			break;
972		case 'o':
973			common_output = optarg;
974			break;
975		case 'v':
976			verbose++;
977			break;
978		case 'L':
979			locale = getenv("LC_ALL");
980			if (locale == NULL)
981				locale = getenv("LC_CTYPE");
982			if (locale == NULL)
983				locale = getenv("LANG");
984			if (locale != NULL) {
985				char *sep = strchr(locale, '_');
986				if (sep != NULL && isupper(sep[1]) &&
987				    isupper(sep[2])) {
988					asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]);
989				}
990			}
991			break;
992		default:
993			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
994			exit(1);
995		}
996	}
997
998	signal(SIGINT, trap_signal);
999	signal(SIGHUP, trap_signal);
1000	signal(SIGQUIT, trap_signal);
1001	signal(SIGTERM, trap_signal);
1002	SLIST_INIT(&visited_dirs);
1003	whatis_proto = new_sbuf();
1004	whatis_final = new_sbuf();
1005
1006	if ((machine = getenv("MACHINE")) == NULL)
1007		machine = MACHINE;
1008
1009	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1010		err(1, "%s", common_output);
1011	if (optind == argc) {
1012		const char *manpath = getenv("MANPATH");
1013		if (manpath == NULL)
1014			manpath = DEFAULT_MANPATH;
1015		process_argument(manpath);
1016	} else {
1017		while (optind < argc)
1018			process_argument(argv[optind++]);
1019	}
1020	if (common_output != NULL)
1021		finish_output(fp, common_output);
1022	exit(exit_code);
1023}
1024