/* makewhatis.c revision 97102 */
/*-
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 97102 2002-05-22 11:08:41Z ru $");
31
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/queue.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
46
/* Man hierarchy processed when no directory is given and MANPATH is unset. */
#define DEFAULT_MANPATH		"/usr/share/man"
/* Initial sbuf capacity; also the line buffer size used in append mode. */
#define LINE_ALLOC		4096

/* Writable empty string, substituted for an argument already emitted. */
static char blank[] = "";
51
/*
 * What is recorded about one candidate man page file found in a section
 * directory.  The three strings are heap-allocated and are released by
 * free_page_info().
 */
struct page_info {
	char	*filename;	/* "<dir>/<entry>" path of the file */
	char	*name;		/* entry name up to the section suffix */
	char	*suffix;	/* section suffix following the last '.' */
	int	 gzipped;	/* non-zero when the entry ends in ".gz" */
	ino_t	 inode;		/* inode number, used to detect duplicates */
};
62
/*
 * One node of the singly-linked list of man directories that have
 * already been processed, identified by (device, inode).
 */
struct visited_dir {
	dev_t	device;				/* st_dev of the directory */
	ino_t	inode;				/* st_ino of the directory */
	SLIST_ENTRY(visited_dir) next;		/* list linkage */
};
71
/*
 * A growable character buffer.  The text occupies [content, end) and
 * is not NUL-terminated until sbuf_content() is called.
 */
struct sbuf {
	char	*content;	/* first character of the buffer */
	char	*end;		/* one past the last character of the text */
	char	*last;		/* final allocated character of the buffer */
};

/*
 * Drops the final `amount` characters of the text.
 */
#define sbuf_retract(sbuf, amount)	\
	((sbuf)->end -= (amount))

/*
 * Yields the number of characters currently held in the text.
 */
#define sbuf_length(sbuf)		\
	((sbuf)->end - (sbuf)->content)
91
/*
 * A filtering copy routine: reads `srclen` characters from `src`, writes
 * the edited result at `dst` and returns the position just past the last
 * character written.
 */
typedef char *edited_copy(char *src, char *dst, int srclen);
93
/* -a flag: merge with the existing whatis file instead of replacing it */
static int append;
/* -v flag: print progress and extra warnings */
static int verbose;
/* -i option: column at which the description starts */
static int indent = 24;
/* -n option: file name of the database inside each man directory */
static const char *whatis_name = "whatis";
/* -o option: a single output file shared by every man directory */
static char *common_output;
/* -L flag: the user's locale taken from the environment, if any */
static char *locale;
/* locale with the "_XX" territory part removed */
static char *lang_locale;
/* name of the per-machine subdirectory looked for in each section */
static char *machine;

/* exit status reported when the program finishes */
static int exit_code;
/* man directories processed so far */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
106
/*
 * A whatis line is first accumulated in whatis_proto, then laid out
 * in its final form in whatis_final, and a copy of that is added to
 * whatis_lines.
 */
static struct sbuf *whatis_proto;
static struct sbuf *whatis_final;
/* every output line gathered for the file currently being built */
static StringList *whatis_lines;

/* path of the temporary output file; empty when none has been named */
static char tmp_file[MAXPATHLEN];
117
/*
 * Section titles accepted as the NAME section, NULL-terminated.
 * NOTE(review): the two byte-escaped entries look like the EUC-JP and
 * KOI8-R spellings of the title -- confirm before editing them.
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/*
 * Two-letter mdoc(7) macro names, concatenated, that are dropped from
 * the output when they appear as an argument.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
126
127/*
128 * Frees a struct page_info and its content.
129 */
130static void
131free_page_info(struct page_info *info)
132{
133	free(info->filename);
134	free(info->name);
135	free(info->suffix);
136	free(info);
137}
138
139/*
140 * Allocates and fills in a new struct page_info given the
141 * name of the man section directory and the dirent of the file.
142 * If the file is not a man page, returns NULL.
143 */
144static struct page_info *
145new_page_info(char *dir, struct dirent *dirent)
146{
147	struct page_info *info;
148	int basename_length;
149	char *suffix;
150	struct stat st;
151
152	info = (struct page_info *) malloc(sizeof(struct page_info));
153	if (info == NULL)
154		err(1, "malloc");
155	basename_length = strlen(dirent->d_name);
156	suffix = &dirent->d_name[basename_length];
157	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
158	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
159		suffix -= 3;
160		*suffix = '\0';
161	}
162	for (;;) {
163		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
164			if (*suffix == '.')
165				break;
166			if (verbose)
167				warnx("%s: invalid man page name", info->filename);
168			free(info->filename);
169			free(info);
170			return NULL;
171		}
172	}
173	*suffix++ = '\0';
174	info->name = strdup(dirent->d_name);
175	info->suffix = strdup(suffix);
176	if (stat(info->filename, &st) < 0) {
177		warn("%s", info->filename);
178		free_page_info(info);
179		return NULL;
180	}
181	if (!S_ISREG(st.st_mode)) {
182		if (verbose && !S_ISDIR(st.st_mode))
183			warnx("%s: not a regular file", info->filename);
184		free_page_info(info);
185		return NULL;
186	}
187	info->inode = st.st_ino;
188	return info;
189}
190
191/*
192 * Reset an sbuf's length to 0.
193 */
194static void
195sbuf_clear(struct sbuf *sbuf)
196{
197	sbuf->end = sbuf->content;
198}
199
200/*
201 * Allocate a new sbuf.
202 */
203static struct sbuf *
204new_sbuf(void)
205{
206	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
207	sbuf->content = (char *) malloc(LINE_ALLOC);
208	sbuf->last = sbuf->content + LINE_ALLOC - 1;
209	sbuf_clear(sbuf);
210	return sbuf;
211}
212
213/*
214 * Ensure that there is enough room in the sbuf for chars more characters.
215 */
216static void
217sbuf_need(struct sbuf *sbuf, int nchars)
218{
219	/* let's assume we only need to double it, but check just in case */
220	while (sbuf->end + nchars > sbuf->last) {
221		int alloc;
222		char *new_content;
223
224		alloc = (sbuf->last - sbuf->content + 1) * 2;
225		new_content = (char *) malloc(alloc);
226		memcpy(new_content, sbuf->content, sbuf->end - sbuf->content);
227		sbuf->end = new_content + (sbuf->end - sbuf->content);
228		free(sbuf->content);
229		sbuf->content = new_content;
230	}
231}
232
233/*
234 * Appends a string of a given length to the sbuf.
235 */
236static void
237sbuf_append(struct sbuf *sbuf, const char *text, int length)
238{
239	if (length > 0) {
240		sbuf_need(sbuf, length);
241		memcpy(sbuf->end, text, length);
242		sbuf->end += length;
243	}
244}
245
/*
 * Adds a whole NUL-terminated string (without its terminator) to the
 * end of the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int text_length = strlen(text);

	sbuf_append(sbuf, text, text_length);
}
254
255/*
256 * Appends an edited null-terminated string to the sbuf.
257 */
258static void
259sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
260{
261	int length = strlen(text);
262	if (length > 0) {
263		sbuf_need(sbuf, length);
264		sbuf->end = copy(text, sbuf->end, length);
265	}
266}
267
268/*
269 * Strips any of a set of chars from the end of the sbuf.
270 */
271static void
272sbuf_strip(struct sbuf *sbuf, const char *set)
273{
274	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
275		sbuf->end--;
276}
277
278/*
279 * Returns the null-terminated string built by the sbuf.
280 */
281static char *
282sbuf_content(struct sbuf *sbuf)
283{
284	*sbuf->end = '\0';
285	return sbuf->content;
286}
287
/*
 * Reports whether none of the names in the StringList has a page in
 * the directory.  Each name is tried as "dir/name.suffix.gz" and then
 * as "dir/name.suffix"; returns 0 as soon as one exists, 1 otherwise.
 */
static int
no_page_exists(char *dir, StringList *names, char *suffix)
{
	char path[MAXPATHLEN];
	int i;

	for (i = 0; i < names->sl_cur; i++) {
		snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
		if (access(path, F_OK) >= 0)
			return 0;
		/* chop the ".gz" and try the uncompressed name */
		path[strlen(path) - 3] = '\0';
		if (access(path, F_OK) >= 0)
			return 0;
	}
	return 1;
}
309
310static void
311trap_signal(int sig __unused)
312{
313	if (tmp_file[0] != '\0')
314		unlink(tmp_file);
315	exit(1);
316}
317
/*
 * Starts a new output file: resets whatis_lines and opens the file for
 * writing.  In append mode the lines already in `name` are loaded into
 * whatis_lines first.  Unless a common output file (-o) is in use, the
 * data is written to "<name>.tmp", whose path is kept in tmp_file.
 *
 * Returns the stream, or NULL after printing a warning and setting
 * exit_code if a file cannot be opened.
 */
static FILE *
open_output(char *name)
{
	FILE *output;

	whatis_lines = sl_init();
	if (whatis_lines == NULL)
		err(1, "sl_init");
	if (append) {
		char line[LINE_ALLOC];

		output = fopen(name, "r");
		if (output == NULL) {
			warn("%s", name);
			exit_code = 1;
			sl_free(whatis_lines, 1);
			whatis_lines = NULL;
			return NULL;
		}
		while (fgets(line, sizeof line, output) != NULL) {
			char *copy;

			/* Cut at the newline, if there is one. */
			line[strcspn(line, "\n")] = '\0';
			copy = strdup(line);
			if (copy == NULL)
				err(1, "strdup");
			sl_add(whatis_lines, copy);
		}
		fclose(output);
	}
	if (common_output == NULL) {
		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
		name = tmp_file;
	}
	output = fopen(name, "w");
	if (output == NULL) {
		warn("%s", name);
		exit_code = 1;
		sl_free(whatis_lines, 1);
		whatis_lines = NULL;
		return NULL;
	}
	return output;
}
353
/*
 * qsort(3) comparator for an array of C strings: orders the two
 * pointed-to strings with strcmp(3).
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
359
/*
 * Sorts whatis_lines, writes each distinct line to the output stream,
 * closes it and frees the collected lines.  When a temporary file was
 * used (no -o), it is renamed to `name` -- but only if everything was
 * written successfully, so a failed run does not replace an existing
 * database with a truncated one.  Failures are reported and recorded
 * in exit_code.
 */
static void
finish_output(FILE *output, char *name)
{
	size_t i;
	char *prev = NULL;
	int failed;

	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
	for (i = 0; i < whatis_lines->sl_cur; i++) {
		char *line = whatis_lines->sl_str[i];

		/* equal lines are adjacent after sorting */
		if (prev != NULL && strcmp(line, prev) == 0)
			continue;
		prev = line;
		fputs(line, output);
		putc('\n', output);
	}
	failed = ferror(output) != 0;
	if (fclose(output) != 0)
		failed = 1;
	sl_free(whatis_lines, 1);
	if (failed) {
		warn("%s", common_output == NULL ? tmp_file : name);
		exit_code = 1;
	}
	if (common_output == NULL) {
		if (!failed && rename(tmp_file, name) < 0) {
			warn("%s", name);
			exit_code = 1;
		}
		/* Harmless after a successful rename; cleans up otherwise. */
		unlink(tmp_file);
		tmp_file[0] = '\0';
	}
}
385
386static FILE *
387open_whatis(char *mandir)
388{
389	char filename[MAXPATHLEN];
390
391	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
392	return open_output(filename);
393}
394
395static void
396finish_whatis(FILE *output, char *mandir)
397{
398	char filename[MAXPATHLEN];
399
400	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
401	finish_output(output, filename);
402}
403
404/*
405 * Tests to see if the given directory has already been visited.
406 */
407static int
408already_visited(char *dir)
409{
410	struct stat st;
411	struct visited_dir *visit;
412
413	if (stat(dir, &st) < 0) {
414		warn("%s", dir);
415		exit_code = 1;
416		return 1;
417	}
418	SLIST_FOREACH(visit, &visited_dirs, next) {
419		if (visit->inode == st.st_ino &&
420		    visit->device == st.st_dev) {
421			warnx("already visited %s", dir);
422			return 1;
423		}
424	}
425	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
426	visit->device = st.st_dev;
427	visit->inode = st.st_ino;
428	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
429	return 0;
430}
431
/*
 * Removes trailing whitespace from a string in place, returning a
 * pointer to the new terminator (just beyond the last remaining
 * character).  A string made up only of whitespace becomes empty.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = &str[strlen(str)];

	/* look at rhs[-1] so that rhs never moves before str */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return rhs;
}
445
/*
 * Returns a pointer to the first non-whitespace character at or after
 * `s`, which is the terminator if only whitespace remains.
 */
static char *
skip_spaces(char *s)
{
	/* <ctype.h> needs an unsigned char value; pages may be 8-bit text */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return s;
}
456
/*
 * Returns whether the string is non-empty and consists of digits only.
 */
static int
only_digits(char *line)
{
	if (!isdigit((unsigned char)*line++))
		return 0;
	while (isdigit((unsigned char)*line))
		line++;
	return *line == '\0';
}
469
470/*
471 * Returns whether the line is of one of the forms:
472 *	.Sh NAME
473 *	.Sh "NAME"
474 *	etc.
475 * assuming that section_start is ".Sh".
476 */
477static int
478name_section_line(char *line, const char *section_start)
479{
480	char *rhs;
481	const char **title;
482
483	if (strncmp(line, section_start, 3) != 0)
484		return 0;
485	line = skip_spaces(line + 3);
486	rhs = trim_rhs(line);
487	if (*line == '"') {
488		line++;
489		if (*--rhs == '"')
490			*rhs = '\0';
491	}
492	for (title = name_section_titles; *title != NULL; title++)
493		if (strcmp(*title, line) == 0)
494			return 1;
495	return 0;
496}
497
/*
 * Copies fromlen characters from `from` to `to` while removing the
 * most common nroff/troff markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape only the backslash is dropped.  A backslash that
 * is the very last input character is dropped as well.  The output is
 * never longer than the input and is not NUL-terminated.
 *
 * Returns a pointer just past the last character written.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* Nothing follows the backslash: there is nothing to escape. */
		if (++from >= from_end)
			break;
		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			break;
		case 's':
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end && isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* "(xx": skip three characters, or what is left */
				from += (from_end - from < 3) ? (from_end - from) : 3;
			} else if (*from == '[') {
				while (++from < from_end && *from != ']')
					;
				if (from < from_end)
					from++;
			} else
				from++;
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		/* Unrecognised escape: keep the character after the backslash. */
		*to++ = *from++;
	}
	return to;
}
546
547/*
548 * Appends a string with the nroff formatting removed.
549 */
550static void
551add_nroff(char *text)
552{
553	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
554}
555
556/*
557 * Appends "name(suffix), " to whatis_final.
558 */
559static void
560add_whatis_name(char *name, char *suffix)
561{
562	if (*name != '\0') {
563		sbuf_append_str(whatis_final, name);
564		sbuf_append(whatis_final, "(", 1);
565		sbuf_append_str(whatis_final, suffix);
566		sbuf_append(whatis_final, "), ", 3);
567	}
568}
569
570/*
571 * Processes an old-style man(7) line.  This ignores commands with only
572 * a single number argument.
573 */
574static void
575process_man_line(char *line)
576{
577	if (*line == '.') {
578		while (isalpha(*++line))
579			;
580		line = skip_spaces(line);
581		if (only_digits(line))
582			return;
583	} else
584		line = skip_spaces(line);
585	if (*line != '\0') {
586		add_nroff(line);
587		sbuf_append(whatis_proto, " ", 1);
588	}
589}
590
591/*
592 * Processes a new-style mdoc(7) line.
593 */
594static void
595process_mdoc_line(char *line)
596{
597	int xref;
598	int arg = 0;
599	char *line_end = &line[strlen(line)];
600	int orig_length = sbuf_length(whatis_proto);
601	char *next;
602
603	if (*line == '\0')
604		return;
605	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
606		add_nroff(skip_spaces(line));
607		sbuf_append(whatis_proto, " ", 1);
608		return;
609	}
610	xref = strncmp(line, ".Xr", 3) == 0;
611	line += 3;
612	while ((line = skip_spaces(line)) < line_end) {
613		if (*line == '"') {
614			next = ++line;
615			for (;;) {
616				next = strchr(next, '"');
617				if (next == NULL)
618					break;
619				strcpy(next, &next[1]);
620				line_end--;
621				if (*next != '"')
622					break;
623				next++;
624			}
625		} else
626			next = strpbrk(line, " \t");
627		if (next != NULL)
628			*next++ = '\0';
629		else
630			next = line_end;
631		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
632			if (strcmp(line, "Ns") == 0) {
633				arg = 0;
634				line = next;
635				continue;
636			}
637			if (strstr(mdoc_commands, line) != NULL) {
638				line = next;
639				continue;
640			}
641		}
642		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
643			if (xref) {
644				sbuf_append(whatis_proto, "(", 1);
645				add_nroff(line);
646				sbuf_append(whatis_proto, ")", 1);
647				xref = 0;
648				line = blank;
649			} else
650				sbuf_append(whatis_proto, " ", 1);
651		}
652		add_nroff(line);
653		arg++;
654		line = next;
655	}
656	if (sbuf_length(whatis_proto) > orig_length)
657		sbuf_append(whatis_proto, " ", 1);
658}
659
/*
 * Splits `text` at each comma and adds every piece to the StringList.
 * One space directly after a comma is skipped.  The text is cut up in
 * place and the list entries point into it; nothing is copied.
 */
static void
collect_names(StringList *names, char *text)
{
	char *comma;

	while ((comma = strchr(text, ',')) != NULL) {
		*comma = '\0';
		sl_add(names, text);
		text = comma + 1;
		if (*text == ' ')
			text++;
	}
	sl_add(names, text);
}
680
/* Parser states used by process_page() while reading a page. */
enum {
	STATE_UNKNOWN,		/* NAME section not reached yet */
	STATE_MANSTYLE,		/* inside a man(7) ".SH NAME" section */
	STATE_MDOCNAME,		/* inside ".Sh NAME", reading the .Nm lines */
	STATE_MDOCDESC		/* inside ".Sh NAME", after the .Nm lines */
};
682
/*
 * Processes a man page source into a single whatis line and adds it
 * to whatis_lines.
 *
 * The file is read through zlib (gzopen), its NAME section is gathered
 * into whatis_proto as "names - description", and the final line is
 * built in whatis_final as "name(suffix), ... - description" with the
 * description starting no earlier than column `indent`.  The page's
 * own file name is added to the names when neither it nor any page
 * named after one of the listed names exists in section_dir.
 *
 * A file that cannot be opened is reported and sets exit_code; a page
 * without a usable description or without any name is skipped.
 */
static void
process_page(struct page_info *page, char *section_dir)
{
	gzFile in;
	char buffer[4096];
	char *line;
	StringList *names;
	char *descr;
	char *entry;
	int state = STATE_UNKNOWN;
	size_t i;

	sbuf_clear(whatis_proto);
	if ((in = gzopen(page->filename, "r")) == NULL) {
		warn("%s", page->filename);
		exit_code = 1;
		return;
	}
	while (gzgets(in, buffer, sizeof buffer) != NULL) {
		line = buffer;
		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
			continue;
		switch (state) {
		/*
		 * haven't reached the NAME section yet.
		 */
		case STATE_UNKNOWN:
			if (name_section_line(line, ".SH"))
				state = STATE_MANSTYLE;
			else if (name_section_line(line, ".Sh"))
				state = STATE_MDOCNAME;
			continue;
		/*
		 * Inside an old-style .SH NAME section.
		 */
		case STATE_MANSTYLE:
			if (strncmp(line, ".SH", 3) == 0)
				break;
			trim_rhs(line);
			if (strcmp(line, ".") == 0)
				continue;
			if (strncmp(line, ".IX", 3) == 0) {
				line += 3;
				line = skip_spaces(line);
			}
			process_man_line(line);
			continue;
		/*
		 * Inside a new-style .Sh NAME section (the .Nm part).
		 */
		case STATE_MDOCNAME:
			trim_rhs(line);
			if (strncmp(line, ".Nm", 3) == 0) {
				process_mdoc_line(line);
				continue;
			} else {
				if (strcmp(line, ".") == 0)
					continue;
				sbuf_append(whatis_proto, "- ", 2);
				state = STATE_MDOCDESC;
			}
			/* fall through */
		/*
		 * Inside a new-style .Sh NAME section (after the .Nm-s).
		 */
		case STATE_MDOCDESC:
			if (strncmp(line, ".Sh", 3) == 0)
				break;
			trim_rhs(line);
			if (strcmp(line, ".") == 0)
				continue;
			process_mdoc_line(line);
			continue;
		}
		/* reached the section after NAME: stop reading */
		break;
	}
	gzclose(in);
	sbuf_strip(whatis_proto, " \t.-");
	line = sbuf_content(whatis_proto);
	/*
	 * line now contains the appropriate data, but without
	 * the proper indentation or the section appended to each name.
	 */
	descr = strstr(line, " - ");
	if (descr == NULL) {
		descr = strchr(line, ' ');
		if (descr == NULL) {
			if (verbose)
				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
			return;
		}
		*descr++ = '\0';
	} else {
		*descr = '\0';
		descr += 3;
	}
	names = sl_init();
	if (names == NULL)
		err(1, "sl_init");
	collect_names(names, line);
	sbuf_clear(whatis_final);
	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
		/*
		 * Add the page name since that's the only thing that
		 * man(1) will find.
		 */
		add_whatis_name(page->name, page->suffix);
	}
	for (i = 0; i < names->sl_cur; i++)
		add_whatis_name(names->sl_str[i], page->suffix);
	sl_free(names, 0);
	if (sbuf_length(whatis_final) < 2) {
		/* every name was empty: there is no ", " to remove */
		if (verbose)
			fprintf(stderr, "	ignoring %s: no names found\n", page->filename);
		return;
	}
	sbuf_retract(whatis_final, 2);		/* remove last ", " */
	while (sbuf_length(whatis_final) < indent)
		sbuf_append(whatis_final, " ", 1);
	sbuf_append(whatis_final, " - ", 3);
	sbuf_append_str(whatis_final, skip_spaces(descr));
	entry = strdup(sbuf_content(whatis_final));
	if (entry == NULL)
		err(1, "strdup");
	sl_add(whatis_lines, entry);
}
802
803/*
804 * Sorts pages first by inode number, then by name.
805 */
806static int
807pagesort(const void *a, const void *b)
808{
809	struct page_info *p1 = *(struct page_info **) a;
810	struct page_info *p2 = *(struct page_info **) b;
811	if (p1->inode == p2->inode)
812		return strcmp(p1->name, p2->name);
813	return p1->inode - p2->inode;
814}
815
816/*
817 * Processes a single man section.
818 */
819static void
820process_section(char *section_dir)
821{
822	struct dirent **entries;
823	int nentries;
824	struct page_info **pages;
825	int npages = 0;
826	int i;
827	int prev_inode = 0;
828
829	if (verbose)
830		fprintf(stderr, "  %s\n", section_dir);
831
832	/*
833	 * scan the man section directory for pages
834	 */
835	nentries = scandir(section_dir, &entries, NULL, alphasort);
836	if (nentries < 0) {
837		warn("%s", section_dir);
838		exit_code = 1;
839		return;
840	}
841	/*
842	 * collect information about man pages
843	 */
844	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
845	for (i = 0; i < nentries; i++) {
846		struct page_info *info = new_page_info(section_dir, entries[i]);
847		if (info != NULL)
848			pages[npages++] = info;
849		free(entries[i]);
850	}
851	free(entries);
852	qsort(pages, npages, sizeof(struct page_info *), pagesort);
853	/*
854	 * process each unique page
855	 */
856	for (i = 0; i < npages; i++) {
857		struct page_info *page = pages[i];
858		if (page->inode != prev_inode) {
859			prev_inode = page->inode;
860			if (verbose)
861				fprintf(stderr, "	reading %s\n", page->filename);
862			process_page(page, section_dir);
863		} else if (verbose)
864			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
865		free_page_info(page);
866	}
867	free(pages);
868}
869
/*
 * scandir(3) filter: returns 1 if the entry is named "man" followed by
 * zero or more alphanumeric characters, 0 otherwise.
 */
static int
select_sections(struct dirent *entry)
{
	char *p = &entry->d_name[3];

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	while (*p != '\0') {
		if (!isalnum((unsigned char)*p++))
			return 0;
	}
	return 1;
}
886
887/*
888 * Processes a single top-level man directory by finding all the
889 * sub-directories named man* and processing each one in turn.
890 */
891static void
892process_mandir(char *dir_name)
893{
894	struct dirent **entries;
895	int nsections;
896	FILE *fp = NULL;
897	int i;
898	struct stat st;
899
900	if (already_visited(dir_name))
901		return;
902	if (verbose)
903		fprintf(stderr, "man directory %s\n", dir_name);
904	nsections = scandir(dir_name, &entries, select_sections, alphasort);
905	if (nsections < 0) {
906		warn("%s", dir_name);
907		exit_code = 1;
908		return;
909	}
910	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
911		return;
912	for (i = 0; i < nsections; i++) {
913		char section_dir[MAXPATHLEN];
914		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
915		process_section(section_dir);
916		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
917		    entries[i]->d_name, machine);
918		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
919			process_section(section_dir);
920		free(entries[i]);
921	}
922	free(entries);
923	if (common_output == NULL)
924		finish_whatis(fp, dir_name);
925}
926
927/*
928 * Processes one argument, which may be a colon-separated list of
929 * directories.
930 */
931static void
932process_argument(const char *arg)
933{
934	char *dir;
935	char *mandir;
936	char *parg;
937
938	parg = strdup(arg);
939	if (parg == NULL)
940		err(1, "out of memory");
941	while ((dir = strsep(&parg, ":")) != NULL) {
942		if (locale != NULL) {
943			asprintf(&mandir, "%s/%s", dir, locale);
944			process_mandir(mandir);
945			free(mandir);
946			if (lang_locale != NULL) {
947				asprintf(&mandir, "%s/%s", dir, lang_locale);
948				process_mandir(mandir);
949				free(mandir);
950			}
951		} else {
952			process_mandir(dir);
953		}
954	}
955	free(parg);
956}
957
958
959int
960main(int argc, char **argv)
961{
962	int opt;
963	extern int optind;
964	extern char *optarg;
965	FILE *fp = NULL;
966
967	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
968		switch (opt) {
969		case 'a':
970			append++;
971			break;
972		case 'i':
973			indent = atoi(optarg);
974			break;
975		case 'n':
976			whatis_name = optarg;
977			break;
978		case 'o':
979			common_output = optarg;
980			break;
981		case 'v':
982			verbose++;
983			break;
984		case 'L':
985			locale = getenv("LC_ALL");
986			if (locale == NULL)
987				locale = getenv("LC_CTYPE");
988			if (locale == NULL)
989				locale = getenv("LANG");
990			if (locale != NULL) {
991				char *sep = strchr(locale, '_');
992				if (sep != NULL && isupper(sep[1]) &&
993				    isupper(sep[2])) {
994					asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]);
995				}
996			}
997			break;
998		default:
999			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1000			exit(1);
1001		}
1002	}
1003
1004	signal(SIGINT, trap_signal);
1005	signal(SIGHUP, trap_signal);
1006	signal(SIGQUIT, trap_signal);
1007	signal(SIGTERM, trap_signal);
1008	SLIST_INIT(&visited_dirs);
1009	whatis_proto = new_sbuf();
1010	whatis_final = new_sbuf();
1011
1012	if ((machine = getenv("MACHINE")) == NULL)
1013		machine = MACHINE;
1014
1015	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1016		err(1, "%s", common_output);
1017	if (optind == argc) {
1018		const char *manpath = getenv("MANPATH");
1019		if (manpath == NULL)
1020			manpath = DEFAULT_MANPATH;
1021		process_argument(manpath);
1022	} else {
1023		while (optind < argc)
1024			process_argument(argv[optind++]);
1025	}
1026	if (common_output != NULL)
1027		finish_output(fp, common_output);
1028	exit(exit_code);
1029}
1030