1/*-
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/utsname.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
48
/* Man path used when no directory arguments and no MANPATH are given. */
#define DEFAULT_MANPATH	"/usr/share/man"
/* Initial sbuf allocation and size of the line buffer in open_output(). */
#define LINE_ALLOC	4096

/* A writable empty string. */
static char blank[] = "";
53
/*
 * Per-page record built by new_page_info() for each candidate file
 * found in a man section directory.
 */
struct page_info {
	char	*filename;	/* "dir/entry" path of the file */
	char	*name;		/* entry name with the section suffix cut off */
	char	*suffix;	/* text after the last '.', ".gz" excluded */
	int	 gzipped;	/* non-zero when the entry name ends in ".gz" */
	ino_t	 inode;		/* st_ino of the file */
};
64
/*
 * One list node per man directory that has been visited, identified
 * by its device and inode numbers.
 */
struct visited_dir {
	dev_t	device;				/* st_dev of the directory */
	ino_t	inode;				/* st_ino of the directory */
	SLIST_ENTRY(visited_dir) next;		/* link in visited_dirs */
};
73
/*
 * A growable character buffer.
 */
struct sbuf {
	char	*content;	/* first character of the buffer */
	char	*end;		/* one past the last character of content */
	char	*last;		/* last character that has been allocated */
};
82
/*
 * Number of characters currently held in the sbuf.
 */
#define sbuf_length(sbuf)	((sbuf)->end - (sbuf)->content)

/*
 * Drops the trailing amount characters from the sbuf.
 */
#define sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))
93
/*
 * A filtering copy routine: reads length characters starting at from,
 * writes its output at to, and returns a pointer just past the last
 * character written.
 */
typedef char *edited_copy(char *from, char *to, int length);

/* Settings taken from the command line. */
static int append;				/* -a: append to existing whatis */
static int verbose;				/* -v: be verbose with warnings */
static int indent = 24;				/* -i: description indentation */
static const char *whatis_name = "whatis";	/* -n: the output file name */
static char *common_output;			/* -o: the single output file */
static char *locale;				/* user's locale if -L is used */
static char *lang_locale;			/* short form of locale */

/* Names of the machine-specific section sub-directories. */
static const char *machine;
static const char *machine_arch;

/* Exit code to use when finished. */
static int exit_code;

/* The man directories processed so far. */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
108
109/*
110 * While the whatis line is being formed, it is stored in whatis_proto.
111 * When finished, it is reformatted into whatis_final and then appended
112 * to whatis_lines.
113 */
114static struct sbuf *whatis_proto;
115static struct sbuf *whatis_final;
116static StringList *whatis_lines;	/* collected output lines */
117
118static char tmp_file[MAXPATHLEN];	/* path of temporary file, if any */
119
/*
 * NULL-terminated list of titles accepted for the NAME section of a
 * man page.  The last two entries are raw non-ASCII byte strings
 * (presumably localized titles; the encoding is not stated here).
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/*
 * The two-letter mdoc(7) macro names, concatenated, that
 * process_mdoc_line() drops from its output.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
128
129/*
130 * Frees a struct page_info and its content.
131 */
132static void
133free_page_info(struct page_info *info)
134{
135	free(info->filename);
136	free(info->name);
137	free(info->suffix);
138	free(info);
139}
140
141/*
142 * Allocates and fills in a new struct page_info given the
143 * name of the man section directory and the dirent of the file.
144 * If the file is not a man page, returns NULL.
145 */
146static struct page_info *
147new_page_info(char *dir, struct dirent *dirent)
148{
149	struct page_info *info;
150	int basename_length;
151	char *suffix;
152	struct stat st;
153
154	info = (struct page_info *) malloc(sizeof(struct page_info));
155	if (info == NULL)
156		err(1, "malloc");
157	basename_length = strlen(dirent->d_name);
158	suffix = &dirent->d_name[basename_length];
159	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
160	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
161		suffix -= 3;
162		*suffix = '\0';
163	}
164	for (;;) {
165		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
166			if (*suffix == '.')
167				break;
168			if (verbose)
169				warnx("%s: invalid man page name", info->filename);
170			free(info->filename);
171			free(info);
172			return NULL;
173		}
174	}
175	*suffix++ = '\0';
176	info->name = strdup(dirent->d_name);
177	info->suffix = strdup(suffix);
178	if (stat(info->filename, &st) < 0) {
179		warn("%s", info->filename);
180		free_page_info(info);
181		return NULL;
182	}
183	if (!S_ISREG(st.st_mode)) {
184		if (verbose && !S_ISDIR(st.st_mode))
185			warnx("%s: not a regular file", info->filename);
186		free_page_info(info);
187		return NULL;
188	}
189	info->inode = st.st_ino;
190	return info;
191}
192
193/*
194 * Reset an sbuf's length to 0.
195 */
196static void
197sbuf_clear(struct sbuf *sbuf)
198{
199	sbuf->end = sbuf->content;
200}
201
202/*
203 * Allocate a new sbuf.
204 */
205static struct sbuf *
206new_sbuf(void)
207{
208	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
209	sbuf->content = (char *) malloc(LINE_ALLOC);
210	sbuf->last = sbuf->content + LINE_ALLOC - 1;
211	sbuf_clear(sbuf);
212	return sbuf;
213}
214
215/*
216 * Ensure that there is enough room in the sbuf for nchars more characters.
217 */
218static void
219sbuf_need(struct sbuf *sbuf, int nchars)
220{
221	char *new_content;
222	size_t size, cntsize;
223
224	/* double the size of the allocation until the buffer is big enough */
225	while (sbuf->end + nchars > sbuf->last) {
226		size = sbuf->last + 1 - sbuf->content;
227		size *= 2;
228		cntsize = sbuf->end - sbuf->content;
229
230		new_content = (char *)malloc(size);
231		memcpy(new_content, sbuf->content, cntsize);
232		free(sbuf->content);
233		sbuf->content = new_content;
234		sbuf->end = new_content + cntsize;
235		sbuf->last = new_content + size - 1;
236	}
237}
238
239/*
240 * Appends a string of a given length to the sbuf.
241 */
242static void
243sbuf_append(struct sbuf *sbuf, const char *text, int length)
244{
245	if (length > 0) {
246		sbuf_need(sbuf, length);
247		memcpy(sbuf->end, text, length);
248		sbuf->end += length;
249	}
250}
251
/*
 * Adds a null-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
260
261/*
262 * Appends an edited null-terminated string to the sbuf.
263 */
264static void
265sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
266{
267	int length = strlen(text);
268	if (length > 0) {
269		sbuf_need(sbuf, length);
270		sbuf->end = copy(text, sbuf->end, length);
271	}
272}
273
274/*
275 * Strips any of a set of chars from the end of the sbuf.
276 */
277static void
278sbuf_strip(struct sbuf *sbuf, const char *set)
279{
280	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
281		sbuf->end--;
282}
283
284/*
285 * Returns the null-terminated string built by the sbuf.
286 */
287static char *
288sbuf_content(struct sbuf *sbuf)
289{
290	*sbuf->end = '\0';
291	return sbuf->content;
292}
293
294/*
295 * Returns true if no man page exists in the directory with
296 * any of the names in the StringList.
297 */
298static int
299no_page_exists(char *dir, StringList *names, char *suffix)
300{
301	char path[MAXPATHLEN];
302	size_t i;
303
304	for (i = 0; i < names->sl_cur; i++) {
305		snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
306		if (access(path, F_OK) < 0) {
307			path[strlen(path) - 3] = '\0';
308			if (access(path, F_OK) < 0)
309				continue;
310		}
311		return 0;
312	}
313	return 1;
314}
315
316static void
317trap_signal(int sig __unused)
318{
319	if (tmp_file[0] != '\0')
320		unlink(tmp_file);
321	exit(1);
322}
323
324/*
325 * Attempts to open an output file.  Returns NULL if unsuccessful.
326 */
327static FILE *
328open_output(char *name)
329{
330	FILE *output;
331
332	whatis_lines = sl_init();
333	if (append) {
334		char line[LINE_ALLOC];
335
336		output = fopen(name, "r");
337		if (output == NULL) {
338			warn("%s", name);
339			exit_code = 1;
340			return NULL;
341		}
342		while (fgets(line, sizeof line, output) != NULL) {
343			line[strlen(line) - 1] = '\0';
344			sl_add(whatis_lines, strdup(line));
345		}
346	}
347	if (common_output == NULL) {
348		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
349		name = tmp_file;
350	}
351	output = fopen(name, "w");
352	if (output == NULL) {
353		warn("%s", name);
354		exit_code = 1;
355		return NULL;
356	}
357	return output;
358}
359
/*
 * qsort(3) comparison routine for an array of C strings: orders the
 * two strings with strcmp(3).
 */
static int
linesort(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return strcmp(lhs, rhs);
}
365
366/*
367 * Writes the unique sorted lines to the output file.
368 */
369static void
370finish_output(FILE *output, char *name)
371{
372	size_t i;
373	char *prev = NULL;
374
375	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
376	for (i = 0; i < whatis_lines->sl_cur; i++) {
377		char *line = whatis_lines->sl_str[i];
378		if (i > 0 && strcmp(line, prev) == 0)
379			continue;
380		prev = line;
381		fputs(line, output);
382		putc('\n', output);
383	}
384	fclose(output);
385	sl_free(whatis_lines, 1);
386	if (common_output == NULL) {
387		rename(tmp_file, name);
388		unlink(tmp_file);
389	}
390}
391
392static FILE *
393open_whatis(char *mandir)
394{
395	char filename[MAXPATHLEN];
396
397	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
398	return open_output(filename);
399}
400
401static void
402finish_whatis(FILE *output, char *mandir)
403{
404	char filename[MAXPATHLEN];
405
406	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
407	finish_output(output, filename);
408}
409
410/*
411 * Tests to see if the given directory has already been visited.
412 */
413static int
414already_visited(char *dir)
415{
416	struct stat st;
417	struct visited_dir *visit;
418
419	if (stat(dir, &st) < 0) {
420		warn("%s", dir);
421		exit_code = 1;
422		return 1;
423	}
424	SLIST_FOREACH(visit, &visited_dirs, next) {
425		if (visit->inode == st.st_ino &&
426		    visit->device == st.st_dev) {
427			warnx("already visited %s", dir);
428			return 1;
429		}
430	}
431	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
432	visit->device = st.st_dev;
433	visit->inode = st.st_ino;
434	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
435	return 0;
436}
437
/*
 * Removes trailing whitespace (as defined by isspace(3)) from a
 * string, in place, returning a pointer to just beyond the new last
 * character, i.e. to the new terminator.
 *
 * A string that is empty or consists only of whitespace becomes the
 * empty string and str itself is returned.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = str + strlen(str);

	/*
	 * Look at the character before rhs so that the pointer never
	 * moves before the start of the string.  The cast keeps bytes
	 * with the high bit set within the range isspace() accepts.
	 */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return rhs;
}
451
/*
 * Returns a pointer to the next non-space character in the string,
 * or to its terminator if only whitespace remains.
 */
static char *
skip_spaces(char *s)
{
	/* The cast keeps high-bit bytes in the range isspace() accepts. */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return s;
}
462
/*
 * Returns whether the string consists of one or more digits and
 * nothing else.  An empty string yields 0.
 */
static int
only_digits(char *line)
{
	/* The casts keep high-bit bytes in the range isdigit() accepts. */
	if (!isdigit((unsigned char)*line++))
		return 0;
	while (isdigit((unsigned char)*line))
		line++;
	return *line == '\0';
}
475
476/*
477 * Returns whether the line is of one of the forms:
478 *	.Sh NAME
479 *	.Sh "NAME"
480 *	etc.
481 * assuming that section_start is ".Sh".
482 */
483static int
484name_section_line(char *line, const char *section_start)
485{
486	char *rhs;
487	const char **title;
488
489	if (strncmp(line, section_start, 3) != 0)
490		return 0;
491	line = skip_spaces(line + 3);
492	rhs = trim_rhs(line);
493	if (*line == '"') {
494		line++;
495		if (*--rhs == '"')
496			*rhs = '\0';
497	}
498	for (title = name_section_titles; *title != NULL; title++)
499		if (strcmp(*title, line) == 0)
500			return 1;
501	return 0;
502}
503
/*
 * Copies characters while removing the most common nroff/troff
 * markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * Any other escape loses its backslash but keeps the character that
 * follows it.  A backslash that is the very last input character is
 * dropped.
 *
 * Reads fromlen characters starting at from and writes at most that
 * many at to.  Returns a pointer just past the last character
 * written; the output is not null-terminated.  Neither the reads nor
 * the skips go beyond from + fromlen.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* step over the backslash; stop if nothing follows it */
		if (++from >= from_end)
			break;
		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			/* other \(xx escapes keep their "(xx" text */
			break;
		case 's':
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end && isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* "(" plus a two-character name */
				if (from_end - from >= 3)
					from += 3;
				else
					from = from_end;
			} else if (*from == '[') {
				while (from < from_end && *from != ']')
					from++;
				if (from < from_end)
					from++;		/* the ']' itself */
			} else
				from++;
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		*to++ = *from++;
	}
	return to;
}
552
553/*
554 * Appends a string with the nroff formatting removed.
555 */
556static void
557add_nroff(char *text)
558{
559	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
560}
561
562/*
563 * Appends "name(suffix), " to whatis_final.
564 */
565static void
566add_whatis_name(char *name, char *suffix)
567{
568	if (*name != '\0') {
569		sbuf_append_str(whatis_final, name);
570		sbuf_append(whatis_final, "(", 1);
571		sbuf_append_str(whatis_final, suffix);
572		sbuf_append(whatis_final, "), ", 3);
573	}
574}
575
576/*
577 * Processes an old-style man(7) line.  This ignores commands with only
578 * a single number argument.
579 */
580static void
581process_man_line(char *line)
582{
583	if (*line == '.') {
584		while (isalpha(*++line))
585			;
586		line = skip_spaces(line);
587		if (only_digits(line))
588			return;
589	} else
590		line = skip_spaces(line);
591	if (*line != '\0') {
592		add_nroff(line);
593		sbuf_append(whatis_proto, " ", 1);
594	}
595}
596
597/*
598 * Processes a new-style mdoc(7) line.
599 */
600static void
601process_mdoc_line(char *line)
602{
603	int xref;
604	int arg = 0;
605	char *line_end = &line[strlen(line)];
606	int orig_length = sbuf_length(whatis_proto);
607	char *next;
608
609	if (*line == '\0')
610		return;
611	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
612		add_nroff(skip_spaces(line));
613		sbuf_append(whatis_proto, " ", 1);
614		return;
615	}
616	xref = strncmp(line, ".Xr", 3) == 0;
617	line += 3;
618	while ((line = skip_spaces(line)) < line_end) {
619		if (*line == '"') {
620			next = ++line;
621			for (;;) {
622				next = strchr(next, '"');
623				if (next == NULL)
624					break;
625				memmove(next, next + 1, strlen(next));
626				line_end--;
627				if (*next != '"')
628					break;
629				next++;
630			}
631		} else
632			next = strpbrk(line, " \t");
633		if (next != NULL)
634			*next++ = '\0';
635		else
636			next = line_end;
637		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
638			if (strcmp(line, "Ns") == 0) {
639				arg = 0;
640				line = next;
641				continue;
642			}
643			if (strstr(mdoc_commands, line) != NULL) {
644				line = next;
645				continue;
646			}
647		}
648		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
649			if (xref) {
650				sbuf_append(whatis_proto, "(", 1);
651				add_nroff(line);
652				sbuf_append(whatis_proto, ")", 1);
653				xref = 0;
654				line = blank;
655			} else
656				sbuf_append(whatis_proto, " ", 1);
657		}
658		add_nroff(line);
659		arg++;
660		line = next;
661	}
662	if (sbuf_length(whatis_proto) > orig_length)
663		sbuf_append(whatis_proto, " ", 1);
664}
665
666/*
667 * Collects a list of comma-separated names from the text.
668 */
669static void
670collect_names(StringList *names, char *text)
671{
672	char *arg;
673
674	for (;;) {
675		arg = text;
676		text = strchr(text, ',');
677		if (text != NULL)
678			*text++ = '\0';
679		sl_add(names, arg);
680		if (text == NULL)
681			return;
682		if (*text == ' ')
683			text++;
684	}
685}
686
/* Parser states used by process_page() while reading a man page. */
enum {
	STATE_UNKNOWN,		/* NAME section not reached yet */
	STATE_MANSTYLE,		/* inside an old-style .SH NAME section */
	STATE_MDOCNAME,		/* inside .Sh NAME, reading the .Nm lines */
	STATE_MDOCDESC		/* inside .Sh NAME, after the .Nm lines */
};
688
689/*
690 * Processes a man page source into a single whatis line and adds it
691 * to whatis_lines.
692 */
693static void
694process_page(struct page_info *page, char *section_dir)
695{
696	gzFile in;
697	char buffer[4096];
698	char *line;
699	StringList *names;
700	char *descr;
701	int state = STATE_UNKNOWN;
702	size_t i;
703
704	sbuf_clear(whatis_proto);
705	if ((in = gzopen(page->filename, "r")) == NULL) {
706		warn("%s", page->filename);
707		exit_code = 1;
708		return;
709	}
710	while (gzgets(in, buffer, sizeof buffer) != NULL) {
711		line = buffer;
712		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
713			continue;
714		switch (state) {
715		/*
716		 * haven't reached the NAME section yet.
717		 */
718		case STATE_UNKNOWN:
719			if (name_section_line(line, ".SH"))
720				state = STATE_MANSTYLE;
721			else if (name_section_line(line, ".Sh"))
722				state = STATE_MDOCNAME;
723			continue;
724		/*
725		 * Inside an old-style .SH NAME section.
726		 */
727		case STATE_MANSTYLE:
728			if (strncmp(line, ".SH", 3) == 0)
729				break;
730			if (strncmp(line, ".SS", 3) == 0)
731				break;
732			trim_rhs(line);
733			if (strcmp(line, ".") == 0)
734				continue;
735			if (strncmp(line, ".IX", 3) == 0) {
736				line += 3;
737				line = skip_spaces(line);
738			}
739			process_man_line(line);
740			continue;
741		/*
742		 * Inside a new-style .Sh NAME section (the .Nm part).
743		 */
744		case STATE_MDOCNAME:
745			trim_rhs(line);
746			if (strncmp(line, ".Nm", 3) == 0) {
747				process_mdoc_line(line);
748				continue;
749			} else {
750				if (strcmp(line, ".") == 0)
751					continue;
752				sbuf_append(whatis_proto, "- ", 2);
753				state = STATE_MDOCDESC;
754			}
755			/* fall through */
756		/*
757		 * Inside a new-style .Sh NAME section (after the .Nm-s).
758		 */
759		case STATE_MDOCDESC:
760			if (strncmp(line, ".Sh", 3) == 0)
761				break;
762			trim_rhs(line);
763			if (strcmp(line, ".") == 0)
764				continue;
765			process_mdoc_line(line);
766			continue;
767		}
768		break;
769	}
770	gzclose(in);
771	sbuf_strip(whatis_proto, " \t.-");
772	line = sbuf_content(whatis_proto);
773	/*
774	 * line now contains the appropriate data, but without
775	 * the proper indentation or the section appended to each name.
776	 */
777	descr = strstr(line, " - ");
778	if (descr == NULL) {
779		descr = strchr(line, ' ');
780		if (descr == NULL) {
781			if (verbose)
782				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
783			return;
784		}
785		*descr++ = '\0';
786	} else {
787		*descr = '\0';
788		descr += 3;
789	}
790	names = sl_init();
791	collect_names(names, line);
792	sbuf_clear(whatis_final);
793	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
794		/*
795		 * Add the page name since that's the only thing that
796		 * man(1) will find.
797		 */
798		add_whatis_name(page->name, page->suffix);
799	}
800	for (i = 0; i < names->sl_cur; i++)
801		add_whatis_name(names->sl_str[i], page->suffix);
802	sl_free(names, 0);
803	sbuf_retract(whatis_final, 2);		/* remove last ", " */
804	while (sbuf_length(whatis_final) < indent)
805		sbuf_append(whatis_final, " ", 1);
806	sbuf_append(whatis_final, " - ", 3);
807	sbuf_append_str(whatis_final, skip_spaces(descr));
808	sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
809}
810
811/*
812 * Sorts pages first by inode number, then by name.
813 */
814static int
815pagesort(const void *a, const void *b)
816{
817	const struct page_info *p1 = *(struct page_info * const *) a;
818	const struct page_info *p2 = *(struct page_info * const *) b;
819	if (p1->inode == p2->inode)
820		return strcmp(p1->name, p2->name);
821	return p1->inode - p2->inode;
822}
823
824/*
825 * Processes a single man section.
826 */
827static void
828process_section(char *section_dir)
829{
830	struct dirent **entries;
831	int nentries;
832	struct page_info **pages;
833	int npages = 0;
834	int i;
835	ino_t prev_inode = 0;
836
837	if (verbose)
838		fprintf(stderr, "  %s\n", section_dir);
839
840	/*
841	 * scan the man section directory for pages
842	 */
843	nentries = scandir(section_dir, &entries, NULL, alphasort);
844	if (nentries < 0) {
845		warn("%s", section_dir);
846		exit_code = 1;
847		return;
848	}
849	/*
850	 * collect information about man pages
851	 */
852	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
853	for (i = 0; i < nentries; i++) {
854		struct page_info *info = new_page_info(section_dir, entries[i]);
855		if (info != NULL)
856			pages[npages++] = info;
857		free(entries[i]);
858	}
859	free(entries);
860	qsort(pages, npages, sizeof(struct page_info *), pagesort);
861	/*
862	 * process each unique page
863	 */
864	for (i = 0; i < npages; i++) {
865		struct page_info *page = pages[i];
866		if (page->inode != prev_inode) {
867			prev_inode = page->inode;
868			if (verbose)
869				fprintf(stderr, "	reading %s\n", page->filename);
870			process_page(page, section_dir);
871		} else if (verbose)
872			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
873		free_page_info(page);
874	}
875	free(pages);
876}
877
/*
 * scandir(3) filter: returns whether the directory entry is a man page
 * section, i.e. whether its name is "man" followed by zero or more
 * alphanumeric characters.
 */
static int
select_sections(const struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	/* The cast keeps high-bit bytes in the range isalnum() accepts. */
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return 0;
	}
	return 1;
}
894
895/*
896 * Processes a single top-level man directory by finding all the
897 * sub-directories named man* and processing each one in turn.
898 */
899static void
900process_mandir(char *dir_name)
901{
902	struct dirent **entries;
903	int nsections;
904	FILE *fp = NULL;
905	int i;
906	struct stat st;
907
908	if (already_visited(dir_name))
909		return;
910	if (verbose)
911		fprintf(stderr, "man directory %s\n", dir_name);
912	nsections = scandir(dir_name, &entries, select_sections, alphasort);
913	if (nsections < 0) {
914		warn("%s", dir_name);
915		exit_code = 1;
916		return;
917	}
918	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
919		return;
920	for (i = 0; i < nsections; i++) {
921		char section_dir[MAXPATHLEN];
922		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
923		process_section(section_dir);
924		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
925		    entries[i]->d_name, machine);
926		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
927			process_section(section_dir);
928		if (strcmp(machine_arch, machine) != 0) {
929			snprintf(section_dir, sizeof section_dir, "%s/%s/%s",
930			    dir_name, entries[i]->d_name, machine_arch);
931			if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
932				process_section(section_dir);
933		}
934		free(entries[i]);
935	}
936	free(entries);
937	if (common_output == NULL)
938		finish_whatis(fp, dir_name);
939}
940
941/*
942 * Processes one argument, which may be a colon-separated list of
943 * directories.
944 */
945static void
946process_argument(const char *arg)
947{
948	char *dir;
949	char *mandir;
950	char *parg;
951
952	parg = strdup(arg);
953	if (parg == NULL)
954		err(1, "out of memory");
955	while ((dir = strsep(&parg, ":")) != NULL) {
956		if (locale != NULL) {
957			asprintf(&mandir, "%s/%s", dir, locale);
958			process_mandir(mandir);
959			free(mandir);
960			if (lang_locale != NULL) {
961				asprintf(&mandir, "%s/%s", dir, lang_locale);
962				process_mandir(mandir);
963				free(mandir);
964			}
965		} else {
966			process_mandir(dir);
967		}
968	}
969	free(parg);
970}
971
972
973int
974main(int argc, char **argv)
975{
976	int opt;
977	FILE *fp = NULL;
978
979	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
980		switch (opt) {
981		case 'a':
982			append++;
983			break;
984		case 'i':
985			indent = atoi(optarg);
986			break;
987		case 'n':
988			whatis_name = optarg;
989			break;
990		case 'o':
991			common_output = optarg;
992			break;
993		case 'v':
994			verbose++;
995			break;
996		case 'L':
997			locale = getenv("LC_ALL");
998			if (locale == NULL)
999				locale = getenv("LC_CTYPE");
1000			if (locale == NULL)
1001				locale = getenv("LANG");
1002			if (locale != NULL) {
1003				char *sep = strchr(locale, '_');
1004				if (sep != NULL && isupper(sep[1]) &&
1005				    isupper(sep[2])) {
1006					asprintf(&lang_locale, "%.*s%s", (int)(ptrdiff_t)(sep - locale), locale, &sep[3]);
1007				}
1008			}
1009			break;
1010		default:
1011			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1012			exit(1);
1013		}
1014	}
1015
1016	signal(SIGINT, trap_signal);
1017	signal(SIGHUP, trap_signal);
1018	signal(SIGQUIT, trap_signal);
1019	signal(SIGTERM, trap_signal);
1020	SLIST_INIT(&visited_dirs);
1021	whatis_proto = new_sbuf();
1022	whatis_final = new_sbuf();
1023
1024	if ((machine = getenv("MACHINE")) == NULL) {
1025		static struct utsname utsname;
1026
1027		if (uname(&utsname) == -1)
1028			err(1, "uname");
1029		machine = utsname.machine;
1030	}
1031
1032	if ((machine_arch = getenv("MACHINE_ARCH")) == NULL)
1033		machine_arch = MACHINE_ARCH;
1034
1035	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1036		err(1, "%s", common_output);
1037	if (optind == argc) {
1038		const char *manpath = getenv("MANPATH");
1039		if (manpath == NULL)
1040			manpath = DEFAULT_MANPATH;
1041		process_argument(manpath);
1042	} else {
1043		while (optind < argc)
1044			process_argument(argv[optind++]);
1045	}
1046	if (common_output != NULL)
1047		finish_output(fp, common_output);
1048	exit(exit_code);
1049}
1050