makewhatis.c revision 96845
1/*-
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 96845 2002-05-18 09:19:08Z markm $");
31
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/queue.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
46
/* Man page tree used when MANPATH is unset and no directory is given. */
#define DEFAULT_MANPATH		"/usr/share/man"

/* Bytes in a freshly allocated sbuf and in the line buffer of open_output(). */
#define LINE_ALLOC		4096

/* Writable empty string; process_mdoc_line() points at it to append nothing. */
static char blank[] = "";
51
/*
 * What new_page_info() records about one candidate man page file.
 * The three strings are heap-allocated and released by free_page_info().
 */
struct page_info {
	char	*filename;	/* "<section dir>/<directory entry name>" */
	char	*name;		/* entry name up to the '.' before the suffix */
	char	*suffix;	/* text after that '.', without any ".gz" */
	int	 gzipped;	/* non-zero when the entry name ended in ".gz" */
	ino_t	 inode;		/* st_ino of the file; equal inodes are duplicates */
};
62
/*
 * One element of the list of man directories that were already processed,
 * identified by the device and inode numbers stat(2) reported for them.
 */
struct visited_dir {
	dev_t				device;	/* st_dev of the directory */
	ino_t				inode;	/* st_ino of the directory */
	SLIST_ENTRY(visited_dir)	next;	/* link to the following element */
};
71
/*
 * A string buffer that grows on demand.  The text occupies
 * [content, end); it is only NUL-terminated by sbuf_content().
 */
struct sbuf {
	char	*content;	/* first byte of the allocation */
	char	*end;		/* one past the last byte of text */
	char	*last;		/* last byte of the allocation */
};
80
/*
 * Drops the final `amount' characters of the sbuf's text.  No check is
 * made that the sbuf actually holds that many characters.
 */
#define sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))

/*
 * Yields the number of characters currently stored in the sbuf.
 */
#define sbuf_length(sbuf)		((sbuf)->end - (sbuf)->content)

/*
 * Type of the copy routines accepted by sbuf_append_edited(): copies up to
 * `length' characters of `from' to `to', possibly dropping some, and
 * returns the position just past the last character written.
 */
typedef char *edited_copy(char *from, char *to, int length);
93
/* -a flag: keep the lines of an already existing whatis file */
static int append;
/* -v flag: report progress and skipped files on stderr */
static int verbose;
/* -i option: minimum width of the name column before " - " */
static int indent = 24;
/* -n option: file name of the whatis database inside a man directory */
static const char *whatis_name = "whatis";
/* -o option: single output file used for all man directories */
static char *common_output;
/* -L flag: locale taken from LC_ALL, LC_CTYPE or LANG */
static char *locale;
/* locale with the "_XX" territory part removed, when it has one */
static char *lang_locale;

/* status passed to exit() once all arguments are processed */
static int exit_code;
/* man directories handled so far, see already_visited() */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
105
106/*
107 * While the whatis line is being formed, it is stored in whatis_proto.
108 * When finished, it is reformatted into whatis_final and then appended
109 * to whatis_lines.
110 */
111static struct sbuf *whatis_proto;
112static struct sbuf *whatis_final;
113static StringList *whatis_lines;	/* collected output lines */
114
115static char tmp_file[MAXPATHLEN];	/* path of temporary file, if any */
116
/*
 * Section titles accepted as the NAME section by name_section_line().
 * The two escaped entries are presumably localized titles in legacy
 * 8-bit encodings -- NOTE(review): confirm which encodings.
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/*
 * Two-letter mdoc(7) macro names, concatenated, that process_mdoc_line()
 * skips when they appear as an argument.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
125
126/*
127 * Frees a struct page_info and its content.
128 */
129static void
130free_page_info(struct page_info *info)
131{
132	free(info->filename);
133	free(info->name);
134	free(info->suffix);
135	free(info);
136}
137
138/*
139 * Allocates and fills in a new struct page_info given the
140 * name of the man section directory and the dirent of the file.
141 * If the file is not a man page, returns NULL.
142 */
143static struct page_info *
144new_page_info(char *dir, struct dirent *dirent)
145{
146	struct page_info *info;
147	int basename_length;
148	char *suffix;
149	struct stat st;
150
151	info = (struct page_info *) malloc(sizeof(struct page_info));
152	if (info == NULL)
153		err(1, "malloc");
154	basename_length = strlen(dirent->d_name);
155	suffix = &dirent->d_name[basename_length];
156	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
157	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
158		suffix -= 3;
159		*suffix = '\0';
160	}
161	for (;;) {
162		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
163			if (*suffix == '.')
164				break;
165			if (verbose)
166				warnx("%s: invalid man page name", info->filename);
167			free(info->filename);
168			free(info);
169			return NULL;
170		}
171	}
172	*suffix++ = '\0';
173	info->name = strdup(dirent->d_name);
174	info->suffix = strdup(suffix);
175	if (stat(info->filename, &st) < 0) {
176		warn("%s", info->filename);
177		free_page_info(info);
178		return NULL;
179	}
180	if (!S_ISREG(st.st_mode)) {
181		if (verbose && !S_ISDIR(st.st_mode))
182			warnx("%s: not a regular file", info->filename);
183		free_page_info(info);
184		return NULL;
185	}
186	info->inode = st.st_ino;
187	return info;
188}
189
190/*
191 * Reset an sbuf's length to 0.
192 */
193static void
194sbuf_clear(struct sbuf *sbuf)
195{
196	sbuf->end = sbuf->content;
197}
198
199/*
200 * Allocate a new sbuf.
201 */
202static struct sbuf *
203new_sbuf(void)
204{
205	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
206	sbuf->content = (char *) malloc(LINE_ALLOC);
207	sbuf->last = sbuf->content + LINE_ALLOC - 1;
208	sbuf_clear(sbuf);
209	return sbuf;
210}
211
212/*
213 * Ensure that there is enough room in the sbuf for chars more characters.
214 */
215static void
216sbuf_need(struct sbuf *sbuf, int nchars)
217{
218	/* let's assume we only need to double it, but check just in case */
219	while (sbuf->end + nchars > sbuf->last) {
220		int alloc;
221		char *new_content;
222
223		alloc = (sbuf->last - sbuf->content + 1) * 2;
224		new_content = (char *) malloc(alloc);
225		memcpy(new_content, sbuf->content, sbuf->end - sbuf->content);
226		sbuf->end = new_content + (sbuf->end - sbuf->content);
227		free(sbuf->content);
228		sbuf->content = new_content;
229	}
230}
231
232/*
233 * Appends a string of a given length to the sbuf.
234 */
235static void
236sbuf_append(struct sbuf *sbuf, const char *text, int length)
237{
238	if (length > 0) {
239		sbuf_need(sbuf, length);
240		memcpy(sbuf->end, text, length);
241		sbuf->end += length;
242	}
243}
244
/*
 * Adds a NUL-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
253
254/*
255 * Appends an edited null-terminated string to the sbuf.
256 */
257static void
258sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
259{
260	int length = strlen(text);
261	if (length > 0) {
262		sbuf_need(sbuf, length);
263		sbuf->end = copy(text, sbuf->end, length);
264	}
265}
266
267/*
268 * Strips any of a set of chars from the end of the sbuf.
269 */
270static void
271sbuf_strip(struct sbuf *sbuf, const char *set)
272{
273	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
274		sbuf->end--;
275}
276
277/*
278 * Returns the null-terminated string built by the sbuf.
279 */
280static char *
281sbuf_content(struct sbuf *sbuf)
282{
283	*sbuf->end = '\0';
284	return sbuf->content;
285}
286
287/*
288 * Returns true if no man page exists in the directory with
289 * any of the names in the StringList.
290 */
291static int
292no_page_exists(char *dir, StringList *names, char *suffix)
293{
294	char path[MAXPATHLEN];
295	int i;
296
297	for (i = 0; i < names->sl_cur; i++) {
298		snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
299		if (access(path, F_OK) < 0) {
300			path[strlen(path) - 3] = '\0';
301			if (access(path, F_OK) < 0)
302				continue;
303		}
304		return 0;
305	}
306	return 1;
307}
308
309static void
310trap_signal(int sig __unused)
311{
312	if (tmp_file[0] != '\0')
313		unlink(tmp_file);
314	exit(1);
315}
316
317/*
318 * Attempts to open an output file.  Returns NULL if unsuccessful.
319 */
320static FILE *
321open_output(char *name)
322{
323	FILE *output;
324
325	whatis_lines = sl_init();
326	if (append) {
327		char line[LINE_ALLOC];
328
329		output = fopen(name, "r");
330		if (output == NULL) {
331			warn("%s", name);
332			exit_code = 1;
333			return NULL;
334		}
335		while (fgets(line, sizeof line, output) != NULL) {
336			line[strlen(line) - 1] = '\0';
337			sl_add(whatis_lines, strdup(line));
338		}
339	}
340	if (common_output == NULL) {
341		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
342		name = tmp_file;
343	}
344	output = fopen(name, "w");
345	if (output == NULL) {
346		warn("%s", name);
347		exit_code = 1;
348		return NULL;
349	}
350	return output;
351}
352
/*
 * qsort(3) comparison function for an array of C strings: orders the
 * strings the two elements point to with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
358
359/*
360 * Writes the unique sorted lines to the output file.
361 */
362static void
363finish_output(FILE *output, char *name)
364{
365	int i;
366	char *prev = NULL;
367
368	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
369	for (i = 0; i < whatis_lines->sl_cur; i++) {
370		char *line = whatis_lines->sl_str[i];
371		if (i > 0 && strcmp(line, prev) == 0)
372			continue;
373		prev = line;
374		fputs(line, output);
375		putc('\n', output);
376	}
377	fclose(output);
378	sl_free(whatis_lines, 1);
379	if (common_output == NULL) {
380		rename(tmp_file, name);
381		unlink(tmp_file);
382	}
383}
384
385static FILE *
386open_whatis(char *mandir)
387{
388	char filename[MAXPATHLEN];
389
390	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
391	return open_output(filename);
392}
393
394static void
395finish_whatis(FILE *output, char *mandir)
396{
397	char filename[MAXPATHLEN];
398
399	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
400	finish_output(output, filename);
401}
402
403/*
404 * Tests to see if the given directory has already been visited.
405 */
406static int
407already_visited(char *dir)
408{
409	struct stat st;
410	struct visited_dir *visit;
411
412	if (stat(dir, &st) < 0) {
413		warn("%s", dir);
414		exit_code = 1;
415		return 1;
416	}
417	SLIST_FOREACH(visit, &visited_dirs, next) {
418		if (visit->inode == st.st_ino &&
419		    visit->device == st.st_dev) {
420			warnx("already visited %s", dir);
421			return 1;
422		}
423	}
424	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
425	visit->device = st.st_dev;
426	visit->inode = st.st_ino;
427	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
428	return 0;
429}
430
/*
 * Removes trailing white space (as defined by isspace()) from a string
 * in place and returns a pointer to the new terminating NUL.
 *
 * An empty or all-white-space string ends up empty and `str' itself is
 * returned.  Characters are converted to unsigned char before being
 * handed to isspace(), as <ctype.h> requires for bytes above 0x7f.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = &str[strlen(str)];

	/* Look at the character before rhs so rhs never leaves the string. */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return rhs;
}
444
/*
 * Returns a pointer to the first character of `s' that is not white
 * space; this is the terminating NUL if the string has no such
 * character.  Characters are converted to unsigned char before being
 * handed to isspace(), as <ctype.h> requires for bytes above 0x7f.
 */
static char *
skip_spaces(char *s)
{
	/* isspace() is false for NUL, so the scan stops at the terminator. */
	while (isspace((unsigned char)*s))
		s++;
	return s;
}
455
/*
 * Returns 1 if the string consists of one or more decimal digits and
 * nothing else, 0 otherwise (including for the empty string).
 * Characters are converted to unsigned char before being handed to
 * isdigit(), as <ctype.h> requires for bytes above 0x7f.
 */
static int
only_digits(char *line)
{
	if (!isdigit((unsigned char)*line))
		return 0;
	do {
		line++;
	} while (isdigit((unsigned char)*line));
	return *line == '\0';
}
468
469/*
470 * Returns whether the line is of one of the forms:
471 *	.Sh NAME
472 *	.Sh "NAME"
473 *	etc.
474 * assuming that section_start is ".Sh".
475 */
476static int
477name_section_line(char *line, const char *section_start)
478{
479	char *rhs;
480	const char **title;
481
482	if (strncmp(line, section_start, 3) != 0)
483		return 0;
484	line = skip_spaces(line + 3);
485	rhs = trim_rhs(line);
486	if (*line == '"') {
487		line++;
488		if (*--rhs == '"')
489			*rhs = '\0';
490	}
491	for (title = name_section_titles; *title != NULL; title++)
492		if (strcmp(*title, line) == 0)
493			return 1;
494	return 0;
495}
496
/*
 * Copies `fromlen' characters from `from' to `to' while dropping the
 * most common nroff/troff markup:
 *	\(em, \(mi, \fX, \f(XX, \*p, \&
 * For any other escape only the backslash is dropped and the character
 * after it is copied.  A backslash that is the very last character is
 * dropped as well (nothing is copied in its place).
 *
 * Escapes cut short by the end of the input are skipped up to the end
 * and never beyond it.  The output is never longer than the input.
 * Returns the position just past the last character written to `to'.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];

	while (from < from_end) {
		size_t left;
		size_t skip = 0;

		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* Step over the backslash and look at what follows. */
		from++;
		left = (size_t)(from_end - from);
		if (left == 0)
			break;
		switch (*from) {
		case '(':
			if (left >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0))
				skip = 3;
			break;
		case 'f':
			/* \f(XX takes a two-letter font name, \fX one letter. */
			skip = (left >= 2 && from[1] == '(') ? 4 : 2;
			break;
		case '*':
			if (left >= 2 && from[1] == 'p')
				skip = 2;
			break;
		case '&':
			skip = 1;
			break;
		default:
			break;
		}
		if (skip == 0) {
			/* Unknown escape: keep the escaped character. */
			*to++ = *from++;
			continue;
		}
		from += (skip < left) ? skip : left;
	}
	return to;
}
539
540/*
541 * Appends a string with the nroff formatting removed.
542 */
543static void
544add_nroff(char *text)
545{
546	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
547}
548
549/*
550 * Appends "name(suffix), " to whatis_final.
551 */
552static void
553add_whatis_name(char *name, char *suffix)
554{
555	if (*name != '\0') {
556		sbuf_append_str(whatis_final, name);
557		sbuf_append(whatis_final, "(", 1);
558		sbuf_append_str(whatis_final, suffix);
559		sbuf_append(whatis_final, "), ", 3);
560	}
561}
562
563/*
564 * Processes an old-style man(7) line.  This ignores commands with only
565 * a single number argument.
566 */
567static void
568process_man_line(char *line)
569{
570	if (*line == '.') {
571		while (isalpha(*++line))
572			;
573		line = skip_spaces(line);
574		if (only_digits(line))
575			return;
576	} else
577		line = skip_spaces(line);
578	if (*line != '\0') {
579		add_nroff(line);
580		sbuf_append(whatis_proto, " ", 1);
581	}
582}
583
584/*
585 * Processes a new-style mdoc(7) line.
586 */
587static void
588process_mdoc_line(char *line)
589{
590	int xref;
591	int arg = 0;
592	char *line_end = &line[strlen(line)];
593	int orig_length = sbuf_length(whatis_proto);
594	char *next;
595
596	if (*line == '\0')
597		return;
598	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
599		add_nroff(skip_spaces(line));
600		return;
601	}
602	xref = strncmp(line, ".Xr", 3) == 0;
603	line += 3;
604	while ((line = skip_spaces(line)) < line_end) {
605		if (*line == '"') {
606			next = ++line;
607			for (;;) {
608				next = strchr(next, '"');
609				if (next == NULL || *next != '"')
610					break;
611				strcpy(next, &next[1]);
612				line_end--;
613				next++;
614			}
615		} else
616			next = strpbrk(line, " \t");
617		if (next != NULL)
618			*next++ = '\0';
619		else
620			next = line_end;
621		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
622			if (strcmp(line, "Ns") == 0) {
623				arg = 0;
624				line = next;
625				continue;
626			}
627			if (strstr(mdoc_commands, line) != NULL) {
628				line = next;
629				continue;
630			}
631		}
632		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
633			if (xref) {
634				sbuf_append(whatis_proto, "(", 1);
635				add_nroff(line);
636				sbuf_append(whatis_proto, ")", 1);
637				xref = 0;
638				line = blank;
639			} else
640				sbuf_append(whatis_proto, " ", 1);
641		}
642		add_nroff(line);
643		arg++;
644		line = next;
645	}
646	if (sbuf_length(whatis_proto) > orig_length)
647		sbuf_append(whatis_proto, " ", 1);
648}
649
650/*
651 * Collects a list of comma-separated names from the text.
652 */
653static void
654collect_names(StringList *names, char *text)
655{
656	char *arg;
657
658	for (;;) {
659		arg = text;
660		text = strchr(text, ',');
661		if (text != NULL)
662			*text++ = '\0';
663		sl_add(names, arg);
664		if (text == NULL)
665			return;
666		if (*text == ' ')
667			text++;
668	}
669}
670
671enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
672
673/*
674 * Processes a man page source into a single whatis line and adds it
675 * to whatis_lines.
676 */
677static void
678process_page(struct page_info *page, char *section_dir)
679{
680	gzFile *in;
681	char buffer[4096];
682	char *line;
683	StringList *names;
684	char *descr;
685	int state = STATE_UNKNOWN;
686	int i;
687
688	sbuf_clear(whatis_proto);
689	if ((in = gzopen(page->filename, "r")) == NULL) {
690		warn("%s", page->filename);
691		exit_code = 1;
692		return;
693	}
694	while (gzgets(in, buffer, sizeof buffer) != NULL) {
695		line = buffer;
696		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
697			continue;
698		switch (state) {
699		/*
700		 * haven't reached the NAME section yet.
701		 */
702		case STATE_UNKNOWN:
703			if (name_section_line(line, ".SH"))
704				state = STATE_MANSTYLE;
705			else if (name_section_line(line, ".Sh"))
706				state = STATE_MDOCNAME;
707			continue;
708		/*
709		 * Inside an old-style .SH NAME section.
710		 */
711		case STATE_MANSTYLE:
712			if (strncmp(line, ".SH", 3) == 0)
713				break;
714			trim_rhs(line);
715			if (strcmp(line, ".") == 0)
716				continue;
717			if (strncmp(line, ".IX", 3) == 0) {
718				line += 3;
719				line = skip_spaces(line);
720			}
721			process_man_line(line);
722			continue;
723		/*
724		 * Inside a new-style .Sh NAME section (the .Nm part).
725		 */
726		case STATE_MDOCNAME:
727			trim_rhs(line);
728			if (strncmp(line, ".Nm", 3) == 0) {
729				process_mdoc_line(line);
730				continue;
731			} else {
732				if (strcmp(line, ".") == 0)
733					continue;
734				sbuf_append(whatis_proto, "- ", 2);
735				state = STATE_MDOCDESC;
736			}
737			/* fall through */
738		/*
739		 * Inside a new-style .Sh NAME section (after the .Nm-s).
740		 */
741		case STATE_MDOCDESC:
742			if (strncmp(line, ".Sh", 3) == 0)
743				break;
744			trim_rhs(line);
745			if (strcmp(line, ".") == 0)
746				continue;
747			process_mdoc_line(line);
748			continue;
749		}
750		break;
751	}
752	gzclose(in);
753	sbuf_strip(whatis_proto, " \t.-");
754	line = sbuf_content(whatis_proto);
755	/*
756	 * line now contains the appropriate data, but without
757	 * the proper indentation or the section appended to each name.
758	 */
759	descr = strstr(line, " - ");
760	if (descr == NULL) {
761		descr = strchr(line, ' ');
762		if (descr == NULL) {
763			if (verbose)
764				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
765			return;
766		}
767		*descr++ = '\0';
768	} else {
769		*descr = '\0';
770		descr += 3;
771	}
772	names = sl_init();
773	collect_names(names, line);
774	sbuf_clear(whatis_final);
775	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
776		/*
777		 * Add the page name since that's the only thing that
778		 * man(1) will find.
779		 */
780		add_whatis_name(page->name, page->suffix);
781	}
782	for (i = 0; i < names->sl_cur; i++)
783		add_whatis_name(names->sl_str[i], page->suffix);
784	sl_free(names, 0);
785	sbuf_retract(whatis_final, 2);		/* remove last ", " */
786	while (sbuf_length(whatis_final) < indent)
787		sbuf_append(whatis_final, " ", 1);
788	sbuf_append(whatis_final, " - ", 3);
789	sbuf_append_str(whatis_final, skip_spaces(descr));
790	sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
791}
792
793/*
794 * Sorts pages first by inode number, then by name.
795 */
796static int
797pagesort(const void *a, const void *b)
798{
799	struct page_info *p1 = *(struct page_info **) a;
800	struct page_info *p2 = *(struct page_info **) b;
801	if (p1->inode == p2->inode)
802		return strcmp(p1->name, p2->name);
803	return p1->inode - p2->inode;
804}
805
806/*
807 * Processes a single man section.
808 */
809static void
810process_section(char *section_dir)
811{
812	struct dirent **entries;
813	int nentries;
814	struct page_info **pages;
815	int npages = 0;
816	int i;
817	int prev_inode = 0;
818
819	if (verbose)
820		fprintf(stderr, "  %s\n", section_dir);
821
822	/*
823	 * scan the man section directory for pages
824	 */
825	nentries = scandir(section_dir, &entries, NULL, alphasort);
826	if (nentries < 0) {
827		warn("%s", section_dir);
828		exit_code = 1;
829		return;
830	}
831	/*
832	 * collect information about man pages
833	 */
834	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
835	for (i = 0; i < nentries; i++) {
836		struct page_info *info = new_page_info(section_dir, entries[i]);
837		if (info != NULL)
838			pages[npages++] = info;
839		free(entries[i]);
840	}
841	free(entries);
842	qsort(pages, npages, sizeof(struct page_info *), pagesort);
843	/*
844	 * process each unique page
845	 */
846	for (i = 0; i < npages; i++) {
847		struct page_info *page = pages[i];
848		if (page->inode != prev_inode) {
849			prev_inode = page->inode;
850			if (verbose)
851				fprintf(stderr, "	reading %s\n", page->filename);
852			process_page(page, section_dir);
853		} else if (verbose)
854			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
855		free_page_info(page);
856	}
857	free(pages);
858}
859
/*
 * scandir(3) filter that picks man section directories: returns 1 when
 * the entry name is "man" followed by zero or more alphanumeric
 * characters, 0 otherwise.
 * Characters are converted to unsigned char before being handed to
 * isalnum(), as <ctype.h> requires for bytes above 0x7f.
 */
static int
select_sections(struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return 0;
	}
	return 1;
}
876
877/*
878 * Processes a single top-level man directory by finding all the
879 * sub-directories named man* and processing each one in turn.
880 */
881static void
882process_mandir(char *dir_name)
883{
884	struct dirent **entries;
885	int nsections;
886	FILE *fp = NULL;
887	int i;
888
889	if (already_visited(dir_name))
890		return;
891	if (verbose)
892		fprintf(stderr, "man directory %s\n", dir_name);
893	nsections = scandir(dir_name, &entries, select_sections, alphasort);
894	if (nsections < 0) {
895		warn("%s", dir_name);
896		exit_code = 1;
897		return;
898	}
899	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
900		return;
901	for (i = 0; i < nsections; i++) {
902		char section_dir[MAXPATHLEN];
903		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
904		process_section(section_dir);
905		free(entries[i]);
906	}
907	free(entries);
908	if (common_output == NULL)
909		finish_whatis(fp, dir_name);
910}
911
912/*
913 * Processes one argument, which may be a colon-separated list of
914 * directories.
915 */
916static void
917process_argument(const char *arg)
918{
919	char *dir;
920	char *mandir;
921	char *parg;
922
923	parg = strdup(arg);
924	if (parg == NULL)
925		err(1, "out of memory");
926	while ((dir = strsep(&parg, ":")) != NULL) {
927		if (locale != NULL) {
928			asprintf(&mandir, "%s/%s", dir, locale);
929			process_mandir(mandir);
930			free(mandir);
931			if (lang_locale != NULL) {
932				asprintf(&mandir, "%s/%s", dir, lang_locale);
933				process_mandir(mandir);
934				free(mandir);
935			}
936		} else {
937			process_mandir(dir);
938		}
939	}
940	free(parg);
941}
942
943
944int
945main(int argc, char **argv)
946{
947	int opt;
948	extern int optind;
949	extern char *optarg;
950	FILE *fp = NULL;
951
952	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
953		switch (opt) {
954		case 'a':
955			append++;
956			break;
957		case 'i':
958			indent = atoi(optarg);
959			break;
960		case 'n':
961			whatis_name = optarg;
962			break;
963		case 'o':
964			common_output = optarg;
965			break;
966		case 'v':
967			verbose++;
968			break;
969		case 'L':
970			locale = getenv("LC_ALL");
971			if (locale == NULL)
972				locale = getenv("LC_CTYPE");
973			if (locale == NULL)
974				locale = getenv("LANG");
975			if (locale != NULL) {
976				char *sep = strchr(locale, '_');
977				if (sep != NULL && isupper(sep[1]) &&
978				    isupper(sep[2])) {
979					asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]);
980				}
981			}
982			break;
983		default:
984			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
985			exit(1);
986		}
987	}
988
989	signal(SIGINT, trap_signal);
990	signal(SIGHUP, trap_signal);
991	signal(SIGQUIT, trap_signal);
992	signal(SIGTERM, trap_signal);
993	SLIST_INIT(&visited_dirs);
994	whatis_proto = new_sbuf();
995	whatis_final = new_sbuf();
996
997	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
998		err(1, "%s", common_output);
999	if (optind == argc) {
1000		const char *manpath = getenv("MANPATH");
1001		if (manpath == NULL)
1002			manpath = DEFAULT_MANPATH;
1003		process_argument(manpath);
1004	} else {
1005		while (optind < argc)
1006			process_argument(argv[optind++]);
1007	}
1008	if (common_output != NULL)
1009		finish_output(fp, common_output);
1010	exit(exit_code);
1011}
1012