makewhatis.c revision 262539
1/*-
2 * Copyright (c) 2002 John Rochester
3 * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer,
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * $FreeBSD: head/usr.bin/makewhatis/makewhatis.c 262539 2014-02-27 00:43:10Z eadler $
30 */
31
#include <sys/tree.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/stat.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
47
/* Man hierarchy used when no directory argument and no MANPATH is given. */
#define DEFAULT_MANPATH		"/usr/share/man"
/* Initial sbuf allocation; also the line buffer size for re-read whatis files. */
#define LINE_ALLOC		4096

/* Writable empty string, substituted for an argument that was already emitted. */
static char blank[] = "";
52
53/*
54 * Information collected about each man page alias.
55 */
56struct page_alias {
57	RB_ENTRY(page_alias) entry;
58	char *filename;
59	char *name;
60	char *suffix;
61	int gzipped;
62};
63
64/*
65 * Information collected about each unique man page.
66 */
67struct page_info {
68	RB_HEAD(page_alias_tree, page_alias) head;
69	RB_ENTRY(page_info) entry;
70	ino_t inode;
71};
72
73static RB_HEAD(page_info_tree, page_info) page_head = RB_INITIALIZER(&page_head);
74
75/*
76 * Sorts page info by inode number.
77 */
78static int
79infosort(const struct page_info *a, const struct page_info *b)
80{
81	return (memcmp(&a->inode, &b->inode, sizeof(a->inode)));
82}
83
84RB_PROTOTYPE(page_info_tree, page_info, entry, infosort);
85RB_GENERATE(page_info_tree, page_info, entry, infosort);
86
87/*
88 * Sorts page alias first by suffix, then name.
89 */
90static int
91aliassort(const struct page_alias *a, const struct page_alias *b)
92{
93	int ret = strcmp(a->suffix, b->suffix);
94	if (ret) {
95		return (ret);
96	}
97
98	return (strcmp(a->name, b->name));
99}
100
101RB_PROTOTYPE(page_alias_tree, page_alias, entry, aliassort);
102RB_GENERATE(page_alias_tree, page_alias, entry, aliassort);
103
/*
 * Identity (device and inode) of a directory that was already handed
 * to already_visited(); records are chained on visited_dirs.
 */
struct visited_dir {
	dev_t device;				/* st_dev of the directory */
	ino_t inode;				/* st_ino of the directory */
	SLIST_ENTRY(visited_dir) next;		/* list linkage */
};
112
/*
 * A growable character buffer.
 */
struct sbuf {
	char *content;		/* first byte of the allocation */
	char *end;		/* one past the last character in use */
	char *last;		/* final byte of the allocation */
};

/* Drops the trailing `amount' characters from the sbuf. */
#define sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))

/* Yields the number of characters currently held by the sbuf. */
#define sbuf_length(sbuf)		((sbuf)->end - (sbuf)->content)
132
133typedef char *edited_copy(char *from, char *to, int length);
134
135static int append;			/* -a flag: append to existing whatis */
136static int verbose;			/* -v flag: be verbose with warnings */
137static int indent = 24;			/* -i option: description indentation */
138static const char *whatis_name="whatis";/* -n option: the name */
139static char *common_output;		/* -o option: the single output file */
140static char *locale;			/* user's locale if -L is used */
141static char *lang_locale;		/* short form of locale */
142static const char *machine;
143
144static int exit_code;			/* exit code to use when finished */
145static SLIST_HEAD(, visited_dir) visited_dirs =
146    SLIST_HEAD_INITIALIZER(visited_dirs);
147
148/*
149 * While the whatis line is being formed, it is stored in whatis_proto.
150 * When finished, it is reformatted into whatis_final and then appended
151 * to whatis_lines.
152 */
153static struct sbuf *whatis_proto;
154static struct sbuf *whatis_final;
155static StringList *whatis_lines;	/* collected output lines */
156
157static char tmp_file[MAXPATHLEN];	/* path of temporary file, if any */
158
/* Section titles accepted as the NAME section; the list ends with NULL. */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/* Two-letter mdoc(7) macro names, concatenated, that are dropped from output. */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
167
168/*
169 * Frees a struct page_info and its content.
170 */
171static void
172free_page_info(struct page_info *info)
173{
174	struct page_alias *alias;
175
176	while ((alias = RB_ROOT(&info->head))) {
177		RB_REMOVE(page_alias_tree, &info->head, alias);
178		free(alias->filename);
179		free(alias->suffix);
180		free(alias->name);
181		free(alias);
182	}
183
184	free(info);
185}
186
187/*
188 * Allocates and fills in a new struct page_alias given the
189 * full file name of the man page and its dirent.
190 * If the file is not a man page, nothing is added.
191 */
192static void
193new_page_alias(struct page_info *info, char *filename, struct dirent *dirent)
194{
195	int gzipped, basename_length;
196	struct page_alias *alias;
197	char *suffix;
198
199	basename_length = strlen(dirent->d_name);
200	suffix = &dirent->d_name[basename_length];
201
202	gzipped = basename_length >= 4 &&
203	    strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0;
204	if (gzipped) {
205		suffix -= 3;
206		*suffix = '\0';
207	}
208
209	for (;;) {
210		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
211			if (*suffix == '.') {
212				break;
213			}
214			if (verbose) {
215				warnx("%s: invalid man page name", filename);
216			}
217			return;
218		}
219	}
220
221	*suffix++ = '\0';
222
223	alias = malloc(sizeof(*alias));
224	if (alias == NULL) {
225		err(1, "malloc");
226	}
227
228	alias->name = strdup(dirent->d_name);	/* XXX unsafe */
229	alias->filename = strdup(filename);	/* XXX unsafe */
230	alias->suffix = strdup(suffix);		/* XXX unsafe */
231	alias->gzipped = gzipped;
232
233	RB_INSERT(page_alias_tree, &info->head, alias);
234}
235
236/*
237 * Reset an sbuf's length to 0.
238 */
239static void
240sbuf_clear(struct sbuf *sbuf)
241{
242	sbuf->end = sbuf->content;
243}
244
245/*
246 * Allocate a new sbuf.
247 */
248static struct sbuf *
249new_sbuf(void)
250{
251	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
252	sbuf->content = malloc(LINE_ALLOC);
253	sbuf->last = sbuf->content + LINE_ALLOC - 1;
254	sbuf_clear(sbuf);
255	return(sbuf);
256}
257
258/*
259 * Ensure that there is enough room in the sbuf for nchars more characters.
260 */
261static void
262sbuf_need(struct sbuf *sbuf, int nchars)
263{
264	char *new_content;
265	size_t size, cntsize;
266
267	/* double the size of the allocation until the buffer is big enough */
268	while (sbuf->end + nchars > sbuf->last) {
269		size = sbuf->last + 1 - sbuf->content;
270		size *= 2;
271		cntsize = sbuf->end - sbuf->content;
272
273		new_content = malloc(size);
274		memcpy(new_content, sbuf->content, cntsize);
275		free(sbuf->content);
276		sbuf->content = new_content;
277		sbuf->end = new_content + cntsize;
278		sbuf->last = new_content + size - 1;
279	}
280}
281
282/*
283 * Appends a string of a given length to the sbuf.
284 */
285static void
286sbuf_append(struct sbuf *sbuf, const char *text, int length)
287{
288	if (length > 0) {
289		sbuf_need(sbuf, length);
290		memcpy(sbuf->end, text, length);
291		sbuf->end += length;
292	}
293}
294
/*
 * Adds a whole null-terminated string to the end of the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
303
304/*
305 * Appends an edited null-terminated string to the sbuf.
306 */
307static void
308sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
309{
310	int length = strlen(text);
311	if (length > 0) {
312		sbuf_need(sbuf, length);
313		sbuf->end = copy(text, sbuf->end, length);
314	}
315}
316
317/*
318 * Strips any of a set of chars from the end of the sbuf.
319 */
320static void
321sbuf_strip(struct sbuf *sbuf, const char *set)
322{
323	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
324		sbuf->end--;
325}
326
327/*
328 * Returns the null-terminated string built by the sbuf.
329 */
330static char *
331sbuf_content(struct sbuf *sbuf)
332{
333	*sbuf->end = '\0';
334	return(sbuf->content);
335}
336
337static void
338trap_signal(int sig __unused)
339{
340	if (tmp_file[0] != '\0')
341		unlink(tmp_file);
342	exit(1);
343}
344
345/*
346 * Attempts to open an output file.  Returns NULL if unsuccessful.
347 */
348static FILE *
349open_output(char *name)
350{
351	FILE *output;
352
353	whatis_lines = sl_init();
354	if (append) {
355		char line[LINE_ALLOC];
356
357		output = fopen(name, "r");
358		if (output == NULL) {
359			warn("%s", name);
360			exit_code = 1;
361			return(NULL);
362		}
363		while (fgets(line, sizeof line, output) != NULL) {
364			line[strlen(line) - 1] = '\0';
365			sl_add(whatis_lines, strdup(line));
366		}
367	}
368	if (common_output == NULL) {
369		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
370		name = tmp_file;
371	}
372	output = fopen(name, "w");
373	if (output == NULL) {
374		warn("%s", name);
375		exit_code = 1;
376		return(NULL);
377	}
378	return(output);
379}
380
/*
 * qsort(3) comparator for an array of string pointers.
 */
static int
linesort(const void *a, const void *b)
{
	const char *lhs = *(const char * const *)a;
	const char *rhs = *(const char * const *)b;

	return (strcmp(lhs, rhs));
}
386
387/*
388 * Writes the unique sorted lines to the output file.
389 */
390static void
391finish_output(FILE *output, char *name)
392{
393	size_t i;
394	char *prev = NULL;
395
396	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *),
397	      linesort);
398	for (i = 0; i < whatis_lines->sl_cur; i++) {
399		char *line = whatis_lines->sl_str[i];
400		if (i > 0 && strcmp(line, prev) == 0)
401			continue;
402		prev = line;
403		fputs(line, output);
404		putc('\n', output);
405	}
406	fclose(output);
407	sl_free(whatis_lines, 1);
408	if (common_output == NULL) {
409		rename(tmp_file, name);
410		unlink(tmp_file);
411	}
412}
413
414static FILE *
415open_whatis(char *mandir)
416{
417	char filename[MAXPATHLEN];
418
419	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
420	return(open_output(filename));
421}
422
423static void
424finish_whatis(FILE *output, char *mandir)
425{
426	char filename[MAXPATHLEN];
427
428	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
429	finish_output(output, filename);
430}
431
432/*
433 * Tests to see if the given directory has already been visited.
434 */
435static int
436already_visited(char *dir)
437{
438	struct stat st;
439	struct visited_dir *visit;
440
441	if (stat(dir, &st) < 0) {
442		warn("%s", dir);
443		exit_code = 1;
444		return(1);
445	}
446	SLIST_FOREACH(visit, &visited_dirs, next) {
447		if (visit->inode == st.st_ino &&
448		    visit->device == st.st_dev) {
449			warnx("already visited %s", dir);
450			return(1);
451		}
452	}
453	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
454	visit->device = st.st_dev;
455	visit->inode = st.st_ino;
456	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
457	return(0);
458}
459
/*
 * Removes trailing whitespace from a string in place and returns a
 * pointer to the new terminating null byte.  A string consisting only
 * of whitespace becomes empty.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = str + strlen(str);

	/* Inspect the character before rhs so rhs never moves below str. */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return (rhs);
}
473
/*
 * Returns a pointer to the first non-whitespace character of the
 * string, or to its terminating null byte if there is none.
 */
static char *
skip_spaces(char *s)
{
	/* ctype functions need an unsigned char value. */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return (s);
}
484
/*
 * Returns non-zero if the string is non-empty and consists of decimal
 * digits only.
 */
static int
only_digits(char *line)
{
	/* ctype functions need an unsigned char value. */
	if (!isdigit((unsigned char)*line))
		return (0);
	do {
		line++;
	} while (isdigit((unsigned char)*line));
	return (*line == '\0');
}
497
498/*
499 * Returns whether the line is of one of the forms:
500 *	.Sh NAME
501 *	.Sh "NAME"
502 *	etc.
503 * assuming that section_start is ".Sh".
504 */
505static int
506name_section_line(char *line, const char *section_start)
507{
508	char *rhs;
509	const char **title;
510
511	if (strncmp(line, section_start, 3) != 0)
512		return(0);
513	line = skip_spaces(line + 3);
514	rhs = trim_rhs(line);
515	if (*line == '"') {
516		line++;
517		if (*--rhs == '"')
518			*rhs = '\0';
519	}
520	for (title = name_section_titles; *title != NULL; title++)
521		if (strcmp(*title, line) == 0)
522			return(1);
523	return(0);
524}
525
/*
 * Copies fromlen characters from `from' to `to' while removing the
 * most common nroff/troff markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape only the backslash is removed.  A backslash that
 * is the last character of the input is dropped.
 *
 * Returns the position in `to' just past the last character written;
 * never more than fromlen characters are written.  The input is never
 * read at or beyond from + fromlen.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* Step over the backslash; stop if nothing follows it. */
		if (++from >= from_end)
			break;
		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			break;
		case 's':
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end &&
			    isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* "(xx": skip at most what is left. */
				if (from_end - from >= 3)
					from += 3;
				else
					from = from_end;
			} else if (*from == '[') {
				while (from < from_end && *from != ']')
					from++;
				if (from < from_end)
					from++;
			} else
				from++;
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		/* Unknown escape: keep the character after the backslash. */
		*to++ = *from++;
	}
	return (to);
}
575
576/*
577 * Appends a string with the nroff formatting removed.
578 */
579static void
580add_nroff(char *text)
581{
582	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
583}
584
585/*
586 * Appends "name(suffix), " to whatis_final.
587 */
588static void
589add_whatis_name(char *name, char *suffix)
590{
591	if (*name != '\0') {
592		sbuf_append_str(whatis_final, name);
593		sbuf_append(whatis_final, "(", 1);
594		sbuf_append_str(whatis_final, suffix);
595		sbuf_append(whatis_final, "), ", 3);
596	}
597}
598
599/*
600 * Processes an old-style man(7) line.  This ignores commands with only
601 * a single number argument.
602 */
603static void
604process_man_line(char *line)
605{
606	if (*line == '.') {
607		while (isalpha(*++line))
608			;
609		line = skip_spaces(line);
610		if (only_digits(line))
611			return;
612	} else
613		line = skip_spaces(line);
614	if (*line != '\0') {
615		add_nroff(line);
616		sbuf_append(whatis_proto, " ", 1);
617	}
618}
619
620struct mdoc_text {
621	const char *mdoc;
622	const char *text;
623};
624
625static int
626process_mdoc_macro(char *line)
627{
628	static const struct mdoc_text list[] = {
629		{ ".At", "AT&T UNIX" },
630		{ ".Bsx", "BSD/OS" },
631		{ ".Bx", "BSD" },
632		{ ".Dx", "DragonFly" },
633		{ ".Fx", "FreeBSD" },
634		{ ".Nx", "NetBSD" },
635		{ ".Ox", "OpenBSD" },
636		{ ".Ux", "UNIX" },
637	};
638	unsigned int i;
639
640	for (i = 0; i < sizeof(list) / sizeof(list[0]); ++i) {
641		if (!strcmp(line, list[i].mdoc)) {
642			sbuf_append(whatis_proto, list[i].text,
643			    strlen(list[i].text));
644			sbuf_append(whatis_proto, " ", 1);
645			return (1);
646		}
647	}
648
649	return (0);
650}
651
652/*
653 * Processes a new-style mdoc(7) line.
654 */
655static void
656process_mdoc_line(char *line)
657{
658	int xref;
659	int arg = 0;
660	char *line_end = &line[strlen(line)];
661	int orig_length = sbuf_length(whatis_proto);
662	char *next;
663
664	if (*line == '\0')
665		return;
666	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
667		add_nroff(skip_spaces(line));
668		sbuf_append(whatis_proto, " ", 1);
669		return;
670	}
671	if (process_mdoc_macro(line)) {
672		return;
673	}
674	xref = strncmp(line, ".Xr", 3) == 0;
675	line += 3;
676	while ((line = skip_spaces(line)) < line_end) {
677		if (*line == '"') {
678			next = ++line;
679			for (;;) {
680				next = strchr(next, '"');
681				if (next == NULL)
682					break;
683				memmove(next, next + 1, strlen(next));
684				line_end--;
685				if (*next != '"')
686					break;
687				next++;
688			}
689		} else
690			next = strpbrk(line, " \t");
691		if (next != NULL)
692			*next++ = '\0';
693		else
694			next = line_end;
695		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
696			if (strcmp(line, "Ns") == 0) {
697				arg = 0;
698				line = next;
699				continue;
700			}
701			if (strstr(mdoc_commands, line) != NULL) {
702				line = next;
703				continue;
704			}
705		}
706		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
707			if (xref) {
708				sbuf_append(whatis_proto, "(", 1);
709				add_nroff(line);
710				sbuf_append(whatis_proto, ")", 1);
711				xref = 0;
712				line = blank;
713			} else
714				sbuf_append(whatis_proto, " ", 1);
715		}
716		add_nroff(line);
717		arg++;
718		line = next;
719	}
720	if (sbuf_length(whatis_proto) > orig_length)
721		sbuf_append(whatis_proto, " ", 1);
722}
723
724enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
725
726/*
727 * Processes a man page source into a single whatis line and adds it
728 * to whatis_lines.
729 */
730static void
731process_page(struct page_info *info)
732{
733	int state = STATE_UNKNOWN;
734	struct page_alias *alias;
735	char *line, *descr;
736	char buffer[4096];
737	gzFile in;
738
739	/*
740	 * Only read the page once for each inode.  It's
741	 * safe to assume that page->list is set.
742	 */
743	alias = RB_MIN(page_alias_tree, &info->head);
744
745	if (verbose) {
746		fprintf(stderr, "\treading %s\n", alias->filename);
747	}
748
749	sbuf_clear(whatis_proto);
750	if ((in = gzopen(alias->filename, "r")) == NULL) {
751		warn("%s", alias->filename);
752		exit_code = 1;
753		return;
754	}
755	while (gzgets(in, buffer, sizeof(buffer)) != NULL) {
756		line = buffer;
757		if (strncmp(line, ".\\\"", 3) == 0)	/* ignore comments */
758			continue;
759		switch (state) {
760		/*
761		 * haven't reached the NAME section yet.
762		 */
763		case STATE_UNKNOWN:
764			if (name_section_line(line, ".SH"))
765				state = STATE_MANSTYLE;
766			else if (name_section_line(line, ".Sh"))
767				state = STATE_MDOCNAME;
768			continue;
769		/*
770		 * Inside an old-style .SH NAME section.
771		 */
772		case STATE_MANSTYLE:
773			if (strncmp(line, ".SH", 3) == 0)
774				break;
775			if (strncmp(line, ".SS", 3) == 0)
776				break;
777			trim_rhs(line);
778			if (strcmp(line, ".") == 0)
779				continue;
780			if (strncmp(line, ".IX", 3) == 0) {
781				line += 3;
782				line = skip_spaces(line);
783			}
784			process_man_line(line);
785			continue;
786		/*
787		 * Inside a new-style .Sh NAME section (the .Nm part).
788		 */
789		case STATE_MDOCNAME:
790			trim_rhs(line);
791			if (strncmp(line, ".Nm", 3) == 0) {
792				process_mdoc_line(line);
793				continue;
794			} else {
795				if (strcmp(line, ".") == 0)
796					continue;
797				sbuf_append(whatis_proto, "- ", 2);
798				state = STATE_MDOCDESC;
799			}
800			/* fall through */
801		/*
802		 * Inside a new-style .Sh NAME section (after the .Nm-s).
803		 */
804		case STATE_MDOCDESC:
805			if (strncmp(line, ".Sh", 3) == 0)
806				break;
807			trim_rhs(line);
808			if (strcmp(line, ".") == 0)
809				continue;
810			process_mdoc_line(line);
811			continue;
812		}
813		break;
814	}
815	gzclose(in);
816	sbuf_strip(whatis_proto, " \t.-");
817	line = sbuf_content(whatis_proto);
818	/*
819	 * line now contains the appropriate data, but without
820	 * the proper indentation or the section appended to each name.
821	 */
822	descr = strstr(line, " - ");
823	if (descr == NULL) {
824		descr = strchr(line, ' ');
825		if (descr == NULL) {
826			if (verbose)
827				fprintf(stderr,
828					"\tignoring junk description \"%s\"\n",
829					line);
830			return;
831		}
832		*descr++ = '\0';
833	} else {
834		*descr = '\0';
835		descr += 3;
836	}
837	sbuf_clear(whatis_final);
838	RB_FOREACH(alias, page_alias_tree, &info->head) {
839		/*
840		 * This won't append names stored in `line'.
841		 * The reason for that is that we cannot be sure
842		 * which section they belong to unless we have
843		 * a real alias (via MLINKS) in this list.
844		 */
845		add_whatis_name(alias->name, alias->suffix);
846	}
847	sbuf_retract(whatis_final, 2);		/* remove last ", " */
848	while (sbuf_length(whatis_final) < indent)
849		sbuf_append(whatis_final, " ", 1);
850	sbuf_append(whatis_final, " - ", 3);
851	sbuf_append_str(whatis_final, skip_spaces(descr));
852	sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
853}
854
855/*
856 * Processes a single man section.
857 */
858static void
859process_section(char *section_dir)
860{
861	struct dirent **entries;
862	struct page_info *info;
863	int nentries;
864	int i;
865
866	if (verbose) {
867		fprintf(stderr, "  %s\n", section_dir);
868	}
869
870	/*
871	 * scan the man section directory for pages
872	 */
873	nentries = scandir(section_dir, &entries, NULL, alphasort);
874	if (nentries < 0) {
875		warn("%s", section_dir);
876		exit_code = 1;
877		return;
878	}
879
880	/*
881	 * collect information about man pages
882	 */
883	for (i = 0; i < nentries; i++) {
884		struct page_info ref;
885		char *filename;
886		struct stat st;
887
888		if (asprintf(&filename, "%s/%s", section_dir,
889		    entries[i]->d_name) < 0) {
890			err(1, "malloc");
891		}
892
893		if (stat(filename, &st) < 0) {
894			warn("%s", filename);
895			goto process_section_next;
896		}
897
898		if (!S_ISREG(st.st_mode)) {
899			if (verbose && !S_ISDIR(st.st_mode))
900			    warnx("%s: not a regular file", filename);
901			goto process_section_next;
902		}
903
904		ref.inode = st.st_ino;
905
906		info = RB_FIND(page_info_tree, &page_head, &ref);
907		if (info == NULL) {
908			info = malloc(sizeof(*info));
909			if (info == NULL) {
910				err(1, "malloc");
911			}
912
913			bzero(info, sizeof(*info));
914			info->inode = st.st_ino;
915			RB_INIT(&info->head);
916
917			RB_INSERT(page_info_tree, &page_head, info);
918		}
919
920		new_page_alias(info, filename, entries[i]);
921
922process_section_next:
923
924		free(entries[i]);
925		free(filename);
926	}
927	free(entries);
928}
929
/*
 * scandir(3) filter: returns 1 if the entry name is "man" followed by
 * nothing but alphanumeric characters (possibly none), 0 otherwise.
 */
static int
select_sections(const struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return (0);
	/* ctype functions need an unsigned char value. */
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return (0);
	}
	return (1);
}
946
947/*
948 * Processes a single top-level man directory by finding all the
949 * sub-directories named man* and processing each one in turn.
950 */
951static void
952process_mandir(char *dir_name)
953{
954	struct dirent **entries;
955	struct page_info *info;
956	int nsections;
957	FILE *fp = NULL;
958	int i;
959	struct stat st;
960
961	if (already_visited(dir_name))
962		return;
963	if (verbose)
964		fprintf(stderr, "man directory %s\n", dir_name);
965	nsections = scandir(dir_name, &entries, select_sections, alphasort);
966	if (nsections < 0) {
967		warn("%s", dir_name);
968		exit_code = 1;
969		return;
970	}
971	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
972		return;
973	for (i = 0; i < nsections; i++) {
974		char section_dir[MAXPATHLEN];
975		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name,
976			 entries[i]->d_name);
977		process_section(section_dir);
978		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
979			 entries[i]->d_name, machine);
980		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
981			process_section(section_dir);
982		free(entries[i]);
983	}
984	free(entries);
985
986	/*
987	 * process and free all pages
988	 */
989	while ((info = RB_ROOT(&page_head))) {
990		RB_REMOVE(page_info_tree, &page_head, info);
991		process_page(info);
992		free_page_info(info);
993	}
994
995	if (common_output == NULL)
996		finish_whatis(fp, dir_name);
997}
998
999/*
1000 * Processes one argument, which may be a colon-separated list of
1001 * directories.
1002 */
1003static void
1004process_argument(const char *arg)
1005{
1006	char *dir;
1007	char *mandir;
1008	char *parg;
1009
1010	parg = strdup(arg);
1011	if (parg == NULL)
1012		err(1, "out of memory");
1013	while ((dir = strsep(&parg, ":")) != NULL) {
1014		if (locale != NULL) {
1015			asprintf(&mandir, "%s/%s", dir, locale);
1016			process_mandir(mandir);
1017			free(mandir);
1018			if (lang_locale != NULL) {
1019				asprintf(&mandir, "%s/%s", dir, lang_locale);
1020				process_mandir(mandir);
1021				free(mandir);
1022			}
1023		} else {
1024			process_mandir(dir);
1025		}
1026	}
1027	free(parg);
1028}
1029
1030
1031int
1032main(int argc, char **argv)
1033{
1034	int opt;
1035	FILE *fp = NULL;
1036
1037	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
1038		switch (opt) {
1039		case 'a':
1040			append++;
1041			break;
1042		case 'i':
1043			indent = atoi(optarg);
1044			break;
1045		case 'n':
1046			whatis_name = optarg;
1047			break;
1048		case 'o':
1049			common_output = optarg;
1050			break;
1051		case 'v':
1052			verbose++;
1053			break;
1054		case 'L':
1055			locale = getenv("LC_ALL");
1056			if (locale == NULL)
1057				locale = getenv("LC_CTYPE");
1058			if (locale == NULL)
1059				locale = getenv("LANG");
1060			if (locale != NULL) {
1061				char *sep = strchr(locale, '_');
1062				if (sep != NULL && isupper(sep[1]) &&
1063				    isupper(sep[2])) {
1064					asprintf(&lang_locale, "%.*s%s",
1065					    (int)(sep - locale),
1066					    locale, &sep[3]);
1067				}
1068			}
1069			break;
1070		default:
1071			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1072			exit(1);
1073		}
1074	}
1075
1076	signal(SIGINT, trap_signal);
1077	signal(SIGHUP, trap_signal);
1078	signal(SIGQUIT, trap_signal);
1079	signal(SIGTERM, trap_signal);
1080	SLIST_INIT(&visited_dirs);
1081	whatis_proto = new_sbuf();
1082	whatis_final = new_sbuf();
1083
1084	if ((machine = getenv("MACHINE")) == NULL)
1085		machine = MACHINE;
1086
1087	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1088		err(1, "%s", common_output);
1089	if (optind == argc) {
1090		const char *manpath = getenv("MANPATH");
1091		if (manpath == NULL)
1092			manpath = DEFAULT_MANPATH;
1093		process_argument(manpath);
1094	} else {
1095		while (optind < argc)
1096			process_argument(argv[optind++]);
1097	}
1098	if (common_output != NULL)
1099		finish_output(fp, common_output);
1100	exit(exit_code);
1101}
1102