196845Smarkm/*-
296845Smarkm * Copyright (c) 2002 John Rochester
396845Smarkm * All rights reserved.
496845Smarkm *
596845Smarkm * Redistribution and use in source and binary forms, with or without
696845Smarkm * modification, are permitted provided that the following conditions
796845Smarkm * are met:
896845Smarkm * 1. Redistributions of source code must retain the above copyright
996845Smarkm *    notice, this list of conditions and the following disclaimer,
1096845Smarkm *    in this position and unchanged.
1196845Smarkm * 2. Redistributions in binary form must reproduce the above copyright
1296845Smarkm *    notice, this list of conditions and the following disclaimer in the
1396845Smarkm *    documentation and/or other materials provided with the distribution.
1496845Smarkm * 3. The name of the author may not be used to endorse or promote products
1596845Smarkm *    derived from this software without specific prior written permission
1696845Smarkm *
1796845Smarkm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1896845Smarkm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1996845Smarkm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2096845Smarkm * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2196845Smarkm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2296845Smarkm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2396845Smarkm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2496845Smarkm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2596845Smarkm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2696845Smarkm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2796845Smarkm */
2896845Smarkm
29262540Seadler#include <sys/cdefs.h>
30262540Seadler__FBSDID("$FreeBSD$");
31262540Seadler
3296845Smarkm#include <sys/types.h>
33262540Seadler#include <sys/stat.h>
3496845Smarkm#include <sys/param.h>
3596845Smarkm#include <sys/queue.h>
36262540Seadler#include <sys/utsname.h>
3796845Smarkm
3896845Smarkm#include <ctype.h>
3996845Smarkm#include <dirent.h>
4096845Smarkm#include <err.h>
41262540Seadler#include <stddef.h>
4296845Smarkm#include <stdio.h>
4396845Smarkm#include <stdlib.h>
4496845Smarkm#include <string.h>
4596845Smarkm#include <stringlist.h>
4696845Smarkm#include <unistd.h>
4796845Smarkm#include <zlib.h>
4896845Smarkm
/* Default man page hierarchy (its use lies outside this excerpt). */
#define DEFAULT_MANPATH		"/usr/share/man"
/* Initial sbuf allocation and size of the line buffer in open_output(). */
#define LINE_ALLOC		4096

/* A writable empty string. */
static char blank[] = "";
5396845Smarkm
/*
 * Per-page data gathered while scanning a man section directory.
 */
struct page_info {
	char *filename;		/* "dir/entry" path of the page file */
	char *name;		/* entry name in front of the section suffix */
	char *suffix;		/* section suffix following the last '.' */
	int gzipped;		/* non-zero when the entry name ends in ".gz" */
	ino_t inode;		/* st_ino reported by stat(2) for the file */
};
6496845Smarkm
/*
 * One list node per directory that has been visited, identified by
 * its device and inode numbers.
 */
struct visited_dir {
	dev_t device;
	ino_t inode;
	SLIST_ENTRY(visited_dir) next;
};
7396845Smarkm
/*
 * A string buffer that grows on demand.
 */
struct sbuf {
	char *content;		/* first character of the buffer */
	char *end;		/* one past the last character of content */
	char *last;		/* final allocated character */
};
8296845Smarkm
/*
 * Drops the trailing amount characters by moving the sbuf's end back.
 */
#define sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))

/*
 * Yields the number of characters currently held by the sbuf.
 */
#define sbuf_length(sbuf)		((sbuf)->end - (sbuf)->content)
9396845Smarkm
/*
 * Type of the copy routine handed to sbuf_append_edited(): it copies
 * length characters of from into to and returns the position just past
 * the last character it wrote.
 */
typedef char *edited_copy(char *from, char *to, int length);
9596845Smarkm
/* -a flag: append to existing whatis */
static int append;
/* -v flag: be verbose with warnings */
static int verbose;
/* -i option: description indentation */
static int indent = 24;
/* -n option: the name of the whatis file */
static const char *whatis_name = "whatis";
/* -o option: the single output file */
static char *common_output;
/* user's locale if -L is used */
static char *locale;
/* short form of locale */
static char *lang_locale;
/* NOTE(review): not referenced in this excerpt; presumably machine names */
static const char *machine, *machine_arch;

/* exit code to use when finished */
static int exit_code;
/* directories recorded by already_visited() */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
10896845Smarkm
/*
 * A whatis line is first assembled in whatis_proto.  Once complete it
 * is reformatted into whatis_final, and a copy of that is appended to
 * whatis_lines.
 */
static struct sbuf *whatis_proto;
static struct sbuf *whatis_final;

/* collected output lines */
static StringList *whatis_lines;

/* path of temporary file, if any */
static char tmp_file[MAXPATHLEN];
11996845Smarkm
/*
 * Accepted titles for the NAME section of a man page, NULL-terminated.
 * The two escaped entries appear to be the Japanese (EUC-JP) and
 * Russian (KOI8-R) spellings.
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/*
 * The two-letter mdoc(7) commands that process_mdoc_line() skips,
 * concatenated into one string.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
12896845Smarkm
12996845Smarkm/*
13096845Smarkm * Frees a struct page_info and its content.
13196845Smarkm */
13296845Smarkmstatic void
13396845Smarkmfree_page_info(struct page_info *info)
13496845Smarkm{
135262540Seadler	free(info->filename);
136262540Seadler	free(info->name);
137262540Seadler	free(info->suffix);
13896845Smarkm	free(info);
13996845Smarkm}
14096845Smarkm
14196845Smarkm/*
142262540Seadler * Allocates and fills in a new struct page_info given the
143262540Seadler * name of the man section directory and the dirent of the file.
144262540Seadler * If the file is not a man page, returns NULL.
14596845Smarkm */
146262540Seadlerstatic struct page_info *
147262540Seadlernew_page_info(char *dir, struct dirent *dirent)
14896845Smarkm{
149262540Seadler	struct page_info *info;
150262540Seadler	int basename_length;
15196845Smarkm	char *suffix;
152262540Seadler	struct stat st;
15396845Smarkm
154262540Seadler	info = (struct page_info *) malloc(sizeof(struct page_info));
155262540Seadler	if (info == NULL)
156262540Seadler		err(1, "malloc");
15796845Smarkm	basename_length = strlen(dirent->d_name);
15896845Smarkm	suffix = &dirent->d_name[basename_length];
159262540Seadler	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
160262540Seadler	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
16196845Smarkm		suffix -= 3;
16296845Smarkm		*suffix = '\0';
16396845Smarkm	}
16496845Smarkm	for (;;) {
16596845Smarkm		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
166262540Seadler			if (*suffix == '.')
16796845Smarkm				break;
168262540Seadler			if (verbose)
169262540Seadler				warnx("%s: invalid man page name", info->filename);
170262540Seadler			free(info->filename);
171262540Seadler			free(info);
172262540Seadler			return NULL;
17396845Smarkm		}
17496845Smarkm	}
17596845Smarkm	*suffix++ = '\0';
176262540Seadler	info->name = strdup(dirent->d_name);
177262540Seadler	info->suffix = strdup(suffix);
178262540Seadler	if (stat(info->filename, &st) < 0) {
179262540Seadler		warn("%s", info->filename);
180262540Seadler		free_page_info(info);
181262540Seadler		return NULL;
18296845Smarkm	}
183262540Seadler	if (!S_ISREG(st.st_mode)) {
184262540Seadler		if (verbose && !S_ISDIR(st.st_mode))
185262540Seadler			warnx("%s: not a regular file", info->filename);
186262540Seadler		free_page_info(info);
187262540Seadler		return NULL;
188262540Seadler	}
189262540Seadler	info->inode = st.st_ino;
190262540Seadler	return info;
19196845Smarkm}
19296845Smarkm
19396845Smarkm/*
19496845Smarkm * Reset an sbuf's length to 0.
19596845Smarkm */
19696845Smarkmstatic void
19796845Smarkmsbuf_clear(struct sbuf *sbuf)
19896845Smarkm{
19996845Smarkm	sbuf->end = sbuf->content;
20096845Smarkm}
20196845Smarkm
20296845Smarkm/*
20396845Smarkm * Allocate a new sbuf.
20496845Smarkm */
20596845Smarkmstatic struct sbuf *
20696845Smarkmnew_sbuf(void)
20796845Smarkm{
20896845Smarkm	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
209262540Seadler	sbuf->content = (char *) malloc(LINE_ALLOC);
21096845Smarkm	sbuf->last = sbuf->content + LINE_ALLOC - 1;
21196845Smarkm	sbuf_clear(sbuf);
212262540Seadler	return sbuf;
21396845Smarkm}
21496845Smarkm
21596845Smarkm/*
21699532Srobert * Ensure that there is enough room in the sbuf for nchars more characters.
21796845Smarkm */
21896845Smarkmstatic void
21996845Smarkmsbuf_need(struct sbuf *sbuf, int nchars)
22096845Smarkm{
22199532Srobert	char *new_content;
22299532Srobert	size_t size, cntsize;
22399532Srobert
22499532Srobert	/* double the size of the allocation until the buffer is big enough */
22596845Smarkm	while (sbuf->end + nchars > sbuf->last) {
22699532Srobert		size = sbuf->last + 1 - sbuf->content;
22799532Srobert		size *= 2;
22899532Srobert		cntsize = sbuf->end - sbuf->content;
22996845Smarkm
230262540Seadler		new_content = (char *)malloc(size);
23199532Srobert		memcpy(new_content, sbuf->content, cntsize);
23296845Smarkm		free(sbuf->content);
23396845Smarkm		sbuf->content = new_content;
23499532Srobert		sbuf->end = new_content + cntsize;
23599532Srobert		sbuf->last = new_content + size - 1;
23696845Smarkm	}
23796845Smarkm}
23896845Smarkm
23996845Smarkm/*
24096845Smarkm * Appends a string of a given length to the sbuf.
24196845Smarkm */
24296845Smarkmstatic void
24396845Smarkmsbuf_append(struct sbuf *sbuf, const char *text, int length)
24496845Smarkm{
24596845Smarkm	if (length > 0) {
24696845Smarkm		sbuf_need(sbuf, length);
24796845Smarkm		memcpy(sbuf->end, text, length);
24896845Smarkm		sbuf->end += length;
24996845Smarkm	}
25096845Smarkm}
25196845Smarkm
/*
 * Adds a null-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
26096845Smarkm
26196845Smarkm/*
26296845Smarkm * Appends an edited null-terminated string to the sbuf.
26396845Smarkm */
26496845Smarkmstatic void
26596845Smarkmsbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
26696845Smarkm{
26796845Smarkm	int length = strlen(text);
26896845Smarkm	if (length > 0) {
26996845Smarkm		sbuf_need(sbuf, length);
27096845Smarkm		sbuf->end = copy(text, sbuf->end, length);
27196845Smarkm	}
27296845Smarkm}
27396845Smarkm
27496845Smarkm/*
27596845Smarkm * Strips any of a set of chars from the end of the sbuf.
27696845Smarkm */
27796845Smarkmstatic void
27896845Smarkmsbuf_strip(struct sbuf *sbuf, const char *set)
27996845Smarkm{
28096845Smarkm	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
28196845Smarkm		sbuf->end--;
28296845Smarkm}
28396845Smarkm
28496845Smarkm/*
28596845Smarkm * Returns the null-terminated string built by the sbuf.
28696845Smarkm */
28796845Smarkmstatic char *
28896845Smarkmsbuf_content(struct sbuf *sbuf)
28996845Smarkm{
29096845Smarkm	*sbuf->end = '\0';
291262540Seadler	return sbuf->content;
29296845Smarkm}
29396845Smarkm
/*
 * Checks dir for "name.suffix.gz" and "name.suffix" for every name in
 * the StringList.  Returns 0 as soon as one of those files exists and
 * 1 when none of them does.
 */
static int
no_page_exists(char *dir, StringList *names, char *suffix)
{
	char candidate[MAXPATHLEN];
	size_t n;

	for (n = 0; n < names->sl_cur; n++) {
		snprintf(candidate, sizeof(candidate), "%s/%s.%s.gz", dir,
		    names->sl_str[n], suffix);
		if (access(candidate, F_OK) >= 0)
			return 0;
		/* retry with the ".gz" extension cut off */
		candidate[strlen(candidate) - 3] = '\0';
		if (access(candidate, F_OK) >= 0)
			return 0;
	}
	return 1;
}
315262540Seadler
31696845Smarkmstatic void
31796845Smarkmtrap_signal(int sig __unused)
31896845Smarkm{
31996845Smarkm	if (tmp_file[0] != '\0')
32096845Smarkm		unlink(tmp_file);
32196845Smarkm	exit(1);
32296845Smarkm}
32396845Smarkm
/*
 * Attempts to open an output file.  Returns NULL if unsuccessful,
 * after printing a warning and setting exit_code to 1.
 *
 * A fresh whatis_lines list is started.  With the -a flag the existing
 * lines of name are loaded into it first.  Unless a common output file
 * is in use, output goes to "name.tmp", whose path is kept in tmp_file.
 */
static FILE *
open_output(char *name)
{
	FILE *output;

	whatis_lines = sl_init();
	if (append) {
		char line[LINE_ALLOC];
		char *copy;

		output = fopen(name, "r");
		if (output == NULL) {
			warn("%s", name);
			exit_code = 1;
			return NULL;
		}
		while (fgets(line, sizeof line, output) != NULL) {
			/*
			 * Cut at the newline if there is one; a final line
			 * without newline keeps all of its characters.
			 */
			line[strcspn(line, "\n")] = '\0';
			copy = strdup(line);
			if (copy == NULL)
				err(1, "strdup");
			sl_add(whatis_lines, copy);
		}
		/* done reading; the file is reopened for writing below */
		fclose(output);
	}
	if (common_output == NULL) {
		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
		name = tmp_file;
	}
	output = fopen(name, "w");
	if (output == NULL) {
		warn("%s", name);
		exit_code = 1;
		return NULL;
	}
	return output;
}
35996845Smarkm
/*
 * qsort(3) comparison for an array of strings: orders the two
 * pointed-to strings with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
36596845Smarkm
/*
 * Writes the unique sorted lines to the output file, closes it and
 * frees whatis_lines.
 *
 * When no common output file is in use, the temporary file is then
 * renamed to name.  If writing or renaming fails, a warning is
 * printed, exit_code is set to 1 and the temporary file is removed so
 * that an incomplete file never replaces name.
 */
static void
finish_output(FILE *output, char *name)
{
	size_t i;
	char *prev = NULL;
	int failed;

	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *),
	    linesort);
	for (i = 0; i < whatis_lines->sl_cur; i++) {
		char *line = whatis_lines->sl_str[i];

		/* equal lines are adjacent after sorting; emit the first */
		if (prev != NULL && strcmp(line, prev) == 0)
			continue;
		prev = line;
		fputs(line, output);
		putc('\n', output);
	}
	failed = ferror(output) != 0;
	if (fclose(output) != 0)
		failed = 1;
	sl_free(whatis_lines, 1);
	if (failed) {
		warnx("%s: write error",
		    common_output == NULL ? tmp_file : name);
		exit_code = 1;
	}
	if (common_output != NULL)
		return;
	if (!failed && rename(tmp_file, name) < 0) {
		warn("%s", name);
		exit_code = 1;
		failed = 1;
	}
	if (failed)
		unlink(tmp_file);
}
39196845Smarkm
39296845Smarkmstatic FILE *
39396845Smarkmopen_whatis(char *mandir)
39496845Smarkm{
39596845Smarkm	char filename[MAXPATHLEN];
39696845Smarkm
39796845Smarkm	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
398262540Seadler	return open_output(filename);
39996845Smarkm}
40096845Smarkm
40196845Smarkmstatic void
40296845Smarkmfinish_whatis(FILE *output, char *mandir)
40396845Smarkm{
40496845Smarkm	char filename[MAXPATHLEN];
40596845Smarkm
40696845Smarkm	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
40796845Smarkm	finish_output(output, filename);
40896845Smarkm}
40996845Smarkm
41096845Smarkm/*
41196845Smarkm * Tests to see if the given directory has already been visited.
41296845Smarkm */
41396845Smarkmstatic int
41496845Smarkmalready_visited(char *dir)
41596845Smarkm{
41696845Smarkm	struct stat st;
41796845Smarkm	struct visited_dir *visit;
41896845Smarkm
41996845Smarkm	if (stat(dir, &st) < 0) {
42096845Smarkm		warn("%s", dir);
42196845Smarkm		exit_code = 1;
422262540Seadler		return 1;
42396845Smarkm	}
42496845Smarkm	SLIST_FOREACH(visit, &visited_dirs, next) {
42596845Smarkm		if (visit->inode == st.st_ino &&
42696845Smarkm		    visit->device == st.st_dev) {
42796845Smarkm			warnx("already visited %s", dir);
428262540Seadler			return 1;
42996845Smarkm		}
43096845Smarkm	}
43196845Smarkm	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
43296845Smarkm	visit->device = st.st_dev;
43396845Smarkm	visit->inode = st.st_ino;
43496845Smarkm	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
435262540Seadler	return 0;
43696845Smarkm}
43796845Smarkm
/*
 * Removes trailing spaces from a string, returning a pointer to just
 * beyond the new last character.
 *
 * The first character of the string is never examined, so a string
 * made only of white space is cut down to its first character.  An
 * empty string is returned unchanged.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = str + strlen(str);

	/*
	 * ctype functions need an unsigned char value; man pages may
	 * contain bytes above 0x7f.
	 */
	while (rhs - str > 1 && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return rhs;
}
45196845Smarkm
/*
 * Returns a pointer to the next non-space character in the string,
 * which is the terminating NUL when only white space remains.
 */
static char *
skip_spaces(char *s)
{
	/* ctype functions need an unsigned char value */
	while (*s != '\0' && isspace((unsigned char)*s))
		s++;
	return s;
}
46296845Smarkm
/*
 * Returns whether the string contains only digits.  At least one
 * digit is required, so the empty string yields 0.
 */
static int
only_digits(char *line)
{
	/* ctype functions need an unsigned char value */
	if (!isdigit((unsigned char)*line++))
		return 0;
	while (isdigit((unsigned char)*line))
		line++;
	return *line == '\0';
}
47596845Smarkm
47696845Smarkm/*
47796845Smarkm * Returns whether the line is of one of the forms:
47896845Smarkm *	.Sh NAME
47996845Smarkm *	.Sh "NAME"
48096845Smarkm *	etc.
48196845Smarkm * assuming that section_start is ".Sh".
48296845Smarkm */
48396845Smarkmstatic int
48496845Smarkmname_section_line(char *line, const char *section_start)
48596845Smarkm{
48696845Smarkm	char *rhs;
48796845Smarkm	const char **title;
48896845Smarkm
48996845Smarkm	if (strncmp(line, section_start, 3) != 0)
490262540Seadler		return 0;
49196845Smarkm	line = skip_spaces(line + 3);
49296845Smarkm	rhs = trim_rhs(line);
49396845Smarkm	if (*line == '"') {
49496845Smarkm		line++;
49596845Smarkm		if (*--rhs == '"')
49696845Smarkm			*rhs = '\0';
49796845Smarkm	}
49896845Smarkm	for (title = name_section_titles; *title != NULL; title++)
49996845Smarkm		if (strcmp(*title, line) == 0)
500262540Seadler			return 1;
501262540Seadler	return 0;
50296845Smarkm}
50396845Smarkm
/*
 * Copies characters while removing the most common nroff/troff
 * markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * Any other backslash is dropped and the character after it is copied.
 *
 * from/fromlen give the input, to is the output position.  The output
 * is never longer than the input.  Returns the position just past the
 * last character written.  Reading never goes beyond from + fromlen,
 * and a backslash that ends the input is dropped.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* step over the backslash; nothing follows a trailing one */
		if (++from >= from_end)
			break;
		switch (*from) {
		case '(':
			/* only \(em and \(mi are removed */
			if (from_end - from >= 3 &&
			    (memcmp(from + 1, "em", 2) == 0 ||
			    memcmp(from + 1, "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			break;
		case 's':
			/* point size change: optional sign, then digits */
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end &&
			    isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			if (from >= from_end)
				continue;
			if (*from == '(') {
				/* "(xx": two-character name */
				if (from_end - from >= 3)
					from += 3;
				else
					from = from_end;
			} else if (*from == '[') {
				/* "[name]": skip through the closing ']' */
				while (from < from_end && *from != ']')
					from++;
				if (from < from_end)
					from++;
			} else
				from++;
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		*to++ = *from++;
	}
	return to;
}
55296845Smarkm
55396845Smarkm/*
55496845Smarkm * Appends a string with the nroff formatting removed.
55596845Smarkm */
55696845Smarkmstatic void
55796845Smarkmadd_nroff(char *text)
55896845Smarkm{
55996845Smarkm	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
56096845Smarkm}
56196845Smarkm
56296845Smarkm/*
56396845Smarkm * Appends "name(suffix), " to whatis_final.
56496845Smarkm */
56596845Smarkmstatic void
56696845Smarkmadd_whatis_name(char *name, char *suffix)
56796845Smarkm{
56896845Smarkm	if (*name != '\0') {
56996845Smarkm		sbuf_append_str(whatis_final, name);
57096845Smarkm		sbuf_append(whatis_final, "(", 1);
57196845Smarkm		sbuf_append_str(whatis_final, suffix);
57296845Smarkm		sbuf_append(whatis_final, "), ", 3);
57396845Smarkm	}
57496845Smarkm}
57596845Smarkm
57696845Smarkm/*
57796845Smarkm * Processes an old-style man(7) line.  This ignores commands with only
57896845Smarkm * a single number argument.
57996845Smarkm */
58096845Smarkmstatic void
58196845Smarkmprocess_man_line(char *line)
58296845Smarkm{
58396845Smarkm	if (*line == '.') {
58496845Smarkm		while (isalpha(*++line))
58596845Smarkm			;
58696845Smarkm		line = skip_spaces(line);
58796845Smarkm		if (only_digits(line))
58896845Smarkm			return;
58996845Smarkm	} else
59096845Smarkm		line = skip_spaces(line);
59196845Smarkm	if (*line != '\0') {
59296845Smarkm		add_nroff(line);
59396845Smarkm		sbuf_append(whatis_proto, " ", 1);
59496845Smarkm	}
59596845Smarkm}
59696845Smarkm
59796845Smarkm/*
59896845Smarkm * Processes a new-style mdoc(7) line.
59996845Smarkm */
60096845Smarkmstatic void
60196845Smarkmprocess_mdoc_line(char *line)
60296845Smarkm{
60396845Smarkm	int xref;
60496845Smarkm	int arg = 0;
60596845Smarkm	char *line_end = &line[strlen(line)];
60696845Smarkm	int orig_length = sbuf_length(whatis_proto);
60796845Smarkm	char *next;
60896845Smarkm
60996845Smarkm	if (*line == '\0')
61096845Smarkm		return;
61196845Smarkm	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
61296845Smarkm		add_nroff(skip_spaces(line));
61396859Sru		sbuf_append(whatis_proto, " ", 1);
61496845Smarkm		return;
61596845Smarkm	}
61696845Smarkm	xref = strncmp(line, ".Xr", 3) == 0;
61796845Smarkm	line += 3;
61896845Smarkm	while ((line = skip_spaces(line)) < line_end) {
61996845Smarkm		if (*line == '"') {
62096845Smarkm			next = ++line;
62196845Smarkm			for (;;) {
62296845Smarkm				next = strchr(next, '"');
62396861Sru				if (next == NULL)
62496845Smarkm					break;
62599532Srobert				memmove(next, next + 1, strlen(next));
62696845Smarkm				line_end--;
62796861Sru				if (*next != '"')
62896861Sru					break;
62996845Smarkm				next++;
63096845Smarkm			}
63196845Smarkm		} else
63296845Smarkm			next = strpbrk(line, " \t");
63396845Smarkm		if (next != NULL)
63496845Smarkm			*next++ = '\0';
63596845Smarkm		else
63696845Smarkm			next = line_end;
63796845Smarkm		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
63896845Smarkm			if (strcmp(line, "Ns") == 0) {
63996845Smarkm				arg = 0;
64096845Smarkm				line = next;
64196845Smarkm				continue;
64296845Smarkm			}
64396845Smarkm			if (strstr(mdoc_commands, line) != NULL) {
64496845Smarkm				line = next;
64596845Smarkm				continue;
64696845Smarkm			}
64796845Smarkm		}
64896845Smarkm		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
64996845Smarkm			if (xref) {
65096845Smarkm				sbuf_append(whatis_proto, "(", 1);
65196845Smarkm				add_nroff(line);
65296845Smarkm				sbuf_append(whatis_proto, ")", 1);
65396845Smarkm				xref = 0;
65496845Smarkm				line = blank;
65596845Smarkm			} else
65696845Smarkm				sbuf_append(whatis_proto, " ", 1);
65796845Smarkm		}
65896845Smarkm		add_nroff(line);
65996845Smarkm		arg++;
66096845Smarkm		line = next;
66196845Smarkm	}
66296845Smarkm	if (sbuf_length(whatis_proto) > orig_length)
66396845Smarkm		sbuf_append(whatis_proto, " ", 1);
66496845Smarkm}
66596845Smarkm
/*
 * Splits text at its commas and adds every piece to names.  One space
 * after a comma is skipped.  The pieces are not copied: each comma in
 * text is overwritten with a NUL and the list entries point into text.
 */
static void
collect_names(StringList *names, char *text)
{
	char *comma;

	do {
		comma = strchr(text, ',');
		if (comma != NULL)
			*comma = '\0';
		sl_add(names, text);
		if (comma != NULL) {
			text = comma + 1;
			if (*text == ' ')
				text++;
		}
	} while (comma != NULL);
}
686262540Seadler
/* States of the line scanner in process_page(). */
enum {
	STATE_UNKNOWN,		/* NAME section not reached yet */
	STATE_MANSTYLE,		/* inside an old-style .SH NAME section */
	STATE_MDOCNAME,		/* new-style .Sh NAME section, .Nm part */
	STATE_MDOCDESC		/* new-style .Sh NAME section, after .Nm */
};
68896845Smarkm
/*
 * Processes a man page source into a single whatis line and adds it
 * to whatis_lines.
 *
 * The page is read through zlib.  The content of its NAME section is
 * gathered in whatis_proto, split into a name list and a description,
 * and then laid out in whatis_final as
 *	name(suffix), name(suffix)   - description
 * Pages whose NAME section gives no description or no names are
 * skipped.  A page that cannot be opened sets exit_code to 1.
 */
static void
process_page(struct page_info *page, char *section_dir)
{
	gzFile in;
	char buffer[4096];
	char *line;
	char *entry;
	StringList *names;
	char *descr;
	int state = STATE_UNKNOWN;
	size_t i;

	sbuf_clear(whatis_proto);
	if ((in = gzopen(page->filename, "r")) == NULL) {
		warn("%s", page->filename);
		exit_code = 1;
		return;
	}
	/*
	 * Inside the switch, "continue" moves on to the next input line
	 * while "break" falls out to the break behind the switch and
	 * stops reading.
	 */
	while (gzgets(in, buffer, sizeof buffer) != NULL) {
		line = buffer;
		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
			continue;
		switch (state) {
		/*
		 * haven't reached the NAME section yet.
		 */
		case STATE_UNKNOWN:
			if (name_section_line(line, ".SH"))
				state = STATE_MANSTYLE;
			else if (name_section_line(line, ".Sh"))
				state = STATE_MDOCNAME;
			continue;
		/*
		 * Inside an old-style .SH NAME section.
		 */
		case STATE_MANSTYLE:
			if (strncmp(line, ".SH", 3) == 0)
				break;
			if (strncmp(line, ".SS", 3) == 0)
				break;
			trim_rhs(line);
			if (strcmp(line, ".") == 0)
				continue;
			if (strncmp(line, ".IX", 3) == 0) {
				line += 3;
				line = skip_spaces(line);
			}
			process_man_line(line);
			continue;
		/*
		 * Inside a new-style .Sh NAME section (the .Nm part).
		 */
		case STATE_MDOCNAME:
			trim_rhs(line);
			if (strncmp(line, ".Nm", 3) == 0) {
				process_mdoc_line(line);
				continue;
			} else {
				if (strcmp(line, ".") == 0)
					continue;
				sbuf_append(whatis_proto, "- ", 2);
				state = STATE_MDOCDESC;
			}
			/* fall through */
		/*
		 * Inside a new-style .Sh NAME section (after the .Nm-s).
		 */
		case STATE_MDOCDESC:
			if (strncmp(line, ".Sh", 3) == 0)
				break;
			trim_rhs(line);
			if (strcmp(line, ".") == 0)
				continue;
			process_mdoc_line(line);
			continue;
		}
		break;
	}
	gzclose(in);
	sbuf_strip(whatis_proto, " \t.-");
	line = sbuf_content(whatis_proto);
	/*
	 * line now contains the appropriate data, but without
	 * the proper indentation or the section appended to each name.
	 */
	descr = strstr(line, " - ");
	if (descr == NULL) {
		/* no " - " separator: split at the first space instead */
		descr = strchr(line, ' ');
		if (descr == NULL) {
			if (verbose)
				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
			return;
		}
		*descr++ = '\0';
	} else {
		*descr = '\0';
		descr += 3;
	}
	names = sl_init();
	collect_names(names, line);
	sbuf_clear(whatis_final);
	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
		/*
		 * Add the page name since that's the only thing that
		 * man(1) will find.
		 */
		add_whatis_name(page->name, page->suffix);
	}
	for (i = 0; i < names->sl_cur; i++)
		add_whatis_name(names->sl_str[i], page->suffix);
	/* the entries point into whatis_proto, so only the list is freed */
	sl_free(names, 0);
	if (sbuf_length(whatis_final) < 2) {
		/*
		 * Every name was empty, so there is no trailing ", " to
		 * remove and nothing to index.
		 */
		if (verbose)
			fprintf(stderr, "	ignoring page without names\n");
		return;
	}
	sbuf_retract(whatis_final, 2);		/* remove last ", " */
	while (sbuf_length(whatis_final) < indent)
		sbuf_append(whatis_final, " ", 1);
	sbuf_append(whatis_final, " - ", 3);
	sbuf_append_str(whatis_final, skip_spaces(descr));
	entry = strdup(sbuf_content(whatis_final));
	if (entry == NULL)
		err(1, "strdup");
	sl_add(whatis_lines, entry);
}
81096845Smarkm
81196845Smarkm/*
812262540Seadler * Sorts pages first by inode number, then by name.
813262540Seadler */
814262540Seadlerstatic int
815262540Seadlerpagesort(const void *a, const void *b)
816262540Seadler{
817262540Seadler	const struct page_info *p1 = *(struct page_info * const *) a;
818262540Seadler	const struct page_info *p2 = *(struct page_info * const *) b;
819262540Seadler	if (p1->inode == p2->inode)
820262540Seadler		return strcmp(p1->name, p2->name);
821262540Seadler	return p1->inode - p2->inode;
822262540Seadler}
823262540Seadler
824262540Seadler/*
82596845Smarkm * Processes a single man section.
82696845Smarkm */
82796845Smarkmstatic void
82896845Smarkmprocess_section(char *section_dir)
82996845Smarkm{
83096845Smarkm	struct dirent **entries;
83196845Smarkm	int nentries;
832262540Seadler	struct page_info **pages;
833262540Seadler	int npages = 0;
83496845Smarkm	int i;
835262540Seadler	ino_t prev_inode = 0;
83696845Smarkm
837262540Seadler	if (verbose)
83896845Smarkm		fprintf(stderr, "  %s\n", section_dir);
83996845Smarkm
84096845Smarkm	/*
84196845Smarkm	 * scan the man section directory for pages
84296845Smarkm	 */
84396845Smarkm	nentries = scandir(section_dir, &entries, NULL, alphasort);
84496845Smarkm	if (nentries < 0) {
84596845Smarkm		warn("%s", section_dir);
84696845Smarkm		exit_code = 1;
84796845Smarkm		return;
84896845Smarkm	}
84996845Smarkm	/*
85096845Smarkm	 * collect information about man pages
85196845Smarkm	 */
852262540Seadler	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
85396845Smarkm	for (i = 0; i < nentries; i++) {
854262540Seadler		struct page_info *info = new_page_info(section_dir, entries[i]);
855262540Seadler		if (info != NULL)
856262540Seadler			pages[npages++] = info;
85796845Smarkm		free(entries[i]);
85896845Smarkm	}
85996845Smarkm	free(entries);
860262540Seadler	qsort(pages, npages, sizeof(struct page_info *), pagesort);
861262540Seadler	/*
862262540Seadler	 * process each unique page
863262540Seadler	 */
864262540Seadler	for (i = 0; i < npages; i++) {
865262540Seadler		struct page_info *page = pages[i];
866262540Seadler		if (page->inode != prev_inode) {
867262540Seadler			prev_inode = page->inode;
868262540Seadler			if (verbose)
869262540Seadler				fprintf(stderr, "	reading %s\n", page->filename);
870262540Seadler			process_page(page, section_dir);
871262540Seadler		} else if (verbose)
872262540Seadler			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
873262540Seadler		free_page_info(page);
874262540Seadler	}
875262540Seadler	free(pages);
87696845Smarkm}
87796845Smarkm
/*
 * Returns whether the directory entry is a man page section.
 *
 * An entry qualifies when its name starts with "man" and every
 * character after that prefix (possibly none) is alphanumeric.
 * Returns 1 for a match and 0 otherwise, as expected of a scandir()
 * selection callback.
 */
static int
select_sections(const struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	/*
	 * The <ctype.h> classifiers take an int that must hold an
	 * unsigned char value (or EOF); passing a plain char with the
	 * high bit set is undefined behaviour, hence the cast.
	 */
	for (p = entry->d_name + 3; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return 0;
	}
	return 1;
}
89496845Smarkm
89596845Smarkm/*
89696845Smarkm * Processes a single top-level man directory by finding all the
89796845Smarkm * sub-directories named man* and processing each one in turn.
89896845Smarkm */
89996845Smarkmstatic void
90096845Smarkmprocess_mandir(char *dir_name)
90196845Smarkm{
90296845Smarkm	struct dirent **entries;
90396845Smarkm	int nsections;
90496845Smarkm	FILE *fp = NULL;
90596845Smarkm	int i;
90696858Sru	struct stat st;
90796845Smarkm
90896845Smarkm	if (already_visited(dir_name))
90996845Smarkm		return;
91096845Smarkm	if (verbose)
91196845Smarkm		fprintf(stderr, "man directory %s\n", dir_name);
91296845Smarkm	nsections = scandir(dir_name, &entries, select_sections, alphasort);
91396845Smarkm	if (nsections < 0) {
91496845Smarkm		warn("%s", dir_name);
91596845Smarkm		exit_code = 1;
91696845Smarkm		return;
91796845Smarkm	}
91896845Smarkm	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
91996845Smarkm		return;
92096845Smarkm	for (i = 0; i < nsections; i++) {
92196845Smarkm		char section_dir[MAXPATHLEN];
922262540Seadler		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
92396845Smarkm		process_section(section_dir);
92496858Sru		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
925262540Seadler		    entries[i]->d_name, machine);
92696858Sru		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
92796858Sru			process_section(section_dir);
928262540Seadler		if (strcmp(machine_arch, machine) != 0) {
929262540Seadler			snprintf(section_dir, sizeof section_dir, "%s/%s/%s",
930262540Seadler			    dir_name, entries[i]->d_name, machine_arch);
931262540Seadler			if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
932262540Seadler				process_section(section_dir);
933262540Seadler		}
93496845Smarkm		free(entries[i]);
93596845Smarkm	}
93696845Smarkm	free(entries);
93796845Smarkm	if (common_output == NULL)
93896845Smarkm		finish_whatis(fp, dir_name);
93996845Smarkm}
94096845Smarkm
94196845Smarkm/*
94296845Smarkm * Processes one argument, which may be a colon-separated list of
94396845Smarkm * directories.
94496845Smarkm */
94596845Smarkmstatic void
94696845Smarkmprocess_argument(const char *arg)
94796845Smarkm{
94896845Smarkm	char *dir;
94996845Smarkm	char *mandir;
95096845Smarkm	char *parg;
95196845Smarkm
95296845Smarkm	parg = strdup(arg);
95396845Smarkm	if (parg == NULL)
95496845Smarkm		err(1, "out of memory");
95596845Smarkm	while ((dir = strsep(&parg, ":")) != NULL) {
95696845Smarkm		if (locale != NULL) {
95796845Smarkm			asprintf(&mandir, "%s/%s", dir, locale);
95896845Smarkm			process_mandir(mandir);
95996845Smarkm			free(mandir);
96096845Smarkm			if (lang_locale != NULL) {
96196845Smarkm				asprintf(&mandir, "%s/%s", dir, lang_locale);
96296845Smarkm				process_mandir(mandir);
96396845Smarkm				free(mandir);
96496845Smarkm			}
96596845Smarkm		} else {
96696845Smarkm			process_mandir(dir);
96796845Smarkm		}
96896845Smarkm	}
96996845Smarkm	free(parg);
97096845Smarkm}
97196845Smarkm
97296845Smarkm
97396845Smarkmint
97496845Smarkmmain(int argc, char **argv)
97596845Smarkm{
97696845Smarkm	int opt;
97796845Smarkm	FILE *fp = NULL;
97896845Smarkm
97996845Smarkm	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
98096845Smarkm		switch (opt) {
98196845Smarkm		case 'a':
98296845Smarkm			append++;
98396845Smarkm			break;
98496845Smarkm		case 'i':
98596845Smarkm			indent = atoi(optarg);
98696845Smarkm			break;
98796845Smarkm		case 'n':
98896845Smarkm			whatis_name = optarg;
98996845Smarkm			break;
99096845Smarkm		case 'o':
99196845Smarkm			common_output = optarg;
99296845Smarkm			break;
99396845Smarkm		case 'v':
99496845Smarkm			verbose++;
99596845Smarkm			break;
99696845Smarkm		case 'L':
99796845Smarkm			locale = getenv("LC_ALL");
99896845Smarkm			if (locale == NULL)
99996845Smarkm				locale = getenv("LC_CTYPE");
100096845Smarkm			if (locale == NULL)
100196845Smarkm				locale = getenv("LANG");
100296845Smarkm			if (locale != NULL) {
100396845Smarkm				char *sep = strchr(locale, '_');
100496845Smarkm				if (sep != NULL && isupper(sep[1]) &&
100596845Smarkm				    isupper(sep[2])) {
1006262540Seadler					asprintf(&lang_locale, "%.*s%s", (int)(ptrdiff_t)(sep - locale), locale, &sep[3]);
100796845Smarkm				}
100896845Smarkm			}
100996845Smarkm			break;
101096845Smarkm		default:
101196845Smarkm			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
101296845Smarkm			exit(1);
101396845Smarkm		}
101496845Smarkm	}
101596845Smarkm
101696845Smarkm	signal(SIGINT, trap_signal);
101796845Smarkm	signal(SIGHUP, trap_signal);
101896845Smarkm	signal(SIGQUIT, trap_signal);
101996845Smarkm	signal(SIGTERM, trap_signal);
102096845Smarkm	SLIST_INIT(&visited_dirs);
102196845Smarkm	whatis_proto = new_sbuf();
102296845Smarkm	whatis_final = new_sbuf();
102396845Smarkm
1024262540Seadler	if ((machine = getenv("MACHINE")) == NULL) {
1025262540Seadler		static struct utsname utsname;
102696858Sru
1027262540Seadler		if (uname(&utsname) == -1)
1028262540Seadler			err(1, "uname");
1029262540Seadler		machine = utsname.machine;
1030262540Seadler	}
1031262540Seadler
1032262540Seadler	if ((machine_arch = getenv("MACHINE_ARCH")) == NULL)
1033262540Seadler		machine_arch = MACHINE_ARCH;
1034262540Seadler
103596845Smarkm	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
103696845Smarkm		err(1, "%s", common_output);
103796845Smarkm	if (optind == argc) {
103896845Smarkm		const char *manpath = getenv("MANPATH");
103996845Smarkm		if (manpath == NULL)
104096845Smarkm			manpath = DEFAULT_MANPATH;
104196845Smarkm		process_argument(manpath);
104296845Smarkm	} else {
104396845Smarkm		while (optind < argc)
104496845Smarkm			process_argument(argv[optind++]);
104596845Smarkm	}
104696845Smarkm	if (common_output != NULL)
104796845Smarkm		finish_output(fp, common_output);
104896845Smarkm	exit(exit_code);
104996845Smarkm}
1050