makewhatis.c revision 97976
/*-
 * Copyright (c) 2002 John Rochester
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2896845Smarkm
2996845Smarkm#include <sys/cdefs.h>
3096845Smarkm__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 97976 2002-06-07 01:01:08Z keramida $");
3196845Smarkm
3296845Smarkm#include <sys/types.h>
3396845Smarkm#include <sys/stat.h>
3496845Smarkm#include <sys/param.h>
3596845Smarkm#include <sys/queue.h>
3696845Smarkm
3796845Smarkm#include <ctype.h>
3896845Smarkm#include <dirent.h>
3996845Smarkm#include <err.h>
4096845Smarkm#include <stdio.h>
4196845Smarkm#include <stdlib.h>
4296845Smarkm#include <string.h>
4396845Smarkm#include <stringlist.h>
4496845Smarkm#include <unistd.h>
4596845Smarkm#include <zlib.h>
4696845Smarkm
/* Default top-level man directory. */
#define DEFAULT_MANPATH		"/usr/share/man"

/* Initial size of an sbuf and of the line buffer used by open_output(). */
#define LINE_ALLOC		4096

/* Writable empty string; process_mdoc_line() points at it to emit nothing. */
static char blank[] = "";
5196845Smarkm
/*
 * What is known about one candidate man page file of a section.
 * Filled in by new_page_info() and released by free_page_info().
 */
struct page_info {
	char	*filename;	/* "<section dir>/<entry name>" path of the file */
	char	*name;		/* entry name up to the '.' before the suffix */
	char	*suffix;	/* alphanumeric suffix after that '.' */
	int	 gzipped;	/* non-zero when the entry name ended in ".gz" */
	ino_t	 inode;		/* st_ino of the file, used to skip duplicates */
};
6296845Smarkm
/*
 * One node of the list of man directories already handled; a directory
 * is identified by its device and inode numbers (see already_visited()).
 */
struct visited_dir {
	dev_t				device;	/* st_dev of the directory */
	ino_t				inode;	/* st_ino of the directory */
	SLIST_ENTRY(visited_dir)	next;	/* link to the next list node */
};
7196845Smarkm
/*
 * A string buffer that grows on demand.  The text occupies
 * [content, end); sbuf_content() stores the terminating NUL at *end.
 */
struct sbuf {
	char	*content;	/* the start of the buffer */
	char	*end;		/* just past the end of the content */
	char	*last;		/* the last allocated character */
};
8096845Smarkm
/*
 * Drops the final `count' characters from the buffer `sb'.
 * No check is made that the buffer holds that many characters.
 */
#define sbuf_retract(sb, count)		\
	((sb)->end -= (count))

/*
 * Evaluates to the number of characters currently stored in `sb'.
 */
#define sbuf_length(sb)			\
	((sb)->end - (sb)->content)
9196845Smarkm
/*
 * A filtering copy routine as used by sbuf_append_edited(): reads `len'
 * characters at `src', writes the edited text at `dst' and returns a
 * pointer just past the last character written.
 */
typedef char *edited_copy(char *src, char *dst, int len);
9396845Smarkm
/* -a flag: append to existing whatis */
static int append;
/* -v flag: be verbose with warnings */
static int verbose;
/* -i option: description indentation */
static int indent = 24;
/* -n option: the name */
static const char *whatis_name = "whatis";
/* -o option: the single output file */
static char *common_output;
/* user's locale if -L is used */
static char *locale;
/* short form of locale */
static char *lang_locale;
/* name of the extra subdirectory process_mandir() scans in each section */
static char *machine;

/* exit code to use when finished */
static int exit_code;
/* man directories seen so far, maintained by already_visited() */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
10696845Smarkm
10796845Smarkm/*
10896845Smarkm * While the whatis line is being formed, it is stored in whatis_proto.
10996845Smarkm * When finished, it is reformatted into whatis_final and then appended
11096845Smarkm * to whatis_lines.
11196845Smarkm */
11296845Smarkmstatic struct sbuf *whatis_proto;
11396845Smarkmstatic struct sbuf *whatis_final;
11496845Smarkmstatic StringList *whatis_lines;	/* collected output lines */
11596845Smarkm
11696845Smarkmstatic char tmp_file[MAXPATHLEN];	/* path of temporary file, if any */
11796845Smarkm
/*
 * Section headings accepted as the NAME section, NULL-terminated.
 * The last two entries are 8-bit byte strings; presumably the heading in
 * non-Latin locale encodings -- confirm before editing them.
 */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/*
 * A subset of the mdoc(7) commands to ignore, as concatenated
 * two-letter names; process_mdoc_line() searches it with strstr().
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
12696845Smarkm
12796845Smarkm/*
12896845Smarkm * Frees a struct page_info and its content.
12996845Smarkm */
13096845Smarkmstatic void
13196845Smarkmfree_page_info(struct page_info *info)
13296845Smarkm{
13396845Smarkm	free(info->filename);
13496845Smarkm	free(info->name);
13596845Smarkm	free(info->suffix);
13696845Smarkm	free(info);
13796845Smarkm}
13896845Smarkm
13996845Smarkm/*
14096845Smarkm * Allocates and fills in a new struct page_info given the
14196845Smarkm * name of the man section directory and the dirent of the file.
14296845Smarkm * If the file is not a man page, returns NULL.
14396845Smarkm */
14496845Smarkmstatic struct page_info *
14596845Smarkmnew_page_info(char *dir, struct dirent *dirent)
14696845Smarkm{
14796845Smarkm	struct page_info *info;
14896845Smarkm	int basename_length;
14996845Smarkm	char *suffix;
15096845Smarkm	struct stat st;
15196845Smarkm
15296845Smarkm	info = (struct page_info *) malloc(sizeof(struct page_info));
15396845Smarkm	if (info == NULL)
15496845Smarkm		err(1, "malloc");
15596845Smarkm	basename_length = strlen(dirent->d_name);
15696845Smarkm	suffix = &dirent->d_name[basename_length];
15796845Smarkm	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
15896845Smarkm	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
15996845Smarkm		suffix -= 3;
16096845Smarkm		*suffix = '\0';
16196845Smarkm	}
16296845Smarkm	for (;;) {
16396845Smarkm		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
16496845Smarkm			if (*suffix == '.')
16596845Smarkm				break;
16696845Smarkm			if (verbose)
16796845Smarkm				warnx("%s: invalid man page name", info->filename);
16896845Smarkm			free(info->filename);
16996845Smarkm			free(info);
17096845Smarkm			return NULL;
17196845Smarkm		}
17296845Smarkm	}
17396845Smarkm	*suffix++ = '\0';
17496845Smarkm	info->name = strdup(dirent->d_name);
17596845Smarkm	info->suffix = strdup(suffix);
17696845Smarkm	if (stat(info->filename, &st) < 0) {
17796845Smarkm		warn("%s", info->filename);
17896845Smarkm		free_page_info(info);
17996845Smarkm		return NULL;
18096845Smarkm	}
18196845Smarkm	if (!S_ISREG(st.st_mode)) {
18296845Smarkm		if (verbose && !S_ISDIR(st.st_mode))
18396845Smarkm			warnx("%s: not a regular file", info->filename);
18496845Smarkm		free_page_info(info);
18596845Smarkm		return NULL;
18696845Smarkm	}
18796845Smarkm	info->inode = st.st_ino;
18896845Smarkm	return info;
18996845Smarkm}
19096845Smarkm
19196845Smarkm/*
19296845Smarkm * Reset an sbuf's length to 0.
19396845Smarkm */
19496845Smarkmstatic void
19596845Smarkmsbuf_clear(struct sbuf *sbuf)
19696845Smarkm{
19796845Smarkm	sbuf->end = sbuf->content;
19896845Smarkm}
19996845Smarkm
20096845Smarkm/*
20196845Smarkm * Allocate a new sbuf.
20296845Smarkm */
20396845Smarkmstatic struct sbuf *
20496845Smarkmnew_sbuf(void)
20596845Smarkm{
20696845Smarkm	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
20796845Smarkm	sbuf->content = (char *) malloc(LINE_ALLOC);
20896845Smarkm	sbuf->last = sbuf->content + LINE_ALLOC - 1;
20996845Smarkm	sbuf_clear(sbuf);
21096845Smarkm	return sbuf;
21196845Smarkm}
21296845Smarkm
21396845Smarkm/*
21496845Smarkm * Ensure that there is enough room in the sbuf for chars more characters.
21596845Smarkm */
21696845Smarkmstatic void
21796845Smarkmsbuf_need(struct sbuf *sbuf, int nchars)
21896845Smarkm{
21996845Smarkm	/* let's assume we only need to double it, but check just in case */
22096845Smarkm	while (sbuf->end + nchars > sbuf->last) {
22196845Smarkm		int alloc;
22296845Smarkm		char *new_content;
22396845Smarkm
22496845Smarkm		alloc = (sbuf->last - sbuf->content + 1) * 2;
22596845Smarkm		new_content = (char *) malloc(alloc);
22696845Smarkm		memcpy(new_content, sbuf->content, sbuf->end - sbuf->content);
22796845Smarkm		sbuf->end = new_content + (sbuf->end - sbuf->content);
22896845Smarkm		free(sbuf->content);
22996845Smarkm		sbuf->content = new_content;
23096845Smarkm	}
23196845Smarkm}
23296845Smarkm
23396845Smarkm/*
23496845Smarkm * Appends a string of a given length to the sbuf.
23596845Smarkm */
23696845Smarkmstatic void
23796845Smarkmsbuf_append(struct sbuf *sbuf, const char *text, int length)
23896845Smarkm{
23996845Smarkm	if (length > 0) {
24096845Smarkm		sbuf_need(sbuf, length);
24196845Smarkm		memcpy(sbuf->end, text, length);
24296845Smarkm		sbuf->end += length;
24396845Smarkm	}
24496845Smarkm}
24596845Smarkm
/*
 * Adds a whole NUL-terminated string to the end of the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int length = strlen(text);

	sbuf_append(sbuf, text, length);
}
25496845Smarkm
25596845Smarkm/*
25696845Smarkm * Appends an edited null-terminated string to the sbuf.
25796845Smarkm */
25896845Smarkmstatic void
25996845Smarkmsbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
26096845Smarkm{
26196845Smarkm	int length = strlen(text);
26296845Smarkm	if (length > 0) {
26396845Smarkm		sbuf_need(sbuf, length);
26496845Smarkm		sbuf->end = copy(text, sbuf->end, length);
26596845Smarkm	}
26696845Smarkm}
26796845Smarkm
26896845Smarkm/*
26996845Smarkm * Strips any of a set of chars from the end of the sbuf.
27096845Smarkm */
27196845Smarkmstatic void
27296845Smarkmsbuf_strip(struct sbuf *sbuf, const char *set)
27396845Smarkm{
27496845Smarkm	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
27596845Smarkm		sbuf->end--;
27696845Smarkm}
27796845Smarkm
27896845Smarkm/*
27996845Smarkm * Returns the null-terminated string built by the sbuf.
28096845Smarkm */
28196845Smarkmstatic char *
28296845Smarkmsbuf_content(struct sbuf *sbuf)
28396845Smarkm{
28496845Smarkm	*sbuf->end = '\0';
28596845Smarkm	return sbuf->content;
28696845Smarkm}
28796845Smarkm
28896845Smarkm/*
28996845Smarkm * Returns true if no man page exists in the directory with
29096845Smarkm * any of the names in the StringList.
29196845Smarkm */
29296845Smarkmstatic int
29396845Smarkmno_page_exists(char *dir, StringList *names, char *suffix)
29496845Smarkm{
29596845Smarkm	char path[MAXPATHLEN];
29697976Skeramida	size_t i;
29796845Smarkm
29896845Smarkm	for (i = 0; i < names->sl_cur; i++) {
29996845Smarkm		snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
30096845Smarkm		if (access(path, F_OK) < 0) {
30196845Smarkm			path[strlen(path) - 3] = '\0';
30296845Smarkm			if (access(path, F_OK) < 0)
30396845Smarkm				continue;
30496845Smarkm		}
30596845Smarkm		return 0;
30696845Smarkm	}
30796845Smarkm	return 1;
30896845Smarkm}
30996845Smarkm
31096845Smarkmstatic void
31196845Smarkmtrap_signal(int sig __unused)
31296845Smarkm{
31396845Smarkm	if (tmp_file[0] != '\0')
31496845Smarkm		unlink(tmp_file);
31596845Smarkm	exit(1);
31696845Smarkm}
31796845Smarkm
31896845Smarkm/*
31996845Smarkm * Attempts to open an output file.  Returns NULL if unsuccessful.
32096845Smarkm */
32196845Smarkmstatic FILE *
32296845Smarkmopen_output(char *name)
32396845Smarkm{
32496845Smarkm	FILE *output;
32596845Smarkm
32696845Smarkm	whatis_lines = sl_init();
32796845Smarkm	if (append) {
32896845Smarkm		char line[LINE_ALLOC];
32996845Smarkm
33096845Smarkm		output = fopen(name, "r");
33196845Smarkm		if (output == NULL) {
33296845Smarkm			warn("%s", name);
33396845Smarkm			exit_code = 1;
33496845Smarkm			return NULL;
33596845Smarkm		}
33696845Smarkm		while (fgets(line, sizeof line, output) != NULL) {
33796845Smarkm			line[strlen(line) - 1] = '\0';
33896845Smarkm			sl_add(whatis_lines, strdup(line));
33996845Smarkm		}
34096845Smarkm	}
34196845Smarkm	if (common_output == NULL) {
34296845Smarkm		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
34396845Smarkm		name = tmp_file;
34496845Smarkm	}
34596845Smarkm	output = fopen(name, "w");
34696845Smarkm	if (output == NULL) {
34796845Smarkm		warn("%s", name);
34896845Smarkm		exit_code = 1;
34996845Smarkm		return NULL;
35096845Smarkm	}
35196845Smarkm	return output;
35296845Smarkm}
35396845Smarkm
/*
 * qsort(3) comparison routine for an array of strings: orders the
 * pointed-to strings with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
35996845Smarkm
36096845Smarkm/*
36196845Smarkm * Writes the unique sorted lines to the output file.
36296845Smarkm */
36396845Smarkmstatic void
36496845Smarkmfinish_output(FILE *output, char *name)
36596845Smarkm{
36697976Skeramida	size_t i;
36796845Smarkm	char *prev = NULL;
36896845Smarkm
36996845Smarkm	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
37096845Smarkm	for (i = 0; i < whatis_lines->sl_cur; i++) {
37196845Smarkm		char *line = whatis_lines->sl_str[i];
37296845Smarkm		if (i > 0 && strcmp(line, prev) == 0)
37396845Smarkm			continue;
37496845Smarkm		prev = line;
37596845Smarkm		fputs(line, output);
37696845Smarkm		putc('\n', output);
37796845Smarkm	}
37896845Smarkm	fclose(output);
37996845Smarkm	sl_free(whatis_lines, 1);
38096845Smarkm	if (common_output == NULL) {
38196845Smarkm		rename(tmp_file, name);
38296845Smarkm		unlink(tmp_file);
38396845Smarkm	}
38496845Smarkm}
38596845Smarkm
38696845Smarkmstatic FILE *
38796845Smarkmopen_whatis(char *mandir)
38896845Smarkm{
38996845Smarkm	char filename[MAXPATHLEN];
39096845Smarkm
39196845Smarkm	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
39296845Smarkm	return open_output(filename);
39396845Smarkm}
39496845Smarkm
39596845Smarkmstatic void
39696845Smarkmfinish_whatis(FILE *output, char *mandir)
39796845Smarkm{
39896845Smarkm	char filename[MAXPATHLEN];
39996845Smarkm
40096845Smarkm	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
40196845Smarkm	finish_output(output, filename);
40296845Smarkm}
40396845Smarkm
40496845Smarkm/*
40596845Smarkm * Tests to see if the given directory has already been visited.
40696845Smarkm */
40796845Smarkmstatic int
40896845Smarkmalready_visited(char *dir)
40996845Smarkm{
41096845Smarkm	struct stat st;
41196845Smarkm	struct visited_dir *visit;
41296845Smarkm
41396845Smarkm	if (stat(dir, &st) < 0) {
41496845Smarkm		warn("%s", dir);
41596845Smarkm		exit_code = 1;
41696845Smarkm		return 1;
41796845Smarkm	}
41896845Smarkm	SLIST_FOREACH(visit, &visited_dirs, next) {
41996845Smarkm		if (visit->inode == st.st_ino &&
42096845Smarkm		    visit->device == st.st_dev) {
42196845Smarkm			warnx("already visited %s", dir);
42296845Smarkm			return 1;
42396845Smarkm		}
42496845Smarkm	}
42596845Smarkm	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
42696845Smarkm	visit->device = st.st_dev;
42796845Smarkm	visit->inode = st.st_ino;
42896845Smarkm	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
42996845Smarkm	return 0;
43096845Smarkm}
43196845Smarkm
/*
 * Removes trailing white space from a string in place.
 *
 * Returns a pointer to the new terminating NUL, i.e. just beyond the
 * new last character.  A string consisting only of white space becomes
 * empty.
 */
static char *
trim_rhs(char *str)
{
	size_t len = strlen(str);

	/* The cast keeps 8-bit characters inside the domain of isspace(). */
	while (len > 0 && isspace((unsigned char)str[len - 1]))
		len--;
	str[len] = '\0';
	return &str[len];
}
44596845Smarkm
/*
 * Returns a pointer to the first character of `s' that is not white
 * space; this is the terminating NUL when there is none.
 */
static char *
skip_spaces(char *s)
{
	/*
	 * isspace() is false for NUL, so the loop stops at the end of the
	 * string.  The cast keeps 8-bit characters inside its domain.
	 */
	while (isspace((unsigned char)*s))
		s++;
	return s;
}
45696845Smarkm
/*
 * Returns non-zero when the string consists of one or more decimal
 * digits and nothing else; an empty string yields 0.
 */
static int
only_digits(char *line)
{
	/* The casts keep 8-bit characters inside the domain of isdigit(). */
	if (!isdigit((unsigned char)*line))
		return 0;
	do {
		line++;
	} while (isdigit((unsigned char)*line));
	return *line == '\0';
}
46996845Smarkm
47096845Smarkm/*
47196845Smarkm * Returns whether the line is of one of the forms:
47296845Smarkm *	.Sh NAME
47396845Smarkm *	.Sh "NAME"
47496845Smarkm *	etc.
47596845Smarkm * assuming that section_start is ".Sh".
47696845Smarkm */
47796845Smarkmstatic int
47896845Smarkmname_section_line(char *line, const char *section_start)
47996845Smarkm{
48096845Smarkm	char *rhs;
48196845Smarkm	const char **title;
48296845Smarkm
48396845Smarkm	if (strncmp(line, section_start, 3) != 0)
48496845Smarkm		return 0;
48596845Smarkm	line = skip_spaces(line + 3);
48696845Smarkm	rhs = trim_rhs(line);
48796845Smarkm	if (*line == '"') {
48896845Smarkm		line++;
48996845Smarkm		if (*--rhs == '"')
49096845Smarkm			*rhs = '\0';
49196845Smarkm	}
49296845Smarkm	for (title = name_section_titles; *title != NULL; title++)
49396845Smarkm		if (strcmp(*title, line) == 0)
49496845Smarkm			return 1;
49596845Smarkm	return 0;
49696845Smarkm}
49796845Smarkm
/*
 * Copies characters while removing the most common nroff/troff
 * markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape only the backslash is removed.
 *
 *   from     text to copy
 *   to       destination; must have room for fromlen characters
 *   fromlen  number of characters to read from `from'
 *
 * Returns a pointer just past the last character written.  The output
 * is not NUL-terminated.  Reading never goes beyond from + fromlen; an
 * escape that is cut short by the end of the input is dropped.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];
	int left;

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* Step over the backslash; a lone trailing one is dropped. */
		if (++from == from_end)
			break;
		left = (int)(from_end - from);	/* at least 1 */
		switch (*from) {
		case '(':
			if (left >= 3 && (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			break;		/* other \(xx: "(xx" is kept */
		case 's':
			from++;
			if (from < from_end && (*from == '+' || *from == '-'))
				from++;
			while (from < from_end && isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			from++;
			left--;
			if (left == 0)
				continue;
			if (*from == '(') {
				/* "(" plus a two-letter name */
				from += left < 3 ? left : 3;
			} else if (*from == '[') {
				while (from < from_end && *from != ']')
					from++;
				if (from < from_end)
					from++;		/* the ']' itself */
			} else
				from++;			/* one-letter name */
			continue;
		case '&':
			from++;
			continue;
		}
		*to++ = *from++;
	}
	return to;
}
54696845Smarkm
54796845Smarkm/*
54896845Smarkm * Appends a string with the nroff formatting removed.
54996845Smarkm */
55096845Smarkmstatic void
55196845Smarkmadd_nroff(char *text)
55296845Smarkm{
55396845Smarkm	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
55496845Smarkm}
55596845Smarkm
55696845Smarkm/*
55796845Smarkm * Appends "name(suffix), " to whatis_final.
55896845Smarkm */
55996845Smarkmstatic void
56096845Smarkmadd_whatis_name(char *name, char *suffix)
56196845Smarkm{
56296845Smarkm	if (*name != '\0') {
56396845Smarkm		sbuf_append_str(whatis_final, name);
56496845Smarkm		sbuf_append(whatis_final, "(", 1);
56596845Smarkm		sbuf_append_str(whatis_final, suffix);
56696845Smarkm		sbuf_append(whatis_final, "), ", 3);
56796845Smarkm	}
56896845Smarkm}
56996845Smarkm
57096845Smarkm/*
57196845Smarkm * Processes an old-style man(7) line.  This ignores commands with only
57296845Smarkm * a single number argument.
57396845Smarkm */
57496845Smarkmstatic void
57596845Smarkmprocess_man_line(char *line)
57696845Smarkm{
57796845Smarkm	if (*line == '.') {
57896845Smarkm		while (isalpha(*++line))
57996845Smarkm			;
58096845Smarkm		line = skip_spaces(line);
58196845Smarkm		if (only_digits(line))
58296845Smarkm			return;
58396845Smarkm	} else
58496845Smarkm		line = skip_spaces(line);
58596845Smarkm	if (*line != '\0') {
58696845Smarkm		add_nroff(line);
58796845Smarkm		sbuf_append(whatis_proto, " ", 1);
58896845Smarkm	}
58996845Smarkm}
59096845Smarkm
59196845Smarkm/*
59296845Smarkm * Processes a new-style mdoc(7) line.
59396845Smarkm */
59496845Smarkmstatic void
59596845Smarkmprocess_mdoc_line(char *line)
59696845Smarkm{
59796845Smarkm	int xref;
59896845Smarkm	int arg = 0;
59996845Smarkm	char *line_end = &line[strlen(line)];
60096845Smarkm	int orig_length = sbuf_length(whatis_proto);
60196845Smarkm	char *next;
60296845Smarkm
60396845Smarkm	if (*line == '\0')
60496845Smarkm		return;
60596845Smarkm	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
60696845Smarkm		add_nroff(skip_spaces(line));
60796859Sru		sbuf_append(whatis_proto, " ", 1);
60896845Smarkm		return;
60996845Smarkm	}
61096845Smarkm	xref = strncmp(line, ".Xr", 3) == 0;
61196845Smarkm	line += 3;
61296845Smarkm	while ((line = skip_spaces(line)) < line_end) {
61396845Smarkm		if (*line == '"') {
61496845Smarkm			next = ++line;
61596845Smarkm			for (;;) {
61696845Smarkm				next = strchr(next, '"');
61796861Sru				if (next == NULL)
61896845Smarkm					break;
61996845Smarkm				strcpy(next, &next[1]);
62096845Smarkm				line_end--;
62196861Sru				if (*next != '"')
62296861Sru					break;
62396845Smarkm				next++;
62496845Smarkm			}
62596845Smarkm		} else
62696845Smarkm			next = strpbrk(line, " \t");
62796845Smarkm		if (next != NULL)
62896845Smarkm			*next++ = '\0';
62996845Smarkm		else
63096845Smarkm			next = line_end;
63196845Smarkm		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
63296845Smarkm			if (strcmp(line, "Ns") == 0) {
63396845Smarkm				arg = 0;
63496845Smarkm				line = next;
63596845Smarkm				continue;
63696845Smarkm			}
63796845Smarkm			if (strstr(mdoc_commands, line) != NULL) {
63896845Smarkm				line = next;
63996845Smarkm				continue;
64096845Smarkm			}
64196845Smarkm		}
64296845Smarkm		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
64396845Smarkm			if (xref) {
64496845Smarkm				sbuf_append(whatis_proto, "(", 1);
64596845Smarkm				add_nroff(line);
64696845Smarkm				sbuf_append(whatis_proto, ")", 1);
64796845Smarkm				xref = 0;
64896845Smarkm				line = blank;
64996845Smarkm			} else
65096845Smarkm				sbuf_append(whatis_proto, " ", 1);
65196845Smarkm		}
65296845Smarkm		add_nroff(line);
65396845Smarkm		arg++;
65496845Smarkm		line = next;
65596845Smarkm	}
65696845Smarkm	if (sbuf_length(whatis_proto) > orig_length)
65796845Smarkm		sbuf_append(whatis_proto, " ", 1);
65896845Smarkm}
65996845Smarkm
66096845Smarkm/*
66196845Smarkm * Collects a list of comma-separated names from the text.
66296845Smarkm */
66396845Smarkmstatic void
66496845Smarkmcollect_names(StringList *names, char *text)
66596845Smarkm{
66696845Smarkm	char *arg;
66796845Smarkm
66896845Smarkm	for (;;) {
66996845Smarkm		arg = text;
67096845Smarkm		text = strchr(text, ',');
67196845Smarkm		if (text != NULL)
67296845Smarkm			*text++ = '\0';
67396845Smarkm		sl_add(names, arg);
67496845Smarkm		if (text == NULL)
67596845Smarkm			return;
67696845Smarkm		if (*text == ' ')
67796845Smarkm			text++;
67896845Smarkm	}
67996845Smarkm}
68096845Smarkm
/* Parser states used by process_page() while reading a man page. */
enum {
	STATE_UNKNOWN,		/* the NAME section has not been reached */
	STATE_MANSTYLE,		/* inside an old-style .SH NAME section */
	STATE_MDOCNAME,		/* inside .Sh NAME, reading the .Nm lines */
	STATE_MDOCDESC		/* inside .Sh NAME, after the .Nm lines */
};
68296845Smarkm
68396845Smarkm/*
68496845Smarkm * Processes a man page source into a single whatis line and adds it
68596845Smarkm * to whatis_lines.
68696845Smarkm */
68796845Smarkmstatic void
68896845Smarkmprocess_page(struct page_info *page, char *section_dir)
68996845Smarkm{
69096845Smarkm	gzFile *in;
69196845Smarkm	char buffer[4096];
69296845Smarkm	char *line;
69396845Smarkm	StringList *names;
69496845Smarkm	char *descr;
69596845Smarkm	int state = STATE_UNKNOWN;
69697976Skeramida	size_t i;
69796845Smarkm
69896845Smarkm	sbuf_clear(whatis_proto);
69996845Smarkm	if ((in = gzopen(page->filename, "r")) == NULL) {
70096845Smarkm		warn("%s", page->filename);
70196845Smarkm		exit_code = 1;
70296845Smarkm		return;
70396845Smarkm	}
70496845Smarkm	while (gzgets(in, buffer, sizeof buffer) != NULL) {
70596845Smarkm		line = buffer;
70696845Smarkm		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
70796845Smarkm			continue;
70896845Smarkm		switch (state) {
70996845Smarkm		/*
71096845Smarkm		 * haven't reached the NAME section yet.
71196845Smarkm		 */
71296845Smarkm		case STATE_UNKNOWN:
71396845Smarkm			if (name_section_line(line, ".SH"))
71496845Smarkm				state = STATE_MANSTYLE;
71596845Smarkm			else if (name_section_line(line, ".Sh"))
71696845Smarkm				state = STATE_MDOCNAME;
71796845Smarkm			continue;
71896845Smarkm		/*
71996845Smarkm		 * Inside an old-style .SH NAME section.
72096845Smarkm		 */
72196845Smarkm		case STATE_MANSTYLE:
72296845Smarkm			if (strncmp(line, ".SH", 3) == 0)
72396845Smarkm				break;
72496845Smarkm			trim_rhs(line);
72596845Smarkm			if (strcmp(line, ".") == 0)
72696845Smarkm				continue;
72796845Smarkm			if (strncmp(line, ".IX", 3) == 0) {
72896845Smarkm				line += 3;
72996845Smarkm				line = skip_spaces(line);
73096845Smarkm			}
73196845Smarkm			process_man_line(line);
73296845Smarkm			continue;
73396845Smarkm		/*
73496845Smarkm		 * Inside a new-style .Sh NAME section (the .Nm part).
73596845Smarkm		 */
73696845Smarkm		case STATE_MDOCNAME:
73796845Smarkm			trim_rhs(line);
73896845Smarkm			if (strncmp(line, ".Nm", 3) == 0) {
73996845Smarkm				process_mdoc_line(line);
74096845Smarkm				continue;
74196845Smarkm			} else {
74296845Smarkm				if (strcmp(line, ".") == 0)
74396845Smarkm					continue;
74496845Smarkm				sbuf_append(whatis_proto, "- ", 2);
74596845Smarkm				state = STATE_MDOCDESC;
74696845Smarkm			}
74796845Smarkm			/* fall through */
74896845Smarkm		/*
74996845Smarkm		 * Inside a new-style .Sh NAME section (after the .Nm-s).
75096845Smarkm		 */
75196845Smarkm		case STATE_MDOCDESC:
75296845Smarkm			if (strncmp(line, ".Sh", 3) == 0)
75396845Smarkm				break;
75496845Smarkm			trim_rhs(line);
75596845Smarkm			if (strcmp(line, ".") == 0)
75696845Smarkm				continue;
75796845Smarkm			process_mdoc_line(line);
75896845Smarkm			continue;
75996845Smarkm		}
76096845Smarkm		break;
76196845Smarkm	}
76296845Smarkm	gzclose(in);
76396845Smarkm	sbuf_strip(whatis_proto, " \t.-");
76496845Smarkm	line = sbuf_content(whatis_proto);
76596845Smarkm	/*
76696845Smarkm	 * line now contains the appropriate data, but without
76796845Smarkm	 * the proper indentation or the section appended to each name.
76896845Smarkm	 */
76996845Smarkm	descr = strstr(line, " - ");
77096845Smarkm	if (descr == NULL) {
77196845Smarkm		descr = strchr(line, ' ');
77296845Smarkm		if (descr == NULL) {
77396845Smarkm			if (verbose)
77496845Smarkm				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
77596845Smarkm			return;
77696845Smarkm		}
77796845Smarkm		*descr++ = '\0';
77896845Smarkm	} else {
77996845Smarkm		*descr = '\0';
78096845Smarkm		descr += 3;
78196845Smarkm	}
78296845Smarkm	names = sl_init();
78396845Smarkm	collect_names(names, line);
78496845Smarkm	sbuf_clear(whatis_final);
78596845Smarkm	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
78696845Smarkm		/*
78796845Smarkm		 * Add the page name since that's the only thing that
78896845Smarkm		 * man(1) will find.
78996845Smarkm		 */
79096845Smarkm		add_whatis_name(page->name, page->suffix);
79196845Smarkm	}
79296845Smarkm	for (i = 0; i < names->sl_cur; i++)
79396845Smarkm		add_whatis_name(names->sl_str[i], page->suffix);
79496845Smarkm	sl_free(names, 0);
79596845Smarkm	sbuf_retract(whatis_final, 2);		/* remove last ", " */
79696845Smarkm	while (sbuf_length(whatis_final) < indent)
79796845Smarkm		sbuf_append(whatis_final, " ", 1);
79896845Smarkm	sbuf_append(whatis_final, " - ", 3);
79996845Smarkm	sbuf_append_str(whatis_final, skip_spaces(descr));
80096845Smarkm	sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
80196845Smarkm}
80296845Smarkm
80396845Smarkm/*
80496845Smarkm * Sorts pages first by inode number, then by name.
80596845Smarkm */
80696845Smarkmstatic int
80796845Smarkmpagesort(const void *a, const void *b)
80896845Smarkm{
80996845Smarkm	struct page_info *p1 = *(struct page_info **) a;
81096845Smarkm	struct page_info *p2 = *(struct page_info **) b;
81196845Smarkm	if (p1->inode == p2->inode)
81296845Smarkm		return strcmp(p1->name, p2->name);
81396845Smarkm	return p1->inode - p2->inode;
81496845Smarkm}
81596845Smarkm
81696845Smarkm/*
81796845Smarkm * Processes a single man section.
81896845Smarkm */
81996845Smarkmstatic void
82096845Smarkmprocess_section(char *section_dir)
82196845Smarkm{
82296845Smarkm	struct dirent **entries;
82396845Smarkm	int nentries;
82496845Smarkm	struct page_info **pages;
82596845Smarkm	int npages = 0;
82696845Smarkm	int i;
82797976Skeramida	ino_t prev_inode = 0;
82896845Smarkm
82996845Smarkm	if (verbose)
83096845Smarkm		fprintf(stderr, "  %s\n", section_dir);
83196845Smarkm
83296845Smarkm	/*
83396845Smarkm	 * scan the man section directory for pages
83496845Smarkm	 */
83596845Smarkm	nentries = scandir(section_dir, &entries, NULL, alphasort);
83696845Smarkm	if (nentries < 0) {
83796845Smarkm		warn("%s", section_dir);
83896845Smarkm		exit_code = 1;
83996845Smarkm		return;
84096845Smarkm	}
84196845Smarkm	/*
84296845Smarkm	 * collect information about man pages
84396845Smarkm	 */
84496845Smarkm	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
84596845Smarkm	for (i = 0; i < nentries; i++) {
84696845Smarkm		struct page_info *info = new_page_info(section_dir, entries[i]);
84796845Smarkm		if (info != NULL)
84896845Smarkm			pages[npages++] = info;
84996845Smarkm		free(entries[i]);
85096845Smarkm	}
85196845Smarkm	free(entries);
85296845Smarkm	qsort(pages, npages, sizeof(struct page_info *), pagesort);
85396845Smarkm	/*
85496845Smarkm	 * process each unique page
85596845Smarkm	 */
85696845Smarkm	for (i = 0; i < npages; i++) {
85796845Smarkm		struct page_info *page = pages[i];
85896845Smarkm		if (page->inode != prev_inode) {
85996845Smarkm			prev_inode = page->inode;
86096845Smarkm			if (verbose)
86196845Smarkm				fprintf(stderr, "	reading %s\n", page->filename);
86296845Smarkm			process_page(page, section_dir);
86396845Smarkm		} else if (verbose)
86496845Smarkm			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
86596845Smarkm		free_page_info(page);
86696845Smarkm	}
86796845Smarkm	free(pages);
86896845Smarkm}
86996845Smarkm
/*
 * scandir(3) selection routine: returns 1 when the entry is named like
 * a man page section, i.e. "man" followed only by letters and digits
 * (possibly none), and 0 otherwise.
 */
static int
select_sections(struct dirent *entry)
{
	char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	/* The cast keeps 8-bit characters inside the domain of isalnum(). */
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return 0;
	}
	return 1;
}
88696845Smarkm
88796845Smarkm/*
88896845Smarkm * Processes a single top-level man directory by finding all the
88996845Smarkm * sub-directories named man* and processing each one in turn.
89096845Smarkm */
89196845Smarkmstatic void
89296845Smarkmprocess_mandir(char *dir_name)
89396845Smarkm{
89496845Smarkm	struct dirent **entries;
89596845Smarkm	int nsections;
89696845Smarkm	FILE *fp = NULL;
89796845Smarkm	int i;
89896858Sru	struct stat st;
89996845Smarkm
90096845Smarkm	if (already_visited(dir_name))
90196845Smarkm		return;
90296845Smarkm	if (verbose)
90396845Smarkm		fprintf(stderr, "man directory %s\n", dir_name);
90496845Smarkm	nsections = scandir(dir_name, &entries, select_sections, alphasort);
90596845Smarkm	if (nsections < 0) {
90696845Smarkm		warn("%s", dir_name);
90796845Smarkm		exit_code = 1;
90896845Smarkm		return;
90996845Smarkm	}
91096845Smarkm	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
91196845Smarkm		return;
91296845Smarkm	for (i = 0; i < nsections; i++) {
91396845Smarkm		char section_dir[MAXPATHLEN];
91496845Smarkm		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
91596845Smarkm		process_section(section_dir);
91696858Sru		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
91796858Sru		    entries[i]->d_name, machine);
91896858Sru		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
91996858Sru			process_section(section_dir);
92096845Smarkm		free(entries[i]);
92196845Smarkm	}
92296845Smarkm	free(entries);
92396845Smarkm	if (common_output == NULL)
92496845Smarkm		finish_whatis(fp, dir_name);
92596845Smarkm}
92696845Smarkm
92796845Smarkm/*
92896845Smarkm * Processes one argument, which may be a colon-separated list of
92996845Smarkm * directories.
93096845Smarkm */
93196845Smarkmstatic void
93296845Smarkmprocess_argument(const char *arg)
93396845Smarkm{
93496845Smarkm	char *dir;
93596845Smarkm	char *mandir;
93696845Smarkm	char *parg;
93796845Smarkm
93896845Smarkm	parg = strdup(arg);
93996845Smarkm	if (parg == NULL)
94096845Smarkm		err(1, "out of memory");
94196845Smarkm	while ((dir = strsep(&parg, ":")) != NULL) {
94296845Smarkm		if (locale != NULL) {
94396845Smarkm			asprintf(&mandir, "%s/%s", dir, locale);
94496845Smarkm			process_mandir(mandir);
94596845Smarkm			free(mandir);
94696845Smarkm			if (lang_locale != NULL) {
94796845Smarkm				asprintf(&mandir, "%s/%s", dir, lang_locale);
94896845Smarkm				process_mandir(mandir);
94996845Smarkm				free(mandir);
95096845Smarkm			}
95196845Smarkm		} else {
95296845Smarkm			process_mandir(dir);
95396845Smarkm		}
95496845Smarkm	}
95596845Smarkm	free(parg);
95696845Smarkm}
95796845Smarkm
95896845Smarkm
95996845Smarkmint
96096845Smarkmmain(int argc, char **argv)
96196845Smarkm{
96296845Smarkm	int opt;
96396845Smarkm	extern int optind;
96496845Smarkm	extern char *optarg;
96596845Smarkm	FILE *fp = NULL;
96696845Smarkm
96796845Smarkm	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
96896845Smarkm		switch (opt) {
96996845Smarkm		case 'a':
97096845Smarkm			append++;
97196845Smarkm			break;
97296845Smarkm		case 'i':
97396845Smarkm			indent = atoi(optarg);
97496845Smarkm			break;
97596845Smarkm		case 'n':
97696845Smarkm			whatis_name = optarg;
97796845Smarkm			break;
97896845Smarkm		case 'o':
97996845Smarkm			common_output = optarg;
98096845Smarkm			break;
98196845Smarkm		case 'v':
98296845Smarkm			verbose++;
98396845Smarkm			break;
98496845Smarkm		case 'L':
98596845Smarkm			locale = getenv("LC_ALL");
98696845Smarkm			if (locale == NULL)
98796845Smarkm				locale = getenv("LC_CTYPE");
98896845Smarkm			if (locale == NULL)
98996845Smarkm				locale = getenv("LANG");
99096845Smarkm			if (locale != NULL) {
99196845Smarkm				char *sep = strchr(locale, '_');
99296845Smarkm				if (sep != NULL && isupper(sep[1]) &&
99396845Smarkm				    isupper(sep[2])) {
99496845Smarkm					asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]);
99596845Smarkm				}
99696845Smarkm			}
99796845Smarkm			break;
99896845Smarkm		default:
99996845Smarkm			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
100096845Smarkm			exit(1);
100196845Smarkm		}
100296845Smarkm	}
100396845Smarkm
100496845Smarkm	signal(SIGINT, trap_signal);
100596845Smarkm	signal(SIGHUP, trap_signal);
100696845Smarkm	signal(SIGQUIT, trap_signal);
100796845Smarkm	signal(SIGTERM, trap_signal);
100896845Smarkm	SLIST_INIT(&visited_dirs);
100996845Smarkm	whatis_proto = new_sbuf();
101096845Smarkm	whatis_final = new_sbuf();
101196845Smarkm
101296858Sru	if ((machine = getenv("MACHINE")) == NULL)
101396858Sru		machine = MACHINE;
101496858Sru
101596845Smarkm	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
101696845Smarkm		err(1, "%s", common_output);
101796845Smarkm	if (optind == argc) {
101896845Smarkm		const char *manpath = getenv("MANPATH");
101996845Smarkm		if (manpath == NULL)
102096845Smarkm			manpath = DEFAULT_MANPATH;
102196845Smarkm		process_argument(manpath);
102296845Smarkm	} else {
102396845Smarkm		while (optind < argc)
102496845Smarkm			process_argument(argv[optind++]);
102596845Smarkm	}
102696845Smarkm	if (common_output != NULL)
102796845Smarkm		finish_output(fp, common_output);
102896845Smarkm	exit(exit_code);
102996845Smarkm}
1030