catman.c revision 139183
196845Smarkm/*-
296845Smarkm * Copyright (c) 2002 John Rochester
396845Smarkm * All rights reserved.
496845Smarkm *
596845Smarkm * Redistribution and use in source and binary forms, with or without
696845Smarkm * modification, are permitted provided that the following conditions
796845Smarkm * are met:
896845Smarkm * 1. Redistributions of source code must retain the above copyright
996845Smarkm *    notice, this list of conditions and the following disclaimer,
1096845Smarkm *    in this position and unchanged.
1196845Smarkm * 2. Redistributions in binary form must reproduce the above copyright
1296845Smarkm *    notice, this list of conditions and the following disclaimer in the
1396845Smarkm *    documentation and/or other materials provided with the distribution.
1496845Smarkm * 3. The name of the author may not be used to endorse or promote products
1596845Smarkm *    derived from this software without specific prior written permission
1696845Smarkm *
1796845Smarkm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1896845Smarkm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1996845Smarkm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2096845Smarkm * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2196845Smarkm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2296845Smarkm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2396845Smarkm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2496845Smarkm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2596845Smarkm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2696845Smarkm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2796845Smarkm */
2896845Smarkm
2996845Smarkm#include <sys/cdefs.h>
3096845Smarkm__FBSDID("$FreeBSD: head/usr.bin/catman/catman.c 139183 2004-12-22 15:25:51Z ru $");
3196845Smarkm
3296845Smarkm#include <sys/types.h>
3396845Smarkm#include <sys/stat.h>
3496845Smarkm#include <sys/param.h>
3596845Smarkm
3696845Smarkm#include <ctype.h>
3796845Smarkm#include <dirent.h>
3896845Smarkm#include <err.h>
3996845Smarkm#include <fcntl.h>
40116136Sache#include <locale.h>
41116136Sache#include <langinfo.h>
42139182Sru#include <libgen.h>
4396845Smarkm#include <stdio.h>
4496845Smarkm#include <stdlib.h>
4596845Smarkm#include <string.h>
4696845Smarkm#include <unistd.h>
4796845Smarkm
/* Man hierarchy processed when MANPATH is not set in the environment. */
#define DEFAULT_MANPATH		"/usr/share/man"

/* Kinds of directory name distinguished by directory_type(). */
enum {
	TOP_LEVEL_DIR	= 0,	/* a top-level man directory */
	MAN_SECTION_DIR	= 1,	/* a man section directory */
	UNKNOWN		= 2	/* an unclassifiable directory */
};

/* Bit flags OR-ed together to form the result of test_path(). */
enum {
	TEST_EXISTS	= 0x01,
	TEST_DIR	= 0x02,
	TEST_FILE	= 0x04,
	TEST_READABLE	= 0x08,
	TEST_WRITABLE	= 0x10,
	TEST_EXECUTABLE	= 0x20
};
6096845Smarkm
/* -v flag: be verbose with warnings */
static int verbose;
/* -n flag: print out what would be done instead of actually doing it */
static int pretend;
/* -f flag: force overwriting all cat pages */
static int force;
/* -r flag: remove garbage pages */
static int rm_junk;
/* user's locale if -L is used */
static char *locale;
/* short form of locale */
static char *lang_locale;
/* exit code to use when finished */
static int exit_code;
6996845Smarkm
/*
 * Output device handed to nroff through its -T option.
 */
static const char *nroff_device = "ascii";

/*
 * Codeset-to-device table, stored as consecutive (codeset name,
 * nroff device) pairs and terminated by a single NULL.
 */
static const char *locale_device[] = {
	"KOI8-R",	"koi8-r",
	"ISO8859-1",	"latin1",
	"ISO8859-15",	"latin1",
	NULL
};
8496845Smarkm
/*
 * Compression support.  The *_CMD names are the compressors used to
 * write a cat page, the *_EXT names are the matching file suffixes and
 * the *CAT_CMD names are the prefixes glued in front of "cat" to build
 * the decompressing reader ("bzcat", "zcat").
 */
#define	BZ2_CMD		"bzip2"
#define	BZ2_EXT		".bz2"
#define	BZ2CAT_CMD	"bz"
#define	GZ_CMD		"gzip"
#define	GZ_EXT		".gz"
#define	GZCAT_CMD	"z"

/* How a man page source is compressed. */
enum Ziptype {
	NONE,
	BZIP,
	GZIP
};
92106120Sobrien
/* Real user id of the caller, used by test_path() for permission checks. */
static uid_t uid;
/* Group list of the caller and the number of valid entries in it. */
static gid_t gids[NGROUPS_MAX];
static int ngids;
/* Descriptor for the directory the program was started in. */
static int starting_dir;
/* Temporary output file currently being written; empty when there is none. */
static char tmp_file[MAXPATHLEN];
/*
 * Result of the most recent stat() done by test_path().  Only used
 * within this file, so it gets internal linkage like the other globals.
 */
static struct stat test_st;
9996845Smarkm
/*
 * One link in a hashtable chain.  Each table slot holds a singly
 * linked list of these, identified by the (inode, device) pair.
 */
struct hash_entry {
	ino_t		inode_number;	/* first half of the key */
	dev_t		device_number;	/* second half of the key */
	const char	*data;		/* string stored under the key */
	struct hash_entry *next;	/* following entry, or NULL */
};
10996845Smarkm
/*
 * Number of slots in each hashtable.  It has to stay a power of two
 * because HASH_MASK is used to reduce a key to a slot index.
 */
#define HASHTABLE_ALLOC	(1 << 14)
#define HASH_MASK	(HASHTABLE_ALLOC - 1)

/* Directories already processed, and cat pages already generated. */
static struct hash_entry *visited[HASHTABLE_ALLOC];
static struct hash_entry *links[HASHTABLE_ALLOC];
11596845Smarkm
11696845Smarkm/*
11796845Smarkm * Inserts a string into a hashtable keyed by inode & device number.
11896845Smarkm */
11996845Smarkmstatic void
12096845Smarkminsert_hashtable(struct hash_entry **table,
12196845Smarkm    ino_t inode_number,
12296845Smarkm    dev_t device_number,
12396845Smarkm    const char *data)
12496845Smarkm{
12596845Smarkm	struct hash_entry *new_entry;
12696845Smarkm	struct hash_entry **chain;
12796845Smarkm
12896845Smarkm	new_entry = (struct hash_entry *) malloc(sizeof(struct hash_entry));
12996845Smarkm	if (new_entry == NULL)
13096845Smarkm		err(1, "can't insert into hashtable");
13196845Smarkm	chain = &table[inode_number & HASH_MASK];
13296845Smarkm	new_entry->inode_number = inode_number;
13396845Smarkm	new_entry->device_number = device_number;
13496845Smarkm	new_entry->data = data;
13596845Smarkm	new_entry->next = *chain;
13696845Smarkm	*chain = new_entry;
13796845Smarkm}
13896845Smarkm
13996845Smarkm/*
14096845Smarkm * Finds a string in a hashtable keyed by inode & device number.
14196845Smarkm */
14296845Smarkmstatic const char *
14396845Smarkmfind_hashtable(struct hash_entry **table,
14496845Smarkm    ino_t inode_number,
14596845Smarkm    dev_t device_number)
14696845Smarkm{
14796845Smarkm	struct hash_entry *chain;
14896845Smarkm
14996845Smarkm	chain = table[inode_number & HASH_MASK];
15096845Smarkm	while (chain != NULL) {
15196845Smarkm		if (chain->inode_number == inode_number &&
15296845Smarkm		    chain->device_number == device_number)
15396845Smarkm			return chain->data;
15496845Smarkm		chain = chain->next;
15596845Smarkm	}
15696845Smarkm	return NULL;
15796845Smarkm}
15896845Smarkm
15996845Smarkmstatic void
16096845Smarkmtrap_signal(int sig __unused)
16196845Smarkm{
16296845Smarkm	if (tmp_file[0] != '\0')
16396845Smarkm		unlink(tmp_file);
16496845Smarkm	exit(1);
16596845Smarkm}
16696845Smarkm
16796845Smarkm/*
16896845Smarkm * Deals with junk files in the man or cat section directories.
16996845Smarkm */
17096845Smarkmstatic void
17196845Smarkmjunk(const char *mandir, const char *name, const char *reason)
17296845Smarkm{
17396845Smarkm	if (verbose)
17496845Smarkm		fprintf(stderr, "%s/%s: %s\n", mandir, name, reason);
17596845Smarkm	if (rm_junk) {
17696845Smarkm		fprintf(stderr, "rm %s/%s\n", mandir, name);
17796845Smarkm		if (!pretend && unlink(name) < 0)
17896845Smarkm			warn("%s/%s", mandir, name);
17996845Smarkm	}
18096845Smarkm}
18196845Smarkm
18296845Smarkm/*
18396845Smarkm * Returns TOP_LEVEL_DIR for .../man, MAN_SECTION_DIR for .../manXXX,
18496845Smarkm * and UNKNOWN for everything else.
18596845Smarkm */
18696845Smarkmstatic int
18796845Smarkmdirectory_type(char *dir)
18896845Smarkm{
18996845Smarkm	char *p;
19096845Smarkm
19196845Smarkm	for (;;) {
19296845Smarkm		p = strrchr(dir, '/');
19396845Smarkm		if (p == NULL || p[1] != '\0')
19496845Smarkm			break;
19596845Smarkm		*p = '\0';
19696845Smarkm	}
19796845Smarkm	if (p == NULL)
19896845Smarkm		p = dir;
19996845Smarkm	else
20096845Smarkm		p++;
20196845Smarkm	if (strncmp(p, "man", 3) == 0) {
20296845Smarkm		p += 3;
20396845Smarkm		if (*p == '\0')
20496845Smarkm			return TOP_LEVEL_DIR;
205116137Sache		while (isalnum((unsigned char)*p) || *p == '_') {
20696845Smarkm			if (*++p == '\0')
20796845Smarkm				return MAN_SECTION_DIR;
20896845Smarkm		}
20996845Smarkm	}
21096845Smarkm	return UNKNOWN;
21196845Smarkm}
21296845Smarkm
/*
 * Tests whether the given file name (without a preceding path)
 * is a proper man page name (like "mk-amd-map.8.gz").
 *
 * A name may contain alphanumerics, '_', '-', '+', '[', ':' and '.'.
 * Only alphanumerics, '_' and '.' may follow the dot that introduces
 * the section, and the last '.' can be neither the first nor the last
 * character.  Returns 1 for a valid name and 0 otherwise.
 */
static int
is_manpage_name(const char *name)
{
	const char *lastdot = NULL;
	const char *n;

	for (n = name; *n != '\0'; n++) {
		if (isalnum((unsigned char)*n))
			continue;
		switch (*n) {
		case '_':
			break;
		case '-':
		case '+':
		case '[':
		case ':':
			/* these invalidate any dot seen so far */
			lastdot = NULL;
			break;
		case '.':
			lastdot = n;
			break;
		default:
			return 0;
		}
	}
	/*
	 * Check for NULL before the ordering comparison: relational
	 * operators on a null pointer are undefined.
	 */
	return lastdot != NULL && lastdot > name && lastdot + 1 < n;
}
24796845Smarkm
24896845Smarkmstatic int
249106120Sobrienis_bzipped(char *name)
250106120Sobrien{
251106120Sobrien	int len = strlen(name);
252106120Sobrien	return len >= 5 && strcmp(&name[len - 4], BZ2_EXT) == 0;
253106120Sobrien}
254106120Sobrien
255106120Sobrienstatic int
25696845Smarkmis_gzipped(char *name)
25796845Smarkm{
25896845Smarkm	int len = strlen(name);
259106120Sobrien	return len >= 4 && strcmp(&name[len - 3], GZ_EXT) == 0;
26096845Smarkm}
26196845Smarkm
/*
 * Converts manXXX to catXXX.
 *
 * Returns a newly allocated copy of section whose first three
 * characters are replaced by "cat"; the caller frees it.  A name
 * shorter than three characters is copied unchanged.  Exits the
 * program if no memory is available.
 */
static char *
get_cat_section(char *section)
{
	size_t len = strlen(section);
	char *cat_section = malloc(len + 1);

	if (cat_section == NULL)
		err(1, "out of memory");
	memcpy(cat_section, section, len + 1);
	/* Never write past the end of a name that is too short. */
	if (len >= 3)
		memcpy(cat_section, "cat", 3);
	return cat_section;
}
27496845Smarkm
27596845Smarkm/*
27696845Smarkm * Tests to see if the given directory has already been visited.
27796845Smarkm */
27896845Smarkmstatic int
27996845Smarkmalready_visited(char *mandir, char *dir, int count_visit)
28096845Smarkm{
28196845Smarkm	struct stat st;
28296845Smarkm
28396845Smarkm	if (stat(dir, &st) < 0) {
28496845Smarkm		if (mandir != NULL)
28596845Smarkm			warn("%s/%s", mandir, dir);
28696845Smarkm		else
28796845Smarkm			warn("%s", dir);
28896845Smarkm		exit_code = 1;
28996845Smarkm		return 1;
29096845Smarkm	}
29196845Smarkm	if (find_hashtable(visited, st.st_ino, st.st_dev) != NULL) {
29296845Smarkm		if (mandir != NULL)
29396845Smarkm			warnx("already visited %s/%s", mandir, dir);
29496845Smarkm		else
29596845Smarkm			warnx("already visited %s", dir);
29696845Smarkm		return 1;
29796845Smarkm	}
29896845Smarkm	if (count_visit)
29996845Smarkm		insert_hashtable(visited, st.st_ino, st.st_dev, "");
30096845Smarkm	return 0;
30196845Smarkm}
30296845Smarkm
30396845Smarkm/*
30496845Smarkm * Returns a set of TEST_* bits describing a file's type and permissions.
30596845Smarkm * If mod_time isn't NULL, it will contain the file's modification time.
30696845Smarkm */
30796845Smarkmstatic int
30896845Smarkmtest_path(char *name, time_t *mod_time)
30996845Smarkm{
31096845Smarkm	int result;
31196845Smarkm
31296845Smarkm	if (stat(name, &test_st) < 0)
31396845Smarkm		return 0;
31496845Smarkm	result = TEST_EXISTS;
31596845Smarkm	if (mod_time != NULL)
31696845Smarkm		*mod_time = test_st.st_mtime;
31796845Smarkm	if (S_ISDIR(test_st.st_mode))
31896845Smarkm		result |= TEST_DIR;
31996845Smarkm	else if (S_ISREG(test_st.st_mode))
32096845Smarkm		result |= TEST_FILE;
32196845Smarkm	if (test_st.st_uid == uid) {
32296845Smarkm		test_st.st_mode >>= 6;
32396845Smarkm	} else {
32496845Smarkm		int i;
32596845Smarkm		for (i = 0; i < ngids; i++) {
32696845Smarkm			if (test_st.st_gid == gids[i]) {
32796845Smarkm				test_st.st_mode >>= 3;
32896845Smarkm				break;
32996845Smarkm			}
33096845Smarkm		}
33196845Smarkm	}
33296845Smarkm	if (test_st.st_mode & S_IROTH)
33396845Smarkm		result |= TEST_READABLE;
33496845Smarkm	if (test_st.st_mode & S_IWOTH)
33596845Smarkm		result |= TEST_WRITABLE;
33696845Smarkm	if (test_st.st_mode & S_IXOTH)
33796845Smarkm		result |= TEST_EXECUTABLE;
33896845Smarkm	return result;
33996845Smarkm}
34096845Smarkm
/*
 * Returns 1 when path itself (not what it points to) is a symbolic
 * link, and 0 when it is anything else or cannot be examined.
 */
static int
is_symlink(char *path)
{
	struct stat sb;

	if (lstat(path, &sb) < 0)
		return 0;
	return S_ISLNK(sb.st_mode) ? 1 : 0;
}
35196845Smarkm
35296845Smarkm/*
35396845Smarkm * Tests to see if the given directory can be written to.
35496845Smarkm */
35596845Smarkmstatic void
35696845Smarkmcheck_writable(char *mandir)
35796845Smarkm{
35896845Smarkm	if (verbose && !(test_path(mandir, NULL) & TEST_WRITABLE))
35996845Smarkm		fprintf(stderr, "%s: not writable - will only be able to write to existing cat directories\n", mandir);
36096845Smarkm}
36196845Smarkm
36296845Smarkm/*
36396845Smarkm * If the directory exists, attempt to make it writable, otherwise
36496845Smarkm * attempt to create it.
36596845Smarkm */
36696845Smarkmstatic int
36796845Smarkmmake_writable_dir(char *mandir, char *dir)
36896845Smarkm{
36996845Smarkm	int test;
37096845Smarkm
37196845Smarkm	if ((test = test_path(dir, NULL)) != 0) {
37296845Smarkm		if (!(test & TEST_WRITABLE) && chmod(dir, 0755) < 0) {
37396845Smarkm			warn("%s/%s: chmod", mandir, dir);
37496845Smarkm			exit_code = 1;
37596845Smarkm			return 0;
37696845Smarkm		}
37796845Smarkm	} else {
37896845Smarkm		if (verbose || pretend)
37996845Smarkm			fprintf(stderr, "mkdir %s\n", dir);
38096845Smarkm		if (!pretend) {
38196845Smarkm			unlink(dir);
38296845Smarkm			if (mkdir(dir, 0755) < 0) {
38396845Smarkm				warn("%s/%s: mkdir", mandir, dir);
38496845Smarkm				exit_code = 1;
38596845Smarkm				return 0;
38696845Smarkm			}
38796845Smarkm		}
38896845Smarkm	}
38996845Smarkm	return 1;
39096845Smarkm}
39196845Smarkm
39296845Smarkm/*
39396845Smarkm * Processes a single man page source by using nroff to create
39496845Smarkm * the preformatted cat page.
39596845Smarkm */
39696845Smarkmstatic void
397106120Sobrienprocess_page(char *mandir, char *src, char *cat, enum Ziptype zipped)
39896845Smarkm{
39996845Smarkm	int src_test, cat_test;
40096845Smarkm	time_t src_mtime, cat_mtime;
40196845Smarkm	char cmd[MAXPATHLEN];
40296845Smarkm	dev_t src_dev;
40396845Smarkm	ino_t src_ino;
40496845Smarkm	const char *link_name;
40596845Smarkm
40696845Smarkm	src_test = test_path(src, &src_mtime);
40796845Smarkm	if (!(src_test & (TEST_FILE|TEST_READABLE))) {
40896845Smarkm		if (!(src_test & TEST_DIR)) {
40996845Smarkm			warnx("%s/%s: unreadable", mandir, src);
41096845Smarkm			exit_code = 1;
41196845Smarkm			if (rm_junk && is_symlink(src))
41296845Smarkm				junk(mandir, src, "bogus symlink");
41396845Smarkm		}
41496845Smarkm		return;
41596845Smarkm	}
41696845Smarkm	src_dev = test_st.st_dev;
41796845Smarkm	src_ino = test_st.st_ino;
41896845Smarkm	cat_test = test_path(cat, &cat_mtime);
41996845Smarkm	if (cat_test & (TEST_FILE|TEST_READABLE)) {
42096845Smarkm		if (!force && cat_mtime >= src_mtime) {
42196845Smarkm			if (verbose) {
42296845Smarkm				fprintf(stderr, "\t%s/%s: up to date\n",
42396845Smarkm				    mandir, src);
42496845Smarkm			}
42596845Smarkm			return;
42696845Smarkm		}
42796845Smarkm	}
42896845Smarkm	/*
42996845Smarkm	 * Is the man page a link to one we've already processed?
43096845Smarkm	 */
43196845Smarkm	if ((link_name = find_hashtable(links, src_ino, src_dev)) != NULL) {
43296845Smarkm		if (verbose || pretend) {
43396845Smarkm			fprintf(stderr, "%slink %s -> %s\n",
43496845Smarkm			    verbose ? "\t" : "", cat, link_name);
43596845Smarkm		}
43696845Smarkm		if (!pretend)
43796845Smarkm			link(link_name, cat);
43896845Smarkm		return;
43996845Smarkm	}
44096845Smarkm	insert_hashtable(links, src_ino, src_dev, strdup(cat));
44196845Smarkm	if (verbose || pretend) {
44296845Smarkm		fprintf(stderr, "%sformat %s -> %s\n",
44396845Smarkm		    verbose ? "\t" : "", src, cat);
44496845Smarkm		if (pretend)
44596845Smarkm			return;
44696845Smarkm	}
44796845Smarkm	snprintf(tmp_file, sizeof tmp_file, "%s.tmp", cat);
44896845Smarkm	snprintf(cmd, sizeof cmd,
449115207Sru	    "%scat %s | tbl | nroff -T%s -man | col | %s > %s.tmp",
450106120Sobrien	    zipped == BZIP ? BZ2CAT_CMD : zipped == GZIP ? GZCAT_CMD : "",
451115207Sru	    src, nroff_device,
452115207Sru	    zipped == BZIP ? BZ2_CMD : zipped == GZIP ? GZ_CMD : "cat",
453115207Sru	    cat);
45496845Smarkm	if (system(cmd) != 0)
45596845Smarkm		err(1, "formatting pipeline");
45696845Smarkm	if (rename(tmp_file, cat) < 0)
45796845Smarkm		warn("%s", cat);
45896845Smarkm	tmp_file[0] = '\0';
45996845Smarkm}
46096845Smarkm
46196845Smarkm/*
46296845Smarkm * Scan the man section directory for pages and process each one,
46396845Smarkm * then check for junk in the corresponding cat section.
46496845Smarkm */
46596845Smarkmstatic void
46696845Smarkmscan_section(char *mandir, char *section, char *cat_section)
46796845Smarkm{
46896845Smarkm	struct dirent **entries;
46996845Smarkm	char **expected = NULL;
47096845Smarkm	int npages;
47196845Smarkm	int nexpected = 0;
47296845Smarkm	int i, e;
473106120Sobrien	enum Ziptype zipped;
47496845Smarkm	char *page_name;
47596845Smarkm	char page_path[MAXPATHLEN];
47696845Smarkm	char cat_path[MAXPATHLEN];
477106120Sobrien	char zip_path[MAXPATHLEN];
47896845Smarkm
47996845Smarkm	/*
48096845Smarkm	 * scan the man section directory for pages
48196845Smarkm	 */
48296845Smarkm	npages = scandir(section, &entries, NULL, alphasort);
48396845Smarkm	if (npages < 0) {
48496845Smarkm		warn("%s/%s", mandir, section);
48596845Smarkm		exit_code = 1;
48696845Smarkm		return;
48796845Smarkm	}
48896845Smarkm	if (verbose || rm_junk) {
48996845Smarkm		/*
49096845Smarkm		 * Maintain a list of all cat pages that should exist,
49196845Smarkm		 * corresponding to existing man pages.
49296845Smarkm		 */
49396845Smarkm		expected = (char **) calloc(npages, sizeof(char *));
49496845Smarkm	}
49596845Smarkm	for (i = 0; i < npages; free(entries[i++])) {
49696845Smarkm		page_name = entries[i]->d_name;
49796845Smarkm		snprintf(page_path, sizeof page_path, "%s/%s", section,
49896845Smarkm		    page_name);
49996845Smarkm		if (!is_manpage_name(page_name)) {
50096845Smarkm			if (!(test_path(page_path, NULL) & TEST_DIR)) {
50196845Smarkm				junk(mandir, page_path,
50296845Smarkm				    "invalid man page name");
50396845Smarkm			}
50496845Smarkm			continue;
50596845Smarkm		}
506106120Sobrien		zipped = is_bzipped(page_name) ? BZIP :
507106120Sobrien		    is_gzipped(page_name) ? GZIP : NONE;
508106120Sobrien		if (zipped != NONE) {
50996845Smarkm			snprintf(cat_path, sizeof cat_path, "%s/%s",
51096845Smarkm			    cat_section, page_name);
51196845Smarkm			if (expected != NULL)
51296845Smarkm				expected[nexpected++] = strdup(page_name);
513106120Sobrien			process_page(mandir, page_path, cat_path, zipped);
51496845Smarkm		} else {
51596845Smarkm			/*
51696845Smarkm			 * We've got an uncompressed man page,
51796845Smarkm			 * check to see if there's a (preferred)
51896845Smarkm			 * compressed one.
51996845Smarkm			 */
520106120Sobrien			snprintf(zip_path, sizeof zip_path, "%s%s",
521106120Sobrien			    page_path, GZ_EXT);
522106120Sobrien			if (test_path(zip_path, NULL) != 0) {
52396845Smarkm				junk(mandir, page_path,
524106120Sobrien				    "man page unused due to existing " GZ_EXT);
52596845Smarkm			} else {
52696845Smarkm				if (verbose) {
52796845Smarkm					fprintf(stderr,
52896845Smarkm						"warning, %s is uncompressed\n",
52996845Smarkm						page_path);
53096845Smarkm				}
531115207Sru				snprintf(cat_path, sizeof cat_path, "%s/%s",
532115207Sru				    cat_section, page_name);
53396845Smarkm				if (expected != NULL) {
53496845Smarkm					asprintf(&expected[nexpected++],
535115207Sru					    "%s", page_name);
53696845Smarkm				}
537106120Sobrien				process_page(mandir, page_path, cat_path, NONE);
53896845Smarkm			}
53996845Smarkm		}
54096845Smarkm	}
54196845Smarkm	free(entries);
54296845Smarkm	if (expected == NULL)
54396845Smarkm	    return;
54496845Smarkm	/*
54596845Smarkm	 * scan cat sections for junk
54696845Smarkm	 */
54796845Smarkm	npages = scandir(cat_section, &entries, NULL, alphasort);
54896845Smarkm	e = 0;
54996845Smarkm	for (i = 0; i < npages; free(entries[i++])) {
55096845Smarkm		const char *junk_reason;
55196845Smarkm		int cmp = 1;
55296845Smarkm
55396845Smarkm		page_name = entries[i]->d_name;
55496845Smarkm		if (strcmp(page_name, ".") == 0 || strcmp(page_name, "..") == 0)
55596845Smarkm			continue;
55696845Smarkm		/*
55796845Smarkm		 * Keep the index into the expected cat page list
55896845Smarkm		 * ahead of the name we've found.
55996845Smarkm		 */
56096845Smarkm		while (e < nexpected &&
56196845Smarkm		    (cmp = strcmp(page_name, expected[e])) > 0)
56296845Smarkm			free(expected[e++]);
56396845Smarkm		if (cmp == 0)
56496845Smarkm			continue;
56596845Smarkm		/* we have an unexpected page */
566139182Sru		snprintf(cat_path, sizeof cat_path, "%s/%s", cat_section,
567139182Sru		    page_name);
56896845Smarkm		if (!is_manpage_name(page_name)) {
569139182Sru			if (test_path(cat_path, NULL) & TEST_DIR)
570139182Sru				continue;
57196845Smarkm			junk_reason = "invalid cat page name";
57296845Smarkm		} else if (!is_gzipped(page_name) && e + 1 < nexpected &&
57396845Smarkm		    strncmp(page_name, expected[e + 1], strlen(page_name)) == 0 &&
57496845Smarkm		    strlen(expected[e + 1]) == strlen(page_name) + 3) {
575106120Sobrien			junk_reason = "cat page unused due to existing " GZ_EXT;
57696845Smarkm		} else
57796845Smarkm			junk_reason = "cat page without man page";
57896845Smarkm		junk(mandir, cat_path, junk_reason);
57996845Smarkm	}
58096845Smarkm	free(entries);
58196845Smarkm	while (e < nexpected)
58296845Smarkm		free(expected[e++]);
58396845Smarkm	free(expected);
58496845Smarkm}
58596845Smarkm
58696845Smarkm
58796845Smarkm/*
58896845Smarkm * Processes a single man section.
58996845Smarkm */
59096845Smarkmstatic void
59196845Smarkmprocess_section(char *mandir, char *section)
59296845Smarkm{
59396845Smarkm	char *cat_section;
59496845Smarkm
59596845Smarkm	if (already_visited(mandir, section, 1))
59696845Smarkm		return;
59796845Smarkm	if (verbose)
59896845Smarkm		fprintf(stderr, "  section %s\n", section);
59996845Smarkm	cat_section = get_cat_section(section);
60096845Smarkm	if (make_writable_dir(mandir, cat_section))
60196845Smarkm		scan_section(mandir, section, cat_section);
602139182Sru	free(cat_section);
60396845Smarkm}
60496845Smarkm
60596845Smarkmstatic int
60696845Smarkmselect_sections(struct dirent *entry)
60796845Smarkm{
60896845Smarkm	return directory_type(entry->d_name) == MAN_SECTION_DIR;
60996845Smarkm}
61096845Smarkm
61196845Smarkm/*
61296845Smarkm * Processes a single top-level man directory.  If section isn't NULL,
61396845Smarkm * it will only process that section sub-directory, otherwise it will
61496845Smarkm * process all of them.
61596845Smarkm */
61696845Smarkmstatic void
61796845Smarkmprocess_mandir(char *dir_name, char *section)
61896845Smarkm{
61996845Smarkm	fchdir(starting_dir);
62096845Smarkm	if (already_visited(NULL, dir_name, section == NULL))
62196845Smarkm		return;
62296845Smarkm	check_writable(dir_name);
62396845Smarkm	if (verbose)
62496845Smarkm		fprintf(stderr, "man directory %s\n", dir_name);
62596845Smarkm	if (pretend)
62696845Smarkm		fprintf(stderr, "cd %s\n", dir_name);
62796845Smarkm	if (chdir(dir_name) < 0) {
62896845Smarkm		warn("%s: chdir", dir_name);
62996845Smarkm		exit_code = 1;
63096845Smarkm		return;
63196845Smarkm	}
63296845Smarkm	if (section != NULL) {
63396845Smarkm		process_section(dir_name, section);
63496845Smarkm	} else {
63596845Smarkm		struct dirent **entries;
63696845Smarkm		int nsections;
63796845Smarkm		int i;
63896845Smarkm
63996845Smarkm		nsections = scandir(".", &entries, select_sections, alphasort);
64096845Smarkm		if (nsections < 0) {
64196845Smarkm			warn("%s", dir_name);
64296845Smarkm			exit_code = 1;
64396845Smarkm			return;
64496845Smarkm		}
64596845Smarkm		for (i = 0; i < nsections; i++) {
64696845Smarkm			process_section(dir_name, entries[i]->d_name);
64796845Smarkm			free(entries[i]);
64896845Smarkm		}
64996845Smarkm		free(entries);
65096845Smarkm	}
65196845Smarkm}
65296845Smarkm
65396845Smarkm/*
65496845Smarkm * Processes one argument, which may be a colon-separated list of
65596845Smarkm * directories.
65696845Smarkm */
65796845Smarkmstatic void
65896845Smarkmprocess_argument(const char *arg)
65996845Smarkm{
66096845Smarkm	char *dir;
66196845Smarkm	char *mandir;
662139182Sru	char *section;
66396845Smarkm	char *parg;
66496845Smarkm
66596845Smarkm	parg = strdup(arg);
66696845Smarkm	if (parg == NULL)
66796845Smarkm		err(1, "out of memory");
66896845Smarkm	while ((dir = strsep(&parg, ":")) != NULL) {
66996845Smarkm		switch (directory_type(dir)) {
67096845Smarkm		case TOP_LEVEL_DIR:
67196845Smarkm			if (locale != NULL) {
67296845Smarkm				asprintf(&mandir, "%s/%s", dir, locale);
67396845Smarkm				process_mandir(mandir, NULL);
67496845Smarkm				free(mandir);
67596845Smarkm				if (lang_locale != NULL) {
67696845Smarkm					asprintf(&mandir, "%s/%s", dir,
67796845Smarkm					    lang_locale);
67896845Smarkm					process_mandir(mandir, NULL);
67996845Smarkm					free(mandir);
68096845Smarkm				}
68196845Smarkm			} else {
68296845Smarkm				process_mandir(dir, NULL);
68396845Smarkm			}
68496845Smarkm			break;
68596845Smarkm		case MAN_SECTION_DIR: {
686139182Sru			mandir = strdup(dirname(dir));
687139182Sru			section = strdup(basename(dir));
688139182Sru			process_mandir(mandir, section);
689139182Sru			free(mandir);
690139182Sru			free(section);
69196845Smarkm			break;
69296845Smarkm			}
69396845Smarkm		default:
69496845Smarkm			warnx("%s: directory name not in proper man form", dir);
69596845Smarkm			exit_code = 1;
69696845Smarkm		}
69796845Smarkm	}
69896845Smarkm	free(parg);
69996845Smarkm}
70096845Smarkm
70196845Smarkmstatic void
70296845Smarkmdetermine_locale(void)
70396845Smarkm{
70496845Smarkm	char *sep;
70596845Smarkm
706116136Sache	if ((locale = setlocale(LC_CTYPE, "")) == NULL) {
707116136Sache		warnx("-L option used, but no locale found\n");
70896845Smarkm		return;
70996845Smarkm	}
71096845Smarkm	sep = strchr(locale, '_');
711116136Sache	if (sep != NULL && isupper((unsigned char)sep[1])
712116136Sache			&& isupper((unsigned char)sep[2])) {
713139183Sru		asprintf(&lang_locale, "%.*s%s", (int)(sep - locale),
714139183Sru		    locale, &sep[3]);
71596845Smarkm	}
716116136Sache	sep = nl_langinfo(CODESET);
717116136Sache	if (sep != NULL && *sep != '\0' && strcmp(sep, "US-ASCII") != 0) {
71896845Smarkm		int i;
71996845Smarkm
72096845Smarkm		for (i = 0; locale_device[i] != NULL; i += 2) {
72196845Smarkm			if (strcmp(sep, locale_device[i]) == 0) {
72296845Smarkm				nroff_device = locale_device[i + 1];
72396845Smarkm				break;
72496845Smarkm			}
72596845Smarkm		}
72696845Smarkm	}
727116136Sache	if (verbose) {
728116136Sache		if (lang_locale != NULL)
729116136Sache			fprintf(stderr, "short locale is %s\n", lang_locale);
73096845Smarkm		fprintf(stderr, "nroff device is %s\n", nroff_device);
731116136Sache	}
73296845Smarkm}
73396845Smarkm
/*
 * Prints the command synopsis on stderr and exits with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	fprintf(stderr, "usage: %s [-fLnrv] [directories...]\n", prog);
	exit(1);
}
74096845Smarkm
74196845Smarkmint
74296845Smarkmmain(int argc, char **argv)
74396845Smarkm{
74496845Smarkm	int opt;
74596845Smarkm
74696845Smarkm	if ((uid = getuid()) == 0) {
74796845Smarkm		fprintf(stderr, "don't run %s as root, use:\n   echo", argv[0]);
74896845Smarkm		for (optind = 0; optind < argc; optind++) {
74996845Smarkm			fprintf(stderr, " %s", argv[optind]);
75096845Smarkm		}
75196845Smarkm		fprintf(stderr, " | nice -5 su -m man\n");
75296845Smarkm		exit(1);
75396845Smarkm	}
75496845Smarkm	while ((opt = getopt(argc, argv, "vnfLrh")) != -1) {
75596845Smarkm		switch (opt) {
75696845Smarkm		case 'f':
75796845Smarkm			force++;
75896845Smarkm			break;
75996845Smarkm		case 'L':
76096845Smarkm			determine_locale();
76196845Smarkm			break;
76296845Smarkm		case 'n':
76396845Smarkm			pretend++;
76496845Smarkm			break;
76596845Smarkm		case 'r':
76696845Smarkm			rm_junk++;
76796845Smarkm			break;
76896845Smarkm		case 'v':
76996845Smarkm			verbose++;
77096845Smarkm			break;
77196845Smarkm		default:
77296845Smarkm			usage();
77396845Smarkm			/* NOTREACHED */
77496845Smarkm		}
77596845Smarkm	}
77696845Smarkm	ngids = getgroups(NGROUPS_MAX, gids);
77796845Smarkm	if ((starting_dir = open(".", 0)) < 0) {
77896845Smarkm		err(1, ".");
77996845Smarkm	}
78096845Smarkm	umask(022);
78196845Smarkm	signal(SIGINT, trap_signal);
78296845Smarkm	signal(SIGHUP, trap_signal);
78396845Smarkm	signal(SIGQUIT, trap_signal);
78496845Smarkm	signal(SIGTERM, trap_signal);
78596845Smarkm	if (optind == argc) {
78696845Smarkm		const char *manpath = getenv("MANPATH");
78796845Smarkm		if (manpath == NULL)
78896845Smarkm			manpath = DEFAULT_MANPATH;
78996845Smarkm		process_argument(manpath);
79096845Smarkm	} else {
79196845Smarkm		while (optind < argc)
79296845Smarkm			process_argument(argv[optind++]);
79396845Smarkm	}
79496845Smarkm	exit(exit_code);
79596845Smarkm}
796