• Home
  • History
  • Annotate
  • only in this directory
makewhatis.c revision 97102
1229430Spfg/*-
2229430Spfg * Copyright (c) 2002 John Rochester
3229430Spfg * All rights reserved.
4229430Spfg *
5229430Spfg * Redistribution and use in source and binary forms, with or without
6229430Spfg * modification, are permitted provided that the following conditions
7229430Spfg * are met:
8229430Spfg * 1. Redistributions of source code must retain the above copyright
9229430Spfg *    notice, this list of conditions and the following disclaimer,
10229430Spfg *    in this position and unchanged.
11229430Spfg * 2. Redistributions in binary form must reproduce the above copyright
12229430Spfg *    notice, this list of conditions and the following disclaimer in the
13229430Spfg *    documentation and/or other materials provided with the distribution.
14229430Spfg * 3. The name of the author may not be used to endorse or promote products
15229430Spfg *    derived from this software without specific prior written permission
16229430Spfg *
17229430Spfg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18229430Spfg * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19229430Spfg * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20229430Spfg * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21229430Spfg * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22229430Spfg * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23229430Spfg * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24229430Spfg * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25229430Spfg * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26229430Spfg * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27229430Spfg */
28229430Spfg
29229430Spfg#include <sys/cdefs.h>
30229430Spfg__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 97102 2002-05-22 11:08:41Z ru $");
31229430Spfg
#include <sys/types.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/stat.h>

#include <ctype.h>
#include <dirent.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stringlist.h>
#include <unistd.h>
#include <zlib.h>
46229430Spfg
/* Man hierarchy processed when no directory argument and no MANPATH is given. */
#define	DEFAULT_MANPATH	"/usr/share/man"
/* Initial size of an sbuf, and of the line buffer used to re-read a whatis. */
#define	LINE_ALLOC	4096

/* Writable empty string; process_mdoc_line() points at it to emit nothing. */
static char blank[] = "";
51229430Spfg
/*
 * Per-file record built by new_page_info() for each candidate man page
 * found in a section directory.
 */
struct page_info {
	char	*filename;	/* "dir/entry" path of the file */
	char	*name;		/* entry name up to the section suffix */
	char	*suffix;	/* text after the last '.', ".gz" removed */
	int	 gzipped;	/* non-zero when the entry ended in ".gz" */
	ino_t	 inode;		/* st_ino of the file, used to spot links */
};
62229430Spfg
/*
 * One list node per man directory already handled, identified by its
 * device/inode pair so the same directory is not processed twice.
 */
struct visited_dir {
	dev_t	device;				/* st_dev of the directory */
	ino_t	inode;				/* st_ino of the directory */
	SLIST_ENTRY(visited_dir) next;		/* link in visited_dirs */
};
71229430Spfg
/*
 * A growable character buffer.
 */
struct sbuf {
	char	*content;	/* first byte of the allocation */
	char	*end;		/* one past the last byte of text held */
	char	*last;		/* final byte of the allocation */
};
80229430Spfg
/*
 * Drops the last amount characters of the sbuf by moving its end back.
 * No check is made that amount fits within the current length.
 */
#define	sbuf_retract(sbuf, amount)	((sbuf)->end -= (amount))

/*
 * Yields the number of characters currently held by the sbuf.
 */
#define	sbuf_length(sbuf)	((sbuf)->end - (sbuf)->content)

/*
 * Shape of a filtering copy routine: reads length characters at from,
 * writes the edited text at to, and returns the position just past
 * what it wrote.
 */
typedef char *edited_copy(char *from, char *to, int length);
93229430Spfg
/* Settings taken from the command line and the environment. */
static int	 append;		/* -a: keep the lines of an existing whatis */
static int	 verbose;		/* -v: report progress and skipped files */
static int	 indent = 24;		/* -i: column at which descriptions start */
static const char *whatis_name = "whatis"; /* -n: name of the output file */
static char	*common_output;		/* -o: single output file for all dirs */
static char	*locale;		/* -L: locale taken from the environment */
static char	*lang_locale;		/* locale with the "_XX" part removed */
static char	*machine;		/* per-machine subdirectory name */

static int	 exit_code;		/* status handed to exit() at the end */

/* Directories already processed; see already_visited(). */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);
106229430Spfg
107229430Spfg/*
108229430Spfg * While the whatis line is being formed, it is stored in whatis_proto.
109229430Spfg * When finished, it is reformatted into whatis_final and then appended
110229430Spfg * to whatis_lines.
111229430Spfg */
112229430Spfgstatic struct sbuf *whatis_proto;
113229430Spfgstatic struct sbuf *whatis_final;
114229430Spfgstatic StringList *whatis_lines;	/* collected output lines */
115229430Spfg
116229430Spfgstatic char tmp_file[MAXPATHLEN];	/* path of temporary file, if any */
117229430Spfg
/* Section headings accepted as the NAME section; NULL ends the list. */
static const char *name_section_titles[] = {
	"NAME",
	"Name",
	"NAMN",
	"BEZEICHNUNG",
	"\xcc\xbe\xbe\xce",
	"\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
	NULL
};

/* Two-letter mdoc(7) macros dropped from the text, packed back to back. */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
126229430Spfg
127229430Spfg/*
128229430Spfg * Frees a struct page_info and its content.
129229430Spfg */
130229430Spfgstatic void
131229430Spfgfree_page_info(struct page_info *info)
132229430Spfg{
133229430Spfg	free(info->filename);
134229430Spfg	free(info->name);
135229430Spfg	free(info->suffix);
136229430Spfg	free(info);
137229430Spfg}
138229430Spfg
139229430Spfg/*
140229430Spfg * Allocates and fills in a new struct page_info given the
141229430Spfg * name of the man section directory and the dirent of the file.
142229430Spfg * If the file is not a man page, returns NULL.
143229430Spfg */
144229430Spfgstatic struct page_info *
145229430Spfgnew_page_info(char *dir, struct dirent *dirent)
146229430Spfg{
147229430Spfg	struct page_info *info;
148229430Spfg	int basename_length;
149229430Spfg	char *suffix;
150229430Spfg	struct stat st;
151229430Spfg
152229430Spfg	info = (struct page_info *) malloc(sizeof(struct page_info));
153229430Spfg	if (info == NULL)
154229430Spfg		err(1, "malloc");
155229430Spfg	basename_length = strlen(dirent->d_name);
156229430Spfg	suffix = &dirent->d_name[basename_length];
157229430Spfg	asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
158229430Spfg	if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
159229430Spfg		suffix -= 3;
160229430Spfg		*suffix = '\0';
161229430Spfg	}
162229430Spfg	for (;;) {
163229430Spfg		if (--suffix == dirent->d_name || !isalnum(*suffix)) {
164229430Spfg			if (*suffix == '.')
165229430Spfg				break;
166229430Spfg			if (verbose)
167229430Spfg				warnx("%s: invalid man page name", info->filename);
168229430Spfg			free(info->filename);
169229430Spfg			free(info);
170229430Spfg			return NULL;
171229981Spfg		}
172229981Spfg	}
173229430Spfg	*suffix++ = '\0';
174229430Spfg	info->name = strdup(dirent->d_name);
175229430Spfg	info->suffix = strdup(suffix);
176229430Spfg	if (stat(info->filename, &st) < 0) {
177229430Spfg		warn("%s", info->filename);
178229430Spfg		free_page_info(info);
179229430Spfg		return NULL;
180229430Spfg	}
181229430Spfg	if (!S_ISREG(st.st_mode)) {
182229430Spfg		if (verbose && !S_ISDIR(st.st_mode))
183229430Spfg			warnx("%s: not a regular file", info->filename);
184229430Spfg		free_page_info(info);
185229430Spfg		return NULL;
186229430Spfg	}
187229430Spfg	info->inode = st.st_ino;
188229430Spfg	return info;
189229430Spfg}
190229430Spfg
191229430Spfg/*
192229430Spfg * Reset an sbuf's length to 0.
193229430Spfg */
194229430Spfgstatic void
195229430Spfgsbuf_clear(struct sbuf *sbuf)
196229430Spfg{
197229430Spfg	sbuf->end = sbuf->content;
198229430Spfg}
199229430Spfg
200229430Spfg/*
201229430Spfg * Allocate a new sbuf.
202229430Spfg */
203229430Spfgstatic struct sbuf *
204229430Spfgnew_sbuf(void)
205229430Spfg{
206229430Spfg	struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
207229430Spfg	sbuf->content = (char *) malloc(LINE_ALLOC);
208229430Spfg	sbuf->last = sbuf->content + LINE_ALLOC - 1;
209229430Spfg	sbuf_clear(sbuf);
210229430Spfg	return sbuf;
211229430Spfg}
212229430Spfg
213229430Spfg/*
214229430Spfg * Ensure that there is enough room in the sbuf for chars more characters.
215229430Spfg */
216229430Spfgstatic void
217229430Spfgsbuf_need(struct sbuf *sbuf, int nchars)
218229430Spfg{
219229430Spfg	/* let's assume we only need to double it, but check just in case */
220229430Spfg	while (sbuf->end + nchars > sbuf->last) {
221229430Spfg		int alloc;
222229430Spfg		char *new_content;
223229430Spfg
224229430Spfg		alloc = (sbuf->last - sbuf->content + 1) * 2;
225229430Spfg		new_content = (char *) malloc(alloc);
226229430Spfg		memcpy(new_content, sbuf->content, sbuf->end - sbuf->content);
227229430Spfg		sbuf->end = new_content + (sbuf->end - sbuf->content);
228229430Spfg		free(sbuf->content);
229229430Spfg		sbuf->content = new_content;
230229430Spfg	}
231229430Spfg}
232229430Spfg
233229430Spfg/*
234229430Spfg * Appends a string of a given length to the sbuf.
235229430Spfg */
236229430Spfgstatic void
237229430Spfgsbuf_append(struct sbuf *sbuf, const char *text, int length)
238229430Spfg{
239229430Spfg	if (length > 0) {
240229430Spfg		sbuf_need(sbuf, length);
241229430Spfg		memcpy(sbuf->end, text, length);
242229430Spfg		sbuf->end += length;
243229430Spfg	}
244229430Spfg}
245229430Spfg
/*
 * Adds a whole null-terminated string (without its terminator) to the
 * end of the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	size_t text_length = strlen(text);

	sbuf_append(sbuf, text, text_length);
}
254229430Spfg
255229430Spfg/*
256229430Spfg * Appends an edited null-terminated string to the sbuf.
257229430Spfg */
258229430Spfgstatic void
259229430Spfgsbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
260229430Spfg{
261229430Spfg	int length = strlen(text);
262229430Spfg	if (length > 0) {
263229430Spfg		sbuf_need(sbuf, length);
264229430Spfg		sbuf->end = copy(text, sbuf->end, length);
265229430Spfg	}
266229430Spfg}
267229430Spfg
268229430Spfg/*
269229430Spfg * Strips any of a set of chars from the end of the sbuf.
270229430Spfg */
271229430Spfgstatic void
272229430Spfgsbuf_strip(struct sbuf *sbuf, const char *set)
273229430Spfg{
274229430Spfg	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
275229430Spfg		sbuf->end--;
276229430Spfg}
277229430Spfg
278229430Spfg/*
279229430Spfg * Returns the null-terminated string built by the sbuf.
280229430Spfg */
281229430Spfgstatic char *
282229430Spfgsbuf_content(struct sbuf *sbuf)
283229430Spfg{
284229430Spfg	*sbuf->end = '\0';
285229430Spfg	return sbuf->content;
286229430Spfg}
287229430Spfg
288229430Spfg/*
289229430Spfg * Returns true if no man page exists in the directory with
290229430Spfg * any of the names in the StringList.
291229430Spfg */
292229430Spfgstatic int
293229430Spfgno_page_exists(char *dir, StringList *names, char *suffix)
294229430Spfg{
295229430Spfg	char path[MAXPATHLEN];
296229430Spfg	int i;
297229430Spfg
298229430Spfg	for (i = 0; i < names->sl_cur; i++) {
299229430Spfg		snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
300229430Spfg		if (access(path, F_OK) < 0) {
301229430Spfg			path[strlen(path) - 3] = '\0';
302229430Spfg			if (access(path, F_OK) < 0)
303229430Spfg				continue;
304229430Spfg		}
305229430Spfg		return 0;
306229430Spfg	}
307229430Spfg	return 1;
308229430Spfg}
309229430Spfg
310229430Spfgstatic void
311229430Spfgtrap_signal(int sig __unused)
312229430Spfg{
313229430Spfg	if (tmp_file[0] != '\0')
314229430Spfg		unlink(tmp_file);
315229430Spfg	exit(1);
316229430Spfg}
317229430Spfg
318229430Spfg/*
319229430Spfg * Attempts to open an output file.  Returns NULL if unsuccessful.
320229430Spfg */
321229430Spfgstatic FILE *
322229430Spfgopen_output(char *name)
323229430Spfg{
324229430Spfg	FILE *output;
325229430Spfg
326229430Spfg	whatis_lines = sl_init();
327229430Spfg	if (append) {
328229430Spfg		char line[LINE_ALLOC];
329229430Spfg
330229430Spfg		output = fopen(name, "r");
331229430Spfg		if (output == NULL) {
332229430Spfg			warn("%s", name);
333229430Spfg			exit_code = 1;
334229430Spfg			return NULL;
335229430Spfg		}
336229430Spfg		while (fgets(line, sizeof line, output) != NULL) {
337229430Spfg			line[strlen(line) - 1] = '\0';
338229430Spfg			sl_add(whatis_lines, strdup(line));
339229430Spfg		}
340229430Spfg	}
341229430Spfg	if (common_output == NULL) {
342229430Spfg		snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
343229430Spfg		name = tmp_file;
344229430Spfg	}
345229430Spfg	output = fopen(name, "w");
346229430Spfg	if (output == NULL) {
347229430Spfg		warn("%s", name);
348229430Spfg		exit_code = 1;
349229430Spfg		return NULL;
350229430Spfg	}
351229430Spfg	return output;
352229430Spfg}
353229430Spfg
/*
 * qsort(3) comparison routine for an array of string pointers:
 * orders the strings as strcmp(3) does.
 */
static int
linesort(const void *a, const void *b)
{
	const char *left = *(const char * const *)a;
	const char *right = *(const char * const *)b;

	return strcmp(left, right);
}
359229430Spfg
360229430Spfg/*
361229430Spfg * Writes the unique sorted lines to the output file.
362229430Spfg */
363229430Spfgstatic void
364229430Spfgfinish_output(FILE *output, char *name)
365229430Spfg{
366229430Spfg	int i;
367229430Spfg	char *prev = NULL;
368229430Spfg
369229430Spfg	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
370229430Spfg	for (i = 0; i < whatis_lines->sl_cur; i++) {
371229430Spfg		char *line = whatis_lines->sl_str[i];
372229430Spfg		if (i > 0 && strcmp(line, prev) == 0)
373229430Spfg			continue;
374229430Spfg		prev = line;
375229430Spfg		fputs(line, output);
376229430Spfg		putc('\n', output);
377229430Spfg	}
378229430Spfg	fclose(output);
379229430Spfg	sl_free(whatis_lines, 1);
380229430Spfg	if (common_output == NULL) {
381229430Spfg		rename(tmp_file, name);
382229430Spfg		unlink(tmp_file);
383229430Spfg	}
384229430Spfg}
385229430Spfg
386229430Spfgstatic FILE *
387229430Spfgopen_whatis(char *mandir)
388229430Spfg{
389229430Spfg	char filename[MAXPATHLEN];
390229430Spfg
391229430Spfg	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
392229430Spfg	return open_output(filename);
393229430Spfg}
394229430Spfg
395229430Spfgstatic void
396229430Spfgfinish_whatis(FILE *output, char *mandir)
397229430Spfg{
398229430Spfg	char filename[MAXPATHLEN];
399229430Spfg
400229430Spfg	snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
401229430Spfg	finish_output(output, filename);
402229430Spfg}
403229430Spfg
404229430Spfg/*
405229430Spfg * Tests to see if the given directory has already been visited.
406229430Spfg */
407229430Spfgstatic int
408229430Spfgalready_visited(char *dir)
409229430Spfg{
410229430Spfg	struct stat st;
411229430Spfg	struct visited_dir *visit;
412229430Spfg
413229430Spfg	if (stat(dir, &st) < 0) {
414229430Spfg		warn("%s", dir);
415229430Spfg		exit_code = 1;
416229430Spfg		return 1;
417229430Spfg	}
418229430Spfg	SLIST_FOREACH(visit, &visited_dirs, next) {
419229430Spfg		if (visit->inode == st.st_ino &&
420229430Spfg		    visit->device == st.st_dev) {
421229430Spfg			warnx("already visited %s", dir);
422229430Spfg			return 1;
423229430Spfg		}
424229430Spfg	}
425229430Spfg	visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
426229430Spfg	visit->device = st.st_dev;
427229430Spfg	visit->inode = st.st_ino;
428229430Spfg	SLIST_INSERT_HEAD(&visited_dirs, visit, next);
429229430Spfg	return 0;
430229430Spfg}
431229430Spfg
/*
 * Removes trailing white space from a string in place, returning a
 * pointer to just beyond the new last character (i.e. to the new
 * terminator).  A string made up only of white space becomes empty.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = str + strlen(str);

	/* Look at rhs[-1] so that str[0] is examined too. */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return rhs;
}
445229430Spfg
/*
 * Returns a pointer to the first character of the string that is not
 * white space; this is the terminator if there is no such character.
 */
static char *
skip_spaces(char *s)
{
	/*
	 * The cast keeps 8-bit characters out of the negative range, for
	 * which isspace() is undefined.  The terminator is not a space, so
	 * the loop cannot run off the end.
	 */
	while (isspace((unsigned char)*s))
		s++;
	return s;
}
456229430Spfg
/*
 * Returns whether the string consists of one or more digits and
 * nothing else.  The empty string does not qualify.
 */
static int
only_digits(char *line)
{
	/* Casts avoid undefined isdigit() calls on negative char values. */
	if (!isdigit((unsigned char)*line++))
		return 0;
	while (isdigit((unsigned char)*line))
		line++;
	return *line == '\0';
}
469229430Spfg
470229430Spfg/*
471229430Spfg * Returns whether the line is of one of the forms:
472229430Spfg *	.Sh NAME
473229430Spfg *	.Sh "NAME"
474229430Spfg *	etc.
475229430Spfg * assuming that section_start is ".Sh".
476229430Spfg */
477229430Spfgstatic int
478229430Spfgname_section_line(char *line, const char *section_start)
479229430Spfg{
480229430Spfg	char *rhs;
481229430Spfg	const char **title;
482229430Spfg
483229430Spfg	if (strncmp(line, section_start, 3) != 0)
484229430Spfg		return 0;
485229430Spfg	line = skip_spaces(line + 3);
486229430Spfg	rhs = trim_rhs(line);
487229430Spfg	if (*line == '"') {
488229430Spfg		line++;
489229430Spfg		if (*--rhs == '"')
490229430Spfg			*rhs = '\0';
491229430Spfg	}
492229430Spfg	for (title = name_section_titles; *title != NULL; title++)
493229430Spfg		if (strcmp(*title, line) == 0)
494229430Spfg			return 1;
495229430Spfg	return 0;
496229430Spfg}
497229430Spfg
/*
 * Copies fromlen characters from from to to, leaving out the most
 * common nroff/troff markup:
 *	\(em, \(mi, \sN, \s-N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape only the backslash is dropped.  Returns the
 * position in to just past the last character written.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *const limit = from + fromlen;

	while (from < limit) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		from++;			/* step over the backslash */
		if (*from == '(') {
			if (strncmp(from + 1, "em", 2) == 0 ||
			    strncmp(from + 1, "mi", 2) == 0) {
				from += 3;
				continue;
			}
			/* any other \( falls through and keeps the '(' */
		} else if (*from == 's') {
			from++;
			if (*from == '-')
				from++;
			while (isdigit(*from))
				from++;
			continue;
		} else if (*from == 'f' || *from == '*') {
			from++;
			if (*from == '(') {
				from += 3;
			} else if (*from == '[') {
				do {
					from++;
				} while (*from != ']' && from < limit);
				from++;
			} else {
				from++;
			}
			continue;
		} else if (*from == '&') {
			from++;
			continue;
		}
		/* Unrecognised escape: copy the character after the backslash. */
		*to++ = *from++;
	}
	return to;
}
546229430Spfg
547229430Spfg/*
548229430Spfg * Appends a string with the nroff formatting removed.
549229430Spfg */
550229430Spfgstatic void
551229430Spfgadd_nroff(char *text)
552229430Spfg{
553229430Spfg	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
554229430Spfg}
555229430Spfg
556229430Spfg/*
557229430Spfg * Appends "name(suffix), " to whatis_final.
558229430Spfg */
559229430Spfgstatic void
560229430Spfgadd_whatis_name(char *name, char *suffix)
561229430Spfg{
562229430Spfg	if (*name != '\0') {
563229430Spfg		sbuf_append_str(whatis_final, name);
564229430Spfg		sbuf_append(whatis_final, "(", 1);
565229430Spfg		sbuf_append_str(whatis_final, suffix);
566229430Spfg		sbuf_append(whatis_final, "), ", 3);
567229430Spfg	}
568229430Spfg}
569229430Spfg
570229430Spfg/*
571229430Spfg * Processes an old-style man(7) line.  This ignores commands with only
572229430Spfg * a single number argument.
573229430Spfg */
574229430Spfgstatic void
575229430Spfgprocess_man_line(char *line)
576229430Spfg{
577229430Spfg	if (*line == '.') {
578229430Spfg		while (isalpha(*++line))
579229430Spfg			;
580229430Spfg		line = skip_spaces(line);
581229430Spfg		if (only_digits(line))
582229430Spfg			return;
583229430Spfg	} else
584229430Spfg		line = skip_spaces(line);
585229430Spfg	if (*line != '\0') {
586229430Spfg		add_nroff(line);
587229430Spfg		sbuf_append(whatis_proto, " ", 1);
588229430Spfg	}
589229430Spfg}
590229430Spfg
591229430Spfg/*
592229430Spfg * Processes a new-style mdoc(7) line.
593229430Spfg */
594229430Spfgstatic void
595229430Spfgprocess_mdoc_line(char *line)
596229430Spfg{
597229430Spfg	int xref;
598229430Spfg	int arg = 0;
599229430Spfg	char *line_end = &line[strlen(line)];
600229430Spfg	int orig_length = sbuf_length(whatis_proto);
601229430Spfg	char *next;
602229430Spfg
603229430Spfg	if (*line == '\0')
604229430Spfg		return;
605229430Spfg	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
606229430Spfg		add_nroff(skip_spaces(line));
607229430Spfg		sbuf_append(whatis_proto, " ", 1);
608229430Spfg		return;
609229430Spfg	}
610229430Spfg	xref = strncmp(line, ".Xr", 3) == 0;
611229430Spfg	line += 3;
612229430Spfg	while ((line = skip_spaces(line)) < line_end) {
613229430Spfg		if (*line == '"') {
614229430Spfg			next = ++line;
615229430Spfg			for (;;) {
616229430Spfg				next = strchr(next, '"');
617229430Spfg				if (next == NULL)
618229430Spfg					break;
619229430Spfg				strcpy(next, &next[1]);
620229430Spfg				line_end--;
621229430Spfg				if (*next != '"')
622229430Spfg					break;
623229430Spfg				next++;
624229430Spfg			}
625229430Spfg		} else
626229430Spfg			next = strpbrk(line, " \t");
627229430Spfg		if (next != NULL)
628229430Spfg			*next++ = '\0';
629229430Spfg		else
630229430Spfg			next = line_end;
631229430Spfg		if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
632229430Spfg			if (strcmp(line, "Ns") == 0) {
633229430Spfg				arg = 0;
634229430Spfg				line = next;
635229430Spfg				continue;
636229430Spfg			}
637229430Spfg			if (strstr(mdoc_commands, line) != NULL) {
638229430Spfg				line = next;
639229430Spfg				continue;
640229430Spfg			}
641229430Spfg		}
642229430Spfg		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
643229430Spfg			if (xref) {
644229430Spfg				sbuf_append(whatis_proto, "(", 1);
645229430Spfg				add_nroff(line);
646229430Spfg				sbuf_append(whatis_proto, ")", 1);
647229430Spfg				xref = 0;
648229430Spfg				line = blank;
649229430Spfg			} else
650229430Spfg				sbuf_append(whatis_proto, " ", 1);
651229430Spfg		}
652229430Spfg		add_nroff(line);
653229430Spfg		arg++;
654229430Spfg		line = next;
655229430Spfg	}
656229430Spfg	if (sbuf_length(whatis_proto) > orig_length)
657229430Spfg		sbuf_append(whatis_proto, " ", 1);
658229430Spfg}
659229430Spfg
660229430Spfg/*
661229430Spfg * Collects a list of comma-separated names from the text.
662229430Spfg */
663229430Spfgstatic void
664229430Spfgcollect_names(StringList *names, char *text)
665229430Spfg{
666229430Spfg	char *arg;
667229430Spfg
668229430Spfg	for (;;) {
669229430Spfg		arg = text;
670229430Spfg		text = strchr(text, ',');
671229430Spfg		if (text != NULL)
672229430Spfg			*text++ = '\0';
673229430Spfg		sl_add(names, arg);
674229430Spfg		if (text == NULL)
675229430Spfg			return;
676229430Spfg		if (*text == ' ')
677229430Spfg			text++;
678229430Spfg	}
679229430Spfg}
680229430Spfg
/* Parser states used by process_page() while reading a page. */
enum {
	STATE_UNKNOWN,		/* NAME section not reached yet */
	STATE_MANSTYLE,		/* inside an old-style .SH NAME section */
	STATE_MDOCNAME,		/* inside .Sh NAME, reading the .Nm lines */
	STATE_MDOCDESC		/* inside .Sh NAME, after the .Nm lines */
};
682229430Spfg
683229430Spfg/*
684229430Spfg * Processes a man page source into a single whatis line and adds it
685229430Spfg * to whatis_lines.
686229430Spfg */
687229430Spfgstatic void
688229430Spfgprocess_page(struct page_info *page, char *section_dir)
689229430Spfg{
690	gzFile *in;
691	char buffer[4096];
692	char *line;
693	StringList *names;
694	char *descr;
695	int state = STATE_UNKNOWN;
696	int i;
697
698	sbuf_clear(whatis_proto);
699	if ((in = gzopen(page->filename, "r")) == NULL) {
700		warn("%s", page->filename);
701		exit_code = 1;
702		return;
703	}
704	while (gzgets(in, buffer, sizeof buffer) != NULL) {
705		line = buffer;
706		if (strncmp(line, ".\\\"", 3) == 0)		/* ignore comments */
707			continue;
708		switch (state) {
709		/*
710		 * haven't reached the NAME section yet.
711		 */
712		case STATE_UNKNOWN:
713			if (name_section_line(line, ".SH"))
714				state = STATE_MANSTYLE;
715			else if (name_section_line(line, ".Sh"))
716				state = STATE_MDOCNAME;
717			continue;
718		/*
719		 * Inside an old-style .SH NAME section.
720		 */
721		case STATE_MANSTYLE:
722			if (strncmp(line, ".SH", 3) == 0)
723				break;
724			trim_rhs(line);
725			if (strcmp(line, ".") == 0)
726				continue;
727			if (strncmp(line, ".IX", 3) == 0) {
728				line += 3;
729				line = skip_spaces(line);
730			}
731			process_man_line(line);
732			continue;
733		/*
734		 * Inside a new-style .Sh NAME section (the .Nm part).
735		 */
736		case STATE_MDOCNAME:
737			trim_rhs(line);
738			if (strncmp(line, ".Nm", 3) == 0) {
739				process_mdoc_line(line);
740				continue;
741			} else {
742				if (strcmp(line, ".") == 0)
743					continue;
744				sbuf_append(whatis_proto, "- ", 2);
745				state = STATE_MDOCDESC;
746			}
747			/* fall through */
748		/*
749		 * Inside a new-style .Sh NAME section (after the .Nm-s).
750		 */
751		case STATE_MDOCDESC:
752			if (strncmp(line, ".Sh", 3) == 0)
753				break;
754			trim_rhs(line);
755			if (strcmp(line, ".") == 0)
756				continue;
757			process_mdoc_line(line);
758			continue;
759		}
760		break;
761	}
762	gzclose(in);
763	sbuf_strip(whatis_proto, " \t.-");
764	line = sbuf_content(whatis_proto);
765	/*
766	 * line now contains the appropriate data, but without
767	 * the proper indentation or the section appended to each name.
768	 */
769	descr = strstr(line, " - ");
770	if (descr == NULL) {
771		descr = strchr(line, ' ');
772		if (descr == NULL) {
773			if (verbose)
774				fprintf(stderr, "	ignoring junk description \"%s\"\n", line);
775			return;
776		}
777		*descr++ = '\0';
778	} else {
779		*descr = '\0';
780		descr += 3;
781	}
782	names = sl_init();
783	collect_names(names, line);
784	sbuf_clear(whatis_final);
785	if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
786		/*
787		 * Add the page name since that's the only thing that
788		 * man(1) will find.
789		 */
790		add_whatis_name(page->name, page->suffix);
791	}
792	for (i = 0; i < names->sl_cur; i++)
793		add_whatis_name(names->sl_str[i], page->suffix);
794	sl_free(names, 0);
795	sbuf_retract(whatis_final, 2);		/* remove last ", " */
796	while (sbuf_length(whatis_final) < indent)
797		sbuf_append(whatis_final, " ", 1);
798	sbuf_append(whatis_final, " - ", 3);
799	sbuf_append_str(whatis_final, skip_spaces(descr));
800	sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
801}
802
803/*
804 * Sorts pages first by inode number, then by name.
805 */
806static int
807pagesort(const void *a, const void *b)
808{
809	struct page_info *p1 = *(struct page_info **) a;
810	struct page_info *p2 = *(struct page_info **) b;
811	if (p1->inode == p2->inode)
812		return strcmp(p1->name, p2->name);
813	return p1->inode - p2->inode;
814}
815
816/*
817 * Processes a single man section.
818 */
819static void
820process_section(char *section_dir)
821{
822	struct dirent **entries;
823	int nentries;
824	struct page_info **pages;
825	int npages = 0;
826	int i;
827	int prev_inode = 0;
828
829	if (verbose)
830		fprintf(stderr, "  %s\n", section_dir);
831
832	/*
833	 * scan the man section directory for pages
834	 */
835	nentries = scandir(section_dir, &entries, NULL, alphasort);
836	if (nentries < 0) {
837		warn("%s", section_dir);
838		exit_code = 1;
839		return;
840	}
841	/*
842	 * collect information about man pages
843	 */
844	pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
845	for (i = 0; i < nentries; i++) {
846		struct page_info *info = new_page_info(section_dir, entries[i]);
847		if (info != NULL)
848			pages[npages++] = info;
849		free(entries[i]);
850	}
851	free(entries);
852	qsort(pages, npages, sizeof(struct page_info *), pagesort);
853	/*
854	 * process each unique page
855	 */
856	for (i = 0; i < npages; i++) {
857		struct page_info *page = pages[i];
858		if (page->inode != prev_inode) {
859			prev_inode = page->inode;
860			if (verbose)
861				fprintf(stderr, "	reading %s\n", page->filename);
862			process_page(page, section_dir);
863		} else if (verbose)
864			fprintf(stderr, "	skipping %s, duplicate\n", page->filename);
865		free_page_info(page);
866	}
867	free(pages);
868}
869
/*
 * scandir(3) selection routine: returns whether the directory entry is
 * named like a man page section, i.e. "man" followed by nothing but
 * letters and digits ("man" on its own qualifies as well).
 */
static int
select_sections(struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return 0;
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		/* The cast avoids undefined isalnum() calls on 8-bit names. */
		if (!isalnum((unsigned char)*p))
			return 0;
	}
	return 1;
}
886
887/*
888 * Processes a single top-level man directory by finding all the
889 * sub-directories named man* and processing each one in turn.
890 */
891static void
892process_mandir(char *dir_name)
893{
894	struct dirent **entries;
895	int nsections;
896	FILE *fp = NULL;
897	int i;
898	struct stat st;
899
900	if (already_visited(dir_name))
901		return;
902	if (verbose)
903		fprintf(stderr, "man directory %s\n", dir_name);
904	nsections = scandir(dir_name, &entries, select_sections, alphasort);
905	if (nsections < 0) {
906		warn("%s", dir_name);
907		exit_code = 1;
908		return;
909	}
910	if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
911		return;
912	for (i = 0; i < nsections; i++) {
913		char section_dir[MAXPATHLEN];
914		snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
915		process_section(section_dir);
916		snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
917		    entries[i]->d_name, machine);
918		if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
919			process_section(section_dir);
920		free(entries[i]);
921	}
922	free(entries);
923	if (common_output == NULL)
924		finish_whatis(fp, dir_name);
925}
926
927/*
928 * Processes one argument, which may be a colon-separated list of
929 * directories.
930 */
931static void
932process_argument(const char *arg)
933{
934	char *dir;
935	char *mandir;
936	char *parg;
937
938	parg = strdup(arg);
939	if (parg == NULL)
940		err(1, "out of memory");
941	while ((dir = strsep(&parg, ":")) != NULL) {
942		if (locale != NULL) {
943			asprintf(&mandir, "%s/%s", dir, locale);
944			process_mandir(mandir);
945			free(mandir);
946			if (lang_locale != NULL) {
947				asprintf(&mandir, "%s/%s", dir, lang_locale);
948				process_mandir(mandir);
949				free(mandir);
950			}
951		} else {
952			process_mandir(dir);
953		}
954	}
955	free(parg);
956}
957
958
959int
960main(int argc, char **argv)
961{
962	int opt;
963	extern int optind;
964	extern char *optarg;
965	FILE *fp = NULL;
966
967	while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
968		switch (opt) {
969		case 'a':
970			append++;
971			break;
972		case 'i':
973			indent = atoi(optarg);
974			break;
975		case 'n':
976			whatis_name = optarg;
977			break;
978		case 'o':
979			common_output = optarg;
980			break;
981		case 'v':
982			verbose++;
983			break;
984		case 'L':
985			locale = getenv("LC_ALL");
986			if (locale == NULL)
987				locale = getenv("LC_CTYPE");
988			if (locale == NULL)
989				locale = getenv("LANG");
990			if (locale != NULL) {
991				char *sep = strchr(locale, '_');
992				if (sep != NULL && isupper(sep[1]) &&
993				    isupper(sep[2])) {
994					asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]);
995				}
996			}
997			break;
998		default:
999			fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1000			exit(1);
1001		}
1002	}
1003
1004	signal(SIGINT, trap_signal);
1005	signal(SIGHUP, trap_signal);
1006	signal(SIGQUIT, trap_signal);
1007	signal(SIGTERM, trap_signal);
1008	SLIST_INIT(&visited_dirs);
1009	whatis_proto = new_sbuf();
1010	whatis_final = new_sbuf();
1011
1012	if ((machine = getenv("MACHINE")) == NULL)
1013		machine = MACHINE;
1014
1015	if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1016		err(1, "%s", common_output);
1017	if (optind == argc) {
1018		const char *manpath = getenv("MANPATH");
1019		if (manpath == NULL)
1020			manpath = DEFAULT_MANPATH;
1021		process_argument(manpath);
1022	} else {
1023		while (optind < argc)
1024			process_argument(argv[optind++]);
1025	}
1026	if (common_output != NULL)
1027		finish_output(fp, common_output);
1028	exit(exit_code);
1029}
1030