/* makewhatis.c revision 97976 */
/*-
 * Copyright (c) 2002 John Rochester
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2796845Smarkm */ 2896845Smarkm 2996845Smarkm#include <sys/cdefs.h> 3096845Smarkm__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 97976 2002-06-07 01:01:08Z keramida $"); 3196845Smarkm 3296845Smarkm#include <sys/types.h> 3396845Smarkm#include <sys/stat.h> 3496845Smarkm#include <sys/param.h> 3596845Smarkm#include <sys/queue.h> 3696845Smarkm 3796845Smarkm#include <ctype.h> 3896845Smarkm#include <dirent.h> 3996845Smarkm#include <err.h> 4096845Smarkm#include <stdio.h> 4196845Smarkm#include <stdlib.h> 4296845Smarkm#include <string.h> 4396845Smarkm#include <stringlist.h> 4496845Smarkm#include <unistd.h> 4596845Smarkm#include <zlib.h> 4696845Smarkm 4796845Smarkm#define DEFAULT_MANPATH "/usr/share/man" 4896845Smarkm#define LINE_ALLOC 4096 4996845Smarkm 5096845Smarkmstatic char blank[] = ""; 5196845Smarkm 5296845Smarkm/* 5396845Smarkm * Information collected about each man page in a section. 5496845Smarkm */ 5596845Smarkmstruct page_info { 5696845Smarkm char * filename; 5796845Smarkm char * name; 5896845Smarkm char * suffix; 5996845Smarkm int gzipped; 6096845Smarkm ino_t inode; 6196845Smarkm}; 6296845Smarkm 6396845Smarkm/* 6496845Smarkm * An entry kept for each visited directory. 6596845Smarkm */ 6696845Smarkmstruct visited_dir { 6796845Smarkm dev_t device; 6896845Smarkm ino_t inode; 6996845Smarkm SLIST_ENTRY(visited_dir) next; 7096845Smarkm}; 7196845Smarkm 7296845Smarkm/* 7396845Smarkm * an expanding string 7496845Smarkm */ 7596845Smarkmstruct sbuf { 7696845Smarkm char * content; /* the start of the buffer */ 7796845Smarkm char * end; /* just past the end of the content */ 7896845Smarkm char * last; /* the last allocated character */ 7996845Smarkm}; 8096845Smarkm 8196845Smarkm/* 8296845Smarkm * Removes the last amount characters from the sbuf. 8396845Smarkm */ 8496845Smarkm#define sbuf_retract(sbuf, amount) \ 8596845Smarkm ((sbuf)->end -= (amount)) 8696845Smarkm/* 8796845Smarkm * Returns the length of the sbuf content. 
8896845Smarkm */ 8996845Smarkm#define sbuf_length(sbuf) \ 9096845Smarkm ((sbuf)->end - (sbuf)->content) 9196845Smarkm 9296845Smarkmtypedef char *edited_copy(char *from, char *to, int length); 9396845Smarkm 9496845Smarkmstatic int append; /* -a flag: append to existing whatis */ 9596845Smarkmstatic int verbose; /* -v flag: be verbose with warnings */ 9696845Smarkmstatic int indent = 24; /* -i option: description indentation */ 9796845Smarkmstatic const char *whatis_name="whatis";/* -n option: the name */ 9896845Smarkmstatic char *common_output; /* -o option: the single output file */ 9996845Smarkmstatic char *locale; /* user's locale if -L is used */ 10096845Smarkmstatic char *lang_locale; /* short form of locale */ 10196858Srustatic char *machine; 10296845Smarkm 10396845Smarkmstatic int exit_code; /* exit code to use when finished */ 10496845Smarkmstatic SLIST_HEAD(, visited_dir) visited_dirs = 10596845Smarkm SLIST_HEAD_INITIALIZER(visited_dirs); 10696845Smarkm 10796845Smarkm/* 10896845Smarkm * While the whatis line is being formed, it is stored in whatis_proto. 10996845Smarkm * When finished, it is reformatted into whatis_final and then appended 11096845Smarkm * to whatis_lines. 
11196845Smarkm */ 11296845Smarkmstatic struct sbuf *whatis_proto; 11396845Smarkmstatic struct sbuf *whatis_final; 11496845Smarkmstatic StringList *whatis_lines; /* collected output lines */ 11596845Smarkm 11696845Smarkmstatic char tmp_file[MAXPATHLEN]; /* path of temporary file, if any */ 11796845Smarkm 11896845Smarkm/* A set of possible names for the NAME man page section */ 11996845Smarkmstatic const char *name_section_titles[] = { 12096845Smarkm "NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce", 12196845Smarkm "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL 12296845Smarkm}; 12396845Smarkm 12496845Smarkm/* A subset of the mdoc(7) commands to ignore */ 12596845Smarkmstatic char mdoc_commands[] = "ArDvErEvFlLiNmPa"; 12696845Smarkm 12796845Smarkm/* 12896845Smarkm * Frees a struct page_info and its content. 12996845Smarkm */ 13096845Smarkmstatic void 13196845Smarkmfree_page_info(struct page_info *info) 13296845Smarkm{ 13396845Smarkm free(info->filename); 13496845Smarkm free(info->name); 13596845Smarkm free(info->suffix); 13696845Smarkm free(info); 13796845Smarkm} 13896845Smarkm 13996845Smarkm/* 14096845Smarkm * Allocates and fills in a new struct page_info given the 14196845Smarkm * name of the man section directory and the dirent of the file. 14296845Smarkm * If the file is not a man page, returns NULL. 
14396845Smarkm */ 14496845Smarkmstatic struct page_info * 14596845Smarkmnew_page_info(char *dir, struct dirent *dirent) 14696845Smarkm{ 14796845Smarkm struct page_info *info; 14896845Smarkm int basename_length; 14996845Smarkm char *suffix; 15096845Smarkm struct stat st; 15196845Smarkm 15296845Smarkm info = (struct page_info *) malloc(sizeof(struct page_info)); 15396845Smarkm if (info == NULL) 15496845Smarkm err(1, "malloc"); 15596845Smarkm basename_length = strlen(dirent->d_name); 15696845Smarkm suffix = &dirent->d_name[basename_length]; 15796845Smarkm asprintf(&info->filename, "%s/%s", dir, dirent->d_name); 15896845Smarkm if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) { 15996845Smarkm suffix -= 3; 16096845Smarkm *suffix = '\0'; 16196845Smarkm } 16296845Smarkm for (;;) { 16396845Smarkm if (--suffix == dirent->d_name || !isalnum(*suffix)) { 16496845Smarkm if (*suffix == '.') 16596845Smarkm break; 16696845Smarkm if (verbose) 16796845Smarkm warnx("%s: invalid man page name", info->filename); 16896845Smarkm free(info->filename); 16996845Smarkm free(info); 17096845Smarkm return NULL; 17196845Smarkm } 17296845Smarkm } 17396845Smarkm *suffix++ = '\0'; 17496845Smarkm info->name = strdup(dirent->d_name); 17596845Smarkm info->suffix = strdup(suffix); 17696845Smarkm if (stat(info->filename, &st) < 0) { 17796845Smarkm warn("%s", info->filename); 17896845Smarkm free_page_info(info); 17996845Smarkm return NULL; 18096845Smarkm } 18196845Smarkm if (!S_ISREG(st.st_mode)) { 18296845Smarkm if (verbose && !S_ISDIR(st.st_mode)) 18396845Smarkm warnx("%s: not a regular file", info->filename); 18496845Smarkm free_page_info(info); 18596845Smarkm return NULL; 18696845Smarkm } 18796845Smarkm info->inode = st.st_ino; 18896845Smarkm return info; 18996845Smarkm} 19096845Smarkm 19196845Smarkm/* 19296845Smarkm * Reset an sbuf's length to 0. 
19396845Smarkm */ 19496845Smarkmstatic void 19596845Smarkmsbuf_clear(struct sbuf *sbuf) 19696845Smarkm{ 19796845Smarkm sbuf->end = sbuf->content; 19896845Smarkm} 19996845Smarkm 20096845Smarkm/* 20196845Smarkm * Allocate a new sbuf. 20296845Smarkm */ 20396845Smarkmstatic struct sbuf * 20496845Smarkmnew_sbuf(void) 20596845Smarkm{ 20696845Smarkm struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf)); 20796845Smarkm sbuf->content = (char *) malloc(LINE_ALLOC); 20896845Smarkm sbuf->last = sbuf->content + LINE_ALLOC - 1; 20996845Smarkm sbuf_clear(sbuf); 21096845Smarkm return sbuf; 21196845Smarkm} 21296845Smarkm 21396845Smarkm/* 21496845Smarkm * Ensure that there is enough room in the sbuf for chars more characters. 21596845Smarkm */ 21696845Smarkmstatic void 21796845Smarkmsbuf_need(struct sbuf *sbuf, int nchars) 21896845Smarkm{ 21996845Smarkm /* let's assume we only need to double it, but check just in case */ 22096845Smarkm while (sbuf->end + nchars > sbuf->last) { 22196845Smarkm int alloc; 22296845Smarkm char *new_content; 22396845Smarkm 22496845Smarkm alloc = (sbuf->last - sbuf->content + 1) * 2; 22596845Smarkm new_content = (char *) malloc(alloc); 22696845Smarkm memcpy(new_content, sbuf->content, sbuf->end - sbuf->content); 22796845Smarkm sbuf->end = new_content + (sbuf->end - sbuf->content); 22896845Smarkm free(sbuf->content); 22996845Smarkm sbuf->content = new_content; 23096845Smarkm } 23196845Smarkm} 23296845Smarkm 23396845Smarkm/* 23496845Smarkm * Appends a string of a given length to the sbuf. 23596845Smarkm */ 23696845Smarkmstatic void 23796845Smarkmsbuf_append(struct sbuf *sbuf, const char *text, int length) 23896845Smarkm{ 23996845Smarkm if (length > 0) { 24096845Smarkm sbuf_need(sbuf, length); 24196845Smarkm memcpy(sbuf->end, text, length); 24296845Smarkm sbuf->end += length; 24396845Smarkm } 24496845Smarkm} 24596845Smarkm 24696845Smarkm/* 24796845Smarkm * Appends a null-terminated string to the sbuf. 
24896845Smarkm */ 24996845Smarkmstatic void 25096845Smarkmsbuf_append_str(struct sbuf *sbuf, char *text) 25196845Smarkm{ 25296845Smarkm sbuf_append(sbuf, text, strlen(text)); 25396845Smarkm} 25496845Smarkm 25596845Smarkm/* 25696845Smarkm * Appends an edited null-terminated string to the sbuf. 25796845Smarkm */ 25896845Smarkmstatic void 25996845Smarkmsbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) 26096845Smarkm{ 26196845Smarkm int length = strlen(text); 26296845Smarkm if (length > 0) { 26396845Smarkm sbuf_need(sbuf, length); 26496845Smarkm sbuf->end = copy(text, sbuf->end, length); 26596845Smarkm } 26696845Smarkm} 26796845Smarkm 26896845Smarkm/* 26996845Smarkm * Strips any of a set of chars from the end of the sbuf. 27096845Smarkm */ 27196845Smarkmstatic void 27296845Smarkmsbuf_strip(struct sbuf *sbuf, const char *set) 27396845Smarkm{ 27496845Smarkm while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) 27596845Smarkm sbuf->end--; 27696845Smarkm} 27796845Smarkm 27896845Smarkm/* 27996845Smarkm * Returns the null-terminated string built by the sbuf. 28096845Smarkm */ 28196845Smarkmstatic char * 28296845Smarkmsbuf_content(struct sbuf *sbuf) 28396845Smarkm{ 28496845Smarkm *sbuf->end = '\0'; 28596845Smarkm return sbuf->content; 28696845Smarkm} 28796845Smarkm 28896845Smarkm/* 28996845Smarkm * Returns true if no man page exists in the directory with 29096845Smarkm * any of the names in the StringList. 
29196845Smarkm */ 29296845Smarkmstatic int 29396845Smarkmno_page_exists(char *dir, StringList *names, char *suffix) 29496845Smarkm{ 29596845Smarkm char path[MAXPATHLEN]; 29697976Skeramida size_t i; 29796845Smarkm 29896845Smarkm for (i = 0; i < names->sl_cur; i++) { 29996845Smarkm snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix); 30096845Smarkm if (access(path, F_OK) < 0) { 30196845Smarkm path[strlen(path) - 3] = '\0'; 30296845Smarkm if (access(path, F_OK) < 0) 30396845Smarkm continue; 30496845Smarkm } 30596845Smarkm return 0; 30696845Smarkm } 30796845Smarkm return 1; 30896845Smarkm} 30996845Smarkm 31096845Smarkmstatic void 31196845Smarkmtrap_signal(int sig __unused) 31296845Smarkm{ 31396845Smarkm if (tmp_file[0] != '\0') 31496845Smarkm unlink(tmp_file); 31596845Smarkm exit(1); 31696845Smarkm} 31796845Smarkm 31896845Smarkm/* 31996845Smarkm * Attempts to open an output file. Returns NULL if unsuccessful. 32096845Smarkm */ 32196845Smarkmstatic FILE * 32296845Smarkmopen_output(char *name) 32396845Smarkm{ 32496845Smarkm FILE *output; 32596845Smarkm 32696845Smarkm whatis_lines = sl_init(); 32796845Smarkm if (append) { 32896845Smarkm char line[LINE_ALLOC]; 32996845Smarkm 33096845Smarkm output = fopen(name, "r"); 33196845Smarkm if (output == NULL) { 33296845Smarkm warn("%s", name); 33396845Smarkm exit_code = 1; 33496845Smarkm return NULL; 33596845Smarkm } 33696845Smarkm while (fgets(line, sizeof line, output) != NULL) { 33796845Smarkm line[strlen(line) - 1] = '\0'; 33896845Smarkm sl_add(whatis_lines, strdup(line)); 33996845Smarkm } 34096845Smarkm } 34196845Smarkm if (common_output == NULL) { 34296845Smarkm snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name); 34396845Smarkm name = tmp_file; 34496845Smarkm } 34596845Smarkm output = fopen(name, "w"); 34696845Smarkm if (output == NULL) { 34796845Smarkm warn("%s", name); 34896845Smarkm exit_code = 1; 34996845Smarkm return NULL; 35096845Smarkm } 35196845Smarkm return output; 35296845Smarkm} 
35396845Smarkm 35496845Smarkmstatic int 35596845Smarkmlinesort(const void *a, const void *b) 35696845Smarkm{ 35796845Smarkm return strcmp((const char *)(*(const char **)a), (const char *)(*(const char **)b)); 35896845Smarkm} 35996845Smarkm 36096845Smarkm/* 36196845Smarkm * Writes the unique sorted lines to the output file. 36296845Smarkm */ 36396845Smarkmstatic void 36496845Smarkmfinish_output(FILE *output, char *name) 36596845Smarkm{ 36697976Skeramida size_t i; 36796845Smarkm char *prev = NULL; 36896845Smarkm 36996845Smarkm qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort); 37096845Smarkm for (i = 0; i < whatis_lines->sl_cur; i++) { 37196845Smarkm char *line = whatis_lines->sl_str[i]; 37296845Smarkm if (i > 0 && strcmp(line, prev) == 0) 37396845Smarkm continue; 37496845Smarkm prev = line; 37596845Smarkm fputs(line, output); 37696845Smarkm putc('\n', output); 37796845Smarkm } 37896845Smarkm fclose(output); 37996845Smarkm sl_free(whatis_lines, 1); 38096845Smarkm if (common_output == NULL) { 38196845Smarkm rename(tmp_file, name); 38296845Smarkm unlink(tmp_file); 38396845Smarkm } 38496845Smarkm} 38596845Smarkm 38696845Smarkmstatic FILE * 38796845Smarkmopen_whatis(char *mandir) 38896845Smarkm{ 38996845Smarkm char filename[MAXPATHLEN]; 39096845Smarkm 39196845Smarkm snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name); 39296845Smarkm return open_output(filename); 39396845Smarkm} 39496845Smarkm 39596845Smarkmstatic void 39696845Smarkmfinish_whatis(FILE *output, char *mandir) 39796845Smarkm{ 39896845Smarkm char filename[MAXPATHLEN]; 39996845Smarkm 40096845Smarkm snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name); 40196845Smarkm finish_output(output, filename); 40296845Smarkm} 40396845Smarkm 40496845Smarkm/* 40596845Smarkm * Tests to see if the given directory has already been visited. 
40696845Smarkm */ 40796845Smarkmstatic int 40896845Smarkmalready_visited(char *dir) 40996845Smarkm{ 41096845Smarkm struct stat st; 41196845Smarkm struct visited_dir *visit; 41296845Smarkm 41396845Smarkm if (stat(dir, &st) < 0) { 41496845Smarkm warn("%s", dir); 41596845Smarkm exit_code = 1; 41696845Smarkm return 1; 41796845Smarkm } 41896845Smarkm SLIST_FOREACH(visit, &visited_dirs, next) { 41996845Smarkm if (visit->inode == st.st_ino && 42096845Smarkm visit->device == st.st_dev) { 42196845Smarkm warnx("already visited %s", dir); 42296845Smarkm return 1; 42396845Smarkm } 42496845Smarkm } 42596845Smarkm visit = (struct visited_dir *) malloc(sizeof(struct visited_dir)); 42696845Smarkm visit->device = st.st_dev; 42796845Smarkm visit->inode = st.st_ino; 42896845Smarkm SLIST_INSERT_HEAD(&visited_dirs, visit, next); 42996845Smarkm return 0; 43096845Smarkm} 43196845Smarkm 43296845Smarkm/* 43396845Smarkm * Removes trailing spaces from a string, returning a pointer to just 43496845Smarkm * beyond the new last character. 43596845Smarkm */ 43696845Smarkmstatic char * 43796845Smarkmtrim_rhs(char *str) 43896845Smarkm{ 43996845Smarkm char *rhs = &str[strlen(str)]; 44096845Smarkm while (--rhs > str && isspace(*rhs)) 44196845Smarkm ; 44296845Smarkm *++rhs = '\0'; 44396845Smarkm return rhs; 44496845Smarkm} 44596845Smarkm 44696845Smarkm/* 44796845Smarkm * Returns a pointer to the next non-space character in the string. 44896845Smarkm */ 44996845Smarkmstatic char * 45096845Smarkmskip_spaces(char *s) 45196845Smarkm{ 45296845Smarkm while (*s != '\0' && isspace(*s)) 45396845Smarkm s++; 45496845Smarkm return s; 45596845Smarkm} 45696845Smarkm 45796845Smarkm/* 45896845Smarkm * Returns whether the string contains only digits. 
45996845Smarkm */ 46096845Smarkmstatic int 46196845Smarkmonly_digits(char *line) 46296845Smarkm{ 46396845Smarkm if (!isdigit(*line++)) 46496845Smarkm return 0; 46596845Smarkm while (isdigit(*line)) 46696845Smarkm line++; 46796845Smarkm return *line == '\0'; 46896845Smarkm} 46996845Smarkm 47096845Smarkm/* 47196845Smarkm * Returns whether the line is of one of the forms: 47296845Smarkm * .Sh NAME 47396845Smarkm * .Sh "NAME" 47496845Smarkm * etc. 47596845Smarkm * assuming that section_start is ".Sh". 47696845Smarkm */ 47796845Smarkmstatic int 47896845Smarkmname_section_line(char *line, const char *section_start) 47996845Smarkm{ 48096845Smarkm char *rhs; 48196845Smarkm const char **title; 48296845Smarkm 48396845Smarkm if (strncmp(line, section_start, 3) != 0) 48496845Smarkm return 0; 48596845Smarkm line = skip_spaces(line + 3); 48696845Smarkm rhs = trim_rhs(line); 48796845Smarkm if (*line == '"') { 48896845Smarkm line++; 48996845Smarkm if (*--rhs == '"') 49096845Smarkm *rhs = '\0'; 49196845Smarkm } 49296845Smarkm for (title = name_section_titles; *title != NULL; title++) 49396845Smarkm if (strcmp(*title, line) == 0) 49496845Smarkm return 1; 49596845Smarkm return 0; 49696845Smarkm} 49796845Smarkm 49896845Smarkm/* 49996845Smarkm * Copies characters while removing the most common nroff/troff 50096845Smarkm * markup: 50197102Sru * \(em, \(mi, \s[+-N], \& 50297102Sru * \fF, \f(fo, \f[font] 50397102Sru * \*s, \*(st, \*[stringvar] 50496845Smarkm */ 50596845Smarkmstatic char * 50696845Smarkmde_nroff_copy(char *from, char *to, int fromlen) 50796845Smarkm{ 50896845Smarkm char *from_end = &from[fromlen]; 50996845Smarkm while (from < from_end) { 51096845Smarkm switch (*from) { 51196845Smarkm case '\\': 51296845Smarkm switch (*++from) { 51396845Smarkm case '(': 51496845Smarkm if (strncmp(&from[1], "em", 2) == 0 || 51596845Smarkm strncmp(&from[1], "mi", 2) == 0) { 51696845Smarkm from += 3; 51796845Smarkm continue; 51896845Smarkm } 51996845Smarkm break; 52097102Sru case 's': 
52197102Sru if (*++from == '-') 52297102Sru from++; 52397102Sru while (isdigit(*from)) 52497102Sru from++; 52597102Sru continue; 52696845Smarkm case 'f': 52797102Sru case '*': 52896845Smarkm if (*++from == '(') 52996845Smarkm from += 3; 53097102Sru else if (*from == '[') { 53197102Sru while (*++from != ']' && from < from_end); 53296845Smarkm from++; 53397102Sru } else 53497102Sru from++; 53596845Smarkm continue; 53696845Smarkm case '&': 53796845Smarkm from++; 53896845Smarkm continue; 53996845Smarkm } 54096845Smarkm break; 54196845Smarkm } 54296845Smarkm *to++ = *from++; 54396845Smarkm } 54496845Smarkm return to; 54596845Smarkm} 54696845Smarkm 54796845Smarkm/* 54896845Smarkm * Appends a string with the nroff formatting removed. 54996845Smarkm */ 55096845Smarkmstatic void 55196845Smarkmadd_nroff(char *text) 55296845Smarkm{ 55396845Smarkm sbuf_append_edited(whatis_proto, text, de_nroff_copy); 55496845Smarkm} 55596845Smarkm 55696845Smarkm/* 55796845Smarkm * Appends "name(suffix), " to whatis_final. 55896845Smarkm */ 55996845Smarkmstatic void 56096845Smarkmadd_whatis_name(char *name, char *suffix) 56196845Smarkm{ 56296845Smarkm if (*name != '\0') { 56396845Smarkm sbuf_append_str(whatis_final, name); 56496845Smarkm sbuf_append(whatis_final, "(", 1); 56596845Smarkm sbuf_append_str(whatis_final, suffix); 56696845Smarkm sbuf_append(whatis_final, "), ", 3); 56796845Smarkm } 56896845Smarkm} 56996845Smarkm 57096845Smarkm/* 57196845Smarkm * Processes an old-style man(7) line. This ignores commands with only 57296845Smarkm * a single number argument. 
57396845Smarkm */ 57496845Smarkmstatic void 57596845Smarkmprocess_man_line(char *line) 57696845Smarkm{ 57796845Smarkm if (*line == '.') { 57896845Smarkm while (isalpha(*++line)) 57996845Smarkm ; 58096845Smarkm line = skip_spaces(line); 58196845Smarkm if (only_digits(line)) 58296845Smarkm return; 58396845Smarkm } else 58496845Smarkm line = skip_spaces(line); 58596845Smarkm if (*line != '\0') { 58696845Smarkm add_nroff(line); 58796845Smarkm sbuf_append(whatis_proto, " ", 1); 58896845Smarkm } 58996845Smarkm} 59096845Smarkm 59196845Smarkm/* 59296845Smarkm * Processes a new-style mdoc(7) line. 59396845Smarkm */ 59496845Smarkmstatic void 59596845Smarkmprocess_mdoc_line(char *line) 59696845Smarkm{ 59796845Smarkm int xref; 59896845Smarkm int arg = 0; 59996845Smarkm char *line_end = &line[strlen(line)]; 60096845Smarkm int orig_length = sbuf_length(whatis_proto); 60196845Smarkm char *next; 60296845Smarkm 60396845Smarkm if (*line == '\0') 60496845Smarkm return; 60596845Smarkm if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { 60696845Smarkm add_nroff(skip_spaces(line)); 60796859Sru sbuf_append(whatis_proto, " ", 1); 60896845Smarkm return; 60996845Smarkm } 61096845Smarkm xref = strncmp(line, ".Xr", 3) == 0; 61196845Smarkm line += 3; 61296845Smarkm while ((line = skip_spaces(line)) < line_end) { 61396845Smarkm if (*line == '"') { 61496845Smarkm next = ++line; 61596845Smarkm for (;;) { 61696845Smarkm next = strchr(next, '"'); 61796861Sru if (next == NULL) 61896845Smarkm break; 61996845Smarkm strcpy(next, &next[1]); 62096845Smarkm line_end--; 62196861Sru if (*next != '"') 62296861Sru break; 62396845Smarkm next++; 62496845Smarkm } 62596845Smarkm } else 62696845Smarkm next = strpbrk(line, " \t"); 62796845Smarkm if (next != NULL) 62896845Smarkm *next++ = '\0'; 62996845Smarkm else 63096845Smarkm next = line_end; 63196845Smarkm if (isupper(*line) && islower(line[1]) && line[2] == '\0') { 63296845Smarkm if (strcmp(line, "Ns") == 0) { 63396845Smarkm arg = 0; 63496845Smarkm 
line = next; 63596845Smarkm continue; 63696845Smarkm } 63796845Smarkm if (strstr(mdoc_commands, line) != NULL) { 63896845Smarkm line = next; 63996845Smarkm continue; 64096845Smarkm } 64196845Smarkm } 64296845Smarkm if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { 64396845Smarkm if (xref) { 64496845Smarkm sbuf_append(whatis_proto, "(", 1); 64596845Smarkm add_nroff(line); 64696845Smarkm sbuf_append(whatis_proto, ")", 1); 64796845Smarkm xref = 0; 64896845Smarkm line = blank; 64996845Smarkm } else 65096845Smarkm sbuf_append(whatis_proto, " ", 1); 65196845Smarkm } 65296845Smarkm add_nroff(line); 65396845Smarkm arg++; 65496845Smarkm line = next; 65596845Smarkm } 65696845Smarkm if (sbuf_length(whatis_proto) > orig_length) 65796845Smarkm sbuf_append(whatis_proto, " ", 1); 65896845Smarkm} 65996845Smarkm 66096845Smarkm/* 66196845Smarkm * Collects a list of comma-separated names from the text. 66296845Smarkm */ 66396845Smarkmstatic void 66496845Smarkmcollect_names(StringList *names, char *text) 66596845Smarkm{ 66696845Smarkm char *arg; 66796845Smarkm 66896845Smarkm for (;;) { 66996845Smarkm arg = text; 67096845Smarkm text = strchr(text, ','); 67196845Smarkm if (text != NULL) 67296845Smarkm *text++ = '\0'; 67396845Smarkm sl_add(names, arg); 67496845Smarkm if (text == NULL) 67596845Smarkm return; 67696845Smarkm if (*text == ' ') 67796845Smarkm text++; 67896845Smarkm } 67996845Smarkm} 68096845Smarkm 68196845Smarkmenum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; 68296845Smarkm 68396845Smarkm/* 68496845Smarkm * Processes a man page source into a single whatis line and adds it 68596845Smarkm * to whatis_lines. 
68696845Smarkm */ 68796845Smarkmstatic void 68896845Smarkmprocess_page(struct page_info *page, char *section_dir) 68996845Smarkm{ 69096845Smarkm gzFile *in; 69196845Smarkm char buffer[4096]; 69296845Smarkm char *line; 69396845Smarkm StringList *names; 69496845Smarkm char *descr; 69596845Smarkm int state = STATE_UNKNOWN; 69697976Skeramida size_t i; 69796845Smarkm 69896845Smarkm sbuf_clear(whatis_proto); 69996845Smarkm if ((in = gzopen(page->filename, "r")) == NULL) { 70096845Smarkm warn("%s", page->filename); 70196845Smarkm exit_code = 1; 70296845Smarkm return; 70396845Smarkm } 70496845Smarkm while (gzgets(in, buffer, sizeof buffer) != NULL) { 70596845Smarkm line = buffer; 70696845Smarkm if (strncmp(line, ".\\\"", 3) == 0) /* ignore comments */ 70796845Smarkm continue; 70896845Smarkm switch (state) { 70996845Smarkm /* 71096845Smarkm * haven't reached the NAME section yet. 71196845Smarkm */ 71296845Smarkm case STATE_UNKNOWN: 71396845Smarkm if (name_section_line(line, ".SH")) 71496845Smarkm state = STATE_MANSTYLE; 71596845Smarkm else if (name_section_line(line, ".Sh")) 71696845Smarkm state = STATE_MDOCNAME; 71796845Smarkm continue; 71896845Smarkm /* 71996845Smarkm * Inside an old-style .SH NAME section. 72096845Smarkm */ 72196845Smarkm case STATE_MANSTYLE: 72296845Smarkm if (strncmp(line, ".SH", 3) == 0) 72396845Smarkm break; 72496845Smarkm trim_rhs(line); 72596845Smarkm if (strcmp(line, ".") == 0) 72696845Smarkm continue; 72796845Smarkm if (strncmp(line, ".IX", 3) == 0) { 72896845Smarkm line += 3; 72996845Smarkm line = skip_spaces(line); 73096845Smarkm } 73196845Smarkm process_man_line(line); 73296845Smarkm continue; 73396845Smarkm /* 73496845Smarkm * Inside a new-style .Sh NAME section (the .Nm part). 
73596845Smarkm */ 73696845Smarkm case STATE_MDOCNAME: 73796845Smarkm trim_rhs(line); 73896845Smarkm if (strncmp(line, ".Nm", 3) == 0) { 73996845Smarkm process_mdoc_line(line); 74096845Smarkm continue; 74196845Smarkm } else { 74296845Smarkm if (strcmp(line, ".") == 0) 74396845Smarkm continue; 74496845Smarkm sbuf_append(whatis_proto, "- ", 2); 74596845Smarkm state = STATE_MDOCDESC; 74696845Smarkm } 74796845Smarkm /* fall through */ 74896845Smarkm /* 74996845Smarkm * Inside a new-style .Sh NAME section (after the .Nm-s). 75096845Smarkm */ 75196845Smarkm case STATE_MDOCDESC: 75296845Smarkm if (strncmp(line, ".Sh", 3) == 0) 75396845Smarkm break; 75496845Smarkm trim_rhs(line); 75596845Smarkm if (strcmp(line, ".") == 0) 75696845Smarkm continue; 75796845Smarkm process_mdoc_line(line); 75896845Smarkm continue; 75996845Smarkm } 76096845Smarkm break; 76196845Smarkm } 76296845Smarkm gzclose(in); 76396845Smarkm sbuf_strip(whatis_proto, " \t.-"); 76496845Smarkm line = sbuf_content(whatis_proto); 76596845Smarkm /* 76696845Smarkm * line now contains the appropriate data, but without 76796845Smarkm * the proper indentation or the section appended to each name. 76896845Smarkm */ 76996845Smarkm descr = strstr(line, " - "); 77096845Smarkm if (descr == NULL) { 77196845Smarkm descr = strchr(line, ' '); 77296845Smarkm if (descr == NULL) { 77396845Smarkm if (verbose) 77496845Smarkm fprintf(stderr, " ignoring junk description \"%s\"\n", line); 77596845Smarkm return; 77696845Smarkm } 77796845Smarkm *descr++ = '\0'; 77896845Smarkm } else { 77996845Smarkm *descr = '\0'; 78096845Smarkm descr += 3; 78196845Smarkm } 78296845Smarkm names = sl_init(); 78396845Smarkm collect_names(names, line); 78496845Smarkm sbuf_clear(whatis_final); 78596845Smarkm if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) { 78696845Smarkm /* 78796845Smarkm * Add the page name since that's the only thing that 78896845Smarkm * man(1) will find. 
78996845Smarkm */ 79096845Smarkm add_whatis_name(page->name, page->suffix); 79196845Smarkm } 79296845Smarkm for (i = 0; i < names->sl_cur; i++) 79396845Smarkm add_whatis_name(names->sl_str[i], page->suffix); 79496845Smarkm sl_free(names, 0); 79596845Smarkm sbuf_retract(whatis_final, 2); /* remove last ", " */ 79696845Smarkm while (sbuf_length(whatis_final) < indent) 79796845Smarkm sbuf_append(whatis_final, " ", 1); 79896845Smarkm sbuf_append(whatis_final, " - ", 3); 79996845Smarkm sbuf_append_str(whatis_final, skip_spaces(descr)); 80096845Smarkm sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); 80196845Smarkm} 80296845Smarkm 80396845Smarkm/* 80496845Smarkm * Sorts pages first by inode number, then by name. 80596845Smarkm */ 80696845Smarkmstatic int 80796845Smarkmpagesort(const void *a, const void *b) 80896845Smarkm{ 80996845Smarkm struct page_info *p1 = *(struct page_info **) a; 81096845Smarkm struct page_info *p2 = *(struct page_info **) b; 81196845Smarkm if (p1->inode == p2->inode) 81296845Smarkm return strcmp(p1->name, p2->name); 81396845Smarkm return p1->inode - p2->inode; 81496845Smarkm} 81596845Smarkm 81696845Smarkm/* 81796845Smarkm * Processes a single man section. 
81896845Smarkm */ 81996845Smarkmstatic void 82096845Smarkmprocess_section(char *section_dir) 82196845Smarkm{ 82296845Smarkm struct dirent **entries; 82396845Smarkm int nentries; 82496845Smarkm struct page_info **pages; 82596845Smarkm int npages = 0; 82696845Smarkm int i; 82797976Skeramida ino_t prev_inode = 0; 82896845Smarkm 82996845Smarkm if (verbose) 83096845Smarkm fprintf(stderr, " %s\n", section_dir); 83196845Smarkm 83296845Smarkm /* 83396845Smarkm * scan the man section directory for pages 83496845Smarkm */ 83596845Smarkm nentries = scandir(section_dir, &entries, NULL, alphasort); 83696845Smarkm if (nentries < 0) { 83796845Smarkm warn("%s", section_dir); 83896845Smarkm exit_code = 1; 83996845Smarkm return; 84096845Smarkm } 84196845Smarkm /* 84296845Smarkm * collect information about man pages 84396845Smarkm */ 84496845Smarkm pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *)); 84596845Smarkm for (i = 0; i < nentries; i++) { 84696845Smarkm struct page_info *info = new_page_info(section_dir, entries[i]); 84796845Smarkm if (info != NULL) 84896845Smarkm pages[npages++] = info; 84996845Smarkm free(entries[i]); 85096845Smarkm } 85196845Smarkm free(entries); 85296845Smarkm qsort(pages, npages, sizeof(struct page_info *), pagesort); 85396845Smarkm /* 85496845Smarkm * process each unique page 85596845Smarkm */ 85696845Smarkm for (i = 0; i < npages; i++) { 85796845Smarkm struct page_info *page = pages[i]; 85896845Smarkm if (page->inode != prev_inode) { 85996845Smarkm prev_inode = page->inode; 86096845Smarkm if (verbose) 86196845Smarkm fprintf(stderr, " reading %s\n", page->filename); 86296845Smarkm process_page(page, section_dir); 86396845Smarkm } else if (verbose) 86496845Smarkm fprintf(stderr, " skipping %s, duplicate\n", page->filename); 86596845Smarkm free_page_info(page); 86696845Smarkm } 86796845Smarkm free(pages); 86896845Smarkm} 86996845Smarkm 87096845Smarkm/* 87196845Smarkm * Returns whether the directory entry is a man page section. 
87296845Smarkm */ 87396845Smarkmstatic int 87496845Smarkmselect_sections(struct dirent *entry) 87596845Smarkm{ 87696845Smarkm char *p = &entry->d_name[3]; 87796845Smarkm 87896845Smarkm if (strncmp(entry->d_name, "man", 3) != 0) 87996845Smarkm return 0; 88096845Smarkm while (*p != '\0') { 88196845Smarkm if (!isalnum(*p++)) 88296845Smarkm return 0; 88396845Smarkm } 88496845Smarkm return 1; 88596845Smarkm} 88696845Smarkm 88796845Smarkm/* 88896845Smarkm * Processes a single top-level man directory by finding all the 88996845Smarkm * sub-directories named man* and processing each one in turn. 89096845Smarkm */ 89196845Smarkmstatic void 89296845Smarkmprocess_mandir(char *dir_name) 89396845Smarkm{ 89496845Smarkm struct dirent **entries; 89596845Smarkm int nsections; 89696845Smarkm FILE *fp = NULL; 89796845Smarkm int i; 89896858Sru struct stat st; 89996845Smarkm 90096845Smarkm if (already_visited(dir_name)) 90196845Smarkm return; 90296845Smarkm if (verbose) 90396845Smarkm fprintf(stderr, "man directory %s\n", dir_name); 90496845Smarkm nsections = scandir(dir_name, &entries, select_sections, alphasort); 90596845Smarkm if (nsections < 0) { 90696845Smarkm warn("%s", dir_name); 90796845Smarkm exit_code = 1; 90896845Smarkm return; 90996845Smarkm } 91096845Smarkm if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL) 91196845Smarkm return; 91296845Smarkm for (i = 0; i < nsections; i++) { 91396845Smarkm char section_dir[MAXPATHLEN]; 91496845Smarkm snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name); 91596845Smarkm process_section(section_dir); 91696858Sru snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name, 91796858Sru entries[i]->d_name, machine); 91896858Sru if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode)) 91996858Sru process_section(section_dir); 92096845Smarkm free(entries[i]); 92196845Smarkm } 92296845Smarkm free(entries); 92396845Smarkm if (common_output == NULL) 92496845Smarkm finish_whatis(fp, dir_name); 
92596845Smarkm} 92696845Smarkm 92796845Smarkm/* 92896845Smarkm * Processes one argument, which may be a colon-separated list of 92996845Smarkm * directories. 93096845Smarkm */ 93196845Smarkmstatic void 93296845Smarkmprocess_argument(const char *arg) 93396845Smarkm{ 93496845Smarkm char *dir; 93596845Smarkm char *mandir; 93696845Smarkm char *parg; 93796845Smarkm 93896845Smarkm parg = strdup(arg); 93996845Smarkm if (parg == NULL) 94096845Smarkm err(1, "out of memory"); 94196845Smarkm while ((dir = strsep(&parg, ":")) != NULL) { 94296845Smarkm if (locale != NULL) { 94396845Smarkm asprintf(&mandir, "%s/%s", dir, locale); 94496845Smarkm process_mandir(mandir); 94596845Smarkm free(mandir); 94696845Smarkm if (lang_locale != NULL) { 94796845Smarkm asprintf(&mandir, "%s/%s", dir, lang_locale); 94896845Smarkm process_mandir(mandir); 94996845Smarkm free(mandir); 95096845Smarkm } 95196845Smarkm } else { 95296845Smarkm process_mandir(dir); 95396845Smarkm } 95496845Smarkm } 95596845Smarkm free(parg); 95696845Smarkm} 95796845Smarkm 95896845Smarkm 95996845Smarkmint 96096845Smarkmmain(int argc, char **argv) 96196845Smarkm{ 96296845Smarkm int opt; 96396845Smarkm extern int optind; 96496845Smarkm extern char *optarg; 96596845Smarkm FILE *fp = NULL; 96696845Smarkm 96796845Smarkm while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) { 96896845Smarkm switch (opt) { 96996845Smarkm case 'a': 97096845Smarkm append++; 97196845Smarkm break; 97296845Smarkm case 'i': 97396845Smarkm indent = atoi(optarg); 97496845Smarkm break; 97596845Smarkm case 'n': 97696845Smarkm whatis_name = optarg; 97796845Smarkm break; 97896845Smarkm case 'o': 97996845Smarkm common_output = optarg; 98096845Smarkm break; 98196845Smarkm case 'v': 98296845Smarkm verbose++; 98396845Smarkm break; 98496845Smarkm case 'L': 98596845Smarkm locale = getenv("LC_ALL"); 98696845Smarkm if (locale == NULL) 98796845Smarkm locale = getenv("LC_CTYPE"); 98896845Smarkm if (locale == NULL) 98996845Smarkm locale = getenv("LANG"); 
99096845Smarkm if (locale != NULL) { 99196845Smarkm char *sep = strchr(locale, '_'); 99296845Smarkm if (sep != NULL && isupper(sep[1]) && 99396845Smarkm isupper(sep[2])) { 99496845Smarkm asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]); 99596845Smarkm } 99696845Smarkm } 99796845Smarkm break; 99896845Smarkm default: 99996845Smarkm fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]); 100096845Smarkm exit(1); 100196845Smarkm } 100296845Smarkm } 100396845Smarkm 100496845Smarkm signal(SIGINT, trap_signal); 100596845Smarkm signal(SIGHUP, trap_signal); 100696845Smarkm signal(SIGQUIT, trap_signal); 100796845Smarkm signal(SIGTERM, trap_signal); 100896845Smarkm SLIST_INIT(&visited_dirs); 100996845Smarkm whatis_proto = new_sbuf(); 101096845Smarkm whatis_final = new_sbuf(); 101196845Smarkm 101296858Sru if ((machine = getenv("MACHINE")) == NULL) 101396858Sru machine = MACHINE; 101496858Sru 101596845Smarkm if (common_output != NULL && (fp = open_output(common_output)) == NULL) 101696845Smarkm err(1, "%s", common_output); 101796845Smarkm if (optind == argc) { 101896845Smarkm const char *manpath = getenv("MANPATH"); 101996845Smarkm if (manpath == NULL) 102096845Smarkm manpath = DEFAULT_MANPATH; 102196845Smarkm process_argument(manpath); 102296845Smarkm } else { 102396845Smarkm while (optind < argc) 102496845Smarkm process_argument(argv[optind++]); 102596845Smarkm } 102696845Smarkm if (common_output != NULL) 102796845Smarkm finish_output(fp, common_output); 102896845Smarkm exit(exit_code); 102996845Smarkm} 1030