makewhatis.c revision 97102
1229430Spfg/*- 2229430Spfg * Copyright (c) 2002 John Rochester 3229430Spfg * All rights reserved. 4229430Spfg * 5229430Spfg * Redistribution and use in source and binary forms, with or without 6229430Spfg * modification, are permitted provided that the following conditions 7229430Spfg * are met: 8229430Spfg * 1. Redistributions of source code must retain the above copyright 9229430Spfg * notice, this list of conditions and the following disclaimer, 10229430Spfg * in this position and unchanged. 11229430Spfg * 2. Redistributions in binary form must reproduce the above copyright 12229430Spfg * notice, this list of conditions and the following disclaimer in the 13229430Spfg * documentation and/or other materials provided with the distribution. 14229430Spfg * 3. The name of the author may not be used to endorse or promote products 15229430Spfg * derived from this software without specific prior written permission 16229430Spfg * 17229430Spfg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18229430Spfg * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19229430Spfg * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20229430Spfg * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21229430Spfg * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22229430Spfg * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23229430Spfg * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24229430Spfg * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25229430Spfg * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26229430Spfg * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27229430Spfg */ 28229430Spfg 29229430Spfg#include <sys/cdefs.h> 30229430Spfg__FBSDID("$FreeBSD: head/usr.bin/makewhatis/makewhatis.c 97102 2002-05-22 11:08:41Z ru $"); 31229430Spfg 32229430Spfg#include <sys/types.h> 33229430Spfg#include <sys/stat.h> 34229430Spfg#include <sys/param.h> 35229430Spfg#include <sys/queue.h> 36229430Spfg 37229430Spfg#include <ctype.h> 38229430Spfg#include <dirent.h> 39229430Spfg#include <err.h> 40229430Spfg#include <stdio.h> 41229430Spfg#include <stdlib.h> 42229430Spfg#include <string.h> 43229430Spfg#include <stringlist.h> 44229430Spfg#include <unistd.h> 45229430Spfg#include <zlib.h> 46229430Spfg 47229430Spfg#define DEFAULT_MANPATH "/usr/share/man" 48229430Spfg#define LINE_ALLOC 4096 49229430Spfg 50229430Spfgstatic char blank[] = ""; 51229430Spfg 52229430Spfg/* 53229430Spfg * Information collected about each man page in a section. 54229430Spfg */ 55229430Spfgstruct page_info { 56229430Spfg char * filename; 57229430Spfg char * name; 58229430Spfg char * suffix; 59229430Spfg int gzipped; 60229430Spfg ino_t inode; 61229430Spfg}; 62229430Spfg 63229430Spfg/* 64229430Spfg * An entry kept for each visited directory. 65229430Spfg */ 66229430Spfgstruct visited_dir { 67229430Spfg dev_t device; 68229430Spfg ino_t inode; 69229430Spfg SLIST_ENTRY(visited_dir) next; 70229430Spfg}; 71229430Spfg 72229430Spfg/* 73229430Spfg * an expanding string 74229430Spfg */ 75229430Spfgstruct sbuf { 76229430Spfg char * content; /* the start of the buffer */ 77229430Spfg char * end; /* just past the end of the content */ 78229430Spfg char * last; /* the last allocated character */ 79229430Spfg}; 80229430Spfg 81229430Spfg/* 82229430Spfg * Removes the last amount characters from the sbuf. 83229430Spfg */ 84229430Spfg#define sbuf_retract(sbuf, amount) \ 85229430Spfg ((sbuf)->end -= (amount)) 86229430Spfg/* 87229430Spfg * Returns the length of the sbuf content. 88229430Spfg */ 89229430Spfg#define sbuf_length(sbuf) \ 90229430Spfg ((sbuf)->end - (sbuf)->content) 91229430Spfg 92229430Spfgtypedef char *edited_copy(char *from, char *to, int length); 93229430Spfg 94229430Spfgstatic int append; /* -a flag: append to existing whatis */ 95229430Spfgstatic int verbose; /* -v flag: be verbose with warnings */ 96229430Spfgstatic int indent = 24; /* -i option: description indentation */ 97229430Spfgstatic const char *whatis_name="whatis";/* -n option: the name */ 98229430Spfgstatic char *common_output; /* -o option: the single output file */ 99229430Spfgstatic char *locale; /* user's locale if -L is used */ 100229430Spfgstatic char *lang_locale; /* short form of locale */ 101229430Spfgstatic char *machine; 102229430Spfg 103229430Spfgstatic int exit_code; /* exit code to use when finished */ 104229430Spfgstatic SLIST_HEAD(, visited_dir) visited_dirs = 105229430Spfg SLIST_HEAD_INITIALIZER(visited_dirs); 106229430Spfg 107229430Spfg/* 108229430Spfg * While the whatis line is being formed, it is stored in whatis_proto. 109229430Spfg * When finished, it is reformatted into whatis_final and then appended 110229430Spfg * to whatis_lines. 111229430Spfg */ 112229430Spfgstatic struct sbuf *whatis_proto; 113229430Spfgstatic struct sbuf *whatis_final; 114229430Spfgstatic StringList *whatis_lines; /* collected output lines */ 115229430Spfg 116229430Spfgstatic char tmp_file[MAXPATHLEN]; /* path of temporary file, if any */ 117229430Spfg 118229430Spfg/* A set of possible names for the NAME man page section */ 119229430Spfgstatic const char *name_section_titles[] = { 120229430Spfg "NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce", 121229430Spfg "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL 122229430Spfg}; 123229430Spfg 124229430Spfg/* A subset of the mdoc(7) commands to ignore */ 125229430Spfgstatic char mdoc_commands[] = "ArDvErEvFlLiNmPa"; 126229430Spfg 127229430Spfg/* 128229430Spfg * Frees a struct page_info and its content. 129229430Spfg */ 130229430Spfgstatic void 131229430Spfgfree_page_info(struct page_info *info) 132229430Spfg{ 133229430Spfg free(info->filename); 134229430Spfg free(info->name); 135229430Spfg free(info->suffix); 136229430Spfg free(info); 137229430Spfg} 138229430Spfg 139229430Spfg/* 140229430Spfg * Allocates and fills in a new struct page_info given the 141229430Spfg * name of the man section directory and the dirent of the file. 142229430Spfg * If the file is not a man page, returns NULL. 143229430Spfg */ 144229430Spfgstatic struct page_info * 145229430Spfgnew_page_info(char *dir, struct dirent *dirent) 146229430Spfg{ 147229430Spfg struct page_info *info; 148229430Spfg int basename_length; 149229430Spfg char *suffix; 150229430Spfg struct stat st; 151229430Spfg 152229430Spfg info = (struct page_info *) malloc(sizeof(struct page_info)); 153229430Spfg if (info == NULL) 154229430Spfg err(1, "malloc"); 155229430Spfg basename_length = strlen(dirent->d_name); 156229430Spfg suffix = &dirent->d_name[basename_length]; 157229430Spfg asprintf(&info->filename, "%s/%s", dir, dirent->d_name); 158229430Spfg if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) { 159229430Spfg suffix -= 3; 160229430Spfg *suffix = '\0'; 161229430Spfg } 162229430Spfg for (;;) { 163229430Spfg if (--suffix == dirent->d_name || !isalnum(*suffix)) { 164229430Spfg if (*suffix == '.') 165229430Spfg break; 166229430Spfg if (verbose) 167229430Spfg warnx("%s: invalid man page name", info->filename); 168229430Spfg free(info->filename); 169229430Spfg free(info); 170229430Spfg return NULL; 171229981Spfg } 172229981Spfg } 173229430Spfg *suffix++ = '\0'; 174229430Spfg info->name = strdup(dirent->d_name); 175229430Spfg info->suffix = strdup(suffix); 176229430Spfg if (stat(info->filename, &st) < 0) { 177229430Spfg warn("%s", info->filename); 178229430Spfg free_page_info(info); 179229430Spfg return NULL; 180229430Spfg } 181229430Spfg if (!S_ISREG(st.st_mode)) { 182229430Spfg if (verbose && !S_ISDIR(st.st_mode)) 183229430Spfg warnx("%s: not a regular file", info->filename); 184229430Spfg free_page_info(info); 185229430Spfg return NULL; 186229430Spfg } 187229430Spfg info->inode = st.st_ino; 188229430Spfg return info; 189229430Spfg} 190229430Spfg 191229430Spfg/* 192229430Spfg * Reset an sbuf's length to 0. 193229430Spfg */ 194229430Spfgstatic void 195229430Spfgsbuf_clear(struct sbuf *sbuf) 196229430Spfg{ 197229430Spfg sbuf->end = sbuf->content; 198229430Spfg} 199229430Spfg 200229430Spfg/* 201229430Spfg * Allocate a new sbuf. 202229430Spfg */ 203229430Spfgstatic struct sbuf * 204229430Spfgnew_sbuf(void) 205229430Spfg{ 206229430Spfg struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf)); 207229430Spfg sbuf->content = (char *) malloc(LINE_ALLOC); 208229430Spfg sbuf->last = sbuf->content + LINE_ALLOC - 1; 209229430Spfg sbuf_clear(sbuf); 210229430Spfg return sbuf; 211229430Spfg} 212229430Spfg 213229430Spfg/* 214229430Spfg * Ensure that there is enough room in the sbuf for chars more characters. 215229430Spfg */ 216229430Spfgstatic void 217229430Spfgsbuf_need(struct sbuf *sbuf, int nchars) 218229430Spfg{ 219229430Spfg /* let's assume we only need to double it, but check just in case */ 220229430Spfg while (sbuf->end + nchars > sbuf->last) { 221229430Spfg int alloc; 222229430Spfg char *new_content; 223229430Spfg 224229430Spfg alloc = (sbuf->last - sbuf->content + 1) * 2; 225229430Spfg new_content = (char *) malloc(alloc); 226229430Spfg memcpy(new_content, sbuf->content, sbuf->end - sbuf->content); 227229430Spfg sbuf->end = new_content + (sbuf->end - sbuf->content); 228229430Spfg free(sbuf->content); 229229430Spfg sbuf->content = new_content; 230229430Spfg } 231229430Spfg} 232229430Spfg 233229430Spfg/* 234229430Spfg * Appends a string of a given length to the sbuf. 235229430Spfg */ 236229430Spfgstatic void 237229430Spfgsbuf_append(struct sbuf *sbuf, const char *text, int length) 238229430Spfg{ 239229430Spfg if (length > 0) { 240229430Spfg sbuf_need(sbuf, length); 241229430Spfg memcpy(sbuf->end, text, length); 242229430Spfg sbuf->end += length; 243229430Spfg } 244229430Spfg} 245229430Spfg 246229430Spfg/* 247229430Spfg * Appends a null-terminated string to the sbuf. 248229430Spfg */ 249229430Spfgstatic void 250229430Spfgsbuf_append_str(struct sbuf *sbuf, char *text) 251229430Spfg{ 252229430Spfg sbuf_append(sbuf, text, strlen(text)); 253229430Spfg} 254229430Spfg 255229430Spfg/* 256229430Spfg * Appends an edited null-terminated string to the sbuf. 257229430Spfg */ 258229430Spfgstatic void 259229430Spfgsbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) 260229430Spfg{ 261229430Spfg int length = strlen(text); 262229430Spfg if (length > 0) { 263229430Spfg sbuf_need(sbuf, length); 264229430Spfg sbuf->end = copy(text, sbuf->end, length); 265229430Spfg } 266229430Spfg} 267229430Spfg 268229430Spfg/* 269229430Spfg * Strips any of a set of chars from the end of the sbuf. 270229430Spfg */ 271229430Spfgstatic void 272229430Spfgsbuf_strip(struct sbuf *sbuf, const char *set) 273229430Spfg{ 274229430Spfg while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) 275229430Spfg sbuf->end--; 276229430Spfg} 277229430Spfg 278229430Spfg/* 279229430Spfg * Returns the null-terminated string built by the sbuf. 280229430Spfg */ 281229430Spfgstatic char * 282229430Spfgsbuf_content(struct sbuf *sbuf) 283229430Spfg{ 284229430Spfg *sbuf->end = '\0'; 285229430Spfg return sbuf->content; 286229430Spfg} 287229430Spfg 288229430Spfg/* 289229430Spfg * Returns true if no man page exists in the directory with 290229430Spfg * any of the names in the StringList. 291229430Spfg */ 292229430Spfgstatic int 293229430Spfgno_page_exists(char *dir, StringList *names, char *suffix) 294229430Spfg{ 295229430Spfg char path[MAXPATHLEN]; 296229430Spfg int i; 297229430Spfg 298229430Spfg for (i = 0; i < names->sl_cur; i++) { 299229430Spfg snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix); 300229430Spfg if (access(path, F_OK) < 0) { 301229430Spfg path[strlen(path) - 3] = '\0'; 302229430Spfg if (access(path, F_OK) < 0) 303229430Spfg continue; 304229430Spfg } 305229430Spfg return 0; 306229430Spfg } 307229430Spfg return 1; 308229430Spfg} 309229430Spfg 310229430Spfgstatic void 311229430Spfgtrap_signal(int sig __unused) 312229430Spfg{ 313229430Spfg if (tmp_file[0] != '\0') 314229430Spfg unlink(tmp_file); 315229430Spfg exit(1); 316229430Spfg} 317229430Spfg 318229430Spfg/* 319229430Spfg * Attempts to open an output file. Returns NULL if unsuccessful. 320229430Spfg */ 321229430Spfgstatic FILE * 322229430Spfgopen_output(char *name) 323229430Spfg{ 324229430Spfg FILE *output; 325229430Spfg 326229430Spfg whatis_lines = sl_init(); 327229430Spfg if (append) { 328229430Spfg char line[LINE_ALLOC]; 329229430Spfg 330229430Spfg output = fopen(name, "r"); 331229430Spfg if (output == NULL) { 332229430Spfg warn("%s", name); 333229430Spfg exit_code = 1; 334229430Spfg return NULL; 335229430Spfg } 336229430Spfg while (fgets(line, sizeof line, output) != NULL) { 337229430Spfg line[strlen(line) - 1] = '\0'; 338229430Spfg sl_add(whatis_lines, strdup(line)); 339229430Spfg } 340229430Spfg } 341229430Spfg if (common_output == NULL) { 342229430Spfg snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name); 343229430Spfg name = tmp_file; 344229430Spfg } 345229430Spfg output = fopen(name, "w"); 346229430Spfg if (output == NULL) { 347229430Spfg warn("%s", name); 348229430Spfg exit_code = 1; 349229430Spfg return NULL; 350229430Spfg } 351229430Spfg return output; 352229430Spfg} 353229430Spfg 354229430Spfgstatic int 355229430Spfglinesort(const void *a, const void *b) 356229430Spfg{ 357229430Spfg return strcmp((const char *)(*(const char **)a), (const char *)(*(const char **)b)); 358229430Spfg} 359229430Spfg 360229430Spfg/* 361229430Spfg * Writes the unique sorted lines to the output file. 362229430Spfg */ 363229430Spfgstatic void 364229430Spfgfinish_output(FILE *output, char *name) 365229430Spfg{ 366229430Spfg int i; 367229430Spfg char *prev = NULL; 368229430Spfg 369229430Spfg qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort); 370229430Spfg for (i = 0; i < whatis_lines->sl_cur; i++) { 371229430Spfg char *line = whatis_lines->sl_str[i]; 372229430Spfg if (i > 0 && strcmp(line, prev) == 0) 373229430Spfg continue; 374229430Spfg prev = line; 375229430Spfg fputs(line, output); 376229430Spfg putc('\n', output); 377229430Spfg } 378229430Spfg fclose(output); 379229430Spfg sl_free(whatis_lines, 1); 380229430Spfg if (common_output == NULL) { 381229430Spfg rename(tmp_file, name); 382229430Spfg unlink(tmp_file); 383229430Spfg } 384229430Spfg} 385229430Spfg 386229430Spfgstatic FILE * 387229430Spfgopen_whatis(char *mandir) 388229430Spfg{ 389229430Spfg char filename[MAXPATHLEN]; 390229430Spfg 391229430Spfg snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name); 392229430Spfg return open_output(filename); 393229430Spfg} 394229430Spfg 395229430Spfgstatic void 396229430Spfgfinish_whatis(FILE *output, char *mandir) 397229430Spfg{ 398229430Spfg char filename[MAXPATHLEN]; 399229430Spfg 400229430Spfg snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name); 401229430Spfg finish_output(output, filename); 402229430Spfg} 403229430Spfg 404229430Spfg/* 405229430Spfg * Tests to see if the given directory has already been visited. 406229430Spfg */ 407229430Spfgstatic int 408229430Spfgalready_visited(char *dir) 409229430Spfg{ 410229430Spfg struct stat st; 411229430Spfg struct visited_dir *visit; 412229430Spfg 413229430Spfg if (stat(dir, &st) < 0) { 414229430Spfg warn("%s", dir); 415229430Spfg exit_code = 1; 416229430Spfg return 1; 417229430Spfg } 418229430Spfg SLIST_FOREACH(visit, &visited_dirs, next) { 419229430Spfg if (visit->inode == st.st_ino && 420229430Spfg visit->device == st.st_dev) { 421229430Spfg warnx("already visited %s", dir); 422229430Spfg return 1; 423229430Spfg } 424229430Spfg } 425229430Spfg visit = (struct visited_dir *) malloc(sizeof(struct visited_dir)); 426229430Spfg visit->device = st.st_dev; 427229430Spfg visit->inode = st.st_ino; 428229430Spfg SLIST_INSERT_HEAD(&visited_dirs, visit, next); 429229430Spfg return 0; 430229430Spfg} 431229430Spfg 432229430Spfg/* 433229430Spfg * Removes trailing spaces from a string, returning a pointer to just 434229430Spfg * beyond the new last character. 435229430Spfg */ 436229430Spfgstatic char * 437229430Spfgtrim_rhs(char *str) 438229430Spfg{ 439229430Spfg char *rhs = &str[strlen(str)]; 440229430Spfg while (--rhs > str && isspace(*rhs)) 441229430Spfg ; 442229430Spfg *++rhs = '\0'; 443229430Spfg return rhs; 444229430Spfg} 445229430Spfg 446229430Spfg/* 447229430Spfg * Returns a pointer to the next non-space character in the string. 448229430Spfg */ 449229430Spfgstatic char * 450229430Spfgskip_spaces(char *s) 451229430Spfg{ 452229430Spfg while (*s != '\0' && isspace(*s)) 453229430Spfg s++; 454229430Spfg return s; 455229430Spfg} 456229430Spfg 457229430Spfg/* 458229430Spfg * Returns whether the string contains only digits. 459229430Spfg */ 460229430Spfgstatic int 461229430Spfgonly_digits(char *line) 462229430Spfg{ 463229430Spfg if (!isdigit(*line++)) 464229430Spfg return 0; 465229430Spfg while (isdigit(*line)) 466229430Spfg line++; 467229430Spfg return *line == '\0'; 468229430Spfg} 469229430Spfg 470229430Spfg/* 471229430Spfg * Returns whether the line is of one of the forms: 472229430Spfg * .Sh NAME 473229430Spfg * .Sh "NAME" 474229430Spfg * etc. 475229430Spfg * assuming that section_start is ".Sh". 476229430Spfg */ 477229430Spfgstatic int 478229430Spfgname_section_line(char *line, const char *section_start) 479229430Spfg{ 480229430Spfg char *rhs; 481229430Spfg const char **title; 482229430Spfg 483229430Spfg if (strncmp(line, section_start, 3) != 0) 484229430Spfg return 0; 485229430Spfg line = skip_spaces(line + 3); 486229430Spfg rhs = trim_rhs(line); 487229430Spfg if (*line == '"') { 488229430Spfg line++; 489229430Spfg if (*--rhs == '"') 490229430Spfg *rhs = '\0'; 491229430Spfg } 492229430Spfg for (title = name_section_titles; *title != NULL; title++) 493229430Spfg if (strcmp(*title, line) == 0) 494229430Spfg return 1; 495229430Spfg return 0; 496229430Spfg} 497229430Spfg 498229430Spfg/* 499229430Spfg * Copies characters while removing the most common nroff/troff 500229430Spfg * markup: 501229430Spfg * \(em, \(mi, \s[+-N], \& 502229430Spfg * \fF, \f(fo, \f[font] 503229430Spfg * \*s, \*(st, \*[stringvar] 504229430Spfg */ 505229430Spfgstatic char * 506229430Spfgde_nroff_copy(char *from, char *to, int fromlen) 507229430Spfg{ 508229430Spfg char *from_end = &from[fromlen]; 509229430Spfg while (from < from_end) { 510229430Spfg switch (*from) { 511229430Spfg case '\\': 512229430Spfg switch (*++from) { 513229430Spfg case '(': 514229430Spfg if (strncmp(&from[1], "em", 2) == 0 || 515229430Spfg strncmp(&from[1], "mi", 2) == 0) { 516229430Spfg from += 3; 517229430Spfg continue; 518229430Spfg } 519229430Spfg break; 520229430Spfg case 's': 521229430Spfg if (*++from == '-') 522229430Spfg from++; 523229430Spfg while (isdigit(*from)) 524229430Spfg from++; 525229430Spfg continue; 526229430Spfg case 'f': 527229430Spfg case '*': 528229430Spfg if (*++from == '(') 529229430Spfg from += 3; 530229430Spfg else if (*from == '[') { 531229430Spfg while (*++from != ']' && from < from_end); 532229430Spfg from++; 533229430Spfg } else 534229430Spfg from++; 535229981Spfg continue; 536229981Spfg case '&': 537229430Spfg from++; 538229430Spfg continue; 539229430Spfg } 540229430Spfg break; 541229430Spfg } 542229430Spfg *to++ = *from++; 543229430Spfg } 544229430Spfg return to; 545229430Spfg} 546229430Spfg 547229430Spfg/* 548229430Spfg * Appends a string with the nroff formatting removed. 549229430Spfg */ 550229430Spfgstatic void 551229430Spfgadd_nroff(char *text) 552229430Spfg{ 553229430Spfg sbuf_append_edited(whatis_proto, text, de_nroff_copy); 554229430Spfg} 555229430Spfg 556229430Spfg/* 557229430Spfg * Appends "name(suffix), " to whatis_final. 558229430Spfg */ 559229430Spfgstatic void 560229430Spfgadd_whatis_name(char *name, char *suffix) 561229430Spfg{ 562229430Spfg if (*name != '\0') { 563229430Spfg sbuf_append_str(whatis_final, name); 564229430Spfg sbuf_append(whatis_final, "(", 1); 565229430Spfg sbuf_append_str(whatis_final, suffix); 566229430Spfg sbuf_append(whatis_final, "), ", 3); 567229430Spfg } 568229430Spfg} 569229430Spfg 570229430Spfg/* 571229430Spfg * Processes an old-style man(7) line. This ignores commands with only 572229430Spfg * a single number argument. 573229430Spfg */ 574229430Spfgstatic void 575229430Spfgprocess_man_line(char *line) 576229430Spfg{ 577229430Spfg if (*line == '.') { 578229430Spfg while (isalpha(*++line)) 579229430Spfg ; 580229430Spfg line = skip_spaces(line); 581229430Spfg if (only_digits(line)) 582229430Spfg return; 583229430Spfg } else 584229430Spfg line = skip_spaces(line); 585229430Spfg if (*line != '\0') { 586229430Spfg add_nroff(line); 587229430Spfg sbuf_append(whatis_proto, " ", 1); 588229430Spfg } 589229430Spfg} 590229430Spfg 591229430Spfg/* 592229430Spfg * Processes a new-style mdoc(7) line. 593229430Spfg */ 594229430Spfgstatic void 595229430Spfgprocess_mdoc_line(char *line) 596229430Spfg{ 597229430Spfg int xref; 598229430Spfg int arg = 0; 599229430Spfg char *line_end = &line[strlen(line)]; 600229430Spfg int orig_length = sbuf_length(whatis_proto); 601229430Spfg char *next; 602229430Spfg 603229430Spfg if (*line == '\0') 604229430Spfg return; 605229430Spfg if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { 606229430Spfg add_nroff(skip_spaces(line)); 607229430Spfg sbuf_append(whatis_proto, " ", 1); 608229430Spfg return; 609229430Spfg } 610229430Spfg xref = strncmp(line, ".Xr", 3) == 0; 611229430Spfg line += 3; 612229430Spfg while ((line = skip_spaces(line)) < line_end) { 613229430Spfg if (*line == '"') { 614229430Spfg next = ++line; 615229430Spfg for (;;) { 616229430Spfg next = strchr(next, '"'); 617229430Spfg if (next == NULL) 618229430Spfg break; 619229430Spfg strcpy(next, &next[1]); 620229430Spfg line_end--; 621229430Spfg if (*next != '"') 622229430Spfg break; 623229430Spfg next++; 624229430Spfg } 625229430Spfg } else 626229430Spfg next = strpbrk(line, " \t"); 627229430Spfg if (next != NULL) 628229430Spfg *next++ = '\0'; 629229430Spfg else 630229430Spfg next = line_end; 631229430Spfg if (isupper(*line) && islower(line[1]) && line[2] == '\0') { 632229430Spfg if (strcmp(line, "Ns") == 0) { 633229430Spfg arg = 0; 634229430Spfg line = next; 635229430Spfg continue; 636229430Spfg } 637229430Spfg if (strstr(mdoc_commands, line) != NULL) { 638229430Spfg line = next; 639229430Spfg continue; 640229430Spfg } 641229430Spfg } 642229430Spfg if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { 643229430Spfg if (xref) { 644229430Spfg sbuf_append(whatis_proto, "(", 1); 645229430Spfg add_nroff(line); 646229430Spfg sbuf_append(whatis_proto, ")", 1); 647229430Spfg xref = 0; 648229430Spfg line = blank; 649229430Spfg } else 650229430Spfg sbuf_append(whatis_proto, " ", 1); 651229430Spfg } 652229430Spfg add_nroff(line); 653229430Spfg arg++; 654229430Spfg line = next; 655229430Spfg } 656229430Spfg if (sbuf_length(whatis_proto) > orig_length) 657229430Spfg sbuf_append(whatis_proto, " ", 1); 658229430Spfg} 659229430Spfg 660229430Spfg/* 661229430Spfg * Collects a list of comma-separated names from the text. 662229430Spfg */ 663229430Spfgstatic void 664229430Spfgcollect_names(StringList *names, char *text) 665229430Spfg{ 666229430Spfg char *arg; 667229430Spfg 668229430Spfg for (;;) { 669229430Spfg arg = text; 670229430Spfg text = strchr(text, ','); 671229430Spfg if (text != NULL) 672229430Spfg *text++ = '\0'; 673229430Spfg sl_add(names, arg); 674229430Spfg if (text == NULL) 675229430Spfg return; 676229430Spfg if (*text == ' ') 677229430Spfg text++; 678229430Spfg } 679229430Spfg} 680229430Spfg 681229430Spfgenum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; 682229430Spfg 683229430Spfg/* 684229430Spfg * Processes a man page source into a single whatis line and adds it 685229430Spfg * to whatis_lines. 686229430Spfg */ 687229430Spfgstatic void 688229430Spfgprocess_page(struct page_info *page, char *section_dir) 689229430Spfg{ 690 gzFile *in; 691 char buffer[4096]; 692 char *line; 693 StringList *names; 694 char *descr; 695 int state = STATE_UNKNOWN; 696 int i; 697 698 sbuf_clear(whatis_proto); 699 if ((in = gzopen(page->filename, "r")) == NULL) { 700 warn("%s", page->filename); 701 exit_code = 1; 702 return; 703 } 704 while (gzgets(in, buffer, sizeof buffer) != NULL) { 705 line = buffer; 706 if (strncmp(line, ".\\\"", 3) == 0) /* ignore comments */ 707 continue; 708 switch (state) { 709 /* 710 * haven't reached the NAME section yet. 711 */ 712 case STATE_UNKNOWN: 713 if (name_section_line(line, ".SH")) 714 state = STATE_MANSTYLE; 715 else if (name_section_line(line, ".Sh")) 716 state = STATE_MDOCNAME; 717 continue; 718 /* 719 * Inside an old-style .SH NAME section. 720 */ 721 case STATE_MANSTYLE: 722 if (strncmp(line, ".SH", 3) == 0) 723 break; 724 trim_rhs(line); 725 if (strcmp(line, ".") == 0) 726 continue; 727 if (strncmp(line, ".IX", 3) == 0) { 728 line += 3; 729 line = skip_spaces(line); 730 } 731 process_man_line(line); 732 continue; 733 /* 734 * Inside a new-style .Sh NAME section (the .Nm part). 735 */ 736 case STATE_MDOCNAME: 737 trim_rhs(line); 738 if (strncmp(line, ".Nm", 3) == 0) { 739 process_mdoc_line(line); 740 continue; 741 } else { 742 if (strcmp(line, ".") == 0) 743 continue; 744 sbuf_append(whatis_proto, "- ", 2); 745 state = STATE_MDOCDESC; 746 } 747 /* fall through */ 748 /* 749 * Inside a new-style .Sh NAME section (after the .Nm-s). 750 */ 751 case STATE_MDOCDESC: 752 if (strncmp(line, ".Sh", 3) == 0) 753 break; 754 trim_rhs(line); 755 if (strcmp(line, ".") == 0) 756 continue; 757 process_mdoc_line(line); 758 continue; 759 } 760 break; 761 } 762 gzclose(in); 763 sbuf_strip(whatis_proto, " \t.-"); 764 line = sbuf_content(whatis_proto); 765 /* 766 * line now contains the appropriate data, but without 767 * the proper indentation or the section appended to each name. 768 */ 769 descr = strstr(line, " - "); 770 if (descr == NULL) { 771 descr = strchr(line, ' '); 772 if (descr == NULL) { 773 if (verbose) 774 fprintf(stderr, " ignoring junk description \"%s\"\n", line); 775 return; 776 } 777 *descr++ = '\0'; 778 } else { 779 *descr = '\0'; 780 descr += 3; 781 } 782 names = sl_init(); 783 collect_names(names, line); 784 sbuf_clear(whatis_final); 785 if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) { 786 /* 787 * Add the page name since that's the only thing that 788 * man(1) will find. 789 */ 790 add_whatis_name(page->name, page->suffix); 791 } 792 for (i = 0; i < names->sl_cur; i++) 793 add_whatis_name(names->sl_str[i], page->suffix); 794 sl_free(names, 0); 795 sbuf_retract(whatis_final, 2); /* remove last ", " */ 796 while (sbuf_length(whatis_final) < indent) 797 sbuf_append(whatis_final, " ", 1); 798 sbuf_append(whatis_final, " - ", 3); 799 sbuf_append_str(whatis_final, skip_spaces(descr)); 800 sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); 801} 802 803/* 804 * Sorts pages first by inode number, then by name. 805 */ 806static int 807pagesort(const void *a, const void *b) 808{ 809 struct page_info *p1 = *(struct page_info **) a; 810 struct page_info *p2 = *(struct page_info **) b; 811 if (p1->inode == p2->inode) 812 return strcmp(p1->name, p2->name); 813 return p1->inode - p2->inode; 814} 815 816/* 817 * Processes a single man section. 818 */ 819static void 820process_section(char *section_dir) 821{ 822 struct dirent **entries; 823 int nentries; 824 struct page_info **pages; 825 int npages = 0; 826 int i; 827 int prev_inode = 0; 828 829 if (verbose) 830 fprintf(stderr, " %s\n", section_dir); 831 832 /* 833 * scan the man section directory for pages 834 */ 835 nentries = scandir(section_dir, &entries, NULL, alphasort); 836 if (nentries < 0) { 837 warn("%s", section_dir); 838 exit_code = 1; 839 return; 840 } 841 /* 842 * collect information about man pages 843 */ 844 pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *)); 845 for (i = 0; i < nentries; i++) { 846 struct page_info *info = new_page_info(section_dir, entries[i]); 847 if (info != NULL) 848 pages[npages++] = info; 849 free(entries[i]); 850 } 851 free(entries); 852 qsort(pages, npages, sizeof(struct page_info *), pagesort); 853 /* 854 * process each unique page 855 */ 856 for (i = 0; i < npages; i++) { 857 struct page_info *page = pages[i]; 858 if (page->inode != prev_inode) { 859 prev_inode = page->inode; 860 if (verbose) 861 fprintf(stderr, " reading %s\n", page->filename); 862 process_page(page, section_dir); 863 } else if (verbose) 864 fprintf(stderr, " skipping %s, duplicate\n", page->filename); 865 free_page_info(page); 866 } 867 free(pages); 868} 869 870/* 871 * Returns whether the directory entry is a man page section. 872 */ 873static int 874select_sections(struct dirent *entry) 875{ 876 char *p = &entry->d_name[3]; 877 878 if (strncmp(entry->d_name, "man", 3) != 0) 879 return 0; 880 while (*p != '\0') { 881 if (!isalnum(*p++)) 882 return 0; 883 } 884 return 1; 885} 886 887/* 888 * Processes a single top-level man directory by finding all the 889 * sub-directories named man* and processing each one in turn. 890 */ 891static void 892process_mandir(char *dir_name) 893{ 894 struct dirent **entries; 895 int nsections; 896 FILE *fp = NULL; 897 int i; 898 struct stat st; 899 900 if (already_visited(dir_name)) 901 return; 902 if (verbose) 903 fprintf(stderr, "man directory %s\n", dir_name); 904 nsections = scandir(dir_name, &entries, select_sections, alphasort); 905 if (nsections < 0) { 906 warn("%s", dir_name); 907 exit_code = 1; 908 return; 909 } 910 if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL) 911 return; 912 for (i = 0; i < nsections; i++) { 913 char section_dir[MAXPATHLEN]; 914 snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name); 915 process_section(section_dir); 916 snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name, 917 entries[i]->d_name, machine); 918 if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode)) 919 process_section(section_dir); 920 free(entries[i]); 921 } 922 free(entries); 923 if (common_output == NULL) 924 finish_whatis(fp, dir_name); 925} 926 927/* 928 * Processes one argument, which may be a colon-separated list of 929 * directories. 930 */ 931static void 932process_argument(const char *arg) 933{ 934 char *dir; 935 char *mandir; 936 char *parg; 937 938 parg = strdup(arg); 939 if (parg == NULL) 940 err(1, "out of memory"); 941 while ((dir = strsep(&parg, ":")) != NULL) { 942 if (locale != NULL) { 943 asprintf(&mandir, "%s/%s", dir, locale); 944 process_mandir(mandir); 945 free(mandir); 946 if (lang_locale != NULL) { 947 asprintf(&mandir, "%s/%s", dir, lang_locale); 948 process_mandir(mandir); 949 free(mandir); 950 } 951 } else { 952 process_mandir(dir); 953 } 954 } 955 free(parg); 956} 957 958 959int 960main(int argc, char **argv) 961{ 962 int opt; 963 extern int optind; 964 extern char *optarg; 965 FILE *fp = NULL; 966 967 while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) { 968 switch (opt) { 969 case 'a': 970 append++; 971 break; 972 case 'i': 973 indent = atoi(optarg); 974 break; 975 case 'n': 976 whatis_name = optarg; 977 break; 978 case 'o': 979 common_output = optarg; 980 break; 981 case 'v': 982 verbose++; 983 break; 984 case 'L': 985 locale = getenv("LC_ALL"); 986 if (locale == NULL) 987 locale = getenv("LC_CTYPE"); 988 if (locale == NULL) 989 locale = getenv("LANG"); 990 if (locale != NULL) { 991 char *sep = strchr(locale, '_'); 992 if (sep != NULL && isupper(sep[1]) && 993 isupper(sep[2])) { 994 asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]); 995 } 996 } 997 break; 998 default: 999 fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]); 1000 exit(1); 1001 } 1002 } 1003 1004 signal(SIGINT, trap_signal); 1005 signal(SIGHUP, trap_signal); 1006 signal(SIGQUIT, trap_signal); 1007 signal(SIGTERM, trap_signal); 1008 SLIST_INIT(&visited_dirs); 1009 whatis_proto = new_sbuf(); 1010 whatis_final = new_sbuf(); 1011 1012 if ((machine = getenv("MACHINE")) == NULL) 1013 machine = MACHINE; 1014 1015 if (common_output != NULL && (fp = open_output(common_output)) == NULL) 1016 err(1, "%s", common_output); 1017 if (optind == argc) { 1018 const char *manpath = getenv("MANPATH"); 1019 if (manpath == NULL) 1020 manpath = DEFAULT_MANPATH; 1021 process_argument(manpath); 1022 } else { 1023 while (optind < argc) 1024 process_argument(argv[optind++]); 1025 } 1026 if (common_output != NULL) 1027 finish_output(fp, common_output); 1028 exit(exit_code); 1029} 1030