/* fmt.c revision 77807 */
1273806Snp/* $OpenBSD: fmt.c,v 1.16 2000/06/25 15:35:42 pjanzen Exp $ */ 2273806Snp 3273806Snp/* Sensible version of fmt 4273806Snp * 5273806Snp * Syntax: fmt [ options ] [ goal [ max ] ] [ filename ... ] 6273806Snp * 7273806Snp * Since the documentation for the original fmt is so poor, here 8273806Snp * is an accurate description of what this one does. It's usually 9273806Snp * the same. The *mechanism* used may differ from that suggested 10273806Snp * here. Note that we are *not* entirely compatible with fmt, 11273806Snp * because fmt gets so many things wrong. 12273806Snp * 13273806Snp * 1. Tabs are expanded, assuming 8-space tab stops. 14273806Snp * If the `-t <n>' option is given, we assume <n>-space 15273806Snp * tab stops instead. 16273806Snp * Trailing blanks are removed from all lines. 17273806Snp * x\b == nothing, for any x other than \b. 18273806Snp * Other control characters are simply stripped. This 19273806Snp * includes \r. 20273806Snp * 2. Each line is split into leading whitespace and 21273806Snp * everything else. Maximal consecutive sequences of 22273806Snp * lines with the same leading whitespace are considered 23273806Snp * to form paragraphs, except that a blank line is always 24273806Snp * a paragraph to itself. 25273806Snp * If the `-p' option is given then the first line of a 26273806Snp * paragraph is permitted to have indentation different 27273806Snp * from that of the other lines. 28273806Snp * If the `-m' option is given then a line that looks 29273806Snp * like a mail message header, if it is not immediately 30273806Snp * preceded by a non-blank non-message-header line, is 31273806Snp * taken to start a new paragraph, which also contains 32273806Snp * any subsequent lines with non-empty leading whitespace. 33273806Snp * 3. 
The "everything else" is split into words; a word 34273806Snp * includes its trailing whitespace, and a word at the 35273806Snp * end of a line is deemed to be followed by a single 36273806Snp * space, or two spaces if it ends with a sentence-end 37273806Snp * character. (See the `-d' option for how to change that.) 38273806Snp * If the `-s' option has been given, then a word's trailing 39273806Snp * whitespace is replaced by what it would have had if it 40273806Snp * had occurred at end of line. 41273806Snp * 4. Each paragraph is sent to standard output as follows. 42273806Snp * We output the leading whitespace, and then enough words 43273806Snp * to make the line length as near as possible to the goal 44273806Snp * without exceeding the maximum. (If a single word would 45273806Snp * exceed the maximum, we output that anyway.) Of course 46273806Snp * the trailing whitespace of the last word is ignored. 47273806Snp * We then emit a newline and start again if there are any 48273806Snp * words left. 49273806Snp * Note that for a blank line this translates as "We emit 50273806Snp * a newline". 51273806Snp * If the `-l <n>' option is given, then leading whitespace 52273806Snp * is modified slightly: <n> spaces are replaced by a tab. 53273806Snp * Indented paragraphs (see above under `-p') make matters 54273806Snp * more complicated than this suggests. Actually every paragraph 55273806Snp * has two `leading whitespace' values; the value for the first 56273806Snp * line, and the value for the most recent line. (While processing 57273806Snp * the first line, the two are equal. When `-p' has not been 58273806Snp * given, they are always equal.) The leading whitespace 59273806Snp * actually output is that of the first line (for the first 60273806Snp * line of *output*) or that of the most recent line (for 61273806Snp * all other lines of output). 
62273806Snp * When `-m' has been given, message header paragraphs are 63273806Snp * taken as having first-leading-whitespace empty and 64273806Snp * subsequent-leading-whitespace two spaces. 65273806Snp * 66273806Snp * Multiple input files are formatted one at a time, so that a file 67273806Snp * never ends in the middle of a line. 68273806Snp * 69273806Snp * There's an alternative mode of operation, invoked by giving 70273806Snp * the `-c' option. In that case we just center every line, 71273806Snp * and most of the other options are ignored. This should 72273806Snp * really be in a separate program, but we must stay compatible 73273806Snp * with old `fmt'. 74273806Snp * 75273806Snp * QUERY: Should `-m' also try to do the right thing with quoted text? 76273806Snp * QUERY: `-b' to treat backslashed whitespace as old `fmt' does? 77273806Snp * QUERY: Option meaning `never join lines'? 78273806Snp * QUERY: Option meaning `split in mid-word to avoid overlong lines'? 79273806Snp * (Those last two might not be useful, since we have `fold'.) 80273806Snp * 81273806Snp * Differences from old `fmt': 82273806Snp * 83273806Snp * - We have many more options. Options that aren't understood 84273806Snp * generate a lengthy usage message, rather than being 85273806Snp * treated as filenames. 86273806Snp * - Even with `-m', our handling of message headers is 87273806Snp * significantly different. (And much better.) 88273806Snp * - We don't treat `\ ' as non-word-breaking. 89273806Snp * - Downward changes of indentation start new paragraphs 90273806Snp * for us, as well as upward. (I think old `fmt' behaves 91273806Snp * in the way it does in order to allow indented paragraphs, 92273806Snp * but this is a broken way of making indented paragraphs 93273806Snp * behave right.) 94273806Snp * - Given the choice of going over or under |goal_length| 95273806Snp * by the same amount, we go over; old `fmt' goes under. 96273806Snp * - We treat `?' as ending a sentence, and not `:'. 
Old `fmt' 97273806Snp * does the reverse. 98273806Snp * - We return approved return codes. Old `fmt' returns 99273806Snp * 1 for some errors, and *the number of unopenable files* 100273806Snp * when that was all that went wrong. 101273806Snp * - We have fewer crashes and more helpful error messages. 102273806Snp * - We don't turn spaces into tabs at starts of lines unless 103273806Snp * specifically requested. 104273806Snp * - New `fmt' is somewhat smaller and slightly faster than 105318799Snp * old `fmt'. 106273806Snp * 107273806Snp * Bugs: 108273806Snp * 109273806Snp * None known. There probably are some, though. 110273806Snp * 111273806Snp * Portability: 112273806Snp * 113273806Snp * I believe this code to be pretty portable. It does require 114273806Snp * that you have `getopt'. If you need to include "getopt.h" 115273806Snp * for this (e.g., if your system didn't come with `getopt' 116273806Snp * and you installed it yourself) then you should arrange for 117273806Snp * NEED_getopt_h to be #defined. 118273806Snp * 119273806Snp * Everything here should work OK even on nasty 16-bit 120273806Snp * machines and nice 64-bit ones. However, it's only really 121273806Snp * been tested on my FreeBSD machine. Your mileage may vary. 122273806Snp */ 123273806Snp 124273806Snp/* Copyright (c) 1997 Gareth McCaughan. All rights reserved. 125273806Snp * 126273806Snp * Redistribution and use of this code, in source or binary forms, 127273806Snp * with or without modification, are permitted subject to the following 128273806Snp * conditions: 129273806Snp * 130273806Snp * - Redistribution of source code must retain the above copyright 131273806Snp * notice, this list of conditions and the following disclaimer. 132273806Snp * 133273806Snp * - If you distribute modified source code it must also include 134273806Snp * a notice saying that it has been modified, and giving a brief 135273806Snp * description of what changes have been made. 
136273806Snp * 137273806Snp * Disclaimer: I am not responsible for the results of using this code. 138273806Snp * If it formats your hard disc, sends obscene messages to 139273806Snp * your boss and kills your children then that's your problem 140273806Snp * not mine. I give absolutely no warranty of any sort as to 141273806Snp * what the program will do, and absolutely refuse to be held 142273806Snp * liable for any consequences of your using it. 143273806Snp * Thank you. Have a nice day. 144273806Snp */ 145273806Snp 146273806Snp/* RCS change log: 147273806Snp * Revision 1.5 1998/03/02 18:02:21 gjm11 148273806Snp * Minor changes for portability. 149273806Snp * 150273806Snp * Revision 1.4 1997/10/01 11:51:28 gjm11 151273806Snp * Repair broken indented-paragraph handling. 152273806Snp * Add mail message header stuff. 153273806Snp * Improve comments and layout. 154273806Snp * Make usable with non-BSD systems. 155273806Snp * Add revision display to usage message. 156273806Snp * 157273806Snp * Revision 1.3 1997/09/30 16:24:47 gjm11 158273806Snp * Add copyright notice, rcsid string and log message. 159273806Snp * 160273806Snp * Revision 1.2 1997/09/30 16:13:39 gjm11 161273806Snp * Add options: -d <chars>, -l <width>, -p, -s, -t <width>, -h . 162273806Snp * Parse options with `getopt'. Clean up code generally. 163273806Snp * Make comments more accurate. 164273806Snp * 165273806Snp * Revision 1.1 1997/09/30 11:29:57 gjm11 166273806Snp * Initial revision 167273806Snp */ 168273806Snp 169273806Snp#ifndef lint 170273806Snpstatic const char rcsid[] = 171273806Snp "$FreeBSD: head/usr.bin/fmt/fmt.c 77807 2001-06-06 10:17:05Z ru $"; 172273806Snpstatic const char copyright[] = 173273806Snp "Copyright (c) 1997 Gareth McCaughan. 
All rights reserved.\n"; 174273806Snp#endif /* not lint */ 175273806Snp 176273806Snp#include <ctype.h> 177273806Snp#include <err.h> 178273806Snp#include <locale.h> 179273806Snp#include <stdio.h> 180273806Snp#include <stdlib.h> 181273806Snp#include <string.h> 182273806Snp#include <sysexits.h> 183273806Snp#include <unistd.h> 184273806Snp 185273806Snp/* Something that, we hope, will never be a genuine line length, 186273806Snp * indentation etc. 187273806Snp */ 188273806Snp#define SILLY ((size_t)-1) 189273806Snp 190273806Snp/* I used to use |strtoul| for this, but (1) not all systems have it 191273806Snp * and (2) it's probably better to use |strtol| to detect negative 192273806Snp * numbers better. 193273806Snp * If |fussyp==0| then we don't complain about non-numbers 194273806Snp * (returning 0 instead), but we do complain about bad numbers. 195273806Snp */ 196273806Snpstatic size_t 197273806Snpget_positive(const char *s, const char *err_mess, int fussyP) { 198273806Snp char *t; 199273806Snp long result = strtol(s,&t,0); 200273806Snp if (*t) { if (fussyP) goto Lose; else return 0; } 201273806Snp if (result<=0) { Lose: errx(EX_USAGE, err_mess); } 202273806Snp return (size_t) result; 203273806Snp} 204273806Snp 205273806Snp/* Global variables */ 206273806Snp 207273806Snpstatic int centerP=0; /* Try to center lines? */ 208273806Snpstatic size_t goal_length=0; /* Target length for output lines */ 209273806Snpstatic size_t max_length=0; /* Maximum length for output lines */ 210273806Snpstatic int coalesce_spaces_P=0; /* Coalesce multiple whitespace -> ' ' ? */ 211273806Snpstatic int allow_indented_paragraphs=0; /* Can first line have diff. ind.? 
*/ 212273806Snpstatic int tab_width=8; /* Number of spaces per tab stop */ 213273806Snpstatic size_t output_tab_width=0; /* Ditto, when squashing leading spaces */ 214273806Snpstatic const char *sentence_enders=".?!"; /* Double-space after these */ 215273806Snpstatic int grok_mail_headers=0; /* treat embedded mail headers magically? */ 216273806Snp 217273806Snpstatic int n_errors=0; /* Number of failed files. Return on exit. */ 218273806Snpstatic char *output_buffer=0; /* Output line will be built here */ 219273806Snpstatic size_t x; /* Horizontal position in output line */ 220273806Snpstatic size_t x0; /* Ditto, ignoring leading whitespace */ 221273806Snpstatic size_t pending_spaces; /* Spaces to add before next word */ 222273806Snpstatic int output_in_paragraph=0; /* Any of current para written out yet? */ 223273806Snp 224273806Snp/* Prototypes */ 225273806Snp 226273806Snpstatic void process_named_file (const char *); 227273806Snpstatic void process_stream (FILE *, const char *); 228273806Snpstatic size_t indent_length (const char *, size_t); 229273806Snpstatic int might_be_header (const unsigned char *); 230273806Snpstatic void new_paragraph (size_t, size_t); 231273806Snpstatic void output_word (size_t, size_t, const char *, size_t, size_t); 232273806Snpstatic void output_indent (size_t); 233273806Snpstatic void center_stream (FILE *, const char *); 234273806Snpstatic char * get_line (FILE *, size_t *); 235273806Snpstatic void * xrealloc (void *, size_t); 236273806Snp 237273806Snp#define XMALLOC(x) xrealloc(0,x) 238273806Snp 239273806Snp/* Here is perhaps the right place to mention that this code is 240273806Snp * all in top-down order. Hence, |main| comes first. 241273806Snp */ 242273806Snpint 243273806Snpmain(int argc, char *argv[]) { 244273806Snp int ch; /* used for |getopt| processing */ 245273806Snp 246273806Snp 247273806Snp (void) setlocale(LC_CTYPE, ""); 248273806Snp 249273806Snp /* 1. Grok parameters. 
*/ 250273806Snp 251273806Snp while ((ch = getopt(argc, argv, "0123456789cd:hl:mpst:w:")) != -1) 252273806Snp switch(ch) { 253273806Snp case 'c': 254273806Snp centerP = 1; 255273806Snp continue; 256273806Snp case 'd': 257273806Snp sentence_enders = optarg; 258273806Snp continue; 259273806Snp case 'l': 260273806Snp output_tab_width 261273806Snp = get_positive(optarg, "output tab width must be positive", 1); 262273806Snp continue; 263273806Snp case 'm': 264273806Snp grok_mail_headers = 1; 265273806Snp continue; 266273806Snp case 'p': 267273806Snp allow_indented_paragraphs = 1; 268273806Snp continue; 269273806Snp case 's': 270273806Snp coalesce_spaces_P = 1; 271273806Snp continue; 272273806Snp case 't': 273273806Snp tab_width = get_positive(optarg, "tab width must be positive", 1); 274273806Snp continue; 275273806Snp case 'w': 276273806Snp goal_length = get_positive(optarg, "width must be positive", 1); 277273806Snp max_length = goal_length; 278273806Snp continue; 279273806Snp case '0': case '1': case '2': case '3': case '4': case '5': 280273806Snp case '6': case '7': case '8': case '9': 281273806Snp /* XXX this is not a stylistically approved use of getopt() */ 282273806Snp if (goal_length==0) { 283273806Snp char *p; 284273806Snp p = argv[optind - 1]; 285273806Snp if (p[0] == '-' && p[1] == ch && !p[2]) 286273806Snp goal_length = get_positive(++p, "width must be nonzero", 1); 287273806Snp else 288273806Snp goal_length = get_positive(argv[optind]+1, 289273806Snp "width must be nonzero", 1); 290273806Snp max_length = goal_length; 291273806Snp } 292273806Snp continue; 293273806Snp case 'h': default: 294273806Snp fprintf(stderr, 295273806Snp"Usage: fmt [-cmps] [-d chars] [-l num] [-t num]\n" 296273806Snp" [-w width | -width | goal [maximum]] [file ...]\n" 297273806Snp"Options: -c center each line instead of formatting\n" 298273806Snp" -d <chars> double-space after <chars> at line end\n" 299273806Snp" -l <n> turn each <n> spaces at start of line into a tab\n" 300273806Snp" 
-m try to make sure mail header lines stay separate\n" 301273806Snp" -p allow indented paragraphs\n" 302273806Snp" -s coalesce whitespace inside lines\n" 303273806Snp" -t <n> have tabs every <n> columns\n" 304273806Snp" -w <n> set maximum width to <n>\n" 305273806Snp" goal set target width to goal\n"); 306273806Snp exit(ch=='h' ? 0 : EX_USAGE); 307273806Snp } 308273806Snp argc -= optind; argv += optind; 309273806Snp 310273806Snp /* [ goal [ maximum ] ] */ 311273806Snp 312273806Snp if (argc>0 && goal_length==0 313273806Snp && (goal_length=get_positive(*argv,"goal length must be positive", 0)) 314273806Snp != 0) { 315273806Snp --argc; ++argv; 316273806Snp if (argc>0 317273806Snp && (max_length=get_positive(*argv,"max length must be positive", 0)) 318273806Snp != 0) { 319273806Snp --argc; ++argv; 320273806Snp if (max_length<goal_length) 321273806Snp errx(EX_USAGE, "max length must be >= goal length"); 322273806Snp } 323273806Snp } 324273806Snp if (goal_length==0) goal_length = 65; 325273806Snp if (max_length==0) max_length = goal_length+10; 326273806Snp output_buffer = XMALLOC(max_length+1); /* really needn't be longer */ 327273806Snp 328273806Snp /* 2. Process files. */ 329273806Snp 330273806Snp if (argc>0) { 331309378Sjhb while (argc-->0) process_named_file(*argv++); 332273806Snp } 333273806Snp else { 334273806Snp process_stream(stdin, "standard input"); 335273806Snp } 336273806Snp 337273806Snp /* We're done. */ 338273806Snp 339273806Snp return n_errors ? EX_NOINPUT : 0; 340309378Sjhb 341273806Snp} 342273806Snp 343273806Snp/* Process a single file, given its name. 
344273806Snp */ 345273806Snpstatic void 346273806Snpprocess_named_file(const char *name) { 347273806Snp FILE *f=fopen(name, "r"); 348273806Snp if (!f) { perror(name); ++n_errors; } 349273806Snp else { 350273806Snp process_stream(f, name); 351273806Snp fclose(f); 352273806Snp } 353309378Sjhb} 354273806Snp 355273806Snp/* Types of mail header continuation lines: 356273806Snp */ 357273806Snptypedef enum { 358273806Snp hdr_ParagraphStart = -1, 359273806Snp hdr_NonHeader = 0, 360273806Snp hdr_Header = 1, 361273806Snp hdr_Continuation = 2 362309378Sjhb} HdrType; 363273806Snp 364273806Snp/* Process a stream. This is where the real work happens, 365273806Snp * except that centering is handled separately. 366273806Snp */ 367273806Snpstatic void 368273806Snpprocess_stream(FILE *stream, const char *name) { 369273806Snp size_t last_indent=SILLY; /* how many spaces in last indent? */ 370273806Snp size_t para_line_number=0; /* how many lines already read in this para? */ 371273806Snp size_t first_indent=SILLY; /* indentation of line 0 of paragraph */ 372273806Snp HdrType prev_header_type=hdr_ParagraphStart; 373273806Snp /* ^-- header_type of previous line; -1 at para start */ 374273806Snp char *line; 375273806Snp size_t length; 376273806Snp 377273806Snp if (centerP) { center_stream(stream, name); return; } 378273806Snp while ((line=get_line(stream,&length)) != NULL) { 379273806Snp size_t np=indent_length(line, length); 380273806Snp { HdrType header_type=hdr_NonHeader; 381273806Snp if (grok_mail_headers && prev_header_type!=hdr_NonHeader) { 382273806Snp if (np==0 && might_be_header(line)) 383273806Snp header_type = hdr_Header; 384273806Snp else if (np>0 && prev_header_type>hdr_NonHeader) 385273806Snp header_type = hdr_Continuation; 386273806Snp } 387273806Snp /* We need a new paragraph if and only if: 388273806Snp * this line is blank, 389273806Snp * OR it's a mail header, 390273806Snp * OR it's not a mail header AND the last line was one, 391273806Snp * OR the indentation has 
changed 392273806Snp * AND the line isn't a mail header continuation line 393273806Snp * AND this isn't the second line of an indented paragraph. 394273806Snp */ 395273806Snp if ( length==0 396273806Snp || header_type==hdr_Header 397273806Snp || (header_type==hdr_NonHeader && prev_header_type>hdr_NonHeader) 398273806Snp || (np!=last_indent 399273806Snp && header_type != hdr_Continuation 400273806Snp && (!allow_indented_paragraphs || para_line_number != 1)) ) { 401273806Snp new_paragraph(output_in_paragraph ? last_indent : first_indent, np); 402273806Snp para_line_number = 0; 403273806Snp first_indent = np; 404273806Snp last_indent = np; 405273806Snp if (header_type==hdr_Header) last_indent=2; /* for cont. lines */ 406273806Snp if (length==0) { 407273806Snp putchar('\n'); 408273806Snp prev_header_type=hdr_ParagraphStart; 409273806Snp continue; 410273806Snp } 411273806Snp } 412273806Snp else { 413273806Snp /* If this is an indented paragraph other than a mail header 414273806Snp * continuation, set |last_indent|. 415273806Snp */ 416273806Snp if (np != last_indent && header_type != hdr_Continuation) 417273806Snp last_indent=np; 418273806Snp } 419273806Snp prev_header_type = header_type; 420273806Snp } 421273806Snp 422273806Snp { size_t n=np; 423273806Snp while (n<length) { 424273806Snp /* Find word end and count spaces after it */ 425273806Snp size_t word_length=0, space_length=0; 426273806Snp while (n+word_length < length && line[n+word_length] != ' ') 427273806Snp ++word_length; 428273806Snp space_length = word_length; 429273806Snp while (n+space_length < length && line[n+space_length] == ' ') 430273806Snp ++space_length; 431273806Snp /* Send the word to the output machinery. */ 432273806Snp output_word(first_indent, last_indent, 433273806Snp line+n, word_length, space_length-word_length); 434273806Snp n += space_length; 435273806Snp } 436273806Snp } 437273806Snp ++para_line_number; 438273806Snp } 439273806Snp new_paragraph(output_in_paragraph ? 
last_indent : first_indent, 0); 440273806Snp if (ferror(stream)) { perror(name); ++n_errors; } 441273806Snp} 442273806Snp 443273806Snp/* How long is the indent on this line? 444273806Snp */ 445273806Snpstatic size_t 446273806Snpindent_length(const char *line, size_t length) { 447273806Snp size_t n=0; 448273806Snp while (n<length && *line++ == ' ') ++n; 449273806Snp return n; 450273806Snp} 451273806Snp 452273806Snp/* Might this line be a mail header? 453273806Snp * We deem a line to be a possible header if it matches the 454273806Snp * Perl regexp /^[A-Z][-A-Za-z0-9]*:\s/. This is *not* the same 455273806Snp * as in RFC whatever-number-it-is; we want to be gratuitously 456273806Snp * conservative to avoid mangling ordinary civilised text. 457273806Snp */ 458273806Snpstatic int 459273806Snpmight_be_header(const unsigned char *line) { 460273806Snp if (!isupper(*line++)) return 0; 461273806Snp while (*line && (isalnum(*line) || *line=='-')) ++line; 462273806Snp return (*line==':' && isspace(line[1])); 463273806Snp} 464273806Snp 465273806Snp/* Begin a new paragraph with an indent of |indent| spaces. 466273806Snp */ 467273806Snpstatic void 468273806Snpnew_paragraph(size_t old_indent, size_t indent) { 469273806Snp if (x0) { 470273806Snp if (old_indent>0) output_indent(old_indent); 471273806Snp fwrite(output_buffer, 1, x0, stdout); 472273806Snp putchar('\n'); 473273806Snp } 474273806Snp x=indent; x0=0; pending_spaces=0; 475273806Snp output_in_paragraph = 0; 476273806Snp} 477273806Snp 478273806Snp/* Output spaces or tabs for leading indentation. 479273806Snp */ 480273806Snpstatic void 481273806Snpoutput_indent(size_t n_spaces) { 482273806Snp if (output_tab_width) { 483273806Snp while (n_spaces >= output_tab_width) { 484273806Snp putchar('\t'); 485273806Snp n_spaces -= output_tab_width; 486273806Snp } 487319255Snp } 488273806Snp while (n_spaces-- > 0) putchar(' '); 489273806Snp} 490273806Snp 491319255Snp/* Output a single word, or add it to the buffer. 
492309378Sjhb * indent0 and indent1 are the indents to use on the first and subsequent 493273806Snp * lines of a paragraph. They'll often be the same, of course. 494273806Snp */ 495273806Snpstatic void 496273806Snpoutput_word(size_t indent0, size_t indent1, const char *word, size_t length, size_t spaces) { 497273806Snp size_t new_x = x+pending_spaces+length; 498273806Snp size_t indent = output_in_paragraph ? indent1 : indent0; 499309378Sjhb 500273806Snp /* If either |spaces==0| (at end of line) or |coalesce_spaces_P| 501273806Snp * (squashing internal whitespace), then add just one space; 502273806Snp * except that if the last character was a sentence-ender we 503273806Snp * actually add two spaces. 504273806Snp */ 505273806Snp if (coalesce_spaces_P || spaces==0) 506273806Snp spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1; 507273806Snp 508273806Snp if (new_x<=goal_length) { 509273806Snp /* After adding the word we still aren't at the goal length, 510273806Snp * so clearly we add it to the buffer rather than outputing it. 511273806Snp */ 512273806Snp memset(output_buffer+x0, ' ', pending_spaces); 513273806Snp x0 += pending_spaces; x += pending_spaces; 514273806Snp memcpy(output_buffer+x0, word, length); 515273806Snp x0 += length; x += length; 516273806Snp pending_spaces = spaces; 517273806Snp } 518273806Snp else { 519273806Snp /* Adding the word takes us past the goal. Print the line-so-far, 520319255Snp * and the word too iff either (1) the lsf is empty or (2) that 521273806Snp * makes us nearer the goal but doesn't take us over the limit, 522273806Snp * or (3) the word on its own takes us over the limit. 523273806Snp * In case (3) we put a newline in between. 
524319255Snp */ 525309378Sjhb if (indent>0) output_indent(indent); 526273806Snp fwrite(output_buffer, 1, x0, stdout); 527273806Snp if (x0==0 || (new_x <= max_length && new_x-goal_length <= goal_length-x)) { 528273806Snp printf("%*s", pending_spaces, ""); 529273806Snp goto write_out_word; 530273806Snp } 531273806Snp else { 532309378Sjhb /* If the word takes us over the limit on its own, just 533273806Snp * spit it out and don't bother buffering it. 534273806Snp */ 535273806Snp if (indent+length > max_length) { 536273806Snp putchar('\n'); 537273806Snp if (indent>0) output_indent(indent); 538273806Snpwrite_out_word: 539273806Snp fwrite(word, 1, length, stdout); 540273806Snp x0 = 0; x = indent1; pending_spaces = 0; 541273806Snp } 542273806Snp else { 543273806Snp memcpy(output_buffer, word, length); 544273806Snp x0 = length; x = length+indent1; pending_spaces = spaces; 545273806Snp } 546273806Snp } 547273806Snp putchar('\n'); 548273806Snp output_in_paragraph = 1; 549273806Snp } 550273806Snp} 551273806Snp 552273806Snp/* Process a stream, but just center its lines rather than trying to 553273806Snp * format them neatly. 554273806Snp */ 555273806Snpstatic void 556273806Snpcenter_stream(FILE *stream, const char *name) { 557273806Snp char *line; 558273806Snp size_t length; 559273806Snp while ((line=get_line(stream, &length)) != 0) { 560273806Snp size_t l=length; 561273806Snp while (l>0 && isspace(*line)) { ++line; --l; } 562273806Snp length=l; 563273806Snp while (l<goal_length) { putchar(' '); l+=2; } 564273806Snp fwrite(line, 1, length, stdout); 565273806Snp putchar('\n'); 566273806Snp } 567273806Snp if (ferror(stream)) { perror(name); ++n_errors; } 568273806Snp} 569273806Snp 570273806Snp/* Get a single line from a stream. Expand tabs, strip control 571273806Snp * characters and trailing whitespace, and handle backspaces. 572273806Snp * Return the address of the buffer containing the line, and 573273806Snp * put the length of the line in |lengthp|. 
574273806Snp * This can cope with arbitrarily long lines, and with lines 575273806Snp * without terminating \n. 576273806Snp * If there are no characters left or an error happens, we 577273806Snp * return 0. 578273806Snp * Don't confuse |spaces_pending| here with the global 579273806Snp * |pending_spaces|. 580273806Snp */ 581273806Snpstatic char * 582273806Snpget_line(FILE *stream, size_t *lengthp) { 583273806Snp static char *buf=NULL; 584273806Snp static size_t length=0; 585273806Snp size_t len=0; 586273806Snp int ch; 587273806Snp size_t spaces_pending=0; 588273806Snp 589273806Snp if (buf==NULL) { length=100; buf=XMALLOC(length); } 590273806Snp while ((ch=getc(stream)) != '\n' && ch != EOF) { 591273806Snp if (ch==' ') ++spaces_pending; 592273806Snp else if (isprint(ch)) { 593273806Snp while (len+spaces_pending >= length) { 594273806Snp length*=2; buf=xrealloc(buf, length); 595273806Snp } 596273806Snp while (spaces_pending > 0) { --spaces_pending; buf[len++]=' '; } 597273806Snp buf[len++] = ch; 598273806Snp } 599273806Snp else if (ch=='\t') 600273806Snp spaces_pending += tab_width - (len+spaces_pending)%tab_width; 601273806Snp else if (ch=='\b') { if (len) --len; } 602273806Snp } 603273806Snp *lengthp=len; 604273806Snp return (len>0 || ch!=EOF) ? buf : 0; 605273806Snp} 606273806Snp 607273806Snp/* (Re)allocate some memory, exiting with an error if we can't. 608273806Snp */ 609273806Snpstatic void * 610273806Snpxrealloc(void *ptr, size_t nbytes) { 611273806Snp void *p = realloc(ptr, nbytes); 612273806Snp if (p == NULL) errx(EX_OSERR, "out of memory"); 613273806Snp return p; 614273806Snp} 615273806Snp