Cross Reference: /freebsd-10.3-release/usr.bin/fmt/fmt.c

Deleted Added

sdiff udiff text old ( 50477 ) new ( 77807 )

full compact

fmt.c (50477)	fmt.c (77807)
1/* 2 * Copyright (c) 1980, 1993 3 * The Regents of the University of California. All rights reserved.	1/* $OpenBSD: fmt.c,v 1.16 2000/06/25 15:35:42 pjanzen Exp $ */ 2 3/* Sensible version of fmt
4 *	4 *
5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright	5 * Syntax: fmt [ options ] [ goal [ max ] ] [ filename ... ] 6 * 7 * Since the documentation for the original fmt is so poor, here 8 * is an accurate description of what this one does. It's usually 9 * the same. The mechanism used may differ from that suggested 10 * here. Note that we are not entirely compatible with fmt, 11 * because fmt gets so many things wrong. 12 * 13 * 1. Tabs are expanded, assuming 8-space tab stops. 14 * If the `-t <n>' option is given, we assume <n>-space 15 * tab stops instead. 16 * Trailing blanks are removed from all lines. 17 * x\b == nothing, for any x other than \b. 18 * Other control characters are simply stripped. This 19 * includes \r. 20 * 2. Each line is split into leading whitespace and 21 * everything else. Maximal consecutive sequences of 22 * lines with the same leading whitespace are considered 23 * to form paragraphs, except that a blank line is always 24 * a paragraph to itself. 25 * If the `-p' option is given then the first line of a 26 * paragraph is permitted to have indentation different 27 * from that of the other lines. 28 * If the `-m' option is given then a line that looks 29 * like a mail message header, if it is not immediately 30 * preceded by a non-blank non-message-header line, is 31 * taken to start a new paragraph, which also contains 32 * any subsequent lines with non-empty leading whitespace. 33 * 3. The "everything else" is split into words; a word 34 * includes its trailing whitespace, and a word at the 35 * end of a line is deemed to be followed by a single 36 * space, or two spaces if it ends with a sentence-end 37 * character. (See the `-d' option for how to change that.) 
38 * If the `-s' option has been given, then a word's trailing 39 * whitespace is replaced by what it would have had if it 40 * had occurred at end of line. 41 * 4. Each paragraph is sent to standard output as follows. 42 * We output the leading whitespace, and then enough words 43 * to make the line length as near as possible to the goal 44 * without exceeding the maximum. (If a single word would 45 * exceed the maximum, we output that anyway.) Of course 46 * the trailing whitespace of the last word is ignored. 47 * We then emit a newline and start again if there are any 48 * words left. 49 * Note that for a blank line this translates as "We emit 50 * a newline". 51 * If the `-l <n>' option is given, then leading whitespace 52 * is modified slightly: <n> spaces are replaced by a tab. 53 * Indented paragraphs (see above under `-p') make matters 54 * more complicated than this suggests. Actually every paragraph 55 * has two `leading whitespace' values; the value for the first 56 * line, and the value for the most recent line. (While processing 57 * the first line, the two are equal. When `-p' has not been 58 * given, they are always equal.) The leading whitespace 59 * actually output is that of the first line (for the first 60 * line of output) or that of the most recent line (for 61 * all other lines of output). 62 * When `-m' has been given, message header paragraphs are 63 * taken as having first-leading-whitespace empty and 64 * subsequent-leading-whitespace two spaces. 65 * 66 * Multiple input files are formatted one at a time, so that a file 67 * never ends in the middle of a line. 68 * 69 * There's an alternative mode of operation, invoked by giving 70 * the `-c' option. In that case we just center every line, 71 * and most of the other options are ignored. This should 72 * really be in a separate program, but we must stay compatible 73 * with old `fmt'. 74 * 75 * QUERY: Should `-m' also try to do the right thing with quoted text? 
76 * QUERY: `-b' to treat backslashed whitespace as old `fmt' does? 77 * QUERY: Option meaning `never join lines'? 78 * QUERY: Option meaning `split in mid-word to avoid overlong lines'? 79 * (Those last two might not be useful, since we have `fold'.) 80 * 81 * Differences from old `fmt': 82 * 83 * - We have many more options. Options that aren't understood 84 * generate a lengthy usage message, rather than being 85 * treated as filenames. 86 * - Even with `-m', our handling of message headers is 87 * significantly different. (And much better.) 88 * - We don't treat `\ ' as non-word-breaking. 89 * - Downward changes of indentation start new paragraphs 90 * for us, as well as upward. (I think old `fmt' behaves 91 * in the way it does in order to allow indented paragraphs, 92 * but this is a broken way of making indented paragraphs 93 * behave right.) 94 * - Given the choice of going over or under \|goal_length\| 95 * by the same amount, we go over; old `fmt' goes under. 96 * - We treat `?' as ending a sentence, and not `:'. Old `fmt' 97 * does the reverse. 98 * - We return approved return codes. Old `fmt' returns 99 * 1 for some errors, and the number of unopenable files 100 * when that was all that went wrong. 101 * - We have fewer crashes and more helpful error messages. 102 * - We don't turn spaces into tabs at starts of lines unless 103 * specifically requested. 104 * - New `fmt' is somewhat smaller and slightly faster than 105 * old `fmt'. 106 * 107 * Bugs: 108 * 109 * None known. There probably are some, though. 110 * 111 * Portability: 112 * 113 * I believe this code to be pretty portable. It does require 114 * that you have `getopt'. If you need to include "getopt.h" 115 * for this (e.g., if your system didn't come with `getopt' 116 * and you installed it yourself) then you should arrange for 117 * NEED_getopt_h to be #defined. 118 * 119 * Everything here should work OK even on nasty 16-bit 120 * machines and nice 64-bit ones. 
However, it's only really 121 * been tested on my FreeBSD machine. Your mileage may vary. 122 */ 123 124/* Copyright (c) 1997 Gareth McCaughan. All rights reserved. 125 * 126 * Redistribution and use of this code, in source or binary forms, 127 * with or without modification, are permitted subject to the following 128 * conditions: 129 * 130 * - Redistribution of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.	131 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission.
20 *	132 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE.	133 * - If you distribute modified source code it must also include 134 * a notice saying that it has been modified, and giving a brief 135 * description of what changes have been made. 136 * 137 * Disclaimer: I am not responsible for the results of using this code. 138 * If it formats your hard disc, sends obscene messages to 139 * your boss and kills your children then that's your problem 140 * not mine. I give absolutely no warranty of any sort as to 141 * what the program will do, and absolutely refuse to be held 142 * liable for any consequences of your using it. 143 * Thank you. Have a nice day.
32 */ 33	144 */ 145
34#ifndef lint 35static char copyright[] = 36"@(#) Copyright (c) 1980, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38#endif /* not lint */	146/* RCS change log: 147 * Revision 1.5 1998/03/02 18:02:21 gjm11 148 * Minor changes for portability. 149 * 150 * Revision 1.4 1997/10/01 11:51:28 gjm11 151 * Repair broken indented-paragraph handling. 152 * Add mail message header stuff. 153 * Improve comments and layout. 154 * Make usable with non-BSD systems. 155 * Add revision display to usage message. 156 * 157 * Revision 1.3 1997/09/30 16:24:47 gjm11 158 * Add copyright notice, rcsid string and log message. 159 * 160 * Revision 1.2 1997/09/30 16:13:39 gjm11 161 * Add options: -d <chars>, -l <width>, -p, -s, -t <width>, -h . 162 * Parse options with `getopt'. Clean up code generally. 163 * Make comments more accurate. 164 * 165 * Revision 1.1 1997/09/30 11:29:57 gjm11 166 * Initial revision 167 */
39 40#ifndef lint	168 169#ifndef lint
41#if 0 42static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93"; 43#else
44static const char rcsid[] =	170static const char rcsid[] =
45 "$FreeBSD: head/usr.bin/fmt/fmt.c 50477 1999-08-28 01:08:13Z peter $"; 46#endif	171 "$FreeBSD: head/usr.bin/fmt/fmt.c 77807 2001-06-06 10:17:05Z ru $"; 172static const char copyright[] = 173 "Copyright (c) 1997 Gareth McCaughan. All rights reserved.\n";
47#endif /* not lint */ 48 49#include <ctype.h> 50#include <err.h> 51#include <locale.h> 52#include <stdio.h> 53#include <stdlib.h> 54#include <string.h>	174#endif /* not lint */ 175 176#include <ctype.h> 177#include <err.h> 178#include <locale.h> 179#include <stdio.h> 180#include <stdlib.h> 181#include <string.h>
	182#include <sysexits.h> 183#include <unistd.h>
55	184
56/* 57 * fmt -- format the concatenation of input files or standard input 58 * onto standard output. Designed for use with Mail ~\| 59 * 60 * Syntax : fmt [ goal [ max ] ] [ name ... ] 61 * Authors: Kurt Shoens (UCB) 12/7/78; 62 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].	185/* Something that, we hope, will never be a genuine line length, 186 * indentation etc.
63 */	187 */
	188#define SILLY ((size_t)-1)
64	189
65/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more. 66 * #define LENGTH 72 Max line length in output	190/* I used to use \|strtoul\| for this, but (1) not all systems have it 191 * and (2) it's probably better to use \|strtol\| to detect negative 192 * numbers better. 193 * If \|fussyp==0\| then we don't complain about non-numbers 194 * (returning 0 instead), but we do complain about bad numbers.
67 */	195 */
68#define NOSTR ((char *) 0) /* Null string pointer for lint */	196static size_t 197get_positive(const char *s, const char *err_mess, int fussyP) { 198 char *t; 199 long result = strtol(s,&t,0); 200 if (*t) { if (fussyP) goto Lose; else return 0; } 201 if (result<=0) { Lose: errx(EX_USAGE, err_mess); } 202 return (size_t) result; 203}
69	204
70/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */ 71#define GOAL_LENGTH 65 72#define MAX_LENGTH 75 73int goal_length; /* Target or goal line length in output */ 74int max_length; /* Max line length in output */ 75int pfx; /* Current leading blank count */ 76int lineno; /* Current input line */ 77int mark; /* Last place we saw a head line */ 78int center;	205/* Global variables */
79	206
80char *headnames[] = {"To", "Subject", "Cc", 0};	207static int centerP=0; /* Try to center lines? */ 208static size_t goal_length=0; /* Target length for output lines */ 209static size_t max_length=0; /* Maximum length for output lines */ 210static int coalesce_spaces_P=0; /* Coalesce multiple whitespace -> ' ' ? */ 211static int allow_indented_paragraphs=0; /* Can first line have diff. ind.? */ 212static int tab_width=8; /* Number of spaces per tab stop */ 213static size_t output_tab_width=0; /* Ditto, when squashing leading spaces */ 214static const char *sentence_enders=".?!"; /* Double-space after these */ 215static int grok_mail_headers=0; /* treat embedded mail headers magically? */
81	216
82void fmt __P((FILE *)); 83int ispref __P((char *, char *)); 84void leadin __P((void)); 85void oflush __P((void)); 86void pack __P((char [], int)); 87void prefix __P((char [])); 88void setout __P((void)); 89void split __P((char [])); 90void tabulate __P((char []));	217static int n_errors=0; /* Number of failed files. Return on exit. */ 218static char *output_buffer=0; /* Output line will be built here */ 219static size_t x; /* Horizontal position in output line */ 220static size_t x0; /* Ditto, ignoring leading whitespace */ 221static size_t pending_spaces; /* Spaces to add before next word */ 222static int output_in_paragraph=0; /* Any of current para written out yet? */
91	223
92/* 93 * Drive the whole formatter by managing input files. Also, 94 * cause initialization of the output stuff and flush it out 95 * at the end. 96 */	224/* Prototypes */
97	225
	226static void process_named_file (const char *); 227static void process_stream (FILE *, const char *); 228static size_t indent_length (const char *, size_t); 229static int might_be_header (const unsigned char *); 230static void new_paragraph (size_t, size_t); 231static void output_word (size_t, size_t, const char *, size_t, size_t); 232static void output_indent (size_t); 233static void center_stream (FILE *, const char *); 234static char * get_line (FILE *, size_t *); 235static void * xrealloc (void *, size_t); 236 237#define XMALLOC(x) xrealloc(0,x) 238 239/* Here is perhaps the right place to mention that this code is 240 * all in top-down order. Hence, \|main\| comes first. 241 */
98int	242int
99main(argc, argv) 100 int argc; 101 char **argv; 102{ 103 register FILE *fi; 104 register int errs = 0; 105 int number; /* LIZ@UOM 6/18/85 */	243main(int argc, char *argv[]) { 244 int ch; /* used for \|getopt\| processing */
106	245
107 (void) setlocale(LC_CTYPE, "");
108	246
109 goal_length = GOAL_LENGTH; 110 max_length = MAX_LENGTH; 111 setout(); 112 lineno = 1; 113 mark = -10; 114 /* 115 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments 116 / 117* if (argc > 1 && !strcmp(argv[1], "-c")) { 118 center++; 119 argc--; 120 argv++; 121 } 122 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) { 123 argv++; 124 argc--; 125 goal_length = number; 126 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) { 127 argv++; 128 argc--; 129 max_length = number; 130 } 131 } 132 if (max_length <= goal_length) 133 errx(1, "max length must be greater than goal length"); 134 if (argc < 2) { 135 fmt(stdin); 136 oflush(); 137 exit(0); 138 } 139 while (--argc) { 140 if ((fi = fopen(++argv, "r")) == NULL) { 141* perror(argv); 142* errs++; 143 continue; 144 } 145 fmt(fi); 146 fclose(fi); 147 } 148 oflush(); 149 exit(errs); 150}	247 (void) setlocale(LC_CTYPE, "");
151	248
152/* 153 * Read up characters from the passed input file, forming lines, 154 * doing ^H processing, expanding tabs, stripping trailing blanks, 155 * and sending each line down for analysis. 156 / 157void 158fmt(fi) 159* FILE fi; 160{ 161* static char linebuf = 0, canonb = 0; 162 register char cp, cp2, cc; 163 register int c, col; 164#define CHUNKSIZE 1024 165 static int lbufsize = 0, cbufsize = CHUNKSIZE;	249 /* 1. Grok parameters. */
166	250
167 canonb = malloc(CHUNKSIZE); 168 if (canonb == 0) 169 abort();	251 while ((ch = getopt(argc, argv, "0123456789cd:hl:mpst:w:")) != -1) 252 switch(ch) { 253 case 'c': 254 centerP = 1; 255 continue; 256 case 'd': 257 sentence_enders = optarg; 258 continue; 259 case 'l': 260 output_tab_width 261 = get_positive(optarg, "output tab width must be positive", 1); 262 continue; 263 case 'm': 264 grok_mail_headers = 1; 265 continue; 266 case 'p': 267 allow_indented_paragraphs = 1; 268 continue; 269 case 's': 270 coalesce_spaces_P = 1; 271 continue; 272 case 't': 273 tab_width = get_positive(optarg, "tab width must be positive", 1); 274 continue; 275 case 'w': 276 goal_length = get_positive(optarg, "width must be positive", 1); 277 max_length = goal_length; 278 continue; 279 case '0': case '1': case '2': case '3': case '4': case '5': 280 case '6': case '7': case '8': case '9': 281 /* XXX this is not a stylistically approved use of getopt() / 282* if (goal_length==0) { 283 char p; 284* p = argv[optind - 1]; 285 if (p[0] == '-' && p[1] == ch && !p[2]) 286 goal_length = get_positive(++p, "width must be nonzero", 1); 287 else 288 goal_length = get_positive(argv[optind]+1, 289 "width must be nonzero", 1); 290 max_length = goal_length; 291 } 292 continue; 293 case 'h': default: 294 fprintf(stderr, 295"Usage: fmt [-cmps] [-d chars] [-l num] [-t num]\n" 296" [-w width \| -width \| goal [maximum]] [file ...]\n" 297"Options: -c center each line instead of formatting\n" 298" -d <chars> double-space after <chars> at line end\n" 299" -l <n> turn each <n> spaces at start of line into a tab\n" 300" -m try to make sure mail header lines stay separate\n" 301" -p allow indented paragraphs\n" 302" -s coalesce whitespace inside lines\n" 303" -t <n> have tabs every <n> columns\n" 304" -w <n> set maximum width to <n>\n" 305" goal set target width to goal\n"); 306 exit(ch=='h' ? 0 : EX_USAGE); 307 } 308 argc -= optind; argv += optind;
170	309
171 if (center) { 172 linebuf = malloc(BUFSIZ); 173 while (1) { 174 cp = fgets(linebuf, BUFSIZ, fi); 175 if (!cp) 176 return; 177 while (cp && isspace(cp)) 178 cp++; 179 cp2 = cp + strlen(cp) - 1; 180 while (cp2 > cp && isspace(cp2)) 181* cp2--; 182 if (cp == cp2) 183 putchar('\n'); 184 col = cp2 - cp; 185 for (c = 0; c < (goal_length-col)/2; c++) 186 putchar(' '); 187 while (cp <= cp2) 188 putchar(cp++); 189* putchar('\n'); 190 } 191 } 192 c = getc(fi); 193 while (c != EOF) { 194 /* 195 * Collect a line, doing ^H processing. 196 * Leave tabs for now. 197 / 198* cp = linebuf; 199 while (c != '\n' && c != EOF) { 200 if (cp - linebuf >= lbufsize) { 201 int offset = cp - linebuf; 202 lbufsize += CHUNKSIZE; 203 linebuf = realloc(linebuf, lbufsize); 204 if(linebuf == 0) 205 abort(); 206 cp = linebuf + offset; 207 } 208 if (c == '\b') { 209 if (cp > linebuf) 210 cp--; 211 c = getc(fi); 212 continue; 213 } 214 if (!isprint(c) && c != '\t') { 215 c = getc(fi); 216 continue; 217 } 218 cp++ = c; 219* c = getc(fi); 220 }	310 /* [ goal [ maximum ] ] */
221	311
222 /* 223 * Toss anything remaining on the input line. 224 / 225* while (c != '\n' && c != EOF) 226 c = getc(fi);	312 if (argc>0 && goal_length==0 313 && (goal_length=get_positive(argv,"goal length must be positive", 0)) 314* != 0) { 315 --argc; ++argv; 316 if (argc>0 317 && (max_length=get_positive(argv,"max length must be positive", 0)) 318* != 0) { 319 --argc; ++argv; 320 if (max_length<goal_length) 321 errx(EX_USAGE, "max length must be >= goal length"); 322 } 323 } 324 if (goal_length==0) goal_length = 65; 325 if (max_length==0) max_length = goal_length+10; 326 output_buffer = XMALLOC(max_length+1); /* really needn't be longer */
227	327
228 if (cp != NULL) { 229 cp = '\0'; 230* } else { 231 putchar('\n'); 232 c = getc(fi); 233 continue; 234 }	328 /* 2. Process files. */
235	329
236 /* 237 * Expand tabs on the way to canonb. 238 / 239* col = 0; 240 cp = linebuf; 241 cp2 = canonb; 242 while ((cc = cp++)) { 243* if (cc != '\t') { 244 col++; 245 if (cp2 - canonb >= cbufsize) { 246 int offset = cp2 - canonb; 247 cbufsize += CHUNKSIZE; 248 canonb = realloc(canonb, cbufsize); 249 if(canonb == 0) 250 abort(); 251 cp2 = canonb + offset; 252 } 253 cp2++ = cc; 254* continue; 255 } 256 do { 257 if (cp2 - canonb >= cbufsize) { 258 int offset = cp2 - canonb; 259 cbufsize += CHUNKSIZE; 260 canonb = realloc(canonb, cbufsize); 261 if(canonb == 0) 262 abort(); 263 cp2 = canonb + offset; 264 } 265 cp2++ = ' '; 266* col++; 267 } while ((col & 07) != 0); 268 }	330 if (argc>0) { 331 while (argc-->0) process_named_file(argv++); 332* } 333 else { 334 process_stream(stdin, "standard input"); 335 }
269	336
270 /* 271 * Swipe trailing blanks from the line. 272 / 273* for (cp2--; cp2 >= canonb && cp2 == ' '; cp2--) 274* ; 275 ++cp2 = '\0'; 276* prefix(canonb); 277 if (c != EOF) 278 c = getc(fi); 279 } 280}	337 /* We're done. */
281	338
282/* 283 * Take a line devoid of tabs and other garbage and determine its 284 * blank prefix. If the indent changes, call for a linebreak. 285 * If the input line is blank, echo the blank line on the output. 286 * Finally, if the line minus the prefix is a mail header, try to keep 287 * it on a line by itself. 288 / 289void 290prefix(line) 291* char line[]; 292{ 293 register char cp, hp; 294* register int np, h;	339 return n_errors ? EX_NOINPUT : 0;
295	340
296 if (!*line) { 297 oflush(); 298 putchar('\n'); 299 return; 300 } 301 for (cp = line; *cp == ' '; cp++) 302 ; 303 np = cp - line;	341}
304	342
305 /* 306 * The following horrible expression attempts to avoid linebreaks 307 * when the indent changes due to a paragraph. 308 / 309* if (np != pfx && (np > pfx \|\| abs(pfx-np) > 8)) 310 oflush(); 311 if ((h = ishead(cp))) 312 oflush(), mark = lineno; 313 if (lineno - mark < 3 && lineno - mark > 0) 314 for (hp = &headnames[0]; hp != (char ) 0; hp++) 315 if (ispref(hp, cp)) { 316* h = 1; 317 oflush(); 318 break; 319 } 320 if (!h && (h = (cp == '.'))) 321* oflush(); 322 pfx = np; 323 if (h) 324 pack(cp, strlen(cp)); 325 else split(cp); 326 if (h) 327 oflush(); 328 lineno++;	343/* Process a single file, given its name. 344 / 345static void 346process_named_file(const char name) { 347 FILE f=fopen(name, "r"); 348* if (!f) { perror(name); ++n_errors; } 349 else { 350 process_stream(f, name); 351 fclose(f); 352 }
329} 330	353} 354
331/* 332 * Split up the passed line into output "words" which are 333 * maximal strings of non-blanks with the blank separation 334 * attached at the end. Pass these words along to the output 335 * line packer.	355/* Types of mail header continuation lines:
336 */	356 */
337void 338split(line) 339 char line[]; 340{ 341 register char cp, cp2; 342 static char word=0; 343* static int wordsize=0; 344 int wordl; /* LIZ@UOM 6/18/85 */	357typedef enum { 358 hdr_ParagraphStart = -1, 359 hdr_NonHeader = 0, 360 hdr_Header = 1, 361 hdr_Continuation = 2 362} HdrType;
345	363
346 { 347 int l = strlen(line); 348 if (l >= wordsize) { 349 if (word) 350 free(word); 351 wordsize = (l+66)&~63; 352 word = malloc(wordsize); 353 if (word == NULL) 354 abort(); 355 } 356 }	364/* Process a stream. This is where the real work happens, 365 * except that centering is handled separately. 366 / 367static void 368process_stream(FILE stream, const char name) { 369* size_t last_indent=SILLY; /* how many spaces in last indent? / 370* size_t para_line_number=0; /* how many lines already read in this para? / 371* size_t first_indent=SILLY; /* indentation of line 0 of paragraph / 372* HdrType prev_header_type=hdr_ParagraphStart; 373 /* ^-- header_type of previous line; -1 at para start / 374* char line; 375* size_t length;
357	376
358 cp = line; 359 while (cp) { 360* cp2 = word; 361 wordl = 0; /* LIZ@UOM 6/18/85 */	377 if (centerP) { center_stream(stream, name); return; } 378 while ((line=get_line(stream,&length)) != NULL) { 379 size_t np=indent_length(line, length); 380 { HdrType header_type=hdr_NonHeader; 381 if (grok_mail_headers && prev_header_type!=hdr_NonHeader) { 382 if (np==0 && might_be_header(line)) 383 header_type = hdr_Header; 384 else if (np>0 && prev_header_type>hdr_NonHeader) 385 header_type = hdr_Continuation; 386 } 387 /* We need a new paragraph if and only if: 388 * this line is blank, 389 * OR it's a mail header, 390 * OR it's not a mail header AND the last line was one, 391 * OR the indentation has changed 392 * AND the line isn't a mail header continuation line 393 * AND this isn't the second line of an indented paragraph. 394 / 395* if ( length==0 396 \|\| header_type==hdr_Header 397 \|\| (header_type==hdr_NonHeader && prev_header_type>hdr_NonHeader) 398 \|\| (np!=last_indent 399 && header_type != hdr_Continuation 400 && (!allow_indented_paragraphs \|\| para_line_number != 1)) ) { 401 new_paragraph(output_in_paragraph ? last_indent : first_indent, np); 402 para_line_number = 0; 403 first_indent = np; 404 last_indent = np; 405 if (header_type==hdr_Header) last_indent=2; /* for cont. lines / 406* if (length==0) { 407 putchar('\n'); 408 prev_header_type=hdr_ParagraphStart; 409 continue; 410 } 411 } 412 else { 413 /* If this is an indented paragraph other than a mail header 414 * continuation, set \|last_indent\|. 415 / 416* if (np != last_indent && header_type != hdr_Continuation) 417 last_indent=np; 418 } 419 prev_header_type = header_type; 420 }
362	421
363 /* 364 * Collect a 'word,' allowing it to contain escaped white 365 * space. 366 / 367* while (cp && cp != ' ') { 368 if (cp == '\\' && isspace(cp[1])) 369* cp2++ = cp++; 370 cp2++ = cp++; 371 wordl++;/* LIZ@UOM 6/18/85 / 372* } 373 374 /* 375 * Guarantee a space at end of line. Two spaces after end of 376 * sentence punctuation. 377 / 378* if (cp == '\0') { 379* cp2++ = ' '; 380* if (cp != line && index(".:!", cp[-1])) 381 cp2++ = ' '; 382* } 383 while (cp == ' ') 384* cp2++ = cp++; 385 cp2 = '\0'; 386* /* 387 * LIZ@UOM 6/18/85 pack(word); 388 / 389* pack(word, wordl); 390 }	422 { size_t n=np; 423 while (n<length) { 424 /* Find word end and count spaces after it / 425* size_t word_length=0, space_length=0; 426 while (n+word_length < length && line[n+word_length] != ' ') 427 ++word_length; 428 space_length = word_length; 429 while (n+space_length < length && line[n+space_length] == ' ') 430 ++space_length; 431 /* Send the word to the output machinery. / 432* output_word(first_indent, last_indent, 433 line+n, word_length, space_length-word_length); 434 n += space_length; 435 } 436 } 437 ++para_line_number; 438 } 439 new_paragraph(output_in_paragraph ? last_indent : first_indent, 0); 440 if (ferror(stream)) { perror(name); ++n_errors; }
391} 392	441} 442
393/* 394 * Output section. 395 * Build up line images from the words passed in. Prefix 396 * each line with correct number of blanks. The buffer "outbuf" 397 * contains the current partial line image, including prefixed blanks. 398 * "outp" points to the next available space therein. When outp is NOSTR, 399 * there ain't nothing in there yet. At the bottom of this whole mess, 400 * leading tabs are reinserted.	443/* How long is the indent on this line?
401 */	444 */
402char *outbuf; /* Sandbagged output line image */ 403char *outp; /* Pointer in above */ 404int outbuf_size; /* er, size of outbuf */	445static size_t 446indent_length(const char *line, size_t length) { 447 size_t n=0; 448 while (n<length && *line++ == ' ') ++n; 449 return n; 450}
405	451
406/* 407 * Initialize the output section.	452/* Might this line be a mail header? 453 * We deem a line to be a possible header if it matches the 454 * Perl regexp /^[A-Z][-A-Za-z0-9]*:\s/. This is *not* the same 455 * as in RFC whatever-number-it-is; we want to be gratuitously 456 * conservative to avoid mangling ordinary civilised text.
408 */	457 */
409void 410setout() 411{ 412 outbuf = malloc(BUFSIZ); 413 if (outbuf == 0) 414 abort(); 415 outbuf_size = BUFSIZ; 416 outp = NOSTR;	458static int 459might_be_header(const unsigned char *line) { 460 if (!isupper(*line++)) return 0; 461 while (*line && (isalnum(*line) \|\| *line=='-')) ++line; 462 return (*line==':' && isspace(line[1]));
417} 418	463} 464
419/* 420 * Pack a word onto the output line. If this is the beginning of 421 * the line, push on the appropriately-sized string of blanks first. 422 * If the word won't fit on the current line, flush and begin a new 423 * line. If the word is too long to fit all by itself on a line, 424 * just give it its own and hope for the best. 425 * 426 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the 427 * goal length, take it. If not, then check to see if the line 428 * will be over the max length; if so put the word on the next 429 * line. If not, check to see if the line will be closer to the 430 * goal length with or without the word and take it or put it on 431 * the next line accordingly.	465/* Begin a new paragraph with an indent of \|indent\| spaces.
432 */	466 */
433 434/* 435 * LIZ@UOM 6/18/85 -- pass in the length of the word as well 436 * pack(word) 437 * char word[]; 438 / 439void 440pack(word,wl) 441* char word[]; 442 int wl; 443{ 444 register char cp; 445* register int s, t; 446 447 if (((outp==NOSTR) ? wl : outp-outbuf + wl) >= outbuf_size) { 448 char old_outbuf = outbuf; 449* outbuf_size = 2; 450* outbuf = realloc(outbuf, outbuf_size); 451 if (outbuf == 0) 452 abort(); 453 outp += outbuf-old_outbuf; 454 } 455 456 if (outp == NOSTR) 457 leadin(); 458 /* 459 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the 460 * length of the line before the word is added; t is now the length 461 * of the line after the word is added 462 * t = strlen(word); 463 * if (t+s <= LENGTH) 464 / 465* s = outp - outbuf; 466 t = wl + s; 467 if ((t <= goal_length) \|\| 468 ((t <= max_length) && (t - goal_length <= goal_length - s))) { 469 /* 470 * In like flint! 471 / 472* for (cp = word; cp; outp++ = cp++); 473* return; 474 } 475 if (s > pfx) { 476 oflush(); 477 leadin(); 478 } 479 for (cp = word; cp; outp++ = *cp++);	467static void 468new_paragraph(size_t old_indent, size_t indent) { 469 if (x0) { 470 if (old_indent>0) output_indent(old_indent); 471 fwrite(output_buffer, 1, x0, stdout); 472 putchar('\n'); 473 } 474 x=indent; x0=0; pending_spaces=0; 475 output_in_paragraph = 0;
480} 481	476} 477
482/* 483 * If there is anything on the current output line, send it on 484 * its way. Set outp to NOSTR to indicate the absence of the current 485 * line prefix.	478/* Output spaces or tabs for leading indentation.
486 */	479 */
487void 488oflush() 489{ 490 if (outp == NOSTR) 491 return; 492 *outp = '\0'; 493 tabulate(outbuf); 494 outp = NOSTR;	480static void 481output_indent(size_t n_spaces) { 482 if (output_tab_width) { 483 while (n_spaces >= output_tab_width) { 484 putchar('\t'); 485 n_spaces -= output_tab_width; 486 } 487 } 488 while (n_spaces-- > 0) putchar(' ');
495} 496	489} 490
497/* 498 * Take the passed line buffer, insert leading tabs where possible, and 499 * output on standard output (finally).	491/* Output a single word, or add it to the buffer. 492 * indent0 and indent1 are the indents to use on the first and subsequent 493 * lines of a paragraph. They'll often be the same, of course.
500 */	494 */
501void 502tabulate(line) 503 char line[]; 504{ 505 register char cp; 506* register int b, t;	495static void 496output_word(size_t indent0, size_t indent1, const char word, size_t length, size_t spaces) { 497* size_t new_x = x+pending_spaces+length; 498 size_t indent = output_in_paragraph ? indent1 : indent0;
507	499
508 /* 509 * Toss trailing blanks in the output line. 510 / 511* cp = line + strlen(line) - 1; 512 while (cp >= line && cp == ' ') 513* cp--; 514 *++cp = '\0';	500 /* If either \|spaces==0\| (at end of line) or \|coalesce_spaces_P\| 501 * (squashing internal whitespace), then add just one space; 502 * except that if the last character was a sentence-ender we 503 * actually add two spaces. 504 / 505* if (coalesce_spaces_P \|\| spaces==0) 506 spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1;
515	507
516 /* 517 * Count the leading blank space and tabulate. 518 / 519* for (cp = line; cp == ' '; cp++) 520* ; 521 b = cp-line; 522 t = b >> 3; 523 b &= 07; 524 if (t > 0) 525 do 526 putc('\t', stdout); 527 while (--t); 528 if (b > 0) 529 do 530 putc(' ', stdout); 531 while (--b); 532 while (cp) 533* putc(cp++, stdout); 534* putc('\n', stdout);	508 if (new_x<=goal_length) { 509 /* After adding the word we still aren't at the goal length, 510 * so clearly we add it to the buffer rather than outputing it. 511 / 512* memset(output_buffer+x0, ' ', pending_spaces); 513 x0 += pending_spaces; x += pending_spaces; 514 memcpy(output_buffer+x0, word, length); 515 x0 += length; x += length; 516 pending_spaces = spaces; 517 } 518 else { 519 /* Adding the word takes us past the goal. Print the line-so-far, 520 * and the word too iff either (1) the lsf is empty or (2) that 521 * makes us nearer the goal but doesn't take us over the limit, 522 * or (3) the word on its own takes us over the limit. 523 * In case (3) we put a newline in between. 524 / 525* if (indent>0) output_indent(indent); 526 fwrite(output_buffer, 1, x0, stdout); 527 if (x0==0 \|\| (new_x <= max_length && new_x-goal_length <= goal_length-x)) { 528 printf("%s", pending_spaces, ""); 529* goto write_out_word; 530 } 531 else { 532 /* If the word takes us over the limit on its own, just 533 * spit it out and don't bother buffering it. 534 / 535* if (indent+length > max_length) { 536 putchar('\n'); 537 if (indent>0) output_indent(indent); 538write_out_word: 539 fwrite(word, 1, length, stdout); 540 x0 = 0; x = indent1; pending_spaces = 0; 541 } 542 else { 543 memcpy(output_buffer, word, length); 544 x0 = length; x = length+indent1; pending_spaces = spaces; 545 } 546 } 547 putchar('\n'); 548 output_in_paragraph = 1; 549 }
535} 536	550} 551
537/* 538 * Initialize the output line with the appropriate number of 539 * leading blanks.	552/* Process a stream, but just center its lines rather than trying to 553 * format them neatly.
540 */	554 */
541void 542leadin() 543{ 544 register int b; 545 register char cp; 546* 547 for (b = 0, cp = outbuf; b < pfx; b++) 548 cp++ = ' '; 549* outp = cp;	555static void 556center_stream(FILE stream, const char name) { 557 char line; 558* size_t length; 559 while ((line=get_line(stream, &length)) != 0) { 560 size_t l=length; 561 while (l>0 && isspace(line)) { ++line; --l; } 562* length=l; 563 while (l<goal_length) { putchar(' '); l+=2; } 564 fwrite(line, 1, length, stdout); 565 putchar('\n'); 566 } 567 if (ferror(stream)) { perror(name); ++n_errors; }
550} 551	568} 569
552/* 553 * Save a string in dynamic space. 554 * This little goodie is needed for 555 * a headline detector in head.c	570/* Get a single line from a stream. Expand tabs, strip control 571 * characters and trailing whitespace, and handle backspaces. 572 * Return the address of the buffer containing the line, and 573 * put the length of the line in \|lengthp\|. 574 * This can cope with arbitrarily long lines, and with lines 575 * without terminating \n. 576 * If there are no characters left or an error happens, we 577 * return 0. 578 * Don't confuse \|spaces_pending\| here with the global 579 * \|pending_spaces\|.
556 */	580 */
557char * 558savestr(str) 559 char str[]; 560{ 561 register char *top;	581static char * 582get_line(FILE stream, size_t lengthp) { 583 static char buf=NULL; 584* static size_t length=0; 585 size_t len=0; 586 int ch; 587 size_t spaces_pending=0;
562	588
563 top = malloc(strlen(str) + 1); 564 if (top == NOSTR) 565 errx(1, "ran out of memory"); 566 strcpy(top, str); 567 return (top);	589 if (buf==NULL) { length=100; buf=XMALLOC(length); } 590 while ((ch=getc(stream)) != '\n' && ch != EOF) { 591 if (ch==' ') ++spaces_pending; 592 else if (isprint(ch)) { 593 while (len+spaces_pending >= length) { 594 length=2; buf=xrealloc(buf, length); 595* } 596 while (spaces_pending > 0) { --spaces_pending; buf[len++]=' '; } 597 buf[len++] = ch; 598 } 599 else if (ch=='\t') 600 spaces_pending += tab_width - (len+spaces_pending)%tab_width; 601 else if (ch=='\b') { if (len) --len; } 602 } 603 lengthp=len; 604* return (len>0 \|\| ch!=EOF) ? buf : 0;
568} 569	605} 606
570/* 571 * Is s1 a prefix of s2??	607/* (Re)allocate some memory, exiting with an error if we can't.
572 */	608 */
573int 574ispref(s1, s2) 575 register char s1, s2; 576{ 577 578 while (s1++ == s2) 579 ; 580 return (*s1 == '\0');	609static void * 610xrealloc(void ptr, size_t nbytes) { 611* void p = realloc(ptr, nbytes); 612* if (p == NULL) errx(EX_OSERR, "out of memory"); 613 return p;
581}	614}