Deleted Added
full compact
fmt.c (50477) fmt.c (77807)
1/*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
1/* $OpenBSD: fmt.c,v 1.16 2000/06/25 15:35:42 pjanzen Exp $ */
2
3/* Sensible version of fmt
4 *
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
5 * Syntax: fmt [ options ] [ goal [ max ] ] [ filename ... ]
6 *
7 * Since the documentation for the original fmt is so poor, here
8 * is an accurate description of what this one does. It's usually
9 * the same. The *mechanism* used may differ from that suggested
10 * here. Note that we are *not* entirely compatible with fmt,
11 * because fmt gets so many things wrong.
12 *
13 * 1. Tabs are expanded, assuming 8-space tab stops.
14 * If the `-t <n>' option is given, we assume <n>-space
15 * tab stops instead.
16 * Trailing blanks are removed from all lines.
17 * x\b == nothing, for any x other than \b.
18 * Other control characters are simply stripped. This
19 * includes \r.
20 * 2. Each line is split into leading whitespace and
21 * everything else. Maximal consecutive sequences of
22 * lines with the same leading whitespace are considered
23 * to form paragraphs, except that a blank line is always
24 * a paragraph to itself.
25 * If the `-p' option is given then the first line of a
26 * paragraph is permitted to have indentation different
27 * from that of the other lines.
28 * If the `-m' option is given then a line that looks
29 * like a mail message header, if it is not immediately
30 * preceded by a non-blank non-message-header line, is
31 * taken to start a new paragraph, which also contains
32 * any subsequent lines with non-empty leading whitespace.
33 * 3. The "everything else" is split into words; a word
34 * includes its trailing whitespace, and a word at the
35 * end of a line is deemed to be followed by a single
36 * space, or two spaces if it ends with a sentence-end
37 * character. (See the `-d' option for how to change that.)
38 * If the `-s' option has been given, then a word's trailing
39 * whitespace is replaced by what it would have had if it
40 * had occurred at end of line.
41 * 4. Each paragraph is sent to standard output as follows.
42 * We output the leading whitespace, and then enough words
43 * to make the line length as near as possible to the goal
44 * without exceeding the maximum. (If a single word would
45 * exceed the maximum, we output that anyway.) Of course
46 * the trailing whitespace of the last word is ignored.
47 * We then emit a newline and start again if there are any
48 * words left.
49 * Note that for a blank line this translates as "We emit
50 * a newline".
51 * If the `-l <n>' option is given, then leading whitespace
52 * is modified slightly: <n> spaces are replaced by a tab.
53 * Indented paragraphs (see above under `-p') make matters
54 * more complicated than this suggests. Actually every paragraph
55 * has two `leading whitespace' values; the value for the first
56 * line, and the value for the most recent line. (While processing
57 * the first line, the two are equal. When `-p' has not been
58 * given, they are always equal.) The leading whitespace
59 * actually output is that of the first line (for the first
60 * line of *output*) or that of the most recent line (for
61 * all other lines of output).
62 * When `-m' has been given, message header paragraphs are
63 * taken as having first-leading-whitespace empty and
64 * subsequent-leading-whitespace two spaces.
65 *
66 * Multiple input files are formatted one at a time, so that a file
67 * never ends in the middle of a line.
68 *
69 * There's an alternative mode of operation, invoked by giving
70 * the `-c' option. In that case we just center every line,
71 * and most of the other options are ignored. This should
72 * really be in a separate program, but we must stay compatible
73 * with old `fmt'.
74 *
75 * QUERY: Should `-m' also try to do the right thing with quoted text?
76 * QUERY: `-b' to treat backslashed whitespace as old `fmt' does?
77 * QUERY: Option meaning `never join lines'?
78 * QUERY: Option meaning `split in mid-word to avoid overlong lines'?
79 * (Those last two might not be useful, since we have `fold'.)
80 *
81 * Differences from old `fmt':
82 *
83 * - We have many more options. Options that aren't understood
84 * generate a lengthy usage message, rather than being
85 * treated as filenames.
86 * - Even with `-m', our handling of message headers is
87 * significantly different. (And much better.)
88 * - We don't treat `\ ' as non-word-breaking.
89 * - Downward changes of indentation start new paragraphs
90 * for us, as well as upward. (I think old `fmt' behaves
91 * in the way it does in order to allow indented paragraphs,
92 * but this is a broken way of making indented paragraphs
93 * behave right.)
94 * - Given the choice of going over or under |goal_length|
95 * by the same amount, we go over; old `fmt' goes under.
96 * - We treat `?' as ending a sentence, and not `:'. Old `fmt'
97 * does the reverse.
98 * - We return approved return codes. Old `fmt' returns
99 * 1 for some errors, and *the number of unopenable files*
100 * when that was all that went wrong.
101 * - We have fewer crashes and more helpful error messages.
102 * - We don't turn spaces into tabs at starts of lines unless
103 * specifically requested.
104 * - New `fmt' is somewhat smaller and slightly faster than
105 * old `fmt'.
106 *
107 * Bugs:
108 *
109 * None known. There probably are some, though.
110 *
111 * Portability:
112 *
113 * I believe this code to be pretty portable. It does require
114 * that you have `getopt'. If you need to include "getopt.h"
115 * for this (e.g., if your system didn't come with `getopt'
116 * and you installed it yourself) then you should arrange for
117 * NEED_getopt_h to be #defined.
118 *
119 * Everything here should work OK even on nasty 16-bit
120 * machines and nice 64-bit ones. However, it's only really
121 * been tested on my FreeBSD machine. Your mileage may vary.
122 */
123
124/* Copyright (c) 1997 Gareth McCaughan. All rights reserved.
125 *
126 * Redistribution and use of this code, in source or binary forms,
127 * with or without modification, are permitted subject to the following
128 * conditions:
129 *
130 * - Redistribution of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
131 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
132 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
133 * - If you distribute modified source code it must also include
134 * a notice saying that it has been modified, and giving a brief
135 * description of what changes have been made.
136 *
137 * Disclaimer: I am not responsible for the results of using this code.
138 * If it formats your hard disc, sends obscene messages to
139 * your boss and kills your children then that's your problem
140 * not mine. I give absolutely no warranty of any sort as to
141 * what the program will do, and absolutely refuse to be held
142 * liable for any consequences of your using it.
143 * Thank you. Have a nice day.
32 */
33
144 */
145
34#ifndef lint
35static char copyright[] =
36"@(#) Copyright (c) 1980, 1993\n\
37 The Regents of the University of California. All rights reserved.\n";
38#endif /* not lint */
146/* RCS change log:
147 * Revision 1.5 1998/03/02 18:02:21 gjm11
148 * Minor changes for portability.
149 *
150 * Revision 1.4 1997/10/01 11:51:28 gjm11
151 * Repair broken indented-paragraph handling.
152 * Add mail message header stuff.
153 * Improve comments and layout.
154 * Make usable with non-BSD systems.
155 * Add revision display to usage message.
156 *
157 * Revision 1.3 1997/09/30 16:24:47 gjm11
158 * Add copyright notice, rcsid string and log message.
159 *
160 * Revision 1.2 1997/09/30 16:13:39 gjm11
161 * Add options: -d <chars>, -l <width>, -p, -s, -t <width>, -h .
162 * Parse options with `getopt'. Clean up code generally.
163 * Make comments more accurate.
164 *
165 * Revision 1.1 1997/09/30 11:29:57 gjm11
166 * Initial revision
167 */
39
40#ifndef lint
168
169#ifndef lint
41#if 0
42static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
43#else
44static const char rcsid[] =
170static const char rcsid[] =
45 "$FreeBSD: head/usr.bin/fmt/fmt.c 50477 1999-08-28 01:08:13Z peter $";
46#endif
171 "$FreeBSD: head/usr.bin/fmt/fmt.c 77807 2001-06-06 10:17:05Z ru $";
172static const char copyright[] =
173 "Copyright (c) 1997 Gareth McCaughan. All rights reserved.\n";
47#endif /* not lint */
48
49#include <ctype.h>
50#include <err.h>
51#include <locale.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
174#endif /* not lint */
175
176#include <ctype.h>
177#include <err.h>
178#include <locale.h>
179#include <stdio.h>
180#include <stdlib.h>
181#include <string.h>
182#include <sysexits.h>
183#include <unistd.h>
55
184
56/*
57 * fmt -- format the concatenation of input files or standard input
58 * onto standard output. Designed for use with Mail ~|
59 *
60 * Syntax : fmt [ goal [ max ] ] [ name ... ]
61 * Authors: Kurt Shoens (UCB) 12/7/78;
62 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
185/* Something that, we hope, will never be a genuine line length,
186 * indentation etc.
63 */
187 */
188#define SILLY ((size_t)-1)
64
189
65/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
66 * #define LENGTH 72 Max line length in output
190/* I used to use |strtoul| for this, but (1) not all systems have it
191 * and (2) it's probably better to use |strtol| to detect negative
192 * numbers better.
193 * If |fussyP==0| then we don't complain about non-numbers
194 * (returning 0 instead), but we do complain about bad numbers.
67 */
195 */
68#define NOSTR ((char *) 0) /* Null string pointer for lint */
/* Parse |s| as a strictly positive integer (any base accepted by
 * |strtol| with base 0: decimal, 0x-hex, 0-octal).
 *
 *   s        - the text to parse.
 *   err_mess - message printed (verbatim) if we give up.
 *   fussyP   - if nonzero, trailing garbage / non-numbers are fatal;
 *              if zero, they make us return 0 instead.
 *
 * Returns the parsed value as a |size_t|. A value that parses cleanly
 * but is zero or negative is always fatal, regardless of |fussyP|.
 * Fatal errors exit via |errx| with status EX_USAGE.
 */
static size_t
get_positive(const char *s, const char *err_mess, int fussyP) {
  char *end;
  long result = strtol(s, &end, 0);

  if (*end != '\0') {
    /* Something other than a complete number. */
    if (!fussyP) return 0;
    /* Never hand a caller-supplied string to errx() as the format. */
    errx(EX_USAGE, "%s", err_mess);
  }
  if (result <= 0)
    errx(EX_USAGE, "%s", err_mess);
  return (size_t) result;
}
69
204
70/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
71#define GOAL_LENGTH 65
72#define MAX_LENGTH 75
73int goal_length; /* Target or goal line length in output */
74int max_length; /* Max line length in output */
75int pfx; /* Current leading blank count */
76int lineno; /* Current input line */
77int mark; /* Last place we saw a head line */
78int center;
205/* Global variables */
79
206
80char *headnames[] = {"To", "Subject", "Cc", 0};
207static int centerP=0; /* Try to center lines? */
208static size_t goal_length=0; /* Target length for output lines */
209static size_t max_length=0; /* Maximum length for output lines */
210static int coalesce_spaces_P=0; /* Coalesce multiple whitespace -> ' ' ? */
211static int allow_indented_paragraphs=0; /* Can first line have diff. ind.? */
212static int tab_width=8; /* Number of spaces per tab stop */
213static size_t output_tab_width=0; /* Ditto, when squashing leading spaces */
214static const char *sentence_enders=".?!"; /* Double-space after these */
215static int grok_mail_headers=0; /* treat embedded mail headers magically? */
81
216
82void fmt __P((FILE *));
83int ispref __P((char *, char *));
84void leadin __P((void));
85void oflush __P((void));
86void pack __P((char [], int));
87void prefix __P((char []));
88void setout __P((void));
89void split __P((char []));
90void tabulate __P((char []));
217static int n_errors=0; /* Number of failed files. Return on exit. */
218static char *output_buffer=0; /* Output line will be built here */
219static size_t x; /* Horizontal position in output line */
220static size_t x0; /* Ditto, ignoring leading whitespace */
221static size_t pending_spaces; /* Spaces to add before next word */
222static int output_in_paragraph=0; /* Any of current para written out yet? */
91
223
92/*
93 * Drive the whole formatter by managing input files. Also,
94 * cause initialization of the output stuff and flush it out
95 * at the end.
96 */
224/* Prototypes */
97
225
226static void process_named_file (const char *);
227static void process_stream (FILE *, const char *);
228static size_t indent_length (const char *, size_t);
229static int might_be_header (const unsigned char *);
230static void new_paragraph (size_t, size_t);
231static void output_word (size_t, size_t, const char *, size_t, size_t);
232static void output_indent (size_t);
233static void center_stream (FILE *, const char *);
234static char * get_line (FILE *, size_t *);
235static void * xrealloc (void *, size_t);
236
237#define XMALLOC(x) xrealloc(0,x)
238
239/* Here is perhaps the right place to mention that this code is
240 * all in top-down order. Hence, |main| comes first.
241 */
98int
242int
99main(argc, argv)
100 int argc;
101 char **argv;
102{
103 register FILE *fi;
104 register int errs = 0;
105 int number; /* LIZ@UOM 6/18/85 */
243main(int argc, char *argv[]) {
244 int ch; /* used for |getopt| processing */
106
245
107 (void) setlocale(LC_CTYPE, "");
108
246
109 goal_length = GOAL_LENGTH;
110 max_length = MAX_LENGTH;
111 setout();
112 lineno = 1;
113 mark = -10;
114 /*
115 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
116 */
117 if (argc > 1 && !strcmp(argv[1], "-c")) {
118 center++;
119 argc--;
120 argv++;
121 }
122 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
123 argv++;
124 argc--;
125 goal_length = number;
126 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
127 argv++;
128 argc--;
129 max_length = number;
130 }
131 }
132 if (max_length <= goal_length)
133 errx(1, "max length must be greater than goal length");
134 if (argc < 2) {
135 fmt(stdin);
136 oflush();
137 exit(0);
138 }
139 while (--argc) {
140 if ((fi = fopen(*++argv, "r")) == NULL) {
141 perror(*argv);
142 errs++;
143 continue;
144 }
145 fmt(fi);
146 fclose(fi);
147 }
148 oflush();
149 exit(errs);
150}
247 (void) setlocale(LC_CTYPE, "");
151
248
152/*
153 * Read up characters from the passed input file, forming lines,
154 * doing ^H processing, expanding tabs, stripping trailing blanks,
155 * and sending each line down for analysis.
156 */
157void
158fmt(fi)
159 FILE *fi;
160{
161 static char *linebuf = 0, *canonb = 0;
162 register char *cp, *cp2, cc;
163 register int c, col;
164#define CHUNKSIZE 1024
165 static int lbufsize = 0, cbufsize = CHUNKSIZE;
249 /* 1. Grok parameters. */
166
250
167 canonb = malloc(CHUNKSIZE);
168 if (canonb == 0)
169 abort();
251 while ((ch = getopt(argc, argv, "0123456789cd:hl:mpst:w:")) != -1)
252 switch(ch) {
253 case 'c':
254 centerP = 1;
255 continue;
256 case 'd':
257 sentence_enders = optarg;
258 continue;
259 case 'l':
260 output_tab_width
261 = get_positive(optarg, "output tab width must be positive", 1);
262 continue;
263 case 'm':
264 grok_mail_headers = 1;
265 continue;
266 case 'p':
267 allow_indented_paragraphs = 1;
268 continue;
269 case 's':
270 coalesce_spaces_P = 1;
271 continue;
272 case 't':
273 tab_width = get_positive(optarg, "tab width must be positive", 1);
274 continue;
275 case 'w':
276 goal_length = get_positive(optarg, "width must be positive", 1);
277 max_length = goal_length;
278 continue;
279 case '0': case '1': case '2': case '3': case '4': case '5':
280 case '6': case '7': case '8': case '9':
281 /* XXX this is not a stylistically approved use of getopt() */
282 if (goal_length==0) {
283 char *p;
284 p = argv[optind - 1];
285 if (p[0] == '-' && p[1] == ch && !p[2])
286 goal_length = get_positive(++p, "width must be nonzero", 1);
287 else
288 goal_length = get_positive(argv[optind]+1,
289 "width must be nonzero", 1);
290 max_length = goal_length;
291 }
292 continue;
293 case 'h': default:
294 fprintf(stderr,
295"Usage: fmt [-cmps] [-d chars] [-l num] [-t num]\n"
296" [-w width | -width | goal [maximum]] [file ...]\n"
297"Options: -c center each line instead of formatting\n"
298" -d <chars> double-space after <chars> at line end\n"
299" -l <n> turn each <n> spaces at start of line into a tab\n"
300" -m try to make sure mail header lines stay separate\n"
301" -p allow indented paragraphs\n"
302" -s coalesce whitespace inside lines\n"
303" -t <n> have tabs every <n> columns\n"
304" -w <n> set maximum width to <n>\n"
305" goal set target width to goal\n");
306 exit(ch=='h' ? 0 : EX_USAGE);
307 }
308 argc -= optind; argv += optind;
170
309
171 if (center) {
172 linebuf = malloc(BUFSIZ);
173 while (1) {
174 cp = fgets(linebuf, BUFSIZ, fi);
175 if (!cp)
176 return;
177 while (*cp && isspace(*cp))
178 cp++;
179 cp2 = cp + strlen(cp) - 1;
180 while (cp2 > cp && isspace(*cp2))
181 cp2--;
182 if (cp == cp2)
183 putchar('\n');
184 col = cp2 - cp;
185 for (c = 0; c < (goal_length-col)/2; c++)
186 putchar(' ');
187 while (cp <= cp2)
188 putchar(*cp++);
189 putchar('\n');
190 }
191 }
192 c = getc(fi);
193 while (c != EOF) {
194 /*
195 * Collect a line, doing ^H processing.
196 * Leave tabs for now.
197 */
198 cp = linebuf;
199 while (c != '\n' && c != EOF) {
200 if (cp - linebuf >= lbufsize) {
201 int offset = cp - linebuf;
202 lbufsize += CHUNKSIZE;
203 linebuf = realloc(linebuf, lbufsize);
204 if(linebuf == 0)
205 abort();
206 cp = linebuf + offset;
207 }
208 if (c == '\b') {
209 if (cp > linebuf)
210 cp--;
211 c = getc(fi);
212 continue;
213 }
214 if (!isprint(c) && c != '\t') {
215 c = getc(fi);
216 continue;
217 }
218 *cp++ = c;
219 c = getc(fi);
220 }
310 /* [ goal [ maximum ] ] */
221
311
222 /*
223 * Toss anything remaining on the input line.
224 */
225 while (c != '\n' && c != EOF)
226 c = getc(fi);
312 if (argc>0 && goal_length==0
313 && (goal_length=get_positive(*argv,"goal length must be positive", 0))
314 != 0) {
315 --argc; ++argv;
316 if (argc>0
317 && (max_length=get_positive(*argv,"max length must be positive", 0))
318 != 0) {
319 --argc; ++argv;
320 if (max_length<goal_length)
321 errx(EX_USAGE, "max length must be >= goal length");
322 }
323 }
324 if (goal_length==0) goal_length = 65;
325 if (max_length==0) max_length = goal_length+10;
326 output_buffer = XMALLOC(max_length+1); /* really needn't be longer */
227
327
228 if (cp != NULL) {
229 *cp = '\0';
230 } else {
231 putchar('\n');
232 c = getc(fi);
233 continue;
234 }
328 /* 2. Process files. */
235
329
236 /*
237 * Expand tabs on the way to canonb.
238 */
239 col = 0;
240 cp = linebuf;
241 cp2 = canonb;
242 while ((cc = *cp++)) {
243 if (cc != '\t') {
244 col++;
245 if (cp2 - canonb >= cbufsize) {
246 int offset = cp2 - canonb;
247 cbufsize += CHUNKSIZE;
248 canonb = realloc(canonb, cbufsize);
249 if(canonb == 0)
250 abort();
251 cp2 = canonb + offset;
252 }
253 *cp2++ = cc;
254 continue;
255 }
256 do {
257 if (cp2 - canonb >= cbufsize) {
258 int offset = cp2 - canonb;
259 cbufsize += CHUNKSIZE;
260 canonb = realloc(canonb, cbufsize);
261 if(canonb == 0)
262 abort();
263 cp2 = canonb + offset;
264 }
265 *cp2++ = ' ';
266 col++;
267 } while ((col & 07) != 0);
268 }
330 if (argc>0) {
331 while (argc-->0) process_named_file(*argv++);
332 }
333 else {
334 process_stream(stdin, "standard input");
335 }
269
336
270 /*
271 * Swipe trailing blanks from the line.
272 */
273 for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
274 ;
275 *++cp2 = '\0';
276 prefix(canonb);
277 if (c != EOF)
278 c = getc(fi);
279 }
280}
337 /* We're done. */
281
338
282/*
283 * Take a line devoid of tabs and other garbage and determine its
284 * blank prefix. If the indent changes, call for a linebreak.
285 * If the input line is blank, echo the blank line on the output.
286 * Finally, if the line minus the prefix is a mail header, try to keep
287 * it on a line by itself.
288 */
289void
290prefix(line)
291 char line[];
292{
293 register char *cp, **hp;
294 register int np, h;
339 return n_errors ? EX_NOINPUT : 0;
295
340
296 if (!*line) {
297 oflush();
298 putchar('\n');
299 return;
300 }
301 for (cp = line; *cp == ' '; cp++)
302 ;
303 np = cp - line;
341}
304
342
305 /*
306 * The following horrible expression attempts to avoid linebreaks
307 * when the indent changes due to a paragraph.
308 */
309 if (np != pfx && (np > pfx || abs(pfx-np) > 8))
310 oflush();
311 if ((h = ishead(cp)))
312 oflush(), mark = lineno;
313 if (lineno - mark < 3 && lineno - mark > 0)
314 for (hp = &headnames[0]; *hp != (char *) 0; hp++)
315 if (ispref(*hp, cp)) {
316 h = 1;
317 oflush();
318 break;
319 }
320 if (!h && (h = (*cp == '.')))
321 oflush();
322 pfx = np;
323 if (h)
324 pack(cp, strlen(cp));
325 else split(cp);
326 if (h)
327 oflush();
328 lineno++;
343/* Process a single file, given its name.
344 */
345static void
346process_named_file(const char *name) {
347 FILE *f=fopen(name, "r");
348 if (!f) { perror(name); ++n_errors; }
349 else {
350 process_stream(f, name);
351 fclose(f);
352 }
329}
330
353}
354
331/*
332 * Split up the passed line into output "words" which are
333 * maximal strings of non-blanks with the blank separation
334 * attached at the end. Pass these words along to the output
335 * line packer.
355/* Types of mail header continuation lines:
336 */
356 */
337void
338split(line)
339 char line[];
340{
341 register char *cp, *cp2;
342 static char *word=0;
343 static int wordsize=0;
344 int wordl; /* LIZ@UOM 6/18/85 */
357typedef enum {
/* Classification of an input line for mail-header handling. The numeric
 * values matter: callers test |prev_header_type > hdr_NonHeader| to mean
 * "previous line was a header or a header continuation".
 */
358 hdr_ParagraphStart = -1, /* no previous line in this paragraph (initial state, or just after a blank line) */
359 hdr_NonHeader = 0, /* ordinary text; the default classification of every line */
360 hdr_Header = 1, /* unindented line accepted by might_be_header() */
361 hdr_Continuation = 2 /* indented line directly after a header or another continuation */
362} HdrType;
345
363
346 {
347 int l = strlen(line);
348 if (l >= wordsize) {
349 if (word)
350 free(word);
351 wordsize = (l+66)&~63;
352 word = malloc(wordsize);
353 if (word == NULL)
354 abort();
355 }
356 }
364/* Process a stream. This is where the real work happens,
365 * except that centering is handled separately.
366 */
367static void
368process_stream(FILE *stream, const char *name) {
369 size_t last_indent=SILLY; /* how many spaces in last indent? */
370 size_t para_line_number=0; /* how many lines already read in this para? */
371 size_t first_indent=SILLY; /* indentation of line 0 of paragraph */
372 HdrType prev_header_type=hdr_ParagraphStart;
373 /* ^-- header_type of previous line; -1 at para start */
374 char *line;
375 size_t length;
357
376
358 cp = line;
359 while (*cp) {
360 cp2 = word;
361 wordl = 0; /* LIZ@UOM 6/18/85 */
377 if (centerP) { center_stream(stream, name); return; }
378 while ((line=get_line(stream,&length)) != NULL) {
379 size_t np=indent_length(line, length);
380 { HdrType header_type=hdr_NonHeader;
381 if (grok_mail_headers && prev_header_type!=hdr_NonHeader) {
382 if (np==0 && might_be_header(line))
383 header_type = hdr_Header;
384 else if (np>0 && prev_header_type>hdr_NonHeader)
385 header_type = hdr_Continuation;
386 }
387 /* We need a new paragraph if and only if:
388 * this line is blank,
389 * OR it's a mail header,
390 * OR it's not a mail header AND the last line was one,
391 * OR the indentation has changed
392 * AND the line isn't a mail header continuation line
393 * AND this isn't the second line of an indented paragraph.
394 */
395 if ( length==0
396 || header_type==hdr_Header
397 || (header_type==hdr_NonHeader && prev_header_type>hdr_NonHeader)
398 || (np!=last_indent
399 && header_type != hdr_Continuation
400 && (!allow_indented_paragraphs || para_line_number != 1)) ) {
401 new_paragraph(output_in_paragraph ? last_indent : first_indent, np);
402 para_line_number = 0;
403 first_indent = np;
404 last_indent = np;
405 if (header_type==hdr_Header) last_indent=2; /* for cont. lines */
406 if (length==0) {
407 putchar('\n');
408 prev_header_type=hdr_ParagraphStart;
409 continue;
410 }
411 }
412 else {
413 /* If this is an indented paragraph other than a mail header
414 * continuation, set |last_indent|.
415 */
416 if (np != last_indent && header_type != hdr_Continuation)
417 last_indent=np;
418 }
419 prev_header_type = header_type;
420 }
362
421
363 /*
364 * Collect a 'word,' allowing it to contain escaped white
365 * space.
366 */
367 while (*cp && *cp != ' ') {
368 if (*cp == '\\' && isspace(cp[1]))
369 *cp2++ = *cp++;
370 *cp2++ = *cp++;
371 wordl++;/* LIZ@UOM 6/18/85 */
372 }
373
374 /*
375 * Guarantee a space at end of line. Two spaces after end of
376 * sentence punctuation.
377 */
378 if (*cp == '\0') {
379 *cp2++ = ' ';
380 if (cp != line && index(".:!", cp[-1]))
381 *cp2++ = ' ';
382 }
383 while (*cp == ' ')
384 *cp2++ = *cp++;
385 *cp2 = '\0';
386 /*
387 * LIZ@UOM 6/18/85 pack(word);
388 */
389 pack(word, wordl);
390 }
422 { size_t n=np;
423 while (n<length) {
424 /* Find word end and count spaces after it */
425 size_t word_length=0, space_length=0;
426 while (n+word_length < length && line[n+word_length] != ' ')
427 ++word_length;
428 space_length = word_length;
429 while (n+space_length < length && line[n+space_length] == ' ')
430 ++space_length;
431 /* Send the word to the output machinery. */
432 output_word(first_indent, last_indent,
433 line+n, word_length, space_length-word_length);
434 n += space_length;
435 }
436 }
437 ++para_line_number;
438 }
439 new_paragraph(output_in_paragraph ? last_indent : first_indent, 0);
440 if (ferror(stream)) { perror(name); ++n_errors; }
391}
392
441}
442
393/*
394 * Output section.
395 * Build up line images from the words passed in. Prefix
396 * each line with correct number of blanks. The buffer "outbuf"
397 * contains the current partial line image, including prefixed blanks.
398 * "outp" points to the next available space therein. When outp is NOSTR,
399 * there ain't nothing in there yet. At the bottom of this whole mess,
400 * leading tabs are reinserted.
443/* How long is the indent on this line?
401 */
444 */
402char *outbuf; /* Sandbagged output line image */
403char *outp; /* Pointer in above */
404int outbuf_size; /* er, size of outbuf */
/* Count the leading spaces of |line|, examining at most |length|
 * characters. Only ' ' counts as indentation.
 * Returns the number of leading spaces (between 0 and |length|).
 */
static size_t
indent_length(const char *line, size_t length) {
  size_t count;

  for (count = 0; count < length; ++count) {
    if (line[count] != ' ')
      break;
  }
  return count;
}
405
451
406/*
407 * Initialize the output section.
452/* Might this line be a mail header?
453 * We deem a line to be a possible header if it matches the
454 * Perl regexp /^[A-Z][-A-Za-z0-9]*:\s/. This is *not* the same
455 * as in RFC 822; we want to be gratuitously
456 * conservative to avoid mangling ordinary civilised text.
408 */
457 */
409void
410setout()
411{
412 outbuf = malloc(BUFSIZ);
413 if (outbuf == 0)
414 abort();
415 outbuf_size = BUFSIZ;
416 outp = NOSTR;
458static int
459might_be_header(const unsigned char *line) {
460 if (!isupper(*line++)) return 0;
461 while (*line && (isalnum(*line) || *line=='-')) ++line;
462 return (*line==':' && isspace(line[1]));
417}
418
463}
464
419/*
420 * Pack a word onto the output line. If this is the beginning of
421 * the line, push on the appropriately-sized string of blanks first.
422 * If the word won't fit on the current line, flush and begin a new
423 * line. If the word is too long to fit all by itself on a line,
424 * just give it its own and hope for the best.
425 *
426 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
427 * goal length, take it. If not, then check to see if the line
428 * will be over the max length; if so put the word on the next
429 * line. If not, check to see if the line will be closer to the
430 * goal length with or without the word and take it or put it on
431 * the next line accordingly.
465/* Begin a new paragraph with an indent of |indent| spaces.
432 */
466 */
433
434/*
435 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
436 * pack(word)
437 * char word[];
438 */
439void
440pack(word,wl)
441 char word[];
442 int wl;
443{
444 register char *cp;
445 register int s, t;
446
447 if (((outp==NOSTR) ? wl : outp-outbuf + wl) >= outbuf_size) {
448 char *old_outbuf = outbuf;
449 outbuf_size *= 2;
450 outbuf = realloc(outbuf, outbuf_size);
451 if (outbuf == 0)
452 abort();
453 outp += outbuf-old_outbuf;
454 }
455
456 if (outp == NOSTR)
457 leadin();
458 /*
459 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
460 * length of the line before the word is added; t is now the length
461 * of the line after the word is added
462 * t = strlen(word);
463 * if (t+s <= LENGTH)
464 */
465 s = outp - outbuf;
466 t = wl + s;
467 if ((t <= goal_length) ||
468 ((t <= max_length) && (t - goal_length <= goal_length - s))) {
469 /*
470 * In like flint!
471 */
472 for (cp = word; *cp; *outp++ = *cp++);
473 return;
474 }
475 if (s > pfx) {
476 oflush();
477 leadin();
478 }
479 for (cp = word; *cp; *outp++ = *cp++);
467static void
468new_paragraph(size_t old_indent, size_t indent) {
469 if (x0) {
470 if (old_indent>0) output_indent(old_indent);
471 fwrite(output_buffer, 1, x0, stdout);
472 putchar('\n');
473 }
474 x=indent; x0=0; pending_spaces=0;
475 output_in_paragraph = 0;
480}
481
476}
477
482/*
483 * If there is anything on the current output line, send it on
484 * its way. Set outp to NOSTR to indicate the absence of the current
485 * line prefix.
478/* Output spaces or tabs for leading indentation.
486 */
479 */
487void
488oflush()
489{
490 if (outp == NOSTR)
491 return;
492 *outp = '\0';
493 tabulate(outbuf);
494 outp = NOSTR;
480static void
481output_indent(size_t n_spaces) {
482 if (output_tab_width) {
483 while (n_spaces >= output_tab_width) {
484 putchar('\t');
485 n_spaces -= output_tab_width;
486 }
487 }
488 while (n_spaces-- > 0) putchar(' ');
495}
496
489}
490
497/*
498 * Take the passed line buffer, insert leading tabs where possible, and
499 * output on standard output (finally).
491/* Output a single word, or add it to the buffer.
492 * indent0 and indent1 are the indents to use on the first and subsequent
493 * lines of a paragraph. They'll often be the same, of course.
500 */
494 */
501void
502tabulate(line)
503 char line[];
504{
505 register char *cp;
506 register int b, t;
495static void
496output_word(size_t indent0, size_t indent1, const char *word, size_t length, size_t spaces) {
497 size_t new_x = x+pending_spaces+length;
498 size_t indent = output_in_paragraph ? indent1 : indent0;
507
499
508 /*
509 * Toss trailing blanks in the output line.
510 */
511 cp = line + strlen(line) - 1;
512 while (cp >= line && *cp == ' ')
513 cp--;
514 *++cp = '\0';
500 /* If either |spaces==0| (at end of line) or |coalesce_spaces_P|
501 * (squashing internal whitespace), then add just one space;
502 * except that if the last character was a sentence-ender we
503 * actually add two spaces.
504 */
505 if (coalesce_spaces_P || spaces==0)
506 spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1;
515
507
516 /*
517 * Count the leading blank space and tabulate.
518 */
519 for (cp = line; *cp == ' '; cp++)
520 ;
521 b = cp-line;
522 t = b >> 3;
523 b &= 07;
524 if (t > 0)
525 do
526 putc('\t', stdout);
527 while (--t);
528 if (b > 0)
529 do
530 putc(' ', stdout);
531 while (--b);
532 while (*cp)
533 putc(*cp++, stdout);
534 putc('\n', stdout);
508 if (new_x<=goal_length) {
509 /* After adding the word we still aren't at the goal length,
510 * so clearly we add it to the buffer rather than outputing it.
511 */
512 memset(output_buffer+x0, ' ', pending_spaces);
513 x0 += pending_spaces; x += pending_spaces;
514 memcpy(output_buffer+x0, word, length);
515 x0 += length; x += length;
516 pending_spaces = spaces;
517 }
518 else {
519 /* Adding the word takes us past the goal. Print the line-so-far,
520 * and the word too iff either (1) the lsf is empty or (2) that
521 * makes us nearer the goal but doesn't take us over the limit,
522 * or (3) the word on its own takes us over the limit.
523 * In case (3) we put a newline in between.
524 */
525 if (indent>0) output_indent(indent);
526 fwrite(output_buffer, 1, x0, stdout);
527 if (x0==0 || (new_x <= max_length && new_x-goal_length <= goal_length-x)) {
528 printf("%*s", pending_spaces, "");
529 goto write_out_word;
530 }
531 else {
532 /* If the word takes us over the limit on its own, just
533 * spit it out and don't bother buffering it.
534 */
535 if (indent+length > max_length) {
536 putchar('\n');
537 if (indent>0) output_indent(indent);
538write_out_word:
539 fwrite(word, 1, length, stdout);
540 x0 = 0; x = indent1; pending_spaces = 0;
541 }
542 else {
543 memcpy(output_buffer, word, length);
544 x0 = length; x = length+indent1; pending_spaces = spaces;
545 }
546 }
547 putchar('\n');
548 output_in_paragraph = 1;
549 }
535}
536
550}
551
537/*
538 * Initialize the output line with the appropriate number of
539 * leading blanks.
552/* Process a stream, but just center its lines rather than trying to
553 * format them neatly.
540 */
554 */
541void
542leadin()
543{
544 register int b;
545 register char *cp;
546
547 for (b = 0, cp = outbuf; b < pfx; b++)
548 *cp++ = ' ';
549 outp = cp;
555static void
556center_stream(FILE *stream, const char *name) {
557 char *line;
558 size_t length;
559 while ((line=get_line(stream, &length)) != 0) {
560 size_t l=length;
561 while (l>0 && isspace(*line)) { ++line; --l; }
562 length=l;
563 while (l<goal_length) { putchar(' '); l+=2; }
564 fwrite(line, 1, length, stdout);
565 putchar('\n');
566 }
567 if (ferror(stream)) { perror(name); ++n_errors; }
550}
551
568}
569
552/*
553 * Save a string in dynamic space.
554 * This little goodie is needed for
555 * a headline detector in head.c
570/* Get a single line from a stream. Expand tabs, strip control
571 * characters and trailing whitespace, and handle backspaces.
572 * Return the address of the buffer containing the line, and
573 * put the length of the line in |lengthp|.
574 * This can cope with arbitrarily long lines, and with lines
575 * without terminating \n.
576 * If there are no characters left or an error happens, we
577 * return 0.
578 * Don't confuse |spaces_pending| here with the global
579 * |pending_spaces|.
556 */
580 */
557char *
558savestr(str)
559 char str[];
560{
561 register char *top;
581static char *
582get_line(FILE *stream, size_t *lengthp) {
583 static char *buf=NULL;
584 static size_t length=0;
585 size_t len=0;
586 int ch;
587 size_t spaces_pending=0;
562
588
563 top = malloc(strlen(str) + 1);
564 if (top == NOSTR)
565 errx(1, "ran out of memory");
566 strcpy(top, str);
567 return (top);
589 if (buf==NULL) { length=100; buf=XMALLOC(length); }
590 while ((ch=getc(stream)) != '\n' && ch != EOF) {
591 if (ch==' ') ++spaces_pending;
592 else if (isprint(ch)) {
593 while (len+spaces_pending >= length) {
594 length*=2; buf=xrealloc(buf, length);
595 }
596 while (spaces_pending > 0) { --spaces_pending; buf[len++]=' '; }
597 buf[len++] = ch;
598 }
599 else if (ch=='\t')
600 spaces_pending += tab_width - (len+spaces_pending)%tab_width;
601 else if (ch=='\b') { if (len) --len; }
602 }
603 *lengthp=len;
604 return (len>0 || ch!=EOF) ? buf : 0;
568}
569
605}
606
/*
 * Is s1 a prefix of s2??
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so the empty string is a prefix of anything),
 * and 0 otherwise.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	/*
	 * Walk both strings together.  If s2 is the shorter one its
	 * terminator mismatches first, so we never read past its end.
	 */
	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}

/* (Re)allocate some memory, exiting with an error if we can't.
 * |ptr| and |nbytes| are handed straight to realloc(); on failure
 * the program exits via errx() with status EX_OSERR, so the return
 * value is never NULL.
 */
static void *
xrealloc(void *ptr, size_t nbytes) {
  void *p = realloc(ptr, nbytes);
  if (p == NULL) errx(EX_OSERR, "out of memory");
  return p;
}