fmt.c revision 8874
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
#ifndef lint
/*
 * Identification strings compiled into the program whenever "lint" is
 * not defined: the copyright notice and the SCCS version stamp.
 */
static char copyright[] =
	"@(#) Copyright (c) 1980, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
#endif /* not lint */
43
#include <ctype.h>
#include <stdio.h>
#include <string.h>
46
47/*
48 * fmt -- format the concatenation of input files or standard input
49 * onto standard output.  Designed for use with Mail ~|
50 *
51 * Syntax : fmt [ goal [ max ] ] [ name ... ]
52 * Authors: Kurt Shoens (UCB) 12/7/78;
53 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
54 */
55
/*
 * Null string pointer, spelled with a cast to keep lint quiet.
 */
#define	NOSTR	((char *) 0)

/*
 * Default line lengths.  These replaced the single fixed LENGTH (72)
 * limit when the goal/max scheme was added (LIZ@UOM 6/18/85).
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75

/*
 * Formatter state shared by the routines below.
 */
int	goal_length;	/* Line length the output aims for */
int	max_length;	/* Line length the output should not exceed */

int	pfx;		/* Number of leading blanks on the current line */
int	lineno;		/* Number of the input line being processed */
int	mark;		/* Value of lineno when a head line was last seen */

/*
 * Header names that are kept on a line of their own when they appear
 * shortly after a head line; the list is terminated by a null pointer.
 */
char	*headnames[] = { "To", "Subject", "Cc", 0 };

char	*malloc();	/* declared by hand, for lint */
72
73/*
74 * Drive the whole formatter by managing input files.  Also,
75 * cause initialization of the output stuff and flush it out
76 * at the end.
77 */
78
79main(argc, argv)
80	int argc;
81	char **argv;
82{
83	register FILE *fi;
84	register int errs = 0;
85	int number;		/* LIZ@UOM 6/18/85 */
86
87	goal_length = GOAL_LENGTH;
88	max_length = MAX_LENGTH;
89	setout();
90	lineno = 1;
91	mark = -10;
92	/*
93	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
94	 */
95	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
96		argv++;
97		argc--;
98		goal_length = number;
99		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
100			argv++;
101			argc--;
102			max_length = number;
103		}
104	}
105	if (max_length <= goal_length) {
106		fprintf(stderr, "Max length must be greater than %s\n",
107			"goal length");
108		exit(1);
109	}
110	if (argc < 2) {
111		fmt(stdin);
112		oflush();
113		exit(0);
114	}
115	while (--argc) {
116		if ((fi = fopen(*++argv, "r")) == NULL) {
117			perror(*argv);
118			errs++;
119			continue;
120		}
121		fmt(fi);
122		fclose(fi);
123	}
124	oflush();
125	exit(errs);
126}
127
/*
 * Read the passed input file one line at a time and hand each cleaned
 * line to prefix() for analysis.
 *
 * For every input line this routine:
 *   - applies backspaces by dropping the previously stored character,
 *   - discards characters that are neither printable nor a tab,
 *   - keeps at most BUFSIZ-1 characters and throws the rest away,
 *   - expands tabs to blanks on 8-column tab stops,
 *   - strips trailing blanks.
 *
 * fi is the stream to read; it is read until EOF and is not closed.
 * The return value is always 0.
 */
int
fmt(FILE *fi)
{
	int prefix(char *);	/* defined further down in this file */
	char linebuf[BUFSIZ], canonb[BUFSIZ];
	char *cp, *cp2;
	int c, col;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Tabs are left alone for now.
		 */
		cp = linebuf;
		while (c != '\n' && c != EOF && cp - linebuf < BUFSIZ - 1) {
			if (c == '\b') {
				if (cp > linebuf)
					cp--;
			} else if (isprint(c) || c == '\t') {
				*cp++ = c;
			}
			c = getc(fi);
		}
		*cp = '\0';

		/*
		 * Toss whatever did not fit in linebuf.
		 */
		while (c != '\n' && c != EOF)
			c = getc(fi);

		/*
		 * Expand tabs on the way to canonb.  col keeps counting
		 * even once canonb is full so tab stops stay consistent.
		 */
		col = 0;
		cp2 = canonb;
		for (cp = linebuf; *cp != '\0'; cp++) {
			if (*cp != '\t') {
				col++;
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = *cp;
				continue;
			}
			do {
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = ' ';
				col++;
			} while ((col & 07) != 0);
		}

		/*
		 * Strip trailing blanks.  The scan looks at cp2[-1] so the
		 * pointer never steps below canonb, even for an empty line.
		 */
		while (cp2 > canonb && cp2[-1] == ' ')
			cp2--;
		*cp2 = '\0';

		prefix(canonb);
		if (c != EOF)
			c = getc(fi);
	}
	return (0);
}
200
201/*
202 * Take a line devoid of tabs and other garbage and determine its
203 * blank prefix.  If the indent changes, call for a linebreak.
204 * If the input line is blank, echo the blank line on the output.
205 * Finally, if the line minus the prefix is a mail header, try to keep
206 * it on a line by itself.
207 */
208prefix(line)
209	char line[];
210{
211	register char *cp, **hp;
212	register int np, h;
213
214	if (!*line) {
215		oflush();
216		putchar('\n');
217		return;
218	}
219	for (cp = line; *cp == ' '; cp++)
220		;
221	np = cp - line;
222
223	/*
224	 * The following horrible expression attempts to avoid linebreaks
225	 * when the indent changes due to a paragraph.
226	 */
227	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
228		oflush();
229	if (h = ishead(cp))
230		oflush(), mark = lineno;
231	if (lineno - mark < 3 && lineno - mark > 0)
232		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
233			if (ispref(*hp, cp)) {
234				h = 1;
235				oflush();
236				break;
237			}
238	if (!h && (h = (*cp == '.')))
239		oflush();
240	pfx = np;
241	if (h)
242		pack(cp, strlen(cp));
243	else	split(cp);
244	if (h)
245		oflush();
246	lineno++;
247}
248
/*
 * Split the passed line into output "words" and pass each one to
 * pack() together with its length.
 *
 * A word is a maximal run of non-blanks with the blanks that follow it
 * attached at the end.  A backslash followed by white space is copied
 * into the word as a pair and counts as one character.  The last word
 * of the line always gets one trailing blank, or two when it ends in
 * '.', ':' or '!'.
 *
 * The length handed to pack() counts only the non-blank part of the
 * word, not the attached blanks.
 *
 * The word buffer is sized so that any line of up to BUFSIZ-1
 * characters fits together with the added blanks.  A longer word is
 * not written past the buffer; it is handed to pack() in pieces.
 * The return value is always 0.
 */
int
split(char line[])
{
	int pack(char *, int);	/* defined further down in this file */
	char word[BUFSIZ + 4];
	char *const last = word + sizeof word - 1;	/* slot for the NUL */
	char *cp, *cp2;
	int wordl;

	cp = line;
	while (*cp != '\0') {
		cp2 = word;
		wordl = 0;

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  Stop early while there is still room for one
		 * escape pair plus the two blanks added at end of line.
		 */
		while (*cp != '\0' && *cp != ' ') {
			if (last - cp2 < 4)
				break;
			if (*cp == '\\' && isspace((unsigned char) cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;
		}

		/*
		 * Guarantee a space at end of line.  Two spaces after end
		 * of sentence punctuation.  At least one character has been
		 * consumed by now, so cp[-1] is inside the line.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]) != NULL)
				*cp2++ = ' ';
		}
		while (*cp == ' ' && cp2 < last)
			*cp2++ = *cp++;
		*cp2 = '\0';

		pack(word, wordl);
	}
	return (0);
}
296
/*
 * Output section.
 *
 * Output lines are assembled from the words handed to pack().  The
 * line being built lives in "outbuf" and starts with its leading
 * blanks; "outp" is the position where the next character will go.
 * While outp is NOSTR no line has been started.  Leading blanks are
 * turned back into tabs only when the line is finally written out.
 */
char	*outp;				/* Next free position in outbuf */
char	outbuf[BUFSIZ];			/* Image of the pending output line */
308
309/*
310 * Initialize the output section.
311 */
312setout()
313{
314	outp = NOSTR;
315}
316
317/*
318 * Pack a word onto the output line.  If this is the beginning of
319 * the line, push on the appropriately-sized string of blanks first.
320 * If the word won't fit on the current line, flush and begin a new
321 * line.  If the word is too long to fit all by itself on a line,
322 * just give it its own and hope for the best.
323 *
324 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
325 *	goal length, take it.  If not, then check to see if the line
326 *	will be over the max length; if so put the word on the next
327 *	line.  If not, check to see if the line will be closer to the
328 *	goal length with or without the word and take it or put it on
329 *	the next line accordingly.
330 */
331
332/*
333 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
334 * pack(word)
335 *	char word[];
336 */
337pack(word,wl)
338	char word[];
339	int wl;
340{
341	register char *cp;
342	register int s, t;
343
344	if (outp == NOSTR)
345		leadin();
346	/*
347	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
348	 * length of the line before the word is added; t is now the length
349	 * of the line after the word is added
350	 *	t = strlen(word);
351	 *	if (t+s <= LENGTH)
352	 */
353	s = outp - outbuf;
354	t = wl + s;
355	if ((t <= goal_length) ||
356	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
357		/*
358		 * In like flint!
359		 */
360		for (cp = word; *cp; *outp++ = *cp++);
361		return;
362	}
363	if (s > pfx) {
364		oflush();
365		leadin();
366	}
367	for (cp = word; *cp; *outp++ = *cp++);
368}
369
370/*
371 * If there is anything on the current output line, send it on
372 * its way.  Set outp to NOSTR to indicate the absence of the current
373 * line prefix.
374 */
375oflush()
376{
377	if (outp == NOSTR)
378		return;
379	*outp = '\0';
380	tabulate(outbuf);
381	outp = NOSTR;
382}
383
/*
 * Write the passed line to standard output, followed by a newline.
 *
 * Trailing blanks are removed first; this shortens the caller's buffer
 * in place.  Each full group of 8 leading blanks is then written as a
 * tab and the remaining leading blanks as blanks.
 *
 * An empty or all-blank line is handled without stepping a pointer
 * before the start of the buffer.  The return value is always 0.
 */
int
tabulate(char line[])
{
	char *cp;
	size_t len;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = (int)(cp - line);
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);

	while (*cp != '\0')
		putc(*cp++, stdout);
	putc('\n', stdout);
	return (0);
}
422
423/*
424 * Initialize the output line with the appropriate number of
425 * leading blanks.
426 */
427leadin()
428{
429	register int b;
430	register char *cp;
431
432	for (b = 0, cp = outbuf; b < pfx; b++)
433		*cp++ = ' ';
434	outp = cp;
435}
436
437/*
438 * Save a string in dynamic space.
439 * This little goodie is needed for
440 * a headline detector in head.c
441 */
442char *
443savestr(str)
444	char str[];
445{
446	register char *top;
447
448	top = malloc(strlen(str) + 1);
449	if (top == NOSTR) {
450		fprintf(stderr, "fmt:  Ran out of memory\n");
451		exit(1);
452	}
453	strcpy(top, str);
454	return (top);
455}
456
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, and 0 otherwise.  The empty string is a prefix
 * of everything.  Comparison stops at the first mismatch, which
 * includes the end of s2, so neither string is read past its
 * terminating NUL.
 */
int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
467}
468