fmt.c revision 11765
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
/*
 * Identification strings embedded in the object file.  They are
 * compiled out when "lint" is defined.
 */
#ifndef lint
static char copyright[] =
	"@(#) Copyright (c) 1980, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
#endif /* not lint */
43
44#include <stdio.h>
45#include <ctype.h>
46#include <locale.h>
47
48/*
49 * fmt -- format the concatenation of input files or standard input
50 * onto standard output.  Designed for use with Mail ~|
51 *
52 * Syntax : fmt [ goal [ max ] ] [ name ... ]
53 * Authors: Kurt Shoens (UCB) 12/7/78;
54 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
55 */
56
/*
 * NOSTR is the null character pointer, written with a cast to keep
 * lint quiet.  (LIZ@UOM 6/18/85: the old fixed LENGTH of 72 columns
 * is no longer used; the goal/max pair below replaces it.)
 */
#define	NOSTR		((char *) 0)

/*
 * LIZ@UOM 6/18/85 -- line length limits.  The two macros are the values
 * main() installs before it looks at the command line.
 */
#define	GOAL_LENGTH	65
#define	MAX_LENGTH	75

int	goal_length;	/* Target (goal) length of an output line */
int	max_length;	/* Longest output line we try to produce */

int	pfx;		/* Number of leading blanks on the current line */
int	lineno;		/* Number of the current input line */
int	mark;		/* Input line number of the last head line seen */

char	*malloc();	/* declared here for lint . . . */

/*
 * Header field names that prefix() looks for on the lines following
 * a head line; the list ends with a null pointer.
 */
char	*headnames[] = {
	"To",
	"Subject",
	"Cc",
	0
};
73
74/*
75 * Drive the whole formatter by managing input files.  Also,
76 * cause initialization of the output stuff and flush it out
77 * at the end.
78 */
79
80main(argc, argv)
81	int argc;
82	char **argv;
83{
84	register FILE *fi;
85	register int errs = 0;
86	int number;		/* LIZ@UOM 6/18/85 */
87
88	(void) setlocale(LC_CTYPE, "");
89
90	goal_length = GOAL_LENGTH;
91	max_length = MAX_LENGTH;
92	setout();
93	lineno = 1;
94	mark = -10;
95	/*
96	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
97	 */
98	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
99		argv++;
100		argc--;
101		goal_length = number;
102		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
103			argv++;
104			argc--;
105			max_length = number;
106		}
107	}
108	if (max_length <= goal_length) {
109		fprintf(stderr, "Max length must be greater than %s\n",
110			"goal length");
111		exit(1);
112	}
113	if (argc < 2) {
114		fmt(stdin);
115		oflush();
116		exit(0);
117	}
118	while (--argc) {
119		if ((fi = fopen(*++argv, "r")) == NULL) {
120			perror(*argv);
121			errs++;
122			continue;
123		}
124		fmt(fi);
125		fclose(fi);
126	}
127	oflush();
128	exit(errs);
129}
130
/*
 * Read up characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down to prefix() for analysis.
 *
 * Characters that are neither printable nor tabs are dropped.  An
 * input line is cut off after BUFSIZ-1 stored characters; whatever
 * remains of it is read and discarded.
 */
int
fmt(fi)
	FILE *fi;
{
	char linebuf[BUFSIZ], canonb[BUFSIZ];
	register char *in, *out;
	register int c, col;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.
		 */
		in = linebuf;
		while (c != '\n' && c != EOF && in - linebuf < BUFSIZ - 1) {
			if (c == '\b') {
				/* A backspace erases the previous character. */
				if (in > linebuf)
					in--;
			} else if (isprint(c) || c == '\t')
				*in++ = c;
			c = getc(fi);
		}
		*in = '\0';

		/*
		 * Toss anything remaining on the input line.
		 */
		while (c != '\n' && c != EOF)
			c = getc(fi);

		/*
		 * Expand tabs on the way to canonb.  "col" keeps counting
		 * even when canonb is full so that tab stops stay aligned.
		 */
		col = 0;
		out = canonb;
		for (in = linebuf; *in != '\0'; in++) {
			if (*in != '\t') {
				col++;
				if (out - canonb < BUFSIZ - 1)
					*out++ = *in;
			} else {
				do {
					if (out - canonb < BUFSIZ - 1)
						*out++ = ' ';
					col++;
				} while ((col & 07) != 0);
			}
		}

		/*
		 * Swipe trailing blanks from the line.  "out" points one
		 * past the last stored character and only ever backs up
		 * as far as canonb itself, so an empty or all-blank line
		 * never produces a pointer in front of the buffer.
		 */
		while (out > canonb && out[-1] == ' ')
			out--;
		*out = '\0';
		prefix(canonb);

		/* Step over the newline that ended this line. */
		if (c != EOF)
			c = getc(fi);
	}
}
203
204/*
205 * Take a line devoid of tabs and other garbage and determine its
206 * blank prefix.  If the indent changes, call for a linebreak.
207 * If the input line is blank, echo the blank line on the output.
208 * Finally, if the line minus the prefix is a mail header, try to keep
209 * it on a line by itself.
210 */
211prefix(line)
212	char line[];
213{
214	register char *cp, **hp;
215	register int np, h;
216
217	if (!*line) {
218		oflush();
219		putchar('\n');
220		return;
221	}
222	for (cp = line; *cp == ' '; cp++)
223		;
224	np = cp - line;
225
226	/*
227	 * The following horrible expression attempts to avoid linebreaks
228	 * when the indent changes due to a paragraph.
229	 */
230	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
231		oflush();
232	if (h = ishead(cp))
233		oflush(), mark = lineno;
234	if (lineno - mark < 3 && lineno - mark > 0)
235		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
236			if (ispref(*hp, cp)) {
237				h = 1;
238				oflush();
239				break;
240			}
241	if (!h && (h = (*cp == '.')))
242		oflush();
243	pfx = np;
244	if (h)
245		pack(cp, strlen(cp));
246	else	split(cp);
247	if (h)
248		oflush();
249	lineno++;
250}
251
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer, together with the count of non-blank characters
 * in each (a backslash-blank pair counts as one).
 *
 * The word buffer is sized for the longest line fmt() can produce
 * (BUFSIZ-1 characters) plus the two blanks that may be appended
 * and the terminating NUL.
 */
int
split(line)
	char line[];
{
	register char *cp, *cp2;
	char word[BUFSIZ + 2];
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  The cast keeps isspace() away from negative
		 * values when plain char is signed.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char) cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is the last character of
		 * the word just collected.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (cp[-1] == '.' || cp[-1] == ':' || cp[-1] == '!')
				*cp2++ = ' ';
		}

		/* Carry the blanks that followed the word in the input. */
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
299
300/*
301 * Output section.
302 * Build up line images from the words passed in.  Prefix
303 * each line with correct number of blanks.  The buffer "outbuf"
304 * contains the current partial line image, including prefixed blanks.
305 * "outp" points to the next available space therein.  When outp is NOSTR,
306 * there ain't nothing in there yet.  At the bottom of this whole mess,
307 * leading tabs are reinserted.
308 */
309char	outbuf[BUFSIZ];			/* Sandbagged output line image */
310char	*outp;				/* Pointer in above */
311
312/*
313 * Initialize the output section.
314 */
315setout()
316{
317	outp = NOSTR;
318}
319
320/*
321 * Pack a word onto the output line.  If this is the beginning of
322 * the line, push on the appropriately-sized string of blanks first.
323 * If the word won't fit on the current line, flush and begin a new
324 * line.  If the word is too long to fit all by itself on a line,
325 * just give it its own and hope for the best.
326 *
327 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
328 *	goal length, take it.  If not, then check to see if the line
329 *	will be over the max length; if so put the word on the next
330 *	line.  If not, check to see if the line will be closer to the
331 *	goal length with or without the word and take it or put it on
332 *	the next line accordingly.
333 */
334
335/*
336 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
337 * pack(word)
338 *	char word[];
339 */
340pack(word,wl)
341	char word[];
342	int wl;
343{
344	register char *cp;
345	register int s, t;
346
347	if (outp == NOSTR)
348		leadin();
349	/*
350	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
351	 * length of the line before the word is added; t is now the length
352	 * of the line after the word is added
353	 *	t = strlen(word);
354	 *	if (t+s <= LENGTH)
355	 */
356	s = outp - outbuf;
357	t = wl + s;
358	if ((t <= goal_length) ||
359	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
360		/*
361		 * In like flint!
362		 */
363		for (cp = word; *cp; *outp++ = *cp++);
364		return;
365	}
366	if (s > pfx) {
367		oflush();
368		leadin();
369	}
370	for (cp = word; *cp; *outp++ = *cp++);
371}
372
373/*
374 * If there is anything on the current output line, send it on
375 * its way.  Set outp to NOSTR to indicate the absence of the current
376 * line prefix.
377 */
378oflush()
379{
380	if (outp == NOSTR)
381		return;
382	*outp = '\0';
383	tabulate(outbuf);
384	outp = NOSTR;
385}
386
/*
 * Take the passed line buffer, strip its trailing blanks in place,
 * insert leading tabs where possible, and output it on standard
 * output (finally), followed by a newline.
 *
 * Every full group of eight leading blanks is written as one tab;
 * any leading blanks left over are written as blanks.
 */
int
tabulate(line)
	char line[];
{
	register char *cp;
	register int b, t;

	/*
	 * Toss trailing blanks in the output line.  Find the end, then
	 * back up over blanks without ever stepping below line[0], so
	 * that an empty or all-blank line is handled safely.
	 */
	for (cp = line; *cp != '\0'; cp++)
		;
	while (cp > line && cp[-1] == ' ')
		cp--;
	*cp = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	t = b >> 3;		/* whole tab stops */
	b &= 07;		/* blanks left over */
	while (t-- > 0)
		putc('\t', stdout);
	while (b-- > 0)
		putc(' ', stdout);

	/* The rest of the line goes out unchanged. */
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}
425
426/*
427 * Initialize the output line with the appropriate number of
428 * leading blanks.
429 */
430leadin()
431{
432	register int b;
433	register char *cp;
434
435	for (b = 0, cp = outbuf; b < pfx; b++)
436		*cp++ = ' ';
437	outp = cp;
438}
439
440/*
441 * Save a string in dynamic space.
442 * This little goodie is needed for
443 * a headline detector in head.c
444 */
445char *
446savestr(str)
447	char str[];
448{
449	register char *top;
450
451	top = malloc(strlen(str) + 1);
452	if (top == NOSTR) {
453		fprintf(stderr, "fmt:  Ran out of memory\n");
454		exit(1);
455	}
456	strcpy(top, str);
457	return (top);
458}
459
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 equals the character at the
 * same position in s2 (so an empty s1 is a prefix of anything), and
 * 0 otherwise.  Neither string is modified.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	/*
	 * Advance through both strings together.  Any mismatch ends the
	 * search; that includes reaching the end of s2 first, because its
	 * terminating NUL cannot equal a remaining character of s1.
	 */
	while (*s1 != '\0')
		if (*s1++ != *s2++)
			return (0);
	return (1);
}
471