fmt.c revision 15344
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static char copyright[] =
36"@(#) Copyright (c) 1980, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#ifndef lint
41static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
42#endif /* not lint */
43
#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
48
49/*
50 * fmt -- format the concatenation of input files or standard input
51 * onto standard output.  Designed for use with Mail ~|
52 *
53 * Syntax : fmt [ goal [ max ] ] [ name ... ]
54 * Authors: Kurt Shoens (UCB) 12/7/78;
55 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
56 */
57
58/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
59 * #define	LENGTH	72		Max line length in output
60 */
61#define	NOSTR	((char *) 0)	/* Null string pointer for lint */
62
63/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
#define GOAL_LENGTH 65		/* Default goal line length */
#define MAX_LENGTH 75		/* Default maximum line length */
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */

/*
 * malloc() is declared by <stdlib.h>; it must not be redeclared here
 * with a different return type.
 *
 * Header field names that are still treated as header lines when they
 * appear within two lines after a detected head line.
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};
74
75/*
76 * Drive the whole formatter by managing input files.  Also,
77 * cause initialization of the output stuff and flush it out
78 * at the end.
79 */
80
81main(argc, argv)
82	int argc;
83	char **argv;
84{
85	register FILE *fi;
86	register int errs = 0;
87	int number;		/* LIZ@UOM 6/18/85 */
88
89	(void) setlocale(LC_CTYPE, "");
90
91	goal_length = GOAL_LENGTH;
92	max_length = MAX_LENGTH;
93	setout();
94	lineno = 1;
95	mark = -10;
96	/*
97	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
98	 */
99	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
100		argv++;
101		argc--;
102		goal_length = number;
103		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
104			argv++;
105			argc--;
106			max_length = number;
107		}
108	}
109	if (max_length <= goal_length) {
110		fprintf(stderr, "Max length must be greater than %s\n",
111			"goal length");
112		exit(1);
113	}
114	if (argc < 2) {
115		fmt(stdin);
116		oflush();
117		exit(0);
118	}
119	while (--argc) {
120		if ((fi = fopen(*++argv, "r")) == NULL) {
121			perror(*argv);
122			errs++;
123			continue;
124		}
125		fmt(fi);
126		fclose(fi);
127	}
128	oflush();
129	exit(errs);
130}
131
#define CHUNKSIZE 1024

/*
 * Make sure the buffer *bufp, currently *sizep bytes long, can hold at
 * least "need" bytes, growing it in CHUNKSIZE steps.  The existing
 * contents are preserved.  Allocation failure aborts the program.
 */
static void
growbuf(bufp, sizep, need)
	char **bufp;
	int *sizep;
	int need;
{
	char *nbuf;
	int nsize;

	if (need <= *sizep)
		return;
	nsize = *sizep;
	while (nsize < need)
		nsize += CHUNKSIZE;
	nbuf = realloc(*bufp, nsize);
	if (nbuf == 0)
		abort();
	*bufp = nbuf;
	*sizep = nsize;
}

/*
 * Read up characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down to prefix() for analysis.
 *
 * Characters that are neither printable nor a tab are dropped.
 * Both work buffers are static and grow on demand; each is always
 * kept large enough for its terminating NUL, and both exist before
 * they are used, even when the line turns out to be empty.
 */
int
fmt(fi)
	FILE *fi;
{
	static char *linebuf = 0, *canonb = 0;
	static int lbufsize = 0, cbufsize = 0;
	register char *cp;
	register int c, len, col;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.
		 */
		len = 0;
		for (; c != '\n' && c != EOF; c = getc(fi)) {
			if (c == '\b') {
				if (len > 0)
					len--;
				continue;
			}
			if (!isprint(c) && c != '\t')
				continue;
			/* Room for this character and the final NUL. */
			growbuf(&linebuf, &lbufsize, len + 2);
			linebuf[len++] = c;
		}
		growbuf(&linebuf, &lbufsize, len + 1);
		linebuf[len] = '\0';

		/*
		 * Expand tabs on the way to canonb.  Every character
		 * stored advances the column, so col doubles as the
		 * index of the next free byte in canonb.
		 */
		col = 0;
		for (cp = linebuf; *cp != '\0'; cp++) {
			if (*cp != '\t') {
				growbuf(&canonb, &cbufsize, col + 2);
				canonb[col++] = *cp;
				continue;
			}
			do {
				growbuf(&canonb, &cbufsize, col + 2);
				canonb[col++] = ' ';
			} while ((col & 07) != 0);
		}
		growbuf(&canonb, &cbufsize, col + 1);

		/*
		 * Swipe trailing blanks from the line.
		 */
		while (col > 0 && canonb[col - 1] == ' ')
			col--;
		canonb[col] = '\0';
		prefix(canonb);
		if (c != EOF)
			c = getc(fi);
	}
	return (0);
}
235
236/*
237 * Take a line devoid of tabs and other garbage and determine its
238 * blank prefix.  If the indent changes, call for a linebreak.
239 * If the input line is blank, echo the blank line on the output.
240 * Finally, if the line minus the prefix is a mail header, try to keep
241 * it on a line by itself.
242 */
243prefix(line)
244	char line[];
245{
246	register char *cp, **hp;
247	register int np, h;
248
249	if (!*line) {
250		oflush();
251		putchar('\n');
252		return;
253	}
254	for (cp = line; *cp == ' '; cp++)
255		;
256	np = cp - line;
257
258	/*
259	 * The following horrible expression attempts to avoid linebreaks
260	 * when the indent changes due to a paragraph.
261	 */
262	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
263		oflush();
264	if (h = ishead(cp))
265		oflush(), mark = lineno;
266	if (lineno - mark < 3 && lineno - mark > 0)
267		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
268			if (ispref(*hp, cp)) {
269				h = 1;
270				oflush();
271				break;
272			}
273	if (!h && (h = (*cp == '.')))
274		oflush();
275	pfx = np;
276	if (h)
277		pack(cp, strlen(cp));
278	else	split(cp);
279	if (h)
280		oflush();
281	lineno++;
282}
283
284/*
285 * Split up the passed line into output "words" which are
286 * maximal strings of non-blanks with the blank separation
287 * attached at the end.  Pass these words along to the output
288 * line packer.
289 */
290split(line)
291	char line[];
292{
293	register char *cp, *cp2;
294	char word[BUFSIZ];
295	int wordl;		/* LIZ@UOM 6/18/85 */
296
297	cp = line;
298	while (*cp) {
299		cp2 = word;
300		wordl = 0;	/* LIZ@UOM 6/18/85 */
301
302		/*
303		 * Collect a 'word,' allowing it to contain escaped white
304		 * space.
305		 */
306		while (*cp && *cp != ' ') {
307			if (*cp == '\\' && isspace(cp[1]))
308				*cp2++ = *cp++;
309			*cp2++ = *cp++;
310			wordl++;/* LIZ@UOM 6/18/85 */
311		}
312
313		/*
314		 * Guarantee a space at end of line. Two spaces after end of
315		 * sentence punctuation.
316		 */
317		if (*cp == '\0') {
318			*cp2++ = ' ';
319			if (index(".:!", cp[-1]))
320				*cp2++ = ' ';
321		}
322		while (*cp == ' ')
323			*cp2++ = *cp++;
324		*cp2 = '\0';
325		/*
326		 * LIZ@UOM 6/18/85 pack(word);
327		 */
328		pack(word, wordl);
329	}
330}
331
332/*
333 * Output section.
334 * Build up line images from the words passed in.  Prefix
335 * each line with correct number of blanks.  The buffer "outbuf"
336 * contains the current partial line image, including prefixed blanks.
337 * "outp" points to the next available space therein.  When outp is NOSTR,
338 * there ain't nothing in there yet.  At the bottom of this whole mess,
339 * leading tabs are reinserted.
340 */
341char	outbuf[BUFSIZ];			/* Sandbagged output line image */
342char	*outp;				/* Pointer in above */
343
344/*
345 * Initialize the output section.
346 */
347setout()
348{
349	outp = NOSTR;
350}
351
352/*
353 * Pack a word onto the output line.  If this is the beginning of
354 * the line, push on the appropriately-sized string of blanks first.
355 * If the word won't fit on the current line, flush and begin a new
356 * line.  If the word is too long to fit all by itself on a line,
357 * just give it its own and hope for the best.
358 *
359 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
360 *	goal length, take it.  If not, then check to see if the line
361 *	will be over the max length; if so put the word on the next
362 *	line.  If not, check to see if the line will be closer to the
363 *	goal length with or without the word and take it or put it on
364 *	the next line accordingly.
365 */
366
367/*
368 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
369 * pack(word)
370 *	char word[];
371 */
372pack(word,wl)
373	char word[];
374	int wl;
375{
376	register char *cp;
377	register int s, t;
378
379	if (outp == NOSTR)
380		leadin();
381	/*
382	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
383	 * length of the line before the word is added; t is now the length
384	 * of the line after the word is added
385	 *	t = strlen(word);
386	 *	if (t+s <= LENGTH)
387	 */
388	s = outp - outbuf;
389	t = wl + s;
390	if ((t <= goal_length) ||
391	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
392		/*
393		 * In like flint!
394		 */
395		for (cp = word; *cp; *outp++ = *cp++);
396		return;
397	}
398	if (s > pfx) {
399		oflush();
400		leadin();
401	}
402	for (cp = word; *cp; *outp++ = *cp++);
403}
404
405/*
406 * If there is anything on the current output line, send it on
407 * its way.  Set outp to NOSTR to indicate the absence of the current
408 * line prefix.
409 */
410oflush()
411{
412	if (outp == NOSTR)
413		return;
414	*outp = '\0';
415	tabulate(outbuf);
416	outp = NOSTR;
417}
418
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from "line" in place.  Every eight
 * leading blanks are written as one tab, the remainder as blanks,
 * followed by the rest of the line and a newline.
 */
int
tabulate(line)
	char line[];
{
	register char *cp;
	register int b, t;
	size_t len;

	/*
	 * Toss trailing blanks in the output line.  Working with a
	 * length rather than a pointer keeps the scan inside the
	 * array even when the line is empty or all blanks.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	t = b >> 3;
	b &= 07;
	while (t-- > 0)
		putc('\t', stdout);
	while (b-- > 0)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
	return (0);
}
457
458/*
459 * Initialize the output line with the appropriate number of
460 * leading blanks.
461 */
462leadin()
463{
464	register int b;
465	register char *cp;
466
467	for (b = 0, cp = outbuf; b < pfx; b++)
468		*cp++ = ' ';
469	outp = cp;
470}
471
472/*
473 * Save a string in dynamic space.
474 * This little goodie is needed for
475 * a headline detector in head.c
476 */
477char *
478savestr(str)
479	char str[];
480{
481	register char *top;
482
483	top = malloc(strlen(str) + 1);
484	if (top == NOSTR) {
485		fprintf(stderr, "fmt:  Ran out of memory\n");
486		exit(1);
487	}
488	strcpy(top, str);
489	return (top);
490}
491
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, 0 otherwise.  The empty string is a prefix of
 * everything.  Neither string is read past its terminating NUL: a
 * shorter s2 fails on the comparison with its NUL.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	while (*s1 != '\0')
		if (*s1++ != *s2++)
			return (0);
	return (1);
}
503