/* fmt.c revision 27185 */
/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* Copyright notice kept in the object file (omitted when linting). */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

/* Version-control identification string (the SCCS variant is disabled). */
#ifndef lint
#if 0
static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
#else
static const char rcsid[] =
	"$Id$";
#endif
#endif /* not lint */

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * fmt -- format the concatenation of input files or standard input
 * onto standard output.  Designed for use with Mail ~|
 *
 * Syntax : fmt [ -c ] [ goal [ max ] ] [ name ... ]
 * Authors: Kurt Shoens (UCB) 12/7/78;
 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
 */

/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
 * #define	LENGTH	72		Max line length in output
 */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
#define GOAL_LENGTH 65		/* Default for goal_length */
#define MAX_LENGTH 75		/* Default for max_length */
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */
int	center;			/* Nonzero if -c given: center lines, don't fill */

/* Header names that prefix() keeps on a line of their own. */
char	*headnames[] = {"To", "Subject", "Cc", 0};

/*
 * Plain ANSI prototypes; the file already depends on ANSI C (const, void),
 * so the __P() wrapper for pre-ANSI compilers bought nothing.
 */
void fmt(FILE *fi);
/*
 * ishead() is not defined in this file; the comment above savestr() places
 * the headline detector in head.c.  NOTE(review): signature assumed from
 * the call in prefix() -- confirm against head.c.
 */
int ishead(char *line);
int ispref(char *s1, char *s2);
void leadin(void);
void oflush(void);
void pack(char word[], int wl);
void prefix(char line[]);
char *savestr(char str[]);
void setout(void);
void split(char line[]);
void tabulate(char line[]);

92/*
93 * Drive the whole formatter by managing input files.  Also,
94 * cause initialization of the output stuff and flush it out
95 * at the end.
96 */
97
98int
99main(argc, argv)
100	int argc;
101	char **argv;
102{
103	register FILE *fi;
104	register int errs = 0;
105	int number;		/* LIZ@UOM 6/18/85 */
106
107	(void) setlocale(LC_CTYPE, "");
108
109	goal_length = GOAL_LENGTH;
110	max_length = MAX_LENGTH;
111	setout();
112	lineno = 1;
113	mark = -10;
114	/*
115	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
116	 */
117	if (argc > 1 && !strcmp(argv[1], "-c")) {
118		center++;
119		argc--;
120		argv++;
121	}
122	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
123		argv++;
124		argc--;
125		goal_length = number;
126		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
127			argv++;
128			argc--;
129			max_length = number;
130		}
131	}
132	if (max_length <= goal_length)
133		errx(1, "max length must be greater than goal length");
134	if (argc < 2) {
135		fmt(stdin);
136		oflush();
137		exit(0);
138	}
139	while (--argc) {
140		if ((fi = fopen(*++argv, "r")) == NULL) {
141			perror(*argv);
142			errs++;
143			continue;
144		}
145		fmt(fi);
146		fclose(fi);
147	}
148	oflush();
149	exit(errs);
150}
151
152/*
153 * Read up characters from the passed input file, forming lines,
154 * doing ^H processing, expanding tabs, stripping trailing blanks,
155 * and sending each line down for analysis.
156 */
157void
158fmt(fi)
159	FILE *fi;
160{
161	static char *linebuf = 0, *canonb = 0;
162	register char *cp, *cp2, cc;
163	register int c, col;
164#define CHUNKSIZE 1024
165	static int lbufsize = 0, cbufsize = 0;
166
167	if (center) {
168		linebuf = malloc(BUFSIZ);
169		while (1) {
170			cp = fgets(linebuf, BUFSIZ, fi);
171			if (!cp)
172				return;
173			while (*cp && isspace(*cp))
174				cp++;
175			cp2 = cp + strlen(cp) - 1;
176			while (cp2 > cp && isspace(*cp2))
177				cp2--;
178			if (cp == cp2)
179				putchar('\n');
180			col = cp2 - cp;
181			for (c = 0; c < (goal_length-col)/2; c++)
182				putchar(' ');
183			while (cp <= cp2)
184				putchar(*cp++);
185			putchar('\n');
186		}
187	}
188	c = getc(fi);
189	while (c != EOF) {
190		/*
191		 * Collect a line, doing ^H processing.
192		 * Leave tabs for now.
193		 */
194		cp = linebuf;
195		while (c != '\n' && c != EOF) {
196			if (cp - linebuf >= lbufsize) {
197				int offset = cp - linebuf;
198				lbufsize += CHUNKSIZE;
199				linebuf = realloc(linebuf, lbufsize);
200				if(linebuf == 0)
201					abort();
202				cp = linebuf + offset;
203			}
204			if (c == '\b') {
205				if (cp > linebuf)
206					cp--;
207				c = getc(fi);
208				continue;
209			}
210			if (!isprint(c) && c != '\t') {
211				c = getc(fi);
212				continue;
213			}
214			*cp++ = c;
215			c = getc(fi);
216		}
217
218		/*
219		 * Toss anything remaining on the input line.
220		 */
221		while (c != '\n' && c != EOF)
222			c = getc(fi);
223
224		if (cp != NULL) {
225			*cp = '\0';
226		} else {
227			putchar('\n');
228			c = getc(fi);
229			continue;
230		}
231
232		/*
233		 * Expand tabs on the way to canonb.
234		 */
235		col = 0;
236		cp = linebuf;
237		cp2 = canonb;
238		while ((cc = *cp++)) {
239			if (cc != '\t') {
240				col++;
241				if (cp2 - canonb >= cbufsize) {
242					int offset = cp2 - canonb;
243					cbufsize += CHUNKSIZE;
244					canonb = realloc(canonb, cbufsize);
245					if(canonb == 0)
246						abort();
247					cp2 = canonb + offset;
248				}
249				*cp2++ = cc;
250				continue;
251			}
252			do {
253				if (cp2 - canonb >= cbufsize) {
254					int offset = cp2 - canonb;
255					cbufsize += CHUNKSIZE;
256					canonb = realloc(canonb, cbufsize);
257					if(canonb == 0)
258						abort();
259					cp2 = canonb + offset;
260				}
261				*cp2++ = ' ';
262				col++;
263			} while ((col & 07) != 0);
264		}
265
266		/*
267		 * Swipe trailing blanks from the line.
268		 */
269		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
270			;
271		*++cp2 = '\0';
272		prefix(canonb);
273		if (c != EOF)
274			c = getc(fi);
275	}
276}
277
278/*
279 * Take a line devoid of tabs and other garbage and determine its
280 * blank prefix.  If the indent changes, call for a linebreak.
281 * If the input line is blank, echo the blank line on the output.
282 * Finally, if the line minus the prefix is a mail header, try to keep
283 * it on a line by itself.
284 */
285void
286prefix(line)
287	char line[];
288{
289	register char *cp, **hp;
290	register int np, h;
291
292	if (!*line) {
293		oflush();
294		putchar('\n');
295		return;
296	}
297	for (cp = line; *cp == ' '; cp++)
298		;
299	np = cp - line;
300
301	/*
302	 * The following horrible expression attempts to avoid linebreaks
303	 * when the indent changes due to a paragraph.
304	 */
305	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
306		oflush();
307	if ((h = ishead(cp)))
308		oflush(), mark = lineno;
309	if (lineno - mark < 3 && lineno - mark > 0)
310		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
311			if (ispref(*hp, cp)) {
312				h = 1;
313				oflush();
314				break;
315			}
316	if (!h && (h = (*cp == '.')))
317		oflush();
318	pfx = np;
319	if (h)
320		pack(cp, strlen(cp));
321	else	split(cp);
322	if (h)
323		oflush();
324	lineno++;
325}
326
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * The length handed to pack() counts a backslash-escaped white space
 * pair as one character and excludes the trailing blanks.
 */
void
split(char line[])
{
	char *cp, *cp2, *word;
	int wordl;		/* LIZ@UOM 6/18/85 */

	/*
	 * A word can be as long as the whole line, and input lines have
	 * no fixed limit, so size the buffer from the line: every byte of
	 * it, up to two added blanks, and the terminator.
	 */
	word = malloc(strlen(line) + 3);
	if (word == NULL)
		errx(1, "ran out of memory");

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
	free(word);
}

/*
 * Output section.
 * Build up line images from the words passed in.  Prefix
 * each line with correct number of blanks.  The buffer "outbuf"
 * contains the current partial line image, including prefixed blanks.
 * "outp" points to the next available space therein.  When outp is NOSTR,
 * there ain't nothing in there yet.  At the bottom of this whole mess,
 * leading tabs are reinserted.
 */
char	outbuf[BUFSIZ];			/* Sandbagged output line image */
char	*outp;				/* Pointer in above */

388/*
389 * Initialize the output section.
390 */
391void
392setout()
393{
394	outp = NOSTR;
395}
396
397/*
398 * Pack a word onto the output line.  If this is the beginning of
399 * the line, push on the appropriately-sized string of blanks first.
400 * If the word won't fit on the current line, flush and begin a new
401 * line.  If the word is too long to fit all by itself on a line,
402 * just give it its own and hope for the best.
403 *
404 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
405 *	goal length, take it.  If not, then check to see if the line
406 *	will be over the max length; if so put the word on the next
407 *	line.  If not, check to see if the line will be closer to the
408 *	goal length with or without the word and take it or put it on
409 *	the next line accordingly.
410 */
411
412/*
413 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
414 * pack(word)
415 *	char word[];
416 */
417void
418pack(word,wl)
419	char word[];
420	int wl;
421{
422	register char *cp;
423	register int s, t;
424
425	if (outp == NOSTR)
426		leadin();
427	/*
428	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
429	 * length of the line before the word is added; t is now the length
430	 * of the line after the word is added
431	 *	t = strlen(word);
432	 *	if (t+s <= LENGTH)
433	 */
434	s = outp - outbuf;
435	t = wl + s;
436	if ((t <= goal_length) ||
437	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
438		/*
439		 * In like flint!
440		 */
441		for (cp = word; *cp; *outp++ = *cp++);
442		return;
443	}
444	if (s > pfx) {
445		oflush();
446		leadin();
447	}
448	for (cp = word; *cp; *outp++ = *cp++);
449}
450
451/*
452 * If there is anything on the current output line, send it on
453 * its way.  Set outp to NOSTR to indicate the absence of the current
454 * line prefix.
455 */
456void
457oflush()
458{
459	if (outp == NOSTR)
460		return;
461	*outp = '\0';
462	tabulate(outbuf);
463	outp = NOSTR;
464}
465
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are cut off in place, so "line" is modified.  Each
 * full group of eight leading blanks is written as one tab; the line is
 * always followed by a newline.
 */
void
tabulate(char line[])
{
	size_t len;
	char *cp;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  Working from the
	 * length avoids stepping a pointer below the start of the buffer
	 * when the line is empty or all blanks.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}

506/*
507 * Initialize the output line with the appropriate number of
508 * leading blanks.
509 */
510void
511leadin()
512{
513	register int b;
514	register char *cp;
515
516	for (b = 0, cp = outbuf; b < pfx; b++)
517		*cp++ = ' ';
518	outp = cp;
519}
520
/*
 * Save a string in dynamic space.
 * This little goodie is needed for
 * a headline detector in head.c
 *
 * Returns a malloc()ed copy of "str" that the caller owns; exits with
 * an error message instead of returning when memory runs out.
 */
char *
savestr(char str[])
{
	char *top;

	top = malloc(strlen(str) + 1);
	if (top == NULL)
		errx(1, "ran out of memory");
	strcpy(top, str);
	return (top);
}

/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so an empty s1 is a prefix of anything), and 0
 * otherwise.  strncmp() stops at the end of s2, so neither string is
 * read past its terminator.
 */
int
ispref(char *s1, char *s2)
{

	return (strncmp(s1, s2, strlen(s1)) == 0);
}
