fmt.c revision 28478
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static char copyright[] =
36"@(#) Copyright (c) 1980, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#ifndef lint
41#if 0
42static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
43#else
44static const char rcsid[] =
45	"$Id: fmt.c,v 1.9 1997/07/03 07:19:46 charnier Exp $";
46#endif
47#endif /* not lint */
48
49#include <ctype.h>
50#include <err.h>
51#include <locale.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55
56/*
57 * fmt -- format the concatenation of input files or standard input
58 * onto standard output.  Designed for use with Mail ~|
59 *
 * Syntax : fmt [ -c ] [ goal [ max ] ] [ name ... ]
61 * Authors: Kurt Shoens (UCB) 12/7/78;
62 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
63 */
64
65/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
66 * #define	LENGTH	72		Max line length in output
67 */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/*
 * LIZ@UOM 6/18/85 -- goal_length and max_length replace the old fixed
 * LENGTH.  GOAL_LENGTH and MAX_LENGTH are the defaults main() installs
 * before looking at the command line.
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */
int	center;			/* Nonzero after -c: fmt() centers each line */

/*
 * Mail header names that prefix() keeps on a line of their own when they
 * show up within two lines of the last head line.  Zero-terminated.
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};

void fmt __P((FILE *));
/*
 * ishead() is not defined in this file; prefix() calls it with a char *
 * and tests the result as an int.
 * NOTE(review): confirm this matches the definition in head.c.
 */
int ishead __P((char *));
int ispref __P((char *, char *));
void leadin __P((void));
void oflush __P((void));
void pack __P((char [], int));
void prefix __P((char []));
char *savestr __P((char *));
void setout __P((void));
void split __P((char []));
void tabulate __P((char []));
91
92/*
93 * Drive the whole formatter by managing input files.  Also,
94 * cause initialization of the output stuff and flush it out
95 * at the end.
96 */
97
98int
99main(argc, argv)
100	int argc;
101	char **argv;
102{
103	register FILE *fi;
104	register int errs = 0;
105	int number;		/* LIZ@UOM 6/18/85 */
106
107	(void) setlocale(LC_CTYPE, "");
108
109	goal_length = GOAL_LENGTH;
110	max_length = MAX_LENGTH;
111	setout();
112	lineno = 1;
113	mark = -10;
114	/*
115	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
116	 */
117	if (argc > 1 && !strcmp(argv[1], "-c")) {
118		center++;
119		argc--;
120		argv++;
121	}
122	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
123		argv++;
124		argc--;
125		goal_length = number;
126		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
127			argv++;
128			argc--;
129			max_length = number;
130		}
131	}
132	if (max_length <= goal_length)
133		errx(1, "max length must be greater than goal length");
134	if (argc < 2) {
135		fmt(stdin);
136		oflush();
137		exit(0);
138	}
139	while (--argc) {
140		if ((fi = fopen(*++argv, "r")) == NULL) {
141			perror(*argv);
142			errs++;
143			continue;
144		}
145		fmt(fi);
146		fclose(fi);
147	}
148	oflush();
149	exit(errs);
150}
151
152/*
153 * Read up characters from the passed input file, forming lines,
154 * doing ^H processing, expanding tabs, stripping trailing blanks,
155 * and sending each line down for analysis.
156 */
157void
158fmt(fi)
159	FILE *fi;
160{
161	static char *linebuf = 0, *canonb = 0;
162	register char *cp, *cp2, cc;
163	register int c, col;
164#define CHUNKSIZE 1024
165	static int lbufsize = 0, cbufsize = CHUNKSIZE;
166
167	canonb = malloc(CHUNKSIZE);
168	if (canonb == 0)
169		abort();
170
171	if (center) {
172		linebuf = malloc(BUFSIZ);
173		while (1) {
174			cp = fgets(linebuf, BUFSIZ, fi);
175			if (!cp)
176				return;
177			while (*cp && isspace(*cp))
178				cp++;
179			cp2 = cp + strlen(cp) - 1;
180			while (cp2 > cp && isspace(*cp2))
181				cp2--;
182			if (cp == cp2)
183				putchar('\n');
184			col = cp2 - cp;
185			for (c = 0; c < (goal_length-col)/2; c++)
186				putchar(' ');
187			while (cp <= cp2)
188				putchar(*cp++);
189			putchar('\n');
190		}
191	}
192	c = getc(fi);
193	while (c != EOF) {
194		/*
195		 * Collect a line, doing ^H processing.
196		 * Leave tabs for now.
197		 */
198		cp = linebuf;
199		while (c != '\n' && c != EOF) {
200			if (cp - linebuf >= lbufsize) {
201				int offset = cp - linebuf;
202				lbufsize += CHUNKSIZE;
203				linebuf = realloc(linebuf, lbufsize);
204				if(linebuf == 0)
205					abort();
206				cp = linebuf + offset;
207			}
208			if (c == '\b') {
209				if (cp > linebuf)
210					cp--;
211				c = getc(fi);
212				continue;
213			}
214			if (!isprint(c) && c != '\t') {
215				c = getc(fi);
216				continue;
217			}
218			*cp++ = c;
219			c = getc(fi);
220		}
221
222		/*
223		 * Toss anything remaining on the input line.
224		 */
225		while (c != '\n' && c != EOF)
226			c = getc(fi);
227
228		if (cp != NULL) {
229			*cp = '\0';
230		} else {
231			putchar('\n');
232			c = getc(fi);
233			continue;
234		}
235
236		/*
237		 * Expand tabs on the way to canonb.
238		 */
239		col = 0;
240		cp = linebuf;
241		cp2 = canonb;
242		while ((cc = *cp++)) {
243			if (cc != '\t') {
244				col++;
245				if (cp2 - canonb >= cbufsize) {
246					int offset = cp2 - canonb;
247					cbufsize += CHUNKSIZE;
248					canonb = realloc(canonb, cbufsize);
249					if(canonb == 0)
250						abort();
251					cp2 = canonb + offset;
252				}
253				*cp2++ = cc;
254				continue;
255			}
256			do {
257				if (cp2 - canonb >= cbufsize) {
258					int offset = cp2 - canonb;
259					cbufsize += CHUNKSIZE;
260					canonb = realloc(canonb, cbufsize);
261					if(canonb == 0)
262						abort();
263					cp2 = canonb + offset;
264				}
265				*cp2++ = ' ';
266				col++;
267			} while ((col & 07) != 0);
268		}
269
270		/*
271		 * Swipe trailing blanks from the line.
272		 */
273		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
274			;
275		*++cp2 = '\0';
276		prefix(canonb);
277		if (c != EOF)
278			c = getc(fi);
279	}
280}
281
282/*
283 * Take a line devoid of tabs and other garbage and determine its
284 * blank prefix.  If the indent changes, call for a linebreak.
285 * If the input line is blank, echo the blank line on the output.
286 * Finally, if the line minus the prefix is a mail header, try to keep
287 * it on a line by itself.
288 */
289void
290prefix(line)
291	char line[];
292{
293	register char *cp, **hp;
294	register int np, h;
295
296	if (!*line) {
297		oflush();
298		putchar('\n');
299		return;
300	}
301	for (cp = line; *cp == ' '; cp++)
302		;
303	np = cp - line;
304
305	/*
306	 * The following horrible expression attempts to avoid linebreaks
307	 * when the indent changes due to a paragraph.
308	 */
309	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
310		oflush();
311	if ((h = ishead(cp)))
312		oflush(), mark = lineno;
313	if (lineno - mark < 3 && lineno - mark > 0)
314		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
315			if (ispref(*hp, cp)) {
316				h = 1;
317				oflush();
318				break;
319			}
320	if (!h && (h = (*cp == '.')))
321		oflush();
322	pfx = np;
323	if (h)
324		pack(cp, strlen(cp));
325	else	split(cp);
326	if (h)
327		oflush();
328	lineno++;
329}
330
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer, together with the word length not counting the
 * trailing blanks.
 *
 * The scratch buffer for the current word is sized from the line itself
 * (and kept for later calls), so a word of any length is safe.
 */
void
split(line)
	char line[];
{
	static char *word = NULL;	/* grow-only scratch buffer */
	static size_t wordsize = 0;
	char *cp, *cp2;
	size_t need;
	int wordl;		/* LIZ@UOM 6/18/85 */

	/*
	 * A word holds at most every character of the line, plus the two
	 * blanks that may be appended at end of line, plus the NUL.
	 */
	need = strlen(line) + 3;
	if (need > wordsize) {
		char *nbuf = realloc(word, need);

		if (nbuf == NULL)
			errx(1, "ran out of memory");
		word = nbuf;
		wordsize = need;
	}

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash and the white space it escapes count
		 * as a single character of word length.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is valid here: reaching the
		 * terminator means the loop above consumed at least one
		 * character.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (cp[-1] == '.' || cp[-1] == ':' || cp[-1] == '!')
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		pack(word, wordl);
	}
}
379
380/*
381 * Output section.
382 * Build up line images from the words passed in.  Prefix
383 * each line with correct number of blanks.  The buffer "outbuf"
384 * contains the current partial line image, including prefixed blanks.
385 * "outp" points to the next available space therein.  When outp is NOSTR,
386 * there ain't nothing in there yet.  At the bottom of this whole mess,
387 * leading tabs are reinserted.
388 */
389char	outbuf[BUFSIZ];			/* Sandbagged output line image */
390char	*outp;				/* Pointer in above */
391
392/*
393 * Initialize the output section.
394 */
395void
396setout()
397{
398	outp = NOSTR;
399}
400
401/*
402 * Pack a word onto the output line.  If this is the beginning of
403 * the line, push on the appropriately-sized string of blanks first.
404 * If the word won't fit on the current line, flush and begin a new
405 * line.  If the word is too long to fit all by itself on a line,
406 * just give it its own and hope for the best.
407 *
408 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
409 *	goal length, take it.  If not, then check to see if the line
410 *	will be over the max length; if so put the word on the next
411 *	line.  If not, check to see if the line will be closer to the
412 *	goal length with or without the word and take it or put it on
413 *	the next line accordingly.
414 */
415
416/*
417 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
418 * pack(word)
419 *	char word[];
420 */
421void
422pack(word,wl)
423	char word[];
424	int wl;
425{
426	register char *cp;
427	register int s, t;
428
429	if (outp == NOSTR)
430		leadin();
431	/*
432	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
433	 * length of the line before the word is added; t is now the length
434	 * of the line after the word is added
435	 *	t = strlen(word);
436	 *	if (t+s <= LENGTH)
437	 */
438	s = outp - outbuf;
439	t = wl + s;
440	if ((t <= goal_length) ||
441	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
442		/*
443		 * In like flint!
444		 */
445		for (cp = word; *cp; *outp++ = *cp++);
446		return;
447	}
448	if (s > pfx) {
449		oflush();
450		leadin();
451	}
452	for (cp = word; *cp; *outp++ = *cp++);
453}
454
455/*
456 * If there is anything on the current output line, send it on
457 * its way.  Set outp to NOSTR to indicate the absence of the current
458 * line prefix.
459 */
460void
461oflush()
462{
463	if (outp == NOSTR)
464		return;
465	*outp = '\0';
466	tabulate(outbuf);
467	outp = NOSTR;
468}
469
/*
 * Write one finished line to standard output, followed by a newline.
 * Trailing blanks are cut off (the passed buffer is shortened in place)
 * and the run of leading blanks is written as one tab per full group of
 * eight blanks followed by the remaining blanks.
 */
void
tabulate(line)
	char line[];
{
	size_t len, indent, n;

	/*
	 * Drop trailing blanks.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Measure the leading blanks, then emit them as tabs + blanks.
	 */
	indent = 0;
	while (line[indent] == ' ')
		indent++;
	for (n = indent / 8; n > 0; n--)
		putc('\t', stdout);
	for (n = indent % 8; n > 0; n--)
		putc(' ', stdout);

	for (n = indent; line[n] != '\0'; n++)
		putc(line[n], stdout);
	putc('\n', stdout);
}
509
510/*
511 * Initialize the output line with the appropriate number of
512 * leading blanks.
513 */
514void
515leadin()
516{
517	register int b;
518	register char *cp;
519
520	for (b = 0, cp = outbuf; b < pfx; b++)
521		*cp++ = ' ';
522	outp = cp;
523}
524
/*
 * Return a freshly malloc'ed copy of str.
 * Exits with "ran out of memory" when the allocation fails.
 * Nothing in this part of the file calls it; the original note says the
 * headline detector in head.c needs it.
 */
char *
savestr(str)
	char str[];
{
	size_t size;
	char *copy;

	size = strlen(str) + 1;		/* text plus terminating NUL */
	copy = malloc(size);
	if (!copy)
		errx(1, "ran out of memory");
	strcpy(copy, str);
	return (copy);
}
542
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the same
 * position in s2 (an empty s1 is a prefix of anything), 0 otherwise.
 * Both pointers advance together, and the scan stops at the first
 * mismatch, so neither string is read past its terminator.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	while (*s1 != '\0') {
		/* If s2 ends first, its NUL mismatches and we stop here. */
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
555