fmt.c revision 30009
1177633Sdfr/*
2177633Sdfr * Copyright (c) 1980, 1993
3177633Sdfr *	The Regents of the University of California.  All rights reserved.
4177633Sdfr *
5177633Sdfr * Redistribution and use in source and binary forms, with or without
6177633Sdfr * modification, are permitted provided that the following conditions
7177633Sdfr * are met:
8177633Sdfr * 1. Redistributions of source code must retain the above copyright
9177633Sdfr *    notice, this list of conditions and the following disclaimer.
10177633Sdfr * 2. Redistributions in binary form must reproduce the above copyright
11177633Sdfr *    notice, this list of conditions and the following disclaimer in the
12177633Sdfr *    documentation and/or other materials provided with the distribution.
13177633Sdfr * 3. All advertising materials mentioning features or use of this software
14177633Sdfr *    must display the following acknowledgement:
15177633Sdfr *	This product includes software developed by the University of
16177633Sdfr *	California, Berkeley and its contributors.
17177633Sdfr * 4. Neither the name of the University nor the names of its contributors
18177633Sdfr *    may be used to endorse or promote products derived from this software
19177633Sdfr *    without specific prior written permission.
20177633Sdfr *
21177633Sdfr * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22177633Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23177633Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24177633Sdfr * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25177633Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26177633Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27177633Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28177633Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29177633Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30177633Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31177633Sdfr * SUCH DAMAGE.
32177633Sdfr */
33177633Sdfr
34177633Sdfr#ifndef lint
35177633Sdfrstatic char copyright[] =
36177633Sdfr"@(#) Copyright (c) 1980, 1993\n\
37177633Sdfr	The Regents of the University of California.  All rights reserved.\n";
38177633Sdfr#endif /* not lint */
39177633Sdfr
40177633Sdfr#ifndef lint
41177633Sdfr#if 0
42177633Sdfrstatic char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
43177633Sdfr#else
44177633Sdfrstatic const char rcsid[] =
45177633Sdfr	"$Id: fmt.c,v 1.10 1997/08/21 03:41:41 jlemon Exp $";
46177633Sdfr#endif
47177633Sdfr#endif /* not lint */
48177633Sdfr
49177633Sdfr#include <ctype.h>
50177633Sdfr#include <err.h>
51177633Sdfr#include <locale.h>
52177633Sdfr#include <stdio.h>
53177633Sdfr#include <stdlib.h>
54177633Sdfr#include <string.h>
55177633Sdfr
56184588Sdfr/*
57177633Sdfr * fmt -- format the concatenation of input files or standard input
58177633Sdfr * onto standard output.  Designed for use with Mail ~|
59177633Sdfr *
 * Syntax : fmt [ -c ] [ goal [ max ] ] [ name ... ]
61196503Szec * Authors: Kurt Shoens (UCB) 12/7/78;
62177633Sdfr *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
63177633Sdfr */
64177685Sdfr
65177633Sdfr/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
66177633Sdfr * #define	LENGTH	72		Max line length in output
67184588Sdfr */
68184588Sdfr#define	NOSTR	((char *) 0)	/* Null string pointer for lint */
69184588Sdfr
/*
 * Formatter state shared by the routines in this file.
 *
 * LIZ@UOM 6/18/85 -- goal_length and max_length replace the old fixed
 * LENGTH; the two macros below are the values they start out with.
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75

int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	center;			/* Nonzero: center each line instead of filling */

int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */

/* Header keywords looked for on the lines just after a head line. */
char	*headnames[] = {"To", "Subject", "Cc", (char *) 0};
81177633Sdfr
82177633Sdfrvoid fmt __P((FILE *));
83177633Sdfrint ispref __P((char *, char *));
84177633Sdfrvoid leadin __P((void));
85177633Sdfrvoid oflush __P((void));
86177633Sdfrvoid pack __P((char [], int));
87177633Sdfrvoid prefix __P((char []));
88177633Sdfrvoid setout __P((void));
89177633Sdfrvoid split __P((char []));
90177633Sdfrvoid tabulate __P((char []));
91177633Sdfr
92177633Sdfr/*
93177633Sdfr * Drive the whole formatter by managing input files.  Also,
94177633Sdfr * cause initialization of the output stuff and flush it out
95177633Sdfr * at the end.
96177633Sdfr */
97177633Sdfr
98177633Sdfrint
99177633Sdfrmain(argc, argv)
100177633Sdfr	int argc;
101177633Sdfr	char **argv;
102177633Sdfr{
103177633Sdfr	register FILE *fi;
104177633Sdfr	register int errs = 0;
105177633Sdfr	int number;		/* LIZ@UOM 6/18/85 */
106177633Sdfr
107177633Sdfr	(void) setlocale(LC_CTYPE, "");
108177633Sdfr
109177633Sdfr	goal_length = GOAL_LENGTH;
110177633Sdfr	max_length = MAX_LENGTH;
111177633Sdfr	setout();
112177633Sdfr	lineno = 1;
113177633Sdfr	mark = -10;
114177633Sdfr	/*
115177633Sdfr	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
116177633Sdfr	 */
117177633Sdfr	if (argc > 1 && !strcmp(argv[1], "-c")) {
118177633Sdfr		center++;
119177633Sdfr		argc--;
120184588Sdfr		argv++;
121184588Sdfr	}
122177633Sdfr	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
123177633Sdfr		argv++;
124177633Sdfr		argc--;
125177633Sdfr		goal_length = number;
126177633Sdfr		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
127177633Sdfr			argv++;
128218757Sbz			argc--;
129177633Sdfr			max_length = number;
130196503Szec		}
131177633Sdfr	}
132177633Sdfr	if (max_length <= goal_length)
133177633Sdfr		errx(1, "max length must be greater than goal length");
134184588Sdfr	if (argc < 2) {
135177633Sdfr		fmt(stdin);
136177633Sdfr		oflush();
137177633Sdfr		exit(0);
138177633Sdfr	}
139177633Sdfr	while (--argc) {
140193272Sjhb		if ((fi = fopen(*++argv, "r")) == NULL) {
141177633Sdfr			perror(*argv);
142177633Sdfr			errs++;
143177633Sdfr			continue;
144177633Sdfr		}
145177633Sdfr		fmt(fi);
146177633Sdfr		fclose(fi);
147184588Sdfr	}
148177633Sdfr	oflush();
149177633Sdfr	exit(errs);
150177633Sdfr}
151177633Sdfr
152177633Sdfr/*
153177633Sdfr * Read up characters from the passed input file, forming lines,
154177633Sdfr * doing ^H processing, expanding tabs, stripping trailing blanks,
155177633Sdfr * and sending each line down for analysis.
156177633Sdfr */
157184588Sdfrvoid
158184588Sdfrfmt(fi)
159184588Sdfr	FILE *fi;
160177633Sdfr{
161177633Sdfr	static char *linebuf = 0, *canonb = 0;
162177633Sdfr	register char *cp, *cp2, cc;
163177633Sdfr	register int c, col;
164184588Sdfr#define CHUNKSIZE 1024
165184588Sdfr	static int lbufsize = 0, cbufsize = CHUNKSIZE;
166177633Sdfr
167177633Sdfr	canonb = malloc(CHUNKSIZE);
168177633Sdfr	if (canonb == 0)
169177633Sdfr		abort();
170184588Sdfr
171177633Sdfr	if (center) {
172177633Sdfr		linebuf = malloc(BUFSIZ);
173177633Sdfr		while (1) {
174184588Sdfr			cp = fgets(linebuf, BUFSIZ, fi);
175184588Sdfr			if (!cp)
176184588Sdfr				return;
177184588Sdfr			while (*cp && isspace(*cp))
178184588Sdfr				cp++;
179177633Sdfr			cp2 = cp + strlen(cp) - 1;
180177633Sdfr			while (cp2 > cp && isspace(*cp2))
181177633Sdfr				cp2--;
182177633Sdfr			if (cp == cp2)
183177633Sdfr				putchar('\n');
184177633Sdfr			col = cp2 - cp;
185177633Sdfr			for (c = 0; c < (goal_length-col)/2; c++)
186177633Sdfr				putchar(' ');
187177633Sdfr			while (cp <= cp2)
188177633Sdfr				putchar(*cp++);
189177633Sdfr			putchar('\n');
190177633Sdfr		}
191177633Sdfr	}
192184588Sdfr	c = getc(fi);
193184588Sdfr	while (c != EOF) {
194184588Sdfr		/*
195184588Sdfr		 * Collect a line, doing ^H processing.
196184588Sdfr		 * Leave tabs for now.
197184588Sdfr		 */
198184588Sdfr		cp = linebuf;
199184588Sdfr		while (c != '\n' && c != EOF) {
200184588Sdfr			if (cp - linebuf >= lbufsize) {
201184588Sdfr				int offset = cp - linebuf;
202184588Sdfr				lbufsize += CHUNKSIZE;
203184588Sdfr				linebuf = realloc(linebuf, lbufsize);
204184588Sdfr				if(linebuf == 0)
205177633Sdfr					abort();
206177633Sdfr				cp = linebuf + offset;
207177633Sdfr			}
208177633Sdfr			if (c == '\b') {
209177633Sdfr				if (cp > linebuf)
210193272Sjhb					cp--;
211177633Sdfr				c = getc(fi);
212177633Sdfr				continue;
213184588Sdfr			}
214177633Sdfr			if (!isprint(c) && c != '\t') {
215177633Sdfr				c = getc(fi);
216177633Sdfr				continue;
217184588Sdfr			}
218177633Sdfr			*cp++ = c;
219184588Sdfr			c = getc(fi);
220184588Sdfr		}
221184588Sdfr
222177633Sdfr		/*
223177633Sdfr		 * Toss anything remaining on the input line.
224177633Sdfr		 */
225184588Sdfr		while (c != '\n' && c != EOF)
226184588Sdfr			c = getc(fi);
227184588Sdfr
228184588Sdfr		if (cp != NULL) {
229177633Sdfr			*cp = '\0';
230177633Sdfr		} else {
231177633Sdfr			putchar('\n');
232177633Sdfr			c = getc(fi);
233184588Sdfr			continue;
234184588Sdfr		}
235177633Sdfr
236184588Sdfr		/*
237177633Sdfr		 * Expand tabs on the way to canonb.
238184588Sdfr		 */
239177633Sdfr		col = 0;
240177633Sdfr		cp = linebuf;
241177633Sdfr		cp2 = canonb;
242177633Sdfr		while ((cc = *cp++)) {
243177633Sdfr			if (cc != '\t') {
244184588Sdfr				col++;
245184588Sdfr				if (cp2 - canonb >= cbufsize) {
246184588Sdfr					int offset = cp2 - canonb;
247184588Sdfr					cbufsize += CHUNKSIZE;
248184588Sdfr					canonb = realloc(canonb, cbufsize);
249184588Sdfr					if(canonb == 0)
250184588Sdfr						abort();
251184588Sdfr					cp2 = canonb + offset;
252184588Sdfr				}
253184588Sdfr				*cp2++ = cc;
254184588Sdfr				continue;
255184588Sdfr			}
256184588Sdfr			do {
257177633Sdfr				if (cp2 - canonb >= cbufsize) {
258184588Sdfr					int offset = cp2 - canonb;
259177633Sdfr					cbufsize += CHUNKSIZE;
260177633Sdfr					canonb = realloc(canonb, cbufsize);
261177633Sdfr					if(canonb == 0)
262177633Sdfr						abort();
263177633Sdfr					cp2 = canonb + offset;
264177633Sdfr				}
265177633Sdfr				*cp2++ = ' ';
266177633Sdfr				col++;
267184588Sdfr			} while ((col & 07) != 0);
268177633Sdfr		}
269177633Sdfr
270177633Sdfr		/*
271177633Sdfr		 * Swipe trailing blanks from the line.
272177633Sdfr		 */
273177633Sdfr		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
274177633Sdfr			;
275177633Sdfr		*++cp2 = '\0';
276184588Sdfr		prefix(canonb);
277177633Sdfr		if (c != EOF)
278193272Sjhb			c = getc(fi);
279177633Sdfr	}
280177633Sdfr}
281184588Sdfr
282177633Sdfr/*
283177633Sdfr * Take a line devoid of tabs and other garbage and determine its
284177633Sdfr * blank prefix.  If the indent changes, call for a linebreak.
285184588Sdfr * If the input line is blank, echo the blank line on the output.
286184588Sdfr * Finally, if the line minus the prefix is a mail header, try to keep
287184588Sdfr * it on a line by itself.
288177633Sdfr */
289177633Sdfrvoid
290177633Sdfrprefix(line)
291177633Sdfr	char line[];
292177633Sdfr{
293177633Sdfr	register char *cp, **hp;
294177633Sdfr	register int np, h;
295177633Sdfr
296177633Sdfr	if (!*line) {
297177633Sdfr		oflush();
298177633Sdfr		putchar('\n');
299177633Sdfr		return;
300177633Sdfr	}
301193272Sjhb	for (cp = line; *cp == ' '; cp++)
302177633Sdfr		;
303177633Sdfr	np = cp - line;
304177633Sdfr
305177633Sdfr	/*
306177633Sdfr	 * The following horrible expression attempts to avoid linebreaks
307193272Sjhb	 * when the indent changes due to a paragraph.
308177633Sdfr	 */
309	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
310		oflush();
311	if ((h = ishead(cp)))
312		oflush(), mark = lineno;
313	if (lineno - mark < 3 && lineno - mark > 0)
314		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
315			if (ispref(*hp, cp)) {
316				h = 1;
317				oflush();
318				break;
319			}
320	if (!h && (h = (*cp == '.')))
321		oflush();
322	pfx = np;
323	if (h)
324		pack(cp, strlen(cp));
325	else	split(cp);
326	if (h)
327		oflush();
328	lineno++;
329}
330
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * The length handed to pack() counts the word without its trailing
 * blanks; a backslash together with the white space it escapes
 * counts as one.
 *
 * The scratch buffer "word" is static and only ever grows.  The
 * longest thing stored in it is the whole line plus two added blanks
 * plus the terminating NUL, i.e. strlen(line) + 3 bytes.
 */
void
split(line)
	char line[];
{
	register char *cp, *cp2;
	static char *word=0;
	static int wordsize=0;
	int wordl;		/* LIZ@UOM 6/18/85 */

	{
		int l = strlen(line);
		if (l + 3 > wordsize) {
			free(word);
			/* Round up to a multiple of 64; never below l+3. */
			wordsize = (l+66)&~63;
			word = malloc(wordsize);
			if (word == NULL)
				abort();
		}
	}

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (cp != line && strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
392
/*
 * Output section.
 *
 * Words passed in are assembled into line images in "outbuf", each
 * image starting with the blanks that make up the current indent.
 * "outp" marks the next free position in outbuf; while it is NOSTR
 * no line has been started.  Leading tabs are put back only when a
 * finished line is written out.
 */
int	outbuf_size;			/* Bytes allocated for outbuf */
char	*outbuf;			/* Output line image being built */
char	*outp;				/* Next free byte in outbuf, or NOSTR */
405
406/*
407 * Initialize the output section.
408 */
409void
410setout()
411{
412	outbuf = malloc(BUFSIZ);
413	if (outbuf == 0)
414		abort();
415	outbuf_size = BUFSIZ;
416	outp = NOSTR;
417}
418
419/*
420 * Pack a word onto the output line.  If this is the beginning of
421 * the line, push on the appropriately-sized string of blanks first.
422 * If the word won't fit on the current line, flush and begin a new
423 * line.  If the word is too long to fit all by itself on a line,
424 * just give it its own and hope for the best.
425 *
426 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
427 *	goal length, take it.  If not, then check to see if the line
428 *	will be over the max length; if so put the word on the next
429 *	line.  If not, check to see if the line will be closer to the
430 *	goal length with or without the word and take it or put it on
431 *	the next line accordingly.
432 */
433
434/*
435 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
436 * pack(word)
437 *	char word[];
438 */
439void
440pack(word,wl)
441	char word[];
442	int wl;
443{
444	register char *cp;
445	register int s, t;
446
447	if (((outp==NOSTR) ? wl : outp-outbuf + wl) >= outbuf_size) {
448		char *old_outbuf = outbuf;
449		outbuf_size *= 2;
450		outbuf = realloc(outbuf, outbuf_size);
451		if (outbuf == 0)
452			abort();
453		outp += outbuf-old_outbuf;
454	}
455
456	if (outp == NOSTR)
457		leadin();
458	/*
459	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
460	 * length of the line before the word is added; t is now the length
461	 * of the line after the word is added
462	 *	t = strlen(word);
463	 *	if (t+s <= LENGTH)
464	 */
465	s = outp - outbuf;
466	t = wl + s;
467	if ((t <= goal_length) ||
468	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
469		/*
470		 * In like flint!
471		 */
472		for (cp = word; *cp; *outp++ = *cp++);
473		return;
474	}
475	if (s > pfx) {
476		oflush();
477		leadin();
478	}
479	for (cp = word; *cp; *outp++ = *cp++);
480}
481
482/*
483 * If there is anything on the current output line, send it on
484 * its way.  Set outp to NOSTR to indicate the absence of the current
485 * line prefix.
486 */
487void
488oflush()
489{
490	if (outp == NOSTR)
491		return;
492	*outp = '\0';
493	tabulate(outbuf);
494	outp = NOSTR;
495}
496
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from "line" in place.  The leading run
 * of blanks is then written as one tab per eight blanks followed by
 * the remaining blanks, then the rest of the line and a newline.
 * An empty or all-blank line comes out as just a newline.
 */
void
tabulate(line)
	char line[];
{
	register char *cp;
	register int b, t;

	/*
	 * Toss trailing blanks in the output line.  The scan looks at
	 * the byte before cp so it never steps in front of the buffer.
	 */
	cp = line + strlen(line);
	while (cp > line && cp[-1] == ' ')
		cp--;
	*cp = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	fputs(cp, stdout);
	putc('\n', stdout);
}
536
537/*
538 * Initialize the output line with the appropriate number of
539 * leading blanks.
540 */
541void
542leadin()
543{
544	register int b;
545	register char *cp;
546
547	for (b = 0, cp = outbuf; b < pfx; b++)
548		*cp++ = ' ';
549	outp = cp;
550}
551
552/*
553 * Save a string in dynamic space.
554 * This little goodie is needed for
555 * a headline detector in head.c
556 */
557char *
558savestr(str)
559	char str[];
560{
561	register char *top;
562
563	top = malloc(strlen(str) + 1);
564	if (top == NOSTR)
565		errx(1, "ran out of memory");
566	strcpy(top, str);
567	return (top);
568}
569
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, 0 otherwise.  The empty string is a prefix of
 * everything.  Neither string is read beyond its terminating NUL.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	while (*s1 != '\0') {
		/* Also stops at the end of s2: its NUL cannot match. */
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
582