1/*	$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $	*/
2
3/*
4 * Copyright (c) 1980, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#ifndef lint
34__COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California.  All rights reserved.");
36#endif /* not lint */
37
38#ifndef lint
39#if 0
40static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
41#endif
42__RCSID("$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $");
43#endif /* not lint */
44
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wctype.h>
#include "buffer.h"
56
57/*
58 * fmt -- format the concatenation of input files or standard input
59 * onto standard output.  Designed for use with Mail ~|
60 *
61 * Syntax : fmt [ goal [ max ] ] [ name ... ]
62 * Authors: Kurt Shoens (UCB) 12/7/78;
63 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
64 */
65
/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
#define GOAL_LENGTH 65		/* Default for goal_length */
#define MAX_LENGTH 75		/* Default for max_length */
static size_t	goal_length;	/* Target or goal line length in output */
static size_t	max_length;	/* Max line length in output */
static size_t	pfx;		/* Current leading blank count */
static int	raw;		/* Don't treat mail specially */
static int	lineno;		/* Current input line */
static int	mark;		/* Last place we saw a head line */
static int	center;		/* Nonzero (-C): center lines instead of filling */
static struct buffer outbuf;	/* Output line currently being assembled */

/*
 * Names that prefix() keeps on a line of their own when they start a
 * line within two lines after a head line.  NULL-terminated.
 */
static const wchar_t *headnames[] = { L"To", L"Subject", L"Cc", NULL };
79
/* Functions private to this file. */
static void	usage(void) __dead;
static int 	getnum(const char *, const char *, size_t *, int);
static void	fmt(FILE *);
static int	ispref(const wchar_t *, const wchar_t *);
static void	leadin(void);
static void	oflush(void);
static void	pack(const wchar_t *, size_t);
static void	prefix(const struct buffer *, int);
static void	split(const wchar_t *, int);
static void	tabulate(struct buffer *);


/*
 * Mail head-line detector used by prefix().
 * NOTE(review): no definition is visible here; presumably it lives in
 * another source file -- confirm.
 */
int		ishead(const wchar_t *);
93
94/*
95 * Drive the whole formatter by managing input files.  Also,
96 * cause initialization of the output stuff and flush it out
97 * at the end.
98 */
99
100int
101main(int argc, char **argv)
102{
103	FILE *fi;
104	int errs = 0;
105	int compat = 1;
106	int c;
107
108	goal_length = GOAL_LENGTH;
109	max_length = MAX_LENGTH;
110	buf_init(&outbuf);
111	lineno = 1;
112	mark = -10;
113
114	setprogname(*argv);
115	(void)setlocale(LC_ALL, "");
116
117	while ((c = getopt(argc, argv, "Cg:m:rw:")) != -1)
118		switch (c) {
119		case 'C':
120			center++;
121			break;
122		case 'g':
123			(void)getnum(optarg, "goal", &goal_length, 1);
124			compat = 0;
125			break;
126		case 'm':
127		case 'w':
128			(void)getnum(optarg, "max", &max_length, 1);
129			compat = 0;
130			break;
131		case 'r':
132			raw++;
133			break;
134		default:
135			usage();
136		}
137
138	argc -= optind;
139	argv += optind;
140
141	/*
142	 * compatibility with old usage.
143	 */
144	if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
145		argv++;
146		argc--;
147		if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
148			argv++;
149			argc--;
150		}
151	}
152
153	if (max_length <= goal_length) {
154		errx(1, "Max length (%zu) must be greater than goal "
155		    "length (%zu)", max_length, goal_length);
156	}
157	if (argc == 0) {
158		fmt(stdin);
159		oflush();
160		return 0;
161	}
162	for (;argc; argc--, argv++) {
163		if ((fi = fopen(*argv, "r")) == NULL) {
164			warn("Cannot open `%s'", *argv);
165			errs++;
166			continue;
167		}
168		fmt(fi);
169		(void)fclose(fi);
170	}
171	oflush();
172	buf_end(&outbuf);
173	return errs;
174}
175
/*
 * Print the synopsis for both the option-based and the old positional
 * syntax on standard error, then exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m|w <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
185
/*
 * Parse str as a non-negative number (base auto-detected by strtoul, so
 * a 0x prefix means hex and a leading 0 means octal) and store it in *res.
 *
 * str	  text to parse
 * what	  name of the value, used only in diagnostics
 * res	  where the parsed value is stored on success
 * badnum  if nonzero, a string that is not a valid number is fatal
 *
 * Returns 1 if str was a number (*res is set), 0 if it was not and
 * badnum is zero (*res is left untouched).  A number that does not fit
 * is always fatal.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	unsigned long ul;
	char *ep;

	errno = 0;
	ul = strtoul(str, &ep, 0);

	/*
	 * The whole string must have been consumed.  strtoul() accepts a
	 * leading minus sign and negates the result in unsigned arithmetic,
	 * which would turn "-1" into an enormous length; since the string
	 * was consumed entirely, any '-' in it can only be that sign, so
	 * reject it as not-a-number.
	 */
	if (*str != '\0' && *ep == '\0' && strchr(str, '-') == NULL) {
		if ((errno == ERANGE && ul == ULONG_MAX) || ul > SIZE_MAX)
			errx(1, "%s number `%s' too big", what, str);
		*res = (size_t)ul;
		return 1;
	}

	if (badnum)
		errx(1, "Bad %s number `%s'", what, str);

	return 0;
}
204
205/*
206 * Read up characters from the passed input file, forming lines,
207 * doing ^H processing, expanding tabs, stripping trailing blanks,
208 * and sending each line down for analysis.
209 */
210static void
211fmt(FILE *fi)
212{
213	struct buffer lbuf, cbuf;
214	wchar_t *cp, *cp2;
215	wint_t c;
216	int add_space;
217	size_t len, col, i;
218
219	if (center) {
220		for (;;) {
221			cp = fgetwln(fi, &len);
222			if (!cp)
223				return;
224
225			/* skip over leading space */
226			while (len > 0) {
227				if (!iswspace(*cp))
228					break;
229				cp++;
230				len--;
231			}
232
233			/* clear trailing space */
234			while (len > 0) {
235				if (!iswspace((unsigned char)cp[len-1]))
236					break;
237				len--;
238			}
239
240			if (len == 0) {
241				/* blank line */
242				(void)putwchar(L'\n');
243				continue;
244			}
245
246			if (goal_length > len) {
247				for (i = 0; i < (goal_length - len) / 2; i++) {
248					(void)putwchar(L' ');
249				}
250			}
251			for (i = 0; i < len; i++) {
252				(void)putwchar(cp[i]);
253			}
254			(void)putwchar(L'\n');
255		}
256	}
257
258	buf_init(&lbuf);
259	buf_init(&cbuf);
260	c = getwc(fi);
261
262	while (c != WEOF) {
263		/*
264		 * Collect a line, doing ^H processing.
265		 * Leave tabs for now.
266		 */
267		buf_reset(&lbuf);
268		while (c != '\n' && c != WEOF) {
269			if (c == '\b') {
270				(void)buf_unputc(&lbuf);
271				c = getwc(fi);
272				continue;
273			}
274			if(!(iswprint(c) || c == '\t' || c >= 160)) {
275				c = getwc(fi);
276				continue;
277			}
278			buf_putc(&lbuf, c);
279			c = getwc(fi);
280		}
281		buf_putc(&lbuf, '\0');
282		(void)buf_unputc(&lbuf);
283		add_space = c != WEOF;
284
285		/*
286		 * Expand tabs on the way.
287		 */
288		col = 0;
289		cp = lbuf.bptr;
290		buf_reset(&cbuf);
291		while ((c = *cp++) != '\0') {
292			if (c != '\t') {
293				col++;
294				buf_putc(&cbuf, c);
295				continue;
296			}
297			do {
298				buf_putc(&cbuf, ' ');
299				col++;
300			} while ((col & 07) != 0);
301		}
302
303		/*
304		 * Swipe trailing blanks from the line.
305		 */
306		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
307			continue;
308		cbuf.ptr = cp2 + 1;
309		buf_putc(&cbuf, '\0');
310		(void)buf_unputc(&cbuf);
311		prefix(&cbuf, add_space);
312		if (c != WEOF)
313			c = getwc(fi);
314	}
315	buf_end(&cbuf);
316	buf_end(&lbuf);
317}
318
319/*
320 * Take a line devoid of tabs and other garbage and determine its
321 * blank prefix.  If the indent changes, call for a linebreak.
322 * If the input line is blank, echo the blank line on the output.
323 * Finally, if the line minus the prefix is a mail header, try to keep
324 * it on a line by itself.
325 */
326static void
327prefix(const struct buffer *buf, int add_space)
328{
329	const wchar_t *cp;
330	const wchar_t **hp;
331	size_t np;
332	int h;
333
334	if (buf->ptr == buf->bptr) {
335		oflush();
336		(void)putwchar(L'\n');
337		return;
338	}
339	for (cp = buf->bptr; *cp == ' '; cp++)
340		continue;
341	np = cp - buf->bptr;
342
343	/*
344	 * The following horrible expression attempts to avoid linebreaks
345	 * when the indent changes due to a paragraph.
346	 */
347	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
348		oflush();
349	if (!raw) {
350		if ((h = ishead(cp)) != 0) {
351			oflush();
352			mark = lineno;
353		}
354		if (lineno - mark < 3 && lineno - mark > 0)
355			for (hp = &headnames[0]; *hp != NULL; hp++)
356				if (ispref(*hp, cp)) {
357					h = 1;
358					oflush();
359					break;
360				}
361		if (!h && (h = (*cp == '.')))
362			oflush();
363	} else
364		h = 0;
365	pfx = np;
366	if (h) {
367		pack(cp, (size_t)(buf->ptr - cp));
368		oflush();
369	} else
370		split(cp, add_space);
371	lineno++;
372}
373
374/*
375 * Split up the passed line into output "words" which are
376 * maximal strings of non-blanks with the blank separation
377 * attached at the end.  Pass these words along to the output
378 * line packer.
379 */
380static void
381split(const wchar_t line[], int add_space)
382{
383	const wchar_t *cp;
384	struct buffer word;
385	size_t wlen;
386
387	buf_init(&word);
388	cp = line;
389	while (*cp) {
390		buf_reset(&word);
391		wlen = 0;
392
393		/*
394		 * Collect a 'word,' allowing it to contain escaped white
395		 * space.
396		 */
397		while (*cp && *cp != ' ') {
398			if (*cp == '\\' && iswspace(cp[1]))
399				buf_putc(&word, *cp++);
400			buf_putc(&word, *cp++);
401			wlen++;
402		}
403
404		/*
405		 * Guarantee a space at end of line. Two spaces after end of
406		 * sentence punctuation.
407		 */
408		if (*cp == '\0' && add_space) {
409			buf_putc(&word, ' ');
410			if (strchr(".:!", cp[-1]))
411				buf_putc(&word, ' ');
412		}
413		while (*cp == ' ')
414			buf_putc(&word, *cp++);
415
416		buf_putc(&word, '\0');
417		(void)buf_unputc(&word);
418
419		pack(word.bptr, wlen);
420	}
421	buf_end(&word);
422}
423
424/*
425 * Output section.
426 * Build up line images from the words passed in.  Prefix
427 * each line with correct number of blanks.
428 *
429 * At the bottom of this whole mess, leading tabs are reinserted.
430 */
431
432/*
433 * Pack a word onto the output line.  If this is the beginning of
434 * the line, push on the appropriately-sized string of blanks first.
435 * If the word won't fit on the current line, flush and begin a new
436 * line.  If the word is too long to fit all by itself on a line,
437 * just give it its own and hope for the best.
438 *
439 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
440 *	goal length, take it.  If not, then check to see if the line
441 *	will be over the max length; if so put the word on the next
442 *	line.  If not, check to see if the line will be closer to the
443 *	goal length with or without the word and take it or put it on
444 *	the next line accordingly.
445 */
446
447static void
448pack(const wchar_t *word, size_t wlen)
449{
450	const wchar_t *cp;
451	size_t s, t;
452
453	if (outbuf.bptr == outbuf.ptr)
454		leadin();
455	/*
456	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
457	 * length of the line before the word is added; t is now the length
458	 * of the line after the word is added
459	 */
460	s = outbuf.ptr - outbuf.bptr;
461	t = wlen + s;
462	if ((t <= goal_length) || ((t <= max_length) &&
463	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
464		/*
465		 * In like flint!
466		 */
467		for (cp = word; *cp;)
468			buf_putc(&outbuf, *cp++);
469		return;
470	}
471	if (s > pfx) {
472		oflush();
473		leadin();
474	}
475	for (cp = word; *cp;)
476		buf_putc(&outbuf, *cp++);
477}
478
479/*
480 * If there is anything on the current output line, send it on
481 * its way.  Reset outbuf.
482 */
483static void
484oflush(void)
485{
486	if (outbuf.bptr == outbuf.ptr)
487		return;
488	buf_putc(&outbuf, '\0');
489	(void)buf_unputc(&outbuf);
490	tabulate(&outbuf);
491	buf_reset(&outbuf);
492}
493
494/*
495 * Take the passed line buffer, insert leading tabs where possible, and
496 * output on standard output (finally).
497 */
498static void
499tabulate(struct buffer *buf)
500{
501	wchar_t *cp;
502	size_t b, t;
503
504	/*
505	 * Toss trailing blanks in the output line.
506	 */
507	for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
508		continue;
509	*++cp = '\0';
510
511	/*
512	 * Count the leading blank space and tabulate.
513	 */
514	for (cp = buf->bptr; *cp == ' '; cp++)
515		continue;
516	b = cp - buf->bptr;
517	t = b / 8;
518	b = b % 8;
519	if (t > 0)
520		do
521			(void)putwchar(L'\t');
522		while (--t);
523	if (b > 0)
524		do
525			(void)putwchar(L' ');
526		while (--b);
527	while (*cp)
528		(void)putwchar(*cp++);
529	(void)putwchar(L'\n');
530}
531
532/*
533 * Initialize the output line with the appropriate number of
534 * leading blanks.
535 */
536static void
537leadin(void)
538{
539	size_t b;
540
541	buf_reset(&outbuf);
542
543	for (b = 0; b < pfx; b++)
544		buf_putc(&outbuf, ' ');
545}
546
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so the empty string is a prefix of anything),
 * 0 otherwise.  Both strings must be NUL-terminated; the comparison is
 * exact, including case.
 */
static int
ispref(const wchar_t *s1, const wchar_t *s2)
{

	/*
	 * Advance both strings together.  If s2 ends first, its terminator
	 * differs from the current non-NUL character of s1 and the loop
	 * stops there, so s2 is never read past its end.
	 */
	while (*s1 != L'\0') {
		if (*s1++ != *s2++)
			return 0;
	}
	return 1;
}
558