/* fmt.c revision 7307:aee7ca23287a */
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved	*/
28
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <ctype.h>
33#include <wctype.h>
34#include <widec.h>
35#include <dlfcn.h>
36#include <locale.h>
37#include <sys/param.h>
38#include <string.h>
39
40/*
41 * fmt -- format the concatenation of input files or standard input
42 * onto standard output.  Designed for use with Mail ~|
43 *
44 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ]
45 * Author: Kurt Shoens (UCB) 12/7/78
46 */
47
#define	NOSTR	((wchar_t *)0)	/* Null string pointer for lint */
#define	MAXLINES	100	/* maximum mail header lines to verify */

/*
 * Output line state.  outbuf holds the line being assembled; outp is
 * NOSTR while no line has been started (see leadin() and oflush()).
 */
wchar_t	outbuf[BUFSIZ];			/* Sandbagged output line image */
wchar_t	*outp;				/* Next free slot in outbuf, or NOSTR */
int	filler;				/* Leading blank count leadin() put in outbuf */
char sobuf[BUFSIZ];	/* Buffer handed to setbuf() for stdout in main() */

int	pfx;			/* Current leading blank count */
int	width = 72;		/* Width that we will not exceed */
int	nojoin = 0;		/* split lines only, don't join short ones */
int	errs = 0;		/* Current number of errors */

/*
 * Crown margin states, as driven by setopt(), prefix() and leadin():
 *	c_none	crown mode not requested (the zero-initialized default)
 *	c_reset	-c given, or a blank line seen: next line starts a paragraph
 *	c_head	indent of the paragraph's first input line recorded
 *	c_lead	first output line started using the head indent
 *	c_fixup	a further output line was started with the head indent
 *		before the body indent was known; prefix() re-splits it
 *	c_body	body indent known; new output lines use it
 */
enum crown_type	{c_none, c_reset, c_head, c_lead, c_fixup, c_body};
enum crown_type	crown_state;	/* Crown margin state */
int	crown_head;		/* The header offset */
int	crown_body;		/* The body offset */
	/* currently-known initial strings found in mail headers */
	/* (null-terminated list; matched by header_chk() via ispref()) */
wchar_t	*headnames[] = {
	L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By",
	L"Content", L"content-length", L"From", L"Date", L"id",
	L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path",
	L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP",
	L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID",
	0};

enum hdr_type {
	off,		/* mail header processing is off */
	not_in_hdr,	/* not currently processing a mail header */
	in_hdr, 	/* currently filling hdrbuf with potential hdr lines */
	flush_hdr,	/* flush hdrbuf; not a header, no special processing */
	do_hdr		/* process hdrbuf as a mail header */
};
				/* current state of hdrbuf */
enum hdr_type	hdr_state = not_in_hdr;

wchar_t *hdrbuf[MAXLINES];	/* buffer to hold potential mail header lines */
int 	h_lines;		/* number of lines saved in hdrbuf (next free index) */

/* Line splitter chosen in main(): csplit for the "C" locale, else msplit */
void (*(split))(wchar_t []);
/* Defined outside this file; fmt() adds its result to the column count */
extern int scrwidth(wchar_t);
/* Defined outside this file; fmt() uses it to spot a header's first line */
extern int ishead(char []);


static void fill_hdrbuf(wchar_t []);
static void header_chk(void);
static void process_hdrbuf(void);
static void leadin(void);
static void tabulate(wchar_t []);
static void oflush(void);
static void pack(wchar_t []);
static void msplit(wchar_t []);
static void csplit(wchar_t []);
static void _wckind_init(void);
static void prefix(wchar_t []);
static void fmt(FILE *);
static int setopt(char *);
int _wckind(wchar_t);
106
107/*
108 * Drive the whole formatter by managing input files.  Also,
109 * cause initialization of the output stuff and flush it out
110 * at the end.
111 */
112
113int
114main(int argc, char **argv)
115{
116	FILE *fi;
117	char *cp;
118	int nofile;
119	char *locale;
120
121	outp = NOSTR;
122	setbuf(stdout, sobuf);
123	setlocale(LC_ALL, "");
124	locale = setlocale(LC_CTYPE, "");
125	if (strcmp(locale, "C") == 0) {
126		split = csplit;
127	} else {
128		split = msplit;
129		_wckind_init();
130	}
131	if (argc < 2) {
132single:
133		fmt(stdin);
134		oflush();
135		exit(0);
136	}
137	nofile = 1;
138	while (--argc) {
139		cp = *++argv;
140		if (setopt(cp))
141			continue;
142		nofile = 0;
143		if ((fi = fopen(cp, "r")) == NULL) {
144			perror(cp);
145			errs++;
146			continue;
147		}
148		fmt(fi);
149		fclose(fi);
150	}
151	if (nofile)
152		goto single;
153	oflush();
154	fclose(stdout);
155	return (errs);
156}
157
158/*
159 * Read up characters from the passed input file, forming lines,
160 * doing ^H processing, expanding tabs, stripping trailing blanks,
161 * and sending each line down for analysis.
162 */
163
164static void
165fmt(FILE *fi)
166{
167	wchar_t linebuf[BUFSIZ], canonb[BUFSIZ];
168	wchar_t *cp, *cp2;
169	int col;
170	wchar_t	c;
171	char	cbuf[BUFSIZ];	/* stores wchar_t string as char string */
172
173	c = getwc(fi);
174	while (c != EOF) {
175		/*
176		 * Collect a line, doing ^H processing.
177		 * Leave tabs for now.
178		 */
179
180		cp = linebuf;
181		while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
182			if (c == L'\b') {
183				if (cp > linebuf)
184					cp--;
185				c = getwc(fi);
186				continue;
187			}
188			if (!(iswprint(c)) && c != L'\t') {
189				c = getwc(fi);
190				continue;
191			}
192			*cp++ = c;
193			c = getwc(fi);
194		}
195		*cp = L'\0';
196
197		/*
198		 * Toss anything remaining on the input line.
199		 */
200
201		while (c != L'\n' && c != EOF)
202			c = getwc(fi);
203		/*
204		 * Expand tabs on the way to canonb.
205		 */
206
207		col = 0;
208		cp = linebuf;
209		cp2 = canonb;
210		while (c = *cp++) {
211			if (c != L'\t') {
212				col += scrwidth(c);
213				if (cp2-canonb < BUFSIZ-1)
214					*cp2++ = c;
215				continue;
216			}
217			do {
218				if (cp2-canonb < BUFSIZ-1)
219					*cp2++ = L' ';
220				col++;
221			} while ((col & 07) != 0);
222		}
223
224		/*
225		 * Swipe trailing blanks from the line.
226		 */
227
228		for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--) {
229		}
230		*++cp2 = '\0';
231
232			/* special processing to look for mail header lines */
233		switch (hdr_state) {
234		case off:
235			prefix(canonb);
236		case not_in_hdr:
237			/* look for an initial mail header line */
238			/* skip initial blanks */
239			for (cp = canonb; *cp == L' '; cp++) {
240			}
241			/*
242			 * Need to convert string from wchar_t to char,
243			 * since this is what ishead() expects.  Since we
244			 * only want to make sure cp points to a "From" line
245			 * of the email, we don't have to alloc
246			 * BUFSIZ * MB_LEN_MAX to cbuf.
247			 */
248			wcstombs(cbuf, cp, (BUFSIZ - 1));
249			if (ishead(cbuf)) {
250				hdr_state = in_hdr;
251				fill_hdrbuf(canonb);
252			} else {
253				/* no mail header line; process normally */
254				prefix(canonb);
255			}
256			break;
257		case in_hdr:
258			/* already saw 1st mail header line; look for more */
259			if (canonb[0] == L'\0') {
260				/*
261				 * blank line means end of mail header;
262				 * verify current mail header buffer
263				 * then process it accordingly
264				 */
265				header_chk();
266				process_hdrbuf();
267				/* now process the current blank line */
268				prefix(canonb);
269			} else
270				/*
271				 * not a blank line--save this line as
272				 * a potential mail header line
273				 */
274				fill_hdrbuf(canonb);
275			break;
276		}
277		if (c != EOF)
278			c = getwc(fi);
279	}
280	/*
281	 * end of this file--make sure we process the stuff in
282	 * hdrbuf before we're finished
283	 */
284	if (hdr_state == in_hdr) {
285		header_chk();
286		process_hdrbuf();
287	}
288}
289
290/*
291 * Take a line devoid of tabs and other garbage and determine its
292 * blank prefix.  If the indent changes, call for a linebreak.
293 * If the input line is blank, echo the blank line on the output.
294 * Finally, if the line minus the prefix is a mail header, try to keep
295 * it on a line by itself.
296 */
297
298static void
299prefix(wchar_t line[])
300{
301	wchar_t *cp;
302	int np;
303	int nosplit = 0;	/* flag set if line should not be split */
304
305	if (line[0] == L'\0') {
306		oflush();
307		putchar('\n');
308		if (crown_state != c_none)
309			crown_state = c_reset;
310		return;
311	}
312	for (cp = line; *cp == L' '; cp++) {
313	}
314	np = cp - line;
315
316	/*
317	 * The following horrible expression attempts to avoid linebreaks
318	 * when the indent changes due to a paragraph.
319	 */
320
321	if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8))
322		oflush();
323	/*
324	 * if this is a mail header line, don't split it; flush previous
325	 * line, if any, so we don't join this line to it
326	 */
327	if (hdr_state == do_hdr) {
328		nosplit = 1;
329		oflush();
330	}
331	/* flush previous line so we don't join this one to it */
332	if (nojoin)
333		oflush();
334	/* nroff-type lines starting with '.' are not split nor joined */
335	if (!nosplit && (nosplit = (*cp == L'.')))
336		oflush();
337	pfx = np;
338	switch (crown_state) {
339	case c_reset:
340		crown_head = pfx;
341		crown_state = c_head;
342		break;
343	case c_lead:
344		crown_body = pfx;
345		crown_state = c_body;
346		break;
347	case c_fixup:
348		crown_body = pfx;
349		crown_state = c_body;
350		if (outp) {
351			wchar_t s[BUFSIZ];
352
353			*outp = L'\0';
354			wscpy(s, &outbuf[crown_head]);
355			outp = NOSTR;
356			split(s);
357		}
358		break;
359	}
360	if (nosplit) {
361		/* put whole input line onto outbuf and print it out */
362		pack(cp);
363		oflush();
364	} else
365		/*
366		 * split puts current line onto outbuf, but splits it
367		 * at word boundaries, if it exceeds desired length
368		 */
369		split(cp);
370	if (nojoin)
371		/*
372		 * flush current line so next lines, if any,
373		 * won't join to this one
374		 */
375		oflush();
376}
377
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * The word buffer has room for BUFSIZ characters plus the two blanks
 * and the terminator that may be appended, and every store is bounded
 * by that capacity.  A run longer than the buffer is handed to pack()
 * in several pieces instead of overrunning the buffer.
 */

static void
csplit(wchar_t line[])
{
	wchar_t *cp, *cp2;
	wchar_t word[BUFSIZ + 3];
	/* beyond this point only the two blanks and the NUL may be stored */
	wchar_t *const limit = &word[BUFSIZ];
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.  One pass may store two characters
		 * (backslash plus blank), hence "limit - 1".
		 */

		while (*cp && !(iswspace(*cp)) && cp2 < limit - 1) {
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */

		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}
		while (iswspace(*cp) && cp2 < limit + 2)
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
423
/*
 * Splitter used for locales other than "C".  Works like csplit(), but
 * additionally ends a word where the character kind reported by
 * _wckind() changes and at least one of the two neighbouring
 * characters has a non-zero wcsetno() value.
 *
 * The word buffer has room for BUFSIZ characters plus the two blanks
 * and the terminator that may be appended, and every store is bounded
 * by that capacity.  A run longer than the buffer is handed to pack()
 * in several pieces instead of overrunning the buffer.
 */
static void
msplit(wchar_t line[])
{
	wchar_t *cp, *cp2, prev;
	wchar_t word[BUFSIZ + 3];
	/* beyond this point only the two blanks and the NUL may be stored */
	wchar_t *const limit = &word[BUFSIZ];
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;
		prev = *cp;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.  One pass may store two characters
		 * (backslash plus blank), hence "limit - 1".
		 */

		while (*cp && cp2 < limit - 1) {
			if (iswspace(*cp))
				break;
			if (_wckind(*cp) != _wckind(prev))
				if (wcsetno(*cp) != 0 || wcsetno(prev) != 0)
					break;
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			prev = *cp;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */

		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}
		while (iswspace(*cp) && cp2 < limit + 2)
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
469
470/*
471 * Output section.
472 * Build up line images from the words passed in.  Prefix
473 * each line with correct number of blanks.  The buffer "outbuf"
474 * contains the current partial line image, including prefixed blanks.
475 * "outp" points to the next available space therein.  When outp is NOSTR,
476 * there ain't nothing in there yet.  At the bottom of this whole mess,
477 * leading tabs are reinserted.
478 */
479
480/*
481 * Pack a word onto the output line.  If this is the beginning of
482 * the line, push on the appropriately-sized string of blanks first.
483 * If the word won't fit on the current line, flush and begin a new
484 * line.  If the word is too long to fit all by itself on a line,
485 * just give it its own and hope for the best.
486 */
487
488static void
489pack(wchar_t word[])
490{
491	wchar_t *cp;
492	int s, t;
493
494	if (outp == NOSTR)
495		leadin();
496	t = wscol(word);
497	*outp = L'\0';
498	s = wscol(outbuf);
499	if (t+s <= width) {
500		for (cp = word; *cp; *outp++ = *cp++) {
501		}
502		return;
503	}
504	if (s > filler) {
505		oflush();
506		leadin();
507	}
508	for (cp = word; *cp; *outp++ = *cp++) {
509	}
510}
511
512/*
513 * If there is anything on the current output line, send it on
514 * its way.  Set outp to NOSTR to indicate the absence of the current
515 * line prefix.
516 */
517
518static void
519oflush(void)
520{
521	if (outp == NOSTR)
522		return;
523	*outp = L'\0';
524	tabulate(outbuf);
525	outp = NOSTR;
526}
527
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from the line in place.  Each full group
 * of eight leading blanks is written as one tab, the remaining leading
 * blanks as spaces, then the rest of the line and a newline.
 */

static void
tabulate(wchar_t line[])
{
	wchar_t *cp;
	int b, t;

	/* Toss trailing blanks in the output line */
	cp = line + wslen(line);
	while (cp > line && cp[-1] == L' ')
		cp--;
	*cp = L'\0';

	/* Count the leading blank space and tabulate */
	for (cp = line; *cp == L' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);

	while (*cp)
		putwc(*cp++, stdout);
	putc('\n', stdout);
}
565
566/*
567 * Initialize the output line with the appropriate number of
568 * leading blanks.
569 */
570
571static void
572leadin(void)
573{
574	int b;
575	wchar_t *cp;
576	int l;
577
578	switch (crown_state) {
579	case c_head:
580		l = crown_head;
581		crown_state = c_lead;
582		break;
583
584	case c_lead:
585	case c_fixup:
586		l = crown_head;
587		crown_state = c_fixup;
588		break;
589
590	case c_body:
591		l = crown_body;
592		break;
593
594	default:
595		l = pfx;
596		break;
597	}
598	filler = l;
599	for (b = 0, cp = outbuf; b < l; b++)
600		*cp++ = L' ';
601	outp = cp;
602}
603
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, and 0 otherwise.  An empty s1 is a prefix of
 * any string.  If s2 is shorter than s1 the result is 0: its
 * terminator is compared against a character of s1 and mismatches.
 */

static int
ispref(wchar_t *s1, wchar_t *s2)
{

	while (*s1 != L'\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
617
618/*
619 * Set an input option
620 */
621
622static int
623setopt(char *cp)
624{
625	static int ws = 0;
626
627	if (*cp == '-') {
628		if (cp[1] == 'c' && cp[2] == '\0') {
629			crown_state = c_reset;
630			return (1);
631		}
632		if (cp[1] == 's' && cp[2] == '\0') {
633			nojoin = 1;
634			return (1);
635		}
636		if (cp[1] == 'w' && cp[2] == '\0') {
637			ws++;
638			return (1);
639		}
640		width = atoi(cp+1);
641	} else if (ws) {
642		width = atoi(cp);
643		ws = 0;
644	} else
645		return (0);
646	if (width <= 0 || width >= BUFSIZ-2) {
647		fprintf(stderr, "fmt:  bad width: %d\n", width);
648		exit(1);
649	}
650	return (1);
651}
652
653
/* Path template of the per-locale word resolution library */
#define	LIB_WDRESOLVE	"/usr/lib/locale/%s/LC_CTYPE/wdresolve.so"
/* Name of the character-kind function looked up in that library */
#define	WCHKIND		"_wdchkind_"

static int	_wckind_c_locale(wchar_t);

/* Current character-kind classifier; the built-in one until replaced */
static int	(*__wckind)(wchar_t) = _wckind_c_locale;
/* Handle of the currently loaded locale library, or NULL */
static void	*dlhandle = NULL;


/*
 * Select the character-kind classifier for the current LC_CTYPE locale.
 *
 * Any previously loaded locale library is closed first.  For a locale
 * other than "C" the library named by LIB_WDRESOLVE is opened and its
 * WCHKIND symbol is used.  Whenever that is not possible (locale
 * unknown, path too long, library or symbol missing) the built-in
 * _wckind_c_locale() is used instead.
 */
static void
_wckind_init(void)
{
	char	*locale;
	char	path[MAXPATHLEN + 1];
	int	len;
	int	(*resolver)(wchar_t);

	if (dlhandle != NULL) {
		(void) dlclose(dlhandle);
		dlhandle = NULL;
	}
	/* never leave __wckind pointing into a library just unloaded */
	__wckind = _wckind_c_locale;

	locale = setlocale(LC_CTYPE, NULL);
	if (locale == NULL || strcmp(locale, "C") == 0)
		return;

	/* the locale name is not under our control: bound the write */
	len = snprintf(path, sizeof (path), LIB_WDRESOLVE, locale);
	if (len < 0 || (size_t)len >= sizeof (path))
		return;

	if ((dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
		return;

	resolver = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND);
	if (resolver != NULL) {
		__wckind = resolver;
		return;
	}
	(void) dlclose(dlhandle);
	dlhandle = NULL;
}
692
693
/*
 * Return the kind of the wide character wc as reported by the
 * classifier currently installed in __wckind.
 */
int
_wckind(wchar_t wc)
{
	int kind;

	kind = __wckind(wc);
	return (kind);
}
699
700
/*
 * Built-in character-kind classifier.
 *
 * ASCII letters, digits and '_' are kind 0, every other ASCII
 * character is kind 1, and a non-ASCII character is kind
 * wcsetno(wc) + 1.
 *
 * DEPEND_ON_ANSIC: the L notation for wide character constants is new
 * in ANSI C; a K&R compiler won't work.
 */
static int
_wckind_c_locale(wchar_t wc)
{
	if (!iswascii(wc))
		return (wcsetno(wc) + 1);
	if (wc == L'_' || iswalnum(wc))
		return (0);
	return (1);
}
717
718/*
719 * header_chk -
720 * Called when done looking for a set mail header lines.
721 * Either a blank line was seen, or EOF was reached.
722 *
723 * Verifies if current hdrbuf of potential mail header lines
724 * is really a mail header.  A mail header must be at least 2
725 * lines and more than half of them must start with one of the
726 * known mail header strings in headnames.
727 *
728 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid
729 * mail header.  Otherwise, it sets hdr_state to flush_hdr.
730 *
731 * h_lines = hdrbuf index for next line to be saved;
732 *	     also indicates current # of lines in potential header
733 */
734static void
735header_chk(void)
736{
737	wchar_t  *cp; 		/* ptr to current char of line */
738	wchar_t **hp; 		/* ptr to current char of a valid */
739				/* mail header string */
740	int	  l;		/* index */
741				/*
742				 * number of lines in hdrbuf that look
743				 * like mail header lines (start with
744				 * a known mail header prefix)
745				 */
746	int	 hdrcount = 0;
747		/* header must have at least 2 lines (h_lines > 1) */
748		if (h_lines < 2) {
749			hdr_state = flush_hdr;
750			return;
751		}
752		/*
753		 * go through each line in hdrbuf and see how many
754		 * look like mail header lines
755		 */
756		for (l = 0; l < h_lines; l++) {
757			/* skip initial blanks */
758			for (cp = hdrbuf[l]; *cp == L' '; cp++) {
759			}
760			for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++)
761				if (ispref(*hp, cp)) {
762					hdrcount++;
763					break;
764				}
765		}
766		/*
767		 * if over half match, we'll assume this is a header;
768		 * set hdr_state to indicate whether to treat
769		 * these lines as mail header (do_hdr) or not (flush_hdr)
770		 */
771		if (hdrcount > h_lines / 2)
772			hdr_state = do_hdr;
773		else
774			hdr_state = flush_hdr;
775}
776
777/*
778 * fill_hdrbuf -
779 * Save given input line into next element of hdrbuf,
780 * as a potential mail header line, to be processed later
781 * once we decide whether or not the contents of hdrbuf is
782 * really a mail header, via header_chk().
783 *
784 * Does not allow hdrbuf to exceed MAXLINES lines.
785 * Dynamically allocates space for each line.  If we are unable
786 * to allocate space for the current string, stop special mail
787 * header preservation at this point and continue formatting
788 * without it.
789 */
790static void
791fill_hdrbuf(wchar_t line[])
792{
793	wchar_t *cp;	/* pointer to characters in input line */
794	int	 i;	/* index into characters a hdrbuf line */
795
796	if (h_lines >= MAXLINES) {
797		/*
798		 * if we run over MAXLINES potential mail header
799		 * lines, stop checking--this is most likely NOT a
800		 * mail header; flush out the hdrbuf, then process
801		 * the current 'line' normally.
802		 */
803		hdr_state = flush_hdr;
804		process_hdrbuf();
805		prefix(line);
806		return;
807	}
808	hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) *
809	    (wslen(line) + 1));
810	if (hdrbuf[h_lines] == NULL) {
811		perror("malloc");
812		fprintf(stderr, "fmt: unable to do mail header preservation\n");
813		errs++;
814		/*
815		 * Can't process mail header; flush current contents
816		 * of mail header and continue with no more mail
817		 * header processing
818		 */
819		if (h_lines == 0)
820			/* hdrbuf is empty; process this line normally */
821			prefix(line);
822		else {
823			hdr_state = flush_hdr;
824			for (i = 0; i < h_lines; i++) {
825				prefix(hdrbuf[i]);
826				free(hdrbuf[i]);
827			}
828			h_lines = 0;
829		}
830		hdr_state = off;
831		return;
832	}
833	/* save this line as a potential mail header line */
834	for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++) {
835	}
836	h_lines++;
837}
838
839/*
840 * process_hdrbuf -
841 * Outputs the lines currently stored in hdrbuf, according
842 * to the current hdr_state value, assumed to be either do_hdr
843 * or flush_hdr.
844 * This should be called after doing a header_chk() to verify
845 * the hdrbuf and set the hdr_state flag.
846 */
847static void
848process_hdrbuf(void)
849{
850int i;
851
852	for (i = 0; i < h_lines; i++) {
853		prefix(hdrbuf[i]);
854		free(hdrbuf[i]);
855	}
856	hdr_state = not_in_hdr;
857	h_lines = 0;
858}
859