1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved	*/
28
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <ctype.h>
33#include <wctype.h>
34#include <widec.h>
35#include <dlfcn.h>
36#include <locale.h>
37#include <sys/param.h>
38#include <string.h>
39
40/*
41 * fmt -- format the concatenation of input files or standard input
42 * onto standard output.  Designed for use with Mail ~|
43 *
44 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ]
45 * Author: Kurt Shoens (UCB) 12/7/78
46 */
47
/*
 * NOSTR is a typed null wide-string pointer; it doubles as the
 * "no output line in progress" marker stored in outp.
 */
#define	NOSTR	((wchar_t *)0)	/* Null string pointer for lint */
/* Capacity of hdrbuf: most candidate header lines buffered at once. */
#define	MAXLINES	100	/* maximum mail header lines to verify */

/*
 * Output state.  pack() appends words to outbuf, leadin() writes the
 * leading blanks, and oflush() hands the finished line to tabulate().
 */
wchar_t	outbuf[BUFSIZ];			/* Sandbagged output line image */
wchar_t	*outp;				/* Next free slot in outbuf, or NOSTR */
int	filler;				/* Leading blanks leadin() put in outbuf */
char sobuf[BUFSIZ];	/* Buffer main() hands to setbuf() for stdout */

int	pfx;			/* Current leading blank count */
int	width = 72;		/* Width that we will not exceed */
int	nojoin = 0;		/* split lines only, don't join short ones */
int	errs = 0;		/* Current number of errors */

/*
 * Crown margin mode (-c) state machine, driven by prefix() and leadin():
 *   c_none   crown mode is not in use (the zero-initialized default)
 *   c_reset  waiting for the first line of a paragraph
 *   c_head   first line seen; its indent is stored in crown_head
 *   c_lead   first output line started; second input line not yet seen
 *   c_fixup  a further output line was started with the head indent
 *            before the body indent was known; prefix() re-splits it
 *   c_body   body indent known (crown_body); used for new output lines
 */
enum crown_type	{c_none, c_reset, c_head, c_lead, c_fixup, c_body};
enum crown_type	crown_state;	/* Crown margin state */
int	crown_head;		/* The header offset */
int	crown_body;		/* The body offset */
65	/* currently-known initial strings found in mail headers */
/*
 * header_chk() walks this table and tests each entry against the start
 * of a buffered line with ispref().  The list ends with a null pointer.
 */
wchar_t	*headnames[] = {
	L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By",
	L"Content", L"content-length", L"From", L"Date", L"id",
	L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path",
	L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP",
	L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID",
	0};
73
/*
 * Mail header detection state.  fmt() dispatches each input line on
 * hdr_state; header_chk() chooses between flush_hdr and do_hdr, and
 * process_hdrbuf() returns the state to not_in_hdr once hdrbuf has
 * been written out.  fill_hdrbuf() switches to off after a failed
 * allocation.
 */
enum hdr_type {
	off,		/* mail header processing is off */
	not_in_hdr,	/* not currently processing a mail header */
	in_hdr, 	/* currently filling hdrbuf with potential hdr lines */
	flush_hdr,	/* flush hdrbuf; not a header, no special processing */
	do_hdr		/* process hdrbuf as a mail header */
};
				/* current state of hdrbuf */
enum hdr_type	hdr_state = not_in_hdr;
83
/*
 * Candidate mail header lines.  Each element is allocated by
 * fill_hdrbuf() and freed once it has been passed to prefix().
 */
wchar_t *hdrbuf[MAXLINES];	/* buffer to hold potential mail header lines */
int 	h_lines;		/* number of lines currently held in hdrbuf */

/* Line splitter chosen by main(): csplit for the "C" locale, else msplit. */
void (*(split))(wchar_t []);
/* Defined outside this file; fmt() adds its result to the column count. */
extern int scrwidth(wchar_t);
/* Defined outside this file; fmt() uses it to spot a leading "From" line. */
extern int ishead(char []);


/* Forward declarations for the routines defined later in this file. */
static void fill_hdrbuf(wchar_t []);
static void header_chk(void);
static void process_hdrbuf(void);
static void leadin(void);
static void tabulate(wchar_t []);
static void oflush(void);
static void pack(wchar_t []);
static void msplit(wchar_t []);
static void csplit(wchar_t []);
static void _wckind_init(void);
static void prefix(wchar_t []);
static void fmt(FILE *);
static int setopt(char *);
int _wckind(wchar_t);
106
107/*
108 * Drive the whole formatter by managing input files.  Also,
109 * cause initialization of the output stuff and flush it out
110 * at the end.
111 */
112
113int
114main(int argc, char **argv)
115{
116	FILE *fi;
117	char *cp;
118	int nofile;
119	char *locale;
120
121	outp = NOSTR;
122	setbuf(stdout, sobuf);
123	setlocale(LC_ALL, "");
124	locale = setlocale(LC_CTYPE, "");
125	if (strcmp(locale, "C") == 0) {
126		split = csplit;
127	} else {
128		split = msplit;
129		_wckind_init();
130	}
131	if (argc < 2) {
132single:
133		fmt(stdin);
134		oflush();
135		exit(0);
136	}
137	nofile = 1;
138	while (--argc) {
139		cp = *++argv;
140		if (setopt(cp))
141			continue;
142		nofile = 0;
143		if ((fi = fopen(cp, "r")) == NULL) {
144			perror(cp);
145			errs++;
146			continue;
147		}
148		fmt(fi);
149		fclose(fi);
150	}
151	if (nofile)
152		goto single;
153	oflush();
154	fclose(stdout);
155	return (errs);
156}
157
158/*
159 * Read up characters from the passed input file, forming lines,
160 * doing ^H processing, expanding tabs, stripping trailing blanks,
161 * and sending each line down for analysis.
162 */
163
164static void
165fmt(FILE *fi)
166{
167	wchar_t linebuf[BUFSIZ], canonb[BUFSIZ];
168	wchar_t *cp, *cp2;
169	int col;
170	wchar_t	c;
171	char	cbuf[BUFSIZ];	/* stores wchar_t string as char string */
172
173	c = getwc(fi);
174	while (c != EOF) {
175		/*
176		 * Collect a line, doing ^H processing.
177		 * Leave tabs for now.
178		 */
179
180		cp = linebuf;
181		while (c != L'\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
182			if (c == L'\b') {
183				if (cp > linebuf)
184					cp--;
185				c = getwc(fi);
186				continue;
187			}
188			if (!(iswprint(c)) && c != L'\t') {
189				c = getwc(fi);
190				continue;
191			}
192			*cp++ = c;
193			c = getwc(fi);
194		}
195		*cp = L'\0';
196
197		/*
198		 * Toss anything remaining on the input line.
199		 */
200
201		while (c != L'\n' && c != EOF)
202			c = getwc(fi);
203		/*
204		 * Expand tabs on the way to canonb.
205		 */
206
207		col = 0;
208		cp = linebuf;
209		cp2 = canonb;
210		while (c = *cp++) {
211			if (c != L'\t') {
212				col += scrwidth(c);
213				if (cp2-canonb < BUFSIZ-1)
214					*cp2++ = c;
215				continue;
216			}
217			do {
218				if (cp2-canonb < BUFSIZ-1)
219					*cp2++ = L' ';
220				col++;
221			} while ((col & 07) != 0);
222		}
223
224		/*
225		 * Swipe trailing blanks from the line.
226		 */
227
228		for (cp2--; cp2 >= canonb && *cp2 == L' '; cp2--) {
229		}
230		*++cp2 = '\0';
231
232			/* special processing to look for mail header lines */
233		switch (hdr_state) {
234		case off:
235			prefix(canonb);
236		case not_in_hdr:
237			/* look for an initial mail header line */
238			/* skip initial blanks */
239			for (cp = canonb; *cp == L' '; cp++) {
240			}
241			/*
242			 * Need to convert string from wchar_t to char,
243			 * since this is what ishead() expects.  Since we
244			 * only want to make sure cp points to a "From" line
245			 * of the email, we don't have to alloc
246			 * BUFSIZ * MB_LEN_MAX to cbuf.
247			 */
248			wcstombs(cbuf, cp, (BUFSIZ - 1));
249			if (ishead(cbuf)) {
250				hdr_state = in_hdr;
251				fill_hdrbuf(canonb);
252			} else {
253				/* no mail header line; process normally */
254				prefix(canonb);
255			}
256			break;
257		case in_hdr:
258			/* already saw 1st mail header line; look for more */
259			if (canonb[0] == L'\0') {
260				/*
261				 * blank line means end of mail header;
262				 * verify current mail header buffer
263				 * then process it accordingly
264				 */
265				header_chk();
266				process_hdrbuf();
267				/* now process the current blank line */
268				prefix(canonb);
269			} else
270				/*
271				 * not a blank line--save this line as
272				 * a potential mail header line
273				 */
274				fill_hdrbuf(canonb);
275			break;
276		}
277		if (c != EOF)
278			c = getwc(fi);
279	}
280	/*
281	 * end of this file--make sure we process the stuff in
282	 * hdrbuf before we're finished
283	 */
284	if (hdr_state == in_hdr) {
285		header_chk();
286		process_hdrbuf();
287	}
288}
289
290/*
291 * Take a line devoid of tabs and other garbage and determine its
292 * blank prefix.  If the indent changes, call for a linebreak.
293 * If the input line is blank, echo the blank line on the output.
294 * Finally, if the line minus the prefix is a mail header, try to keep
295 * it on a line by itself.
296 */
297
298static void
299prefix(wchar_t line[])
300{
301	wchar_t *cp;
302	int np;
303	int nosplit = 0;	/* flag set if line should not be split */
304
305	if (line[0] == L'\0') {
306		oflush();
307		putchar('\n');
308		if (crown_state != c_none)
309			crown_state = c_reset;
310		return;
311	}
312	for (cp = line; *cp == L' '; cp++) {
313	}
314	np = cp - line;
315
316	/*
317	 * The following horrible expression attempts to avoid linebreaks
318	 * when the indent changes due to a paragraph.
319	 */
320
321	if (crown_state == c_none && np != pfx && (np > pfx || abs(pfx-np) > 8))
322		oflush();
323	/*
324	 * if this is a mail header line, don't split it; flush previous
325	 * line, if any, so we don't join this line to it
326	 */
327	if (hdr_state == do_hdr) {
328		nosplit = 1;
329		oflush();
330	}
331	/* flush previous line so we don't join this one to it */
332	if (nojoin)
333		oflush();
334	/* nroff-type lines starting with '.' are not split nor joined */
335	if (!nosplit && (nosplit = (*cp == L'.')))
336		oflush();
337	pfx = np;
338	switch (crown_state) {
339	case c_reset:
340		crown_head = pfx;
341		crown_state = c_head;
342		break;
343	case c_lead:
344		crown_body = pfx;
345		crown_state = c_body;
346		break;
347	case c_fixup:
348		crown_body = pfx;
349		crown_state = c_body;
350		if (outp) {
351			wchar_t s[BUFSIZ];
352
353			*outp = L'\0';
354			wscpy(s, &outbuf[crown_head]);
355			outp = NOSTR;
356			split(s);
357		}
358		break;
359	}
360	if (nosplit) {
361		/* put whole input line onto outbuf and print it out */
362		pack(cp);
363		oflush();
364	} else
365		/*
366		 * split puts current line onto outbuf, but splits it
367		 * at word boundaries, if it exceeds desired length
368		 */
369		split(cp);
370	if (nojoin)
371		/*
372		 * flush current line so next lines, if any,
373		 * won't join to this one
374		 */
375		oflush();
376}
377
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * The last word of the line always gets one trailing blank, and a
 * second one when it ends in '.', ':', '!' or '?'.  This is the
 * splitter main() installs for the "C" locale.
 */

static void
csplit(wchar_t line[])
{
	wchar_t *cp, *cp2;
	/*
	 * Lines built by fmt() hold up to BUFSIZ - 1 characters and may
	 * be a single word; that word can then gain two trailing blanks
	 * and the terminator, so BUFSIZ elements would be too few.
	 */
	wchar_t word[BUFSIZ + 2];
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */

		while (*cp && !(iswspace(*cp))) {
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 * (cp[-1] is safe: reaching the end here means the
		 * loop above consumed at least one character.)
		 */

		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}
		while (iswspace(*cp))
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
423
/*
 * Locale-aware counterpart of csplit(), installed by main() for any
 * locale other than "C".
 *
 * Words end at white space as in csplit(), and additionally wherever
 * the _wckind() class changes between neighbouring characters, provided
 * at least one of the two has a non-zero wcsetno() value.  Words cut
 * at such a class change get no blank appended.
 */
static void
msplit(wchar_t line[])
{
	wchar_t *cp, *cp2, prev;
	/*
	 * Lines built by fmt() hold up to BUFSIZ - 1 characters and may
	 * be a single word; that word can then gain two trailing blanks
	 * and the terminator, so BUFSIZ elements would be too few.
	 */
	wchar_t word[BUFSIZ + 2];
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;
		prev = *cp;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */

		while (*cp) {
			if (iswspace(*cp))
				break;
			/* a change of character class also ends the word */
			if (_wckind(*cp) != _wckind(prev))
				if (wcsetno(*cp) != 0 || wcsetno(prev) != 0)
					break;
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			prev = *cp;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */

		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}
		while (iswspace(*cp))
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
469
470/*
471 * Output section.
472 * Build up line images from the words passed in.  Prefix
473 * each line with correct number of blanks.  The buffer "outbuf"
474 * contains the current partial line image, including prefixed blanks.
475 * "outp" points to the next available space therein.  When outp is NOSTR,
476 * there ain't nothing in there yet.  At the bottom of this whole mess,
477 * leading tabs are reinserted.
478 */
479
480/*
481 * Pack a word onto the output line.  If this is the beginning of
482 * the line, push on the appropriately-sized string of blanks first.
483 * If the word won't fit on the current line, flush and begin a new
484 * line.  If the word is too long to fit all by itself on a line,
485 * just give it its own and hope for the best.
486 */
487
488static void
489pack(wchar_t word[])
490{
491	wchar_t *cp;
492	int s, t;
493
494	if (outp == NOSTR)
495		leadin();
496	t = wscol(word);
497	*outp = L'\0';
498	s = wscol(outbuf);
499	if (t+s <= width) {
500		for (cp = word; *cp; *outp++ = *cp++) {
501		}
502		return;
503	}
504	if (s > filler) {
505		oflush();
506		leadin();
507	}
508	for (cp = word; *cp; *outp++ = *cp++) {
509	}
510}
511
512/*
513 * If there is anything on the current output line, send it on
514 * its way.  Set outp to NOSTR to indicate the absence of the current
515 * line prefix.
516 */
517
518static void
519oflush(void)
520{
521	if (outp == NOSTR)
522		return;
523	*outp = L'\0';
524	tabulate(outbuf);
525	outp = NOSTR;
526}
527
/*
 * Write one finished line to standard output.  Trailing blanks are
 * removed from the buffer, every full group of eight leading blanks
 * is written as a tab, the remaining leading blanks are written as
 * spaces, and the rest of the line follows with a newline.
 */

static void
tabulate(wchar_t line[])
{
	wchar_t *text;		/* walks the non-blank part of the line */
	int len;		/* line length without trailing blanks */
	int blanks;		/* number of leading blanks */
	int tabs, spaces;

	/* Toss trailing blanks in the output line */
	len = wslen(line);
	while (len > 0 && line[len - 1] == L' ')
		len--;
	line[len] = L'\0';

	/* Count the leading blank space and tabulate */
	text = line;
	while (*text == L' ')
		text++;
	blanks = text - line;
	tabs = blanks >> 3;
	spaces = blanks & 07;

	while (tabs-- > 0)
		putc('\t', stdout);
	while (spaces-- > 0)
		putc(' ', stdout);
	for (; *text != L'\0'; text++)
		putwc(*text, stdout);
	putc('\n', stdout);
}
563
564/*
565 * Initialize the output line with the appropriate number of
566 * leading blanks.
567 */
568
569static void
570leadin(void)
571{
572	int b;
573	wchar_t *cp;
574	int l;
575
576	switch (crown_state) {
577	case c_head:
578		l = crown_head;
579		crown_state = c_lead;
580		break;
581
582	case c_lead:
583	case c_fixup:
584		l = crown_head;
585		crown_state = c_fixup;
586		break;
587
588	case c_body:
589		l = crown_body;
590		break;
591
592	default:
593		l = pfx;
594		break;
595	}
596	filler = l;
597	for (b = 0, cp = outbuf; b < l; b++)
598		*cp++ = L' ';
599	outp = cp;
600}
601
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the start of s2, and 0
 * otherwise.  An empty s1 is a prefix of anything.  When s2 ends
 * before s1 has been fully matched the answer is 0: a string cannot
 * start with something longer than itself.
 */

static int
ispref(wchar_t *s1, wchar_t *s2)
{

	while (*s1 != L'\0' && *s2 != L'\0')
		if (*s1++ != *s2++)
			return (0);
	/* s1 must have been consumed completely to count as a prefix */
	return (*s1 == L'\0');
}
615
616/*
617 * Set an input option
618 */
619
620static int
621setopt(char *cp)
622{
623	static int ws = 0;
624
625	if (*cp == '-') {
626		if (cp[1] == 'c' && cp[2] == '\0') {
627			crown_state = c_reset;
628			return (1);
629		}
630		if (cp[1] == 's' && cp[2] == '\0') {
631			nojoin = 1;
632			return (1);
633		}
634		if (cp[1] == 'w' && cp[2] == '\0') {
635			ws++;
636			return (1);
637		}
638		width = atoi(cp+1);
639	} else if (ws) {
640		width = atoi(cp);
641		ws = 0;
642	} else
643		return (0);
644	if (width <= 0 || width >= BUFSIZ-2) {
645		fprintf(stderr, "fmt:  bad width: %d\n", width);
646		exit(1);
647	}
648	return (1);
649}
650
651
/*
 * printf-style template for the path of a locale's word-resolution
 * shared object; _wckind_init() substitutes the LC_CTYPE locale name.
 */
#define	LIB_WDRESOLVE	"/usr/lib/locale/%s/LC_CTYPE/wdresolve.so"
/* Symbol that _wckind_init() looks up in that shared object. */
#define	WCHKIND		"_wdchkind_"

/* Built-in classifier, used when no locale-specific one is loaded. */
static int	_wckind_c_locale(wchar_t);

/* Classifier that _wckind() currently forwards to. */
static int	(*__wckind)(wchar_t) = _wckind_c_locale;
/* Handle of the loaded wdresolve.so, or NULL when none is open. */
static void	*dlhandle = NULL;
659
660
661static void
662_wckind_init(void)
663{
664	char	*locale;
665	char	path[MAXPATHLEN + 1];
666
667
668	if (dlhandle != NULL) {
669		(void) dlclose(dlhandle);
670		dlhandle = NULL;
671	}
672
673	locale = setlocale(LC_CTYPE, NULL);
674	if (strcmp(locale, "C") == 0)
675		goto c_locale;
676
677	(void) sprintf(path, LIB_WDRESOLVE, locale);
678
679	if ((dlhandle = dlopen(path, RTLD_LAZY)) != NULL) {
680		__wckind = (int (*)(wchar_t))dlsym(dlhandle, WCHKIND);
681		if (__wckind != NULL)
682			return;
683		(void) dlclose(dlhandle);
684		dlhandle = NULL;
685	}
686
687c_locale:
688	__wckind = _wckind_c_locale;
689}
690
691
/*
 * Classify a wide character by forwarding to whichever classifier
 * _wckind_init() installed (the built-in one by default).
 */
int
_wckind(wchar_t wc)
{
	return (__wckind(wc));
}
697
698
/*
 * Built-in character classifier.
 *
 * ASCII letters, digits and '_' are kind 0; every other ASCII
 * character is kind 1.  A non-ASCII character gets its wcsetno()
 * value plus one.
 *
 * DEPEND_ON_ANSIC: the L notation for character constants is new in
 * ANSI C; a K&R compiler won't work.
 */
static int
_wckind_c_locale(wchar_t wc)
{
	if (!iswascii(wc))
		return (wcsetno(wc) + 1);
	if (iswalnum(wc) || wc == L'_')
		return (0);
	return (1);
}
715
716/*
717 * header_chk -
718 * Called when done looking for a set mail header lines.
719 * Either a blank line was seen, or EOF was reached.
720 *
721 * Verifies if current hdrbuf of potential mail header lines
722 * is really a mail header.  A mail header must be at least 2
723 * lines and more than half of them must start with one of the
724 * known mail header strings in headnames.
725 *
726 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid
727 * mail header.  Otherwise, it sets hdr_state to flush_hdr.
728 *
729 * h_lines = hdrbuf index for next line to be saved;
730 *	     also indicates current # of lines in potential header
731 */
732static void
733header_chk(void)
734{
735	wchar_t  *cp; 		/* ptr to current char of line */
736	wchar_t **hp; 		/* ptr to current char of a valid */
737				/* mail header string */
738	int	  l;		/* index */
739				/*
740				 * number of lines in hdrbuf that look
741				 * like mail header lines (start with
742				 * a known mail header prefix)
743				 */
744	int	 hdrcount = 0;
745		/* header must have at least 2 lines (h_lines > 1) */
746		if (h_lines < 2) {
747			hdr_state = flush_hdr;
748			return;
749		}
750		/*
751		 * go through each line in hdrbuf and see how many
752		 * look like mail header lines
753		 */
754		for (l = 0; l < h_lines; l++) {
755			/* skip initial blanks */
756			for (cp = hdrbuf[l]; *cp == L' '; cp++) {
757			}
758			for (hp = &headnames[0]; *hp != (wchar_t *)0; hp++)
759				if (ispref(*hp, cp)) {
760					hdrcount++;
761					break;
762				}
763		}
764		/*
765		 * if over half match, we'll assume this is a header;
766		 * set hdr_state to indicate whether to treat
767		 * these lines as mail header (do_hdr) or not (flush_hdr)
768		 */
769		if (hdrcount > h_lines / 2)
770			hdr_state = do_hdr;
771		else
772			hdr_state = flush_hdr;
773}
774
775/*
776 * fill_hdrbuf -
777 * Save given input line into next element of hdrbuf,
778 * as a potential mail header line, to be processed later
779 * once we decide whether or not the contents of hdrbuf is
780 * really a mail header, via header_chk().
781 *
782 * Does not allow hdrbuf to exceed MAXLINES lines.
783 * Dynamically allocates space for each line.  If we are unable
784 * to allocate space for the current string, stop special mail
785 * header preservation at this point and continue formatting
786 * without it.
787 */
788static void
789fill_hdrbuf(wchar_t line[])
790{
791	wchar_t *cp;	/* pointer to characters in input line */
792	int	 i;	/* index into characters a hdrbuf line */
793
794	if (h_lines >= MAXLINES) {
795		/*
796		 * if we run over MAXLINES potential mail header
797		 * lines, stop checking--this is most likely NOT a
798		 * mail header; flush out the hdrbuf, then process
799		 * the current 'line' normally.
800		 */
801		hdr_state = flush_hdr;
802		process_hdrbuf();
803		prefix(line);
804		return;
805	}
806	hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) *
807	    (wslen(line) + 1));
808	if (hdrbuf[h_lines] == NULL) {
809		perror("malloc");
810		fprintf(stderr, "fmt: unable to do mail header preservation\n");
811		errs++;
812		/*
813		 * Can't process mail header; flush current contents
814		 * of mail header and continue with no more mail
815		 * header processing
816		 */
817		if (h_lines == 0)
818			/* hdrbuf is empty; process this line normally */
819			prefix(line);
820		else {
821			hdr_state = flush_hdr;
822			for (i = 0; i < h_lines; i++) {
823				prefix(hdrbuf[i]);
824				free(hdrbuf[i]);
825			}
826			h_lines = 0;
827		}
828		hdr_state = off;
829		return;
830	}
831	/* save this line as a potential mail header line */
832	for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++) {
833	}
834	h_lines++;
835}
836
837/*
838 * process_hdrbuf -
839 * Outputs the lines currently stored in hdrbuf, according
840 * to the current hdr_state value, assumed to be either do_hdr
841 * or flush_hdr.
842 * This should be called after doing a header_chk() to verify
843 * the hdrbuf and set the hdr_state flag.
844 */
845static void
846process_hdrbuf(void)
847{
848int i;
849
850	for (i = 0; i < h_lines; i++) {
851		prefix(hdrbuf[i]);
852		free(hdrbuf[i]);
853	}
854	hdr_state = not_in_hdr;
855	h_lines = 0;
856}
857