1/*
2 * Copyright (c) 1998-2003, 2006, 2013 Proofpoint, Inc. and its suppliers.
3 *	All rights reserved.
4 * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5 * Copyright (c) 1994
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * By using this file, you agree to the terms and conditions set
9 * forth in the LICENSE file which can be found at the top level of
10 * the sendmail distribution.
11 *
12 */
13
14#include <sendmail.h>
15#include <string.h>
16
17SM_RCSID("@(#)$Id: mime.c,v 8.149 2013-11-22 20:51:56 ca Exp $")
18
19/*
20**  MIME support.
21**
22**	I am indebted to John Beck of Hewlett-Packard, who contributed
23**	his code to me for inclusion.  As it turns out, I did not use
24**	his code since he used a "minimum change" approach that used
25**	several temp files, and I wanted a "minimum impact" approach
26**	that would avoid copying.  However, looking over his code
27**	helped me cement my understanding of the problem.
28**
29**	I also looked at, but did not directly use, Nathaniel
30**	Borenstein's "code.c" module.  Again, it functioned as
31**	a file-to-file translator, which did not fit within my
32**	design bounds, but it was a useful base for understanding
33**	the problem.
34*/
35
/*
**  MIME7TO8_OLD selects which end-of-line check (CHK_EOL) the base64
**  decoder in mime7to8() is compiled with.  Unless the build says
**  otherwise, the "old" algorithm is used.
*/

#if !defined(MIME7TO8_OLD)
# define MIME7TO8_OLD	1
#endif /* !defined(MIME7TO8_OLD) */
40
41#if MIME8TO7
42static int	isboundary __P((char *, char **));
43static int	mimeboundary __P((char *, char **));
44static int	mime_getchar __P((SM_FILE_T *, char **, int *));
45static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
46
/*
**  Character sets for hex and base64 encoding.
**
**	These are pure lookup tables: they are only ever indexed, never
**	written, so they are const-qualified to make an accidental store
**	a compile-time error.
*/

static const char	Base16Code[] =	"0123456789ABCDEF";
static const char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names for the MBT_* codes above, indexed by code (debugging) */
static const char	*const MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/* consulted by mime_getchar_crlf(): expand NL to CR LF when true */
static bool	MapNLtoCRLF;
63
64/*
65**  MIME8TO7 -- output 8 bit body in 7 bit format
66**
67**	The header has already been output -- this has to do the
68**	8 to 7 bit conversion.  It would be easy if we didn't have
69**	to deal with nested formats (multipart/xxx and message/rfc822).
70**
71**	We won't be called if we don't have to do a conversion, and
72**	appropriate MIME-Version: and Content-Type: fields have been
73**	output.  Any Content-Transfer-Encoding: field has not been
74**	output, and we can add it here.
75**
76**	Parameters:
77**		mci -- mailer connection information.
78**		header -- the header for this body part.
79**		e -- envelope.
80**		boundaries -- the currently pending message boundaries.
81**			NULL if we are processing the outer portion.
82**		flags -- to tweak processing.
83**		level -- recursion level.
84**
85**	Returns:
86**		An indicator of what terminated the message part:
87**		  MBT_FINAL -- the final boundary
88**		  MBT_INTERMED -- an intermediate boundary
89**		  MBT_NOTSEP -- an end of file
90**		  SM_IO_EOF -- I/O error occurred
91*/
92
93struct args
94{
95	char	*a_field;	/* name of field */
96	char	*a_value;	/* value of that field */
97};
98
99int
100mime8to7(mci, header, e, boundaries, flags, level)
101	register MCI *mci;
102	HDR *header;
103	register ENVELOPE *e;
104	char **boundaries;
105	int flags;
106	int level;
107{
108	register char *p;
109	int linelen;
110	int blen;
111	int bt;
112	off_t offset;
113	size_t sectionsize, sectionhighbits;
114	int i;
115	char *type;
116	char *subtype;
117	char *cte;
118	char **pvp;
119	int argc = 0;
120	char *bp;
121	bool use_qp = false;
122	struct args argv[MAXMIMEARGS];
123	char bbuf[128];
124	char buf[MAXLINE];
125	char pvpbuf[MAXLINE];
126	extern unsigned char MimeTokenTab[256];
127
128	if (level > MAXMIMENESTING)
129	{
130		if (!bitset(EF_TOODEEP, e->e_flags))
131		{
132			if (tTd(43, 4))
133				sm_dprintf("mime8to7: too deep, level=%d\n",
134					   level);
135			usrerr("mime8to7: recursion level %d exceeded",
136				level);
137			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
138		}
139	}
140	if (tTd(43, 1))
141	{
142		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
143		if (boundaries[0] == NULL)
144			sm_dprintf(" <none>");
145		else
146		{
147			for (i = 0; boundaries[i] != NULL; i++)
148				sm_dprintf(" %s", boundaries[i]);
149		}
150		sm_dprintf("\n");
151	}
152	MapNLtoCRLF = true;
153	p = hvalue("Content-Transfer-Encoding", header);
154	if (p == NULL ||
155	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
156			   MimeTokenTab, false)) == NULL ||
157	    pvp[0] == NULL)
158	{
159		cte = NULL;
160	}
161	else
162	{
163		cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
164		cte = sm_rpool_strdup_x(e->e_rpool, buf);
165	}
166
167	type = subtype = NULL;
168	p = hvalue("Content-Type", header);
169	if (p == NULL)
170	{
171		if (bitset(M87F_DIGEST, flags))
172			p = "message/rfc822";
173		else
174			p = "text/plain";
175	}
176	if (p != NULL &&
177	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
178			   MimeTokenTab, false)) != NULL &&
179	    pvp[0] != NULL)
180	{
181		if (tTd(43, 40))
182		{
183			for (i = 0; pvp[i] != NULL; i++)
184				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
185		}
186		type = *pvp++;
187		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
188		    *++pvp != NULL)
189		{
190			subtype = *pvp++;
191		}
192
193		/* break out parameters */
194		while (*pvp != NULL && argc < MAXMIMEARGS)
195		{
196			/* skip to semicolon separator */
197			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
198				pvp++;
199			if (*pvp++ == NULL || *pvp == NULL)
200				break;
201
202			/* complain about empty values */
203			if (strcmp(*pvp, ";") == 0)
204			{
205				usrerr("mime8to7: Empty parameter in Content-Type header");
206
207				/* avoid bounce loops */
208				e->e_flags |= EF_DONT_MIME;
209				continue;
210			}
211
212			/* extract field name */
213			argv[argc].a_field = *pvp++;
214
215			/* see if there is a value */
216			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
217			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
218			{
219				argv[argc].a_value = *pvp;
220				argc++;
221			}
222		}
223	}
224
225	/* check for disaster cases */
226	if (type == NULL)
227		type = "-none-";
228	if (subtype == NULL)
229		subtype = "-none-";
230
231	/* don't propagate some flags more than one level into the message */
232	flags &= ~M87F_DIGEST;
233
234	/*
235	**  Check for cases that can not be encoded.
236	**
237	**	For example, you can't encode certain kinds of types
238	**	or already-encoded messages.  If we find this case,
239	**	just copy it through.
240	*/
241
242	(void) sm_snprintf(buf, sizeof(buf), "%.100s/%.100s", type, subtype);
243	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
244		flags |= M87F_NO8BIT;
245
246# ifdef USE_B_CLASS
247	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
248		MapNLtoCRLF = false;
249# endif
250	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
251		use_qp = true;
252
253	/*
254	**  Multipart requires special processing.
255	**
256	**	Do a recursive descent into the message.
257	*/
258
259	if (sm_strcasecmp(type, "multipart") == 0 &&
260	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)) &&
261	    !bitset(EF_TOODEEP, e->e_flags)
262	   )
263	{
264
265		if (sm_strcasecmp(subtype, "digest") == 0)
266			flags |= M87F_DIGEST;
267
268		for (i = 0; i < argc; i++)
269		{
270			if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
271				break;
272		}
273		if (i >= argc || argv[i].a_value == NULL)
274		{
275			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
276				i >= argc ? "missing" : "bogus", p);
277			p = "---";
278
279			/* avoid bounce loops */
280			e->e_flags |= EF_DONT_MIME;
281		}
282		else
283		{
284			p = argv[i].a_value;
285			unfoldstripquotes(p);
286		}
287		if (sm_strlcpy(bbuf, p, sizeof(bbuf)) >= sizeof(bbuf))
288		{
289			usrerr("mime8to7: multipart boundary \"%s\" too long",
290				p);
291
292			/* avoid bounce loops */
293			e->e_flags |= EF_DONT_MIME;
294		}
295
296		if (tTd(43, 1))
297			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
298				bbuf);
299		for (i = 0; i < MAXMIMENESTING; i++)
300		{
301			if (boundaries[i] == NULL)
302				break;
303		}
304		if (i >= MAXMIMENESTING)
305		{
306			if (tTd(43, 4))
307				sm_dprintf("mime8to7: too deep, i=%d\n", i);
308			if (!bitset(EF_TOODEEP, e->e_flags))
309				usrerr("mime8to7: multipart nesting boundary too deep");
310
311			/* avoid bounce loops */
312			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
313		}
314		else
315		{
316			boundaries[i] = bbuf;
317			boundaries[i + 1] = NULL;
318		}
319		mci->mci_flags |= MCIF_INMIME;
320
321		/* skip the early "comment" prologue */
322		if (!putline("", mci))
323			goto writeerr;
324		mci->mci_flags &= ~MCIF_INHEADER;
325		bt = MBT_FINAL;
326		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
327					sizeof(buf))) >= 0)
328		{
329			bt = mimeboundary(buf, boundaries);
330			if (bt != MBT_NOTSEP)
331				break;
332			if (!putxline(buf, blen, mci,
333					PXLF_MAPFROM|PXLF_STRIP8BIT))
334				goto writeerr;
335			if (tTd(43, 99))
336				sm_dprintf("  ...%s", buf);
337		}
338		if (sm_io_eof(e->e_dfp))
339			bt = MBT_FINAL;
340		while (bt != MBT_FINAL)
341		{
342			auto HDR *hdr = NULL;
343
344			(void) sm_strlcpyn(buf, sizeof(buf), 2, "--", bbuf);
345			if (!putline(buf, mci))
346				goto writeerr;
347			if (tTd(43, 35))
348				sm_dprintf("  ...%s\n", buf);
349			collect(e->e_dfp, false, &hdr, e, false);
350			if (tTd(43, 101))
351				putline("+++after collect", mci);
352			if (!putheader(mci, hdr, e, flags))
353				goto writeerr;
354			if (tTd(43, 101))
355				putline("+++after putheader", mci);
356			bt = mime8to7(mci, hdr, e, boundaries, flags,
357				      level + 1);
358			if (bt == SM_IO_EOF)
359				goto writeerr;
360		}
361		(void) sm_strlcpyn(buf, sizeof(buf), 3, "--", bbuf, "--");
362		if (!putline(buf, mci))
363			goto writeerr;
364		if (tTd(43, 35))
365			sm_dprintf("  ...%s\n", buf);
366		boundaries[i] = NULL;
367		mci->mci_flags &= ~MCIF_INMIME;
368
369		/* skip the late "comment" epilogue */
370		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
371					sizeof(buf))) >= 0)
372		{
373			bt = mimeboundary(buf, boundaries);
374			if (bt != MBT_NOTSEP)
375				break;
376			if (!putxline(buf, blen, mci,
377					PXLF_MAPFROM|PXLF_STRIP8BIT))
378				goto writeerr;
379			if (tTd(43, 99))
380				sm_dprintf("  ...%s", buf);
381		}
382		if (sm_io_eof(e->e_dfp))
383			bt = MBT_FINAL;
384		if (tTd(43, 3))
385			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
386				MimeBoundaryNames[bt]);
387		return bt;
388	}
389
390	/*
391	**  Message/xxx types -- recurse exactly once.
392	**
393	**	Class 's' is predefined to have "rfc822" only.
394	*/
395
396	if (sm_strcasecmp(type, "message") == 0)
397	{
398		if (!wordinclass(subtype, 's') ||
399		    bitset(EF_TOODEEP, e->e_flags))
400		{
401			flags |= M87F_NO8BIT;
402		}
403		else
404		{
405			auto HDR *hdr = NULL;
406
407			if (!putline("", mci))
408				goto writeerr;
409
410			mci->mci_flags |= MCIF_INMIME;
411			collect(e->e_dfp, false, &hdr, e, false);
412			if (tTd(43, 101))
413				putline("+++after collect", mci);
414			if (!putheader(mci, hdr, e, flags))
415				goto writeerr;
416			if (tTd(43, 101))
417				putline("+++after putheader", mci);
418			if (hvalue("MIME-Version", hdr) == NULL &&
419			    !bitset(M87F_NO8TO7, flags) &&
420			    !putline("MIME-Version: 1.0", mci))
421				goto writeerr;
422			bt = mime8to7(mci, hdr, e, boundaries, flags,
423				      level + 1);
424			mci->mci_flags &= ~MCIF_INMIME;
425			return bt;
426		}
427	}
428
429	/*
430	**  Non-compound body type
431	**
432	**	Compute the ratio of seven to eight bit characters;
433	**	use that as a heuristic to decide how to do the
434	**	encoding.
435	*/
436
437	sectionsize = sectionhighbits = 0;
438	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
439	{
440		/* remember where we were */
441		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
442		if (offset == -1)
443			syserr("mime8to7: cannot sm_io_tell on %cf%s",
444			       DATAFL_LETTER, e->e_id);
445
446		/* do a scan of this body type to count character types */
447		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
448					sizeof(buf))) >= 0)
449		{
450			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
451				break;
452			for (i = 0; i < blen; i++)
453			{
454				/* count bytes with the high bit set */
455				sectionsize++;
456				if (bitset(0200, buf[i]))
457					sectionhighbits++;
458			}
459
460			/*
461			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
462			**  assume base64.  This heuristic avoids double-reading
463			**  large graphics or video files.
464			*/
465
466			if (sectionsize >= 4096 &&
467			    sectionhighbits > sectionsize / 4)
468				break;
469		}
470
471		/* return to the original offset for processing */
472		/* XXX use relative seeks to handle >31 bit file sizes? */
473		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
474			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
475			       DATAFL_LETTER, e->e_id);
476		else
477			sm_io_clearerr(e->e_dfp);
478	}
479
480	/*
481	**  Heuristically determine encoding method.
482	**	If more than 1/8 of the total characters have the
483	**	eighth bit set, use base64; else use quoted-printable.
484	**	However, only encode binary encoded data as base64,
485	**	since otherwise the NL=>CRLF mapping will be a problem.
486	*/
487
488	if (tTd(43, 8))
489	{
490		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
491			(long) sectionhighbits, (long) sectionsize,
492			cte == NULL ? "[none]" : cte,
493			type == NULL ? "[none]" : type,
494			subtype == NULL ? "[none]" : subtype);
495	}
496	if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
497		sectionsize = sectionhighbits;
498	linelen = 0;
499	bp = buf;
500	if (sectionhighbits == 0)
501	{
502		/* no encoding necessary */
503		if (cte != NULL &&
504		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
505			   mci->mci_flags) &&
506		    !bitset(M87F_NO8TO7, flags))
507		{
508			/*
509			**  Skip _unless_ in MIME mode and potentially
510			**  converting from 8 bit to 7 bit MIME.  See
511			**  putheader() for the counterpart where the
512			**  CTE header is skipped in the opposite
513			**  situation.
514			*/
515
516			(void) sm_snprintf(buf, sizeof(buf),
517				"Content-Transfer-Encoding: %.200s", cte);
518			if (!putline(buf, mci))
519				goto writeerr;
520			if (tTd(43, 36))
521				sm_dprintf("  ...%s\n", buf);
522		}
523		if (!putline("", mci))
524			goto writeerr;
525		mci->mci_flags &= ~MCIF_INHEADER;
526		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
527					sizeof(buf))) >= 0)
528		{
529			if (!bitset(MCIF_INLONGLINE, mci->mci_flags))
530			{
531				bt = mimeboundary(buf, boundaries);
532				if (bt != MBT_NOTSEP)
533					break;
534			}
535			if (!putxline(buf, blen, mci,
536				      PXLF_MAPFROM|PXLF_NOADDEOL))
537				goto writeerr;
538		}
539		if (sm_io_eof(e->e_dfp))
540			bt = MBT_FINAL;
541	}
542	else if (!MapNLtoCRLF ||
543		 (sectionsize / 8 < sectionhighbits && !use_qp))
544	{
545		/* use base64 encoding */
546		int c1, c2;
547
548		if (tTd(43, 36))
549			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
550		if (!putline("Content-Transfer-Encoding: base64", mci))
551			goto writeerr;
552		(void) sm_snprintf(buf, sizeof(buf),
553			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
554			MyHostName, e->e_id);
555		if (!putline(buf, mci) || !putline("", mci))
556			goto writeerr;
557		mci->mci_flags &= ~MCIF_INHEADER;
558		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
559			SM_IO_EOF)
560		{
561			if (linelen > 71)
562			{
563				*bp = '\0';
564				if (!putline(buf, mci))
565					goto writeerr;
566				linelen = 0;
567				bp = buf;
568			}
569			linelen += 4;
570			*bp++ = Base64Code[(c1 >> 2)];
571			c1 = (c1 & 0x03) << 4;
572			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
573			if (c2 == SM_IO_EOF)
574			{
575				*bp++ = Base64Code[c1];
576				*bp++ = '=';
577				*bp++ = '=';
578				break;
579			}
580			c1 |= (c2 >> 4) & 0x0f;
581			*bp++ = Base64Code[c1];
582			c1 = (c2 & 0x0f) << 2;
583			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
584			if (c2 == SM_IO_EOF)
585			{
586				*bp++ = Base64Code[c1];
587				*bp++ = '=';
588				break;
589			}
590			c1 |= (c2 >> 6) & 0x03;
591			*bp++ = Base64Code[c1];
592			*bp++ = Base64Code[c2 & 0x3f];
593		}
594		*bp = '\0';
595		if (!putline(buf, mci))
596			goto writeerr;
597	}
598	else
599	{
600		/* use quoted-printable encoding */
601		int c1, c2;
602		int fromstate;
603		BITMAP256 badchars;
604
605		/* set up map of characters that must be mapped */
606		clrbitmap(badchars);
607		for (c1 = 0x00; c1 < 0x20; c1++)
608			setbitn(c1, badchars);
609		clrbitn('\t', badchars);
610		for (c1 = 0x7f; c1 < 0x100; c1++)
611			setbitn(c1, badchars);
612		setbitn('=', badchars);
613		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
614			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
615				setbitn(*p, badchars);
616
617		if (tTd(43, 36))
618			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
619		if (!putline("Content-Transfer-Encoding: quoted-printable",
620				mci))
621			goto writeerr;
622		(void) sm_snprintf(buf, sizeof(buf),
623			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
624			MyHostName, e->e_id);
625		if (!putline(buf, mci) || !putline("", mci))
626			goto writeerr;
627		mci->mci_flags &= ~MCIF_INHEADER;
628		fromstate = 0;
629		c2 = '\n';
630		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
631			SM_IO_EOF)
632		{
633			if (c1 == '\n')
634			{
635				if (c2 == ' ' || c2 == '\t')
636				{
637					*bp++ = '=';
638					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
639					*bp++ = Base16Code[c2 & 0x0f];
640				}
641				if (buf[0] == '.' && bp == &buf[1])
642				{
643					buf[0] = '=';
644					*bp++ = Base16Code[('.' >> 4) & 0x0f];
645					*bp++ = Base16Code['.' & 0x0f];
646				}
647				*bp = '\0';
648				if (!putline(buf, mci))
649					goto writeerr;
650				linelen = fromstate = 0;
651				bp = buf;
652				c2 = c1;
653				continue;
654			}
655			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
656			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
657			{
658				*bp++ = '=';
659				*bp++ = '2';
660				*bp++ = '0';
661				linelen += 3;
662			}
663			else if (c2 == ' ' || c2 == '\t')
664			{
665				*bp++ = c2;
666				linelen++;
667			}
668			if (linelen > 72 &&
669			    (linelen > 75 || c1 != '.' ||
670			     (linelen > 73 && c2 == '.')))
671			{
672				if (linelen > 73 && c2 == '.')
673					bp--;
674				else
675					c2 = '\n';
676				*bp++ = '=';
677				*bp = '\0';
678				if (!putline(buf, mci))
679					goto writeerr;
680				linelen = fromstate = 0;
681				bp = buf;
682				if (c2 == '.')
683				{
684					*bp++ = '.';
685					linelen++;
686				}
687			}
688			if (bitnset(bitidx(c1), badchars))
689			{
690				*bp++ = '=';
691				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
692				*bp++ = Base16Code[c1 & 0x0f];
693				linelen += 3;
694			}
695			else if (c1 != ' ' && c1 != '\t')
696			{
697				if (linelen < 4 && c1 == "From"[linelen])
698					fromstate++;
699				*bp++ = c1;
700				linelen++;
701			}
702			c2 = c1;
703		}
704
705		/* output any saved character */
706		if (c2 == ' ' || c2 == '\t')
707		{
708			*bp++ = '=';
709			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
710			*bp++ = Base16Code[c2 & 0x0f];
711			linelen += 3;
712		}
713
714		if (linelen > 0 || boundaries[0] != NULL)
715		{
716			*bp = '\0';
717			if (!putline(buf, mci))
718				goto writeerr;
719		}
720
721	}
722	if (tTd(43, 3))
723		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
724	return bt;
725
726  writeerr:
727	return SM_IO_EOF;
728}
729/*
730**  MIME_GETCHAR -- get a character for MIME processing
731**
732**	Treats boundaries as SM_IO_EOF.
733**
734**	Parameters:
735**		fp -- the input file.
736**		boundaries -- the current MIME boundaries.
737**		btp -- if the return value is SM_IO_EOF, *btp is set to
738**			the type of the boundary.
739**
740**	Returns:
741**		The next character in the input stream.
742*/
743
744static int
745mime_getchar(fp, boundaries, btp)
746	register SM_FILE_T *fp;
747	char **boundaries;
748	int *btp;
749{
750	int c;
751	static unsigned char *bp = NULL;
752	static int buflen = 0;
753	static bool atbol = true;	/* at beginning of line */
754	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
755	static unsigned char buf[128];	/* need not be a full line */
756	int start = 0;			/* indicates position of - in buffer */
757
758	if (buflen == 1 && *bp == '\n')
759	{
760		/* last \n in buffer may be part of next MIME boundary */
761		c = *bp;
762	}
763	else if (buflen > 0)
764	{
765		buflen--;
766		return *bp++;
767	}
768	else
769		c = sm_io_getc(fp, SM_TIME_DEFAULT);
770	bp = buf;
771	buflen = 0;
772	if (c == '\n')
773	{
774		/* might be part of a MIME boundary */
775		*bp++ = c;
776		atbol = true;
777		c = sm_io_getc(fp, SM_TIME_DEFAULT);
778		if (c == '\n')
779		{
780			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
781			return c;
782		}
783		start = 1;
784	}
785	if (c != SM_IO_EOF)
786		*bp++ = c;
787	else
788		bt = MBT_FINAL;
789	if (atbol && c == '-')
790	{
791		/* check for a message boundary */
792		c = sm_io_getc(fp, SM_TIME_DEFAULT);
793		if (c != '-')
794		{
795			if (c != SM_IO_EOF)
796				*bp++ = c;
797			else
798				bt = MBT_FINAL;
799			buflen = bp - buf - 1;
800			bp = buf;
801			return *bp++;
802		}
803
804		/* got "--", now check for rest of separator */
805		*bp++ = '-';
806		while (bp < &buf[sizeof(buf) - 2] &&
807		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
808		       c != '\n')
809		{
810			*bp++ = c;
811		}
812		*bp = '\0';	/* XXX simply cut off? */
813		bt = mimeboundary((char *) &buf[start], boundaries);
814		switch (bt)
815		{
816		  case MBT_FINAL:
817		  case MBT_INTERMED:
818			/* we have a message boundary */
819			buflen = 0;
820			*btp = bt;
821			return SM_IO_EOF;
822		}
823
824		if (bp < &buf[sizeof(buf) - 2] && c != SM_IO_EOF)
825			*bp++ = c;
826	}
827
828	atbol = c == '\n';
829	buflen = bp - buf - 1;
830	if (buflen < 0)
831	{
832		*btp = bt;
833		return SM_IO_EOF;
834	}
835	bp = buf;
836	return *bp++;
837}
838/*
839**  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
840**
841**	Parameters:
842**		fp -- the input file.
843**		boundaries -- the current MIME boundaries.
844**		btp -- if the return value is SM_IO_EOF, *btp is set to
845**			the type of the boundary.
846**
847**	Returns:
848**		The next character in the input stream.
849*/
850
851static int
852mime_getchar_crlf(fp, boundaries, btp)
853	register SM_FILE_T *fp;
854	char **boundaries;
855	int *btp;
856{
857	static bool sendlf = false;
858	int c;
859
860	if (sendlf)
861	{
862		sendlf = false;
863		return '\n';
864	}
865	c = mime_getchar(fp, boundaries, btp);
866	if (c == '\n' && MapNLtoCRLF)
867	{
868		sendlf = true;
869		return '\r';
870	}
871	return c;
872}
873/*
874**  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
875**
876**	Parameters:
877**		line -- the input line.
878**		boundaries -- the set of currently pending boundaries.
879**
880**	Returns:
881**		MBT_NOTSEP -- if this is not a separator line
882**		MBT_INTERMED -- if this is an intermediate separator
883**		MBT_FINAL -- if this is a final boundary
884**		MBT_SYNTAX -- if this is a boundary for the wrong
885**			enclosure -- i.e., a syntax error.
886*/
887
888static int
889mimeboundary(line, boundaries)
890	register char *line;
891	char **boundaries;
892{
893	int type = MBT_NOTSEP;
894	int i;
895	int savec;
896
897	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
898		return MBT_NOTSEP;
899	i = strlen(line);
900	if (i > 0 && line[i - 1] == '\n')
901		i--;
902
903	/* strip off trailing whitespace */
904	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'
905#if _FFR_MIME_CR_OK
906		|| line[i - 1] == '\r'
907#endif
908	       ))
909		i--;
910	savec = line[i];
911	line[i] = '\0';
912
913	if (tTd(43, 5))
914		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
915
916	/* check for this as an intermediate boundary */
917	if (isboundary(&line[2], boundaries) >= 0)
918		type = MBT_INTERMED;
919	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
920	{
921		/* check for a final boundary */
922		line[i - 2] = '\0';
923		if (isboundary(&line[2], boundaries) >= 0)
924			type = MBT_FINAL;
925		line[i - 2] = '-';
926	}
927
928	line[i] = savec;
929	if (tTd(43, 5))
930		sm_dprintf("%s\n", MimeBoundaryNames[type]);
931	return type;
932}
933/*
934**  DEFCHARSET -- return default character set for message
935**
936**	The first choice for character set is for the mailer
937**	corresponding to the envelope sender.  If neither that
938**	nor the global configuration file has a default character
939**	set defined, return "unknown-8bit" as recommended by
940**	RFC 1428 section 3.
941**
942**	Parameters:
943**		e -- the envelope for this message.
944**
945**	Returns:
946**		The default character set for that mailer.
947*/
948
949char *
950defcharset(e)
951	register ENVELOPE *e;
952{
953	if (e != NULL && e->e_from.q_mailer != NULL &&
954	    e->e_from.q_mailer->m_defcharset != NULL)
955		return e->e_from.q_mailer->m_defcharset;
956	if (DefaultCharSet != NULL)
957		return DefaultCharSet;
958	return "unknown-8bit";
959}
960/*
961**  ISBOUNDARY -- is a given string a currently valid boundary?
962**
963**	Parameters:
964**		line -- the current input line.
965**		boundaries -- the list of valid boundaries.
966**
967**	Returns:
968**		The index number in boundaries if the line is found.
969**		-1 -- otherwise.
970**
971*/
972
973static int
974isboundary(line, boundaries)
975	char *line;
976	char **boundaries;
977{
978	register int i;
979
980	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
981	{
982		if (strcmp(line, boundaries[i]) == 0)
983			return i;
984	}
985	return -1;
986}
987#endif /* MIME8TO7 */
988
989#if MIME7TO8
990static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
991
992/*
993**  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
994**
995**  This is a hack. Supports translating the two 7-bit body-encodings
996**  (quoted-printable and base64) to 8-bit coded bodies.
997**
998**  There is not much point in supporting multipart here, as the UA
999**  will be able to deal with encoded MIME bodies if it can parse MIME
1000**  multipart messages.
1001**
1002**  Note also that we won't be called unless it is a text/plain MIME
1003**  message, encoded base64 or QP and mailer flag '9' has been defined
1004**  on mailer.
1005**
1006**  Contributed by Marius Olaffson <marius@rhi.hi.is>.
1007**
1008**	Parameters:
1009**		mci -- mailer connection information.
1010**		header -- the header for this body part.
1011**		e -- envelope.
1012**
1013**	Returns:
1014**		true iff body was written successfully
1015*/
1016
/*
**  Base64 decoding table: maps an ASCII code to its 6-bit value,
**  or to -1 if the character is not part of the base64 alphabet.
**
**	The element type is explicitly signed: whether plain char is
**	signed is implementation-defined, and with an unsigned char
**	the -1 entries would read back as 255.
*/

static const signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* value of base64 character c, -1 if c is out of range or not base64 */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
1030
1031bool
1032mime7to8(mci, header, e)
1033	register MCI *mci;
1034	HDR *header;
1035	register ENVELOPE *e;
1036{
1037	int pxflags, blen;
1038	register char *p;
1039	char *cte;
1040	char **pvp;
1041	unsigned char *fbufp;
1042	char buf[MAXLINE];
1043	unsigned char fbuf[MAXLINE + 1];
1044	char pvpbuf[MAXLINE];
1045	extern unsigned char MimeTokenTab[256];
1046
1047	p = hvalue("Content-Transfer-Encoding", header);
1048	if (p == NULL ||
1049	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
1050			   MimeTokenTab, false)) == NULL ||
1051	    pvp[0] == NULL)
1052	{
1053		/* "can't happen" -- upper level should have caught this */
1054		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
1055
1056		/* avoid bounce loops */
1057		e->e_flags |= EF_DONT_MIME;
1058
1059		/* cheap failsafe algorithm -- should work on text/plain */
1060		if (p != NULL)
1061		{
1062			(void) sm_snprintf(buf, sizeof(buf),
1063				"Content-Transfer-Encoding: %s", p);
1064			if (!putline(buf, mci))
1065				goto writeerr;
1066		}
1067		if (!putline("", mci))
1068			goto writeerr;
1069		mci->mci_flags &= ~MCIF_INHEADER;
1070		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1071					sizeof(buf))) >= 0)
1072		{
1073			if (!putxline(buf, blen, mci, PXLF_MAPFROM))
1074				goto writeerr;
1075		}
1076		return true;
1077	}
1078	cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
1079	cte = sm_rpool_strdup_x(e->e_rpool, buf);
1080
1081	mci->mci_flags |= MCIF_INHEADER;
1082	if (!putline("Content-Transfer-Encoding: 8bit", mci))
1083		goto writeerr;
1084	(void) sm_snprintf(buf, sizeof(buf),
1085		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1086		cte, MyHostName, e->e_id);
1087	if (!putline(buf, mci) || !putline("", mci))
1088		goto writeerr;
1089	mci->mci_flags &= ~MCIF_INHEADER;
1090
1091	/*
1092	**  Translate body encoding to 8-bit.  Supports two types of
1093	**  encodings; "base64" and "quoted-printable". Assume qp if
1094	**  it is not base64.
1095	*/
1096
1097	pxflags = PXLF_MAPFROM;
1098	if (sm_strcasecmp(cte, "base64") == 0)
1099	{
1100		int c1, c2, c3, c4;
1101
1102		fbufp = fbuf;
1103		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1104			SM_IO_EOF)
1105		{
1106			if (SM_ISSPACE(c1))
1107				continue;
1108
1109			do
1110			{
1111				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1112			} while (SM_ISSPACE(c2));
1113			if (c2 == SM_IO_EOF)
1114				break;
1115
1116			do
1117			{
1118				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1119			} while (SM_ISSPACE(c3));
1120			if (c3 == SM_IO_EOF)
1121				break;
1122
1123			do
1124			{
1125				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1126			} while (SM_ISSPACE(c4));
1127			if (c4 == SM_IO_EOF)
1128				break;
1129
1130			if (c1 == '=' || c2 == '=')
1131				continue;
1132			c1 = CHAR64(c1);
1133			c2 = CHAR64(c2);
1134
1135#if MIME7TO8_OLD
1136#define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1137			++fbufp;
1138#else /* MIME7TO8_OLD */
1139#define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1140		{					\
1141			++fbufp;			\
1142			pxflags |= PXLF_NOADDEOL;	\
1143		}
1144#endif /* MIME7TO8_OLD */
1145
1146#define PUTLINE64	\
1147	do		\
1148	{		\
1149		if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])	\
1150		{							\
1151			CHK_EOL;					\
1152			if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) \
1153				goto writeerr;				\
1154			pxflags &= ~PXLF_NOADDEOL;			\
1155			fbufp = fbuf;					\
1156		}	\
1157	} while (0)
1158
1159			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1160			PUTLINE64;
1161			if (c3 == '=')
1162				continue;
1163			c3 = CHAR64(c3);
1164			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1165			PUTLINE64;
1166			if (c4 == '=')
1167				continue;
1168			c4 = CHAR64(c4);
1169			*fbufp = ((c3 & 0x03) << 6) | c4;
1170			PUTLINE64;
1171		}
1172	}
1173	else
1174	{
1175		int off;
1176
1177		/* quoted-printable */
1178		pxflags |= PXLF_NOADDEOL;
1179		fbufp = fbuf;
1180		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1181				   sizeof(buf)) >= 0)
1182		{
1183			off = mime_fromqp((unsigned char *) buf, &fbufp,
1184					  &fbuf[MAXLINE] - fbufp);
1185again:
1186			if (off < -1)
1187				continue;
1188
1189			if (fbufp - fbuf > 0)
1190			{
1191				if (!putxline((char *) fbuf, fbufp - fbuf - 1,
1192						mci, pxflags))
1193					goto writeerr;
1194			}
1195			fbufp = fbuf;
1196			if (off >= 0 && buf[off] != '\0')
1197			{
1198				off = mime_fromqp((unsigned char *) (buf + off),
1199						  &fbufp,
1200						  &fbuf[MAXLINE] - fbufp);
1201				goto again;
1202			}
1203		}
1204	}
1205
1206	/* force out partial last line */
1207	if (fbufp > fbuf)
1208	{
1209		*fbufp = '\0';
1210		if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags))
1211			goto writeerr;
1212	}
1213
1214	/*
1215	**  The decoded text may end without an EOL.  Since this function
1216	**  is only called for text/plain MIME messages, it is safe to
1217	**  add an extra one at the end just in case.  This is a hack,
1218	**  but so is auto-converting MIME in the first place.
1219	*/
1220
1221	if (!putline("", mci))
1222		goto writeerr;
1223
1224	if (tTd(43, 3))
1225		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1226	return true;
1227
1228  writeerr:
1229	return false;
1230}
1231/*
1232**  The following is based on Borenstein's "codes.c" module, with simplifying
1233**  changes as we do not deal with multipart, and to do the translation in-core,
1234**  with an attempt to prevent overrun of output buffers.
1235**
1236**  What is needed here are changes to defend this code better against
1237**  bad encodings. Questionable to always return 0xFF for bad mappings.
1238*/
1239
/*
**  Hex digit decoding table: maps an ASCII code to the value of the
**  hex digit it represents (upper or lower case), or to -1 if the
**  character is not a hex digit.
**
**	The element type is explicitly signed: whether plain char is
**	signed is implementation-defined, and with an unsigned char
**	the -1 entries would read back as 255, so that comparing the
**	result of HEXCHAR() against -1 could never succeed.
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* value of hex digit c, -1 if c is out of range or not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])
1253
/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decodes from infile until its terminating '\0', until a
**	newline has been copied, or until the output limit is
**	reached.  *outfile is advanced past everything stored.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- in/out: address of the current write
**			position in the output buffer
**		maxlen -- room left in the output buffer, including
**			one byte for the '\0' that is appended
**
**	Returns:
**		-2 if "=" is followed by a newline or by something
**			that is not a hex digit; no '\0' is appended
**			in that case
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int hi, lo;
	int raw;
	int produced = 0;	/* bytes stored so far, '\0' not counted */
	int pos = 0;		/* index of the next unread input byte */

	/* keep one byte for the '\0'; insist on room for a data byte too */
	maxlen -= 1;
	if (maxlen < 1)
		return 0;

	for (;;)
	{
		hi = infile[pos++];
		if (hi == '\0' || produced >= maxlen)
			break;

		if (hi != '=')
		{
			/* literal byte; a newline ends this call */
			produced++;
			*(*outfile)++ = hi;
			if (hi == '\n')
				break;
			continue;
		}

		/* "=" must be followed by two hex digits */
		hi = infile[pos++];
		if (hi == '\0')
			break;
		if (hi == '\n')
		{
			/* ignore it and the rest of the buffer */
			return -2;
		}
		hi = HEXCHAR(hi);
		if (hi == -1)
		{
			/* ignore it and the rest of the buffer */
			return -2;
		}

		/* skip ahead to the next hex digit for the low nibble */
		lo = -1;
		while ((raw = infile[pos++]) != '\0')
		{
			lo = HEXCHAR(raw);
			if (lo != -1)
				break;
		}
		if (lo == -1)
			break;

		produced++;
		*(*outfile)++ = (hi << 4) | lo;
	}

	*(*outfile)++ = '\0';
	if (produced < maxlen)
		return -1;
	return pos - 1;
}
1326#endif /* MIME7TO8 */
1327