1/*	$NetBSD: mime_codecs.c,v 1.8 2009/01/18 01:29:57 lukem Exp $	*/
2
3/*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Anon Ymous.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * This module contains all mime related codecs.  Typically there are
34 * two versions: one operating on buffers and one operating on files.
35 * All exported routines have a "mime_" prefix.  The file oriented
36 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
37 * equivalent buffer based version.
38 *
39 * The file based API should be:
40 *
41 *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
42 *
 * XXX - currently this naming convention has not been adhered to.
44 *
45 * where the cookie is a generic way to pass arguments to the routine.
46 * This way these routines can be run by run_function() in mime.c.
47 *
48 * The buffer based API is not as rigid.
49 */
50
51#ifdef MIME_SUPPORT
52
53#include <sys/cdefs.h>
54#ifndef __lint__
55__RCSID("$NetBSD: mime_codecs.c,v 1.8 2009/01/18 01:29:57 lukem Exp $");
56#endif /* not __lint__ */
57
58#include <assert.h>
59#include <iconv.h>
60#include <stdio.h>
61#include <stdlib.h>
62#include <util.h>
63
64#include "def.h"
65#include "extern.h"
66#include "mime_codecs.h"
67
68
69#ifdef CHARSET_SUPPORT
70/************************************************************************
71 * Core character set conversion routines.
72 *
73 */
74
75/*
76 * Fault-tolerant iconv() function.
77 *
78 * This routine was borrowed from nail-11.25/mime.c and modified.  It
79 * tries to handle errno == EILSEQ by restarting at the next input
80 * byte (is this a good idea?).  All other errors are handled by the
81 * caller.
82 */
83PUBLIC size_t
84mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
85{
86	size_t sz = 0;
87
88	while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
89			&& errno == EILSEQ) {
90		if (*outbleft > 0) {
91			*(*outb)++ = '?';
92			(*outbleft)--;
93		} else {
94			**outb = '\0';
95			return E2BIG;
96		}
97		if (*inbleft > 0) {
98			(*inb)++;
99			(*inbleft)--;
100		} else {
101			**outb = '\0';
102			break;
103		}
104	}
105	return sz;
106}
107
108/*
109 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
110 * We don't care about the invalid character count, so don't bother
111 * with __iconv().  We do care about robustness, so call iconv_ft()
112 * above to try to recover from errors.
113 */
114#define INBUFSIZE 1024
115#define OUTBUFSIZE (INBUFSIZE * 2)
116
117PUBLIC void
118mime_ficonv(FILE *fi, FILE *fo, void *cookie)
119{
120	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
121	const char *in;
122	size_t inbytes, outbytes, ret;
123	iconv_t cd;
124
125	/*
126	 * NOTE: iconv_t is actually a pointer typedef, so this
127	 * conversion is not what it appears to be!
128	 */
129	cd = (iconv_t)cookie;
130
131	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
132		in = inbuf;
133		while (inbytes > 0) {
134			out = outbuf;
135			outbytes = OUTBUFSIZE;
136			ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
137			if (ret == (size_t)-1 && errno != E2BIG) {
138				if (errno != EINVAL || in == inbuf) {
139					/* XXX - what is proper here?
140					 * Just copy out the remains? */
141					(void)fprintf(fo,
142					    "\n\t[ iconv truncated message: %s ]\n\n",
143					    strerror(errno));
144					return;
145				}
146				/*
147				 * If here: errno == EINVAL && in != inbuf
148				 */
149				/* incomplete input character */
150				(void)memmove(inbuf, in, inbytes);
151				ret = fread(inbuf + inbytes, 1,
152				    INBUFSIZE - inbytes, fi);
153				if (ret == 0) {
154					if (feof(fi)) {
155						(void)fprintf(fo,
156						    "\n\t[ unexpected end of file; "
157						    "the last character is "
158						    "incomplete. ]\n\n");
159						return;
160					}
161					(void)fprintf(fo,
162					    "\n\t[ fread(): %s ]\n\n",
163					    strerror(errno));
164					return;
165				}
166				in = inbuf;
167				inbytes += ret;
168
169			}
170			if (outbytes < OUTBUFSIZE)
171				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
172		}
173	}
174	/* reset the shift state of the output buffer */
175	outbytes = OUTBUFSIZE;
176	out = outbuf;
177	ret = iconv(cd, NULL, NULL, &out, &outbytes);
178	if (ret == (size_t)-1) {
179		(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
180		    strerror(errno));
181		return;
182	}
183	if (outbytes < OUTBUFSIZE)
184		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
185}
186
187#endif	/* CHARSET_SUPPORT */
188
189
190
191/************************************************************************
192 * Core base64 routines
193 *
194 * Defined in sec 6.8 of RFC 2045.
195 */
196
/*
 * Decode a base64 buffer.
 *
 *   bin:  buffer to hold the decoded (binary) result (see note 1).
 *   b64:  buffer holding the encoded (base64) source.
 *   cnt:  number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 *   1) It is the caller's responsibility to ensure that bin is large
 *      enough to hold the result (3 bytes for every 4 bytes of
 *      source).
 *   2) The b64 buffer should always contain a multiple of 4 bytes of
 *      data.  A trailing partial group is rejected with -1 rather
 *      than read beyond 'cnt'.
 *   3) Every group is validated before anything from it is stored.
 *      '=' padding is only accepted in the last one or two positions
 *      of a group, and decoding stops at the first padded group.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])
#define is_b64(v)   ((v) < 64)	/* neither EQU nor BAD */

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a, b, c, d;

		/* a partial group would be read past the end of b64 */
		if ((size_t)(end - q) < 4)
			return -1;

		a = uchar64(q[0]);
		b = uchar64(q[1]);
		c = uchar64(q[2]);
		d = uchar64(q[3]);

		/* the first two characters can never be padding */
		if (!is_b64(a) || !is_b64(b))
			return -1;

		if (c == EQU) {	/* got "==": one byte in this group */
			if (d != EQU)
				return -1;
			*p++ = ((a << 2) | ((b & 0x30) >> 4));
			break;
		}
		if (!is_b64(c))
			return -1;

		if (d == EQU) {	/* got '=': two bytes in this group */
			*p++ = ((a << 2) | ((b & 0x30) >> 4));
			*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
			break;
		}
		if (!is_b64(d))
			return -1;

		*p++ = ((a << 2) | ((b & 0x30) >> 4));
		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
		*p++ = (((c & 0x03) << 6) | d);
	}

#undef is_b64
#undef uchar64
#undef EQU
#undef BAD

	return (ssize_t)(p - (unsigned char *)bin);
}
262
/*
 * Encode a buffer as a base64 result.
 *
 *   b64:  buffer to hold the encoded (base64) result (see note).
 *   bin:  buffer holding the binary source.
 *   cnt:  number of bytes in the bin buffer to encode.
 *
 * Every 3 source bytes produce 4 output characters; a final group of
 * 1 or 2 bytes is padded with '=' to a full 4 characters.  Nothing is
 * written when cnt is 0, and the result is not NUL terminated.  Only
 * the first 'cnt' bytes of 'bin' are ever read.
 *
 * NOTE: it is the caller's responsibility to ensure that 'b64' is
 *       large enough to hold the result.
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char *)bin;

	/* all the complete 3 byte groups */
	for (/*EMPTY*/; cnt >= 3; cnt -= 3, p += 3, b64 += 4) {
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[((p[0] & 0x3) << 4) | ((p[1] & 0xf0) >> 4)];
		b64[2] = b64table[((p[1] & 0xf) << 2) | ((p[2] & 0xc0) >> 6)];
		b64[3] = b64table[p[2] & 0x3f];
	}

	if (cnt == 0)
		return;

	/* a tail of 1 or 2 bytes: pad, touching only bytes that exist */
	b64[0] = b64table[p[0] >> 2];
	if (cnt == 1) {
		b64[1] = b64table[((p[0] & 0x3) << 4)];
		b64[2] = '=';
	} else {
		b64[1] = b64table[((p[0] & 0x3) << 4) | ((p[1] & 0xf0) >> 4)];
		b64[2] = b64table[((p[1] & 0xf) << 2)];
	}
	b64[3] = '=';
}
308
309
310#define MIME_BASE64_LINE_MAX	(4 * 19)  /* max line length is 76: see RFC2045 sec 6.8 */
311
312static void
313mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
314{
315	static char b64[MIME_BASE64_LINE_MAX];
316	static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
317	size_t cnt;
318	char *cp;
319	size_t limit;
320#ifdef __lint__
321	cookie = cookie;
322#endif
323	limit = 0;
324	if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
325		limit = (size_t)atoi(cp);
326	if (limit == 0 || limit > sizeof(b64))
327		limit = sizeof(b64);
328
329	limit = 3 * roundup(limit, 4) / 4;
330	if (limit < 3)
331		limit = 3;
332
333	while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
334		mime_bintob64(b64, mem, (size_t)cnt);
335		(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
336		(void)putc('\n', fo);
337	}
338}
339
340static void
341mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
342{
343	char *line;
344	size_t len;
345	char *buf;
346	size_t buflen;
347
348	buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
349	buf = emalloc(buflen);
350
351	while ((line = fgetln(fi, &len)) != NULL) {
352		ssize_t binlen;
353		if (line[len-1] == '\n') /* forget the trailing newline */
354			len--;
355
356		/* trash trailing white space */
357		for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
358			continue;
359
360		/* skip leading white space */
361		for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
362			continue;
363
364		if (len == 0)
365			break;
366
367		if (3 * len > 4 * buflen) {
368			buflen *= 2;
369			buf = erealloc(buf, buflen);
370		}
371
372		binlen = mime_b64tobin(buf, line, len);
373
374		if (binlen <= 0) {
375			(void)fprintf(fo, "WARN: invalid base64 encoding\n");
376			break;
377		}
378		(void)fwrite(buf, 1, (size_t)binlen, fo);
379	}
380
381	free(buf);
382
383	if (add_lf)
384		(void)fputc('\n', fo);
385}
386
387
388/************************************************************************
389 * Core quoted-printable routines.
390 *
 * Note: the header QP routines are slightly different and buried
392 * inside mime_header.c
393 */
394
/*
 * Decide whether the byte at 'p' has to be written as a quoted-printable
 * "=XX" escape.
 *
 *   p:    the byte under consideration.
 *   end:  one past the last byte of the input line; used to look ahead.
 *   l:    length of the current output line so far (0 at its start).
 *
 * Return: non-zero if the byte must be quoted, 0 otherwise.
 *
 * Bytes above 0x7f, control characters (other than tab and newline),
 * DEL and '=' are always quoted.  A space or tab is quoted only when
 * the next byte is a newline.  At the start of an output line, an 'F'
 * followed by "rom" and at least one more byte is quoted, and so is a
 * '.' directly followed by a newline.
 */
static int
mustquote(const unsigned char *p, const unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	int flag = *p > 0x7f ? Q : quotetab[*p];

	switch (flag) {
	case N:
		return 0;
	case Q:
		return 1;
	case SP:	/* trailing white space */
		return p + 1 < end && p[1] == '\n';
	default:
		break;
	}

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';

	if (flag == XD)	/* line may consist of a single dot */
		return p + 1 < end && p[1] == '\n';

	errx(EXIT_FAILURE,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* every table macro defined above is released again here */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
453
454
#define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line to 'fo' in quoted-printable form.
 *
 *   fo:     output stream.
 *   line:   the input line; it need not be NUL terminated.
 *   len:    number of bytes in 'line'.
 *   limit:  maximum length of an output line; must not exceed
 *           MIME_QUOTED_LINE_MAX.
 *
 * Bytes selected by mustquote() are written as "=XX".  A soft line
 * break ("=\n") is inserted whenever the next item, plus the '=' of a
 * possible soft break after it, would not fit within 'limit'.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char *)line;
	end = beg + len;
	l = 0;
	for (p = beg; p < end; p++) {
		if (mustquote(p, end, l)) {
			if (l + 4 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
			continue;
		}
		if (*p == '\n') {
			if (p > beg && p[-1] == '\r') {
				/*
				 * The newline of a CRLF is written as
				 * "=0A" followed by a soft break.  Those
				 * 4 characters count against the limit
				 * like any other quoted byte.
				 */
				if (l + 4 > limit)
					(void)fputs("=\n", fo);
				(void)fputs("=0A=", fo);
			}
			(void)putc('\n', fo);
			l = 0;	/* a new output line starts here */
			continue;
		}
		if (l + 2 > limit) {
			(void)fputs("=\n", fo);
			l = 0;
		}
		(void)putc(*p, fo);
		l++;
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && is_WSP(end[-1]))
		(void)fputs("=\n", fo);
}
499
500static void
501mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
502{
503	char *line;
504	size_t len;
505	char *cp;
506	size_t limit;
507
508#ifdef __lint__
509	cookie = cookie;
510#endif
511	limit = 0;
512	if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
513		limit = (size_t)atoi(cp);
514	if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
515		limit = MIME_QUOTED_LINE_MAX;
516	if (limit < 4)
517		limit = 4;
518
519	while ((line = fgetln(fi, &len)) != NULL)
520		fput_quoted_line(fo, line, len, limit);
521}
522
523static void
524mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
525{
526	char *line;
527	size_t len;
528
529#ifdef __lint__
530	cookie = cookie;
531#endif
532	while ((line = fgetln(fi, &len)) != NULL) {
533		char *p;
534		char *end;
535
536		end = line + len;
537		for (p = line; p < end; p++) {
538			if (*p == '=') {
539				p++;
540				while (p < end && is_WSP(*p))
541					p++;
542				if (*p != '\n' && p + 1 < end) {
543					int c;
544					char buf[3];
545
546					buf[0] = *p++;
547					buf[1] = *p;
548					buf[2] = '\0';
549					c = (int)strtol(buf, NULL, 16);
550					(void)fputc(c, fo);
551				}
552			}
553			else
554				(void)fputc(*p, fo);
555		}
556	}
557}
558
559
560/************************************************************************
561 * Routines to select the codec by name.
562 */
563
564PUBLIC void
565mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
566{
567	int c;
568
569#ifdef __lint__
570	cookie = cookie;
571#endif
572	while ((c = getc(fi)) != EOF)
573		(void)putc(c, fo);
574
575	(void)fflush(fo);
576	if (ferror(fi)) {
577		warn("read");
578		rewind(fi);
579		return;
580	}
581	if (ferror(fo)) {
582		warn("write");
583		(void)Fclose(fo);
584		rewind(fi);
585		return;
586	}
587}
588
589
590static const struct transfer_encoding_s {
591	const char 	*name;
592	mime_codec_t	enc;
593	mime_codec_t	dec;
594} transfer_encoding_tbl[] = {
595	{ MIME_TRANSFER_7BIT,	mime_fio_copy,	    mime_fio_copy },
596	{ MIME_TRANSFER_8BIT, 	mime_fio_copy,	    mime_fio_copy },
597	{ MIME_TRANSFER_BINARY,	mime_fio_copy,	    mime_fio_copy },
598	{ MIME_TRANSFER_QUOTED, mime_fQP_encode,    mime_fQP_decode },
599	{ MIME_TRANSFER_BASE64, mime_fB64_encode,   mime_fB64_decode },
600	{ NULL,			NULL,		    NULL },
601};
602
603
604PUBLIC mime_codec_t
605mime_fio_encoder(const char *ename)
606{
607	const struct transfer_encoding_s *tep = NULL;
608
609	if (ename == NULL)
610		return NULL;
611
612	for (tep = transfer_encoding_tbl; tep->name; tep++)
613		if (strcasecmp(tep->name, ename) == 0)
614			break;
615	return tep->enc;
616}
617
618PUBLIC mime_codec_t
619mime_fio_decoder(const char *ename)
620{
621	const struct transfer_encoding_s *tep = NULL;
622
623	if (ename == NULL)
624		return NULL;
625
626	for (tep = transfer_encoding_tbl; tep->name; tep++)
627		if (strcasecmp(tep->name, ename) == 0)
628			break;
629	return tep->dec;
630}
631
632/*
633 * This is for use in complete.c and mime.c to get the list of
634 * encoding names without exposing the transfer_encoding_tbl[].  The
635 * first name is returned if called with a pointer to a NULL pointer.
636 * Subsequent calls with the same cookie give successive names.  A
637 * NULL return indicates the end of the list.
638 */
639PUBLIC const char *
640mime_next_encoding_name(const void **cookie)
641{
642	const struct transfer_encoding_s *tep;
643
644	tep = *cookie;
645	if (tep == NULL)
646		tep = transfer_encoding_tbl;
647
648	*cookie = tep->name ? &tep[1] : NULL;
649
650	return tep->name;
651}
652
653#endif /* MIME_SUPPORT */
654