1/*	$NetBSD: mime_state.c,v 1.3 2020/03/18 19:05:16 christos Exp $	*/
2
3/*++
4/* NAME
5/*	mime_state 3
6/* SUMMARY
7/*	MIME parser state machine
8/* SYNOPSIS
9/*	#include <mime_state.h>
10/*
11/*	MIME_STATE *mime_state_alloc(flags, head_out, head_end,
12/*					 body_out, body_end,
13/*					 err_print, context)
14/*	int	flags;
15/*	void	(*head_out)(void *ptr, int header_class,
16/*				const HEADER_OPTS *header_info,
17/*				VSTRING *buf, off_t offset);
18/*	void	(*head_end)(void *ptr);
19/*	void	(*body_out)(void *ptr, int rec_type,
20/*				const char *buf, ssize_t len,
21/*				off_t offset);
22/*	void	(*body_end)(void *ptr);
/*	void	(*err_print)(void *ptr, int err_flag,
/*				const char *text, ssize_t len);
24/*	void	*context;
25/*
26/*	int	mime_state_update(state, rec_type, buf, len)
27/*	MIME_STATE *state;
28/*	int	rec_type;
29/*	const char *buf;
30/*	ssize_t	len;
31/*
32/*	MIME_STATE *mime_state_free(state)
33/*	MIME_STATE *state;
34/*
35/*	const char *mime_state_error(error_code)
36/*	int	error_code;
37/*
38/*	typedef struct {
39/* .in +4
40/*		const int code;		/* internal error code */
41/*		const char *dsn;	/* RFC 3463 */
42/*		const char *text;	/* descriptive text */
43/* .in -4
44/*	} MIME_STATE_DETAIL;
45/*
46/*	const MIME_STATE_DETAIL *mime_state_detail(error_code)
47/*	int	error_code;
48/* DESCRIPTION
49/*	This module implements a one-pass MIME processor with optional
50/*	8-bit to quoted-printable conversion.
51/*
52/*	In order to fend off denial of service attacks, message headers
53/*	are truncated at or above var_header_limit bytes, message boundary
54/*	strings are truncated at var_mime_bound_len bytes, and the multipart
55/*	nesting level is limited to var_mime_maxdepth levels.
56/*
57/*	mime_state_alloc() creates a MIME state machine. The machine
58/*	is delivered in its initial state, expecting content type
59/*	text/plain, 7-bit data.
60/*
61/*	mime_state_update() updates the MIME state machine according
62/*	to the input record type and the record content.
63/*	The result value is the bit-wise OR of zero or more of the following:
64/* .IP MIME_ERR_TRUNC_HEADER
65/*	A message header was longer than var_header_limit bytes.
66/* .IP MIME_ERR_NESTING
67/*	The MIME structure was nested more than var_mime_maxdepth levels.
68/* .IP MIME_ERR_8BIT_IN_HEADER
69/*	A message header contains 8-bit data. This is always illegal.
70/* .IP MIME_ERR_8BIT_IN_7BIT_BODY
71/*	A MIME header specifies (or defaults to) 7-bit content, but the
72/*	corresponding message body or body parts contain 8-bit content.
73/* .IP MIME_ERR_ENCODING_DOMAIN
74/*	An entity of type "message" or "multipart" specifies the wrong
75/*	content transfer encoding domain, or specifies a transformation
76/*	(quoted-printable, base64) instead of a domain (7bit, 8bit,
77/*	or binary).
78/* .PP
79/*	mime_state_free() releases storage for a MIME state machine,
80/*	and conveniently returns a null pointer.
81/*
82/*	mime_state_error() returns a string representation for the
83/*	specified error code. When multiple errors are specified it
84/*	reports what it deems the most serious one.
85/*
86/*	mime_state_detail() returns a table entry with error
87/*	information for the specified error code. When multiple
88/*	errors are specified it reports what it deems the most
89/*	serious one.
90/*
91/*	Arguments:
92/* .IP body_out
93/*	The output routine for body lines. It receives unmodified input
94/*	records, or the result of 8-bit -> 7-bit conversion.
95/* .IP body_end
96/*	A null pointer, or a pointer to a routine that is called after
97/*	the last input record is processed.
98/* .IP buf
99/*	Buffer with the content of a logical or physical message record.
100/* .IP context
101/*	Caller context that is passed on to the head_out and body_out
102/*	routines.
103/* .IP enc_type
104/*	The content encoding: MIME_ENC_7BIT or MIME_ENC_8BIT.
105/* .IP err_print
/*	Null pointer, or pointer to a function that is called with
/*	arguments: the application context, the error type, the
/*	offending input, and the length of the offending input.
/*	Only one instance per error type is reported.
109/* .IP flags
110/*	Special processing options. Specify the bit-wise OR of zero or
111/*	more of the following:
112/* .RS
113/* .IP MIME_OPT_DISABLE_MIME
114/*	Pay no attention to Content-* message headers, and switch to
115/*	message body state at the end of the primary message headers.
116/* .IP MIME_OPT_REPORT_TRUNC_HEADER
117/*	Report errors that set the MIME_ERR_TRUNC_HEADER error flag
118/*	(see above).
119/* .IP MIME_OPT_REPORT_8BIT_IN_HEADER
120/*	Report errors that set the MIME_ERR_8BIT_IN_HEADER error
121/*	flag (see above). This rarely stops legitimate mail.
122/* .IP MIME_OPT_REPORT_8BIT_IN_7BIT_BODY
123/*	Report errors that set the MIME_ERR_8BIT_IN_7BIT_BODY error
124/*	flag (see above). This currently breaks Majordomo mail that is
125/*	forwarded for approval, because Majordomo does not propagate
126/*	MIME type information from the enclosed message to the message
127/*	headers of the request for approval.
128/* .IP MIME_OPT_REPORT_ENCODING_DOMAIN
129/*	Report errors that set the MIME_ERR_ENCODING_DOMAIN error
130/*	flag (see above).
131/* .IP MIME_OPT_REPORT_NESTING
132/*	Report errors that set the MIME_ERR_NESTING error flag
133/*	(see above).
134/* .IP MIME_OPT_DOWNGRADE
135/*	Transform content that claims to be 8-bit into quoted-printable.
136/*	Where appropriate, update Content-Transfer-Encoding: message
137/*	headers.
138/* .RE
139/* .sp
140/*	For convenience, MIME_OPT_NONE requests no special processing.
141/* .IP header_class
142/*	Specifies where a message header is located.
143/* .RS
144/* .IP MIME_HDR_PRIMARY
145/*	In the primary message header section.
146/* .IP MIME_HDR_MULTIPART
147/*	In the header section after a multipart boundary string.
148/* .IP MIME_HDR_NESTED
149/*	At the start of a nested (e.g., message/rfc822) message.
150/* .RE
151/* .sp
152/*	For convenience, the macros MIME_HDR_FIRST and MIME_HDR_LAST
153/*	specify the range of MIME_HDR_MUMBLE macros.
154/* .sp
155/*	To find out if something is a MIME header at the beginning
156/*	of an RFC 822 message or an attached message, look at the
157/*	header_info argument.
158/* .IP header_info
159/*	Null pointer or information about the message header, see
160/*	header_opts(3).
161/* .IP head_out
162/*	The output routine that is invoked for outputting a message header.
163/*	A multi-line header is passed as one chunk of text with embedded
164/*	newlines.
165/*	It is the responsibility of the output routine to break the text
166/*	at embedded newlines, and to break up long text between newlines
167/*	into multiple output records.
168/*	Note: an output routine is explicitly allowed to modify the text.
169/* .IP head_end
170/*	A null pointer, or a pointer to a routine that is called after
171/*	the last message header in the first header block is processed.
172/* .IP len
173/*	Length of non-VSTRING input buffer.
174/* .IP offset
175/*	The offset in bytes from the start of the current block of message
176/*	headers or body lines. Line boundaries are counted as one byte.
177/* .IP rec_type
178/*	The input record type as defined in rec_type(3h). State is
179/*	updated for text records (REC_TYPE_NORM or REC_TYPE_CONT).
180/*	Some input records are stored internally in order to reconstruct
181/*	multi-line input.  Upon receipt of any non-text record type, all
182/*	stored input is flushed and the state is set to "body".
183/* .IP state
184/*	MIME parser state created with mime_state_alloc().
185/* BUGS
186/*	NOTE: when the end of headers is reached, mime_state_update()
187/*	may execute up to three call-backs before returning to the
188/*	caller: head_out(), head_end(), and body_out() or body_end().
189/*	As long as call-backs return no result, it is up to the
190/*	call-back routines to check if a previous call-back experienced
191/*	an error.
192/*
193/*	Different mail user agents treat malformed message boundary
194/*	strings in different ways. The Postfix MIME processor cannot
195/*	be bug-compatible with everything.
196/*
197/*	This module will not glue together multipart boundary strings that
198/*	span multiple input records.
199/*
200/*	This module will not glue together RFC 2231 formatted (boundary)
201/*	parameter values. RFC 2231 claims compatibility with existing
202/*	MIME processors. Splitting boundary strings is not backwards
203/*	compatible.
204/*
205/*	The "8-bit data inside 7-bit body" test is myopic. It is not aware
206/*	of any enclosing (message or multipart) encoding information.
207/*
208/*	If the input ends in data other than a hard line break, this module
209/*	will add a hard line break of its own. No line break is added to
210/*	empty input.
211/*
212/*	This code recognizes the obsolete form "headername :" but will
213/*	normalize it to the canonical form "headername:". Leaving the
214/*	obsolete form alone would cause too much trouble with existing code
215/*	that expects only the normalized form.
216/* SEE ALSO
217/*	msg(3) diagnostics interface
218/*	header_opts(3) header information lookup
219/*	RFC 822 (ARPA Internet Text Messages)
220/*	RFC 2045 (MIME: Format of internet message bodies)
221/*	RFC 2046 (MIME: Media types)
222/* DIAGNOSTICS
223/*	Fatal errors: memory allocation problem.
224/* LICENSE
225/* .ad
226/* .fi
227/*	The Secure Mailer license must be distributed with this software.
228/* HISTORY
229/* .ad
230/* .fi
231/*	This code was implemented from scratch after reading the RFC
232/*	documents. This was a relatively straightforward effort with
233/*	few if any surprises. Victor Duchovni of Morgan Stanley shared
234/*	his experiences with ambiguities in real-life MIME implementations.
235/*	Liviu Daia of the Romanian Academy shared his insights in some
236/*	of the darker corners.
237/* AUTHOR(S)
238/*	Wietse Venema
239/*	IBM T.J. Watson Research
240/*	P.O. Box 704
241/*	Yorktown Heights, NY 10598, USA
242/*
243/*	Wietse Venema
244/*	Google, Inc.
245/*	111 8th Avenue
246/*	New York, NY 10011, USA
247/*--*/
248
249/* System library. */
250
251#include <sys_defs.h>
252#include <stdarg.h>
253#include <ctype.h>
254#include <string.h>
255
256#ifdef STRCASECMP_IN_STRINGS_H
257#include <strings.h>
258#endif
259
260/* Utility library. */
261
262#include <mymalloc.h>
263#include <msg.h>
264#include <vstring.h>
265
266/* Global library. */
267
268#include <rec_type.h>
269#include <is_header.h>
270#include <header_opts.h>
271#include <mail_params.h>
272#include <header_token.h>
273#include <lex_822.h>
274#include <mime_state.h>
275
276/* Application-specific. */
277
278 /*
279  * Mime parser stack element for multipart content.
280  */
typedef struct MIME_STACK MIME_STACK;

/*
 * One entry per multipart boundary that is currently being tracked. New
 * entries are linked in front of the older ones.
 */
struct MIME_STACK {
    int     def_ctype;			/* MIME_CTYPE_* default for body parts */
    int     def_stype;			/* MIME_STYPE_* default for body parts */
    char   *boundary;			/* private copy of boundary string */
    ssize_t bound_len;			/* length of the stored boundary */
    MIME_STACK *next;			/* previously pushed entry, or null */
};
288
289 /*
290  * Mime parser state.
291  */
#define MIME_MAX_TOKEN		3	/* size of token[]; parse limit */

struct MIME_STATE {

    /*
     * Volatile members. These change while input records are processed.
     */
    int     curr_state;			/* one of the MIME_STATE_* codes */
    int     curr_ctype;			/* MIME_CTYPE_*: last or default */
    int     curr_stype;			/* MIME_STYPE_*: last or default */
    int     curr_encoding;		/* MIME_ENC_*: last or default */
    int     curr_domain;		/* 7bit/8bit/binary part of encoding */
    VSTRING *output_buffer;		/* saved header, or QP body output */
    int     prev_rec_type;		/* previous input record type */
    int     nesting_level;		/* boundary stack depth; starts at -1 */
    MIME_STACK *stack;			/* boundary stack, newest entry first */
    HEADER_TOKEN token[MIME_MAX_TOKEN];	/* header token array */
    VSTRING *token_buffer;		/* header parser scratch buffer */
    int     err_flags;			/* MIME_ERR_* bits seen so far */
    off_t   head_offset;		/* advanced by HEAD_OUT() */
    off_t   body_offset;		/* advanced by BODY_OUT() */

    /*
     * Static members. These are set once by mime_state_alloc().
     */
    int     static_flags;		/* MIME_OPT_* processing options */
    MIME_STATE_HEAD_OUT head_out;	/* header output routine, or null */
    MIME_STATE_ANY_END head_end;	/* end of primary header routine */
    MIME_STATE_BODY_OUT body_out;	/* body output routine, or null */
    MIME_STATE_ANY_END body_end;	/* end of body output routine */
    MIME_STATE_ERR_PRINT err_print;	/* error report routine, or null */
    void   *app_context;		/* passed as-is to all call-backs */
};
325
326 /*
327  * Content types and subtypes that we care about, either because we have to,
328  * or because we want to filter out broken MIME messages.
329  */
330#define MIME_CTYPE_OTHER	0
331#define MIME_CTYPE_TEXT		1
332#define MIME_CTYPE_MESSAGE	2
333#define MIME_CTYPE_MULTIPART	3
334
335#define MIME_STYPE_OTHER	0
336#define MIME_STYPE_PLAIN	1
337#define MIME_STYPE_RFC822	2
338#define MIME_STYPE_PARTIAL	3
339#define MIME_STYPE_EXTERN_BODY	4
340#define MIME_STYPE_GLOBAL	5
341
 /*
  * MIME parser states. The three header states reuse the public MIME_HDR_*
  * header class codes, so that HEAD_OUT() can hand curr_state to the
  * head_out call-back as its header class without any translation. The
  * body state is private to this file and takes the value after
  * MIME_HDR_NESTED.
  */
#define MIME_STATE_PRIMARY	MIME_HDR_PRIMARY	/* primary headers */
#define MIME_STATE_MULTIPART	MIME_HDR_MULTIPART	/* after --boundary */
#define MIME_STATE_NESTED	MIME_HDR_NESTED	/* message/rfc822 */
#define MIME_STATE_BODY		(MIME_HDR_NESTED + 1)
349
/*
 * SET_CURR_STATE - enter a new parser state, and restart the offset counter
 * of the segment that is being entered: the body counter for
 * MIME_STATE_BODY, the header counter for every other state.
 */
#define SET_CURR_STATE(ptr, state) do { \
	(ptr)->curr_state = (state); \
	if ((state) != MIME_STATE_BODY) \
	    (ptr)->head_offset = 0; \
	else \
	    (ptr)->body_offset = 0; \
    } while (0)

/*
 * SET_MIME_STATE - like SET_CURR_STATE, and also replace the current
 * content type, subtype, encoding and domain.
 */
#define SET_MIME_STATE(ptr, state, ctype, stype, encoding, domain) do { \
	SET_CURR_STATE((ptr), (state)); \
	(ptr)->curr_ctype = (ctype); \
	(ptr)->curr_stype = (stype); \
	(ptr)->curr_encoding = (encoding); \
	(ptr)->curr_domain = (domain); \
    } while (0)
369
370 /*
371  * MIME encodings and domains. We intentionally use the same codes for
372  * encodings and domains, so that we can easily find out whether a content
373  * transfer encoding header specifies a domain or whether it specifies
374  * domain+encoding, which is illegal for multipart/any and message/any.
375  */
typedef struct MIME_ENCODING {
    const char *name;			/* external representation */
    int     encoding;			/* internal representation */
    int     domain;			/* subset of encoding */
} MIME_ENCODING;

#define MIME_ENC_QP		1	/* encoding + domain */
#define MIME_ENC_BASE64		2	/* encoding + domain */
 /* These are defined in mime_state.h as part of the external interface. */
#ifndef MIME_ENC_7BIT
#define MIME_ENC_7BIT		7	/* domain only */
#define MIME_ENC_8BIT		8	/* domain only */
#define MIME_ENC_BINARY		9	/* domain only */
#endif

 /*
  * Content-Transfer-Encoding names and their internal codes. An entry whose
  * encoding equals its domain names a domain only; base64 and
  * quoted-printable are transformations that produce 7bit data. The table
  * ends with an entry that has a null name.
  */
static const MIME_ENCODING mime_encoding_map[] = {	/* RFC 2045 */
    {"7bit", MIME_ENC_7BIT, MIME_ENC_7BIT},	/* domain */
    {"8bit", MIME_ENC_8BIT, MIME_ENC_8BIT},	/* domain */
    {"binary", MIME_ENC_BINARY, MIME_ENC_BINARY},	/* domain */
    {"base64", MIME_ENC_BASE64, MIME_ENC_7BIT},	/* encoding */
    {"quoted-printable", MIME_ENC_QP, MIME_ENC_7BIT},	/* encoding */
    {0, 0, 0},				/* end of table */
};
399
400 /*
401  * Silly Little Macros.
402  */
403#define STR(x)		vstring_str(x)
404#define LEN(x)		VSTRING_LEN(x)
405#define END(x)		vstring_end(x)
406#define CU_CHAR_PTR(x)	((const unsigned char *) (x))
407
408#define REPORT_ERROR_LEN(state, err_type, text, len) do { \
409	if ((state->err_flags & err_type) == 0) { \
410	    if (state->err_print != 0) \
411		state->err_print(state->app_context, err_type, text, len); \
412	    state->err_flags |= err_type; \
413	} \
414    } while (0)
415
416#define REPORT_ERROR(state, err_type, text) do { \
417	const char *_text = text; \
418	ssize_t _len = strlen(text); \
419	REPORT_ERROR_LEN(state, err_type, _text, _len); \
420    } while (0)
421
422#define REPORT_ERROR_BUF(state, err_type, buf) \
423    REPORT_ERROR_LEN(state, err_type, STR(buf), LEN(buf))
424
425
426 /*
427  * Outputs and state changes are interleaved, so we must maintain separate
428  * offsets for header and body segments.
429  */
430#define HEAD_OUT(ptr, info, len) do { \
431	if ((ptr)->head_out) { \
432	    (ptr)->head_out((ptr)->app_context, (ptr)->curr_state, \
433			    (info), (ptr)->output_buffer, (ptr)->head_offset); \
434	    (ptr)->head_offset += (len) + 1; \
435	} \
436    } while(0)
437
438#define BODY_OUT(ptr, rec_type, text, len) do { \
439	if ((ptr)->body_out) { \
440	    (ptr)->body_out((ptr)->app_context, (rec_type), \
441			    (text), (len), (ptr)->body_offset); \
442	    (ptr)->body_offset += (len) + 1; \
443	} \
444    } while(0)
445
446/* mime_state_push - push boundary onto stack */
447
448static void mime_state_push(MIME_STATE *state, int def_ctype, int def_stype,
449			            const char *boundary)
450{
451    MIME_STACK *stack;
452
453    /*
454     * RFC 2046 mandates that a boundary string be up to 70 characters long.
455     * Some MTAs, including Postfix, include the fully-qualified MTA name
456     * which can be longer, so we are willing to handle boundary strings that
457     * exceed the RFC specification. We allow for message headers of up to
458     * var_header_limit characters. In order to avoid denial of service, we
459     * have to impose a configurable limit on the amount of text that we are
460     * willing to store as a boundary string. Despite this truncation way we
461     * will still correctly detect all intermediate boundaries and all the
462     * message headers that follow those boundaries.
463     */
464    state->nesting_level += 1;
465    stack = (MIME_STACK *) mymalloc(sizeof(*stack));
466    stack->def_ctype = def_ctype;
467    stack->def_stype = def_stype;
468    if ((stack->bound_len = strlen(boundary)) > var_mime_bound_len)
469	stack->bound_len = var_mime_bound_len;
470    stack->boundary = mystrndup(boundary, stack->bound_len);
471    stack->next = state->stack;
472    state->stack = stack;
473    if (msg_verbose)
474	msg_info("PUSH boundary %s", stack->boundary);
475}
476
477/* mime_state_pop - pop boundary from stack */
478
479static void mime_state_pop(MIME_STATE *state)
480{
481    MIME_STACK *stack;
482
483    if ((stack = state->stack) == 0)
484	msg_panic("mime_state_pop: there is no stack");
485    if (msg_verbose)
486	msg_info("POP boundary %s", stack->boundary);
487    state->nesting_level -= 1;
488    state->stack = stack->next;
489    myfree(stack->boundary);
490    myfree((void *) stack);
491}
492
493/* mime_state_alloc - create MIME state machine */
494
495MIME_STATE *mime_state_alloc(int flags,
496			             MIME_STATE_HEAD_OUT head_out,
497			             MIME_STATE_ANY_END head_end,
498			             MIME_STATE_BODY_OUT body_out,
499			             MIME_STATE_ANY_END body_end,
500			             MIME_STATE_ERR_PRINT err_print,
501			             void *context)
502{
503    MIME_STATE *state;
504
505    state = (MIME_STATE *) mymalloc(sizeof(*state));
506
507    /* Volatile members. */
508    state->err_flags = 0;
509    state->body_offset = 0;			/* XXX */
510    SET_MIME_STATE(state, MIME_STATE_PRIMARY,
511		   MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
512		   MIME_ENC_7BIT, MIME_ENC_7BIT);
513    state->output_buffer = vstring_alloc(100);
514    state->prev_rec_type = 0;
515    state->stack = 0;
516    state->token_buffer = vstring_alloc(1);
517    state->nesting_level = -1;			/* BC Fix 20170512 */
518
519    /* Static members. */
520    state->static_flags = flags;
521    state->head_out = head_out;
522    state->head_end = head_end;
523    state->body_out = body_out;
524    state->body_end = body_end;
525    state->err_print = err_print;
526    state->app_context = context;
527    return (state);
528}
529
530/* mime_state_free - destroy MIME state machine */
531
532MIME_STATE *mime_state_free(MIME_STATE *state)
533{
534    vstring_free(state->output_buffer);
535    while (state->stack)
536	mime_state_pop(state);
537    if (state->token_buffer)
538	vstring_free(state->token_buffer);
539    myfree((void *) state);
540    return (0);
541}
542
543/* mime_state_content_type - process content-type header */
544
545static void mime_state_content_type(MIME_STATE *state,
546				            const HEADER_OPTS *header_info)
547{
548    const char *cp;
549    ssize_t tok_count;
550    int     def_ctype;
551    int     def_stype;
552
553#define TOKEN_MATCH(tok, text) \
554    ((tok).type == HEADER_TOK_TOKEN && strcasecmp((tok).u.value, (text)) == 0)
555
556#define RFC2045_TSPECIALS	"()<>@,;:\\\"/[]?="
557
558#define PARSE_CONTENT_TYPE_HEADER(state, ptr) \
559    header_token(state->token, MIME_MAX_TOKEN, \
560	state->token_buffer, ptr, RFC2045_TSPECIALS, ';')
561
562    cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
563    if ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) > 0) {
564
565	/*
566	 * text/whatever. Right now we don't really care if it is plain or
567	 * not, but we may want to recognize subtypes later, and then this
568	 * code can serve as an example.
569	 */
570	if (TOKEN_MATCH(state->token[0], "text")) {
571	    state->curr_ctype = MIME_CTYPE_TEXT;
572	    if (tok_count >= 3
573		&& state->token[1].type == '/'
574		&& TOKEN_MATCH(state->token[2], "plain"))
575		state->curr_stype = MIME_STYPE_PLAIN;
576	    else
577		state->curr_stype = MIME_STYPE_OTHER;
578	    return;
579	}
580
581	/*
582	 * message/whatever body parts start with another block of message
583	 * headers that we may want to look at. The partial and external-body
584	 * subtypes cannot be subjected to 8-bit -> 7-bit conversion, so we
585	 * must properly recognize them.
586	 */
587	if (TOKEN_MATCH(state->token[0], "message")) {
588	    state->curr_ctype = MIME_CTYPE_MESSAGE;
589	    state->curr_stype = MIME_STYPE_OTHER;
590	    if (tok_count >= 3
591		&& state->token[1].type == '/') {
592		if (TOKEN_MATCH(state->token[2], "rfc822"))
593		    state->curr_stype = MIME_STYPE_RFC822;
594		else if (TOKEN_MATCH(state->token[2], "partial"))
595		    state->curr_stype = MIME_STYPE_PARTIAL;
596		else if (TOKEN_MATCH(state->token[2], "external-body"))
597		    state->curr_stype = MIME_STYPE_EXTERN_BODY;
598		else if (TOKEN_MATCH(state->token[2], "global"))
599		    state->curr_stype = MIME_STYPE_GLOBAL;
600	    }
601	    return;
602	}
603
604	/*
605	 * multipart/digest has default content type message/rfc822,
606	 * multipart/whatever has default content type text/plain.
607	 */
608	if (TOKEN_MATCH(state->token[0], "multipart")) {
609	    state->curr_ctype = MIME_CTYPE_MULTIPART;
610	    if (tok_count >= 3
611		&& state->token[1].type == '/'
612		&& TOKEN_MATCH(state->token[2], "digest")) {
613		def_ctype = MIME_CTYPE_MESSAGE;
614		def_stype = MIME_STYPE_RFC822;
615	    } else {
616		def_ctype = MIME_CTYPE_TEXT;
617		def_stype = MIME_STYPE_PLAIN;
618	    }
619
620	    /*
621	     * Yes, this is supposed to capture multiple boundary strings,
622	     * which are illegal and which could be used to hide content in
623	     * an implementation dependent manner. The code below allows us
624	     * to find embedded message headers as long as the sender uses
625	     * only one of these same-level boundary strings.
626	     *
627	     * Yes, this is supposed to ignore the boundary value type.
628	     */
629	    while ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) >= 0) {
630		if (tok_count >= 3
631		    && TOKEN_MATCH(state->token[0], "boundary")
632		    && state->token[1].type == '=') {
633		    if (state->nesting_level > var_mime_maxdepth) {
634			if (state->static_flags & MIME_OPT_REPORT_NESTING)
635			    REPORT_ERROR_BUF(state, MIME_ERR_NESTING,
636					     state->output_buffer);
637		    } else {
638			mime_state_push(state, def_ctype, def_stype,
639					state->token[2].u.value);
640		    }
641		}
642	    }
643	}
644	return;
645    }
646
647    /*
648     * other/whatever.
649     */
650    else {
651	state->curr_ctype = MIME_CTYPE_OTHER;
652	return;
653    }
654}
655
656/* mime_state_content_encoding - process content-transfer-encoding header */
657
658static void mime_state_content_encoding(MIME_STATE *state,
659				             const HEADER_OPTS *header_info)
660{
661    const char *cp;
662    const MIME_ENCODING *cmp;
663
664#define PARSE_CONTENT_ENCODING_HEADER(state, ptr) \
665    header_token(state->token, 1, state->token_buffer, ptr, (char *) 0, 0)
666
667    /*
668     * Do content-transfer-encoding header. Never set the encoding domain to
669     * something other than 7bit, 8bit or binary, even if we don't recognize
670     * the input.
671     */
672    cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
673    if (PARSE_CONTENT_ENCODING_HEADER(state, &cp) > 0
674	&& state->token[0].type == HEADER_TOK_TOKEN) {
675	for (cmp = mime_encoding_map; cmp->name != 0; cmp++) {
676	    if (strcasecmp(state->token[0].u.value, cmp->name) == 0) {
677		state->curr_encoding = cmp->encoding;
678		state->curr_domain = cmp->domain;
679		break;
680	    }
681	}
682    }
683}
684
685/* mime_state_enc_name - encoding to printable form */
686
687static const char *mime_state_enc_name(int encoding)
688{
689    const MIME_ENCODING *cmp;
690
691    for (cmp = mime_encoding_map; cmp->name != 0; cmp++)
692	if (encoding == cmp->encoding)
693	    return (cmp->name);
694    return ("unknown");
695}
696
697/* mime_state_downgrade - convert 8-bit data to quoted-printable */
698
699static void mime_state_downgrade(MIME_STATE *state, int rec_type,
700				         const char *text, ssize_t len)
701{
702    static char hexchars[] = "0123456789ABCDEF";
703    const unsigned char *cp;
704    int     ch;
705
706#define QP_ENCODE(buffer, ch) { \
707	VSTRING_ADDCH(buffer, '='); \
708	VSTRING_ADDCH(buffer, hexchars[(ch >> 4) & 0xff]); \
709	VSTRING_ADDCH(buffer, hexchars[ch & 0xf]); \
710    }
711
712    /*
713     * Insert a soft line break when the output reaches a critical length
714     * before we reach a hard line break.
715     */
716    for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++) {
717	/* Critical length before hard line break. */
718	if (LEN(state->output_buffer) > 72) {
719	    VSTRING_ADDCH(state->output_buffer, '=');
720	    VSTRING_TERMINATE(state->output_buffer);
721	    BODY_OUT(state, REC_TYPE_NORM,
722		     STR(state->output_buffer),
723		     LEN(state->output_buffer));
724	    VSTRING_RESET(state->output_buffer);
725	}
726	/* Append the next character. */
727	ch = *cp;
728	if ((ch < 32 && ch != '\t') || ch == '=' || ch > 126) {
729	    QP_ENCODE(state->output_buffer, ch);
730	} else {
731	    VSTRING_ADDCH(state->output_buffer, ch);
732	}
733    }
734
735    /*
736     * Flush output after a hard line break (i.e. the end of a REC_TYPE_NORM
737     * record). Fix trailing whitespace as per the RFC: in the worst case,
738     * the output length will grow from 73 characters to 75 characters.
739     */
740    if (rec_type == REC_TYPE_NORM) {
741	if (LEN(state->output_buffer) > 0
742	    && ((ch = END(state->output_buffer)[-1]) == ' ' || ch == '\t')) {
743	    vstring_truncate(state->output_buffer,
744			     LEN(state->output_buffer) - 1);
745	    QP_ENCODE(state->output_buffer, ch);
746	}
747	VSTRING_TERMINATE(state->output_buffer);
748	BODY_OUT(state, REC_TYPE_NORM,
749		 STR(state->output_buffer),
750		 LEN(state->output_buffer));
751	VSTRING_RESET(state->output_buffer);
752    }
753}
754
755/* mime_state_update - update MIME state machine */
756
757int     mime_state_update(MIME_STATE *state, int rec_type,
758			          const char *text, ssize_t len)
759{
760    int     input_is_text = (rec_type == REC_TYPE_NORM
761			     || rec_type == REC_TYPE_CONT);
762    MIME_STACK *sp;
763    const HEADER_OPTS *header_info;
764    const unsigned char *cp;
765
766#define SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type) do { \
767	state->prev_rec_type = rec_type; \
768	return (state->err_flags); \
769    } while (0)
770
771    /*
772     * Be sure to flush any partial output line that might still be buffered
773     * up before taking any other "end of input" actions.
774     */
775    if (!input_is_text && state->prev_rec_type == REC_TYPE_CONT)
776	mime_state_update(state, REC_TYPE_NORM, "", 0);
777
778    /*
779     * This message state machine is kept simple for the sake of robustness.
780     * Standards evolve over time, and we want to be able to correctly
781     * process messages that are not yet defined. This state machine knows
782     * about headers and bodies, understands that multipart/whatever has
783     * multiple body parts with a header and body, and that message/whatever
784     * has message headers at the start of a body part.
785     */
786    switch (state->curr_state) {
787
788	/*
789	 * First, deal with header information that we have accumulated from
790	 * previous input records. Discard text that does not fit in a header
791	 * buffer. Our limit is quite generous; Sendmail will refuse mail
792	 * with only 32kbyte in all the message headers combined.
793	 */
794    case MIME_STATE_PRIMARY:
795    case MIME_STATE_MULTIPART:
796    case MIME_STATE_NESTED:
797	if (LEN(state->output_buffer) > 0) {
798	    if (input_is_text) {
799		if (state->prev_rec_type == REC_TYPE_CONT) {
800		    if (LEN(state->output_buffer) < var_header_limit) {
801			vstring_strncat(state->output_buffer, text, len);
802		    } else {
803			if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
804			    REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
805					     state->output_buffer);
806		    }
807		    SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
808		}
809		if (IS_SPACE_TAB(*text)) {
810		    if (LEN(state->output_buffer) < var_header_limit) {
811			vstring_strcat(state->output_buffer, "\n");
812			vstring_strncat(state->output_buffer, text, len);
813		    } else {
814			if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
815			    REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
816					     state->output_buffer);
817		    }
818		    SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
819		}
820	    }
821
822	    /*
823	     * The input is (the beginning of) another message header, or is
824	     * not a message header, or is not even a text record. With no
825	     * more input to append to this saved header, do output
826	     * processing and reset the saved header buffer. Hold on to the
827	     * content transfer encoding header if we have to do a 8->7
828	     * transformation, because the proper information depends on the
829	     * content type header: message and multipart require a domain,
830	     * leaf entities have either a transformation or a domain.
831	     */
832	    if (LEN(state->output_buffer) > 0) {
833		header_info = header_opts_find(STR(state->output_buffer));
834		if (!(state->static_flags & MIME_OPT_DISABLE_MIME)
835		    && header_info != 0) {
836		    if (header_info->type == HDR_CONTENT_TYPE)
837			mime_state_content_type(state, header_info);
838		    if (header_info->type == HDR_CONTENT_TRANSFER_ENCODING)
839			mime_state_content_encoding(state, header_info);
840		}
841		if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_HEADER) != 0
842		    && (state->err_flags & MIME_ERR_8BIT_IN_HEADER) == 0) {
843		    for (cp = CU_CHAR_PTR(STR(state->output_buffer));
844			 cp < CU_CHAR_PTR(END(state->output_buffer)); cp++)
845			if (*cp & 0200) {
846			    REPORT_ERROR_BUF(state, MIME_ERR_8BIT_IN_HEADER,
847					     state->output_buffer);
848			    break;
849			}
850		}
851		/* Output routine is explicitly allowed to change the data. */
852		if (header_info == 0
853		    || header_info->type != HDR_CONTENT_TRANSFER_ENCODING
854		    || (state->static_flags & MIME_OPT_DOWNGRADE) == 0
855		    || state->curr_domain == MIME_ENC_7BIT)
856		    HEAD_OUT(state, header_info, len);
857		state->prev_rec_type = 0;
858		VSTRING_RESET(state->output_buffer);
859	    }
860	}
861
862	/*
863	 * With past header information moved out of the way, proceed with a
864	 * clean slate.
865	 */
866	if (input_is_text) {
867	    ssize_t header_len;
868
869	    /*
870	     * See if this input is (the beginning of) a message header.
871	     *
872	     * Normalize obsolete "name space colon" syntax to "name colon".
873	     * Things would be too confusing otherwise.
874	     *
875	     * Don't assume that the input is null terminated.
876	     */
877	    if ((header_len = is_header_buf(text, len)) > 0) {
878		vstring_strncpy(state->output_buffer, text, header_len);
879		for (text += header_len, len -= header_len;
880		     len > 0 && IS_SPACE_TAB(*text);
881		     text++, len--)
882		     /* void */ ;
883		vstring_strncat(state->output_buffer, text, len);
884		SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
885	    }
886	}
887
888	/*
889	 * This input terminates a block of message headers. When converting
890	 * 8-bit to 7-bit mail, this is the right place to emit the correct
891	 * content-transfer-encoding header. With message or multipart we
892	 * specify 7bit, with leaf entities we specify quoted-printable.
893	 *
894	 * We're not going to convert non-text data into base 64. If they send
895	 * arbitrary binary data as 8-bit text, then the data is already
896	 * broken beyond recovery, because the Postfix SMTP server sanitizes
897	 * record boundaries, treating broken record boundaries as CRLF.
898	 *
899	 * Clear the output buffer, we will need it for storage of the
900	 * conversion result.
901	 */
902	if ((state->static_flags & MIME_OPT_DOWNGRADE)
903	    && state->curr_domain != MIME_ENC_7BIT) {
904	    if ((state->curr_ctype == MIME_CTYPE_MESSAGE
905		 && state->curr_stype != MIME_STYPE_GLOBAL)
906		|| state->curr_ctype == MIME_CTYPE_MULTIPART)
907		cp = CU_CHAR_PTR("7bit");
908	    else
909		cp = CU_CHAR_PTR("quoted-printable");
910	    vstring_sprintf(state->output_buffer,
911			    "Content-Transfer-Encoding: %s", cp);
912	    HEAD_OUT(state, (HEADER_OPTS *) 0, len);
913	    VSTRING_RESET(state->output_buffer);
914	}
915
916	/*
917	 * This input terminates a block of message headers. Call the
918	 * optional header end routine at the end of the first header block.
919	 */
920	if (state->curr_state == MIME_STATE_PRIMARY && state->head_end)
921	    state->head_end(state->app_context);
922
923	/*
924	 * This is the right place to check if the sender specified an
925	 * appropriate identity encoding (7bit, 8bit, binary) for multipart
926	 * and for message.
927	 */
928	if (state->static_flags & MIME_OPT_REPORT_ENCODING_DOMAIN) {
929	    if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
930		if (state->curr_stype == MIME_STYPE_PARTIAL
931		    || state->curr_stype == MIME_STYPE_EXTERN_BODY) {
932		    if (state->curr_domain != MIME_ENC_7BIT)
933			REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
934				 mime_state_enc_name(state->curr_encoding));
935		}
936		/* EAI: message/global allows non-identity encoding. */
937		else if (state->curr_stype == MIME_STYPE_RFC822) {
938		    if (state->curr_encoding != state->curr_domain)
939			REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
940				 mime_state_enc_name(state->curr_encoding));
941		}
942	    } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
943		if (state->curr_encoding != state->curr_domain)
944		    REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
945				 mime_state_enc_name(state->curr_encoding));
946	    }
947	}
948
949	/*
950	 * Find out if the next body starts with its own message headers. In
951	 * aggressive mode, examine headers of partial and external-body
952	 * messages. Otherwise, treat such headers as part of the "body". Set
953	 * the proper encoding information for the multipart prolog.
954	 *
955	 * XXX We parse headers inside message/* content even when the encoding
956	 * is invalid (encoding != domain). With base64 we won't recognize
957	 * any headers, and with quoted-printable we won't recognize MIME
958	 * boundary strings, but the MIME processor will still resynchronize
959	 * when it runs into the higher-level boundary string at the end of
960	 * the message/* content. Although we will treat some headers as body
961	 * text, we will still do a better job than if we were treating the
962	 * entire message/* content as body text.
963	 *
964	 * XXX This changes state to MIME_STATE_NESTED and then outputs a body
965	 * line, so that the body offset is not properly reset.
966	 *
967	 * Don't assume that the input is null terminated.
968	 */
969	if (input_is_text) {
970	    if (len == 0) {
971		state->body_offset = 0;		/* XXX */
972		if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
973		    if (state->curr_stype == MIME_STYPE_RFC822)
974			SET_MIME_STATE(state, MIME_STATE_NESTED,
975				       MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
976				       MIME_ENC_7BIT, MIME_ENC_7BIT);
977		    else if (state->curr_stype == MIME_STYPE_GLOBAL
978			 && ((state->static_flags & MIME_OPT_DOWNGRADE) == 0
979			     || state->curr_domain == MIME_ENC_7BIT))
980			/* XXX EAI: inspect encoded message/global. */
981			SET_MIME_STATE(state, MIME_STATE_NESTED,
982				       MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
983				       MIME_ENC_7BIT, MIME_ENC_7BIT);
984		    else
985			SET_CURR_STATE(state, MIME_STATE_BODY);
986		} else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
987		    SET_MIME_STATE(state, MIME_STATE_BODY,
988				   MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
989				   MIME_ENC_7BIT, MIME_ENC_7BIT);
990		} else {
991		    SET_CURR_STATE(state, MIME_STATE_BODY);
992		}
993	    }
994
995	    /*
996	     * Invalid input. Force output of one blank line and jump to the
997	     * body state, leaving all other state alone.
998	     *
999	     * We don't break legitimate mail by inserting a blank line
1000	     * separator between primary headers and a non-empty body. Many
1001	     * MTA's don't even record the presence or absence of this
1002	     * separator, nor does the Milter protocol pass it on to Milter
1003	     * applications.
1004	     *
1005	     * XXX We don't insert a blank line separator into attachments, to
1006	     * avoid breaking digital signatures. Postfix shall not do a
1007	     * worse mail delivery job than MTAs that can't even parse MIME.
1008	     * We switch to body state anyway, to avoid treating body text as
1009	     * header text, and mis-interpreting or truncating it. The code
1010	     * below for initial From_ lines is for educational purposes.
1011	     *
1012	     * Sites concerned about MIME evasion can use a MIME normalizer.
1013	     * Postfix has a different mission.
1014	     */
1015	    else {
1016		if (msg_verbose)
1017		    msg_info("garbage in %s header",
1018		    state->curr_state == MIME_STATE_MULTIPART ? "multipart" :
1019		       state->curr_state == MIME_STATE_PRIMARY ? "primary" :
1020			 state->curr_state == MIME_STATE_NESTED ? "nested" :
1021			     "other");
1022		switch (state->curr_state) {
1023		case MIME_STATE_PRIMARY:
1024		    BODY_OUT(state, REC_TYPE_NORM, "", 0);
1025		    SET_CURR_STATE(state, MIME_STATE_BODY);
1026		    break;
1027#if 0
1028		case MIME_STATE_NESTED:
1029		    if (state->body_offset <= 1
1030			&& rec_type == REC_TYPE_NORM
1031			&& len > 7
1032			&& (strncmp(text + (*text == '>'), "From ", 5) == 0
1033			    || strncmp(text, "=46rom ", 7) == 0))
1034			break;
1035		    /* FALLTHROUGH */
1036#endif
1037		default:
1038		    SET_CURR_STATE(state, MIME_STATE_BODY);
1039		    break;
1040		}
1041	    }
1042	}
1043
1044	/*
1045	 * This input is not text. Go to body state, unconditionally.
1046	 */
1047	else {
1048	    SET_CURR_STATE(state, MIME_STATE_BODY);
1049	}
1050	/* FALLTHROUGH */
1051
1052	/*
1053	 * Body text. Look for message boundaries, and recover from missing
1054	 * boundary strings. Missing boundaries can happen in aggressive mode
1055	 * with text/rfc822-headers or with message/partial. Ignore non-space
1056	 * cruft after --boundary or --boundary--, because some MUAs do, and
1057	 * because only perverse software would take advantage of this to
1058	 * escape detection. We have to ignore trailing cruft anyway, because
1059	 * our saved copy of the boundary string may have been truncated for
1060	 * safety reasons.
1061	 *
1062	 * Optionally look for 8-bit data in content that was announced as, or
1063	 * that defaults to, 7-bit. Unfortunately, we cannot turn this on by
1064	 * default. Majordomo sends requests for approval that do not
1065	 * propagate the MIME information from the enclosed message to the
1066	 * message headers of the approval request.
1067	 *
1068	 * Set the proper state information after processing a message boundary
1069	 * string.
1070	 *
1071	 * Don't look for boundary strings at the start of a continued record.
1072	 *
1073	 * Don't assume that the input is null terminated.
1074	 */
1075    case MIME_STATE_BODY:
1076	if (input_is_text) {
1077	    if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_7BIT_BODY) != 0
1078		&& state->curr_encoding == MIME_ENC_7BIT
1079		&& (state->err_flags & MIME_ERR_8BIT_IN_7BIT_BODY) == 0) {
1080		for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++)
1081		    if (*cp & 0200) {
1082			REPORT_ERROR_LEN(state, MIME_ERR_8BIT_IN_7BIT_BODY,
1083					 text, len);
1084			break;
1085		    }
1086	    }
1087	    if (state->stack && state->prev_rec_type != REC_TYPE_CONT
1088		&& len > 2 && text[0] == '-' && text[1] == '-') {
1089		for (sp = state->stack; sp != 0; sp = sp->next) {
1090		    if (len >= 2 + sp->bound_len &&
1091		      strncmp(text + 2, sp->boundary, sp->bound_len) == 0) {
1092			while (sp != state->stack)
1093			    mime_state_pop(state);
1094			if (len >= 4 + sp->bound_len &&
1095			  strncmp(text + 2 + sp->bound_len, "--", 2) == 0) {
1096			    mime_state_pop(state);
1097			    SET_MIME_STATE(state, MIME_STATE_BODY,
1098					 MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
1099					   MIME_ENC_7BIT, MIME_ENC_7BIT);
1100			} else {
1101			    SET_MIME_STATE(state, MIME_STATE_MULTIPART,
1102					   sp->def_ctype, sp->def_stype,
1103					   MIME_ENC_7BIT, MIME_ENC_7BIT);
1104			}
1105			break;
1106		    }
1107		}
1108	    }
1109	    /* Put last for consistency with header output routine. */
1110	    if ((state->static_flags & MIME_OPT_DOWNGRADE)
1111		&& state->curr_domain != MIME_ENC_7BIT)
1112		mime_state_downgrade(state, rec_type, text, len);
1113	    else
1114		BODY_OUT(state, rec_type, text, len);
1115	}
1116
1117	/*
1118	 * The input is not a text record. Inform the application that this
1119	 * is the last opportunity to send any pending output.
1120	 */
1121	else {
1122	    if (state->body_end)
1123		state->body_end(state->app_context);
1124	}
1125	SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
1126
1127	/*
1128	 * Oops. This can't happen.
1129	 */
1130    default:
1131	msg_panic("mime_state_update: unknown state: %d", state->curr_state);
1132    }
1133}
1134
1135 /*
1136  * Mime error to (DSN, text) mapping. Order matters; more serious errors
1137  * must precede less serious errors, because the error-to-text conversion
1138  * can report only one error.
1139  */
1140static const MIME_STATE_DETAIL mime_err_detail[] = {
1141    MIME_ERR_NESTING, "5.6.0", "MIME nesting exceeds safety limit",
1142    MIME_ERR_TRUNC_HEADER, "5.6.0", "message header length exceeds safety limit",
1143    MIME_ERR_8BIT_IN_HEADER, "5.6.0", "improper use of 8-bit data in message header",
1144    MIME_ERR_8BIT_IN_7BIT_BODY, "5.6.0", "improper use of 8-bit data in message body",
1145    MIME_ERR_ENCODING_DOMAIN, "5.6.0", "invalid message/* or multipart/* encoding domain",
1146    0,
1147};
1148
1149/* mime_state_error - error code to string */
1150
1151const char *mime_state_error(int error_code)
1152{
1153    const MIME_STATE_DETAIL *mp;
1154
1155    if (error_code == 0)
1156	msg_panic("mime_state_error: there is no error");
1157    for (mp = mime_err_detail; mp->code; mp++)
1158	if (mp->code & error_code)
1159	    return (mp->text);
1160    msg_panic("mime_state_error: unknown error code %d", error_code);
1161}
1162
1163/* mime_state_detail - error code to table entry with assorted data */
1164
1165const MIME_STATE_DETAIL *mime_state_detail(int error_code)
1166{
1167    const MIME_STATE_DETAIL *mp;
1168
1169    if (error_code == 0)
1170	msg_panic("mime_state_detail: there is no error");
1171    for (mp = mime_err_detail; mp->code; mp++)
1172	if (mp->code & error_code)
1173	    return (mp);
1174    msg_panic("mime_state_detail: unknown error code %d", error_code);
1175}
1176
1177#ifdef TEST
1178
1179#include <stdlib.h>
1180#include <stringops.h>
1181#include <vstream.h>
1182#include <msg_vstream.h>
1183#include <rec_streamlf.h>
1184
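 /*
  * Maximum record length that the test driver requests from the input
  * stream. Use a small value (such as the commented-out 40 below) to stress
  * test the REC_TYPE_CONT/NORM handling, but keep it large enough that
  * header labels are not broken across records.
  */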
1189/*#define REC_LEN	40*/
1190
1191#define REC_LEN	1024
1192
1193static void head_out(void *context, int class, const HEADER_OPTS *unused_info,
1194		             VSTRING *buf, off_t offset)
1195{
1196    VSTREAM *stream = (VSTREAM *) context;
1197
1198    vstream_fprintf(stream, "%s %ld\t|%s\n",
1199		    class == MIME_HDR_PRIMARY ? "MAIN" :
1200		    class == MIME_HDR_MULTIPART ? "MULT" :
1201		    class == MIME_HDR_NESTED ? "NEST" :
1202		    "ERROR", (long) offset, STR(buf));
1203}
1204
1205static void head_end(void *context)
1206{
1207    VSTREAM *stream = (VSTREAM *) context;
1208
1209    vstream_fprintf(stream, "HEADER END\n");
1210}
1211
1212static void body_out(void *context, int rec_type, const char *buf, ssize_t len,
1213		             off_t offset)
1214{
1215    VSTREAM *stream = (VSTREAM *) context;
1216
1217    vstream_fprintf(stream, "BODY %c %ld\t|", rec_type, (long) offset);
1218    vstream_fwrite(stream, buf, len);
1219    if (rec_type == REC_TYPE_NORM)
1220	VSTREAM_PUTC('\n', stream);
1221}
1222
1223static void body_end(void *context)
1224{
1225    VSTREAM *stream = (VSTREAM *) context;
1226
1227    vstream_fprintf(stream, "BODY END\n");
1228}
1229
/* err_print - test callback: log an error with at most 100 bytes of text */

static void err_print(void *unused_context, int err_flag,
		              const char *text, ssize_t len)
{
    int     shown = 100;

    /*
     * Limit the amount of offending text in the warning. The text is
     * printed with an explicit precision, so it need not be null
     * terminated.
     */
    if (len < 100)
	shown = (int) len;
    msg_warn("%s: %.*s", mime_state_error(err_flag), shown, text);
}
1236
1237int     var_header_limit = 2000;
1238int     var_mime_maxdepth = 20;
1239int     var_mime_bound_len = 2000;
1240char   *var_drop_hdrs = DEF_DROP_HDRS;
1241
1242int     main(int unused_argc, char **argv)
1243{
1244    int     rec_type;
1245    int     last = 0;
1246    VSTRING *buf;
1247    MIME_STATE *state;
1248    int     err;
1249
1250    /*
1251     * Initialize.
1252     */
1253#define MIME_OPTIONS \
1254	    (MIME_OPT_REPORT_8BIT_IN_7BIT_BODY \
1255	    | MIME_OPT_REPORT_8BIT_IN_HEADER \
1256	    | MIME_OPT_REPORT_ENCODING_DOMAIN \
1257	    | MIME_OPT_REPORT_TRUNC_HEADER \
1258	    | MIME_OPT_REPORT_NESTING \
1259	    | MIME_OPT_DOWNGRADE)
1260
1261    msg_vstream_init(basename(argv[0]), VSTREAM_OUT);
1262    msg_verbose = 1;
1263    buf = vstring_alloc(10);
1264    state = mime_state_alloc(MIME_OPTIONS,
1265			     head_out, head_end,
1266			     body_out, body_end,
1267			     err_print,
1268			     (void *) VSTREAM_OUT);
1269
1270    /*
1271     * Main loop.
1272     */
1273    do {
1274	rec_type = rec_streamlf_get(VSTREAM_IN, buf, REC_LEN);
1275	VSTRING_TERMINATE(buf);
1276	err = mime_state_update(state, last = rec_type, STR(buf), LEN(buf));
1277	vstream_fflush(VSTREAM_OUT);
1278    } while (rec_type > 0);
1279
1280    /*
1281     * Error reporting.
1282     */
1283    if (err & MIME_ERR_TRUNC_HEADER)
1284	msg_warn("message header length exceeds safety limit");
1285    if (err & MIME_ERR_NESTING)
1286	msg_warn("MIME nesting exceeds safety limit");
1287    if (err & MIME_ERR_8BIT_IN_HEADER)
1288	msg_warn("improper use of 8-bit data in message header");
1289    if (err & MIME_ERR_8BIT_IN_7BIT_BODY)
1290	msg_warn("improper use of 8-bit data in message body");
1291    if (err & MIME_ERR_ENCODING_DOMAIN)
1292	msg_warn("improper message/* or multipart/* encoding domain");
1293
1294    /*
1295     * Cleanup.
1296     */
1297    mime_state_free(state);
1298    vstring_free(buf);
1299    exit(0);
1300}
1301
1302#endif
1303