1/*++
2/* NAME
3/*	cleanup_message 3
4/* SUMMARY
5/*	process message segment
6/* SYNOPSIS
7/*	#include "cleanup.h"
8/*
9/*	void	cleanup_message(state, type, buf, len)
10/*	CLEANUP_STATE *state;
11/*	int	type;
12/*	const char *buf;
13/*	ssize_t	len;
14/* DESCRIPTION
15/*	This module processes message content records and copies the
16/*	result to the queue file.  It validates the input, rewrites
17/*	sender/recipient addresses to canonical form, inserts missing
18/*	message headers, and extracts information from message headers
19/*	to be used later when generating the extracted output segment.
20/*	This routine absorbs but does not emit the content to extracted
21/*	boundary record.
22/*
23/*	Arguments:
24/* .IP state
25/*	Queue file and message processing state. This state is updated
26/*	as records are processed and as errors happen.
27/* .IP type
28/*	Record type.
29/* .IP buf
30/*	Record content.
31/* .IP len
32/*	Record content length.
33/* LICENSE
34/* .ad
35/* .fi
36/*	The Secure Mailer license must be distributed with this software.
37/* AUTHOR(S)
38/*	Wietse Venema
39/*	IBM T.J. Watson Research
40/*	P.O. Box 704
41/*	Yorktown Heights, NY 10598, USA
42/*--*/
43
44/* System library. */
45
46#include <sys_defs.h>
47#include <ctype.h>
48#include <string.h>
49#include <time.h>
50#include <unistd.h>
51
52#ifdef STRCASECMP_IN_STRINGS_H
53#include <strings.h>
54#endif
55
56/* Utility library. */
57
58#include <msg.h>
59#include <vstring.h>
60#include <vstream.h>
61#include <argv.h>
62#include <split_at.h>
63#include <mymalloc.h>
64#include <stringops.h>
65#include <nvtable.h>
66
67/* Global library. */
68
69#include <record.h>
70#include <rec_type.h>
71#include <cleanup_user.h>
72#include <tok822.h>
73#include <header_opts.h>
74#include <quote_822_local.h>
75#include <mail_params.h>
76#include <mail_date.h>
77#include <mail_addr.h>
78#include <is_header.h>
79#include <ext_prop.h>
80#include <mail_proto.h>
81#include <mime_state.h>
82#include <lex_822.h>
83#include <dsn_util.h>
84#include <conv_time.h>
85
86/* Application-specific. */
87
88#include "cleanup.h"
89
90/* cleanup_fold_header - wrap address list header */
91
92static void cleanup_fold_header(CLEANUP_STATE *state, VSTRING *header_buf)
93{
94    char   *start_line = vstring_str(header_buf);
95    char   *end_line;
96    char   *next_line;
97    char   *line;
98
99    /*
100     * A rewritten address list contains one address per line. The code below
101     * replaces newlines by spaces, to fit as many addresses on a line as
102     * possible (without rearranging the order of addresses). Prepending
103     * white space to the beginning of lines is delegated to the output
104     * routine.
105     */
106    for (line = start_line; line != 0; line = next_line) {
107	end_line = line + strcspn(line, "\n");
108	if (line > start_line) {
109	    if (end_line - start_line < 70) {	/* TAB counts as one */
110		line[-1] = ' ';
111	    } else {
112		start_line = line;
113	    }
114	}
115	next_line = *end_line ? end_line + 1 : 0;
116    }
117    cleanup_out_header(state, header_buf);
118}
119
120/* cleanup_extract_internal - save unquoted copy of extracted address */
121
122static char *cleanup_extract_internal(VSTRING *buffer, TOK822 *addr)
123{
124
125    /*
126     * A little routine to stash away a copy of an address that we extracted
127     * from a message header line.
128     */
129    tok822_internalize(buffer, addr->head, TOK822_STR_DEFL);
130    return (mystrdup(vstring_str(buffer)));
131}
132
133/* cleanup_rewrite_sender - sender address rewriting */
134
135static void cleanup_rewrite_sender(CLEANUP_STATE *state,
136				           const HEADER_OPTS *hdr_opts,
137				           VSTRING *header_buf)
138{
139    TOK822 *tree;
140    TOK822 **addr_list;
141    TOK822 **tpp;
142    int     did_rewrite = 0;
143
144    if (msg_verbose)
145	msg_info("rewrite_sender: %s", hdr_opts->name);
146
147    /*
148     * Parse the header line, rewrite each address found, and regenerate the
149     * header line. Finally, pipe the result through the header line folding
150     * routine.
151     */
152    tree = tok822_parse_limit(vstring_str(header_buf)
153			      + strlen(hdr_opts->name) + 1,
154			      var_token_limit);
155    addr_list = tok822_grep(tree, TOK822_ADDR);
156    for (tpp = addr_list; *tpp; tpp++) {
157	did_rewrite |= cleanup_rewrite_tree(state->hdr_rewrite_context, *tpp);
158	if (state->flags & CLEANUP_FLAG_MAP_OK) {
159	    if (cleanup_send_canon_maps
160		&& (cleanup_send_canon_flags & CLEANUP_CANON_FLAG_HDR_FROM))
161		did_rewrite |=
162		    cleanup_map11_tree(state, *tpp, cleanup_send_canon_maps,
163				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
164	    if (cleanup_comm_canon_maps
165		&& (cleanup_comm_canon_flags & CLEANUP_CANON_FLAG_HDR_FROM))
166		did_rewrite |=
167		    cleanup_map11_tree(state, *tpp, cleanup_comm_canon_maps,
168				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
169	    if (cleanup_masq_domains
170		&& (cleanup_masq_flags & CLEANUP_MASQ_FLAG_HDR_FROM))
171		did_rewrite |=
172		    cleanup_masquerade_tree(state, *tpp, cleanup_masq_domains);
173	}
174    }
175    if (did_rewrite) {
176	vstring_truncate(header_buf, strlen(hdr_opts->name));
177	vstring_strcat(header_buf, ": ");
178	tok822_externalize(header_buf, tree, TOK822_STR_HEAD);
179    }
180    myfree((char *) addr_list);
181    tok822_free_tree(tree);
182    if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
183	if (did_rewrite)
184	    cleanup_fold_header(state, header_buf);
185	else
186	    cleanup_out_header(state, header_buf);
187    }
188}
189
190/* cleanup_rewrite_recip - recipient address rewriting */
191
192static void cleanup_rewrite_recip(CLEANUP_STATE *state,
193				          const HEADER_OPTS *hdr_opts,
194				          VSTRING *header_buf)
195{
196    TOK822 *tree;
197    TOK822 **addr_list;
198    TOK822 **tpp;
199    int     did_rewrite = 0;
200
201    if (msg_verbose)
202	msg_info("rewrite_recip: %s", hdr_opts->name);
203
204    /*
205     * Parse the header line, rewrite each address found, and regenerate the
206     * header line. Finally, pipe the result through the header line folding
207     * routine.
208     */
209    tree = tok822_parse_limit(vstring_str(header_buf)
210			      + strlen(hdr_opts->name) + 1,
211			      var_token_limit);
212    addr_list = tok822_grep(tree, TOK822_ADDR);
213    for (tpp = addr_list; *tpp; tpp++) {
214	did_rewrite |= cleanup_rewrite_tree(state->hdr_rewrite_context, *tpp);
215	if (state->flags & CLEANUP_FLAG_MAP_OK) {
216	    if (cleanup_rcpt_canon_maps
217		&& (cleanup_rcpt_canon_flags & CLEANUP_CANON_FLAG_HDR_RCPT))
218		did_rewrite |=
219		    cleanup_map11_tree(state, *tpp, cleanup_rcpt_canon_maps,
220				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
221	    if (cleanup_comm_canon_maps
222		&& (cleanup_comm_canon_flags & CLEANUP_CANON_FLAG_HDR_RCPT))
223		did_rewrite |=
224		    cleanup_map11_tree(state, *tpp, cleanup_comm_canon_maps,
225				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
226	    if (cleanup_masq_domains
227		&& (cleanup_masq_flags & CLEANUP_MASQ_FLAG_HDR_RCPT))
228		did_rewrite |=
229		    cleanup_masquerade_tree(state, *tpp, cleanup_masq_domains);
230	}
231    }
232    if (did_rewrite) {
233	vstring_truncate(header_buf, strlen(hdr_opts->name));
234	vstring_strcat(header_buf, ": ");
235	tok822_externalize(header_buf, tree, TOK822_STR_HEAD);
236    }
237    myfree((char *) addr_list);
238    tok822_free_tree(tree);
239    if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
240	if (did_rewrite)
241	    cleanup_fold_header(state, header_buf);
242	else
243	    cleanup_out_header(state, header_buf);
244    }
245}
246
247/* cleanup_act_log - log action with context */
248
249static void cleanup_act_log(CLEANUP_STATE *state,
250			            const char *action, const char *class,
251			            const char *content, const char *text)
252{
253    const char *attr;
254
255    if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_ORIGIN)) == 0)
256	attr = "unknown";
257    vstring_sprintf(state->temp1, "%s: %s: %s %.200s from %s;",
258		    state->queue_id, action, class, content, attr);
259    if (state->sender)
260	vstring_sprintf_append(state->temp1, " from=<%s>", state->sender);
261    if (state->recip)
262	vstring_sprintf_append(state->temp1, " to=<%s>", state->recip);
263    if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_PROTO_NAME)) != 0)
264	vstring_sprintf_append(state->temp1, " proto=%s", attr);
265    if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_HELO_NAME)) != 0)
266	vstring_sprintf_append(state->temp1, " helo=<%s>", attr);
267    if (text && *text)
268	vstring_sprintf_append(state->temp1, ": %s", text);
269    msg_info("%s", vstring_str(state->temp1));
270}
271
272#define CLEANUP_ACT_CTXT_HEADER	"header"
273#define CLEANUP_ACT_CTXT_BODY	"body"
274#define CLEANUP_ACT_CTXT_ANY	"content"
275
276/* cleanup_act - act upon a header/body match */
277
278static const char *cleanup_act(CLEANUP_STATE *state, char *context,
279			               const char *buf, const char *value,
280			               const char *map_class)
281{
282    const char *optional_text = value + strcspn(value, " \t");
283    int     command_len = optional_text - value;
284
285#ifdef DELAY_ACTION
286    int     defer_delay;
287
288#endif
289
290    while (*optional_text && ISSPACE(*optional_text))
291	optional_text++;
292
293#define STREQUAL(x,y,l) (strncasecmp((x), (y), (l)) == 0 && (y)[l] == 0)
294#define CLEANUP_ACT_DROP 0
295
296    /*
297     * CLEANUP_STAT_CONT and CLEANUP_STAT_DEFER both update the reason
298     * attribute, but CLEANUP_STAT_DEFER takes precedence. It terminates
299     * queue record processing, and prevents bounces from being sent.
300     */
301    if (STREQUAL(value, "REJECT", command_len)) {
302	const CLEANUP_STAT_DETAIL *detail;
303
304	if (state->reason)
305	    myfree(state->reason);
306	if (*optional_text) {
307	    state->reason = dsn_prepend("5.7.1", optional_text);
308	    if (*state->reason != '4' && *state->reason != '5') {
309		msg_warn("bad DSN action in %s -- need 4.x.x or 5.x.x",
310			 optional_text);
311		*state->reason = '4';
312	    }
313	} else {
314	    detail = cleanup_stat_detail(CLEANUP_STAT_CONT);
315	    state->reason = dsn_prepend(detail->dsn, detail->text);
316	}
317	if (*state->reason == '4')
318	    state->errs |= CLEANUP_STAT_DEFER;
319	else
320	    state->errs |= CLEANUP_STAT_CONT;
321	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
322	cleanup_act_log(state, "reject", context, buf, state->reason);
323	return (buf);
324    }
325    if (STREQUAL(value, "WARN", command_len)) {
326	cleanup_act_log(state, "warning", context, buf, optional_text);
327	return (buf);
328    }
329    if (STREQUAL(value, "INFO", command_len)) {
330	cleanup_act_log(state, "info", context, buf, optional_text);
331	return (buf);
332    }
333    if (STREQUAL(value, "FILTER", command_len)) {
334	if (*optional_text == 0) {
335	    msg_warn("missing FILTER command argument in %s map", map_class);
336	} else if (strchr(optional_text, ':') == 0) {
337	    msg_warn("bad FILTER command %s in %s -- "
338		     "need transport:destination",
339		     optional_text, map_class);
340	} else {
341	    if (state->filter)
342		myfree(state->filter);
343	    state->filter = mystrdup(optional_text);
344	    cleanup_act_log(state, "filter", context, buf, optional_text);
345	}
346	return (buf);
347    }
348    if (STREQUAL(value, "DISCARD", command_len)) {
349	cleanup_act_log(state, "discard", context, buf, optional_text);
350	state->flags |= CLEANUP_FLAG_DISCARD;
351	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
352	return (buf);
353    }
354    if (STREQUAL(value, "HOLD", command_len)) {
355	if ((state->flags & (CLEANUP_FLAG_HOLD | CLEANUP_FLAG_DISCARD)) == 0) {
356	    cleanup_act_log(state, "hold", context, buf, optional_text);
357	    state->flags |= CLEANUP_FLAG_HOLD;
358	}
359	return (buf);
360    }
361
362    /*
363     * The DELAY feature is disabled because it has too many problems. 1) It
364     * does not work on some remote file systems; 2) mail will be delivered
365     * anyway with "sendmail -q" etc.; 3) while the mail is queued it bogs
366     * down the deferred queue scan with huge amounts of useless disk I/O
367     * operations.
368     */
369#ifdef DELAY_ACTION
370    if (STREQUAL(value, "DELAY", command_len)) {
371	if ((state->flags & (CLEANUP_FLAG_HOLD | CLEANUP_FLAG_DISCARD)) == 0) {
372	    if (*optional_text == 0) {
373		msg_warn("missing DELAY argument in %s map", map_class);
374	    } else if (conv_time(optional_text, &defer_delay, 's') == 0) {
375		msg_warn("ignoring bad DELAY argument %s in %s map",
376			 optional_text, map_class);
377	    } else {
378		cleanup_act_log(state, "delay", context, buf, optional_text);
379		state->defer_delay = defer_delay;
380	    }
381	}
382	return (buf);
383    }
384#endif
385    if (STREQUAL(value, "PREPEND", command_len)) {
386	if (*optional_text == 0) {
387	    msg_warn("PREPEND action without text in %s map", map_class);
388	} else if (strcmp(context, CLEANUP_ACT_CTXT_HEADER) == 0
389		   && !is_header(optional_text)) {
390	    msg_warn("bad PREPEND header text \"%s\" in %s map -- "
391		     "need \"headername: headervalue\"",
392		     optional_text, map_class);
393	} else {
394	    cleanup_act_log(state, "prepend", context, buf, optional_text);
395	    cleanup_out_string(state, REC_TYPE_NORM, optional_text);
396	}
397	return (buf);
398    }
399    if (STREQUAL(value, "REPLACE", command_len)) {
400	if (*optional_text == 0) {
401	    msg_warn("REPLACE action without text in %s map", map_class);
402	    return (buf);
403	} else if (strcmp(context, CLEANUP_ACT_CTXT_HEADER) == 0
404		   && !is_header(optional_text)) {
405	    msg_warn("bad REPLACE header text \"%s\" in %s map -- "
406		     "need \"headername: headervalue\"",
407		     optional_text, map_class);
408	    return (buf);
409	} else {
410	    cleanup_act_log(state, "replace", context, buf, optional_text);
411	    return (mystrdup(optional_text));
412	}
413    }
414    if (STREQUAL(value, "REDIRECT", command_len)) {
415	if (strchr(optional_text, '@') == 0) {
416	    msg_warn("bad REDIRECT target \"%s\" in %s map -- "
417		     "need user@domain",
418		     optional_text, map_class);
419	} else {
420	    if (state->redirect)
421		myfree(state->redirect);
422	    state->redirect = mystrdup(optional_text);
423	    cleanup_act_log(state, "redirect", context, buf, optional_text);
424	    state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
425	}
426	return (buf);
427    }
428    /* Allow and ignore optional text after the action. */
429
430    if (STREQUAL(value, "IGNORE", command_len))
431	return (CLEANUP_ACT_DROP);
432
433    if (STREQUAL(value, "DUNNO", command_len))	/* preferred */
434	return (buf);
435
436    if (STREQUAL(value, "OK", command_len))	/* compat */
437	return (buf);
438
439    msg_warn("unknown command in %s map: %s", map_class, value);
440    return (buf);
441}
442
443/* cleanup_header_callback - process one complete header line */
444
445static void cleanup_header_callback(void *context, int header_class,
446				            const HEADER_OPTS *hdr_opts,
447				            VSTRING *header_buf,
448				            off_t unused_offset)
449{
450    CLEANUP_STATE *state = (CLEANUP_STATE *) context;
451    const char *myname = "cleanup_header_callback";
452    char   *hdrval;
453    struct code_map {
454	const char *name;
455	const char *encoding;
456    };
457    static struct code_map code_map[] = {	/* RFC 2045 */
458	"7bit", MAIL_ATTR_ENC_7BIT,
459	"8bit", MAIL_ATTR_ENC_8BIT,
460	"binary", MAIL_ATTR_ENC_8BIT,	/* XXX Violation */
461	"quoted-printable", MAIL_ATTR_ENC_7BIT,
462	"base64", MAIL_ATTR_ENC_7BIT,
463	0,
464    };
465    struct code_map *cmp;
466    MAPS   *checks;
467    const char *map_class;
468
469    if (msg_verbose)
470	msg_info("%s: '%.200s'", myname, vstring_str(header_buf));
471
472    /*
473     * Crude header filtering. This stops malware that isn't sophisticated
474     * enough to use fancy header encodings.
475     */
476#define CHECK(class, maps, var_name) \
477	(header_class == class && (map_class = var_name, checks = maps) != 0)
478
479    if (hdr_opts && (hdr_opts->flags & HDR_OPT_MIME))
480	header_class = MIME_HDR_MULTIPART;
481
482    if ((state->flags & CLEANUP_FLAG_FILTER)
483	&& (CHECK(MIME_HDR_PRIMARY, cleanup_header_checks, VAR_HEADER_CHECKS)
484    || CHECK(MIME_HDR_MULTIPART, cleanup_mimehdr_checks, VAR_MIMEHDR_CHECKS)
485    || CHECK(MIME_HDR_NESTED, cleanup_nesthdr_checks, VAR_NESTHDR_CHECKS))) {
486	char   *header = vstring_str(header_buf);
487	const char *value;
488
489	if ((value = maps_find(checks, header, 0)) != 0) {
490	    const char *result;
491
492	    if ((result = cleanup_act(state, CLEANUP_ACT_CTXT_HEADER,
493				      header, value, map_class))
494		== CLEANUP_ACT_DROP) {
495		return;
496	    } else if (result != header) {
497		vstring_strcpy(header_buf, result);
498		hdr_opts = header_opts_find(result);
499		myfree((char *) result);
500	    }
501	} else if (checks->error) {
502	    msg_warn("%s: %s map lookup problem -- "
503		     "message not accepted, try again later",
504		     state->queue_id, checks->title);
505	    state->errs |= CLEANUP_STAT_WRITE;
506	}
507    }
508
509    /*
510     * If this is an "unknown" header, just copy it to the output without
511     * even bothering to fold long lines. cleanup_out() will split long
512     * headers that do not fit a REC_TYPE_NORM record.
513     */
514    if (hdr_opts == 0) {
515	cleanup_out_header(state, header_buf);
516	return;
517    }
518
519    /*
520     * Allow 8-bit type info to override 7-bit type info. XXX Should reuse
521     * the effort that went into MIME header parsing.
522     */
523    hdrval = vstring_str(header_buf) + strlen(hdr_opts->name) + 1;
524    while (ISSPACE(*hdrval))
525	hdrval++;
526    /* trimblanks(hdrval, 0)[0] = 0; */
527    if (var_auto_8bit_enc_hdr
528	&& hdr_opts->type == HDR_CONTENT_TRANSFER_ENCODING) {
529	for (cmp = code_map; cmp->name != 0; cmp++) {
530	    if (strcasecmp(hdrval, cmp->name) == 0) {
531		if (strcasecmp(cmp->encoding, MAIL_ATTR_ENC_8BIT) == 0)
532		    nvtable_update(state->attr, MAIL_ATTR_ENCODING,
533				   cmp->encoding);
534		break;
535	    }
536	}
537    }
538
539    /*
540     * Copy attachment etc. header blocks without further inspection.
541     */
542    if (header_class != MIME_HDR_PRIMARY) {
543	cleanup_out_header(state, header_buf);
544	return;
545    }
546
547    /*
548     * Known header. Remember that we have seen at least one. Find out what
549     * we should do with this header: delete, count, rewrite. Note that we
550     * should examine headers even when they will be deleted from the output,
551     * because the addresses in those headers might be needed elsewhere.
552     *
553     * XXX 2821: Return-path breakage.
554     *
555     * RFC 821 specifies: When the receiver-SMTP makes the "final delivery" of a
556     * message it inserts at the beginning of the mail data a return path
557     * line.  The return path line preserves the information in the
558     * <reverse-path> from the MAIL command.  Here, final delivery means the
559     * message leaves the SMTP world.  Normally, this would mean it has been
560     * delivered to the destination user, but in some cases it may be further
561     * processed and transmitted by another mail system.
562     *
563     * And that is what Postfix implements. Delivery agents prepend
564     * Return-Path:. In order to avoid cluttering up the message with
565     * possibly inconsistent Return-Path: information (the sender can change
566     * as the result of mail forwarding or mailing list delivery), Postfix
567     * removes any existing Return-Path: headers.
568     *
569     * RFC 2821 Section 4.4 specifies:    A message-originating SMTP system
570     * SHOULD NOT send a message that already contains a Return-path header.
571     * SMTP servers performing a relay function MUST NOT inspect the message
572     * data, and especially not to the extent needed to determine if
573     * Return-path headers are present. SMTP servers making final delivery
574     * MAY remove Return-path headers before adding their own.
575     */
576    else {
577	state->headers_seen |= (1 << hdr_opts->type);
578	if (hdr_opts->type == HDR_MESSAGE_ID)
579	    msg_info("%s: message-id=%s", state->queue_id, hdrval);
580	if (hdr_opts->type == HDR_RESENT_MESSAGE_ID)
581	    msg_info("%s: resent-message-id=%s", state->queue_id, hdrval);
582	if (hdr_opts->type == HDR_RECEIVED)
583	    if (++state->hop_count >= var_hopcount_limit)
584		state->errs |= CLEANUP_STAT_HOPS;
585	if (CLEANUP_OUT_OK(state)) {
586	    if (hdr_opts->flags & HDR_OPT_RR)
587		state->resent = "Resent-";
588	    if ((hdr_opts->flags & HDR_OPT_SENDER)
589		&& state->hdr_rewrite_context) {
590		cleanup_rewrite_sender(state, hdr_opts, header_buf);
591	    } else if ((hdr_opts->flags & HDR_OPT_RECIP)
592		       && state->hdr_rewrite_context) {
593		cleanup_rewrite_recip(state, hdr_opts, header_buf);
594	    } else if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
595		cleanup_out_header(state, header_buf);
596	    }
597	}
598    }
599}
600
601/* cleanup_header_done_callback - insert missing message headers */
602
603static void cleanup_header_done_callback(void *context)
604{
605    const char *myname = "cleanup_header_done_callback";
606    CLEANUP_STATE *state = (CLEANUP_STATE *) context;
607    char    time_stamp[1024];		/* XXX locale dependent? */
608    struct tm *tp;
609    TOK822 *token;
610    time_t  tv;
611
612    /*
613     * XXX Workaround: when we reach the end of headers, mime_state_update()
614     * may execute up to three call-backs before returning to the caller:
615     * head_out(), head_end(), and body_out() or body_end(). As long as
616     * call-backs don't return a result, each call-back has to check for
617     * itself if the previous call-back experienced a problem.
618     */
619    if (CLEANUP_OUT_OK(state) == 0)
620	return;
621
622    /*
623     * Add a missing (Resent-)Message-Id: header. The message ID gives the
624     * time in GMT units, plus the local queue ID.
625     *
626     * XXX Message-Id is not a required message header (RFC 822 and RFC 2822).
627     *
628     * XXX It is the queue ID non-inode bits that prevent messages from getting
629     * the same Message-Id within the same second.
630     *
631     * XXX An arbitrary amount of time may pass between the start of the mail
632     * transaction and the creation of a queue file. Since we guarantee queue
633     * ID uniqueness only within a second, we must ensure that the time in
634     * the message ID matches the queue ID creation time, as long as we use
635     * the queue ID in the message ID.
636     *
637     * XXX We log a dummy name=value record so that we (hopefully) don't break
638     * compatibility with existing logfile analyzers, and so that we don't
639     * complicate future code that wants to log more name=value attributes.
640     */
641    if ((state->hdr_rewrite_context || var_always_add_hdrs)
642	&& (state->headers_seen & (1 << (state->resent[0] ?
643			   HDR_RESENT_MESSAGE_ID : HDR_MESSAGE_ID))) == 0) {
644	if (var_long_queue_ids) {
645	    vstring_sprintf(state->temp1, "%s@%s",
646			    state->queue_id, var_myhostname);
647	} else {
648	    tv = state->handle->ctime.tv_sec;
649	    tp = gmtime(&tv);
650	    strftime(time_stamp, sizeof(time_stamp), "%Y%m%d%H%M%S", tp);
651	    vstring_sprintf(state->temp1, "%s.%s@%s",
652			    time_stamp, state->queue_id, var_myhostname);
653	}
654	cleanup_out_format(state, REC_TYPE_NORM, "%sMessage-Id: <%s>",
655			   state->resent, vstring_str(state->temp1));
656	msg_info("%s: %smessage-id=<%s>",
657		 state->queue_id, *state->resent ? "resent-" : "",
658		 vstring_str(state->temp1));
659	state->headers_seen |= (1 << (state->resent[0] ?
660				   HDR_RESENT_MESSAGE_ID : HDR_MESSAGE_ID));
661    }
662    if ((state->headers_seen & (1 << HDR_MESSAGE_ID)) == 0)
663	msg_info("%s: message-id=<>", state->queue_id);
664
665    /*
666     * Add a missing (Resent-)Date: header. The date is in local time units,
667     * with the GMT offset at the end.
668     */
669    if ((state->hdr_rewrite_context || var_always_add_hdrs)
670	&& (state->headers_seen & (1 << (state->resent[0] ?
671				       HDR_RESENT_DATE : HDR_DATE))) == 0) {
672	cleanup_out_format(state, REC_TYPE_NORM, "%sDate: %s",
673		      state->resent, mail_date(state->arrival_time.tv_sec));
674    }
675
676    /*
677     * Add a missing (Resent-)From: header.
678     */
679    if ((state->hdr_rewrite_context || var_always_add_hdrs)
680	&& (state->headers_seen & (1 << (state->resent[0] ?
681				       HDR_RESENT_FROM : HDR_FROM))) == 0) {
682	quote_822_local(state->temp1, *state->sender ?
683			state->sender : MAIL_ADDR_MAIL_DAEMON);
684	vstring_sprintf(state->temp2, "%sFrom: %s",
685			state->resent, vstring_str(state->temp1));
686	if (*state->sender && state->fullname && *state->fullname) {
687	    vstring_sprintf(state->temp1, "(%s)", state->fullname);
688	    token = tok822_parse(vstring_str(state->temp1));
689	    vstring_strcat(state->temp2, " ");
690	    tok822_externalize(state->temp2, token, TOK822_STR_NONE);
691	    tok822_free_tree(token);
692	}
693	CLEANUP_OUT_BUF(state, REC_TYPE_NORM, state->temp2);
694    }
695
696    /*
697     * XXX 2821: Appendix B: The return address in the MAIL command SHOULD,
698     * if possible, be derived from the system's identity for the submitting
699     * (local) user, and the "From:" header field otherwise. If there is a
700     * system identity available, it SHOULD also be copied to the Sender
701     * header field if it is different from the address in the From header
702     * field.  (Any Sender field that was already there SHOULD be removed.)
703     * Similar wording appears in RFC 2822 section 3.6.2.
704     *
705     * Postfix presently does not insert a Sender: header if envelope and From:
706     * address differ. Older Postfix versions assumed that the envelope
707     * sender address specifies the system identity and inserted Sender:
708     * whenever envelope and From: differed. This was wrong with relayed
709     * mail, and was often not even desirable with original submissions.
710     *
711     * XXX 2822 Section 3.6.2, as well as RFC 822 Section 4.1: FROM headers can
712     * contain multiple addresses. If this is the case, then a Sender: header
713     * must be provided with a single address.
714     *
715     * Postfix does not count the number of addresses in a From: header
716     * (although doing so is trivial, once the address is parsed).
717     */
718
719    /*
720     * Add a missing destination header.
721     */
722#define VISIBLE_RCPT	((1 << HDR_TO) | (1 << HDR_RESENT_TO) \
723			| (1 << HDR_CC) | (1 << HDR_RESENT_CC))
724
725    if ((state->hdr_rewrite_context || var_always_add_hdrs)
726	&& (state->headers_seen & VISIBLE_RCPT) == 0 && *var_rcpt_witheld) {
727	if (!is_header(var_rcpt_witheld)) {
728	    msg_warn("bad %s header text \"%s\" -- "
729		     "need \"headername: headervalue\"",
730		     VAR_RCPT_WITHELD, var_rcpt_witheld);
731	} else {
732	    cleanup_out_format(state, REC_TYPE_NORM, "%s", var_rcpt_witheld);
733	}
734    }
735
736    /*
737     * Place a dummy PTR record right after the last header so that we can
738     * append headers without having to worry about clobbering the
739     * end-of-content marker.
740     */
741    if (state->milters || cleanup_milters) {
742	if ((state->append_hdr_pt_offset = vstream_ftell(state->dst)) < 0)
743	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
744	cleanup_out_format(state, REC_TYPE_PTR, REC_TYPE_PTR_FORMAT, 0L);
745	if ((state->append_hdr_pt_target = vstream_ftell(state->dst)) < 0)
746	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
747	state->body_offset = state->append_hdr_pt_target;
748    }
749}
750
751/* cleanup_body_callback - output one body record */
752
753static void cleanup_body_callback(void *context, int type,
754				          const char *buf, ssize_t len,
755				          off_t offset)
756{
757    CLEANUP_STATE *state = (CLEANUP_STATE *) context;
758
759    /*
760     * XXX Workaround: when we reach the end of headers, mime_state_update()
761     * may execute up to three call-backs before returning to the caller:
762     * head_out(), head_end(), and body_out() or body_end(). As long as
763     * call-backs don't return a result, each call-back has to check for
764     * itself if the previous call-back experienced a problem.
765     */
766    if (CLEANUP_OUT_OK(state) == 0)
767	return;
768
769    /*
770     * Crude message body content filter for emergencies. This code has
771     * several problems: it sees one line at a time; it looks at long lines
772     * only in chunks of line_length_limit (2048) characters; it is easily
773     * bypassed with encodings and other tricks.
774     */
775    if ((state->flags & CLEANUP_FLAG_FILTER)
776	&& cleanup_body_checks
777	&& (var_body_check_len == 0 || offset < var_body_check_len)) {
778	const char *value;
779
780	if ((value = maps_find(cleanup_body_checks, buf, 0)) != 0) {
781	    const char *result;
782
783	    if ((result = cleanup_act(state, CLEANUP_ACT_CTXT_BODY,
784				      buf, value, VAR_BODY_CHECKS))
785		== CLEANUP_ACT_DROP) {
786		return;
787	    } else if (result != buf) {
788		cleanup_out(state, type, result, strlen(result));
789		myfree((char *) result);
790		return;
791	    }
792	} else if (cleanup_body_checks->error) {
793	    msg_warn("%s: %s map lookup problem -- "
794		     "message not accepted, try again later",
795		     state->queue_id, cleanup_body_checks->title);
796	    state->errs |= CLEANUP_STAT_WRITE;
797	}
798    }
799    cleanup_out(state, type, buf, len);
800}
801
802/* cleanup_message_headerbody - process message content, header and body */
803
804static void cleanup_message_headerbody(CLEANUP_STATE *state, int type,
805				               const char *buf, ssize_t len)
806{
807    const char *myname = "cleanup_message_headerbody";
808    const MIME_STATE_DETAIL *detail;
809    const char *cp;
810    char   *dst;
811
812    /*
813     * Reject unwanted characters.
814     *
815     * XXX Possible optimization: simplify the loop when the "reject" set
816     * contains only one character.
817     */
818    if ((state->flags & CLEANUP_FLAG_FILTER) && cleanup_reject_chars) {
819	for (cp = buf; cp < buf + len; cp++) {
820	    if (memchr(vstring_str(cleanup_reject_chars),
821		       *(const unsigned char *) cp,
822		       VSTRING_LEN(cleanup_reject_chars))) {
823		cleanup_act(state, CLEANUP_ACT_CTXT_ANY,
824			    buf, "REJECT disallowed character",
825			    "character reject");
826		return;
827	    }
828	}
829    }
830
831    /*
832     * Strip unwanted characters. Don't overwrite the input.
833     *
834     * XXX Possible space+time optimization: use a bitset.
835     *
836     * XXX Possible optimization: simplify the loop when the "strip" set
837     * contains only one character.
838     *
839     * XXX Possible optimization: copy the input only if we really have to.
840     */
841    if ((state->flags & CLEANUP_FLAG_FILTER) && cleanup_strip_chars) {
842	VSTRING_RESET(state->stripped_buf);
843	VSTRING_SPACE(state->stripped_buf, len + 1);
844	dst = vstring_str(state->stripped_buf);
845	for (cp = buf; cp < buf + len; cp++)
846	    if (!memchr(vstring_str(cleanup_strip_chars),
847			*(const unsigned char *) cp,
848			VSTRING_LEN(cleanup_strip_chars)))
849		*dst++ = *cp;
850	*dst = 0;
851	buf = vstring_str(state->stripped_buf);
852	len = dst - buf;
853    }
854
855    /*
856     * Copy text record to the output.
857     */
858    if (type == REC_TYPE_NORM || type == REC_TYPE_CONT) {
859	state->mime_errs = mime_state_update(state->mime_state, type, buf, len);
860    }
861
862    /*
863     * If we have reached the end of the message content segment, record the
864     * current file position so we can compute the message size lateron.
865     */
866    else if (type == REC_TYPE_XTRA) {
867	state->mime_errs = mime_state_update(state->mime_state, type, buf, len);
868	if (state->milters || cleanup_milters)
869	    /* Make room for body modification. */
870	    cleanup_out_format(state, REC_TYPE_PTR, REC_TYPE_PTR_FORMAT, 0L);
871	/* Ignore header truncation after primary message headers. */
872	state->mime_errs &= ~MIME_ERR_TRUNC_HEADER;
873	if (state->mime_errs && state->reason == 0) {
874	    state->errs |= CLEANUP_STAT_CONT;
875	    detail = mime_state_detail(state->mime_errs);
876	    state->reason = dsn_prepend(detail->dsn, detail->text);
877	}
878	state->mime_state = mime_state_free(state->mime_state);
879	if ((state->xtra_offset = vstream_ftell(state->dst)) < 0)
880	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
881	state->cont_length = state->xtra_offset - state->data_offset;
882	state->action = cleanup_extracted;
883    }
884
885    /*
886     * This should never happen.
887     */
888    else {
889	msg_warn("%s: message rejected: "
890	      "unexpected record type %d in message content", myname, type);
891	state->errs |= CLEANUP_STAT_BAD;
892    }
893}
894
895/* cleanup_mime_error_callback - error report call-back routine */
896
897static void cleanup_mime_error_callback(void *context, int err_code,
898				              const char *text, ssize_t len)
899{
900    CLEANUP_STATE *state = (CLEANUP_STATE *) context;
901    const char *origin;
902
903    /*
904     * Message header too large errors are handled after the end of the
905     * primary message headers.
906     */
907    if ((err_code & ~MIME_ERR_TRUNC_HEADER) != 0) {
908	if ((origin = nvtable_find(state->attr, MAIL_ATTR_LOG_ORIGIN)) == 0)
909	    origin = MAIL_ATTR_ORG_NONE;
910#define TEXT_LEN (len < 100 ? (int) len : 100)
911	msg_info("%s: reject: mime-error %s: %.*s from %s; from=<%s> to=<%s>",
912		 state->queue_id, mime_state_error(err_code), TEXT_LEN, text,
913	    origin, state->sender, state->recip ? state->recip : "unknown");
914    }
915}
916
917/* cleanup_message - initialize message content segment */
918
919void    cleanup_message(CLEANUP_STATE *state, int type, const char *buf, ssize_t len)
920{
921    const char *myname = "cleanup_message";
922    int     mime_options;
923
924    /*
925     * Write the start-of-content segment marker.
926     */
927    cleanup_out_string(state, REC_TYPE_MESG, "");
928    if ((state->data_offset = vstream_ftell(state->dst)) < 0)
929	msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
930
931    /*
932     * Set up MIME processing options, if any. MIME_OPT_DISABLE_MIME disables
933     * special processing of Content-Type: headers, and thus, causes all text
934     * after the primary headers to be treated as the message body.
935     */
936    mime_options = 0;
937    if (var_disable_mime_input) {
938	mime_options |= MIME_OPT_DISABLE_MIME;
939    } else {
940	/* Turn off content checks if bouncing or forwarding mail. */
941	if (state->flags & CLEANUP_FLAG_FILTER) {
942	    if (var_strict_8bitmime || var_strict_7bit_hdrs)
943		mime_options |= MIME_OPT_REPORT_8BIT_IN_HEADER;
944	    if (var_strict_8bitmime || var_strict_8bit_body)
945		mime_options |= MIME_OPT_REPORT_8BIT_IN_7BIT_BODY;
946	    if (var_strict_encoding)
947		mime_options |= MIME_OPT_REPORT_ENCODING_DOMAIN;
948	    if (var_strict_8bitmime || var_strict_7bit_hdrs
949		|| var_strict_8bit_body || var_strict_encoding
950		|| *var_header_checks || *var_mimehdr_checks
951		|| *var_nesthdr_checks)
952		mime_options |= MIME_OPT_REPORT_NESTING;
953	}
954    }
955    state->mime_state = mime_state_alloc(mime_options,
956					 cleanup_header_callback,
957					 cleanup_header_done_callback,
958					 cleanup_body_callback,
959					 (MIME_STATE_ANY_END) 0,
960					 cleanup_mime_error_callback,
961					 (void *) state);
962
963    /*
964     * XXX Workaround: truncate a long message header so that we don't exceed
965     * the default Sendmail libmilter request size limit of 65535.
966     */
967#define KLUDGE_HEADER_LIMIT	60000
968    if ((cleanup_milters || state->milters)
969	&& var_header_limit > KLUDGE_HEADER_LIMIT)
970	var_header_limit = KLUDGE_HEADER_LIMIT;
971
972    /*
973     * Pass control to the header processing routine.
974     */
975    state->action = cleanup_message_headerbody;
976    cleanup_message_headerbody(state, type, buf, len);
977}
978