collect.c revision 43148
1/*
2 * Copyright (c) 1998 Sendmail, Inc.  All rights reserved.
3 * Copyright (c) 1983, 1995-1997 Eric P. Allman.  All rights reserved.
4 * Copyright (c) 1988, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * By using this file, you agree to the terms and conditions set
8 * forth in the LICENSE file which can be found at the top level of
9 * the sendmail distribution.
10 *
11 */
12
13#ifndef lint
14static char sccsid[] = "@(#)collect.c	8.91 (Berkeley) 8/19/1998";
15#endif /* not lint */
16
17# include <errno.h>
18# include "sendmail.h"
19
20/*
21**  COLLECT -- read & parse message header & make temp file.
22**
23**	Creates a temporary file name and copies the standard
24**	input to that file.  Leading UNIX-style "From" lines are
25**	stripped off (after important information is extracted).
26**
27**	Parameters:
28**		fp -- file to read.
29**		smtpmode -- if set, we are running SMTP: give an RFC821
30**			style message to say we are ready to collect
31**			input, and never ignore a single dot to mean
32**			end of message.
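**		hdrp -- the location to stash the header.  If non-NULL,
**			only the header is collected: no temp file is
**			created and collect returns once the header
**			has been read.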
34**		e -- the current envelope.
35**
36**	Returns:
37**		none.
38**
39**	Side Effects:
40**		Temp file is created and filled.
41**		The from person may be set.
42*/
43
44static jmp_buf	CtxCollectTimeout;
45static void	collecttimeout __P((time_t));
46static bool	CollectProgress;
47static EVENT	*CollectTimeout;
48
49/* values for input state machine */
/*
**  Input state machine: tracks where the current character sits
**  relative to line boundaries and a leading dot.
*/

#define IS_NORM		0	/* somewhere inside a line */
#define IS_BOL		1	/* at the start of a line */
#define IS_DOT		2	/* saw "." at the start of a line */
#define IS_DOTCR	3	/* saw ".\r" at the start of a line */
#define IS_CR		4	/* saw a carriage return */

/*
**  Message state machine: tracks which part of the message
**  is currently being read.
*/

#define MS_UFROM	0	/* expecting a Unix "From " line */
#define MS_HEADER	1	/* inside the message header */
#define MS_BODY		2	/* inside the message body */
#define MS_DISCARD	3	/* throwing away the rest of the message */
61
62void
63collect(fp, smtpmode, hdrp, e)
64	FILE *fp;
65	bool smtpmode;
66	HDR **hdrp;
67	register ENVELOPE *e;
68{
69	register FILE *volatile tf;
70	volatile bool ignrdot = smtpmode ? FALSE : IgnrDot;
71	volatile time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
72	register char *volatile bp;
73	volatile int c = EOF;
74	volatile bool inputerr = FALSE;
75	bool headeronly;
76	char *volatile buf;
77	volatile int buflen;
78	volatile int istate;
79	volatile int mstate;
80	u_char *volatile pbp;
81	int nhdrlines = 0;
82	int hdrlinelen = 0;
83	u_char peekbuf[8];
84	char dfname[MAXQFNAME];
85	char bufbuf[MAXLINE];
86	extern bool isheader __P((char *));
87	extern void tferror __P((FILE *volatile, ENVELOPE *));
88
89	headeronly = hdrp != NULL;
90
91	/*
92	**  Create the temp file name and create the file.
93	*/
94
95	if (!headeronly)
96	{
97		int tfd;
98		struct stat stbuf;
99
100		strcpy(dfname, queuename(e, 'd'));
101		tfd = dfopen(dfname, O_WRONLY|O_CREAT|O_TRUNC, FileMode, SFF_ANYFILE);
102		if (tfd < 0 || (tf = fdopen(tfd, "w")) == NULL)
103		{
104			syserr("Cannot create %s", dfname);
105			e->e_flags |= EF_NO_BODY_RETN;
106			finis(TRUE, ExitStat);
107		}
108		if (fstat(fileno(tf), &stbuf) < 0)
109			e->e_dfino = -1;
110		else
111		{
112			e->e_dfdev = stbuf.st_dev;
113			e->e_dfino = stbuf.st_ino;
114		}
115		HasEightBits = FALSE;
116		e->e_msgsize = 0;
117		e->e_flags |= EF_HAS_DF;
118	}
119
120	/*
121	**  Tell ARPANET to go ahead.
122	*/
123
124	if (smtpmode)
125		message("354 Enter mail, end with \".\" on a line by itself");
126
127	if (tTd(30, 2))
128		printf("collect\n");
129
130	/*
131	**  Read the message.
132	**
133	**	This is done using two interleaved state machines.
134	**	The input state machine is looking for things like
135	**	hidden dots; the message state machine is handling
136	**	the larger picture (e.g., header versus body).
137	*/
138
139	buf = bp = bufbuf;
140	buflen = sizeof bufbuf;
141	pbp = peekbuf;
142	istate = IS_BOL;
143	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
144	CollectProgress = FALSE;
145
146	if (dbto != 0)
147	{
148		/* handle possible input timeout */
149		if (setjmp(CtxCollectTimeout) != 0)
150		{
151			if (LogLevel > 2)
152				sm_syslog(LOG_NOTICE, e->e_id,
153				    "timeout waiting for input from %s during message collect",
154				    CurHostName ? CurHostName : "<local machine>");
155			errno = 0;
156			usrerr("451 timeout waiting for input during message collect");
157			goto readerr;
158		}
159		CollectTimeout = setevent(dbto, collecttimeout, dbto);
160	}
161
162	for (;;)
163	{
164		if (tTd(30, 35))
165			printf("top, istate=%d, mstate=%d\n", istate, mstate);
166		for (;;)
167		{
168			if (pbp > peekbuf)
169				c = *--pbp;
170			else
171			{
172				while (!feof(fp) && !ferror(fp))
173				{
174					errno = 0;
175					c = getc(fp);
176					if (errno != EINTR)
177						break;
178					clearerr(fp);
179				}
180				CollectProgress = TRUE;
181				if (TrafficLogFile != NULL && !headeronly)
182				{
183					if (istate == IS_BOL)
184						fprintf(TrafficLogFile, "%05d <<< ",
185							(int) getpid());
186					if (c == EOF)
187						fprintf(TrafficLogFile, "[EOF]\n");
188					else
189						putc(c, TrafficLogFile);
190				}
191				if (c == EOF)
192					goto readerr;
193				if (SevenBitInput)
194					c &= 0x7f;
195				else
196					HasEightBits |= bitset(0x80, c);
197			}
198			if (tTd(30, 94))
199				printf("istate=%d, c=%c (0x%x)\n",
200					istate, c, c);
201			switch (istate)
202			{
203			  case IS_BOL:
204				hdrlinelen = 0;
205				if (c == '.')
206				{
207					istate = IS_DOT;
208					continue;
209				}
210				break;
211
212			  case IS_DOT:
213				if (c == '\n' && !ignrdot &&
214				    !bitset(EF_NL_NOT_EOL, e->e_flags))
215					goto readerr;
216				else if (c == '\r' &&
217					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
218				{
219					istate = IS_DOTCR;
220					continue;
221				}
222				else if (c != '.' ||
223					 (OpMode != MD_SMTP &&
224					  OpMode != MD_DAEMON &&
225					  OpMode != MD_ARPAFTP))
226				{
227					*pbp++ = c;
228					c = '.';
229				}
230				break;
231
232			  case IS_DOTCR:
233				if (c == '\n' && !ignrdot)
234					goto readerr;
235				else
236				{
237					/* push back the ".\rx" */
238					*pbp++ = c;
239					*pbp++ = '\r';
240					c = '.';
241				}
242				break;
243
244			  case IS_CR:
245				if (c == '\n')
246					istate = IS_BOL;
247				else
248				{
249					ungetc(c, fp);
250					c = '\r';
251					istate = IS_NORM;
252				}
253				goto bufferchar;
254			}
255
256			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
257			{
258				istate = IS_CR;
259				continue;
260			}
261			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
262				istate = IS_BOL;
263			else
264				istate = IS_NORM;
265
266bufferchar:
267			if (!headeronly)
268				e->e_msgsize++;
269			switch (mstate)
270			{
271			  case MS_BODY:
272				/* just put the character out */
273				if (MaxMessageSize <= 0 ||
274				    e->e_msgsize <= MaxMessageSize)
275					putc(c, tf);
276
277				/* fall through */
278
279			  case MS_DISCARD:
280				continue;
281			}
282
283			/* header -- buffer up */
284			if (bp >= &buf[buflen - 2])
285			{
286				char *obuf;
287
288				if (mstate != MS_HEADER)
289					break;
290
291				/* out of space for header */
292				obuf = buf;
293				if (buflen < MEMCHUNKSIZE)
294					buflen *= 2;
295				else
296					buflen += MEMCHUNKSIZE;
297				buf = xalloc(buflen);
298				bcopy(obuf, buf, bp - obuf);
299				bp = &buf[bp - obuf];
300				if (obuf != bufbuf)
301					free(obuf);
302			}
303			if (c >= 0200 && c <= 0237)
304			{
305#if 0	/* causes complaints -- figure out something for 8.9 */
306				usrerr("Illegal character 0x%x in header", c);
307#endif
308			}
309			else if (c != '\0')
310			{
311				*bp++ = c;
312				if (MaxHeaderLineLength > 0 &&
313				    ++hdrlinelen > MaxHeaderLineLength)
314				{
315					sm_syslog(LOG_NOTICE, e->e_id,
316						  "header line too long (%d max) from %s during message collect",
317						  MaxHeaderLineLength,
318						  CurHostName != NULL ? CurHostName : "localhost");
319					errno = 0;
320					e->e_flags |= EF_CLRQUEUE;
321					e->e_status = "5.6.0";
322					usrerr("552 Header line too long (%d max)",
323						MaxHeaderLineLength);
324					mstate = MS_DISCARD;
325				}
326			}
327			if (istate == IS_BOL)
328				break;
329		}
330		*bp = '\0';
331
332nextstate:
333		if (tTd(30, 35))
334			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
335				istate, mstate, buf);
336		switch (mstate)
337		{
338		  case MS_UFROM:
339			mstate = MS_HEADER;
340#ifndef NOTUNIX
341			if (strncmp(buf, "From ", 5) == 0)
342			{
343				extern void eatfrom __P((char *volatile, ENVELOPE *));
344
345				bp = buf;
346				eatfrom(buf, e);
347				continue;
348			}
349#endif
350			/* fall through */
351
352		  case MS_HEADER:
353			if (!isheader(buf))
354			{
355				mstate = MS_BODY;
356				goto nextstate;
357			}
358
359			if (MaxHeaderLines > 0 &&
360			    ++nhdrlines > MaxHeaderLines)
361			{
362				sm_syslog(LOG_NOTICE, e->e_id,
363					  "too many header lines (%d max) from %s during message collect",
364					  MaxHeaderLines,
365					  CurHostName != NULL ? CurHostName : "localhost");
366				errno = 0;
367				e->e_flags |= EF_CLRQUEUE;
368				e->e_status = "5.6.0";
369				usrerr("552 Too many header lines (%d max)",
370					MaxHeaderLines);
371				mstate = MS_DISCARD;
372				break;
373			}
374
375			/* check for possible continuation line */
376			do
377			{
378				clearerr(fp);
379				errno = 0;
380				c = getc(fp);
381			} while (errno == EINTR);
382			if (c != EOF)
383				ungetc(c, fp);
384			if (c == ' ' || c == '\t')
385			{
386				/* yep -- defer this */
387				continue;
388			}
389
390			/* trim off trailing CRLF or NL */
391			if (*--bp != '\n' || *--bp != '\r')
392				bp++;
393			*bp = '\0';
394
395			if (bitset(H_EOH, chompheader(buf, FALSE, hdrp, e)))
396			{
397				mstate = MS_BODY;
398				goto nextstate;
399			}
400			break;
401
402		  case MS_BODY:
403			if (tTd(30, 1))
404				printf("EOH\n");
405			if (headeronly)
406				goto readerr;
407			bp = buf;
408
409			/* toss blank line */
410			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
411				bp[0] == '\r' && bp[1] == '\n') ||
412			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
413				bp[0] == '\n'))
414			{
415				break;
416			}
417
418			/* if not a blank separator, write it out */
419			if (MaxMessageSize <= 0 ||
420			    e->e_msgsize <= MaxMessageSize)
421			{
422				while (*bp != '\0')
423					putc(*bp++, tf);
424			}
425			break;
426		}
427		bp = buf;
428	}
429
430readerr:
431	if ((feof(fp) && smtpmode) || ferror(fp))
432	{
433		const char *errmsg = errstring(errno);
434
435		if (tTd(30, 1))
436			printf("collect: premature EOM: %s\n", errmsg);
437		if (LogLevel >= 2)
438			sm_syslog(LOG_WARNING, e->e_id,
439				"collect: premature EOM: %s", errmsg);
440		inputerr = TRUE;
441	}
442
443	/* reset global timer */
444	clrevent(CollectTimeout);
445
446	if (headeronly)
447		return;
448
449	if (tf != NULL &&
450	    (fflush(tf) != 0 || ferror(tf) ||
451	     (SuperSafe && fsync(fileno(tf)) < 0) ||
452	     fclose(tf) < 0))
453	{
454		tferror(tf, e);
455		flush_errors(TRUE);
456		finis(TRUE, ExitStat);
457	}
458
459	/* An EOF when running SMTP is an error */
460	if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
461	{
462		char *host;
463		char *problem;
464
465		host = RealHostName;
466		if (host == NULL)
467			host = "localhost";
468
469		if (feof(fp))
470			problem = "unexpected close";
471		else if (ferror(fp))
472			problem = "I/O error";
473		else
474			problem = "read timeout";
475		if (LogLevel > 0 && feof(fp))
476			sm_syslog(LOG_NOTICE, e->e_id,
477			    "collect: %s on connection from %.100s, sender=%s: %s",
478			    problem, host,
479			    shortenstring(e->e_from.q_paddr, MAXSHORTSTR),
480			    errstring(errno));
481		if (feof(fp))
482			usrerr("451 collect: %s on connection from %s, from=%s",
483				problem, host,
484				shortenstring(e->e_from.q_paddr, MAXSHORTSTR));
485		else
486			syserr("451 collect: %s on connection from %s, from=%s",
487				problem, host,
488				shortenstring(e->e_from.q_paddr, MAXSHORTSTR));
489
490		/* don't return an error indication */
491		e->e_to = NULL;
492		e->e_flags &= ~EF_FATALERRS;
493		e->e_flags |= EF_CLRQUEUE;
494
495		/* and don't try to deliver the partial message either */
496		if (InChild)
497			ExitStat = EX_QUIT;
498		finis(TRUE, ExitStat);
499	}
500
501	/*
502	**  Find out some information from the headers.
503	**	Examples are who is the from person & the date.
504	*/
505
506	eatheader(e, TRUE);
507
508	if (GrabTo && e->e_sendqueue == NULL)
509		usrerr("No recipient addresses found in header");
510
511	/* collect statistics */
512	if (OpMode != MD_VERIFY)
513		markstats(e, (ADDRESS *) NULL, FALSE);
514
515#if _FFR_DSN_RRT_OPTION
516	/*
517	**  If we have a Return-Receipt-To:, turn it into a DSN.
518	*/
519
520	if (RrtImpliesDsn && hvalue("return-receipt-to", e->e_header) != NULL)
521	{
522		ADDRESS *q;
523
524		for (q = e->e_sendqueue; q != NULL; q = q->q_next)
525			if (!bitset(QHASNOTIFY, q->q_flags))
526				q->q_flags |= QHASNOTIFY|QPINGONSUCCESS;
527	}
528#endif
529
530	/*
531	**  Add an Apparently-To: line if we have no recipient lines.
532	*/
533
534	if (hvalue("to", e->e_header) != NULL ||
535	    hvalue("cc", e->e_header) != NULL ||
536	    hvalue("apparently-to", e->e_header) != NULL)
537	{
538		/* have a valid recipient header -- delete Bcc: headers */
539		e->e_flags |= EF_DELETE_BCC;
540	}
541	else if (hvalue("bcc", e->e_header) == NULL)
542	{
543		/* no valid recipient headers */
544		register ADDRESS *q;
545		char *hdr = NULL;
546
547		/* create an Apparently-To: field */
548		/*    that or reject the message.... */
549		switch (NoRecipientAction)
550		{
551		  case NRA_ADD_APPARENTLY_TO:
552			hdr = "Apparently-To";
553			break;
554
555		  case NRA_ADD_TO:
556			hdr = "To";
557			break;
558
559		  case NRA_ADD_BCC:
560			addheader("Bcc", " ", &e->e_header);
561			break;
562
563		  case NRA_ADD_TO_UNDISCLOSED:
564			addheader("To", "undisclosed-recipients:;", &e->e_header);
565			break;
566		}
567
568		if (hdr != NULL)
569		{
570			for (q = e->e_sendqueue; q != NULL; q = q->q_next)
571			{
572				if (q->q_alias != NULL)
573					continue;
574				if (tTd(30, 3))
575					printf("Adding %s: %s\n",
576						hdr, q->q_paddr);
577				addheader(hdr, q->q_paddr, &e->e_header);
578			}
579		}
580	}
581
582	/* check for message too large */
583	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
584	{
585		e->e_flags |= EF_NO_BODY_RETN|EF_CLRQUEUE;
586		e->e_status = "5.2.3";
587		usrerr("552 Message exceeds maximum fixed size (%ld)",
588			MaxMessageSize);
589		if (LogLevel > 6)
590			sm_syslog(LOG_NOTICE, e->e_id,
591				"message size (%ld) exceeds maximum (%ld)",
592				e->e_msgsize, MaxMessageSize);
593	}
594
595	/* check for illegal 8-bit data */
596	if (HasEightBits)
597	{
598		e->e_flags |= EF_HAS8BIT;
599		if (!bitset(MM_PASS8BIT|MM_MIME8BIT, MimeMode) &&
600		    !bitset(EF_IS_MIME, e->e_flags))
601		{
602			e->e_status = "5.6.1";
603			usrerr("554 Eight bit data not allowed");
604		}
605	}
606	else
607	{
608		/* if it claimed to be 8 bits, well, it lied.... */
609		if (e->e_bodytype != NULL &&
610		    strcasecmp(e->e_bodytype, "8BITMIME") == 0)
611			e->e_bodytype = "7BIT";
612	}
613
614	if ((e->e_dfp = fopen(dfname, "r")) == NULL)
615	{
616		/* we haven't acked receipt yet, so just chuck this */
617		syserr("Cannot reopen %s", dfname);
618		finis(TRUE, ExitStat);
619	}
620}
621
622
623static void
624collecttimeout(timeout)
625	time_t timeout;
626{
627	/* if no progress was made, die now */
628	if (!CollectProgress)
629		longjmp(CtxCollectTimeout, 1);
630
631	/* otherwise reset the timeout */
632	CollectTimeout = setevent(timeout, collecttimeout, timeout);
633	CollectProgress = FALSE;
634}
635/*
636**  TFERROR -- signal error on writing the temporary file.
637**
638**	Parameters:
639**		tf -- the file pointer for the temporary file.
640**		e -- the current envelope.
641**
642**	Returns:
643**		none.
644**
645**	Side Effects:
646**		Gives an error message.
647**		Arranges for following output to go elsewhere.
648*/
649
650void
651tferror(tf, e)
652	FILE *volatile tf;
653	register ENVELOPE *e;
654{
655	setstat(EX_IOERR);
656	if (errno == ENOSPC)
657	{
658#if STAT64 > 0
659		struct stat64 st;
660#else
661		struct stat st;
662#endif
663		long avail;
664		long bsize;
665		extern long freediskspace __P((char *, long *));
666
667		e->e_flags |= EF_NO_BODY_RETN;
668
669		if (
670#if STAT64 > 0
671		    fstat64(fileno(tf), &st)
672#else
673		    fstat(fileno(tf), &st)
674#endif
675		    < 0)
676		  st.st_size = 0;
677		(void) freopen(queuename(e, 'd'), "w", tf);
678		if (st.st_size <= 0)
679			fprintf(tf, "\n*** Mail could not be accepted");
680		else if (sizeof st.st_size > sizeof (long))
681			fprintf(tf, "\n*** Mail of at least %s bytes could not be accepted\n",
682				quad_to_string(st.st_size));
683		else
684			fprintf(tf, "\n*** Mail of at least %lu bytes could not be accepted\n",
685				(unsigned long) st.st_size);
686		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
687			MyHostName);
688		avail = freediskspace(QueueDir, &bsize);
689		if (avail > 0)
690		{
691			if (bsize > 1024)
692				avail *= bsize / 1024;
693			else if (bsize < 1024)
694				avail /= 1024 / bsize;
695			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
696				avail);
697		}
698		e->e_status = "4.3.1";
699		usrerr("452 Out of disk space for temp file");
700	}
701	else
702		syserr("collect: Cannot write tf%s", e->e_id);
703	if (freopen("/dev/null", "w", tf) == NULL)
704		sm_syslog(LOG_ERR, e->e_id,
705			  "tferror: freopen(\"/dev/null\") failed: %s",
706			  errstring(errno));
707}
708/*
709**  EATFROM -- chew up a UNIX style from line and process
710**
711**	This does indeed make some assumptions about the format
712**	of UNIX messages.
713**
714**	Parameters:
**		fm -- the from line.
**		e -- the current envelope.
716**
717**	Returns:
718**		none.
719**
720**	Side Effects:
721**		extracts what information it can from the header,
722**		such as the date.
723*/
724
725# ifndef NOTUNIX
726
/* three-letter day-of-week names, NULL terminated */
char	*DowList[] =
{
	"Sun",
	"Mon", "Tue", "Wed", "Thu", "Fri",
	"Sat",
	NULL
};

/* three-letter month names, NULL terminated */
char	*MonthList[] =
{
	"Jan", "Feb", "Mar",
	"Apr", "May", "Jun",
	"Jul", "Aug", "Sep",
	"Oct", "Nov", "Dec",
	NULL
};
738
739void
740eatfrom(fm, e)
741	char *volatile fm;
742	register ENVELOPE *e;
743{
744	register char *p;
745	register char **dt;
746
747	if (tTd(30, 2))
748		printf("eatfrom(%s)\n", fm);
749
750	/* find the date part */
751	p = fm;
752	while (*p != '\0')
753	{
754		/* skip a word */
755		while (*p != '\0' && *p != ' ')
756			p++;
757		while (*p == ' ')
758			p++;
759		if (!(isascii(*p) && isupper(*p)) ||
760		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
761			continue;
762
763		/* we have a possible date */
764		for (dt = DowList; *dt != NULL; dt++)
765			if (strncmp(*dt, p, 3) == 0)
766				break;
767		if (*dt == NULL)
768			continue;
769
770		for (dt = MonthList; *dt != NULL; dt++)
771			if (strncmp(*dt, &p[4], 3) == 0)
772				break;
773		if (*dt != NULL)
774			break;
775	}
776
777	if (*p != '\0')
778	{
779		char *q;
780
781		/* we have found a date */
782		q = xalloc(25);
783		(void) strncpy(q, p, 25);
784		q[24] = '\0';
785		q = arpadate(q);
786		define('a', newstr(q), e);
787	}
788}
789
790# endif /* NOTUNIX */
791