1/*
2 * Copyright 2005-2009, Haiku Inc.
3 * This file may be used under the terms of the MIT License.
4 *
5 * Originally public domain written by Alexander G. M. Smith.
6 */
7
8
9/*!	BeMailToMBox is a utility program (requested by Frank Zschockelt) that
10	converts BeOS e-mail files into Unix mailbox files (the kind that Pine
11	uses).  All the files in the input directory are concatenated with the
12	appropriate mbox header lines added between them, and trailing blank lines
13	reduced.  The resulting text is written to standard output.  Command line
14	driven.
15*/
16
17#include <ctype.h>
18#include <errno.h>
19#include <string.h>
20#include <stdio.h>
21#include <time.h>
22
23#include <Application.h>
24#include <StorageKit.h>
25#include <SupportKit.h>
26
27
// Name of the running executable as supplied by the runtime; printed in the
// usage text.
extern const char* __progname;
static const char* kProgramName = __progname;

// Time value used for stamping generated mbox "From " header lines.  main()
// sets it to the current time, and it advances by one second every time a
// message needs a generated header.
time_t gDateStampTime;
34
35
36/*!	Global utility function to display an error message and return.  The message
37	part describes the error, and if errorNumber is non-zero, gets the string
38	", error code $X (standard description)." appended to it.  If the message
39	is NULL then it gets defaulted to "Something went wrong".
40*/
41static void
42DisplayErrorMessage(const char* messageString = NULL, status_t errorNumber = 0,
43	const char* titleString = NULL)
44{
45	char errorBuffer[2048];
46
47	if (titleString == NULL)
48		titleString = "Error Message:";
49
50	if (messageString == NULL) {
51		if (errorNumber == B_OK)
52			messageString = "No error, no message, why bother?";
53		else
54			messageString = "Error";
55	}
56
57	if (errorNumber != 0) {
58		snprintf(errorBuffer, sizeof(errorBuffer), "%s: %s (%" B_PRIx32 ")"
59			"has occured.", messageString, strerror(errorNumber), errorNumber);
60		messageString = errorBuffer;
61	}
62
63	fputs(titleString, stderr);
64	fputc('\n', stderr);
65	fputs(messageString, stderr);
66	fputc('\n', stderr);
67}
68
69
/*!	Advance past any run of space characters and return the new position. */
static const char*
SkipSpaces(const char* string)
{
	while (*string == ' ')
		string++;
	return string;
}


/*!	True for the ASCII digits '0' through '9' (locale independent). */
static bool
IsAsciiDigit(char c)
{
	return c >= '0' && c <= '9';
}


/*!	True for the ASCII capital letters 'A' through 'Z' (locale independent). */
static bool
IsAsciiUpper(char c)
{
	return c >= 'A' && c <= 'Z';
}


/*!	True if \a string begins with one of the \a count three letter \a names.
	strncmp() stops at the terminating NUL, so a string shorter than three
	characters is never read past its end.
*/
static bool
StartsWithAnyName(const char* string, const char* const* names, size_t count)
{
	for (size_t i = 0; i < count; i++) {
		if (strncmp(string, names[i], 3) == 0)
			return true;
	}
	return false;
}


/*!	Determine if a line of text is the start of another message.  Pine mailbox
	files have messages that start with a line that could say something like
	"From agmsmith@achilles.net Fri Oct 31 21:19:36 EST 1997" or maybe something
	like "From POPmail Mon Oct 20 21:12:36 1997" or in a more modern format,
	"From agmsmith@achilles.net Tue Sep 4 09:04:11 2001 -0400".  This is
	generalised to "From blah Day MMM NN XX:XX:XX TZONE1 YYYY TZONE2":

	- blah is an e-mail address that is ignored (any word without spaces).
	- Day is a 3 letter day of the week.
	- MMM is a 3 letter month name.
	- NN is the day of the month (one or more digits).
	- XX:XX:XX is the time, the X's are digits.
	- TZONE1 is the old style optional time zone of 3 capital letters.
	- YYYY is the four digit year.
	- TZONE2 is the optional modern time zone, a plus or minus sign and 4
	  digits.

	Fields may be separated by any number of spaces.  Once a line has passed
	the "From " prefix test, the reason for any rejection is reported on
	standard error.

	\param lineString NUL terminated line, without trailing line feed or
		carriage return.  It is not modified.
	\return true if the line is the start of a message.
*/
bool
IsStartOfMailMessage(char* lineString)
{
	static const char* const kDayNames[] = {
		"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
	};
	static const char* const kMonthNames[] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};

	// It starts with "From ".  strncmp() rather than memcmp() so that lines
	// shorter than the prefix are not read beyond their terminating NUL.
	if (strncmp(lineString, "From ", 5) != 0)
		return false;

	const char* string = SkipSpaces(lineString + 4);

	// Skip over the e-mail address (or stop at the end of string).
	while (*string != ' ' && *string != 0)
		string++;
	string = SkipSpaces(string);

	// TODO: improve this!!!

	// Look for the 3 letter day of the week.
	if (!StartsWithAnyName(string, kDayNames,
			sizeof(kDayNames) / sizeof(kDayNames[0]))) {
		fprintf(stderr, "False alarm, not a valid day of the week in \"%s\""
			".\n", lineString);
		return false;
	}
	string = SkipSpaces(string + 3);

	// Look for the 3 letter month code.
	if (!StartsWithAnyName(string, kMonthNames,
			sizeof(kMonthNames) / sizeof(kMonthNames[0]))) {
		fprintf(stderr, "False alarm, not a valid month name in \"%s\".\n",
			lineString);
		return false;
	}
	string = SkipSpaces(string + 3);

	// Skip the day of the month.  Require at least one digit.
	if (!IsAsciiDigit(*string)) {
		fprintf(stderr, "False alarm, not a valid day of the "
			"month number in \"%s\".\n", lineString);
		return false;
	}
	while (IsAsciiDigit(*string))
		string++;
	string = SkipSpaces(string);

	// Check the time.  Look for the sequence
	// digit-digit-colon-digit-digit-colon-digit-digit.  The tests short
	// circuit on the first mismatch, so a NUL ends the scan immediately.
	if (!IsAsciiDigit(string[0]) || !IsAsciiDigit(string[1])
		|| string[2] != ':'
		|| !IsAsciiDigit(string[3]) || !IsAsciiDigit(string[4])
		|| string[5] != ':'
		|| !IsAsciiDigit(string[6]) || !IsAsciiDigit(string[7])) {
		fprintf(stderr, "False alarm, not a valid time value in \"%s\".\n",
			lineString);
		return false;
	}
	string = SkipSpaces(string + 8);

	// Look for the optional antique 3 capital letter time zone and skip it.
	if (IsAsciiUpper(string[0]) && IsAsciiUpper(string[1])
		&& IsAsciiUpper(string[2])) {
		string = SkipSpaces(string + 3);
	}

	// Look for the 4 digit year.
	if (!IsAsciiDigit(string[0]) || !IsAsciiDigit(string[1])
		|| !IsAsciiDigit(string[2]) || !IsAsciiDigit(string[3])) {
		fprintf(stderr, "False alarm, not a valid 4 digit year in \"%s\".\n",
			lineString);
		return false;
	}
	string = SkipSpaces(string + 4);

	// Look for the optional modern time zone and skip over it if present.
	if ((string[0] == '+' || string[0] == '-')
		&& IsAsciiDigit(string[1]) && IsAsciiDigit(string[2])
		&& IsAsciiDigit(string[3]) && IsAsciiDigit(string[4])) {
		string = SkipSpaces(string + 5);
	}

	// Look for end of string.
	if (*string != 0) {
		fprintf(stderr, "False alarm, extra stuff after the "
			"year/time zone in \"%s\".\n", lineString);
		return false;
	}

	return true;
}
210
211
212/*!	Read the input file, convert it to mbox format, and write it to standard
213	output.  Returns zero if successful, a negative error code if an error
214	occured.
215*/
216status_t
217ProcessMessageFile(char* fileName)
218{
219	fprintf(stdout, "Now processing: \"%s\"\n", fileName);
220
221	FILE* inputFile = fopen(fileName, "rb");
222	if (inputFile == NULL) {
223		DisplayErrorMessage("Unable to open file", errno);
224		return errno;
225	}
226
227	// Extract a text message from the Mail file.
228
229	BString messageText;
230	int lineNumber = 0;
231
232	while (!feof(inputFile)) {
233		// First read in one line of text.
234		char line[102400];
235		if (fgets(line, sizeof(line), inputFile) == NULL) {
236			if (ferror(inputFile)) {
237				char errorString[2048];
238				snprintf(errorString, sizeof(errorString),
239					"Error while reading from \"%s\"", fileName);
240				DisplayErrorMessage(errorString, errno);
241				fclose(inputFile);
242				return errno;
243			}
244			break;
245				// No error, just end of file.
246		}
247
248		// Remove any trailing control characters (line feed usually, or CRLF).
249		// Might also nuke trailing tabs too. Doesn't usually matter. The main
250		// thing is to allow input files with both LF and CRLF endings (and
251		// even CR endings if you come from the Macintosh world).
252
253		char* string = line + strlen(line) - 1;
254		while (string >= line && *string < 32)
255			string--;
256		*(++string) = 0;
257
258		if (lineNumber == 0 && line[0] == 0) {
259			// Skip leading blank lines.
260			continue;
261		}
262		lineNumber++;
263
264		// Prepend the new mbox message header, if the first line of the message
265		// doesn't already have one.
266		if (lineNumber == 1 && !IsStartOfMailMessage(line)) {
267			time_t timestamp = gDateStampTime++;
268			messageText.Append("From baron@be.com ");
269			messageText.Append(ctime(&timestamp));
270		}
271
272		// Append the line to the current message text.
273		messageText.Append(line);
274		messageText.Append("\n");
275	}
276
277	// Remove blank lines from the end of the message (a pet peeve of mine), but
278	// end the message with two new lines to separate it from the next message.
279	int i = messageText.Length();
280	while (i > 0 && (messageText[i - 1] == '\n' || messageText[i - 1] == '\r'))
281		i--;
282	messageText.Truncate(i);
283	messageText.Append("\n\n");
284
285	// Write the message out.
286
287	status_t status = B_OK;
288
289	if (puts(messageText.String()) < 0) {
290		DisplayErrorMessage ("Error while writing the message", errno);
291		status = errno;
292	}
293
294	fclose(inputFile);
295	return status;
296}
297
298
299int
300main(int argc, char** argv)
301{
302	BApplication app("application/x-vnd.Haiku-mail2mbox");
303
304	if (argc <= 1 || argc >= 3) {
305		printf("%s is a utility for converting Mail e-mail\n", argv[0]);
306		printf("files to Unix Pine style e-mail files.  It could well\n");
307		printf("work with other Unix style mailbox files.  Each message in\n");
308		printf("the input directory is converted and sent to the standard\n");
309		printf("output.  Usage:\n\n");
310		printf("%s InputDirectory >OutputFile\n\n", kProgramName);
311		printf("Public domain, by Alexander G. M. Smith.\n");
312		return -10;
313	}
314
315	// Set the date stamp to the current time.
316	gDateStampTime = time (NULL);
317
318	// Try to open the input directory.
319	char inputPathName[B_PATH_NAME_LENGTH];
320	strlcpy(inputPathName, argv[1], sizeof(inputPathName) - 2);
321
322	char tempString[2048];
323
324	DIR* dir = opendir(inputPathName);
325	if (dir == NULL) {
326		sprintf(tempString, "Problems opening directory named \"%s\".",
327			inputPathName);
328		DisplayErrorMessage(tempString, errno);
329		return 1;
330	}
331
332	// Append a trailing slash to the directory name, if it needs one.
333	if (inputPathName[strlen(inputPathName) - 1] != '/')
334		strcat(inputPathName, "/");
335
336	int messagesDoneCount = 0;
337	status_t status = B_OK;
338
339	while (dirent_t* entry = readdir(dir)) {
340		// skip '.' and '..'
341		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
342			break;
343
344		strlcpy(tempString, inputPathName, sizeof(tempString));
345		strlcat(tempString, entry->d_name, sizeof(tempString));
346
347		status = ProcessMessageFile(tempString);
348		if (status != B_OK)
349			break;
350
351		messagesDoneCount++;
352	}
353
354	closedir(dir);
355
356	if (status != B_OK) {
357		DisplayErrorMessage("Stopping early because an error occured", status);
358		return status;
359	}
360
361	fprintf(stderr, "Did %d messages successfully.\n", messagesDoneCount);
362	return 0;
363}
364