archive_getdate.c revision 318482
/*
 * This code is in the public domain and has no copyright.
 *
 * This is a plain C recursive-descent translation of an old
 * public-domain YACC grammar that has been used for parsing dates in
 * very many open-source projects.
 *
 * Since the original authors were generous enough to donate their
 * work to the public domain, I feel compelled to match their
 * generosity.
 *
 * Tim Kientzle, February 2009.
 */
14
/*
 * Header comment from original getdate.y:
 */

/*
**  Originally written by Steven M. Bellovin <smb@research.att.com> while
**  at the University of North Carolina at Chapel Hill.  Later tweaked by
**  a couple of people on Usenet.  Completely overhauled by Rich $alz
**  <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990;
**
**  This grammar has 10 shift/reduce conflicts.
**
**  This code is in the public domain and has no copyright.
*/
29
30#ifdef __FreeBSD__
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33#endif
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <time.h>
40
41#define __LIBARCHIVE_BUILD 1
42#include "archive_getdate.h"
43
/*
 * Calendar constants.  EPOCH is a year; the three durations are in
 * seconds and are typed long.
 */
#define	EPOCH		1970
#define	MINUTE		(60L)
#define	HOUR		(60L * MINUTE)
#define	DAY		(24L * HOUR)

/* How daylight-saving time applies: forced on, forced off, or undecided. */
enum DSTMODE {
	DSTon,
	DSToff,
	DSTmaybe
};

/* Meridian marker carried as the value of a tAMPM token. */
enum {
	tAM,
	tPM
};

/*
 * Token types produced by nexttoken().  Numbering starts at 260;
 * punctuation is reported as its own character code instead.
 */
enum {
	tAGO = 260,
	tDAY,
	tDAYZONE,
	tAMPM,
	tMONTH,
	tMONTH_UNIT,
	tSEC_UNIT,
	tUNUMBER,
	tZONE,
	tDST
};

/* One lexical token: its type plus the numeric value attached to it. */
struct token {
	int	token;
	time_t	value;
};
58
59/*
60 * Parser state.
61 */
62struct gdstate {
63	struct token *tokenp; /* Pointer to next token. */
64	/* HaveXxxx counts how many of this kind of phrase we've seen;
65	 * it's a fatal error to have more than one time, zone, day,
66	 * or date phrase. */
67	int	HaveYear;
68	int	HaveMonth;
69	int	HaveDay;
70	int	HaveWeekDay; /* Day of week */
71	int	HaveTime; /* Hour/minute/second */
72	int	HaveZone; /* timezone and/or DST info */
73	int	HaveRel; /* time offset; we can have more than one */
74	/* Absolute time values. */
75	time_t	Timezone;  /* Seconds offset from GMT */
76	time_t	Day;
77	time_t	Hour;
78	time_t	Minutes;
79	time_t	Month;
80	time_t	Seconds;
81	time_t	Year;
82	/* DST selection */
83	enum DSTMODE	DSTmode;
84	/* Day of week accounting, e.g., "3rd Tuesday" */
85	time_t	DayOrdinal; /* "3" in "3rd Tuesday" */
86	time_t	DayNumber; /* "Tuesday" in "3rd Tuesday" */
87	/* Relative time values: hour/day/week offsets are measured in
88	 * seconds, month/year are counted in months. */
89	time_t	RelMonth;
90	time_t	RelSeconds;
91};
92
/*
 * A series of functions that recognize certain common time phrases.
 * Each function returns 1 if it managed to make sense of some of the
 * tokens, zero otherwise.
 */
98
99/*
100 *  hour:minute or hour:minute:second with optional AM, PM, or numeric
101 *  timezone offset
102 */
103static int
104timephrase(struct gdstate *gds)
105{
106	if (gds->tokenp[0].token == tUNUMBER
107	    && gds->tokenp[1].token == ':'
108	    && gds->tokenp[2].token == tUNUMBER
109	    && gds->tokenp[3].token == ':'
110	    && gds->tokenp[4].token == tUNUMBER) {
111		/* "12:14:18" or "22:08:07" */
112		++gds->HaveTime;
113		gds->Hour = gds->tokenp[0].value;
114		gds->Minutes = gds->tokenp[2].value;
115		gds->Seconds = gds->tokenp[4].value;
116		gds->tokenp += 5;
117	}
118	else if (gds->tokenp[0].token == tUNUMBER
119	    && gds->tokenp[1].token == ':'
120	    && gds->tokenp[2].token == tUNUMBER) {
121		/* "12:14" or "22:08" */
122		++gds->HaveTime;
123		gds->Hour = gds->tokenp[0].value;
124		gds->Minutes = gds->tokenp[2].value;
125		gds->Seconds = 0;
126		gds->tokenp += 3;
127	}
128	else if (gds->tokenp[0].token == tUNUMBER
129	    && gds->tokenp[1].token == tAMPM) {
130		/* "7" is a time if it's followed by "am" or "pm" */
131		++gds->HaveTime;
132		gds->Hour = gds->tokenp[0].value;
133		gds->Minutes = gds->Seconds = 0;
134		/* We'll handle the AM/PM below. */
135		gds->tokenp += 1;
136	} else {
137		/* We can't handle this. */
138		return 0;
139	}
140
141	if (gds->tokenp[0].token == tAMPM) {
142		/* "7:12pm", "12:20:13am" */
143		if (gds->Hour == 12)
144			gds->Hour = 0;
145		if (gds->tokenp[0].value == tPM)
146			gds->Hour += 12;
147		gds->tokenp += 1;
148	}
149	if (gds->tokenp[0].token == '+'
150	    && gds->tokenp[1].token == tUNUMBER) {
151		/* "7:14+0700" */
152		gds->HaveZone++;
153		gds->DSTmode = DSToff;
154		gds->Timezone = - ((gds->tokenp[1].value / 100) * HOUR
155		    + (gds->tokenp[1].value % 100) * MINUTE);
156		gds->tokenp += 2;
157	}
158	if (gds->tokenp[0].token == '-'
159	    && gds->tokenp[1].token == tUNUMBER) {
160		/* "19:14:12-0530" */
161		gds->HaveZone++;
162		gds->DSTmode = DSToff;
163		gds->Timezone = + ((gds->tokenp[1].value / 100) * HOUR
164		    + (gds->tokenp[1].value % 100) * MINUTE);
165		gds->tokenp += 2;
166	}
167	return 1;
168}
169
170/*
171 * Timezone name, possibly including DST.
172 */
173static int
174zonephrase(struct gdstate *gds)
175{
176	if (gds->tokenp[0].token == tZONE
177	    && gds->tokenp[1].token == tDST) {
178		gds->HaveZone++;
179		gds->Timezone = gds->tokenp[0].value;
180		gds->DSTmode = DSTon;
181		gds->tokenp += 1;
182		return 1;
183	}
184
185	if (gds->tokenp[0].token == tZONE) {
186		gds->HaveZone++;
187		gds->Timezone = gds->tokenp[0].value;
188		gds->DSTmode = DSToff;
189		gds->tokenp += 1;
190		return 1;
191	}
192
193	if (gds->tokenp[0].token == tDAYZONE) {
194		gds->HaveZone++;
195		gds->Timezone = gds->tokenp[0].value;
196		gds->DSTmode = DSTon;
197		gds->tokenp += 1;
198		return 1;
199	}
200	return 0;
201}
202
203/*
204 * Year/month/day in various combinations.
205 */
206static int
207datephrase(struct gdstate *gds)
208{
209	if (gds->tokenp[0].token == tUNUMBER
210	    && gds->tokenp[1].token == '/'
211	    && gds->tokenp[2].token == tUNUMBER
212	    && gds->tokenp[3].token == '/'
213	    && gds->tokenp[4].token == tUNUMBER) {
214		gds->HaveYear++;
215		gds->HaveMonth++;
216		gds->HaveDay++;
217		if (gds->tokenp[0].value >= 13) {
218			/* First number is big:  2004/01/29, 99/02/17 */
219			gds->Year = gds->tokenp[0].value;
220			gds->Month = gds->tokenp[2].value;
221			gds->Day = gds->tokenp[4].value;
222		} else if ((gds->tokenp[4].value >= 13)
223		    || (gds->tokenp[2].value >= 13)) {
224			/* Last number is big:  01/07/98 */
225			/* Middle number is big:  01/29/04 */
226			gds->Month = gds->tokenp[0].value;
227			gds->Day = gds->tokenp[2].value;
228			gds->Year = gds->tokenp[4].value;
229		} else {
230			/* No significant clues: 02/03/04 */
231			gds->Month = gds->tokenp[0].value;
232			gds->Day = gds->tokenp[2].value;
233			gds->Year = gds->tokenp[4].value;
234		}
235		gds->tokenp += 5;
236		return 1;
237	}
238
239	if (gds->tokenp[0].token == tUNUMBER
240	    && gds->tokenp[1].token == '/'
241	    && gds->tokenp[2].token == tUNUMBER) {
242		/* "1/15" */
243		gds->HaveMonth++;
244		gds->HaveDay++;
245		gds->Month = gds->tokenp[0].value;
246		gds->Day = gds->tokenp[2].value;
247		gds->tokenp += 3;
248		return 1;
249	}
250
251	if (gds->tokenp[0].token == tUNUMBER
252	    && gds->tokenp[1].token == '-'
253	    && gds->tokenp[2].token == tUNUMBER
254	    && gds->tokenp[3].token == '-'
255	    && gds->tokenp[4].token == tUNUMBER) {
256		/* ISO 8601 format.  yyyy-mm-dd.  */
257		gds->HaveYear++;
258		gds->HaveMonth++;
259		gds->HaveDay++;
260		gds->Year = gds->tokenp[0].value;
261		gds->Month = gds->tokenp[2].value;
262		gds->Day = gds->tokenp[4].value;
263		gds->tokenp += 5;
264		return 1;
265	}
266
267	if (gds->tokenp[0].token == tUNUMBER
268	    && gds->tokenp[1].token == '-'
269	    && gds->tokenp[2].token == tMONTH
270	    && gds->tokenp[3].token == '-'
271	    && gds->tokenp[4].token == tUNUMBER) {
272		gds->HaveYear++;
273		gds->HaveMonth++;
274		gds->HaveDay++;
275		if (gds->tokenp[0].value > 31) {
276			/* e.g. 1992-Jun-17 */
277			gds->Year = gds->tokenp[0].value;
278			gds->Month = gds->tokenp[2].value;
279			gds->Day = gds->tokenp[4].value;
280		} else {
281			/* e.g. 17-JUN-1992.  */
282			gds->Day = gds->tokenp[0].value;
283			gds->Month = gds->tokenp[2].value;
284			gds->Year = gds->tokenp[4].value;
285		}
286		gds->tokenp += 5;
287		return 1;
288	}
289
290	if (gds->tokenp[0].token == tMONTH
291	    && gds->tokenp[1].token == tUNUMBER
292	    && gds->tokenp[2].token == ','
293	    && gds->tokenp[3].token == tUNUMBER) {
294		/* "June 17, 2001" */
295		gds->HaveYear++;
296		gds->HaveMonth++;
297		gds->HaveDay++;
298		gds->Month = gds->tokenp[0].value;
299		gds->Day = gds->tokenp[1].value;
300		gds->Year = gds->tokenp[3].value;
301		gds->tokenp += 4;
302		return 1;
303	}
304
305	if (gds->tokenp[0].token == tMONTH
306	    && gds->tokenp[1].token == tUNUMBER) {
307		/* "May 3" */
308		gds->HaveMonth++;
309		gds->HaveDay++;
310		gds->Month = gds->tokenp[0].value;
311		gds->Day = gds->tokenp[1].value;
312		gds->tokenp += 2;
313		return 1;
314	}
315
316	if (gds->tokenp[0].token == tUNUMBER
317	    && gds->tokenp[1].token == tMONTH
318	    && gds->tokenp[2].token == tUNUMBER) {
319		/* "12 Sept 1997" */
320		gds->HaveYear++;
321		gds->HaveMonth++;
322		gds->HaveDay++;
323		gds->Day = gds->tokenp[0].value;
324		gds->Month = gds->tokenp[1].value;
325		gds->Year = gds->tokenp[2].value;
326		gds->tokenp += 3;
327		return 1;
328	}
329
330	if (gds->tokenp[0].token == tUNUMBER
331	    && gds->tokenp[1].token == tMONTH) {
332		/* "12 Sept" */
333		gds->HaveMonth++;
334		gds->HaveDay++;
335		gds->Day = gds->tokenp[0].value;
336		gds->Month = gds->tokenp[1].value;
337		gds->tokenp += 2;
338		return 1;
339	}
340
341	return 0;
342}
343
344/*
345 * Relative time phrase: "tomorrow", "yesterday", "+1 hour", etc.
346 */
347static int
348relunitphrase(struct gdstate *gds)
349{
350	if (gds->tokenp[0].token == '-'
351	    && gds->tokenp[1].token == tUNUMBER
352	    && gds->tokenp[2].token == tSEC_UNIT) {
353		/* "-3 hours" */
354		gds->HaveRel++;
355		gds->RelSeconds -= gds->tokenp[1].value * gds->tokenp[2].value;
356		gds->tokenp += 3;
357		return 1;
358	}
359	if (gds->tokenp[0].token == '+'
360	    && gds->tokenp[1].token == tUNUMBER
361	    && gds->tokenp[2].token == tSEC_UNIT) {
362		/* "+1 minute" */
363		gds->HaveRel++;
364		gds->RelSeconds += gds->tokenp[1].value * gds->tokenp[2].value;
365		gds->tokenp += 3;
366		return 1;
367	}
368	if (gds->tokenp[0].token == tUNUMBER
369	    && gds->tokenp[1].token == tSEC_UNIT) {
370		/* "1 day" */
371		gds->HaveRel++;
372		gds->RelSeconds += gds->tokenp[0].value * gds->tokenp[1].value;
373		gds->tokenp += 2;
374		return 1;
375	}
376	if (gds->tokenp[0].token == '-'
377	    && gds->tokenp[1].token == tUNUMBER
378	    && gds->tokenp[2].token == tMONTH_UNIT) {
379		/* "-3 months" */
380		gds->HaveRel++;
381		gds->RelMonth -= gds->tokenp[1].value * gds->tokenp[2].value;
382		gds->tokenp += 3;
383		return 1;
384	}
385	if (gds->tokenp[0].token == '+'
386	    && gds->tokenp[1].token == tUNUMBER
387	    && gds->tokenp[2].token == tMONTH_UNIT) {
388		/* "+5 years" */
389		gds->HaveRel++;
390		gds->RelMonth += gds->tokenp[1].value * gds->tokenp[2].value;
391		gds->tokenp += 3;
392		return 1;
393	}
394	if (gds->tokenp[0].token == tUNUMBER
395	    && gds->tokenp[1].token == tMONTH_UNIT) {
396		/* "2 years" */
397		gds->HaveRel++;
398		gds->RelMonth += gds->tokenp[0].value * gds->tokenp[1].value;
399		gds->tokenp += 2;
400		return 1;
401	}
402	if (gds->tokenp[0].token == tSEC_UNIT) {
403		/* "now", "tomorrow" */
404		gds->HaveRel++;
405		gds->RelSeconds += gds->tokenp[0].value;
406		gds->tokenp += 1;
407		return 1;
408	}
409	if (gds->tokenp[0].token == tMONTH_UNIT) {
410		/* "month" */
411		gds->HaveRel++;
412		gds->RelMonth += gds->tokenp[0].value;
413		gds->tokenp += 1;
414		return 1;
415	}
416	return 0;
417}
418
419/*
420 * Day of the week specification.
421 */
422static int
423dayphrase(struct gdstate *gds)
424{
425	if (gds->tokenp[0].token == tDAY) {
426		/* "tues", "wednesday," */
427		gds->HaveWeekDay++;
428		gds->DayOrdinal = 1;
429		gds->DayNumber = gds->tokenp[0].value;
430		gds->tokenp += 1;
431		if (gds->tokenp[0].token == ',')
432			gds->tokenp += 1;
433		return 1;
434	}
435	if (gds->tokenp[0].token == tUNUMBER
436		&& gds->tokenp[1].token == tDAY) {
437		/* "second tues" "3 wed" */
438		gds->HaveWeekDay++;
439		gds->DayOrdinal = gds->tokenp[0].value;
440		gds->DayNumber = gds->tokenp[1].value;
441		gds->tokenp += 2;
442		return 1;
443	}
444	return 0;
445}
446
447/*
448 * Try to match a phrase using one of the above functions.
449 * This layer also deals with a couple of generic issues.
450 */
451static int
452phrase(struct gdstate *gds)
453{
454	if (timephrase(gds))
455		return 1;
456	if (zonephrase(gds))
457		return 1;
458	if (datephrase(gds))
459		return 1;
460	if (dayphrase(gds))
461		return 1;
462	if (relunitphrase(gds)) {
463		if (gds->tokenp[0].token == tAGO) {
464			gds->RelSeconds = -gds->RelSeconds;
465			gds->RelMonth = -gds->RelMonth;
466			gds->tokenp += 1;
467		}
468		return 1;
469	}
470
471	/* Bare numbers sometimes have meaning. */
472	if (gds->tokenp[0].token == tUNUMBER) {
473		if (gds->HaveTime && !gds->HaveYear && !gds->HaveRel) {
474			gds->HaveYear++;
475			gds->Year = gds->tokenp[0].value;
476			gds->tokenp += 1;
477			return 1;
478		}
479
480		if(gds->tokenp[0].value > 10000) {
481			/* "20040301" */
482			gds->HaveYear++;
483			gds->HaveMonth++;
484			gds->HaveDay++;
485			gds->Day= (gds->tokenp[0].value)%100;
486			gds->Month= (gds->tokenp[0].value/100)%100;
487			gds->Year = gds->tokenp[0].value/10000;
488			gds->tokenp += 1;
489			return 1;
490		}
491
492		if (gds->tokenp[0].value < 24) {
493			gds->HaveTime++;
494			gds->Hour = gds->tokenp[0].value;
495			gds->Minutes = 0;
496			gds->Seconds = 0;
497			gds->tokenp += 1;
498			return 1;
499		}
500
501		if ((gds->tokenp[0].value / 100 < 24)
502		    && (gds->tokenp[0].value % 100 < 60)) {
503			/* "513" is same as "5:13" */
504			gds->Hour = gds->tokenp[0].value / 100;
505			gds->Minutes = gds->tokenp[0].value % 100;
506			gds->Seconds = 0;
507			gds->tokenp += 1;
508			return 1;
509		}
510	}
511
512	return 0;
513}
514
515/*
516 * A dictionary of time words.
517 */
518static struct LEXICON {
519	size_t		abbrev;
520	const char	*name;
521	int		type;
522	time_t		value;
523} const TimeWords[] = {
524	/* am/pm */
525	{ 0, "am",		tAMPM,	tAM },
526	{ 0, "pm",		tAMPM,	tPM },
527
528	/* Month names. */
529	{ 3, "january",		tMONTH,  1 },
530	{ 3, "february",	tMONTH,  2 },
531	{ 3, "march",		tMONTH,  3 },
532	{ 3, "april",		tMONTH,  4 },
533	{ 3, "may",		tMONTH,  5 },
534	{ 3, "june",		tMONTH,  6 },
535	{ 3, "july",		tMONTH,  7 },
536	{ 3, "august",		tMONTH,  8 },
537	{ 3, "september",	tMONTH,  9 },
538	{ 3, "october",		tMONTH, 10 },
539	{ 3, "november",	tMONTH, 11 },
540	{ 3, "december",	tMONTH, 12 },
541
542	/* Days of the week. */
543	{ 2, "sunday",		tDAY, 0 },
544	{ 3, "monday",		tDAY, 1 },
545	{ 2, "tuesday",		tDAY, 2 },
546	{ 3, "wednesday",	tDAY, 3 },
547	{ 2, "thursday",	tDAY, 4 },
548	{ 2, "friday",		tDAY, 5 },
549	{ 2, "saturday",	tDAY, 6 },
550
551	/* Timezones: Offsets are in seconds. */
552	{ 0, "gmt",  tZONE,     0*HOUR }, /* Greenwich Mean */
553	{ 0, "ut",   tZONE,     0*HOUR }, /* Universal (Coordinated) */
554	{ 0, "utc",  tZONE,     0*HOUR },
555	{ 0, "wet",  tZONE,     0*HOUR }, /* Western European */
556	{ 0, "bst",  tDAYZONE,  0*HOUR }, /* British Summer */
557	{ 0, "wat",  tZONE,     1*HOUR }, /* West Africa */
558	{ 0, "at",   tZONE,     2*HOUR }, /* Azores */
559	/* { 0, "bst", tZONE, 3*HOUR }, */ /* Brazil Standard: Conflict */
560	/* { 0, "gst", tZONE, 3*HOUR }, */ /* Greenland Standard: Conflict*/
561	{ 0, "nft",  tZONE,     3*HOUR+30*MINUTE }, /* Newfoundland */
562	{ 0, "nst",  tZONE,     3*HOUR+30*MINUTE }, /* Newfoundland Standard */
563	{ 0, "ndt",  tDAYZONE,  3*HOUR+30*MINUTE }, /* Newfoundland Daylight */
564	{ 0, "ast",  tZONE,     4*HOUR }, /* Atlantic Standard */
565	{ 0, "adt",  tDAYZONE,  4*HOUR }, /* Atlantic Daylight */
566	{ 0, "est",  tZONE,     5*HOUR }, /* Eastern Standard */
567	{ 0, "edt",  tDAYZONE,  5*HOUR }, /* Eastern Daylight */
568	{ 0, "cst",  tZONE,     6*HOUR }, /* Central Standard */
569	{ 0, "cdt",  tDAYZONE,  6*HOUR }, /* Central Daylight */
570	{ 0, "mst",  tZONE,     7*HOUR }, /* Mountain Standard */
571	{ 0, "mdt",  tDAYZONE,  7*HOUR }, /* Mountain Daylight */
572	{ 0, "pst",  tZONE,     8*HOUR }, /* Pacific Standard */
573	{ 0, "pdt",  tDAYZONE,  8*HOUR }, /* Pacific Daylight */
574	{ 0, "yst",  tZONE,     9*HOUR }, /* Yukon Standard */
575	{ 0, "ydt",  tDAYZONE,  9*HOUR }, /* Yukon Daylight */
576	{ 0, "hst",  tZONE,     10*HOUR }, /* Hawaii Standard */
577	{ 0, "hdt",  tDAYZONE,  10*HOUR }, /* Hawaii Daylight */
578	{ 0, "cat",  tZONE,     10*HOUR }, /* Central Alaska */
579	{ 0, "ahst", tZONE,     10*HOUR }, /* Alaska-Hawaii Standard */
580	{ 0, "nt",   tZONE,     11*HOUR }, /* Nome */
581	{ 0, "idlw", tZONE,     12*HOUR }, /* Intl Date Line West */
582	{ 0, "cet",  tZONE,     -1*HOUR }, /* Central European */
583	{ 0, "met",  tZONE,     -1*HOUR }, /* Middle European */
584	{ 0, "mewt", tZONE,     -1*HOUR }, /* Middle European Winter */
585	{ 0, "mest", tDAYZONE,  -1*HOUR }, /* Middle European Summer */
586	{ 0, "swt",  tZONE,     -1*HOUR }, /* Swedish Winter */
587	{ 0, "sst",  tDAYZONE,  -1*HOUR }, /* Swedish Summer */
588	{ 0, "fwt",  tZONE,     -1*HOUR }, /* French Winter */
589	{ 0, "fst",  tDAYZONE,  -1*HOUR }, /* French Summer */
590	{ 0, "eet",  tZONE,     -2*HOUR }, /* Eastern Eur, USSR Zone 1 */
591	{ 0, "bt",   tZONE,     -3*HOUR }, /* Baghdad, USSR Zone 2 */
592	{ 0, "it",   tZONE,     -3*HOUR-30*MINUTE },/* Iran */
593	{ 0, "zp4",  tZONE,     -4*HOUR }, /* USSR Zone 3 */
594	{ 0, "zp5",  tZONE,     -5*HOUR }, /* USSR Zone 4 */
595	{ 0, "ist",  tZONE,     -5*HOUR-30*MINUTE },/* Indian Standard */
596	{ 0, "zp6",  tZONE,     -6*HOUR }, /* USSR Zone 5 */
597	/* { 0, "nst",  tZONE, -6.5*HOUR }, */ /* North Sumatra: Conflict */
598	/* { 0, "sst", tZONE, -7*HOUR }, */ /* So Sumatra, USSR 6: Conflict */
599	{ 0, "wast", tZONE,     -7*HOUR }, /* West Australian Standard */
600	{ 0, "wadt", tDAYZONE,  -7*HOUR }, /* West Australian Daylight */
601	{ 0, "jt",   tZONE,     -7*HOUR-30*MINUTE },/* Java (3pm in Cronusland!)*/
602	{ 0, "cct",  tZONE,     -8*HOUR }, /* China Coast, USSR Zone 7 */
603	{ 0, "jst",  tZONE,     -9*HOUR }, /* Japan Std, USSR Zone 8 */
604	{ 0, "cast", tZONE,     -9*HOUR-30*MINUTE },/* Ctrl Australian Std */
605	{ 0, "cadt", tDAYZONE,  -9*HOUR-30*MINUTE },/* Ctrl Australian Daylt */
606	{ 0, "east", tZONE,     -10*HOUR }, /* Eastern Australian Std */
607	{ 0, "eadt", tDAYZONE,  -10*HOUR }, /* Eastern Australian Daylt */
608	{ 0, "gst",  tZONE,     -10*HOUR }, /* Guam Std, USSR Zone 9 */
609	{ 0, "nzt",  tZONE,     -12*HOUR }, /* New Zealand */
610	{ 0, "nzst", tZONE,     -12*HOUR }, /* New Zealand Standard */
611	{ 0, "nzdt", tDAYZONE,  -12*HOUR }, /* New Zealand Daylight */
612	{ 0, "idle", tZONE,     -12*HOUR }, /* Intl Date Line East */
613
614	{ 0, "dst",  tDST,		0 },
615
616	/* Time units. */
617	{ 4, "years",		tMONTH_UNIT,	12 },
618	{ 5, "months",		tMONTH_UNIT,	1 },
619	{ 9, "fortnights",	tSEC_UNIT,	14 * DAY },
620	{ 4, "weeks",		tSEC_UNIT,	7 * DAY },
621	{ 3, "days",		tSEC_UNIT,	DAY },
622	{ 4, "hours",		tSEC_UNIT,	HOUR },
623	{ 3, "minutes",		tSEC_UNIT,	MINUTE },
624	{ 3, "seconds",		tSEC_UNIT,	1 },
625
626	/* Relative-time words. */
627	{ 0, "tomorrow",	tSEC_UNIT,	DAY },
628	{ 0, "yesterday",	tSEC_UNIT,	-DAY },
629	{ 0, "today",		tSEC_UNIT,	0 },
630	{ 0, "now",		tSEC_UNIT,	0 },
631	{ 0, "last",		tUNUMBER,	-1 },
632	{ 0, "this",		tSEC_UNIT,	0 },
633	{ 0, "next",		tUNUMBER,	2 },
634	{ 0, "first",		tUNUMBER,	1 },
635	{ 0, "1st",		tUNUMBER,	1 },
636/*	{ 0, "second",		tUNUMBER,	2 }, */
637	{ 0, "2nd",		tUNUMBER,	2 },
638	{ 0, "third",		tUNUMBER,	3 },
639	{ 0, "3rd",		tUNUMBER,	3 },
640	{ 0, "fourth",		tUNUMBER,	4 },
641	{ 0, "4th",		tUNUMBER,	4 },
642	{ 0, "fifth",		tUNUMBER,	5 },
643	{ 0, "5th",		tUNUMBER,	5 },
644	{ 0, "sixth",		tUNUMBER,	6 },
645	{ 0, "seventh",		tUNUMBER,	7 },
646	{ 0, "eighth",		tUNUMBER,	8 },
647	{ 0, "ninth",		tUNUMBER,	9 },
648	{ 0, "tenth",		tUNUMBER,	10 },
649	{ 0, "eleventh",	tUNUMBER,	11 },
650	{ 0, "twelfth",		tUNUMBER,	12 },
651	{ 0, "ago",		tAGO,		1 },
652
653	/* Military timezones. */
654	{ 0, "a",	tZONE,	1*HOUR },
655	{ 0, "b",	tZONE,	2*HOUR },
656	{ 0, "c",	tZONE,	3*HOUR },
657	{ 0, "d",	tZONE,	4*HOUR },
658	{ 0, "e",	tZONE,	5*HOUR },
659	{ 0, "f",	tZONE,	6*HOUR },
660	{ 0, "g",	tZONE,	7*HOUR },
661	{ 0, "h",	tZONE,	8*HOUR },
662	{ 0, "i",	tZONE,	9*HOUR },
663	{ 0, "k",	tZONE,	10*HOUR },
664	{ 0, "l",	tZONE,	11*HOUR },
665	{ 0, "m",	tZONE,	12*HOUR },
666	{ 0, "n",	tZONE,	-1*HOUR },
667	{ 0, "o",	tZONE,	-2*HOUR },
668	{ 0, "p",	tZONE,	-3*HOUR },
669	{ 0, "q",	tZONE,	-4*HOUR },
670	{ 0, "r",	tZONE,	-5*HOUR },
671	{ 0, "s",	tZONE,	-6*HOUR },
672	{ 0, "t",	tZONE,	-7*HOUR },
673	{ 0, "u",	tZONE,	-8*HOUR },
674	{ 0, "v",	tZONE,	-9*HOUR },
675	{ 0, "w",	tZONE,	-10*HOUR },
676	{ 0, "x",	tZONE,	-11*HOUR },
677	{ 0, "y",	tZONE,	-12*HOUR },
678	{ 0, "z",	tZONE,	0*HOUR },
679
680	/* End of table. */
681	{ 0, NULL,	0,	0 }
682};
683
684/*
685 * Year is either:
686 *  = A number from 0 to 99, which means a year from 1970 to 2069, or
687 *  = The actual year (>=100).
688 */
689static time_t
690Convert(time_t Month, time_t Day, time_t Year,
691	time_t Hours, time_t Minutes, time_t Seconds,
692	time_t Timezone, enum DSTMODE DSTmode)
693{
694	signed char DaysInMonth[12] = {
695		31, 0, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
696	};
697	time_t	Julian;
698	int	i;
699
700	if (Year < 69)
701		Year += 2000;
702	else if (Year < 100)
703		Year += 1900;
704	DaysInMonth[1] = Year % 4 == 0 && (Year % 100 != 0 || Year % 400 == 0)
705	    ? 29 : 28;
706	/* Checking for 2038 bogusly assumes that time_t is 32 bits.  But
707	   I'm too lazy to try to check for time_t overflow in another way.  */
708	if (Year < EPOCH || Year > 2038
709	    || Month < 1 || Month > 12
710	    /* Lint fluff:  "conversion from long may lose accuracy" */
711	    || Day < 1 || Day > DaysInMonth[(int)--Month]
712	    || Hours < 0 || Hours > 23
713	    || Minutes < 0 || Minutes > 59
714	    || Seconds < 0 || Seconds > 59)
715		return -1;
716
717	Julian = Day - 1;
718	for (i = 0; i < Month; i++)
719		Julian += DaysInMonth[i];
720	for (i = EPOCH; i < Year; i++)
721		Julian += 365 + (i % 4 == 0);
722	Julian *= DAY;
723	Julian += Timezone;
724	Julian += Hours * HOUR + Minutes * MINUTE + Seconds;
725	if (DSTmode == DSTon
726	    || (DSTmode == DSTmaybe && localtime(&Julian)->tm_isdst))
727		Julian -= HOUR;
728	return Julian;
729}
730
731
732static time_t
733DSTcorrect(time_t Start, time_t Future)
734{
735	time_t	StartDay;
736	time_t	FutureDay;
737
738	StartDay = (localtime(&Start)->tm_hour + 1) % 24;
739	FutureDay = (localtime(&Future)->tm_hour + 1) % 24;
740	return (Future - Start) + (StartDay - FutureDay) * HOUR;
741}
742
743
744static time_t
745RelativeDate(time_t Start, time_t zone, int dstmode,
746    time_t DayOrdinal, time_t DayNumber)
747{
748	struct tm	*tm;
749	time_t	t, now;
750
751	t = Start - zone;
752	tm = gmtime(&t);
753	now = Start;
754	now += DAY * ((DayNumber - tm->tm_wday + 7) % 7);
755	now += 7 * DAY * (DayOrdinal <= 0 ? DayOrdinal : DayOrdinal - 1);
756	if (dstmode == DSTmaybe)
757		return DSTcorrect(Start, now);
758	return now - Start;
759}
760
761
762static time_t
763RelativeMonth(time_t Start, time_t Timezone, time_t RelMonth)
764{
765	struct tm	*tm;
766	time_t	Month;
767	time_t	Year;
768
769	if (RelMonth == 0)
770		return 0;
771	tm = localtime(&Start);
772	Month = 12 * (tm->tm_year + 1900) + tm->tm_mon + RelMonth;
773	Year = Month / 12;
774	Month = Month % 12 + 1;
775	return DSTcorrect(Start,
776	    Convert(Month, (time_t)tm->tm_mday, Year,
777		(time_t)tm->tm_hour, (time_t)tm->tm_min, (time_t)tm->tm_sec,
778		Timezone, DSTmaybe));
779}
780
781/*
782 * Tokenizer.
783 */
784static int
785nexttoken(const char **in, time_t *value)
786{
787	char	c;
788	char	buff[64];
789
790	for ( ; ; ) {
791		while (isspace((unsigned char)**in))
792			++*in;
793
794		/* Skip parenthesized comments. */
795		if (**in == '(') {
796			int Count = 0;
797			do {
798				c = *(*in)++;
799				if (c == '\0')
800					return c;
801				if (c == '(')
802					Count++;
803				else if (c == ')')
804					Count--;
805			} while (Count > 0);
806			continue;
807		}
808
809		/* Try the next token in the word table first. */
810		/* This allows us to match "2nd", for example. */
811		{
812			const char *src = *in;
813			const struct LEXICON *tp;
814			unsigned i = 0;
815
816			/* Force to lowercase and strip '.' characters. */
817			while (*src != '\0'
818			    && (isalnum((unsigned char)*src) || *src == '.')
819			    && i < sizeof(buff)-1) {
820				if (*src != '.') {
821					if (isupper((unsigned char)*src))
822						buff[i++] = tolower((unsigned char)*src);
823					else
824						buff[i++] = *src;
825				}
826				src++;
827			}
828			buff[i] = '\0';
829
830			/*
831			 * Find the first match.  If the word can be
832			 * abbreviated, make sure we match at least
833			 * the minimum abbreviation.
834			 */
835			for (tp = TimeWords; tp->name; tp++) {
836				size_t abbrev = tp->abbrev;
837				if (abbrev == 0)
838					abbrev = strlen(tp->name);
839				if (strlen(buff) >= abbrev
840				    && strncmp(tp->name, buff, strlen(buff))
841				    	== 0) {
842					/* Skip over token. */
843					*in = src;
844					/* Return the match. */
845					*value = tp->value;
846					return tp->type;
847				}
848			}
849		}
850
851		/*
852		 * Not in the word table, maybe it's a number.  Note:
853		 * Because '-' and '+' have other special meanings, I
854		 * don't deal with signed numbers here.
855		 */
856		if (isdigit((unsigned char)(c = **in))) {
857			for (*value = 0; isdigit((unsigned char)(c = *(*in)++)); )
858				*value = 10 * *value + c - '0';
859			(*in)--;
860			return (tUNUMBER);
861		}
862
863		return *(*in)++;
864	}
865}
866
867#define	TM_YEAR_ORIGIN 1900
868
869/* Yield A - B, measured in seconds.  */
870static long
871difftm (struct tm *a, struct tm *b)
872{
873	int ay = a->tm_year + (TM_YEAR_ORIGIN - 1);
874	int by = b->tm_year + (TM_YEAR_ORIGIN - 1);
875	int days = (
876		/* difference in day of year */
877		a->tm_yday - b->tm_yday
878		/* + intervening leap days */
879		+  ((ay >> 2) - (by >> 2))
880		-  (ay/100 - by/100)
881		+  ((ay/100 >> 2) - (by/100 >> 2))
882		/* + difference in years * 365 */
883		+  (long)(ay-by) * 365
884		);
885	return (days * DAY + (a->tm_hour - b->tm_hour) * HOUR
886	    + (a->tm_min - b->tm_min) * MINUTE
887	    + (a->tm_sec - b->tm_sec));
888}
889
890/*
891 *
892 * The public function.
893 *
894 * TODO: tokens[] array should be dynamically sized.
895 */
896time_t
897__archive_get_date(time_t now, const char *p)
898{
899	struct token	tokens[256];
900	struct gdstate	_gds;
901	struct token	*lasttoken;
902	struct gdstate	*gds;
903	struct tm	local, *tm;
904	struct tm	gmt, *gmt_ptr;
905	time_t		Start;
906	time_t		tod;
907	long		tzone;
908
909	/* Clear out the parsed token array. */
910	memset(tokens, 0, sizeof(tokens));
911	/* Initialize the parser state. */
912	memset(&_gds, 0, sizeof(_gds));
913	gds = &_gds;
914
915	/* Look up the current time. */
916	memset(&local, 0, sizeof(local));
917	tm = localtime (&now);
918	if (tm == NULL)
919		return -1;
920	local = *tm;
921
922	/* Look up UTC if we can and use that to determine the current
923	 * timezone offset. */
924	memset(&gmt, 0, sizeof(gmt));
925	gmt_ptr = gmtime (&now);
926	if (gmt_ptr != NULL) {
927		/* Copy, in case localtime and gmtime use the same buffer. */
928		gmt = *gmt_ptr;
929	}
930	if (gmt_ptr != NULL)
931		tzone = difftm (&gmt, &local);
932	else
933		/* This system doesn't understand timezones; fake it. */
934		tzone = 0;
935	if(local.tm_isdst)
936		tzone += HOUR;
937
938	/* Tokenize the input string. */
939	lasttoken = tokens;
940	while ((lasttoken->token = nexttoken(&p, &lasttoken->value)) != 0) {
941		++lasttoken;
942		if (lasttoken > tokens + 255)
943			return -1;
944	}
945	gds->tokenp = tokens;
946
947	/* Match phrases until we run out of input tokens. */
948	while (gds->tokenp < lasttoken) {
949		if (!phrase(gds))
950			return -1;
951	}
952
953	/* Use current local timezone if none was specified. */
954	if (!gds->HaveZone) {
955		gds->Timezone = tzone;
956		gds->DSTmode = DSTmaybe;
957	}
958
959	/* If a timezone was specified, use that for generating the default
960	 * time components instead of the local timezone. */
961	if (gds->HaveZone && gmt_ptr != NULL) {
962		now -= gds->Timezone;
963		gmt_ptr = gmtime (&now);
964		if (gmt_ptr != NULL)
965			local = *gmt_ptr;
966		now += gds->Timezone;
967	}
968
969	if (!gds->HaveYear)
970		gds->Year = local.tm_year + 1900;
971	if (!gds->HaveMonth)
972		gds->Month = local.tm_mon + 1;
973	if (!gds->HaveDay)
974		gds->Day = local.tm_mday;
975	/* Note: No default for hour/min/sec; a specifier that just
976	 * gives date always refers to 00:00 on that date. */
977
978	/* If we saw more than one time, timezone, weekday, year, month,
979	 * or day, then give up. */
980	if (gds->HaveTime > 1 || gds->HaveZone > 1 || gds->HaveWeekDay > 1
981	    || gds->HaveYear > 1 || gds->HaveMonth > 1 || gds->HaveDay > 1)
982		return -1;
983
984	/* Compute an absolute time based on whatever absolute information
985	 * we collected. */
986	if (gds->HaveYear || gds->HaveMonth || gds->HaveDay
987	    || gds->HaveTime || gds->HaveWeekDay) {
988		Start = Convert(gds->Month, gds->Day, gds->Year,
989		    gds->Hour, gds->Minutes, gds->Seconds,
990		    gds->Timezone, gds->DSTmode);
991		if (Start < 0)
992			return -1;
993	} else {
994		Start = now;
995		if (!gds->HaveRel)
996			Start -= local.tm_hour * HOUR + local.tm_min * MINUTE
997			    + local.tm_sec;
998	}
999
1000	/* Add the relative offset. */
1001	Start += gds->RelSeconds;
1002	Start += RelativeMonth(Start, gds->Timezone, gds->RelMonth);
1003
1004	/* Adjust for day-of-week offsets. */
1005	if (gds->HaveWeekDay
1006	    && !(gds->HaveYear || gds->HaveMonth || gds->HaveDay)) {
1007		tod = RelativeDate(Start, gds->Timezone,
1008		    gds->DSTmode, gds->DayOrdinal, gds->DayNumber);
1009		Start += tod;
1010	}
1011
1012	/* -1 is an error indicator, so return 0 instead of -1 if
1013	 * that's the actual time. */
1014	return Start == -1 ? 0 : Start;
1015}
1016
1017
#if	defined(TEST)

/*
 * Stand-alone test driver, built only when TEST is defined.  Each
 * command-line argument is parsed as a date relative to the current
 * time and the result (or a failure notice) is printed.
 */
/* ARGSUSED */
int
main(int argc, char **argv)
{
    time_t	d;
    time_t	now = time(NULL);

    (void)argc;
    while (*++argv != NULL) {
	    const char *text;

	    (void)printf("Input: %s\n", *argv);
	    /* The parser in this file is __archive_get_date(). */
	    d = __archive_get_date(now, *argv);
	    text = (d == -1) ? NULL : ctime(&d);
	    if (text == NULL)
		    (void)printf("Bad format - couldn't convert.\n");
	    else
		    (void)printf("Output: %s\n", text);
    }
    exit(0);
    /* NOTREACHED */
}
#endif	/* defined(TEST) */
1039