strptime.c revision 1.18
1/*	$OpenBSD: strptime.c,v 1.18 2014/10/11 02:21:27 doug Exp $ */
2/*	$NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $	*/
3
4/*-
5 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code was contributed to The NetBSD Foundation by Klaus Klein.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/localedef.h>
33#include <ctype.h>
34#include <locale.h>
35#include <string.h>
36#include <time.h>
37#include <tzfile.h>
38
39#define	_ctloc(x)		(_CurrentTimeLocale->x)
40
41/*
42 * We do not implement alternate representations. However, we always
43 * check whether a given modifier is allowed for a certain conversion.
44 */
45#define _ALT_E			0x01
46#define _ALT_O			0x02
47#define	_LEGAL_ALT(x)		{ if (alt_format & ~(x)) return (0); }
48
49/*
50 * We keep track of some of the fields we set in order to compute missing ones.
51 */
52#define FIELD_TM_MON	(1 << 0)
53#define FIELD_TM_MDAY	(1 << 1)
54#define FIELD_TM_WDAY	(1 << 2)
55#define FIELD_TM_YDAY	(1 << 3)
56#define FIELD_TM_YEAR	(1 << 4)
57
58static char gmt[] = { "GMT" };
59static char utc[] = { "UTC" };
60/* RFC-822/RFC-2822 */
61static const char * const nast[5] = {
62       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
63};
64static const char * const nadt[5] = {
65       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
66};
67
68static const int mon_lengths[2][MONSPERYEAR] = {
69        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
70        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
71};
72
73static	int _conv_num(const unsigned char **, int *, int, int);
74static	int leaps_thru_end_of(const int y);
75static	char *_strptime(const char *, const char *, struct tm *, int);
76static	const u_char *_find_string(const u_char *, int *, const char * const *,
77	    const char * const *, int);
78
79
/*
 * strptime --
 *	Public entry point: parse `buf' according to `fmt' into `*tm'.
 *	Returns whatever _strptime() returns for a top-level call.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	/* The trailing 1 marks this as the outermost (initializing) call. */
	rest = _strptime(buf, fmt, tm, 1);
	return (rest);
}
85
86static char *
87_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
88{
89	unsigned char c;
90	const unsigned char *bp, *ep;
91	size_t len;
92	int alt_format, i, offs;
93	int neg = 0;
94	static int century, relyear, fields;
95
96	if (initialize) {
97		century = TM_YEAR_BASE;
98		relyear = -1;
99		fields = 0;
100	}
101
102	bp = (unsigned char *)buf;
103	while ((c = *fmt) != '\0') {
104		/* Clear `alternate' modifier prior to new conversion. */
105		alt_format = 0;
106
107		/* Eat up white-space. */
108		if (isspace(c)) {
109			while (isspace(*bp))
110				bp++;
111
112			fmt++;
113			continue;
114		}
115
116		if ((c = *fmt++) != '%')
117			goto literal;
118
119
120again:		switch (c = *fmt++) {
121		case '%':	/* "%%" is converted to "%". */
122literal:
123		if (c != *bp++)
124			return (NULL);
125
126		break;
127
128		/*
129		 * "Alternative" modifiers. Just set the appropriate flag
130		 * and start over again.
131		 */
132		case 'E':	/* "%E?" alternative conversion modifier. */
133			_LEGAL_ALT(0);
134			alt_format |= _ALT_E;
135			goto again;
136
137		case 'O':	/* "%O?" alternative conversion modifier. */
138			_LEGAL_ALT(0);
139			alt_format |= _ALT_O;
140			goto again;
141
142		/*
143		 * "Complex" conversion rules, implemented through recursion.
144		 */
145		case 'c':	/* Date and time, using the locale's format. */
146			_LEGAL_ALT(_ALT_E);
147			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
148				return (NULL);
149			break;
150
151		case 'D':	/* The date as "%m/%d/%y". */
152			_LEGAL_ALT(0);
153			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
154				return (NULL);
155			break;
156
157		case 'F':	/* The date as "%Y-%m-%d". */
158			_LEGAL_ALT(0);
159			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
160				return (NULL);
161			continue;
162
163		case 'R':	/* The time as "%H:%M". */
164			_LEGAL_ALT(0);
165			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
166				return (NULL);
167			break;
168
169		case 'r':	/* The time as "%I:%M:%S %p". */
170			_LEGAL_ALT(0);
171			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
172				return (NULL);
173			break;
174
175		case 'T':	/* The time as "%H:%M:%S". */
176			_LEGAL_ALT(0);
177			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
178				return (NULL);
179			break;
180
181		case 'X':	/* The time, using the locale's format. */
182			_LEGAL_ALT(_ALT_E);
183			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
184				return (NULL);
185			break;
186
187		case 'x':	/* The date, using the locale's format. */
188			_LEGAL_ALT(_ALT_E);
189			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
190				return (NULL);
191			break;
192
193		/*
194		 * "Elementary" conversion rules.
195		 */
196		case 'A':	/* The day of week, using the locale's form. */
197		case 'a':
198			_LEGAL_ALT(0);
199			for (i = 0; i < 7; i++) {
200				/* Full name. */
201				len = strlen(_ctloc(day[i]));
202				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
203					break;
204
205				/* Abbreviated name. */
206				len = strlen(_ctloc(abday[i]));
207				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
208					break;
209			}
210
211			/* Nothing matched. */
212			if (i == 7)
213				return (NULL);
214
215			tm->tm_wday = i;
216			bp += len;
217			fields |= FIELD_TM_WDAY;
218			break;
219
220		case 'B':	/* The month, using the locale's form. */
221		case 'b':
222		case 'h':
223			_LEGAL_ALT(0);
224			for (i = 0; i < 12; i++) {
225				/* Full name. */
226				len = strlen(_ctloc(mon[i]));
227				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
228					break;
229
230				/* Abbreviated name. */
231				len = strlen(_ctloc(abmon[i]));
232				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
233					break;
234			}
235
236			/* Nothing matched. */
237			if (i == 12)
238				return (NULL);
239
240			tm->tm_mon = i;
241			bp += len;
242			fields |= FIELD_TM_MON;
243			break;
244
245		case 'C':	/* The century number. */
246			_LEGAL_ALT(_ALT_E);
247			if (!(_conv_num(&bp, &i, 0, 99)))
248				return (NULL);
249
250			century = i * 100;
251			break;
252
253		case 'd':	/* The day of month. */
254		case 'e':
255			_LEGAL_ALT(_ALT_O);
256			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
257				return (NULL);
258			fields |= FIELD_TM_MDAY;
259			break;
260
261		case 'k':	/* The hour (24-hour clock representation). */
262			_LEGAL_ALT(0);
263			/* FALLTHROUGH */
264		case 'H':
265			_LEGAL_ALT(_ALT_O);
266			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
267				return (NULL);
268			break;
269
270		case 'l':	/* The hour (12-hour clock representation). */
271			_LEGAL_ALT(0);
272			/* FALLTHROUGH */
273		case 'I':
274			_LEGAL_ALT(_ALT_O);
275			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
276				return (NULL);
277			break;
278
279		case 'j':	/* The day of year. */
280			_LEGAL_ALT(0);
281			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
282				return (NULL);
283			tm->tm_yday--;
284			fields |= FIELD_TM_YDAY;
285			break;
286
287		case 'M':	/* The minute. */
288			_LEGAL_ALT(_ALT_O);
289			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
290				return (NULL);
291			break;
292
293		case 'm':	/* The month. */
294			_LEGAL_ALT(_ALT_O);
295			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
296				return (NULL);
297			tm->tm_mon--;
298			fields |= FIELD_TM_MON;
299			break;
300
301		case 'p':	/* The locale's equivalent of AM/PM. */
302			_LEGAL_ALT(0);
303			/* AM? */
304			len = strlen(_ctloc(am_pm[0]));
305			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
306				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
307					return (NULL);
308				else if (tm->tm_hour == 12)
309					tm->tm_hour = 0;
310
311				bp += len;
312				break;
313			}
314			/* PM? */
315			len = strlen(_ctloc(am_pm[1]));
316			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
317				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
318					return (NULL);
319				else if (tm->tm_hour < 12)
320					tm->tm_hour += 12;
321
322				bp += len;
323				break;
324			}
325
326			/* Nothing matched. */
327			return (NULL);
328
329		case 'S':	/* The seconds. */
330			_LEGAL_ALT(_ALT_O);
331			if (!(_conv_num(&bp, &tm->tm_sec, 0, 61)))
332				return (NULL);
333			break;
334
335		case 'U':	/* The week of year, beginning on sunday. */
336		case 'W':	/* The week of year, beginning on monday. */
337			_LEGAL_ALT(_ALT_O);
338			/*
339			 * XXX This is bogus, as we can not assume any valid
340			 * information present in the tm structure at this
341			 * point to calculate a real value, so just check the
342			 * range for now.
343			 */
344			 if (!(_conv_num(&bp, &i, 0, 53)))
345				return (NULL);
346			 break;
347
348		case 'w':	/* The day of week, beginning on sunday. */
349			_LEGAL_ALT(_ALT_O);
350			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
351				return (NULL);
352			fields |= FIELD_TM_WDAY;
353			break;
354
355		case 'u':	/* The day of week, monday = 1. */
356			_LEGAL_ALT(_ALT_O);
357			if (!(_conv_num(&bp, &i, 1, 7)))
358				return (NULL);
359			tm->tm_wday = i % 7;
360			fields |= FIELD_TM_WDAY;
361			continue;
362
363		case 'g':	/* The year corresponding to the ISO week
364				 * number but without the century.
365				 */
366			if (!(_conv_num(&bp, &i, 0, 99)))
367				return (NULL);
368			continue;
369
370		case 'G':	/* The year corresponding to the ISO week
371				 * number with century.
372				 */
373			do
374				bp++;
375			while (isdigit(*bp));
376			continue;
377
378		case 'V':	/* The ISO 8601:1988 week number as decimal */
379			if (!(_conv_num(&bp, &i, 0, 53)))
380				return (NULL);
381			continue;
382
383		case 'Y':	/* The year. */
384			_LEGAL_ALT(_ALT_E);
385			if (!(_conv_num(&bp, &i, 0, 9999)))
386				return (NULL);
387
388			relyear = -1;
389			tm->tm_year = i - TM_YEAR_BASE;
390			fields |= FIELD_TM_YEAR;
391			break;
392
393		case 'y':	/* The year within the century (2 digits). */
394			_LEGAL_ALT(_ALT_E | _ALT_O);
395			if (!(_conv_num(&bp, &relyear, 0, 99)))
396				return (NULL);
397			break;
398
399		case 'Z':
400			tzset();
401			if (strncmp((const char *)bp, gmt, 3) == 0) {
402				tm->tm_isdst = 0;
403#ifdef TM_GMTOFF
404				tm->TM_GMTOFF = 0;
405#endif
406#ifdef TM_ZONE
407				tm->TM_ZONE = gmt;
408#endif
409				bp += 3;
410			} else if (strncmp((const char *)bp, utc, 3) == 0) {
411				tm->tm_isdst = 0;
412#ifdef TM_GMTOFF
413				tm->TM_GMTOFF = 0;
414#endif
415#ifdef TM_ZONE
416				tm->TM_ZONE = utc;
417#endif
418				bp += 3;
419			} else {
420				ep = _find_string(bp, &i,
421					       	 (const char * const *)tzname,
422					       	  NULL, 2);
423				if (ep == NULL)
424					return (NULL);
425
426				tm->tm_isdst = i;
427#ifdef TM_GMTOFF
428				tm->TM_GMTOFF = -(timezone);
429#endif
430#ifdef TM_ZONE
431				tm->TM_ZONE = tzname[i];
432#endif
433				bp = ep;
434			}
435			continue;
436
437		case 'z':
438			/*
439			 * We recognize all ISO 8601 formats:
440			 * Z	= Zulu time/UTC
441			 * [+-]hhmm
442			 * [+-]hh:mm
443			 * [+-]hh
444			 * We recognize all RFC-822/RFC-2822 formats:
445			 * UT|GMT
446			 *          North American : UTC offsets
447			 * E[DS]T = Eastern : -4 | -5
448			 * C[DS]T = Central : -5 | -6
449			 * M[DS]T = Mountain: -6 | -7
450			 * P[DS]T = Pacific : -7 | -8
451			 *          Military
452			 * [A-IL-M] = -1 ... -9 (J not used)
453			 * [N-Y]  = +1 ... +12
454			 */
455			while (isspace(*bp))
456				bp++;
457
458			switch (*bp++) {
459			case 'G':
460				if (*bp++ != 'M')
461					return NULL;
462				/*FALLTHROUGH*/
463			case 'U':
464				if (*bp++ != 'T')
465					return NULL;
466				/*FALLTHROUGH*/
467			case 'Z':
468				tm->tm_isdst = 0;
469#ifdef TM_GMTOFF
470				tm->TM_GMTOFF = 0;
471#endif
472#ifdef TM_ZONE
473				tm->TM_ZONE = utc;
474#endif
475				continue;
476			case '+':
477				neg = 0;
478				break;
479			case '-':
480				neg = 1;
481				break;
482			default:
483				--bp;
484				ep = _find_string(bp, &i, nast, NULL, 4);
485				if (ep != NULL) {
486#ifdef TM_GMTOFF
487					tm->TM_GMTOFF = -5 - i;
488#endif
489#ifdef TM_ZONE
490					tm->TM_ZONE = __UNCONST(nast[i]);
491#endif
492					bp = ep;
493					continue;
494				}
495				ep = _find_string(bp, &i, nadt, NULL, 4);
496				if (ep != NULL) {
497					tm->tm_isdst = 1;
498#ifdef TM_GMTOFF
499					tm->TM_GMTOFF = -4 - i;
500#endif
501#ifdef TM_ZONE
502					tm->TM_ZONE = __UNCONST(nadt[i]);
503#endif
504					bp = ep;
505					continue;
506				}
507
508				if ((*bp >= 'A' && *bp <= 'I') ||
509				    (*bp >= 'L' && *bp <= 'Y')) {
510#ifdef TM_GMTOFF
511					/* Argh! No 'J'! */
512					if (*bp >= 'A' && *bp <= 'I')
513						tm->TM_GMTOFF =
514						    ('A' - 1) - (int)*bp;
515					else if (*bp >= 'L' && *bp <= 'M')
516						tm->TM_GMTOFF = 'A' - (int)*bp;
517					else if (*bp >= 'N' && *bp <= 'Y')
518						tm->TM_GMTOFF = (int)*bp - 'M';
519#endif
520#ifdef TM_ZONE
521					tm->TM_ZONE = NULL; /* XXX */
522#endif
523					bp++;
524					continue;
525				}
526				return NULL;
527			}
528			offs = 0;
529			for (i = 0; i < 4; ) {
530				if (isdigit(*bp)) {
531					offs = offs * 10 + (*bp++ - '0');
532					i++;
533					continue;
534				}
535				if (i == 2 && *bp == ':') {
536					bp++;
537					continue;
538				}
539				break;
540			}
541			switch (i) {
542			case 2:
543				offs *= 100;
544				break;
545			case 4:
546				i = offs % 100;
547				if (i >= 60)
548					return NULL;
549				/* Convert minutes into decimal */
550				offs = (offs / 100) * 100 + (i * 50) / 30;
551				break;
552			default:
553				return NULL;
554			}
555			if (neg)
556				offs = -offs;
557			tm->tm_isdst = 0;	/* XXX */
558#ifdef TM_GMTOFF
559			tm->TM_GMTOFF = offs;
560#endif
561#ifdef TM_ZONE
562			tm->TM_ZONE = NULL;	/* XXX */
563#endif
564			continue;
565
566		/*
567		 * Miscellaneous conversions.
568		 */
569		case 'n':	/* Any kind of white-space. */
570		case 't':
571			_LEGAL_ALT(0);
572			while (isspace(*bp))
573				bp++;
574			break;
575
576
577		default:	/* Unknown/unsupported conversion. */
578			return (NULL);
579		}
580
581
582	}
583
584	/*
585	 * We need to evaluate the two digit year spec (%y)
586	 * last as we can get a century spec (%C) at any time.
587	 */
588	if (relyear != -1) {
589		if (century == TM_YEAR_BASE) {
590			if (relyear <= 68)
591				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
592			else
593				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
594		} else {
595			tm->tm_year = relyear + century - TM_YEAR_BASE;
596		}
597		fields |= FIELD_TM_YEAR;
598	}
599
600	/* Compute some missing values when possible. */
601	if (fields & FIELD_TM_YEAR) {
602		const int year = tm->tm_year + TM_YEAR_BASE;
603		const int *mon_lens = mon_lengths[isleap(year)];
604		if (!(fields & FIELD_TM_YDAY) &&
605		    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
606			tm->tm_yday = tm->tm_mday - 1;
607			for (i = 0; i < tm->tm_mon; i++)
608				tm->tm_yday += mon_lens[i];
609			fields |= FIELD_TM_YDAY;
610		}
611		if (fields & FIELD_TM_YDAY) {
612			int days = tm->tm_yday;
613			if (!(fields & FIELD_TM_WDAY)) {
614				tm->tm_wday = EPOCH_WDAY +
615				    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
616				    (DAYSPERNYEAR % DAYSPERWEEK) +
617				    leaps_thru_end_of(year - 1) -
618				    leaps_thru_end_of(EPOCH_YEAR - 1) +
619				    tm->tm_yday;
620				tm->tm_wday %= DAYSPERWEEK;
621				if (tm->tm_wday < 0)
622					tm->tm_wday += DAYSPERWEEK;
623			}
624			if (!(fields & FIELD_TM_MON)) {
625				tm->tm_mon = 0;
626				while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
627					days -= mon_lens[tm->tm_mon++];
628			}
629			if (!(fields & FIELD_TM_MDAY))
630				tm->tm_mday = days + 1;
631		}
632	}
633
634	return ((char *)bp);
635}
636
637
/*
 * _conv_num --
 *	Read an unsigned decimal number from `*buf'.
 *
 *	Digits are consumed while appending another one could still keep the
 *	value within `ulim', and never more digits than `ulim' itself has.
 *	`*buf' is advanced past every digit consumed, even when the final
 *	range check fails.
 *
 *	Returns 1 and stores the value in `*dest' when it lies in
 *	[llim, ulim]; returns 0 (leaving `*dest' untouched) when the input
 *	does not start with a digit or the value is out of range.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cursor = *buf;
	int value = 0;
	int digits_left = ulim;	/* reaches 0 after as many digits as ulim has */

	if (!(*cursor >= '0' && *cursor <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cursor++ - '0');
		digits_left /= 10;

		/* Stop once a further digit would be too much ... */
		if (value * 10 > ulim || digits_left == 0)
			break;
		/* ... or when the input has no further digit to offer. */
		if (*cursor < '0' || *cursor > '9')
			break;
	}
	*buf = cursor;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
660
/*
 * _find_string --
 *	Look for a case-insensitive prefix match of `bp' against the first
 *	`c' entries of the name table `n1' and then, if given, of `n2'.
 *	A NULL table ends the search, so `n2' is ignored when `n1' is NULL.
 *
 *	On the first match the entry's index is stored in `*tgt' and a
 *	pointer just past the matched text is returned.  Returns NULL, with
 *	`*tgt' untouched, when nothing matches.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char *name;
	unsigned int name_len;
	int t, idx;

	/* check full name - then abbreviated ones */
	tables[0] = n1;
	tables[1] = n2;

	for (t = 0; t < 2 && tables[t] != NULL; t++) {
		for (idx = 0; idx < c; idx++) {
			name = tables[t][idx];
			name_len = strlen(name);
			if (strncasecmp(name, (const char *)bp, name_len) != 0)
				continue;

			*tgt = idx;
			return bp + name_len;
		}
	}

	/* Nothing matched */
	return NULL;
}
682
/*
 * leaps_thru_end_of --
 *	Count of leap years from year 0 through the end of year `y' using
 *	the every-4 / not-every-100 / every-400 rule.  For a negative `y'
 *	the result is the negated count for the mirrored year -(y + 1),
 *	minus one.
 */
static int
leaps_thru_end_of(const int y)
{
	int mirrored, count;

	if (y >= 0)
		return (y / 4 - y / 100 + y / 400);

	/* -(y + 1) is never negative, so no further mirroring is needed. */
	mirrored = -(y + 1);
	count = mirrored / 4 - mirrored / 100 + mirrored / 400;
	return (-(count + 1));
}
689