strptime.c revision 1.16
1/*	$OpenBSD: strptime.c,v 1.16 2014/02/13 23:16:03 millert Exp $ */
2/*	$NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $	*/
3
4/*-
5 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code was contributed to The NetBSD Foundation by Klaus Klein.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/localedef.h>
33#include <ctype.h>
34#include <locale.h>
35#include <string.h>
36#include <time.h>
37#include <tzfile.h>
38
/* Access member `x' of the structure _CurrentTimeLocale points to. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O are the bits recorded in the local variable
 * `alt_format' when a "%E" or "%O" modifier has been seen.
 *
 * _LEGAL_ALT(x) makes the enclosing function return a null pointer when
 * `alt_format' contains a modifier bit that is not part of the mask `x'.
 * It is wrapped in do { } while (0) so that "_LEGAL_ALT(x);" is exactly
 * one statement and can be used safely in an unbraced if/else arm.
 */
#define _ALT_E			0x01
#define _ALT_O			0x02
#define	_LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return (0);					\
	} while (0)

/*
 * We keep track of some of the fields we set in order to compute missing ones.
 * Each FIELD_TM_* value is one bit of the `fields' bookkeeping mask.
 */
#define FIELD_TM_MON	(1 << 0)
#define FIELD_TM_MDAY	(1 << 1)
#define FIELD_TM_WDAY	(1 << 2)
#define FIELD_TM_YDAY	(1 << 3)
#define FIELD_TM_YEAR	(1 << 4)
57
58static char gmt[] = { "GMT" };
59#ifdef TM_ZONE
60static char utc[] = { "UTC" };
61#endif
62/* RFC-822/RFC-2822 */
63static const char * const nast[5] = {
64       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
65};
66static const char * const nadt[5] = {
67       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
68};
69
70static const int mon_lengths[2][MONSPERYEAR] = {
71        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
72        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
73};
74
75static	int _conv_num(const unsigned char **, int *, int, int);
76static	int leaps_thru_end_of(const int y);
77static	char *_strptime(const char *, const char *, struct tm *, int);
78static	const u_char *_find_string(const u_char *, int *, const char * const *,
79	    const char * const *, int);
80
81
/*
 * strptime --
 *	Public entry point: parse `buf' according to `fmt' into `tm'.
 *
 *	All of the work is done by _strptime(); the final argument of 1 asks
 *	it to reset its century / two-digit-year / seen-fields bookkeeping
 *	before parsing.  The result of _strptime() is returned unchanged.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	rest = _strptime(buf, fmt, tm, 1);
	return (rest);
}
87
88static char *
89_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
90{
91	unsigned char c;
92	const unsigned char *bp, *ep;
93	size_t len;
94	int alt_format, i, offs;
95	int neg = 0;
96	static int century, relyear, fields;
97
98	if (initialize) {
99		century = TM_YEAR_BASE;
100		relyear = -1;
101		fields = 0;
102	}
103
104	bp = (unsigned char *)buf;
105	while ((c = *fmt) != '\0') {
106		/* Clear `alternate' modifier prior to new conversion. */
107		alt_format = 0;
108
109		/* Eat up white-space. */
110		if (isspace(c)) {
111			while (isspace(*bp))
112				bp++;
113
114			fmt++;
115			continue;
116		}
117
118		if ((c = *fmt++) != '%')
119			goto literal;
120
121
122again:		switch (c = *fmt++) {
123		case '%':	/* "%%" is converted to "%". */
124literal:
125		if (c != *bp++)
126			return (NULL);
127
128		break;
129
130		/*
131		 * "Alternative" modifiers. Just set the appropriate flag
132		 * and start over again.
133		 */
134		case 'E':	/* "%E?" alternative conversion modifier. */
135			_LEGAL_ALT(0);
136			alt_format |= _ALT_E;
137			goto again;
138
139		case 'O':	/* "%O?" alternative conversion modifier. */
140			_LEGAL_ALT(0);
141			alt_format |= _ALT_O;
142			goto again;
143
144		/*
145		 * "Complex" conversion rules, implemented through recursion.
146		 */
147		case 'c':	/* Date and time, using the locale's format. */
148			_LEGAL_ALT(_ALT_E);
149			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
150				return (NULL);
151			break;
152
153		case 'D':	/* The date as "%m/%d/%y". */
154			_LEGAL_ALT(0);
155			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
156				return (NULL);
157			break;
158
159		case 'F':	/* The date as "%Y-%m-%d". */
160			_LEGAL_ALT(0);
161			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
162				return (NULL);
163			continue;
164
165		case 'R':	/* The time as "%H:%M". */
166			_LEGAL_ALT(0);
167			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
168				return (NULL);
169			break;
170
171		case 'r':	/* The time as "%I:%M:%S %p". */
172			_LEGAL_ALT(0);
173			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
174				return (NULL);
175			break;
176
177		case 'T':	/* The time as "%H:%M:%S". */
178			_LEGAL_ALT(0);
179			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
180				return (NULL);
181			break;
182
183		case 'X':	/* The time, using the locale's format. */
184			_LEGAL_ALT(_ALT_E);
185			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
186				return (NULL);
187			break;
188
189		case 'x':	/* The date, using the locale's format. */
190			_LEGAL_ALT(_ALT_E);
191			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
192				return (NULL);
193			break;
194
195		/*
196		 * "Elementary" conversion rules.
197		 */
198		case 'A':	/* The day of week, using the locale's form. */
199		case 'a':
200			_LEGAL_ALT(0);
201			for (i = 0; i < 7; i++) {
202				/* Full name. */
203				len = strlen(_ctloc(day[i]));
204				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
205					break;
206
207				/* Abbreviated name. */
208				len = strlen(_ctloc(abday[i]));
209				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
210					break;
211			}
212
213			/* Nothing matched. */
214			if (i == 7)
215				return (NULL);
216
217			tm->tm_wday = i;
218			bp += len;
219			fields |= FIELD_TM_WDAY;
220			break;
221
222		case 'B':	/* The month, using the locale's form. */
223		case 'b':
224		case 'h':
225			_LEGAL_ALT(0);
226			for (i = 0; i < 12; i++) {
227				/* Full name. */
228				len = strlen(_ctloc(mon[i]));
229				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
230					break;
231
232				/* Abbreviated name. */
233				len = strlen(_ctloc(abmon[i]));
234				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
235					break;
236			}
237
238			/* Nothing matched. */
239			if (i == 12)
240				return (NULL);
241
242			tm->tm_mon = i;
243			bp += len;
244			fields |= FIELD_TM_MON;
245			break;
246
247		case 'C':	/* The century number. */
248			_LEGAL_ALT(_ALT_E);
249			if (!(_conv_num(&bp, &i, 0, 99)))
250				return (NULL);
251
252			century = i * 100;
253			break;
254
255		case 'd':	/* The day of month. */
256		case 'e':
257			_LEGAL_ALT(_ALT_O);
258			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
259				return (NULL);
260			fields |= FIELD_TM_MDAY;
261			break;
262
263		case 'k':	/* The hour (24-hour clock representation). */
264			_LEGAL_ALT(0);
265			/* FALLTHROUGH */
266		case 'H':
267			_LEGAL_ALT(_ALT_O);
268			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
269				return (NULL);
270			break;
271
272		case 'l':	/* The hour (12-hour clock representation). */
273			_LEGAL_ALT(0);
274			/* FALLTHROUGH */
275		case 'I':
276			_LEGAL_ALT(_ALT_O);
277			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
278				return (NULL);
279			break;
280
281		case 'j':	/* The day of year. */
282			_LEGAL_ALT(0);
283			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
284				return (NULL);
285			tm->tm_yday--;
286			fields |= FIELD_TM_YDAY;
287			break;
288
289		case 'M':	/* The minute. */
290			_LEGAL_ALT(_ALT_O);
291			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
292				return (NULL);
293			break;
294
295		case 'm':	/* The month. */
296			_LEGAL_ALT(_ALT_O);
297			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
298				return (NULL);
299			tm->tm_mon--;
300			fields |= FIELD_TM_MON;
301			break;
302
303		case 'p':	/* The locale's equivalent of AM/PM. */
304			_LEGAL_ALT(0);
305			/* AM? */
306			len = strlen(_ctloc(am_pm[0]));
307			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
308				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
309					return (NULL);
310				else if (tm->tm_hour == 12)
311					tm->tm_hour = 0;
312
313				bp += len;
314				break;
315			}
316			/* PM? */
317			len = strlen(_ctloc(am_pm[1]));
318			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
319				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
320					return (NULL);
321				else if (tm->tm_hour < 12)
322					tm->tm_hour += 12;
323
324				bp += len;
325				break;
326			}
327
328			/* Nothing matched. */
329			return (NULL);
330
331		case 'S':	/* The seconds. */
332			_LEGAL_ALT(_ALT_O);
333			if (!(_conv_num(&bp, &tm->tm_sec, 0, 61)))
334				return (NULL);
335			break;
336
337		case 'U':	/* The week of year, beginning on sunday. */
338		case 'W':	/* The week of year, beginning on monday. */
339			_LEGAL_ALT(_ALT_O);
340			/*
341			 * XXX This is bogus, as we can not assume any valid
342			 * information present in the tm structure at this
343			 * point to calculate a real value, so just check the
344			 * range for now.
345			 */
346			 if (!(_conv_num(&bp, &i, 0, 53)))
347				return (NULL);
348			 break;
349
350		case 'w':	/* The day of week, beginning on sunday. */
351			_LEGAL_ALT(_ALT_O);
352			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
353				return (NULL);
354			fields |= FIELD_TM_WDAY;
355			break;
356
357		case 'u':	/* The day of week, monday = 1. */
358			_LEGAL_ALT(_ALT_O);
359			if (!(_conv_num(&bp, &i, 1, 7)))
360				return (NULL);
361			tm->tm_wday = i % 7;
362			fields |= FIELD_TM_WDAY;
363			continue;
364
365		case 'g':	/* The year corresponding to the ISO week
366				 * number but without the century.
367				 */
368			if (!(_conv_num(&bp, &i, 0, 99)))
369				return (NULL);
370			continue;
371
372		case 'G':	/* The year corresponding to the ISO week
373				 * number with century.
374				 */
375			do
376				bp++;
377			while (isdigit(*bp));
378			continue;
379
380		case 'V':	/* The ISO 8601:1988 week number as decimal */
381			if (!(_conv_num(&bp, &i, 0, 53)))
382				return (NULL);
383			continue;
384
385		case 'Y':	/* The year. */
386			_LEGAL_ALT(_ALT_E);
387			if (!(_conv_num(&bp, &i, 0, 9999)))
388				return (NULL);
389
390			relyear = -1;
391			tm->tm_year = i - TM_YEAR_BASE;
392			fields |= FIELD_TM_YEAR;
393			break;
394
395		case 'y':	/* The year within the century (2 digits). */
396			_LEGAL_ALT(_ALT_E | _ALT_O);
397			if (!(_conv_num(&bp, &relyear, 0, 99)))
398				return (NULL);
399			break;
400
401		case 'Z':
402			tzset();
403			if (strncmp((const char *)bp, gmt, 3) == 0) {
404				tm->tm_isdst = 0;
405#ifdef TM_GMTOFF
406				tm->TM_GMTOFF = 0;
407#endif
408#ifdef TM_ZONE
409				tm->TM_ZONE = gmt;
410#endif
411				bp += 3;
412			} else {
413				ep = _find_string(bp, &i,
414					       	 (const char * const *)tzname,
415					       	  NULL, 2);
416				if (ep != NULL) {
417					tm->tm_isdst = i;
418#ifdef TM_GMTOFF
419					tm->TM_GMTOFF = -(timezone);
420#endif
421#ifdef TM_ZONE
422					tm->TM_ZONE = tzname[i];
423#endif
424				}
425				bp = ep;
426			}
427			continue;
428
429		case 'z':
430			/*
431			 * We recognize all ISO 8601 formats:
432			 * Z	= Zulu time/UTC
433			 * [+-]hhmm
434			 * [+-]hh:mm
435			 * [+-]hh
436			 * We recognize all RFC-822/RFC-2822 formats:
437			 * UT|GMT
438			 *          North American : UTC offsets
439			 * E[DS]T = Eastern : -4 | -5
440			 * C[DS]T = Central : -5 | -6
441			 * M[DS]T = Mountain: -6 | -7
442			 * P[DS]T = Pacific : -7 | -8
443			 *          Military
444			 * [A-IL-M] = -1 ... -9 (J not used)
445			 * [N-Y]  = +1 ... +12
446			 */
447			while (isspace(*bp))
448				bp++;
449
450			switch (*bp++) {
451			case 'G':
452				if (*bp++ != 'M')
453					return NULL;
454				/*FALLTHROUGH*/
455			case 'U':
456				if (*bp++ != 'T')
457					return NULL;
458				/*FALLTHROUGH*/
459			case 'Z':
460				tm->tm_isdst = 0;
461#ifdef TM_GMTOFF
462				tm->TM_GMTOFF = 0;
463#endif
464#ifdef TM_ZONE
465				tm->TM_ZONE = utc;
466#endif
467				continue;
468			case '+':
469				neg = 0;
470				break;
471			case '-':
472				neg = 1;
473				break;
474			default:
475				--bp;
476				ep = _find_string(bp, &i, nast, NULL, 4);
477				if (ep != NULL) {
478#ifdef TM_GMTOFF
479					tm->TM_GMTOFF = -5 - i;
480#endif
481#ifdef TM_ZONE
482					tm->TM_ZONE = __UNCONST(nast[i]);
483#endif
484					bp = ep;
485					continue;
486				}
487				ep = _find_string(bp, &i, nadt, NULL, 4);
488				if (ep != NULL) {
489					tm->tm_isdst = 1;
490#ifdef TM_GMTOFF
491					tm->TM_GMTOFF = -4 - i;
492#endif
493#ifdef TM_ZONE
494					tm->TM_ZONE = __UNCONST(nadt[i]);
495#endif
496					bp = ep;
497					continue;
498				}
499
500				if ((*bp >= 'A' && *bp <= 'I') ||
501				    (*bp >= 'L' && *bp <= 'Y')) {
502#ifdef TM_GMTOFF
503					/* Argh! No 'J'! */
504					if (*bp >= 'A' && *bp <= 'I')
505						tm->TM_GMTOFF =
506						    ('A' - 1) - (int)*bp;
507					else if (*bp >= 'L' && *bp <= 'M')
508						tm->TM_GMTOFF = 'A' - (int)*bp;
509					else if (*bp >= 'N' && *bp <= 'Y')
510						tm->TM_GMTOFF = (int)*bp - 'M';
511#endif
512#ifdef TM_ZONE
513					tm->TM_ZONE = NULL; /* XXX */
514#endif
515					bp++;
516					continue;
517				}
518				return NULL;
519			}
520			offs = 0;
521			for (i = 0; i < 4; ) {
522				if (isdigit(*bp)) {
523					offs = offs * 10 + (*bp++ - '0');
524					i++;
525					continue;
526				}
527				if (i == 2 && *bp == ':') {
528					bp++;
529					continue;
530				}
531				break;
532			}
533			switch (i) {
534			case 2:
535				offs *= 100;
536				break;
537			case 4:
538				i = offs % 100;
539				if (i >= 60)
540					return NULL;
541				/* Convert minutes into decimal */
542				offs = (offs / 100) * 100 + (i * 50) / 30;
543				break;
544			default:
545				return NULL;
546			}
547			if (neg)
548				offs = -offs;
549			tm->tm_isdst = 0;	/* XXX */
550#ifdef TM_GMTOFF
551			tm->TM_GMTOFF = offs;
552#endif
553#ifdef TM_ZONE
554			tm->TM_ZONE = NULL;	/* XXX */
555#endif
556			continue;
557
558		/*
559		 * Miscellaneous conversions.
560		 */
561		case 'n':	/* Any kind of white-space. */
562		case 't':
563			_LEGAL_ALT(0);
564			while (isspace(*bp))
565				bp++;
566			break;
567
568
569		default:	/* Unknown/unsupported conversion. */
570			return (NULL);
571		}
572
573
574	}
575
576	/*
577	 * We need to evaluate the two digit year spec (%y)
578	 * last as we can get a century spec (%C) at any time.
579	 */
580	if (relyear != -1) {
581		if (century == TM_YEAR_BASE) {
582			if (relyear <= 68)
583				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
584			else
585				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
586		} else {
587			tm->tm_year = relyear + century - TM_YEAR_BASE;
588		}
589		fields |= FIELD_TM_YEAR;
590	}
591
592	/* Compute some missing values when possible. */
593	if (fields & FIELD_TM_YEAR) {
594		const int year = tm->tm_year + TM_YEAR_BASE;
595		const int *mon_lens = mon_lengths[isleap(year)];
596		if (!(fields & FIELD_TM_YDAY) &&
597		    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
598			tm->tm_yday = tm->tm_mday - 1;
599			for (i = 0; i < tm->tm_mon; i++)
600				tm->tm_yday += mon_lens[i];
601			fields |= FIELD_TM_YDAY;
602		}
603		if (fields & FIELD_TM_YDAY) {
604			int days = tm->tm_yday;
605			if (!(fields & FIELD_TM_WDAY)) {
606				tm->tm_wday = EPOCH_WDAY +
607				    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
608				    (DAYSPERNYEAR % DAYSPERWEEK) +
609				    leaps_thru_end_of(year - 1) -
610				    leaps_thru_end_of(EPOCH_YEAR - 1) +
611				    tm->tm_yday;
612				tm->tm_wday %= DAYSPERWEEK;
613				if (tm->tm_wday < 0)
614					tm->tm_wday += DAYSPERWEEK;
615			}
616			if (!(fields & FIELD_TM_MON)) {
617				tm->tm_mon = 0;
618				while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
619					days -= mon_lens[tm->tm_mon++];
620			}
621			if (!(fields & FIELD_TM_MDAY))
622				tm->tm_mday = days + 1;
623		}
624	}
625
626	return ((char *)bp);
627}
628
629
/*
 * _conv_num --
 *	Read an unsigned decimal number from *buf.
 *
 *	Digits are consumed while appending another one could still yield a
 *	value <= ulim and while fewer digits than `ulim' has were read.
 *	*buf is advanced past every consumed digit, including when the final
 *	range check fails.
 *
 *	Returns 1 and stores the value in *dest when it lies in [llim, ulim];
 *	returns 0 (leaving *dest untouched) when *buf does not start with a
 *	digit or the value is out of range.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int value = 0;
	int budget = ulim;	/* loses one decimal digit per digit read */

	if (!(*cur >= '0' && *cur <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		budget /= 10;

		/* Another digit would overshoot ulim or its digit count. */
		if (value * 10 > ulim || budget == 0)
			break;
		/* Out of digits in the input. */
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
652
/*
 * _find_string --
 *	Look for a case-insensitive prefix match of `bp' against the `c'
 *	strings of table `n1' and then, if `n2' is not NULL, against the `c'
 *	strings of table `n2'.  Nothing is searched when `n1' is NULL.
 *
 *	On the first match the index of the matching entry within its table
 *	is stored in *tgt and a pointer just past the matched prefix in `bp'
 *	is returned.  Returns NULL, leaving *tgt untouched, when nothing
 *	matches.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *table = n1;
	const char * const *pending = n2;
	const char *name;
	unsigned int namelen;
	int idx;

	/* check full name - then abbreviated ones */
	while (table != NULL) {
		for (idx = 0; idx < c; idx++) {
			name = table[idx];
			namelen = strlen(name);
			if (strncasecmp(name, (const char *)bp, namelen) != 0)
				continue;
			*tgt = idx;
			return bp + namelen;
		}
		/* Move on to the second table, at most once. */
		table = pending;
		pending = NULL;
	}

	/* Nothing matched */
	return NULL;
}
674
/*
 * leaps_thru_end_of --
 *	Evaluate y/4 - y/100 + y/400 for a non-negative `y', i.e. the
 *	Gregorian leap-year count up to and including year `y'.
 *
 *	A negative `y' is mirrored to the non-negative value -(y + 1); the
 *	result is then the negation of (that value's count + 1).
 */
static int
leaps_thru_end_of(const int y)
{
	int mirrored;

	if (y >= 0)
		return (y / 4 - y / 100 + y / 400);

	mirrored = -(y + 1);
	return (-((mirrored / 4 - mirrored / 100 + mirrored / 400) + 1));
}
681