strptime.c revision 1.27
1/*	$OpenBSD: strptime.c,v 1.27 2019/05/10 20:24:58 schwarze Exp $ */
2/*	$NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $	*/
3/*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <ctype.h>
32#include <locale.h>
33#include <stdint.h>
34#include <string.h>
35#include <time.h>
36
37#include "localedef.h"
38#include "private.h"
39#include "tzfile.h"
40
/* Shorthand for a member of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O are the bits recorded in the local `alt_format' when
 * a "%E" or "%O" modifier has been seen.  _LEGAL_ALT(x) makes the enclosing
 * function return 0 (a null pointer in _strptime()) if any modifier bit
 * outside the mask `x' is set.  It is wrapped in do { } while (0) so that
 * "_LEGAL_ALT(x);" is exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define _ALT_E			0x01
#define _ALT_O			0x02
#define	_LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return (0);					\
	} while (0)
50
51/*
52 * We keep track of some of the fields we set in order to compute missing ones.
53 */
54#define FIELD_TM_MON	(1 << 0)
55#define FIELD_TM_MDAY	(1 << 1)
56#define FIELD_TM_WDAY	(1 << 2)
57#define FIELD_TM_YDAY	(1 << 3)
58#define FIELD_TM_YEAR	(1 << 4)
59
60static char gmt[] = { "GMT" };
61static char utc[] = { "UTC" };
62/* RFC-822/RFC-2822 */
63static const char * const nast[5] = {
64       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
65};
66static const char * const nadt[5] = {
67       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
68};
69
70static const int mon_lengths[2][MONSPERYEAR] = {
71        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
72        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
73};
74
75static	int _conv_num64(const unsigned char **, int64_t *, int64_t, int64_t);
76static	int _conv_num(const unsigned char **, int *, int, int);
77static	int leaps_thru_end_of(const int y);
78static	char *_strptime(const char *, const char *, struct tm *, int);
79static	const u_char *_find_string(const u_char *, int *, const char * const *,
80	    const char * const *, int);
81
82
/*
 * Public entry point: parse `buf' according to `fmt' and store the result
 * in `tm'.  All the work is done by _strptime(); passing 1 as its last
 * argument makes it reset its parse state first.  The return value is
 * whatever _strptime() returns.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	rest = _strptime(buf, fmt, tm, 1);
	return (rest);
}
DEF_WEAK(strptime);
89
90static char *
91_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
92{
93	unsigned char c;
94	const unsigned char *bp, *ep;
95	size_t len;
96	int alt_format, i, offs;
97	int neg = 0;
98	static int century, relyear, fields;
99
100	if (initialize) {
101		century = TM_YEAR_BASE;
102		relyear = -1;
103		fields = 0;
104	}
105
106	bp = (const unsigned char *)buf;
107	while ((c = *fmt) != '\0') {
108		/* Clear `alternate' modifier prior to new conversion. */
109		alt_format = 0;
110
111		/* Eat up white-space. */
112		if (isspace(c)) {
113			while (isspace(*bp))
114				bp++;
115
116			fmt++;
117			continue;
118		}
119
120		if ((c = *fmt++) != '%')
121			goto literal;
122
123
124again:		switch (c = *fmt++) {
125		case '%':	/* "%%" is converted to "%". */
126literal:
127		if (c != *bp++)
128			return (NULL);
129
130		break;
131
132		/*
133		 * "Alternative" modifiers. Just set the appropriate flag
134		 * and start over again.
135		 */
136		case 'E':	/* "%E?" alternative conversion modifier. */
137			_LEGAL_ALT(0);
138			alt_format |= _ALT_E;
139			goto again;
140
141		case 'O':	/* "%O?" alternative conversion modifier. */
142			_LEGAL_ALT(0);
143			alt_format |= _ALT_O;
144			goto again;
145
146		/*
147		 * "Complex" conversion rules, implemented through recursion.
148		 */
149		case 'c':	/* Date and time, using the locale's format. */
150			_LEGAL_ALT(_ALT_E);
151			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
152				return (NULL);
153			break;
154
155		case 'D':	/* The date as "%m/%d/%y". */
156			_LEGAL_ALT(0);
157			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
158				return (NULL);
159			break;
160
161		case 'F':	/* The date as "%Y-%m-%d". */
162			_LEGAL_ALT(0);
163			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
164				return (NULL);
165			continue;
166
167		case 'R':	/* The time as "%H:%M". */
168			_LEGAL_ALT(0);
169			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
170				return (NULL);
171			break;
172
173		case 'r':	/* The time as "%I:%M:%S %p". */
174			_LEGAL_ALT(0);
175			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
176				return (NULL);
177			break;
178
179		case 'T':	/* The time as "%H:%M:%S". */
180			_LEGAL_ALT(0);
181			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
182				return (NULL);
183			break;
184
185		case 'X':	/* The time, using the locale's format. */
186			_LEGAL_ALT(_ALT_E);
187			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
188				return (NULL);
189			break;
190
191		case 'x':	/* The date, using the locale's format. */
192			_LEGAL_ALT(_ALT_E);
193			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
194				return (NULL);
195			break;
196
197		/*
198		 * "Elementary" conversion rules.
199		 */
200		case 'A':	/* The day of week, using the locale's form. */
201		case 'a':
202			_LEGAL_ALT(0);
203			for (i = 0; i < 7; i++) {
204				/* Full name. */
205				len = strlen(_ctloc(day[i]));
206				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
207					break;
208
209				/* Abbreviated name. */
210				len = strlen(_ctloc(abday[i]));
211				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
212					break;
213			}
214
215			/* Nothing matched. */
216			if (i == 7)
217				return (NULL);
218
219			tm->tm_wday = i;
220			bp += len;
221			fields |= FIELD_TM_WDAY;
222			break;
223
224		case 'B':	/* The month, using the locale's form. */
225		case 'b':
226		case 'h':
227			_LEGAL_ALT(0);
228			for (i = 0; i < 12; i++) {
229				/* Full name. */
230				len = strlen(_ctloc(mon[i]));
231				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
232					break;
233
234				/* Abbreviated name. */
235				len = strlen(_ctloc(abmon[i]));
236				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
237					break;
238			}
239
240			/* Nothing matched. */
241			if (i == 12)
242				return (NULL);
243
244			tm->tm_mon = i;
245			bp += len;
246			fields |= FIELD_TM_MON;
247			break;
248
249		case 'C':	/* The century number. */
250			_LEGAL_ALT(_ALT_E);
251			if (!(_conv_num(&bp, &i, 0, 99)))
252				return (NULL);
253
254			century = i * 100;
255			break;
256
257		case 'e':	/* The day of month. */
258			if (isspace(*bp))
259				bp++;
260			/* FALLTHROUGH */
261		case 'd':
262			_LEGAL_ALT(_ALT_O);
263			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
264				return (NULL);
265			fields |= FIELD_TM_MDAY;
266			break;
267
268		case 'k':	/* The hour (24-hour clock representation). */
269			_LEGAL_ALT(0);
270			/* FALLTHROUGH */
271		case 'H':
272			_LEGAL_ALT(_ALT_O);
273			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
274				return (NULL);
275			break;
276
277		case 'l':	/* The hour (12-hour clock representation). */
278			_LEGAL_ALT(0);
279			/* FALLTHROUGH */
280		case 'I':
281			_LEGAL_ALT(_ALT_O);
282			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
283				return (NULL);
284			break;
285
286		case 'j':	/* The day of year. */
287			_LEGAL_ALT(0);
288			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
289				return (NULL);
290			tm->tm_yday--;
291			fields |= FIELD_TM_YDAY;
292			break;
293
294		case 'M':	/* The minute. */
295			_LEGAL_ALT(_ALT_O);
296			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
297				return (NULL);
298			break;
299
300		case 'm':	/* The month. */
301			_LEGAL_ALT(_ALT_O);
302			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
303				return (NULL);
304			tm->tm_mon--;
305			fields |= FIELD_TM_MON;
306			break;
307
308		case 'p':	/* The locale's equivalent of AM/PM. */
309			_LEGAL_ALT(0);
310			/* AM? */
311			len = strlen(_ctloc(am_pm[0]));
312			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
313				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
314					return (NULL);
315				else if (tm->tm_hour == 12)
316					tm->tm_hour = 0;
317
318				bp += len;
319				break;
320			}
321			/* PM? */
322			len = strlen(_ctloc(am_pm[1]));
323			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
324				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
325					return (NULL);
326				else if (tm->tm_hour < 12)
327					tm->tm_hour += 12;
328
329				bp += len;
330				break;
331			}
332
333			/* Nothing matched. */
334			return (NULL);
335
336		case 'S':	/* The seconds. */
337			_LEGAL_ALT(_ALT_O);
338			if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
339				return (NULL);
340			break;
341		case 's':	/* Seconds since epoch */
342			{
343				int64_t i64;
344				if (!(_conv_num64(&bp, &i64, 0, INT64_MAX)))
345					return (NULL);
346				if (!gmtime_r(&i64, tm))
347					return (NULL);
348				fields = 0xffff;	 /* everything */
349			}
350			break;
351		case 'U':	/* The week of year, beginning on sunday. */
352		case 'W':	/* The week of year, beginning on monday. */
353			_LEGAL_ALT(_ALT_O);
354			/*
355			 * XXX This is bogus, as we can not assume any valid
356			 * information present in the tm structure at this
357			 * point to calculate a real value, so just check the
358			 * range for now.
359			 */
360			 if (!(_conv_num(&bp, &i, 0, 53)))
361				return (NULL);
362			 break;
363
364		case 'w':	/* The day of week, beginning on sunday. */
365			_LEGAL_ALT(_ALT_O);
366			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
367				return (NULL);
368			fields |= FIELD_TM_WDAY;
369			break;
370
371		case 'u':	/* The day of week, monday = 1. */
372			_LEGAL_ALT(_ALT_O);
373			if (!(_conv_num(&bp, &i, 1, 7)))
374				return (NULL);
375			tm->tm_wday = i % 7;
376			fields |= FIELD_TM_WDAY;
377			continue;
378
379		case 'g':	/* The year corresponding to the ISO week
380				 * number but without the century.
381				 */
382			if (!(_conv_num(&bp, &i, 0, 99)))
383				return (NULL);
384			continue;
385
386		case 'G':	/* The year corresponding to the ISO week
387				 * number with century.
388				 */
389			do
390				bp++;
391			while (isdigit(*bp));
392			continue;
393
394		case 'V':	/* The ISO 8601:1988 week number as decimal */
395			if (!(_conv_num(&bp, &i, 0, 53)))
396				return (NULL);
397			continue;
398
399		case 'Y':	/* The year. */
400			_LEGAL_ALT(_ALT_E);
401			if (!(_conv_num(&bp, &i, 0, 9999)))
402				return (NULL);
403
404			relyear = -1;
405			tm->tm_year = i - TM_YEAR_BASE;
406			fields |= FIELD_TM_YEAR;
407			break;
408
409		case 'y':	/* The year within the century (2 digits). */
410			_LEGAL_ALT(_ALT_E | _ALT_O);
411			if (!(_conv_num(&bp, &relyear, 0, 99)))
412				return (NULL);
413			break;
414
415		case 'Z':
416			tzset();
417			if (strncmp((const char *)bp, gmt, 3) == 0) {
418				tm->tm_isdst = 0;
419#ifdef TM_GMTOFF
420				tm->TM_GMTOFF = 0;
421#endif
422#ifdef TM_ZONE
423				tm->TM_ZONE = gmt;
424#endif
425				bp += 3;
426			} else if (strncmp((const char *)bp, utc, 3) == 0) {
427				tm->tm_isdst = 0;
428#ifdef TM_GMTOFF
429				tm->TM_GMTOFF = 0;
430#endif
431#ifdef TM_ZONE
432				tm->TM_ZONE = utc;
433#endif
434				bp += 3;
435			} else {
436				ep = _find_string(bp, &i,
437					       	 (const char * const *)tzname,
438					       	  NULL, 2);
439				if (ep == NULL)
440					return (NULL);
441
442				tm->tm_isdst = i;
443#ifdef TM_GMTOFF
444				tm->TM_GMTOFF = -(timezone);
445#endif
446#ifdef TM_ZONE
447				tm->TM_ZONE = tzname[i];
448#endif
449				bp = ep;
450			}
451			continue;
452
453		case 'z':
454			/*
455			 * We recognize all ISO 8601 formats:
456			 * Z	= Zulu time/UTC
457			 * [+-]hhmm
458			 * [+-]hh:mm
459			 * [+-]hh
460			 * We recognize all RFC-822/RFC-2822 formats:
461			 * UT|GMT
462			 *          North American : UTC offsets
463			 * E[DS]T = Eastern : -4 | -5
464			 * C[DS]T = Central : -5 | -6
465			 * M[DS]T = Mountain: -6 | -7
466			 * P[DS]T = Pacific : -7 | -8
467			 */
468			while (isspace(*bp))
469				bp++;
470
471			switch (*bp++) {
472			case 'G':
473				if (*bp++ != 'M')
474					return NULL;
475				/*FALLTHROUGH*/
476			case 'U':
477				if (*bp++ != 'T')
478					return NULL;
479				/*FALLTHROUGH*/
480			case 'Z':
481				tm->tm_isdst = 0;
482#ifdef TM_GMTOFF
483				tm->TM_GMTOFF = 0;
484#endif
485#ifdef TM_ZONE
486				tm->TM_ZONE = utc;
487#endif
488				continue;
489			case '+':
490				neg = 0;
491				break;
492			case '-':
493				neg = 1;
494				break;
495			default:
496				--bp;
497				ep = _find_string(bp, &i, nast, NULL, 4);
498				if (ep != NULL) {
499#ifdef TM_GMTOFF
500					tm->TM_GMTOFF = (-5 - i) * SECSPERHOUR;
501#endif
502#ifdef TM_ZONE
503					tm->TM_ZONE = (char *)nast[i];
504#endif
505					bp = ep;
506					continue;
507				}
508				ep = _find_string(bp, &i, nadt, NULL, 4);
509				if (ep != NULL) {
510					tm->tm_isdst = 1;
511#ifdef TM_GMTOFF
512					tm->TM_GMTOFF = (-4 - i) * SECSPERHOUR;
513#endif
514#ifdef TM_ZONE
515					tm->TM_ZONE = (char *)nadt[i];
516#endif
517					bp = ep;
518					continue;
519				}
520				return NULL;
521			}
522			offs = 0;
523			for (i = 0; i < 4; ) {
524				if (isdigit(*bp)) {
525					offs = offs * 10 + (*bp++ - '0');
526					i++;
527					continue;
528				}
529				if (i == 2 && *bp == ':') {
530					bp++;
531					continue;
532				}
533				break;
534			}
535			switch (i) {
536			case 2:
537				offs *= 100;
538				break;
539			case 4:
540				i = offs % 100;
541				if (i >= 60)
542					return NULL;
543				/* Convert minutes into decimal */
544				offs = (offs / 100) * 100 + (i * 50) / 30;
545				break;
546			default:
547				return NULL;
548			}
549			if (neg)
550				offs = -offs;
551			tm->tm_isdst = 0;	/* XXX */
552#ifdef TM_GMTOFF
553			tm->TM_GMTOFF = offs;
554#endif
555#ifdef TM_ZONE
556			tm->TM_ZONE = NULL;	/* XXX */
557#endif
558			continue;
559
560		/*
561		 * Miscellaneous conversions.
562		 */
563		case 'n':	/* Any kind of white-space. */
564		case 't':
565			_LEGAL_ALT(0);
566			while (isspace(*bp))
567				bp++;
568			break;
569
570
571		default:	/* Unknown/unsupported conversion. */
572			return (NULL);
573		}
574
575
576	}
577
578	/*
579	 * We need to evaluate the two digit year spec (%y)
580	 * last as we can get a century spec (%C) at any time.
581	 */
582	if (relyear != -1) {
583		if (century == TM_YEAR_BASE) {
584			if (relyear <= 68)
585				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
586			else
587				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
588		} else {
589			tm->tm_year = relyear + century - TM_YEAR_BASE;
590		}
591		fields |= FIELD_TM_YEAR;
592	}
593
594	/* Compute some missing values when possible. */
595	if (fields & FIELD_TM_YEAR) {
596		const int year = tm->tm_year + TM_YEAR_BASE;
597		const int *mon_lens = mon_lengths[isleap(year)];
598		if (!(fields & FIELD_TM_YDAY) &&
599		    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
600			tm->tm_yday = tm->tm_mday - 1;
601			for (i = 0; i < tm->tm_mon; i++)
602				tm->tm_yday += mon_lens[i];
603			fields |= FIELD_TM_YDAY;
604		}
605		if (fields & FIELD_TM_YDAY) {
606			int days = tm->tm_yday;
607			if (!(fields & FIELD_TM_WDAY)) {
608				tm->tm_wday = EPOCH_WDAY +
609				    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
610				    (DAYSPERNYEAR % DAYSPERWEEK) +
611				    leaps_thru_end_of(year - 1) -
612				    leaps_thru_end_of(EPOCH_YEAR - 1) +
613				    tm->tm_yday;
614				tm->tm_wday %= DAYSPERWEEK;
615				if (tm->tm_wday < 0)
616					tm->tm_wday += DAYSPERWEEK;
617			}
618			if (!(fields & FIELD_TM_MON)) {
619				tm->tm_mon = 0;
620				while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
621					days -= mon_lens[tm->tm_mon++];
622			}
623			if (!(fields & FIELD_TM_MDAY))
624				tm->tm_mday = days + 1;
625		}
626	}
627
628	return ((char *)bp);
629}
630
631
/*
 * Read an unsigned decimal number from *buf and store it in *dest.
 *
 * The first byte must be a digit, otherwise 0 is returned and *buf is left
 * alone.  Further digits are taken only while the number could still fit
 * below `ulim' and while no more digits have been read than `ulim' itself
 * has.  *buf is advanced past every digit that was read.  Returns 1 and
 * sets *dest when the value lies in [llim, ulim]; returns 0 and leaves
 * *dest untouched otherwise.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int value = 0;
	int budget = ulim;	/* loses one decimal digit per digit read */

	if (!(*cur >= '0' && *cur <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		budget /= 10;

		/* Another digit would push the value past the limit. */
		if (value * 10 > ulim)
			break;
		/* As many digits as the upper limit has were read. */
		if (budget == 0)
			break;
		/* Input has no more digits. */
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value >= llim && value <= ulim) {
		*dest = value;
		return (1);
	}
	return (0);
}
654
/*
 * 64-bit counterpart of _conv_num(): read an unsigned decimal number from
 * *buf and store it in *dest.
 *
 * The first byte must be a digit, otherwise 0 is returned and *buf is left
 * alone.  *buf is advanced past every digit that was read.  Returns 1 and
 * sets *dest when the value lies in [llim, ulim]; returns 0 and leaves
 * *dest untouched otherwise.
 *
 * The value is accumulated in an int64_t and every step is checked against
 * `ulim' before it is taken, so nothing overflows even when `ulim' is
 * INT64_MAX.
 */
static int
_conv_num64(const unsigned char **buf, int64_t *dest, int64_t llim, int64_t ulim)
{
	int64_t result = 0;
	int64_t rulim = ulim;
	int digit;

	if (**buf < '0' || **buf > '9')
		return (0);

	/* we use rulim to break out of the loop when we run out of digits */
	do {
		digit = *(*buf)++ - '0';
		/*
		 * result <= ulim / 10 holds here (see the loop condition),
		 * so result * 10 cannot overflow; refuse the digit if adding
		 * it would take the value past ulim.
		 */
		if (digit > ulim - result * 10)
			return (0);
		result = result * 10 + digit;
		rulim /= 10;
	} while (result <= ulim / 10 && rulim && **buf >= '0' && **buf <= '9');

	if (result < llim || result > ulim)
		return (0);

	*dest = result;
	return (1);
}
677
678static const u_char *
679_find_string(const u_char *bp, int *tgt, const char * const *n1,
680		const char * const *n2, int c)
681{
682	int i;
683	unsigned int len;
684
685	/* check full name - then abbreviated ones */
686	for (; n1 != NULL; n1 = n2, n2 = NULL) {
687		for (i = 0; i < c; i++, n1++) {
688			len = strlen(*n1);
689			if (strncasecmp(*n1, (const char *)bp, len) == 0) {
690				*tgt = i;
691				return bp + len;
692			}
693		}
694	}
695
696	/* Nothing matched */
697	return NULL;
698}
699
/*
 * Count the leap years from year 0 through the end of year `y' under the
 * Gregorian rule (every 4th year, except every 100th, but every 400th).
 * A negative `y' is mapped onto the non-negative year -(y + 1) and the
 * negated count, less one, is returned.
 */
static int
leaps_thru_end_of(const int y)
{
	int mirrored;

	if (y >= 0)
		return (y / 4 - y / 100 + y / 400);

	/* y < 0, hence mirrored >= 0 and no further reflection is needed. */
	mirrored = -(y + 1);
	return (-((mirrored / 4 - mirrored / 100 + mirrored / 400) + 1));
}
706