1/*	$OpenBSD: strptime.c,v 1.31 2023/03/02 16:21:51 millert Exp $ */
2/*	$NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $	*/
3/*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <ctype.h>
32#include <errno.h>
33#include <limits.h>
34#include <locale.h>
35#include <stdlib.h>
36#include <string.h>
37#include <time.h>
38
39#include "localedef.h"
40#include "private.h"
41#include "tzfile.h"
42
/* Shorthand for reading member `x' of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O are the bits recorded in the caller's local
 * `alt_format' when a "%E" or "%O" modifier has been seen.
 * _LEGAL_ALT(x) takes the set of modifier bits that the current conversion
 * tolerates and makes the enclosing function return 0 (i.e. NULL) if
 * `alt_format' has any other bit set.  It expands to a bare block containing
 * a `return', so it may only be used as a statement inside a function that
 * has `alt_format' in scope.
 */
#define _ALT_E			0x01
#define _ALT_O			0x02
#define	_LEGAL_ALT(x)		{ if (alt_format & ~(x)) return (0); }

/*
 * We keep track of some of the fields we set in order to compute missing ones.
 * Each flag is one bit in the `fields' mask and names the struct tm member
 * that has been filled in from the input.
 */
#define FIELD_TM_MON	(1 << 0)
#define FIELD_TM_MDAY	(1 << 1)
#define FIELD_TM_WDAY	(1 << 2)
#define FIELD_TM_YDAY	(1 << 3)
#define FIELD_TM_YEAR	(1 << 4)
61
/*
 * Zone names handed back through tm_zone by the %Z and %z conversions.
 * They are writable arrays rather than const strings because they are
 * assigned to tm_zone without a cast (presumably tm_zone is a plain
 * `char *' -- confirm against <time.h>).
 */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };
/*
 * RFC-822/RFC-2822 North American zone abbreviations, standard (nast) and
 * daylight (nadt).  The index of a name selects its UTC offset in the %z
 * code: (-5 - index) hours for nast and (-4 - index) hours for nadt.
 * Lookups in this file pass a count of 4, so the fifth, all-NUL entry is
 * never compared.
 */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};

/* Days per month; row 0 is a common year, row 1 a leap year (isleap()). */
static const int mon_lengths[2][MONSPERYEAR] = {
        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};

/* Forward declarations of the file-local helpers defined below. */
static	int _conv_num(const unsigned char **, int *, int, int);
static	int epoch_to_tm(const unsigned char **, struct tm *);
static	int leaps_thru_end_of(const int y);
static	char *_strptime(const char *, const char *, struct tm *, int);
static	const u_char *_find_string(const u_char *, int *, const char * const *,
	    const char * const *, int);
83
84
/*
 * Public entry point.  Parses `buf' according to `fmt' into `tm' by handing
 * the work to _strptime() with its `initialize' argument set, so that the
 * parser starts from a fresh state.  Returns whatever _strptime() returns:
 * a pointer to the first unparsed byte, or NULL on failure.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	rest = _strptime(buf, fmt, tm, 1);
	return (rest);
}
DEF_WEAK(strptime);
91
92static char *
93_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
94{
95	unsigned char c;
96	const unsigned char *bp, *ep;
97	size_t len;
98	int alt_format, i, offs;
99	int neg = 0;
100	static int century, relyear, fields;
101
102	if (initialize) {
103		century = TM_YEAR_BASE;
104		relyear = -1;
105		fields = 0;
106	}
107
108	bp = (const unsigned char *)buf;
109	while ((c = *fmt) != '\0') {
110		/* Clear `alternate' modifier prior to new conversion. */
111		alt_format = 0;
112
113		/* Eat up white-space. */
114		if (isspace(c)) {
115			while (isspace(*bp))
116				bp++;
117
118			fmt++;
119			continue;
120		}
121
122		if ((c = *fmt++) != '%')
123			goto literal;
124
125
126again:		switch (c = *fmt++) {
127		case '%':	/* "%%" is converted to "%". */
128literal:
129		if (c != *bp++)
130			return (NULL);
131
132		break;
133
134		/*
135		 * "Alternative" modifiers. Just set the appropriate flag
136		 * and start over again.
137		 */
138		case 'E':	/* "%E?" alternative conversion modifier. */
139			_LEGAL_ALT(0);
140			alt_format |= _ALT_E;
141			goto again;
142
143		case 'O':	/* "%O?" alternative conversion modifier. */
144			_LEGAL_ALT(0);
145			alt_format |= _ALT_O;
146			goto again;
147
148		/*
149		 * "Complex" conversion rules, implemented through recursion.
150		 */
151		case 'c':	/* Date and time, using the locale's format. */
152			_LEGAL_ALT(_ALT_E);
153			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
154				return (NULL);
155			break;
156
157		case 'D':	/* The date as "%m/%d/%y". */
158			_LEGAL_ALT(0);
159			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
160				return (NULL);
161			break;
162
163		case 'F':	/* The date as "%Y-%m-%d". */
164			_LEGAL_ALT(0);
165			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
166				return (NULL);
167			continue;
168
169		case 'R':	/* The time as "%H:%M". */
170			_LEGAL_ALT(0);
171			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
172				return (NULL);
173			break;
174
175		case 'r':	/* The time as "%I:%M:%S %p". */
176			_LEGAL_ALT(0);
177			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
178				return (NULL);
179			break;
180
181		case 'T':	/* The time as "%H:%M:%S". */
182			_LEGAL_ALT(0);
183			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
184				return (NULL);
185			break;
186
187		case 'X':	/* The time, using the locale's format. */
188			_LEGAL_ALT(_ALT_E);
189			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
190				return (NULL);
191			break;
192
193		case 'x':	/* The date, using the locale's format. */
194			_LEGAL_ALT(_ALT_E);
195			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
196				return (NULL);
197			break;
198
199		/*
200		 * "Elementary" conversion rules.
201		 */
202		case 'A':	/* The day of week, using the locale's form. */
203		case 'a':
204			_LEGAL_ALT(0);
205			for (i = 0; i < 7; i++) {
206				/* Full name. */
207				len = strlen(_ctloc(day[i]));
208				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
209					break;
210
211				/* Abbreviated name. */
212				len = strlen(_ctloc(abday[i]));
213				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
214					break;
215			}
216
217			/* Nothing matched. */
218			if (i == 7)
219				return (NULL);
220
221			tm->tm_wday = i;
222			bp += len;
223			fields |= FIELD_TM_WDAY;
224			break;
225
226		case 'B':	/* The month, using the locale's form. */
227		case 'b':
228		case 'h':
229			_LEGAL_ALT(0);
230			for (i = 0; i < 12; i++) {
231				/* Full name. */
232				len = strlen(_ctloc(mon[i]));
233				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
234					break;
235
236				/* Abbreviated name. */
237				len = strlen(_ctloc(abmon[i]));
238				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
239					break;
240			}
241
242			/* Nothing matched. */
243			if (i == 12)
244				return (NULL);
245
246			tm->tm_mon = i;
247			bp += len;
248			fields |= FIELD_TM_MON;
249			break;
250
251		case 'C':	/* The century number. */
252			_LEGAL_ALT(_ALT_E);
253			if (!(_conv_num(&bp, &i, 0, 99)))
254				return (NULL);
255
256			century = i * 100;
257			break;
258
259		case 'e':	/* The day of month. */
260			if (isspace(*bp))
261				bp++;
262			/* FALLTHROUGH */
263		case 'd':
264			_LEGAL_ALT(_ALT_O);
265			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
266				return (NULL);
267			fields |= FIELD_TM_MDAY;
268			break;
269
270		case 'k':	/* The hour (24-hour clock representation). */
271			_LEGAL_ALT(0);
272			/* FALLTHROUGH */
273		case 'H':
274			_LEGAL_ALT(_ALT_O);
275			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
276				return (NULL);
277			break;
278
279		case 'l':	/* The hour (12-hour clock representation). */
280			_LEGAL_ALT(0);
281			/* FALLTHROUGH */
282		case 'I':
283			_LEGAL_ALT(_ALT_O);
284			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
285				return (NULL);
286			break;
287
288		case 'j':	/* The day of year. */
289			_LEGAL_ALT(0);
290			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
291				return (NULL);
292			tm->tm_yday--;
293			fields |= FIELD_TM_YDAY;
294			break;
295
296		case 'M':	/* The minute. */
297			_LEGAL_ALT(_ALT_O);
298			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
299				return (NULL);
300			break;
301
302		case 'm':	/* The month. */
303			_LEGAL_ALT(_ALT_O);
304			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
305				return (NULL);
306			tm->tm_mon--;
307			fields |= FIELD_TM_MON;
308			break;
309
310		case 'p':	/* The locale's equivalent of AM/PM. */
311			_LEGAL_ALT(0);
312			/* AM? */
313			len = strlen(_ctloc(am_pm[0]));
314			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
315				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
316					return (NULL);
317				else if (tm->tm_hour == 12)
318					tm->tm_hour = 0;
319
320				bp += len;
321				break;
322			}
323			/* PM? */
324			len = strlen(_ctloc(am_pm[1]));
325			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
326				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
327					return (NULL);
328				else if (tm->tm_hour < 12)
329					tm->tm_hour += 12;
330
331				bp += len;
332				break;
333			}
334
335			/* Nothing matched. */
336			return (NULL);
337
338		case 'S':	/* The seconds. */
339			_LEGAL_ALT(_ALT_O);
340			if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
341				return (NULL);
342			break;
343		case 's':	/* Seconds since epoch. */
344			if (!(epoch_to_tm(&bp, tm)))
345				return (NULL);
346			fields = 0xffff;	 /* everything */
347			break;
348		case 'U':	/* The week of year, beginning on sunday. */
349		case 'W':	/* The week of year, beginning on monday. */
350			_LEGAL_ALT(_ALT_O);
351			/*
352			 * XXX This is bogus, as we can not assume any valid
353			 * information present in the tm structure at this
354			 * point to calculate a real value, so just check the
355			 * range for now.
356			 */
357			 if (!(_conv_num(&bp, &i, 0, 53)))
358				return (NULL);
359			 break;
360
361		case 'w':	/* The day of week, beginning on sunday. */
362			_LEGAL_ALT(_ALT_O);
363			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
364				return (NULL);
365			fields |= FIELD_TM_WDAY;
366			break;
367
368		case 'u':	/* The day of week, monday = 1. */
369			_LEGAL_ALT(_ALT_O);
370			if (!(_conv_num(&bp, &i, 1, 7)))
371				return (NULL);
372			tm->tm_wday = i % 7;
373			fields |= FIELD_TM_WDAY;
374			continue;
375
376		case 'g':	/* The year corresponding to the ISO week
377				 * number but without the century.
378				 */
379			if (!(_conv_num(&bp, &i, 0, 99)))
380				return (NULL);
381			continue;
382
383		case 'G':	/* The year corresponding to the ISO week
384				 * number with century.
385				 */
386			do
387				bp++;
388			while (isdigit(*bp));
389			continue;
390
391		case 'V':	/* The ISO 8601:1988 week number as decimal */
392			if (!(_conv_num(&bp, &i, 0, 53)))
393				return (NULL);
394			continue;
395
396		case 'Y':	/* The year. */
397			_LEGAL_ALT(_ALT_E);
398			if (!(_conv_num(&bp, &i, 0, 9999)))
399				return (NULL);
400
401			relyear = -1;
402			tm->tm_year = i - TM_YEAR_BASE;
403			fields |= FIELD_TM_YEAR;
404			break;
405
406		case 'y':	/* The year within the century (2 digits). */
407			_LEGAL_ALT(_ALT_E | _ALT_O);
408			if (!(_conv_num(&bp, &relyear, 0, 99)))
409				return (NULL);
410			break;
411
412		case 'Z':
413			tzset();
414			if (strncmp((const char *)bp, gmt, 3) == 0) {
415				tm->tm_isdst = 0;
416				tm->tm_gmtoff = 0;
417				tm->tm_zone = gmt;
418				bp += 3;
419			} else if (strncmp((const char *)bp, utc, 3) == 0) {
420				tm->tm_isdst = 0;
421				tm->tm_gmtoff = 0;
422				tm->tm_zone = utc;
423				bp += 3;
424			} else {
425				ep = _find_string(bp, &i,
426						 (const char * const *)tzname,
427						  NULL, 2);
428				if (ep == NULL)
429					return (NULL);
430
431				tm->tm_isdst = i;
432				tm->tm_gmtoff = -(timezone);
433				tm->tm_zone = tzname[i];
434				bp = ep;
435			}
436			continue;
437
438		case 'z':
439			/*
440			 * We recognize all ISO 8601 formats:
441			 * Z	= Zulu time/UTC
442			 * [+-]hhmm
443			 * [+-]hh:mm
444			 * [+-]hh
445			 * We recognize all RFC-822/RFC-2822 formats:
446			 * UT|GMT
447			 *          North American : UTC offsets
448			 * E[DS]T = Eastern : -4 | -5
449			 * C[DS]T = Central : -5 | -6
450			 * M[DS]T = Mountain: -6 | -7
451			 * P[DS]T = Pacific : -7 | -8
452			 */
453			while (isspace(*bp))
454				bp++;
455
456			switch (*bp++) {
457			case 'G':
458				if (*bp++ != 'M')
459					return NULL;
460				/*FALLTHROUGH*/
461			case 'U':
462				if (*bp++ != 'T')
463					return NULL;
464				/*FALLTHROUGH*/
465			case 'Z':
466				tm->tm_isdst = 0;
467				tm->tm_gmtoff = 0;
468				tm->tm_zone = utc;
469				continue;
470			case '+':
471				neg = 0;
472				break;
473			case '-':
474				neg = 1;
475				break;
476			default:
477				--bp;
478				ep = _find_string(bp, &i, nast, NULL, 4);
479				if (ep != NULL) {
480					tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
481					tm->tm_zone = (char *)nast[i];
482					bp = ep;
483					continue;
484				}
485				ep = _find_string(bp, &i, nadt, NULL, 4);
486				if (ep != NULL) {
487					tm->tm_isdst = 1;
488					tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
489					tm->tm_zone = (char *)nadt[i];
490					bp = ep;
491					continue;
492				}
493				return NULL;
494			}
495			if (!isdigit(bp[0]) || !isdigit(bp[1]))
496				return NULL;
497			offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
498			bp += 2;
499			if (*bp == ':')
500				bp++;
501			if (isdigit(*bp)) {
502				offs += (*bp++ - '0') * 10 * SECSPERMIN;
503				if (!isdigit(*bp))
504					return NULL;
505				offs += (*bp++ - '0') * SECSPERMIN;
506			}
507			if (neg)
508				offs = -offs;
509			tm->tm_isdst = 0;	/* XXX */
510			tm->tm_gmtoff = offs;
511			tm->tm_zone = NULL;	/* XXX */
512			continue;
513
514		/*
515		 * Miscellaneous conversions.
516		 */
517		case 'n':	/* Any kind of white-space. */
518		case 't':
519			_LEGAL_ALT(0);
520			while (isspace(*bp))
521				bp++;
522			break;
523
524
525		default:	/* Unknown/unsupported conversion. */
526			return (NULL);
527		}
528
529
530	}
531
532	/*
533	 * We need to evaluate the two digit year spec (%y)
534	 * last as we can get a century spec (%C) at any time.
535	 */
536	if (relyear != -1) {
537		if (century == TM_YEAR_BASE) {
538			if (relyear <= 68)
539				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
540			else
541				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
542		} else {
543			tm->tm_year = relyear + century - TM_YEAR_BASE;
544		}
545		fields |= FIELD_TM_YEAR;
546	}
547
548	/* Compute some missing values when possible. */
549	if (fields & FIELD_TM_YEAR) {
550		const int year = tm->tm_year + TM_YEAR_BASE;
551		const int *mon_lens = mon_lengths[isleap(year)];
552		if (!(fields & FIELD_TM_YDAY) &&
553		    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
554			tm->tm_yday = tm->tm_mday - 1;
555			for (i = 0; i < tm->tm_mon; i++)
556				tm->tm_yday += mon_lens[i];
557			fields |= FIELD_TM_YDAY;
558		}
559		if (fields & FIELD_TM_YDAY) {
560			int days = tm->tm_yday;
561			if (!(fields & FIELD_TM_WDAY)) {
562				tm->tm_wday = EPOCH_WDAY +
563				    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
564				    (DAYSPERNYEAR % DAYSPERWEEK) +
565				    leaps_thru_end_of(year - 1) -
566				    leaps_thru_end_of(EPOCH_YEAR - 1) +
567				    tm->tm_yday;
568				tm->tm_wday %= DAYSPERWEEK;
569				if (tm->tm_wday < 0)
570					tm->tm_wday += DAYSPERWEEK;
571			}
572			if (!(fields & FIELD_TM_MON)) {
573				tm->tm_mon = 0;
574				while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
575					days -= mon_lens[tm->tm_mon++];
576			}
577			if (!(fields & FIELD_TM_MDAY))
578				tm->tm_mday = days + 1;
579		}
580	}
581
582	return ((char *)bp);
583}
584
585
/*
 * Read an unsigned decimal number from *buf.
 *
 * Digits are consumed greedily, but reading stops early once another digit
 * could only push the value beyond `ulim', or once as many digits as `ulim'
 * itself has have been taken.  *buf is advanced past every digit consumed,
 * also when the final range check fails.
 *
 * Returns 1 and stores the value in *dest if it lies in [llim, ulim];
 * returns 0 (leaving *dest alone) if the input does not start with a digit
 * or the value is out of range.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int digits_left = ulim;
	int value = 0;

	if (!(*cur >= '0' && *cur <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		/* One fewer digit of headroom for every digit taken. */
		digits_left /= 10;

		if (value * 10 > ulim || digits_left == 0)
			break;
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value >= llim && value <= ulim) {
		*dest = value;
		return (1);
	}
	return (0);
}
608
/*
 * Handle "%s": read a count of seconds since the Epoch from *buf with
 * strtoll() and break it down into *tm with localtime_r().
 *
 * Returns 1 on success and 0 if no number could be read, the number is
 * negative, it overflowed, it cannot be represented as a time_t, or
 * localtime_r() failed.  In every case *buf is set to the end pointer
 * reported by strtoll() and the caller's errno is preserved.
 */
static int
epoch_to_tm(const unsigned char **buf, struct tm *tm)
{
	int saved_errno = errno;
	int ret = 0;
	long long val;
	time_t secs;
	char *ep;

	errno = 0;
	/*
	 * Keep the parsed value in a long long: the overflow test below
	 * compares against LLONG_MAX, which is only meaningful before the
	 * value is narrowed to time_t.
	 */
	val = strtoll((const char *)*buf, &ep, 10);
	if (*buf == (const unsigned char *)ep)
		goto done;		/* no digits at all */
	if (val < 0 || (val == LLONG_MAX && errno == ERANGE))
		goto done;
	secs = (time_t)val;
	if ((long long)secs != val)
		goto done;		/* does not fit in time_t */
	if (localtime_r(&secs, tm) == NULL)
		goto done;
	ret = 1;
done:
	*buf = (const unsigned char *)ep;
	errno = saved_errno;
	return (ret);
}
632
/*
 * Case-insensitively match the start of `bp' against a table of names.
 *
 * The first `c' entries of `n1' are tried in order, then, if `n1' was not
 * NULL and `n2' is not NULL, the first `c' entries of `n2'.  On the first
 * entry that is a prefix of `bp', its index within its table is stored in
 * *tgt and a pointer just past the matched text is returned.  Returns NULL,
 * leaving *tgt untouched, when nothing matches.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char *input = (const char *)bp;
	unsigned int len;
	int t, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* check full name - then abbreviated ones */
	for (t = 0; t < 2 && tables[t] != NULL; t++) {
		const char * const *names = tables[t];

		for (idx = 0; idx < c; idx++) {
			len = strlen(names[idx]);
			if (strncasecmp(names[idx], input, len) != 0)
				continue;
			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
654
/*
 * Evaluate y/4 - y/100 + y/400 for a non-negative `y'.  For a negative `y'
 * the same expression is evaluated for -(y + 1) and the result returned is
 * the negation of that value plus one.
 */
static int
leaps_thru_end_of(const int y)
{
	int mirrored;

	if (y >= 0)
		return (y / 4 - y / 100 + y / 400);

	/* -(y + 1) is never negative, so no further mirroring is needed. */
	mirrored = -(y + 1);
	return (-((mirrored / 4 - mirrored / 100 + mirrored / 400) + 1));
}
661