/* strptime.c revision 1.18 */
1/* $OpenBSD: strptime.c,v 1.18 2014/10/11 02:21:27 doug Exp $ */ 2/* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */ 3 4/*- 5 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code was contributed to The NetBSD Foundation by Klaus Klein. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/localedef.h> 33#include <ctype.h> 34#include <locale.h> 35#include <string.h> 36#include <time.h> 37#include <tzfile.h> 38 39#define _ctloc(x) (_CurrentTimeLocale->x) 40 41/* 42 * We do not implement alternate representations. However, we always 43 * check whether a given modifier is allowed for a certain conversion. 
44 */ 45#define _ALT_E 0x01 46#define _ALT_O 0x02 47#define _LEGAL_ALT(x) { if (alt_format & ~(x)) return (0); } 48 49/* 50 * We keep track of some of the fields we set in order to compute missing ones. 51 */ 52#define FIELD_TM_MON (1 << 0) 53#define FIELD_TM_MDAY (1 << 1) 54#define FIELD_TM_WDAY (1 << 2) 55#define FIELD_TM_YDAY (1 << 3) 56#define FIELD_TM_YEAR (1 << 4) 57 58static char gmt[] = { "GMT" }; 59static char utc[] = { "UTC" }; 60/* RFC-822/RFC-2822 */ 61static const char * const nast[5] = { 62 "EST", "CST", "MST", "PST", "\0\0\0" 63}; 64static const char * const nadt[5] = { 65 "EDT", "CDT", "MDT", "PDT", "\0\0\0" 66}; 67 68static const int mon_lengths[2][MONSPERYEAR] = { 69 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, 70 { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } 71}; 72 73static int _conv_num(const unsigned char **, int *, int, int); 74static int leaps_thru_end_of(const int y); 75static char *_strptime(const char *, const char *, struct tm *, int); 76static const u_char *_find_string(const u_char *, int *, const char * const *, 77 const char * const *, int); 78 79 80char * 81strptime(const char *buf, const char *fmt, struct tm *tm) 82{ 83 return(_strptime(buf, fmt, tm, 1)); 84} 85 86static char * 87_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize) 88{ 89 unsigned char c; 90 const unsigned char *bp, *ep; 91 size_t len; 92 int alt_format, i, offs; 93 int neg = 0; 94 static int century, relyear, fields; 95 96 if (initialize) { 97 century = TM_YEAR_BASE; 98 relyear = -1; 99 fields = 0; 100 } 101 102 bp = (unsigned char *)buf; 103 while ((c = *fmt) != '\0') { 104 /* Clear `alternate' modifier prior to new conversion. */ 105 alt_format = 0; 106 107 /* Eat up white-space. */ 108 if (isspace(c)) { 109 while (isspace(*bp)) 110 bp++; 111 112 fmt++; 113 continue; 114 } 115 116 if ((c = *fmt++) != '%') 117 goto literal; 118 119 120again: switch (c = *fmt++) { 121 case '%': /* "%%" is converted to "%". 
*/ 122literal: 123 if (c != *bp++) 124 return (NULL); 125 126 break; 127 128 /* 129 * "Alternative" modifiers. Just set the appropriate flag 130 * and start over again. 131 */ 132 case 'E': /* "%E?" alternative conversion modifier. */ 133 _LEGAL_ALT(0); 134 alt_format |= _ALT_E; 135 goto again; 136 137 case 'O': /* "%O?" alternative conversion modifier. */ 138 _LEGAL_ALT(0); 139 alt_format |= _ALT_O; 140 goto again; 141 142 /* 143 * "Complex" conversion rules, implemented through recursion. 144 */ 145 case 'c': /* Date and time, using the locale's format. */ 146 _LEGAL_ALT(_ALT_E); 147 if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0))) 148 return (NULL); 149 break; 150 151 case 'D': /* The date as "%m/%d/%y". */ 152 _LEGAL_ALT(0); 153 if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0))) 154 return (NULL); 155 break; 156 157 case 'F': /* The date as "%Y-%m-%d". */ 158 _LEGAL_ALT(0); 159 if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0))) 160 return (NULL); 161 continue; 162 163 case 'R': /* The time as "%H:%M". */ 164 _LEGAL_ALT(0); 165 if (!(bp = _strptime(bp, "%H:%M", tm, 0))) 166 return (NULL); 167 break; 168 169 case 'r': /* The time as "%I:%M:%S %p". */ 170 _LEGAL_ALT(0); 171 if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0))) 172 return (NULL); 173 break; 174 175 case 'T': /* The time as "%H:%M:%S". */ 176 _LEGAL_ALT(0); 177 if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0))) 178 return (NULL); 179 break; 180 181 case 'X': /* The time, using the locale's format. */ 182 _LEGAL_ALT(_ALT_E); 183 if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0))) 184 return (NULL); 185 break; 186 187 case 'x': /* The date, using the locale's format. */ 188 _LEGAL_ALT(_ALT_E); 189 if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0))) 190 return (NULL); 191 break; 192 193 /* 194 * "Elementary" conversion rules. 195 */ 196 case 'A': /* The day of week, using the locale's form. */ 197 case 'a': 198 _LEGAL_ALT(0); 199 for (i = 0; i < 7; i++) { 200 /* Full name. 
*/ 201 len = strlen(_ctloc(day[i])); 202 if (strncasecmp(_ctloc(day[i]), bp, len) == 0) 203 break; 204 205 /* Abbreviated name. */ 206 len = strlen(_ctloc(abday[i])); 207 if (strncasecmp(_ctloc(abday[i]), bp, len) == 0) 208 break; 209 } 210 211 /* Nothing matched. */ 212 if (i == 7) 213 return (NULL); 214 215 tm->tm_wday = i; 216 bp += len; 217 fields |= FIELD_TM_WDAY; 218 break; 219 220 case 'B': /* The month, using the locale's form. */ 221 case 'b': 222 case 'h': 223 _LEGAL_ALT(0); 224 for (i = 0; i < 12; i++) { 225 /* Full name. */ 226 len = strlen(_ctloc(mon[i])); 227 if (strncasecmp(_ctloc(mon[i]), bp, len) == 0) 228 break; 229 230 /* Abbreviated name. */ 231 len = strlen(_ctloc(abmon[i])); 232 if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0) 233 break; 234 } 235 236 /* Nothing matched. */ 237 if (i == 12) 238 return (NULL); 239 240 tm->tm_mon = i; 241 bp += len; 242 fields |= FIELD_TM_MON; 243 break; 244 245 case 'C': /* The century number. */ 246 _LEGAL_ALT(_ALT_E); 247 if (!(_conv_num(&bp, &i, 0, 99))) 248 return (NULL); 249 250 century = i * 100; 251 break; 252 253 case 'd': /* The day of month. */ 254 case 'e': 255 _LEGAL_ALT(_ALT_O); 256 if (!(_conv_num(&bp, &tm->tm_mday, 1, 31))) 257 return (NULL); 258 fields |= FIELD_TM_MDAY; 259 break; 260 261 case 'k': /* The hour (24-hour clock representation). */ 262 _LEGAL_ALT(0); 263 /* FALLTHROUGH */ 264 case 'H': 265 _LEGAL_ALT(_ALT_O); 266 if (!(_conv_num(&bp, &tm->tm_hour, 0, 23))) 267 return (NULL); 268 break; 269 270 case 'l': /* The hour (12-hour clock representation). */ 271 _LEGAL_ALT(0); 272 /* FALLTHROUGH */ 273 case 'I': 274 _LEGAL_ALT(_ALT_O); 275 if (!(_conv_num(&bp, &tm->tm_hour, 1, 12))) 276 return (NULL); 277 break; 278 279 case 'j': /* The day of year. */ 280 _LEGAL_ALT(0); 281 if (!(_conv_num(&bp, &tm->tm_yday, 1, 366))) 282 return (NULL); 283 tm->tm_yday--; 284 fields |= FIELD_TM_YDAY; 285 break; 286 287 case 'M': /* The minute. 
*/ 288 _LEGAL_ALT(_ALT_O); 289 if (!(_conv_num(&bp, &tm->tm_min, 0, 59))) 290 return (NULL); 291 break; 292 293 case 'm': /* The month. */ 294 _LEGAL_ALT(_ALT_O); 295 if (!(_conv_num(&bp, &tm->tm_mon, 1, 12))) 296 return (NULL); 297 tm->tm_mon--; 298 fields |= FIELD_TM_MON; 299 break; 300 301 case 'p': /* The locale's equivalent of AM/PM. */ 302 _LEGAL_ALT(0); 303 /* AM? */ 304 len = strlen(_ctloc(am_pm[0])); 305 if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) { 306 if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */ 307 return (NULL); 308 else if (tm->tm_hour == 12) 309 tm->tm_hour = 0; 310 311 bp += len; 312 break; 313 } 314 /* PM? */ 315 len = strlen(_ctloc(am_pm[1])); 316 if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) { 317 if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */ 318 return (NULL); 319 else if (tm->tm_hour < 12) 320 tm->tm_hour += 12; 321 322 bp += len; 323 break; 324 } 325 326 /* Nothing matched. */ 327 return (NULL); 328 329 case 'S': /* The seconds. */ 330 _LEGAL_ALT(_ALT_O); 331 if (!(_conv_num(&bp, &tm->tm_sec, 0, 61))) 332 return (NULL); 333 break; 334 335 case 'U': /* The week of year, beginning on sunday. */ 336 case 'W': /* The week of year, beginning on monday. */ 337 _LEGAL_ALT(_ALT_O); 338 /* 339 * XXX This is bogus, as we can not assume any valid 340 * information present in the tm structure at this 341 * point to calculate a real value, so just check the 342 * range for now. 343 */ 344 if (!(_conv_num(&bp, &i, 0, 53))) 345 return (NULL); 346 break; 347 348 case 'w': /* The day of week, beginning on sunday. */ 349 _LEGAL_ALT(_ALT_O); 350 if (!(_conv_num(&bp, &tm->tm_wday, 0, 6))) 351 return (NULL); 352 fields |= FIELD_TM_WDAY; 353 break; 354 355 case 'u': /* The day of week, monday = 1. */ 356 _LEGAL_ALT(_ALT_O); 357 if (!(_conv_num(&bp, &i, 1, 7))) 358 return (NULL); 359 tm->tm_wday = i % 7; 360 fields |= FIELD_TM_WDAY; 361 continue; 362 363 case 'g': /* The year corresponding to the ISO week 364 * number but without the century. 
365 */ 366 if (!(_conv_num(&bp, &i, 0, 99))) 367 return (NULL); 368 continue; 369 370 case 'G': /* The year corresponding to the ISO week 371 * number with century. 372 */ 373 do 374 bp++; 375 while (isdigit(*bp)); 376 continue; 377 378 case 'V': /* The ISO 8601:1988 week number as decimal */ 379 if (!(_conv_num(&bp, &i, 0, 53))) 380 return (NULL); 381 continue; 382 383 case 'Y': /* The year. */ 384 _LEGAL_ALT(_ALT_E); 385 if (!(_conv_num(&bp, &i, 0, 9999))) 386 return (NULL); 387 388 relyear = -1; 389 tm->tm_year = i - TM_YEAR_BASE; 390 fields |= FIELD_TM_YEAR; 391 break; 392 393 case 'y': /* The year within the century (2 digits). */ 394 _LEGAL_ALT(_ALT_E | _ALT_O); 395 if (!(_conv_num(&bp, &relyear, 0, 99))) 396 return (NULL); 397 break; 398 399 case 'Z': 400 tzset(); 401 if (strncmp((const char *)bp, gmt, 3) == 0) { 402 tm->tm_isdst = 0; 403#ifdef TM_GMTOFF 404 tm->TM_GMTOFF = 0; 405#endif 406#ifdef TM_ZONE 407 tm->TM_ZONE = gmt; 408#endif 409 bp += 3; 410 } else if (strncmp((const char *)bp, utc, 3) == 0) { 411 tm->tm_isdst = 0; 412#ifdef TM_GMTOFF 413 tm->TM_GMTOFF = 0; 414#endif 415#ifdef TM_ZONE 416 tm->TM_ZONE = utc; 417#endif 418 bp += 3; 419 } else { 420 ep = _find_string(bp, &i, 421 (const char * const *)tzname, 422 NULL, 2); 423 if (ep == NULL) 424 return (NULL); 425 426 tm->tm_isdst = i; 427#ifdef TM_GMTOFF 428 tm->TM_GMTOFF = -(timezone); 429#endif 430#ifdef TM_ZONE 431 tm->TM_ZONE = tzname[i]; 432#endif 433 bp = ep; 434 } 435 continue; 436 437 case 'z': 438 /* 439 * We recognize all ISO 8601 formats: 440 * Z = Zulu time/UTC 441 * [+-]hhmm 442 * [+-]hh:mm 443 * [+-]hh 444 * We recognize all RFC-822/RFC-2822 formats: 445 * UT|GMT 446 * North American : UTC offsets 447 * E[DS]T = Eastern : -4 | -5 448 * C[DS]T = Central : -5 | -6 449 * M[DS]T = Mountain: -6 | -7 450 * P[DS]T = Pacific : -7 | -8 451 * Military 452 * [A-IL-M] = -1 ... -9 (J not used) 453 * [N-Y] = +1 ... 
+12 454 */ 455 while (isspace(*bp)) 456 bp++; 457 458 switch (*bp++) { 459 case 'G': 460 if (*bp++ != 'M') 461 return NULL; 462 /*FALLTHROUGH*/ 463 case 'U': 464 if (*bp++ != 'T') 465 return NULL; 466 /*FALLTHROUGH*/ 467 case 'Z': 468 tm->tm_isdst = 0; 469#ifdef TM_GMTOFF 470 tm->TM_GMTOFF = 0; 471#endif 472#ifdef TM_ZONE 473 tm->TM_ZONE = utc; 474#endif 475 continue; 476 case '+': 477 neg = 0; 478 break; 479 case '-': 480 neg = 1; 481 break; 482 default: 483 --bp; 484 ep = _find_string(bp, &i, nast, NULL, 4); 485 if (ep != NULL) { 486#ifdef TM_GMTOFF 487 tm->TM_GMTOFF = -5 - i; 488#endif 489#ifdef TM_ZONE 490 tm->TM_ZONE = __UNCONST(nast[i]); 491#endif 492 bp = ep; 493 continue; 494 } 495 ep = _find_string(bp, &i, nadt, NULL, 4); 496 if (ep != NULL) { 497 tm->tm_isdst = 1; 498#ifdef TM_GMTOFF 499 tm->TM_GMTOFF = -4 - i; 500#endif 501#ifdef TM_ZONE 502 tm->TM_ZONE = __UNCONST(nadt[i]); 503#endif 504 bp = ep; 505 continue; 506 } 507 508 if ((*bp >= 'A' && *bp <= 'I') || 509 (*bp >= 'L' && *bp <= 'Y')) { 510#ifdef TM_GMTOFF 511 /* Argh! No 'J'! 
*/ 512 if (*bp >= 'A' && *bp <= 'I') 513 tm->TM_GMTOFF = 514 ('A' - 1) - (int)*bp; 515 else if (*bp >= 'L' && *bp <= 'M') 516 tm->TM_GMTOFF = 'A' - (int)*bp; 517 else if (*bp >= 'N' && *bp <= 'Y') 518 tm->TM_GMTOFF = (int)*bp - 'M'; 519#endif 520#ifdef TM_ZONE 521 tm->TM_ZONE = NULL; /* XXX */ 522#endif 523 bp++; 524 continue; 525 } 526 return NULL; 527 } 528 offs = 0; 529 for (i = 0; i < 4; ) { 530 if (isdigit(*bp)) { 531 offs = offs * 10 + (*bp++ - '0'); 532 i++; 533 continue; 534 } 535 if (i == 2 && *bp == ':') { 536 bp++; 537 continue; 538 } 539 break; 540 } 541 switch (i) { 542 case 2: 543 offs *= 100; 544 break; 545 case 4: 546 i = offs % 100; 547 if (i >= 60) 548 return NULL; 549 /* Convert minutes into decimal */ 550 offs = (offs / 100) * 100 + (i * 50) / 30; 551 break; 552 default: 553 return NULL; 554 } 555 if (neg) 556 offs = -offs; 557 tm->tm_isdst = 0; /* XXX */ 558#ifdef TM_GMTOFF 559 tm->TM_GMTOFF = offs; 560#endif 561#ifdef TM_ZONE 562 tm->TM_ZONE = NULL; /* XXX */ 563#endif 564 continue; 565 566 /* 567 * Miscellaneous conversions. 568 */ 569 case 'n': /* Any kind of white-space. */ 570 case 't': 571 _LEGAL_ALT(0); 572 while (isspace(*bp)) 573 bp++; 574 break; 575 576 577 default: /* Unknown/unsupported conversion. */ 578 return (NULL); 579 } 580 581 582 } 583 584 /* 585 * We need to evaluate the two digit year spec (%y) 586 * last as we can get a century spec (%C) at any time. 587 */ 588 if (relyear != -1) { 589 if (century == TM_YEAR_BASE) { 590 if (relyear <= 68) 591 tm->tm_year = relyear + 2000 - TM_YEAR_BASE; 592 else 593 tm->tm_year = relyear + 1900 - TM_YEAR_BASE; 594 } else { 595 tm->tm_year = relyear + century - TM_YEAR_BASE; 596 } 597 fields |= FIELD_TM_YEAR; 598 } 599 600 /* Compute some missing values when possible. 
*/ 601 if (fields & FIELD_TM_YEAR) { 602 const int year = tm->tm_year + TM_YEAR_BASE; 603 const int *mon_lens = mon_lengths[isleap(year)]; 604 if (!(fields & FIELD_TM_YDAY) && 605 (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) { 606 tm->tm_yday = tm->tm_mday - 1; 607 for (i = 0; i < tm->tm_mon; i++) 608 tm->tm_yday += mon_lens[i]; 609 fields |= FIELD_TM_YDAY; 610 } 611 if (fields & FIELD_TM_YDAY) { 612 int days = tm->tm_yday; 613 if (!(fields & FIELD_TM_WDAY)) { 614 tm->tm_wday = EPOCH_WDAY + 615 ((year - EPOCH_YEAR) % DAYSPERWEEK) * 616 (DAYSPERNYEAR % DAYSPERWEEK) + 617 leaps_thru_end_of(year - 1) - 618 leaps_thru_end_of(EPOCH_YEAR - 1) + 619 tm->tm_yday; 620 tm->tm_wday %= DAYSPERWEEK; 621 if (tm->tm_wday < 0) 622 tm->tm_wday += DAYSPERWEEK; 623 } 624 if (!(fields & FIELD_TM_MON)) { 625 tm->tm_mon = 0; 626 while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon]) 627 days -= mon_lens[tm->tm_mon++]; 628 } 629 if (!(fields & FIELD_TM_MDAY)) 630 tm->tm_mday = days + 1; 631 } 632 } 633 634 return ((char *)bp); 635} 636 637 638static int 639_conv_num(const unsigned char **buf, int *dest, int llim, int ulim) 640{ 641 int result = 0; 642 int rulim = ulim; 643 644 if (**buf < '0' || **buf > '9') 645 return (0); 646 647 /* we use rulim to break out of the loop when we run out of digits */ 648 do { 649 result *= 10; 650 result += *(*buf)++ - '0'; 651 rulim /= 10; 652 } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9'); 653 654 if (result < llim || result > ulim) 655 return (0); 656 657 *dest = result; 658 return (1); 659} 660 661static const u_char * 662_find_string(const u_char *bp, int *tgt, const char * const *n1, 663 const char * const *n2, int c) 664{ 665 int i; 666 unsigned int len; 667 668 /* check full name - then abbreviated ones */ 669 for (; n1 != NULL; n1 = n2, n2 = NULL) { 670 for (i = 0; i < c; i++, n1++) { 671 len = strlen(*n1); 672 if (strncasecmp(*n1, (const char *)bp, len) == 0) { 673 *tgt = i; 674 return bp + len; 675 } 
676 } 677 } 678 679 /* Nothing matched */ 680 return NULL; 681} 682 683static int 684leaps_thru_end_of(const int y) 685{ 686 return (y >= 0) ? (y / 4 - y / 100 + y / 400) : 687 -(leaps_thru_end_of(-(y + 1)) + 1); 688} 689