1/* 2 date_strptime.c: Coded by Tadayoshi Funaba 2011,2012 3*/ 4 5#include "ruby.h" 6#include "ruby/encoding.h" 7#include "ruby/re.h" 8#include <ctype.h> 9 10static const char *day_names[] = { 11 "Sunday", "Monday", "Tuesday", "Wednesday", 12 "Thursday", "Friday", "Saturday", 13 "Sun", "Mon", "Tue", "Wed", 14 "Thu", "Fri", "Sat" 15}; 16 17static const char *month_names[] = { 18 "January", "February", "March", "April", 19 "May", "June", "July", "August", "September", 20 "October", "November", "December", 21 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 22 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 23}; 24 25static const char *merid_names[] = { 26 "am", "pm", 27 "a.m.", "p.m." 28}; 29 30static const char *extz_pats[] = { 31 ":z", 32 "::z", 33 ":::z" 34}; 35 36#define sizeof_array(o) (sizeof o / sizeof o[0]) 37 38#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0) 39#define f_add(x,y) rb_funcall(x, '+', 1, y) 40#define f_sub(x,y) rb_funcall(x, '-', 1, y) 41#define f_mul(x,y) rb_funcall(x, '*', 1, y) 42#define f_div(x,y) rb_funcall(x, '/', 1, y) 43#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y) 44#define f_mod(x,y) rb_funcall(x, '%', 1, y) 45#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y) 46 47#define f_lt_p(x,y) rb_funcall(x, '<', 1, y) 48#define f_gt_p(x,y) rb_funcall(x, '>', 1, y) 49#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y) 50#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y) 51 52#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s) 53#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i) 54#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i) 55 56#define issign(c) ((c) == '-' || (c) == '+') 57 58static int 59num_pattern_p(const char *s) 60{ 61 if (isdigit((unsigned char)*s)) 62 return 1; 63 if (*s == '%') { 64 s++; 65 if (*s == 'E' || *s == 'O') 66 s++; 67 if (*s && 68 (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) || 69 isdigit((unsigned char)*s))) 70 return 1; 71 } 72 return 0; 73} 74 75#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1]) 76 77static long 78read_digits(const char *s, VALUE *n, size_t width) 79{ 80 size_t l; 81 82 l = strspn(s, "0123456789"); 83 84 if (l == 0) 85 return 0; 86 87 if (width < l) 88 l = width; 89 90 if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) { 91 const char *os = s; 92 long v; 93 94 v = 0; 95 while ((size_t)(s - os) < l) { 96 v *= 10; 97 v += *s - '0'; 98 s++; 99 } 100 if (os == s) 101 return 0; 102 *n = LONG2NUM(v); 103 return l; 104 } 105 else { 106 char *s2 = ALLOCA_N(char, l + 1); 107 memcpy(s2, s, l); 108 s2[l] = '\0'; 109 *n = rb_cstr_to_inum(s2, 10, 0); 110 return l; 111 } 112} 113 114#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v) 115#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k))) 116#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k))) 117 118#define fail() \ 119{ \ 120 set_hash("_fail", Qtrue); \ 121 return 0; \ 122} 123 124#define fail_p() (!NIL_P(ref_hash("_fail"))) 125 126#define READ_DIGITS(n,w) \ 127{ \ 128 size_t l; \ 129 l = read_digits(&str[si], &n, w); \ 130 if (l == 0) \ 131 fail(); \ 132 si += l; \ 133} 134 135#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX) 136 137static int 138valid_range_p(VALUE v, int a, int b) 139{ 140 if (FIXNUM_P(v)) { 141 int vi = FIX2INT(v); 142 return !(vi < a || vi > b); 143 } 144 return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b))); 145} 146 147#define recur(fmt) \ 148{ \ 149 size_t l; \ 150 l = date__strptime_internal(&str[si], slen - si, \ 151 fmt, sizeof fmt - 1, hash); \ 152 if (fail_p()) \ 153 return 0; \ 154 si += l; \ 155} 156 157VALUE date_zone_to_diff(VALUE); 158 159static size_t 160date__strptime_internal(const char *str, size_t slen, 161 const char *fmt, size_t flen, VALUE hash) 162{ 163 size_t si, fi; 164 int c; 165 166 si = fi = 0; 167 168 while (fi < flen) { 169 170 switch (fmt[fi]) { 171 case '%': 172 173 again: 174 fi++; 175 c = fmt[fi]; 176 177 switch (c) { 178 case 'E': 179 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1])) 180 goto again; 181 fi--; 182 goto ordinal; 183 case 'O': 184 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1])) 185 goto again; 186 fi--; 187 goto ordinal; 188 case ':': 189 { 190 int i; 191 192 for (i = 0; i < (int)sizeof_array(extz_pats); i++) 193 if (strncmp(extz_pats[i], &fmt[fi], 194 strlen(extz_pats[i])) == 0) { 195 fi += i; 196 goto again; 197 } 198 fail(); 199 } 200 201 case 'A': 202 case 'a': 203 { 204 int i; 205 206 for (i = 0; i < (int)sizeof_array(day_names); i++) { 207 size_t l = strlen(day_names[i]); 208 if (strncasecmp(day_names[i], &str[si], l) == 0) { 209 si += l; 210 set_hash("wday", INT2FIX(i % 7)); 211 goto matched; 212 } 213 } 214 fail(); 215 } 216 case 'B': 217 case 'b': 218 case 'h': 219 { 220 int i; 221 222 for (i = 0; i < (int)sizeof_array(month_names); i++) { 223 size_t l = strlen(month_names[i]); 224 if (strncasecmp(month_names[i], &str[si], l) == 0) { 225 si += l; 226 set_hash("mon", INT2FIX((i % 12) + 1)); 227 goto matched; 228 } 229 } 230 fail(); 231 } 232 233 case 'C': 234 { 235 VALUE n; 236 237 if (NUM_PATTERN_P()) 238 READ_DIGITS(n, 2) 239 else 240 READ_DIGITS_MAX(n) 241 set_hash("_cent", n); 242 goto matched; 243 } 244 245 case 'c': 246 recur("%a %b %e %H:%M:%S %Y"); 247 goto matched; 248 249 case 'D': 250 recur("%m/%d/%y"); 251 goto matched; 252 253 case 'd': 254 case 'e': 255 { 256 VALUE n; 257 258 if (str[si] == ' ') { 259 si++; 260 READ_DIGITS(n, 1); 261 } else { 262 READ_DIGITS(n, 2); 263 } 264 if (!valid_range_p(n, 1, 31)) 265 fail(); 266 set_hash("mday", n); 267 goto matched; 268 } 269 270 case 'F': 271 recur("%Y-%m-%d"); 272 goto matched; 273 274 case 'G': 275 { 276 VALUE n; 277 278 if (NUM_PATTERN_P()) 279 READ_DIGITS(n, 4) 280 else 281 READ_DIGITS_MAX(n) 282 set_hash("cwyear", n); 283 goto matched; 284 } 285 286 case 'g': 287 { 288 VALUE n; 289 290 READ_DIGITS(n, 2); 291 if (!valid_range_p(n, 0, 99)) 292 fail(); 293 set_hash("cwyear",n); 294 set_hash("_cent", 295 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20)); 296 goto matched; 297 } 298 299 case 'H': 300 case 'k': 301 { 302 VALUE n; 303 304 if (str[si] == ' ') { 305 si++; 306 READ_DIGITS(n, 1); 307 } else { 308 READ_DIGITS(n, 2); 309 } 310 if (!valid_range_p(n, 0, 24)) 311 fail(); 312 set_hash("hour", n); 313 goto matched; 314 } 315 316 case 'I': 317 case 'l': 318 { 319 VALUE n; 320 321 if (str[si] == ' ') { 322 si++; 323 READ_DIGITS(n, 1); 324 } else { 325 READ_DIGITS(n, 2); 326 } 327 if (!valid_range_p(n, 1, 12)) 328 fail(); 329 set_hash("hour", n); 330 goto matched; 331 } 332 333 case 'j': 334 { 335 VALUE n; 336 337 READ_DIGITS(n, 3); 338 if (!valid_range_p(n, 1, 366)) 339 fail(); 340 set_hash("yday", n); 341 goto matched; 342 } 343 344 case 'L': 345 case 'N': 346 { 347 VALUE n; 348 int sign = 1; 349 size_t osi; 350 351 if (issign(str[si])) { 352 if (str[si] == '-') 353 sign = -1; 354 si++; 355 } 356 osi = si; 357 if (NUM_PATTERN_P()) 358 READ_DIGITS(n, c == 'L' ? 3 : 9) 359 else 360 READ_DIGITS_MAX(n) 361 if (sign == -1) 362 n = f_negate(n); 363 set_hash("sec_fraction", 364 rb_rational_new2(n, 365 f_expt(INT2FIX(10), 366 ULONG2NUM(si - osi)))); 367 goto matched; 368 } 369 370 case 'M': 371 { 372 VALUE n; 373 374 READ_DIGITS(n, 2); 375 if (!valid_range_p(n, 0, 59)) 376 fail(); 377 set_hash("min", n); 378 goto matched; 379 } 380 381 case 'm': 382 { 383 VALUE n; 384 385 READ_DIGITS(n, 2); 386 if (!valid_range_p(n, 1, 12)) 387 fail(); 388 set_hash("mon", n); 389 goto matched; 390 } 391 392 case 'n': 393 case 't': 394 recur(" "); 395 goto matched; 396 397 case 'P': 398 case 'p': 399 { 400 int i; 401 402 for (i = 0; i < 4; i++) { 403 size_t l = strlen(merid_names[i]); 404 if (strncasecmp(merid_names[i], &str[si], l) == 0) { 405 si += l; 406 set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12)); 407 goto matched; 408 } 409 } 410 fail(); 411 } 412 413 case 'Q': 414 { 415 VALUE n; 416 int sign = 1; 417 418 if (str[si] == '-') { 419 sign = -1; 420 si++; 421 } 422 READ_DIGITS_MAX(n); 423 if (sign == -1) 424 n = f_negate(n); 425 set_hash("seconds", 426 rb_rational_new2(n, 427 f_expt(INT2FIX(10), 428 INT2FIX(3)))); 429 goto matched; 430 } 431 432 case 'R': 433 recur("%H:%M"); 434 goto matched; 435 436 case 'r': 437 recur("%I:%M:%S %p"); 438 goto matched; 439 440 case 'S': 441 { 442 VALUE n; 443 444 READ_DIGITS(n, 2); 445 if (!valid_range_p(n, 0, 60)) 446 fail(); 447 set_hash("sec", n); 448 goto matched; 449 } 450 451 case 's': 452 { 453 VALUE n; 454 int sign = 1; 455 456 if (str[si] == '-') { 457 sign = -1; 458 si++; 459 } 460 READ_DIGITS_MAX(n); 461 if (sign == -1) 462 n = f_negate(n); 463 set_hash("seconds", n); 464 goto matched; 465 } 466 467 case 'T': 468 recur("%H:%M:%S"); 469 goto matched; 470 471 case 'U': 472 case 'W': 473 { 474 VALUE n; 475 476 READ_DIGITS(n, 2); 477 if (!valid_range_p(n, 0, 53)) 478 fail(); 479 set_hash(c == 'U' ? "wnum0" : "wnum1", n); 480 goto matched; 481 } 482 483 case 'u': 484 { 485 VALUE n; 486 487 READ_DIGITS(n, 1); 488 if (!valid_range_p(n, 1, 7)) 489 fail(); 490 set_hash("cwday", n); 491 goto matched; 492 } 493 494 case 'V': 495 { 496 VALUE n; 497 498 READ_DIGITS(n, 2); 499 if (!valid_range_p(n, 1, 53)) 500 fail(); 501 set_hash("cweek", n); 502 goto matched; 503 } 504 505 case 'v': 506 recur("%e-%b-%Y"); 507 goto matched; 508 509 case 'w': 510 { 511 VALUE n; 512 513 READ_DIGITS(n, 1); 514 if (!valid_range_p(n, 0, 6)) 515 fail(); 516 set_hash("wday", n); 517 goto matched; 518 } 519 520 case 'X': 521 recur("%H:%M:%S"); 522 goto matched; 523 524 case 'x': 525 recur("%m/%d/%y"); 526 goto matched; 527 528 case 'Y': 529 { 530 VALUE n; 531 int sign = 1; 532 533 if (issign(str[si])) { 534 if (str[si] == '-') 535 sign = -1; 536 si++; 537 } 538 if (NUM_PATTERN_P()) 539 READ_DIGITS(n, 4) 540 else 541 READ_DIGITS_MAX(n) 542 if (sign == -1) 543 n = f_negate(n); 544 set_hash("year", n); 545 goto matched; 546 } 547 548 case 'y': 549 { 550 VALUE n; 551 int sign = 1; 552 553 READ_DIGITS(n, 2); 554 if (!valid_range_p(n, 0, 99)) 555 fail(); 556 if (sign == -1) 557 n = f_negate(n); 558 set_hash("year", n); 559 set_hash("_cent", 560 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20)); 561 goto matched; 562 } 563 564 case 'Z': 565 case 'z': 566 { 567 static const char pat_source[] = 568 "\\A(" 569 "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?" 570 "|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b" 571 "|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b" 572 ")"; 573 static VALUE pat = Qnil; 574 VALUE m, b; 575 576 if (NIL_P(pat)) { 577 pat = rb_reg_new(pat_source, sizeof pat_source - 1, 578 ONIG_OPTION_IGNORECASE); 579 rb_gc_register_mark_object(pat); 580 } 581 582 b = rb_backref_get(); 583 rb_match_busy(b); 584 m = f_match(pat, rb_usascii_str_new2(&str[si])); 585 586 if (!NIL_P(m)) { 587 VALUE s, l, o; 588 589 s = rb_reg_nth_match(1, m); 590 l = f_end(m, INT2FIX(0)); 591 o = date_zone_to_diff(s); 592 si += NUM2LONG(l); 593 set_hash("zone", s); 594 set_hash("offset", o); 595 rb_backref_set(b); 596 goto matched; 597 } 598 rb_backref_set(b); 599 fail(); 600 } 601 602 case '%': 603 if (str[si] != '%') 604 fail(); 605 si++; 606 goto matched; 607 608 case '+': 609 recur("%a %b %e %H:%M:%S %Z %Y"); 610 goto matched; 611 612 default: 613 if (str[si] != '%') 614 fail(); 615 si++; 616 if (fi < flen) 617 if (str[si] != fmt[fi]) 618 fail(); 619 si++; 620 goto matched; 621 } 622 case ' ': 623 case '\t': 624 case '\n': 625 case '\v': 626 case '\f': 627 case '\r': 628 while (isspace((unsigned char)str[si])) 629 si++; 630 fi++; 631 break; 632 default: 633 ordinal: 634 if (str[si] != fmt[fi]) 635 fail(); 636 si++; 637 fi++; 638 break; 639 matched: 640 fi++; 641 break; 642 } 643 } 644 645 return si; 646} 647 648VALUE 649date__strptime(const char *str, size_t slen, 650 const char *fmt, size_t flen, VALUE hash) 651{ 652 size_t si; 653 VALUE cent, merid; 654 655 si = date__strptime_internal(str, slen, fmt, flen, hash); 656 657 if (slen > si) { 658 VALUE s; 659 660 s = rb_usascii_str_new(&str[si], slen - si); 661 set_hash("leftover", s); 662 } 663 664 if (fail_p()) 665 return Qnil; 666 667 cent = ref_hash("_cent"); 668 if (!NIL_P(cent)) { 669 VALUE year; 670 671 year = ref_hash("cwyear"); 672 if (!NIL_P(year)) 673 set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100)))); 674 year = ref_hash("year"); 675 if (!NIL_P(year)) 676 set_hash("year", f_add(year, f_mul(cent, INT2FIX(100)))); 677 del_hash("_cent"); 678 } 679 680 merid = ref_hash("_merid"); 681 if (!NIL_P(merid)) { 682 VALUE hour; 683 684 hour = ref_hash("hour"); 685 if (!NIL_P(hour)) { 686 hour = f_mod(hour, INT2FIX(12)); 687 set_hash("hour", f_add(hour, merid)); 688 } 689 del_hash("_merid"); 690 } 691 692 return hash; 693} 694 695/* 696Local variables: 697c-file-style: "ruby" 698End: 699*/ 700