1/*
2  date_strptime.c: Coded by Tadayoshi Funaba 2011,2012
3*/
4
5#include "ruby.h"
6#include "ruby/encoding.h"
7#include "ruby/re.h"
8#include <ctype.h>
9
/*
 * Weekday names accepted by %A / %a: the seven full names followed by the
 * seven abbreviations, both starting at Sunday, so that (index % 7) is the
 * weekday number.  Full names come first so they are tried before their
 * abbreviations.  The pointers themselves are const: the table is never
 * modified.
 */
static const char *const day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};
16
/*
 * Month names accepted by %B / %b / %h: the twelve full names followed by
 * the twelve abbreviations, both starting at January, so that
 * (index % 12) + 1 is the month number.  "May" appears in both halves.
 * The pointers themselves are const: the table is never modified.
 */
static const char *const month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
24
/*
 * Meridian indicators accepted by %P / %p.  Even indexes are the "am"
 * spellings and odd indexes the "pm" spellings; the parser relies on this
 * via (index % 2).  The pointers themselves are const: the table is never
 * modified.
 */
static const char *const merid_names[] = {
    "am", "pm",
    "a.m.", "p.m."
};
29
/*
 * Extended zone conversions (%:z, %::z, %:::z) as they appear after the
 * '%'.  The index of an entry equals the number of extra colons before the
 * final ":z", which the parser uses to skip ahead to the 'z'.
 * The pointers themselves are const: the table is never modified.
 */
static const char *const extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
35
/* Number of elements in a true array (not a pointer). */
#define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))
37
/*
 * Thin wrappers that send a Ruby method to x via rb_funcall.
 * Single-character operators are passed as their character constant;
 * multi-character method names are looked up with rb_intern.
 */

/* Arithmetic: -x, x + y, x - y, x * y, x / y, x.div(y), x % y, x ** y. */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/* Comparisons; each yields the VALUE returned by the Ruby method. */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Regexp helpers: r.match(s), o[i] and o.end(i). */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)
55
/* True when c is an explicit sign character ('-' or '+'). */
#define issign(c) ((c) == '-' || (c) == '+')
57
/*
 * Report whether the format text at s begins with something numeric:
 * either a literal digit, or a '%' conversion (optionally carrying an
 * 'E' or 'O' modifier) whose conversion character is a digit or one of
 * the numeric conversions listed below.
 *
 * Returns 1 if so, 0 otherwise.  s is read as a NUL-terminated string.
 */
static int
num_pattern_p(const char *s)
{
    static const char numeric_convs[] = "CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy";
    const char *p = s;

    if (isdigit((unsigned char)*p))
	return 1;
    if (*p != '%')
	return 0;

    /* Step over the '%' and an optional E/O modifier. */
    p++;
    if (*p == 'E' || *p == 'O')
	p++;

    if (*p == '\0')
	return 0;
    if (strchr(numeric_convs, *p) != NULL)
	return 1;
    return isdigit((unsigned char)*p) ? 1 : 0;
}
74
/*
 * True when the format text right after the current conversion character
 * is numeric (see num_pattern_p).  Expects locals `fmt` and `fi` in scope.
 */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
76
77static long
78read_digits(const char *s, VALUE *n, size_t width)
79{
80    size_t l;
81
82    l = strspn(s, "0123456789");
83
84    if (l == 0)
85	return 0;
86
87    if (width < l)
88	l = width;
89
90    if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
91	const char *os = s;
92	long v;
93
94	v = 0;
95	while ((size_t)(s - os) < l) {
96	    v *= 10;
97	    v += *s - '0';
98	    s++;
99	}
100	if (os == s)
101	    return 0;
102	*n = LONG2NUM(v);
103	return l;
104    }
105    else {
106	char *s2 = ALLOCA_N(char, l + 1);
107	memcpy(s2, s, l);
108	s2[l] = '\0';
109	*n = rb_cstr_to_inum(s2, 10, 0);
110	return l;
111    }
112}
113
/*
 * Accessors for the result hash, keyed by the symbol named k.
 * All three expect a local `hash` in scope.
 */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))
117
/*
 * Abort parsing: record :_fail => true in the result hash and return 0
 * from the enclosing function.  This expands to a brace block (not
 * do/while), and call sites depend on that shape.
 */
#define fail() \
{ \
    set_hash("_fail", Qtrue); \
    return 0; \
}

/* True once a failure has been recorded in the result hash. */
#define fail_p() (!NIL_P(ref_hash("_fail")))
125
/*
 * Read up to w digits at str[si] into n and advance si past them; fails
 * the parse when no digit is present.  Expects locals `str` and `si`.
 * Expands to a brace block, so it is used without a trailing semicolon
 * where an `else` follows.
 */
#define READ_DIGITS(n,w) \
{ \
    size_t l; \
    l = read_digits(&str[si], &n, w); \
    if (l == 0) \
	fail();	\
    si += l; \
}

/* Same as READ_DIGITS, with a width limit of LONG_MAX digits. */
#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
136
137static int
138valid_range_p(VALUE v, int a, int b)
139{
140    if (FIXNUM_P(v)) {
141	int vi = FIX2INT(v);
142	return !(vi < a || vi > b);
143    }
144    return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
145}
146
/*
 * Parse a composite conversion by running the parser on the remaining
 * input with the expanded format `fmt`, then advance si by what it
 * consumed.  Returns 0 from the enclosing function if the nested parse
 * failed.  `fmt` must be a string literal (its length is taken with
 * sizeof); expects locals `str`, `slen`, `si` and `hash`.
 */
#define recur(fmt) \
{ \
    size_t l; \
    l = date__strptime_internal(&str[si], slen - si, \
				fmt, sizeof fmt - 1, hash); \
    if (fail_p()) \
	return 0; \
    si += l; \
}
156
/*
 * Defined outside this file.  Called by the %Z / %z handling with the
 * matched zone string; its result is stored under :offset.
 */
VALUE date_zone_to_diff(VALUE);
158
159static size_t
160date__strptime_internal(const char *str, size_t slen,
161			const char *fmt, size_t flen, VALUE hash)
162{
163    size_t si, fi;
164    int c;
165
166    si = fi = 0;
167
168    while (fi < flen) {
169
170	switch (fmt[fi]) {
171	  case '%':
172
173	  again:
174	    fi++;
175	    c = fmt[fi];
176
177	    switch (c) {
178	      case 'E':
179		if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
180		    goto again;
181		fi--;
182		goto ordinal;
183	      case 'O':
184		if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
185		    goto again;
186		fi--;
187		goto ordinal;
188	      case ':':
189		{
190		    int i;
191
192		    for (i = 0; i < (int)sizeof_array(extz_pats); i++)
193			if (strncmp(extz_pats[i], &fmt[fi],
194					strlen(extz_pats[i])) == 0) {
195			    fi += i;
196			    goto again;
197			}
198		    fail();
199		}
200
201	      case 'A':
202	      case 'a':
203		{
204		    int i;
205
206		    for (i = 0; i < (int)sizeof_array(day_names); i++) {
207			size_t l = strlen(day_names[i]);
208			if (strncasecmp(day_names[i], &str[si], l) == 0) {
209			    si += l;
210			    set_hash("wday", INT2FIX(i % 7));
211			    goto matched;
212			}
213		    }
214		    fail();
215		}
216	      case 'B':
217	      case 'b':
218	      case 'h':
219		{
220		    int i;
221
222		    for (i = 0; i < (int)sizeof_array(month_names); i++) {
223			size_t l = strlen(month_names[i]);
224			if (strncasecmp(month_names[i], &str[si], l) == 0) {
225			    si += l;
226			    set_hash("mon", INT2FIX((i % 12) + 1));
227			    goto matched;
228			}
229		    }
230		    fail();
231		}
232
233	      case 'C':
234		{
235		    VALUE n;
236
237		    if (NUM_PATTERN_P())
238			READ_DIGITS(n, 2)
239		    else
240			READ_DIGITS_MAX(n)
241		    set_hash("_cent", n);
242		    goto matched;
243		}
244
245	      case 'c':
246		recur("%a %b %e %H:%M:%S %Y");
247		goto matched;
248
249	      case 'D':
250		recur("%m/%d/%y");
251		goto matched;
252
253	      case 'd':
254	      case 'e':
255		{
256		    VALUE n;
257
258		    if (str[si] == ' ') {
259			si++;
260			READ_DIGITS(n, 1);
261		    } else {
262			READ_DIGITS(n, 2);
263		    }
264		    if (!valid_range_p(n, 1, 31))
265			fail();
266		    set_hash("mday", n);
267		    goto matched;
268		}
269
270	      case 'F':
271		recur("%Y-%m-%d");
272		goto matched;
273
274	      case 'G':
275		{
276		    VALUE n;
277
278		    if (NUM_PATTERN_P())
279			READ_DIGITS(n, 4)
280		    else
281			READ_DIGITS_MAX(n)
282		    set_hash("cwyear", n);
283		    goto matched;
284		}
285
286	      case 'g':
287		{
288		    VALUE n;
289
290		    READ_DIGITS(n, 2);
291		    if (!valid_range_p(n, 0, 99))
292			fail();
293		    set_hash("cwyear",n);
294		    set_hash("_cent",
295			     INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
296		    goto matched;
297		}
298
299	      case 'H':
300	      case 'k':
301		{
302		    VALUE n;
303
304		    if (str[si] == ' ') {
305			si++;
306			READ_DIGITS(n, 1);
307		    } else {
308			READ_DIGITS(n, 2);
309		    }
310		    if (!valid_range_p(n, 0, 24))
311			fail();
312		    set_hash("hour", n);
313		    goto matched;
314		}
315
316	      case 'I':
317	      case 'l':
318		{
319		    VALUE n;
320
321		    if (str[si] == ' ') {
322			si++;
323			READ_DIGITS(n, 1);
324		    } else {
325			READ_DIGITS(n, 2);
326		    }
327		    if (!valid_range_p(n, 1, 12))
328			fail();
329		    set_hash("hour", n);
330		    goto matched;
331		}
332
333	      case 'j':
334		{
335		    VALUE n;
336
337		    READ_DIGITS(n, 3);
338		    if (!valid_range_p(n, 1, 366))
339			fail();
340		    set_hash("yday", n);
341		    goto matched;
342		}
343
344	      case 'L':
345	      case 'N':
346		{
347		    VALUE n;
348		    int sign = 1;
349		    size_t osi;
350
351		    if (issign(str[si])) {
352			if (str[si] == '-')
353			    sign = -1;
354			si++;
355		    }
356		    osi = si;
357		    if (NUM_PATTERN_P())
358			READ_DIGITS(n, c == 'L' ? 3 : 9)
359		    else
360			READ_DIGITS_MAX(n)
361		    if (sign == -1)
362			n = f_negate(n);
363		    set_hash("sec_fraction",
364			     rb_rational_new2(n,
365					      f_expt(INT2FIX(10),
366						     ULONG2NUM(si - osi))));
367		    goto matched;
368		}
369
370	      case 'M':
371		{
372		    VALUE n;
373
374		    READ_DIGITS(n, 2);
375		    if (!valid_range_p(n, 0, 59))
376			fail();
377		    set_hash("min", n);
378		    goto matched;
379		}
380
381	      case 'm':
382		{
383		    VALUE n;
384
385		    READ_DIGITS(n, 2);
386		    if (!valid_range_p(n, 1, 12))
387			fail();
388		    set_hash("mon", n);
389		    goto matched;
390		}
391
392	      case 'n':
393	      case 't':
394		recur(" ");
395		goto matched;
396
397	      case 'P':
398	      case 'p':
399		{
400		    int i;
401
402		    for (i = 0; i < 4; i++) {
403			size_t l = strlen(merid_names[i]);
404			if (strncasecmp(merid_names[i], &str[si], l) == 0) {
405			    si += l;
406			    set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
407			    goto matched;
408			}
409		    }
410		    fail();
411		}
412
413	      case 'Q':
414		{
415		    VALUE n;
416		    int sign = 1;
417
418		    if (str[si] == '-') {
419			sign = -1;
420			si++;
421		    }
422		    READ_DIGITS_MAX(n);
423		    if (sign == -1)
424			n = f_negate(n);
425		    set_hash("seconds",
426			     rb_rational_new2(n,
427					      f_expt(INT2FIX(10),
428						     INT2FIX(3))));
429		    goto matched;
430		}
431
432	      case 'R':
433		recur("%H:%M");
434		goto matched;
435
436	      case 'r':
437		recur("%I:%M:%S %p");
438		goto matched;
439
440	      case 'S':
441		{
442		    VALUE n;
443
444		    READ_DIGITS(n, 2);
445		    if (!valid_range_p(n, 0, 60))
446			fail();
447		    set_hash("sec", n);
448		    goto matched;
449		}
450
451	      case 's':
452		{
453		    VALUE n;
454		    int sign = 1;
455
456		    if (str[si] == '-') {
457			sign = -1;
458			si++;
459		    }
460		    READ_DIGITS_MAX(n);
461		    if (sign == -1)
462			n = f_negate(n);
463		    set_hash("seconds", n);
464		    goto matched;
465		}
466
467	      case 'T':
468		recur("%H:%M:%S");
469		goto matched;
470
471	      case 'U':
472	      case 'W':
473		{
474		    VALUE n;
475
476		    READ_DIGITS(n, 2);
477		    if (!valid_range_p(n, 0, 53))
478			fail();
479		    set_hash(c == 'U' ? "wnum0" : "wnum1", n);
480		    goto matched;
481		}
482
483	      case 'u':
484		{
485		    VALUE n;
486
487		    READ_DIGITS(n, 1);
488		    if (!valid_range_p(n, 1, 7))
489			fail();
490		    set_hash("cwday", n);
491		    goto matched;
492		}
493
494	      case 'V':
495		{
496		    VALUE n;
497
498		    READ_DIGITS(n, 2);
499		    if (!valid_range_p(n, 1, 53))
500			fail();
501		    set_hash("cweek", n);
502		    goto matched;
503		}
504
505	      case 'v':
506		recur("%e-%b-%Y");
507		goto matched;
508
509	      case 'w':
510		{
511		    VALUE n;
512
513		    READ_DIGITS(n, 1);
514		    if (!valid_range_p(n, 0, 6))
515			fail();
516		    set_hash("wday", n);
517		    goto matched;
518		}
519
520	      case 'X':
521		recur("%H:%M:%S");
522		goto matched;
523
524	      case 'x':
525		recur("%m/%d/%y");
526		goto matched;
527
528	      case 'Y':
529		  {
530		      VALUE n;
531		      int sign = 1;
532
533		      if (issign(str[si])) {
534			  if (str[si] == '-')
535			      sign = -1;
536			  si++;
537		      }
538		      if (NUM_PATTERN_P())
539			  READ_DIGITS(n, 4)
540		      else
541			  READ_DIGITS_MAX(n)
542		    if (sign == -1)
543			n = f_negate(n);
544		      set_hash("year", n);
545		      goto matched;
546		  }
547
548	      case 'y':
549		{
550		    VALUE n;
551		    int sign = 1;
552
553		    READ_DIGITS(n, 2);
554		    if (!valid_range_p(n, 0, 99))
555			fail();
556		    if (sign == -1)
557			n = f_negate(n);
558		    set_hash("year", n);
559		    set_hash("_cent",
560			     INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
561		    goto matched;
562		}
563
564	      case 'Z':
565	      case 'z':
566		{
567		    static const char pat_source[] =
568			"\\A("
569			"(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
570			"|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b"
571			"|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b"
572			")";
573		    static VALUE pat = Qnil;
574		    VALUE m, b;
575
576		    if (NIL_P(pat)) {
577			pat = rb_reg_new(pat_source, sizeof pat_source - 1,
578					 ONIG_OPTION_IGNORECASE);
579			rb_gc_register_mark_object(pat);
580		    }
581
582		    b = rb_backref_get();
583		    rb_match_busy(b);
584		    m = f_match(pat, rb_usascii_str_new2(&str[si]));
585
586		    if (!NIL_P(m)) {
587			VALUE s, l, o;
588
589			s = rb_reg_nth_match(1, m);
590			l = f_end(m, INT2FIX(0));
591			o = date_zone_to_diff(s);
592			si += NUM2LONG(l);
593			set_hash("zone", s);
594			set_hash("offset", o);
595			rb_backref_set(b);
596			goto matched;
597		    }
598		    rb_backref_set(b);
599		    fail();
600		}
601
602	      case '%':
603		if (str[si] != '%')
604		    fail();
605		si++;
606		goto matched;
607
608	      case '+':
609		recur("%a %b %e %H:%M:%S %Z %Y");
610		goto matched;
611
612	      default:
613		if (str[si] != '%')
614		    fail();
615		si++;
616		if (fi < flen)
617		    if (str[si] != fmt[fi])
618			fail();
619		si++;
620		goto matched;
621	    }
622	  case ' ':
623	  case '\t':
624	  case '\n':
625	  case '\v':
626	  case '\f':
627	  case '\r':
628	    while (isspace((unsigned char)str[si]))
629		si++;
630	    fi++;
631	    break;
632	  default:
633	  ordinal:
634	    if (str[si] != fmt[fi])
635		fail();
636	    si++;
637	    fi++;
638	    break;
639	  matched:
640	    fi++;
641	    break;
642	}
643    }
644
645    return si;
646}
647
648VALUE
649date__strptime(const char *str, size_t slen,
650	       const char *fmt, size_t flen, VALUE hash)
651{
652    size_t si;
653    VALUE cent, merid;
654
655    si = date__strptime_internal(str, slen, fmt, flen, hash);
656
657    if (slen > si) {
658	VALUE s;
659
660	s = rb_usascii_str_new(&str[si], slen - si);
661	set_hash("leftover", s);
662    }
663
664    if (fail_p())
665	return Qnil;
666
667    cent = ref_hash("_cent");
668    if (!NIL_P(cent)) {
669	VALUE year;
670
671	year = ref_hash("cwyear");
672	if (!NIL_P(year))
673	    set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
674	year = ref_hash("year");
675	if (!NIL_P(year))
676	    set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
677	del_hash("_cent");
678    }
679
680    merid = ref_hash("_merid");
681    if (!NIL_P(merid)) {
682	VALUE hour;
683
684	hour = ref_hash("hour");
685	if (!NIL_P(hour)) {
686	    hour = f_mod(hour, INT2FIX(12));
687	    set_hash("hour", f_add(hour, merid));
688	}
689	del_hash("_merid");
690    }
691
692    return hash;
693}
694
695/*
696Local variables:
697c-file-style: "ruby"
698End:
699*/
700