1/* date.c:  date parsing for Subversion
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "svn_time.h"
24#include "svn_error.h"
25#include "svn_string.h"
26
27#include "svn_private_config.h"
28#include "private/svn_token.h"
29
/* The actions a template rule can request from the matcher. */
enum rule_action {
  /* Accumulate a decimal value, one digit at a time. */
  ACCUM,

  /* Accumulate microseconds, one digit at a time. */
  MICRO,

  /* Handle a time zone indicator: '+', '-' or 'Z'. */
  TZIND,

  /* Do nothing beyond consuming the value character. */
  NOOP,

  /* If at end-of-value, accept the match.  Otherwise, if the next
     template character matches the current value character, continue
     processing as normal.  Otherwise, attempt to complete matching
     starting immediately after the first subsequent occurrence of
     ']' in the template. */
  SKIPFROM,

  /* Ignore this template character; no value character is consumed. */
  SKIP,

  /* End of template: accept the value if it is exhausted as well. */
  ACCEPT
};
45
46/* How to handle a particular character in a template */
47typedef struct rule
48{
49  char key;                /* The template char that this rule matches */
50  const char *valid;       /* String of valid chars for this rule */
51  enum rule_action action; /* What action to take when the rule is matched */
52  int offset;              /* Where to store the any results of the action,
53                              expressed in terms of bytes relative to the
54                              base of a match_state object. */
55} rule;
56
57/* The parsed values, before localtime/gmt processing */
58typedef struct match_state
59{
60  apr_time_exp_t base;
61  apr_int32_t offhours;
62  apr_int32_t offminutes;
63} match_state;
64
65#define DIGITS "0123456789"
66
67/* A declarative specification of how each template character
68   should be processed, using a rule for each valid symbol. */
69static const rule
70rules[] =
71{
72  { 'Y', DIGITS, ACCUM, APR_OFFSETOF(match_state, base.tm_year) },
73  { 'M', DIGITS, ACCUM, APR_OFFSETOF(match_state, base.tm_mon) },
74  { 'D', DIGITS, ACCUM, APR_OFFSETOF(match_state, base.tm_mday) },
75  { 'h', DIGITS, ACCUM, APR_OFFSETOF(match_state, base.tm_hour) },
76  { 'm', DIGITS, ACCUM, APR_OFFSETOF(match_state, base.tm_min) },
77  { 's', DIGITS, ACCUM, APR_OFFSETOF(match_state, base.tm_sec) },
78  { 'u', DIGITS, MICRO, APR_OFFSETOF(match_state, base.tm_usec) },
79  { 'O', DIGITS, ACCUM, APR_OFFSETOF(match_state, offhours) },
80  { 'o', DIGITS, ACCUM, APR_OFFSETOF(match_state, offminutes) },
81  { '+', "-+", TZIND, 0 },
82  { 'Z', "Z", TZIND, 0 },
83  { ':', ":", NOOP, 0 },
84  { '-', "-", NOOP, 0 },
85  { 'T', "T", NOOP, 0 },
86  { ' ', " ", NOOP, 0 },
87  { '.', ".,", NOOP, 0 },
88  { '[', NULL, SKIPFROM, 0 },
89  { ']', NULL, SKIP, 0 },
90  { '\0', NULL, ACCEPT, 0 },
91};
92
93/* Return the rule associated with TCHAR, or NULL if there
94   is no such rule. */
95static const rule *
96find_rule(char tchar)
97{
98  int i = sizeof(rules)/sizeof(rules[0]);
99  while (i--)
100    if (rules[i].key == tchar)
101      return &rules[i];
102  return NULL;
103}
104
105/* Attempt to match the date-string in VALUE to the provided TEMPLATE,
106   using the rules defined above.  Return TRUE on successful match,
107   FALSE otherwise.  On successful match, fill in *EXP with the
108   matched values and set *LOCALTZ to TRUE if the local time zone
109   should be used to interpret the match (i.e. if no time zone
110   information was provided), or FALSE if not. */
111static svn_boolean_t
112template_match(apr_time_exp_t *expt, svn_boolean_t *localtz,
113               const char *template, const char *value)
114{
115  int multiplier = 100000;
116  int tzind = 0;
117  match_state ms;
118  char *base = (char *)&ms;
119
120  memset(&ms, 0, sizeof(ms));
121
122  for (;;)
123    {
124      const rule *match = find_rule(*template++);
125      char vchar = *value++;
126      apr_int32_t *place;
127
128      if (!match || (match->valid
129                     && (!vchar || !strchr(match->valid, vchar))))
130        return FALSE;
131
132      /* Compute the address of memory location affected by this
133         rule by adding match->offset bytes to the address of ms.
134         Because this is a byte-quantity, it is necessary to cast
135         &ms to char *. */
136      place = (apr_int32_t *)(base + match->offset);
137      switch (match->action)
138        {
139        case ACCUM:
140          *place = *place * 10 + vchar - '0';
141          continue;
142        case MICRO:
143          *place += (vchar - '0') * multiplier;
144          multiplier /= 10;
145          continue;
146        case TZIND:
147          tzind = vchar;
148          continue;
149        case SKIP:
150          value--;
151          continue;
152        case NOOP:
153          continue;
154        case SKIPFROM:
155          if (!vchar)
156            break;
157          match = find_rule(*template);
158          if (!strchr(match->valid, vchar))
159            template = strchr(template, ']') + 1;
160          value--;
161          continue;
162        case ACCEPT:
163          if (vchar)
164            return FALSE;
165          break;
166        }
167
168      break;
169    }
170
171  /* Validate gmt offset here, since we can't reliably do it later. */
172  if (ms.offhours > 23 || ms.offminutes > 59)
173    return FALSE;
174
175  /* tzind will be '+' or '-' for an explicit time zone, 'Z' to
176     indicate UTC, or 0 to indicate local time. */
177  switch (tzind)
178    {
179    case '+':
180      ms.base.tm_gmtoff = ms.offhours * 3600 + ms.offminutes * 60;
181      break;
182    case '-':
183      ms.base.tm_gmtoff = -(ms.offhours * 3600 + ms.offminutes * 60);
184      break;
185    }
186
187  *expt = ms.base;
188  *localtz = (tzind == 0);
189  return TRUE;
190}
191
192static struct unit_words_table {
193  const char *word;
194  apr_time_t value;
195} unit_words_table[] = {
196  /* Word matching does not concern itself with exact days of the month
197   * or leap years so these amounts are always fixed. */
198  { "years",    apr_time_from_sec(60 * 60 * 24 * 365) },
199  { "months",   apr_time_from_sec(60 * 60 * 24 * 30) },
200  { "weeks",    apr_time_from_sec(60 * 60 * 24 * 7) },
201  { "days",     apr_time_from_sec(60 * 60 * 24) },
202  { "hours",    apr_time_from_sec(60 * 60) },
203  { "minutes",  apr_time_from_sec(60) },
204  { "mins",     apr_time_from_sec(60) },
205  { NULL ,      0 }
206};
207
208static svn_token_map_t number_words_map[] = {
209  { "zero", 0 }, { "one", 1 }, { "two", 2 }, { "three", 3 }, { "four", 4 },
210  { "five", 5 }, { "six", 6 }, { "seven", 7 }, { "eight", 8 }, { "nine", 9 },
211  { "ten", 10 }, { "eleven", 11 }, { "twelve", 12 }, { NULL, 0 }
212};
213
214/* Attempt to match the date-string in TEXT according to the following rules:
215 *
216 * "N years|months|weeks|days|hours|minutes ago" resolve to the most recent
217 * revision prior to the specified time. N may either be a word from
218 * NUMBER_WORDS_TABLE defined above, or a non-negative digit.
219 *
220 * Return TRUE on successful match, FALSE otherwise. On successful match,
221 * fill in *EXP with the matched value and set *LOCALTZ to TRUE (this
222 * function always uses local time). Use POOL for temporary allocations. */
223static svn_boolean_t
224words_match(apr_time_exp_t *expt, svn_boolean_t *localtz,
225            apr_time_t now, const char *text, apr_pool_t *pool)
226{
227  apr_time_t t = -1;
228  const char *word;
229  apr_array_header_t *words;
230  int i;
231  int n = -1;
232  const char *unit_str;
233
234  words = svn_cstring_split(text, " ", TRUE /* chop_whitespace */, pool);
235
236  if (words->nelts != 3)
237    return FALSE;
238
239  word = APR_ARRAY_IDX(words, 0, const char *);
240
241  /* Try to parse a number word. */
242  n = svn_token__from_word(number_words_map, word);
243
244  if (n == SVN_TOKEN_UNKNOWN)
245    {
246      svn_error_t *err;
247
248      /* Try to parse a digit. */
249      err = svn_cstring_atoi(&n, word);
250      if (err)
251        {
252          svn_error_clear(err);
253          return FALSE;
254        }
255      if (n < 0)
256        return FALSE;
257    }
258
259  /* Try to parse a unit. */
260  word = APR_ARRAY_IDX(words, 1, const char *);
261  for (i = 0, unit_str = unit_words_table[i].word;
262       unit_str = unit_words_table[i].word, unit_str != NULL; i++)
263    {
264      /* Tolerate missing trailing 's' from unit. */
265      if (!strcmp(word, unit_str) ||
266          !strncmp(word, unit_str, strlen(unit_str) - 1))
267        {
268          t = now - (n * unit_words_table[i].value);
269          break;
270        }
271    }
272
273  if (t < 0)
274    return FALSE;
275
276  /* Require trailing "ago". */
277  word = APR_ARRAY_IDX(words, 2, const char *);
278  if (strcmp(word, "ago"))
279    return FALSE;
280
281  if (apr_time_exp_lt(expt, t) != APR_SUCCESS)
282    return FALSE;
283
284  *localtz = TRUE;
285  return TRUE;
286}
287
/* The highest valid day number for each month, indexed by zero-based
   month (0 is January).  February is listed with 29 days; whether the
   29th exists in a given year has to be checked separately. */
static const int
valid_days_by_month[] = {
  31, 29, 31, 30,
  31, 30, 31, 31,
  30, 31, 30, 31
};
294
295svn_error_t *
296svn_parse_date(svn_boolean_t *matched, apr_time_t *result, const char *text,
297               apr_time_t now, apr_pool_t *pool)
298{
299  apr_time_exp_t expt, expnow;
300  apr_status_t apr_err;
301  svn_boolean_t localtz;
302
303  *matched = FALSE;
304
305  apr_err = apr_time_exp_lt(&expnow, now);
306  if (apr_err != APR_SUCCESS)
307    return svn_error_wrap_apr(apr_err, _("Can't manipulate current date"));
308
309  if (template_match(&expt, &localtz, /* ISO-8601 extended, date only */
310                     "YYYY-M[M]-D[D]",
311                     text)
312      || template_match(&expt, &localtz, /* ISO-8601 extended, UTC */
313                        "YYYY-M[M]-D[D]Th[h]:mm[:ss[.u[u[u[u[u[u][Z]",
314                        text)
315      || template_match(&expt, &localtz, /* ISO-8601 extended, with offset */
316                        "YYYY-M[M]-D[D]Th[h]:mm[:ss[.u[u[u[u[u[u]+OO[:oo]",
317                        text)
318      || template_match(&expt, &localtz, /* ISO-8601 basic, date only */
319                        "YYYYMMDD",
320                        text)
321      || template_match(&expt, &localtz, /* ISO-8601 basic, UTC */
322                        "YYYYMMDDThhmm[ss[.u[u[u[u[u[u][Z]",
323                        text)
324      || template_match(&expt, &localtz, /* ISO-8601 basic, with offset */
325                        "YYYYMMDDThhmm[ss[.u[u[u[u[u[u]+OO[oo]",
326                        text)
327      || template_match(&expt, &localtz, /* "svn log" format */
328                        "YYYY-M[M]-D[D] h[h]:mm[:ss[.u[u[u[u[u[u][ +OO[oo]",
329                        text)
330      || template_match(&expt, &localtz, /* GNU date's iso-8601 */
331                        "YYYY-M[M]-D[D]Th[h]:mm[:ss[.u[u[u[u[u[u]+OO[oo]",
332                        text))
333    {
334      expt.tm_year -= 1900;
335      expt.tm_mon -= 1;
336    }
337  else if (template_match(&expt, &localtz, /* Just a time */
338                          "h[h]:mm[:ss[.u[u[u[u[u[u]",
339                          text))
340    {
341      expt.tm_year = expnow.tm_year;
342      expt.tm_mon = expnow.tm_mon;
343      expt.tm_mday = expnow.tm_mday;
344    }
345  else if (!words_match(&expt, &localtz, now, text, pool))
346    return SVN_NO_ERROR;
347
348  /* Range validation, allowing for leap seconds */
349  if (expt.tm_mon < 0 || expt.tm_mon > 11
350      || expt.tm_mday > valid_days_by_month[expt.tm_mon]
351      || expt.tm_mday < 1
352      || expt.tm_hour > 23
353      || expt.tm_min > 59
354      || expt.tm_sec > 60)
355    return SVN_NO_ERROR;
356
357  /* february/leap-year day checking.  tm_year is bias-1900, so centuries
358     that equal 100 (mod 400) are multiples of 400. */
359  if (expt.tm_mon == 1
360      && expt.tm_mday == 29
361      && (expt.tm_year % 4 != 0
362          || (expt.tm_year % 100 == 0 && expt.tm_year % 400 != 100)))
363    return SVN_NO_ERROR;
364
365  if (localtz)
366    {
367      apr_time_t candidate;
368      apr_time_exp_t expthen;
369
370      /* We need to know the GMT offset of the requested time, not the
371         current time.  In some cases, that quantity is ambiguous,
372         since at the end of daylight saving's time, an hour's worth
373         of local time happens twice.  For those cases, we should
374         prefer DST if we are currently in DST, and standard time if
375         not.  So, calculate the time value using the current time's
376         GMT offset and use the GMT offset of the resulting time. */
377      expt.tm_gmtoff = expnow.tm_gmtoff;
378      apr_err = apr_time_exp_gmt_get(&candidate, &expt);
379      if (apr_err != APR_SUCCESS)
380        return svn_error_wrap_apr(apr_err,
381                                  _("Can't calculate requested date"));
382      apr_err = apr_time_exp_lt(&expthen, candidate);
383      if (apr_err != APR_SUCCESS)
384        return svn_error_wrap_apr(apr_err, _("Can't expand time"));
385      expt.tm_gmtoff = expthen.tm_gmtoff;
386    }
387  apr_err = apr_time_exp_gmt_get(result, &expt);
388  if (apr_err != APR_SUCCESS)
389    return svn_error_wrap_apr(apr_err, _("Can't calculate requested date"));
390
391  *matched = TRUE;
392  return SVN_NO_ERROR;
393}
394