1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22#include "tool_setup.h"
23
24#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
25#include <curl/mprintf.h>
26
27#include "tool_urlglob.h"
28#include "tool_vms.h"
29
30#include "memdebug.h" /* keep this as LAST include */
31
32typedef enum {
33  GLOB_OK,
34  GLOB_NO_MEM = CURLE_OUT_OF_MEMORY,
35  GLOB_ERROR = CURLE_URL_MALFORMAT
36} GlobCode;
37
/*
 * GLOBERROR() records an error message and the column it refers to in the
 * URLGlob that the local variable 'glob' points to, and evaluates to 'code'.
 * It is meant to be used as "return GLOBERROR(...);".
 *
 * The arguments and the whole expansion are parenthesized so that the macro
 * also yields 'code' when used inside a larger expression or an assignment.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
40
41void glob_cleanup(URLGlob* glob);
42
43static GlobCode glob_fixed(URLGlob *glob, char *fixed, size_t len)
44{
45  URLPattern *pat = &glob->pattern[glob->size];
46  pat->type = UPTSet;
47  pat->content.Set.size = 1;
48  pat->content.Set.ptr_s = 0;
49  pat->globindex = -1;
50
51  pat->content.Set.elements = malloc(sizeof(char*));
52
53  if(!pat->content.Set.elements)
54    return GLOBERROR("out of memory", 0, GLOB_NO_MEM);
55
56  pat->content.Set.elements[0] = malloc(len+1);
57  if(!pat->content.Set.elements[0])
58    return GLOBERROR("out of memory", 0, GLOB_NO_MEM);
59
60  memcpy(pat->content.Set.elements[0], fixed, len);
61  pat->content.Set.elements[0][len] = 0;
62
63  return GLOB_OK;
64}
65
/* multiply
 *
 * Multiplies *amount by 'with' and checks for overflow.
 *
 * Returns 0 when the product fits in an unsigned long; *amount then holds
 * the product. Returns 1 when it does not fit; *amount is then untouched.
 * Multiplying by zero is not an overflow: *amount becomes zero.
 */
static int multiply(unsigned long *amount, long with)
{
  /* the arithmetic is done in the unsigned domain, which is what the usual
     arithmetic conversions do with 'with' anyway */
  unsigned long factor = (unsigned long)with;
  unsigned long sum;

  if(!factor) {
    /* the overflow check below divides by the factor, so zero needs to be
       handled separately to not divide by zero */
    *amount = 0;
    return 0;
  }

  sum = *amount * factor;
  if(sum/factor != *amount)
    return 1; /* didn't fit, bail out */
  *amount = sum;
  return 0;
}
78
79static GlobCode glob_set(URLGlob *glob, char **patternp,
80                         size_t *posp, unsigned long *amount,
81                         int globindex)
82{
83  /* processes a set expression with the point behind the opening '{'
84     ','-separated elements are collected until the next closing '}'
85  */
86  URLPattern *pat;
87  bool done = FALSE;
88  char *buf = glob->glob_buffer;
89  char *pattern = *patternp;
90  char *opattern = pattern;
91  size_t opos = *posp-1;
92
93  pat = &glob->pattern[glob->size];
94  /* patterns 0,1,2,... correspond to size=1,3,5,... */
95  pat->type = UPTSet;
96  pat->content.Set.size = 0;
97  pat->content.Set.ptr_s = 0;
98  pat->content.Set.elements = NULL;
99  pat->globindex = globindex;
100
101  while(!done) {
102    switch (*pattern) {
103    case '\0':                  /* URL ended while set was still open */
104      return GLOBERROR("unmatched brace", opos, GLOB_ERROR);
105
106    case '{':
107    case '[':                   /* no nested expressions at this time */
108      return GLOBERROR("nested brace", *posp, GLOB_ERROR);
109
110    case '}':                           /* set element completed */
111      if(opattern == pattern)
112        return GLOBERROR("empty string within braces", *posp, GLOB_ERROR);
113
114      /* add 1 to size since it'll be incremented below */
115      if(multiply(amount, pat->content.Set.size+1))
116        return GLOBERROR("range overflow", 0, GLOB_ERROR);
117
118      /* fall-through */
119    case ',':
120
121      *buf = '\0';
122      if(pat->content.Set.elements) {
123        char **new_arr = realloc(pat->content.Set.elements,
124                                 (pat->content.Set.size + 1) * sizeof(char*));
125        if(!new_arr)
126          return GLOBERROR("out of memory", 0, GLOB_NO_MEM);
127
128        pat->content.Set.elements = new_arr;
129      }
130      else
131        pat->content.Set.elements = malloc(sizeof(char*));
132
133      if(!pat->content.Set.elements)
134        return GLOBERROR("out of memory", 0, GLOB_NO_MEM);
135
136      pat->content.Set.elements[pat->content.Set.size] =
137        strdup(glob->glob_buffer);
138      if(!pat->content.Set.elements[pat->content.Set.size])
139        return GLOBERROR("out of memory", 0, GLOB_NO_MEM);
140      ++pat->content.Set.size;
141
142      if(*pattern == '}') {
143        pattern++; /* pass the closing brace */
144        done = TRUE;
145        continue;
146      }
147
148      buf = glob->glob_buffer;
149      ++pattern;
150      ++(*posp);
151      break;
152
153    case ']':                           /* illegal closing bracket */
154      return GLOBERROR("unexpected close bracket", *posp, GLOB_ERROR);
155
156    case '\\':                          /* escaped character, skip '\' */
157      if(pattern[1]) {
158        ++pattern;
159        ++(*posp);
160      }
161      /* intentional fallthrough */
162    default:
163      *buf++ = *pattern++;              /* copy character to set element */
164      ++(*posp);
165    }
166  }
167
168  *patternp = pattern; /* return with the new position */
169  return GLOB_OK;
170}
171
172static GlobCode glob_range(URLGlob *glob, char **patternp,
173                           size_t *posp, unsigned long *amount,
174                           int globindex)
175{
176  /* processes a range expression with the point behind the opening '['
177     - char range: e.g. "a-z]", "B-Q]"
178     - num range: e.g. "0-9]", "17-2000]"
179     - num range with leading zeros: e.g. "001-999]"
180     expression is checked for well-formedness and collected until the next ']'
181  */
182  URLPattern *pat;
183  int rc;
184  char *pattern = *patternp;
185  char *c;
186
187  pat = &glob->pattern[glob->size];
188  pat->globindex = globindex;
189
190  if(ISALPHA(*pattern)) {
191    /* character range detected */
192    char min_c;
193    char max_c;
194    int step=1;
195
196    pat->type = UPTCharRange;
197
198    rc = sscanf(pattern, "%c-%c", &min_c, &max_c);
199
200    if((rc == 2) && (pattern[3] == ':')) {
201      char *endp;
202      unsigned long lstep;
203      errno = 0;
204      lstep = strtoul(&pattern[3], &endp, 10);
205      if(errno || (*endp != ']'))
206        step = -1;
207      else {
208        pattern = endp+1;
209        step = (int)lstep;
210        if(step > (max_c - min_c))
211          step = -1;
212      }
213    }
214    else
215      pattern += 4;
216
217    *posp += (pattern - *patternp);
218
219    if((rc != 2) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) ||
220       (step < 0) )
221      /* the pattern is not well-formed */
222      return GLOBERROR("bad range", *posp, GLOB_ERROR);
223
224    /* if there was a ":[num]" thing, use that as step or else use 1 */
225    pat->content.CharRange.step = step;
226    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
227    pat->content.CharRange.max_c = max_c;
228
229    if(multiply(amount, (pat->content.CharRange.max_c -
230                         pat->content.CharRange.min_c + 1)))
231      return GLOBERROR("range overflow", *posp, GLOB_ERROR);
232  }
233  else if(ISDIGIT(*pattern)) {
234    /* numeric range detected */
235    unsigned long min_n;
236    unsigned long max_n = 0;
237    unsigned long step_n = 0;
238    char *endp;
239
240    pat->type = UPTNumRange;
241    pat->content.NumRange.padlength = 0;
242
243    if(*pattern == '0') {
244      /* leading zero specified, count them! */
245      c = pattern;
246      while(ISDIGIT(*c)) {
247        c++;
248        ++pat->content.NumRange.padlength; /* padding length is set for all
249                                              instances of this pattern */
250      }
251    }
252
253    errno = 0;
254    min_n = strtoul(pattern, &endp, 10);
255    if(errno || (endp == pattern))
256      endp=NULL;
257    else {
258      if(*endp != '-')
259        endp = NULL;
260      else {
261        pattern = endp+1;
262        errno = 0;
263        max_n = strtoul(pattern, &endp, 10);
264        if(errno || (*endp == ':')) {
265          pattern = endp+1;
266          errno = 0;
267          step_n = strtoul(pattern, &endp, 10);
268          if(errno)
269            /* over/underflow situation */
270            endp = NULL;
271        }
272        else
273          step_n = 1;
274        if(endp && (*endp == ']')) {
275          pattern= endp+1;
276        }
277        else
278          endp = NULL;
279      }
280    }
281
282    *posp += (pattern - *patternp);
283
284    if(!endp || (min_n > max_n) || (step_n > (max_n - min_n)))
285      /* the pattern is not well-formed */
286      return GLOBERROR("bad range", *posp, GLOB_ERROR);
287
288    /* typecasting to ints are fine here since we make sure above that we
289       are within 31 bits */
290    pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
291    pat->content.NumRange.max_n = max_n;
292    pat->content.NumRange.step = step_n;
293
294    if(multiply(amount, (pat->content.NumRange.max_n -
295                         pat->content.NumRange.min_n + 1)))
296      return GLOBERROR("range overflow", *posp, GLOB_ERROR);
297  }
298  else
299    return GLOBERROR("bad range specification", *posp, GLOB_ERROR);
300
301  *patternp = pattern;
302  return GLOB_OK;
303}
304
305static bool peek_ipv6(const char *str, size_t *skip)
306{
307  /*
308   * Scan for a potential IPv6 literal.
309   * - Valid globs contain a hyphen and <= 1 colon.
310   * - IPv6 literals contain no hyphens and >= 2 colons.
311   */
312  size_t i = 0;
313  size_t colons = 0;
314  if(str[i++] != '[') {
315    return FALSE;
316  }
317  for(;;) {
318    const char c = str[i++];
319    if(ISALNUM(c) || c == '.' || c == '%') {
320      /* ok */
321    }
322    else if(c == ':') {
323      colons++;
324    }
325    else if(c == ']') {
326      *skip = i;
327      return colons >= 2 ? TRUE : FALSE;
328    }
329    else {
330      return FALSE;
331    }
332  }
333}
334
335static GlobCode glob_parse(URLGlob *glob, char *pattern,
336                           size_t pos, unsigned long *amount)
337{
338  /* processes a literal string component of a URL
339     special characters '{' and '[' branch to set/range processing functions
340   */
341  GlobCode res = GLOB_OK;
342  int globindex = 0; /* count "actual" globs */
343
344  *amount = 1;
345
346  while(*pattern && !res) {
347    char *buf = glob->glob_buffer;
348    size_t sublen = 0;
349    while(*pattern && *pattern != '{') {
350      if(*pattern == '[') {
351        /* Skip over potential IPv6 literals. */
352        size_t skip;
353        if(peek_ipv6(pattern, &skip)) {
354          memcpy(buf, pattern, skip);
355          buf += skip;
356          pattern += skip;
357          sublen += skip;
358          continue;
359        }
360        break;
361      }
362      if(*pattern == '}' || *pattern == ']')
363        return GLOBERROR("unmatched close brace/bracket", pos, GLOB_ERROR);
364
365      /* only allow \ to escape known "special letters" */
366      if(*pattern == '\\' &&
367         (*(pattern+1) == '{' || *(pattern+1) == '[' ||
368          *(pattern+1) == '}' || *(pattern+1) == ']') ) {
369
370        /* escape character, skip '\' */
371        ++pattern;
372        ++pos;
373      }
374      *buf++ = *pattern++; /* copy character to literal */
375      ++pos;
376      sublen++;
377    }
378    if(sublen) {
379      /* we got a literal string, add it as a single-item list */
380      *buf = '\0';
381      res = glob_fixed(glob, glob->glob_buffer, sublen);
382    }
383    else {
384      switch (*pattern) {
385      case '\0': /* done  */
386        break;
387
388      case '{':
389        /* process set pattern */
390        pattern++;
391        pos++;
392        res = glob_set(glob, &pattern, &pos, amount, globindex++);
393        break;
394
395      case '[':
396        /* process range pattern */
397        pattern++;
398        pos++;
399        res = glob_range(glob, &pattern, &pos, amount, globindex++);
400        break;
401      }
402    }
403
404    if(++glob->size > GLOB_PATTERN_NUM)
405      return GLOBERROR("too many globs", pos, GLOB_ERROR);
406  }
407  return res;
408}
409
410int glob_url(URLGlob** glob, char* url, unsigned long *urlnum, FILE *error)
411{
412  /*
413   * We can deal with any-size, just make a buffer with the same length
414   * as the specified URL!
415   */
416  URLGlob *glob_expand;
417  unsigned long amount = 0;
418  char *glob_buffer;
419  GlobCode res;
420
421  *glob = NULL;
422
423  glob_buffer = malloc(strlen(url) + 1);
424  if(!glob_buffer)
425    return CURLE_OUT_OF_MEMORY;
426
427  glob_expand = calloc(1, sizeof(URLGlob));
428  if(!glob_expand) {
429    Curl_safefree(glob_buffer);
430    return CURLE_OUT_OF_MEMORY;
431  }
432  glob_expand->urllen = strlen(url);
433  glob_expand->glob_buffer = glob_buffer;
434
435  res = glob_parse(glob_expand, url, 1, &amount);
436  if(!res)
437    *urlnum = amount;
438  else {
439    if(error && glob_expand->error) {
440      char text[128];
441      const char *t;
442      if(glob_expand->pos) {
443        snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error,
444                 glob_expand->pos);
445        t = text;
446      }
447      else
448        t = glob_expand->error;
449
450      /* send error description to the error-stream */
451      fprintf(error, "curl: (%d) [globbing] %s\n", res, t);
452    }
453    /* it failed, we cleanup */
454    glob_cleanup(glob_expand);
455    *urlnum = 1;
456    return res;
457  }
458
459  *glob = glob_expand;
460  return CURLE_OK;
461}
462
463void glob_cleanup(URLGlob* glob)
464{
465  size_t i;
466  int elem;
467
468  /* the < condition is required since i underflows! */
469  for(i = glob->size - 1; i < glob->size; --i) {
470    if((glob->pattern[i].type == UPTSet) &&
471       (glob->pattern[i].content.Set.elements)) {
472      for(elem = glob->pattern[i].content.Set.size - 1;
473          elem >= 0;
474          --elem) {
475        Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
476      }
477      Curl_safefree(glob->pattern[i].content.Set.elements);
478    }
479  }
480  Curl_safefree(glob->glob_buffer);
481  Curl_safefree(glob);
482}
483
484int glob_next_url(char **globbed, URLGlob *glob)
485{
486  URLPattern *pat;
487  size_t i;
488  size_t j;
489  size_t len;
490  size_t buflen = glob->urllen + 1;
491  char *buf = glob->glob_buffer;
492
493  *globbed = NULL;
494
495  if(!glob->beenhere)
496    glob->beenhere = 1;
497  else {
498    bool carry = TRUE;
499
500    /* implement a counter over the index ranges of all patterns,
501       starting with the rightmost pattern */
502    /* the < condition is required since i underflows! */
503    for(i = glob->size - 1; carry && (i < glob->size); --i) {
504      carry = FALSE;
505      pat = &glob->pattern[i];
506      switch (pat->type) {
507      case UPTSet:
508        if((pat->content.Set.elements) &&
509           (++pat->content.Set.ptr_s == pat->content.Set.size)) {
510          pat->content.Set.ptr_s = 0;
511          carry = TRUE;
512        }
513        break;
514      case UPTCharRange:
515        pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
516                           (int)((unsigned char)pat->content.CharRange.ptr_c));
517        if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
518          pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
519          carry = TRUE;
520        }
521        break;
522      case UPTNumRange:
523        pat->content.NumRange.ptr_n += pat->content.NumRange.step;
524        if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
525          pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
526          carry = TRUE;
527        }
528        break;
529      default:
530        printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
531        return CURLE_FAILED_INIT;
532      }
533    }
534    if(carry) {         /* first pattern ptr has run into overflow, done! */
535      /* TODO: verify if this should actally return CURLE_OK. */
536      return CURLE_OK; /* CURLE_OK to match previous behavior */
537    }
538  }
539
540  for(j = 0; j < glob->size; ++j) {
541    pat = &glob->pattern[j];
542    switch(pat->type) {
543    case UPTSet:
544      if(pat->content.Set.elements) {
545        len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
546        snprintf(buf, buflen, "%s",
547                 pat->content.Set.elements[pat->content.Set.ptr_s]);
548        buf += len;
549        buflen -= len;
550      }
551      break;
552    case UPTCharRange:
553      *buf++ = pat->content.CharRange.ptr_c;
554      break;
555    case UPTNumRange:
556      len = snprintf(buf, buflen, "%0*ld",
557                     pat->content.NumRange.padlength,
558                     pat->content.NumRange.ptr_n);
559      buf += len;
560      buflen -= len;
561      break;
562    default:
563      printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
564      return CURLE_FAILED_INIT;
565    }
566  }
567  *buf = '\0';
568
569  *globbed = strdup(glob->glob_buffer);
570  if(!*globbed)
571    return CURLE_OUT_OF_MEMORY;
572
573  return CURLE_OK;
574}
575
576int glob_match_url(char **result, char *filename, URLGlob *glob)
577{
578  char *target;
579  size_t allocsize;
580  char numbuf[18];
581  char *appendthis = NULL;
582  size_t appendlen = 0;
583  size_t stringlen = 0;
584
585  *result = NULL;
586
587  /* We cannot use the glob_buffer for storage here since the filename may
588   * be longer than the URL we use. We allocate a good start size, then
589   * we need to realloc in case of need.
590   */
591  allocsize = strlen(filename) + 1; /* make it at least one byte to store the
592                                       trailing zero */
593  target = malloc(allocsize);
594  if(!target)
595    return CURLE_OUT_OF_MEMORY;
596
597  while(*filename) {
598    if(*filename == '#' && ISDIGIT(filename[1])) {
599      unsigned long i;
600      char *ptr = filename;
601      unsigned long num = strtoul(&filename[1], &filename, 10);
602      URLPattern *pat =NULL;
603
604      if(num < glob->size) {
605        num--; /* make it zero based */
606        /* find the correct glob entry */
607        for(i=0; i<glob->size; i++) {
608          if(glob->pattern[i].globindex == (int)num) {
609            pat = &glob->pattern[i];
610            break;
611          }
612        }
613      }
614
615      if(pat) {
616        switch (pat->type) {
617        case UPTSet:
618          if(pat->content.Set.elements) {
619            appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
620            appendlen =
621              strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
622          }
623          break;
624        case UPTCharRange:
625          numbuf[0] = pat->content.CharRange.ptr_c;
626          numbuf[1] = 0;
627          appendthis = numbuf;
628          appendlen = 1;
629          break;
630        case UPTNumRange:
631          snprintf(numbuf, sizeof(numbuf), "%0*d",
632                   pat->content.NumRange.padlength,
633                   pat->content.NumRange.ptr_n);
634          appendthis = numbuf;
635          appendlen = strlen(numbuf);
636          break;
637        default:
638          fprintf(stderr, "internal error: invalid pattern type (%d)\n",
639                  (int)pat->type);
640          Curl_safefree(target);
641          return CURLE_FAILED_INIT;
642        }
643      }
644      else {
645        /* #[num] out of range, use the #[num] in the output */
646        filename = ptr;
647        appendthis = filename++;
648        appendlen = 1;
649      }
650    }
651    else {
652      appendthis = filename++;
653      appendlen = 1;
654    }
655    if(appendlen + stringlen >= allocsize) {
656      char *newstr;
657      /* we append a single byte to allow for the trailing byte to be appended
658         at the end of this function outside the while() loop */
659      allocsize = (appendlen + stringlen) * 2;
660      newstr = realloc(target, allocsize + 1);
661      if(!newstr) {
662        Curl_safefree(target);
663        return CURLE_OUT_OF_MEMORY;
664      }
665      target = newstr;
666    }
667    memcpy(&target[stringlen], appendthis, appendlen);
668    stringlen += appendlen;
669  }
670  target[stringlen]= '\0';
671  *result = target;
672  return CURLE_OK;
673}
674
675