1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22#include "setup.h"
23
24#include <curl/curl.h>
25
26#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
27#include <curl/mprintf.h>
28
29#include "tool_urlglob.h"
30#include "tool_vms.h"
31
32#include "memdebug.h" /* keep this as LAST include */
33
/* Result codes shared by the glob parsing functions in this file.
   GLOB_OK has to remain zero: results are tested with if(res)/if(!res). */
typedef enum {
  GLOB_OK = 0,      /* parsing succeeded */
  GLOB_NO_MEM = 1,  /* a memory allocation failed */
  GLOB_ERROR = 2    /* the glob expression was rejected */
} GlobCode;
39
40/*
41 * glob_word()
42 *
43 * Input a full globbed string, set the forth argument to the amount of
44 * strings we get out of this. Return GlobCode.
45 */
46static GlobCode glob_word(URLGlob *, /* object anchor */
47                          char *,    /* globbed string */
48                          size_t,       /* position */
49                          int *);    /* returned number of strings */
50
51static GlobCode glob_set(URLGlob *glob, char *pattern,
52                         size_t pos, int *amount)
53{
54  /* processes a set expression with the point behind the opening '{'
55     ','-separated elements are collected until the next closing '}'
56  */
57  URLPattern *pat;
58  GlobCode res;
59  bool done = FALSE;
60  char* buf = glob->glob_buffer;
61
62  pat = &glob->pattern[glob->size / 2];
63  /* patterns 0,1,2,... correspond to size=1,3,5,... */
64  pat->type = UPTSet;
65  pat->content.Set.size = 0;
66  pat->content.Set.ptr_s = 0;
67  pat->content.Set.elements = NULL;
68
69  ++glob->size;
70
71  while(!done) {
72    switch (*pattern) {
73    case '\0':                  /* URL ended while set was still open */
74      snprintf(glob->errormsg, sizeof(glob->errormsg),
75               "unmatched brace at pos %zu\n", pos);
76      return GLOB_ERROR;
77
78    case '{':
79    case '[':                   /* no nested expressions at this time */
80      snprintf(glob->errormsg, sizeof(glob->errormsg),
81               "nested braces not supported at pos %zu\n", pos);
82      return GLOB_ERROR;
83
84    case ',':
85    case '}':                           /* set element completed */
86      *buf = '\0';
87      if(pat->content.Set.elements) {
88        char **new_arr = realloc(pat->content.Set.elements,
89                                 (pat->content.Set.size + 1) * sizeof(char*));
90        if(!new_arr) {
91          short elem;
92          for(elem = 0; elem < pat->content.Set.size; elem++)
93            Curl_safefree(pat->content.Set.elements[elem]);
94          Curl_safefree(pat->content.Set.elements);
95          pat->content.Set.ptr_s = 0;
96          pat->content.Set.size = 0;
97        }
98        pat->content.Set.elements = new_arr;
99      }
100      else
101        pat->content.Set.elements = malloc(sizeof(char*));
102      if(!pat->content.Set.elements) {
103        snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
104        return GLOB_NO_MEM;
105      }
106      pat->content.Set.elements[pat->content.Set.size] =
107        strdup(glob->glob_buffer);
108      if(!pat->content.Set.elements[pat->content.Set.size]) {
109        short elem;
110        for(elem = 0; elem < pat->content.Set.size; elem++)
111          Curl_safefree(pat->content.Set.elements[elem]);
112        Curl_safefree(pat->content.Set.elements);
113        pat->content.Set.ptr_s = 0;
114        pat->content.Set.size = 0;
115        snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
116        return GLOB_NO_MEM;
117      }
118      ++pat->content.Set.size;
119
120      if(*pattern == '}') {
121        /* entire set pattern completed */
122        int wordamount;
123
124        /* always check for a literal (may be "") between patterns */
125        res = glob_word(glob, ++pattern, ++pos, &wordamount);
126        if(res) {
127          short elem;
128          for(elem = 0; elem < pat->content.Set.size; elem++)
129            Curl_safefree(pat->content.Set.elements[elem]);
130          Curl_safefree(pat->content.Set.elements);
131          pat->content.Set.ptr_s = 0;
132          pat->content.Set.size = 0;
133          return res;
134        }
135
136        *amount = pat->content.Set.size * wordamount;
137
138        done = TRUE;
139        continue;
140      }
141
142      buf = glob->glob_buffer;
143      ++pattern;
144      ++pos;
145      break;
146
147    case ']':                           /* illegal closing bracket */
148      snprintf(glob->errormsg, sizeof(glob->errormsg),
149               "illegal pattern at pos %zu\n", pos);
150      return GLOB_ERROR;
151
152    case '\\':                          /* escaped character, skip '\' */
153      if(pattern[1]) {
154        ++pattern;
155        ++pos;
156      }
157      /* intentional fallthrough */
158    default:
159      *buf++ = *pattern++;              /* copy character to set element */
160      ++pos;
161    }
162  }
163  return GLOB_OK;
164}
165
166static GlobCode glob_range(URLGlob *glob, char *pattern,
167                           size_t pos, int *amount)
168{
169  /* processes a range expression with the point behind the opening '['
170     - char range: e.g. "a-z]", "B-Q]"
171     - num range: e.g. "0-9]", "17-2000]"
172     - num range with leading zeros: e.g. "001-999]"
173     expression is checked for well-formedness and collected until the next ']'
174  */
175  URLPattern *pat;
176  char *c;
177  char sep;
178  char sep2;
179  int step;
180  int rc;
181  GlobCode res;
182  int wordamount = 1;
183
184  pat = &glob->pattern[glob->size / 2];
185  /* patterns 0,1,2,... correspond to size=1,3,5,... */
186  ++glob->size;
187
188  if(ISALPHA(*pattern)) {
189    /* character range detected */
190    char min_c;
191    char max_c;
192
193    pat->type = UPTCharRange;
194
195    rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
196
197    if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
198      /* the pattern is not well-formed */
199      snprintf(glob->errormsg, sizeof(glob->errormsg),
200               "error: bad range specification after pos %zu\n", pos);
201      return GLOB_ERROR;
202    }
203
204    /* check the (first) separating character */
205    if((sep != ']') && (sep != ':')) {
206      snprintf(glob->errormsg, sizeof(glob->errormsg),
207               "error: unsupported character (%c) after range at pos %zu\n",
208               sep, pos);
209      return GLOB_ERROR;
210    }
211
212    /* if there was a ":[num]" thing, use that as step or else use 1 */
213    pat->content.CharRange.step =
214      ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
215
216    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
217    pat->content.CharRange.max_c = max_c;
218  }
219  else if(ISDIGIT(*pattern)) {
220    /* numeric range detected */
221    int min_n;
222    int max_n;
223
224    pat->type = UPTNumRange;
225    pat->content.NumRange.padlength = 0;
226
227    rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
228
229    if((rc < 2) || (min_n > max_n)) {
230      /* the pattern is not well-formed */
231      snprintf(glob->errormsg, sizeof(glob->errormsg),
232               "error: bad range specification after pos %zu\n", pos);
233      return GLOB_ERROR;
234    }
235    pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
236    pat->content.NumRange.max_n = max_n;
237
238    /* if there was a ":[num]" thing, use that as step or else use 1 */
239    pat->content.NumRange.step =
240      ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1;
241
242    if(*pattern == '0') {
243      /* leading zero specified */
244      c = pattern;
245      while(ISDIGIT(*c)) {
246        c++;
247        ++pat->content.NumRange.padlength; /* padding length is set for all
248                                              instances of this pattern */
249      }
250    }
251  }
252  else {
253    snprintf(glob->errormsg, sizeof(glob->errormsg),
254             "illegal character in range specification at pos %zu\n", pos);
255    return GLOB_ERROR;
256  }
257
258  c = (char*)strchr(pattern, ']'); /* continue after next ']' */
259  if(c)
260    c++;
261  else {
262    snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
263    return GLOB_ERROR; /* missing ']' */
264  }
265
266  /* always check for a literal (may be "") between patterns */
267
268  res = glob_word(glob, c, pos + (c - pattern), &wordamount);
269  if(res == GLOB_ERROR) {
270    wordamount = 1;
271    res = GLOB_OK;
272  }
273
274  if(!res) {
275    if(pat->type == UPTCharRange)
276      *amount = wordamount * (pat->content.CharRange.max_c -
277                              pat->content.CharRange.min_c + 1);
278    else
279      *amount = wordamount * (pat->content.NumRange.max_n -
280                              pat->content.NumRange.min_n + 1);
281  }
282
283  return res; /* GLOB_OK or GLOB_NO_MEM */
284}
285
286static GlobCode glob_word(URLGlob *glob, char *pattern,
287                          size_t pos, int *amount)
288{
289  /* processes a literal string component of a URL
290     special characters '{' and '[' branch to set/range processing functions
291   */
292  char* buf = glob->glob_buffer;
293  size_t litindex;
294  GlobCode res = GLOB_OK;
295
296  *amount = 1; /* default is one single string */
297
298  while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
299    if(*pattern == '}' || *pattern == ']') {
300      snprintf(glob->errormsg, sizeof(glob->errormsg),
301               "unmatched close brace/bracket at pos %zu\n", pos);
302      return GLOB_ERROR;
303    }
304
305    /* only allow \ to escape known "special letters" */
306    if(*pattern == '\\' &&
307        (*(pattern+1) == '{' || *(pattern+1) == '[' ||
308         *(pattern+1) == '}' || *(pattern+1) == ']') ) {
309
310      /* escape character, skip '\' */
311      ++pattern;
312      ++pos;
313    }
314    *buf++ = *pattern++; /* copy character to literal */
315    ++pos;
316  }
317  *buf = '\0';
318  litindex = glob->size / 2;
319  /* literals 0,1,2,... correspond to size=0,2,4,... */
320  glob->literal[litindex] = strdup(glob->glob_buffer);
321  if(!glob->literal[litindex]) {
322    snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
323    return GLOB_NO_MEM;
324  }
325  ++glob->size;
326
327  switch (*pattern) {
328  case '\0':
329    /* singular URL processed  */
330    break;
331
332  case '{':
333    /* process set pattern */
334    res = glob_set(glob, ++pattern, ++pos, amount);
335    break;
336
337  case '[':
338    /* process range pattern */
339    res = glob_range(glob, ++pattern, ++pos, amount);
340    break;
341  }
342
343  if(res)
344    Curl_safefree(glob->literal[litindex]);
345
346  return res;
347}
348
349int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
350{
351  /*
352   * We can deal with any-size, just make a buffer with the same length
353   * as the specified URL!
354   */
355  URLGlob *glob_expand;
356  int amount;
357  char *glob_buffer;
358  GlobCode res;
359
360  *glob = NULL;
361
362  glob_buffer = malloc(strlen(url) + 1);
363  if(!glob_buffer)
364    return CURLE_OUT_OF_MEMORY;
365
366  glob_expand = calloc(1, sizeof(URLGlob));
367  if(!glob_expand) {
368    Curl_safefree(glob_buffer);
369    return CURLE_OUT_OF_MEMORY;
370  }
371  glob_expand->size = 0;
372  glob_expand->urllen = strlen(url);
373  glob_expand->glob_buffer = glob_buffer;
374  glob_expand->beenhere = 0;
375
376  res = glob_word(glob_expand, url, 1, &amount);
377  if(!res)
378    *urlnum = amount;
379  else {
380    if(error && glob_expand->errormsg[0]) {
381      /* send error description to the error-stream */
382      fprintf(error, "curl: (%d) [globbing] %s",
383              (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT,
384              glob_expand->errormsg);
385    }
386    /* it failed, we cleanup */
387    Curl_safefree(glob_buffer);
388    Curl_safefree(glob_expand);
389    *urlnum = 1;
390    return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
391  }
392
393  *glob = glob_expand;
394  return CURLE_OK;
395}
396
397void glob_cleanup(URLGlob* glob)
398{
399  size_t i;
400  int elem;
401
402  for(i = glob->size - 1; i < glob->size; --i) {
403    if(!(i & 1)) {     /* even indexes contain literals */
404      Curl_safefree(glob->literal[i/2]);
405    }
406    else {              /* odd indexes contain sets or ranges */
407      if((glob->pattern[i/2].type == UPTSet) &&
408         (glob->pattern[i/2].content.Set.elements)) {
409        for(elem = glob->pattern[i/2].content.Set.size - 1;
410             elem >= 0;
411             --elem) {
412          Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
413        }
414        Curl_safefree(glob->pattern[i/2].content.Set.elements);
415      }
416    }
417  }
418  Curl_safefree(glob->glob_buffer);
419  Curl_safefree(glob);
420}
421
422int glob_next_url(char **globbed, URLGlob *glob)
423{
424  URLPattern *pat;
425  char *lit;
426  size_t i;
427  size_t j;
428  size_t len;
429  size_t buflen = glob->urllen + 1;
430  char *buf = glob->glob_buffer;
431
432  *globbed = NULL;
433
434  if(!glob->beenhere)
435    glob->beenhere = 1;
436  else {
437    bool carry = TRUE;
438
439    /* implement a counter over the index ranges of all patterns,
440       starting with the rightmost pattern */
441    for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) {
442      carry = FALSE;
443      pat = &glob->pattern[i];
444      switch (pat->type) {
445      case UPTSet:
446        if((pat->content.Set.elements) &&
447           (++pat->content.Set.ptr_s == pat->content.Set.size)) {
448          pat->content.Set.ptr_s = 0;
449          carry = TRUE;
450        }
451        break;
452      case UPTCharRange:
453        pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
454                           (int)((unsigned char)pat->content.CharRange.ptr_c));
455        if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
456          pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
457          carry = TRUE;
458        }
459        break;
460      case UPTNumRange:
461        pat->content.NumRange.ptr_n += pat->content.NumRange.step;
462        if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
463          pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
464          carry = TRUE;
465        }
466        break;
467      default:
468        printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
469        return CURLE_FAILED_INIT;
470      }
471    }
472    if(carry) {         /* first pattern ptr has run into overflow, done! */
473      /* TODO: verify if this should actally return CURLE_OK. */
474      return CURLE_OK; /* CURLE_OK to match previous behavior */
475    }
476  }
477
478  for(j = 0; j < glob->size; ++j) {
479    if(!(j&1)) {              /* every other term (j even) is a literal */
480      lit = glob->literal[j/2];
481      len = snprintf(buf, buflen, "%s", lit);
482      buf += len;
483      buflen -= len;
484    }
485    else {                              /* the rest (i odd) are patterns */
486      pat = &glob->pattern[j/2];
487      switch(pat->type) {
488      case UPTSet:
489        if(pat->content.Set.elements) {
490          len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
491          snprintf(buf, buflen, "%s",
492                   pat->content.Set.elements[pat->content.Set.ptr_s]);
493          buf += len;
494          buflen -= len;
495        }
496        break;
497      case UPTCharRange:
498        *buf++ = pat->content.CharRange.ptr_c;
499        break;
500      case UPTNumRange:
501        len = snprintf(buf, buflen, "%0*d",
502                       pat->content.NumRange.padlength,
503                       pat->content.NumRange.ptr_n);
504        buf += len;
505        buflen -= len;
506        break;
507      default:
508        printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
509        return CURLE_FAILED_INIT;
510      }
511    }
512  }
513  *buf = '\0';
514
515  *globbed = strdup(glob->glob_buffer);
516  if(!*globbed)
517    return CURLE_OUT_OF_MEMORY;
518
519  return CURLE_OK;
520}
521
522int glob_match_url(char **result, char *filename, URLGlob *glob)
523{
524  char *target;
525  size_t allocsize;
526  char numbuf[18];
527  char *appendthis = NULL;
528  size_t appendlen = 0;
529  size_t stringlen = 0;
530
531  *result = NULL;
532
533  /* We cannot use the glob_buffer for storage here since the filename may
534   * be longer than the URL we use. We allocate a good start size, then
535   * we need to realloc in case of need.
536   */
537  allocsize = strlen(filename) + 1; /* make it at least one byte to store the
538                                       trailing zero */
539  target = malloc(allocsize);
540  if(!target)
541    return CURLE_OUT_OF_MEMORY;
542
543  while(*filename) {
544    if(*filename == '#' && ISDIGIT(filename[1])) {
545      unsigned long i;
546      char *ptr = filename;
547      unsigned long num = strtoul(&filename[1], &filename, 10);
548      i = num - 1UL;
549
550      if(num && (i <= glob->size / 2)) {
551        URLPattern pat = glob->pattern[i];
552        switch (pat.type) {
553        case UPTSet:
554          if(pat.content.Set.elements) {
555            appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
556            appendlen =
557              strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
558          }
559          break;
560        case UPTCharRange:
561          numbuf[0] = pat.content.CharRange.ptr_c;
562          numbuf[1] = 0;
563          appendthis = numbuf;
564          appendlen = 1;
565          break;
566        case UPTNumRange:
567          snprintf(numbuf, sizeof(numbuf), "%0*d",
568                   pat.content.NumRange.padlength,
569                   pat.content.NumRange.ptr_n);
570          appendthis = numbuf;
571          appendlen = strlen(numbuf);
572          break;
573        default:
574          printf("internal error: invalid pattern type (%d)\n",
575                 (int)pat.type);
576          Curl_safefree(target);
577          return CURLE_FAILED_INIT;
578        }
579      }
580      else {
581        /* #[num] out of range, use the #[num] in the output */
582        filename = ptr;
583        appendthis = filename++;
584        appendlen = 1;
585      }
586    }
587    else {
588      appendthis = filename++;
589      appendlen = 1;
590    }
591    if(appendlen + stringlen >= allocsize) {
592      char *newstr;
593      /* we append a single byte to allow for the trailing byte to be appended
594         at the end of this function outside the while() loop */
595      allocsize = (appendlen + stringlen) * 2;
596      newstr = realloc(target, allocsize + 1);
597      if(!newstr) {
598        Curl_safefree(target);
599        return CURLE_OUT_OF_MEMORY;
600      }
601      target = newstr;
602    }
603    memcpy(&target[stringlen], appendthis, appendlen);
604    stringlen += appendlen;
605  }
606  target[stringlen]= '\0';
607  *result = target;
608  return CURLE_OK;
609}
610
611