1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22
23/* client-local setup.h */
24#include "setup.h"
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <ctype.h>
30#include <curl/curl.h>
31
32#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
33#include <curl/mprintf.h>
34
35#include "urlglob.h"
36#include "os-specific.h"
37
38#if defined(CURLDEBUG) && defined(CURLTOOLDEBUG)
39#include "memdebug.h"
40#endif
41
/* Result code returned by the static glob_set(), glob_range() and
   glob_word() parsing helpers in this file. */
typedef enum {
  GLOB_OK = 0,    /* the (sub)expression was parsed */
  GLOB_ERROR = 1  /* parsing failed */
} GlobCode;
46
47/*
48 * glob_word()
49 *
50 * Input a full globbed string, set the forth argument to the amount of
51 * strings we get out of this. Return GlobCode.
52 */
53static GlobCode glob_word(URLGlob *, /* object anchor */
54                          char *,    /* globbed string */
55                          size_t,       /* position */
56                          int *);    /* returned number of strings */
57
58static GlobCode glob_set(URLGlob *glob, char *pattern,
59                         size_t pos, int *amount)
60{
61  /* processes a set expression with the point behind the opening '{'
62     ','-separated elements are collected until the next closing '}'
63  */
64  bool done = FALSE;
65  char* buf = glob->glob_buffer;
66  URLPattern *pat;
67
68  pat = (URLPattern*)&glob->pattern[glob->size / 2];
69  /* patterns 0,1,2,... correspond to size=1,3,5,... */
70  pat->type = UPTSet;
71  pat->content.Set.size = 0;
72  pat->content.Set.ptr_s = 0;
73  pat->content.Set.elements = NULL;
74
75  ++glob->size;
76
77  while(!done) {
78    switch (*pattern) {
79    case '\0':                  /* URL ended while set was still open */
80      snprintf(glob->errormsg, sizeof(glob->errormsg),
81               "unmatched brace at pos %zu\n", pos);
82      return GLOB_ERROR;
83
84    case '{':
85    case '[':                   /* no nested expressions at this time */
86      snprintf(glob->errormsg, sizeof(glob->errormsg),
87               "nested braces not supported at pos %zu\n", pos);
88      return GLOB_ERROR;
89
90    case ',':
91    case '}':                           /* set element completed */
92      *buf = '\0';
93      if(pat->content.Set.elements)
94        pat->content.Set.elements =
95          realloc(pat->content.Set.elements,
96                  (pat->content.Set.size + 1) * sizeof(char*));
97      else
98        pat->content.Set.elements =
99          malloc((pat->content.Set.size + 1) * sizeof(char*));
100      if(!pat->content.Set.elements) {
101        snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory");
102        return GLOB_ERROR;
103      }
104      pat->content.Set.elements[pat->content.Set.size] =
105        strdup(glob->glob_buffer);
106      if(!pat->content.Set.elements[pat->content.Set.size]) {
107        snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory");
108        return GLOB_ERROR;
109      }
110      ++pat->content.Set.size;
111
112      if(*pattern == '}') {
113        /* entire set pattern completed */
114        int wordamount;
115
116        /* always check for a literal (may be "") between patterns */
117        if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount))
118          return GLOB_ERROR;
119        *amount = pat->content.Set.size * wordamount;
120
121        done = TRUE;
122        continue;
123      }
124
125      buf = glob->glob_buffer;
126      ++pattern;
127      ++pos;
128      break;
129
130    case ']':                           /* illegal closing bracket */
131      snprintf(glob->errormsg, sizeof(glob->errormsg),
132               "illegal pattern at pos %zu\n", pos);
133      return GLOB_ERROR;
134
135    case '\\':                          /* escaped character, skip '\' */
136      if(pattern[1]) {
137        ++pattern;
138        ++pos;
139      }
140      /* intentional fallthrough */
141    default:
142      *buf++ = *pattern++;              /* copy character to set element */
143      ++pos;
144    }
145  }
146  return GLOB_OK;
147}
148
149static GlobCode glob_range(URLGlob *glob, char *pattern,
150                           size_t pos, int *amount)
151{
152  /* processes a range expression with the point behind the opening '['
153     - char range: e.g. "a-z]", "B-Q]"
154     - num range: e.g. "0-9]", "17-2000]"
155     - num range with leading zeros: e.g. "001-999]"
156     expression is checked for well-formedness and collected until the next ']'
157  */
158  URLPattern *pat;
159  char *c;
160  int wordamount=1;
161  char sep;
162  char sep2;
163  int step;
164  int rc;
165
166  pat = (URLPattern*)&glob->pattern[glob->size / 2];
167  /* patterns 0,1,2,... correspond to size=1,3,5,... */
168  ++glob->size;
169
170  if(ISALPHA(*pattern)) {         /* character range detected */
171    char min_c;
172    char max_c;
173
174    pat->type = UPTCharRange;
175    rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2);
176    if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) {
177      /* the pattern is not well-formed */
178      snprintf(glob->errormsg, sizeof(glob->errormsg),
179               "error: bad range specification after pos %zu\n", pos);
180      return GLOB_ERROR;
181    }
182
183    /* check the (first) separating character */
184    if((sep != ']') && (sep != ':')) {
185      snprintf(glob->errormsg, sizeof(glob->errormsg),
186               "error: unsupported character (%c) after range at pos %zu\n",
187               sep, pos);
188      return GLOB_ERROR;
189    }
190
191    /* if there was a ":[num]" thing, use that as step or else use 1 */
192    pat->content.CharRange.step =
193      ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
194
195    pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
196    pat->content.CharRange.max_c = max_c;
197  }
198  else if(ISDIGIT(*pattern)) { /* numeric range detected */
199    int min_n;
200    int max_n;
201
202    pat->type = UPTNumRange;
203    pat->content.NumRange.padlength = 0;
204
205    rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2);
206
207    if((rc < 2) || (min_n > max_n)) {
208      /* the pattern is not well-formed */
209      snprintf(glob->errormsg, sizeof(glob->errormsg),
210               "error: bad range specification after pos %zu\n", pos);
211      return GLOB_ERROR;
212    }
213    pat->content.NumRange.ptr_n =  pat->content.NumRange.min_n = min_n;
214    pat->content.NumRange.max_n = max_n;
215
216    /* if there was a ":[num]" thing, use that as step or else use 1 */
217    pat->content.NumRange.step =
218      ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1;
219
220    if(*pattern == '0') {              /* leading zero specified */
221      c = pattern;
222      while(ISDIGIT(*c)) {
223        c++;
224        ++pat->content.NumRange.padlength; /* padding length is set for all
225                                              instances of this pattern */
226      }
227    }
228
229  }
230  else {
231    snprintf(glob->errormsg, sizeof(glob->errormsg),
232             "illegal character in range specification at pos %zu\n", pos);
233    return GLOB_ERROR;
234  }
235
236  c = (char*)strchr(pattern, ']'); /* continue after next ']' */
237  if(c)
238    c++;
239  else {
240    snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'");
241    return GLOB_ERROR; /* missing ']' */
242  }
243
244  /* always check for a literal (may be "") between patterns */
245
246  if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount))
247    wordamount = 1;
248
249  if(pat->type == UPTCharRange)
250    *amount = (pat->content.CharRange.max_c -
251               pat->content.CharRange.min_c + 1) *
252      wordamount;
253  else
254    *amount = (pat->content.NumRange.max_n -
255               pat->content.NumRange.min_n + 1) * wordamount;
256
257  return GLOB_OK;
258}
259
260static GlobCode glob_word(URLGlob *glob, char *pattern,
261                          size_t pos, int *amount)
262{
263  /* processes a literal string component of a URL
264     special characters '{' and '[' branch to set/range processing functions
265   */
266  char* buf = glob->glob_buffer;
267  size_t litindex;
268  GlobCode res = GLOB_OK;
269
270  *amount = 1; /* default is one single string */
271
272  while(*pattern != '\0' && *pattern != '{' && *pattern != '[') {
273    if(*pattern == '}' || *pattern == ']') {
274      snprintf(glob->errormsg, sizeof(glob->errormsg),
275               "unmatched close brace/bracket at pos %zu\n", pos);
276      return GLOB_ERROR;
277    }
278
279    /* only allow \ to escape known "special letters" */
280    if(*pattern == '\\' &&
281        (*(pattern+1) == '{' || *(pattern+1) == '[' ||
282         *(pattern+1) == '}' || *(pattern+1) == ']') ) {
283
284      /* escape character, skip '\' */
285      ++pattern;
286      ++pos;
287    }
288    *buf++ = *pattern++;                /* copy character to literal */
289    ++pos;
290  }
291  *buf = '\0';
292  litindex = glob->size / 2;
293  /* literals 0,1,2,... correspond to size=0,2,4,... */
294  glob->literal[litindex] = strdup(glob->glob_buffer);
295  if(!glob->literal[litindex])
296    return GLOB_ERROR;
297  ++glob->size;
298
299  switch (*pattern) {
300  case '\0':
301    break;                      /* singular URL processed  */
302
303  case '{':
304    /* process set pattern */
305    res = glob_set(glob, ++pattern, ++pos, amount);
306    break;
307
308  case '[':
309    /* process range pattern */
310    res= glob_range(glob, ++pattern, ++pos, amount);
311    break;
312  }
313
314  if(GLOB_OK != res)
315    /* free that strdup'ed string again */
316    free(glob->literal[litindex]);
317
318  return res; /* something got wrong */
319}
320
321int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
322{
323  /*
324   * We can deal with any-size, just make a buffer with the same length
325   * as the specified URL!
326   */
327  URLGlob *glob_expand;
328  int amount;
329  char *glob_buffer = malloc(strlen(url)+1);
330
331  *glob = NULL;
332  if(NULL == glob_buffer)
333    return CURLE_OUT_OF_MEMORY;
334
335  glob_expand = calloc(1, sizeof(URLGlob));
336  if(NULL == glob_expand) {
337    free(glob_buffer);
338    return CURLE_OUT_OF_MEMORY;
339  }
340  glob_expand->size = 0;
341  glob_expand->urllen = strlen(url);
342  glob_expand->glob_buffer = glob_buffer;
343  glob_expand->beenhere=0;
344  if(GLOB_OK == glob_word(glob_expand, url, 1, &amount))
345    *urlnum = amount;
346  else {
347    if(error && glob_expand->errormsg[0]) {
348      /* send error description to the error-stream */
349      fprintf(error, "curl: (%d) [globbing] %s",
350              CURLE_URL_MALFORMAT, glob_expand->errormsg);
351    }
352    /* it failed, we cleanup */
353    free(glob_buffer);
354    free(glob_expand);
355    glob_expand = NULL;
356    *urlnum = 1;
357    return CURLE_URL_MALFORMAT;
358  }
359
360  *glob = glob_expand;
361  return CURLE_OK;
362}
363
364void glob_cleanup(URLGlob* glob)
365{
366  size_t i;
367  int elem;
368
369  for(i = glob->size - 1; i < glob->size; --i) {
370    if(!(i & 1)) {     /* even indexes contain literals */
371      free(glob->literal[i/2]);
372    }
373    else {              /* odd indexes contain sets or ranges */
374      if((glob->pattern[i/2].type == UPTSet) &&
375         (glob->pattern[i/2].content.Set.elements)) {
376        for(elem = glob->pattern[i/2].content.Set.size - 1;
377             elem >= 0;
378             --elem) {
379          if(glob->pattern[i/2].content.Set.elements[elem])
380            free(glob->pattern[i/2].content.Set.elements[elem]);
381        }
382        free(glob->pattern[i/2].content.Set.elements);
383      }
384    }
385  }
386  free(glob->glob_buffer);
387  free(glob);
388}
389
390char *glob_next_url(URLGlob *glob)
391{
392  char *buf = glob->glob_buffer;
393  URLPattern *pat;
394  char *lit;
395  size_t i;
396  size_t j;
397  size_t buflen = glob->urllen+1;
398  size_t len;
399
400  if(!glob->beenhere)
401    glob->beenhere = 1;
402  else {
403    bool carry = TRUE;
404
405    /* implement a counter over the index ranges of all patterns,
406       starting with the rightmost pattern */
407    for(i = glob->size / 2 - 1; carry && i < glob->size; --i) {
408      carry = FALSE;
409      pat = &glob->pattern[i];
410      switch (pat->type) {
411      case UPTSet:
412        if(++pat->content.Set.ptr_s == pat->content.Set.size) {
413          pat->content.Set.ptr_s = 0;
414          carry = TRUE;
415        }
416        break;
417      case UPTCharRange:
418        pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step +
419                           (int)((unsigned char)pat->content.CharRange.ptr_c));
420        if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
421          pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
422          carry = TRUE;
423        }
424        break;
425      case UPTNumRange:
426        pat->content.NumRange.ptr_n += pat->content.NumRange.step;
427        if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
428          pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
429          carry = TRUE;
430        }
431        break;
432      default:
433        printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
434        exit (CURLE_FAILED_INIT);
435      }
436    }
437    if(carry)          /* first pattern ptr has run into overflow, done! */
438      return NULL;
439  }
440
441  for(j = 0; j < glob->size; ++j) {
442    if(!(j&1)) {              /* every other term (j even) is a literal */
443      lit = glob->literal[j/2];
444      len = snprintf(buf, buflen, "%s", lit);
445      buf += len;
446      buflen -= len;
447    }
448    else {                              /* the rest (i odd) are patterns */
449      pat = &glob->pattern[j/2];
450      switch(pat->type) {
451      case UPTSet:
452        len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
453        snprintf(buf, buflen, "%s",
454                 pat->content.Set.elements[pat->content.Set.ptr_s]);
455        buf += len;
456        buflen -= len;
457        break;
458      case UPTCharRange:
459        *buf++ = pat->content.CharRange.ptr_c;
460        break;
461      case UPTNumRange:
462        len = snprintf(buf, buflen, "%0*d",
463                       pat->content.NumRange.padlength,
464                       pat->content.NumRange.ptr_n);
465        buf += len;
466        buflen -= len;
467        break;
468      default:
469        printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
470        exit (CURLE_FAILED_INIT);
471      }
472    }
473  }
474  *buf = '\0';
475  return strdup(glob->glob_buffer);
476}
477
478char *glob_match_url(char *filename, URLGlob *glob)
479{
480  char *target;
481  size_t allocsize;
482  size_t stringlen=0;
483  char numbuf[18];
484  char *appendthis = NULL;
485  size_t appendlen = 0;
486
487  /* We cannot use the glob_buffer for storage here since the filename may
488   * be longer than the URL we use. We allocate a good start size, then
489   * we need to realloc in case of need.
490   */
491  allocsize=strlen(filename)+1; /* make it at least one byte to store the
492                                   trailing zero */
493  target = malloc(allocsize);
494  if(NULL == target)
495    return NULL; /* major failure */
496
497  while(*filename) {
498    if(*filename == '#' && ISDIGIT(filename[1])) {
499      unsigned long i;
500      char *ptr = filename;
501      unsigned long num = strtoul(&filename[1], &filename, 10);
502      i = num-1;
503
504      if(num && (i <= glob->size / 2)) {
505        URLPattern pat = glob->pattern[i];
506        switch (pat.type) {
507        case UPTSet:
508          appendthis = pat.content.Set.elements[pat.content.Set.ptr_s];
509          appendlen = strlen(pat.content.Set.elements[pat.content.Set.ptr_s]);
510          break;
511        case UPTCharRange:
512          numbuf[0]=pat.content.CharRange.ptr_c;
513          numbuf[1]=0;
514          appendthis=numbuf;
515          appendlen=1;
516          break;
517        case UPTNumRange:
518          snprintf(numbuf, sizeof(numbuf), "%0*d",
519                   pat.content.NumRange.padlength,
520                   pat.content.NumRange.ptr_n);
521          appendthis = numbuf;
522          appendlen = strlen(numbuf);
523          break;
524        default:
525          printf("internal error: invalid pattern type (%d)\n",
526                 (int)pat.type);
527          free(target);
528          return NULL;
529        }
530      }
531      else {
532        /* #[num] out of range, use the #[num] in the output */
533        filename = ptr;
534        appendthis=filename++;
535        appendlen=1;
536      }
537    }
538    else {
539      appendthis=filename++;
540      appendlen=1;
541    }
542    if(appendlen + stringlen >= allocsize) {
543      char *newstr;
544      /* we append a single byte to allow for the trailing byte to be appended
545         at the end of this function outside the while() loop */
546      allocsize = (appendlen + stringlen)*2;
547      newstr=realloc(target, allocsize + 1);
548      if(NULL ==newstr) {
549        free(target);
550        return NULL;
551      }
552      target=newstr;
553    }
554    memcpy(&target[stringlen], appendthis, appendlen);
555    stringlen += appendlen;
556  }
557  target[stringlen]= '\0';
558  return target;
559}
560