1/*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at http://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 23/* client-local setup.h */ 24#include "setup.h" 25 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <ctype.h> 30#include <curl/curl.h> 31 32#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */ 33#include <curl/mprintf.h> 34 35#include "urlglob.h" 36#include "os-specific.h" 37 38#if defined(CURLDEBUG) && defined(CURLTOOLDEBUG) 39#include "memdebug.h" 40#endif 41 42typedef enum { 43 GLOB_OK, 44 GLOB_ERROR 45} GlobCode; 46 47/* 48 * glob_word() 49 * 50 * Input a full globbed string, set the forth argument to the amount of 51 * strings we get out of this. Return GlobCode. 52 */ 53static GlobCode glob_word(URLGlob *, /* object anchor */ 54 char *, /* globbed string */ 55 size_t, /* position */ 56 int *); /* returned number of strings */ 57 58static GlobCode glob_set(URLGlob *glob, char *pattern, 59 size_t pos, int *amount) 60{ 61 /* processes a set expression with the point behind the opening '{' 62 ','-separated elements are collected until the next closing '}' 63 */ 64 bool done = FALSE; 65 char* buf = glob->glob_buffer; 66 URLPattern *pat; 67 68 pat = (URLPattern*)&glob->pattern[glob->size / 2]; 69 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 70 pat->type = UPTSet; 71 pat->content.Set.size = 0; 72 pat->content.Set.ptr_s = 0; 73 pat->content.Set.elements = NULL; 74 75 ++glob->size; 76 77 while(!done) { 78 switch (*pattern) { 79 case '\0': /* URL ended while set was still open */ 80 snprintf(glob->errormsg, sizeof(glob->errormsg), 81 "unmatched brace at pos %zu\n", pos); 82 return GLOB_ERROR; 83 84 case '{': 85 case '[': /* no nested expressions at this time */ 86 snprintf(glob->errormsg, sizeof(glob->errormsg), 87 "nested braces not supported at pos %zu\n", pos); 88 return GLOB_ERROR; 89 90 case ',': 91 case '}': /* set element completed */ 92 *buf = '\0'; 93 if(pat->content.Set.elements) 94 pat->content.Set.elements = 95 realloc(pat->content.Set.elements, 96 (pat->content.Set.size + 1) * sizeof(char*)); 97 else 98 pat->content.Set.elements = 99 malloc((pat->content.Set.size + 1) * sizeof(char*)); 100 if(!pat->content.Set.elements) { 101 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory"); 102 return GLOB_ERROR; 103 } 104 pat->content.Set.elements[pat->content.Set.size] = 105 strdup(glob->glob_buffer); 106 if(!pat->content.Set.elements[pat->content.Set.size]) { 107 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory"); 108 return GLOB_ERROR; 109 } 110 ++pat->content.Set.size; 111 112 if(*pattern == '}') { 113 /* entire set pattern completed */ 114 int wordamount; 115 116 /* always check for a literal (may be "") between patterns */ 117 if(GLOB_ERROR == glob_word(glob, ++pattern, ++pos, &wordamount)) 118 return GLOB_ERROR; 119 *amount = pat->content.Set.size * wordamount; 120 121 done = TRUE; 122 continue; 123 } 124 125 buf = glob->glob_buffer; 126 ++pattern; 127 ++pos; 128 break; 129 130 case ']': /* illegal closing bracket */ 131 snprintf(glob->errormsg, sizeof(glob->errormsg), 132 "illegal pattern at pos %zu\n", pos); 133 return GLOB_ERROR; 134 135 case '\\': /* escaped character, skip '\' */ 136 if(pattern[1]) { 137 ++pattern; 138 ++pos; 139 } 140 /* intentional fallthrough */ 141 default: 142 *buf++ = *pattern++; /* copy character to set element */ 143 ++pos; 144 } 145 } 146 return GLOB_OK; 147} 148 149static GlobCode glob_range(URLGlob *glob, char *pattern, 150 size_t pos, int *amount) 151{ 152 /* processes a range expression with the point behind the opening '[' 153 - char range: e.g. "a-z]", "B-Q]" 154 - num range: e.g. "0-9]", "17-2000]" 155 - num range with leading zeros: e.g. "001-999]" 156 expression is checked for well-formedness and collected until the next ']' 157 */ 158 URLPattern *pat; 159 char *c; 160 int wordamount=1; 161 char sep; 162 char sep2; 163 int step; 164 int rc; 165 166 pat = (URLPattern*)&glob->pattern[glob->size / 2]; 167 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 168 ++glob->size; 169 170 if(ISALPHA(*pattern)) { /* character range detected */ 171 char min_c; 172 char max_c; 173 174 pat->type = UPTCharRange; 175 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2); 176 if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) { 177 /* the pattern is not well-formed */ 178 snprintf(glob->errormsg, sizeof(glob->errormsg), 179 "error: bad range specification after pos %zu\n", pos); 180 return GLOB_ERROR; 181 } 182 183 /* check the (first) separating character */ 184 if((sep != ']') && (sep != ':')) { 185 snprintf(glob->errormsg, sizeof(glob->errormsg), 186 "error: unsupported character (%c) after range at pos %zu\n", 187 sep, pos); 188 return GLOB_ERROR; 189 } 190 191 /* if there was a ":[num]" thing, use that as step or else use 1 */ 192 pat->content.CharRange.step = 193 ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1; 194 195 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 196 pat->content.CharRange.max_c = max_c; 197 } 198 else if(ISDIGIT(*pattern)) { /* numeric range detected */ 199 int min_n; 200 int max_n; 201 202 pat->type = UPTNumRange; 203 pat->content.NumRange.padlength = 0; 204 205 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2); 206 207 if((rc < 2) || (min_n > max_n)) { 208 /* the pattern is not well-formed */ 209 snprintf(glob->errormsg, sizeof(glob->errormsg), 210 "error: bad range specification after pos %zu\n", pos); 211 return GLOB_ERROR; 212 } 213 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 214 pat->content.NumRange.max_n = max_n; 215 216 /* if there was a ":[num]" thing, use that as step or else use 1 */ 217 pat->content.NumRange.step = 218 ((sep == ':') && (rc == 5) && (sep2 == ']'))?step:1; 219 220 if(*pattern == '0') { /* leading zero specified */ 221 c = pattern; 222 while(ISDIGIT(*c)) { 223 c++; 224 ++pat->content.NumRange.padlength; /* padding length is set for all 225 instances of this pattern */ 226 } 227 } 228 229 } 230 else { 231 snprintf(glob->errormsg, sizeof(glob->errormsg), 232 "illegal character in range specification at pos %zu\n", pos); 233 return GLOB_ERROR; 234 } 235 236 c = (char*)strchr(pattern, ']'); /* continue after next ']' */ 237 if(c) 238 c++; 239 else { 240 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'"); 241 return GLOB_ERROR; /* missing ']' */ 242 } 243 244 /* always check for a literal (may be "") between patterns */ 245 246 if(GLOB_ERROR == glob_word(glob, c, pos + (c - pattern), &wordamount)) 247 wordamount = 1; 248 249 if(pat->type == UPTCharRange) 250 *amount = (pat->content.CharRange.max_c - 251 pat->content.CharRange.min_c + 1) * 252 wordamount; 253 else 254 *amount = (pat->content.NumRange.max_n - 255 pat->content.NumRange.min_n + 1) * wordamount; 256 257 return GLOB_OK; 258} 259 260static GlobCode glob_word(URLGlob *glob, char *pattern, 261 size_t pos, int *amount) 262{ 263 /* processes a literal string component of a URL 264 special characters '{' and '[' branch to set/range processing functions 265 */ 266 char* buf = glob->glob_buffer; 267 size_t litindex; 268 GlobCode res = GLOB_OK; 269 270 *amount = 1; /* default is one single string */ 271 272 while(*pattern != '\0' && *pattern != '{' && *pattern != '[') { 273 if(*pattern == '}' || *pattern == ']') { 274 snprintf(glob->errormsg, sizeof(glob->errormsg), 275 "unmatched close brace/bracket at pos %zu\n", pos); 276 return GLOB_ERROR; 277 } 278 279 /* only allow \ to escape known "special letters" */ 280 if(*pattern == '\\' && 281 (*(pattern+1) == '{' || *(pattern+1) == '[' || 282 *(pattern+1) == '}' || *(pattern+1) == ']') ) { 283 284 /* escape character, skip '\' */ 285 ++pattern; 286 ++pos; 287 } 288 *buf++ = *pattern++; /* copy character to literal */ 289 ++pos; 290 } 291 *buf = '\0'; 292 litindex = glob->size / 2; 293 /* literals 0,1,2,... correspond to size=0,2,4,... */ 294 glob->literal[litindex] = strdup(glob->glob_buffer); 295 if(!glob->literal[litindex]) 296 return GLOB_ERROR; 297 ++glob->size; 298 299 switch (*pattern) { 300 case '\0': 301 break; /* singular URL processed */ 302 303 case '{': 304 /* process set pattern */ 305 res = glob_set(glob, ++pattern, ++pos, amount); 306 break; 307 308 case '[': 309 /* process range pattern */ 310 res= glob_range(glob, ++pattern, ++pos, amount); 311 break; 312 } 313 314 if(GLOB_OK != res) 315 /* free that strdup'ed string again */ 316 free(glob->literal[litindex]); 317 318 return res; /* something got wrong */ 319} 320 321int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error) 322{ 323 /* 324 * We can deal with any-size, just make a buffer with the same length 325 * as the specified URL! 326 */ 327 URLGlob *glob_expand; 328 int amount; 329 char *glob_buffer = malloc(strlen(url)+1); 330 331 *glob = NULL; 332 if(NULL == glob_buffer) 333 return CURLE_OUT_OF_MEMORY; 334 335 glob_expand = calloc(1, sizeof(URLGlob)); 336 if(NULL == glob_expand) { 337 free(glob_buffer); 338 return CURLE_OUT_OF_MEMORY; 339 } 340 glob_expand->size = 0; 341 glob_expand->urllen = strlen(url); 342 glob_expand->glob_buffer = glob_buffer; 343 glob_expand->beenhere=0; 344 if(GLOB_OK == glob_word(glob_expand, url, 1, &amount)) 345 *urlnum = amount; 346 else { 347 if(error && glob_expand->errormsg[0]) { 348 /* send error description to the error-stream */ 349 fprintf(error, "curl: (%d) [globbing] %s", 350 CURLE_URL_MALFORMAT, glob_expand->errormsg); 351 } 352 /* it failed, we cleanup */ 353 free(glob_buffer); 354 free(glob_expand); 355 glob_expand = NULL; 356 *urlnum = 1; 357 return CURLE_URL_MALFORMAT; 358 } 359 360 *glob = glob_expand; 361 return CURLE_OK; 362} 363 364void glob_cleanup(URLGlob* glob) 365{ 366 size_t i; 367 int elem; 368 369 for(i = glob->size - 1; i < glob->size; --i) { 370 if(!(i & 1)) { /* even indexes contain literals */ 371 free(glob->literal[i/2]); 372 } 373 else { /* odd indexes contain sets or ranges */ 374 if((glob->pattern[i/2].type == UPTSet) && 375 (glob->pattern[i/2].content.Set.elements)) { 376 for(elem = glob->pattern[i/2].content.Set.size - 1; 377 elem >= 0; 378 --elem) { 379 if(glob->pattern[i/2].content.Set.elements[elem]) 380 free(glob->pattern[i/2].content.Set.elements[elem]); 381 } 382 free(glob->pattern[i/2].content.Set.elements); 383 } 384 } 385 } 386 free(glob->glob_buffer); 387 free(glob); 388} 389 390char *glob_next_url(URLGlob *glob) 391{ 392 char *buf = glob->glob_buffer; 393 URLPattern *pat; 394 char *lit; 395 size_t i; 396 size_t j; 397 size_t buflen = glob->urllen+1; 398 size_t len; 399 400 if(!glob->beenhere) 401 glob->beenhere = 1; 402 else { 403 bool carry = TRUE; 404 405 /* implement a counter over the index ranges of all patterns, 406 starting with the rightmost pattern */ 407 for(i = glob->size / 2 - 1; carry && i < glob->size; --i) { 408 carry = FALSE; 409 pat = &glob->pattern[i]; 410 switch (pat->type) { 411 case UPTSet: 412 if(++pat->content.Set.ptr_s == pat->content.Set.size) { 413 pat->content.Set.ptr_s = 0; 414 carry = TRUE; 415 } 416 break; 417 case UPTCharRange: 418 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step + 419 (int)((unsigned char)pat->content.CharRange.ptr_c)); 420 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 421 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 422 carry = TRUE; 423 } 424 break; 425 case UPTNumRange: 426 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 427 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 428 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 429 carry = TRUE; 430 } 431 break; 432 default: 433 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 434 exit (CURLE_FAILED_INIT); 435 } 436 } 437 if(carry) /* first pattern ptr has run into overflow, done! */ 438 return NULL; 439 } 440 441 for(j = 0; j < glob->size; ++j) { 442 if(!(j&1)) { /* every other term (j even) is a literal */ 443 lit = glob->literal[j/2]; 444 len = snprintf(buf, buflen, "%s", lit); 445 buf += len; 446 buflen -= len; 447 } 448 else { /* the rest (i odd) are patterns */ 449 pat = &glob->pattern[j/2]; 450 switch(pat->type) { 451 case UPTSet: 452 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 453 snprintf(buf, buflen, "%s", 454 pat->content.Set.elements[pat->content.Set.ptr_s]); 455 buf += len; 456 buflen -= len; 457 break; 458 case UPTCharRange: 459 *buf++ = pat->content.CharRange.ptr_c; 460 break; 461 case UPTNumRange: 462 len = snprintf(buf, buflen, "%0*d", 463 pat->content.NumRange.padlength, 464 pat->content.NumRange.ptr_n); 465 buf += len; 466 buflen -= len; 467 break; 468 default: 469 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 470 exit (CURLE_FAILED_INIT); 471 } 472 } 473 } 474 *buf = '\0'; 475 return strdup(glob->glob_buffer); 476} 477 478char *glob_match_url(char *filename, URLGlob *glob) 479{ 480 char *target; 481 size_t allocsize; 482 size_t stringlen=0; 483 char numbuf[18]; 484 char *appendthis = NULL; 485 size_t appendlen = 0; 486 487 /* We cannot use the glob_buffer for storage here since the filename may 488 * be longer than the URL we use. We allocate a good start size, then 489 * we need to realloc in case of need. 490 */ 491 allocsize=strlen(filename)+1; /* make it at least one byte to store the 492 trailing zero */ 493 target = malloc(allocsize); 494 if(NULL == target) 495 return NULL; /* major failure */ 496 497 while(*filename) { 498 if(*filename == '#' && ISDIGIT(filename[1])) { 499 unsigned long i; 500 char *ptr = filename; 501 unsigned long num = strtoul(&filename[1], &filename, 10); 502 i = num-1; 503 504 if(num && (i <= glob->size / 2)) { 505 URLPattern pat = glob->pattern[i]; 506 switch (pat.type) { 507 case UPTSet: 508 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s]; 509 appendlen = strlen(pat.content.Set.elements[pat.content.Set.ptr_s]); 510 break; 511 case UPTCharRange: 512 numbuf[0]=pat.content.CharRange.ptr_c; 513 numbuf[1]=0; 514 appendthis=numbuf; 515 appendlen=1; 516 break; 517 case UPTNumRange: 518 snprintf(numbuf, sizeof(numbuf), "%0*d", 519 pat.content.NumRange.padlength, 520 pat.content.NumRange.ptr_n); 521 appendthis = numbuf; 522 appendlen = strlen(numbuf); 523 break; 524 default: 525 printf("internal error: invalid pattern type (%d)\n", 526 (int)pat.type); 527 free(target); 528 return NULL; 529 } 530 } 531 else { 532 /* #[num] out of range, use the #[num] in the output */ 533 filename = ptr; 534 appendthis=filename++; 535 appendlen=1; 536 } 537 } 538 else { 539 appendthis=filename++; 540 appendlen=1; 541 } 542 if(appendlen + stringlen >= allocsize) { 543 char *newstr; 544 /* we append a single byte to allow for the trailing byte to be appended 545 at the end of this function outside the while() loop */ 546 allocsize = (appendlen + stringlen)*2; 547 newstr=realloc(target, allocsize + 1); 548 if(NULL ==newstr) { 549 free(target); 550 return NULL; 551 } 552 target=newstr; 553 } 554 memcpy(&target[stringlen], appendthis, appendlen); 555 stringlen += appendlen; 556 } 557 target[stringlen]= '\0'; 558 return target; 559} 560