1/*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at http://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22#include "setup.h" 23 24#include <curl/curl.h> 25 26#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */ 27#include <curl/mprintf.h> 28 29#include "tool_urlglob.h" 30#include "tool_vms.h" 31 32#include "memdebug.h" /* keep this as LAST include */ 33 34typedef enum { 35 GLOB_OK, 36 GLOB_NO_MEM, 37 GLOB_ERROR 38} GlobCode; 39 40/* 41 * glob_word() 42 * 43 * Input a full globbed string, set the forth argument to the amount of 44 * strings we get out of this. Return GlobCode. 45 */ 46static GlobCode glob_word(URLGlob *, /* object anchor */ 47 char *, /* globbed string */ 48 size_t, /* position */ 49 int *); /* returned number of strings */ 50 51static GlobCode glob_set(URLGlob *glob, char *pattern, 52 size_t pos, int *amount) 53{ 54 /* processes a set expression with the point behind the opening '{' 55 ','-separated elements are collected until the next closing '}' 56 */ 57 URLPattern *pat; 58 GlobCode res; 59 bool done = FALSE; 60 char* buf = glob->glob_buffer; 61 62 pat = &glob->pattern[glob->size / 2]; 63 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 64 pat->type = UPTSet; 65 pat->content.Set.size = 0; 66 pat->content.Set.ptr_s = 0; 67 pat->content.Set.elements = NULL; 68 69 ++glob->size; 70 71 while(!done) { 72 switch (*pattern) { 73 case '\0': /* URL ended while set was still open */ 74 snprintf(glob->errormsg, sizeof(glob->errormsg), 75 "unmatched brace at pos %zu\n", pos); 76 return GLOB_ERROR; 77 78 case '{': 79 case '[': /* no nested expressions at this time */ 80 snprintf(glob->errormsg, sizeof(glob->errormsg), 81 "nested braces not supported at pos %zu\n", pos); 82 return GLOB_ERROR; 83 84 case ',': 85 case '}': /* set element completed */ 86 *buf = '\0'; 87 if(pat->content.Set.elements) { 88 char **new_arr = realloc(pat->content.Set.elements, 89 (pat->content.Set.size + 1) * sizeof(char*)); 90 if(!new_arr) { 91 short elem; 92 for(elem = 0; elem < pat->content.Set.size; elem++) 93 Curl_safefree(pat->content.Set.elements[elem]); 94 Curl_safefree(pat->content.Set.elements); 95 pat->content.Set.ptr_s = 0; 96 pat->content.Set.size = 0; 97 } 98 pat->content.Set.elements = new_arr; 99 } 100 else 101 pat->content.Set.elements = malloc(sizeof(char*)); 102 if(!pat->content.Set.elements) { 103 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); 104 return GLOB_NO_MEM; 105 } 106 pat->content.Set.elements[pat->content.Set.size] = 107 strdup(glob->glob_buffer); 108 if(!pat->content.Set.elements[pat->content.Set.size]) { 109 short elem; 110 for(elem = 0; elem < pat->content.Set.size; elem++) 111 Curl_safefree(pat->content.Set.elements[elem]); 112 Curl_safefree(pat->content.Set.elements); 113 pat->content.Set.ptr_s = 0; 114 pat->content.Set.size = 0; 115 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); 116 return GLOB_NO_MEM; 117 } 118 ++pat->content.Set.size; 119 120 if(*pattern == '}') { 121 /* entire set pattern completed */ 122 int wordamount; 123 124 /* always check for a literal (may be "") between patterns */ 125 res = glob_word(glob, ++pattern, ++pos, &wordamount); 126 if(res) { 127 short elem; 128 for(elem = 0; elem < pat->content.Set.size; elem++) 129 Curl_safefree(pat->content.Set.elements[elem]); 130 Curl_safefree(pat->content.Set.elements); 131 pat->content.Set.ptr_s = 0; 132 pat->content.Set.size = 0; 133 return res; 134 } 135 136 *amount = pat->content.Set.size * wordamount; 137 138 done = TRUE; 139 continue; 140 } 141 142 buf = glob->glob_buffer; 143 ++pattern; 144 ++pos; 145 break; 146 147 case ']': /* illegal closing bracket */ 148 snprintf(glob->errormsg, sizeof(glob->errormsg), 149 "illegal pattern at pos %zu\n", pos); 150 return GLOB_ERROR; 151 152 case '\\': /* escaped character, skip '\' */ 153 if(pattern[1]) { 154 ++pattern; 155 ++pos; 156 } 157 /* intentional fallthrough */ 158 default: 159 *buf++ = *pattern++; /* copy character to set element */ 160 ++pos; 161 } 162 } 163 return GLOB_OK; 164} 165 166static GlobCode glob_range(URLGlob *glob, char *pattern, 167 size_t pos, int *amount) 168{ 169 /* processes a range expression with the point behind the opening '[' 170 - char range: e.g. "a-z]", "B-Q]" 171 - num range: e.g. "0-9]", "17-2000]" 172 - num range with leading zeros: e.g. "001-999]" 173 expression is checked for well-formedness and collected until the next ']' 174 */ 175 URLPattern *pat; 176 char *c; 177 char sep; 178 char sep2; 179 int step; 180 int rc; 181 GlobCode res; 182 int wordamount = 1; 183 184 pat = &glob->pattern[glob->size / 2]; 185 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 186 ++glob->size; 187 188 if(ISALPHA(*pattern)) { 189 /* character range detected */ 190 char min_c; 191 char max_c; 192 193 pat->type = UPTCharRange; 194 195 rc = sscanf(pattern, "%c-%c%c%d%c", &min_c, &max_c, &sep, &step, &sep2); 196 197 if((rc < 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a'))) { 198 /* the pattern is not well-formed */ 199 snprintf(glob->errormsg, sizeof(glob->errormsg), 200 "error: bad range specification after pos %zu\n", pos); 201 return GLOB_ERROR; 202 } 203 204 /* check the (first) separating character */ 205 if((sep != ']') && (sep != ':')) { 206 snprintf(glob->errormsg, sizeof(glob->errormsg), 207 "error: unsupported character (%c) after range at pos %zu\n", 208 sep, pos); 209 return GLOB_ERROR; 210 } 211 212 /* if there was a ":[num]" thing, use that as step or else use 1 */ 213 pat->content.CharRange.step = 214 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1; 215 216 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 217 pat->content.CharRange.max_c = max_c; 218 } 219 else if(ISDIGIT(*pattern)) { 220 /* numeric range detected */ 221 int min_n; 222 int max_n; 223 224 pat->type = UPTNumRange; 225 pat->content.NumRange.padlength = 0; 226 227 rc = sscanf(pattern, "%d-%d%c%d%c", &min_n, &max_n, &sep, &step, &sep2); 228 229 if((rc < 2) || (min_n > max_n)) { 230 /* the pattern is not well-formed */ 231 snprintf(glob->errormsg, sizeof(glob->errormsg), 232 "error: bad range specification after pos %zu\n", pos); 233 return GLOB_ERROR; 234 } 235 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 236 pat->content.NumRange.max_n = max_n; 237 238 /* if there was a ":[num]" thing, use that as step or else use 1 */ 239 pat->content.NumRange.step = 240 ((sep == ':') && (rc == 5) && (sep2 == ']')) ? step : 1; 241 242 if(*pattern == '0') { 243 /* leading zero specified */ 244 c = pattern; 245 while(ISDIGIT(*c)) { 246 c++; 247 ++pat->content.NumRange.padlength; /* padding length is set for all 248 instances of this pattern */ 249 } 250 } 251 } 252 else { 253 snprintf(glob->errormsg, sizeof(glob->errormsg), 254 "illegal character in range specification at pos %zu\n", pos); 255 return GLOB_ERROR; 256 } 257 258 c = (char*)strchr(pattern, ']'); /* continue after next ']' */ 259 if(c) 260 c++; 261 else { 262 snprintf(glob->errormsg, sizeof(glob->errormsg), "missing ']'"); 263 return GLOB_ERROR; /* missing ']' */ 264 } 265 266 /* always check for a literal (may be "") between patterns */ 267 268 res = glob_word(glob, c, pos + (c - pattern), &wordamount); 269 if(res == GLOB_ERROR) { 270 wordamount = 1; 271 res = GLOB_OK; 272 } 273 274 if(!res) { 275 if(pat->type == UPTCharRange) 276 *amount = wordamount * (pat->content.CharRange.max_c - 277 pat->content.CharRange.min_c + 1); 278 else 279 *amount = wordamount * (pat->content.NumRange.max_n - 280 pat->content.NumRange.min_n + 1); 281 } 282 283 return res; /* GLOB_OK or GLOB_NO_MEM */ 284} 285 286static GlobCode glob_word(URLGlob *glob, char *pattern, 287 size_t pos, int *amount) 288{ 289 /* processes a literal string component of a URL 290 special characters '{' and '[' branch to set/range processing functions 291 */ 292 char* buf = glob->glob_buffer; 293 size_t litindex; 294 GlobCode res = GLOB_OK; 295 296 *amount = 1; /* default is one single string */ 297 298 while(*pattern != '\0' && *pattern != '{' && *pattern != '[') { 299 if(*pattern == '}' || *pattern == ']') { 300 snprintf(glob->errormsg, sizeof(glob->errormsg), 301 "unmatched close brace/bracket at pos %zu\n", pos); 302 return GLOB_ERROR; 303 } 304 305 /* only allow \ to escape known "special letters" */ 306 if(*pattern == '\\' && 307 (*(pattern+1) == '{' || *(pattern+1) == '[' || 308 *(pattern+1) == '}' || *(pattern+1) == ']') ) { 309 310 /* escape character, skip '\' */ 311 ++pattern; 312 ++pos; 313 } 314 *buf++ = *pattern++; /* copy character to literal */ 315 ++pos; 316 } 317 *buf = '\0'; 318 litindex = glob->size / 2; 319 /* literals 0,1,2,... correspond to size=0,2,4,... */ 320 glob->literal[litindex] = strdup(glob->glob_buffer); 321 if(!glob->literal[litindex]) { 322 snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); 323 return GLOB_NO_MEM; 324 } 325 ++glob->size; 326 327 switch (*pattern) { 328 case '\0': 329 /* singular URL processed */ 330 break; 331 332 case '{': 333 /* process set pattern */ 334 res = glob_set(glob, ++pattern, ++pos, amount); 335 break; 336 337 case '[': 338 /* process range pattern */ 339 res = glob_range(glob, ++pattern, ++pos, amount); 340 break; 341 } 342 343 if(res) 344 Curl_safefree(glob->literal[litindex]); 345 346 return res; 347} 348 349int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error) 350{ 351 /* 352 * We can deal with any-size, just make a buffer with the same length 353 * as the specified URL! 354 */ 355 URLGlob *glob_expand; 356 int amount; 357 char *glob_buffer; 358 GlobCode res; 359 360 *glob = NULL; 361 362 glob_buffer = malloc(strlen(url) + 1); 363 if(!glob_buffer) 364 return CURLE_OUT_OF_MEMORY; 365 366 glob_expand = calloc(1, sizeof(URLGlob)); 367 if(!glob_expand) { 368 Curl_safefree(glob_buffer); 369 return CURLE_OUT_OF_MEMORY; 370 } 371 glob_expand->size = 0; 372 glob_expand->urllen = strlen(url); 373 glob_expand->glob_buffer = glob_buffer; 374 glob_expand->beenhere = 0; 375 376 res = glob_word(glob_expand, url, 1, &amount); 377 if(!res) 378 *urlnum = amount; 379 else { 380 if(error && glob_expand->errormsg[0]) { 381 /* send error description to the error-stream */ 382 fprintf(error, "curl: (%d) [globbing] %s", 383 (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT, 384 glob_expand->errormsg); 385 } 386 /* it failed, we cleanup */ 387 Curl_safefree(glob_buffer); 388 Curl_safefree(glob_expand); 389 *urlnum = 1; 390 return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; 391 } 392 393 *glob = glob_expand; 394 return CURLE_OK; 395} 396 397void glob_cleanup(URLGlob* glob) 398{ 399 size_t i; 400 int elem; 401 402 for(i = glob->size - 1; i < glob->size; --i) { 403 if(!(i & 1)) { /* even indexes contain literals */ 404 Curl_safefree(glob->literal[i/2]); 405 } 406 else { /* odd indexes contain sets or ranges */ 407 if((glob->pattern[i/2].type == UPTSet) && 408 (glob->pattern[i/2].content.Set.elements)) { 409 for(elem = glob->pattern[i/2].content.Set.size - 1; 410 elem >= 0; 411 --elem) { 412 Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]); 413 } 414 Curl_safefree(glob->pattern[i/2].content.Set.elements); 415 } 416 } 417 } 418 Curl_safefree(glob->glob_buffer); 419 Curl_safefree(glob); 420} 421 422int glob_next_url(char **globbed, URLGlob *glob) 423{ 424 URLPattern *pat; 425 char *lit; 426 size_t i; 427 size_t j; 428 size_t len; 429 size_t buflen = glob->urllen + 1; 430 char *buf = glob->glob_buffer; 431 432 *globbed = NULL; 433 434 if(!glob->beenhere) 435 glob->beenhere = 1; 436 else { 437 bool carry = TRUE; 438 439 /* implement a counter over the index ranges of all patterns, 440 starting with the rightmost pattern */ 441 for(i = glob->size / 2 - 1; carry && (i < glob->size); --i) { 442 carry = FALSE; 443 pat = &glob->pattern[i]; 444 switch (pat->type) { 445 case UPTSet: 446 if((pat->content.Set.elements) && 447 (++pat->content.Set.ptr_s == pat->content.Set.size)) { 448 pat->content.Set.ptr_s = 0; 449 carry = TRUE; 450 } 451 break; 452 case UPTCharRange: 453 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step + 454 (int)((unsigned char)pat->content.CharRange.ptr_c)); 455 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 456 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 457 carry = TRUE; 458 } 459 break; 460 case UPTNumRange: 461 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 462 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 463 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 464 carry = TRUE; 465 } 466 break; 467 default: 468 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 469 return CURLE_FAILED_INIT; 470 } 471 } 472 if(carry) { /* first pattern ptr has run into overflow, done! */ 473 /* TODO: verify if this should actally return CURLE_OK. */ 474 return CURLE_OK; /* CURLE_OK to match previous behavior */ 475 } 476 } 477 478 for(j = 0; j < glob->size; ++j) { 479 if(!(j&1)) { /* every other term (j even) is a literal */ 480 lit = glob->literal[j/2]; 481 len = snprintf(buf, buflen, "%s", lit); 482 buf += len; 483 buflen -= len; 484 } 485 else { /* the rest (i odd) are patterns */ 486 pat = &glob->pattern[j/2]; 487 switch(pat->type) { 488 case UPTSet: 489 if(pat->content.Set.elements) { 490 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 491 snprintf(buf, buflen, "%s", 492 pat->content.Set.elements[pat->content.Set.ptr_s]); 493 buf += len; 494 buflen -= len; 495 } 496 break; 497 case UPTCharRange: 498 *buf++ = pat->content.CharRange.ptr_c; 499 break; 500 case UPTNumRange: 501 len = snprintf(buf, buflen, "%0*d", 502 pat->content.NumRange.padlength, 503 pat->content.NumRange.ptr_n); 504 buf += len; 505 buflen -= len; 506 break; 507 default: 508 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 509 return CURLE_FAILED_INIT; 510 } 511 } 512 } 513 *buf = '\0'; 514 515 *globbed = strdup(glob->glob_buffer); 516 if(!*globbed) 517 return CURLE_OUT_OF_MEMORY; 518 519 return CURLE_OK; 520} 521 522int glob_match_url(char **result, char *filename, URLGlob *glob) 523{ 524 char *target; 525 size_t allocsize; 526 char numbuf[18]; 527 char *appendthis = NULL; 528 size_t appendlen = 0; 529 size_t stringlen = 0; 530 531 *result = NULL; 532 533 /* We cannot use the glob_buffer for storage here since the filename may 534 * be longer than the URL we use. We allocate a good start size, then 535 * we need to realloc in case of need. 536 */ 537 allocsize = strlen(filename) + 1; /* make it at least one byte to store the 538 trailing zero */ 539 target = malloc(allocsize); 540 if(!target) 541 return CURLE_OUT_OF_MEMORY; 542 543 while(*filename) { 544 if(*filename == '#' && ISDIGIT(filename[1])) { 545 unsigned long i; 546 char *ptr = filename; 547 unsigned long num = strtoul(&filename[1], &filename, 10); 548 i = num - 1UL; 549 550 if(num && (i <= glob->size / 2)) { 551 URLPattern pat = glob->pattern[i]; 552 switch (pat.type) { 553 case UPTSet: 554 if(pat.content.Set.elements) { 555 appendthis = pat.content.Set.elements[pat.content.Set.ptr_s]; 556 appendlen = 557 strlen(pat.content.Set.elements[pat.content.Set.ptr_s]); 558 } 559 break; 560 case UPTCharRange: 561 numbuf[0] = pat.content.CharRange.ptr_c; 562 numbuf[1] = 0; 563 appendthis = numbuf; 564 appendlen = 1; 565 break; 566 case UPTNumRange: 567 snprintf(numbuf, sizeof(numbuf), "%0*d", 568 pat.content.NumRange.padlength, 569 pat.content.NumRange.ptr_n); 570 appendthis = numbuf; 571 appendlen = strlen(numbuf); 572 break; 573 default: 574 printf("internal error: invalid pattern type (%d)\n", 575 (int)pat.type); 576 Curl_safefree(target); 577 return CURLE_FAILED_INIT; 578 } 579 } 580 else { 581 /* #[num] out of range, use the #[num] in the output */ 582 filename = ptr; 583 appendthis = filename++; 584 appendlen = 1; 585 } 586 } 587 else { 588 appendthis = filename++; 589 appendlen = 1; 590 } 591 if(appendlen + stringlen >= allocsize) { 592 char *newstr; 593 /* we append a single byte to allow for the trailing byte to be appended 594 at the end of this function outside the while() loop */ 595 allocsize = (appendlen + stringlen) * 2; 596 newstr = realloc(target, allocsize + 1); 597 if(!newstr) { 598 Curl_safefree(target); 599 return CURLE_OUT_OF_MEMORY; 600 } 601 target = newstr; 602 } 603 memcpy(&target[stringlen], appendthis, appendlen); 604 stringlen += appendlen; 605 } 606 target[stringlen]= '\0'; 607 *result = target; 608 return CURLE_OK; 609} 610 611