1/*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at http://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22#include "tool_setup.h" 23 24#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */ 25#include <curl/mprintf.h> 26 27#include "tool_urlglob.h" 28#include "tool_vms.h" 29 30#include "memdebug.h" /* keep this as LAST include */ 31 32typedef enum { 33 GLOB_OK, 34 GLOB_NO_MEM = CURLE_OUT_OF_MEMORY, 35 GLOB_ERROR = CURLE_URL_MALFORMAT 36} GlobCode; 37 38#define GLOBERROR(string, column, code) \ 39 glob->error = string, glob->pos = column, code 40 41void glob_cleanup(URLGlob* glob); 42 43static GlobCode glob_fixed(URLGlob *glob, char *fixed, size_t len) 44{ 45 URLPattern *pat = &glob->pattern[glob->size]; 46 pat->type = UPTSet; 47 pat->content.Set.size = 1; 48 pat->content.Set.ptr_s = 0; 49 pat->globindex = -1; 50 51 pat->content.Set.elements = malloc(sizeof(char*)); 52 53 if(!pat->content.Set.elements) 54 return GLOBERROR("out of memory", 0, GLOB_NO_MEM); 55 56 pat->content.Set.elements[0] = malloc(len+1); 57 if(!pat->content.Set.elements[0]) 58 return GLOBERROR("out of memory", 0, GLOB_NO_MEM); 59 60 memcpy(pat->content.Set.elements[0], fixed, len); 61 pat->content.Set.elements[0][len] = 0; 62 63 return GLOB_OK; 64} 65 66/* multiply 67 * 68 * Multiplies and checks for overflow. 69 */ 70static int multiply(unsigned long *amount, long with) 71{ 72 unsigned long sum = *amount * with; 73 if(sum/with != *amount) 74 return 1; /* didn't fit, bail out */ 75 *amount = sum; 76 return 0; 77} 78 79static GlobCode glob_set(URLGlob *glob, char **patternp, 80 size_t *posp, unsigned long *amount, 81 int globindex) 82{ 83 /* processes a set expression with the point behind the opening '{' 84 ','-separated elements are collected until the next closing '}' 85 */ 86 URLPattern *pat; 87 bool done = FALSE; 88 char *buf = glob->glob_buffer; 89 char *pattern = *patternp; 90 char *opattern = pattern; 91 size_t opos = *posp-1; 92 93 pat = &glob->pattern[glob->size]; 94 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 95 pat->type = UPTSet; 96 pat->content.Set.size = 0; 97 pat->content.Set.ptr_s = 0; 98 pat->content.Set.elements = NULL; 99 pat->globindex = globindex; 100 101 while(!done) { 102 switch (*pattern) { 103 case '\0': /* URL ended while set was still open */ 104 return GLOBERROR("unmatched brace", opos, GLOB_ERROR); 105 106 case '{': 107 case '[': /* no nested expressions at this time */ 108 return GLOBERROR("nested brace", *posp, GLOB_ERROR); 109 110 case '}': /* set element completed */ 111 if(opattern == pattern) 112 return GLOBERROR("empty string within braces", *posp, GLOB_ERROR); 113 114 /* add 1 to size since it'll be incremented below */ 115 if(multiply(amount, pat->content.Set.size+1)) 116 return GLOBERROR("range overflow", 0, GLOB_ERROR); 117 118 /* fall-through */ 119 case ',': 120 121 *buf = '\0'; 122 if(pat->content.Set.elements) { 123 char **new_arr = realloc(pat->content.Set.elements, 124 (pat->content.Set.size + 1) * sizeof(char*)); 125 if(!new_arr) 126 return GLOBERROR("out of memory", 0, GLOB_NO_MEM); 127 128 pat->content.Set.elements = new_arr; 129 } 130 else 131 pat->content.Set.elements = malloc(sizeof(char*)); 132 133 if(!pat->content.Set.elements) 134 return GLOBERROR("out of memory", 0, GLOB_NO_MEM); 135 136 pat->content.Set.elements[pat->content.Set.size] = 137 strdup(glob->glob_buffer); 138 if(!pat->content.Set.elements[pat->content.Set.size]) 139 return GLOBERROR("out of memory", 0, GLOB_NO_MEM); 140 ++pat->content.Set.size; 141 142 if(*pattern == '}') { 143 pattern++; /* pass the closing brace */ 144 done = TRUE; 145 continue; 146 } 147 148 buf = glob->glob_buffer; 149 ++pattern; 150 ++(*posp); 151 break; 152 153 case ']': /* illegal closing bracket */ 154 return GLOBERROR("unexpected close bracket", *posp, GLOB_ERROR); 155 156 case '\\': /* escaped character, skip '\' */ 157 if(pattern[1]) { 158 ++pattern; 159 ++(*posp); 160 } 161 /* intentional fallthrough */ 162 default: 163 *buf++ = *pattern++; /* copy character to set element */ 164 ++(*posp); 165 } 166 } 167 168 *patternp = pattern; /* return with the new position */ 169 return GLOB_OK; 170} 171 172static GlobCode glob_range(URLGlob *glob, char **patternp, 173 size_t *posp, unsigned long *amount, 174 int globindex) 175{ 176 /* processes a range expression with the point behind the opening '[' 177 - char range: e.g. "a-z]", "B-Q]" 178 - num range: e.g. "0-9]", "17-2000]" 179 - num range with leading zeros: e.g. "001-999]" 180 expression is checked for well-formedness and collected until the next ']' 181 */ 182 URLPattern *pat; 183 int rc; 184 char *pattern = *patternp; 185 char *c; 186 187 pat = &glob->pattern[glob->size]; 188 pat->globindex = globindex; 189 190 if(ISALPHA(*pattern)) { 191 /* character range detected */ 192 char min_c; 193 char max_c; 194 int step=1; 195 196 pat->type = UPTCharRange; 197 198 rc = sscanf(pattern, "%c-%c", &min_c, &max_c); 199 200 if((rc == 2) && (pattern[3] == ':')) { 201 char *endp; 202 unsigned long lstep; 203 errno = 0; 204 lstep = strtoul(&pattern[3], &endp, 10); 205 if(errno || (*endp != ']')) 206 step = -1; 207 else { 208 pattern = endp+1; 209 step = (int)lstep; 210 if(step > (max_c - min_c)) 211 step = -1; 212 } 213 } 214 else 215 pattern += 4; 216 217 *posp += (pattern - *patternp); 218 219 if((rc != 2) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) || 220 (step < 0) ) 221 /* the pattern is not well-formed */ 222 return GLOBERROR("bad range", *posp, GLOB_ERROR); 223 224 /* if there was a ":[num]" thing, use that as step or else use 1 */ 225 pat->content.CharRange.step = step; 226 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 227 pat->content.CharRange.max_c = max_c; 228 229 if(multiply(amount, (pat->content.CharRange.max_c - 230 pat->content.CharRange.min_c + 1))) 231 return GLOBERROR("range overflow", *posp, GLOB_ERROR); 232 } 233 else if(ISDIGIT(*pattern)) { 234 /* numeric range detected */ 235 unsigned long min_n; 236 unsigned long max_n = 0; 237 unsigned long step_n = 0; 238 char *endp; 239 240 pat->type = UPTNumRange; 241 pat->content.NumRange.padlength = 0; 242 243 if(*pattern == '0') { 244 /* leading zero specified, count them! */ 245 c = pattern; 246 while(ISDIGIT(*c)) { 247 c++; 248 ++pat->content.NumRange.padlength; /* padding length is set for all 249 instances of this pattern */ 250 } 251 } 252 253 errno = 0; 254 min_n = strtoul(pattern, &endp, 10); 255 if(errno || (endp == pattern)) 256 endp=NULL; 257 else { 258 if(*endp != '-') 259 endp = NULL; 260 else { 261 pattern = endp+1; 262 errno = 0; 263 max_n = strtoul(pattern, &endp, 10); 264 if(errno || (*endp == ':')) { 265 pattern = endp+1; 266 errno = 0; 267 step_n = strtoul(pattern, &endp, 10); 268 if(errno) 269 /* over/underflow situation */ 270 endp = NULL; 271 } 272 else 273 step_n = 1; 274 if(endp && (*endp == ']')) { 275 pattern= endp+1; 276 } 277 else 278 endp = NULL; 279 } 280 } 281 282 *posp += (pattern - *patternp); 283 284 if(!endp || (min_n > max_n) || (step_n > (max_n - min_n))) 285 /* the pattern is not well-formed */ 286 return GLOBERROR("bad range", *posp, GLOB_ERROR); 287 288 /* typecasting to ints are fine here since we make sure above that we 289 are within 31 bits */ 290 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 291 pat->content.NumRange.max_n = max_n; 292 pat->content.NumRange.step = step_n; 293 294 if(multiply(amount, (pat->content.NumRange.max_n - 295 pat->content.NumRange.min_n + 1))) 296 return GLOBERROR("range overflow", *posp, GLOB_ERROR); 297 } 298 else 299 return GLOBERROR("bad range specification", *posp, GLOB_ERROR); 300 301 *patternp = pattern; 302 return GLOB_OK; 303} 304 305static bool peek_ipv6(const char *str, size_t *skip) 306{ 307 /* 308 * Scan for a potential IPv6 literal. 309 * - Valid globs contain a hyphen and <= 1 colon. 310 * - IPv6 literals contain no hyphens and >= 2 colons. 311 */ 312 size_t i = 0; 313 size_t colons = 0; 314 if(str[i++] != '[') { 315 return FALSE; 316 } 317 for(;;) { 318 const char c = str[i++]; 319 if(ISALNUM(c) || c == '.' || c == '%') { 320 /* ok */ 321 } 322 else if(c == ':') { 323 colons++; 324 } 325 else if(c == ']') { 326 *skip = i; 327 return colons >= 2 ? TRUE : FALSE; 328 } 329 else { 330 return FALSE; 331 } 332 } 333} 334 335static GlobCode glob_parse(URLGlob *glob, char *pattern, 336 size_t pos, unsigned long *amount) 337{ 338 /* processes a literal string component of a URL 339 special characters '{' and '[' branch to set/range processing functions 340 */ 341 GlobCode res = GLOB_OK; 342 int globindex = 0; /* count "actual" globs */ 343 344 *amount = 1; 345 346 while(*pattern && !res) { 347 char *buf = glob->glob_buffer; 348 size_t sublen = 0; 349 while(*pattern && *pattern != '{') { 350 if(*pattern == '[') { 351 /* Skip over potential IPv6 literals. */ 352 size_t skip; 353 if(peek_ipv6(pattern, &skip)) { 354 memcpy(buf, pattern, skip); 355 buf += skip; 356 pattern += skip; 357 sublen += skip; 358 continue; 359 } 360 break; 361 } 362 if(*pattern == '}' || *pattern == ']') 363 return GLOBERROR("unmatched close brace/bracket", pos, GLOB_ERROR); 364 365 /* only allow \ to escape known "special letters" */ 366 if(*pattern == '\\' && 367 (*(pattern+1) == '{' || *(pattern+1) == '[' || 368 *(pattern+1) == '}' || *(pattern+1) == ']') ) { 369 370 /* escape character, skip '\' */ 371 ++pattern; 372 ++pos; 373 } 374 *buf++ = *pattern++; /* copy character to literal */ 375 ++pos; 376 sublen++; 377 } 378 if(sublen) { 379 /* we got a literal string, add it as a single-item list */ 380 *buf = '\0'; 381 res = glob_fixed(glob, glob->glob_buffer, sublen); 382 } 383 else { 384 switch (*pattern) { 385 case '\0': /* done */ 386 break; 387 388 case '{': 389 /* process set pattern */ 390 pattern++; 391 pos++; 392 res = glob_set(glob, &pattern, &pos, amount, globindex++); 393 break; 394 395 case '[': 396 /* process range pattern */ 397 pattern++; 398 pos++; 399 res = glob_range(glob, &pattern, &pos, amount, globindex++); 400 break; 401 } 402 } 403 404 if(++glob->size > GLOB_PATTERN_NUM) 405 return GLOBERROR("too many globs", pos, GLOB_ERROR); 406 } 407 return res; 408} 409 410int glob_url(URLGlob** glob, char* url, unsigned long *urlnum, FILE *error) 411{ 412 /* 413 * We can deal with any-size, just make a buffer with the same length 414 * as the specified URL! 415 */ 416 URLGlob *glob_expand; 417 unsigned long amount = 0; 418 char *glob_buffer; 419 GlobCode res; 420 421 *glob = NULL; 422 423 glob_buffer = malloc(strlen(url) + 1); 424 if(!glob_buffer) 425 return CURLE_OUT_OF_MEMORY; 426 427 glob_expand = calloc(1, sizeof(URLGlob)); 428 if(!glob_expand) { 429 Curl_safefree(glob_buffer); 430 return CURLE_OUT_OF_MEMORY; 431 } 432 glob_expand->urllen = strlen(url); 433 glob_expand->glob_buffer = glob_buffer; 434 435 res = glob_parse(glob_expand, url, 1, &amount); 436 if(!res) 437 *urlnum = amount; 438 else { 439 if(error && glob_expand->error) { 440 char text[128]; 441 const char *t; 442 if(glob_expand->pos) { 443 snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error, 444 glob_expand->pos); 445 t = text; 446 } 447 else 448 t = glob_expand->error; 449 450 /* send error description to the error-stream */ 451 fprintf(error, "curl: (%d) [globbing] %s\n", res, t); 452 } 453 /* it failed, we cleanup */ 454 glob_cleanup(glob_expand); 455 *urlnum = 1; 456 return res; 457 } 458 459 *glob = glob_expand; 460 return CURLE_OK; 461} 462 463void glob_cleanup(URLGlob* glob) 464{ 465 size_t i; 466 int elem; 467 468 /* the < condition is required since i underflows! */ 469 for(i = glob->size - 1; i < glob->size; --i) { 470 if((glob->pattern[i].type == UPTSet) && 471 (glob->pattern[i].content.Set.elements)) { 472 for(elem = glob->pattern[i].content.Set.size - 1; 473 elem >= 0; 474 --elem) { 475 Curl_safefree(glob->pattern[i].content.Set.elements[elem]); 476 } 477 Curl_safefree(glob->pattern[i].content.Set.elements); 478 } 479 } 480 Curl_safefree(glob->glob_buffer); 481 Curl_safefree(glob); 482} 483 484int glob_next_url(char **globbed, URLGlob *glob) 485{ 486 URLPattern *pat; 487 size_t i; 488 size_t j; 489 size_t len; 490 size_t buflen = glob->urllen + 1; 491 char *buf = glob->glob_buffer; 492 493 *globbed = NULL; 494 495 if(!glob->beenhere) 496 glob->beenhere = 1; 497 else { 498 bool carry = TRUE; 499 500 /* implement a counter over the index ranges of all patterns, 501 starting with the rightmost pattern */ 502 /* the < condition is required since i underflows! */ 503 for(i = glob->size - 1; carry && (i < glob->size); --i) { 504 carry = FALSE; 505 pat = &glob->pattern[i]; 506 switch (pat->type) { 507 case UPTSet: 508 if((pat->content.Set.elements) && 509 (++pat->content.Set.ptr_s == pat->content.Set.size)) { 510 pat->content.Set.ptr_s = 0; 511 carry = TRUE; 512 } 513 break; 514 case UPTCharRange: 515 pat->content.CharRange.ptr_c = (char)(pat->content.CharRange.step + 516 (int)((unsigned char)pat->content.CharRange.ptr_c)); 517 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 518 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 519 carry = TRUE; 520 } 521 break; 522 case UPTNumRange: 523 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 524 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 525 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 526 carry = TRUE; 527 } 528 break; 529 default: 530 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 531 return CURLE_FAILED_INIT; 532 } 533 } 534 if(carry) { /* first pattern ptr has run into overflow, done! */ 535 /* TODO: verify if this should actally return CURLE_OK. */ 536 return CURLE_OK; /* CURLE_OK to match previous behavior */ 537 } 538 } 539 540 for(j = 0; j < glob->size; ++j) { 541 pat = &glob->pattern[j]; 542 switch(pat->type) { 543 case UPTSet: 544 if(pat->content.Set.elements) { 545 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 546 snprintf(buf, buflen, "%s", 547 pat->content.Set.elements[pat->content.Set.ptr_s]); 548 buf += len; 549 buflen -= len; 550 } 551 break; 552 case UPTCharRange: 553 *buf++ = pat->content.CharRange.ptr_c; 554 break; 555 case UPTNumRange: 556 len = snprintf(buf, buflen, "%0*ld", 557 pat->content.NumRange.padlength, 558 pat->content.NumRange.ptr_n); 559 buf += len; 560 buflen -= len; 561 break; 562 default: 563 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 564 return CURLE_FAILED_INIT; 565 } 566 } 567 *buf = '\0'; 568 569 *globbed = strdup(glob->glob_buffer); 570 if(!*globbed) 571 return CURLE_OUT_OF_MEMORY; 572 573 return CURLE_OK; 574} 575 576int glob_match_url(char **result, char *filename, URLGlob *glob) 577{ 578 char *target; 579 size_t allocsize; 580 char numbuf[18]; 581 char *appendthis = NULL; 582 size_t appendlen = 0; 583 size_t stringlen = 0; 584 585 *result = NULL; 586 587 /* We cannot use the glob_buffer for storage here since the filename may 588 * be longer than the URL we use. We allocate a good start size, then 589 * we need to realloc in case of need. 590 */ 591 allocsize = strlen(filename) + 1; /* make it at least one byte to store the 592 trailing zero */ 593 target = malloc(allocsize); 594 if(!target) 595 return CURLE_OUT_OF_MEMORY; 596 597 while(*filename) { 598 if(*filename == '#' && ISDIGIT(filename[1])) { 599 unsigned long i; 600 char *ptr = filename; 601 unsigned long num = strtoul(&filename[1], &filename, 10); 602 URLPattern *pat =NULL; 603 604 if(num < glob->size) { 605 num--; /* make it zero based */ 606 /* find the correct glob entry */ 607 for(i=0; i<glob->size; i++) { 608 if(glob->pattern[i].globindex == (int)num) { 609 pat = &glob->pattern[i]; 610 break; 611 } 612 } 613 } 614 615 if(pat) { 616 switch (pat->type) { 617 case UPTSet: 618 if(pat->content.Set.elements) { 619 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; 620 appendlen = 621 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 622 } 623 break; 624 case UPTCharRange: 625 numbuf[0] = pat->content.CharRange.ptr_c; 626 numbuf[1] = 0; 627 appendthis = numbuf; 628 appendlen = 1; 629 break; 630 case UPTNumRange: 631 snprintf(numbuf, sizeof(numbuf), "%0*d", 632 pat->content.NumRange.padlength, 633 pat->content.NumRange.ptr_n); 634 appendthis = numbuf; 635 appendlen = strlen(numbuf); 636 break; 637 default: 638 fprintf(stderr, "internal error: invalid pattern type (%d)\n", 639 (int)pat->type); 640 Curl_safefree(target); 641 return CURLE_FAILED_INIT; 642 } 643 } 644 else { 645 /* #[num] out of range, use the #[num] in the output */ 646 filename = ptr; 647 appendthis = filename++; 648 appendlen = 1; 649 } 650 } 651 else { 652 appendthis = filename++; 653 appendlen = 1; 654 } 655 if(appendlen + stringlen >= allocsize) { 656 char *newstr; 657 /* we append a single byte to allow for the trailing byte to be appended 658 at the end of this function outside the while() loop */ 659 allocsize = (appendlen + stringlen) * 2; 660 newstr = realloc(target, allocsize + 1); 661 if(!newstr) { 662 Curl_safefree(target); 663 return CURLE_OUT_OF_MEMORY; 664 } 665 target = newstr; 666 } 667 memcpy(&target[stringlen], appendthis, appendlen); 668 stringlen += appendlen; 669 } 670 target[stringlen]= '\0'; 671 *result = target; 672 return CURLE_OK; 673} 674 675