/* fetch.c revision 70275 */
1/*- 2 * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * $FreeBSD: head/usr.bin/fetch/fetch.c 70275 2000-12-22 18:23:19Z des $ 29 */ 30 31#include <sys/param.h> 32#include <sys/stat.h> 33#include <sys/socket.h> 34 35#include <ctype.h> 36#include <err.h> 37#include <errno.h> 38#include <signal.h> 39#include <stdio.h> 40#include <stdlib.h> 41#include <string.h> 42#include <sysexits.h> 43#include <unistd.h> 44 45#include <fetch.h> 46 47#define MINBUFSIZE 4096 48 49/* Option flags */ 50int A_flag; /* -A: do not follow 302 redirects */ 51int a_flag; /* -a: auto retry */ 52size_t B_size; /* -B: buffer size */ 53int b_flag; /*! -b: workaround TCP bug */ 54char *c_dirname; /* -c: remote directory */ 55int d_flag; /* -d: direct connection */ 56int F_flag; /* -F: restart without checking mtime */ 57char *f_filename; /* -f: file to fetch */ 58int H_flag; /* -H: use high port */ 59char *h_hostname; /* -h: host to fetch from */ 60int l_flag; /* -l: link rather than copy file: URLs */ 61int m_flag; /* -[Mm]: mirror mode */ 62int n_flag; /* -n: do not preserve modification time */ 63int o_flag; /* -o: specify output file */ 64int o_directory; /* output file is a directory */ 65char *o_filename; /* name of output file */ 66int o_stdout; /* output file is stdout */ 67int once_flag; /* -1: stop at first successful file */ 68int p_flag; /* -[Pp]: use passive FTP */ 69int R_flag; /* -R: don't delete partially transferred files */ 70int r_flag; /* -r: restart previously interrupted transfer */ 71u_int T_secs = 0; /* -T: transfer timeout in seconds */ 72int s_flag; /* -s: show size, don't fetch */ 73off_t S_size; /* -S: require size to match */ 74int t_flag; /*! 
-t: workaround TCP bug */ 75int v_level = 1; /* -v: verbosity level */ 76int v_tty; /* stdout is a tty */ 77u_int w_secs; /* -w: retry delay */ 78int family = PF_UNSPEC; /* -[46]: address family to use */ 79 80int sigalrm; /* SIGALRM received */ 81int sigint; /* SIGINT received */ 82 83u_int ftp_timeout; /* default timeout for FTP transfers */ 84u_int http_timeout; /* default timeout for HTTP transfers */ 85u_char *buf; /* transfer buffer */ 86 87 88void 89sig_handler(int sig) 90{ 91 switch (sig) { 92 case SIGALRM: 93 sigalrm = 1; 94 break; 95 case SIGINT: 96 sigint = 1; 97 break; 98 } 99} 100 101struct xferstat { 102 char name[40]; 103 struct timeval start; 104 struct timeval end; 105 struct timeval last; 106 off_t size; 107 off_t offset; 108 off_t rcvd; 109}; 110 111void 112stat_display(struct xferstat *xs, int force) 113{ 114 struct timeval now; 115 116 if (!v_tty || !v_level) 117 return; 118 119 gettimeofday(&now, NULL); 120 if (!force && now.tv_sec <= xs->last.tv_sec) 121 return; 122 xs->last = now; 123 124 fprintf(stderr, "\rReceiving %s", xs->name); 125 if (xs->size == -1) 126 fprintf(stderr, ": %lld bytes", xs->rcvd); 127 else 128 fprintf(stderr, " (%lld bytes): %d%%", xs->size, 129 (int)((100.0 * xs->rcvd) / xs->size)); 130} 131 132void 133stat_start(struct xferstat *xs, char *name, off_t size, off_t offset) 134{ 135 snprintf(xs->name, sizeof xs->name, "%s", name); 136 gettimeofday(&xs->start, NULL); 137 xs->last.tv_sec = xs->last.tv_usec = 0; 138 xs->end = xs->last; 139 xs->size = size; 140 xs->offset = offset; 141 xs->rcvd = offset; 142 stat_display(xs, 1); 143} 144 145void 146stat_update(struct xferstat *xs, off_t rcvd, int force) 147{ 148 xs->rcvd = rcvd; 149 stat_display(xs, 0); 150} 151 152void 153stat_end(struct xferstat *xs) 154{ 155 double delta; 156 double bps; 157 158 if (!v_level) 159 return; 160 161 gettimeofday(&xs->end, NULL); 162 163 stat_display(xs, 1); 164 fputc('\n', stderr); 165 delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6)) 166 - 
(xs->start.tv_sec + (xs->start.tv_usec / 1.e6)); 167 fprintf(stderr, "%lld bytes transferred in %.1f seconds ", 168 xs->rcvd - xs->offset, delta); 169 bps = (xs->rcvd - xs->offset) / delta; 170 if (bps > 1024*1024) 171 fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024)); 172 else if (bps > 1024) 173 fprintf(stderr, "(%.2f kBps)\n", bps / 1024); 174 else 175 fprintf(stderr, "(%.2f Bps)\n", bps); 176} 177 178int 179fetch(char *URL, char *path) 180{ 181 struct url *url; 182 struct url_stat us; 183 struct stat sb; 184 struct xferstat xs; 185 FILE *f, *of; 186 size_t size; 187 off_t count; 188 char flags[8]; 189 int n, r; 190 u_int timeout; 191 192 f = of = NULL; 193 194 /* parse URL */ 195 if ((url = fetchParseURL(URL)) == NULL) { 196 warnx("%s: parse error", URL); 197 goto failure; 198 } 199 200 /* if no scheme was specified, take a guess */ 201 if (!*url->scheme) { 202 if (!*url->host) 203 strcpy(url->scheme, SCHEME_FILE); 204 else if (strncasecmp(url->host, "ftp.", 4)) 205 strcpy(url->scheme, SCHEME_FTP); 206 else if (strncasecmp(url->host, "www.", 4)) 207 strcpy(url->scheme, SCHEME_HTTP); 208 } 209 210 timeout = 0; 211 *flags = 0; 212 count = 0; 213 214 /* common flags */ 215 if (v_level > 1) 216 strcat(flags, "v"); 217 switch (family) { 218 case PF_INET: 219 strcat(flags, "4"); 220 break; 221 case PF_INET6: 222 strcat(flags, "6"); 223 break; 224 } 225 226 /* FTP specific flags */ 227 if (strcmp(url->scheme, "ftp") == 0) { 228 if (p_flag) 229 strcat(flags, "p"); 230 if (d_flag) 231 strcat(flags, "d"); 232 if (H_flag) 233 strcat(flags, "h"); 234 timeout = T_secs ? T_secs : ftp_timeout; 235 } 236 237 /* HTTP specific flags */ 238 if (strcmp(url->scheme, "http") == 0) { 239 if (d_flag) 240 strcat(flags, "d"); 241 if (A_flag) 242 strcat(flags, "A"); 243 timeout = T_secs ? T_secs : http_timeout; 244 } 245 246 /* set the protocol timeout. 
*/ 247 fetchTimeout = timeout; 248 249 /* just print size */ 250 if (s_flag) { 251 if (fetchStat(url, &us, flags) == -1) 252 goto failure; 253 if (us.size == -1) 254 printf("Unknown\n"); 255 else 256 printf("%lld\n", us.size); 257 goto success; 258 } 259 260 /* 261 * If the -r flag was specified, we have to compare the local and 262 * remote files, so we should really do a fetchStat() first, but I 263 * know of at least one HTTP server that only sends the content 264 * size in response to GET requests, and leaves it out of replies 265 * to HEAD requests. Also, in the (frequent) case that the local 266 * and remote files match but the local file is truncated, we have 267 * sufficient information *before* the compare to issue a correct 268 * request. Therefore, we always issue a GET request as if we were 269 * sure the local file was a truncated copy of the remote file; we 270 * can drop the connection later if we change our minds. 271 */ 272 if ((r_flag || m_flag) && !o_stdout && stat(path, &sb) != -1) { 273 if (r_flag) 274 url->offset = sb.st_size; 275 } else { 276 sb.st_size = -1; 277 } 278 279 /* start the transfer */ 280 if ((f = fetchXGet(url, &us, flags)) == NULL) { 281 warnx("%s: %s", path, fetchLastErrString); 282 goto failure; 283 } 284 if (sigint) 285 goto signal; 286 287 /* check that size is as expected */ 288 if (S_size) { 289 if (us.size == -1) { 290 warnx("%s: size unknown", path); 291 goto failure; 292 } else if (us.size != S_size) { 293 warnx("%s: size mismatch: expected %lld, actual %lld", 294 path, S_size, us.size); 295 goto failure; 296 } 297 } 298 299 /* symlink instead of copy */ 300 if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) { 301 if (symlink(url->doc, path) == -1) { 302 warn("%s: symlink()", path); 303 goto failure; 304 } 305 goto success; 306 } 307 308 if (us.size == -1) 309 warnx("%s: size of remote file is not known", path); 310 if (v_level > 1) { 311 if (sb.st_size != -1) 312 fprintf(stderr, "local size / mtime: %lld / 
%ld\n", 313 sb.st_size, sb.st_mtime); 314 if (us.size != -1) 315 fprintf(stderr, "remote size / mtime: %lld / %ld\n", 316 us.size, us.mtime); 317 } 318 319 /* open output file */ 320 if (o_stdout) { 321 /* output to stdout */ 322 of = stdout; 323 } else if (sb.st_size != -1) { 324 /* resume mode, local file exists */ 325 if (!F_flag && us.mtime && sb.st_mtime != us.mtime) { 326 /* no match! have to refetch */ 327 fclose(f); 328 /* if precious, warn the user and give up */ 329 if (R_flag) { 330 warnx("%s: local modification time does not match remote", 331 path); 332 goto failure_keep; 333 } 334 url->offset = 0; 335 if ((f = fetchXGet(url, &us, flags)) == NULL) { 336 warnx("%s: %s", path, fetchLastErrString); 337 goto failure; 338 } 339 if (sigint) 340 goto signal; 341 } else { 342 if (us.size == sb.st_size) 343 /* nothing to do */ 344 goto success; 345 if (sb.st_size > us.size) { 346 /* local file too long! */ 347 warnx("%s: local file (%lld bytes) is longer " 348 "than remote file (%lld bytes)", 349 path, sb.st_size, us.size); 350 goto failure; 351 } 352 /* we got through, open local file and seek to offset */ 353 /* 354 * XXX there's a race condition here - the file we open is not 355 * necessarily the same as the one we stat()'ed earlier... 356 */ 357 if ((of = fopen(path, "a")) == NULL) { 358 warn("%s: fopen()", path); 359 goto failure; 360 } 361 if (fseek(of, url->offset, SEEK_SET) == -1) { 362 warn("%s: fseek()", path); 363 goto failure; 364 } 365 } 366 } 367 if (m_flag && sb.st_size != -1) { 368 /* mirror mode, local file exists */ 369 if (sb.st_size == us.size && sb.st_mtime == us.mtime) 370 goto success; 371 } 372 if (!of) { 373 /* 374 * We don't yet have an output file; either this is a vanilla 375 * run with no special flags, or the local and remote files 376 * didn't match. 
377 */ 378 if ((of = fopen(path, "w")) == NULL) { 379 warn("%s: open()", path); 380 goto failure; 381 } 382 } 383 count = url->offset; 384 385 /* start the counter */ 386 stat_start(&xs, path, us.size, count); 387 388 sigint = sigalrm = 0; 389 390 /* suck in the data */ 391 for (n = 0; !sigint && !sigalrm; ++n) { 392 if (us.size != -1 && us.size - count < B_size) 393 size = us.size - count; 394 else 395 size = B_size; 396 if (timeout) 397 alarm(timeout); 398 if ((size = fread(buf, 1, size, f)) <= 0) 399 break; 400 stat_update(&xs, count += size, 0); 401 if (fwrite(buf, size, 1, of) != 1) 402 break; 403 } 404 405 if (timeout) 406 alarm(0); 407 408 stat_end(&xs); 409 410 /* set mtime of local file */ 411 if (!n_flag && us.mtime && !o_stdout 412 && (stat(path, &sb) != -1) && sb.st_mode & S_IFREG) { 413 struct timeval tv[2]; 414 415 fflush(of); 416 tv[0].tv_sec = (long)(us.atime ? us.atime : us.mtime); 417 tv[1].tv_sec = (long)us.mtime; 418 tv[0].tv_usec = tv[1].tv_usec = 0; 419 if (utimes(path, tv)) 420 warn("%s: utimes()", path); 421 } 422 423 /* timed out or interrupted? */ 424 signal: 425 if (sigalrm) 426 warnx("transfer timed out"); 427 if (sigint) { 428 warnx("transfer interrupted"); 429 goto failure; 430 } 431 432 if (!sigalrm) { 433 /* check the status of our files */ 434 if (ferror(f)) 435 warn("%s", URL); 436 if (ferror(of)) 437 warn("%s", path); 438 if (ferror(f) || ferror(of)) 439 goto failure; 440 } 441 442 /* did the transfer complete normally? */ 443 if (us.size != -1 && count < us.size) { 444 warnx("%s appears to be truncated: %lld/%lld bytes", 445 path, count, us.size); 446 goto failure_keep; 447 } 448 449 /* 450 * If the transfer timed out and we didn't know how much to 451 * expect, assume the worst (i.e. 
we didn't get all of it) 452 */ 453 if (sigalrm && us.size == -1) { 454 warnx("%s may be truncated", path); 455 goto failure_keep; 456 } 457 458 success: 459 r = 0; 460 goto done; 461 failure: 462 if (of && of != stdout && !R_flag && !r_flag) 463 if (stat(path, &sb) != -1 && (sb.st_mode & S_IFREG)) 464 unlink(path); 465 failure_keep: 466 r = -1; 467 goto done; 468 done: 469 if (f) 470 fclose(f); 471 if (of && of != stdout) 472 fclose(of); 473 if (url) 474 fetchFreeURL(url); 475 return r; 476} 477 478void 479usage(void) 480{ 481 /* XXX badly out of synch */ 482 fprintf(stderr, 483 "Usage: fetch [-1AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n" 484 " [-B bytes] [-T seconds] [-w seconds]\n" 485 " [-f file -h host [-c dir] | URL ...]\n" 486 ); 487} 488 489 490#define PARSENUM(NAME, TYPE) \ 491int \ 492NAME(char *s, TYPE *v) \ 493{ \ 494 *v = 0; \ 495 for (*v = 0; *s; s++) \ 496 if (isdigit(*s)) \ 497 *v = *v * 10 + *s - '0'; \ 498 else \ 499 return -1; \ 500 return 0; \ 501} 502 503PARSENUM(parseint, u_int) 504PARSENUM(parsesize, size_t) 505PARSENUM(parseoff, off_t) 506 507int 508main(int argc, char *argv[]) 509{ 510 struct stat sb; 511 struct sigaction sa; 512 char *p, *q, *s; 513 int c, e, r; 514 515 while ((c = getopt(argc, argv, 516 "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF) 517 switch (c) { 518 case '1': 519 once_flag = 1; 520 break; 521 case '4': 522 family = PF_INET; 523 break; 524 case '6': 525 family = PF_INET6; 526 break; 527 case 'A': 528 A_flag = 1; 529 break; 530 case 'a': 531 a_flag = 1; 532 break; 533 case 'B': 534 if (parsesize(optarg, &B_size) == -1) 535 errx(1, "invalid buffer size"); 536 break; 537 case 'b': 538 warnx("warning: the -b option is deprecated"); 539 b_flag = 1; 540 break; 541 case 'c': 542 c_dirname = optarg; 543 break; 544 case 'd': 545 d_flag = 1; 546 break; 547 case 'F': 548 F_flag = 1; 549 break; 550 case 'f': 551 f_filename = optarg; 552 break; 553 case 'H': 554 H_flag = 1; 555 break; 556 case 'h': 557 h_hostname = optarg; 
558 break; 559 case 'l': 560 l_flag = 1; 561 break; 562 case 'o': 563 o_flag = 1; 564 o_filename = optarg; 565 break; 566 case 'M': 567 case 'm': 568 if (r_flag) 569 errx(1, "the -m and -r flags are mutually exclusive"); 570 m_flag = 1; 571 break; 572 case 'n': 573 n_flag = 1; 574 break; 575 case 'P': 576 case 'p': 577 p_flag = 1; 578 break; 579 case 'q': 580 v_level = 0; 581 break; 582 case 'R': 583 R_flag = 1; 584 break; 585 case 'r': 586 if (m_flag) 587 errx(1, "the -m and -r flags are mutually exclusive"); 588 r_flag = 1; 589 break; 590 case 'S': 591 if (parseoff(optarg, &S_size) == -1) 592 errx(1, "invalid size"); 593 break; 594 case 's': 595 s_flag = 1; 596 break; 597 case 'T': 598 if (parseint(optarg, &T_secs) == -1) 599 errx(1, "invalid timeout"); 600 break; 601 case 't': 602 t_flag = 1; 603 warnx("warning: the -t option is deprecated"); 604 break; 605 case 'v': 606 v_level++; 607 break; 608 case 'w': 609 a_flag = 1; 610 if (parseint(optarg, &w_secs) == -1) 611 errx(1, "invalid delay"); 612 break; 613 default: 614 usage(); 615 exit(EX_USAGE); 616 } 617 618 argc -= optind; 619 argv += optind; 620 621 if (h_hostname || f_filename || c_dirname) { 622 if (!h_hostname || !f_filename || argc) { 623 usage(); 624 exit(EX_USAGE); 625 } 626 /* XXX this is a hack. */ 627 if (strcspn(h_hostname, "@:/") != strlen(h_hostname)) 628 errx(1, "invalid hostname"); 629 if (asprintf(argv, "ftp://%s/%s/%s", h_hostname, 630 c_dirname ? 
c_dirname : "", f_filename) == -1) 631 errx(1, "%s", strerror(ENOMEM)); 632 argc++; 633 } 634 635 if (!argc) { 636 usage(); 637 exit(EX_USAGE); 638 } 639 640 /* allocate buffer */ 641 if (B_size < MINBUFSIZE) 642 B_size = MINBUFSIZE; 643 if ((buf = malloc(B_size)) == NULL) 644 errx(1, "%s", strerror(ENOMEM)); 645 646 /* timeouts */ 647 if ((s = getenv("FTP_TIMEOUT")) != NULL) { 648 if (parseint(s, &ftp_timeout) == -1) { 649 warnx("FTP_TIMEOUT is not a positive integer"); 650 ftp_timeout = 0; 651 } 652 } 653 if ((s = getenv("HTTP_TIMEOUT")) != NULL) { 654 if (parseint(s, &http_timeout) == -1) { 655 warnx("HTTP_TIMEOUT is not a positive integer"); 656 http_timeout = 0; 657 } 658 } 659 660 /* signal handling */ 661 sa.sa_flags = 0; 662 sa.sa_handler = sig_handler; 663 sigemptyset(&sa.sa_mask); 664 sigaction(SIGALRM, &sa, NULL); 665 sa.sa_flags = SA_RESETHAND; 666 sigaction(SIGINT, &sa, NULL); 667 fetchRestartCalls = 0; 668 669 /* output file */ 670 if (o_flag) { 671 if (strcmp(o_filename, "-") == 0) { 672 o_stdout = 1; 673 } else if (stat(o_filename, &sb) == -1) { 674 if (errno == ENOENT) { 675 if (argc > 1) 676 errx(EX_USAGE, "%s is not a directory", o_filename); 677 } else { 678 err(EX_IOERR, "%s", o_filename); 679 } 680 } else { 681 if (sb.st_mode & S_IFDIR) 682 o_directory = 1; 683 } 684 } 685 686 /* check if output is to a tty (for progress report) */ 687 v_tty = isatty(STDERR_FILENO); 688 r = 0; 689 690 while (argc) { 691 if ((p = strrchr(*argv, '/')) == NULL) 692 p = *argv; 693 else 694 p++; 695 696 if (!*p) 697 p = "fetch.out"; 698 699 fetchLastErrCode = 0; 700 701 if (o_flag) { 702 if (o_stdout) { 703 e = fetch(*argv, "-"); 704 } else if (o_directory) { 705 asprintf(&q, "%s/%s", o_filename, p); 706 e = fetch(*argv, q); 707 free(q); 708 } else { 709 e = fetch(*argv, o_filename); 710 } 711 } else { 712 e = fetch(*argv, p); 713 } 714 715 if (sigint) 716 kill(getpid(), SIGINT); 717 718 if (e == 0 && once_flag) 719 exit(0); 720 721 if (e) { 722 r = 1; 723 if 
((fetchLastErrCode 724 && fetchLastErrCode != FETCH_UNAVAIL 725 && fetchLastErrCode != FETCH_MOVED 726 && fetchLastErrCode != FETCH_URL 727 && fetchLastErrCode != FETCH_RESOLV 728 && fetchLastErrCode != FETCH_UNKNOWN)) { 729 if (w_secs) { 730 if (v_level) 731 fprintf(stderr, "Waiting %d seconds before retrying\n", 732 w_secs); 733 sleep(w_secs); 734 } 735 if (a_flag) 736 continue; 737 } 738 } 739 740 argc--, argv++; 741 } 742 743 exit(r); 744} 745