fetch.c revision 73937
1/*- 2 * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/usr.bin/fetch/fetch.c 73937 2001-03-07 05:33:50Z des $ 29 */ 30 31#include <sys/param.h> 32#include <sys/stat.h> 33#include <sys/socket.h> 34 35#include <ctype.h> 36#include <err.h> 37#include <errno.h> 38#include <signal.h> 39#include <stdio.h> 40#include <stdlib.h> 41#include <string.h> 42#include <sysexits.h> 43#include <unistd.h> 44 45#include <fetch.h> 46 47#define MINBUFSIZE 4096 48 49/* Option flags */ 50int A_flag; /* -A: do not follow 302 redirects */ 51int a_flag; /* -a: auto retry */ 52size_t B_size; /* -B: buffer size */ 53int b_flag; /*! -b: workaround TCP bug */ 54char *c_dirname; /* -c: remote directory */ 55int d_flag; /* -d: direct connection */ 56int F_flag; /* -F: restart without checking mtime */ 57char *f_filename; /* -f: file to fetch */ 58int H_flag; /* -H: use high port */ 59char *h_hostname; /* -h: host to fetch from */ 60int l_flag; /* -l: link rather than copy file: URLs */ 61int m_flag; /* -[Mm]: mirror mode */ 62int n_flag; /* -n: do not preserve modification time */ 63int o_flag; /* -o: specify output file */ 64int o_directory; /* output file is a directory */ 65char *o_filename; /* name of output file */ 66int o_stdout; /* output file is stdout */ 67int once_flag; /* -1: stop at first successful file */ 68int p_flag; /* -[Pp]: use passive FTP */ 69int R_flag; /* -R: don't delete partially transferred files */ 70int r_flag; /* -r: restart previously interrupted transfer */ 71u_int T_secs = 0; /* -T: transfer timeout in seconds */ 72int s_flag; /* -s: show size, don't fetch */ 73off_t S_size; /* -S: require size to match */ 74int t_flag; /*! -t: workaround TCP bug */ 75int v_level = 1; /* -v: verbosity level */ 76int v_tty; /* stdout is a tty */ 77u_int w_secs; /* -w: retry delay */ 78int family = PF_UNSPEC; /* -[46]: address family to use */ 79 80int sigalrm; /* SIGALRM received */ 81int siginfo; /* SIGINFO received */ 82int sigint; /* SIGINT received */ 83 84u_int ftp_timeout; /* default timeout for FTP transfers */ 85u_int http_timeout; /* default timeout for HTTP transfers */ 86u_char *buf; /* transfer buffer */ 87 88 89void 90sig_handler(int sig) 91{ 92 switch (sig) { 93 case SIGALRM: 94 sigalrm = 1; 95 break; 96 case SIGINFO: 97 siginfo = 1; 98 break; 99 case SIGINT: 100 sigint = 1; 101 break; 102 } 103} 104 105struct xferstat { 106 char name[40]; 107 struct timeval start; 108 struct timeval end; 109 struct timeval last; 110 off_t size; 111 off_t offset; 112 off_t rcvd; 113}; 114 115void 116stat_display(struct xferstat *xs, int force) 117{ 118 struct timeval now; 119 120 if (!v_tty || !v_level) 121 return; 122 123 gettimeofday(&now, NULL); 124 if (!force && now.tv_sec <= xs->last.tv_sec) 125 return; 126 xs->last = now; 127 128 fprintf(stderr, "\rReceiving %s", xs->name); 129 if (xs->size == -1) 130 fprintf(stderr, ": %lld bytes", xs->rcvd); 131 else 132 fprintf(stderr, " (%lld bytes): %d%%", xs->size, 133 (int)((100.0 * xs->rcvd) / xs->size)); 134} 135 136void 137stat_start(struct xferstat *xs, char *name, off_t size, off_t offset) 138{ 139 snprintf(xs->name, sizeof xs->name, "%s", name); 140 gettimeofday(&xs->start, NULL); 141 xs->last.tv_sec = xs->last.tv_usec = 0; 142 xs->end = xs->last; 143 xs->size = size; 144 xs->offset = offset; 145 xs->rcvd = offset; 146 stat_display(xs, 1); 147} 148 149void 150stat_update(struct xferstat *xs, off_t rcvd, int force) 151{ 152 xs->rcvd = rcvd; 153 stat_display(xs, 0); 154} 155 156void 157stat_end(struct xferstat *xs) 158{ 159 double delta; 160 double bps; 161 162 if (!v_level) 163 return; 164 165 gettimeofday(&xs->end, NULL); 166 167 stat_display(xs, 1); 168 fputc('\n', stderr); 169 delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6)) 170 - (xs->start.tv_sec + (xs->start.tv_usec / 1.e6)); 171 fprintf(stderr, "%lld bytes transferred in %.1f seconds ", 172 xs->rcvd - xs->offset, delta); 173 bps = (xs->rcvd - xs->offset) / delta; 174 if (bps > 1024*1024) 175 fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024)); 176 else if (bps > 1024) 177 fprintf(stderr, "(%.2f kBps)\n", bps / 1024); 178 else 179 fprintf(stderr, "(%.2f Bps)\n", bps); 180} 181 182int 183fetch(char *URL, char *path) 184{ 185 struct url *url; 186 struct url_stat us; 187 struct stat sb; 188 struct xferstat xs; 189 FILE *f, *of; 190 size_t size, wr; 191 off_t count; 192 char flags[8]; 193 int n, r; 194 u_int timeout; 195 u_char *ptr; 196 197 f = of = NULL; 198 199 /* parse URL */ 200 if ((url = fetchParseURL(URL)) == NULL) { 201 warnx("%s: parse error", URL); 202 goto failure; 203 } 204 205 /* if no scheme was specified, take a guess */ 206 if (!*url->scheme) { 207 if (!*url->host) 208 strcpy(url->scheme, SCHEME_FILE); 209 else if (strncasecmp(url->host, "ftp.", 4)) 210 strcpy(url->scheme, SCHEME_FTP); 211 else if (strncasecmp(url->host, "www.", 4)) 212 strcpy(url->scheme, SCHEME_HTTP); 213 } 214 215 timeout = 0; 216 *flags = 0; 217 count = 0; 218 219 /* common flags */ 220 if (v_level > 1) 221 strcat(flags, "v"); 222 switch (family) { 223 case PF_INET: 224 strcat(flags, "4"); 225 break; 226 case PF_INET6: 227 strcat(flags, "6"); 228 break; 229 } 230 231 /* FTP specific flags */ 232 if (strcmp(url->scheme, "ftp") == 0) { 233 if (p_flag) 234 strcat(flags, "p"); 235 if (d_flag) 236 strcat(flags, "d"); 237 if (H_flag) 238 strcat(flags, "h"); 239 timeout = T_secs ? T_secs : ftp_timeout; 240 } 241 242 /* HTTP specific flags */ 243 if (strcmp(url->scheme, "http") == 0) { 244 if (d_flag) 245 strcat(flags, "d"); 246 if (A_flag) 247 strcat(flags, "A"); 248 timeout = T_secs ? T_secs : http_timeout; 249 } 250 251 /* set the protocol timeout. */ 252 fetchTimeout = timeout; 253 254 /* just print size */ 255 if (s_flag) { 256 if (fetchStat(url, &us, flags) == -1) 257 goto failure; 258 if (us.size == -1) 259 printf("Unknown\n"); 260 else 261 printf("%lld\n", us.size); 262 goto success; 263 } 264 265 /* 266 * If the -r flag was specified, we have to compare the local and 267 * remote files, so we should really do a fetchStat() first, but I 268 * know of at least one HTTP server that only sends the content 269 * size in response to GET requests, and leaves it out of replies 270 * to HEAD requests. Also, in the (frequent) case that the local 271 * and remote files match but the local file is truncated, we have 272 * sufficient information *before* the compare to issue a correct 273 * request. Therefore, we always issue a GET request as if we were 274 * sure the local file was a truncated copy of the remote file; we 275 * can drop the connection later if we change our minds. 276 */ 277 if ((r_flag || m_flag) && !o_stdout && stat(path, &sb) != -1) { 278 if (r_flag) 279 url->offset = sb.st_size; 280 } else { 281 sb.st_size = -1; 282 } 283 284 /* start the transfer */ 285 if ((f = fetchXGet(url, &us, flags)) == NULL) { 286 warnx("%s: %s", path, fetchLastErrString); 287 goto failure; 288 } 289 if (sigint) 290 goto signal; 291 292 /* check that size is as expected */ 293 if (S_size) { 294 if (us.size == -1) { 295 warnx("%s: size unknown", path); 296 goto failure; 297 } else if (us.size != S_size) { 298 warnx("%s: size mismatch: expected %lld, actual %lld", 299 path, S_size, us.size); 300 goto failure; 301 } 302 } 303 304 /* symlink instead of copy */ 305 if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) { 306 if (symlink(url->doc, path) == -1) { 307 warn("%s: symlink()", path); 308 goto failure; 309 } 310 goto success; 311 } 312 313 if (us.size == -1) 314 warnx("%s: size of remote file is not known", path); 315 if (v_level > 1) { 316 if (sb.st_size != -1) 317 fprintf(stderr, "local size / mtime: %lld / %ld\n", 318 sb.st_size, sb.st_mtime); 319 if (us.size != -1) 320 fprintf(stderr, "remote size / mtime: %lld / %ld\n", 321 us.size, us.mtime); 322 } 323 324 /* open output file */ 325 if (o_stdout) { 326 /* output to stdout */ 327 of = stdout; 328 } else if (sb.st_size != -1) { 329 /* resume mode, local file exists */ 330 if (!F_flag && us.mtime && sb.st_mtime != us.mtime) { 331 /* no match! have to refetch */ 332 fclose(f); 333 /* if precious, warn the user and give up */ 334 if (R_flag) { 335 warnx("%s: local modification time does not match remote", 336 path); 337 goto failure_keep; 338 } 339 url->offset = 0; 340 if ((f = fetchXGet(url, &us, flags)) == NULL) { 341 warnx("%s: %s", path, fetchLastErrString); 342 goto failure; 343 } 344 if (sigint) 345 goto signal; 346 } else { 347 if (us.size == sb.st_size) 348 /* nothing to do */ 349 goto success; 350 if (sb.st_size > us.size) { 351 /* local file too long! */ 352 warnx("%s: local file (%lld bytes) is longer " 353 "than remote file (%lld bytes)", 354 path, sb.st_size, us.size); 355 goto failure; 356 } 357 /* we got through, open local file and seek to offset */ 358 /* 359 * XXX there's a race condition here - the file we open is not 360 * necessarily the same as the one we stat()'ed earlier... 361 */ 362 if ((of = fopen(path, "a")) == NULL) { 363 warn("%s: fopen()", path); 364 goto failure; 365 } 366 if (fseek(of, url->offset, SEEK_SET) == -1) { 367 warn("%s: fseek()", path); 368 goto failure; 369 } 370 } 371 } 372 if (m_flag && sb.st_size != -1) { 373 /* mirror mode, local file exists */ 374 if (sb.st_size == us.size && sb.st_mtime == us.mtime) 375 goto success; 376 } 377 if (!of) { 378 /* 379 * We don't yet have an output file; either this is a vanilla 380 * run with no special flags, or the local and remote files 381 * didn't match. 382 */ 383 if ((of = fopen(path, "w")) == NULL) { 384 warn("%s: open()", path); 385 goto failure; 386 } 387 } 388 count = url->offset; 389 390 /* start the counter */ 391 stat_start(&xs, path, us.size, count); 392 393 sigalrm = siginfo = sigint = 0; 394 395 /* suck in the data */ 396 signal(SIGINFO, sig_handler); 397 for (n = 0; !sigint && !sigalrm; ++n) { 398 if (us.size != -1 && us.size - count < B_size) 399 size = us.size - count; 400 else 401 size = B_size; 402 if (timeout) 403 alarm(timeout); 404 if ((size = fread(buf, 1, size, f)) == 0) { 405 if (ferror(f) && errno == EINTR && !sigalrm && !sigint) 406 clearerr(f); 407 else 408 break; 409 } 410 if (timeout) 411 alarm(0); 412 if (siginfo) { 413 stat_end(&xs); 414 siginfo = 0; 415 } 416 stat_update(&xs, count += size, 0); 417 for (ptr = buf; size > 0; ptr += wr, size -= wr) 418 if ((wr = fwrite(ptr, 1, size, of)) < size) { 419 if (ferror(of) && errno == EINTR && !sigalrm && !sigint) 420 clearerr(of); 421 else 422 break; 423 } 424 if (size != 0) 425 break; 426 } 427 signal(SIGINFO, SIG_DFL); 428 429 if (timeout) 430 alarm(0); 431 432 stat_end(&xs); 433 434 /* set mtime of local file */ 435 if (!n_flag && us.mtime && !o_stdout 436 && (stat(path, &sb) != -1) && sb.st_mode & S_IFREG) { 437 struct timeval tv[2]; 438 439 fflush(of); 440 tv[0].tv_sec = (long)(us.atime ? us.atime : us.mtime); 441 tv[1].tv_sec = (long)us.mtime; 442 tv[0].tv_usec = tv[1].tv_usec = 0; 443 if (utimes(path, tv)) 444 warn("%s: utimes()", path); 445 } 446 447 /* timed out or interrupted? */ 448 signal: 449 if (sigalrm) 450 warnx("transfer timed out"); 451 if (sigint) { 452 warnx("transfer interrupted"); 453 goto failure; 454 } 455 456 if (!sigalrm) { 457 /* check the status of our files */ 458 if (ferror(f)) 459 warn("%s", URL); 460 if (ferror(of)) 461 warn("%s", path); 462 if (ferror(f) || ferror(of)) 463 goto failure; 464 } 465 466 /* did the transfer complete normally? */ 467 if (us.size != -1 && count < us.size) { 468 warnx("%s appears to be truncated: %lld/%lld bytes", 469 path, count, us.size); 470 goto failure_keep; 471 } 472 473 /* 474 * If the transfer timed out and we didn't know how much to 475 * expect, assume the worst (i.e. we didn't get all of it) 476 */ 477 if (sigalrm && us.size == -1) { 478 warnx("%s may be truncated", path); 479 goto failure_keep; 480 } 481 482 success: 483 r = 0; 484 goto done; 485 failure: 486 if (of && of != stdout && !R_flag && !r_flag) 487 if (stat(path, &sb) != -1 && (sb.st_mode & S_IFREG)) 488 unlink(path); 489 failure_keep: 490 r = -1; 491 goto done; 492 done: 493 if (f) 494 fclose(f); 495 if (of && of != stdout) 496 fclose(of); 497 if (url) 498 fetchFreeURL(url); 499 return r; 500} 501 502void 503usage(void) 504{ 505 /* XXX badly out of synch */ 506 fprintf(stderr, 507 "Usage: fetch [-1AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n" 508 " [-B bytes] [-T seconds] [-w seconds]\n" 509 " [-f file -h host [-c dir] | URL ...]\n" 510 ); 511} 512 513 514#define PARSENUM(NAME, TYPE) \ 515int \ 516NAME(char *s, TYPE *v) \ 517{ \ 518 *v = 0; \ 519 for (*v = 0; *s; s++) \ 520 if (isdigit(*s)) \ 521 *v = *v * 10 + *s - '0'; \ 522 else \ 523 return -1; \ 524 return 0; \ 525} 526 527PARSENUM(parseint, u_int) 528PARSENUM(parsesize, size_t) 529PARSENUM(parseoff, off_t) 530 531int 532main(int argc, char *argv[]) 533{ 534 struct stat sb; 535 struct sigaction sa; 536 char *p, *q, *s; 537 int c, e, r; 538 539 while ((c = getopt(argc, argv, 540 "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF) 541 switch (c) { 542 case '1': 543 once_flag = 1; 544 break; 545 case '4': 546 family = PF_INET; 547 break; 548 case '6': 549 family = PF_INET6; 550 break; 551 case 'A': 552 A_flag = 1; 553 break; 554 case 'a': 555 a_flag = 1; 556 break; 557 case 'B': 558 if (parsesize(optarg, &B_size) == -1) 559 errx(1, "invalid buffer size"); 560 break; 561 case 'b': 562 warnx("warning: the -b option is deprecated"); 563 b_flag = 1; 564 break; 565 case 'c': 566 c_dirname = optarg; 567 break; 568 case 'd': 569 d_flag = 1; 570 break; 571 case 'F': 572 F_flag = 1; 573 break; 574 case 'f': 575 f_filename = optarg; 576 break; 577 case 'H': 578 H_flag = 1; 579 break; 580 case 'h': 581 h_hostname = optarg; 582 break; 583 case 'l': 584 l_flag = 1; 585 break; 586 case 'o': 587 o_flag = 1; 588 o_filename = optarg; 589 break; 590 case 'M': 591 case 'm': 592 if (r_flag) 593 errx(1, "the -m and -r flags are mutually exclusive"); 594 m_flag = 1; 595 break; 596 case 'n': 597 n_flag = 1; 598 break; 599 case 'P': 600 case 'p': 601 p_flag = 1; 602 break; 603 case 'q': 604 v_level = 0; 605 break; 606 case 'R': 607 R_flag = 1; 608 break; 609 case 'r': 610 if (m_flag) 611 errx(1, "the -m and -r flags are mutually exclusive"); 612 r_flag = 1; 613 break; 614 case 'S': 615 if (parseoff(optarg, &S_size) == -1) 616 errx(1, "invalid size"); 617 break; 618 case 's': 619 s_flag = 1; 620 break; 621 case 'T': 622 if (parseint(optarg, &T_secs) == -1) 623 errx(1, "invalid timeout"); 624 break; 625 case 't': 626 t_flag = 1; 627 warnx("warning: the -t option is deprecated"); 628 break; 629 case 'v': 630 v_level++; 631 break; 632 case 'w': 633 a_flag = 1; 634 if (parseint(optarg, &w_secs) == -1) 635 errx(1, "invalid delay"); 636 break; 637 default: 638 usage(); 639 exit(EX_USAGE); 640 } 641 642 argc -= optind; 643 argv += optind; 644 645 if (h_hostname || f_filename || c_dirname) { 646 if (!h_hostname || !f_filename || argc) { 647 usage(); 648 exit(EX_USAGE); 649 } 650 /* XXX this is a hack. */ 651 if (strcspn(h_hostname, "@:/") != strlen(h_hostname)) 652 errx(1, "invalid hostname"); 653 if (asprintf(argv, "ftp://%s/%s/%s", h_hostname, 654 c_dirname ? c_dirname : "", f_filename) == -1) 655 errx(1, "%s", strerror(ENOMEM)); 656 argc++; 657 } 658 659 if (!argc) { 660 usage(); 661 exit(EX_USAGE); 662 } 663 664 /* allocate buffer */ 665 if (B_size < MINBUFSIZE) 666 B_size = MINBUFSIZE; 667 if ((buf = malloc(B_size)) == NULL) 668 errx(1, "%s", strerror(ENOMEM)); 669 670 /* timeouts */ 671 if ((s = getenv("FTP_TIMEOUT")) != NULL) { 672 if (parseint(s, &ftp_timeout) == -1) { 673 warnx("FTP_TIMEOUT is not a positive integer"); 674 ftp_timeout = 0; 675 } 676 } 677 if ((s = getenv("HTTP_TIMEOUT")) != NULL) { 678 if (parseint(s, &http_timeout) == -1) { 679 warnx("HTTP_TIMEOUT is not a positive integer"); 680 http_timeout = 0; 681 } 682 } 683 684 /* signal handling */ 685 sa.sa_flags = 0; 686 sa.sa_handler = sig_handler; 687 sigemptyset(&sa.sa_mask); 688 sigaction(SIGALRM, &sa, NULL); 689 sa.sa_flags = SA_RESETHAND; 690 sigaction(SIGINT, &sa, NULL); 691 fetchRestartCalls = 0; 692 693 /* output file */ 694 if (o_flag) { 695 if (strcmp(o_filename, "-") == 0) { 696 o_stdout = 1; 697 } else if (stat(o_filename, &sb) == -1) { 698 if (errno == ENOENT) { 699 if (argc > 1) 700 errx(EX_USAGE, "%s is not a directory", o_filename); 701 } else { 702 err(EX_IOERR, "%s", o_filename); 703 } 704 } else { 705 if (sb.st_mode & S_IFDIR) 706 o_directory = 1; 707 } 708 } 709 710 /* check if output is to a tty (for progress report) */ 711 v_tty = isatty(STDERR_FILENO); 712 r = 0; 713 714 while (argc) { 715 if ((p = strrchr(*argv, '/')) == NULL) 716 p = *argv; 717 else 718 p++; 719 720 if (!*p) 721 p = "fetch.out"; 722 723 fetchLastErrCode = 0; 724 725 if (o_flag) { 726 if (o_stdout) { 727 e = fetch(*argv, "-"); 728 } else if (o_directory) { 729 asprintf(&q, "%s/%s", o_filename, p); 730 e = fetch(*argv, q); 731 free(q); 732 } else { 733 e = fetch(*argv, o_filename); 734 } 735 } else { 736 e = fetch(*argv, p); 737 } 738 739 if (sigint) 740 kill(getpid(), SIGINT); 741 742 if (e == 0 && once_flag) 743 exit(0); 744 745 if (e) { 746 r = 1; 747 if ((fetchLastErrCode 748 && fetchLastErrCode != FETCH_UNAVAIL 749 && fetchLastErrCode != FETCH_MOVED 750 && fetchLastErrCode != FETCH_URL 751 && fetchLastErrCode != FETCH_RESOLV 752 && fetchLastErrCode != FETCH_UNKNOWN)) { 753 if (w_secs) { 754 if (v_level) 755 fprintf(stderr, "Waiting %d seconds before retrying\n", 756 w_secs); 757 sleep(w_secs); 758 } 759 if (a_flag) 760 continue; 761 } 762 } 763 764 argc--, argv++; 765 } 766 767 exit(r); 768} 769