fetch.c revision 62815
1166255Sdelphij/*- 2166255Sdelphij * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3166255Sdelphij * All rights reserved. 4166255Sdelphij * 5166255Sdelphij * Redistribution and use in source and binary forms, with or without 6166255Sdelphij * modification, are permitted provided that the following conditions 7166255Sdelphij * are met: 8166255Sdelphij * 1. Redistributions of source code must retain the above copyright 9166255Sdelphij * notice, this list of conditions and the following disclaimer 10166255Sdelphij * in this position and unchanged. 11166255Sdelphij * 2. Redistributions in binary form must reproduce the above copyright 12166255Sdelphij * notice, this list of conditions and the following disclaimer in the 13166255Sdelphij * documentation and/or other materials provided with the distribution. 14166255Sdelphij * 3. The name of the author may not be used to endorse or promote products 15166255Sdelphij * derived from this software without specific prior written permission 16166255Sdelphij * 17166255Sdelphij * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18166255Sdelphij * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19166255Sdelphij * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20166255Sdelphij * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21166255Sdelphij * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22166255Sdelphij * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23166255Sdelphij * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24166255Sdelphij * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25166255Sdelphij * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26166255Sdelphij * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27166255Sdelphij * 28166255Sdelphij * $FreeBSD: head/usr.bin/fetch/fetch.c 62815 2000-07-08 09:34:33Z des $ 29166255Sdelphij */ 30166255Sdelphij 31166255Sdelphij#include <sys/param.h> 32166255Sdelphij#include <sys/stat.h> 33166255Sdelphij#include <sys/socket.h> 34166255Sdelphij 35166255Sdelphij#include <ctype.h> 36166255Sdelphij#include <err.h> 37166255Sdelphij#include <errno.h> 38166255Sdelphij#include <stdio.h> 39166255Sdelphij#include <stdlib.h> 40166255Sdelphij#include <string.h> 41166255Sdelphij#include <sysexits.h> 42166255Sdelphij#include <unistd.h> 43166255Sdelphij 44166255Sdelphij#include <fetch.h> 45166255Sdelphij 46166255Sdelphij#define MINBUFSIZE 4096 47166255Sdelphij 48166255Sdelphij/* Option flags */ 49166255Sdelphijint A_flag; /* -A: do not follow 302 redirects */ 50166255Sdelphijint a_flag; /* -a: auto retry */ 51166255Sdelphijsize_t B_size; /* -B: buffer size */ 52166255Sdelphijint b_flag; /*! -b: workaround TCP bug */ 53166255Sdelphijchar *c_dirname; /* -c: remote directory */ 54166255Sdelphijint d_flag; /* -d: direct connection */ 55166255Sdelphijint F_flag; /* -F: restart without checking mtime */ 56166255Sdelphijchar *f_filename; /* -f: file to fetch */ 57166255Sdelphijint H_flag; /* -H: use high port */ 58166255Sdelphijchar *h_hostname; /* -h: host to fetch from */ 59166255Sdelphijint l_flag; /* -l: link rather than copy file: URLs */ 60166255Sdelphijint m_flag; /* -[Mm]: mirror mode */ 61166255Sdelphijint n_flag; /* -n: do not preserve modification time */ 62166255Sdelphijint o_flag; /* -o: specify output file */ 63166255Sdelphijint o_directory; /* output file is a directory */ 64166255Sdelphijchar *o_filename; /* name of output file */ 65166255Sdelphijint o_stdout; /* output file is stdout */ 66166255Sdelphijint once_flag; /* -1: stop at first successful file */ 67166255Sdelphijint p_flag = 1; /* -[Pp]: use passive FTP */ 68166255Sdelphijint R_flag; /* -R: don't delete partially transferred files */ 69166255Sdelphijint r_flag; /* -r: restart previously interrupted transfer */ 70166255Sdelphiju_int T_secs = 0; /* -T: transfer timeout in seconds */ 71166255Sdelphijint s_flag; /* -s: show size, don't fetch */ 72166255Sdelphijoff_t S_size; /* -S: require size to match */ 73166255Sdelphijint t_flag; /*! -t: workaround TCP bug */ 74int v_level = 1; /* -v: verbosity level */ 75int v_tty; /* stdout is a tty */ 76u_int w_secs; /* -w: retry delay */ 77int family = PF_UNSPEC; /* -[46]: address family to use */ 78 79 80u_int ftp_timeout; /* default timeout for FTP transfers */ 81u_int http_timeout; /* default timeout for HTTP transfers */ 82u_char *buf; /* transfer buffer */ 83 84 85void 86sig_handler(int sig) 87{ 88 errx(1, "Transfer timed out"); 89} 90 91struct xferstat { 92 char name[40]; 93 struct timeval start; 94 struct timeval end; 95 struct timeval last; 96 off_t size; 97 off_t offset; 98 off_t rcvd; 99}; 100 101void 102stat_start(struct xferstat *xs, char *name, off_t size, off_t offset) 103{ 104 snprintf(xs->name, sizeof xs->name, "%s", name); 105 xs->size = size; 106 xs->offset = offset; 107 if (v_level) { 108 fprintf(stderr, "Receiving %s", xs->name); 109 if (xs->size != -1) 110 fprintf(stderr, " (%lld bytes)", xs->size - xs->offset); 111 } 112 gettimeofday(&xs->start, NULL); 113 xs->last = xs->start; 114} 115 116void 117stat_update(struct xferstat *xs, off_t rcvd) 118{ 119 struct timeval now; 120 121 xs->rcvd = rcvd; 122 123 if (v_level <= 1 || !v_tty) 124 return; 125 126 gettimeofday(&now, NULL); 127 if (now.tv_sec <= xs->last.tv_sec) 128 return; 129 xs->last = now; 130 131 fprintf(stderr, "\rReceiving %s", xs->name); 132 if (xs->size == -1) 133 fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset); 134 else 135 fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset, 136 (int)((100.0 * xs->rcvd) / (xs->size - xs->offset))); 137} 138 139void 140stat_end(struct xferstat *xs) 141{ 142 double delta; 143 double bps; 144 145 gettimeofday(&xs->end, NULL); 146 147 if (!v_level) 148 return; 149 150 fputc('\n', stderr); 151 delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6)) 152 - (xs->start.tv_sec + (xs->start.tv_usec / 1.e6)); 153 fprintf(stderr, "%lld bytes transferred in %.1f seconds ", 154 xs->size - xs->offset, delta); 155 bps = (xs->size - xs->offset) / delta; 156 if (bps > 1024*1024) 157 fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024)); 158 else if (bps > 1024) 159 fprintf(stderr, "(%.2f kBps)\n", bps / 1024); 160 else 161 fprintf(stderr, "(%.2f Bps)\n", bps); 162} 163 164int 165fetch(char *URL, char *path) 166{ 167 struct url *url; 168 struct url_stat us; 169 struct stat sb; 170 struct xferstat xs; 171 FILE *f, *of; 172 size_t size; 173 off_t count; 174 char flags[8]; 175 int ch, n, r; 176 u_int timeout; 177 178 f = of = NULL; 179 180 /* parse URL */ 181 if ((url = fetchParseURL(URL)) == NULL) { 182 warnx("%s: parse error", URL); 183 goto failure; 184 } 185 186 timeout = 0; 187 *flags = 0; 188 189 /* common flags */ 190 if (v_level > 2) 191 strcat(flags, "v"); 192 switch (family) { 193 case PF_INET: 194 strcat(flags, "4"); 195 break; 196 case PF_INET6: 197 strcat(flags, "6"); 198 break; 199 } 200 201 /* FTP specific flags */ 202 if (strcmp(url->scheme, "ftp") == 0) { 203 if (p_flag) 204 strcat(flags, "p"); 205 if (d_flag) 206 strcat(flags, "d"); 207 if (H_flag) 208 strcat(flags, "h"); 209 timeout = T_secs ? T_secs : ftp_timeout; 210 } 211 212 /* HTTP specific flags */ 213 if (strcmp(url->scheme, "http") == 0) { 214 if (d_flag) 215 strcat(flags, "d"); 216 if (A_flag) 217 strcat(flags, "A"); 218 timeout = T_secs ? T_secs : http_timeout; 219 } 220 221 /* 222 * Set the protocol timeout. 223 * This currently only works for FTP, so we still use 224 * alarm(timeout) further down. 225 */ 226 fetchTimeout = timeout; 227 228 /* stat remote file */ 229 alarm(timeout); 230 if (fetchStat(url, &us, flags) == -1) 231 warnx("%s: size not known", path); 232 alarm(timeout); 233 234 /* just print size */ 235 if (s_flag) { 236 if (us.size == -1) 237 printf("Unknown\n"); 238 else 239 printf("%lld\n", us.size); 240 goto success; 241 } 242 243 /* check that size is as expected */ 244 if (S_size && us.size != -1 && us.size != S_size) { 245 warnx("%s: size mismatch: expected %lld, actual %lld", 246 path, S_size, us.size); 247 goto failure; 248 } 249 250 /* symlink instead of copy */ 251 if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) { 252 if (symlink(url->doc, path) == -1) { 253 warn("%s: symlink()", path); 254 goto failure; 255 } 256 goto success; 257 } 258 259 if (o_stdout) { 260 /* output to stdout */ 261 of = stdout; 262 } else if (r_flag && us.size != -1 && stat(path, &sb) != -1 263 && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) { 264 /* output to file, restart aborted transfer */ 265 if (us.size == sb.st_size) 266 goto success; 267 else if (sb.st_size > us.size && truncate(path, us.size) == -1) { 268 warn("%s: truncate()", path); 269 goto failure; 270 } 271 if ((of = fopen(path, "a")) == NULL) { 272 warn("%s: open()", path); 273 goto failure; 274 } 275 url->offset = sb.st_size; 276 } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) { 277 /* output to file, mirror mode */ 278 if (sb.st_size == us.size && sb.st_mtime == us.mtime) 279 return 0; 280 if ((of = fopen(path, "w")) == NULL) { 281 warn("%s: open()", path); 282 goto failure; 283 } 284 } else { 285 /* output to file, all other cases */ 286 if ((of = fopen(path, "w")) == NULL) { 287 warn("%s: open()", path); 288 goto failure; 289 } 290 } 291 count = url->offset; 292 293 /* start the transfer */ 294 if ((f = fetchGet(url, flags)) == NULL) { 295 warnx("%s", fetchLastErrString); 296 if (!R_flag && !r_flag && !o_stdout) 297 unlink(path); 298 goto failure; 299 } 300 301 /* start the counter */ 302 stat_start(&xs, path, us.size, count); 303 304 n = 0; 305 306 if (us.size == -1) { 307 /* 308 * We have no idea how much data to expect, so do it byte by 309 * byte. This is incredibly inefficient, but there's not much 310 * we can do about it... :( 311 */ 312 while (1) { 313 if (timeout) 314 alarm(timeout); 315#ifdef STDIO_HACK 316 /* 317 * This is a non-portable hack, but it makes things go 318 * faster. Basically, if there is data in the input file's 319 * buffer, write it out; then fall through to the fgetc() 320 * which forces a refill. It saves a memcpy() and reduces 321 * the number of iterations, i.e the number of calls to 322 * alarm(). Empirical evidence shows this can cut user 323 * time by up to 90%. There may be better (even portable) 324 * ways to do this. 325 */ 326 if (f->_r && (f->_ub._base == NULL)) { 327 if (fwrite(f->_p, f->_r, 1, of) < 1) 328 break; 329 count += f->_r; 330 f->_p += f->_r; 331 f->_r = 0; 332 } 333#endif 334 if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF) 335 break; 336 stat_update(&xs, count++); 337 n++; 338 } 339 } else { 340 /* we know exactly how much to transfer, so do it efficiently */ 341 for (size = B_size; count != us.size; n++) { 342 if (us.size - count < B_size) 343 size = us.size - count; 344 if (timeout) 345 alarm(timeout); 346 if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1) 347 break; 348 stat_update(&xs, count += size); 349 } 350 } 351 352 if (timeout) 353 alarm(0); 354 355 stat_end(&xs); 356 357 /* check the status of our files */ 358 if (ferror(f)) 359 warn("%s", URL); 360 if (ferror(of)) 361 warn("%s", path); 362 if (ferror(f) || ferror(of)) { 363 if (!R_flag && !r_flag && !o_stdout) 364 unlink(path); 365 goto failure; 366 } 367 368 /* need to close the file before setting mtime */ 369 if (of != stdout) { 370 fclose(of); 371 of = NULL; 372 } 373 374 /* Set mtime of local file */ 375 if (!n_flag && us.size != -1 && !o_stdout) { 376 struct timeval tv[2]; 377 378 tv[0].tv_sec = (long)us.atime; 379 tv[1].tv_sec = (long)us.mtime; 380 tv[0].tv_usec = tv[1].tv_usec = 0; 381 if (utimes(path, tv)) 382 warn("%s: utimes()", path); 383 } 384 385 /* check the file size */ 386 if (us.size != -1 && count < us.size) { 387 warnx("%s appears to be truncated: %lld/%lld bytes", 388 path, count, us.size); 389 goto failure; 390 } 391 392 success: 393 r = 0; 394 goto done; 395 failure: 396 r = -1; 397 goto done; 398 done: 399 if (f) 400 fclose(f); 401 if (of && of != stdout) 402 fclose(of); 403 fetchFreeURL(url); 404 return r; 405} 406 407void 408usage(void) 409{ 410 /* XXX badly out of synch */ 411 fprintf(stderr, 412 "Usage: fetch [-1AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n" 413 " [-B bytes] [-T seconds] [-w seconds]\n" 414 " [-f file -h host [-c dir] | URL ...]\n" 415 ); 416} 417 418 419#define PARSENUM(NAME, TYPE) \ 420int \ 421NAME(char *s, TYPE *v) \ 422{ \ 423 *v = 0; \ 424 for (*v = 0; *s; s++) \ 425 if (isdigit(*s)) \ 426 *v = *v * 10 + *s - '0'; \ 427 else \ 428 return -1; \ 429 return 0; \ 430} 431 432PARSENUM(parseint, u_int) 433PARSENUM(parsesize, size_t) 434PARSENUM(parseoff, off_t) 435 436int 437main(int argc, char *argv[]) 438{ 439 struct stat sb; 440 char *p, *q, *s; 441 int c, e, r; 442 443 while ((c = getopt(argc, argv, 444 "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF) 445 switch (c) { 446 case '1': 447 once_flag = 1; 448 break; 449 case '4': 450 family = PF_INET; 451 break; 452 case '6': 453 family = PF_INET6; 454 break; 455 case 'A': 456 A_flag = 1; 457 break; 458 case 'a': 459 a_flag = 1; 460 break; 461 case 'B': 462 if (parsesize(optarg, &B_size) == -1) 463 errx(1, "invalid buffer size"); 464 break; 465 case 'b': 466 warnx("warning: the -b option is deprecated"); 467 b_flag = 1; 468 break; 469 case 'c': 470 c_dirname = optarg; 471 break; 472 case 'd': 473 d_flag = 1; 474 break; 475 case 'F': 476 F_flag = 1; 477 break; 478 case 'f': 479 f_filename = optarg; 480 break; 481 case 'H': 482 H_flag = 1; 483 break; 484 case 'h': 485 h_hostname = optarg; 486 break; 487 case 'l': 488 l_flag = 1; 489 break; 490 case 'o': 491 o_flag = 1; 492 o_filename = optarg; 493 break; 494 case 'M': 495 case 'm': 496 m_flag = 1; 497 break; 498 case 'n': 499 n_flag = 1; 500 break; 501 case 'P': 502 case 'p': 503 p_flag = 1; 504 break; 505 case 'q': 506 v_level = 0; 507 break; 508 case 'R': 509 R_flag = 1; 510 break; 511 case 'r': 512 r_flag = 1; 513 break; 514 case 'S': 515 if (parseoff(optarg, &S_size) == -1) 516 errx(1, "invalid size"); 517 break; 518 case 's': 519 s_flag = 1; 520 break; 521 case 'T': 522 if (parseint(optarg, &T_secs) == -1) 523 errx(1, "invalid timeout"); 524 break; 525 case 't': 526 t_flag = 1; 527 warnx("warning: the -t option is deprecated"); 528 break; 529 case 'v': 530 v_level++; 531 break; 532 case 'w': 533 a_flag = 1; 534 if (parseint(optarg, &w_secs) == -1) 535 errx(1, "invalid delay"); 536 break; 537 default: 538 usage(); 539 exit(EX_USAGE); 540 } 541 542 argc -= optind; 543 argv += optind; 544 545 if (h_hostname || f_filename || c_dirname) { 546 if (!h_hostname || !f_filename || argc) { 547 usage(); 548 exit(EX_USAGE); 549 } 550 /* XXX this is a hack. */ 551 if (strcspn(h_hostname, "@:/") != strlen(h_hostname)) 552 errx(1, "invalid hostname"); 553 if (asprintf(argv, "ftp://%s/%s/%s", h_hostname, 554 c_dirname ? c_dirname : "", f_filename) == -1) 555 errx(1, strerror(ENOMEM)); 556 argc++; 557 } 558 559 if (!argc) { 560 usage(); 561 exit(EX_USAGE); 562 } 563 564 /* allocate buffer */ 565 if (B_size < MINBUFSIZE) 566 B_size = MINBUFSIZE; 567 if ((buf = malloc(B_size)) == NULL) 568 errx(1, strerror(ENOMEM)); 569 570 /* timeout handling */ 571 signal(SIGALRM, sig_handler); 572 if ((s = getenv("FTP_TIMEOUT")) != NULL) { 573 if (parseint(s, &ftp_timeout) == -1) { 574 warnx("FTP_TIMEOUT is not a positive integer"); 575 ftp_timeout = 0; 576 } 577 } 578 if ((s = getenv("HTTP_TIMEOUT")) != NULL) { 579 if (parseint(s, &http_timeout) == -1) { 580 warnx("HTTP_TIMEOUT is not a positive integer"); 581 http_timeout = 0; 582 } 583 } 584 585 /* output file */ 586 if (o_flag) { 587 if (strcmp(o_filename, "-") == 0) { 588 o_stdout = 1; 589 } else if (stat(o_filename, &sb) == -1) { 590 if (errno == ENOENT) { 591 if (argc > 1) 592 errx(EX_USAGE, "%s is not a directory", o_filename); 593 } else { 594 err(EX_IOERR, "%s", o_filename); 595 } 596 } else { 597 if (sb.st_mode & S_IFDIR) 598 o_directory = 1; 599 } 600 } 601 602 /* check if output is to a tty (for progress report) */ 603 v_tty = isatty(STDERR_FILENO); 604 r = 0; 605 606 while (argc) { 607 if ((p = strrchr(*argv, '/')) == NULL) 608 p = *argv; 609 else 610 p++; 611 612 if (!*p) 613 p = "fetch.out"; 614 615 fetchLastErrCode = 0; 616 617 if (o_flag) { 618 if (o_stdout) { 619 e = fetch(*argv, "-"); 620 } else if (o_directory) { 621 asprintf(&q, "%s/%s", o_filename, p); 622 e = fetch(*argv, q); 623 free(q); 624 } else { 625 e = fetch(*argv, o_filename); 626 } 627 } else { 628 e = fetch(*argv, p); 629 } 630 631 if (e == 0 && once_flag) 632 exit(0); 633 634 if (e) { 635 r = 1; 636 if ((fetchLastErrCode 637 && fetchLastErrCode != FETCH_UNAVAIL 638 && fetchLastErrCode != FETCH_MOVED 639 && fetchLastErrCode != FETCH_URL 640 && fetchLastErrCode != FETCH_RESOLV 641 && fetchLastErrCode != FETCH_UNKNOWN)) { 642 if (w_secs) { 643 if (v_level) 644 fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs); 645 sleep(w_secs); 646 } 647 if (a_flag) 648 continue; 649 fprintf(stderr, "Skipping %s\n", *argv); 650 } 651 } 652 653 argc--, argv++; 654 } 655 656 exit(r); 657} 658