/* fetch.c revision 62216 */
1/*- 2 * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * $FreeBSD: head/usr.bin/fetch/fetch.c 62216 2000-06-28 16:55:15Z des $ 29 */ 30 31#include <sys/param.h> 32#include <sys/stat.h> 33#include <sys/socket.h> 34 35#include <ctype.h> 36#include <err.h> 37#include <errno.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <sysexits.h> 42#include <unistd.h> 43 44#include <fetch.h> 45 46#define MINBUFSIZE 4096 47 48/* Option flags */ 49int A_flag; /* -A: do not follow 302 redirects */ 50int a_flag; /* -a: auto retry */ 51size_t B_size; /* -B: buffer size */ 52int b_flag; /*! -b: workaround TCP bug */ 53int d_flag; /* -d: direct connection */ 54int F_flag; /* -F: restart without checking mtime */ 55char *f_filename; /* -f: file to fetch */ 56int H_flag; /* -H: use high port */ 57char *h_hostname; /* -h: host to fetch from */ 58int l_flag; /* -l: link rather than copy file: URLs */ 59int m_flag; /* -[Mm]: set local timestamp to remote timestamp */ 60int o_flag; /* -o: specify output file */ 61int o_directory; /* output file is a directory */ 62char *o_filename; /* name of output file */ 63int o_stdout; /* output file is stdout */ 64int once_flag; /* -1: stop at first successful file */ 65int p_flag = 1; /* -[Pp]: use passive FTP */ 66int R_flag; /* -R: don't delete partially transferred files */ 67int r_flag; /* -r: restart previously interrupted transfer */ 68u_int T_secs = 0; /* -T: transfer timeout in seconds */ 69int s_flag; /* -s: show size, don't fetch */ 70off_t S_size; /* -S: require size to match */ 71int t_flag; /*! 
-t: workaround TCP bug */ 72int v_level = 1; /* -v: verbosity level */ 73int v_tty; /* stdout is a tty */ 74u_int w_secs; /* -w: retry delay */ 75int family = PF_UNSPEC; /* -[46]: address family to use */ 76 77 78u_int ftp_timeout; /* default timeout for FTP transfers */ 79u_int http_timeout; /* default timeout for HTTP transfers */ 80u_char *buf; /* transfer buffer */ 81 82 83void 84sig_handler(int sig) 85{ 86 errx(1, "Transfer timed out"); 87} 88 89struct xferstat { 90 char name[40]; 91 struct timeval start; 92 struct timeval end; 93 struct timeval last; 94 off_t size; 95 off_t offset; 96 off_t rcvd; 97}; 98 99void 100stat_start(struct xferstat *xs, char *name, off_t size, off_t offset) 101{ 102 snprintf(xs->name, sizeof xs->name, "%s", name); 103 xs->size = size; 104 xs->offset = offset; 105 if (v_level) { 106 fprintf(stderr, "Receiving %s", xs->name); 107 if (xs->size != -1) 108 fprintf(stderr, " (%lld bytes)", xs->size - xs->offset); 109 } 110 gettimeofday(&xs->start, NULL); 111 xs->last = xs->start; 112} 113 114void 115stat_update(struct xferstat *xs, off_t rcvd) 116{ 117 struct timeval now; 118 119 xs->rcvd = rcvd; 120 121 if (v_level <= 1 || !v_tty) 122 return; 123 124 gettimeofday(&now, NULL); 125 if (now.tv_sec <= xs->last.tv_sec) 126 return; 127 xs->last = now; 128 129 fprintf(stderr, "\rReceiving %s", xs->name); 130 if (xs->size == -1) 131 fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset); 132 else 133 fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset, 134 (int)((100.0 * xs->rcvd) / (xs->size - xs->offset))); 135} 136 137void 138stat_end(struct xferstat *xs) 139{ 140 double delta; 141 double bps; 142 143 gettimeofday(&xs->end, NULL); 144 145 if (!v_level) 146 return; 147 148 fputc('\n', stderr); 149 delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6)) 150 - (xs->start.tv_sec + (xs->start.tv_usec / 1.e6)); 151 fprintf(stderr, "%lld bytes transferred in %.1f seconds ", 152 xs->size - xs->offset, delta); 153 bps = (xs->size - xs->offset) / 
delta; 154 if (bps > 1024*1024) 155 fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024)); 156 else if (bps > 1024) 157 fprintf(stderr, "(%.2f kBps)\n", bps / 1024); 158 else 159 fprintf(stderr, "(%.2f Bps)\n", bps); 160} 161 162int 163fetch(char *URL, char *path) 164{ 165 struct url *url; 166 struct url_stat us; 167 struct stat sb; 168 struct xferstat xs; 169 FILE *f, *of; 170 size_t size; 171 off_t count; 172 char flags[8]; 173 int ch, n, r; 174 u_int timeout; 175 176 f = of = NULL; 177 178 /* parse URL */ 179 if ((url = fetchParseURL(URL)) == NULL) { 180 warnx("%s: parse error", URL); 181 goto failure; 182 } 183 184 timeout = 0; 185 *flags = 0; 186 187 /* common flags */ 188 if (v_level > 2) 189 strcat(flags, "v"); 190 switch (family) { 191 case PF_INET: 192 strcat(flags, "4"); 193 break; 194 case PF_INET6: 195 strcat(flags, "6"); 196 break; 197 } 198 199 /* FTP specific flags */ 200 if (strcmp(url->scheme, "ftp") == 0) { 201 if (p_flag) 202 strcat(flags, "p"); 203 if (d_flag) 204 strcat(flags, "d"); 205 if (H_flag) 206 strcat(flags, "h"); 207 timeout = T_secs ? T_secs : ftp_timeout; 208 } 209 210 /* HTTP specific flags */ 211 if (strcmp(url->scheme, "http") == 0) { 212 if (d_flag) 213 strcat(flags, "d"); 214 if (A_flag) 215 strcat(flags, "A"); 216 timeout = T_secs ? T_secs : http_timeout; 217 } 218 219 /* 220 * Set the protocol timeout. 221 * This currently only works for FTP, so we still use 222 * alarm(timeout) further down. 
223 */ 224 fetchTimeout = timeout; 225 226 /* stat remote file */ 227 alarm(timeout); 228 if (fetchStat(url, &us, flags) == -1) 229 warnx("%s: size not known", path); 230 alarm(timeout); 231 232 /* just print size */ 233 if (s_flag) { 234 if (us.size == -1) 235 printf("Unknown\n"); 236 else 237 printf("%lld\n", us.size); 238 goto success; 239 } 240 241 /* check that size is as expected */ 242 if (S_size && us.size != -1 && us.size != S_size) { 243 warnx("%s: size mismatch: expected %lld, actual %lld", 244 path, S_size, us.size); 245 goto failure; 246 } 247 248 /* symlink instead of copy */ 249 if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) { 250 if (symlink(url->doc, path) == -1) { 251 warn("%s: symlink()", path); 252 goto failure; 253 } 254 goto success; 255 } 256 257 if (o_stdout) { 258 /* output to stdout */ 259 of = stdout; 260 } else if (r_flag && us.size != -1 && stat(path, &sb) != -1 261 && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) { 262 /* output to file, restart aborted transfer */ 263 if (us.size == sb.st_size) 264 goto success; 265 else if (sb.st_size > us.size && truncate(path, us.size) == -1) { 266 warn("%s: truncate()", path); 267 goto failure; 268 } 269 if ((of = fopen(path, "a")) == NULL) { 270 warn("%s: open()", path); 271 goto failure; 272 } 273 url->offset = sb.st_size; 274 } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) { 275 /* output to file, mirror mode */ 276 warnx(" local: %lld bytes, mtime %ld", sb.st_size, sb.st_mtime); 277 warnx("remote: %lld bytes, mtime %ld", us.size, us.mtime); 278 if (sb.st_size == us.size && sb.st_mtime == us.mtime) 279 return 0; 280 if ((of = fopen(path, "w")) == NULL) { 281 warn("%s: open()", path); 282 goto failure; 283 } 284 } else { 285 /* output to file, all other cases */ 286 if ((of = fopen(path, "w")) == NULL) { 287 warn("%s: open()", path); 288 goto failure; 289 } 290 } 291 count = url->offset; 292 293 /* start the transfer */ 294 if ((f = fetchGet(url, flags)) == NULL) 
{ 295 warnx("%s", fetchLastErrString); 296 goto failure; 297 } 298 299 /* start the counter */ 300 stat_start(&xs, path, us.size, count); 301 302 n = 0; 303 304 if (us.size == -1) { 305 /* 306 * We have no idea how much data to expect, so do it byte by 307 * byte. This is incredibly inefficient, but there's not much 308 * we can do about it... :( 309 */ 310 while (1) { 311 if (timeout) 312 alarm(timeout); 313#ifdef STDIO_HACK 314 /* 315 * This is a non-portable hack, but it makes things go 316 * faster. Basically, if there is data in the input file's 317 * buffer, write it out; then fall through to the fgetc() 318 * which forces a refill. It saves a memcpy() and reduces 319 * the number of iterations, i.e the number of calls to 320 * alarm(). Empirical evidence shows this can cut user 321 * time by up to 90%. There may be better (even portable) 322 * ways to do this. 323 */ 324 if (f->_r && (f->_ub._base == NULL)) { 325 if (fwrite(f->_p, f->_r, 1, of) < 1) 326 break; 327 count += f->_r; 328 f->_p += f->_r; 329 f->_r = 0; 330 } 331#endif 332 if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF) 333 break; 334 stat_update(&xs, count++); 335 n++; 336 } 337 } else { 338 /* we know exactly how much to transfer, so do it efficiently */ 339 for (size = B_size; count != us.size; n++) { 340 if (us.size - count < B_size) 341 size = us.size - count; 342 if (timeout) 343 alarm(timeout); 344 if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1) 345 break; 346 stat_update(&xs, count += size); 347 } 348 } 349 350 if (timeout) 351 alarm(0); 352 353 stat_end(&xs); 354 355 /* check the status of our files */ 356 if (ferror(f)) 357 warn("%s", URL); 358 if (ferror(of)) 359 warn("%s", path); 360 if (ferror(f) || ferror(of)) { 361 if (!R_flag && !o_stdout) 362 unlink(path); 363 goto failure; 364 } 365 366 /* need to close the file before setting mtime */ 367 if (of != stdout) { 368 fclose(of); 369 of = NULL; 370 } 371 372 /* Set mtime of local file */ 373 if (m_flag && us.size 
!= -1 && !o_stdout) { 374 struct timeval tv[2]; 375 376 tv[0].tv_sec = (long)us.atime; 377 tv[1].tv_sec = (long)us.mtime; 378 tv[0].tv_usec = tv[1].tv_usec = 0; 379 if (utimes(path, tv)) 380 warn("%s: utimes()", path); 381 } 382 383 success: 384 r = 0; 385 goto done; 386 failure: 387 r = -1; 388 goto done; 389 done: 390 if (f) 391 fclose(f); 392 if (of && of != stdout) 393 fclose(of); 394 fetchFreeURL(url); 395 return r; 396} 397 398void 399usage(void) 400{ 401 /* XXX badly out of synch */ 402 fprintf(stderr, 403 "Usage: fetch [-1AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n" 404 " [-B bytes] [-T seconds] [-w seconds]\n" 405 " [-f file -h host [-c dir] | URL ...]\n" 406 ); 407} 408 409 410#define PARSENUM(NAME, TYPE) \ 411int \ 412NAME(char *s, TYPE *v) \ 413{ \ 414 *v = 0; \ 415 for (*v = 0; *s; s++) \ 416 if (isdigit(*s)) \ 417 *v = *v * 10 + *s - '0'; \ 418 else \ 419 return -1; \ 420 return 0; \ 421} 422 423PARSENUM(parseint, u_int) 424PARSENUM(parsesize, size_t) 425PARSENUM(parseoff, off_t) 426 427int 428main(int argc, char *argv[]) 429{ 430 struct stat sb; 431 char *p, *q, *s; 432 int c, e, r; 433 434 while ((c = getopt(argc, argv, 435 "146AaB:bdFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF) 436 switch (c) { 437 case '1': 438 once_flag = 1; 439 break; 440 case '4': 441 family = PF_INET; 442 break; 443 case '6': 444 family = PF_INET6; 445 break; 446 case 'A': 447 A_flag = 1; 448 break; 449 case 'a': 450 a_flag = 1; 451 break; 452 case 'B': 453 if (parsesize(optarg, &B_size) == -1) 454 errx(1, "invalid buffer size"); 455 break; 456 case 'b': 457 warnx("warning: the -b option is deprecated"); 458 b_flag = 1; 459 break; 460 case 'd': 461 d_flag = 1; 462 break; 463 case 'F': 464 F_flag = 1; 465 break; 466 case 'f': 467 f_filename = optarg; 468 break; 469 case 'H': 470 H_flag = 1; 471 break; 472 case 'h': 473 h_hostname = optarg; 474 break; 475 case 'l': 476 l_flag = 1; 477 break; 478 case 'o': 479 o_flag = 1; 480 o_filename = optarg; 481 break; 482 case 'M': 483 case 
'm': 484 m_flag = 1; 485 break; 486 case 'n': 487 m_flag = 0; 488 break; 489 case 'P': 490 case 'p': 491 p_flag = 1; 492 break; 493 case 'q': 494 v_level = 0; 495 break; 496 case 'R': 497 R_flag = 1; 498 break; 499 case 'r': 500 r_flag = 1; 501 break; 502 case 'S': 503 if (parseoff(optarg, &S_size) == -1) 504 errx(1, "invalid size"); 505 break; 506 case 's': 507 s_flag = 1; 508 break; 509 case 'T': 510 if (parseint(optarg, &T_secs) == -1) 511 errx(1, "invalid timeout"); 512 break; 513 case 't': 514 t_flag = 1; 515 warnx("warning: the -t option is deprecated"); 516 break; 517 case 'v': 518 v_level++; 519 break; 520 case 'w': 521 a_flag = 1; 522 if (parseint(optarg, &w_secs) == -1) 523 errx(1, "invalid delay"); 524 break; 525 default: 526 usage(); 527 exit(EX_USAGE); 528 } 529 530 argc -= optind; 531 argv += optind; 532 533 if (h_hostname || f_filename) { 534 if (!h_hostname || !f_filename || argc) { 535 usage(); 536 exit(EX_USAGE); 537 } 538 /* XXX this is a hack. */ 539 if (strcspn(h_hostname, "@:/") != strlen(h_hostname)) 540 errx(1, "invalid hostname"); 541 if (asprintf(argv, "ftp://%s/%s", h_hostname, f_filename) == -1) 542 errx(1, strerror(ENOMEM)); 543 argc++; 544 } 545 546 if (!argc) { 547 usage(); 548 exit(EX_USAGE); 549 } 550 551 /* allocate buffer */ 552 if (B_size < MINBUFSIZE) 553 B_size = MINBUFSIZE; 554 if ((buf = malloc(B_size)) == NULL) 555 errx(1, strerror(ENOMEM)); 556 557 /* timeout handling */ 558 signal(SIGALRM, sig_handler); 559 if ((s = getenv("FTP_TIMEOUT")) != NULL) { 560 if (parseint(s, &ftp_timeout) == -1) { 561 warnx("FTP_TIMEOUT is not a positive integer"); 562 ftp_timeout = 0; 563 } 564 } 565 if ((s = getenv("HTTP_TIMEOUT")) != NULL) { 566 if (parseint(s, &http_timeout) == -1) { 567 warnx("HTTP_TIMEOUT is not a positive integer"); 568 http_timeout = 0; 569 } 570 } 571 572 /* output file */ 573 if (o_flag) { 574 if (strcmp(o_filename, "-") == 0) { 575 o_stdout = 1; 576 } else if (stat(o_filename, &sb) == -1) { 577 if (errno == ENOENT) { 
578 if (argc > 1) 579 errx(EX_USAGE, "%s is not a directory", o_filename); 580 } else { 581 err(EX_IOERR, "%s", o_filename); 582 } 583 } else { 584 if (sb.st_mode & S_IFDIR) 585 o_directory = 1; 586 } 587 } 588 589 /* check if output is to a tty (for progress report) */ 590 v_tty = isatty(STDOUT_FILENO); 591 r = 0; 592 593 while (argc) { 594 if ((p = strrchr(*argv, '/')) == NULL) 595 p = *argv; 596 else 597 p++; 598 599 if (!*p) 600 p = "fetch.out"; 601 602 fetchLastErrCode = 0; 603 604 if (o_flag) { 605 if (o_stdout) { 606 e = fetch(*argv, "-"); 607 } else if (o_directory) { 608 asprintf(&q, "%s/%s", o_filename, p); 609 e = fetch(*argv, q); 610 free(q); 611 } else { 612 e = fetch(*argv, o_filename); 613 } 614 } else { 615 e = fetch(*argv, p); 616 } 617 618 if (e == 0 && once_flag) 619 exit(0); 620 621 if (e) { 622 r = 1; 623 if ((fetchLastErrCode 624 && fetchLastErrCode != FETCH_UNAVAIL 625 && fetchLastErrCode != FETCH_MOVED 626 && fetchLastErrCode != FETCH_URL 627 && fetchLastErrCode != FETCH_RESOLV 628 && fetchLastErrCode != FETCH_UNKNOWN)) { 629 if (w_secs) { 630 if (v_level) 631 fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs); 632 sleep(w_secs); 633 } 634 if (a_flag) 635 continue; 636 fprintf(stderr, "Skipping %s\n", *argv); 637 } 638 } 639 640 argc--, argv++; 641 } 642 643 exit(r); 644} 645