fetch.c revision 73937
1/*-
2 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$FreeBSD: head/usr.bin/fetch/fetch.c 73937 2001-03-07 05:33:50Z des $
29 */
30
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <fetch.h>
46
/* Smallest transfer buffer; main() raises smaller -B values to this */
#define MINBUFSIZE	4096

/*
 * Option flags.  All of them are set by the getopt() loop in main()
 * (o_directory, o_stdout and v_tty are derived there afterwards) and
 * are read by fetch() and the stat_*() helpers.  A "!" right after the
 * comment opener marks an option that main() reports as deprecated.
 */
int	 A_flag;	/*    -A: do not follow 302 redirects */
int	 a_flag;	/*    -a: auto retry (also implied by -w) */
size_t	 B_size;	/*    -B: buffer size */
int	 b_flag;	/*!   -b: workaround TCP bug */
char    *c_dirname;	/*    -c: remote directory */
int	 d_flag;	/*    -d: direct connection */
int	 F_flag;	/*    -F: restart without checking mtime  */
char	*f_filename;	/*    -f: file to fetch */
int	 H_flag;	/*    -H: use high port */
char	*h_hostname;	/*    -h: host to fetch from */
int	 l_flag;	/*    -l: link rather than copy file: URLs */
int	 m_flag;	/* -[Mm]: mirror mode */
int	 n_flag;	/*    -n: do not preserve modification time */
int	 o_flag;	/*    -o: specify output file */
int	 o_directory;	/*        output file is a directory */
char	*o_filename;	/*        name of output file */
int	 o_stdout;	/*        output file is stdout ("-o -") */
int	 once_flag;	/*    -1: stop at first successful file */
int	 p_flag;	/* -[Pp]: use passive FTP */
int	 R_flag;	/*    -R: don't delete partially transferred files */
int	 r_flag;	/*    -r: restart previously interrupted transfer */
u_int	 T_secs = 0;	/*    -T: transfer timeout in seconds */
int	 s_flag;        /*    -s: show size, don't fetch */
off_t	 S_size;        /*    -S: require size to match */
int	 t_flag;	/*!   -t: workaround TCP bug */
int	 v_level = 1;	/*    -v: verbosity level (-q sets it to 0) */
int	 v_tty;		/*        stderr is a tty (progress display) */
u_int	 w_secs;	/*    -w: retry delay */
int	 family = PF_UNSPEC;	/* -[46]: address family to use */
79
/*
 * Flags raised by sig_handler() and polled by fetch() and main().
 * They are written from an asynchronous signal handler, so they must be
 * volatile sig_atomic_t: a plain int may be cached in a register by the
 * transfer loop and is not guaranteed to be written atomically.
 */
volatile sig_atomic_t	 sigalrm;	/* SIGALRM received */
volatile sig_atomic_t	 siginfo;	/* SIGINFO received */
volatile sig_atomic_t	 sigint;	/* SIGINT received */
83
/*
 * Protocol defaults and the shared transfer buffer.  main() fills the
 * two timeouts from the FTP_TIMEOUT / HTTP_TIMEOUT environment
 * variables (a -T value takes precedence in fetch()) and allocates buf
 * with B_size bytes.
 */
u_int	 ftp_timeout;	/* default timeout for FTP transfers */
u_int	 http_timeout;	/* default timeout for HTTP transfers */
u_char	*buf;		/* transfer buffer */
87
88
89void
90sig_handler(int sig)
91{
92    switch (sig) {
93    case SIGALRM:
94	sigalrm = 1;
95	break;
96    case SIGINFO:
97	siginfo = 1;
98	break;
99    case SIGINT:
100	sigint = 1;
101	break;
102    }
103}
104
/*
 * Progress bookkeeping for a single transfer; filled in by stat_start()
 * and maintained by stat_update(), stat_display() and stat_end().
 */
struct xferstat {
    char		 name[40];	/* label shown to the user (truncated) */
    struct timeval	 start;		/* when stat_start() was called */
    struct timeval	 end;		/* when stat_end() was last called */
    struct timeval	 last;		/* when the display was last redrawn */
    off_t		 size;		/* expected total size, -1 if unknown */
    off_t		 offset;	/* position the transfer started from */
    off_t		 rcvd;		/* current position, offset included */
};
114
115void
116stat_display(struct xferstat *xs, int force)
117{
118    struct timeval now;
119
120    if (!v_tty || !v_level)
121	return;
122
123    gettimeofday(&now, NULL);
124    if (!force && now.tv_sec <= xs->last.tv_sec)
125	return;
126    xs->last = now;
127
128    fprintf(stderr, "\rReceiving %s", xs->name);
129    if (xs->size == -1)
130	fprintf(stderr, ": %lld bytes", xs->rcvd);
131    else
132	fprintf(stderr, " (%lld bytes): %d%%", xs->size,
133		(int)((100.0 * xs->rcvd) / xs->size));
134}
135
136void
137stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
138{
139    snprintf(xs->name, sizeof xs->name, "%s", name);
140    gettimeofday(&xs->start, NULL);
141    xs->last.tv_sec = xs->last.tv_usec = 0;
142    xs->end = xs->last;
143    xs->size = size;
144    xs->offset = offset;
145    xs->rcvd = offset;
146    stat_display(xs, 1);
147}
148
149void
150stat_update(struct xferstat *xs, off_t rcvd, int force)
151{
152    xs->rcvd = rcvd;
153    stat_display(xs, 0);
154}
155
156void
157stat_end(struct xferstat *xs)
158{
159    double delta;
160    double bps;
161
162    if (!v_level)
163	return;
164
165    gettimeofday(&xs->end, NULL);
166
167    stat_display(xs, 1);
168    fputc('\n', stderr);
169    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
170	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
171    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
172	    xs->rcvd - xs->offset, delta);
173    bps = (xs->rcvd - xs->offset) / delta;
174    if (bps > 1024*1024)
175	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
176    else if (bps > 1024)
177	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
178    else
179	fprintf(stderr, "(%.2f Bps)\n", bps);
180}
181
182int
183fetch(char *URL, char *path)
184{
185    struct url *url;
186    struct url_stat us;
187    struct stat sb;
188    struct xferstat xs;
189    FILE *f, *of;
190    size_t size, wr;
191    off_t count;
192    char flags[8];
193    int n, r;
194    u_int timeout;
195    u_char *ptr;
196
197    f = of = NULL;
198
199    /* parse URL */
200    if ((url = fetchParseURL(URL)) == NULL) {
201	warnx("%s: parse error", URL);
202	goto failure;
203    }
204
205    /* if no scheme was specified, take a guess */
206    if (!*url->scheme) {
207	if (!*url->host)
208	    strcpy(url->scheme, SCHEME_FILE);
209	else if (strncasecmp(url->host, "ftp.", 4))
210	    strcpy(url->scheme, SCHEME_FTP);
211	else if (strncasecmp(url->host, "www.", 4))
212	    strcpy(url->scheme, SCHEME_HTTP);
213    }
214
215    timeout = 0;
216    *flags = 0;
217    count = 0;
218
219    /* common flags */
220    if (v_level > 1)
221	strcat(flags, "v");
222    switch (family) {
223    case PF_INET:
224	strcat(flags, "4");
225	break;
226    case PF_INET6:
227	strcat(flags, "6");
228	break;
229    }
230
231    /* FTP specific flags */
232    if (strcmp(url->scheme, "ftp") == 0) {
233	if (p_flag)
234	    strcat(flags, "p");
235	if (d_flag)
236	    strcat(flags, "d");
237	if (H_flag)
238	    strcat(flags, "h");
239	timeout = T_secs ? T_secs : ftp_timeout;
240    }
241
242    /* HTTP specific flags */
243    if (strcmp(url->scheme, "http") == 0) {
244	if (d_flag)
245	    strcat(flags, "d");
246	if (A_flag)
247	    strcat(flags, "A");
248	timeout = T_secs ? T_secs : http_timeout;
249    }
250
251    /* set the protocol timeout. */
252    fetchTimeout = timeout;
253
254    /* just print size */
255    if (s_flag) {
256	if (fetchStat(url, &us, flags) == -1)
257	    goto failure;
258	if (us.size == -1)
259	    printf("Unknown\n");
260	else
261	    printf("%lld\n", us.size);
262	goto success;
263    }
264
265    /*
266     * If the -r flag was specified, we have to compare the local and
267     * remote files, so we should really do a fetchStat() first, but I
268     * know of at least one HTTP server that only sends the content
269     * size in response to GET requests, and leaves it out of replies
270     * to HEAD requests. Also, in the (frequent) case that the local
271     * and remote files match but the local file is truncated, we have
272     * sufficient information *before* the compare to issue a correct
273     * request. Therefore, we always issue a GET request as if we were
274     * sure the local file was a truncated copy of the remote file; we
275     * can drop the connection later if we change our minds.
276     */
277    if ((r_flag  || m_flag) && !o_stdout && stat(path, &sb) != -1) {
278	if (r_flag)
279	    url->offset = sb.st_size;
280    } else {
281	sb.st_size = -1;
282    }
283
284    /* start the transfer */
285    if ((f = fetchXGet(url, &us, flags)) == NULL) {
286	warnx("%s: %s", path, fetchLastErrString);
287	goto failure;
288    }
289    if (sigint)
290	goto signal;
291
292    /* check that size is as expected */
293    if (S_size) {
294	if (us.size == -1) {
295	    warnx("%s: size unknown", path);
296	    goto failure;
297	} else if (us.size != S_size) {
298	    warnx("%s: size mismatch: expected %lld, actual %lld",
299		  path, S_size, us.size);
300	    goto failure;
301	}
302    }
303
304    /* symlink instead of copy */
305    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
306	if (symlink(url->doc, path) == -1) {
307	    warn("%s: symlink()", path);
308	    goto failure;
309	}
310	goto success;
311    }
312
313    if (us.size == -1)
314	warnx("%s: size of remote file is not known", path);
315    if (v_level > 1) {
316	if (sb.st_size != -1)
317	    fprintf(stderr, "local size / mtime: %lld / %ld\n",
318		    sb.st_size, sb.st_mtime);
319	if (us.size != -1)
320	    fprintf(stderr, "remote size / mtime: %lld / %ld\n",
321		    us.size, us.mtime);
322    }
323
324    /* open output file */
325    if (o_stdout) {
326	/* output to stdout */
327	of = stdout;
328    } else if (sb.st_size != -1) {
329	/* resume mode, local file exists */
330	if (!F_flag && us.mtime && sb.st_mtime != us.mtime) {
331	    /* no match! have to refetch */
332	    fclose(f);
333	    /* if precious, warn the user and give up */
334	    if (R_flag) {
335		warnx("%s: local modification time does not match remote",
336		      path);
337		goto failure_keep;
338	    }
339	    url->offset = 0;
340	    if ((f = fetchXGet(url, &us, flags)) == NULL) {
341		warnx("%s: %s", path, fetchLastErrString);
342		goto failure;
343	    }
344	    if (sigint)
345		goto signal;
346	} else {
347	    if (us.size == sb.st_size)
348		/* nothing to do */
349		goto success;
350	    if (sb.st_size > us.size) {
351		/* local file too long! */
352		warnx("%s: local file (%lld bytes) is longer "
353		      "than remote file (%lld bytes)",
354		      path, sb.st_size, us.size);
355		goto failure;
356	    }
357	    /* we got through, open local file and seek to offset */
358	    /*
359	     * XXX there's a race condition here - the file we open is not
360	     * necessarily the same as the one we stat()'ed earlier...
361	     */
362	    if ((of = fopen(path, "a")) == NULL) {
363		warn("%s: fopen()", path);
364		goto failure;
365	    }
366	    if (fseek(of, url->offset, SEEK_SET) == -1) {
367		warn("%s: fseek()", path);
368		goto failure;
369	    }
370	}
371    }
372    if (m_flag && sb.st_size != -1) {
373	/* mirror mode, local file exists */
374	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
375	    goto success;
376    }
377    if (!of) {
378	/*
379	 * We don't yet have an output file; either this is a vanilla
380	 * run with no special flags, or the local and remote files
381	 * didn't match.
382	 */
383	if ((of = fopen(path, "w")) == NULL) {
384	    warn("%s: open()", path);
385	    goto failure;
386	}
387    }
388    count = url->offset;
389
390    /* start the counter */
391    stat_start(&xs, path, us.size, count);
392
393    sigalrm = siginfo = sigint = 0;
394
395    /* suck in the data */
396    signal(SIGINFO, sig_handler);
397    for (n = 0; !sigint && !sigalrm; ++n) {
398	if (us.size != -1 && us.size - count < B_size)
399	    size = us.size - count;
400	else
401	    size = B_size;
402	if (timeout)
403	    alarm(timeout);
404	if ((size = fread(buf, 1, size, f)) == 0) {
405	    if (ferror(f) && errno == EINTR && !sigalrm && !sigint)
406		clearerr(f);
407	    else
408		break;
409	}
410	if (timeout)
411	    alarm(0);
412	if (siginfo) {
413	    stat_end(&xs);
414	    siginfo = 0;
415	}
416	stat_update(&xs, count += size, 0);
417	for (ptr = buf; size > 0; ptr += wr, size -= wr)
418	    if ((wr = fwrite(ptr, 1, size, of)) < size) {
419		if (ferror(of) && errno == EINTR && !sigalrm && !sigint)
420		    clearerr(of);
421		else
422		    break;
423	    }
424	if (size != 0)
425	    break;
426    }
427    signal(SIGINFO, SIG_DFL);
428
429    if (timeout)
430	alarm(0);
431
432    stat_end(&xs);
433
434    /* set mtime of local file */
435    if (!n_flag && us.mtime && !o_stdout
436	&& (stat(path, &sb) != -1) && sb.st_mode & S_IFREG) {
437	struct timeval tv[2];
438
439	fflush(of);
440	tv[0].tv_sec = (long)(us.atime ? us.atime : us.mtime);
441	tv[1].tv_sec = (long)us.mtime;
442	tv[0].tv_usec = tv[1].tv_usec = 0;
443	if (utimes(path, tv))
444	    warn("%s: utimes()", path);
445    }
446
447    /* timed out or interrupted? */
448 signal:
449    if (sigalrm)
450	warnx("transfer timed out");
451    if (sigint) {
452	warnx("transfer interrupted");
453	goto failure;
454    }
455
456    if (!sigalrm) {
457	/* check the status of our files */
458	if (ferror(f))
459	    warn("%s", URL);
460	if (ferror(of))
461	    warn("%s", path);
462	if (ferror(f) || ferror(of))
463	    goto failure;
464    }
465
466    /* did the transfer complete normally? */
467    if (us.size != -1 && count < us.size) {
468	warnx("%s appears to be truncated: %lld/%lld bytes",
469	      path, count, us.size);
470	goto failure_keep;
471    }
472
473    /*
474     * If the transfer timed out and we didn't know how much to
475     * expect, assume the worst (i.e. we didn't get all of it)
476     */
477    if (sigalrm && us.size == -1) {
478	warnx("%s may be truncated", path);
479	goto failure_keep;
480    }
481
482 success:
483    r = 0;
484    goto done;
485 failure:
486    if (of && of != stdout && !R_flag && !r_flag)
487	if (stat(path, &sb) != -1 && (sb.st_mode & S_IFREG))
488	    unlink(path);
489 failure_keep:
490    r = -1;
491    goto done;
492 done:
493    if (f)
494	fclose(f);
495    if (of && of != stdout)
496	fclose(of);
497    if (url)
498	fetchFreeURL(url);
499    return r;
500}
501
/*
 * Print the command synopsis on stderr.
 * Keep the option list in step with the getopt() string in main();
 * the -4 and -6 address family switches used to be missing here.
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
512
513
/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(char *s, TYPE *v);
 *
 * which converts the decimal string s into *v.  It returns 0 on success
 * and -1 if s is empty, contains anything but the digits 0-9, or names
 * a value too large for TYPE; *v is 0 whenever -1 is returned.  TYPE
 * may be a signed or an unsigned integer type.
 *
 * The largest value of TYPE is derived generically: (TYPE)-1 for an
 * unsigned type, all bits but the sign bit for a signed one.  Checking
 * against it before each multiply-and-add keeps the arithmetic from
 * overflowing (undefined behaviour for a signed type such as off_t).
 * The argument of isdigit() is converted to unsigned char because
 * passing a negative char value is undefined.
 */
#define PARSENUM(NAME, TYPE)	\
int	\
NAME(char *s, TYPE *v)	\
{	\
    const TYPE max = (TYPE)-1 > 0 ? (TYPE)-1 :	\
	(TYPE)((1ULL << (sizeof(TYPE) * CHAR_BIT - 1)) - 1);	\
    TYPE acc = 0;	\
    int digit;	\
	\
    *v = 0;	\
    if (*s == '\0')	\
	return -1;	\
    for (; *s; s++) {	\
	if (!isdigit((unsigned char)*s))	\
	    return -1;	\
	digit = *s - '0';	\
	if (acc > (max - digit) / 10)	\
	    return -1;	\
	acc = acc * 10 + digit;	\
    }	\
    *v = acc;	\
    return 0;	\
}

/* unsigned int is the type behind the BSD u_int typedef */
PARSENUM(parseint, unsigned int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
530
531int
532main(int argc, char *argv[])
533{
534    struct stat sb;
535    struct sigaction sa;
536    char *p, *q, *s;
537    int c, e, r;
538
539    while ((c = getopt(argc, argv,
540		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
541	switch (c) {
542	case '1':
543	    once_flag = 1;
544	    break;
545	case '4':
546	    family = PF_INET;
547	    break;
548	case '6':
549	    family = PF_INET6;
550	    break;
551	case 'A':
552	    A_flag = 1;
553	    break;
554	case 'a':
555	    a_flag = 1;
556	    break;
557	case 'B':
558	    if (parsesize(optarg, &B_size) == -1)
559		errx(1, "invalid buffer size");
560	    break;
561	case 'b':
562	    warnx("warning: the -b option is deprecated");
563	    b_flag = 1;
564	    break;
565	case 'c':
566	    c_dirname = optarg;
567	    break;
568	case 'd':
569	    d_flag = 1;
570	    break;
571	case 'F':
572	    F_flag = 1;
573	    break;
574	case 'f':
575	    f_filename = optarg;
576	    break;
577	case 'H':
578	    H_flag = 1;
579	    break;
580	case 'h':
581	    h_hostname = optarg;
582	    break;
583	case 'l':
584	    l_flag = 1;
585	    break;
586	case 'o':
587	    o_flag = 1;
588	    o_filename = optarg;
589	    break;
590	case 'M':
591	case 'm':
592	    if (r_flag)
593		errx(1, "the -m and -r flags are mutually exclusive");
594	    m_flag = 1;
595	    break;
596	case 'n':
597	    n_flag = 1;
598	    break;
599	case 'P':
600	case 'p':
601	    p_flag = 1;
602	    break;
603	case 'q':
604	    v_level = 0;
605	    break;
606	case 'R':
607	    R_flag = 1;
608	    break;
609	case 'r':
610	    if (m_flag)
611		errx(1, "the -m and -r flags are mutually exclusive");
612	    r_flag = 1;
613	    break;
614	case 'S':
615	    if (parseoff(optarg, &S_size) == -1)
616		errx(1, "invalid size");
617	    break;
618	case 's':
619	    s_flag = 1;
620	    break;
621	case 'T':
622	    if (parseint(optarg, &T_secs) == -1)
623		errx(1, "invalid timeout");
624	    break;
625	case 't':
626	    t_flag = 1;
627	    warnx("warning: the -t option is deprecated");
628	    break;
629	case 'v':
630	    v_level++;
631	    break;
632	case 'w':
633	    a_flag = 1;
634	    if (parseint(optarg, &w_secs) == -1)
635		errx(1, "invalid delay");
636	    break;
637	default:
638	    usage();
639	    exit(EX_USAGE);
640	}
641
642    argc -= optind;
643    argv += optind;
644
645    if (h_hostname || f_filename || c_dirname) {
646	if (!h_hostname || !f_filename || argc) {
647	    usage();
648	    exit(EX_USAGE);
649	}
650	/* XXX this is a hack. */
651	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
652	    errx(1, "invalid hostname");
653	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
654		     c_dirname ? c_dirname : "", f_filename) == -1)
655	    errx(1, "%s", strerror(ENOMEM));
656	argc++;
657    }
658
659    if (!argc) {
660	usage();
661	exit(EX_USAGE);
662    }
663
664    /* allocate buffer */
665    if (B_size < MINBUFSIZE)
666	B_size = MINBUFSIZE;
667    if ((buf = malloc(B_size)) == NULL)
668	errx(1, "%s", strerror(ENOMEM));
669
670    /* timeouts */
671    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
672	if (parseint(s, &ftp_timeout) == -1) {
673	    warnx("FTP_TIMEOUT is not a positive integer");
674	    ftp_timeout = 0;
675	}
676    }
677    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
678	if (parseint(s, &http_timeout) == -1) {
679	    warnx("HTTP_TIMEOUT is not a positive integer");
680	    http_timeout = 0;
681	}
682    }
683
684    /* signal handling */
685    sa.sa_flags = 0;
686    sa.sa_handler = sig_handler;
687    sigemptyset(&sa.sa_mask);
688    sigaction(SIGALRM, &sa, NULL);
689    sa.sa_flags = SA_RESETHAND;
690    sigaction(SIGINT, &sa, NULL);
691    fetchRestartCalls = 0;
692
693    /* output file */
694    if (o_flag) {
695	if (strcmp(o_filename, "-") == 0) {
696	    o_stdout = 1;
697	} else if (stat(o_filename, &sb) == -1) {
698	    if (errno == ENOENT) {
699		if (argc > 1)
700		    errx(EX_USAGE, "%s is not a directory", o_filename);
701	    } else {
702		err(EX_IOERR, "%s", o_filename);
703	    }
704	} else {
705	    if (sb.st_mode & S_IFDIR)
706		o_directory = 1;
707	}
708    }
709
710    /* check if output is to a tty (for progress report) */
711    v_tty = isatty(STDERR_FILENO);
712    r = 0;
713
714    while (argc) {
715	if ((p = strrchr(*argv, '/')) == NULL)
716	    p = *argv;
717	else
718	    p++;
719
720	if (!*p)
721	    p = "fetch.out";
722
723	fetchLastErrCode = 0;
724
725	if (o_flag) {
726	    if (o_stdout) {
727		e = fetch(*argv, "-");
728	    } else if (o_directory) {
729		asprintf(&q, "%s/%s", o_filename, p);
730		e = fetch(*argv, q);
731		free(q);
732	    } else {
733		e = fetch(*argv, o_filename);
734	    }
735	} else {
736	    e = fetch(*argv, p);
737	}
738
739	if (sigint)
740	    kill(getpid(), SIGINT);
741
742	if (e == 0 && once_flag)
743	    exit(0);
744
745	if (e) {
746	    r = 1;
747	    if ((fetchLastErrCode
748		 && fetchLastErrCode != FETCH_UNAVAIL
749		 && fetchLastErrCode != FETCH_MOVED
750		 && fetchLastErrCode != FETCH_URL
751		 && fetchLastErrCode != FETCH_RESOLV
752		 && fetchLastErrCode != FETCH_UNKNOWN)) {
753		if (w_secs) {
754		    if (v_level)
755			fprintf(stderr, "Waiting %d seconds before retrying\n",
756				w_secs);
757		    sleep(w_secs);
758		}
759		if (a_flag)
760		    continue;
761	    }
762	}
763
764	argc--, argv++;
765    }
766
767    exit(r);
768}
769