fetch.c revision 70275
1/*-
2 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$FreeBSD: head/usr.bin/fetch/fetch.c 70275 2000-12-22 18:23:19Z des $
29 */
30
31#include <sys/param.h>
32#include <sys/stat.h>
33#include <sys/socket.h>
34
35#include <ctype.h>
36#include <err.h>
37#include <errno.h>
38#include <signal.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <sysexits.h>
43#include <unistd.h>
44
45#include <fetch.h>
46
#define MINBUFSIZE	4096	/* smallest transfer buffer we will use */

/*
 * Option flags.  These are set once by main() while parsing the command
 * line and read by fetch() and the stat_*() helpers.  Entries marked
 * with '!' are deprecated options that are accepted but only warn.
 */
int	 A_flag;	/*    -A: do not follow 302 redirects */
int	 a_flag;	/*    -a: auto retry */
size_t	 B_size;	/*    -B: buffer size */
int	 b_flag;	/*!   -b: workaround TCP bug */
char    *c_dirname;	/*    -c: remote directory */
int	 d_flag;	/*    -d: direct connection */
int	 F_flag;	/*    -F: restart without checking mtime  */
char	*f_filename;	/*    -f: file to fetch */
int	 H_flag;	/*    -H: use high port */
char	*h_hostname;	/*    -h: host to fetch from */
int	 l_flag;	/*    -l: link rather than copy file: URLs */
int	 m_flag;	/* -[Mm]: mirror mode */
int	 n_flag;	/*    -n: do not preserve modification time */
int	 o_flag;	/*    -o: specify output file */
int	 o_directory;	/*        output file is a directory */
char	*o_filename;	/*        name of output file */
int	 o_stdout;	/*        output file is stdout */
int	 once_flag;	/*    -1: stop at first successful file */
int	 p_flag;	/* -[Pp]: use passive FTP */
int	 R_flag;	/*    -R: don't delete partially transferred files */
int	 r_flag;	/*    -r: restart previously interrupted transfer */
u_int	 T_secs = 0;	/*    -T: transfer timeout in seconds */
int	 s_flag;        /*    -s: show size, don't fetch */
off_t	 S_size;        /*    -S: require size to match */
int	 t_flag;	/*!   -t: workaround TCP bug */
int	 v_level = 1;	/*    -v: verbosity level */
int	 v_tty;		/*        stdout is a tty */
u_int	 w_secs;	/*    -w: retry delay */
int	 family = PF_UNSPEC;	/* -[46]: address family to use */

/*
 * Set asynchronously by sig_handler() and polled by the transfer loop.
 * They must be volatile sig_atomic_t: that is the only object type the
 * C standard lets a signal handler store to, and volatile keeps the
 * compiler from caching the value across loop iterations.
 */
volatile sig_atomic_t	 sigalrm;	/* SIGALRM received */
volatile sig_atomic_t	 sigint;	/* SIGINT received */

u_int	 ftp_timeout;	/* default timeout for FTP transfers */
u_int	 http_timeout;	/* default timeout for HTTP transfers */
u_char	*buf;		/* transfer buffer */
86
87
88void
89sig_handler(int sig)
90{
91    switch (sig) {
92    case SIGALRM:
93	sigalrm = 1;
94	break;
95    case SIGINT:
96	sigint = 1;
97	break;
98    }
99}
100
/*
 * Bookkeeping for the progress report of a single transfer.  Filled in
 * by stat_start(), updated by stat_update() and finalised by stat_end().
 */
struct xferstat {
    char		 name[40];	/* display name (truncated copy) */
    struct timeval	 start;		/* time stat_start() was called */
    struct timeval	 end;		/* time stat_end() was called */
    struct timeval	 last;		/* time of the last progress line */
    off_t		 size;		/* expected total size, -1 if unknown */
    off_t		 offset;	/* byte offset the transfer started at */
    off_t		 rcvd;		/* current byte count, starts at offset */
};
110
111void
112stat_display(struct xferstat *xs, int force)
113{
114    struct timeval now;
115
116    if (!v_tty || !v_level)
117	return;
118
119    gettimeofday(&now, NULL);
120    if (!force && now.tv_sec <= xs->last.tv_sec)
121	return;
122    xs->last = now;
123
124    fprintf(stderr, "\rReceiving %s", xs->name);
125    if (xs->size == -1)
126	fprintf(stderr, ": %lld bytes", xs->rcvd);
127    else
128	fprintf(stderr, " (%lld bytes): %d%%", xs->size,
129		(int)((100.0 * xs->rcvd) / xs->size));
130}
131
132void
133stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
134{
135    snprintf(xs->name, sizeof xs->name, "%s", name);
136    gettimeofday(&xs->start, NULL);
137    xs->last.tv_sec = xs->last.tv_usec = 0;
138    xs->end = xs->last;
139    xs->size = size;
140    xs->offset = offset;
141    xs->rcvd = offset;
142    stat_display(xs, 1);
143}
144
145void
146stat_update(struct xferstat *xs, off_t rcvd, int force)
147{
148    xs->rcvd = rcvd;
149    stat_display(xs, 0);
150}
151
152void
153stat_end(struct xferstat *xs)
154{
155    double delta;
156    double bps;
157
158    if (!v_level)
159	return;
160
161    gettimeofday(&xs->end, NULL);
162
163    stat_display(xs, 1);
164    fputc('\n', stderr);
165    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
166	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
167    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
168	    xs->rcvd - xs->offset, delta);
169    bps = (xs->rcvd - xs->offset) / delta;
170    if (bps > 1024*1024)
171	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
172    else if (bps > 1024)
173	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
174    else
175	fprintf(stderr, "(%.2f Bps)\n", bps);
176}
177
178int
179fetch(char *URL, char *path)
180{
181    struct url *url;
182    struct url_stat us;
183    struct stat sb;
184    struct xferstat xs;
185    FILE *f, *of;
186    size_t size;
187    off_t count;
188    char flags[8];
189    int n, r;
190    u_int timeout;
191
192    f = of = NULL;
193
194    /* parse URL */
195    if ((url = fetchParseURL(URL)) == NULL) {
196	warnx("%s: parse error", URL);
197	goto failure;
198    }
199
200    /* if no scheme was specified, take a guess */
201    if (!*url->scheme) {
202	if (!*url->host)
203	    strcpy(url->scheme, SCHEME_FILE);
204	else if (strncasecmp(url->host, "ftp.", 4))
205	    strcpy(url->scheme, SCHEME_FTP);
206	else if (strncasecmp(url->host, "www.", 4))
207	    strcpy(url->scheme, SCHEME_HTTP);
208    }
209
210    timeout = 0;
211    *flags = 0;
212    count = 0;
213
214    /* common flags */
215    if (v_level > 1)
216	strcat(flags, "v");
217    switch (family) {
218    case PF_INET:
219	strcat(flags, "4");
220	break;
221    case PF_INET6:
222	strcat(flags, "6");
223	break;
224    }
225
226    /* FTP specific flags */
227    if (strcmp(url->scheme, "ftp") == 0) {
228	if (p_flag)
229	    strcat(flags, "p");
230	if (d_flag)
231	    strcat(flags, "d");
232	if (H_flag)
233	    strcat(flags, "h");
234	timeout = T_secs ? T_secs : ftp_timeout;
235    }
236
237    /* HTTP specific flags */
238    if (strcmp(url->scheme, "http") == 0) {
239	if (d_flag)
240	    strcat(flags, "d");
241	if (A_flag)
242	    strcat(flags, "A");
243	timeout = T_secs ? T_secs : http_timeout;
244    }
245
246    /* set the protocol timeout. */
247    fetchTimeout = timeout;
248
249    /* just print size */
250    if (s_flag) {
251	if (fetchStat(url, &us, flags) == -1)
252	    goto failure;
253	if (us.size == -1)
254	    printf("Unknown\n");
255	else
256	    printf("%lld\n", us.size);
257	goto success;
258    }
259
260    /*
261     * If the -r flag was specified, we have to compare the local and
262     * remote files, so we should really do a fetchStat() first, but I
263     * know of at least one HTTP server that only sends the content
264     * size in response to GET requests, and leaves it out of replies
265     * to HEAD requests. Also, in the (frequent) case that the local
266     * and remote files match but the local file is truncated, we have
267     * sufficient information *before* the compare to issue a correct
268     * request. Therefore, we always issue a GET request as if we were
269     * sure the local file was a truncated copy of the remote file; we
270     * can drop the connection later if we change our minds.
271     */
272    if ((r_flag  || m_flag) && !o_stdout && stat(path, &sb) != -1) {
273	if (r_flag)
274	    url->offset = sb.st_size;
275    } else {
276	sb.st_size = -1;
277    }
278
279    /* start the transfer */
280    if ((f = fetchXGet(url, &us, flags)) == NULL) {
281	warnx("%s: %s", path, fetchLastErrString);
282	goto failure;
283    }
284    if (sigint)
285	goto signal;
286
287    /* check that size is as expected */
288    if (S_size) {
289	if (us.size == -1) {
290	    warnx("%s: size unknown", path);
291	    goto failure;
292	} else if (us.size != S_size) {
293	    warnx("%s: size mismatch: expected %lld, actual %lld",
294		  path, S_size, us.size);
295	    goto failure;
296	}
297    }
298
299    /* symlink instead of copy */
300    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
301	if (symlink(url->doc, path) == -1) {
302	    warn("%s: symlink()", path);
303	    goto failure;
304	}
305	goto success;
306    }
307
308    if (us.size == -1)
309	warnx("%s: size of remote file is not known", path);
310    if (v_level > 1) {
311	if (sb.st_size != -1)
312	    fprintf(stderr, "local size / mtime: %lld / %ld\n",
313		    sb.st_size, sb.st_mtime);
314	if (us.size != -1)
315	    fprintf(stderr, "remote size / mtime: %lld / %ld\n",
316		    us.size, us.mtime);
317    }
318
319    /* open output file */
320    if (o_stdout) {
321	/* output to stdout */
322	of = stdout;
323    } else if (sb.st_size != -1) {
324	/* resume mode, local file exists */
325	if (!F_flag && us.mtime && sb.st_mtime != us.mtime) {
326	    /* no match! have to refetch */
327	    fclose(f);
328	    /* if precious, warn the user and give up */
329	    if (R_flag) {
330		warnx("%s: local modification time does not match remote",
331		      path);
332		goto failure_keep;
333	    }
334	    url->offset = 0;
335	    if ((f = fetchXGet(url, &us, flags)) == NULL) {
336		warnx("%s: %s", path, fetchLastErrString);
337		goto failure;
338	    }
339	    if (sigint)
340		goto signal;
341	} else {
342	    if (us.size == sb.st_size)
343		/* nothing to do */
344		goto success;
345	    if (sb.st_size > us.size) {
346		/* local file too long! */
347		warnx("%s: local file (%lld bytes) is longer "
348		      "than remote file (%lld bytes)",
349		      path, sb.st_size, us.size);
350		goto failure;
351	    }
352	    /* we got through, open local file and seek to offset */
353	    /*
354	     * XXX there's a race condition here - the file we open is not
355	     * necessarily the same as the one we stat()'ed earlier...
356	     */
357	    if ((of = fopen(path, "a")) == NULL) {
358		warn("%s: fopen()", path);
359		goto failure;
360	    }
361	    if (fseek(of, url->offset, SEEK_SET) == -1) {
362		warn("%s: fseek()", path);
363		goto failure;
364	    }
365	}
366    }
367    if (m_flag && sb.st_size != -1) {
368	/* mirror mode, local file exists */
369	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
370	    goto success;
371    }
372    if (!of) {
373	/*
374	 * We don't yet have an output file; either this is a vanilla
375	 * run with no special flags, or the local and remote files
376	 * didn't match.
377	 */
378	if ((of = fopen(path, "w")) == NULL) {
379	    warn("%s: open()", path);
380	    goto failure;
381	}
382    }
383    count = url->offset;
384
385    /* start the counter */
386    stat_start(&xs, path, us.size, count);
387
388    sigint = sigalrm = 0;
389
390    /* suck in the data */
391    for (n = 0; !sigint && !sigalrm; ++n) {
392	if (us.size != -1 && us.size - count < B_size)
393	    size = us.size - count;
394	else
395	    size = B_size;
396	if (timeout)
397	    alarm(timeout);
398	if ((size = fread(buf, 1, size, f)) <= 0)
399	    break;
400	stat_update(&xs, count += size, 0);
401	if (fwrite(buf, size, 1, of) != 1)
402	    break;
403    }
404
405    if (timeout)
406	alarm(0);
407
408    stat_end(&xs);
409
410    /* set mtime of local file */
411    if (!n_flag && us.mtime && !o_stdout
412	&& (stat(path, &sb) != -1) && sb.st_mode & S_IFREG) {
413	struct timeval tv[2];
414
415	fflush(of);
416	tv[0].tv_sec = (long)(us.atime ? us.atime : us.mtime);
417	tv[1].tv_sec = (long)us.mtime;
418	tv[0].tv_usec = tv[1].tv_usec = 0;
419	if (utimes(path, tv))
420	    warn("%s: utimes()", path);
421    }
422
423    /* timed out or interrupted? */
424 signal:
425    if (sigalrm)
426	warnx("transfer timed out");
427    if (sigint) {
428	warnx("transfer interrupted");
429	goto failure;
430    }
431
432    if (!sigalrm) {
433	/* check the status of our files */
434	if (ferror(f))
435	    warn("%s", URL);
436	if (ferror(of))
437	    warn("%s", path);
438	if (ferror(f) || ferror(of))
439	    goto failure;
440    }
441
442    /* did the transfer complete normally? */
443    if (us.size != -1 && count < us.size) {
444	warnx("%s appears to be truncated: %lld/%lld bytes",
445	      path, count, us.size);
446	goto failure_keep;
447    }
448
449    /*
450     * If the transfer timed out and we didn't know how much to
451     * expect, assume the worst (i.e. we didn't get all of it)
452     */
453    if (sigalrm && us.size == -1) {
454	warnx("%s may be truncated", path);
455	goto failure_keep;
456    }
457
458 success:
459    r = 0;
460    goto done;
461 failure:
462    if (of && of != stdout && !R_flag && !r_flag)
463	if (stat(path, &sb) != -1 && (sb.st_mode & S_IFREG))
464	    unlink(path);
465 failure_keep:
466    r = -1;
467    goto done;
468 done:
469    if (f)
470	fclose(f);
471    if (of && of != stdout)
472	fclose(of);
473    if (url)
474	fetchFreeURL(url);
475    return r;
476}
477
/*
 * Print a short usage summary to stderr.  The option letters listed
 * here must be kept in step with the getopt() string in main(); -4 and
 * -6 were accepted by main() but missing from the summary.
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
488
489
/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(char *s, TYPE *v)
 *
 * which parses s as a non-negative decimal integer into *v.  It returns
 * 0 on success and -1 if s contains a non-digit character or if the
 * value does not fit in TYPE.  An empty string parses as 0.  On failure
 * *v holds the value of the longest prefix that was still valid.
 *
 * The value is accumulated in an unsigned long long so that overflow
 * can be detected without invoking undefined behaviour when TYPE is
 * signed; after each digit the value is converted to TYPE and rejected
 * if it does not survive the round trip or comes out non-positive.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(char *s, TYPE *v)							\
{									\
    unsigned long long acc = 0;						\
    unsigned int digit;							\
    TYPE conv;								\
									\
    for (*v = 0; *s; s++) {						\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = (unsigned int)(*s - '0');				\
	if (acc > (~0ULL - digit) / 10)					\
	    return -1;							\
	acc = acc * 10 + digit;						\
	conv = (TYPE)acc;						\
	if ((unsigned long long)conv != acc || (conv < 1 && acc != 0))	\
	    return -1;							\
	*v = conv;							\
    }									\
    return 0;								\
}

PARSENUM(parseint, u_int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
506
507int
508main(int argc, char *argv[])
509{
510    struct stat sb;
511    struct sigaction sa;
512    char *p, *q, *s;
513    int c, e, r;
514
515    while ((c = getopt(argc, argv,
516		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
517	switch (c) {
518	case '1':
519	    once_flag = 1;
520	    break;
521	case '4':
522	    family = PF_INET;
523	    break;
524	case '6':
525	    family = PF_INET6;
526	    break;
527	case 'A':
528	    A_flag = 1;
529	    break;
530	case 'a':
531	    a_flag = 1;
532	    break;
533	case 'B':
534	    if (parsesize(optarg, &B_size) == -1)
535		errx(1, "invalid buffer size");
536	    break;
537	case 'b':
538	    warnx("warning: the -b option is deprecated");
539	    b_flag = 1;
540	    break;
541	case 'c':
542	    c_dirname = optarg;
543	    break;
544	case 'd':
545	    d_flag = 1;
546	    break;
547	case 'F':
548	    F_flag = 1;
549	    break;
550	case 'f':
551	    f_filename = optarg;
552	    break;
553	case 'H':
554	    H_flag = 1;
555	    break;
556	case 'h':
557	    h_hostname = optarg;
558	    break;
559	case 'l':
560	    l_flag = 1;
561	    break;
562	case 'o':
563	    o_flag = 1;
564	    o_filename = optarg;
565	    break;
566	case 'M':
567	case 'm':
568	    if (r_flag)
569		errx(1, "the -m and -r flags are mutually exclusive");
570	    m_flag = 1;
571	    break;
572	case 'n':
573	    n_flag = 1;
574	    break;
575	case 'P':
576	case 'p':
577	    p_flag = 1;
578	    break;
579	case 'q':
580	    v_level = 0;
581	    break;
582	case 'R':
583	    R_flag = 1;
584	    break;
585	case 'r':
586	    if (m_flag)
587		errx(1, "the -m and -r flags are mutually exclusive");
588	    r_flag = 1;
589	    break;
590	case 'S':
591	    if (parseoff(optarg, &S_size) == -1)
592		errx(1, "invalid size");
593	    break;
594	case 's':
595	    s_flag = 1;
596	    break;
597	case 'T':
598	    if (parseint(optarg, &T_secs) == -1)
599		errx(1, "invalid timeout");
600	    break;
601	case 't':
602	    t_flag = 1;
603	    warnx("warning: the -t option is deprecated");
604	    break;
605	case 'v':
606	    v_level++;
607	    break;
608	case 'w':
609	    a_flag = 1;
610	    if (parseint(optarg, &w_secs) == -1)
611		errx(1, "invalid delay");
612	    break;
613	default:
614	    usage();
615	    exit(EX_USAGE);
616	}
617
618    argc -= optind;
619    argv += optind;
620
621    if (h_hostname || f_filename || c_dirname) {
622	if (!h_hostname || !f_filename || argc) {
623	    usage();
624	    exit(EX_USAGE);
625	}
626	/* XXX this is a hack. */
627	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
628	    errx(1, "invalid hostname");
629	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
630		     c_dirname ? c_dirname : "", f_filename) == -1)
631	    errx(1, "%s", strerror(ENOMEM));
632	argc++;
633    }
634
635    if (!argc) {
636	usage();
637	exit(EX_USAGE);
638    }
639
640    /* allocate buffer */
641    if (B_size < MINBUFSIZE)
642	B_size = MINBUFSIZE;
643    if ((buf = malloc(B_size)) == NULL)
644	errx(1, "%s", strerror(ENOMEM));
645
646    /* timeouts */
647    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
648	if (parseint(s, &ftp_timeout) == -1) {
649	    warnx("FTP_TIMEOUT is not a positive integer");
650	    ftp_timeout = 0;
651	}
652    }
653    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
654	if (parseint(s, &http_timeout) == -1) {
655	    warnx("HTTP_TIMEOUT is not a positive integer");
656	    http_timeout = 0;
657	}
658    }
659
660    /* signal handling */
661    sa.sa_flags = 0;
662    sa.sa_handler = sig_handler;
663    sigemptyset(&sa.sa_mask);
664    sigaction(SIGALRM, &sa, NULL);
665    sa.sa_flags = SA_RESETHAND;
666    sigaction(SIGINT, &sa, NULL);
667    fetchRestartCalls = 0;
668
669    /* output file */
670    if (o_flag) {
671	if (strcmp(o_filename, "-") == 0) {
672	    o_stdout = 1;
673	} else if (stat(o_filename, &sb) == -1) {
674	    if (errno == ENOENT) {
675		if (argc > 1)
676		    errx(EX_USAGE, "%s is not a directory", o_filename);
677	    } else {
678		err(EX_IOERR, "%s", o_filename);
679	    }
680	} else {
681	    if (sb.st_mode & S_IFDIR)
682		o_directory = 1;
683	}
684    }
685
686    /* check if output is to a tty (for progress report) */
687    v_tty = isatty(STDERR_FILENO);
688    r = 0;
689
690    while (argc) {
691	if ((p = strrchr(*argv, '/')) == NULL)
692	    p = *argv;
693	else
694	    p++;
695
696	if (!*p)
697	    p = "fetch.out";
698
699	fetchLastErrCode = 0;
700
701	if (o_flag) {
702	    if (o_stdout) {
703		e = fetch(*argv, "-");
704	    } else if (o_directory) {
705		asprintf(&q, "%s/%s", o_filename, p);
706		e = fetch(*argv, q);
707		free(q);
708	    } else {
709		e = fetch(*argv, o_filename);
710	    }
711	} else {
712	    e = fetch(*argv, p);
713	}
714
715	if (sigint)
716	    kill(getpid(), SIGINT);
717
718	if (e == 0 && once_flag)
719	    exit(0);
720
721	if (e) {
722	    r = 1;
723	    if ((fetchLastErrCode
724		 && fetchLastErrCode != FETCH_UNAVAIL
725		 && fetchLastErrCode != FETCH_MOVED
726		 && fetchLastErrCode != FETCH_URL
727		 && fetchLastErrCode != FETCH_RESOLV
728		 && fetchLastErrCode != FETCH_UNKNOWN)) {
729		if (w_secs) {
730		    if (v_level)
731			fprintf(stderr, "Waiting %d seconds before retrying\n",
732				w_secs);
733		    sleep(w_secs);
734		}
735		if (a_flag)
736		    continue;
737	    }
738	}
739
740	argc--, argv++;
741    }
742
743    exit(r);
744}
745