fetch.c revision 63235
1/*-
2 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$FreeBSD: head/usr.bin/fetch/fetch.c 63235 2000-07-16 00:47:00Z des $
29 */
30
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <fetch.h>
46
/* Smallest transfer buffer allocated; smaller -B requests are raised to it */
#define MINBUFSIZE	4096

/*
 * Command-line option state.
 *
 * Each variable is named after the option letter that sets it.  Options
 * tagged with `!' are deprecated: they are still accepted but only cause
 * a warning to be printed.
 */
int	 once_flag;		/*    -1: stop at first successful file */
int	 family = PF_UNSPEC;	/* -[46]: address family to use */
int	 A_flag;		/*    -A: do not follow 302 redirects */
int	 a_flag;		/*    -a: auto retry */
size_t	 B_size;		/*    -B: buffer size */
int	 b_flag;		/*!   -b: workaround TCP bug */
char	*c_dirname;		/*    -c: remote directory */
int	 d_flag;		/*    -d: direct connection */
int	 F_flag;		/*    -F: restart without checking mtime */
char	*f_filename;		/*    -f: file to fetch */
int	 H_flag;		/*    -H: use high port */
char	*h_hostname;		/*    -h: host to fetch from */
int	 l_flag;		/*    -l: link rather than copy file: URLs */
int	 m_flag;		/* -[Mm]: mirror mode */
int	 n_flag;		/*    -n: do not preserve modification time */
int	 o_flag;		/*    -o: specify output file */
int	 o_directory;		/*        output file is a directory */
char	*o_filename;		/*        name of output file */
int	 o_stdout;		/*        output file is stdout */
int	 p_flag = 1;		/* -[Pp]: use passive FTP */
int	 R_flag;		/*    -R: don't delete partially transferred files */
int	 r_flag;		/*    -r: restart previously interrupted transfer */
off_t	 S_size;		/*    -S: require size to match */
int	 s_flag;		/*    -s: show size, don't fetch */
u_int	 T_secs;		/*    -T: transfer timeout in seconds */
int	 t_flag;		/*!   -t: workaround TCP bug */
int	 v_level = 1;		/*    -v: verbosity level */
int	 v_tty;			/*        stderr is a tty */
u_int	 w_secs;		/*    -w: retry delay */
79
/*
 * Flags raised by the signal handler and polled by the transfer loop.
 * Because they are written asynchronously from a handler, they are
 * declared volatile sig_atomic_t so that those accesses are well defined
 * and are not cached or reordered away by the compiler.
 */
volatile sig_atomic_t	 sigalrm;	/* SIGALRM received */
volatile sig_atomic_t	 sigint;	/* SIGINT received */
82
/*
 * Per-protocol default timeouts, used when no -T value is given, and the
 * buffer that data passes through on its way from the network to disk.
 */
u_int	 ftp_timeout;		/* default timeout for FTP transfers */
u_int	 http_timeout;		/* default timeout for HTTP transfers */
u_char	*buf;			/* transfer buffer */
86
87
88void
89sig_handler(int sig)
90{
91    switch (sig) {
92    case SIGALRM:
93	sigalrm = 1;
94	break;
95    case SIGINT:
96	sigint = 1;
97	break;
98    }
99}
100
/*
 * Progress bookkeeping for one transfer, maintained by the stat_*()
 * functions.
 */
struct xferstat {
    char		 name[40];	/* name shown in the report (truncated to fit) */
    struct timeval	 start;		/* when the transfer started */
    struct timeval	 end;		/* when the transfer ended */
    struct timeval	 last;		/* when the report was last redrawn */
    off_t		 size;		/* expected total size, -1 if unknown */
    off_t		 offset;	/* byte count the transfer started from */
    off_t		 rcvd;		/* bytes received so far, offset included */
};
110
111void
112stat_display(struct xferstat *xs, int force)
113{
114    struct timeval now;
115
116    if (!v_tty)
117	return;
118
119    gettimeofday(&now, NULL);
120    if (!force && now.tv_sec <= xs->last.tv_sec)
121	return;
122    xs->last = now;
123
124    fprintf(stderr, "\rReceiving %s", xs->name);
125    if (xs->size == -1)
126	fprintf(stderr, ": %lld bytes", xs->rcvd);
127    else
128	fprintf(stderr, " (%lld bytes): %d%%", xs->size,
129		(int)((100.0 * xs->rcvd) / xs->size));
130}
131
132void
133stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
134{
135    snprintf(xs->name, sizeof xs->name, "%s", name);
136    gettimeofday(&xs->start, NULL);
137    xs->last.tv_sec = xs->last.tv_usec = 0;
138    xs->end = xs->last;
139    xs->size = size;
140    xs->offset = offset;
141    xs->rcvd = offset;
142    stat_display(xs, 1);
143}
144
145void
146stat_update(struct xferstat *xs, off_t rcvd, int force)
147{
148    xs->rcvd = rcvd;
149    stat_display(xs, 0);
150}
151
152void
153stat_end(struct xferstat *xs)
154{
155    double delta;
156    double bps;
157
158    gettimeofday(&xs->end, NULL);
159
160    stat_display(xs, 1);
161    fputc('\n', stderr);
162    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
163	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
164    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
165	    xs->rcvd - xs->offset, delta);
166    bps = (xs->rcvd - xs->offset) / delta;
167    if (bps > 1024*1024)
168	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
169    else if (bps > 1024)
170	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
171    else
172	fprintf(stderr, "(%.2f Bps)\n", bps);
173}
174
175int
176fetch(char *URL, char *path)
177{
178    struct url *url;
179    struct url_stat us;
180    struct stat sb;
181    struct xferstat xs;
182    FILE *f, *of;
183    size_t size;
184    off_t count;
185    char flags[8];
186    int n, r;
187    u_int timeout;
188
189    f = of = NULL;
190
191    /* parse URL */
192    if ((url = fetchParseURL(URL)) == NULL) {
193	warnx("%s: parse error", URL);
194	goto failure;
195    }
196
197    timeout = 0;
198    *flags = 0;
199
200    /* common flags */
201    if (v_level > 2)
202	strcat(flags, "v");
203    switch (family) {
204    case PF_INET:
205	strcat(flags, "4");
206	break;
207    case PF_INET6:
208	strcat(flags, "6");
209	break;
210    }
211
212    /* FTP specific flags */
213    if (strcmp(url->scheme, "ftp") == 0) {
214	if (p_flag)
215	    strcat(flags, "p");
216	if (d_flag)
217	    strcat(flags, "d");
218	if (H_flag)
219	    strcat(flags, "h");
220	timeout = T_secs ? T_secs : ftp_timeout;
221    }
222
223    /* HTTP specific flags */
224    if (strcmp(url->scheme, "http") == 0) {
225	if (d_flag)
226	    strcat(flags, "d");
227	if (A_flag)
228	    strcat(flags, "A");
229	timeout = T_secs ? T_secs : http_timeout;
230    }
231
232    /* set the protocol timeout. */
233    fetchTimeout = timeout;
234
235    /* stat remote file */
236    if (fetchStat(url, &us, flags) == -1)
237	goto failure;
238
239    /* just print size */
240    if (s_flag) {
241	if (us.size == -1)
242	    printf("Unknown\n");
243	else
244	    printf("%lld\n", us.size);
245	goto success;
246    }
247
248    /* check that size is as expected */
249    if (S_size && us.size != -1 && us.size != S_size) {
250	warnx("%s: size mismatch: expected %lld, actual %lld",
251	      path, S_size, us.size);
252	goto failure;
253    }
254
255    /* symlink instead of copy */
256    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
257	if (symlink(url->doc, path) == -1) {
258	    warn("%s: symlink()", path);
259	    goto failure;
260	}
261	goto success;
262    }
263
264    if (o_stdout) {
265	/* output to stdout */
266	of = stdout;
267    } else if (r_flag && us.size != -1 && stat(path, &sb) != -1
268	       && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) {
269	/* output to file, restart aborted transfer */
270	if (us.size == sb.st_size)
271	    goto success;
272	else if (sb.st_size > us.size && truncate(path, us.size) == -1) {
273	    warn("%s: truncate()", path);
274	    goto failure;
275	}
276	if ((of = fopen(path, "a")) == NULL) {
277	    warn("%s: open()", path);
278	    goto failure;
279	}
280	url->offset = sb.st_size;
281    } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) {
282	/* output to file, mirror mode */
283	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
284	    return 0;
285	if ((of = fopen(path, "w")) == NULL) {
286	    warn("%s: open()", path);
287	    goto failure;
288	}
289    } else {
290	/* output to file, all other cases */
291	if ((of = fopen(path, "w")) == NULL) {
292	    warn("%s: open()", path);
293	    goto failure;
294	}
295    }
296    count = url->offset;
297
298    /* start the transfer */
299    if ((f = fetchGet(url, flags)) == NULL) {
300	warnx("%s", fetchLastErrString);
301	if (!R_flag && !r_flag && !o_stdout)
302	    unlink(path);
303	goto failure;
304    }
305
306    /* start the counter */
307    stat_start(&xs, path, us.size, count);
308
309    sigint = sigalrm = 0;
310
311    /* suck in the data */
312    for (n = 0; !sigint && !sigalrm; ++n) {
313	if (us.size != -1 && us.size - count < B_size)
314	    size = us.size - count;
315	else
316	    size = B_size;
317	if (timeout)
318	    alarm(timeout);
319	if ((size = fread(buf, 1, size, f)) <= 0)
320	    break;
321	stat_update(&xs, count += size, 0);
322	if (fwrite(buf, size, 1, of) != 1)
323	    break;
324    }
325
326    if (timeout)
327	alarm(0);
328
329    stat_end(&xs);
330
331    /* Set mtime of local file */
332    if (!n_flag && us.mtime && !o_stdout) {
333	struct timeval tv[2];
334
335	fflush(of);
336	tv[0].tv_sec = (long)(us.atime ? us.atime : us.mtime);
337	tv[1].tv_sec = (long)us.mtime;
338	tv[0].tv_usec = tv[1].tv_usec = 0;
339	if (utimes(path, tv))
340	    warn("%s: utimes()", path);
341    }
342
343    /* timed out or interrupted? */
344    if (sigalrm)
345	warnx("transfer timed out");
346    if (sigint)
347	warnx("transfer interrupted");
348
349    if (!sigalrm && !sigint) {
350	/* check the status of our files */
351	if (ferror(f))
352	    warn("%s", URL);
353	if (ferror(of))
354	    warn("%s", path);
355	if (ferror(f) || ferror(of))
356	    goto failure;
357    }
358
359    /* did the transfer complete normally? */
360    if (us.size != -1 && count < us.size) {
361	warnx("%s appears to be truncated: %lld/%lld bytes",
362	      path, count, us.size);
363	goto failure_keep;
364    }
365
366 success:
367    r = 0;
368    goto done;
369 failure:
370    if (of && of != stdout && !R_flag && !r_flag)
371	unlink(path);
372 failure_keep:
373    r = -1;
374    goto done;
375 done:
376    if (f)
377	fclose(f);
378    if (of && of != stdout)
379	fclose(of);
380    if (url)
381	fetchFreeURL(url);
382    return r;
383}
384
/*
 * Print a usage summary to stderr.
 *
 * The option letters listed here must be kept in step with the getopt()
 * option string in main().
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
395
396
/*
 * PARSENUM(NAME, TYPE) defines the function
 *
 *	int NAME(const char *s, TYPE *v)
 *
 * which converts the decimal string `s' to a TYPE and stores it in `*v'.
 * Only the digits 0-9 are accepted: no sign, no whitespace, no suffix.
 * An empty string yields 0.
 *
 * Returns 0 on success, or -1 if `s' contains a non-digit or the value
 * does not fit in TYPE.  On failure `*v' holds the value of the digits
 * accepted so far and should not be used.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(const char *s, TYPE *v)						\
{									\
    /* largest value TYPE can hold, whether it is signed or unsigned */	\
    const TYPE vmax = ((TYPE)-1 > 0) ? (TYPE)-1 :			\
	(TYPE)((1ULL << (sizeof(TYPE) * CHAR_BIT - 1)) - 1);		\
    int digit;								\
									\
    for (*v = 0; *s; s++) {						\
	/* the cast keeps isdigit() well defined for negative chars */	\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = *s - '0';						\
	/* refuse the digit if appending it would overflow TYPE */	\
	if (*v > (vmax - digit) / 10)					\
	    return -1;							\
	*v = *v * 10 + digit;						\
    }									\
    return 0;								\
}

PARSENUM(parseint, u_int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
413
414int
415main(int argc, char *argv[])
416{
417    struct stat sb;
418    struct sigaction sa;
419    char *p, *q, *s;
420    int c, e, r;
421
422    while ((c = getopt(argc, argv,
423		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
424	switch (c) {
425	case '1':
426	    once_flag = 1;
427	    break;
428	case '4':
429	    family = PF_INET;
430	    break;
431	case '6':
432	    family = PF_INET6;
433	    break;
434	case 'A':
435	    A_flag = 1;
436	    break;
437	case 'a':
438	    a_flag = 1;
439	    break;
440	case 'B':
441	    if (parsesize(optarg, &B_size) == -1)
442		errx(1, "invalid buffer size");
443	    break;
444	case 'b':
445	    warnx("warning: the -b option is deprecated");
446	    b_flag = 1;
447	    break;
448	case 'c':
449	    c_dirname = optarg;
450	    break;
451	case 'd':
452	    d_flag = 1;
453	    break;
454	case 'F':
455	    F_flag = 1;
456	    break;
457	case 'f':
458	    f_filename = optarg;
459	    break;
460	case 'H':
461	    H_flag = 1;
462	    break;
463	case 'h':
464	    h_hostname = optarg;
465	    break;
466	case 'l':
467	    l_flag = 1;
468	    break;
469	case 'o':
470	    o_flag = 1;
471	    o_filename = optarg;
472	    break;
473	case 'M':
474	case 'm':
475	    m_flag = 1;
476	    break;
477	case 'n':
478	    n_flag = 1;
479	    break;
480	case 'P':
481	case 'p':
482	    p_flag = 1;
483	    break;
484	case 'q':
485	    v_level = 0;
486	    break;
487	case 'R':
488	    R_flag = 1;
489	    break;
490	case 'r':
491	    r_flag = 1;
492	    break;
493	case 'S':
494	    if (parseoff(optarg, &S_size) == -1)
495		errx(1, "invalid size");
496	    break;
497	case 's':
498	    s_flag = 1;
499	    break;
500	case 'T':
501	    if (parseint(optarg, &T_secs) == -1)
502		errx(1, "invalid timeout");
503	    break;
504	case 't':
505	    t_flag = 1;
506	    warnx("warning: the -t option is deprecated");
507	    break;
508	case 'v':
509	    v_level++;
510	    break;
511	case 'w':
512	    a_flag = 1;
513	    if (parseint(optarg, &w_secs) == -1)
514		errx(1, "invalid delay");
515	    break;
516	default:
517	    usage();
518	    exit(EX_USAGE);
519	}
520
521    argc -= optind;
522    argv += optind;
523
524    if (h_hostname || f_filename || c_dirname) {
525	if (!h_hostname || !f_filename || argc) {
526	    usage();
527	    exit(EX_USAGE);
528	}
529	/* XXX this is a hack. */
530	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
531	    errx(1, "invalid hostname");
532	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
533		     c_dirname ? c_dirname : "", f_filename) == -1)
534	    errx(1, strerror(ENOMEM));
535	argc++;
536    }
537
538    if (!argc) {
539	usage();
540	exit(EX_USAGE);
541    }
542
543    /* allocate buffer */
544    if (B_size < MINBUFSIZE)
545	B_size = MINBUFSIZE;
546    if ((buf = malloc(B_size)) == NULL)
547	errx(1, strerror(ENOMEM));
548
549    /* timeouts */
550    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
551	if (parseint(s, &ftp_timeout) == -1) {
552	    warnx("FTP_TIMEOUT is not a positive integer");
553	    ftp_timeout = 0;
554	}
555    }
556    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
557	if (parseint(s, &http_timeout) == -1) {
558	    warnx("HTTP_TIMEOUT is not a positive integer");
559	    http_timeout = 0;
560	}
561    }
562
563    /* signal handling */
564    sa.sa_flags = 0;
565    sa.sa_handler = sig_handler;
566    sigemptyset(&sa.sa_mask);
567    (void)sigaction(SIGALRM, &sa, NULL);
568    (void)sigaction(SIGINT, &sa, NULL);
569
570    /* output file */
571    if (o_flag) {
572	if (strcmp(o_filename, "-") == 0) {
573	    o_stdout = 1;
574	} else if (stat(o_filename, &sb) == -1) {
575	    if (errno == ENOENT) {
576		if (argc > 1)
577		    errx(EX_USAGE, "%s is not a directory", o_filename);
578	    } else {
579		err(EX_IOERR, "%s", o_filename);
580	    }
581	} else {
582	    if (sb.st_mode & S_IFDIR)
583		o_directory = 1;
584	}
585    }
586
587    /* check if output is to a tty (for progress report) */
588    v_tty = isatty(STDERR_FILENO);
589    r = 0;
590
591    while (argc) {
592	if ((p = strrchr(*argv, '/')) == NULL)
593	    p = *argv;
594	else
595	    p++;
596
597	if (!*p)
598	    p = "fetch.out";
599
600	fetchLastErrCode = 0;
601
602	if (o_flag) {
603	    if (o_stdout) {
604		e = fetch(*argv, "-");
605	    } else if (o_directory) {
606		asprintf(&q, "%s/%s", o_filename, p);
607		e = fetch(*argv, q);
608		free(q);
609	    } else {
610		e = fetch(*argv, o_filename);
611	    }
612	} else {
613	    e = fetch(*argv, p);
614	}
615
616	if (sigint)
617	    exit(1);
618
619	if (e == 0 && once_flag)
620	    exit(0);
621
622	if (e) {
623	    r = 1;
624	    if ((fetchLastErrCode
625		 && fetchLastErrCode != FETCH_UNAVAIL
626		 && fetchLastErrCode != FETCH_MOVED
627		 && fetchLastErrCode != FETCH_URL
628		 && fetchLastErrCode != FETCH_RESOLV
629		 && fetchLastErrCode != FETCH_UNKNOWN)) {
630		if (w_secs) {
631		    if (v_level)
632			fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs);
633		    sleep(w_secs);
634		}
635		if (a_flag)
636		    continue;
637		fprintf(stderr, "Skipping %s\n", *argv);
638	    }
639	}
640
641	argc--, argv++;
642    }
643
644    exit(r);
645}
646