fetch.c revision 62216
1/*-
2 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$FreeBSD: head/usr.bin/fetch/fetch.c 62216 2000-06-28 16:55:15Z des $
29 */
30
31#include <sys/param.h>
32#include <sys/stat.h>
33#include <sys/socket.h>
34
35#include <ctype.h>
36#include <err.h>
37#include <errno.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <sysexits.h>
42#include <unistd.h>
43
44#include <fetch.h>
45
/* Lower bound that main() enforces on the transfer buffer size. */
#define MINBUFSIZE	4096

/*
 * Program-wide state.
 *
 * The option variables below are filled in by main() while it walks the
 * command line; the comment on each one names the switch that sets it.
 */
int	 A_flag;		/* -A: do not follow 302 redirects */
int	 a_flag;		/* -a: retry automatically (also set by -w) */
size_t	 B_size;		/* -B: requested transfer buffer size */
int	 b_flag;		/* -b: deprecated TCP bug workaround */
int	 d_flag;		/* -d: direct connection */
int	 F_flag;		/* -F: restart without comparing mtime */
char	*f_filename;		/* -f: file to fetch (used with -h) */
int	 H_flag;		/* -H: use high port */
char	*h_hostname;		/* -h: host to fetch from (used with -f) */
int	 l_flag;		/* -l: symlink file: URLs, don't copy */
int	 m_flag;		/* -M/-m: give local file the remote mtime; -n clears */
int	 o_flag;		/* -o: an output name was given */
int	 o_directory;		/*     ... and it names a directory */
char	*o_filename;		/*     ... the name itself */
int	 o_stdout;		/*     ... and it was "-", i.e. stdout */
int	 once_flag;		/* -1: exit after the first successful fetch */
int	 p_flag = 1;		/* -P/-p: passive FTP (on by default) */
int	 R_flag;		/* -R: keep partially transferred files */
int	 r_flag;		/* -r: resume an interrupted transfer */
u_int	 T_secs = 0;		/* -T: transfer timeout, in seconds */
int	 s_flag;		/* -s: print the remote size only */
off_t	 S_size;		/* -S: size the remote file must have */
int	 t_flag;		/* -t: deprecated TCP bug workaround */
int	 v_level = 1;		/* -v raises verbosity, -q sets it to 0 */
int	 v_tty;			/* isatty() result from main(); gates live progress */
u_int	 w_secs;		/* -w: seconds to wait before a retry */
int	 family = PF_UNSPEC;	/* -4/-6: address family to use */


u_int	 ftp_timeout;		/* FTP_TIMEOUT from the environment, 0 if unset */
u_int	 http_timeout;		/* HTTP_TIMEOUT from the environment, 0 if unset */
u_char	*buf;			/* transfer buffer, allocated by main() */
81
82
/*
 * Signal handler installed for SIGALRM by main(): report that the
 * transfer timed out and terminate with exit status 1.
 *
 * NOTE(review): errx() is presumably not async-signal-safe; confirm
 * whether that matters here before relying on it.
 */
void
sig_handler(int sig)
{
    (void)sig;		/* the signal number is not needed */

    errx(1, "Transfer timed out");
}
88
/*
 * Bookkeeping for the progress report of one transfer; filled in and
 * read by stat_start(), stat_update() and stat_end().
 */
struct xferstat {
    char		 name[40];	/* display name, truncated to fit */
    struct timeval	 start;		/* time the transfer began */
    struct timeval	 end;		/* time the transfer finished */
    struct timeval	 last;		/* time of the last progress line */
    off_t		 size;		/* total size, or -1 if unknown */
    off_t		 offset;	/* byte offset the transfer started at */
    off_t		 rcvd;		/* latest byte count given to stat_update() */
};
98
99void
100stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
101{
102    snprintf(xs->name, sizeof xs->name, "%s", name);
103    xs->size = size;
104    xs->offset = offset;
105    if (v_level) {
106	fprintf(stderr, "Receiving %s", xs->name);
107	if (xs->size != -1)
108	    fprintf(stderr, " (%lld bytes)", xs->size - xs->offset);
109    }
110    gettimeofday(&xs->start, NULL);
111    xs->last = xs->start;
112}
113
114void
115stat_update(struct xferstat *xs, off_t rcvd)
116{
117    struct timeval now;
118
119    xs->rcvd = rcvd;
120
121    if (v_level <= 1 || !v_tty)
122	return;
123
124    gettimeofday(&now, NULL);
125    if (now.tv_sec <= xs->last.tv_sec)
126	return;
127    xs->last = now;
128
129    fprintf(stderr, "\rReceiving %s", xs->name);
130    if (xs->size == -1)
131	fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset);
132    else
133	fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset,
134		(int)((100.0 * xs->rcvd) / (xs->size - xs->offset)));
135}
136
137void
138stat_end(struct xferstat *xs)
139{
140    double delta;
141    double bps;
142
143    gettimeofday(&xs->end, NULL);
144
145    if (!v_level)
146	return;
147
148    fputc('\n', stderr);
149    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
150	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
151    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
152	    xs->size - xs->offset, delta);
153    bps = (xs->size - xs->offset) / delta;
154    if (bps > 1024*1024)
155	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
156    else if (bps > 1024)
157	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
158    else
159	fprintf(stderr, "(%.2f Bps)\n", bps);
160}
161
162int
163fetch(char *URL, char *path)
164{
165    struct url *url;
166    struct url_stat us;
167    struct stat sb;
168    struct xferstat xs;
169    FILE *f, *of;
170    size_t size;
171    off_t count;
172    char flags[8];
173    int ch, n, r;
174    u_int timeout;
175
176    f = of = NULL;
177
178    /* parse URL */
179    if ((url = fetchParseURL(URL)) == NULL) {
180	warnx("%s: parse error", URL);
181	goto failure;
182    }
183
184    timeout = 0;
185    *flags = 0;
186
187    /* common flags */
188    if (v_level > 2)
189	strcat(flags, "v");
190    switch (family) {
191    case PF_INET:
192	strcat(flags, "4");
193	break;
194    case PF_INET6:
195	strcat(flags, "6");
196	break;
197    }
198
199    /* FTP specific flags */
200    if (strcmp(url->scheme, "ftp") == 0) {
201	if (p_flag)
202	    strcat(flags, "p");
203	if (d_flag)
204	    strcat(flags, "d");
205	if (H_flag)
206	    strcat(flags, "h");
207	timeout = T_secs ? T_secs : ftp_timeout;
208    }
209
210    /* HTTP specific flags */
211    if (strcmp(url->scheme, "http") == 0) {
212	if (d_flag)
213	    strcat(flags, "d");
214	if (A_flag)
215	    strcat(flags, "A");
216	timeout = T_secs ? T_secs : http_timeout;
217    }
218
219    /*
220     * Set the protocol timeout.
221     * This currently only works for FTP, so we still use
222     * alarm(timeout) further down.
223     */
224    fetchTimeout = timeout;
225
226    /* stat remote file */
227    alarm(timeout);
228    if (fetchStat(url, &us, flags) == -1)
229	warnx("%s: size not known", path);
230    alarm(timeout);
231
232    /* just print size */
233    if (s_flag) {
234	if (us.size == -1)
235	    printf("Unknown\n");
236	else
237	    printf("%lld\n", us.size);
238	goto success;
239    }
240
241    /* check that size is as expected */
242    if (S_size && us.size != -1 && us.size != S_size) {
243	warnx("%s: size mismatch: expected %lld, actual %lld",
244	      path, S_size, us.size);
245	goto failure;
246    }
247
248    /* symlink instead of copy */
249    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
250	if (symlink(url->doc, path) == -1) {
251	    warn("%s: symlink()", path);
252	    goto failure;
253	}
254	goto success;
255    }
256
257    if (o_stdout) {
258	/* output to stdout */
259	of = stdout;
260    } else if (r_flag && us.size != -1 && stat(path, &sb) != -1
261	       && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) {
262	/* output to file, restart aborted transfer */
263	if (us.size == sb.st_size)
264	    goto success;
265	else if (sb.st_size > us.size && truncate(path, us.size) == -1) {
266	    warn("%s: truncate()", path);
267	    goto failure;
268	}
269	if ((of = fopen(path, "a")) == NULL) {
270	    warn("%s: open()", path);
271	    goto failure;
272	}
273	url->offset = sb.st_size;
274    } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) {
275	/* output to file, mirror mode */
276	warnx(" local: %lld bytes, mtime %ld", sb.st_size, sb.st_mtime);
277	warnx("remote: %lld bytes, mtime %ld", us.size, us.mtime);
278	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
279	    return 0;
280	if ((of = fopen(path, "w")) == NULL) {
281	    warn("%s: open()", path);
282	    goto failure;
283	}
284    } else {
285	/* output to file, all other cases */
286	if ((of = fopen(path, "w")) == NULL) {
287	    warn("%s: open()", path);
288	    goto failure;
289	}
290    }
291    count = url->offset;
292
293    /* start the transfer */
294    if ((f = fetchGet(url, flags)) == NULL) {
295	warnx("%s", fetchLastErrString);
296	goto failure;
297    }
298
299    /* start the counter */
300    stat_start(&xs, path, us.size, count);
301
302    n = 0;
303
304    if (us.size == -1) {
305	/*
306	 * We have no idea how much data to expect, so do it byte by
307         * byte. This is incredibly inefficient, but there's not much
308         * we can do about it... :(
309	 */
310	while (1) {
311	    if (timeout)
312		alarm(timeout);
313#ifdef STDIO_HACK
314	    /*
315	     * This is a non-portable hack, but it makes things go
316	     * faster. Basically, if there is data in the input file's
317	     * buffer, write it out; then fall through to the fgetc()
318	     * which forces a refill. It saves a memcpy() and reduces
319	     * the number of iterations, i.e the number of calls to
320	     * alarm(). Empirical evidence shows this can cut user
321	     * time by up to 90%. There may be better (even portable)
322	     * ways to do this.
323	     */
324	    if (f->_r && (f->_ub._base == NULL)) {
325		if (fwrite(f->_p, f->_r, 1, of) < 1)
326		    break;
327		count += f->_r;
328		f->_p += f->_r;
329		f->_r = 0;
330	    }
331#endif
332	    if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF)
333		break;
334	    stat_update(&xs, count++);
335	    n++;
336	}
337    } else {
338	/* we know exactly how much to transfer, so do it efficiently */
339	for (size = B_size; count != us.size; n++) {
340	    if (us.size - count < B_size)
341		size = us.size - count;
342	    if (timeout)
343		alarm(timeout);
344	    if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1)
345		break;
346	    stat_update(&xs, count += size);
347	}
348    }
349
350    if (timeout)
351	alarm(0);
352
353    stat_end(&xs);
354
355    /* check the status of our files */
356    if (ferror(f))
357	warn("%s", URL);
358    if (ferror(of))
359	warn("%s", path);
360    if (ferror(f) || ferror(of)) {
361	if (!R_flag && !o_stdout)
362	    unlink(path);
363	goto failure;
364    }
365
366    /* need to close the file before setting mtime */
367    if (of != stdout) {
368	fclose(of);
369	of = NULL;
370    }
371
372    /* Set mtime of local file */
373    if (m_flag && us.size != -1 && !o_stdout) {
374	struct timeval tv[2];
375
376	tv[0].tv_sec = (long)us.atime;
377	tv[1].tv_sec = (long)us.mtime;
378	tv[0].tv_usec = tv[1].tv_usec = 0;
379	if (utimes(path, tv))
380	    warn("%s: utimes()", path);
381    }
382
383 success:
384    r = 0;
385    goto done;
386 failure:
387    r = -1;
388    goto done;
389 done:
390    if (f)
391	fclose(f);
392    if (of && of != stdout)
393	fclose(of);
394    fetchFreeURL(url);
395    return r;
396}
397
/*
 * Print a synopsis of the command line to stderr.  The option letters
 * listed here mirror the getopt() string in main().
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host | URL ...]\n"
	);
}
408
409
/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(char *s, TYPE *v)
 *
 * which parses s as a non-negative decimal integer and stores it in *v.
 * It returns 0 on success.  It returns -1, leaving *v set to 0, if s is
 * empty, contains anything other than decimal digits, or names a value
 * that does not fit in TYPE.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(char *s, TYPE *v)							\
{									\
    unsigned long long acc = 0;						\
    unsigned int digit;							\
									\
    *v = 0;								\
    if (*s == '\0')							\
	return -1;							\
    for (; *s != '\0'; s++) {						\
	/* ctype functions need an unsigned char value */		\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = (unsigned int)(*s - '0');				\
	/* would acc * 10 + digit overflow the accumulator? */		\
	if (acc > (~0ULL - digit) / 10)					\
	    return -1;							\
	acc = acc * 10 + digit;						\
	/* does the value still survive a round trip through TYPE? */	\
	if ((TYPE)acc < 0 || (unsigned long long)(TYPE)acc != acc)	\
	    return -1;							\
    }									\
    *v = (TYPE)acc;							\
    return 0;								\
}

PARSENUM(parseint, u_int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
426
427int
428main(int argc, char *argv[])
429{
430    struct stat sb;
431    char *p, *q, *s;
432    int c, e, r;
433
434    while ((c = getopt(argc, argv,
435		       "146AaB:bdFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
436	switch (c) {
437	case '1':
438	    once_flag = 1;
439	    break;
440	case '4':
441	    family = PF_INET;
442	    break;
443	case '6':
444	    family = PF_INET6;
445	    break;
446	case 'A':
447	    A_flag = 1;
448	    break;
449	case 'a':
450	    a_flag = 1;
451	    break;
452	case 'B':
453	    if (parsesize(optarg, &B_size) == -1)
454		errx(1, "invalid buffer size");
455	    break;
456	case 'b':
457	    warnx("warning: the -b option is deprecated");
458	    b_flag = 1;
459	    break;
460	case 'd':
461	    d_flag = 1;
462	    break;
463	case 'F':
464	    F_flag = 1;
465	    break;
466	case 'f':
467	    f_filename = optarg;
468	    break;
469	case 'H':
470	    H_flag = 1;
471	    break;
472	case 'h':
473	    h_hostname = optarg;
474	    break;
475	case 'l':
476	    l_flag = 1;
477	    break;
478	case 'o':
479	    o_flag = 1;
480	    o_filename = optarg;
481	    break;
482	case 'M':
483	case 'm':
484	    m_flag = 1;
485	    break;
486	case 'n':
487	    m_flag = 0;
488	    break;
489	case 'P':
490	case 'p':
491	    p_flag = 1;
492	    break;
493	case 'q':
494	    v_level = 0;
495	    break;
496	case 'R':
497	    R_flag = 1;
498	    break;
499	case 'r':
500	    r_flag = 1;
501	    break;
502	case 'S':
503	    if (parseoff(optarg, &S_size) == -1)
504		errx(1, "invalid size");
505	    break;
506	case 's':
507	    s_flag = 1;
508	    break;
509	case 'T':
510	    if (parseint(optarg, &T_secs) == -1)
511		errx(1, "invalid timeout");
512	    break;
513	case 't':
514	    t_flag = 1;
515	    warnx("warning: the -t option is deprecated");
516	    break;
517	case 'v':
518	    v_level++;
519	    break;
520	case 'w':
521	    a_flag = 1;
522	    if (parseint(optarg, &w_secs) == -1)
523		errx(1, "invalid delay");
524	    break;
525	default:
526	    usage();
527	    exit(EX_USAGE);
528	}
529
530    argc -= optind;
531    argv += optind;
532
533    if (h_hostname || f_filename) {
534	if (!h_hostname || !f_filename || argc) {
535	    usage();
536	    exit(EX_USAGE);
537	}
538	/* XXX this is a hack. */
539	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
540	    errx(1, "invalid hostname");
541	if (asprintf(argv, "ftp://%s/%s", h_hostname, f_filename) == -1)
542	    errx(1, strerror(ENOMEM));
543	argc++;
544    }
545
546    if (!argc) {
547	usage();
548	exit(EX_USAGE);
549    }
550
551    /* allocate buffer */
552    if (B_size < MINBUFSIZE)
553	B_size = MINBUFSIZE;
554    if ((buf = malloc(B_size)) == NULL)
555	errx(1, strerror(ENOMEM));
556
557    /* timeout handling */
558    signal(SIGALRM, sig_handler);
559    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
560	if (parseint(s, &ftp_timeout) == -1) {
561	    warnx("FTP_TIMEOUT is not a positive integer");
562	    ftp_timeout = 0;
563	}
564    }
565    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
566	if (parseint(s, &http_timeout) == -1) {
567	    warnx("HTTP_TIMEOUT is not a positive integer");
568	    http_timeout = 0;
569	}
570    }
571
572    /* output file */
573    if (o_flag) {
574	if (strcmp(o_filename, "-") == 0) {
575	    o_stdout = 1;
576	} else if (stat(o_filename, &sb) == -1) {
577	    if (errno == ENOENT) {
578		if (argc > 1)
579		    errx(EX_USAGE, "%s is not a directory", o_filename);
580	    } else {
581		err(EX_IOERR, "%s", o_filename);
582	    }
583	} else {
584	    if (sb.st_mode & S_IFDIR)
585		o_directory = 1;
586	}
587    }
588
589    /* check if output is to a tty (for progress report) */
590    v_tty = isatty(STDOUT_FILENO);
591    r = 0;
592
593    while (argc) {
594	if ((p = strrchr(*argv, '/')) == NULL)
595	    p = *argv;
596	else
597	    p++;
598
599	if (!*p)
600	    p = "fetch.out";
601
602	fetchLastErrCode = 0;
603
604	if (o_flag) {
605	    if (o_stdout) {
606		e = fetch(*argv, "-");
607	    } else if (o_directory) {
608		asprintf(&q, "%s/%s", o_filename, p);
609		e = fetch(*argv, q);
610		free(q);
611	    } else {
612		e = fetch(*argv, o_filename);
613	    }
614	} else {
615	    e = fetch(*argv, p);
616	}
617
618	if (e == 0 && once_flag)
619	    exit(0);
620
621	if (e) {
622	    r = 1;
623	    if ((fetchLastErrCode
624		 && fetchLastErrCode != FETCH_UNAVAIL
625		 && fetchLastErrCode != FETCH_MOVED
626		 && fetchLastErrCode != FETCH_URL
627		 && fetchLastErrCode != FETCH_RESOLV
628		 && fetchLastErrCode != FETCH_UNKNOWN)) {
629		if (w_secs) {
630		    if (v_level)
631			fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs);
632		    sleep(w_secs);
633		}
634		if (a_flag)
635		    continue;
636		fprintf(stderr, "Skipping %s\n", *argv);
637	    }
638	}
639
640	argc--, argv++;
641    }
642
643    exit(r);
644}
645