fetch.c revision 62815
1154645Syar/*-
 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3154645Syar * All rights reserved.
4154645Syar *
5154645Syar * Redistribution and use in source and binary forms, with or without
6154645Syar * modification, are permitted provided that the following conditions
7154645Syar * are met:
8154645Syar * 1. Redistributions of source code must retain the above copyright
9154645Syar *    notice, this list of conditions and the following disclaimer
10154645Syar *    in this position and unchanged.
11154645Syar * 2. Redistributions in binary form must reproduce the above copyright
12154645Syar *    notice, this list of conditions and the following disclaimer in the
13154645Syar *    documentation and/or other materials provided with the distribution.
14154645Syar * 3. The name of the author may not be used to endorse or promote products
15154645Syar *    derived from this software without specific prior written permission
16154645Syar *
17154645Syar * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18154645Syar * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19154645Syar * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20154645Syar * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21154645Syar * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22154645Syar * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23154645Syar * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24154645Syar * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$FreeBSD: head/usr.bin/fetch/fetch.c 62815 2000-07-08 09:34:33Z des $
29 */
30
31#include <sys/param.h>
32#include <sys/stat.h>
33#include <sys/socket.h>
34
35#include <ctype.h>
36#include <err.h>
37#include <errno.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <sysexits.h>
42#include <unistd.h>
43
44#include <fetch.h>
45
/* Smallest transfer buffer main() will allocate, whatever -B asks for. */
#define MINBUFSIZE	4096

/*
 * Command-line option state.
 *
 * Everything without an explicit initializer starts out as zero / NULL.
 * Options flagged with '!' only produce a deprecation warning in main().
 */
int		 A_flag;		/*    -A: do not follow 302 redirects */
int		 a_flag;		/*    -a: auto retry */
size_t		 B_size;		/*    -B: buffer size */
int		 b_flag;		/*!   -b: workaround TCP bug */
char		*c_dirname;		/*    -c: remote directory */
int		 d_flag;		/*    -d: direct connection */
int		 F_flag;		/*    -F: restart without checking mtime */
char		*f_filename;		/*    -f: file to fetch */
int		 H_flag;		/*    -H: use high port */
char		*h_hostname;		/*    -h: host to fetch from */
int		 l_flag;		/*    -l: link rather than copy file: URLs */
int		 m_flag;		/* -[Mm]: mirror mode */
int		 n_flag;		/*    -n: do not preserve modification time */
int		 o_flag;		/*    -o: specify output file */
int		 o_directory;		/*        output file is a directory */
char		*o_filename;		/*        name of output file */
int		 o_stdout;		/*        output file is stdout */
int		 once_flag;		/*    -1: stop at first successful file */
int		 p_flag = 1;		/* -[Pp]: use passive FTP (on by default) */
int		 R_flag;		/*    -R: don't delete partially transferred files */
int		 r_flag;		/*    -r: restart previously interrupted transfer */
unsigned int	 T_secs;		/*    -T: transfer timeout in seconds */
int		 s_flag;		/*    -s: show size, don't fetch */
off_t		 S_size;		/*    -S: require size to match */
int		 t_flag;		/*!   -t: workaround TCP bug */
int		 v_level = 1;		/*    -v: verbosity level (-q sets it to 0) */
int		 v_tty;			/*        stderr is a tty */
unsigned int	 w_secs;		/*    -w: retry delay */
int		 family = PF_UNSPEC;	/* -[46]: address family to use */

/* Other state shared between main() and fetch() */
unsigned int	 ftp_timeout;		/* default timeout for FTP transfers */
unsigned int	 http_timeout;		/* default timeout for HTTP transfers */
unsigned char	*buf;			/* transfer buffer */
83
84
/*
 * Signal handler: report that the transfer timed out and terminate the
 * program with exit status 1.
 *
 * NOTE(review): errx() is not on the POSIX list of async-signal-safe
 * functions; confirm that calling it from a handler is acceptable here.
 */
void
sig_handler(int sig)
{
    (void)sig;		/* the signal number itself is not needed */

    errx(1, "Transfer timed out");
}
90
91struct xferstat {
92    char		 name[40];
93    struct timeval	 start;
94    struct timeval	 end;
95    struct timeval	 last;
96    off_t		 size;
97    off_t		 offset;
98    off_t		 rcvd;
99};
100
101void
102stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
103{
104    snprintf(xs->name, sizeof xs->name, "%s", name);
105    xs->size = size;
106    xs->offset = offset;
107    if (v_level) {
108	fprintf(stderr, "Receiving %s", xs->name);
109	if (xs->size != -1)
110	    fprintf(stderr, " (%lld bytes)", xs->size - xs->offset);
111    }
112    gettimeofday(&xs->start, NULL);
113    xs->last = xs->start;
114}
115
116void
117stat_update(struct xferstat *xs, off_t rcvd)
118{
119    struct timeval now;
120
121    xs->rcvd = rcvd;
122
123    if (v_level <= 1 || !v_tty)
124	return;
125
126    gettimeofday(&now, NULL);
127    if (now.tv_sec <= xs->last.tv_sec)
128	return;
129    xs->last = now;
130
131    fprintf(stderr, "\rReceiving %s", xs->name);
132    if (xs->size == -1)
133	fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset);
134    else
135	fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset,
136		(int)((100.0 * xs->rcvd) / (xs->size - xs->offset)));
137}
138
139void
140stat_end(struct xferstat *xs)
141{
142    double delta;
143    double bps;
144
145    gettimeofday(&xs->end, NULL);
146
147    if (!v_level)
148	return;
149
150    fputc('\n', stderr);
151    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
152	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
153    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
154	    xs->size - xs->offset, delta);
155    bps = (xs->size - xs->offset) / delta;
156    if (bps > 1024*1024)
157	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
158    else if (bps > 1024)
159	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
160    else
161	fprintf(stderr, "(%.2f Bps)\n", bps);
162}
163
164int
165fetch(char *URL, char *path)
166{
167    struct url *url;
168    struct url_stat us;
169    struct stat sb;
170    struct xferstat xs;
171    FILE *f, *of;
172    size_t size;
173    off_t count;
174    char flags[8];
175    int ch, n, r;
176    u_int timeout;
177
178    f = of = NULL;
179
180    /* parse URL */
181    if ((url = fetchParseURL(URL)) == NULL) {
182	warnx("%s: parse error", URL);
183	goto failure;
184    }
185
186    timeout = 0;
187    *flags = 0;
188
189    /* common flags */
190    if (v_level > 2)
191	strcat(flags, "v");
192    switch (family) {
193    case PF_INET:
194	strcat(flags, "4");
195	break;
196    case PF_INET6:
197	strcat(flags, "6");
198	break;
199    }
200
201    /* FTP specific flags */
202    if (strcmp(url->scheme, "ftp") == 0) {
203	if (p_flag)
204	    strcat(flags, "p");
205	if (d_flag)
206	    strcat(flags, "d");
207	if (H_flag)
208	    strcat(flags, "h");
209	timeout = T_secs ? T_secs : ftp_timeout;
210    }
211
212    /* HTTP specific flags */
213    if (strcmp(url->scheme, "http") == 0) {
214	if (d_flag)
215	    strcat(flags, "d");
216	if (A_flag)
217	    strcat(flags, "A");
218	timeout = T_secs ? T_secs : http_timeout;
219    }
220
221    /*
222     * Set the protocol timeout.
223     * This currently only works for FTP, so we still use
224     * alarm(timeout) further down.
225     */
226    fetchTimeout = timeout;
227
228    /* stat remote file */
229    alarm(timeout);
230    if (fetchStat(url, &us, flags) == -1)
231	warnx("%s: size not known", path);
232    alarm(timeout);
233
234    /* just print size */
235    if (s_flag) {
236	if (us.size == -1)
237	    printf("Unknown\n");
238	else
239	    printf("%lld\n", us.size);
240	goto success;
241    }
242
243    /* check that size is as expected */
244    if (S_size && us.size != -1 && us.size != S_size) {
245	warnx("%s: size mismatch: expected %lld, actual %lld",
246	      path, S_size, us.size);
247	goto failure;
248    }
249
250    /* symlink instead of copy */
251    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
252	if (symlink(url->doc, path) == -1) {
253	    warn("%s: symlink()", path);
254	    goto failure;
255	}
256	goto success;
257    }
258
259    if (o_stdout) {
260	/* output to stdout */
261	of = stdout;
262    } else if (r_flag && us.size != -1 && stat(path, &sb) != -1
263	       && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) {
264	/* output to file, restart aborted transfer */
265	if (us.size == sb.st_size)
266	    goto success;
267	else if (sb.st_size > us.size && truncate(path, us.size) == -1) {
268	    warn("%s: truncate()", path);
269	    goto failure;
270	}
271	if ((of = fopen(path, "a")) == NULL) {
272	    warn("%s: open()", path);
273	    goto failure;
274	}
275	url->offset = sb.st_size;
276    } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) {
277	/* output to file, mirror mode */
278	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
279	    return 0;
280	if ((of = fopen(path, "w")) == NULL) {
281	    warn("%s: open()", path);
282	    goto failure;
283	}
284    } else {
285	/* output to file, all other cases */
286	if ((of = fopen(path, "w")) == NULL) {
287	    warn("%s: open()", path);
288	    goto failure;
289	}
290    }
291    count = url->offset;
292
293    /* start the transfer */
294    if ((f = fetchGet(url, flags)) == NULL) {
295	warnx("%s", fetchLastErrString);
296	if (!R_flag && !r_flag && !o_stdout)
297	    unlink(path);
298	goto failure;
299    }
300
301    /* start the counter */
302    stat_start(&xs, path, us.size, count);
303
304    n = 0;
305
306    if (us.size == -1) {
307	/*
308	 * We have no idea how much data to expect, so do it byte by
309         * byte. This is incredibly inefficient, but there's not much
310         * we can do about it... :(
311	 */
312	while (1) {
313	    if (timeout)
314		alarm(timeout);
315#ifdef STDIO_HACK
316	    /*
317	     * This is a non-portable hack, but it makes things go
318	     * faster. Basically, if there is data in the input file's
319	     * buffer, write it out; then fall through to the fgetc()
320	     * which forces a refill. It saves a memcpy() and reduces
321	     * the number of iterations, i.e the number of calls to
322	     * alarm(). Empirical evidence shows this can cut user
323	     * time by up to 90%. There may be better (even portable)
324	     * ways to do this.
325	     */
326	    if (f->_r && (f->_ub._base == NULL)) {
327		if (fwrite(f->_p, f->_r, 1, of) < 1)
328		    break;
329		count += f->_r;
330		f->_p += f->_r;
331		f->_r = 0;
332	    }
333#endif
334	    if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF)
335		break;
336	    stat_update(&xs, count++);
337	    n++;
338	}
339    } else {
340	/* we know exactly how much to transfer, so do it efficiently */
341	for (size = B_size; count != us.size; n++) {
342	    if (us.size - count < B_size)
343		size = us.size - count;
344	    if (timeout)
345		alarm(timeout);
346	    if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1)
347		break;
348	    stat_update(&xs, count += size);
349	}
350    }
351
352    if (timeout)
353	alarm(0);
354
355    stat_end(&xs);
356
357    /* check the status of our files */
358    if (ferror(f))
359	warn("%s", URL);
360    if (ferror(of))
361	warn("%s", path);
362    if (ferror(f) || ferror(of)) {
363	if (!R_flag && !r_flag && !o_stdout)
364	    unlink(path);
365	goto failure;
366    }
367
368    /* need to close the file before setting mtime */
369    if (of != stdout) {
370	fclose(of);
371	of = NULL;
372    }
373
374    /* Set mtime of local file */
375    if (!n_flag && us.size != -1 && !o_stdout) {
376	struct timeval tv[2];
377
378	tv[0].tv_sec = (long)us.atime;
379	tv[1].tv_sec = (long)us.mtime;
380	tv[0].tv_usec = tv[1].tv_usec = 0;
381	if (utimes(path, tv))
382	    warn("%s: utimes()", path);
383    }
384
385    /* check the file size */
386    if (us.size != -1 && count < us.size) {
387	warnx("%s appears to be truncated: %lld/%lld bytes",
388	      path, count, us.size);
389	goto failure;
390    }
391
392 success:
393    r = 0;
394    goto done;
395 failure:
396    r = -1;
397    goto done;
398 done:
399    if (f)
400	fclose(f);
401    if (of && of != stdout)
402	fclose(of);
403    fetchFreeURL(url);
404    return r;
405}
406
/*
 * Print a usage synopsis on stderr.  The option letters listed here are
 * meant to match the getopt() string in main().
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
417
418
/*
 * Largest value representable in the integer type TYPE, as an
 * unsigned long long.  Works for signed and unsigned types; assumes
 * 8-bit bytes and no padding bits.
 */
#define PARSENUM_MAX(TYPE)						\
    ((TYPE)-1 > 0 ? (unsigned long long)(TYPE)-1 :			\
     (1ULL << (sizeof(TYPE) * 8 - 1)) - 1)

/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(char *s, TYPE *v);
 *
 * which converts the decimal string s to a number and stores it in *v.
 * Returns 0 on success.  Returns -1, with *v set to 0, if s is empty,
 * contains anything but decimal digits, or denotes a value that does
 * not fit in TYPE.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(char *s, TYPE *v)							\
{									\
    const unsigned long long max = PARSENUM_MAX(TYPE);			\
    unsigned long long acc = 0;						\
    unsigned int digit;							\
									\
    *v = 0;								\
    if (*s == '\0')							\
	return -1;							\
    for (; *s != '\0'; s++) {						\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = (unsigned int)(*s - '0');				\
	if (acc > (max - digit) / 10)					\
	    return -1;							\
	acc = acc * 10 + digit;						\
    }									\
    *v = (TYPE)acc;							\
    return 0;								\
}

/* u_int is unsigned int; spelled out so no BSD typedef is needed here */
PARSENUM(parseint, unsigned int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
435
436int
437main(int argc, char *argv[])
438{
439    struct stat sb;
440    char *p, *q, *s;
441    int c, e, r;
442
443    while ((c = getopt(argc, argv,
444		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
445	switch (c) {
446	case '1':
447	    once_flag = 1;
448	    break;
449	case '4':
450	    family = PF_INET;
451	    break;
452	case '6':
453	    family = PF_INET6;
454	    break;
455	case 'A':
456	    A_flag = 1;
457	    break;
458	case 'a':
459	    a_flag = 1;
460	    break;
461	case 'B':
462	    if (parsesize(optarg, &B_size) == -1)
463		errx(1, "invalid buffer size");
464	    break;
465	case 'b':
466	    warnx("warning: the -b option is deprecated");
467	    b_flag = 1;
468	    break;
469	case 'c':
470	    c_dirname = optarg;
471	    break;
472	case 'd':
473	    d_flag = 1;
474	    break;
475	case 'F':
476	    F_flag = 1;
477	    break;
478	case 'f':
479	    f_filename = optarg;
480	    break;
481	case 'H':
482	    H_flag = 1;
483	    break;
484	case 'h':
485	    h_hostname = optarg;
486	    break;
487	case 'l':
488	    l_flag = 1;
489	    break;
490	case 'o':
491	    o_flag = 1;
492	    o_filename = optarg;
493	    break;
494	case 'M':
495	case 'm':
496	    m_flag = 1;
497	    break;
498	case 'n':
499	    n_flag = 1;
500	    break;
501	case 'P':
502	case 'p':
503	    p_flag = 1;
504	    break;
505	case 'q':
506	    v_level = 0;
507	    break;
508	case 'R':
509	    R_flag = 1;
510	    break;
511	case 'r':
512	    r_flag = 1;
513	    break;
514	case 'S':
515	    if (parseoff(optarg, &S_size) == -1)
516		errx(1, "invalid size");
517	    break;
518	case 's':
519	    s_flag = 1;
520	    break;
521	case 'T':
522	    if (parseint(optarg, &T_secs) == -1)
523		errx(1, "invalid timeout");
524	    break;
525	case 't':
526	    t_flag = 1;
527	    warnx("warning: the -t option is deprecated");
528	    break;
529	case 'v':
530	    v_level++;
531	    break;
532	case 'w':
533	    a_flag = 1;
534	    if (parseint(optarg, &w_secs) == -1)
535		errx(1, "invalid delay");
536	    break;
537	default:
538	    usage();
539	    exit(EX_USAGE);
540	}
541
542    argc -= optind;
543    argv += optind;
544
545    if (h_hostname || f_filename || c_dirname) {
546	if (!h_hostname || !f_filename || argc) {
547	    usage();
548	    exit(EX_USAGE);
549	}
550	/* XXX this is a hack. */
551	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
552	    errx(1, "invalid hostname");
553	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
554		     c_dirname ? c_dirname : "", f_filename) == -1)
555	    errx(1, strerror(ENOMEM));
556	argc++;
557    }
558
559    if (!argc) {
560	usage();
561	exit(EX_USAGE);
562    }
563
564    /* allocate buffer */
565    if (B_size < MINBUFSIZE)
566	B_size = MINBUFSIZE;
567    if ((buf = malloc(B_size)) == NULL)
568	errx(1, strerror(ENOMEM));
569
570    /* timeout handling */
571    signal(SIGALRM, sig_handler);
572    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
573	if (parseint(s, &ftp_timeout) == -1) {
574	    warnx("FTP_TIMEOUT is not a positive integer");
575	    ftp_timeout = 0;
576	}
577    }
578    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
579	if (parseint(s, &http_timeout) == -1) {
580	    warnx("HTTP_TIMEOUT is not a positive integer");
581	    http_timeout = 0;
582	}
583    }
584
585    /* output file */
586    if (o_flag) {
587	if (strcmp(o_filename, "-") == 0) {
588	    o_stdout = 1;
589	} else if (stat(o_filename, &sb) == -1) {
590	    if (errno == ENOENT) {
591		if (argc > 1)
592		    errx(EX_USAGE, "%s is not a directory", o_filename);
593	    } else {
594		err(EX_IOERR, "%s", o_filename);
595	    }
596	} else {
597	    if (sb.st_mode & S_IFDIR)
598		o_directory = 1;
599	}
600    }
601
602    /* check if output is to a tty (for progress report) */
603    v_tty = isatty(STDERR_FILENO);
604    r = 0;
605
606    while (argc) {
607	if ((p = strrchr(*argv, '/')) == NULL)
608	    p = *argv;
609	else
610	    p++;
611
612	if (!*p)
613	    p = "fetch.out";
614
615	fetchLastErrCode = 0;
616
617	if (o_flag) {
618	    if (o_stdout) {
619		e = fetch(*argv, "-");
620	    } else if (o_directory) {
621		asprintf(&q, "%s/%s", o_filename, p);
622		e = fetch(*argv, q);
623		free(q);
624	    } else {
625		e = fetch(*argv, o_filename);
626	    }
627	} else {
628	    e = fetch(*argv, p);
629	}
630
631	if (e == 0 && once_flag)
632	    exit(0);
633
634	if (e) {
635	    r = 1;
636	    if ((fetchLastErrCode
637		 && fetchLastErrCode != FETCH_UNAVAIL
638		 && fetchLastErrCode != FETCH_MOVED
639		 && fetchLastErrCode != FETCH_URL
640		 && fetchLastErrCode != FETCH_RESOLV
641		 && fetchLastErrCode != FETCH_UNKNOWN)) {
642		if (w_secs) {
643		    if (v_level)
644			fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs);
645		    sleep(w_secs);
646		}
647		if (a_flag)
648		    continue;
649		fprintf(stderr, "Skipping %s\n", *argv);
650	    }
651	}
652
653	argc--, argv++;
654    }
655
656    exit(r);
657}
658