fetch.c revision 63015
1219820Sjeff/*-
 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3255932Salfred * All rights reserved.
4234183Sjhb *
5234183Sjhb * Redistribution and use in source and binary forms, with or without
6234183Sjhb * modification, are permitted provided that the following conditions
7219820Sjeff * are met:
8219820Sjeff * 1. Redistributions of source code must retain the above copyright
9255932Salfred *    notice, this list of conditions and the following disclaimer
10255932Salfred *    in this position and unchanged.
11234183Sjhb * 2. Redistributions in binary form must reproduce the above copyright
12219820Sjeff *    notice, this list of conditions and the following disclaimer in the
13255932Salfred *    documentation and/or other materials provided with the distribution.
14255932Salfred * 3. The name of the author may not be used to endorse or promote products
15255932Salfred *    derived from this software without specific prior written permission
16234183Sjhb *
17255932Salfred * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18255932Salfred * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19271127Shselasky * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20219820Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21234183Sjhb * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22234183Sjhb * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23234183Sjhb * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24234183Sjhb * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25234183Sjhb * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26234183Sjhb * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27234183Sjhb *
28234183Sjhb *	$FreeBSD: head/usr.bin/fetch/fetch.c 63015 2000-07-12 11:02:01Z des $
29234183Sjhb */
30234183Sjhb
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <fetch.h>
45
46#define MINBUFSIZE	4096
47
48/* Option flags */
49int	 A_flag;	/*    -A: do not follow 302 redirects */
50int	 a_flag;	/*    -a: auto retry */
51size_t	 B_size;	/*    -B: buffer size */
52int	 b_flag;	/*!   -b: workaround TCP bug */
53char    *c_dirname;	/*    -c: remote directory */
54int	 d_flag;	/*    -d: direct connection */
55int	 F_flag;	/*    -F: restart without checking mtime  */
56char	*f_filename;	/*    -f: file to fetch */
57int	 H_flag;	/*    -H: use high port */
58char	*h_hostname;	/*    -h: host to fetch from */
59int	 l_flag;	/*    -l: link rather than copy file: URLs */
60int	 m_flag;	/* -[Mm]: mirror mode */
61int	 n_flag;	/*    -n: do not preserve modification time */
62int	 o_flag;	/*    -o: specify output file */
63int	 o_directory;	/*        output file is a directory */
64char	*o_filename;	/*        name of output file */
65int	 o_stdout;	/*        output file is stdout */
66int	 once_flag;	/*    -1: stop at first successful file */
67int	 p_flag = 1;	/* -[Pp]: use passive FTP */
68int	 R_flag;	/*    -R: don't delete partially transferred files */
69int	 r_flag;	/*    -r: restart previously interrupted transfer */
70u_int	 T_secs = 0;	/*    -T: transfer timeout in seconds */
71int	 s_flag;        /*    -s: show size, don't fetch */
72off_t	 S_size;        /*    -S: require size to match */
73int	 t_flag;	/*!   -t: workaround TCP bug */
74int	 v_level = 1;	/*    -v: verbosity level */
75int	 v_tty;		/*        stdout is a tty */
76u_int	 w_secs;	/*    -w: retry delay */
77int	 family = PF_UNSPEC;	/* -[46]: address family to use */
78
79int	 sigalrm;	/* SIGALRM received */
80int	 sigint;	/* SIGINT received */
81
82u_int	 ftp_timeout;	/* default timeout for FTP transfers */
83u_int	 http_timeout;	/* default timeout for HTTP transfers */
84u_char	*buf;		/* transfer buffer */
85
86
87void
88sig_handler(int sig)
89{
90    switch (sig) {
91    case SIGALRM:
92	sigalrm = 1;
93	break;
94    case SIGINT:
95	sigint = 1;
96	break;
97    }
98}
99
/*
 * Progress bookkeeping for one transfer.  The record is filled in by
 * stat_start(), refreshed by stat_update() and closed by stat_end().
 */
struct xferstat {
    char		 name[40];	/* label used in progress output */
    struct timeval	 start;		/* stamped by stat_start() */
    struct timeval	 end;		/* stamped by stat_end() */
    struct timeval	 last;		/* when progress was last printed */
    off_t		 size;		/* expected size, -1 if not known */
    off_t		 offset;	/* offset handed to stat_start() */
    off_t		 rcvd;		/* count last given to stat_update() */
};

void	 stat_start(struct xferstat *xs, char *name, off_t size, off_t offset);
void	 stat_update(struct xferstat *xs, off_t rcvd);
void	 stat_end(struct xferstat *xs);
113
114void
115stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
116{
117    snprintf(xs->name, sizeof xs->name, "%s", name);
118    gettimeofday(&xs->start, NULL);
119    xs->last.tv_sec = xs->last.tv_usec = 0;
120    xs->end = xs->last;
121    xs->size = size;
122    xs->offset = offset;
123    stat_update(xs, 0);
124}
125
126void
127stat_update(struct xferstat *xs, off_t rcvd)
128{
129    struct timeval now;
130
131    xs->rcvd = rcvd;
132
133    if (v_level <= 1 || !v_tty)
134	return;
135
136    gettimeofday(&now, NULL);
137    if (now.tv_sec <= xs->last.tv_sec)
138	return;
139    xs->last = now;
140
141    fprintf(stderr, "\rReceiving %s", xs->name);
142    if (xs->size == -1)
143	fprintf(stderr, ": %lld bytes", xs->size);
144    else
145	fprintf(stderr, " (%lld bytes): %d%%", xs->size,
146		(int)((100.0 * (xs->rcvd + xs->offset)) / xs->size));
147}
148
149void
150stat_end(struct xferstat *xs)
151{
152    double delta;
153    double bps;
154
155    gettimeofday(&xs->end, NULL);
156
157    if (!v_level)
158	return;
159
160    fputc('\n', stderr);
161    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
162	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
163    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
164	    xs->rcvd - xs->offset, delta);
165    bps = (xs->rcvd - xs->offset) / delta;
166    if (bps > 1024*1024)
167	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
168    else if (bps > 1024)
169	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
170    else
171	fprintf(stderr, "(%.2f Bps)\n", bps);
172}
173
174int
175fetch(char *URL, char *path)
176{
177    struct url *url;
178    struct url_stat us;
179    struct stat sb;
180    struct xferstat xs;
181    FILE *f, *of;
182    size_t size;
183    off_t count;
184    char flags[8];
185    int ch, n, r;
186    u_int timeout;
187
188    f = of = NULL;
189
190    /* parse URL */
191    if ((url = fetchParseURL(URL)) == NULL) {
192	warnx("%s: parse error", URL);
193	goto failure;
194    }
195
196    timeout = 0;
197    *flags = 0;
198
199    /* common flags */
200    if (v_level > 2)
201	strcat(flags, "v");
202    switch (family) {
203    case PF_INET:
204	strcat(flags, "4");
205	break;
206    case PF_INET6:
207	strcat(flags, "6");
208	break;
209    }
210
211    /* FTP specific flags */
212    if (strcmp(url->scheme, "ftp") == 0) {
213	if (p_flag)
214	    strcat(flags, "p");
215	if (d_flag)
216	    strcat(flags, "d");
217	if (H_flag)
218	    strcat(flags, "h");
219	timeout = T_secs ? T_secs : ftp_timeout;
220    }
221
222    /* HTTP specific flags */
223    if (strcmp(url->scheme, "http") == 0) {
224	if (d_flag)
225	    strcat(flags, "d");
226	if (A_flag)
227	    strcat(flags, "A");
228	timeout = T_secs ? T_secs : http_timeout;
229    }
230
231    /* set the protocol timeout. */
232    fetchTimeout = timeout;
233
234    /* stat remote file */
235    if (fetchStat(url, &us, flags) == -1)
236	warnx("%s: size not known", path);
237
238    /* just print size */
239    if (s_flag) {
240	if (us.size == -1)
241	    printf("Unknown\n");
242	else
243	    printf("%lld\n", us.size);
244	goto success;
245    }
246
247    /* check that size is as expected */
248    if (S_size && us.size != -1 && us.size != S_size) {
249	warnx("%s: size mismatch: expected %lld, actual %lld",
250	      path, S_size, us.size);
251	goto failure;
252    }
253
254    /* symlink instead of copy */
255    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
256	if (symlink(url->doc, path) == -1) {
257	    warn("%s: symlink()", path);
258	    goto failure;
259	}
260	goto success;
261    }
262
263    if (o_stdout) {
264	/* output to stdout */
265	of = stdout;
266    } else if (r_flag && us.size != -1 && stat(path, &sb) != -1
267	       && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) {
268	/* output to file, restart aborted transfer */
269	if (us.size == sb.st_size)
270	    goto success;
271	else if (sb.st_size > us.size && truncate(path, us.size) == -1) {
272	    warn("%s: truncate()", path);
273	    goto failure;
274	}
275	if ((of = fopen(path, "a")) == NULL) {
276	    warn("%s: open()", path);
277	    goto failure;
278	}
279	url->offset = sb.st_size;
280    } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) {
281	/* output to file, mirror mode */
282	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
283	    return 0;
284	if ((of = fopen(path, "w")) == NULL) {
285	    warn("%s: open()", path);
286	    goto failure;
287	}
288    } else {
289	/* output to file, all other cases */
290	if ((of = fopen(path, "w")) == NULL) {
291	    warn("%s: open()", path);
292	    goto failure;
293	}
294    }
295    count = url->offset;
296
297    /* start the transfer */
298    if ((f = fetchGet(url, flags)) == NULL) {
299	warnx("%s", fetchLastErrString);
300	if (!R_flag && !r_flag && !o_stdout)
301	    unlink(path);
302	goto failure;
303    }
304
305    /* start the counter */
306    stat_start(&xs, path, us.size, count);
307
308    n = 0;
309    sigint = sigalrm = 0;
310    if (us.size == -1) {
311	/*
312	 * We have no idea how much data to expect, so do it byte by
313         * byte. This is incredibly inefficient, but there's not much
314         * we can do about it... :(
315	 */
316	while (!sigint && !sigalrm) {
317	    if (timeout)
318		alarm(timeout);
319#ifdef STDIO_HACK
320	    /*
321	     * This is a non-portable hack, but it makes things go
322	     * faster. Basically, if there is data in the input file's
323	     * buffer, write it out; then fall through to the fgetc()
324	     * which forces a refill. It saves a memcpy() and reduces
325	     * the number of iterations, i.e the number of calls to
326	     * alarm(). Empirical evidence shows this can cut user
327	     * time by up to 90%. There may be better (even portable)
328	     * ways to do this.
329	     */
330	    if (f->_r && (f->_ub._base == NULL)) {
331		if (fwrite(f->_p, f->_r, 1, of) < 1)
332		    break;
333		count += f->_r;
334		f->_p += f->_r;
335		f->_r = 0;
336	    }
337#endif
338	    if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF)
339		break;
340	    stat_update(&xs, count++);
341	    n++;
342	}
343    } else {
344	/* we know exactly how much to transfer, so do it efficiently */
345	for (size = B_size; count != us.size && !sigint && !sigalrm; n++) {
346	    if (us.size - count < B_size)
347		size = us.size - count;
348	    if (timeout)
349		alarm(timeout);
350	    if ((size = fread(buf, 1, size, f)) <= 0)
351		break;
352	    stat_update(&xs, count += size);
353	    if (fwrite(buf, size, 1, of) != 1)
354		break;
355	}
356    }
357
358    if (timeout)
359	alarm(0);
360
361    stat_end(&xs);
362
363    /* check the status of our files */
364    if (ferror(f))
365	warn("%s", URL);
366    if (ferror(of))
367	warn("%s", path);
368    if (ferror(f) || ferror(of)) {
369	if (!R_flag && !r_flag && !o_stdout)
370	    unlink(path);
371	goto failure;
372    }
373
374    /* need to close the file before setting mtime */
375    if (of != stdout) {
376	fclose(of);
377	of = NULL;
378    }
379
380    /* Set mtime of local file */
381    if (!n_flag && us.size != -1 && !o_stdout) {
382	struct timeval tv[2];
383
384	tv[0].tv_sec = (long)us.atime;
385	tv[1].tv_sec = (long)us.mtime;
386	tv[0].tv_usec = tv[1].tv_usec = 0;
387	if (utimes(path, tv))
388	    warn("%s: utimes()", path);
389    }
390
391    /* did the transfer complete normally? */
392    if (sigalrm)
393	warnx("transfer timed out");
394    else if (sigint)
395	warnx("transfer interrupted");
396    else if (us.size != -1 && count < us.size) {
397	warnx("%s appears to be truncated: %lld/%lld bytes",
398	      path, count, us.size);
399	goto failure;
400    }
401
402 success:
403    r = (!sigalrm && !sigint);
404    goto done;
405 failure:
406    r = -1;
407    goto done;
408 done:
409    if (f)
410	fclose(f);
411    if (of && of != stdout)
412	fclose(of);
413    if (url)
414	fetchFreeURL(url);
415    return r;
416}
417
/*
 * Print a synopsis of the command line on stderr.  The flag letters match
 * the option string that main() hands to getopt(), including -4 and -6.
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
428
429
/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(char *s, TYPE *v)
 *
 * which converts the decimal string s into *v.  Only the digits 0-9 are
 * accepted: no sign, no white space, no suffix.  An empty string yields 0.
 *
 * Returns 0 on success and -1 if s contains a non-digit or names a value
 * that does not fit in TYPE.  On failure *v is left at 0.
 *
 * The value is accumulated in an unsigned long long and checked against
 * the largest value TYPE can hold before each step, so the arithmetic
 * cannot overflow whether TYPE is signed or unsigned.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(char *s, TYPE *v)							\
{									\
    const unsigned long long limit = ((TYPE)-1 > 0) ?			\
	(unsigned long long)(TYPE)-1 :					\
	(1ULL << (sizeof(TYPE) * CHAR_BIT - 1)) - 1;			\
    unsigned long long acc = 0;						\
    unsigned int digit;							\
									\
    *v = 0;								\
    for (; *s; s++) {							\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = (unsigned int)(*s - '0');				\
	if (acc > (limit - digit) / 10)					\
	    return -1;							\
	acc = acc * 10 + digit;						\
    }									\
    *v = (TYPE)acc;							\
    return 0;								\
}

PARSENUM(parseint, u_int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
446
447int
448main(int argc, char *argv[])
449{
450    struct stat sb;
451    char *p, *q, *s;
452    int c, e, r;
453
454    while ((c = getopt(argc, argv,
455		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
456	switch (c) {
457	case '1':
458	    once_flag = 1;
459	    break;
460	case '4':
461	    family = PF_INET;
462	    break;
463	case '6':
464	    family = PF_INET6;
465	    break;
466	case 'A':
467	    A_flag = 1;
468	    break;
469	case 'a':
470	    a_flag = 1;
471	    break;
472	case 'B':
473	    if (parsesize(optarg, &B_size) == -1)
474		errx(1, "invalid buffer size");
475	    break;
476	case 'b':
477	    warnx("warning: the -b option is deprecated");
478	    b_flag = 1;
479	    break;
480	case 'c':
481	    c_dirname = optarg;
482	    break;
483	case 'd':
484	    d_flag = 1;
485	    break;
486	case 'F':
487	    F_flag = 1;
488	    break;
489	case 'f':
490	    f_filename = optarg;
491	    break;
492	case 'H':
493	    H_flag = 1;
494	    break;
495	case 'h':
496	    h_hostname = optarg;
497	    break;
498	case 'l':
499	    l_flag = 1;
500	    break;
501	case 'o':
502	    o_flag = 1;
503	    o_filename = optarg;
504	    break;
505	case 'M':
506	case 'm':
507	    m_flag = 1;
508	    break;
509	case 'n':
510	    n_flag = 1;
511	    break;
512	case 'P':
513	case 'p':
514	    p_flag = 1;
515	    break;
516	case 'q':
517	    v_level = 0;
518	    break;
519	case 'R':
520	    R_flag = 1;
521	    break;
522	case 'r':
523	    r_flag = 1;
524	    break;
525	case 'S':
526	    if (parseoff(optarg, &S_size) == -1)
527		errx(1, "invalid size");
528	    break;
529	case 's':
530	    s_flag = 1;
531	    break;
532	case 'T':
533	    if (parseint(optarg, &T_secs) == -1)
534		errx(1, "invalid timeout");
535	    break;
536	case 't':
537	    t_flag = 1;
538	    warnx("warning: the -t option is deprecated");
539	    break;
540	case 'v':
541	    v_level++;
542	    break;
543	case 'w':
544	    a_flag = 1;
545	    if (parseint(optarg, &w_secs) == -1)
546		errx(1, "invalid delay");
547	    break;
548	default:
549	    usage();
550	    exit(EX_USAGE);
551	}
552
553    argc -= optind;
554    argv += optind;
555
556    if (h_hostname || f_filename || c_dirname) {
557	if (!h_hostname || !f_filename || argc) {
558	    usage();
559	    exit(EX_USAGE);
560	}
561	/* XXX this is a hack. */
562	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
563	    errx(1, "invalid hostname");
564	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
565		     c_dirname ? c_dirname : "", f_filename) == -1)
566	    errx(1, strerror(ENOMEM));
567	argc++;
568    }
569
570    if (!argc) {
571	usage();
572	exit(EX_USAGE);
573    }
574
575    /* allocate buffer */
576    if (B_size < MINBUFSIZE)
577	B_size = MINBUFSIZE;
578    if ((buf = malloc(B_size)) == NULL)
579	errx(1, strerror(ENOMEM));
580
581    /* timeout handling */
582    signal(SIGALRM, sig_handler);
583    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
584	if (parseint(s, &ftp_timeout) == -1) {
585	    warnx("FTP_TIMEOUT is not a positive integer");
586	    ftp_timeout = 0;
587	}
588    }
589    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
590	if (parseint(s, &http_timeout) == -1) {
591	    warnx("HTTP_TIMEOUT is not a positive integer");
592	    http_timeout = 0;
593	}
594    }
595
596    /* interrupt handling */
597    signal(SIGINT, sig_handler);
598
599    /* output file */
600    if (o_flag) {
601	if (strcmp(o_filename, "-") == 0) {
602	    o_stdout = 1;
603	} else if (stat(o_filename, &sb) == -1) {
604	    if (errno == ENOENT) {
605		if (argc > 1)
606		    errx(EX_USAGE, "%s is not a directory", o_filename);
607	    } else {
608		err(EX_IOERR, "%s", o_filename);
609	    }
610	} else {
611	    if (sb.st_mode & S_IFDIR)
612		o_directory = 1;
613	}
614    }
615
616    /* check if output is to a tty (for progress report) */
617    v_tty = isatty(STDERR_FILENO);
618    r = 0;
619
620    while (argc) {
621	if ((p = strrchr(*argv, '/')) == NULL)
622	    p = *argv;
623	else
624	    p++;
625
626	if (!*p)
627	    p = "fetch.out";
628
629	fetchLastErrCode = 0;
630
631	if (o_flag) {
632	    if (o_stdout) {
633		e = fetch(*argv, "-");
634	    } else if (o_directory) {
635		asprintf(&q, "%s/%s", o_filename, p);
636		e = fetch(*argv, q);
637		free(q);
638	    } else {
639		e = fetch(*argv, o_filename);
640	    }
641	} else {
642	    e = fetch(*argv, p);
643	}
644
645	if (sigint)
646	    exit(1);
647
648	if (e == 0 && once_flag)
649	    exit(0);
650
651	if (e) {
652	    r = 1;
653	    if ((fetchLastErrCode
654		 && fetchLastErrCode != FETCH_UNAVAIL
655		 && fetchLastErrCode != FETCH_MOVED
656		 && fetchLastErrCode != FETCH_URL
657		 && fetchLastErrCode != FETCH_RESOLV
658		 && fetchLastErrCode != FETCH_UNKNOWN)) {
659		if (w_secs) {
660		    if (v_level)
661			fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs);
662		    sleep(w_secs);
663		}
664		if (a_flag)
665		    continue;
666		fprintf(stderr, "Skipping %s\n", *argv);
667	    }
668	}
669
670	argc--, argv++;
671    }
672
673    exit(r);
674}
675