1/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP or FTP
4 *
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
10 */
11
12//usage:#define wget_trivial_usage
13//usage:	IF_FEATURE_WGET_LONG_OPTIONS(
14//usage:       "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15//usage:       "	[--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16//usage:       "	[--no-check-certificate] [-U|--user-agent AGENT]"
17//usage:			IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
18//usage:	)
19//usage:	IF_NOT_FEATURE_WGET_LONG_OPTIONS(
20//usage:       "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
21//usage:			IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
22//usage:	)
23//usage:#define wget_full_usage "\n\n"
24//usage:       "Retrieve files via HTTP or FTP\n"
25//usage:     "\n	-s	Spider mode - only check file existence"
26//usage:     "\n	-c	Continue retrieval of aborted transfer"
27//usage:     "\n	-q	Quiet"
28//usage:     "\n	-P DIR	Save to DIR (default .)"
29//usage:	IF_FEATURE_WGET_TIMEOUT(
30//usage:     "\n	-T SEC	Network timeout is SEC seconds"
31//usage:	)
32//usage:     "\n	-O FILE	Save to FILE ('-' for stdout)"
33//usage:     "\n	-U STR	Use STR for User-Agent header"
34//usage:     "\n	-Y	Use proxy ('on' or 'off')"
35
36#include "libbb.h"
37
/* Protocol tracing hook used by fgets_and_trim() and ftpcmd().
 * It expands to nothing by default; enable the commented-out definition
 * (and remove the other one) to report traced lines via bb_error_msg().
 */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)
40
41
/* Pieces of one URL as split up by parse_url(). */
struct host_info {
	char *allocated;    /* heap copy of the URL; parse_url() frees the previous one */
	const char *path;   /* text after the host part (points into 'allocated' or at "") */
	const char *user;   /* text before the last '@' in the host part, if there was one */
	char       *host;   /* host part; parse_url() does not split off a ":port" suffix */
	int         port;   /* port looked up for the scheme (fallback 80 or 21) */
	smallint    is_ftp; /* 1 for "ftp://", 0 for "http://" */
};
50
51
/* Globals */
/* All applet state lives in one heap block, reached through the G macro
 * and allocated zero-filled by INIT_G().
 */
struct globals {
	off_t content_len;        /* Content-length of the file */
	off_t beg_range;          /* Range at which continue begins */
#if ENABLE_FEATURE_WGET_STATUSBAR
	off_t transferred;        /* Number of bytes transferred so far */
	const char *curfile;      /* Name of current file being transferred */
	bb_progress_t pmt;
#endif
        char *dir_prefix;       /* -P DIR argument */
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
        char *post_data;        /* --post-data argument */
        char *extra_headers;    /* all --header values, each terminated by "\r\n" */
#endif
        char *fname_out;        /* where to direct output (-O) */
        const char *proxy_flag; /* Use proxies if env vars are set */
        const char *user_agent; /* "User-Agent" header field */
#if ENABLE_FEATURE_WGET_TIMEOUT
	unsigned timeout_seconds; /* -T SEC; wget_main() presets it to 900 */
#endif
	int output_fd;            /* fd the download is written to, -1 while not open */
	int o_flags;              /* open() flags used when the output file is created */
	smallint chunked;         /* chunked transfer encoding */
	smallint got_clen;        /* got content-length: from server  */
	/* Local downloads do benefit from big buffer.
	 * With 512 byte buffer, it was measured to be
	 * an order of magnitude slower than with big one.
	 */
	uint64_t just_to_align_next_member;
	/* Shared scratch buffer: response lines, base64 output and file data */
	char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
} FIX_ALIASING;
/* Accessor for the single globals instance */
#define G (*ptr_to_globals)
/* Allocate the zero-initialized globals block; must run before any use of G */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
} while (0)
87
88
/* Must match option string! */
/* Bit N corresponds to the N-th option handed to getopt32() in wget_main():
 * first the short options "csqO:P:Y:U:T:t:", then the long-only ones.
 * The two long-option-only bits collapse to 0 when
 * ENABLE_FEATURE_WGET_LONG_OPTIONS is 0, so tests against them are
 * always false in that configuration.
 */
enum {
	WGET_OPT_CONTINUE   = (1 << 0),  /* -c */
	WGET_OPT_SPIDER     = (1 << 1),  /* -s */
	WGET_OPT_QUIET      = (1 << 2),  /* -q */
	WGET_OPT_OUTNAME    = (1 << 3),  /* -O FILE */
	WGET_OPT_PREFIX     = (1 << 4),  /* -P DIR */
	WGET_OPT_PROXY      = (1 << 5),  /* -Y on/off */
	WGET_OPT_USER_AGENT = (1 << 6),  /* -U AGENT */
	WGET_OPT_TIMEOUT    = (1 << 7),  /* -T SEC */
	WGET_OPT_RETRIES    = (1 << 8),  /* -t N (accepted, ignored) */
	WGET_OPT_PASSIVE    = (1 << 9),  /* --passive-ftp (accepted, ignored) */
	WGET_OPT_HEADER     = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,  /* --header */
	WGET_OPT_POST_DATA  = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,  /* --post-data */
};
104
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,  /* initialize the progress state, then draw */
	PROGRESS_END   = 0,   /* draw, print a newline, reset the byte counter */
	PROGRESS_BUMP  = 1,   /* just redraw */
};
110#if ENABLE_FEATURE_WGET_STATUSBAR
111static void progress_meter(int flag)
112{
113	if (option_mask32 & WGET_OPT_QUIET)
114		return;
115
116	if (flag == PROGRESS_START)
117		bb_progress_init(&G.pmt);
118
119	bb_progress_update(&G.pmt,
120			G.curfile,
121			G.beg_range,
122			G.transferred,
123			(G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
124	);
125
126	if (flag == PROGRESS_END) {
127		bb_putchar_stderr('\n');
128		G.transferred = 0;
129	}
130}
131#else
132static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
133#endif
134
135
136/* IPv6 knows scoped address types i.e. link and site local addresses. Link
137 * local addresses can have a scope identifier to specify the
138 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
139 * identifier is only valid on a single node.
140 *
141 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
142 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
143 * in the Host header as invalid requests, see
144 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
145 */
/* Remove the "%zone" part from a bracketed IPv6 host, in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 *
 * host: NUL-terminated, writable host string.
 *
 * The string is left untouched when it does not start with '[', has no
 * '%', is not of the form "[xx]" / "[xx]:nn", or when the '%' is not
 * located inside the brackets.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *zone;
	char *closing;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */
	if (host[0] != '[')
		return; /* not IPv6 */

	zone = strchr(host, '%');
	if (zone == NULL)
		return;

	closing = strchr(host, ']');
	if (closing == NULL || (closing[1] != ':' && closing[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' that comes after the ']' (e.g. "[::1]:%5") is not a zone id.
	 * Shifting the tail in that case would copy towards higher addresses
	 * and never reach a terminator, so leave such input alone.
	 */
	if (closing < zone)
		return;

	/* closing points to "]...", zone points to "%eth0]...":
	 * pull the tail (including its NUL) down over the zone id. */
	memmove(zone, closing, strlen(closing) + 1);
}
171
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode 'str' into the shared G.wget_buf and return that buffer.
 * Over-long input is silently cut so that the encoded form fits; the
 * result is overwritten by the next user of G.wget_buf.
 */
static char *base64enc(const char *str)
{
	const unsigned max_input = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
	unsigned input_len = strlen(str);

	if (input_len > max_input)
		input_len = max_input;

	bb_uuencode(G.wget_buf, str, input_len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
183
/* Cut 's' in place at its first byte below ' ' (any control character;
 * a string without one is left unchanged) and return 's'.
 * Bytes are compared as unsigned, so values >= 0x80 are kept.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur >= ' '; cur++)
		continue;
	*cur = '\0';

	return s;
}
192
#if ENABLE_FEATURE_WGET_TIMEOUT
/* SIGALRM handler installed by open_socket(): the alarm armed around the
 * connect attempt has expired, so report it and terminate.
 */
static void socket_timeout(int sig UNUSED_PARAM)
{
	bb_error_msg_and_die("connect timed out");
}
#endif
199
200static FILE *open_socket(len_and_sockaddr *lsa)
201{
202	FILE *fp;
203
204#if ENABLE_FEATURE_WGET_TIMEOUT
205	/* Add a timeout for dead or inaccessible servers */
206	if (option_mask32 & WGET_OPT_TIMEOUT) {
207		alarm(G.timeout_seconds);
208		signal(SIGALRM, socket_timeout);
209	}
210#endif
211	/* glibc 2.4 seems to try seeking on it - ??! */
212	/* hopefully it understands what ESPIPE means... */
213	fp = fdopen(xconnect_stream(lsa), "r+");
214	if (fp == NULL)
215		bb_perror_msg_and_die(bb_msg_memory_exhausted);
216#if ENABLE_FEATURE_WGET_TIMEOUT
217	if (option_mask32 & WGET_OPT_TIMEOUT)
218		alarm(0);
219#endif
220
221	return fp;
222}
223
224/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
225static char fgets_and_trim(FILE *fp)
226{
227	char c;
228	char *buf_ptr;
229
230	if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
231		bb_perror_msg_and_die("error getting response");
232
233	buf_ptr = strchrnul(G.wget_buf, '\n');
234	c = *buf_ptr;
235	*buf_ptr = '\0';
236	buf_ptr = strchrnul(G.wget_buf, '\r');
237	*buf_ptr = '\0';
238
239	log_io("< %s", G.wget_buf);
240
241	return c;
242}
243
244static int ftpcmd(const char *s1, const char *s2, FILE *fp)
245{
246	int result;
247	if (s1) {
248		if (!s2)
249			s2 = "";
250		fprintf(fp, "%s%s\r\n", s1, s2);
251		fflush(fp);
252		log_io("> %s%s", s1, s2);
253	}
254
255	do {
256		fgets_and_trim(fp);
257	} while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
258
259	G.wget_buf[3] = '\0';
260	result = xatoi_u(G.wget_buf);
261	G.wget_buf[3] = ' ';
262	return result;
263}
264
265static void parse_url(const char *src_url, struct host_info *h)
266{
267	char *url, *p, *sp;
268
269	free(h->allocated);
270	h->allocated = url = xstrdup(src_url);
271
272	if (strncmp(url, "http://", 7) == 0) {
273		h->port = bb_lookup_port("http", "tcp", 80);
274		h->host = url + 7;
275		h->is_ftp = 0;
276	} else if (strncmp(url, "ftp://", 6) == 0) {
277		h->port = bb_lookup_port("ftp", "tcp", 21);
278		h->host = url + 6;
279		h->is_ftp = 1;
280	} else
281		bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
282
283	// FYI:
284	// "Real" wget 'http://busybox.net?var=a/b' sends this request:
285	//   'GET /?var=a/b HTTP 1.0'
286	//   and saves 'index.html?var=a%2Fb' (we save 'b')
287	// wget 'http://busybox.net?login=john@doe':
288	//   request: 'GET /?login=john@doe HTTP/1.0'
289	//   saves: 'index.html?login=john@doe' (we save '?login=john@doe')
290	// wget 'http://busybox.net#test/test':
291	//   request: 'GET / HTTP/1.0'
292	//   saves: 'index.html' (we save 'test')
293	//
294	// We also don't add unique .N suffix if file exists...
295	sp = strchr(h->host, '/');
296	p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
297	p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
298	if (!sp) {
299		h->path = "";
300	} else if (*sp == '/') {
301		*sp = '\0';
302		h->path = sp + 1;
303	} else { // '#' or '?'
304		// http://busybox.net?login=john@doe is a valid URL
305		// memmove converts to:
306		// http:/busybox.nett?login=john@doe...
307		memmove(h->host - 1, h->host, sp - h->host);
308		h->host--;
309		sp[-1] = '\0';
310		h->path = sp;
311	}
312
313	// We used to set h->user to NULL here, but this interferes
314	// with handling of code 302 ("object was moved")
315
316	sp = strrchr(h->host, '@');
317	if (sp != NULL) {
318		*sp = '\0';
319		h->user = h->host;
320		h->host = sp + 1;
321	}
322
323	sp = h->host;
324}
325
326static char *gethdr(FILE *fp)
327{
328	char *s, *hdrval;
329	int c;
330
331	/* *istrunc = 0; */
332
333	/* retrieve header line */
334	c = fgets_and_trim(fp);
335
336	/* end of the headers? */
337	if (G.wget_buf[0] == '\0')
338		return NULL;
339
340	/* convert the header name to lower case */
341	for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
342		/* tolower for "A-Z", no-op for "0-9a-z-." */
343		*s |= 0x20;
344	}
345
346	/* verify we are at the end of the header name */
347	if (*s != ':')
348		bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
349
350	/* locate the start of the header value */
351	*s++ = '\0';
352	hdrval = skip_whitespace(s);
353
354	if (c != '\n') {
355		/* Rats! The buffer isn't big enough to hold the entire header value */
356		while (c = getc(fp), c != EOF && c != '\n')
357			continue;
358	}
359
360	return hdrval;
361}
362
363static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
364{
365	FILE *sfp;
366	char *str;
367	int port;
368
369	if (!target->user)
370		target->user = xstrdup("anonymous:busybox@");
371
372	sfp = open_socket(lsa);
373	if (ftpcmd(NULL, NULL, sfp) != 220)
374		bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
375
376	/*
377	 * Splitting username:password pair,
378	 * trying to log in
379	 */
380	str = strchr(target->user, ':');
381	if (str)
382		*str++ = '\0';
383	switch (ftpcmd("USER ", target->user, sfp)) {
384	case 230:
385		break;
386	case 331:
387		if (ftpcmd("PASS ", str, sfp) == 230)
388			break;
389		/* fall through (failed login) */
390	default:
391		bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
392	}
393
394	ftpcmd("TYPE I", NULL, sfp);
395
396	/*
397	 * Querying file size
398	 */
399	if (ftpcmd("SIZE ", target->path, sfp) == 213) {
400		G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
401		if (G.content_len < 0 || errno) {
402			bb_error_msg_and_die("SIZE value is garbage");
403		}
404		G.got_clen = 1;
405	}
406
407	/*
408	 * Entering passive mode
409	 */
410	if (ftpcmd("PASV", NULL, sfp) != 227) {
411 pasv_error:
412		bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
413	}
414	// Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
415	// Server's IP is N1.N2.N3.N4 (we ignore it)
416	// Server's port for data connection is P1*256+P2
417	str = strrchr(G.wget_buf, ')');
418	if (str) str[0] = '\0';
419	str = strrchr(G.wget_buf, ',');
420	if (!str) goto pasv_error;
421	port = xatou_range(str+1, 0, 255);
422	*str = '\0';
423	str = strrchr(G.wget_buf, ',');
424	if (!str) goto pasv_error;
425	port += xatou_range(str+1, 0, 255) * 256;
426	set_nport(lsa, htons(port));
427
428	*dfpp = open_socket(lsa);
429
430	if (G.beg_range) {
431		sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
432		if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
433			G.content_len -= G.beg_range;
434	}
435
436	if (ftpcmd("RETR ", target->path, sfp) > 150)
437		bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
438
439	return sfp;
440}
441
442static void NOINLINE retrieve_file_data(FILE *dfp)
443{
444#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
445# if ENABLE_FEATURE_WGET_TIMEOUT
446	unsigned second_cnt;
447# endif
448	struct pollfd polldata;
449
450	polldata.fd = fileno(dfp);
451	polldata.events = POLLIN | POLLPRI;
452#endif
453	progress_meter(PROGRESS_START);
454
455	if (G.chunked)
456		goto get_clen;
457
458	/* Loops only if chunked */
459	while (1) {
460
461#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
462		/* Must use nonblocking I/O, otherwise fread will loop
463		 * and *block* until it reads full buffer,
464		 * which messes up progress bar and/or timeout logic.
465		 * Because of nonblocking I/O, we need to dance
466		 * very carefully around EAGAIN. See explanation at
467		 * clearerr() call.
468		 */
469		ndelay_on(polldata.fd);
470#endif
471		while (1) {
472			int n;
473			unsigned rdsz;
474
475			rdsz = sizeof(G.wget_buf);
476			if (G.got_clen) {
477				if (G.content_len < (off_t)rdsz) {
478					if ((int)G.content_len <= 0)
479						break;
480					rdsz = (unsigned)G.content_len;
481				}
482			}
483
484#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
485# if ENABLE_FEATURE_WGET_TIMEOUT
486			second_cnt = G.timeout_seconds;
487# endif
488			while (1) {
489				if (safe_poll(&polldata, 1, 1000) != 0)
490					break; /* error, EOF, or data is available */
491# if ENABLE_FEATURE_WGET_TIMEOUT
492				if (second_cnt != 0 && --second_cnt == 0) {
493					progress_meter(PROGRESS_END);
494					bb_error_msg_and_die("download timed out");
495				}
496# endif
497				/* Needed for "stalled" indicator */
498				progress_meter(PROGRESS_BUMP);
499			}
500
501			/* fread internally uses read loop, which in our case
502			 * is usually exited when we get EAGAIN.
503			 * In this case, libc sets error marker on the stream.
504			 * Need to clear it before next fread to avoid possible
505			 * rare false positive ferror below. Rare because usually
506			 * fread gets more than zero bytes, and we don't fall
507			 * into if (n <= 0) ...
508			 */
509			clearerr(dfp);
510			errno = 0;
511#endif
512			n = fread(G.wget_buf, 1, rdsz, dfp);
513			/* man fread:
514			 * If error occurs, or EOF is reached, the return value
515			 * is a short item count (or zero).
516			 * fread does not distinguish between EOF and error.
517			 */
518			if (n <= 0) {
519#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
520				if (errno == EAGAIN) /* poll lied, there is no data? */
521					continue; /* yes */
522#endif
523				if (ferror(dfp))
524					bb_perror_msg_and_die(bb_msg_read_error);
525				break; /* EOF, not error */
526			}
527
528			xwrite(G.output_fd, G.wget_buf, n);
529
530#if ENABLE_FEATURE_WGET_STATUSBAR
531			G.transferred += n;
532			progress_meter(PROGRESS_BUMP);
533#endif
534			if (G.got_clen) {
535				G.content_len -= n;
536				if (G.content_len == 0)
537					break;
538			}
539		}
540#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
541		clearerr(dfp);
542		ndelay_off(polldata.fd); /* else fgets can get very unhappy */
543#endif
544		if (!G.chunked)
545			break;
546
547		fgets_and_trim(dfp); /* Eat empty line */
548 get_clen:
549		fgets_and_trim(dfp);
550		G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
551		/* FIXME: error check? */
552		if (G.content_len == 0)
553			break; /* all done! */
554		G.got_clen = 1;
555	}
556
557	/* Draw full bar and free its resources */
558	G.chunked = 0;  /* makes it show 100% even for chunked download */
559	G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
560	progress_meter(PROGRESS_END);
561}
562
563static void download_one_url(const char *url)
564{
565	bool use_proxy;                 /* Use proxies if env vars are set  */
566	int redir_limit;
567	len_and_sockaddr *lsa;
568	FILE *sfp;                      /* socket to web/ftp server         */
569	FILE *dfp;                      /* socket to ftp server (data)      */
570	char *proxy = NULL;
571	char *fname_out_alloc;
572	struct host_info server;
573	struct host_info target;
574
575	server.allocated = NULL;
576	target.allocated = NULL;
577	server.user = NULL;
578	target.user = NULL;
579
580	parse_url(url, &target);
581
582	/* Use the proxy if necessary */
583	use_proxy = (strcmp(G.proxy_flag, "off") != 0);
584	if (use_proxy) {
585		proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
586		use_proxy = (proxy && proxy[0]);
587		if (use_proxy)
588			parse_url(proxy, &server);
589	}
590	if (!use_proxy) {
591		server.port = target.port;
592		if (ENABLE_FEATURE_IPV6) {
593			//free(server.allocated); - can't be non-NULL
594			server.host = server.allocated = xstrdup(target.host);
595		} else {
596			server.host = target.host;
597		}
598	}
599
600	if (ENABLE_FEATURE_IPV6)
601		strip_ipv6_scope_id(target.host);
602
603	/* If there was no -O FILE, guess output filename */
604	fname_out_alloc = NULL;
605	if (!(option_mask32 & WGET_OPT_OUTNAME)) {
606		G.fname_out = bb_get_last_path_component_nostrip(target.path);
607		/* handle "wget http://kernel.org//" */
608		if (G.fname_out[0] == '/' || !G.fname_out[0])
609			G.fname_out = (char*)"index.html";
610		/* -P DIR is considered only if there was no -O FILE */
611		else {
612			if (G.dir_prefix)
613				G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
614			else {
615				/* redirects may free target.path later, need to make a copy */
616				G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
617			}
618		}
619	}
620#if ENABLE_FEATURE_WGET_STATUSBAR
621	G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
622#endif
623
624	/* Determine where to start transfer */
625	G.beg_range = 0;
626	if (option_mask32 & WGET_OPT_CONTINUE) {
627		G.output_fd = open(G.fname_out, O_WRONLY);
628		if (G.output_fd >= 0) {
629			G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
630		}
631		/* File doesn't exist. We do not create file here yet.
632		 * We are not sure it exists on remote side */
633	}
634
635	redir_limit = 5;
636 resolve_lsa:
637	lsa = xhost2sockaddr(server.host, server.port);
638	if (!(option_mask32 & WGET_OPT_QUIET)) {
639		char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
640		fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
641		free(s);
642	}
643 establish_session:
644	/*G.content_len = 0; - redundant, got_clen = 0 is enough */
645	G.got_clen = 0;
646	G.chunked = 0;
647	if (use_proxy || !target.is_ftp) {
648		/*
649		 *  HTTP session
650		 */
651		char *str;
652		int status;
653
654		/* Open socket to http server */
655		sfp = open_socket(lsa);
656
657#ifdef CHECK_FULL_CONTENT_LEN
658		/* First, Send HTTP request to get the full size of the target file. */
659		if(use_proxy){
660			fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
661					target.is_ftp?"f":"ht",
662					target.host,
663					target.path);
664		}
665		else{
666			if(option_mask32 & WGET_OPT_POST_DATA)
667				fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
668			else
669				fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
670		}
671		fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", target.host, G.user_agent);
672		fprintf(sfp, "Connection: close\r\n");
673
674#if ENABLE_FEATURE_WGET_AUTHENTICATION
675		if(target.user)
676			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, base64enc(target.user));
677		if(use_proxy && server.user)
678			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", base64enc(server.user));
679#endif
680		fprintf(sfp, "\r\n");
681
682		fflush(sfp);
683
684first_response:
685		fgets_and_trim(sfp);
686
687		str = G.wget_buf;
688		str = skip_non_whitespace(str);
689		str = skip_whitespace(str);
690
691		status = atoi(str);
692		switch(status){
693			case 0:
694			case 100:
695				while(gethdr(sfp) != NULL)
696					/* eat all remaining headers */;
697					goto first_response;
698			case 200:
699			case 204:
700				break;
701			case 300:  /* redirection */
702			case 301:
703			case 302:
704			case 303:
705				break;
706			case 206:
707				if(G.beg_range)
708					break;
709			/* fall through */
710			default:
711				bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
712		}
713
714		while((str = gethdr(sfp)) != NULL){
715			static const char keywords[] ALIGN1 = "content-length\0";
716			enum{
717				KEY_content_length = 1
718			};
719			smalluint key;
720
721			/* strip trailing whitespace */
722			char *s = strchrnul(str, '\0')-1;
723			while(s >= str && (*s == ' ' || *s == '\t')){
724				*s = '\0';
725				s--;
726			}
727			key = index_in_strings(keywords, G.wget_buf)+1;
728			if(key == KEY_content_length){
729				G.content_len = BB_STRTOOFF(str, NULL, 10);
730				if(G.content_len < 0 || errno)
731					bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
732
733				G.got_clen = 1;
734				break;
735			}
736		}
737
738		// Had already downloaded the full content.
739		if(G.beg_range == G.content_len){
740			dfp = sfp;
741			free(lsa);
742			goto END_OF_DOWNLOAD;
743		}
744
745		fclose(sfp);
746		sfp = open_socket(lsa);
747#endif
748
749		/* Send HTTP request */
750		if (use_proxy) {
751			fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
752				target.is_ftp ? "f" : "ht", target.host,
753				target.path);
754		} else {
755			if (option_mask32 & WGET_OPT_POST_DATA)
756				fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
757			else
758				fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
759		}
760
761		fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
762			target.host, G.user_agent);
763
764		/* Ask server to close the connection as soon as we are done
765		 * (IOW: we do not intend to send more requests)
766		 */
767		fprintf(sfp, "Connection: close\r\n");
768
769#if ENABLE_FEATURE_WGET_AUTHENTICATION
770		if (target.user) {
771			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
772				base64enc(target.user));
773		}
774		if (use_proxy && server.user) {
775			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
776				base64enc(server.user));
777		}
778#endif
779
780		if (G.beg_range)
781			fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
782
783#if ENABLE_FEATURE_WGET_LONG_OPTIONS
784		if (G.extra_headers)
785			fputs(G.extra_headers, sfp);
786
787		if (option_mask32 & WGET_OPT_POST_DATA) {
788			fprintf(sfp,
789				"Content-Type: application/x-www-form-urlencoded\r\n"
790				"Content-Length: %u\r\n"
791				"\r\n"
792				"%s",
793				(int) strlen(G.post_data), G.post_data
794			);
795		} else
796#endif
797		{
798			fprintf(sfp, "\r\n");
799		}
800
801		fflush(sfp);
802
803		/*
804		 * Retrieve HTTP response line and check for "200" status code.
805		 */
806 read_response:
807		fgets_and_trim(sfp);
808
809		str = G.wget_buf;
810		str = skip_non_whitespace(str);
811		str = skip_whitespace(str);
812		// FIXME: no error check
813		// xatou wouldn't work: "200 OK"
814		status = atoi(str);
815		switch (status) {
816		case 0:
817		case 100:
818			while (gethdr(sfp) != NULL)
819				/* eat all remaining headers */;
820			goto read_response;
821		case 200:
822/*
823Response 204 doesn't say "null file", it says "metadata
824has changed but data didn't":
825
826"10.2.5 204 No Content
827The server has fulfilled the request but does not need to return
828an entity-body, and might want to return updated metainformation.
829The response MAY include new or updated metainformation in the form
830of entity-headers, which if present SHOULD be associated with
831the requested variant.
832
833If the client is a user agent, it SHOULD NOT change its document
834view from that which caused the request to be sent. This response
835is primarily intended to allow input for actions to take place
836without causing a change to the user agent's active document view,
837although any new or updated metainformation SHOULD be applied
838to the document currently in the user agent's active view.
839
840The 204 response MUST NOT include a message-body, and thus
841is always terminated by the first empty line after the header fields."
842
843However, in real world it was observed that some web servers
844(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
845*/
846		case 204:
847			break;
848		case 300:  /* redirection */
849		case 301:
850		case 302:
851		case 303:
852			break;
853		case 206:
854			if (G.beg_range)
855				break;
856			/* fall through */
857		default:
858			bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
859		}
860
861		/*
862		 * Retrieve HTTP headers.
863		 */
864		while ((str = gethdr(sfp)) != NULL) {
865			static const char keywords[] ALIGN1 =
866				"content-length\0""transfer-encoding\0""location\0";
867			enum {
868				KEY_content_length = 1, KEY_transfer_encoding, KEY_location
869			};
870			smalluint key;
871
872			/* gethdr converted "FOO:" string to lowercase */
873
874			/* strip trailing whitespace */
875			char *s = strchrnul(str, '\0') - 1;
876			while (s >= str && (*s == ' ' || *s == '\t')) {
877				*s = '\0';
878				s--;
879			}
880			key = index_in_strings(keywords, G.wget_buf) + 1;
881			if (key == KEY_content_length) {
882				G.content_len = BB_STRTOOFF(str, NULL, 10);
883				if (G.content_len < 0 || errno) {
884					bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
885				}
886				G.got_clen = 1;
887				continue;
888			}
889			if (key == KEY_transfer_encoding) {
890				if (strcmp(str_tolower(str), "chunked") != 0)
891					bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
892				G.chunked = 1;
893			}
894			if (key == KEY_location && status >= 300) {
895				if (--redir_limit == 0)
896					bb_error_msg_and_die("too many redirections");
897				fclose(sfp);
898				if (str[0] == '/') {
899					free(target.allocated);
900					target.path = target.allocated = xstrdup(str+1);
901					/* lsa stays the same: it's on the same server */
902				} else {
903					parse_url(str, &target);
904					if (!use_proxy) {
905						free(server.allocated);
906						server.allocated = NULL;
907						server.host = target.host;
908						/* strip_ipv6_scope_id(target.host); - no! */
909						/* we assume remote never gives us IPv6 addr with scope id */
910						server.port = target.port;
911						free(lsa);
912						goto resolve_lsa;
913					} /* else: lsa stays the same: we use proxy */
914				}
915				goto establish_session;
916			}
917		}
918//		if (status >= 300)
919//			bb_error_msg_and_die("bad redirection (no Location: header from server)");
920
921		/* For HTTP, data is pumped over the same connection */
922		dfp = sfp;
923
924	} else {
925		/*
926		 *  FTP session
927		 */
928		sfp = prepare_ftp_session(&dfp, &target, lsa);
929	}
930
931	free(lsa);
932
933	if (!(option_mask32 & WGET_OPT_SPIDER)) {
934		if (G.output_fd < 0)
935			G.output_fd = xopen(G.fname_out, G.o_flags);
936		retrieve_file_data(dfp);
937		if (!(option_mask32 & WGET_OPT_OUTNAME)) {
938			xclose(G.output_fd);
939			G.output_fd = -1;
940		}
941	}
942
943#ifdef CHECK_FULL_CONTENT_LEN
944END_OF_DOWNLOAD:
945#endif
946	if (dfp != sfp) {
947		/* It's ftp. Close data connection properly */
948		fclose(dfp);
949		if (ftpcmd(NULL, NULL, sfp) != 226)
950			bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
951		/* ftpcmd("QUIT", NULL, sfp); - why bother? */
952	}
953	fclose(sfp);
954
955	free(server.allocated);
956	free(target.allocated);
957	free(fname_out_alloc);
958}
959
960int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
961int wget_main(int argc UNUSED_PARAM, char **argv)
962{
963#if ENABLE_FEATURE_WGET_LONG_OPTIONS
964	static const char wget_longopts[] ALIGN1 =
965		/* name, has_arg, val */
966		"continue\0"         No_argument       "c"
967//FIXME: -s isn't --spider, it's --save-headers!
968		"spider\0"           No_argument       "s"
969		"quiet\0"            No_argument       "q"
970		"output-document\0"  Required_argument "O"
971		"directory-prefix\0" Required_argument "P"
972		"proxy\0"            Required_argument "Y"
973		"user-agent\0"       Required_argument "U"
974#if ENABLE_FEATURE_WGET_TIMEOUT
975		"timeout\0"          Required_argument "T"
976#endif
977		/* Ignored: */
978		// "tries\0"            Required_argument "t"
979		/* Ignored (we always use PASV): */
980		"passive-ftp\0"      No_argument       "\xff"
981		"header\0"           Required_argument "\xfe"
982		"post-data\0"        Required_argument "\xfd"
983		/* Ignored (we don't do ssl) */
984		"no-check-certificate\0" No_argument   "\xfc"
985		;
986#endif
987
988#if ENABLE_FEATURE_WGET_LONG_OPTIONS
989	llist_t *headers_llist = NULL;
990#endif
991
992	INIT_G();
993
994	IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
995	G.proxy_flag = "on";   /* use proxies if env vars are set */
996	G.user_agent = "Wget"; /* "User-Agent" header field */
997
998#if ENABLE_FEATURE_WGET_LONG_OPTIONS
999	applet_long_options = wget_longopts;
1000#endif
1001	opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
1002	getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
1003		&G.fname_out, &G.dir_prefix,
1004		&G.proxy_flag, &G.user_agent,
1005		IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1006		NULL /* -t RETRIES */
1007		IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1008		IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1009	);
1010	argv += optind;
1011
1012#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1013	if (headers_llist) {
1014		int size = 1;
1015		char *cp;
1016		llist_t *ll = headers_llist;
1017		while (ll) {
1018			size += strlen(ll->data) + 2;
1019			ll = ll->link;
1020		}
1021		G.extra_headers = cp = xmalloc(size);
1022		while (headers_llist) {
1023			cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
1024		}
1025	}
1026#endif
1027
1028	G.output_fd = -1;
1029	G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1030	if (G.fname_out) { /* -O FILE ? */
1031		if (LONE_DASH(G.fname_out)) { /* -O - ? */
1032			G.output_fd = 1;
1033			option_mask32 &= ~WGET_OPT_CONTINUE;
1034		}
1035		/* compat with wget: -O FILE can overwrite */
1036		G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1037	}
1038
1039	while (*argv)
1040		download_one_url(*argv++);
1041
1042	if (G.output_fd >= 0)
1043		xclose(G.output_fd);
1044
1045	return EXIT_SUCCESS;
1046}
1047