1/* vi: set sw=4 ts=4: */ 2/* 3 * wget - retrieve a file using HTTP or FTP 4 * 5 * Chip Rosenthal Covad Communications <chip@laserlink.net> 6 * Licensed under GPLv2, see file LICENSE in this source tree. 7 * 8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org> 9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2. 10 */ 11 12//usage:#define wget_trivial_usage 13//usage: IF_FEATURE_WGET_LONG_OPTIONS( 14//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n" 15//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n" 16//usage: " [--no-check-certificate] [-U|--user-agent AGENT]" 17//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." 18//usage: ) 19//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS( 20//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]" 21//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..." 22//usage: ) 23//usage:#define wget_full_usage "\n\n" 24//usage: "Retrieve files via HTTP or FTP\n" 25//usage: "\n -s Spider mode - only check file existence" 26//usage: "\n -c Continue retrieval of aborted transfer" 27//usage: "\n -q Quiet" 28//usage: "\n -P DIR Save to DIR (default .)" 29//usage: IF_FEATURE_WGET_TIMEOUT( 30//usage: "\n -T SEC Network timeout is SEC seconds" 31//usage: ) 32//usage: "\n -O FILE Save to FILE ('-' for stdout)" 33//usage: "\n -U STR Use STR for User-Agent header" 34//usage: "\n -Y Use proxy ('on' or 'off')" 35 36#include "libbb.h" 37 38//#define log_io(...) bb_error_msg(__VA_ARGS__) 39#define log_io(...) ((void)0) 40 41 42struct host_info { 43 char *allocated; 44 const char *path; 45 const char *user; 46 char *host; 47 int port; 48 smallint is_ftp; 49}; 50 51 52/* Globals */ 53struct globals { 54 off_t content_len; /* Content-length of the file */ 55 off_t beg_range; /* Range at which continue begins */ 56#if ENABLE_FEATURE_WGET_STATUSBAR 57 off_t transferred; /* Number of bytes transferred so far */ 58 const char *curfile; /* Name of current file being transferred */ 59 bb_progress_t pmt; 60#endif 61 char *dir_prefix; 62#if ENABLE_FEATURE_WGET_LONG_OPTIONS 63 char *post_data; 64 char *extra_headers; 65#endif 66 char *fname_out; /* where to direct output (-O) */ 67 const char *proxy_flag; /* Use proxies if env vars are set */ 68 const char *user_agent; /* "User-Agent" header field */ 69#if ENABLE_FEATURE_WGET_TIMEOUT 70 unsigned timeout_seconds; 71#endif 72 int output_fd; 73 int o_flags; 74 smallint chunked; /* chunked transfer encoding */ 75 smallint got_clen; /* got content-length: from server */ 76 /* Local downloads do benefit from big buffer. 77 * With 512 byte buffer, it was measured to be 78 * an order of magnitude slower than with big one. 79 */ 80 uint64_t just_to_align_next_member; 81 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024]; 82} FIX_ALIASING; 83#define G (*ptr_to_globals) 84#define INIT_G() do { \ 85 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ 86} while (0) 87 88 89/* Must match option string! */ 90enum { 91 WGET_OPT_CONTINUE = (1 << 0), 92 WGET_OPT_SPIDER = (1 << 1), 93 WGET_OPT_QUIET = (1 << 2), 94 WGET_OPT_OUTNAME = (1 << 3), 95 WGET_OPT_PREFIX = (1 << 4), 96 WGET_OPT_PROXY = (1 << 5), 97 WGET_OPT_USER_AGENT = (1 << 6), 98 WGET_OPT_TIMEOUT = (1 << 7), 99 WGET_OPT_RETRIES = (1 << 8), 100 WGET_OPT_PASSIVE = (1 << 9), 101 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, 102 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, 103}; 104 105enum { 106 PROGRESS_START = -1, 107 PROGRESS_END = 0, 108 PROGRESS_BUMP = 1, 109}; 110#if ENABLE_FEATURE_WGET_STATUSBAR 111static void progress_meter(int flag) 112{ 113 if (option_mask32 & WGET_OPT_QUIET) 114 return; 115 116 if (flag == PROGRESS_START) 117 bb_progress_init(&G.pmt); 118 119 bb_progress_update(&G.pmt, 120 G.curfile, 121 G.beg_range, 122 G.transferred, 123 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len 124 ); 125 126 if (flag == PROGRESS_END) { 127 bb_putchar_stderr('\n'); 128 G.transferred = 0; 129 } 130} 131#else 132static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { } 133#endif 134 135 136/* IPv6 knows scoped address types i.e. link and site local addresses. Link 137 * local addresses can have a scope identifier to specify the 138 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope 139 * identifier is only valid on a single node. 140 * 141 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire, 142 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers 143 * in the Host header as invalid requests, see 144 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122 145 */ 146static void strip_ipv6_scope_id(char *host) 147{ 148 char *scope, *cp; 149 150 /* bbox wget actually handles IPv6 addresses without [], like 151 * wget "http://::1/xxx", but this is not standard. 152 * To save code, _here_ we do not support it. */ 153 154 if (host[0] != '[') 155 return; /* not IPv6 */ 156 157 scope = strchr(host, '%'); 158 if (!scope) 159 return; 160 161 /* Remove the IPv6 zone identifier from the host address */ 162 cp = strchr(host, ']'); 163 if (!cp || (cp[1] != ':' && cp[1] != '\0')) { 164 /* malformed address (not "[xx]:nn" or "[xx]") */ 165 return; 166 } 167 168 /* cp points to "]...", scope points to "%eth0]..." */ 169 overlapping_strcpy(scope, cp); 170} 171 172#if ENABLE_FEATURE_WGET_AUTHENTICATION 173/* Base64-encode character string. */ 174static char *base64enc(const char *str) 175{ 176 unsigned len = strlen(str); 177 if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */ 178 len = sizeof(G.wget_buf)/4*3 - 10; 179 bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64); 180 return G.wget_buf; 181} 182#endif 183 184static char* sanitize_string(char *s) 185{ 186 unsigned char *p = (void *) s; 187 while (*p >= ' ') 188 p++; 189 *p = '\0'; 190 return s; 191} 192 193#if ENABLE_FEATURE_WGET_TIMEOUT 194static void socket_timeout(int sig UNUSED_PARAM) 195{ 196 bb_error_msg_and_die("connect timed out"); 197} 198#endif 199 200static FILE *open_socket(len_and_sockaddr *lsa) 201{ 202 FILE *fp; 203 204#if ENABLE_FEATURE_WGET_TIMEOUT 205 /* Add a timeout for dead or inaccessible servers */ 206 if (option_mask32 & WGET_OPT_TIMEOUT) { 207 alarm(G.timeout_seconds); 208 signal(SIGALRM, socket_timeout); 209 } 210#endif 211 /* glibc 2.4 seems to try seeking on it - ??! */ 212 /* hopefully it understands what ESPIPE means... */ 213 fp = fdopen(xconnect_stream(lsa), "r+"); 214 if (fp == NULL) 215 bb_perror_msg_and_die(bb_msg_memory_exhausted); 216#if ENABLE_FEATURE_WGET_TIMEOUT 217 if (option_mask32 & WGET_OPT_TIMEOUT) 218 alarm(0); 219#endif 220 221 return fp; 222} 223 224/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */ 225static char fgets_and_trim(FILE *fp) 226{ 227 char c; 228 char *buf_ptr; 229 230 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL) 231 bb_perror_msg_and_die("error getting response"); 232 233 buf_ptr = strchrnul(G.wget_buf, '\n'); 234 c = *buf_ptr; 235 *buf_ptr = '\0'; 236 buf_ptr = strchrnul(G.wget_buf, '\r'); 237 *buf_ptr = '\0'; 238 239 log_io("< %s", G.wget_buf); 240 241 return c; 242} 243 244static int ftpcmd(const char *s1, const char *s2, FILE *fp) 245{ 246 int result; 247 if (s1) { 248 if (!s2) 249 s2 = ""; 250 fprintf(fp, "%s%s\r\n", s1, s2); 251 fflush(fp); 252 log_io("> %s%s", s1, s2); 253 } 254 255 do { 256 fgets_and_trim(fp); 257 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' '); 258 259 G.wget_buf[3] = '\0'; 260 result = xatoi_u(G.wget_buf); 261 G.wget_buf[3] = ' '; 262 return result; 263} 264 265static void parse_url(const char *src_url, struct host_info *h) 266{ 267 char *url, *p, *sp; 268 269 free(h->allocated); 270 h->allocated = url = xstrdup(src_url); 271 272 if (strncmp(url, "http://", 7) == 0) { 273 h->port = bb_lookup_port("http", "tcp", 80); 274 h->host = url + 7; 275 h->is_ftp = 0; 276 } else if (strncmp(url, "ftp://", 6) == 0) { 277 h->port = bb_lookup_port("ftp", "tcp", 21); 278 h->host = url + 6; 279 h->is_ftp = 1; 280 } else 281 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); 282 283 // FYI: 284 // "Real" wget 'http://busybox.net?var=a/b' sends this request: 285 // 'GET /?var=a/b HTTP 1.0' 286 // and saves 'index.html?var=a%2Fb' (we save 'b') 287 // wget 'http://busybox.net?login=john@doe': 288 // request: 'GET /?login=john@doe HTTP/1.0' 289 // saves: 'index.html?login=john@doe' (we save '?login=john@doe') 290 // wget 'http://busybox.net#test/test': 291 // request: 'GET / HTTP/1.0' 292 // saves: 'index.html' (we save 'test') 293 // 294 // We also don't add unique .N suffix if file exists... 295 sp = strchr(h->host, '/'); 296 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p; 297 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p; 298 if (!sp) { 299 h->path = ""; 300 } else if (*sp == '/') { 301 *sp = '\0'; 302 h->path = sp + 1; 303 } else { // '#' or '?' 304 // http://busybox.net?login=john@doe is a valid URL 305 // memmove converts to: 306 // http:/busybox.nett?login=john@doe... 307 memmove(h->host - 1, h->host, sp - h->host); 308 h->host--; 309 sp[-1] = '\0'; 310 h->path = sp; 311 } 312 313 // We used to set h->user to NULL here, but this interferes 314 // with handling of code 302 ("object was moved") 315 316 sp = strrchr(h->host, '@'); 317 if (sp != NULL) { 318 *sp = '\0'; 319 h->user = h->host; 320 h->host = sp + 1; 321 } 322 323 sp = h->host; 324} 325 326static char *gethdr(FILE *fp) 327{ 328 char *s, *hdrval; 329 int c; 330 331 /* *istrunc = 0; */ 332 333 /* retrieve header line */ 334 c = fgets_and_trim(fp); 335 336 /* end of the headers? */ 337 if (G.wget_buf[0] == '\0') 338 return NULL; 339 340 /* convert the header name to lower case */ 341 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) { 342 /* tolower for "A-Z", no-op for "0-9a-z-." */ 343 *s |= 0x20; 344 } 345 346 /* verify we are at the end of the header name */ 347 if (*s != ':') 348 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf)); 349 350 /* locate the start of the header value */ 351 *s++ = '\0'; 352 hdrval = skip_whitespace(s); 353 354 if (c != '\n') { 355 /* Rats! The buffer isn't big enough to hold the entire header value */ 356 while (c = getc(fp), c != EOF && c != '\n') 357 continue; 358 } 359 360 return hdrval; 361} 362 363static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa) 364{ 365 FILE *sfp; 366 char *str; 367 int port; 368 369 if (!target->user) 370 target->user = xstrdup("anonymous:busybox@"); 371 372 sfp = open_socket(lsa); 373 if (ftpcmd(NULL, NULL, sfp) != 220) 374 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4)); 375 376 /* 377 * Splitting username:password pair, 378 * trying to log in 379 */ 380 str = strchr(target->user, ':'); 381 if (str) 382 *str++ = '\0'; 383 switch (ftpcmd("USER ", target->user, sfp)) { 384 case 230: 385 break; 386 case 331: 387 if (ftpcmd("PASS ", str, sfp) == 230) 388 break; 389 /* fall through (failed login) */ 390 default: 391 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4)); 392 } 393 394 ftpcmd("TYPE I", NULL, sfp); 395 396 /* 397 * Querying file size 398 */ 399 if (ftpcmd("SIZE ", target->path, sfp) == 213) { 400 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10); 401 if (G.content_len < 0 || errno) { 402 bb_error_msg_and_die("SIZE value is garbage"); 403 } 404 G.got_clen = 1; 405 } 406 407 /* 408 * Entering passive mode 409 */ 410 if (ftpcmd("PASV", NULL, sfp) != 227) { 411 pasv_error: 412 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf)); 413 } 414 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] 415 // Server's IP is N1.N2.N3.N4 (we ignore it) 416 // Server's port for data connection is P1*256+P2 417 str = strrchr(G.wget_buf, ')'); 418 if (str) str[0] = '\0'; 419 str = strrchr(G.wget_buf, ','); 420 if (!str) goto pasv_error; 421 port = xatou_range(str+1, 0, 255); 422 *str = '\0'; 423 str = strrchr(G.wget_buf, ','); 424 if (!str) goto pasv_error; 425 port += xatou_range(str+1, 0, 255) * 256; 426 set_nport(lsa, htons(port)); 427 428 *dfpp = open_socket(lsa); 429 430 if (G.beg_range) { 431 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range); 432 if (ftpcmd(G.wget_buf, NULL, sfp) == 350) 433 G.content_len -= G.beg_range; 434 } 435 436 if (ftpcmd("RETR ", target->path, sfp) > 150) 437 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf)); 438 439 return sfp; 440} 441 442static void NOINLINE retrieve_file_data(FILE *dfp) 443{ 444#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT 445# if ENABLE_FEATURE_WGET_TIMEOUT 446 unsigned second_cnt; 447# endif 448 struct pollfd polldata; 449 450 polldata.fd = fileno(dfp); 451 polldata.events = POLLIN | POLLPRI; 452#endif 453 progress_meter(PROGRESS_START); 454 455 if (G.chunked) 456 goto get_clen; 457 458 /* Loops only if chunked */ 459 while (1) { 460 461#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT 462 /* Must use nonblocking I/O, otherwise fread will loop 463 * and *block* until it reads full buffer, 464 * which messes up progress bar and/or timeout logic. 465 * Because of nonblocking I/O, we need to dance 466 * very carefully around EAGAIN. See explanation at 467 * clearerr() call. 468 */ 469 ndelay_on(polldata.fd); 470#endif 471 while (1) { 472 int n; 473 unsigned rdsz; 474 475 rdsz = sizeof(G.wget_buf); 476 if (G.got_clen) { 477 if (G.content_len < (off_t)rdsz) { 478 if ((int)G.content_len <= 0) 479 break; 480 rdsz = (unsigned)G.content_len; 481 } 482 } 483 484#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT 485# if ENABLE_FEATURE_WGET_TIMEOUT 486 second_cnt = G.timeout_seconds; 487# endif 488 while (1) { 489 if (safe_poll(&polldata, 1, 1000) != 0) 490 break; /* error, EOF, or data is available */ 491# if ENABLE_FEATURE_WGET_TIMEOUT 492 if (second_cnt != 0 && --second_cnt == 0) { 493 progress_meter(PROGRESS_END); 494 bb_error_msg_and_die("download timed out"); 495 } 496# endif 497 /* Needed for "stalled" indicator */ 498 progress_meter(PROGRESS_BUMP); 499 } 500 501 /* fread internally uses read loop, which in our case 502 * is usually exited when we get EAGAIN. 503 * In this case, libc sets error marker on the stream. 504 * Need to clear it before next fread to avoid possible 505 * rare false positive ferror below. Rare because usually 506 * fread gets more than zero bytes, and we don't fall 507 * into if (n <= 0) ... 508 */ 509 clearerr(dfp); 510 errno = 0; 511#endif 512 n = fread(G.wget_buf, 1, rdsz, dfp); 513 /* man fread: 514 * If error occurs, or EOF is reached, the return value 515 * is a short item count (or zero). 516 * fread does not distinguish between EOF and error. 517 */ 518 if (n <= 0) { 519#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT 520 if (errno == EAGAIN) /* poll lied, there is no data? */ 521 continue; /* yes */ 522#endif 523 if (ferror(dfp)) 524 bb_perror_msg_and_die(bb_msg_read_error); 525 break; /* EOF, not error */ 526 } 527 528 xwrite(G.output_fd, G.wget_buf, n); 529 530#if ENABLE_FEATURE_WGET_STATUSBAR 531 G.transferred += n; 532 progress_meter(PROGRESS_BUMP); 533#endif 534 if (G.got_clen) { 535 G.content_len -= n; 536 if (G.content_len == 0) 537 break; 538 } 539 } 540#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT 541 clearerr(dfp); 542 ndelay_off(polldata.fd); /* else fgets can get very unhappy */ 543#endif 544 if (!G.chunked) 545 break; 546 547 fgets_and_trim(dfp); /* Eat empty line */ 548 get_clen: 549 fgets_and_trim(dfp); 550 G.content_len = STRTOOFF(G.wget_buf, NULL, 16); 551 /* FIXME: error check? */ 552 if (G.content_len == 0) 553 break; /* all done! */ 554 G.got_clen = 1; 555 } 556 557 /* Draw full bar and free its resources */ 558 G.chunked = 0; /* makes it show 100% even for chunked download */ 559 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */ 560 progress_meter(PROGRESS_END); 561} 562 563static void download_one_url(const char *url) 564{ 565 bool use_proxy; /* Use proxies if env vars are set */ 566 int redir_limit; 567 len_and_sockaddr *lsa; 568 FILE *sfp; /* socket to web/ftp server */ 569 FILE *dfp; /* socket to ftp server (data) */ 570 char *proxy = NULL; 571 char *fname_out_alloc; 572 struct host_info server; 573 struct host_info target; 574 575 server.allocated = NULL; 576 target.allocated = NULL; 577 server.user = NULL; 578 target.user = NULL; 579 580 parse_url(url, &target); 581 582 /* Use the proxy if necessary */ 583 use_proxy = (strcmp(G.proxy_flag, "off") != 0); 584 if (use_proxy) { 585 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy"); 586 use_proxy = (proxy && proxy[0]); 587 if (use_proxy) 588 parse_url(proxy, &server); 589 } 590 if (!use_proxy) { 591 server.port = target.port; 592 if (ENABLE_FEATURE_IPV6) { 593 //free(server.allocated); - can't be non-NULL 594 server.host = server.allocated = xstrdup(target.host); 595 } else { 596 server.host = target.host; 597 } 598 } 599 600 if (ENABLE_FEATURE_IPV6) 601 strip_ipv6_scope_id(target.host); 602 603 /* If there was no -O FILE, guess output filename */ 604 fname_out_alloc = NULL; 605 if (!(option_mask32 & WGET_OPT_OUTNAME)) { 606 G.fname_out = bb_get_last_path_component_nostrip(target.path); 607 /* handle "wget http://kernel.org//" */ 608 if (G.fname_out[0] == '/' || !G.fname_out[0]) 609 G.fname_out = (char*)"index.html"; 610 /* -P DIR is considered only if there was no -O FILE */ 611 else { 612 if (G.dir_prefix) 613 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out); 614 else { 615 /* redirects may free target.path later, need to make a copy */ 616 G.fname_out = fname_out_alloc = xstrdup(G.fname_out); 617 } 618 } 619 } 620#if ENABLE_FEATURE_WGET_STATUSBAR 621 G.curfile = bb_get_last_path_component_nostrip(G.fname_out); 622#endif 623 624 /* Determine where to start transfer */ 625 G.beg_range = 0; 626 if (option_mask32 & WGET_OPT_CONTINUE) { 627 G.output_fd = open(G.fname_out, O_WRONLY); 628 if (G.output_fd >= 0) { 629 G.beg_range = xlseek(G.output_fd, 0, SEEK_END); 630 } 631 /* File doesn't exist. We do not create file here yet. 632 * We are not sure it exists on remote side */ 633 } 634 635 redir_limit = 5; 636 resolve_lsa: 637 lsa = xhost2sockaddr(server.host, server.port); 638 if (!(option_mask32 & WGET_OPT_QUIET)) { 639 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa); 640 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s); 641 free(s); 642 } 643 establish_session: 644 /*G.content_len = 0; - redundant, got_clen = 0 is enough */ 645 G.got_clen = 0; 646 G.chunked = 0; 647 if (use_proxy || !target.is_ftp) { 648 /* 649 * HTTP session 650 */ 651 char *str; 652 int status; 653 654 /* Open socket to http server */ 655 sfp = open_socket(lsa); 656 657#ifdef CHECK_FULL_CONTENT_LEN 658 /* First, Send HTTP request to get the full size of the target file. */ 659 if(use_proxy){ 660 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n", 661 target.is_ftp?"f":"ht", 662 target.host, 663 target.path); 664 } 665 else{ 666 if(option_mask32 & WGET_OPT_POST_DATA) 667 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); 668 else 669 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); 670 } 671 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", target.host, G.user_agent); 672 fprintf(sfp, "Connection: close\r\n"); 673 674#if ENABLE_FEATURE_WGET_AUTHENTICATION 675 if(target.user) 676 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, base64enc(target.user)); 677 if(use_proxy && server.user) 678 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", base64enc(server.user)); 679#endif 680 fprintf(sfp, "\r\n"); 681 682 fflush(sfp); 683 684first_response: 685 fgets_and_trim(sfp); 686 687 str = G.wget_buf; 688 str = skip_non_whitespace(str); 689 str = skip_whitespace(str); 690 691 status = atoi(str); 692 switch(status){ 693 case 0: 694 case 100: 695 while(gethdr(sfp) != NULL) 696 /* eat all remaining headers */; 697 goto first_response; 698 case 200: 699 case 204: 700 break; 701 case 300: /* redirection */ 702 case 301: 703 case 302: 704 case 303: 705 break; 706 case 206: 707 if(G.beg_range) 708 break; 709 /* fall through */ 710 default: 711 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); 712 } 713 714 while((str = gethdr(sfp)) != NULL){ 715 static const char keywords[] ALIGN1 = "content-length\0"; 716 enum{ 717 KEY_content_length = 1 718 }; 719 smalluint key; 720 721 /* strip trailing whitespace */ 722 char *s = strchrnul(str, '\0')-1; 723 while(s >= str && (*s == ' ' || *s == '\t')){ 724 *s = '\0'; 725 s--; 726 } 727 key = index_in_strings(keywords, G.wget_buf)+1; 728 if(key == KEY_content_length){ 729 G.content_len = BB_STRTOOFF(str, NULL, 10); 730 if(G.content_len < 0 || errno) 731 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str)); 732 733 G.got_clen = 1; 734 break; 735 } 736 } 737 738 // Had already downloaded the full content. 739 if(G.beg_range == G.content_len){ 740 dfp = sfp; 741 free(lsa); 742 goto END_OF_DOWNLOAD; 743 } 744 745 fclose(sfp); 746 sfp = open_socket(lsa); 747#endif 748 749 /* Send HTTP request */ 750 if (use_proxy) { 751 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n", 752 target.is_ftp ? "f" : "ht", target.host, 753 target.path); 754 } else { 755 if (option_mask32 & WGET_OPT_POST_DATA) 756 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); 757 else 758 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); 759 } 760 761 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", 762 target.host, G.user_agent); 763 764 /* Ask server to close the connection as soon as we are done 765 * (IOW: we do not intend to send more requests) 766 */ 767 fprintf(sfp, "Connection: close\r\n"); 768 769#if ENABLE_FEATURE_WGET_AUTHENTICATION 770 if (target.user) { 771 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, 772 base64enc(target.user)); 773 } 774 if (use_proxy && server.user) { 775 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", 776 base64enc(server.user)); 777 } 778#endif 779 780 if (G.beg_range) 781 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range); 782 783#if ENABLE_FEATURE_WGET_LONG_OPTIONS 784 if (G.extra_headers) 785 fputs(G.extra_headers, sfp); 786 787 if (option_mask32 & WGET_OPT_POST_DATA) { 788 fprintf(sfp, 789 "Content-Type: application/x-www-form-urlencoded\r\n" 790 "Content-Length: %u\r\n" 791 "\r\n" 792 "%s", 793 (int) strlen(G.post_data), G.post_data 794 ); 795 } else 796#endif 797 { 798 fprintf(sfp, "\r\n"); 799 } 800 801 fflush(sfp); 802 803 /* 804 * Retrieve HTTP response line and check for "200" status code. 805 */ 806 read_response: 807 fgets_and_trim(sfp); 808 809 str = G.wget_buf; 810 str = skip_non_whitespace(str); 811 str = skip_whitespace(str); 812 // FIXME: no error check 813 // xatou wouldn't work: "200 OK" 814 status = atoi(str); 815 switch (status) { 816 case 0: 817 case 100: 818 while (gethdr(sfp) != NULL) 819 /* eat all remaining headers */; 820 goto read_response; 821 case 200: 822/* 823Response 204 doesn't say "null file", it says "metadata 824has changed but data didn't": 825 826"10.2.5 204 No Content 827The server has fulfilled the request but does not need to return 828an entity-body, and might want to return updated metainformation. 829The response MAY include new or updated metainformation in the form 830of entity-headers, which if present SHOULD be associated with 831the requested variant. 832 833If the client is a user agent, it SHOULD NOT change its document 834view from that which caused the request to be sent. This response 835is primarily intended to allow input for actions to take place 836without causing a change to the user agent's active document view, 837although any new or updated metainformation SHOULD be applied 838to the document currently in the user agent's active view. 839 840The 204 response MUST NOT include a message-body, and thus 841is always terminated by the first empty line after the header fields." 842 843However, in real world it was observed that some web servers 844(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. 845*/ 846 case 204: 847 break; 848 case 300: /* redirection */ 849 case 301: 850 case 302: 851 case 303: 852 break; 853 case 206: 854 if (G.beg_range) 855 break; 856 /* fall through */ 857 default: 858 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf)); 859 } 860 861 /* 862 * Retrieve HTTP headers. 863 */ 864 while ((str = gethdr(sfp)) != NULL) { 865 static const char keywords[] ALIGN1 = 866 "content-length\0""transfer-encoding\0""location\0"; 867 enum { 868 KEY_content_length = 1, KEY_transfer_encoding, KEY_location 869 }; 870 smalluint key; 871 872 /* gethdr converted "FOO:" string to lowercase */ 873 874 /* strip trailing whitespace */ 875 char *s = strchrnul(str, '\0') - 1; 876 while (s >= str && (*s == ' ' || *s == '\t')) { 877 *s = '\0'; 878 s--; 879 } 880 key = index_in_strings(keywords, G.wget_buf) + 1; 881 if (key == KEY_content_length) { 882 G.content_len = BB_STRTOOFF(str, NULL, 10); 883 if (G.content_len < 0 || errno) { 884 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str)); 885 } 886 G.got_clen = 1; 887 continue; 888 } 889 if (key == KEY_transfer_encoding) { 890 if (strcmp(str_tolower(str), "chunked") != 0) 891 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str)); 892 G.chunked = 1; 893 } 894 if (key == KEY_location && status >= 300) { 895 if (--redir_limit == 0) 896 bb_error_msg_and_die("too many redirections"); 897 fclose(sfp); 898 if (str[0] == '/') { 899 free(target.allocated); 900 target.path = target.allocated = xstrdup(str+1); 901 /* lsa stays the same: it's on the same server */ 902 } else { 903 parse_url(str, &target); 904 if (!use_proxy) { 905 free(server.allocated); 906 server.allocated = NULL; 907 server.host = target.host; 908 /* strip_ipv6_scope_id(target.host); - no! */ 909 /* we assume remote never gives us IPv6 addr with scope id */ 910 server.port = target.port; 911 free(lsa); 912 goto resolve_lsa; 913 } /* else: lsa stays the same: we use proxy */ 914 } 915 goto establish_session; 916 } 917 } 918// if (status >= 300) 919// bb_error_msg_and_die("bad redirection (no Location: header from server)"); 920 921 /* For HTTP, data is pumped over the same connection */ 922 dfp = sfp; 923 924 } else { 925 /* 926 * FTP session 927 */ 928 sfp = prepare_ftp_session(&dfp, &target, lsa); 929 } 930 931 free(lsa); 932 933 if (!(option_mask32 & WGET_OPT_SPIDER)) { 934 if (G.output_fd < 0) 935 G.output_fd = xopen(G.fname_out, G.o_flags); 936 retrieve_file_data(dfp); 937 if (!(option_mask32 & WGET_OPT_OUTNAME)) { 938 xclose(G.output_fd); 939 G.output_fd = -1; 940 } 941 } 942 943#ifdef CHECK_FULL_CONTENT_LEN 944END_OF_DOWNLOAD: 945#endif 946 if (dfp != sfp) { 947 /* It's ftp. Close data connection properly */ 948 fclose(dfp); 949 if (ftpcmd(NULL, NULL, sfp) != 226) 950 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4)); 951 /* ftpcmd("QUIT", NULL, sfp); - why bother? */ 952 } 953 fclose(sfp); 954 955 free(server.allocated); 956 free(target.allocated); 957 free(fname_out_alloc); 958} 959 960int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 961int wget_main(int argc UNUSED_PARAM, char **argv) 962{ 963#if ENABLE_FEATURE_WGET_LONG_OPTIONS 964 static const char wget_longopts[] ALIGN1 = 965 /* name, has_arg, val */ 966 "continue\0" No_argument "c" 967//FIXME: -s isn't --spider, it's --save-headers! 968 "spider\0" No_argument "s" 969 "quiet\0" No_argument "q" 970 "output-document\0" Required_argument "O" 971 "directory-prefix\0" Required_argument "P" 972 "proxy\0" Required_argument "Y" 973 "user-agent\0" Required_argument "U" 974#if ENABLE_FEATURE_WGET_TIMEOUT 975 "timeout\0" Required_argument "T" 976#endif 977 /* Ignored: */ 978 // "tries\0" Required_argument "t" 979 /* Ignored (we always use PASV): */ 980 "passive-ftp\0" No_argument "\xff" 981 "header\0" Required_argument "\xfe" 982 "post-data\0" Required_argument "\xfd" 983 /* Ignored (we don't do ssl) */ 984 "no-check-certificate\0" No_argument "\xfc" 985 ; 986#endif 987 988#if ENABLE_FEATURE_WGET_LONG_OPTIONS 989 llist_t *headers_llist = NULL; 990#endif 991 992 INIT_G(); 993 994 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) 995 G.proxy_flag = "on"; /* use proxies if env vars are set */ 996 G.user_agent = "Wget"; /* "User-Agent" header field */ 997 998#if ENABLE_FEATURE_WGET_LONG_OPTIONS 999 applet_long_options = wget_longopts; 1000#endif 1001 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); 1002 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:", 1003 &G.fname_out, &G.dir_prefix, 1004 &G.proxy_flag, &G.user_agent, 1005 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL), 1006 NULL /* -t RETRIES */ 1007 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) 1008 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data) 1009 ); 1010 argv += optind; 1011 1012#if ENABLE_FEATURE_WGET_LONG_OPTIONS 1013 if (headers_llist) { 1014 int size = 1; 1015 char *cp; 1016 llist_t *ll = headers_llist; 1017 while (ll) { 1018 size += strlen(ll->data) + 2; 1019 ll = ll->link; 1020 } 1021 G.extra_headers = cp = xmalloc(size); 1022 while (headers_llist) { 1023 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist)); 1024 } 1025 } 1026#endif 1027 1028 G.output_fd = -1; 1029 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; 1030 if (G.fname_out) { /* -O FILE ? */ 1031 if (LONE_DASH(G.fname_out)) { /* -O - ? */ 1032 G.output_fd = 1; 1033 option_mask32 &= ~WGET_OPT_CONTINUE; 1034 } 1035 /* compat with wget: -O FILE can overwrite */ 1036 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC; 1037 } 1038 1039 while (*argv) 1040 download_one_url(*argv++); 1041 1042 if (G.output_fd >= 0) 1043 xclose(G.output_fd); 1044 1045 return EXIT_SUCCESS; 1046} 1047