/* phttpget.c revision 158301 */
/*-
 * Copyright 2005 Colin Percival
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 158301 2006-05-05 04:47:00Z cperciva $");

#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netdb.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>
#include <unistd.h>

/* Proxy host taken from $HTTP_PROXY / $http_proxy, or NULL if unset */
static const char *	env_HTTP_PROXY;
/* Raw value of $HTTP_PROXY_AUTH; consumed in place by readenv() */
static char *		env_HTTP_PROXY_AUTH;
/* Value sent in the User-Agent header */
static const char *	env_HTTP_USER_AGENT;
/* Port (service string) of the proxy; only set when a proxy is in use */
static const char *	proxyport;
/* Complete "Proxy-Authorization: ...\r\n" header line, or NULL */
static char *		proxyauth;

/* Send/receive timeout applied to every socket we open */
static struct timeval	timo = { 15, 0};

/*
 * Print a usage message to stderr and exit with EX_USAGE.
 */
static void
usage(void)
{

	fprintf(stderr, "usage: phttpget server [file ...]\n");
	exit(EX_USAGE);
}

/*
 * Base64 encode a string; the string returned, if non-NULL, is
 * allocated using malloc() and must be freed by the caller.
 *
 * Returns NULL if the encoded length would overflow a size_t or if
 * malloc() fails.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return NULL;	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return NULL;
	ctext[ctlen - 1] = 0;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.
	 */
	for (pt = ptext, pc = ctext; ptlen; ptlen -= 3, pc += 4) {
		/*
		 * Read 3 bytes.  Each byte is read as unsigned so that
		 * values >= 0x80 do not sign-extend into (and corrupt)
		 * the bytes already accumulated in t.
		 */
		for (t = j = 0; j < 3; j++) {
			t <<= 8;
			if (j < ptlen)
				t += (unsigned char)*pt++;
		}

		/*
		 * Write 4 bytes.  A block holding k input bytes yields
		 * k + 1 data characters followed by 3 - k '=' pads, so
		 * position j carries data exactly when j <= ptlen.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= ptlen)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		/*
		 * If we're done, exit the loop (this also keeps the
		 * unsigned "ptlen -= 3" from wrapping around).
		 */
		if (ptlen <= 3)
			break;
	}

	return (ctext);
}

/*
 * Read the environment variables which control our behaviour and
 * store the results in the file-scope variables above:
 *
 *   HTTP_PROXY / http_proxy  [http://]host[:port][/...]; port defaults
 *                            to 3128.
 *   HTTP_PROXY_AUTH          basic:<realm>:<user>:<password>; only used
 *                            when a proxy is configured.
 *   HTTP_USER_AGENT          defaults to "phttpget/0.1".
 *
 * The proxy and proxy-auth strings are split in place, i.e. the
 * environment strings themselves are modified.  Exits via err() if
 * memory cannot be allocated.
 */
static void
readenv(void)
{
	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
	char *proxy_auth_user = NULL;
	char *proxy_auth_pass = NULL;

	env_HTTP_PROXY = getenv("HTTP_PROXY");
	if (env_HTTP_PROXY == NULL)
		env_HTTP_PROXY = getenv("http_proxy");
	if (env_HTTP_PROXY != NULL) {
		/* Strip the scheme and anything after the authority */
		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
			env_HTTP_PROXY += 7;
		p = strchr(env_HTTP_PROXY, '/');
		if (p != NULL)
			*p = 0;

		/* Separate host from port */
		p = strchr(env_HTTP_PROXY, ':');
		if (p != NULL) {
			*p = 0;
			proxyport = p + 1;
		} else
			proxyport = "3128";
	}

	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
	if ((env_HTTP_PROXY != NULL) &&
	    (env_HTTP_PROXY_AUTH != NULL) &&
	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
		/* Ignore authentication scheme */
		(void) strsep(&env_HTTP_PROXY_AUTH, ":");

		/* Ignore realm */
		(void) strsep(&env_HTTP_PROXY_AUTH, ":");

		/* Obtain username and password */
		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
		proxy_auth_pass = env_HTTP_PROXY_AUTH;
	}

	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
		/*
		 * Check asprintf's return value: the contents of the
		 * output pointer after a failure are not portable.
		 */
		if (asprintf(&proxy_auth_userpass, "%s:%s",
		    proxy_auth_user, proxy_auth_pass) == -1)
			err(1, "asprintf");

		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
		if (proxy_auth_userpass64 == NULL)
			err(1, "malloc");

		if (asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
		    proxy_auth_userpass64) == -1)
			err(1, "asprintf");

		free(proxy_auth_userpass);
		free(proxy_auth_userpass64);
	} else
		proxyauth = NULL;

	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
	if (env_HTTP_USER_AGENT == NULL)
		env_HTTP_USER_AGENT = "phttpget/0.1";
}

/*
 * Construct an HTTP/1.1 GET request for "path" on "server".
 *
 * When a proxy is in use the request target is the absolute URI
 * http://server/path and the Proxy-Authorization header (if any) is
 * included.  If connclose is non-zero a "Connection: Close" header is
 * added.
 *
 * On return *buf points to a malloc()ed request which the caller must
 * free; the return value is its length in bytes.  Exits via err() if
 * the request cannot be allocated.
 */
static int
makerequest(char ** buf, char * path, char * server, int connclose)
{
	int buflen;

	buflen = asprintf(buf,
	    "GET %s%s/%s HTTP/1.1\r\n"
	    "Host: %s\r\n"
	    "User-Agent: %s\r\n"
	    "%s"
	    "%s"
	    "\r\n",
	    env_HTTP_PROXY ? "http://" : "",
	    env_HTTP_PROXY ? server : "",
	    path, server, env_HTTP_USER_AGENT,
	    proxyauth ? proxyauth : "",
	    connclose ? "Connection: Close\r\n" : "");
	if (buflen == -1)
		err(1, "asprintf");
	return(buflen);
}

/*
 * Make sure that a complete CRLF-terminated line is buffered.
 *
 * resbuf must be a buffer of at least BUFSIZ bytes; the valid data in
 * it is resbuf[*resbufpos .. *resbuflen - 1].  More data is read from
 * the socket sd until a "\r\n" is found in the unconsumed data; the
 * unconsumed data may be moved to the start of the buffer to make
 * room, in which case *resbufpos and *resbuflen are updated.
 *
 * Returns 0 once a line is available starting at resbuf + *resbufpos,
 * or -1 if the buffer fills up without a line terminator, the peer
 * closes the connection, or recv() fails with anything but EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t len;

	while (strnstr(resbuf + *resbufpos, "\r\n",
	    *resbuflen - *resbufpos) == NULL) {
		/* Move buffered data to the start of the buffer */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* If the buffer is full, complain */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Read more data into the buffer */
		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
		if ((len == 0) ||
		    ((len == -1) && (errno != EINTR)))
			return -1;

		/* len == -1 here means EINTR: just try again */
		if (len != -1)
			*resbuflen += len;
	}

	return 0;
}

/*
 * Copy copylen bytes of message body to the file descriptor fd,
 * taking them first from the data already buffered in resbuf and then
 * from the socket sd.  If fd is -1 the data is read and discarded.
 *
 * resbuf must be at least BUFSIZ bytes long; *resbufpos and
 * *resbuflen are updated to describe the unconsumed buffered data.
 *
 * Returns 0 when copylen bytes have been consumed, -1 if recv() fails
 * (other than with EINTR), and -2 if the peer closes the connection
 * first.  Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t len;

	while (copylen) {
		/* Write data from resbuf to fd */
		len = *resbuflen - *resbufpos;
		if (copylen < len)
			len = copylen;
		if (len > 0) {
			if (fd != -1)
				len = write(fd, resbuf + *resbufpos, len);
			if (len == -1)
				err(1, "write");
			/* A short write just means another pass */
			*resbufpos += len;
			copylen -= len;
			continue;
		}

		/* Read more data into buffer */
		len = recv(sd, resbuf, BUFSIZ, 0);
		if (len == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		} else if (len == 0) {
			return -2;
		} else {
			*resbuflen = len;
			*resbufpos = 0;
		}
	}

	return 0;
}

/*
 * phttpget server [file ...]
 *
 * Fetch each named file from the server via HTTP (optionally through
 * a proxy), pipelining requests once the server has shown that it
 * speaks HTTP/1.1.  Each response with status 200 is stored in the
 * current directory under the last path component of its name; other
 * responses are read and discarded.  A one-line status report for
 * each response with a body is written to stderr.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1 ||
			    fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/* EAGAIN: socket is full for now */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, bailing out as soon
				 * as it is out of range so that a long run
				 * of digits cannot overflow an int.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names (and the "close" / "chunked"
			 * tokens) are case-insensitive, so match them
			 * that way.
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s\n",
				    argv[nres]);

			/* Use err() so that the reason is reported too */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/*
			 * Read chunks.  Each chunk is a size line (hex
			 * digits, optionally followed by extensions which
			 * we ignore), that many bytes of data, and a CRLF.
			 * The last chunk has size zero and no data.
			 */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* A chunk-size line must start with a digit */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) -
						    'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * Consume the CRLF which terminates the
				 * chunk data.  If it were left in the
				 * buffer it would be taken for an empty
				 * size line, ending the body after the
				 * first chunk.  readln() guarantees that
				 * at least the two bytes compared here
				 * are buffered.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					if (memcmp(resbuf + resbufpos,
					    "\r\n", 2) != 0)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.  (A return value of -2, i.e. EOF, is
			 * therefore the expected outcome here.)
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		/* Anything sent but not yet answered must be sent again */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* A pipelined connection may still be open at this point */
	if (sd != -1)
		close(sd);
	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}