phttpget.c revision 154909
/*-
 * Copyright 2005 Colin Percival
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 154909 2006-01-27 14:42:15Z cperciva $");

#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netdb.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

/* Settings derived from the environment by readenv() */
static const char *	env_HTTP_PROXY;		/* Proxy host, or NULL */
static char *		env_HTTP_PROXY_AUTH;	/* Raw HTTP_PROXY_AUTH value */
static const char *	env_HTTP_USER_AGENT;	/* User-Agent header value */
static const char *	proxyport;		/* Proxy port (string) */
static char *		proxyauth;		/* Proxy-Authorization line, or NULL */

/* Send/receive timeout applied to every socket we open */
static struct timeval	timo = { 15, 0};

/*
 * Print a usage message to stderr and exit with EX_USAGE.
 */
static void
usage(void)
{

	fprintf(stderr, "usage: phttpget server [file ...]\n");
	exit(EX_USAGE);
}

/*
 * Base64 encode a string; the string returned, if non-NULL, is
 * allocated using malloc() and must be freed by the caller.
 * NULL is returned if the encoded length would overflow a size_t
 * or if malloc() fails.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return NULL;	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return NULL;
	ctext[ctlen - 1] = 0;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.
	 *
	 * The input is read through an unsigned char pointer: with a
	 * plain (possibly signed) char, a byte >= 0x80 would be sign
	 * extended and corrupt the upper bits of the 24-bit group.
	 */
	for (pt = (const unsigned char *)ptext, pc = ctext; ptlen;
	    ptlen -= 3, pc += 4) {
		/* Read 3 bytes */
		for (t = j = 0; j < 3; j++) {
			t <<= 8;
			if (j < ptlen)
				t += *pt++;
		}

		/* Write 4 bytes, padding with '=' past the end of input */
		for (j = 0; j < 4; j++) {
			if (j <= ptlen + 1)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		/* If we're done, exit the loop (avoids ptlen underflow) */
		if (ptlen <= 3)
			break;
	}

	return (ctext);
}

/*
 * Read HTTP_PROXY, HTTP_PROXY_AUTH and HTTP_USER_AGENT from the
 * environment and set the env_*, proxyport and proxyauth globals.
 *
 * HTTP_PROXY is split in place into host and port (default "3128");
 * HTTP_PROXY_AUTH is expected to look like "basic:realm:user:pass".
 * Exits via err() if memory cannot be allocated.
 */
static void
readenv(void)
{
	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
	char *proxy_auth_user = NULL;
	char *proxy_auth_pass = NULL;

	env_HTTP_PROXY = getenv("HTTP_PROXY");
	if (env_HTTP_PROXY != NULL) {
		/* Strip an optional scheme and anything after the host */
		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
			env_HTTP_PROXY += 7;
		p = strchr(env_HTTP_PROXY, '/');
		if (p != NULL)
			*p = 0;
		p = strchr(env_HTTP_PROXY, ':');
		if (p != NULL) {
			*p = 0;
			proxyport = p + 1;
		} else
			proxyport = "3128";
	}

	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
	if ((env_HTTP_PROXY != NULL) &&
	    (env_HTTP_PROXY_AUTH != NULL) &&
	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
		/* Ignore authentication scheme */
		(void) strsep(&env_HTTP_PROXY_AUTH, ":");

		/* Ignore realm */
		(void) strsep(&env_HTTP_PROXY_AUTH, ":");

		/* Obtain username and password */
		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
		proxy_auth_pass = strsep(&env_HTTP_PROXY_AUTH, ":");
	}

	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
		/*
		 * Check asprintf's return value rather than the output
		 * pointer, whose contents on failure are not guaranteed
		 * on every platform.
		 */
		if (asprintf(&proxy_auth_userpass, "%s:%s",
		    proxy_auth_user, proxy_auth_pass) == -1)
			err(1, "asprintf");

		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
		if (proxy_auth_userpass64 == NULL)
			err(1, "malloc");

		if (asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
		    proxy_auth_userpass64) == -1)
			err(1, "asprintf");

		free(proxy_auth_userpass);
		free(proxy_auth_userpass64);
	} else
		proxyauth = NULL;

	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
	if (env_HTTP_USER_AGENT == NULL)
		env_HTTP_USER_AGENT = "phttpget/0.1";
}

/*
 * Build an HTTP/1.1 GET request for "path" on "server" into a newly
 * allocated buffer stored in *buf (to be freed by the caller).  When a
 * proxy is in use the request line carries the absolute URL.  If
 * connclose is non-zero a "Connection: Close" header is added.
 * Returns the length of the request; exits via err() on failure.
 */
static int
makerequest(char ** buf, char * path, char * server, int connclose)
{
	int buflen;

	buflen = asprintf(buf,
	    "GET %s%s/%s HTTP/1.1\r\n"
	    "Host: %s\r\n"
	    "User-Agent: %s\r\n"
	    "%s"
	    "%s"
	    "\r\n",
	    env_HTTP_PROXY ? "http://" : "",
	    env_HTTP_PROXY ? server : "",
	    path, server, env_HTTP_USER_AGENT,
	    proxyauth ? proxyauth : "",
	    connclose ? "Connection: Close\r\n" : "");
	if (buflen == -1)
		err(1, "asprintf");
	return(buflen);
}

/*
 * Make sure the unread part of resbuf (bytes *resbufpos through
 * *resbuflen - 1) contains a complete "\r\n"-terminated line, reading
 * from socket sd as needed.  resbuf must have room for BUFSIZ bytes.
 * Returns 0 on success, or -1 if the buffer fills up without a line
 * terminator, the peer closes the connection, or recv() fails with an
 * error other than EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t len;

	while (strnstr(resbuf + *resbufpos, "\r\n",
	    *resbuflen - *resbufpos) == NULL) {
		/* Move buffered data to the start of the buffer */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* If the buffer is full, complain */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Read more data into the buffer */
		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
		if ((len == 0) ||
		    ((len == -1) && (errno != EINTR)))
			return -1;

		/* len == -1 here means EINTR: just try again */
		if (len != -1)
			*resbuflen += len;
	}

	return 0;
}

/*
 * Consume copylen bytes of message body, first from the data already
 * buffered in resbuf and then from socket sd, writing them to fd
 * unless fd is -1 (in which case the data is discarded).
 * Returns 0 on success, -1 if recv() fails, or -2 if the peer closes
 * the connection before copylen bytes have arrived.  Exits via err()
 * if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t len;

	while (copylen) {
		/* Write data from resbuf to fd */
		len = *resbuflen - *resbufpos;
		if (copylen < len)
			len = copylen;
		if (len > 0) {
			if (fd != -1)
				len = write(fd, resbuf + *resbufpos, len);
			if (len == -1)
				err(1, "write");
			/* A short write is fine; we loop for the rest */
			*resbufpos += len;
			copylen -= len;
			continue;
		}

		/* Read more data into buffer */
		len = recv(sd, resbuf, BUFSIZ, 0);
		if (len == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		} else if (len == 0) {
			return -2;
		} else {
			*resbuflen = len;
			*resbufpos = 0;
		}
	}

	return 0;
}

/*
 * Fetch each file named on the command line from the given server
 * over HTTP (optionally via HTTP_PROXY), pipelining requests when the
 * server speaks HTTP/1.1.  Bodies of 200 responses are saved in the
 * current directory under the last path component of the request;
 * other responses are read and discarded.  A status line for each
 * file is printed to stderr.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1)
				err(1, "fcntl");
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/* EAGAIN: socket full, finish later */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, giving up as soon
				 * as it exceeds the valid range so that a
				 * long run of digits cannot overflow.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names and these tokens are
			 * case-insensitive in HTTP, so match them that way.
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s\n",
				    argv[nres]);

			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Chunk-size line (hex, extensions ignored) */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) -
						    'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-empty chunk is
				 * followed by a CRLF which is not part of
				 * the data.  Consume it here; otherwise it
				 * would be mistaken for a zero-length chunk
				 * and all later chunks would be dropped.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					eolp = strnstr(hln, "\r\n",
					    resbuflen - resbufpos);
					if (eolp != hln)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket (copybytes returning -2 is the normal
			 * end of the body here).
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		/* Anything sent but not yet answered must be reissued */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* Release the connection if it is still open */
	if (sd != -1)
		close(sd);
	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}