/* phttpget.c revision 150461 */
/*-
 * Copyright 2005 Colin Percival
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 150461 2005-09-22 07:11:27Z cperciva $");

#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netdb.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>
#include <unistd.h>

/* Proxy host taken from $HTTP_PROXY (scheme, port and path stripped). */
static const char *	env_HTTP_PROXY;
/* Raw $HTTP_PROXY_AUTH value; consumed in place by readenv(). */
static char *		env_HTTP_PROXY_AUTH;
/* Value sent in the User-Agent header. */
static const char *	env_HTTP_USER_AGENT;
/* Proxy port taken from $HTTP_PROXY, or "3128" if none was given. */
static const char *	proxyport;
/* Complete "Proxy-Authorization: ...\r\n" header line, or NULL. */
static char *		proxyauth;

/* Send and receive timeout applied to every socket. */
static struct timeval	timo = { 15, 0};

/*
 * Print a usage message to stderr and exit with EX_USAGE.
 */
static void
usage(void)
{

	fprintf(stderr, "usage: phttpget server [file ...]\n");
	exit(EX_USAGE);
}

/*
 * Base64 encode a string; the string returned, if non-NULL, is
 * allocated using malloc() and must be freed by the caller.
 * NULL is returned if the encoded length would overflow a size_t or
 * if malloc() fails.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return (NULL);	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return (NULL);
	ctext[ctlen - 1] = 0;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.
	 * The input is read as unsigned bytes so that values >= 0x80
	 * are not sign-extended into the accumulator.
	 */
	for (pt = (const unsigned char *)ptext, pc = ctext; ptlen;
	    ptlen -= 3, pc += 4) {
		/* Read 3 bytes */
		for (t = j = 0; j < 3; j++) {
			t <<= 8;
			if (j < ptlen)
				t += *pt++;
		}

		/*
		 * Write 4 bytes: n input bytes produce n + 1 encoded
		 * characters, and the remainder of the group is '='.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= ptlen)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		/* If we're done, exit the loop */
		if (ptlen <= 3)
			break;
	}

	return (ctext);
}

/*
 * Return non-zero if needle occurs anywhere in haystack, comparing
 * without regard to case.
 */
static int
contains_nocase(const char *haystack, const char *needle)
{
	size_t nlen;

	nlen = strlen(needle);
	for (; *haystack != '\0'; haystack++) {
		if (strncasecmp(haystack, needle, nlen) == 0)
			return (1);
	}

	return (0);
}

/*
 * Read HTTP_PROXY, HTTP_PROXY_AUTH and HTTP_USER_AGENT from the
 * environment and set up the corresponding file-scope variables.
 * The environment strings for HTTP_PROXY and HTTP_PROXY_AUTH are
 * modified in place.  Exits via err() if memory cannot be allocated.
 */
static void
readenv(void)
{
	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
	char *proxy_auth_user = NULL;
	char *proxy_auth_pass = NULL;

	env_HTTP_PROXY = getenv("HTTP_PROXY");
	if (env_HTTP_PROXY != NULL) {
		/* Strip an optional scheme and anything after the host */
		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
			env_HTTP_PROXY += 7;
		p = strchr(env_HTTP_PROXY, '/');
		if (p != NULL)
			*p = 0;

		/* Split off the port, if one is given */
		p = strchr(env_HTTP_PROXY, ':');
		if (p != NULL) {
			*p = 0;
			proxyport = p + 1;
		} else
			proxyport = "3128";
	}

	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
	if ((env_HTTP_PROXY != NULL) &&
	    (env_HTTP_PROXY_AUTH != NULL) &&
	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
		/* Ignore authentication scheme */
		(void) strsep(&env_HTTP_PROXY_AUTH, ":");

		/* Ignore realm */
		(void) strsep(&env_HTTP_PROXY_AUTH, ":");

		/* Obtain username and password */
		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
		proxy_auth_pass = strsep(&env_HTTP_PROXY_AUTH, ":");
	}

	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
		/*
		 * Check asprintf's return value rather than the output
		 * pointer, whose contents on failure are not portable.
		 */
		if (asprintf(&proxy_auth_userpass, "%s:%s",
		    proxy_auth_user, proxy_auth_pass) == -1)
			err(1, "asprintf");

		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
		if (proxy_auth_userpass64 == NULL)
			err(1, "malloc");

		if (asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
		    proxy_auth_userpass64) == -1)
			err(1, "asprintf");

		free(proxy_auth_userpass);
		free(proxy_auth_userpass64);
	} else
		proxyauth = NULL;

	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
	if (env_HTTP_USER_AGENT == NULL)
		env_HTTP_USER_AGENT = "phttpget/0.1";
}

/*
 * Build an HTTP/1.1 GET request for path on server.  When a proxy is
 * configured the request line carries the absolute URL.  If connclose
 * is non-zero a "Connection: Close" header is included.
 *
 * On return *buf points to a malloc()ed request which the caller must
 * free; the return value is its length.  Exits via err() on allocation
 * failure.
 */
static int
makerequest(char ** buf, char * path, char * server, int connclose)
{
	int buflen;

	buflen = asprintf(buf,
	    "GET %s%s/%s HTTP/1.1\r\n"
	    "Host: %s\r\n"
	    "User-Agent: %s\r\n"
	    "%s"
	    "%s"
	    "\r\n",
	    env_HTTP_PROXY ? "http://" : "",
	    env_HTTP_PROXY ? server : "",
	    path, server, env_HTTP_USER_AGENT,
	    proxyauth ? proxyauth : "",
	    connclose ? "Connection: Close\r\n" : "");
	if (buflen == -1)
		err(1, "asprintf");
	return (buflen);
}

/*
 * Make sure that resbuf[*resbufpos .. *resbuflen) contains a complete
 * "\r\n"-terminated line, receiving more data from sd as needed.
 * resbuf must have room for BUFSIZ bytes.
 *
 * Returns 0 once a line is available, or -1 if the buffer fills up
 * without a line terminator, the peer closes the connection, or
 * recv() fails with anything other than EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t len;

	while (strnstr(resbuf + *resbufpos, "\r\n",
	    *resbuflen - *resbufpos) == NULL) {
		/* Move buffered data to the start of the buffer */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* If the buffer is full, complain */
		if (*resbuflen == BUFSIZ)
			return (-1);

		/* Read more data into the buffer */
		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);

		/*
		 * A return of zero means the peer closed the connection;
		 * no line terminator can ever arrive, so fail instead of
		 * spinning on recv() forever.
		 */
		if ((len == 0) ||
		    ((len == -1) && (errno != EINTR)))
			return (-1);

		if (len != -1)
			*resbuflen += len;
	}

	return (0);
}

/*
 * Transfer copylen bytes of response data to fd, first draining
 * whatever is buffered in resbuf[*resbufpos .. *resbuflen) and then
 * receiving more from sd.  If fd is -1 the data is discarded.
 *
 * Returns 0 on success, -1 if recv() fails (other than EINTR) and -2
 * if the peer closes the connection before copylen bytes were seen.
 * Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t len;

	while (copylen) {
		/* Write data from resbuf to fd */
		len = *resbuflen - *resbufpos;
		if (copylen < len)
			len = copylen;
		if (len > 0) {
			if (fd != -1)
				len = write(fd, resbuf + *resbufpos, len);
			if (len == -1)
				err(1, "write");
			/* A short write just advances by what was written */
			*resbufpos += len;
			copylen -= len;
			continue;
		}

		/* Read more data into buffer */
		len = recv(sd, resbuf, BUFSIZ, 0);
		if (len == -1) {
			if (errno == EINTR)
				continue;
			return (-1);
		} else if (len == 0) {
			return (-2);
		} else {
			*resbuflen = len;
			*resbufpos = 0;
		}
	}

	return (0);
}

/*
 * Fetch each file named on the command line from the given server
 * (via $HTTP_PROXY if set), pipelining requests once the server has
 * shown itself to speak HTTP/1.1.  Bodies of 200 responses are saved
 * in the current directory under the last path component of the
 * requested name; other bodies are read and discarded.  A one-line
 * status per file is printed to stderr.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if (connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1 ||
			    fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/* EAGAIN: socket full, try later */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    !isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, bailing out as soon
				 * as it is out of range so that a long run
				 * of digits cannot overflow the int.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names are matched without regard
			 * to case, as are the values we look for.
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (contains_nocase(hln, "close"))
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (contains_nocase(hln, "chunked"))
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;

			/*
			 * As for any other response, a connection which
			 * is not persistent must be torn down before the
			 * next request is issued.
			 */
			if (pipelined == 0)
				goto cleanupconn;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Get the chunk-size line */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* A chunk size has at least one hex digit */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-final chunk is
				 * followed by a CRLF which must be consumed
				 * before the next chunk-size line; otherwise
				 * it would be mistaken for a zero-length
				 * (terminating) chunk.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					eolp = strnstr(hln, "\r\n",
					    resbuflen - resbufpos);
					if (eolp != hln)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		/* Anything sent but not yet answered must be sent again */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* Release a connection which is still open */
	if (sd != -1)
		close(sd);
	free(resbuf);
	freeaddrinfo(res0);

	return (0);
}