/* phttpget.c revision 148871 */
/*-
 * Copyright 2005 Colin Percival
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 148871 2005-08-08 20:10:06Z cperciva $");

#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

static const char *	env_HTTP_PROXY;		/* Proxy host, or NULL */
static const char *	env_HTTP_USER_AGENT;	/* User-Agent header value */
static const char *	proxyport;		/* Proxy port (service string) */

/* Send/receive timeout applied to every socket we open. */
static struct timeval	timo = { 15, 0};

/*
 * Print the command-line synopsis to stderr and exit with EX_USAGE.
 */
static void
usage(void)
{

	fprintf(stderr, "usage: phttpget server [file ...]\n");
	exit(EX_USAGE);
}

/*
 * Read HTTP_PROXY and HTTP_USER_AGENT from the environment.
 *
 * HTTP_PROXY is accepted as "[http://]host[:port][/...]".  The host part
 * ends up in env_HTTP_PROXY and the port in proxyport ("3128" when no
 * port is given).  HTTP_USER_AGENT defaults to "phttpget/0.1".
 */
static void
readenv(void)
{
	const char * proxy;
	char * hostport;
	char * p;

	proxy = getenv("HTTP_PROXY");
	if (proxy != NULL) {
		if (strncmp(proxy, "http://", 7) == 0)
			proxy += 7;

		/*
		 * Work on a private copy: the string returned by getenv()
		 * must not be modified.  The copy is deliberately never
		 * freed, since it is referenced for the rest of the run.
		 */
		hostport = strdup(proxy);
		if (hostport == NULL)
			err(1, "strdup");

		/* Drop any trailing path so it can't leak into the port */
		p = strchr(hostport, '/');
		if (p != NULL)
			*p = '\0';

		/* Split "host:port" */
		p = strchr(hostport, ':');
		if (p != NULL) {
			*p = '\0';
			proxyport = p + 1;
		} else
			proxyport = "3128";

		env_HTTP_PROXY = hostport;
	}

	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
	if (env_HTTP_USER_AGENT == NULL)
		env_HTTP_USER_AGENT = "phttpget/0.1";
}

/*
 * Build an HTTP/1.1 GET request for "path" on "server".
 *
 * When a proxy is configured the request line carries the absolute URL
 * (http://server/path); otherwise just "/path".  If connclose is non-zero
 * a "Connection: Close" header is added.
 *
 * On return *buf points to a buffer allocated by asprintf(), which the
 * caller must free(); the return value is the request length in bytes.
 * Exits via err() if the allocation fails.
 */
static int
makerequest(char ** buf, char * path, char * server, int connclose)
{
	int buflen;

	buflen = asprintf(buf,
	    "GET %s%s/%s HTTP/1.1\r\n"
	    "Host: %s\r\n"
	    "User-Agent: %s\r\n"
	    "%s"
	    "\r\n",
	    env_HTTP_PROXY ? "http://" : "",
	    env_HTTP_PROXY ? server : "",
	    path, server, env_HTTP_USER_AGENT,
	    connclose ? "Connection: Close\r\n" : "");
	if (buflen == -1)
		err(1, "asprintf");
	return(buflen);
}

/*
 * Make sure the BUFSIZ-byte buffer resbuf holds a complete CRLF-terminated
 * line starting at offset *resbufpos, receiving more data from sd if
 * necessary.  *resbuflen is the number of valid bytes in resbuf; both
 * *resbuflen and *resbufpos are updated when buffered data is shifted to
 * the front of the buffer or new data arrives.
 *
 * Returns 0 once a line is available, or -1 if the buffer fills up
 * without a line terminator, recv() fails (other than with EINTR), or the
 * peer closes the connection.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t len;

	while (strnstr(resbuf + *resbufpos, "\r\n",
	    *resbuflen - *resbufpos) == NULL) {
		/* Move buffered data to the start of the buffer */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* If the buffer is full, complain */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Read more data into the buffer */
		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);

		/*
		 * A return of 0 means the peer closed the connection; no
		 * more data will ever arrive, so retrying would spin
		 * forever.
		 */
		if ((len == 0) ||
		    ((len == -1) && (errno != EINTR)))
			return -1;

		if (len != -1)
			*resbuflen += len;
	}

	return 0;
}

/*
 * Copy copylen bytes of response data to fd, taking them first from the
 * bytes already buffered in resbuf (between *resbufpos and *resbuflen)
 * and then from the socket sd.  If fd is -1 the data is discarded.
 *
 * Returns 0 when copylen bytes have been consumed, -1 if recv() fails
 * (other than with EINTR), or -2 if the peer closes the connection first.
 * Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t len;

	while (copylen) {
		/* Write data from resbuf to fd */
		len = *resbuflen - *resbufpos;
		if (copylen < len)
			len = copylen;
		if (len > 0) {
			/* A short write just means another trip round */
			if (fd != -1)
				len = write(fd, resbuf + *resbufpos, len);
			if (len == -1)
				err(1, "write");
			*resbufpos += len;
			copylen -= len;
			continue;
		}

		/* Read more data into buffer */
		len = recv(sd, resbuf, BUFSIZ, 0);
		if (len == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		} else if (len == 0) {
			return -2;
		} else {
			*resbuflen = len;
			*resbufpos = 0;
		}
	}

	return 0;
}

/*
 * phttpget server [file ...]
 *
 * Fetch each named file from the server over HTTP (via HTTP_PROXY if it
 * is set), pipelining requests once the server has answered with
 * HTTP/1.x, x != 0.  Bodies of 200 responses are written to a file in the
 * current directory named after the last path component; other bodies
 * are read and discarded.  A one-line status per file goes to stderr.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "%s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1)
				err(1, "fcntl");
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/* EAGAIN: socket full, try later */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/* Read the status code */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					/*
					 * Give up as soon as the value is
					 * out of range, so that a long run
					 * of digits can't overflow.
					 */
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/* Check for "Connection: close" header */
			if (strncmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strstr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength > INT_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strstr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s\n",
				    argv[nres]);

			/* Use err() so the reason open() failed is shown */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Chunk header: hex size, optional extension */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strstr(hln, "\r\n");
				resbufpos = (eolp - resbuf) + 2;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen > INT_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-empty chunk is
				 * followed by a CRLF of its own.  Consume
				 * it here; otherwise it would be parsed as
				 * a zero-length chunk and the rest of the
				 * body would be thrown away.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					eolp = strstr(hln, "\r\n");
					if (eolp != hln)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strstr(hln, "\r\n");
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.
			 */
			error = copybytes(sd, fd, INT_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		/* Anything sent but not yet answered must be re-sent */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* A pipelined connection may still be open at this point */
	if (sd != -1)
		close(sd);
	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}