http.c revision 1.18
1/* $Id: http.c,v 1.18 2017/01/24 13:32:55 jsing Exp $ */ 2/* 3 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18#include <sys/socket.h> 19#include <sys/param.h> 20#include <arpa/inet.h> 21 22#include <ctype.h> 23#include <err.h> 24#include <limits.h> 25#include <netdb.h> 26#include <stdio.h> 27#include <stdint.h> 28#include <stdlib.h> 29#include <string.h> 30#include <tls.h> 31#include <unistd.h> 32 33#include "http.h" 34#include "extern.h" 35 36#define DEFAULT_CA_FILE "/etc/ssl/cert.pem" 37 38/* 39 * A buffer for transferring HTTP/S data. 40 */ 41struct httpxfer { 42 char *hbuf; /* header transfer buffer */ 43 size_t hbufsz; /* header buffer size */ 44 int headok; /* header has been parsed */ 45 char *bbuf; /* body transfer buffer */ 46 size_t bbufsz; /* body buffer size */ 47 int bodyok; /* body has been parsed */ 48 char *headbuf; /* lookaside buffer for headers */ 49 struct httphead *head; /* parsed headers */ 50 size_t headsz; /* number of headers */ 51}; 52 53/* 54 * An HTTP/S connection object. 55 */ 56struct http { 57 int fd; /* connected socket */ 58 short port; /* port number */ 59 struct source src; /* endpoint (raw) host */ 60 char *path; /* path to request */ 61 char *host; /* name of endpoint host */ 62 struct tls *ctx; /* if TLS */ 63 writefp writer; /* write function */ 64 readfp reader; /* read function */ 65}; 66 67struct tls_config *tlscfg; 68 69static ssize_t 70dosysread(char *buf, size_t sz, const struct http *http) 71{ 72 ssize_t rc; 73 74 rc = read(http->fd, buf, sz); 75 if (rc < 0) 76 warn("%s: read", http->src.ip); 77 return rc; 78} 79 80static ssize_t 81dosyswrite(const void *buf, size_t sz, const struct http *http) 82{ 83 ssize_t rc; 84 85 rc = write(http->fd, buf, sz); 86 if (rc < 0) 87 warn("%s: write", http->src.ip); 88 return rc; 89} 90 91static ssize_t 92dotlsread(char *buf, size_t sz, const struct http *http) 93{ 94 ssize_t rc; 95 96 do { 97 rc = tls_read(http->ctx, buf, sz); 98 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 99 100 if (rc < 0) 101 warnx("%s: tls_read: %s", http->src.ip, 102 tls_error(http->ctx)); 103 return rc; 104} 105 106static ssize_t 107dotlswrite(const void *buf, size_t sz, const struct http *http) 108{ 109 ssize_t rc; 110 111 do { 112 rc = tls_write(http->ctx, buf, sz); 113 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 114 115 if (rc < 0) 116 warnx("%s: tls_write: %s", http->src.ip, 117 tls_error(http->ctx)); 118 return rc; 119} 120 121int 122http_init() 123{ 124 if (tlscfg != NULL) 125 return 0; 126 127 if (tls_init() == -1) { 128 warn("tls_init"); 129 goto err; 130 } 131 132 tlscfg = tls_config_new(); 133 if (tlscfg == NULL) { 134 warn("tls_config_new"); 135 goto err; 136 } 137 138 if (tls_config_set_ca_file(tlscfg, DEFAULT_CA_FILE) == -1) { 139 warn("tls_config_set_ca_file: %s", tls_config_error(tlscfg)); 140 goto err; 141 } 142 143 return 0; 144 145 err: 146 tls_config_free(tlscfg); 147 tlscfg = NULL; 148 149 return -1; 150} 151 152static ssize_t 153http_read(char *buf, size_t sz, const struct http *http) 154{ 155 ssize_t ssz, xfer; 156 157 xfer = 0; 158 do { 159 if ((ssz = http->reader(buf, sz, http)) < 0) 160 return -1; 161 if (ssz == 0) 162 break; 163 xfer += ssz; 164 sz -= ssz; 165 buf += ssz; 166 } while (ssz > 0 && sz > 0); 167 168 return xfer; 169} 170 171static int 172http_write(const char *buf, size_t sz, const struct http *http) 173{ 174 ssize_t ssz, xfer; 175 176 xfer = sz; 177 while (sz > 0) { 178 if ((ssz = http->writer(buf, sz, http)) < 0) 179 return -1; 180 sz -= ssz; 181 buf += (size_t)ssz; 182 } 183 return xfer; 184} 185 186void 187http_disconnect(struct http *http) 188{ 189 int rc; 190 191 if (http->ctx != NULL) { 192 /* TLS connection. */ 193 do { 194 rc = tls_close(http->ctx); 195 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 196 197 if (rc < 0) 198 warnx("%s: tls_close: %s", http->src.ip, 199 tls_error(http->ctx)); 200 201 tls_free(http->ctx); 202 } 203 if (http->fd != -1) { 204 if (close(http->fd) == -1) 205 warn("%s: close", http->src.ip); 206 } 207 208 http->fd = -1; 209 http->ctx = NULL; 210} 211 212void 213http_free(struct http *http) 214{ 215 216 if (http == NULL) 217 return; 218 http_disconnect(http); 219 free(http->host); 220 free(http->path); 221 free(http->src.ip); 222 free(http); 223} 224 225struct http * 226http_alloc(const struct source *addrs, size_t addrsz, 227 const char *host, short port, const char *path) 228{ 229 struct sockaddr_storage ss; 230 int family, fd, c; 231 socklen_t len; 232 size_t cur, i = 0; 233 struct http *http; 234 235 /* Do this while we still have addresses to connect. */ 236again: 237 if (i == addrsz) 238 return NULL; 239 cur = i++; 240 241 /* Convert to PF_INET or PF_INET6 address from string. */ 242 243 memset(&ss, 0, sizeof(struct sockaddr_storage)); 244 245 if (addrs[cur].family == 4) { 246 family = PF_INET; 247 ((struct sockaddr_in *)&ss)->sin_family = AF_INET; 248 ((struct sockaddr_in *)&ss)->sin_port = htons(port); 249 c = inet_pton(AF_INET, addrs[cur].ip, 250 &((struct sockaddr_in *)&ss)->sin_addr); 251 len = sizeof(struct sockaddr_in); 252 } else if (addrs[cur].family == 6) { 253 family = PF_INET6; 254 ((struct sockaddr_in6 *)&ss)->sin6_family = AF_INET6; 255 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(port); 256 c = inet_pton(AF_INET6, addrs[cur].ip, 257 &((struct sockaddr_in6 *)&ss)->sin6_addr); 258 len = sizeof(struct sockaddr_in6); 259 } else { 260 warnx("%s: unknown family", addrs[cur].ip); 261 goto again; 262 } 263 264 if (c < 0) { 265 warn("%s: inet_ntop", addrs[cur].ip); 266 goto again; 267 } else if (c == 0) { 268 warnx("%s: inet_ntop", addrs[cur].ip); 269 goto again; 270 } 271 272 /* Create socket and connect. */ 273 274 fd = socket(family, SOCK_STREAM, 0); 275 if (fd == -1) { 276 warn("%s: socket", addrs[cur].ip); 277 goto again; 278 } else if (connect(fd, (struct sockaddr *)&ss, len) == -1) { 279 warn("%s: connect", addrs[cur].ip); 280 close(fd); 281 goto again; 282 } 283 284 /* Allocate the communicator. */ 285 286 http = calloc(1, sizeof(struct http)); 287 if (http == NULL) { 288 warn("calloc"); 289 close(fd); 290 return NULL; 291 } 292 http->fd = fd; 293 http->port = port; 294 http->src.family = addrs[cur].family; 295 http->src.ip = strdup(addrs[cur].ip); 296 http->host = strdup(host); 297 http->path = strdup(path); 298 if (http->src.ip == NULL || http->host == NULL || http->path == NULL) { 299 warn("strdup"); 300 goto err; 301 } 302 303 /* If necessary, do our TLS setup. */ 304 305 if (port != 443) { 306 http->writer = dosyswrite; 307 http->reader = dosysread; 308 return http; 309 } 310 311 http->writer = dotlswrite; 312 http->reader = dotlsread; 313 314 if ((http->ctx = tls_client()) == NULL) { 315 warn("tls_client"); 316 goto err; 317 } else if (tls_configure(http->ctx, tlscfg) == -1) { 318 warnx("%s: tls_configure: %s", 319 http->src.ip, tls_error(http->ctx)); 320 goto err; 321 } 322 323 if (tls_connect_socket(http->ctx, http->fd, http->host) != 0) { 324 warnx("%s: tls_connect_socket: %s, %s", http->src.ip, 325 http->host, tls_error(http->ctx)); 326 goto err; 327 } 328 329 return http; 330err: 331 http_free(http); 332 return NULL; 333} 334 335struct httpxfer * 336http_open(const struct http *http, const void *p, size_t psz) 337{ 338 char *req; 339 int c; 340 struct httpxfer *trans; 341 342 if (p == NULL) { 343 c = asprintf(&req, 344 "GET %s HTTP/1.0\r\n" 345 "Host: %s\r\n" 346 "\r\n", 347 http->path, http->host); 348 } else { 349 c = asprintf(&req, 350 "POST %s HTTP/1.0\r\n" 351 "Host: %s\r\n" 352 "Content-Length: %zu\r\n" 353 "\r\n", 354 http->path, http->host, psz); 355 } 356 if (c == -1) { 357 warn("asprintf"); 358 return NULL; 359 } else if (!http_write(req, c, http)) { 360 free(req); 361 return NULL; 362 } else if (p != NULL && !http_write(p, psz, http)) { 363 free(req); 364 return NULL; 365 } 366 367 free(req); 368 369 trans = calloc(1, sizeof(struct httpxfer)); 370 if (trans == NULL) 371 warn("calloc"); 372 return trans; 373} 374 375void 376http_close(struct httpxfer *x) 377{ 378 379 if (x == NULL) 380 return; 381 free(x->hbuf); 382 free(x->bbuf); 383 free(x->headbuf); 384 free(x->head); 385 free(x); 386} 387 388/* 389 * Read the HTTP body from the wire. 390 * If invoked multiple times, this will return the same pointer with the 391 * same data (or NULL, if the original invocation returned NULL). 392 * Returns NULL if read or allocation errors occur. 393 * You must not free the returned pointer. 394 */ 395char * 396http_body_read(const struct http *http, struct httpxfer *trans, size_t *sz) 397{ 398 char buf[BUFSIZ]; 399 ssize_t ssz; 400 void *pp; 401 size_t szp; 402 403 if (sz == NULL) 404 sz = &szp; 405 406 /* Have we already parsed this? */ 407 408 if (trans->bodyok > 0) { 409 *sz = trans->bbufsz; 410 return trans->bbuf; 411 } else if (trans->bodyok < 0) 412 return NULL; 413 414 *sz = 0; 415 trans->bodyok = -1; 416 417 do { 418 /* If less than sizeof(buf), at EOF. */ 419 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 420 return NULL; 421 else if (ssz == 0) 422 break; 423 pp = realloc(trans->bbuf, trans->bbufsz + ssz); 424 if (pp == NULL) { 425 warn("realloc"); 426 return NULL; 427 } 428 trans->bbuf = pp; 429 memcpy(trans->bbuf + trans->bbufsz, buf, ssz); 430 trans->bbufsz += ssz; 431 } while (ssz == sizeof(buf)); 432 433 trans->bodyok = 1; 434 *sz = trans->bbufsz; 435 return trans->bbuf; 436} 437 438struct httphead * 439http_head_get(const char *v, struct httphead *h, size_t hsz) 440{ 441 size_t i; 442 443 for (i = 0; i < hsz; i++) { 444 if (strcmp(h[i].key, v)) 445 continue; 446 return &h[i]; 447 } 448 return NULL; 449} 450 451/* 452 * Look through the headers and determine our HTTP code. 453 * This will return -1 on failure, otherwise the code. 454 */ 455int 456http_head_status(const struct http *http, struct httphead *h, size_t sz) 457{ 458 int rc; 459 unsigned int code; 460 struct httphead *st; 461 462 if ((st = http_head_get("Status", h, sz)) == NULL) { 463 warnx("%s: no status header", http->src.ip); 464 return -1; 465 } 466 467 rc = sscanf(st->val, "%*s %u %*s", &code); 468 if (rc < 0) { 469 warn("sscanf"); 470 return -1; 471 } else if (rc != 1) { 472 warnx("%s: cannot convert status header", http->src.ip); 473 return -1; 474 } 475 return code; 476} 477 478/* 479 * Parse headers from the transfer. 480 * Malformed headers are skipped. 481 * A special "Status" header is added for the HTTP status line. 482 * This can only happen once http_head_read has been called with 483 * success. 484 * This can be invoked multiple times: it will only parse the headers 485 * once and after that it will just return the cache. 486 * You must not free the returned pointer. 487 * If the original header parse failed, or if memory allocation fails 488 * internally, this returns NULL. 489 */ 490struct httphead * 491http_head_parse(const struct http *http, struct httpxfer *trans, size_t *sz) 492{ 493 size_t hsz, szp; 494 struct httphead *h; 495 char *cp, *ep, *ccp, *buf; 496 497 if (sz == NULL) 498 sz = &szp; 499 500 /* 501 * If we've already parsed the headers, return the 502 * previously-parsed buffer now. 503 * If we have errors on the stream, return NULL now. 504 */ 505 506 if (trans->head != NULL) { 507 *sz = trans->headsz; 508 return trans->head; 509 } else if (trans->headok <= 0) 510 return NULL; 511 512 if ((buf = strdup(trans->hbuf)) == NULL) { 513 warn("strdup"); 514 return NULL; 515 } 516 hsz = 0; 517 cp = buf; 518 519 do { 520 if ((cp = strstr(cp, "\r\n")) != NULL) 521 cp += 2; 522 hsz++; 523 } while (cp != NULL); 524 525 /* 526 * Allocate headers, then step through the data buffer, parsing 527 * out headers as we have them. 528 * We know at this point that the buffer is NUL-terminated in 529 * the usual way. 530 */ 531 532 h = calloc(hsz, sizeof(struct httphead)); 533 if (h == NULL) { 534 warn("calloc"); 535 free(buf); 536 return NULL; 537 } 538 539 *sz = hsz; 540 hsz = 0; 541 cp = buf; 542 543 do { 544 if ((ep = strstr(cp, "\r\n")) != NULL) { 545 *ep = '\0'; 546 ep += 2; 547 } 548 if (hsz == 0) { 549 h[hsz].key = "Status"; 550 h[hsz++].val = cp; 551 continue; 552 } 553 554 /* Skip bad headers. */ 555 if ((ccp = strchr(cp, ':')) == NULL) { 556 warnx("%s: header without separator", http->src.ip); 557 continue; 558 } 559 560 *ccp++ = '\0'; 561 while (isspace((int)*ccp)) 562 ccp++; 563 h[hsz].key = cp; 564 h[hsz++].val = ccp; 565 } while ((cp = ep) != NULL); 566 567 trans->headbuf = buf; 568 trans->head = h; 569 trans->headsz = hsz; 570 return h; 571} 572 573/* 574 * Read the HTTP headers from the wire. 575 * If invoked multiple times, this will return the same pointer with the 576 * same data (or NULL, if the original invocation returned NULL). 577 * Returns NULL if read or allocation errors occur. 578 * You must not free the returned pointer. 579 */ 580char * 581http_head_read(const struct http *http, struct httpxfer *trans, size_t *sz) 582{ 583 char buf[BUFSIZ]; 584 ssize_t ssz; 585 char *ep; 586 void *pp; 587 size_t szp; 588 589 if (sz == NULL) 590 sz = &szp; 591 592 /* Have we already parsed this? */ 593 594 if (trans->headok > 0) { 595 *sz = trans->hbufsz; 596 return trans->hbuf; 597 } else if (trans->headok < 0) 598 return NULL; 599 600 *sz = 0; 601 ep = NULL; 602 trans->headok = -1; 603 604 /* 605 * Begin by reading by BUFSIZ blocks until we reach the header 606 * termination marker (two CRLFs). 607 * We might read into our body, but that's ok: we'll copy out 608 * the body parts into our body buffer afterward. 609 */ 610 611 do { 612 /* If less than sizeof(buf), at EOF. */ 613 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 614 return NULL; 615 else if (ssz == 0) 616 break; 617 pp = realloc(trans->hbuf, trans->hbufsz + ssz); 618 if (pp == NULL) { 619 warn("realloc"); 620 return NULL; 621 } 622 trans->hbuf = pp; 623 memcpy(trans->hbuf + trans->hbufsz, buf, ssz); 624 trans->hbufsz += ssz; 625 /* Search for end of headers marker. */ 626 ep = memmem(trans->hbuf, trans->hbufsz, "\r\n\r\n", 4); 627 } while (ep == NULL && ssz == sizeof(buf)); 628 629 if (ep == NULL) { 630 warnx("%s: partial transfer", http->src.ip); 631 return NULL; 632 } 633 *ep = '\0'; 634 635 /* 636 * The header data is invalid if it has any binary characters in 637 * it: check that now. 638 * This is important because we want to guarantee that all 639 * header keys and pairs are properly NUL-terminated. 640 */ 641 642 if (strlen(trans->hbuf) != (uintptr_t)(ep - trans->hbuf)) { 643 warnx("%s: binary data in header", http->src.ip); 644 return NULL; 645 } 646 647 /* 648 * Copy remaining buffer into body buffer. 649 */ 650 651 ep += 4; 652 trans->bbufsz = (trans->hbuf + trans->hbufsz) - ep; 653 trans->bbuf = malloc(trans->bbufsz); 654 if (trans->bbuf == NULL) { 655 warn("malloc"); 656 return NULL; 657 } 658 memcpy(trans->bbuf, ep, trans->bbufsz); 659 660 trans->headok = 1; 661 *sz = trans->hbufsz; 662 return trans->hbuf; 663} 664 665void 666http_get_free(struct httpget *g) 667{ 668 669 if (g == NULL) 670 return; 671 http_close(g->xfer); 672 http_free(g->http); 673 free(g); 674} 675 676struct httpget * 677http_get(const struct source *addrs, size_t addrsz, const char *domain, 678 short port, const char *path, const void *post, size_t postsz) 679{ 680 struct http *h; 681 struct httpxfer *x; 682 struct httpget *g; 683 struct httphead *head; 684 size_t headsz, bodsz, headrsz; 685 int code; 686 char *bod, *headr; 687 688 h = http_alloc(addrs, addrsz, domain, port, path); 689 if (h == NULL) 690 return NULL; 691 692 if ((x = http_open(h, post, postsz)) == NULL) { 693 http_free(h); 694 return NULL; 695 } else if ((headr = http_head_read(h, x, &headrsz)) == NULL) { 696 http_close(x); 697 http_free(h); 698 return NULL; 699 } else if ((bod = http_body_read(h, x, &bodsz)) == NULL) { 700 http_close(x); 701 http_free(h); 702 return NULL; 703 } 704 705 http_disconnect(h); 706 707 if ((head = http_head_parse(h, x, &headsz)) == NULL) { 708 http_close(x); 709 http_free(h); 710 return NULL; 711 } else if ((code = http_head_status(h, head, headsz)) < 0) { 712 http_close(x); 713 http_free(h); 714 return NULL; 715 } 716 717 if ((g = calloc(1, sizeof(struct httpget))) == NULL) { 718 warn("calloc"); 719 http_close(x); 720 http_free(h); 721 return NULL; 722 } 723 724 g->headpart = headr; 725 g->headpartsz = headrsz; 726 g->bodypart = bod; 727 g->bodypartsz = bodsz; 728 g->head = head; 729 g->headsz = headsz; 730 g->code = code; 731 g->xfer = x; 732 g->http = h; 733 return g; 734} 735 736#if 0 737int 738main(void) 739{ 740 struct httpget *g; 741 struct httphead *httph; 742 size_t i, httphsz; 743 struct source addrs[2]; 744 size_t addrsz; 745 746#if 0 747 addrs[0].ip = "127.0.0.1"; 748 addrs[0].family = 4; 749 addrsz = 1; 750#else 751 addrs[0].ip = "2a00:1450:400a:806::2004"; 752 addrs[0].family = 6; 753 addrs[1].ip = "193.135.3.123"; 754 addrs[1].family = 4; 755 addrsz = 2; 756#endif 757 758 if (http_init() == -1) 759 errx(EXIT_FAILURE, "http_init"); 760 761#if 0 762 g = http_get(addrs, addrsz, "localhost", 80, "/index.html"); 763#else 764 g = http_get(addrs, addrsz, "www.google.ch", 80, "/index.html", 765 NULL, 0); 766#endif 767 768 if (g == NULL) 769 errx(EXIT_FAILURE, "http_get"); 770 771 httph = http_head_parse(g->http, g->xfer, &httphsz); 772 warnx("code: %d", g->code); 773 774 for (i = 0; i < httphsz; i++) 775 warnx("head: [%s]=[%s]", httph[i].key, httph[i].val); 776 777 http_get_free(g); 778 return (EXIT_SUCCESS); 779} 780#endif 781