/* http.c revision 125695 */
174462Salfred/*- 274462Salfred * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3259118Shrs * All rights reserved. 4259118Shrs * 58870Srgrimes * Redistribution and use in source and binary forms, with or without 6259118Shrs * modification, are permitted provided that the following conditions 7259118Shrs * are met: 8259118Shrs * 1. Redistributions of source code must retain the above copyright 98870Srgrimes * notice, this list of conditions and the following disclaimer 10259118Shrs * in this position and unchanged. 11259118Shrs * 2. Redistributions in binary form must reproduce the above copyright 12259118Shrs * notice, this list of conditions and the following disclaimer in the 13259118Shrs * documentation and/or other materials provided with the distribution. 14259118Shrs * 3. The name of the author may not be used to endorse or promote products 15259118Shrs * derived from this software without specific prior written permission. 16259118Shrs * 17259118Shrs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18259118Shrs * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 198870Srgrimes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20259118Shrs * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21259118Shrs * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22259118Shrs * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23259118Shrs * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24259118Shrs * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25259118Shrs * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26259118Shrs * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27259118Shrs */ 28259118Shrs 29259118Shrs#include <sys/cdefs.h> 30259118Shrs__FBSDID("$FreeBSD: head/lib/libfetch/http.c 125695 2004-02-11 09:23:35Z des $"); 31259118Shrs 321902Swollman/* 331902Swollman * The following copyright applies to the base64 code: 341902Swollman * 35136582Sobrien *- 3674462Salfred * Copyright 1997 Massachusetts Institute of Technology 371902Swollman * 3892986Sobrien * Permission to use, copy, modify, and distribute this software and 3992986Sobrien * its documentation for any purpose and without fee is hereby 401902Swollman * granted, provided that both the above copyright notice and this 411902Swollman * permission notice appear in all copies, that both the above 4274462Salfred * copyright notice and this permission notice appear in all 431902Swollman * supporting documentation, and that the name of M.I.T. not be used 441902Swollman * in advertising or publicity pertaining to distribution of the 451902Swollman * software without specific, written prior permission. M.I.T. makes 461902Swollman * no representations about the suitability of this software for any 471902Swollman * purpose. It is provided "as is" without express or implied 481902Swollman * warranty. 4974462Salfred * 501902Swollman * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 511902Swollman * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 5274462Salfred * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 5374462Salfred * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 5474462Salfred * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 551902Swollman * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 561902Swollman * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5774462Salfred * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 581902Swollman * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 591902Swollman * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 601902Swollman * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6121062Speter * SUCH DAMAGE. 621902Swollman */ 631902Swollman 6421062Speter#include <sys/param.h> 651902Swollman#include <sys/socket.h> 661902Swollman 6774462Salfred#include <ctype.h> 681902Swollman#include <err.h> 691902Swollman#include <errno.h> 701902Swollman#include <locale.h> 711902Swollman#include <netdb.h> 721902Swollman#include <stdarg.h> 731902Swollman#include <stdio.h> 741902Swollman#include <stdlib.h> 751902Swollman#include <string.h> 761902Swollman#include <time.h> 771902Swollman#include <unistd.h> 781902Swollman 791902Swollman#include "fetch.h" 801902Swollman#include "common.h" 811902Swollman#include "httperr.h" 821902Swollman 831902Swollman/* Maximum number of redirects to follow */ 841902Swollman#define MAX_REDIRECT 5 851902Swollman 861902Swollman/* Symbolic names for reply codes we care about */ 871902Swollman#define HTTP_OK 200 881902Swollman#define HTTP_PARTIAL 206 891902Swollman#define HTTP_MOVED_PERM 301 901902Swollman#define HTTP_MOVED_TEMP 302 911902Swollman#define HTTP_SEE_OTHER 303 921902Swollman#define HTTP_NEED_AUTH 401 931902Swollman#define HTTP_NEED_PROXY_AUTH 407 941902Swollman#define HTTP_PROTOCOL_ERROR 999 951902Swollman 961902Swollman#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 971902Swollman || (xyz) == HTTP_MOVED_TEMP \ 981902Swollman || (xyz) == HTTP_SEE_OTHER) 99288040Srodrigc 1001902Swollman#define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599) 10174462Salfred 1021902Swollman 
1031902Swollman/***************************************************************************** 1041902Swollman * I/O functions for decoding chunked streams 1051902Swollman */ 1061902Swollman 1071902Swollmanstruct httpio 1081902Swollman{ 1091902Swollman conn_t *conn; /* connection */ 1108870Srgrimes int chunked; /* chunked mode */ 11174462Salfred char *buf; /* chunk buffer */ 1121902Swollman size_t bufsize; /* size of chunk buffer */ 1131902Swollman ssize_t buflen; /* amount of data currently in buffer */ 114298120Spfg int bufpos; /* current read offset in buffer */ 115298120Spfg int eof; /* end-of-file flag */ 1161902Swollman int error; /* error flag */ 1171902Swollman size_t chunksize; /* remaining size of current chunk */ 1181902Swollman#ifndef NDEBUG 1191902Swollman size_t total; 1201902Swollman#endif 1211902Swollman}; 1221902Swollman 1231902Swollman/* 1241902Swollman * Get next chunk header 1251902Swollman */ 1261902Swollmanstatic int 12774462Salfred_http_new_chunk(struct httpio *io) 1281902Swollman{ 1291902Swollman char *p; 1301902Swollman 1311902Swollman if (_fetch_getln(io->conn) == -1) 13274462Salfred return (-1); 1331902Swollman 1341902Swollman if (io->conn->buflen < 2 || !ishexnumber(*io->conn->buf)) 13574462Salfred return (-1); 1361902Swollman 137298120Spfg for (p = io->conn->buf; *p && !isspace(*p); ++p) { 138298120Spfg if (*p == ';') 1391902Swollman break; 1401902Swollman if (!ishexnumber(*p)) 1411902Swollman return (-1); 1421902Swollman if (isdigit(*p)) { 1431902Swollman io->chunksize = io->chunksize * 16 + 1441902Swollman *p - '0'; 1451902Swollman } else { 1461902Swollman io->chunksize = io->chunksize * 16 + 1471902Swollman 10 + tolower(*p) - 'a'; 1481902Swollman } 1491902Swollman } 1501902Swollman 1511902Swollman#ifndef NDEBUG 1521902Swollman if (fetchDebug) { 1531902Swollman io->total += io->chunksize; 1541902Swollman if (io->chunksize == 0) 1551902Swollman fprintf(stderr, "%s(): end of last chunk\n", __func__); 15674462Salfred else 1571902Swollman 
fprintf(stderr, "%s(): new chunk: %lu (%lu)\n", 1581902Swollman __func__, (unsigned long)io->chunksize, 1591902Swollman (unsigned long)io->total); 16074462Salfred } 1611902Swollman#endif 1621902Swollman 1631902Swollman return (io->chunksize); 1641902Swollman} 1651902Swollman 1661902Swollman/* 1671902Swollman * Grow the input buffer to at least len bytes 1681902Swollman */ 1691902Swollmanstatic inline int 1701902Swollman_http_growbuf(struct httpio *io, size_t len) 1711902Swollman{ 1721902Swollman char *tmp; 1731902Swollman 1741902Swollman if (io->bufsize >= len) 1751902Swollman return (0); 1761902Swollman 1771902Swollman if ((tmp = realloc(io->buf, len)) == NULL) 1781902Swollman return (-1); 1791902Swollman io->buf = tmp; 1801902Swollman io->bufsize = len; 1811902Swollman return (0); 1821902Swollman} 1831902Swollman 1841902Swollman/* 1851902Swollman * Fill the input buffer, do chunk decoding on the fly 1861902Swollman */ 1871902Swollmanstatic int 1881902Swollman_http_fillbuf(struct httpio *io, size_t len) 1891902Swollman{ 1901902Swollman if (io->error) 1911902Swollman return (-1); 1921902Swollman if (io->eof) 1931902Swollman return (0); 1941902Swollman 1951902Swollman if (io->chunked == 0) { 1961902Swollman if (_http_growbuf(io, len) == -1) 197283833Srodrigc return (-1); 1981902Swollman if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) { 19921062Speter io->error = 1; 20074462Salfred return (-1); 20121062Speter } 20221062Speter io->bufpos = 0; 20374462Salfred return (io->buflen); 2041902Swollman } 2051902Swollman 20674462Salfred if (io->chunksize == 0) { 2071902Swollman switch (_http_new_chunk(io)) { 2081902Swollman case -1: 2091902Swollman io->error = 1; 2101902Swollman return (-1); 2111902Swollman case 0: 2121902Swollman io->eof = 1; 2131902Swollman return (0); 21474462Salfred } 2151902Swollman } 21674462Salfred 21721062Speter if (len > io->chunksize) 21821062Speter len = io->chunksize; 21974462Salfred if (_http_growbuf(io, len) == -1) 2201902Swollman 
return (-1); 22174462Salfred if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) { 22221062Speter io->error = 1; 22321062Speter return (-1); 22474462Salfred } 2251902Swollman io->chunksize -= io->buflen; 22621062Speter 2271902Swollman if (io->chunksize == 0) { 2281902Swollman char endl[2]; 229298120Spfg 230298120Spfg if (_fetch_read(io->conn, endl, 2) != 2 || 2311902Swollman endl[0] != '\r' || endl[1] != '\n') 2321902Swollman return (-1); 2331902Swollman } 2341902Swollman 2351902Swollman io->bufpos = 0; 2361902Swollman 2371902Swollman return (io->buflen); 2381902Swollman} 2391902Swollman 2401902Swollman/* 2411902Swollman * Read function 2421902Swollman */ 2431902Swollmanstatic int 2441902Swollman_http_readfn(void *v, char *buf, int len) 2451902Swollman{ 2461902Swollman struct httpio *io = (struct httpio *)v; 24774462Salfred int l, pos; 24874462Salfred 2491902Swollman if (io->error) 2501902Swollman return (-1); 2511902Swollman if (io->eof) 2521902Swollman return (0); 25374462Salfred 2541902Swollman for (pos = 0; len > 0; pos += l, len -= l) { 25574462Salfred /* empty buffer */ 25621062Speter if (!io->buf || io->bufpos == io->buflen) 25721062Speter if (_http_fillbuf(io, len) < 1) 25874462Salfred break; 2591902Swollman l = io->buflen - io->bufpos; 26074462Salfred if (len < l) 26121062Speter l = len; 26221062Speter bcopy(io->buf + io->bufpos, buf + pos, l); 26374462Salfred io->bufpos += l; 2641902Swollman } 26521062Speter 2661902Swollman if (!pos && io->error) 26774462Salfred return (-1); 26874462Salfred return (pos); 2691902Swollman} 270298120Spfg 271298120Spfg/* 2721902Swollman * Write function 2731902Swollman */ 2741902Swollmanstatic int 2751902Swollman_http_writefn(void *v, const char *buf, int len) 2761902Swollman{ 2771902Swollman struct httpio *io = (struct httpio *)v; 2781902Swollman 2791902Swollman return (_fetch_write(io->conn, buf, len)); 2801902Swollman} 2811902Swollman 2821902Swollman/* 2831902Swollman * Close function 2841902Swollman */ 
2851902Swollmanstatic int 2861902Swollman_http_closefn(void *v) 2871902Swollman{ 2881902Swollman struct httpio *io = (struct httpio *)v; 2891902Swollman int r; 2901902Swollman 2911902Swollman r = _fetch_close(io->conn); 2921902Swollman if (io->buf) 2931902Swollman free(io->buf); 29474462Salfred free(io); 2951902Swollman return (r); 2961902Swollman} 297 298/* 299 * Wrap a file descriptor up 300 */ 301static FILE * 302_http_funopen(conn_t *conn, int chunked) 303{ 304 struct httpio *io; 305 FILE *f; 306 307 if ((io = calloc(1, sizeof(*io))) == NULL) { 308 _fetch_syserr(); 309 return (NULL); 310 } 311 io->conn = conn; 312 io->chunked = chunked; 313 f = funopen(io, _http_readfn, _http_writefn, NULL, _http_closefn); 314 if (f == NULL) { 315 _fetch_syserr(); 316 free(io); 317 return (NULL); 318 } 319 return (f); 320} 321 322 323/***************************************************************************** 324 * Helper functions for talking to the server and parsing its replies 325 */ 326 327/* Header types */ 328typedef enum { 329 hdr_syserror = -2, 330 hdr_error = -1, 331 hdr_end = 0, 332 hdr_unknown = 1, 333 hdr_content_length, 334 hdr_content_range, 335 hdr_last_modified, 336 hdr_location, 337 hdr_transfer_encoding, 338 hdr_www_authenticate 339} hdr_t; 340 341/* Names of interesting headers */ 342static struct { 343 hdr_t num; 344 const char *name; 345} hdr_names[] = { 346 { hdr_content_length, "Content-Length" }, 347 { hdr_content_range, "Content-Range" }, 348 { hdr_last_modified, "Last-Modified" }, 349 { hdr_location, "Location" }, 350 { hdr_transfer_encoding, "Transfer-Encoding" }, 351 { hdr_www_authenticate, "WWW-Authenticate" }, 352 { hdr_unknown, NULL }, 353}; 354 355/* 356 * Send a formatted line; optionally echo to terminal 357 */ 358static int 359_http_cmd(conn_t *conn, const char *fmt, ...) 
360{ 361 va_list ap; 362 size_t len; 363 char *msg; 364 int r; 365 366 va_start(ap, fmt); 367 len = vasprintf(&msg, fmt, ap); 368 va_end(ap); 369 370 if (msg == NULL) { 371 errno = ENOMEM; 372 _fetch_syserr(); 373 return (-1); 374 } 375 376 r = _fetch_putln(conn, msg, len); 377 free(msg); 378 379 if (r == -1) { 380 _fetch_syserr(); 381 return (-1); 382 } 383 384 return (0); 385} 386 387/* 388 * Get and parse status line 389 */ 390static int 391_http_get_reply(conn_t *conn) 392{ 393 char *p; 394 395 if (_fetch_getln(conn) == -1) 396 return (-1); 397 /* 398 * A valid status line looks like "HTTP/m.n xyz reason" where m 399 * and n are the major and minor protocol version numbers and xyz 400 * is the reply code. 401 * Unfortunately, there are servers out there (NCSA 1.5.1, to name 402 * just one) that do not send a version number, so we can't rely 403 * on finding one, but if we do, insist on it being 1.0 or 1.1. 404 * We don't care about the reason phrase. 405 */ 406 if (strncmp(conn->buf, "HTTP", 4) != 0) 407 return (HTTP_PROTOCOL_ERROR); 408 p = conn->buf + 4; 409 if (*p == '/') { 410 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1')) 411 return (HTTP_PROTOCOL_ERROR); 412 p += 4; 413 } 414 if (*p != ' ' || !isdigit(p[1]) || !isdigit(p[2]) || !isdigit(p[3])) 415 return (HTTP_PROTOCOL_ERROR); 416 417 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'); 418 return (conn->err); 419} 420 421/* 422 * Check a header; if the type matches the given string, return a pointer 423 * to the beginning of the value. 424 */ 425static const char * 426_http_match(const char *str, const char *hdr) 427{ 428 while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 429 /* nothing */; 430 if (*str || *hdr != ':') 431 return (NULL); 432 while (*hdr && isspace(*++hdr)) 433 /* nothing */; 434 return (hdr); 435} 436 437/* 438 * Get the next header and return the appropriate symbolic code. 
439 */ 440static hdr_t 441_http_next_header(conn_t *conn, const char **p) 442{ 443 int i; 444 445 if (_fetch_getln(conn) == -1) 446 return (hdr_syserror); 447 while (conn->buflen && isspace(conn->buf[conn->buflen - 1])) 448 conn->buflen--; 449 conn->buf[conn->buflen] = '\0'; 450 if (conn->buflen == 0) 451 return (hdr_end); 452 /* 453 * We could check for malformed headers but we don't really care. 454 * A valid header starts with a token immediately followed by a 455 * colon; a token is any sequence of non-control, non-whitespace 456 * characters except "()<>@,;:\\\"{}". 457 */ 458 for (i = 0; hdr_names[i].num != hdr_unknown; i++) 459 if ((*p = _http_match(hdr_names[i].name, conn->buf)) != NULL) 460 return (hdr_names[i].num); 461 return (hdr_unknown); 462} 463 464/* 465 * Parse a last-modified header 466 */ 467static int 468_http_parse_mtime(const char *p, time_t *mtime) 469{ 470 char locale[64], *r; 471 struct tm tm; 472 473 strncpy(locale, setlocale(LC_TIME, NULL), sizeof(locale)); 474 setlocale(LC_TIME, "C"); 475 r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 476 /* XXX should add support for date-2 and date-3 */ 477 setlocale(LC_TIME, locale); 478 if (r == NULL) 479 return (-1); 480 DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d " 481 "%02d:%02d:%02d]\n", 482 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 483 tm.tm_hour, tm.tm_min, tm.tm_sec)); 484 *mtime = timegm(&tm); 485 return (0); 486} 487 488/* 489 * Parse a content-length header 490 */ 491static int 492_http_parse_length(const char *p, off_t *length) 493{ 494 off_t len; 495 496 for (len = 0; *p && isdigit(*p); ++p) 497 len = len * 10 + (*p - '0'); 498 if (*p) 499 return (-1); 500 DEBUG(fprintf(stderr, "content length: [%lld]\n", 501 (long long)len)); 502 *length = len; 503 return (0); 504} 505 506/* 507 * Parse a content-range header 508 */ 509static int 510_http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size) 511{ 512 off_t first, last, len; 513 514 if (strncasecmp(p, 
"bytes ", 6) != 0) 515 return (-1); 516 for (first = 0, p += 6; *p && isdigit(*p); ++p) 517 first = first * 10 + *p - '0'; 518 if (*p != '-') 519 return (-1); 520 for (last = 0, ++p; *p && isdigit(*p); ++p) 521 last = last * 10 + *p - '0'; 522 if (first > last || *p != '/') 523 return (-1); 524 for (len = 0, ++p; *p && isdigit(*p); ++p) 525 len = len * 10 + *p - '0'; 526 if (*p || len < last - first + 1) 527 return (-1); 528 DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n", 529 (long long)first, (long long)last, (long long)len)); 530 *offset = first; 531 *length = last - first + 1; 532 *size = len; 533 return (0); 534} 535 536 537/***************************************************************************** 538 * Helper functions for authorization 539 */ 540 541/* 542 * Base64 encoding 543 */ 544static char * 545_http_base64(const char *src) 546{ 547 static const char base64[] = 548 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 549 "abcdefghijklmnopqrstuvwxyz" 550 "0123456789+/"; 551 char *str, *dst; 552 size_t l; 553 int t, r; 554 555 l = strlen(src); 556 if ((str = malloc(((l + 2) / 3) * 4)) == NULL) 557 return (NULL); 558 dst = str; 559 r = 0; 560 561 while (l >= 3) { 562 t = (src[0] << 16) | (src[1] << 8) | src[2]; 563 dst[0] = base64[(t >> 18) & 0x3f]; 564 dst[1] = base64[(t >> 12) & 0x3f]; 565 dst[2] = base64[(t >> 6) & 0x3f]; 566 dst[3] = base64[(t >> 0) & 0x3f]; 567 src += 3; l -= 3; 568 dst += 4; r += 4; 569 } 570 571 switch (l) { 572 case 2: 573 t = (src[0] << 16) | (src[1] << 8); 574 dst[0] = base64[(t >> 18) & 0x3f]; 575 dst[1] = base64[(t >> 12) & 0x3f]; 576 dst[2] = base64[(t >> 6) & 0x3f]; 577 dst[3] = '='; 578 dst += 4; 579 r += 4; 580 break; 581 case 1: 582 t = src[0] << 16; 583 dst[0] = base64[(t >> 18) & 0x3f]; 584 dst[1] = base64[(t >> 12) & 0x3f]; 585 dst[2] = dst[3] = '='; 586 dst += 4; 587 r += 4; 588 break; 589 case 0: 590 break; 591 } 592 593 *dst = 0; 594 return (str); 595} 596 597/* 598 * Encode username and password 599 */ 600static int 
601_http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd) 602{ 603 char *upw, *auth; 604 int r; 605 606 DEBUG(fprintf(stderr, "usr: [%s]\n", usr)); 607 DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd)); 608 if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 609 return (-1); 610 auth = _http_base64(upw); 611 free(upw); 612 if (auth == NULL) 613 return (-1); 614 r = _http_cmd(conn, "%s: Basic %s", hdr, auth); 615 free(auth); 616 return (r); 617} 618 619/* 620 * Send an authorization header 621 */ 622static int 623_http_authorize(conn_t *conn, const char *hdr, const char *p) 624{ 625 /* basic authorization */ 626 if (strncasecmp(p, "basic:", 6) == 0) { 627 char *user, *pwd, *str; 628 int r; 629 630 /* skip realm */ 631 for (p += 6; *p && *p != ':'; ++p) 632 /* nothing */ ; 633 if (!*p || strchr(++p, ':') == NULL) 634 return (-1); 635 if ((str = strdup(p)) == NULL) 636 return (-1); /* XXX */ 637 user = str; 638 pwd = strchr(str, ':'); 639 *pwd++ = '\0'; 640 r = _http_basic_auth(conn, hdr, user, pwd); 641 free(str); 642 return (r); 643 } 644 return (-1); 645} 646 647 648/***************************************************************************** 649 * Helper functions for connecting to a server or proxy 650 */ 651 652/* 653 * Connect to the correct HTTP server or proxy. 
654 */ 655static conn_t * 656_http_connect(struct url *URL, struct url *purl, const char *flags) 657{ 658 conn_t *conn; 659 int verbose; 660 int af; 661 662#ifdef INET6 663 af = AF_UNSPEC; 664#else 665 af = AF_INET; 666#endif 667 668 verbose = CHECK_FLAG('v'); 669 if (CHECK_FLAG('4')) 670 af = AF_INET; 671#ifdef INET6 672 else if (CHECK_FLAG('6')) 673 af = AF_INET6; 674#endif 675 676 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) { 677 URL = purl; 678 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 679 /* can't talk http to an ftp server */ 680 /* XXX should set an error code */ 681 return (NULL); 682 } 683 684 if ((conn = _fetch_connect(URL->host, URL->port, af, verbose)) == NULL) 685 /* _fetch_connect() has already set an error code */ 686 return (NULL); 687 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && 688 _fetch_ssl(conn, verbose) == -1) { 689 _fetch_close(conn); 690 /* grrr */ 691 errno = EAUTH; 692 _fetch_syserr(); 693 return (NULL); 694 } 695 return (conn); 696} 697 698static struct url * 699_http_get_proxy(const char *flags) 700{ 701 struct url *purl; 702 char *p; 703 704 if (flags != NULL && strchr(flags, 'd') != NULL) 705 return (NULL); 706 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) && 707 (purl = fetchParseURL(p))) { 708 if (!*purl->scheme) 709 strcpy(purl->scheme, SCHEME_HTTP); 710 if (!purl->port) 711 purl->port = _fetch_default_proxy_port(purl->scheme); 712 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0) 713 return (purl); 714 fetchFreeURL(purl); 715 } 716 return (NULL); 717} 718 719static void 720_http_print_html(FILE *out, FILE *in) 721{ 722 size_t len; 723 char *line, *p, *q; 724 int comment, tag; 725 726 comment = tag = 0; 727 while ((line = fgetln(in, &len)) != NULL) { 728 while (len && isspace(line[len - 1])) 729 --len; 730 for (p = q = line; q < line + len; ++q) { 731 if (comment && *q == '-') { 732 if (q + 2 < line + len && 733 strcmp(q, "-->") == 0) { 734 tag = comment = 0; 735 q += 2; 736 } 737 } else 
if (tag && !comment && *q == '>') { 738 p = q + 1; 739 tag = 0; 740 } else if (!tag && *q == '<') { 741 if (q > p) 742 fwrite(p, q - p, 1, out); 743 tag = 1; 744 if (q + 3 < line + len && 745 strcmp(q, "<!--") == 0) { 746 comment = 1; 747 q += 3; 748 } 749 } 750 } 751 if (!tag && q > p) 752 fwrite(p, q - p, 1, out); 753 fputc('\n', out); 754 } 755} 756 757 758/***************************************************************************** 759 * Core 760 */ 761 762/* 763 * Send a request and process the reply 764 * 765 * XXX This function is way too long, the do..while loop should be split 766 * XXX off into a separate function. 767 */ 768FILE * 769_http_request(struct url *URL, const char *op, struct url_stat *us, 770 struct url *purl, const char *flags) 771{ 772 conn_t *conn; 773 struct url *url, *new; 774 int chunked, direct, need_auth, noredirect, verbose; 775 int e, i, n; 776 off_t offset, clength, length, size; 777 time_t mtime; 778 const char *p; 779 FILE *f; 780 hdr_t h; 781 char hbuf[MAXHOSTNAMELEN + 7], *host; 782 783 direct = CHECK_FLAG('d'); 784 noredirect = CHECK_FLAG('A'); 785 verbose = CHECK_FLAG('v'); 786 787 if (direct && purl) { 788 fetchFreeURL(purl); 789 purl = NULL; 790 } 791 792 /* try the provided URL first */ 793 url = URL; 794 795 /* if the A flag is set, we only get one try */ 796 n = noredirect ? 1 : MAX_REDIRECT; 797 i = 0; 798 799 e = HTTP_PROTOCOL_ERROR; 800 need_auth = 0; 801 do { 802 new = NULL; 803 chunked = 0; 804 offset = 0; 805 clength = -1; 806 length = -1; 807 size = -1; 808 mtime = 0; 809 810 /* check port */ 811 if (!url->port) 812 url->port = _fetch_default_port(url->scheme); 813 814 /* were we redirected to an FTP URL? 
*/ 815 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) { 816 if (strcmp(op, "GET") == 0) 817 return (_ftp_request(url, "RETR", us, purl, flags)); 818 else if (strcmp(op, "HEAD") == 0) 819 return (_ftp_request(url, "STAT", us, purl, flags)); 820 } 821 822 /* connect to server or proxy */ 823 if ((conn = _http_connect(url, purl, flags)) == NULL) 824 goto ouch; 825 826 host = url->host; 827#ifdef INET6 828 if (strchr(url->host, ':')) { 829 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 830 host = hbuf; 831 } 832#endif 833 if (url->port != _fetch_default_port(url->scheme)) { 834 if (host != hbuf) { 835 strcpy(hbuf, host); 836 host = hbuf; 837 } 838 snprintf(hbuf + strlen(hbuf), 839 sizeof(hbuf) - strlen(hbuf), ":%d", url->port); 840 } 841 842 /* send request */ 843 if (verbose) 844 _fetch_info("requesting %s://%s%s", 845 url->scheme, host, url->doc); 846 if (purl) { 847 _http_cmd(conn, "%s %s://%s%s HTTP/1.1", 848 op, url->scheme, host, url->doc); 849 } else { 850 _http_cmd(conn, "%s %s HTTP/1.1", 851 op, url->doc); 852 } 853 854 /* virtual host */ 855 _http_cmd(conn, "Host: %s", host); 856 857 /* proxy authorization */ 858 if (purl) { 859 if (*purl->user || *purl->pwd) 860 _http_basic_auth(conn, "Proxy-Authorization", 861 purl->user, purl->pwd); 862 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0') 863 _http_authorize(conn, "Proxy-Authorization", p); 864 } 865 866 /* server authorization */ 867 if (need_auth || *url->user || *url->pwd) { 868 if (*url->user || *url->pwd) 869 _http_basic_auth(conn, "Authorization", url->user, url->pwd); 870 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0') 871 _http_authorize(conn, "Authorization", p); 872 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) { 873 _http_basic_auth(conn, "Authorization", url->user, url->pwd); 874 } else { 875 _http_seterr(HTTP_NEED_AUTH); 876 goto ouch; 877 } 878 } 879 880 /* other headers */ 881 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') { 882 if 
(strcasecmp(p, "auto") == 0) 883 _http_cmd(conn, "Referer: %s://%s%s", 884 url->scheme, host, url->doc); 885 else 886 _http_cmd(conn, "Referer: %s", p); 887 } 888 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0') 889 _http_cmd(conn, "User-Agent: %s", p); 890 else 891 _http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname()); 892 if (url->offset > 0) 893 _http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset); 894 _http_cmd(conn, "Connection: close"); 895 _http_cmd(conn, ""); 896 897 /* get reply */ 898 switch (_http_get_reply(conn)) { 899 case HTTP_OK: 900 case HTTP_PARTIAL: 901 /* fine */ 902 break; 903 case HTTP_MOVED_PERM: 904 case HTTP_MOVED_TEMP: 905 case HTTP_SEE_OTHER: 906 /* 907 * Not so fine, but we still have to read the 908 * headers to get the new location. 909 */ 910 break; 911 case HTTP_NEED_AUTH: 912 if (need_auth) { 913 /* 914 * We already sent out authorization code, 915 * so there's nothing more we can do. 916 */ 917 _http_seterr(conn->err); 918 goto ouch; 919 } 920 /* try again, but send the password this time */ 921 if (verbose) 922 _fetch_info("server requires authorization"); 923 break; 924 case HTTP_NEED_PROXY_AUTH: 925 /* 926 * If we're talking to a proxy, we already sent 927 * our proxy authorization code, so there's 928 * nothing more we can do. 
929 */ 930 _http_seterr(conn->err); 931 goto ouch; 932 case HTTP_PROTOCOL_ERROR: 933 /* fall through */ 934 case -1: 935 _fetch_syserr(); 936 goto ouch; 937 default: 938 _http_seterr(conn->err); 939 if (!verbose) 940 goto ouch; 941 /* fall through so we can get the full error message */ 942 } 943 944 /* get headers */ 945 do { 946 switch ((h = _http_next_header(conn, &p))) { 947 case hdr_syserror: 948 _fetch_syserr(); 949 goto ouch; 950 case hdr_error: 951 _http_seterr(HTTP_PROTOCOL_ERROR); 952 goto ouch; 953 case hdr_content_length: 954 _http_parse_length(p, &clength); 955 break; 956 case hdr_content_range: 957 _http_parse_range(p, &offset, &length, &size); 958 break; 959 case hdr_last_modified: 960 _http_parse_mtime(p, &mtime); 961 break; 962 case hdr_location: 963 if (!HTTP_REDIRECT(conn->err)) 964 break; 965 if (new) 966 free(new); 967 if (verbose) 968 _fetch_info("%d redirect to %s", conn->err, p); 969 if (*p == '/') 970 /* absolute path */ 971 new = fetchMakeURL(url->scheme, url->host, url->port, p, 972 url->user, url->pwd); 973 else 974 new = fetchParseURL(p); 975 if (new == NULL) { 976 /* XXX should set an error code */ 977 DEBUG(fprintf(stderr, "failed to parse new URL\n")); 978 goto ouch; 979 } 980 if (!*new->user && !*new->pwd) { 981 strcpy(new->user, url->user); 982 strcpy(new->pwd, url->pwd); 983 } 984 new->offset = url->offset; 985 new->length = url->length; 986 break; 987 case hdr_transfer_encoding: 988 /* XXX weak test*/ 989 chunked = (strcasecmp(p, "chunked") == 0); 990 break; 991 case hdr_www_authenticate: 992 if (conn->err != HTTP_NEED_AUTH) 993 break; 994 /* if we were smarter, we'd check the method and realm */ 995 break; 996 case hdr_end: 997 /* fall through */ 998 case hdr_unknown: 999 /* ignore */ 1000 break; 1001 } 1002 } while (h > hdr_end); 1003 1004 /* we need to provide authentication */ 1005 if (conn->err == HTTP_NEED_AUTH) { 1006 e = conn->err; 1007 need_auth = 1; 1008 _fetch_close(conn); 1009 conn = NULL; 1010 continue; 1011 } 1012 
1013 /* we have a hit or an error */ 1014 if (conn->err == HTTP_OK || conn->err == HTTP_PARTIAL || HTTP_ERROR(conn->err)) 1015 break; 1016 1017 /* all other cases: we got a redirect */ 1018 e = conn->err; 1019 need_auth = 0; 1020 _fetch_close(conn); 1021 conn = NULL; 1022 if (!new) { 1023 DEBUG(fprintf(stderr, "redirect with no new location\n")); 1024 break; 1025 } 1026 if (url != URL) 1027 fetchFreeURL(url); 1028 url = new; 1029 } while (++i < n); 1030 1031 /* we failed, or ran out of retries */ 1032 if (conn == NULL) { 1033 _http_seterr(e); 1034 goto ouch; 1035 } 1036 1037 DEBUG(fprintf(stderr, "offset %lld, length %lld," 1038 " size %lld, clength %lld\n", 1039 (long long)offset, (long long)length, 1040 (long long)size, (long long)clength)); 1041 1042 /* check for inconsistencies */ 1043 if (clength != -1 && length != -1 && clength != length) { 1044 _http_seterr(HTTP_PROTOCOL_ERROR); 1045 goto ouch; 1046 } 1047 if (clength == -1) 1048 clength = length; 1049 if (clength != -1) 1050 length = offset + clength; 1051 if (length != -1 && size != -1 && length != size) { 1052 _http_seterr(HTTP_PROTOCOL_ERROR); 1053 goto ouch; 1054 } 1055 if (size == -1) 1056 size = length; 1057 1058 /* fill in stats */ 1059 if (us) { 1060 us->size = size; 1061 us->atime = us->mtime = mtime; 1062 } 1063 1064 /* too far? 
*/ 1065 if (URL->offset > 0 && offset > URL->offset) { 1066 _http_seterr(HTTP_PROTOCOL_ERROR); 1067 goto ouch; 1068 } 1069 1070 /* report back real offset and size */ 1071 URL->offset = offset; 1072 URL->length = clength; 1073 1074 /* wrap it up in a FILE */ 1075 if ((f = _http_funopen(conn, chunked)) == NULL) { 1076 _fetch_syserr(); 1077 goto ouch; 1078 } 1079 1080 if (url != URL) 1081 fetchFreeURL(url); 1082 if (purl) 1083 fetchFreeURL(purl); 1084 1085 if (HTTP_ERROR(conn->err)) { 1086 _http_print_html(stderr, f); 1087 fclose(f); 1088 f = NULL; 1089 } 1090 1091 return (f); 1092 1093ouch: 1094 if (url != URL) 1095 fetchFreeURL(url); 1096 if (purl) 1097 fetchFreeURL(purl); 1098 if (conn != NULL) 1099 _fetch_close(conn); 1100 return (NULL); 1101} 1102 1103 1104/***************************************************************************** 1105 * Entry points 1106 */ 1107 1108/* 1109 * Retrieve and stat a file by HTTP 1110 */ 1111FILE * 1112fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags) 1113{ 1114 return (_http_request(URL, "GET", us, _http_get_proxy(flags), flags)); 1115} 1116 1117/* 1118 * Retrieve a file by HTTP 1119 */ 1120FILE * 1121fetchGetHTTP(struct url *URL, const char *flags) 1122{ 1123 return (fetchXGetHTTP(URL, NULL, flags)); 1124} 1125 1126/* 1127 * Store a file by HTTP 1128 */ 1129FILE * 1130fetchPutHTTP(struct url *URL __unused, const char *flags __unused) 1131{ 1132 warnx("fetchPutHTTP(): not implemented"); 1133 return (NULL); 1134} 1135 1136/* 1137 * Get an HTTP document's metadata 1138 */ 1139int 1140fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1141{ 1142 FILE *f; 1143 1144 f = _http_request(URL, "HEAD", us, _http_get_proxy(flags), flags); 1145 if (f == NULL) 1146 return (-1); 1147 fclose(f); 1148 return (0); 1149} 1150 1151/* 1152 * List a directory 1153 */ 1154struct url_ent * 1155fetchListHTTP(struct url *url __unused, const char *flags __unused) 1156{ 1157 warnx("fetchListHTTP(): not 
implemented"); 1158 return (NULL); 1159} 1160