http.c revision 63281
137535Sdes/*- 263012Sdes * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1563012Sdes * derived from this software without specific prior written permission. 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2737535Sdes * 2863012Sdes * $FreeBSD: head/lib/libfetch/http.c 63281 2000-07-16 23:18:44Z des $ 2937535Sdes */ 3037535Sdes 3163236Sdes/* 3263236Sdes * The following copyright applies to the base64 code: 3363236Sdes * 3463236Sdes *- 3563236Sdes * Copyright 1997 Massachusetts Institute of Technology 3663236Sdes * 3763236Sdes * Permission to use, copy, modify, and distribute this software and 3863236Sdes * its documentation for any purpose and without fee is hereby 3963236Sdes * granted, provided that both the above copyright notice and this 4063236Sdes * permission notice appear in all copies, that both the above 4163236Sdes * copyright notice and this permission notice appear in all 4263236Sdes * supporting documentation, and that the name of M.I.T. not be used 4363236Sdes * in advertising or publicity pertaining to distribution of the 4463236Sdes * software without specific, written prior permission. M.I.T. makes 4563236Sdes * no representations about the suitability of this software for any 4663236Sdes * purpose. It is provided "as is" without express or implied 4763236Sdes * warranty. 4863236Sdes * 4963236Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 5063236Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 5163236Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 5263236Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 5363236Sdes * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 5463236Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 5563236Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5663236Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 5763236Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 5863236Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 5963236Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6063236Sdes * SUCH DAMAGE. 6163236Sdes */ 6263236Sdes 6337535Sdes#include <sys/param.h> 6460737Sume#include <sys/socket.h> 6537535Sdes 6663012Sdes#include <ctype.h> 6737535Sdes#include <err.h> 6863012Sdes#include <errno.h> 6960376Sdes#include <locale.h> 7060189Sdes#include <netdb.h> 7137608Sdes#include <stdarg.h> 7237535Sdes#include <stdio.h> 7337535Sdes#include <stdlib.h> 7437535Sdes#include <string.h> 7560376Sdes#include <time.h> 7637535Sdes#include <unistd.h> 7737535Sdes 7837535Sdes#include "fetch.h" 7940939Sdes#include "common.h" 8041862Sdes#include "httperr.h" 8137535Sdes 8263012Sdesextern char *__progname; /* XXX not portable */ 8337535Sdes 8463012Sdes/* Maximum number of redirects to follow */ 8563012Sdes#define MAX_REDIRECT 5 8637535Sdes 8763012Sdes/* Symbolic names for reply codes we care about */ 8863012Sdes#define HTTP_OK 200 8963012Sdes#define HTTP_PARTIAL 206 9063012Sdes#define HTTP_MOVED_PERM 301 9163012Sdes#define HTTP_MOVED_TEMP 302 9263012Sdes#define HTTP_SEE_OTHER 303 9363012Sdes#define HTTP_NEED_AUTH 401 9463012Sdes#define HTTP_NEED_PROXY_AUTH 403 9563012Sdes#define HTTP_PROTOCOL_ERROR 999 9660196Sdes 9763012Sdes#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 9863012Sdes || (xyz) == HTTP_MOVED_TEMP \ 9963012Sdes || (xyz) == HTTP_SEE_OTHER) 10063012Sdes 10163012Sdes 10263012Sdes 10363012Sdes/***************************************************************************** 10463012Sdes * I/O functions for decoding chunked 
streams 10563012Sdes */ 10663012Sdes 10737535Sdesstruct cookie 10837535Sdes{ 10963012Sdes int fd; 11063012Sdes char *buf; 11163012Sdes size_t b_size; 11263012Sdes size_t b_len; 11363012Sdes int b_pos; 11463012Sdes int eof; 11563012Sdes int error; 11663012Sdes long chunksize; 11763281Sdes#ifndef NDEBUG 11863012Sdes long total; 11963012Sdes#endif 12037535Sdes}; 12137535Sdes 12237608Sdes/* 12363012Sdes * Get next chunk header 12437608Sdes */ 12537608Sdesstatic int 12663012Sdes_http_new_chunk(struct cookie *c) 12737608Sdes{ 12863012Sdes char *p; 12937608Sdes 13063012Sdes if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1) 13163012Sdes return -1; 13263012Sdes 13363012Sdes if (c->b_len < 2 || !ishexnumber(*c->buf)) 13463012Sdes return -1; 13563012Sdes 13663012Sdes for (p = c->buf; !isspace(*p) && *p != ';' && p < c->buf + c->b_len; ++p) 13763012Sdes if (!ishexnumber(*p)) 13863012Sdes return -1; 13963012Sdes else if (isdigit(*p)) 14063012Sdes c->chunksize = c->chunksize * 16 + *p - '0'; 14163012Sdes else 14263012Sdes c->chunksize = c->chunksize * 16 + 10 + tolower(*p) - 'a'; 14363012Sdes 14463281Sdes#ifndef NDEBUG 14563012Sdes c->total += c->chunksize; 14663012Sdes if (c->chunksize == 0) 14763012Sdes fprintf(stderr, "\033[1m_http_fillbuf(): " 14863012Sdes "end of last chunk\033[m\n"); 14963012Sdes else 15063012Sdes fprintf(stderr, "\033[1m_http_fillbuf(): " 15163012Sdes "new chunk: %ld (%ld)\033[m\n", c->chunksize, c->total); 15263012Sdes#endif 15363012Sdes 15463012Sdes return c->chunksize; 15537608Sdes} 15637608Sdes 15737608Sdes/* 15837608Sdes * Fill the input buffer, do chunk decoding on the fly 15937608Sdes */ 16063012Sdesstatic int 16137535Sdes_http_fillbuf(struct cookie *c) 16237535Sdes{ 16363012Sdes if (c->error) 16463012Sdes return -1; 16537535Sdes if (c->eof) 16663012Sdes return 0; 16763012Sdes 16863012Sdes if (c->chunksize == 0) { 16963012Sdes switch (_http_new_chunk(c)) { 17063012Sdes case -1: 17163012Sdes c->error = 1; 17263012Sdes return -1; 
17363012Sdes case 0: 17463012Sdes c->eof = 1; 17563012Sdes return 0; 17637535Sdes } 17737535Sdes } 17863012Sdes 17963012Sdes if (c->b_size < c->chunksize) { 18063012Sdes char *tmp; 18163012Sdes 18263012Sdes if ((tmp = realloc(c->buf, c->chunksize)) == NULL) 18363012Sdes return -1; 18463012Sdes c->buf = tmp; 18563012Sdes c->b_size = c->chunksize; 18663012Sdes } 18763012Sdes 18863012Sdes if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1) 18963012Sdes return -1; 19063012Sdes c->chunksize -= c->b_len; 19163012Sdes 19263012Sdes if (c->chunksize == 0) { 19363012Sdes char endl[2]; 19463012Sdes read(c->fd, endl, 2); 19563012Sdes } 19663012Sdes 19763012Sdes c->b_pos = 0; 19863012Sdes 19963012Sdes return c->b_len; 20037535Sdes} 20137535Sdes 20237608Sdes/* 20337608Sdes * Read function 20437608Sdes */ 20537535Sdesstatic int 20663012Sdes_http_readfn(void *v, char *buf, int len) 20737535Sdes{ 20863012Sdes struct cookie *c = (struct cookie *)v; 20963012Sdes int l, pos; 21063012Sdes 21163012Sdes if (c->error) 21263012Sdes return -1; 21363012Sdes if (c->eof) 21463012Sdes return 0; 21563012Sdes 21663012Sdes for (pos = 0; len > 0; pos += l, len -= l) { 21737535Sdes /* empty buffer */ 21863012Sdes if (!c->buf || c->b_pos == c->b_len) 21963012Sdes if (_http_fillbuf(c) < 1) 22037535Sdes break; 22163012Sdes l = c->b_len - c->b_pos; 22263012Sdes if (len < l) 22363012Sdes l = len; 22463012Sdes bcopy(c->buf + c->b_pos, buf + pos, l); 22563012Sdes c->b_pos += l; 22663012Sdes } 22737535Sdes 22863012Sdes if (!pos && c->error) 22937535Sdes return -1; 23063012Sdes return pos; 23137535Sdes} 23237535Sdes 23337608Sdes/* 23437608Sdes * Write function 23537608Sdes */ 23637535Sdesstatic int 23763012Sdes_http_writefn(void *v, const char *buf, int len) 23837535Sdes{ 23963012Sdes struct cookie *c = (struct cookie *)v; 24063012Sdes 24163012Sdes return write(c->fd, buf, len); 24237535Sdes} 24337535Sdes 24437608Sdes/* 24537608Sdes * Close function 24637608Sdes */ 24737535Sdesstatic int 
24863012Sdes_http_closefn(void *v) 24937535Sdes{ 25063012Sdes struct cookie *c = (struct cookie *)v; 25163012Sdes int r; 25263012Sdes 25363012Sdes r = close(c->fd); 25463012Sdes if (c->buf) 25563012Sdes free(c->buf); 25637535Sdes free(c); 25763012Sdes return r; 25837535Sdes} 25937535Sdes 26037608Sdes/* 26163012Sdes * Wrap a file descriptor up 26237608Sdes */ 26363012Sdesstatic FILE * 26463012Sdes_http_funopen(int fd) 26537535Sdes{ 26663012Sdes struct cookie *c; 26763012Sdes FILE *f; 26863012Sdes 26963012Sdes if ((c = calloc(1, sizeof *c)) == NULL) { 27063012Sdes _fetch_syserr(); 27163012Sdes return NULL; 27263012Sdes } 27363012Sdes c->fd = fd; 27463012Sdes if (!(f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn))) { 27563012Sdes _fetch_syserr(); 27663012Sdes free(c); 27763012Sdes return NULL; 27863012Sdes } 27963012Sdes return f; 28063012Sdes} 28163012Sdes 28263012Sdes 28363012Sdes/***************************************************************************** 28463012Sdes * Helper functions for talking to the server and parsing its replies 28563012Sdes */ 28663012Sdes 28763012Sdes/* Header types */ 28863012Sdestypedef enum { 28963012Sdes hdr_syserror = -2, 29063012Sdes hdr_error = -1, 29163012Sdes hdr_end = 0, 29263012Sdes hdr_unknown = 1, 29363012Sdes hdr_content_length, 29463012Sdes hdr_content_range, 29563012Sdes hdr_last_modified, 29663012Sdes hdr_location, 29763012Sdes hdr_transfer_encoding 29863012Sdes} hdr; 29963012Sdes 30063012Sdes/* Names of interesting headers */ 30163012Sdesstatic struct { 30263012Sdes hdr num; 30363012Sdes char *name; 30463012Sdes} hdr_names[] = { 30563012Sdes { hdr_content_length, "Content-Length" }, 30663012Sdes { hdr_content_range, "Content-Range" }, 30763012Sdes { hdr_last_modified, "Last-Modified" }, 30863012Sdes { hdr_location, "Location" }, 30963012Sdes { hdr_transfer_encoding, "Transfer-Encoding" }, 31063012Sdes { hdr_unknown, NULL }, 31163012Sdes}; 31263012Sdes 31363012Sdesstatic char *reply_buf; 31463012Sdesstatic 
size_t reply_size; 31563012Sdesstatic size_t reply_length; 31663012Sdes 31763012Sdes/* 31863012Sdes * Send a formatted line; optionally echo to terminal 31963012Sdes */ 32063012Sdesstatic int 32163012Sdes_http_cmd(int fd, char *fmt, ...) 32263012Sdes{ 32363012Sdes va_list ap; 32463012Sdes size_t len; 32563012Sdes char *msg; 32663012Sdes int r; 32763012Sdes 32863012Sdes va_start(ap, fmt); 32963012Sdes len = vasprintf(&msg, fmt, ap); 33063012Sdes va_end(ap); 33163012Sdes 33263012Sdes if (msg == NULL) { 33363012Sdes errno = ENOMEM; 33463012Sdes _fetch_syserr(); 33563012Sdes return -1; 33663012Sdes } 33763012Sdes 33863012Sdes r = _fetch_putln(fd, msg, len); 33963012Sdes free(msg); 34063012Sdes 34163012Sdes if (r == -1) { 34263012Sdes _fetch_syserr(); 34363012Sdes return -1; 34463012Sdes } 34563012Sdes 34663012Sdes return 0; 34763012Sdes} 34863012Sdes 34963012Sdes/* 35063012Sdes * Get and parse status line 35163012Sdes */ 35263012Sdesstatic int 35363012Sdes_http_get_reply(int fd) 35463012Sdes{ 35563012Sdes if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1) 35663012Sdes return -1; 35737535Sdes /* 35863012Sdes * A valid status line looks like "HTTP/m.n xyz reason" where m 35963012Sdes * and n are the major and minor protocol version numbers and xyz 36063012Sdes * is the reply code. 36163012Sdes * We grok HTTP 1.0 and 1.1, so m must be 1 and n must be 0 or 1. 36263012Sdes * We don't care about the reason phrase. 
36337535Sdes */ 36463012Sdes if (strncmp(reply_buf, "HTTP/1.", 7) != 0 36563012Sdes || (reply_buf[7] != '0' && reply_buf[7] != '1') || reply_buf[8] != ' ' 36663012Sdes || !isdigit(reply_buf[9]) 36763012Sdes || !isdigit(reply_buf[10]) 36863012Sdes || !isdigit(reply_buf[11])) 36963012Sdes return HTTP_PROTOCOL_ERROR; 37063012Sdes 37163012Sdes return ((reply_buf[9] - '0') * 100 37263012Sdes + (reply_buf[10] - '0') * 10 37363012Sdes + (reply_buf[11] - '0')); 37437535Sdes} 37537535Sdes 37637608Sdes/* 37763012Sdes * Check a header; if the type matches the given string, return a 37863012Sdes * pointer to the beginning of the value. 37963012Sdes */ 38063012Sdesstatic char * 38163012Sdes_http_match(char *str, char *hdr) 38263012Sdes{ 38363012Sdes while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 38463012Sdes /* nothing */; 38563012Sdes if (*str || *hdr != ':') 38663012Sdes return NULL; 38763012Sdes while (*hdr && isspace(*++hdr)) 38863012Sdes /* nothing */; 38963012Sdes return hdr; 39063012Sdes} 39163012Sdes 39263012Sdes/* 39363012Sdes * Get the next header and return the appropriate symbolic code. 39463012Sdes */ 39563012Sdesstatic hdr 39663012Sdes_http_next_header(int fd, char **p) 39763012Sdes{ 39863012Sdes int i; 39963012Sdes 40063012Sdes if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1) 40163012Sdes return hdr_syserror; 40263012Sdes while (reply_length && isspace(reply_buf[reply_length-1])) 40363012Sdes reply_length--; 40463012Sdes reply_buf[reply_length] = 0; 40563012Sdes if (reply_length == 0) 40663012Sdes return hdr_end; 40763012Sdes /* 40863012Sdes * We could check for malformed headers but we don't really care. 40963012Sdes * A valid header starts with a token immediately followed by a 41063012Sdes * colon; a token is any sequence of non-control, non-whitespace 41163012Sdes * characters except "()<>@,;:\\\"{}". 
41263012Sdes */ 41363012Sdes for (i = 0; hdr_names[i].num != hdr_unknown; i++) 41463012Sdes if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL) 41563012Sdes return hdr_names[i].num; 41663012Sdes return hdr_unknown; 41763012Sdes} 41863012Sdes 41963012Sdes/* 42063012Sdes * Parse a last-modified header 42163012Sdes */ 42263012Sdesstatic time_t 42363012Sdes_http_parse_mtime(char *p) 42463012Sdes{ 42563012Sdes char locale[64]; 42663012Sdes struct tm tm; 42763012Sdes 42863012Sdes strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 42963012Sdes setlocale(LC_TIME, "C"); 43063012Sdes strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 43163012Sdes /* XXX should add support for date-2 and date-3 */ 43263012Sdes setlocale(LC_TIME, locale); 43363012Sdes DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " 43463012Sdes "%02d:%02d:%02d\033[m]\n", 43563012Sdes tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 43663012Sdes tm.tm_hour, tm.tm_min, tm.tm_sec)); 43763012Sdes return timegm(&tm); 43863012Sdes} 43963012Sdes 44063012Sdes/* 44163012Sdes * Parse a content-length header 44263012Sdes */ 44363012Sdesstatic off_t 44463012Sdes_http_parse_length(char *p) 44563012Sdes{ 44663012Sdes off_t len; 44763012Sdes 44863012Sdes for (len = 0; *p && isdigit(*p); ++p) 44963012Sdes len = len * 10 + (*p - '0'); 45063012Sdes DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", len)); 45163012Sdes return len; 45263012Sdes} 45363012Sdes 45463012Sdes/* 45563012Sdes * Parse a content-range header 45663012Sdes */ 45763012Sdesstatic off_t 45863012Sdes_http_parse_range(char *p) 45963012Sdes{ 46063012Sdes off_t off; 46163012Sdes 46263012Sdes if (strncasecmp(p, "bytes ", 6) != 0) 46363012Sdes return -1; 46463012Sdes for (p += 6, off = 0; *p && isdigit(*p); ++p) 46563012Sdes off = off * 10 + *p - '0'; 46663012Sdes if (*p != '-') 46763012Sdes return -1; 46863012Sdes DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", off)); 46963012Sdes return off; 47063012Sdes} 
47163012Sdes 47263012Sdes 47363012Sdes/***************************************************************************** 47463012Sdes * Helper functions for authorization 47563012Sdes */ 47663012Sdes 47763012Sdes/* 47837608Sdes * Base64 encoding 47937608Sdes */ 48062965Sdesstatic char * 48162965Sdes_http_base64(char *src) 48237608Sdes{ 48337608Sdes static const char base64[] = 48437608Sdes "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 48537608Sdes "abcdefghijklmnopqrstuvwxyz" 48637608Sdes "0123456789+/"; 48762965Sdes char *str, *dst; 48862965Sdes size_t l; 48962965Sdes int t, r; 49062965Sdes 49162965Sdes l = strlen(src); 49262965Sdes if ((str = malloc(((l + 2) / 3) * 4)) == NULL) 49362965Sdes return NULL; 49462965Sdes dst = str; 49562965Sdes r = 0; 49637608Sdes 49737608Sdes while (l >= 3) { 49837608Sdes t = (src[0] << 16) | (src[1] << 8) | src[2]; 49937608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 50037608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 50137608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 50237608Sdes dst[3] = base64[(t >> 0) & 0x3f]; 50337608Sdes src += 3; l -= 3; 50437608Sdes dst += 4; r += 4; 50537608Sdes } 50637608Sdes 50737608Sdes switch (l) { 50837608Sdes case 2: 50937608Sdes t = (src[0] << 16) | (src[1] << 8); 51037608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 51137608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 51237608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 51337608Sdes dst[3] = '='; 51437608Sdes dst += 4; 51537608Sdes r += 4; 51637608Sdes break; 51737608Sdes case 1: 51837608Sdes t = src[0] << 16; 51937608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 52037608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 52137608Sdes dst[2] = dst[3] = '='; 52237608Sdes dst += 4; 52337608Sdes r += 4; 52437608Sdes break; 52537608Sdes case 0: 52637608Sdes break; 52737608Sdes } 52837608Sdes 52937608Sdes *dst = 0; 53062965Sdes return str; 53137608Sdes} 53237608Sdes 53337608Sdes/* 53437608Sdes * Encode username and password 53537608Sdes */ 53662965Sdesstatic int 53763012Sdes_http_basic_auth(int fd, char *hdr, char *usr, 
char *pwd) 53837608Sdes{ 53962965Sdes char *upw, *auth; 54062965Sdes int r; 54137608Sdes 54262965Sdes if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 54362965Sdes return -1; 54462965Sdes auth = _http_base64(upw); 54562965Sdes free(upw); 54662965Sdes if (auth == NULL) 54762965Sdes return -1; 54863012Sdes r = _http_cmd(fd, "%s: Basic %s", hdr, auth); 54962965Sdes free(auth); 55062965Sdes return r; 55162965Sdes} 55262965Sdes 55362965Sdes/* 55462965Sdes * Send an authorization header 55562965Sdes */ 55662965Sdesstatic int 55763012Sdes_http_authorize(int fd, char *hdr, char *p) 55862965Sdes{ 55962965Sdes /* basic authorization */ 56062965Sdes if (strncasecmp(p, "basic:", 6) == 0) { 56162965Sdes char *user, *pwd, *str; 56262965Sdes int r; 56362965Sdes 56462965Sdes /* skip realm */ 56562965Sdes for (p += 6; *p && *p != ':'; ++p) 56662965Sdes /* nothing */ ; 56762965Sdes if (!*p || strchr(++p, ':') == NULL) 56862965Sdes return -1; 56962965Sdes if ((str = strdup(p)) == NULL) 57062965Sdes return -1; /* XXX */ 57162965Sdes user = str; 57262965Sdes pwd = strchr(str, ':'); 57362965Sdes *pwd++ = '\0'; 57463012Sdes r = _http_basic_auth(fd, hdr, user, pwd); 57562965Sdes free(str); 57662965Sdes return r; 57762811Sdes } 57862965Sdes return -1; 57937608Sdes} 58037608Sdes 58163012Sdes 58263012Sdes/***************************************************************************** 58363012Sdes * Helper functions for connecting to a server or proxy 58463012Sdes */ 58563012Sdes 58637608Sdes/* 58763012Sdes * Connect to the specified HTTP proxy server. 
58837608Sdes */ 58963012Sdesstatic int 59063012Sdes_http_proxy_connect(char *proxy, int af, int verbose) 59137535Sdes{ 59263012Sdes char *hostname, *p; 59363012Sdes int fd, port; 59463012Sdes 59563012Sdes /* get hostname */ 59663012Sdes hostname = NULL; 59760737Sume#ifdef INET6 59863012Sdes /* host part can be an IPv6 address enclosed in square brackets */ 59963012Sdes if (*proxy == '[') { 60063012Sdes if ((p = strchr(proxy, ']')) == NULL) { 60163012Sdes /* no terminating bracket */ 60263012Sdes /* XXX should set an error code */ 60363012Sdes goto ouch; 60463012Sdes } 60563012Sdes if (p[1] != '\0' && p[1] != ':') { 60663012Sdes /* garbage after address */ 60763012Sdes /* XXX should set an error code */ 60863012Sdes goto ouch; 60963012Sdes } 61063012Sdes if ((hostname = malloc(p - proxy)) == NULL) { 61163012Sdes errno = ENOMEM; 61263012Sdes _fetch_syserr(); 61363012Sdes goto ouch; 61463012Sdes } 61563012Sdes strncpy(hostname, proxy + 1, p - proxy - 1); 61663012Sdes hostname[p - proxy - 1] = '\0'; 61763012Sdes ++p; 61863012Sdes } else { 61963012Sdes#endif /* INET6 */ 62063012Sdes if ((p = strchr(proxy, ':')) == NULL) 62163012Sdes p = strchr(proxy, '\0'); 62263012Sdes if ((hostname = malloc(p - proxy + 1)) == NULL) { 62363012Sdes errno = ENOMEM; 62463012Sdes _fetch_syserr(); 62563012Sdes goto ouch; 62663012Sdes } 62763012Sdes strncpy(hostname, proxy, p - proxy); 62863012Sdes hostname[p - proxy] = '\0'; 62963012Sdes#ifdef INET6 63063012Sdes } 63163012Sdes#endif /* INET6 */ 63263012Sdes DEBUG(fprintf(stderr, "proxy name: [%s]\n", hostname)); 63363012Sdes 63463012Sdes /* get port number */ 63563012Sdes port = 0; 63663012Sdes if (*p == ':') { 63763012Sdes ++p; 63863012Sdes if (strspn(p, "0123456789") != strlen(p) || strlen(p) > 5) { 63963012Sdes /* port number is non-numeric or too long */ 64063012Sdes /* XXX should set an error code */ 64163012Sdes goto ouch; 64263012Sdes } 64363012Sdes port = atoi(p); 64463012Sdes if (port < 1 || port > 65535) { 64563012Sdes /* port 
number is out of range */ 64663012Sdes /* XXX should set an error code */ 64763012Sdes goto ouch; 64863012Sdes } 64963012Sdes } 65063012Sdes 65163012Sdes if (!port) { 65263012Sdes#if 0 65363012Sdes /* 65463012Sdes * commented out, since there is currently no service name 65563012Sdes * for HTTP proxies 65663012Sdes */ 65763012Sdes struct servent *se; 65863012Sdes 65963012Sdes if ((se = getservbyname("xxxx", "tcp")) != NULL) 66063012Sdes port = ntohs(se->s_port); 66163012Sdes else 66263012Sdes#endif 66363012Sdes port = 3128; 66463012Sdes } 66563012Sdes DEBUG(fprintf(stderr, "proxy port: %d\n", port)); 66663012Sdes 66763012Sdes /* connect */ 66863012Sdes if ((fd = _fetch_connect(hostname, port, af, verbose)) == -1) 66963012Sdes _fetch_syserr(); 67063012Sdes return fd; 67163012Sdes 67263012Sdes ouch: 67363012Sdes if (hostname) 67463012Sdes free(hostname); 67563012Sdes return -1; 67663012Sdes} 67763012Sdes 67863012Sdes/* 67963012Sdes * Connect to the correct HTTP server or proxy. 68063012Sdes */ 68163012Sdesstatic int 68263012Sdes_http_connect(struct url *URL, int *proxy, char *flags) 68363012Sdes{ 68463012Sdes int direct, verbose; 68563012Sdes int af, fd; 68663012Sdes char *p; 68763012Sdes 68863012Sdes#ifdef INET6 68963012Sdes af = AF_UNSPEC; 69060737Sume#else 69163012Sdes af = AF_INET; 69260737Sume#endif 69363012Sdes 69455544Sdes direct = (flags && strchr(flags, 'd')); 69555544Sdes verbose = (flags && strchr(flags, 'v')); 69663012Sdes if (flags && strchr(flags, '4')) 69760737Sume af = AF_INET; 69863012Sdes else if (flags && strchr(flags, '6')) 69960737Sume af = AF_INET6; 70041862Sdes 70137535Sdes /* check port */ 70260189Sdes if (!URL->port) { 70360189Sdes struct servent *se; 70460189Sdes 70563012Sdes /* Scheme can be ftp if we're using a proxy */ 70660587Sume if (strcasecmp(URL->scheme, "ftp") == 0) 70760587Sume if ((se = getservbyname("ftp", "tcp")) != NULL) 70860587Sume URL->port = ntohs(se->s_port); 70960587Sume else 71060587Sume URL->port = 21; 71160189Sdes else 
71260587Sume if ((se = getservbyname("http", "tcp")) != NULL) 71360587Sume URL->port = ntohs(se->s_port); 71460587Sume else 71560587Sume URL->port = 80; 71660189Sdes } 71737535Sdes 71863012Sdes if (!direct && (p = getenv("HTTP_PROXY")) != NULL) { 71963012Sdes /* attempt to connect to proxy server */ 72063012Sdes if ((fd = _http_proxy_connect(p, af, verbose)) == -1) 72163012Sdes return -1; 72263012Sdes *proxy = 1; 72363012Sdes } else { 72463012Sdes /* if no proxy is configured, try direct */ 72563012Sdes if (strcasecmp(URL->scheme, "ftp") == 0) { 72663012Sdes /* can't talk http to an ftp server */ 72763012Sdes /* XXX should set an error code */ 72863012Sdes return -1; 72960189Sdes } 73063012Sdes if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1) 73163012Sdes /* _fetch_connect() has already set an error code */ 73263012Sdes return -1; 73363012Sdes *proxy = 0; 73437535Sdes } 73537535Sdes 73663012Sdes return fd; 73760376Sdes} 73860376Sdes 73963012Sdes 74063012Sdes/***************************************************************************** 74163012Sdes * Core 74260954Sdes */ 74360954Sdes 74460954Sdes/* 74563012Sdes * Send a request and process the reply 74660376Sdes */ 74763012Sdesstatic FILE * 74863012Sdes_http_request(struct url *URL, char *op, struct url_stat *us, char *flags) 74960376Sdes{ 75063012Sdes struct url *url, *new; 75163012Sdes int chunked, need_auth, noredirect, proxy, verbose; 75263012Sdes int code, fd, i, n; 75363012Sdes off_t offset; 75463012Sdes char *p; 75563012Sdes FILE *f; 75663012Sdes hdr h; 75760737Sume char *host; 75860737Sume#ifdef INET6 75960737Sume char hbuf[MAXHOSTNAMELEN + 1]; 76060737Sume#endif 76163012Sdes 76263012Sdes noredirect = (flags && strchr(flags, 'A')); 76360376Sdes verbose = (flags && strchr(flags, 'v')); 76460737Sume 76563012Sdes n = noredirect ? 
1 : MAX_REDIRECT; 76663012Sdes 76763069Sdes /* just to appease compiler warnings */ 76863069Sdes code = HTTP_PROTOCOL_ERROR; 76963012Sdes chunked = 0; 77063012Sdes offset = 0; 77163012Sdes fd = -1; 77263012Sdes 77363012Sdes for (url = URL, i = 0; i < n; ++i) { 77463069Sdes new = NULL; 77563069Sdes us->size = -1; 77663069Sdes us->atime = us->mtime = 0; 77763069Sdes chunked = 0; 77863012Sdes need_auth = 0; 77963069Sdes offset = 0; 78063069Sdes fd = -1; 78163012Sdes retry: 78263012Sdes /* connect to server or proxy */ 78363012Sdes if ((fd = _http_connect(url, &proxy, flags)) == -1) 78463012Sdes goto ouch; 78563012Sdes 78663012Sdes host = url->host; 78760737Sume#ifdef INET6 78863012Sdes if (strchr(url->host, ':')) { 78963012Sdes snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 79063012Sdes host = hbuf; 79163012Sdes } 79260737Sume#endif 79337535Sdes 79463012Sdes /* send request */ 79563012Sdes if (verbose) 79663012Sdes _fetch_info("requesting %s://%s:%d%s", 79763012Sdes url->scheme, host, url->port, url->doc); 79863012Sdes if (proxy) { 79963012Sdes _http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1", 80063012Sdes op, url->scheme, host, url->port, url->doc); 80163012Sdes } else { 80263012Sdes _http_cmd(fd, "%s %s HTTP/1.1", 80363012Sdes op, url->doc); 80463012Sdes } 80537535Sdes 80663012Sdes /* proxy authorization */ 80763012Sdes if (proxy && (p = getenv("HTTP_PROXY_AUTH")) != NULL) 80863012Sdes _http_authorize(fd, "Proxy-Authorization", p); 80963012Sdes 81063012Sdes /* server authorization */ 81163012Sdes if (need_auth) { 81263012Sdes if (*url->user || *url->pwd) 81363012Sdes _http_basic_auth(fd, "Authorization", 81463012Sdes url->user ? url->user : "", 81563012Sdes url->pwd ? 
url->pwd : ""); 81663012Sdes else if ((p = getenv("HTTP_AUTH")) != NULL) 81763012Sdes _http_authorize(fd, "Authorization", p); 81863012Sdes else { 81963012Sdes _http_seterr(HTTP_NEED_AUTH); 82063012Sdes goto ouch; 82163012Sdes } 82263012Sdes } 82360376Sdes 82463012Sdes /* other headers */ 82563012Sdes _http_cmd(fd, "Host: %s:%d", host, url->port); 82663012Sdes _http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, __progname); 82763012Sdes if (URL->offset) 82863012Sdes _http_cmd(fd, "Range: bytes=%lld-", url->offset); 82963012Sdes _http_cmd(fd, "Connection: close"); 83063012Sdes _http_cmd(fd, ""); 83160376Sdes 83263012Sdes /* get reply */ 83363012Sdes switch ((code = _http_get_reply(fd))) { 83463012Sdes case HTTP_OK: 83563012Sdes case HTTP_PARTIAL: 83663012Sdes /* fine */ 83763012Sdes break; 83863012Sdes case HTTP_MOVED_PERM: 83963012Sdes case HTTP_MOVED_TEMP: 84063012Sdes /* 84163012Sdes * Not so fine, but we still have to read the headers to 84263012Sdes * get the new location. 84363012Sdes */ 84463012Sdes break; 84563012Sdes case HTTP_NEED_AUTH: 84663012Sdes if (need_auth) { 84763012Sdes /* 84863012Sdes * We already sent out authorization code, so there's 84963012Sdes * nothing more we can do. 85063012Sdes */ 85163012Sdes _http_seterr(code); 85263012Sdes goto ouch; 85363012Sdes } 85463012Sdes /* try again, but send the password this time */ 85563012Sdes if (verbose) 85663012Sdes _fetch_info("server requires authorization"); 85763012Sdes need_auth = 1; 85863012Sdes close(fd); 85963012Sdes goto retry; 86063012Sdes case HTTP_NEED_PROXY_AUTH: 86163012Sdes /* 86263012Sdes * If we're talking to a proxy, we already sent our proxy 86363012Sdes * authorization code, so there's nothing more we can do. 
86463012Sdes */ 86563012Sdes _http_seterr(code); 86663012Sdes goto ouch; 86763012Sdes case HTTP_PROTOCOL_ERROR: 86863012Sdes /* fall through */ 86963012Sdes case -1: 87063012Sdes _fetch_syserr(); 87163012Sdes goto ouch; 87263012Sdes default: 87363012Sdes _http_seterr(code); 87463012Sdes goto ouch; 87563012Sdes } 87663012Sdes 87763012Sdes /* get headers */ 87863012Sdes do { 87963012Sdes switch ((h = _http_next_header(fd, &p))) { 88063012Sdes case hdr_syserror: 88163012Sdes _fetch_syserr(); 88263012Sdes goto ouch; 88363012Sdes case hdr_error: 88463012Sdes _http_seterr(HTTP_PROTOCOL_ERROR); 88563012Sdes goto ouch; 88663012Sdes case hdr_content_length: 88763012Sdes us->size = _http_parse_length(p); 88863012Sdes break; 88963012Sdes case hdr_content_range: 89063012Sdes offset = _http_parse_range(p); 89163012Sdes break; 89263012Sdes case hdr_last_modified: 89363012Sdes us->atime = us->mtime = _http_parse_mtime(p); 89463012Sdes break; 89563012Sdes case hdr_location: 89663012Sdes if (!HTTP_REDIRECT(code)) 89763012Sdes break; 89863069Sdes if (new) 89963069Sdes free(new); 90063012Sdes if (verbose) 90163012Sdes _fetch_info("%d redirect to %s", code, p); 90263069Sdes if (*p == '/') 90363069Sdes /* absolute path */ 90463069Sdes new = fetchMakeURL(url->scheme, url->host, url->port, p, 90563069Sdes url->user, url->pwd); 90663069Sdes else 90763069Sdes new = fetchParseURL(p); 90863069Sdes if (new == NULL) { 90963069Sdes /* XXX should set an error code */ 91063069Sdes DEBUG(fprintf(stderr, "failed to parse new URL\n")); 91163012Sdes goto ouch; 91263069Sdes } 91363012Sdes if (!*new->user && !*new->pwd) { 91463012Sdes strcpy(new->user, url->user); 91563012Sdes strcpy(new->pwd, url->pwd); 91663012Sdes } 91763012Sdes new->offset = url->offset; 91863012Sdes new->length = url->length; 91963069Sdes break; 92063012Sdes case hdr_transfer_encoding: 92163012Sdes /* XXX weak test*/ 92263012Sdes chunked = (strcasecmp(p, "chunked") == 0); 92363012Sdes break; 92463012Sdes case hdr_end: 92563012Sdes 
/* fall through */ 92663012Sdes case hdr_unknown: 92763012Sdes /* ignore */ 92863012Sdes break; 92963012Sdes } 93063012Sdes } while (h > hdr_end); 93160376Sdes 93263069Sdes /* we either have a hit, or a redirect with no Location: header */ 93363069Sdes if (code == HTTP_OK || code == HTTP_PARTIAL || !new) 93463012Sdes break; 93563069Sdes 93663069Sdes /* we have a redirect */ 93763069Sdes close(fd); 93863069Sdes if (url != URL) 93963069Sdes fetchFreeURL(url); 94063069Sdes url = new; 94160376Sdes } 94260376Sdes 94363012Sdes /* no success */ 94463012Sdes if (fd == -1) { 94563012Sdes _http_seterr(code); 94663012Sdes goto ouch; 94737571Sdes } 94837535Sdes 94963012Sdes /* wrap it up in a FILE */ 95063012Sdes if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) { 95163012Sdes _fetch_syserr(); 95263012Sdes goto ouch; 95363012Sdes } 95463012Sdes 95563012Sdes while (offset++ < url->offset) 95663012Sdes if (fgetc(f) == EOF) { 95763012Sdes _fetch_syserr(); 95860954Sdes fclose(f); 95963012Sdes f = NULL; 96037535Sdes } 96163012Sdes 96263012Sdes if (url != URL) 96363012Sdes fetchFreeURL(url); 96463012Sdes 96563012Sdes return f; 96637535Sdes 96763012Sdes ouch: 96863012Sdes if (url != URL) 96963012Sdes fetchFreeURL(url); 97063012Sdes if (fd != -1) 97163012Sdes close(fd); 97263012Sdes return NULL; 97363012Sdes} 97460189Sdes 97563012Sdes 97663012Sdes/***************************************************************************** 97763012Sdes * Entry points 97863012Sdes */ 97963012Sdes 98063012Sdes/* 98163012Sdes * Retrieve a file by HTTP 98263012Sdes */ 98363012SdesFILE * 98463012SdesfetchGetHTTP(struct url *URL, char *flags) 98563012Sdes{ 98663012Sdes struct url_stat us; 98737535Sdes 98863012Sdes return _http_request(URL, "GET", &us, flags); 98937535Sdes} 99037535Sdes 99137535SdesFILE * 99240975SdesfetchPutHTTP(struct url *URL, char *flags) 99337535Sdes{ 99437535Sdes warnx("fetchPutHTTP(): not implemented"); 99537535Sdes return NULL; 99637535Sdes} 99740975Sdes 
/*
 * Get an HTTP document's metadata.
 *
 * Sends a HEAD request through _http_request(), which fills in *us, and
 * closes the resulting stream right away.  Returns 0 on success and -1 if
 * the request failed.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
{
    FILE *stream = _http_request(URL, "HEAD", us, flags);

    if (stream == NULL)
        return -1;
    fclose(stream);
    return 0;
}

/*
 * List a directory: not implemented, always warns and returns NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    warnx("fetchListHTTP(): not implemented");
    return NULL;
}