http.c revision 60189
137535Sdes/*- 237535Sdes * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1537535Sdes * derived from this software without specific prior written permission 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2737535Sdes * 2850476Speter * $FreeBSD: head/lib/libfetch/http.c 60189 2000-05-07 20:01:55Z des $ 2937535Sdes */ 3037535Sdes 3137608Sdes/* 3237608Sdes * The base64 code in this file is based on code from MIT fetch, which 3337608Sdes * has the following copyright and license: 3437608Sdes * 3537608Sdes *- 3637608Sdes * Copyright 1997 Massachusetts Institute of Technology 3737608Sdes * 3837608Sdes * Permission to use, copy, modify, and distribute this software and 3937608Sdes * its documentation for any purpose and without fee is hereby 4037608Sdes * granted, provided that both the above copyright notice and this 4137608Sdes * permission notice appear in all copies, that both the above 4237608Sdes * copyright notice and this permission notice appear in all 4337608Sdes * supporting documentation, and that the name of M.I.T. not be used 4437608Sdes * in advertising or publicity pertaining to distribution of the 4560189Sdes * software without specific, written prior permission. M.I.T. makes 4637608Sdes * no representations about the suitability of this software for any 4737608Sdes * purpose. It is provided "as is" without express or implied 4837608Sdes * warranty. 4937608Sdes * 5037608Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 5137608Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 5237608Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 5337608Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 5437608Sdes * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6137608Sdes * SUCH DAMAGE. */ 6237608Sdes 6337535Sdes#include <sys/param.h> 6437535Sdes 6537535Sdes#include <err.h> 6637535Sdes#include <ctype.h> 6760189Sdes#include <netdb.h> 6837608Sdes#include <stdarg.h> 6937535Sdes#include <stdio.h> 7037535Sdes#include <stdlib.h> 7137535Sdes#include <string.h> 7237535Sdes#include <unistd.h> 7337535Sdes 7437535Sdes#include "fetch.h" 7540939Sdes#include "common.h" 7641862Sdes#include "httperr.h" 7737535Sdes 7837535Sdesextern char *__progname; 7937535Sdes 8037535Sdes#define ENDL "\r\n" 8137535Sdes 8237535Sdesstruct cookie 8337535Sdes{ 8437535Sdes FILE *real_f; 8537535Sdes#define ENC_NONE 0 8637535Sdes#define ENC_CHUNKED 1 8737535Sdes int encoding; /* 1 = chunked, 0 = none */ 8837535Sdes#define HTTPCTYPELEN 59 8937535Sdes char content_type[HTTPCTYPELEN+1]; 9037535Sdes char *buf; 9137535Sdes int b_cur, eof; 9237535Sdes unsigned b_len, chunksize; 9337535Sdes}; 9437535Sdes 9537608Sdes/* 9637608Sdes * Send a formatted line; optionally echo to terminal 9737608Sdes */ 9837608Sdesstatic int 9937608Sdes_http_cmd(FILE *f, char *fmt, ...) 10037608Sdes{ 10137608Sdes va_list ap; 10237608Sdes 10337608Sdes va_start(ap, fmt); 10437608Sdes vfprintf(f, fmt, ap); 10537608Sdes#ifndef NDEBUG 10637608Sdes fprintf(stderr, "\033[1m>>> "); 10737608Sdes vfprintf(stderr, fmt, ap); 10837608Sdes fprintf(stderr, "\033[m"); 10937608Sdes#endif 11037608Sdes va_end(ap); 11137608Sdes 11237608Sdes return 0; /* XXX */ 11337608Sdes} 11437608Sdes 11537608Sdes/* 11637608Sdes * Fill the input buffer, do chunk decoding on the fly 11737608Sdes */ 11837535Sdesstatic char * 11937535Sdes_http_fillbuf(struct cookie *c) 12037535Sdes{ 12137535Sdes char *ln; 12237535Sdes unsigned int len; 12337535Sdes 12437535Sdes if (c->eof) 12537535Sdes return NULL; 12637535Sdes 12737535Sdes if (c->encoding == ENC_NONE) { 12837535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 12937535Sdes c->b_cur = 0; 13037535Sdes } else if (c->encoding == ENC_CHUNKED) { 13137535Sdes if (c->chunksize == 0) { 13237535Sdes ln = fgetln(c->real_f, &len); 13337535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 13437535Sdes "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 13537535Sdes sscanf(ln, "%x", &(c->chunksize)); 13637535Sdes if (!c->chunksize) { 13737535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 13837535Sdes "end of last chunk\033[m\n")); 13937535Sdes c->eof = 1; 14037535Sdes return NULL; 14137535Sdes } 14237535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 14337535Sdes "new chunk: %X\033[m\n", c->chunksize)); 14437535Sdes } 14537535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 14637535Sdes if (c->b_len > c->chunksize) 14737535Sdes c->b_len = c->chunksize; 14837535Sdes c->chunksize -= c->b_len; 14937535Sdes c->b_cur = 0; 15037535Sdes } 15137535Sdes else return NULL; /* unknown encoding */ 15237535Sdes return c->buf; 15337535Sdes} 15437535Sdes 15537608Sdes/* 15637608Sdes * Read function 15737608Sdes */ 15837535Sdesstatic int 15937535Sdes_http_readfn(struct cookie *c, char *buf, int len) 16037535Sdes{ 16137535Sdes int l, pos = 0; 16237535Sdes while (len) { 16337535Sdes /* empty buffer */ 16437535Sdes if (!c->buf || (c->b_cur == c->b_len)) 16537535Sdes if (!_http_fillbuf(c)) 16637535Sdes break; 16737535Sdes 16837535Sdes l = c->b_len - c->b_cur; 16937535Sdes if (len < l) l = len; 17037535Sdes memcpy(buf + pos, c->buf + c->b_cur, l); 17137535Sdes c->b_cur += l; 17237535Sdes pos += l; 17337535Sdes len -= l; 17437535Sdes } 17537535Sdes 17637535Sdes if (ferror(c->real_f)) 17737535Sdes return -1; 17837535Sdes else return pos; 17937535Sdes} 18037535Sdes 18137608Sdes/* 18237608Sdes * Write function 18337608Sdes */ 18437535Sdesstatic int 18537535Sdes_http_writefn(struct cookie *c, const char *buf, int len) 18637535Sdes{ 18737535Sdes size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 18837535Sdes return r ? r : -1; 18937535Sdes} 19037535Sdes 19137608Sdes/* 19237608Sdes * Close function 19337608Sdes */ 19437535Sdesstatic int 19537535Sdes_http_closefn(struct cookie *c) 19637535Sdes{ 19737535Sdes int r = fclose(c->real_f); 19837535Sdes free(c); 19937535Sdes return (r == EOF) ? -1 : 0; 20037535Sdes} 20137535Sdes 20237608Sdes/* 20337608Sdes * Extract content type from cookie 20437608Sdes */ 20537535Sdeschar * 20637535SdesfetchContentType(FILE *f) 20737535Sdes{ 20837535Sdes /* 20937535Sdes * We have no way of making sure this really *is* one of our cookies, 21037535Sdes * so just check for a null pointer and hope for the best. 21137535Sdes */ 21237535Sdes return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 21337535Sdes} 21437535Sdes 21537608Sdes/* 21637608Sdes * Base64 encoding 21737608Sdes */ 21837608Sdesint 21937608Sdes_http_base64(char *dst, char *src, int l) 22037608Sdes{ 22137608Sdes static const char base64[] = 22237608Sdes "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 22337608Sdes "abcdefghijklmnopqrstuvwxyz" 22437608Sdes "0123456789+/"; 22537608Sdes int t, r = 0; 22637608Sdes 22737608Sdes while (l >= 3) { 22837608Sdes t = (src[0] << 16) | (src[1] << 8) | src[2]; 22937608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 23037608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 23137608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 23237608Sdes dst[3] = base64[(t >> 0) & 0x3f]; 23337608Sdes src += 3; l -= 3; 23437608Sdes dst += 4; r += 4; 23537608Sdes } 23637608Sdes 23737608Sdes switch (l) { 23837608Sdes case 2: 23937608Sdes t = (src[0] << 16) | (src[1] << 8); 24037608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 24137608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 24237608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 24337608Sdes dst[3] = '='; 24437608Sdes dst += 4; 24537608Sdes r += 4; 24637608Sdes break; 24737608Sdes case 1: 24837608Sdes t = src[0] << 16; 24937608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 25037608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 25137608Sdes dst[2] = dst[3] = '='; 25237608Sdes dst += 4; 25337608Sdes r += 4; 25437608Sdes break; 25537608Sdes case 0: 25637608Sdes break; 25737608Sdes } 25837608Sdes 25937608Sdes *dst = 0; 26037608Sdes return r; 26137608Sdes} 26237608Sdes 26337608Sdes/* 26437608Sdes * Encode username and password 26537608Sdes */ 26637608Sdeschar * 26737608Sdes_http_auth(char *usr, char *pwd) 26837608Sdes{ 26937608Sdes int len, lu, lp; 27037608Sdes char *str, *s; 27137608Sdes 27237608Sdes lu = strlen(usr); 27337608Sdes lp = strlen(pwd); 27437608Sdes 27537608Sdes len = (lu * 4 + 2) / 3 /* user name, round up */ 27637608Sdes + 1 /* colon */ 27737608Sdes + (lp * 4 + 2) / 3 /* password, round up */ 27837608Sdes + 1; /* null */ 27937608Sdes 28037608Sdes if ((s = str = (char *)malloc(len)) == NULL) 28137608Sdes return NULL; 28237608Sdes 28337608Sdes s += _http_base64(s, usr, lu); 28437608Sdes *s++ = ':'; 28537608Sdes s += _http_base64(s, pwd, lp); 28637608Sdes *s = 0; 28737608Sdes 28837608Sdes return str; 28937608Sdes} 29037608Sdes 29137608Sdes/* 29240975Sdes * Retrieve a file by HTTP 29337608Sdes */ 29437535SdesFILE * 29540975SdesfetchGetHTTP(struct url *URL, char *flags) 29637535Sdes{ 29755544Sdes int sd = -1, e, i, enc = ENC_NONE, direct, verbose; 29837535Sdes struct cookie *c; 29941863Sdes char *ln, *p, *px, *q; 30037535Sdes FILE *f, *cf; 30137535Sdes size_t len; 30237535Sdes 30355544Sdes direct = (flags && strchr(flags, 'd')); 30455544Sdes verbose = (flags && strchr(flags, 'v')); 30541862Sdes 30637535Sdes /* allocate cookie */ 30760189Sdes if ((c = calloc(1, sizeof *c)) == NULL) 30837535Sdes return NULL; 30937535Sdes 31037535Sdes /* check port */ 31160189Sdes if (!URL->port) { 31260189Sdes struct servent *se; 31360189Sdes 31460189Sdes if ((se = getservbyname("http", "tcp")) != NULL) 31560189Sdes URL->port = ntohs(se->s_port); 31660189Sdes else 31760189Sdes URL->port = 80; 31860189Sdes } 31937535Sdes 32037535Sdes /* attempt to connect to proxy server */ 32155544Sdes if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 32241863Sdes char host[MAXHOSTNAMELEN]; 32360189Sdes int port = 0; 32437535Sdes 32537535Sdes /* measure length */ 32637535Sdes len = strcspn(px, ":"); 32737535Sdes 32855544Sdes /* get port (XXX atoi is a little too tolerant perhaps?) */ 32960189Sdes if (px[len] == ':') { 33060189Sdes if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 33160189Sdes || strlen(px+len+1) > 5) { 33260189Sdes /* XXX we should emit some kind of warning */ 33360189Sdes } 33437535Sdes port = atoi(px+len+1); 33560189Sdes if (port < 1 || port > 65535) { 33660189Sdes /* XXX we should emit some kind of warning */ 33760189Sdes } 33860189Sdes } 33960189Sdes if (!port) { 34060189Sdes#if 0 34160189Sdes /* 34260189Sdes * commented out, since there is currently no service name 34360189Sdes * for HTTP proxies 34460189Sdes */ 34560189Sdes struct servent *se; 34660189Sdes 34760189Sdes if ((se = getservbyname("xxxx", "tcp")) != NULL) 34860189Sdes port = ntohs(se->s_port); 34960189Sdes else 35060189Sdes#endif 35160189Sdes port = 3128; 35260189Sdes } 35337535Sdes 35437535Sdes /* get host name */ 35537535Sdes if (len >= MAXHOSTNAMELEN) 35637535Sdes len = MAXHOSTNAMELEN - 1; 35737535Sdes strncpy(host, px, len); 35837535Sdes host[len] = 0; 35937535Sdes 36037535Sdes /* connect */ 36141923Sdes sd = _fetch_connect(host, port, verbose); 36237535Sdes } 36337535Sdes 36437535Sdes /* if no proxy is configured or could be contacted, try direct */ 36538394Sdes if (sd == -1) { 36641923Sdes if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1) 36737535Sdes goto ouch; 36837535Sdes } 36937535Sdes 37037535Sdes /* reopen as stream */ 37137571Sdes if ((f = fdopen(sd, "r+")) == NULL) 37237535Sdes goto ouch; 37337535Sdes c->real_f = f; 37437535Sdes 37537535Sdes /* send request (proxies require absolute form, so use that) */ 37641862Sdes if (verbose) 37741862Sdes _fetch_info("requesting http://%s:%d%s", 37841862Sdes URL->host, URL->port, URL->doc); 37937608Sdes _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 38037608Sdes URL->host, URL->port, URL->doc); 38137535Sdes 38237535Sdes /* start sending headers away */ 38337535Sdes if (URL->user[0] || URL->pwd[0]) { 38437608Sdes char *auth_str = _http_auth(URL->user, URL->pwd); 38537608Sdes if (!auth_str) 38637608Sdes goto fouch; 38737608Sdes _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 38837608Sdes free(auth_str); 38937535Sdes } 39037608Sdes _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 39137608Sdes _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 39237608Sdes _http_cmd(f, "Connection: close" ENDL ENDL); 39337535Sdes 39437535Sdes /* get response */ 39537535Sdes if ((ln = fgetln(f, &len)) == NULL) 39637535Sdes goto fouch; 39737535Sdes DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 39837535Sdes (int)len-2, (int)len-2, ln)); 39937535Sdes 40037535Sdes /* we can't use strchr() and friends since ln isn't NUL-terminated */ 40137535Sdes p = ln; 40237535Sdes while ((p < ln + len) && !isspace(*p)) 40337535Sdes p++; 40437535Sdes while ((p < ln + len) && !isdigit(*p)) 40537535Sdes p++; 40637535Sdes if (!isdigit(*p)) 40737535Sdes goto fouch; 40841863Sdes e = atoi(p); 40941863Sdes DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 41037535Sdes 41137535Sdes /* add code to handle redirects later */ 41241863Sdes if (e != 200) { 41341863Sdes _http_seterr(e); 41437535Sdes goto fouch; 41537571Sdes } 41637535Sdes 41737535Sdes /* browse through header */ 41837535Sdes while (1) { 41937535Sdes if ((ln = fgetln(f, &len)) == NULL) 42037535Sdes goto fouch; 42137535Sdes if ((ln[0] == '\r') || (ln[0] == '\n')) 42237535Sdes break; 42360189Sdes DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 42437535Sdes (int)len-2, (int)len-2, ln)); 42537535Sdes#define XFERENC "Transfer-Encoding:" 42660189Sdes if (strncasecmp(ln, XFERENC, sizeof XFERENC - 1) == 0) { 42760189Sdes p = ln + sizeof XFERENC - 1; 42837535Sdes while ((p < ln + len) && isspace(*p)) 42937535Sdes p++; 43037535Sdes for (q = p; (q < ln + len) && !isspace(*q); q++) 43137535Sdes /* VOID */ ; 43237535Sdes *q = 0; 43337535Sdes if (strcasecmp(p, "chunked") == 0) 43437535Sdes enc = ENC_CHUNKED; 43537535Sdes DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 43637535Sdes#undef XFERENC 43737535Sdes#define CONTTYPE "Content-Type:" 43860189Sdes } else if (strncasecmp(ln, CONTTYPE, sizeof CONTTYPE - 1) == 0) { 43960189Sdes p = ln + sizeof CONTTYPE - 1; 44037535Sdes while ((p < ln + len) && isspace(*p)) 44137535Sdes p++; 44237535Sdes for (i = 0; p < ln + len; p++) 44337535Sdes if (i < HTTPCTYPELEN) 44437535Sdes c->content_type[i++] = *p; 44537535Sdes do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 44637535Sdes DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 44737535Sdes c->content_type)); 44837535Sdes#undef CONTTYPE 44937535Sdes } 45037535Sdes } 45137535Sdes 45237535Sdes /* only body remains */ 45337535Sdes c->encoding = enc; 45437535Sdes cf = funopen(c, 45537535Sdes (int (*)(void *, char *, int))_http_readfn, 45637535Sdes (int (*)(void *, const char *, int))_http_writefn, 45737535Sdes (fpos_t (*)(void *, fpos_t, int))NULL, 45837535Sdes (int (*)(void *))_http_closefn); 45937535Sdes if (cf == NULL) 46037535Sdes goto fouch; 46160189Sdes 46237535Sdes return cf; 46337535Sdes 46437535Sdesouch: 46537571Sdes if (sd >= 0) 46637571Sdes close(sd); 46737535Sdes free(c); 46841862Sdes _http_seterr(999); /* XXX do this properly RSN */ 46937535Sdes return NULL; 47037535Sdesfouch: 47137535Sdes fclose(f); 47237535Sdes free(c); 47341862Sdes _http_seterr(999); /* XXX do this properly RSN */ 47437535Sdes return NULL; 47537535Sdes} 47637535Sdes 47737535SdesFILE * 47840975SdesfetchPutHTTP(struct url *URL, char *flags) 47937535Sdes{ 48037535Sdes warnx("fetchPutHTTP(): not implemented"); 48137535Sdes return NULL; 48237535Sdes} 48340975Sdes 48440975Sdes/* 48540975Sdes * Get an HTTP document's metadata 48640975Sdes */ 48740975Sdesint 48840975SdesfetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 48940975Sdes{ 49040975Sdes warnx("fetchStatHTTP(): not implemented"); 49140975Sdes return -1; 49240975Sdes} 49341989Sdes 49441989Sdes/* 49541989Sdes * List a directory 49641989Sdes */ 49741989Sdesstruct url_ent * 49841989SdesfetchListHTTP(struct url *url, char *flags) 49941989Sdes{ 50041989Sdes warnx("fetchListHTTP(): not implemented"); 50141989Sdes return NULL; 50241989Sdes} 503