http.c revision 41989
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id: http.c,v 1.10 1998/12/18 14:32:48 des Exp $
 */

/*
 * The base64 code in this file is based on code from MIT fetch, which
 * has the following copyright and license:
 *
 *-
 * Copyright 1997 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission. M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose. It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T.
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6137608Sdes * SUCH DAMAGE. */ 6237608Sdes 6337535Sdes#include <sys/param.h> 6437535Sdes 6537535Sdes#include <err.h> 6637535Sdes#include <ctype.h> 6737608Sdes#include <stdarg.h> 6837535Sdes#include <stdio.h> 6937535Sdes#include <stdlib.h> 7037535Sdes#include <string.h> 7137535Sdes#include <unistd.h> 7237535Sdes 7337535Sdes#include "fetch.h" 7440939Sdes#include "common.h" 7541862Sdes#include "httperr.h" 7637535Sdes 7737535Sdesextern char *__progname; 7837535Sdes 7937535Sdes#define ENDL "\r\n" 8037535Sdes 8137535Sdesstruct cookie 8237535Sdes{ 8337535Sdes FILE *real_f; 8437535Sdes#define ENC_NONE 0 8537535Sdes#define ENC_CHUNKED 1 8637535Sdes int encoding; /* 1 = chunked, 0 = none */ 8737535Sdes#define HTTPCTYPELEN 59 8837535Sdes char content_type[HTTPCTYPELEN+1]; 8937535Sdes char *buf; 9037535Sdes int b_cur, eof; 9137535Sdes unsigned b_len, chunksize; 9237535Sdes}; 9337535Sdes 9437608Sdes/* 9537608Sdes * Send a formatted line; optionally echo to terminal 9637608Sdes */ 9737608Sdesstatic int 9837608Sdes_http_cmd(FILE *f, char *fmt, ...) 
9937608Sdes{ 10037608Sdes va_list ap; 10137608Sdes 10237608Sdes va_start(ap, fmt); 10337608Sdes vfprintf(f, fmt, ap); 10437608Sdes#ifndef NDEBUG 10537608Sdes fprintf(stderr, "\033[1m>>> "); 10637608Sdes vfprintf(stderr, fmt, ap); 10737608Sdes fprintf(stderr, "\033[m"); 10837608Sdes#endif 10937608Sdes va_end(ap); 11037608Sdes 11137608Sdes return 0; /* XXX */ 11237608Sdes} 11337608Sdes 11437608Sdes/* 11537608Sdes * Fill the input buffer, do chunk decoding on the fly 11637608Sdes */ 11737535Sdesstatic char * 11837535Sdes_http_fillbuf(struct cookie *c) 11937535Sdes{ 12037535Sdes char *ln; 12137535Sdes unsigned int len; 12237535Sdes 12337535Sdes if (c->eof) 12437535Sdes return NULL; 12537535Sdes 12637535Sdes if (c->encoding == ENC_NONE) { 12737535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 12837535Sdes c->b_cur = 0; 12937535Sdes } else if (c->encoding == ENC_CHUNKED) { 13037535Sdes if (c->chunksize == 0) { 13137535Sdes ln = fgetln(c->real_f, &len); 13237535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 13337535Sdes "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 13437535Sdes sscanf(ln, "%x", &(c->chunksize)); 13537535Sdes if (!c->chunksize) { 13637535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 13737535Sdes "end of last chunk\033[m\n")); 13837535Sdes c->eof = 1; 13937535Sdes return NULL; 14037535Sdes } 14137535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 14237535Sdes "new chunk: %X\033[m\n", c->chunksize)); 14337535Sdes } 14437535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 14537535Sdes if (c->b_len > c->chunksize) 14637535Sdes c->b_len = c->chunksize; 14737535Sdes c->chunksize -= c->b_len; 14837535Sdes c->b_cur = 0; 14937535Sdes } 15037535Sdes else return NULL; /* unknown encoding */ 15137535Sdes return c->buf; 15237535Sdes} 15337535Sdes 15437608Sdes/* 15537608Sdes * Read function 15637608Sdes */ 15737535Sdesstatic int 15837535Sdes_http_readfn(struct cookie *c, char *buf, int len) 15937535Sdes{ 16037535Sdes int l, pos = 0; 
16137535Sdes while (len) { 16237535Sdes /* empty buffer */ 16337535Sdes if (!c->buf || (c->b_cur == c->b_len)) 16437535Sdes if (!_http_fillbuf(c)) 16537535Sdes break; 16637535Sdes 16737535Sdes l = c->b_len - c->b_cur; 16837535Sdes if (len < l) l = len; 16937535Sdes memcpy(buf + pos, c->buf + c->b_cur, l); 17037535Sdes c->b_cur += l; 17137535Sdes pos += l; 17237535Sdes len -= l; 17337535Sdes } 17437535Sdes 17537535Sdes if (ferror(c->real_f)) 17637535Sdes return -1; 17737535Sdes else return pos; 17837535Sdes} 17937535Sdes 18037608Sdes/* 18137608Sdes * Write function 18237608Sdes */ 18337535Sdesstatic int 18437535Sdes_http_writefn(struct cookie *c, const char *buf, int len) 18537535Sdes{ 18637535Sdes size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 18737535Sdes return r ? r : -1; 18837535Sdes} 18937535Sdes 19037608Sdes/* 19137608Sdes * Close function 19237608Sdes */ 19337535Sdesstatic int 19437535Sdes_http_closefn(struct cookie *c) 19537535Sdes{ 19637535Sdes int r = fclose(c->real_f); 19737535Sdes free(c); 19837535Sdes return (r == EOF) ? -1 : 0; 19937535Sdes} 20037535Sdes 20137608Sdes/* 20237608Sdes * Extract content type from cookie 20337608Sdes */ 20437535Sdeschar * 20537535SdesfetchContentType(FILE *f) 20637535Sdes{ 20737535Sdes /* 20837535Sdes * We have no way of making sure this really *is* one of our cookies, 20937535Sdes * so just check for a null pointer and hope for the best. 21037535Sdes */ 21137535Sdes return f->_cookie ? 
(((struct cookie *)f->_cookie)->content_type) : NULL; 21237535Sdes} 21337535Sdes 21437608Sdes/* 21537608Sdes * Base64 encoding 21637608Sdes */ 21737608Sdesint 21837608Sdes_http_base64(char *dst, char *src, int l) 21937608Sdes{ 22037608Sdes static const char base64[] = 22137608Sdes "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 22237608Sdes "abcdefghijklmnopqrstuvwxyz" 22337608Sdes "0123456789+/"; 22437608Sdes int t, r = 0; 22537608Sdes 22637608Sdes while (l >= 3) { 22737608Sdes t = (src[0] << 16) | (src[1] << 8) | src[2]; 22837608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 22937608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 23037608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 23137608Sdes dst[3] = base64[(t >> 0) & 0x3f]; 23237608Sdes src += 3; l -= 3; 23337608Sdes dst += 4; r += 4; 23437608Sdes } 23537608Sdes 23637608Sdes switch (l) { 23737608Sdes case 2: 23837608Sdes t = (src[0] << 16) | (src[1] << 8); 23937608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 24037608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 24137608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 24237608Sdes dst[3] = '='; 24337608Sdes dst += 4; 24437608Sdes r += 4; 24537608Sdes break; 24637608Sdes case 1: 24737608Sdes t = src[0] << 16; 24837608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 24937608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 25037608Sdes dst[2] = dst[3] = '='; 25137608Sdes dst += 4; 25237608Sdes r += 4; 25337608Sdes break; 25437608Sdes case 0: 25537608Sdes break; 25637608Sdes } 25737608Sdes 25837608Sdes *dst = 0; 25937608Sdes return r; 26037608Sdes} 26137608Sdes 26237608Sdes/* 26337608Sdes * Encode username and password 26437608Sdes */ 26537608Sdeschar * 26637608Sdes_http_auth(char *usr, char *pwd) 26737608Sdes{ 26837608Sdes int len, lu, lp; 26937608Sdes char *str, *s; 27037608Sdes 27137608Sdes lu = strlen(usr); 27237608Sdes lp = strlen(pwd); 27337608Sdes 27437608Sdes len = (lu * 4 + 2) / 3 /* user name, round up */ 27537608Sdes + 1 /* colon */ 27637608Sdes + (lp * 4 + 2) / 3 /* password, round up */ 27737608Sdes + 1; /* null */ 27837608Sdes 
27937608Sdes if ((s = str = (char *)malloc(len)) == NULL) 28037608Sdes return NULL; 28137608Sdes 28237608Sdes s += _http_base64(s, usr, lu); 28337608Sdes *s++ = ':'; 28437608Sdes s += _http_base64(s, pwd, lp); 28537608Sdes *s = 0; 28637608Sdes 28737608Sdes return str; 28837608Sdes} 28937608Sdes 29037608Sdes/* 29140975Sdes * Retrieve a file by HTTP 29237608Sdes */ 29337535SdesFILE * 29440975SdesfetchGetHTTP(struct url *URL, char *flags) 29537535Sdes{ 29641863Sdes int sd = -1, e, i, enc = ENC_NONE, verbose; 29737535Sdes struct cookie *c; 29841863Sdes char *ln, *p, *px, *q; 29937535Sdes FILE *f, *cf; 30037535Sdes size_t len; 30137535Sdes 30241862Sdes verbose = (strchr(flags, 'v') != NULL); 30341862Sdes 30437535Sdes /* allocate cookie */ 30537535Sdes if ((c = calloc(1, sizeof(struct cookie))) == NULL) 30637535Sdes return NULL; 30737535Sdes 30837535Sdes /* check port */ 30937535Sdes if (!URL->port) 31037535Sdes URL->port = 80; /* default HTTP port */ 31137535Sdes 31237535Sdes /* attempt to connect to proxy server */ 31341863Sdes if ((px = getenv("HTTP_PROXY")) != NULL) { 31441863Sdes char host[MAXHOSTNAMELEN]; 31537535Sdes int port = 3128; /* XXX I think 3128 is default... check? */ 31637535Sdes 31737535Sdes /* measure length */ 31837535Sdes len = strcspn(px, ":"); 31937535Sdes 32037535Sdes /* get port (atoi is a little too tolerant perhaps?) 
*/ 32137535Sdes if (px[len] == ':') 32237535Sdes port = atoi(px+len+1); 32337535Sdes 32437535Sdes /* get host name */ 32537535Sdes if (len >= MAXHOSTNAMELEN) 32637535Sdes len = MAXHOSTNAMELEN - 1; 32737535Sdes strncpy(host, px, len); 32837535Sdes host[len] = 0; 32937535Sdes 33037535Sdes /* connect */ 33141923Sdes sd = _fetch_connect(host, port, verbose); 33237535Sdes } 33337535Sdes 33437535Sdes /* if no proxy is configured or could be contacted, try direct */ 33538394Sdes if (sd == -1) { 33641923Sdes if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1) 33737535Sdes goto ouch; 33837535Sdes } 33937535Sdes 34037535Sdes /* reopen as stream */ 34137571Sdes if ((f = fdopen(sd, "r+")) == NULL) 34237535Sdes goto ouch; 34337535Sdes c->real_f = f; 34437535Sdes 34537535Sdes /* send request (proxies require absolute form, so use that) */ 34641862Sdes if (verbose) 34741862Sdes _fetch_info("requesting http://%s:%d%s", 34841862Sdes URL->host, URL->port, URL->doc); 34937608Sdes _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 35037608Sdes URL->host, URL->port, URL->doc); 35137535Sdes 35237535Sdes /* start sending headers away */ 35337535Sdes if (URL->user[0] || URL->pwd[0]) { 35437608Sdes char *auth_str = _http_auth(URL->user, URL->pwd); 35537608Sdes if (!auth_str) 35637608Sdes goto fouch; 35737608Sdes _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 35837608Sdes free(auth_str); 35937535Sdes } 36037608Sdes _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 36137608Sdes _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 36237608Sdes _http_cmd(f, "Connection: close" ENDL ENDL); 36337535Sdes 36437535Sdes /* get response */ 36537535Sdes if ((ln = fgetln(f, &len)) == NULL) 36637535Sdes goto fouch; 36737535Sdes DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 36837535Sdes (int)len-2, (int)len-2, ln)); 36937535Sdes 37037535Sdes /* we can't use strchr() and friends since ln isn't NUL-terminated */ 37137535Sdes p = ln; 37237535Sdes while ((p < 
ln + len) && !isspace(*p)) 37337535Sdes p++; 37437535Sdes while ((p < ln + len) && !isdigit(*p)) 37537535Sdes p++; 37637535Sdes if (!isdigit(*p)) 37737535Sdes goto fouch; 37841863Sdes e = atoi(p); 37941863Sdes DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 38037535Sdes 38137535Sdes /* add code to handle redirects later */ 38241863Sdes if (e != 200) { 38341863Sdes _http_seterr(e); 38437535Sdes goto fouch; 38537571Sdes } 38637535Sdes 38737535Sdes /* browse through header */ 38837535Sdes while (1) { 38937535Sdes if ((ln = fgetln(f, &len)) == NULL) 39037535Sdes goto fouch; 39137535Sdes if ((ln[0] == '\r') || (ln[0] == '\n')) 39237535Sdes break; 39337535Sdes DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 39437535Sdes (int)len-2, (int)len-2, ln)); 39537535Sdes#define XFERENC "Transfer-Encoding:" 39637535Sdes if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) { 39737535Sdes p = ln + sizeof(XFERENC) - 1; 39837535Sdes while ((p < ln + len) && isspace(*p)) 39937535Sdes p++; 40037535Sdes for (q = p; (q < ln + len) && !isspace(*q); q++) 40137535Sdes /* VOID */ ; 40237535Sdes *q = 0; 40337535Sdes if (strcasecmp(p, "chunked") == 0) 40437535Sdes enc = ENC_CHUNKED; 40537535Sdes DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 40637535Sdes#undef XFERENC 40737535Sdes#define CONTTYPE "Content-Type:" 40837535Sdes } else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) { 40937535Sdes p = ln + sizeof(CONTTYPE) - 1; 41037535Sdes while ((p < ln + len) && isspace(*p)) 41137535Sdes p++; 41237535Sdes for (i = 0; p < ln + len; p++) 41337535Sdes if (i < HTTPCTYPELEN) 41437535Sdes c->content_type[i++] = *p; 41537535Sdes do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 41637535Sdes DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 41737535Sdes c->content_type)); 41837535Sdes#undef CONTTYPE 41937535Sdes } 42037535Sdes } 42137535Sdes 42237535Sdes /* only body remains */ 42337535Sdes c->encoding = enc; 42437535Sdes cf = funopen(c, 
42537535Sdes (int (*)(void *, char *, int))_http_readfn, 42637535Sdes (int (*)(void *, const char *, int))_http_writefn, 42737535Sdes (fpos_t (*)(void *, fpos_t, int))NULL, 42837535Sdes (int (*)(void *))_http_closefn); 42937535Sdes if (cf == NULL) 43037535Sdes goto fouch; 43137535Sdes return cf; 43237535Sdes 43337535Sdesouch: 43437571Sdes if (sd >= 0) 43537571Sdes close(sd); 43637535Sdes free(c); 43741862Sdes _http_seterr(999); /* XXX do this properly RSN */ 43837535Sdes return NULL; 43937535Sdesfouch: 44037535Sdes fclose(f); 44137535Sdes free(c); 44241862Sdes _http_seterr(999); /* XXX do this properly RSN */ 44337535Sdes return NULL; 44437535Sdes} 44537535Sdes 44637535SdesFILE * 44740975SdesfetchPutHTTP(struct url *URL, char *flags) 44837535Sdes{ 44937535Sdes warnx("fetchPutHTTP(): not implemented"); 45037535Sdes return NULL; 45137535Sdes} 45240975Sdes 45340975Sdes/* 45440975Sdes * Get an HTTP document's metadata 45540975Sdes */ 45640975Sdesint 45740975SdesfetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 45840975Sdes{ 45940975Sdes warnx("fetchStatHTTP(): not implemented"); 46040975Sdes return -1; 46140975Sdes} 46241989Sdes 46341989Sdes/* 46441989Sdes * List a directory 46541989Sdes */ 46641989Sdesstruct url_ent * 46741989SdesfetchListHTTP(struct url *url, char *flags) 46841989Sdes{ 46941989Sdes warnx("fetchListHTTP(): not implemented"); 47041989Sdes return NULL; 47141989Sdes} 472