/* http.c revision 41862 */
137535Sdes/*- 237535Sdes * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1537535Sdes * derived from this software without specific prior written permission 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2737535Sdes * 2841862Sdes * $Id: http.c,v 1.7 1998/11/06 22:14:08 des Exp $ 2937535Sdes */ 3037535Sdes 3137608Sdes/* 3237608Sdes * The base64 code in this file is based on code from MIT fetch, which 3337608Sdes * has the following copyright and license: 3437608Sdes * 3537608Sdes *- 3637608Sdes * Copyright 1997 Massachusetts Institute of Technology 3737608Sdes * 3837608Sdes * Permission to use, copy, modify, and distribute this software and 3937608Sdes * its documentation for any purpose and without fee is hereby 4037608Sdes * granted, provided that both the above copyright notice and this 4137608Sdes * permission notice appear in all copies, that both the above 4237608Sdes * copyright notice and this permission notice appear in all 4337608Sdes * supporting documentation, and that the name of M.I.T. not be used 4437608Sdes * in advertising or publicity pertaining to distribution of the 4537608Sdes * software without specific, written prior permission. M.I.T. makes 4637608Sdes * no representations about the suitability of this software for any 4737608Sdes * purpose. It is provided "as is" without express or implied 4837608Sdes * warranty. 4937608Sdes * 5037608Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 5137608Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 5237608Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 5337608Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 5437608Sdes * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6137608Sdes * SUCH DAMAGE. */ 6237608Sdes 6337535Sdes#include <sys/param.h> 6437535Sdes#include <sys/errno.h> 6537535Sdes#include <sys/socket.h> 6637535Sdes#include <sys/types.h> 6737535Sdes 6837535Sdes#include <netinet/in.h> 6937535Sdes 7037535Sdes#include <err.h> 7137535Sdes#include <ctype.h> 7237535Sdes#include <netdb.h> 7337608Sdes#include <stdarg.h> 7437535Sdes#include <stdio.h> 7537535Sdes#include <stdlib.h> 7637535Sdes#include <string.h> 7737535Sdes#include <unistd.h> 7837535Sdes 7937535Sdes#include "fetch.h" 8040939Sdes#include "common.h" 8141862Sdes#include "httperr.h" 8237535Sdes 8337535Sdes#ifndef NDEBUG 8437535Sdes#define DEBUG(x) do x; while (0) 8537535Sdes#else 8637535Sdes#define DEBUG(x) do { } while (0) 8737535Sdes#endif 8837535Sdes 8937535Sdesextern char *__progname; 9037535Sdes 9137535Sdes#define ENDL "\r\n" 9237535Sdes 9337535Sdesstruct cookie 9437535Sdes{ 9537535Sdes FILE *real_f; 9637535Sdes#define ENC_NONE 0 9737535Sdes#define ENC_CHUNKED 1 9837535Sdes int encoding; /* 1 = chunked, 0 = none */ 9937535Sdes#define HTTPCTYPELEN 59 10037535Sdes char content_type[HTTPCTYPELEN+1]; 10137535Sdes char *buf; 10237535Sdes int b_cur, eof; 10337535Sdes unsigned b_len, chunksize; 10437535Sdes}; 10537535Sdes 10637608Sdes/* 10737608Sdes * Send a formatted line; optionally echo to terminal 10837608Sdes */ 10937608Sdesstatic int 11037608Sdes_http_cmd(FILE *f, char *fmt, ...) 
11137608Sdes{ 11237608Sdes va_list ap; 11337608Sdes 11437608Sdes va_start(ap, fmt); 11537608Sdes vfprintf(f, fmt, ap); 11637608Sdes#ifndef NDEBUG 11737608Sdes fprintf(stderr, "\033[1m>>> "); 11837608Sdes vfprintf(stderr, fmt, ap); 11937608Sdes fprintf(stderr, "\033[m"); 12037608Sdes#endif 12137608Sdes va_end(ap); 12237608Sdes 12337608Sdes return 0; /* XXX */ 12437608Sdes} 12537608Sdes 12637608Sdes/* 12737608Sdes * Fill the input buffer, do chunk decoding on the fly 12837608Sdes */ 12937535Sdesstatic char * 13037535Sdes_http_fillbuf(struct cookie *c) 13137535Sdes{ 13237535Sdes char *ln; 13337535Sdes unsigned int len; 13437535Sdes 13537535Sdes if (c->eof) 13637535Sdes return NULL; 13737535Sdes 13837535Sdes if (c->encoding == ENC_NONE) { 13937535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 14037535Sdes c->b_cur = 0; 14137535Sdes } else if (c->encoding == ENC_CHUNKED) { 14237535Sdes if (c->chunksize == 0) { 14337535Sdes ln = fgetln(c->real_f, &len); 14437535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 14537535Sdes "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 14637535Sdes sscanf(ln, "%x", &(c->chunksize)); 14737535Sdes if (!c->chunksize) { 14837535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 14937535Sdes "end of last chunk\033[m\n")); 15037535Sdes c->eof = 1; 15137535Sdes return NULL; 15237535Sdes } 15337535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 15437535Sdes "new chunk: %X\033[m\n", c->chunksize)); 15537535Sdes } 15637535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 15737535Sdes if (c->b_len > c->chunksize) 15837535Sdes c->b_len = c->chunksize; 15937535Sdes c->chunksize -= c->b_len; 16037535Sdes c->b_cur = 0; 16137535Sdes } 16237535Sdes else return NULL; /* unknown encoding */ 16337535Sdes return c->buf; 16437535Sdes} 16537535Sdes 16637608Sdes/* 16737608Sdes * Read function 16837608Sdes */ 16937535Sdesstatic int 17037535Sdes_http_readfn(struct cookie *c, char *buf, int len) 17137535Sdes{ 17237535Sdes int l, pos = 0; 
17337535Sdes while (len) { 17437535Sdes /* empty buffer */ 17537535Sdes if (!c->buf || (c->b_cur == c->b_len)) 17637535Sdes if (!_http_fillbuf(c)) 17737535Sdes break; 17837535Sdes 17937535Sdes l = c->b_len - c->b_cur; 18037535Sdes if (len < l) l = len; 18137535Sdes memcpy(buf + pos, c->buf + c->b_cur, l); 18237535Sdes c->b_cur += l; 18337535Sdes pos += l; 18437535Sdes len -= l; 18537535Sdes } 18637535Sdes 18737535Sdes if (ferror(c->real_f)) 18837535Sdes return -1; 18937535Sdes else return pos; 19037535Sdes} 19137535Sdes 19237608Sdes/* 19337608Sdes * Write function 19437608Sdes */ 19537535Sdesstatic int 19637535Sdes_http_writefn(struct cookie *c, const char *buf, int len) 19737535Sdes{ 19837535Sdes size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 19937535Sdes return r ? r : -1; 20037535Sdes} 20137535Sdes 20237608Sdes/* 20337608Sdes * Close function 20437608Sdes */ 20537535Sdesstatic int 20637535Sdes_http_closefn(struct cookie *c) 20737535Sdes{ 20837535Sdes int r = fclose(c->real_f); 20937535Sdes free(c); 21037535Sdes return (r == EOF) ? -1 : 0; 21137535Sdes} 21237535Sdes 21337608Sdes/* 21437608Sdes * Extract content type from cookie 21537608Sdes */ 21637535Sdeschar * 21737535SdesfetchContentType(FILE *f) 21837535Sdes{ 21937535Sdes /* 22037535Sdes * We have no way of making sure this really *is* one of our cookies, 22137535Sdes * so just check for a null pointer and hope for the best. 22237535Sdes */ 22337535Sdes return f->_cookie ? 
(((struct cookie *)f->_cookie)->content_type) : NULL; 22437535Sdes} 22537535Sdes 22637608Sdes/* 22737608Sdes * Base64 encoding 22837608Sdes */ 22937608Sdesint 23037608Sdes_http_base64(char *dst, char *src, int l) 23137608Sdes{ 23237608Sdes static const char base64[] = 23337608Sdes "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 23437608Sdes "abcdefghijklmnopqrstuvwxyz" 23537608Sdes "0123456789+/"; 23637608Sdes int t, r = 0; 23737608Sdes 23837608Sdes while (l >= 3) { 23937608Sdes t = (src[0] << 16) | (src[1] << 8) | src[2]; 24037608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 24137608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 24237608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 24337608Sdes dst[3] = base64[(t >> 0) & 0x3f]; 24437608Sdes src += 3; l -= 3; 24537608Sdes dst += 4; r += 4; 24637608Sdes } 24737608Sdes 24837608Sdes switch (l) { 24937608Sdes case 2: 25037608Sdes t = (src[0] << 16) | (src[1] << 8); 25137608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 25237608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 25337608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 25437608Sdes dst[3] = '='; 25537608Sdes dst += 4; 25637608Sdes r += 4; 25737608Sdes break; 25837608Sdes case 1: 25937608Sdes t = src[0] << 16; 26037608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 26137608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 26237608Sdes dst[2] = dst[3] = '='; 26337608Sdes dst += 4; 26437608Sdes r += 4; 26537608Sdes break; 26637608Sdes case 0: 26737608Sdes break; 26837608Sdes } 26937608Sdes 27037608Sdes *dst = 0; 27137608Sdes return r; 27237608Sdes} 27337608Sdes 27437608Sdes/* 27537608Sdes * Encode username and password 27637608Sdes */ 27737608Sdeschar * 27837608Sdes_http_auth(char *usr, char *pwd) 27937608Sdes{ 28037608Sdes int len, lu, lp; 28137608Sdes char *str, *s; 28237608Sdes 28337608Sdes lu = strlen(usr); 28437608Sdes lp = strlen(pwd); 28537608Sdes 28637608Sdes len = (lu * 4 + 2) / 3 /* user name, round up */ 28737608Sdes + 1 /* colon */ 28837608Sdes + (lp * 4 + 2) / 3 /* password, round up */ 28937608Sdes + 1; /* null */ 29037608Sdes 
29137608Sdes if ((s = str = (char *)malloc(len)) == NULL) 29237608Sdes return NULL; 29337608Sdes 29437608Sdes s += _http_base64(s, usr, lu); 29537608Sdes *s++ = ':'; 29637608Sdes s += _http_base64(s, pwd, lp); 29737608Sdes *s = 0; 29837608Sdes 29937608Sdes return str; 30037608Sdes} 30137608Sdes 30237608Sdes/* 30340975Sdes * Retrieve a file by HTTP 30437608Sdes */ 30537535SdesFILE * 30640975SdesfetchGetHTTP(struct url *URL, char *flags) 30737535Sdes{ 30841862Sdes int sd = -1, err, i, enc = ENC_NONE, verbose; 30937535Sdes struct cookie *c; 31037535Sdes char *ln, *p, *q; 31137535Sdes FILE *f, *cf; 31237535Sdes size_t len; 31337535Sdes 31441862Sdes verbose = (strchr(flags, 'v') != NULL); 31541862Sdes 31637535Sdes /* allocate cookie */ 31737535Sdes if ((c = calloc(1, sizeof(struct cookie))) == NULL) 31837535Sdes return NULL; 31937535Sdes 32037535Sdes /* check port */ 32137535Sdes if (!URL->port) 32237535Sdes URL->port = 80; /* default HTTP port */ 32337535Sdes 32437535Sdes /* attempt to connect to proxy server */ 32537535Sdes if (getenv("HTTP_PROXY")) { 32637535Sdes char *px, host[MAXHOSTNAMELEN]; 32737535Sdes int port = 3128; /* XXX I think 3128 is default... check? */ 32837535Sdes size_t len; 32937535Sdes 33037535Sdes /* measure length */ 33137535Sdes px = getenv("HTTP_PROXY"); 33237535Sdes len = strcspn(px, ":"); 33337535Sdes 33437535Sdes /* get port (atoi is a little too tolerant perhaps?) 
*/ 33537535Sdes if (px[len] == ':') 33637535Sdes port = atoi(px+len+1); 33737535Sdes 33837535Sdes /* get host name */ 33937535Sdes if (len >= MAXHOSTNAMELEN) 34037535Sdes len = MAXHOSTNAMELEN - 1; 34137535Sdes strncpy(host, px, len); 34237535Sdes host[len] = 0; 34337535Sdes 34437535Sdes /* connect */ 34541862Sdes sd = fetchConnect(host, port, verbose); 34637535Sdes } 34737535Sdes 34837535Sdes /* if no proxy is configured or could be contacted, try direct */ 34938394Sdes if (sd == -1) { 35041862Sdes if ((sd = fetchConnect(URL->host, URL->port, verbose)) == -1) 35137535Sdes goto ouch; 35237535Sdes } 35337535Sdes 35437535Sdes /* reopen as stream */ 35537571Sdes if ((f = fdopen(sd, "r+")) == NULL) 35637535Sdes goto ouch; 35737535Sdes c->real_f = f; 35837535Sdes 35937535Sdes /* send request (proxies require absolute form, so use that) */ 36041862Sdes if (verbose) 36141862Sdes _fetch_info("requesting http://%s:%d%s", 36241862Sdes URL->host, URL->port, URL->doc); 36337608Sdes _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 36437608Sdes URL->host, URL->port, URL->doc); 36537535Sdes 36637535Sdes /* start sending headers away */ 36737535Sdes if (URL->user[0] || URL->pwd[0]) { 36837608Sdes char *auth_str = _http_auth(URL->user, URL->pwd); 36937608Sdes if (!auth_str) 37037608Sdes goto fouch; 37137608Sdes _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 37237608Sdes free(auth_str); 37337535Sdes } 37437608Sdes _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 37537608Sdes _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 37637608Sdes _http_cmd(f, "Connection: close" ENDL ENDL); 37737535Sdes 37837535Sdes /* get response */ 37937535Sdes if ((ln = fgetln(f, &len)) == NULL) 38037535Sdes goto fouch; 38137535Sdes DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 38237535Sdes (int)len-2, (int)len-2, ln)); 38337535Sdes 38437535Sdes /* we can't use strchr() and friends since ln isn't NUL-terminated */ 38537535Sdes p = ln; 38637535Sdes while ((p < ln + 
len) && !isspace(*p)) 38737535Sdes p++; 38837535Sdes while ((p < ln + len) && !isdigit(*p)) 38937535Sdes p++; 39037535Sdes if (!isdigit(*p)) 39137535Sdes goto fouch; 39237535Sdes err = atoi(p); 39337535Sdes DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", err)); 39437535Sdes 39537535Sdes /* add code to handle redirects later */ 39637571Sdes if (err != 200) { 39740975Sdes _http_seterr(err); 39837535Sdes goto fouch; 39937571Sdes } 40037535Sdes 40137535Sdes /* browse through header */ 40237535Sdes while (1) { 40337535Sdes if ((ln = fgetln(f, &len)) == NULL) 40437535Sdes goto fouch; 40537535Sdes if ((ln[0] == '\r') || (ln[0] == '\n')) 40637535Sdes break; 40737535Sdes DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 40837535Sdes (int)len-2, (int)len-2, ln)); 40937535Sdes#define XFERENC "Transfer-Encoding:" 41037535Sdes if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) { 41137535Sdes p = ln + sizeof(XFERENC) - 1; 41237535Sdes while ((p < ln + len) && isspace(*p)) 41337535Sdes p++; 41437535Sdes for (q = p; (q < ln + len) && !isspace(*q); q++) 41537535Sdes /* VOID */ ; 41637535Sdes *q = 0; 41737535Sdes if (strcasecmp(p, "chunked") == 0) 41837535Sdes enc = ENC_CHUNKED; 41937535Sdes DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 42037535Sdes#undef XFERENC 42137535Sdes#define CONTTYPE "Content-Type:" 42237535Sdes } else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) { 42337535Sdes p = ln + sizeof(CONTTYPE) - 1; 42437535Sdes while ((p < ln + len) && isspace(*p)) 42537535Sdes p++; 42637535Sdes for (i = 0; p < ln + len; p++) 42737535Sdes if (i < HTTPCTYPELEN) 42837535Sdes c->content_type[i++] = *p; 42937535Sdes do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 43037535Sdes DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 43137535Sdes c->content_type)); 43237535Sdes#undef CONTTYPE 43337535Sdes } 43437535Sdes } 43537535Sdes 43637535Sdes /* only body remains */ 43737535Sdes c->encoding = enc; 43837535Sdes cf = funopen(c, 
43937535Sdes (int (*)(void *, char *, int))_http_readfn, 44037535Sdes (int (*)(void *, const char *, int))_http_writefn, 44137535Sdes (fpos_t (*)(void *, fpos_t, int))NULL, 44237535Sdes (int (*)(void *))_http_closefn); 44337535Sdes if (cf == NULL) 44437535Sdes goto fouch; 44537535Sdes return cf; 44637535Sdes 44737535Sdesouch: 44837571Sdes if (sd >= 0) 44937571Sdes close(sd); 45037535Sdes free(c); 45141862Sdes _http_seterr(999); /* XXX do this properly RSN */ 45237535Sdes return NULL; 45337535Sdesfouch: 45437535Sdes fclose(f); 45537535Sdes free(c); 45641862Sdes _http_seterr(999); /* XXX do this properly RSN */ 45737535Sdes return NULL; 45837535Sdes} 45937535Sdes 46037535SdesFILE * 46140975SdesfetchPutHTTP(struct url *URL, char *flags) 46237535Sdes{ 46337535Sdes warnx("fetchPutHTTP(): not implemented"); 46437535Sdes return NULL; 46537535Sdes} 46640975Sdes 46740975Sdes/* 46840975Sdes * Get an HTTP document's metadata 46940975Sdes */ 47040975Sdesint 47140975SdesfetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 47240975Sdes{ 47340975Sdes warnx("fetchStatHTTP(): not implemented"); 47440975Sdes return -1; 47540975Sdes} 476