http.c revision 40975
137535Sdes/*- 237535Sdes * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1537535Sdes * derived from this software without specific prior written permission 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2737535Sdes * 2840975Sdes * $Id: http.c,v 1.6 1998/11/05 19:48:17 des Exp $ 2937535Sdes */ 3037535Sdes 3137608Sdes/* 3237608Sdes * The base64 code in this file is based on code from MIT fetch, which 3337608Sdes * has the following copyright and license: 3437608Sdes * 3537608Sdes *- 3637608Sdes * Copyright 1997 Massachusetts Institute of Technology 3737608Sdes * 3837608Sdes * Permission to use, copy, modify, and distribute this software and 3937608Sdes * its documentation for any purpose and without fee is hereby 4037608Sdes * granted, provided that both the above copyright notice and this 4137608Sdes * permission notice appear in all copies, that both the above 4237608Sdes * copyright notice and this permission notice appear in all 4337608Sdes * supporting documentation, and that the name of M.I.T. not be used 4437608Sdes * in advertising or publicity pertaining to distribution of the 4537608Sdes * software without specific, written prior permission. M.I.T. makes 4637608Sdes * no representations about the suitability of this software for any 4737608Sdes * purpose. It is provided "as is" without express or implied 4837608Sdes * warranty. 4937608Sdes * 5037608Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 5137608Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 5237608Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 5337608Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 5437608Sdes * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6137608Sdes * SUCH DAMAGE. */ 6237608Sdes 6337535Sdes#include <sys/param.h> 6437535Sdes#include <sys/errno.h> 6537535Sdes#include <sys/socket.h> 6637535Sdes#include <sys/types.h> 6737535Sdes 6837535Sdes#include <netinet/in.h> 6937535Sdes 7037535Sdes#include <err.h> 7137535Sdes#include <ctype.h> 7237535Sdes#include <netdb.h> 7337608Sdes#include <stdarg.h> 7437535Sdes#include <stdio.h> 7537535Sdes#include <stdlib.h> 7637535Sdes#include <string.h> 7737535Sdes#include <unistd.h> 7837535Sdes 7937535Sdes#include "fetch.h" 8040939Sdes#include "common.h" 8140975Sdes#include "httperr.inc" 8237535Sdes 8337535Sdes#ifndef NDEBUG 8437535Sdes#define DEBUG(x) do x; while (0) 8537535Sdes#else 8637535Sdes#define DEBUG(x) do { } while (0) 8737535Sdes#endif 8837535Sdes 8937535Sdesextern char *__progname; 9037535Sdes 9137535Sdes#define ENDL "\r\n" 9237535Sdes 9337535Sdesstruct cookie 9437535Sdes{ 9537535Sdes FILE *real_f; 9637535Sdes#define ENC_NONE 0 9737535Sdes#define ENC_CHUNKED 1 9837535Sdes int encoding; /* 1 = chunked, 0 = none */ 9937535Sdes#define HTTPCTYPELEN 59 10037535Sdes char content_type[HTTPCTYPELEN+1]; 10137535Sdes char *buf; 10237535Sdes int b_cur, eof; 10337535Sdes unsigned b_len, chunksize; 10437535Sdes}; 10537535Sdes 10637608Sdes/* 10737608Sdes * Send a formatted line; optionally echo to terminal 10837608Sdes */ 10937608Sdesstatic int 11037608Sdes_http_cmd(FILE *f, char *fmt, ...) 11137608Sdes{ 11237608Sdes va_list ap; 11337608Sdes 11437608Sdes va_start(ap, fmt); 11537608Sdes vfprintf(f, fmt, ap); 11637608Sdes#ifndef NDEBUG 11737608Sdes fprintf(stderr, "\033[1m>>> "); 11837608Sdes vfprintf(stderr, fmt, ap); 11937608Sdes fprintf(stderr, "\033[m"); 12037608Sdes#endif 12137608Sdes va_end(ap); 12237608Sdes 12337608Sdes return 0; /* XXX */ 12437608Sdes} 12537608Sdes 12637608Sdes/* 12737608Sdes * Fill the input buffer, do chunk decoding on the fly 12837608Sdes */ 12937535Sdesstatic char * 13037535Sdes_http_fillbuf(struct cookie *c) 13137535Sdes{ 13237535Sdes char *ln; 13337535Sdes unsigned int len; 13437535Sdes 13537535Sdes if (c->eof) 13637535Sdes return NULL; 13737535Sdes 13837535Sdes if (c->encoding == ENC_NONE) { 13937535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 14037535Sdes c->b_cur = 0; 14137535Sdes } else if (c->encoding == ENC_CHUNKED) { 14237535Sdes if (c->chunksize == 0) { 14337535Sdes ln = fgetln(c->real_f, &len); 14437535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 14537535Sdes "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 14637535Sdes sscanf(ln, "%x", &(c->chunksize)); 14737535Sdes if (!c->chunksize) { 14837535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 14937535Sdes "end of last chunk\033[m\n")); 15037535Sdes c->eof = 1; 15137535Sdes return NULL; 15237535Sdes } 15337535Sdes DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 15437535Sdes "new chunk: %X\033[m\n", c->chunksize)); 15537535Sdes } 15637535Sdes c->buf = fgetln(c->real_f, &(c->b_len)); 15737535Sdes if (c->b_len > c->chunksize) 15837535Sdes c->b_len = c->chunksize; 15937535Sdes c->chunksize -= c->b_len; 16037535Sdes c->b_cur = 0; 16137535Sdes } 16237535Sdes else return NULL; /* unknown encoding */ 16337535Sdes return c->buf; 16437535Sdes} 16537535Sdes 16637608Sdes/* 16737608Sdes * Read function 16837608Sdes */ 16937535Sdesstatic int 17037535Sdes_http_readfn(struct cookie *c, char *buf, int len) 17137535Sdes{ 17237535Sdes int l, pos = 0; 17337535Sdes while (len) { 17437535Sdes /* empty buffer */ 17537535Sdes if (!c->buf || (c->b_cur == c->b_len)) 17637535Sdes if (!_http_fillbuf(c)) 17737535Sdes break; 17837535Sdes 17937535Sdes l = c->b_len - c->b_cur; 18037535Sdes if (len < l) l = len; 18137535Sdes memcpy(buf + pos, c->buf + c->b_cur, l); 18237535Sdes c->b_cur += l; 18337535Sdes pos += l; 18437535Sdes len -= l; 18537535Sdes } 18637535Sdes 18737535Sdes if (ferror(c->real_f)) 18837535Sdes return -1; 18937535Sdes else return pos; 19037535Sdes} 19137535Sdes 19237608Sdes/* 19337608Sdes * Write function 19437608Sdes */ 19537535Sdesstatic int 19637535Sdes_http_writefn(struct cookie *c, const char *buf, int len) 19737535Sdes{ 19837535Sdes size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 19937535Sdes return r ? r : -1; 20037535Sdes} 20137535Sdes 20237608Sdes/* 20337608Sdes * Close function 20437608Sdes */ 20537535Sdesstatic int 20637535Sdes_http_closefn(struct cookie *c) 20737535Sdes{ 20837535Sdes int r = fclose(c->real_f); 20937535Sdes free(c); 21037535Sdes return (r == EOF) ? -1 : 0; 21137535Sdes} 21237535Sdes 21337608Sdes/* 21437608Sdes * Extract content type from cookie 21537608Sdes */ 21637535Sdeschar * 21737535SdesfetchContentType(FILE *f) 21837535Sdes{ 21937535Sdes /* 22037535Sdes * We have no way of making sure this really *is* one of our cookies, 22137535Sdes * so just check for a null pointer and hope for the best. 22237535Sdes */ 22337535Sdes return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 22437535Sdes} 22537535Sdes 22637608Sdes/* 22737608Sdes * Base64 encoding 22837608Sdes */ 22937608Sdesint 23037608Sdes_http_base64(char *dst, char *src, int l) 23137608Sdes{ 23237608Sdes static const char base64[] = 23337608Sdes "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 23437608Sdes "abcdefghijklmnopqrstuvwxyz" 23537608Sdes "0123456789+/"; 23637608Sdes int t, r = 0; 23737608Sdes 23837608Sdes while (l >= 3) { 23937608Sdes t = (src[0] << 16) | (src[1] << 8) | src[2]; 24037608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 24137608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 24237608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 24337608Sdes dst[3] = base64[(t >> 0) & 0x3f]; 24437608Sdes src += 3; l -= 3; 24537608Sdes dst += 4; r += 4; 24637608Sdes } 24737608Sdes 24837608Sdes switch (l) { 24937608Sdes case 2: 25037608Sdes t = (src[0] << 16) | (src[1] << 8); 25137608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 25237608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 25337608Sdes dst[2] = base64[(t >> 6) & 0x3f]; 25437608Sdes dst[3] = '='; 25537608Sdes dst += 4; 25637608Sdes r += 4; 25737608Sdes break; 25837608Sdes case 1: 25937608Sdes t = src[0] << 16; 26037608Sdes dst[0] = base64[(t >> 18) & 0x3f]; 26137608Sdes dst[1] = base64[(t >> 12) & 0x3f]; 26237608Sdes dst[2] = dst[3] = '='; 26337608Sdes dst += 4; 26437608Sdes r += 4; 26537608Sdes break; 26637608Sdes case 0: 26737608Sdes break; 26837608Sdes } 26937608Sdes 27037608Sdes *dst = 0; 27137608Sdes return r; 27237608Sdes} 27337608Sdes 27437608Sdes/* 27537608Sdes * Encode username and password 27637608Sdes */ 27737608Sdeschar * 27837608Sdes_http_auth(char *usr, char *pwd) 27937608Sdes{ 28037608Sdes int len, lu, lp; 28137608Sdes char *str, *s; 28237608Sdes 28337608Sdes lu = strlen(usr); 28437608Sdes lp = strlen(pwd); 28537608Sdes 28637608Sdes len = (lu * 4 + 2) / 3 /* user name, round up */ 28737608Sdes + 1 /* colon */ 28837608Sdes + (lp * 4 + 2) / 3 /* password, round up */ 28937608Sdes + 1; /* null */ 29037608Sdes 29137608Sdes if ((s = str = (char *)malloc(len)) == NULL) 29237608Sdes return NULL; 29337608Sdes 29437608Sdes s += _http_base64(s, usr, lu); 29537608Sdes *s++ = ':'; 29637608Sdes s += _http_base64(s, pwd, lp); 29737608Sdes *s = 0; 29837608Sdes 29937608Sdes return str; 30037608Sdes} 30137608Sdes 30237608Sdes/* 30340975Sdes * Retrieve a file by HTTP 30437608Sdes */ 30537535SdesFILE * 30640975SdesfetchGetHTTP(struct url *URL, char *flags) 30737535Sdes{ 30837571Sdes int sd = -1, err, i, enc = ENC_NONE; 30937535Sdes struct cookie *c; 31037535Sdes char *ln, *p, *q; 31137535Sdes FILE *f, *cf; 31237535Sdes size_t len; 31337535Sdes 31437535Sdes /* allocate cookie */ 31537535Sdes if ((c = calloc(1, sizeof(struct cookie))) == NULL) 31637535Sdes return NULL; 31737535Sdes 31837535Sdes /* check port */ 31937535Sdes if (!URL->port) 32037535Sdes URL->port = 80; /* default HTTP port */ 32137535Sdes 32237535Sdes /* attempt to connect to proxy server */ 32337535Sdes if (getenv("HTTP_PROXY")) { 32437535Sdes char *px, host[MAXHOSTNAMELEN]; 32537535Sdes int port = 3128; /* XXX I think 3128 is default... check? */ 32637535Sdes size_t len; 32737535Sdes 32837535Sdes /* measure length */ 32937535Sdes px = getenv("HTTP_PROXY"); 33037535Sdes len = strcspn(px, ":"); 33137535Sdes 33237535Sdes /* get port (atoi is a little too tolerant perhaps?) */ 33337535Sdes if (px[len] == ':') 33437535Sdes port = atoi(px+len+1); 33537535Sdes 33637535Sdes /* get host name */ 33737535Sdes if (len >= MAXHOSTNAMELEN) 33837535Sdes len = MAXHOSTNAMELEN - 1; 33937535Sdes strncpy(host, px, len); 34037535Sdes host[len] = 0; 34137535Sdes 34237535Sdes /* connect */ 34337571Sdes sd = fetchConnect(host, port); 34437535Sdes } 34537535Sdes 34637535Sdes /* if no proxy is configured or could be contacted, try direct */ 34738394Sdes if (sd == -1) { 34838394Sdes if ((sd = fetchConnect(URL->host, URL->port)) == -1) 34937535Sdes goto ouch; 35037535Sdes } 35137535Sdes 35237535Sdes /* reopen as stream */ 35337571Sdes if ((f = fdopen(sd, "r+")) == NULL) 35437535Sdes goto ouch; 35537535Sdes c->real_f = f; 35637535Sdes 35737535Sdes /* send request (proxies require absolute form, so use that) */ 35837608Sdes _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 35937608Sdes URL->host, URL->port, URL->doc); 36037535Sdes 36137535Sdes /* start sending headers away */ 36237535Sdes if (URL->user[0] || URL->pwd[0]) { 36337608Sdes char *auth_str = _http_auth(URL->user, URL->pwd); 36437608Sdes if (!auth_str) 36537608Sdes goto fouch; 36637608Sdes _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 36737608Sdes free(auth_str); 36837535Sdes } 36937608Sdes _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 37037608Sdes _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 37137608Sdes _http_cmd(f, "Connection: close" ENDL ENDL); 37237535Sdes 37337535Sdes /* get response */ 37437535Sdes if ((ln = fgetln(f, &len)) == NULL) 37537535Sdes goto fouch; 37637535Sdes DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 37737535Sdes (int)len-2, (int)len-2, ln)); 37837535Sdes 37937535Sdes /* we can't use strchr() and friends since ln isn't NUL-terminated */ 38037535Sdes p = ln; 38137535Sdes while ((p < ln + len) && !isspace(*p)) 38237535Sdes p++; 38337535Sdes while ((p < ln + len) && !isdigit(*p)) 38437535Sdes p++; 38537535Sdes if (!isdigit(*p)) 38637535Sdes goto fouch; 38737535Sdes err = atoi(p); 38837535Sdes DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", err)); 38937535Sdes 39037535Sdes /* add code to handle redirects later */ 39137571Sdes if (err != 200) { 39240975Sdes _http_seterr(err); 39337535Sdes goto fouch; 39437571Sdes } 39537535Sdes 39637535Sdes /* browse through header */ 39737535Sdes while (1) { 39837535Sdes if ((ln = fgetln(f, &len)) == NULL) 39937535Sdes goto fouch; 40037535Sdes if ((ln[0] == '\r') || (ln[0] == '\n')) 40137535Sdes break; 40237535Sdes DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 40337535Sdes (int)len-2, (int)len-2, ln)); 40437535Sdes#define XFERENC "Transfer-Encoding:" 40537535Sdes if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) { 40637535Sdes p = ln + sizeof(XFERENC) - 1; 40737535Sdes while ((p < ln + len) && isspace(*p)) 40837535Sdes p++; 40937535Sdes for (q = p; (q < ln + len) && !isspace(*q); q++) 41037535Sdes /* VOID */ ; 41137535Sdes *q = 0; 41237535Sdes if (strcasecmp(p, "chunked") == 0) 41337535Sdes enc = ENC_CHUNKED; 41437535Sdes DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 41537535Sdes#undef XFERENC 41637535Sdes#define CONTTYPE "Content-Type:" 41737535Sdes } else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) { 41837535Sdes p = ln + sizeof(CONTTYPE) - 1; 41937535Sdes while ((p < ln + len) && isspace(*p)) 42037535Sdes p++; 42137535Sdes for (i = 0; p < ln + len; p++) 42237535Sdes if (i < HTTPCTYPELEN) 42337535Sdes c->content_type[i++] = *p; 42437535Sdes do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 42537535Sdes DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 42637535Sdes c->content_type)); 42737535Sdes#undef CONTTYPE 42837535Sdes } 42937535Sdes } 43037535Sdes 43137535Sdes /* only body remains */ 43237535Sdes c->encoding = enc; 43337535Sdes cf = funopen(c, 43437535Sdes (int (*)(void *, char *, int))_http_readfn, 43537535Sdes (int (*)(void *, const char *, int))_http_writefn, 43637535Sdes (fpos_t (*)(void *, fpos_t, int))NULL, 43737535Sdes (int (*)(void *))_http_closefn); 43837535Sdes if (cf == NULL) 43937535Sdes goto fouch; 44037535Sdes return cf; 44137535Sdes 44237535Sdesouch: 44337571Sdes if (sd >= 0) 44437571Sdes close(sd); 44537535Sdes free(c); 44637535Sdes return NULL; 44737535Sdesfouch: 44837535Sdes fclose(f); 44937535Sdes free(c); 45037535Sdes return NULL; 45137535Sdes} 45237535Sdes 45337535SdesFILE * 45440975SdesfetchPutHTTP(struct url *URL, char *flags) 45537535Sdes{ 45637535Sdes warnx("fetchPutHTTP(): not implemented"); 45737535Sdes return NULL; 45837535Sdes} 45940975Sdes 46040975Sdes/* 46140975Sdes * Get an HTTP document's metadata 46240975Sdes */ 46340975Sdesint 46440975SdesfetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 46540975Sdes{ 46640975Sdes warnx("fetchStatHTTP(): not implemented"); 46740975Sdes return -1; 46840975Sdes} 469