http.c revision 41989
137535Sdes/*-
237535Sdes * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes *
2841989Sdes *	$Id: http.c,v 1.10 1998/12/18 14:32:48 des Exp $
2937535Sdes */
3037535Sdes
3137608Sdes/*
3237608Sdes * The base64 code in this file is based on code from MIT fetch, which
3337608Sdes * has the following copyright and license:
3437608Sdes *
3537608Sdes *-
3637608Sdes * Copyright 1997 Massachusetts Institute of Technology
3737608Sdes *
3837608Sdes * Permission to use, copy, modify, and distribute this software and
3937608Sdes * its documentation for any purpose and without fee is hereby
4037608Sdes * granted, provided that both the above copyright notice and this
4137608Sdes * permission notice appear in all copies, that both the above
4237608Sdes * copyright notice and this permission notice appear in all
4337608Sdes * supporting documentation, and that the name of M.I.T. not be used
4437608Sdes * in advertising or publicity pertaining to distribution of the
4537608Sdes * software without specific, written prior permission.  M.I.T. makes
4637608Sdes * no representations about the suitability of this software for any
4737608Sdes * purpose.  It is provided "as is" without express or implied
4837608Sdes * warranty.
4937608Sdes *
5037608Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
5137608Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
5237608Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
5337608Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
5437608Sdes * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
6137608Sdes * SUCH DAMAGE. */
6237608Sdes
6337535Sdes#include <sys/param.h>
6437535Sdes
6537535Sdes#include <err.h>
6637535Sdes#include <ctype.h>
6737608Sdes#include <stdarg.h>
6837535Sdes#include <stdio.h>
6937535Sdes#include <stdlib.h>
7037535Sdes#include <string.h>
7137535Sdes#include <unistd.h>
7237535Sdes
7337535Sdes#include "fetch.h"
7440939Sdes#include "common.h"
7541862Sdes#include "httperr.h"
7637535Sdes
7737535Sdesextern char *__progname;
7837535Sdes
7937535Sdes#define ENDL "\r\n"
8037535Sdes
/*
 * State object passed to funopen() as the cookie of the stream returned
 * by fetchGetHTTP().  It wraps the real socket stream and tracks the
 * current input line plus chunk-decoding progress.
 */
struct cookie
{
    FILE *real_f;			/* underlying stream for the socket */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type value */
    char *buf;				/* current line, as returned by fgetln() */
    int b_cur, eof;			/* read offset into buf; end-of-body flag */
    size_t b_len;			/* bytes usable in buf; fgetln() stores a
					 * size_t through a pointer to this field */
    unsigned chunksize;			/* bytes left in the current chunk */
};
9337535Sdes
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats fmt and the arguments that follow it onto f.  Unless NDEBUG is
 * defined, the same text is also written to stderr between bold-on and
 * attribute-reset escape sequences.
 *
 * Returns 0 on success, -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, const char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so the
     * echo needs its own traversal of the arguments.
     */
    va_start(ap, fmt);
    fprintf(stderr, "\033[1m>>> ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\033[m");
    va_end(ap);
#endif

    return (r < 0) ? -1 : 0;
}
11337608Sdes
11437608Sdes/*
11537608Sdes * Fill the input buffer, do chunk decoding on the fly
11637608Sdes */
11737535Sdesstatic char *
11837535Sdes_http_fillbuf(struct cookie *c)
11937535Sdes{
12037535Sdes    char *ln;
12137535Sdes    unsigned int len;
12237535Sdes
12337535Sdes    if (c->eof)
12437535Sdes	return NULL;
12537535Sdes
12637535Sdes    if (c->encoding == ENC_NONE) {
12737535Sdes	c->buf = fgetln(c->real_f, &(c->b_len));
12837535Sdes	c->b_cur = 0;
12937535Sdes    } else if (c->encoding == ENC_CHUNKED) {
13037535Sdes	if (c->chunksize == 0) {
13137535Sdes	    ln = fgetln(c->real_f, &len);
13237535Sdes	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
13337535Sdes			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
13437535Sdes	    sscanf(ln, "%x", &(c->chunksize));
13537535Sdes	    if (!c->chunksize) {
13637535Sdes		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
13737535Sdes			      "end of last chunk\033[m\n"));
13837535Sdes		c->eof = 1;
13937535Sdes		return NULL;
14037535Sdes	    }
14137535Sdes	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
14237535Sdes			  "new chunk: %X\033[m\n", c->chunksize));
14337535Sdes	}
14437535Sdes	c->buf = fgetln(c->real_f, &(c->b_len));
14537535Sdes	if (c->b_len > c->chunksize)
14637535Sdes	    c->b_len = c->chunksize;
14737535Sdes	c->chunksize -= c->b_len;
14837535Sdes	c->b_cur = 0;
14937535Sdes    }
15037535Sdes    else return NULL; /* unknown encoding */
15137535Sdes    return c->buf;
15237535Sdes}
15337535Sdes
15437608Sdes/*
15537608Sdes * Read function
15637608Sdes */
15737535Sdesstatic int
15837535Sdes_http_readfn(struct cookie *c, char *buf, int len)
15937535Sdes{
16037535Sdes    int l, pos = 0;
16137535Sdes    while (len) {
16237535Sdes	/* empty buffer */
16337535Sdes	if (!c->buf || (c->b_cur == c->b_len))
16437535Sdes	    if (!_http_fillbuf(c))
16537535Sdes		break;
16637535Sdes
16737535Sdes	l = c->b_len - c->b_cur;
16837535Sdes	if (len < l) l = len;
16937535Sdes	memcpy(buf + pos, c->buf + c->b_cur, l);
17037535Sdes	c->b_cur += l;
17137535Sdes	pos += l;
17237535Sdes	len -= l;
17337535Sdes    }
17437535Sdes
17537535Sdes    if (ferror(c->real_f))
17637535Sdes	return -1;
17737535Sdes    else return pos;
17837535Sdes}
17937535Sdes
18037608Sdes/*
18137608Sdes * Write function
18237608Sdes */
18337535Sdesstatic int
18437535Sdes_http_writefn(struct cookie *c, const char *buf, int len)
18537535Sdes{
18637535Sdes    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
18737535Sdes    return r ? r : -1;
18837535Sdes}
18937535Sdes
19037608Sdes/*
19137608Sdes * Close function
19237608Sdes */
19337535Sdesstatic int
19437535Sdes_http_closefn(struct cookie *c)
19537535Sdes{
19637535Sdes    int r = fclose(c->real_f);
19737535Sdes    free(c);
19837535Sdes    return (r == EOF) ? -1 : 0;
19937535Sdes}
20037535Sdes
20137608Sdes/*
20237608Sdes * Extract content type from cookie
20337608Sdes */
20437535Sdeschar *
20537535SdesfetchContentType(FILE *f)
20637535Sdes{
20737535Sdes    /*
20837535Sdes     * We have no way of making sure this really *is* one of our cookies,
20937535Sdes     * so just check for a null pointer and hope for the best.
21037535Sdes     */
21137535Sdes    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
21237535Sdes}
21337535Sdes
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as NUL-terminated base64 text,
 * padding the final group with '='.  dst must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.  Returns the number of characters written,
 * not counting the terminating NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Work on unsigned bytes: where plain char is signed, bytes >= 0x80
     * would sign-extend and smear ones over the neighbouring bit fields.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* full 3-byte groups become 4 output characters each */
    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing 1 or 2 bytes: emit a padded group */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
26137608Sdes
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the base64
 * encoding of the single string "usr:pwd".  Returns a malloc()ed,
 * NUL-terminated string that the caller must free(), or NULL if memory
 * could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    rawlen = lu + 1 + lp;		/* user name, colon, password */

    /* assemble "usr:pwd" */
    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = 0;

    /* base64 emits 4 characters per started group of 3 bytes, plus NUL */
    str = malloc(((rawlen + 2) / 3) * 4 + 1);
    if (str != NULL)
	_http_base64(str, raw, (int)rawlen);

    free(raw);
    return str;
}
28937608Sdes
29037608Sdes/*
29140975Sdes * Retrieve a file by HTTP
29237608Sdes */
29337535SdesFILE *
29440975SdesfetchGetHTTP(struct url *URL, char *flags)
29537535Sdes{
29641863Sdes    int sd = -1, e, i, enc = ENC_NONE, verbose;
29737535Sdes    struct cookie *c;
29841863Sdes    char *ln, *p, *px, *q;
29937535Sdes    FILE *f, *cf;
30037535Sdes    size_t len;
30137535Sdes
30241862Sdes    verbose = (strchr(flags, 'v') != NULL);
30341862Sdes
30437535Sdes    /* allocate cookie */
30537535Sdes    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
30637535Sdes	return NULL;
30737535Sdes
30837535Sdes    /* check port */
30937535Sdes    if (!URL->port)
31037535Sdes	URL->port = 80; /* default HTTP port */
31137535Sdes
31237535Sdes    /* attempt to connect to proxy server */
31341863Sdes    if ((px = getenv("HTTP_PROXY")) != NULL) {
31441863Sdes	char host[MAXHOSTNAMELEN];
31537535Sdes	int port = 3128; /* XXX I think 3128 is default... check? */
31637535Sdes
31737535Sdes	/* measure length */
31837535Sdes	len = strcspn(px, ":");
31937535Sdes
32037535Sdes	/* get port (atoi is a little too tolerant perhaps?) */
32137535Sdes	if (px[len] == ':')
32237535Sdes	    port = atoi(px+len+1);
32337535Sdes
32437535Sdes	/* get host name */
32537535Sdes	if (len >= MAXHOSTNAMELEN)
32637535Sdes	    len = MAXHOSTNAMELEN - 1;
32737535Sdes	strncpy(host, px, len);
32837535Sdes	host[len] = 0;
32937535Sdes
33037535Sdes	/* connect */
33141923Sdes	sd = _fetch_connect(host, port, verbose);
33237535Sdes    }
33337535Sdes
33437535Sdes    /* if no proxy is configured or could be contacted, try direct */
33538394Sdes    if (sd == -1) {
33641923Sdes	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
33737535Sdes	    goto ouch;
33837535Sdes    }
33937535Sdes
34037535Sdes    /* reopen as stream */
34137571Sdes    if ((f = fdopen(sd, "r+")) == NULL)
34237535Sdes	goto ouch;
34337535Sdes    c->real_f = f;
34437535Sdes
34537535Sdes    /* send request (proxies require absolute form, so use that) */
34641862Sdes    if (verbose)
34741862Sdes	_fetch_info("requesting http://%s:%d%s",
34841862Sdes		    URL->host, URL->port, URL->doc);
34937608Sdes    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
35037608Sdes	      URL->host, URL->port, URL->doc);
35137535Sdes
35237535Sdes    /* start sending headers away */
35337535Sdes    if (URL->user[0] || URL->pwd[0]) {
35437608Sdes	char *auth_str = _http_auth(URL->user, URL->pwd);
35537608Sdes	if (!auth_str)
35637608Sdes	    goto fouch;
35737608Sdes	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
35837608Sdes	free(auth_str);
35937535Sdes    }
36037608Sdes    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
36137608Sdes    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
36237608Sdes    _http_cmd(f, "Connection: close" ENDL ENDL);
36337535Sdes
36437535Sdes    /* get response */
36537535Sdes    if ((ln = fgetln(f, &len)) == NULL)
36637535Sdes	goto fouch;
36737535Sdes    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
36837535Sdes		  (int)len-2, (int)len-2, ln));
36937535Sdes
37037535Sdes    /* we can't use strchr() and friends since ln isn't NUL-terminated */
37137535Sdes    p = ln;
37237535Sdes    while ((p < ln + len) && !isspace(*p))
37337535Sdes	p++;
37437535Sdes    while ((p < ln + len) && !isdigit(*p))
37537535Sdes	p++;
37637535Sdes    if (!isdigit(*p))
37737535Sdes	goto fouch;
37841863Sdes    e = atoi(p);
37941863Sdes    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
38037535Sdes
38137535Sdes    /* add code to handle redirects later */
38241863Sdes    if (e != 200) {
38341863Sdes	_http_seterr(e);
38437535Sdes	goto fouch;
38537571Sdes    }
38637535Sdes
38737535Sdes    /* browse through header */
38837535Sdes    while (1) {
38937535Sdes	if ((ln = fgetln(f, &len)) == NULL)
39037535Sdes	    goto fouch;
39137535Sdes	if ((ln[0] == '\r') || (ln[0] == '\n'))
39237535Sdes	    break;
39337535Sdes	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
39437535Sdes		      (int)len-2, (int)len-2, ln));
39537535Sdes#define XFERENC "Transfer-Encoding:"
39637535Sdes	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
39737535Sdes	    p = ln + sizeof(XFERENC) - 1;
39837535Sdes	    while ((p < ln + len) && isspace(*p))
39937535Sdes		p++;
40037535Sdes	    for (q = p; (q < ln + len) && !isspace(*q); q++)
40137535Sdes		/* VOID */ ;
40237535Sdes	    *q = 0;
40337535Sdes	    if (strcasecmp(p, "chunked") == 0)
40437535Sdes		enc = ENC_CHUNKED;
40537535Sdes	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
40637535Sdes#undef XFERENC
40737535Sdes#define CONTTYPE "Content-Type:"
40837535Sdes	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
40937535Sdes	    p = ln + sizeof(CONTTYPE) - 1;
41037535Sdes	    while ((p < ln + len) && isspace(*p))
41137535Sdes		p++;
41237535Sdes	    for (i = 0; p < ln + len; p++)
41337535Sdes		if (i < HTTPCTYPELEN)
41437535Sdes		    c->content_type[i++] = *p;
41537535Sdes	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
41637535Sdes	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
41737535Sdes			  c->content_type));
41837535Sdes#undef CONTTYPE
41937535Sdes	}
42037535Sdes    }
42137535Sdes
42237535Sdes    /* only body remains */
42337535Sdes    c->encoding = enc;
42437535Sdes    cf = funopen(c,
42537535Sdes		 (int (*)(void *, char *, int))_http_readfn,
42637535Sdes		 (int (*)(void *, const char *, int))_http_writefn,
42737535Sdes		 (fpos_t (*)(void *, fpos_t, int))NULL,
42837535Sdes		 (int (*)(void *))_http_closefn);
42937535Sdes    if (cf == NULL)
43037535Sdes	goto fouch;
43137535Sdes    return cf;
43237535Sdes
43337535Sdesouch:
43437571Sdes    if (sd >= 0)
43537571Sdes	close(sd);
43637535Sdes    free(c);
43741862Sdes    _http_seterr(999); /* XXX do this properly RSN */
43837535Sdes    return NULL;
43937535Sdesfouch:
44037535Sdes    fclose(f);
44137535Sdes    free(c);
44241862Sdes    _http_seterr(999); /* XXX do this properly RSN */
44337535Sdes    return NULL;
44437535Sdes}
44537535Sdes
/*
 * Store a file by HTTP
 *
 * Not implemented: prints a warning and returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return NULL;
}
45240975Sdes
/*
 * Get an HTTP document's metadata
 *
 * Not implemented: prints a warning and returns -1 without touching us.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    (void)url;
    (void)us;
    (void)flags;

    warnx("fetchStatHTTP(): not implemented");
    return -1;
}
46241989Sdes
/*
 * List a directory
 *
 * Not implemented: prints a warning and returns NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return NULL;
}
472