http.c revision 41862
137535Sdes/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes *
2841862Sdes *	$Id: http.c,v 1.7 1998/11/06 22:14:08 des Exp $
2937535Sdes */
3037535Sdes
3137608Sdes/*
3237608Sdes * The base64 code in this file is based on code from MIT fetch, which
3337608Sdes * has the following copyright and license:
3437608Sdes *
3537608Sdes *-
3637608Sdes * Copyright 1997 Massachusetts Institute of Technology
3737608Sdes *
3837608Sdes * Permission to use, copy, modify, and distribute this software and
3937608Sdes * its documentation for any purpose and without fee is hereby
4037608Sdes * granted, provided that both the above copyright notice and this
4137608Sdes * permission notice appear in all copies, that both the above
4237608Sdes * copyright notice and this permission notice appear in all
4337608Sdes * supporting documentation, and that the name of M.I.T. not be used
4437608Sdes * in advertising or publicity pertaining to distribution of the
4537608Sdes * software without specific, written prior permission.  M.I.T. makes
4637608Sdes * no representations about the suitability of this software for any
4737608Sdes * purpose.  It is provided "as is" without express or implied
4837608Sdes * warranty.
4937608Sdes *
5037608Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
5137608Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
5237608Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
5337608Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
5437608Sdes * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
6137608Sdes * SUCH DAMAGE. */
6237608Sdes
6337535Sdes#include <sys/param.h>
6437535Sdes#include <sys/errno.h>
6537535Sdes#include <sys/socket.h>
6637535Sdes#include <sys/types.h>
6737535Sdes
6837535Sdes#include <netinet/in.h>
6937535Sdes
7037535Sdes#include <err.h>
7137535Sdes#include <ctype.h>
7237535Sdes#include <netdb.h>
7337608Sdes#include <stdarg.h>
7437535Sdes#include <stdio.h>
7537535Sdes#include <stdlib.h>
7637535Sdes#include <string.h>
7737535Sdes#include <unistd.h>
7837535Sdes
7937535Sdes#include "fetch.h"
8040939Sdes#include "common.h"
8141862Sdes#include "httperr.h"
8237535Sdes
/*
 * DEBUG(x) runs the statement x once unless NDEBUG is defined, in which
 * case it expands to an empty statement and x is discarded entirely.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do x; while (0)
#endif

/* Name of the running program; the object itself is defined elsewhere */
extern char *__progname;

/* Line terminator appended to every request line and header we send */
#define ENDL "\r\n"
9237535Sdes
/* Values for the `encoding' member of struct cookie */
#define ENC_NONE 0
#define ENC_CHUNKED 1

/* Capacity of the stored Content-Type value, excluding the terminating NUL */
#define HTTPCTYPELEN 59

/*
 * Per-connection state handed to funopen(); the read, write and close
 * callbacks in this file all receive a pointer to one of these.
 */
struct cookie
{
    FILE *real_f;			/* underlying stream for the connection */
    int encoding;			/* 1 = chunked, 0 = none */
    char content_type[HTTPCTYPELEN+1];	/* Content-Type value, NUL-terminated */
    char *buf;				/* most recent buffer returned by fgetln() */
    int b_cur, eof;			/* read offset into buf; end-of-body flag */
    /*
     * b_len is a size_t because its address is handed straight to
     * fgetln(), which stores a size_t through it; an `unsigned' there is
     * only correct on platforms where the two types happen to coincide.
     */
    size_t b_len;			/* number of usable bytes in buf */
    unsigned chunksize;			/* bytes still to be read from the current chunk */
};
10537535Sdes
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats `fmt' and its arguments onto the stream `f'.  Unless NDEBUG is
 * defined, the same text is also written to stderr between the escape
 * sequences "\033[1m>>> " and "\033[m".
 *
 * Returns 0 on success, or -1 if writing to `f' reported an error.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so the
     * argument list has to be restarted before it is formatted again.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
12537608Sdes
12637608Sdes/*
12737608Sdes * Fill the input buffer, do chunk decoding on the fly
12837608Sdes */
12937535Sdesstatic char *
13037535Sdes_http_fillbuf(struct cookie *c)
13137535Sdes{
13237535Sdes    char *ln;
13337535Sdes    unsigned int len;
13437535Sdes
13537535Sdes    if (c->eof)
13637535Sdes	return NULL;
13737535Sdes
13837535Sdes    if (c->encoding == ENC_NONE) {
13937535Sdes	c->buf = fgetln(c->real_f, &(c->b_len));
14037535Sdes	c->b_cur = 0;
14137535Sdes    } else if (c->encoding == ENC_CHUNKED) {
14237535Sdes	if (c->chunksize == 0) {
14337535Sdes	    ln = fgetln(c->real_f, &len);
14437535Sdes	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
14537535Sdes			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
14637535Sdes	    sscanf(ln, "%x", &(c->chunksize));
14737535Sdes	    if (!c->chunksize) {
14837535Sdes		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
14937535Sdes			      "end of last chunk\033[m\n"));
15037535Sdes		c->eof = 1;
15137535Sdes		return NULL;
15237535Sdes	    }
15337535Sdes	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
15437535Sdes			  "new chunk: %X\033[m\n", c->chunksize));
15537535Sdes	}
15637535Sdes	c->buf = fgetln(c->real_f, &(c->b_len));
15737535Sdes	if (c->b_len > c->chunksize)
15837535Sdes	    c->b_len = c->chunksize;
15937535Sdes	c->chunksize -= c->b_len;
16037535Sdes	c->b_cur = 0;
16137535Sdes    }
16237535Sdes    else return NULL; /* unknown encoding */
16337535Sdes    return c->buf;
16437535Sdes}
16537535Sdes
16637608Sdes/*
16737608Sdes * Read function
16837608Sdes */
16937535Sdesstatic int
17037535Sdes_http_readfn(struct cookie *c, char *buf, int len)
17137535Sdes{
17237535Sdes    int l, pos = 0;
17337535Sdes    while (len) {
17437535Sdes	/* empty buffer */
17537535Sdes	if (!c->buf || (c->b_cur == c->b_len))
17637535Sdes	    if (!_http_fillbuf(c))
17737535Sdes		break;
17837535Sdes
17937535Sdes	l = c->b_len - c->b_cur;
18037535Sdes	if (len < l) l = len;
18137535Sdes	memcpy(buf + pos, c->buf + c->b_cur, l);
18237535Sdes	c->b_cur += l;
18337535Sdes	pos += l;
18437535Sdes	len -= l;
18537535Sdes    }
18637535Sdes
18737535Sdes    if (ferror(c->real_f))
18837535Sdes	return -1;
18937535Sdes    else return pos;
19037535Sdes}
19137535Sdes
19237608Sdes/*
19337608Sdes * Write function
19437608Sdes */
19537535Sdesstatic int
19637535Sdes_http_writefn(struct cookie *c, const char *buf, int len)
19737535Sdes{
19837535Sdes    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
19937535Sdes    return r ? r : -1;
20037535Sdes}
20137535Sdes
20237608Sdes/*
20337608Sdes * Close function
20437608Sdes */
20537535Sdesstatic int
20637535Sdes_http_closefn(struct cookie *c)
20737535Sdes{
20837535Sdes    int r = fclose(c->real_f);
20937535Sdes    free(c);
21037535Sdes    return (r == EOF) ? -1 : 0;
21137535Sdes}
21237535Sdes
21337608Sdes/*
21437608Sdes * Extract content type from cookie
21537608Sdes */
21637535Sdeschar *
21737535SdesfetchContentType(FILE *f)
21837535Sdes{
21937535Sdes    /*
22037535Sdes     * We have no way of making sure this really *is* one of our cookies,
22137535Sdes     * so just check for a null pointer and hope for the best.
22237535Sdes     */
22337535Sdes    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
22437535Sdes}
22537535Sdes
/*
 * Base64 encoding
 *
 * Encodes the `l' bytes at `src' into `dst' and NUL-terminates the result.
 * Every group of up to three input bytes yields four output characters,
 * with '=' padding in the final group, so `dst' must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Work on unsigned bytes: where plain char is signed, a byte >= 0x80
     * would be sign-extended before the shift and smear 1-bits across the
     * neighbouring six-bit groups.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    switch (l) {
    case 2:
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned long)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	/* nothing left over (or a non-positive length): no tail group */
	break;
    }

    *dst = 0;
    return r;
}
27337608Sdes
/*
 * Encode username and password
 *
 * Builds the credentials token for an "Authorization: Basic" header: the
 * string "usr:pwd" is assembled first and then base64-encoded as a whole,
 * which is the form the Basic scheme defines (encoding the two halves
 * separately and joining them with a raw ':' does not produce valid
 * credentials).
 *
 * Returns a freshly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    rawlen = lu + 1 + lp;		/* user name, colon, password */

    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = 0;

    /*
     * Base64 emits four characters per started group of three input
     * bytes (padding included), plus one byte for the terminating NUL.
     */
    if ((str = malloc(((rawlen + 2) / 3) * 4 + 1)) != NULL)
	_http_base64(str, raw, (int)rawlen);

    free(raw);
    return str;
}
30137608Sdes
30237608Sdes/*
30340975Sdes * Retrieve a file by HTTP
30437608Sdes */
30537535SdesFILE *
30640975SdesfetchGetHTTP(struct url *URL, char *flags)
30737535Sdes{
30841862Sdes    int sd = -1, err, i, enc = ENC_NONE, verbose;
30937535Sdes    struct cookie *c;
31037535Sdes    char *ln, *p, *q;
31137535Sdes    FILE *f, *cf;
31237535Sdes    size_t len;
31337535Sdes
31441862Sdes    verbose = (strchr(flags, 'v') != NULL);
31541862Sdes
31637535Sdes    /* allocate cookie */
31737535Sdes    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
31837535Sdes	return NULL;
31937535Sdes
32037535Sdes    /* check port */
32137535Sdes    if (!URL->port)
32237535Sdes	URL->port = 80; /* default HTTP port */
32337535Sdes
32437535Sdes    /* attempt to connect to proxy server */
32537535Sdes    if (getenv("HTTP_PROXY")) {
32637535Sdes	char *px, host[MAXHOSTNAMELEN];
32737535Sdes	int port = 3128; /* XXX I think 3128 is default... check? */
32837535Sdes	size_t len;
32937535Sdes
33037535Sdes	/* measure length */
33137535Sdes	px = getenv("HTTP_PROXY");
33237535Sdes	len = strcspn(px, ":");
33337535Sdes
33437535Sdes	/* get port (atoi is a little too tolerant perhaps?) */
33537535Sdes	if (px[len] == ':')
33637535Sdes	    port = atoi(px+len+1);
33737535Sdes
33837535Sdes	/* get host name */
33937535Sdes	if (len >= MAXHOSTNAMELEN)
34037535Sdes	    len = MAXHOSTNAMELEN - 1;
34137535Sdes	strncpy(host, px, len);
34237535Sdes	host[len] = 0;
34337535Sdes
34437535Sdes	/* connect */
34541862Sdes	sd = fetchConnect(host, port, verbose);
34637535Sdes    }
34737535Sdes
34837535Sdes    /* if no proxy is configured or could be contacted, try direct */
34938394Sdes    if (sd == -1) {
35041862Sdes	if ((sd = fetchConnect(URL->host, URL->port, verbose)) == -1)
35137535Sdes	    goto ouch;
35237535Sdes    }
35337535Sdes
35437535Sdes    /* reopen as stream */
35537571Sdes    if ((f = fdopen(sd, "r+")) == NULL)
35637535Sdes	goto ouch;
35737535Sdes    c->real_f = f;
35837535Sdes
35937535Sdes    /* send request (proxies require absolute form, so use that) */
36041862Sdes    if (verbose)
36141862Sdes	_fetch_info("requesting http://%s:%d%s",
36241862Sdes		    URL->host, URL->port, URL->doc);
36337608Sdes    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
36437608Sdes	      URL->host, URL->port, URL->doc);
36537535Sdes
36637535Sdes    /* start sending headers away */
36737535Sdes    if (URL->user[0] || URL->pwd[0]) {
36837608Sdes	char *auth_str = _http_auth(URL->user, URL->pwd);
36937608Sdes	if (!auth_str)
37037608Sdes	    goto fouch;
37137608Sdes	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
37237608Sdes	free(auth_str);
37337535Sdes    }
37437608Sdes    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
37537608Sdes    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
37637608Sdes    _http_cmd(f, "Connection: close" ENDL ENDL);
37737535Sdes
37837535Sdes    /* get response */
37937535Sdes    if ((ln = fgetln(f, &len)) == NULL)
38037535Sdes	goto fouch;
38137535Sdes    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
38237535Sdes		  (int)len-2, (int)len-2, ln));
38337535Sdes
38437535Sdes    /* we can't use strchr() and friends since ln isn't NUL-terminated */
38537535Sdes    p = ln;
38637535Sdes    while ((p < ln + len) && !isspace(*p))
38737535Sdes	p++;
38837535Sdes    while ((p < ln + len) && !isdigit(*p))
38937535Sdes	p++;
39037535Sdes    if (!isdigit(*p))
39137535Sdes	goto fouch;
39237535Sdes    err = atoi(p);
39337535Sdes    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
39437535Sdes
39537535Sdes    /* add code to handle redirects later */
39637571Sdes    if (err != 200) {
39740975Sdes	_http_seterr(err);
39837535Sdes	goto fouch;
39937571Sdes    }
40037535Sdes
40137535Sdes    /* browse through header */
40237535Sdes    while (1) {
40337535Sdes	if ((ln = fgetln(f, &len)) == NULL)
40437535Sdes	    goto fouch;
40537535Sdes	if ((ln[0] == '\r') || (ln[0] == '\n'))
40637535Sdes	    break;
40737535Sdes	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
40837535Sdes		      (int)len-2, (int)len-2, ln));
40937535Sdes#define XFERENC "Transfer-Encoding:"
41037535Sdes	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
41137535Sdes	    p = ln + sizeof(XFERENC) - 1;
41237535Sdes	    while ((p < ln + len) && isspace(*p))
41337535Sdes		p++;
41437535Sdes	    for (q = p; (q < ln + len) && !isspace(*q); q++)
41537535Sdes		/* VOID */ ;
41637535Sdes	    *q = 0;
41737535Sdes	    if (strcasecmp(p, "chunked") == 0)
41837535Sdes		enc = ENC_CHUNKED;
41937535Sdes	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
42037535Sdes#undef XFERENC
42137535Sdes#define CONTTYPE "Content-Type:"
42237535Sdes	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
42337535Sdes	    p = ln + sizeof(CONTTYPE) - 1;
42437535Sdes	    while ((p < ln + len) && isspace(*p))
42537535Sdes		p++;
42637535Sdes	    for (i = 0; p < ln + len; p++)
42737535Sdes		if (i < HTTPCTYPELEN)
42837535Sdes		    c->content_type[i++] = *p;
42937535Sdes	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
43037535Sdes	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
43137535Sdes			  c->content_type));
43237535Sdes#undef CONTTYPE
43337535Sdes	}
43437535Sdes    }
43537535Sdes
43637535Sdes    /* only body remains */
43737535Sdes    c->encoding = enc;
43837535Sdes    cf = funopen(c,
43937535Sdes		 (int (*)(void *, char *, int))_http_readfn,
44037535Sdes		 (int (*)(void *, const char *, int))_http_writefn,
44137535Sdes		 (fpos_t (*)(void *, fpos_t, int))NULL,
44237535Sdes		 (int (*)(void *))_http_closefn);
44337535Sdes    if (cf == NULL)
44437535Sdes	goto fouch;
44537535Sdes    return cf;
44637535Sdes
44737535Sdesouch:
44837571Sdes    if (sd >= 0)
44937571Sdes	close(sd);
45037535Sdes    free(c);
45141862Sdes    _http_seterr(999); /* XXX do this properly RSN */
45237535Sdes    return NULL;
45337535Sdesfouch:
45437535Sdes    fclose(f);
45537535Sdes    free(c);
45641862Sdes    _http_seterr(999); /* XXX do this properly RSN */
45737535Sdes    return NULL;
45837535Sdes}
45937535Sdes
/*
 * Store a file by HTTP
 *
 * Placeholder: prints a "not implemented" warning and returns NULL.
 * Neither argument is examined.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *result = NULL;

    (void)URL;
    (void)flags;
    warnx("fetchPutHTTP(): not implemented");
    return result;
}
46640975Sdes
/*
 * Get an HTTP document's metadata
 *
 * Placeholder: prints a "not implemented" warning and returns -1.
 * None of the arguments is examined and `us' is left untouched.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    int status = -1;

    (void)url;
    (void)us;
    (void)flags;
    warnx("fetchStatHTTP(): not implemented");
    return status;
}
476