http.c revision 40975
137535Sdes/*-
237535Sdes * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes *
2840975Sdes *	$Id: http.c,v 1.6 1998/11/05 19:48:17 des Exp $
2937535Sdes */
3037535Sdes
3137608Sdes/*
3237608Sdes * The base64 code in this file is based on code from MIT fetch, which
3337608Sdes * has the following copyright and license:
3437608Sdes *
3537608Sdes *-
3637608Sdes * Copyright 1997 Massachusetts Institute of Technology
3737608Sdes *
3837608Sdes * Permission to use, copy, modify, and distribute this software and
3937608Sdes * its documentation for any purpose and without fee is hereby
4037608Sdes * granted, provided that both the above copyright notice and this
4137608Sdes * permission notice appear in all copies, that both the above
4237608Sdes * copyright notice and this permission notice appear in all
4337608Sdes * supporting documentation, and that the name of M.I.T. not be used
4437608Sdes * in advertising or publicity pertaining to distribution of the
4537608Sdes * software without specific, written prior permission.  M.I.T. makes
4637608Sdes * no representations about the suitability of this software for any
4737608Sdes * purpose.  It is provided "as is" without express or implied
4837608Sdes * warranty.
4937608Sdes *
5037608Sdes * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
5137608Sdes * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
5237608Sdes * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
5337608Sdes * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
5437608Sdes * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5537608Sdes * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
5637608Sdes * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
5737608Sdes * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
5837608Sdes * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
5937608Sdes * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
6037608Sdes * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
6137608Sdes * SUCH DAMAGE. */
6237608Sdes
6337535Sdes#include <sys/param.h>
6437535Sdes#include <sys/errno.h>
6537535Sdes#include <sys/socket.h>
6637535Sdes#include <sys/types.h>
6737535Sdes
6837535Sdes#include <netinet/in.h>
6937535Sdes
7037535Sdes#include <err.h>
7137535Sdes#include <ctype.h>
7237535Sdes#include <netdb.h>
7337608Sdes#include <stdarg.h>
7437535Sdes#include <stdio.h>
7537535Sdes#include <stdlib.h>
7637535Sdes#include <string.h>
7737535Sdes#include <unistd.h>
7837535Sdes
7937535Sdes#include "fetch.h"
8040939Sdes#include "common.h"
8140975Sdes#include "httperr.inc"
8237535Sdes
/*
 * DEBUG(x) executes the statement x unless NDEBUG is defined, in which
 * case it expands to an empty statement.  Both forms are wrapped in
 * do/while (0) so that DEBUG(...); behaves as a single statement.
 */
#ifndef NDEBUG
#define DEBUG(x) do x; while (0)
#else
#define DEBUG(x) do { } while (0)
#endif

/*
 * Defined outside this file (presumably by the C runtime -- confirm);
 * it is sent as part of the User-Agent header.
 */
extern char *__progname;

/* Line terminator appended to the request line and headers we send */
#define ENDL "\r\n"
9237535Sdes
/* Values for cookie.encoding */
#define ENC_NONE 0
#define ENC_CHUNKED 1

/* Longest Content-Type value kept, not counting the terminating NUL */
#define HTTPCTYPELEN 59

/*
 * State attached to the stream returned by fetchGetHTTP() and handed to
 * the funopen() read, write and close callbacks.
 */
struct cookie
{
    FILE *real_f;			/* underlying stream on the socket */
    int encoding;			/* 1 = chunked, 0 = none */
    char content_type[HTTPCTYPELEN+1];	/* value of the Content-Type header */
    char *buf;				/* line most recently read from real_f */
    int b_cur;				/* offset of the next unread byte in buf */
    int eof;				/* non-zero once the last chunk was seen */
    unsigned b_len;			/* number of usable bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
10537535Sdes
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats fmt and its arguments onto f.  Unless NDEBUG is defined the
 * same text is also written to stderr, wrapped in terminal escape
 * sequences.
 *
 * Returns 0 on success, or -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    fprintf(stderr, "\033[1m>>> ");
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so the
     * argument list has to be restarted for the second pass.
     */
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
12537608Sdes
12637608Sdes/*
12737608Sdes * Fill the input buffer, do chunk decoding on the fly
12837608Sdes */
12937535Sdesstatic char *
13037535Sdes_http_fillbuf(struct cookie *c)
13137535Sdes{
13237535Sdes    char *ln;
13337535Sdes    unsigned int len;
13437535Sdes
13537535Sdes    if (c->eof)
13637535Sdes	return NULL;
13737535Sdes
13837535Sdes    if (c->encoding == ENC_NONE) {
13937535Sdes	c->buf = fgetln(c->real_f, &(c->b_len));
14037535Sdes	c->b_cur = 0;
14137535Sdes    } else if (c->encoding == ENC_CHUNKED) {
14237535Sdes	if (c->chunksize == 0) {
14337535Sdes	    ln = fgetln(c->real_f, &len);
14437535Sdes	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
14537535Sdes			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
14637535Sdes	    sscanf(ln, "%x", &(c->chunksize));
14737535Sdes	    if (!c->chunksize) {
14837535Sdes		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
14937535Sdes			      "end of last chunk\033[m\n"));
15037535Sdes		c->eof = 1;
15137535Sdes		return NULL;
15237535Sdes	    }
15337535Sdes	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
15437535Sdes			  "new chunk: %X\033[m\n", c->chunksize));
15537535Sdes	}
15637535Sdes	c->buf = fgetln(c->real_f, &(c->b_len));
15737535Sdes	if (c->b_len > c->chunksize)
15837535Sdes	    c->b_len = c->chunksize;
15937535Sdes	c->chunksize -= c->b_len;
16037535Sdes	c->b_cur = 0;
16137535Sdes    }
16237535Sdes    else return NULL; /* unknown encoding */
16337535Sdes    return c->buf;
16437535Sdes}
16537535Sdes
16637608Sdes/*
16737608Sdes * Read function
16837608Sdes */
16937535Sdesstatic int
17037535Sdes_http_readfn(struct cookie *c, char *buf, int len)
17137535Sdes{
17237535Sdes    int l, pos = 0;
17337535Sdes    while (len) {
17437535Sdes	/* empty buffer */
17537535Sdes	if (!c->buf || (c->b_cur == c->b_len))
17637535Sdes	    if (!_http_fillbuf(c))
17737535Sdes		break;
17837535Sdes
17937535Sdes	l = c->b_len - c->b_cur;
18037535Sdes	if (len < l) l = len;
18137535Sdes	memcpy(buf + pos, c->buf + c->b_cur, l);
18237535Sdes	c->b_cur += l;
18337535Sdes	pos += l;
18437535Sdes	len -= l;
18537535Sdes    }
18637535Sdes
18737535Sdes    if (ferror(c->real_f))
18837535Sdes	return -1;
18937535Sdes    else return pos;
19037535Sdes}
19137535Sdes
19237608Sdes/*
19337608Sdes * Write function
19437608Sdes */
19537535Sdesstatic int
19637535Sdes_http_writefn(struct cookie *c, const char *buf, int len)
19737535Sdes{
19837535Sdes    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
19937535Sdes    return r ? r : -1;
20037535Sdes}
20137535Sdes
20237608Sdes/*
20337608Sdes * Close function
20437608Sdes */
20537535Sdesstatic int
20637535Sdes_http_closefn(struct cookie *c)
20737535Sdes{
20837535Sdes    int r = fclose(c->real_f);
20937535Sdes    free(c);
21037535Sdes    return (r == EOF) ? -1 : 0;
21137535Sdes}
21237535Sdes
21337608Sdes/*
21437608Sdes * Extract content type from cookie
21537608Sdes */
21637535Sdeschar *
21737535SdesfetchContentType(FILE *f)
21837535Sdes{
21937535Sdes    /*
22037535Sdes     * We have no way of making sure this really *is* one of our cookies,
22137535Sdes     * so just check for a null pointer and hope for the best.
22237535Sdes     */
22337535Sdes    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
22437535Sdes}
22537535Sdes
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as base64 text, padding the last
 * group with '=' and appending a terminating NUL.  dst must have room
 * for 4 * ((l + 2) / 3) + 1 bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed char, any byte
     * >= 0x80 would be sign-extended and spill into the other sextets.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    switch (l) {
    case 2:
	/* two bytes left: three sextets plus one pad character */
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	/* one byte left: two sextets plus two pad characters */
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
27337608Sdes
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the string
 * "usr:pwd" encoded as a whole with _http_base64().
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory is short or the input is too long.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, n;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);

    /*
     * _http_base64() takes and returns an int; refuse input whose
     * combined, encoded length could not be represented in one.
     */
    if (lu >= (~0U >> 3) || lp >= (~0U >> 3))
	return NULL;

    n = lu + 1 + lp;			/* user name, colon, password */

    if ((plain = malloc(n + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);	/* copies the NUL as well */

    /* base64 emits four characters for every started group of three */
    if ((str = malloc(4 * ((n + 2) / 3) + 1)) != NULL)
	_http_base64(str, plain, (int)n);

    free(plain);
    return str;
}
30137608Sdes
30237608Sdes/*
30340975Sdes * Retrieve a file by HTTP
30437608Sdes */
30537535SdesFILE *
30640975SdesfetchGetHTTP(struct url *URL, char *flags)
30737535Sdes{
30837571Sdes    int sd = -1, err, i, enc = ENC_NONE;
30937535Sdes    struct cookie *c;
31037535Sdes    char *ln, *p, *q;
31137535Sdes    FILE *f, *cf;
31237535Sdes    size_t len;
31337535Sdes
31437535Sdes    /* allocate cookie */
31537535Sdes    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
31637535Sdes	return NULL;
31737535Sdes
31837535Sdes    /* check port */
31937535Sdes    if (!URL->port)
32037535Sdes	URL->port = 80; /* default HTTP port */
32137535Sdes
32237535Sdes    /* attempt to connect to proxy server */
32337535Sdes    if (getenv("HTTP_PROXY")) {
32437535Sdes	char *px, host[MAXHOSTNAMELEN];
32537535Sdes	int port = 3128; /* XXX I think 3128 is default... check? */
32637535Sdes	size_t len;
32737535Sdes
32837535Sdes	/* measure length */
32937535Sdes	px = getenv("HTTP_PROXY");
33037535Sdes	len = strcspn(px, ":");
33137535Sdes
33237535Sdes	/* get port (atoi is a little too tolerant perhaps?) */
33337535Sdes	if (px[len] == ':')
33437535Sdes	    port = atoi(px+len+1);
33537535Sdes
33637535Sdes	/* get host name */
33737535Sdes	if (len >= MAXHOSTNAMELEN)
33837535Sdes	    len = MAXHOSTNAMELEN - 1;
33937535Sdes	strncpy(host, px, len);
34037535Sdes	host[len] = 0;
34137535Sdes
34237535Sdes	/* connect */
34337571Sdes	sd = fetchConnect(host, port);
34437535Sdes    }
34537535Sdes
34637535Sdes    /* if no proxy is configured or could be contacted, try direct */
34738394Sdes    if (sd == -1) {
34838394Sdes	if ((sd = fetchConnect(URL->host, URL->port)) == -1)
34937535Sdes	    goto ouch;
35037535Sdes    }
35137535Sdes
35237535Sdes    /* reopen as stream */
35337571Sdes    if ((f = fdopen(sd, "r+")) == NULL)
35437535Sdes	goto ouch;
35537535Sdes    c->real_f = f;
35637535Sdes
35737535Sdes    /* send request (proxies require absolute form, so use that) */
35837608Sdes    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
35937608Sdes	      URL->host, URL->port, URL->doc);
36037535Sdes
36137535Sdes    /* start sending headers away */
36237535Sdes    if (URL->user[0] || URL->pwd[0]) {
36337608Sdes	char *auth_str = _http_auth(URL->user, URL->pwd);
36437608Sdes	if (!auth_str)
36537608Sdes	    goto fouch;
36637608Sdes	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
36737608Sdes	free(auth_str);
36837535Sdes    }
36937608Sdes    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
37037608Sdes    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
37137608Sdes    _http_cmd(f, "Connection: close" ENDL ENDL);
37237535Sdes
37337535Sdes    /* get response */
37437535Sdes    if ((ln = fgetln(f, &len)) == NULL)
37537535Sdes	goto fouch;
37637535Sdes    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
37737535Sdes		  (int)len-2, (int)len-2, ln));
37837535Sdes
37937535Sdes    /* we can't use strchr() and friends since ln isn't NUL-terminated */
38037535Sdes    p = ln;
38137535Sdes    while ((p < ln + len) && !isspace(*p))
38237535Sdes	p++;
38337535Sdes    while ((p < ln + len) && !isdigit(*p))
38437535Sdes	p++;
38537535Sdes    if (!isdigit(*p))
38637535Sdes	goto fouch;
38737535Sdes    err = atoi(p);
38837535Sdes    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
38937535Sdes
39037535Sdes    /* add code to handle redirects later */
39137571Sdes    if (err != 200) {
39240975Sdes	_http_seterr(err);
39337535Sdes	goto fouch;
39437571Sdes    }
39537535Sdes
39637535Sdes    /* browse through header */
39737535Sdes    while (1) {
39837535Sdes	if ((ln = fgetln(f, &len)) == NULL)
39937535Sdes	    goto fouch;
40037535Sdes	if ((ln[0] == '\r') || (ln[0] == '\n'))
40137535Sdes	    break;
40237535Sdes	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
40337535Sdes		      (int)len-2, (int)len-2, ln));
40437535Sdes#define XFERENC "Transfer-Encoding:"
40537535Sdes	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
40637535Sdes	    p = ln + sizeof(XFERENC) - 1;
40737535Sdes	    while ((p < ln + len) && isspace(*p))
40837535Sdes		p++;
40937535Sdes	    for (q = p; (q < ln + len) && !isspace(*q); q++)
41037535Sdes		/* VOID */ ;
41137535Sdes	    *q = 0;
41237535Sdes	    if (strcasecmp(p, "chunked") == 0)
41337535Sdes		enc = ENC_CHUNKED;
41437535Sdes	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
41537535Sdes#undef XFERENC
41637535Sdes#define CONTTYPE "Content-Type:"
41737535Sdes	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
41837535Sdes	    p = ln + sizeof(CONTTYPE) - 1;
41937535Sdes	    while ((p < ln + len) && isspace(*p))
42037535Sdes		p++;
42137535Sdes	    for (i = 0; p < ln + len; p++)
42237535Sdes		if (i < HTTPCTYPELEN)
42337535Sdes		    c->content_type[i++] = *p;
42437535Sdes	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
42537535Sdes	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
42637535Sdes			  c->content_type));
42737535Sdes#undef CONTTYPE
42837535Sdes	}
42937535Sdes    }
43037535Sdes
43137535Sdes    /* only body remains */
43237535Sdes    c->encoding = enc;
43337535Sdes    cf = funopen(c,
43437535Sdes		 (int (*)(void *, char *, int))_http_readfn,
43537535Sdes		 (int (*)(void *, const char *, int))_http_writefn,
43637535Sdes		 (fpos_t (*)(void *, fpos_t, int))NULL,
43737535Sdes		 (int (*)(void *))_http_closefn);
43837535Sdes    if (cf == NULL)
43937535Sdes	goto fouch;
44037535Sdes    return cf;
44137535Sdes
44237535Sdesouch:
44337571Sdes    if (sd >= 0)
44437571Sdes	close(sd);
44537535Sdes    free(c);
44637535Sdes    return NULL;
44737535Sdesfouch:
44837535Sdes    fclose(f);
44937535Sdes    free(c);
45037535Sdes    return NULL;
45137535Sdes}
45237535Sdes
/*
 * Store a file by HTTP
 *
 * Not implemented: emits a warning and returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return (NULL);
}
45940975Sdes
/*
 * Get an HTTP document's metadata
 *
 * Not implemented: emits a warning and returns -1.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    (void)url;
    (void)us;
    (void)flags;

    warnx("fetchStatHTTP(): not implemented");
    return (-1);
}
469