http.c revision 37535
1133819Stjr/*-
2133819Stjr * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3133819Stjr * All rights reserved.
4133819Stjr *
5133819Stjr * Redistribution and use in source and binary forms, with or without
6133819Stjr * modification, are permitted provided that the following conditions
7133819Stjr * are met:
8133819Stjr * 1. Redistributions of source code must retain the above copyright
9133819Stjr *    notice, this list of conditions and the following disclaimer
10133819Stjr *    in this position and unchanged.
11133819Stjr * 2. Redistributions in binary form must reproduce the above copyright
12133819Stjr *    notice, this list of conditions and the following disclaimer in the
13133819Stjr *    documentation and/or other materials provided with the distribution.
14133819Stjr * 3. The name of the author may not be used to endorse or promote products
15133819Stjr *    derived from this software without specific prior written permission
16133819Stjr *
17133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27133819Stjr *
28133819Stjr *	$Id$
29133819Stjr */
30133819Stjr
31133819Stjr#include <sys/param.h>
32133819Stjr#include <sys/errno.h>
33133819Stjr#include <sys/socket.h>
34133819Stjr#include <sys/types.h>
35133819Stjr
36133819Stjr#include <netinet/in.h>
37165832Snetchild
38165832Snetchild#include <err.h>
39162954Sphk#include <ctype.h>
40142057Sjhb#include <netdb.h>
41161310Snetchild#include <stdio.h>
42133819Stjr#include <stdlib.h>
43133819Stjr#include <string.h>
44133819Stjr#include <unistd.h>
45133819Stjr
46133819Stjr#include "fetch.h"
47133819Stjr#include "httperr.c"
48133819Stjr
/*
 * DEBUG(x) expands to the statement x in ordinary builds and to an empty
 * statement when NDEBUG is defined.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do x; while (0)
#endif
54133819Stjr
55133819Stjrextern char *__progname;
56133819Stjr
57142057Sjhbextern int fprint64(FILE *f, const unsigned char *buf);
58142057Sjhb
59133819Stjr#define ENDL "\r\n"
60133819Stjr
/*
 * State attached to the stream returned by fetchGetHTTP(): the underlying
 * socket stream plus the current line buffer and transfer-decoding state
 * used by the read/write/close callbacks.
 */
struct cookie
{
    FILE *real_f;			/* stream the callbacks operate on */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type */
    char *buf;				/* current line from fgetln() */
    int b_cur, eof;			/* read offset in buf; end-of-body flag */
    /*
     * b_len's address is handed to fgetln(), which stores the line length
     * through a size_t *, so the field has to be a size_t.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
73133819Stjr
/*
 * Resolve `host' and open a TCP connection to it on `port'.
 *
 * Every address returned by the resolver is tried in order until one
 * accepts the connection.  Returns the connected socket descriptor, or
 * -1 if the name cannot be resolved or no address can be reached.
 */
static int
_http_connect(char *host, int port)
{
    struct sockaddr_in sin;
    struct hostent *he;
    char **ap;
    int fd;

    /* look up host name */
    if ((he = gethostbyname(host)) == NULL)
	return -1;

    /*
     * The address is copied into a sockaddr_in below, so refuse anything
     * that is not an IPv4 address of exactly the expected size.
     */
    if (he->h_addrtype != AF_INET || he->h_addr_list == NULL ||
	he->h_length != (int)sizeof(sin.sin_addr))
	return -1;

    /* try each address in turn */
    for (ap = he->h_addr_list; *ap != NULL; ap++) {
	/* set up socket address structure */
	memset(&sin, 0, sizeof(sin));
	memcpy(&sin.sin_addr, *ap, sizeof(sin.sin_addr));
	sin.sin_family = AF_INET;
	sin.sin_port = htons((unsigned short)port);

	/* try to connect */
	if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
	    return -1;
	if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) == 0)
	    return fd;
	close(fd);
    }

    return -1;
}
101142057Sjhb
102142057Sjhbstatic char *
103142057Sjhb_http_fillbuf(struct cookie *c)
104142057Sjhb{
105142057Sjhb    char *ln;
106142057Sjhb    unsigned int len;
107133819Stjr
108142057Sjhb    if (c->eof)
109142057Sjhb	return NULL;
110142057Sjhb
111133819Stjr    if (c->encoding == ENC_NONE) {
112133819Stjr	c->buf = fgetln(c->real_f, &(c->b_len));
113142057Sjhb	c->b_cur = 0;
114142057Sjhb    } else if (c->encoding == ENC_CHUNKED) {
115142057Sjhb	if (c->chunksize == 0) {
116133819Stjr	    ln = fgetln(c->real_f, &len);
117142057Sjhb	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
118142057Sjhb			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
119142057Sjhb	    sscanf(ln, "%x", &(c->chunksize));
120142057Sjhb	    if (!c->chunksize) {
121147588Sjhb		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
122147588Sjhb			      "end of last chunk\033[m\n"));
123142057Sjhb		c->eof = 1;
124142057Sjhb		return NULL;
125142057Sjhb	    }
126142057Sjhb	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
127142057Sjhb			  "new chunk: %X\033[m\n", c->chunksize));
128133819Stjr	}
129142057Sjhb	c->buf = fgetln(c->real_f, &(c->b_len));
130133819Stjr	if (c->b_len > c->chunksize)
131142057Sjhb	    c->b_len = c->chunksize;
132142057Sjhb	c->chunksize -= c->b_len;
133142057Sjhb	c->b_cur = 0;
134142057Sjhb    }
135142057Sjhb    else return NULL; /* unknown encoding */
136142057Sjhb    return c->buf;
137142057Sjhb}
138156440Sups
139142057Sjhbstatic int
140142057Sjhb_http_readfn(struct cookie *c, char *buf, int len)
141142057Sjhb{
142142057Sjhb    int l, pos = 0;
143142057Sjhb    while (len) {
144142057Sjhb	/* empty buffer */
145142057Sjhb	if (!c->buf || (c->b_cur == c->b_len))
146142057Sjhb	    if (!_http_fillbuf(c))
147156440Sups		break;
148142057Sjhb
149142057Sjhb	l = c->b_len - c->b_cur;
150142057Sjhb	if (len < l) l = len;
151142057Sjhb	memcpy(buf + pos, c->buf + c->b_cur, l);
152142057Sjhb	c->b_cur += l;
153142057Sjhb	pos += l;
154156440Sups	len -= l;
155156440Sups    }
156156440Sups
157142057Sjhb    if (ferror(c->real_f))
158142057Sjhb	return -1;
159142057Sjhb    else return pos;
160142057Sjhb}
161133819Stjr
162142057Sjhbstatic int
163142057Sjhb_http_writefn(struct cookie *c, const char *buf, int len)
164142057Sjhb{
165142057Sjhb    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
166142057Sjhb    return r ? r : -1;
167142057Sjhb}
168142057Sjhb
169142057Sjhbstatic int
170142057Sjhb_http_closefn(struct cookie *c)
171133819Stjr{
172133819Stjr    int r = fclose(c->real_f);
173156440Sups    free(c);
174142057Sjhb    return (r == EOF) ? -1 : 0;
175142057Sjhb}
176142057Sjhb
177142057Sjhbchar *
178142057SjhbfetchContentType(FILE *f)
179142057Sjhb{
180142057Sjhb    /*
181156440Sups     * We have no way of making sure this really *is* one of our cookies,
182156440Sups     * so just check for a null pointer and hope for the best.
183142057Sjhb     */
184142057Sjhb    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
185142057Sjhb}
186142057Sjhb
187142057SjhbFILE *
188133819StjrfetchGetHTTP(url_t *URL, char *flags)
189133819Stjr{
190142057Sjhb    int fd = -1, err, i, enc = ENC_NONE;
191156440Sups    struct cookie *c;
192156440Sups    char *ln, *p, *q;
193156440Sups    FILE *f, *cf;
194156440Sups    size_t len;
195156440Sups
196156440Sups    /* allocate cookie */
197133819Stjr    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
198133819Stjr	return NULL;
199142057Sjhb
200142057Sjhb    /* check port */
201142057Sjhb    if (!URL->port)
202142057Sjhb	URL->port = 80; /* default HTTP port */
203142057Sjhb
204142057Sjhb    /* attempt to connect to proxy server */
205142057Sjhb    if (getenv("HTTP_PROXY")) {
206142057Sjhb	char *px, host[MAXHOSTNAMELEN];
207142057Sjhb	int port = 3128; /* XXX I think 3128 is default... check? */
208142057Sjhb	size_t len;
209142057Sjhb
210142057Sjhb	/* measure length */
211142057Sjhb	px = getenv("HTTP_PROXY");
212142057Sjhb	len = strcspn(px, ":");
213142057Sjhb
214142057Sjhb	/* get port (atoi is a little too tolerant perhaps?) */
215142057Sjhb	if (px[len] == ':')
216142057Sjhb	    port = atoi(px+len+1);
217142057Sjhb
218161474Snetchild	/* get host name */
219161474Snetchild	if (len >= MAXHOSTNAMELEN)
220161474Snetchild	    len = MAXHOSTNAMELEN - 1;
221161474Snetchild	strncpy(host, px, len);
222161474Snetchild	host[len] = 0;
223161474Snetchild
224161474Snetchild	/* connect */
225161474Snetchild	fd = _http_connect(host, port);
226142057Sjhb    }
227142057Sjhb
228142057Sjhb    /* if no proxy is configured or could be contacted, try direct */
229133819Stjr    if (fd < 0) {
230133819Stjr	if ((fd = _http_connect(URL->host, URL->port)) < 0)
231133819Stjr	    goto ouch;
232133819Stjr    }
233133819Stjr
234133819Stjr    /* reopen as stream */
235133819Stjr    if ((f = fdopen(fd, "r+")) == NULL)
236144449Sjhb	goto ouch;
237144449Sjhb    c->real_f = f;
238133819Stjr
239144449Sjhb    /* send request (proxies require absolute form, so use that) */
240144449Sjhb    fprintf(f, "GET http://%s:%d/%s HTTP/1.1" ENDL,
241144449Sjhb	    URL->host, URL->port, URL->doc);
242144449Sjhb
243144449Sjhb    /* start sending headers away */
244133819Stjr    if (URL->user[0] || URL->pwd[0]) {
245144449Sjhb	fprintf(f, "Authorization: Basic ");
246144449Sjhb	fprint64(f, (const unsigned char *)URL->user);
247133819Stjr	fputc(':', f);
248144449Sjhb	fprint64(f, (const unsigned char *)URL->pwd);
249144449Sjhb	fputs(ENDL, f);
250144449Sjhb    }
251144449Sjhb    fprintf(f, "Host: %s:%d" ENDL, URL->host, URL->port);
252144449Sjhb    fprintf(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
253144449Sjhb    fprintf(f, "Connection: close" ENDL ENDL);
254144449Sjhb
255144449Sjhb    /* get response */
256144449Sjhb    if ((ln = fgetln(f, &len)) == NULL)
257144449Sjhb	goto fouch;
258144449Sjhb    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
259133819Stjr		  (int)len-2, (int)len-2, ln));
260144449Sjhb
261144449Sjhb    /* we can't use strchr() and friends since ln isn't NUL-terminated */
262144449Sjhb    p = ln;
263144449Sjhb    while ((p < ln + len) && !isspace(*p))
264144449Sjhb	p++;
265144449Sjhb    while ((p < ln + len) && !isdigit(*p))
266144449Sjhb	p++;
267144449Sjhb    if (!isdigit(*p))
268144449Sjhb	goto fouch;
269144449Sjhb    err = atoi(p);
270144449Sjhb    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
271144449Sjhb
272144449Sjhb    /* add code to handle redirects later */
273144449Sjhb    if (err != 200)
274144449Sjhb	goto fouch;
275144449Sjhb
276133819Stjr    /* browse through header */
277144449Sjhb    while (1) {
278144449Sjhb	if ((ln = fgetln(f, &len)) == NULL)
279144449Sjhb	    goto fouch;
280144449Sjhb	if ((ln[0] == '\r') || (ln[0] == '\n'))
281144449Sjhb	    break;
282133819Stjr	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
283144449Sjhb		      (int)len-2, (int)len-2, ln));
284144449Sjhb#define XFERENC "Transfer-Encoding:"
285144449Sjhb	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
286144449Sjhb	    p = ln + sizeof(XFERENC) - 1;
287144449Sjhb	    while ((p < ln + len) && isspace(*p))
288133819Stjr		p++;
289133819Stjr	    for (q = p; (q < ln + len) && !isspace(*q); q++)
290133819Stjr		/* VOID */ ;
291133819Stjr	    *q = 0;
292133819Stjr	    if (strcasecmp(p, "chunked") == 0)
293133819Stjr		enc = ENC_CHUNKED;
294144449Sjhb	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
295144449Sjhb#undef XFERENC
296133819Stjr#define CONTTYPE "Content-Type:"
297144449Sjhb	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
298144449Sjhb	    p = ln + sizeof(CONTTYPE) - 1;
299144449Sjhb	    while ((p < ln + len) && isspace(*p))
300144449Sjhb		p++;
301144449Sjhb	    for (i = 0; p < ln + len; p++)
302133819Stjr		if (i < HTTPCTYPELEN)
303133819Stjr		    c->content_type[i++] = *p;
304133819Stjr	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
305133819Stjr	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
306133819Stjr			  c->content_type));
307133819Stjr#undef CONTTYPE
308133819Stjr	}
309133819Stjr    }
310133819Stjr
311133819Stjr    /* only body remains */
312133819Stjr    c->encoding = enc;
313133819Stjr    cf = funopen(c,
314133819Stjr		 (int (*)(void *, char *, int))_http_readfn,
315133819Stjr		 (int (*)(void *, const char *, int))_http_writefn,
316133819Stjr		 (fpos_t (*)(void *, fpos_t, int))NULL,
317133819Stjr		 (int (*)(void *))_http_closefn);
318133819Stjr    if (cf == NULL)
319133819Stjr	goto fouch;
320133819Stjr    return cf;
321133819Stjr
322133819Stjrouch:
323133819Stjr    if (fd >= 0)
324133819Stjr	close(fd);
325133819Stjr    free(c);
326133819Stjr    return NULL;
327133819Stjrfouch:
328133819Stjr    fclose(f);
329133819Stjr    free(c);
330133819Stjr    return NULL;
331133819Stjr}
332133819Stjr
333133819StjrFILE *
334133819StjrfetchPutHTTP(url_t *URL, char *flags)
335133819Stjr{
336133819Stjr    warnx("fetchPutHTTP(): not implemented");
337133819Stjr    return NULL;
338133819Stjr}
339133819Stjr