/* http.c revision 37536 */
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *	$Id$
 */
30
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <netinet/in.h>

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "fetch.h"
#include "httperr.c"
48
/*
 * Debug tracing.  DEBUG(stmt) executes stmt unless NDEBUG is defined,
 * in which case it expands to an empty statement.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do x; while (0)
#endif

/* program name; defined outside this file */
extern char *__progname;

/* defined outside this file; called by fetchGetHTTP() for credentials */
extern int fprint64(FILE *f, const unsigned char *buf);

/* line terminator used when writing the HTTP request */
#define ENDL "\r\n"
60
/* values for cookie.encoding */
#define ENC_NONE 0
#define ENC_CHUNKED 1

/* capacity of cookie.content_type, not counting the terminating NUL */
#define HTTPCTYPELEN 59

/*
 * Per-connection state.  An instance is passed to funopen() as the
 * cookie of the stream returned by fetchGetHTTP() and is handed back to
 * the _http_*fn() callbacks.
 */
struct cookie
{
    FILE *real_f;			/* stream wrapping the socket */
    int encoding;			/* ENC_CHUNKED or ENC_NONE */
    char content_type[HTTPCTYPELEN+1];	/* Content-Type header value */
    char *buf;				/* current line from fgetln() */
    int b_cur, eof;			/* read offset in buf; end flag */
    /*
     * b_len is a size_t because its address is passed to fgetln(),
     * which stores the line length through a size_t pointer.
     */
    size_t b_len;			/* number of usable bytes in buf */
    unsigned chunksize;			/* bytes left in current chunk */
};
73
74static int
75_http_connect(char *host, int port)
76{
77    struct sockaddr_in sin;
78    struct hostent *he;
79    int fd;
80
81    /* look up host name */
82    if ((he = gethostbyname(host)) == NULL)
83	return -1;
84
85    /* set up socket address structure */
86    bzero(&sin, sizeof(sin));
87    bcopy(he->h_addr, (char *)&sin.sin_addr, he->h_length);
88    sin.sin_family = he->h_addrtype;
89    sin.sin_port = htons(port);
90
91    /* try to connect */
92    if ((fd = socket(sin.sin_family, SOCK_STREAM, 0)) < 0)
93	return -1;
94    if (connect(fd, (struct sockaddr *)&sin, sizeof sin) < 0) {
95	close(fd);
96	return -1;
97    }
98
99    return fd;
100}
101
102static char *
103_http_fillbuf(struct cookie *c)
104{
105    char *ln;
106    unsigned int len;
107
108    if (c->eof)
109	return NULL;
110
111    if (c->encoding == ENC_NONE) {
112	c->buf = fgetln(c->real_f, &(c->b_len));
113	c->b_cur = 0;
114    } else if (c->encoding == ENC_CHUNKED) {
115	if (c->chunksize == 0) {
116	    ln = fgetln(c->real_f, &len);
117	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
118			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
119	    sscanf(ln, "%x", &(c->chunksize));
120	    if (!c->chunksize) {
121		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
122			      "end of last chunk\033[m\n"));
123		c->eof = 1;
124		return NULL;
125	    }
126	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
127			  "new chunk: %X\033[m\n", c->chunksize));
128	}
129	c->buf = fgetln(c->real_f, &(c->b_len));
130	if (c->b_len > c->chunksize)
131	    c->b_len = c->chunksize;
132	c->chunksize -= c->b_len;
133	c->b_cur = 0;
134    }
135    else return NULL; /* unknown encoding */
136    return c->buf;
137}
138
139static int
140_http_readfn(struct cookie *c, char *buf, int len)
141{
142    int l, pos = 0;
143    while (len) {
144	/* empty buffer */
145	if (!c->buf || (c->b_cur == c->b_len))
146	    if (!_http_fillbuf(c))
147		break;
148
149	l = c->b_len - c->b_cur;
150	if (len < l) l = len;
151	memcpy(buf + pos, c->buf + c->b_cur, l);
152	c->b_cur += l;
153	pos += l;
154	len -= l;
155    }
156
157    if (ferror(c->real_f))
158	return -1;
159    else return pos;
160}
161
162static int
163_http_writefn(struct cookie *c, const char *buf, int len)
164{
165    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
166    return r ? r : -1;
167}
168
169static int
170_http_closefn(struct cookie *c)
171{
172    int r = fclose(c->real_f);
173    free(c);
174    return (r == EOF) ? -1 : 0;
175}
176
177char *
178fetchContentType(FILE *f)
179{
180    /*
181     * We have no way of making sure this really *is* one of our cookies,
182     * so just check for a null pointer and hope for the best.
183     */
184    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
185}
186
187FILE *
188fetchGetHTTP(url_t *URL, char *flags)
189{
190    int fd = -1, err, i, enc = ENC_NONE;
191    struct cookie *c;
192    char *ln, *p, *q;
193    FILE *f, *cf;
194    size_t len;
195
196    /* allocate cookie */
197    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
198	return NULL;
199
200    /* check port */
201    if (!URL->port)
202	URL->port = 80; /* default HTTP port */
203
204    /* attempt to connect to proxy server */
205    if (getenv("HTTP_PROXY")) {
206	char *px, host[MAXHOSTNAMELEN];
207	int port = 3128; /* XXX I think 3128 is default... check? */
208	size_t len;
209
210	/* measure length */
211	px = getenv("HTTP_PROXY");
212	len = strcspn(px, ":");
213
214	/* get port (atoi is a little too tolerant perhaps?) */
215	if (px[len] == ':')
216	    port = atoi(px+len+1);
217
218	/* get host name */
219	if (len >= MAXHOSTNAMELEN)
220	    len = MAXHOSTNAMELEN - 1;
221	strncpy(host, px, len);
222	host[len] = 0;
223
224	/* connect */
225	fd = _http_connect(host, port);
226    }
227
228    /* if no proxy is configured or could be contacted, try direct */
229    if (fd < 0) {
230	if ((fd = _http_connect(URL->host, URL->port)) < 0)
231	    goto ouch;
232    }
233
234    /* reopen as stream */
235    if ((f = fdopen(fd, "r+")) == NULL)
236	goto ouch;
237    c->real_f = f;
238
239    /* send request (proxies require absolute form, so use that) */
240    fprintf(f, "GET http://%s:%d/%s HTTP/1.1" ENDL,
241	    URL->host, URL->port, URL->doc);
242
243    /* start sending headers away */
244    if (URL->user[0] || URL->pwd[0]) {
245	fprintf(f, "Authorization: Basic ");
246	fprint64(f, (const unsigned char *)URL->user);
247	fputc(':', f);
248	fprint64(f, (const unsigned char *)URL->pwd);
249	fputs(ENDL, f);
250    }
251    fprintf(f, "Host: %s:%d" ENDL, URL->host, URL->port);
252    fprintf(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
253    fprintf(f, "Connection: close" ENDL ENDL);
254
255    /* get response */
256    if ((ln = fgetln(f, &len)) == NULL)
257	goto fouch;
258    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
259		  (int)len-2, (int)len-2, ln));
260
261    /* we can't use strchr() and friends since ln isn't NUL-terminated */
262    p = ln;
263    while ((p < ln + len) && !isspace(*p))
264	p++;
265    while ((p < ln + len) && !isdigit(*p))
266	p++;
267    if (!isdigit(*p))
268	goto fouch;
269    err = atoi(p);
270    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
271
272    /* add code to handle redirects later */
273    if (err != 200)
274	goto fouch;
275
276    /* browse through header */
277    while (1) {
278	if ((ln = fgetln(f, &len)) == NULL)
279	    goto fouch;
280	if ((ln[0] == '\r') || (ln[0] == '\n'))
281	    break;
282	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
283		      (int)len-2, (int)len-2, ln));
284#define XFERENC "Transfer-Encoding:"
285	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
286	    p = ln + sizeof(XFERENC) - 1;
287	    while ((p < ln + len) && isspace(*p))
288		p++;
289	    for (q = p; (q < ln + len) && !isspace(*q); q++)
290		/* VOID */ ;
291	    *q = 0;
292	    if (strcasecmp(p, "chunked") == 0)
293		enc = ENC_CHUNKED;
294	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
295#undef XFERENC
296#define CONTTYPE "Content-Type:"
297	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
298	    p = ln + sizeof(CONTTYPE) - 1;
299	    while ((p < ln + len) && isspace(*p))
300		p++;
301	    for (i = 0; p < ln + len; p++)
302		if (i < HTTPCTYPELEN)
303		    c->content_type[i++] = *p;
304	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
305	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
306			  c->content_type));
307#undef CONTTYPE
308	}
309    }
310
311    /* only body remains */
312    c->encoding = enc;
313    cf = funopen(c,
314		 (int (*)(void *, char *, int))_http_readfn,
315		 (int (*)(void *, const char *, int))_http_writefn,
316		 (fpos_t (*)(void *, fpos_t, int))NULL,
317		 (int (*)(void *))_http_closefn);
318    if (cf == NULL)
319	goto fouch;
320    return cf;
321
322ouch:
323    if (fd >= 0)
324	close(fd);
325    free(c);
326    return NULL;
327fouch:
328    fclose(f);
329    free(c);
330    return NULL;
331}
332
333FILE *
334fetchPutHTTP(url_t *URL, char *flags)
335{
336    warnx("fetchPutHTTP(): not implemented");
337    return NULL;
338}
339