http.c revision 41862
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$Id: http.c,v 1.7 1998/11/06 22:14:08 des Exp $
29 */
30
31/*
32 * The base64 code in this file is based on code from MIT fetch, which
33 * has the following copyright and license:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.  M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE. */
62
63#include <sys/param.h>
64#include <sys/errno.h>
65#include <sys/socket.h>
66#include <sys/types.h>
67
68#include <netinet/in.h>
69
70#include <err.h>
71#include <ctype.h>
72#include <netdb.h>
73#include <stdarg.h>
74#include <stdio.h>
75#include <stdlib.h>
76#include <string.h>
77#include <unistd.h>
78
79#include "fetch.h"
80#include "common.h"
81#include "httperr.h"
82
/*
 * DEBUG(x) runs the statement x when NDEBUG is not defined and expands
 * to an empty statement otherwise.  Either way it is a single statement
 * that must be followed by a semicolon.
 */
#ifndef NDEBUG
#define DEBUG(x) do x; while (0)
#else
#define DEBUG(x) do { } while (0)
#endif

/* Program name; only declared here, the definition is not in this file. */
extern char *__progname;

/* Line terminator appended to the request line and headers sent below. */
#define ENDL "\r\n"
92
/* Values stored in cookie.encoding */
#define ENC_NONE 0
#define ENC_CHUNKED 1

/* Longest content type string kept in a cookie, excluding the NUL */
#define HTTPCTYPELEN 59

/*
 * Per-connection state handed to funopen(); the read/write/close
 * callbacks in this file receive a pointer to one of these.
 */
struct cookie {
    FILE *real_f;			/* underlying stream */
    int encoding;			/* 1 = chunked, 0 = none */
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated content type */
    char *buf;				/* current input buffer */
    int b_cur;				/* read offset into buf */
    int eof;				/* nonzero once input is exhausted */
    unsigned b_len;			/* number of usable bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
105
/*
 * Send a formatted line; in debug builds (NDEBUG not defined) also echo
 * it to stderr, wrapped in terminal bold escapes.
 *
 * f:   stream the formatted text is written to
 * fmt: printf-style format string, followed by its arguments
 *
 * Returns 0 on success, -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has walked it, so the
     * argument traversal is restarted before formatting a second time.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
125
126/*
127 * Fill the input buffer, do chunk decoding on the fly
128 */
129static char *
130_http_fillbuf(struct cookie *c)
131{
132    char *ln;
133    unsigned int len;
134
135    if (c->eof)
136	return NULL;
137
138    if (c->encoding == ENC_NONE) {
139	c->buf = fgetln(c->real_f, &(c->b_len));
140	c->b_cur = 0;
141    } else if (c->encoding == ENC_CHUNKED) {
142	if (c->chunksize == 0) {
143	    ln = fgetln(c->real_f, &len);
144	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
145			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
146	    sscanf(ln, "%x", &(c->chunksize));
147	    if (!c->chunksize) {
148		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
149			      "end of last chunk\033[m\n"));
150		c->eof = 1;
151		return NULL;
152	    }
153	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
154			  "new chunk: %X\033[m\n", c->chunksize));
155	}
156	c->buf = fgetln(c->real_f, &(c->b_len));
157	if (c->b_len > c->chunksize)
158	    c->b_len = c->chunksize;
159	c->chunksize -= c->b_len;
160	c->b_cur = 0;
161    }
162    else return NULL; /* unknown encoding */
163    return c->buf;
164}
165
166/*
167 * Read function
168 */
169static int
170_http_readfn(struct cookie *c, char *buf, int len)
171{
172    int l, pos = 0;
173    while (len) {
174	/* empty buffer */
175	if (!c->buf || (c->b_cur == c->b_len))
176	    if (!_http_fillbuf(c))
177		break;
178
179	l = c->b_len - c->b_cur;
180	if (len < l) l = len;
181	memcpy(buf + pos, c->buf + c->b_cur, l);
182	c->b_cur += l;
183	pos += l;
184	len -= l;
185    }
186
187    if (ferror(c->real_f))
188	return -1;
189    else return pos;
190}
191
192/*
193 * Write function
194 */
195static int
196_http_writefn(struct cookie *c, const char *buf, int len)
197{
198    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
199    return r ? r : -1;
200}
201
202/*
203 * Close function
204 */
205static int
206_http_closefn(struct cookie *c)
207{
208    int r = fclose(c->real_f);
209    free(c);
210    return (r == EOF) ? -1 : 0;
211}
212
213/*
214 * Extract content type from cookie
215 */
216char *
217fetchContentType(FILE *f)
218{
219    /*
220     * We have no way of making sure this really *is* one of our cookies,
221     * so just check for a null pointer and hope for the best.
222     */
223    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
224}
225
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as NUL-terminated base64 text,
 * padding the final group with '='.  dst must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed plain char,
     * bytes >= 0x80 would sign-extend and spill set bits into the
     * neighbouring 6-bit groups.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    switch (l) {
    case 2:
	/* two bytes left: three significant characters, one pad */
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	/* one byte left: two significant characters, two pads */
	t = (unsigned long)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
273
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the
 * base64 encoding of the single string "usr:pwd".
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plain_len;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plain_len = lu + 1 + lp;		/* user, colon, password */

    /* assemble "usr:pwd"; it has to be encoded as one string */
    if ((plain = malloc(plain_len + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);

    /* base64 emits 4 characters per started group of 3 bytes, plus NUL */
    str = malloc((plain_len + 2) / 3 * 4 + 1);
    if (str != NULL)
	_http_base64(str, plain, (int)plain_len);

    free(plain);
    return str;
}
301
302/*
303 * Retrieve a file by HTTP
304 */
305FILE *
306fetchGetHTTP(struct url *URL, char *flags)
307{
308    int sd = -1, err, i, enc = ENC_NONE, verbose;
309    struct cookie *c;
310    char *ln, *p, *q;
311    FILE *f, *cf;
312    size_t len;
313
314    verbose = (strchr(flags, 'v') != NULL);
315
316    /* allocate cookie */
317    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
318	return NULL;
319
320    /* check port */
321    if (!URL->port)
322	URL->port = 80; /* default HTTP port */
323
324    /* attempt to connect to proxy server */
325    if (getenv("HTTP_PROXY")) {
326	char *px, host[MAXHOSTNAMELEN];
327	int port = 3128; /* XXX I think 3128 is default... check? */
328	size_t len;
329
330	/* measure length */
331	px = getenv("HTTP_PROXY");
332	len = strcspn(px, ":");
333
334	/* get port (atoi is a little too tolerant perhaps?) */
335	if (px[len] == ':')
336	    port = atoi(px+len+1);
337
338	/* get host name */
339	if (len >= MAXHOSTNAMELEN)
340	    len = MAXHOSTNAMELEN - 1;
341	strncpy(host, px, len);
342	host[len] = 0;
343
344	/* connect */
345	sd = fetchConnect(host, port, verbose);
346    }
347
348    /* if no proxy is configured or could be contacted, try direct */
349    if (sd == -1) {
350	if ((sd = fetchConnect(URL->host, URL->port, verbose)) == -1)
351	    goto ouch;
352    }
353
354    /* reopen as stream */
355    if ((f = fdopen(sd, "r+")) == NULL)
356	goto ouch;
357    c->real_f = f;
358
359    /* send request (proxies require absolute form, so use that) */
360    if (verbose)
361	_fetch_info("requesting http://%s:%d%s",
362		    URL->host, URL->port, URL->doc);
363    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
364	      URL->host, URL->port, URL->doc);
365
366    /* start sending headers away */
367    if (URL->user[0] || URL->pwd[0]) {
368	char *auth_str = _http_auth(URL->user, URL->pwd);
369	if (!auth_str)
370	    goto fouch;
371	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
372	free(auth_str);
373    }
374    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
375    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
376    _http_cmd(f, "Connection: close" ENDL ENDL);
377
378    /* get response */
379    if ((ln = fgetln(f, &len)) == NULL)
380	goto fouch;
381    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
382		  (int)len-2, (int)len-2, ln));
383
384    /* we can't use strchr() and friends since ln isn't NUL-terminated */
385    p = ln;
386    while ((p < ln + len) && !isspace(*p))
387	p++;
388    while ((p < ln + len) && !isdigit(*p))
389	p++;
390    if (!isdigit(*p))
391	goto fouch;
392    err = atoi(p);
393    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
394
395    /* add code to handle redirects later */
396    if (err != 200) {
397	_http_seterr(err);
398	goto fouch;
399    }
400
401    /* browse through header */
402    while (1) {
403	if ((ln = fgetln(f, &len)) == NULL)
404	    goto fouch;
405	if ((ln[0] == '\r') || (ln[0] == '\n'))
406	    break;
407	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
408		      (int)len-2, (int)len-2, ln));
409#define XFERENC "Transfer-Encoding:"
410	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
411	    p = ln + sizeof(XFERENC) - 1;
412	    while ((p < ln + len) && isspace(*p))
413		p++;
414	    for (q = p; (q < ln + len) && !isspace(*q); q++)
415		/* VOID */ ;
416	    *q = 0;
417	    if (strcasecmp(p, "chunked") == 0)
418		enc = ENC_CHUNKED;
419	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
420#undef XFERENC
421#define CONTTYPE "Content-Type:"
422	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
423	    p = ln + sizeof(CONTTYPE) - 1;
424	    while ((p < ln + len) && isspace(*p))
425		p++;
426	    for (i = 0; p < ln + len; p++)
427		if (i < HTTPCTYPELEN)
428		    c->content_type[i++] = *p;
429	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
430	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
431			  c->content_type));
432#undef CONTTYPE
433	}
434    }
435
436    /* only body remains */
437    c->encoding = enc;
438    cf = funopen(c,
439		 (int (*)(void *, char *, int))_http_readfn,
440		 (int (*)(void *, const char *, int))_http_writefn,
441		 (fpos_t (*)(void *, fpos_t, int))NULL,
442		 (int (*)(void *))_http_closefn);
443    if (cf == NULL)
444	goto fouch;
445    return cf;
446
447ouch:
448    if (sd >= 0)
449	close(sd);
450    free(c);
451    _http_seterr(999); /* XXX do this properly RSN */
452    return NULL;
453fouch:
454    fclose(f);
455    free(c);
456    _http_seterr(999); /* XXX do this properly RSN */
457    return NULL;
458}
459
/*
 * Store a file by HTTP
 *
 * Placeholder: prints a warning and always returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    /* neither argument is looked at yet */
    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return (NULL);
}
466
/*
 * Get an HTTP document's metadata
 *
 * Placeholder: prints a warning and always returns -1; us is left
 * untouched.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    /* none of the arguments are looked at yet */
    (void)url;
    (void)us;
    (void)flags;

    warnx("fetchStatHTTP(): not implemented");
    return (-1);
}
476