http.c revision 60189
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/lib/libfetch/http.c 60189 2000-05-07 20:01:55Z des $
29 */
30
31/*
32 * The base64 code in this file is based on code from MIT fetch, which
33 * has the following copyright and license:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.	 M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE. */
62
63#include <sys/param.h>
64
65#include <err.h>
66#include <ctype.h>
67#include <netdb.h>
68#include <stdarg.h>
69#include <stdio.h>
70#include <stdlib.h>
71#include <string.h>
72#include <unistd.h>
73
74#include "fetch.h"
75#include "common.h"
76#include "httperr.h"
77
78extern char *__progname;
79
80#define ENDL "\r\n"
81
/*
 * Per-connection state.  fetchGetHTTP() allocates one of these and
 * hands it to funopen() as the cookie of the stream it returns.
 */
#define ENC_NONE 0
#define ENC_CHUNKED 1
#define HTTPCTYPELEN 59

struct cookie
{
    FILE *real_f;			/* stream wrapped around the socket */
    int encoding;			/* 1 = chunked, 0 = none */
    char content_type[HTTPCTYPELEN+1];	/* Content-Type value, NUL-terminated */
    char *buf;				/* most recent line from fgetln() */
    int b_cur, eof;			/* read offset into buf; end-of-body flag */
    size_t b_len;			/* bytes usable in buf; size_t since
					   fgetln() stores through a size_t * */
    unsigned chunksize;			/* bytes left in the current chunk */
};
94
/*
 * Send a formatted line; optionally echo to terminal
 *
 * The printf-style message is written to f.  Unless NDEBUG is defined,
 * a highlighted copy is also written to stderr.
 *
 * Returns 0 on success, or -1 if the write to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list may only be walked once, so the argument list has to
     * be restarted before it is formatted a second time.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
114
115/*
116 * Fill the input buffer, do chunk decoding on the fly
117 */
118static char *
119_http_fillbuf(struct cookie *c)
120{
121    char *ln;
122    unsigned int len;
123
124    if (c->eof)
125	return NULL;
126
127    if (c->encoding == ENC_NONE) {
128	c->buf = fgetln(c->real_f, &(c->b_len));
129	c->b_cur = 0;
130    } else if (c->encoding == ENC_CHUNKED) {
131	if (c->chunksize == 0) {
132	    ln = fgetln(c->real_f, &len);
133	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
134			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
135	    sscanf(ln, "%x", &(c->chunksize));
136	    if (!c->chunksize) {
137		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
138			      "end of last chunk\033[m\n"));
139		c->eof = 1;
140		return NULL;
141	    }
142	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
143			  "new chunk: %X\033[m\n", c->chunksize));
144	}
145	c->buf = fgetln(c->real_f, &(c->b_len));
146	if (c->b_len > c->chunksize)
147	    c->b_len = c->chunksize;
148	c->chunksize -= c->b_len;
149	c->b_cur = 0;
150    }
151    else return NULL; /* unknown encoding */
152    return c->buf;
153}
154
155/*
156 * Read function
157 */
158static int
159_http_readfn(struct cookie *c, char *buf, int len)
160{
161    int l, pos = 0;
162    while (len) {
163	/* empty buffer */
164	if (!c->buf || (c->b_cur == c->b_len))
165	    if (!_http_fillbuf(c))
166		break;
167
168	l = c->b_len - c->b_cur;
169	if (len < l) l = len;
170	memcpy(buf + pos, c->buf + c->b_cur, l);
171	c->b_cur += l;
172	pos += l;
173	len -= l;
174    }
175
176    if (ferror(c->real_f))
177	return -1;
178    else return pos;
179}
180
181/*
182 * Write function
183 */
184static int
185_http_writefn(struct cookie *c, const char *buf, int len)
186{
187    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
188    return r ? r : -1;
189}
190
191/*
192 * Close function
193 */
194static int
195_http_closefn(struct cookie *c)
196{
197    int r = fclose(c->real_f);
198    free(c);
199    return (r == EOF) ? -1 : 0;
200}
201
202/*
203 * Extract content type from cookie
204 */
205char *
206fetchContentType(FILE *f)
207{
208    /*
209     * We have no way of making sure this really *is* one of our cookies,
210     * so just check for a null pointer and hope for the best.
211     */
212    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
213}
214
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as '='-padded base64 and appends
 * a terminating NUL.  dst must have room for 4 * ((l + 2) / 3) + 1
 * bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: plain char may be signed, and
     * a byte >= 0x80 would then sign-extend and smear ones over its
     * neighbours when the group is assembled.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing one or two bytes: pad the group with '=' */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
262
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the
 * base64 encoding of the single string "usr:pwd".
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory is short or the input is unreasonably long.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, n;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);

    /*
     * _http_base64() works with int lengths; capping each part at
     * 512MB keeps both the joined input and the encoded output in range.
     */
    if (lu >= 0x20000000 || lp >= 0x20000000)
	return NULL;
    n = lu + 1 + lp;		/* user name, colon, password */

    /* the user name and password are encoded together, as one string */
    if ((plain = malloc(n + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp);
    plain[n] = 0;

    /* base64 emits four characters per (started) group of three bytes */
    str = malloc(4 * ((n + 2) / 3) + 1);
    if (str != NULL)
	_http_base64(str, plain, (int)n);

    free(plain);
    return str;
}
290
291/*
292 * Retrieve a file by HTTP
293 */
294FILE *
295fetchGetHTTP(struct url *URL, char *flags)
296{
297    int sd = -1, e, i, enc = ENC_NONE, direct, verbose;
298    struct cookie *c;
299    char *ln, *p, *px, *q;
300    FILE *f, *cf;
301    size_t len;
302
303    direct = (flags && strchr(flags, 'd'));
304    verbose = (flags && strchr(flags, 'v'));
305
306    /* allocate cookie */
307    if ((c = calloc(1, sizeof *c)) == NULL)
308	return NULL;
309
310    /* check port */
311    if (!URL->port) {
312	struct servent *se;
313
314	if ((se = getservbyname("http", "tcp")) != NULL)
315	    URL->port = ntohs(se->s_port);
316	else
317	    URL->port = 80;
318    }
319
320    /* attempt to connect to proxy server */
321    if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
322	char host[MAXHOSTNAMELEN];
323	int port = 0;
324
325	/* measure length */
326	len = strcspn(px, ":");
327
328	/* get port (XXX atoi is a little too tolerant perhaps?) */
329	if (px[len] == ':') {
330	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
331		|| strlen(px+len+1) > 5) {
332		/* XXX we should emit some kind of warning */
333	    }
334	    port = atoi(px+len+1);
335	    if (port < 1 || port > 65535) {
336		/* XXX we should emit some kind of warning */
337	    }
338	}
339	if (!port) {
340#if 0
341	    /*
342	     * commented out, since there is currently no service name
343	     * for HTTP proxies
344	     */
345	    struct servent *se;
346
347	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
348		port = ntohs(se->s_port);
349	    else
350#endif
351		port = 3128;
352	}
353
354	/* get host name */
355	if (len >= MAXHOSTNAMELEN)
356	    len = MAXHOSTNAMELEN - 1;
357	strncpy(host, px, len);
358	host[len] = 0;
359
360	/* connect */
361	sd = _fetch_connect(host, port, verbose);
362    }
363
364    /* if no proxy is configured or could be contacted, try direct */
365    if (sd == -1) {
366	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
367	    goto ouch;
368    }
369
370    /* reopen as stream */
371    if ((f = fdopen(sd, "r+")) == NULL)
372	goto ouch;
373    c->real_f = f;
374
375    /* send request (proxies require absolute form, so use that) */
376    if (verbose)
377	_fetch_info("requesting http://%s:%d%s",
378		    URL->host, URL->port, URL->doc);
379    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
380	      URL->host, URL->port, URL->doc);
381
382    /* start sending headers away */
383    if (URL->user[0] || URL->pwd[0]) {
384	char *auth_str = _http_auth(URL->user, URL->pwd);
385	if (!auth_str)
386	    goto fouch;
387	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
388	free(auth_str);
389    }
390    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
391    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
392    _http_cmd(f, "Connection: close" ENDL ENDL);
393
394    /* get response */
395    if ((ln = fgetln(f, &len)) == NULL)
396	goto fouch;
397    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
398		  (int)len-2, (int)len-2, ln));
399
400    /* we can't use strchr() and friends since ln isn't NUL-terminated */
401    p = ln;
402    while ((p < ln + len) && !isspace(*p))
403	p++;
404    while ((p < ln + len) && !isdigit(*p))
405	p++;
406    if (!isdigit(*p))
407	goto fouch;
408    e = atoi(p);
409    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
410
411    /* add code to handle redirects later */
412    if (e != 200) {
413	_http_seterr(e);
414	goto fouch;
415    }
416
417    /* browse through header */
418    while (1) {
419	if ((ln = fgetln(f, &len)) == NULL)
420	    goto fouch;
421	if ((ln[0] == '\r') || (ln[0] == '\n'))
422	    break;
423	DEBUG(fprintf(stderr, "header:	 [\033[1m%*.*s\033[m]\n",
424		      (int)len-2, (int)len-2, ln));
425#define XFERENC "Transfer-Encoding:"
426	if (strncasecmp(ln, XFERENC, sizeof XFERENC - 1) == 0) {
427	    p = ln + sizeof XFERENC - 1;
428	    while ((p < ln + len) && isspace(*p))
429		p++;
430	    for (q = p; (q < ln + len) && !isspace(*q); q++)
431		/* VOID */ ;
432	    *q = 0;
433	    if (strcasecmp(p, "chunked") == 0)
434		enc = ENC_CHUNKED;
435	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
436#undef XFERENC
437#define CONTTYPE "Content-Type:"
438	} else if (strncasecmp(ln, CONTTYPE, sizeof CONTTYPE - 1) == 0) {
439	    p = ln + sizeof CONTTYPE - 1;
440	    while ((p < ln + len) && isspace(*p))
441		p++;
442	    for (i = 0; p < ln + len; p++)
443		if (i < HTTPCTYPELEN)
444		    c->content_type[i++] = *p;
445	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
446	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
447			  c->content_type));
448#undef CONTTYPE
449	}
450    }
451
452    /* only body remains */
453    c->encoding = enc;
454    cf = funopen(c,
455		 (int (*)(void *, char *, int))_http_readfn,
456		 (int (*)(void *, const char *, int))_http_writefn,
457		 (fpos_t (*)(void *, fpos_t, int))NULL,
458		 (int (*)(void *))_http_closefn);
459    if (cf == NULL)
460	goto fouch;
461
462    return cf;
463
464ouch:
465    if (sd >= 0)
466	close(sd);
467    free(c);
468    _http_seterr(999); /* XXX do this properly RSN */
469    return NULL;
470fouch:
471    fclose(f);
472    free(c);
473    _http_seterr(999); /* XXX do this properly RSN */
474    return NULL;
475}
476
/*
 * Store a file by HTTP
 *
 * Placeholder: both arguments are ignored, a warning is issued and
 * NULL is returned.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return NULL;
}
483
/*
 * Get an HTTP document's metadata
 *
 * Placeholder: all arguments are ignored (us is left untouched), a
 * warning is issued and -1 is returned.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    (void)url;
    (void)us;
    (void)flags;

    warnx("fetchStatHTTP(): not implemented");
    return -1;
}
493
/*
 * List a directory
 *
 * Placeholder: both arguments are ignored, a warning is issued and
 * NULL is returned.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return NULL;
}
503