http.c revision 61896
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/lib/libfetch/http.c 61896 2000-06-21 09:49:51Z des $
29 */
30
31/*
32 * The base64 code in this file is based on code from MIT fetch, which
33 * has the following copyright and license:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.	 M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE. */
62
63#include <sys/param.h>
64#include <sys/socket.h>
65
66#include <err.h>
67#include <ctype.h>
68#include <locale.h>
69#include <netdb.h>
70#include <stdarg.h>
71#include <stdio.h>
72#include <stdlib.h>
73#include <string.h>
74#include <time.h>
75#include <unistd.h>
76
77#include "fetch.h"
78#include "common.h"
79#include "httperr.h"
80
/* Name of the running program (defined outside this file); sent as User-Agent */
extern char *__progname;

/* Line terminator appended to every request line and header we send */
#define ENDL "\r\n"

/* HTTP status codes that the code in this file checks for explicitly */
#define HTTP_OK		200	/* accepted for requests without an offset */
#define HTTP_PARTIAL	206	/* accepted for requests with an offset */
#define HTTP_MOVED	302	/* accepted unless redirects are disabled */
88
/*
 * State attached to the stream handed back to the caller.  The read
 * function pulls lines from real_f into buf and serves them from there,
 * undoing chunked transfer encoding when it is in effect.
 */
struct cookie
{
    FILE *real_f;			/* stream connected to the server */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* Content-Type value, NUL-terminated */
    char *buf;				/* most recent line from fgetln() */
    int b_cur, eof;			/* read offset in buf; end-of-body flag */
    size_t b_len;			/* usable bytes in buf (fgetln() needs size_t) */
    unsigned chunksize;			/* bytes left in the current chunk */
};
101
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats fmt with the trailing arguments and writes the result to f.
 * Unless NDEBUG is defined, the same text is also echoed to stderr
 * between bold-on / bold-off escape sequences.
 *
 * Returns 0 on success and -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so
     * the echo needs its own va_start() / va_end() pair.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
121
122/*
123 * Fill the input buffer, do chunk decoding on the fly
124 */
125static char *
126_http_fillbuf(struct cookie *c)
127{
128    char *ln;
129    unsigned int len;
130
131    if (c->eof)
132	return NULL;
133
134    if (c->encoding == ENC_NONE) {
135	c->buf = fgetln(c->real_f, &(c->b_len));
136	c->b_cur = 0;
137    } else if (c->encoding == ENC_CHUNKED) {
138	if (c->chunksize == 0) {
139	    ln = fgetln(c->real_f, &len);
140	    if (len <= 2)
141		return NULL;
142	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
143			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
144	    sscanf(ln, "%x", &(c->chunksize));
145	    if (!c->chunksize) {
146		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
147			      "end of last chunk\033[m\n"));
148		c->eof = 1;
149		return NULL;
150	    }
151	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
152			  "new chunk: %X\033[m\n", c->chunksize));
153	}
154	c->buf = fgetln(c->real_f, &(c->b_len));
155	if (c->b_len > c->chunksize)
156	    c->b_len = c->chunksize;
157	c->chunksize -= c->b_len;
158	c->b_cur = 0;
159    }
160    else return NULL; /* unknown encoding */
161    return c->buf;
162}
163
164/*
165 * Read function
166 */
167static int
168_http_readfn(struct cookie *c, char *buf, int len)
169{
170    int l, pos = 0;
171    while (len) {
172	/* empty buffer */
173	if (!c->buf || (c->b_cur == c->b_len))
174	    if (!_http_fillbuf(c))
175		break;
176
177	l = c->b_len - c->b_cur;
178	if (len < l) l = len;
179	memcpy(buf + pos, c->buf + c->b_cur, l);
180	c->b_cur += l;
181	pos += l;
182	len -= l;
183    }
184
185    if (ferror(c->real_f))
186	return -1;
187    else return pos;
188}
189
190/*
191 * Write function
192 */
193static int
194_http_writefn(struct cookie *c, const char *buf, int len)
195{
196    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
197    return r ? r : -1;
198}
199
200/*
201 * Close function
202 */
203static int
204_http_closefn(struct cookie *c)
205{
206    int r = fclose(c->real_f);
207    free(c);
208    return (r == EOF) ? -1 : 0;
209}
210
211/*
212 * Extract content type from cookie
213 */
214char *
215fetchContentType(FILE *f)
216{
217    /*
218     * We have no way of making sure this really *is* one of our cookies,
219     * so just check for a null pointer and hope for the best.
220     */
221    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
222}
223
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as base64 text, padding the last
 * group with '=' and appending a terminating NUL.  dst must have room
 * for 4 * ((l + 2) / 3) + 1 bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed plain char, any
     * byte >= 0x80 would sign-extend and smear ones over the group.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing partial group, padded to four characters */
    switch (l) {
    case 2:
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned long)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
271
/*
 * Encode username and password
 *
 * Builds the credentials for a Basic Authorization header: the string
 * "usr:pwd" encoded as a single base64 value.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plen;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plen = lu + 1 + lp;		/* user name, colon, password */

    /* the pair is encoded as one unit, so join it first */
    if ((plain = malloc(plen + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);

    /* four output characters per started group of three, plus the NUL */
    str = malloc((plen + 2) / 3 * 4 + 1);
    if (str != NULL)
	_http_base64(str, plain, (int)plen);

    free(plain);
    return str;
}
299
300/*
301 * Connect to server or proxy
302 */
303FILE *
304_http_connect(struct url *URL, char *flags)
305{
306    int direct, sd = -1, verbose;
307#ifdef INET6
308    int af = AF_UNSPEC;
309#else
310    int af = AF_INET;
311#endif
312    size_t len;
313    char *px;
314    FILE *f;
315
316    direct = (flags && strchr(flags, 'd'));
317    verbose = (flags && strchr(flags, 'v'));
318    if ((flags && strchr(flags, '4')))
319	af = AF_INET;
320    else if ((flags && strchr(flags, '6')))
321	af = AF_INET6;
322
323    /* check port */
324    if (!URL->port) {
325	struct servent *se;
326
327	if (strcasecmp(URL->scheme, "ftp") == 0)
328	    if ((se = getservbyname("ftp", "tcp")) != NULL)
329		URL->port = ntohs(se->s_port);
330	    else
331		URL->port = 21;
332	else
333	    if ((se = getservbyname("http", "tcp")) != NULL)
334		URL->port = ntohs(se->s_port);
335	    else
336		URL->port = 80;
337    }
338
339    /* attempt to connect to proxy server */
340    if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
341	char host[MAXHOSTNAMELEN];
342	int port = 0;
343
344	/* measure length */
345#ifdef INET6
346	if (px[0] != '[' ||
347	    (len = strcspn(px, "]")) >= strlen(px) ||
348	    (px[++len] != '\0' && px[len] != ':'))
349#endif
350	    len = strcspn(px, ":");
351
352	/* get port (XXX atoi is a little too tolerant perhaps?) */
353	if (px[len] == ':') {
354	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
355		|| strlen(px+len+1) > 5) {
356		/* XXX we should emit some kind of warning */
357	    }
358	    port = atoi(px+len+1);
359	    if (port < 1 || port > 65535) {
360		/* XXX we should emit some kind of warning */
361	    }
362	}
363	if (!port) {
364#if 0
365	    /*
366	     * commented out, since there is currently no service name
367	     * for HTTP proxies
368	     */
369	    struct servent *se;
370
371	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
372		port = ntohs(se->s_port);
373	    else
374#endif
375		port = 3128;
376	}
377
378	/* get host name */
379#ifdef INET6
380	if (len > 1 && px[0] == '[' && px[len - 1] == ']') {
381	    px++;
382	    len -= 2;
383	}
384#endif
385	if (len >= MAXHOSTNAMELEN)
386	    len = MAXHOSTNAMELEN - 1;
387	strncpy(host, px, len);
388	host[len] = 0;
389
390	/* connect */
391	sd = _fetch_connect(host, port, af, verbose);
392    }
393
394    /* if no proxy is configured or could be contacted, try direct */
395    if (sd == -1) {
396	if (strcasecmp(URL->scheme, "ftp") == 0)
397	    goto ouch;
398	if ((sd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
399	    goto ouch;
400    }
401
402    /* reopen as stream */
403    if ((f = fdopen(sd, "r+")) == NULL)
404	goto ouch;
405
406    return f;
407
408ouch:
409    if (sd >= 0)
410	close(sd);
411    _http_seterr(999); /* XXX do this properly RSN */
412    return NULL;
413}
414
/*
 * Check a header line
 *
 * Tests whether the header line hdr starts with the field name str,
 * compared without regard to case, immediately followed by a colon.
 *
 * Returns a pointer into hdr at the first non-blank character after
 * the colon (possibly the terminating NUL), or NULL if the name does
 * not match.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Advance only past characters that actually matched; stepping
     * over a mismatching pair would let a name that differs in its
     * last character slip through.
     */
    while (*str != '\0' && *hdr != '\0' &&
	tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	str++;
	hdr++;
    }
    if (*str != '\0' || *hdr != ':')
	return NULL;

    /* step over the colon and any whitespace before the value */
    hdr++;
    while (*hdr != '\0' && isspace((unsigned char)*hdr))
	hdr++;
    return hdr;
}
429
430/*
431 * Send a HEAD or GET request
432 */
433int
434_http_request(FILE *f, char *op, struct url *URL, char *flags)
435{
436    int e, verbose;
437    char *ln, *p;
438    size_t len;
439    char *host;
440#ifdef INET6
441    char hbuf[MAXHOSTNAMELEN + 1];
442#endif
443
444    verbose = (flags && strchr(flags, 'v'));
445
446    host = URL->host;
447#ifdef INET6
448    if (strchr(URL->host, ':')) {
449	snprintf(hbuf, sizeof(hbuf), "[%s]", URL->host);
450	host = hbuf;
451    }
452#endif
453
454    /* send request (proxies require absolute form, so use that) */
455    if (verbose)
456	_fetch_info("requesting %s://%s:%d%s",
457		    URL->scheme, host, URL->port, URL->doc);
458    _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
459	      op, URL->scheme, host, URL->port, URL->doc);
460
461    /* start sending headers away */
462    if (URL->user[0] || URL->pwd[0]) {
463	char *auth_str = _http_auth(URL->user, URL->pwd);
464	if (!auth_str)
465	    return 999; /* XXX wrong */
466	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
467	free(auth_str);
468    }
469    _http_cmd(f, "Host: %s:%d" ENDL, host, URL->port);
470    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
471    if (URL->offset)
472	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
473    _http_cmd(f, "Connection: close" ENDL ENDL);
474
475    /* get response */
476    if ((ln = fgetln(f, &len)) == NULL)
477	return 999;
478    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
479		  (int)len-2, (int)len-2, ln));
480
481    /* we can't use strchr() and friends since ln isn't NUL-terminated */
482    p = ln;
483    while ((p < ln + len) && !isspace(*p))
484	p++;
485    while ((p < ln + len) && !isdigit(*p))
486	p++;
487    if (!isdigit(*p))
488	return 999;
489
490    e = atoi(p);
491    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
492    return e;
493}
494
495/*
496 * Retrieve a file by HTTP
497 */
498FILE *
499fetchGetHTTP(struct url *URL, char *flags)
500{
501    int e, enc = ENC_NONE, i, noredirect;
502    struct cookie *c;
503    char *ln, *p, *q;
504    FILE *f, *cf;
505    size_t len;
506    off_t pos = 0;
507
508    noredirect = (flags && strchr(flags, 'A'));
509
510    /* allocate cookie */
511    if ((c = calloc(1, sizeof *c)) == NULL)
512	return NULL;
513
514    /* connect */
515    if ((f = _http_connect(URL, flags)) == NULL) {
516	free(c);
517	return NULL;
518    }
519    c->real_f = f;
520
521    e = _http_request(f, "GET", URL, flags);
522    if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)
523	&& (e != HTTP_MOVED || noredirect)) {
524	_http_seterr(e);
525	free(c);
526	fclose(f);
527	return NULL;
528    }
529
530    /* browse through header */
531    while (1) {
532	if ((ln = fgetln(f, &len)) == NULL)
533	    goto fouch;
534	if ((ln[0] == '\r') || (ln[0] == '\n'))
535	    break;
536	while (isspace(ln[len-1]))
537	    --len;
538	ln[len] = '\0'; /* XXX */
539	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
540	if ((p = _http_match("Location", ln)) != NULL) {
541	    struct url *url;
542
543	    for (q = p; *q && !isspace(*q); q++)
544		/* VOID */ ;
545	    *q = 0;
546	    if ((url = fetchParseURL(p)) == NULL)
547		goto fouch;
548	    url->offset = URL->offset;
549	    url->length = URL->length;
550	    DEBUG(fprintf(stderr, "location:  [\033[1m%s\033[m]\n", p));
551	    cf = fetchGetHTTP(url, flags);
552	    fetchFreeURL(url);
553	    fclose(f);
554	    return cf;
555	} else if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
556	    for (q = p; *q && !isspace(*q); q++)
557		/* VOID */ ;
558	    *q = 0;
559	    if (strcasecmp(p, "chunked") == 0)
560		enc = ENC_CHUNKED;
561	    DEBUG(fprintf(stderr, "transfer encoding:  [\033[1m%s\033[m]\n", p));
562	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
563	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
564		    c->content_type[i] = *p;
565	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
566	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
567			  c->content_type));
568	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
569	    if (strncasecmp(p, "bytes ", 6) != 0)
570		goto fouch;
571	    p += 6;
572	    while (*p && isdigit(*p))
573		pos = pos * 10 + (*p++ - '0');
574	    /* XXX wouldn't hurt to be slightly more paranoid here */
575	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos));
576	    if (pos > URL->offset)
577		goto fouch;
578	}
579    }
580
581    /* only body remains */
582    c->encoding = enc;
583    cf = funopen(c,
584		 (int (*)(void *, char *, int))_http_readfn,
585		 (int (*)(void *, const char *, int))_http_writefn,
586		 (fpos_t (*)(void *, fpos_t, int))NULL,
587		 (int (*)(void *))_http_closefn);
588    if (cf == NULL)
589	goto fouch;
590
591    while (pos < URL->offset)
592	if (fgetc(cf) == EOF)
593	    goto cfouch;
594
595    return cf;
596
597fouch:
598    fclose(f);
599    free(c);
600    _http_seterr(999); /* XXX do this properly RSN */
601    return NULL;
602cfouch:
603    fclose(cf);
604    _http_seterr(999); /* XXX do this properly RSN */
605    return NULL;
606}
607
/*
 * Store a file by HTTP
 *
 * Placeholder: prints a warning through warnx() and returns NULL
 * whatever the arguments are.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *none = NULL;

    warnx("fetchPutHTTP(): not implemented");
    return none;
}
614
615/*
616 * Get an HTTP document's metadata
617 */
618int
619fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
620{
621    int e, noredirect;
622    size_t len;
623    char *ln, *p, *q;
624    FILE *f;
625
626    noredirect = (flags && strchr(flags, 'A'));
627
628    us->size = -1;
629    us->atime = us->mtime = 0;
630
631    /* connect */
632    if ((f = _http_connect(URL, flags)) == NULL)
633	return -1;
634
635    e = _http_request(f, "HEAD", URL, flags);
636    if (e != HTTP_OK && (e != HTTP_MOVED || noredirect)) {
637	_http_seterr(e);
638	fclose(f);
639	return -1;
640    }
641
642    while (1) {
643	if ((ln = fgetln(f, &len)) == NULL)
644	    goto fouch;
645	if ((ln[0] == '\r') || (ln[0] == '\n'))
646	    break;
647	while (isspace(ln[len-1]))
648	    --len;
649	ln[len] = '\0'; /* XXX */
650	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
651	if ((p = _http_match("Location", ln)) != NULL) {
652	    struct url *url;
653
654	    for (q = p; *q && !isspace(*q); q++)
655		/* VOID */ ;
656	    *q = 0;
657	    if ((url = fetchParseURL(p)) == NULL)
658		goto ouch;
659	    url->offset = URL->offset;
660	    url->length = URL->length;
661	    DEBUG(fprintf(stderr, "location:  [\033[1m%s\033[m]\n", p));
662	    e = fetchStatHTTP(url, us, flags);
663	    fetchFreeURL(url);
664	    fclose(f);
665	    return e;
666	} else if ((p = _http_match("Last-Modified", ln)) != NULL) {
667	    struct tm tm;
668	    char locale[64];
669
670	    strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale);
671	    setlocale(LC_TIME, "C");
672	    strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
673	    /* XXX should add support for date-2 and date-3 */
674	    setlocale(LC_TIME, locale);
675	    us->atime = us->mtime = timegm(&tm);
676	    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
677			  "%02d:%02d:%02d\033[m]\n",
678			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
679			  tm.tm_hour, tm.tm_min, tm.tm_sec));
680	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
681	    us->size = 0;
682	    while (*p && isdigit(*p))
683		us->size = us->size * 10 + (*p++ - '0');
684	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size));
685	}
686    }
687
688    fclose(f);
689    return 0;
690 ouch:
691    _http_seterr(999); /* XXX do this properly RSN */
692 fouch:
693    fclose(f);
694    return -1;
695}
696
/*
 * List a directory
 *
 * Placeholder: prints a warning through warnx() and returns NULL
 * whatever the arguments are.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *none = NULL;

    warnx("fetchListHTTP(): not implemented");
    return none;
}
706