http.c revision 60737
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/lib/libfetch/http.c 60737 2000-05-20 18:23:51Z ume $
29 */
30
31/*
32 * The base64 code in this file is based on code from MIT fetch, which
33 * has the following copyright and license:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.	 M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE. */
62
#include <sys/param.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "fetch.h"
#include "common.h"
#include "httperr.h"
80
/* Program name; sent to the server as part of the User-Agent header. */
extern char *__progname;

/* Line terminator appended to every request line and header we send. */
#define ENDL "\r\n"

/* Response status codes that the request functions accept as success. */
enum {
    HTTP_OK = 200,
    HTTP_PARTIAL = 206
};
87
/*
 * State attached (via funopen()) to the stream handed back to the
 * caller; the read/write/close callbacks receive a pointer to it.
 */
struct cookie
{
    FILE *real_f;			/* stream on the connected socket */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* ENC_CHUNKED or ENC_NONE */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type value */
    char *buf;				/* current line as returned by fgetln() */
    int b_cur, eof;			/* read offset in buf; end-of-body flag */
    /*
     * Number of usable bytes in buf.  This must be a size_t: its address
     * is handed to fgetln(), which stores a size_t through it, and a
     * narrower field would let that store spill into chunksize.
     */
    size_t b_len;
    unsigned chunksize;			/* bytes left in the current chunk */
};
100
/*
 * Send a printf-style formatted line to the stream `f'.  Unless NDEBUG
 * is defined, the same line is also echoed to stderr between terminal
 * bold-on / bold-off escape sequences.
 *
 * Returns 0 on success, or -1 if the line could not be written to `f'.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has walked it, so the
     * echo must start a fresh list instead of reusing the first one.
     */
    va_start(ap, fmt);
    fprintf(stderr, "\033[1m>>> ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\033[m");
    va_end(ap);
#endif

    return (r < 0) ? -1 : 0;
}
120
121/*
122 * Fill the input buffer, do chunk decoding on the fly
123 */
124static char *
125_http_fillbuf(struct cookie *c)
126{
127    char *ln;
128    unsigned int len;
129
130    if (c->eof)
131	return NULL;
132
133    if (c->encoding == ENC_NONE) {
134	c->buf = fgetln(c->real_f, &(c->b_len));
135	c->b_cur = 0;
136    } else if (c->encoding == ENC_CHUNKED) {
137	if (c->chunksize == 0) {
138	    ln = fgetln(c->real_f, &len);
139	    if (len <= 2)
140		return NULL;
141	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
142			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
143	    sscanf(ln, "%x", &(c->chunksize));
144	    if (!c->chunksize) {
145		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
146			      "end of last chunk\033[m\n"));
147		c->eof = 1;
148		return NULL;
149	    }
150	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
151			  "new chunk: %X\033[m\n", c->chunksize));
152	}
153	c->buf = fgetln(c->real_f, &(c->b_len));
154	if (c->b_len > c->chunksize)
155	    c->b_len = c->chunksize;
156	c->chunksize -= c->b_len;
157	c->b_cur = 0;
158    }
159    else return NULL; /* unknown encoding */
160    return c->buf;
161}
162
163/*
164 * Read function
165 */
166static int
167_http_readfn(struct cookie *c, char *buf, int len)
168{
169    int l, pos = 0;
170    while (len) {
171	/* empty buffer */
172	if (!c->buf || (c->b_cur == c->b_len))
173	    if (!_http_fillbuf(c))
174		break;
175
176	l = c->b_len - c->b_cur;
177	if (len < l) l = len;
178	memcpy(buf + pos, c->buf + c->b_cur, l);
179	c->b_cur += l;
180	pos += l;
181	len -= l;
182    }
183
184    if (ferror(c->real_f))
185	return -1;
186    else return pos;
187}
188
189/*
190 * Write function
191 */
192static int
193_http_writefn(struct cookie *c, const char *buf, int len)
194{
195    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
196    return r ? r : -1;
197}
198
199/*
200 * Close function
201 */
202static int
203_http_closefn(struct cookie *c)
204{
205    int r = fclose(c->real_f);
206    free(c);
207    return (r == EOF) ? -1 : 0;
208}
209
210/*
211 * Extract content type from cookie
212 */
213char *
214fetchContentType(FILE *f)
215{
216    /*
217     * We have no way of making sure this really *is* one of our cookies,
218     * so just check for a null pointer and hope for the best.
219     */
220    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
221}
222
/*
 * Base64 encoding
 *
 * Encode the `l' bytes at `src' into `dst', padding the final group
 * with '=' and appending a terminating NUL.  `dst' must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.  Returns the number of characters
 * written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Work on unsigned bytes: with a signed char, a byte >= 0x80 would
     * be sign-extended before the shift and bleed into its neighbours.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* encode the one or two leftover bytes, if any */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
270
/*
 * Encode username and password
 *
 * Build the credentials for a "Basic" Authorization header: the string
 * "usr:pwd", base64-encoded as a single unit.  Returns a malloc()ed,
 * NUL-terminated string that the caller must free(), or NULL if memory
 * is unavailable or the inputs are too long to encode.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);

    /* _http_base64() counts in int; keep the 4/3 expansion in range */
    if (lu > INT_MAX / 4 || lp > INT_MAX / 4)
	return NULL;
    rawlen = lu + 1 + lp;

    /* assemble "usr:pwd" */
    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = '\0';

    /* four output characters per (possibly partial) 3-byte group + NUL */
    str = malloc((rawlen + 2) / 3 * 4 + 1);
    if (str != NULL)
	_http_base64(str, raw, (int)rawlen);

    free(raw);
    return str;
}
298
299/*
300 * Connect to server or proxy
301 */
302FILE *
303_http_connect(struct url *URL, char *flags)
304{
305    int direct, sd = -1, verbose;
306#ifdef INET6
307    int af = AF_UNSPEC;
308#else
309    int af = AF_INET;
310#endif
311    size_t len;
312    char *px;
313    FILE *f;
314
315    direct = (flags && strchr(flags, 'd'));
316    verbose = (flags && strchr(flags, 'v'));
317    if ((flags && strchr(flags, '4')))
318	af = AF_INET;
319    else if ((flags && strchr(flags, '6')))
320	af = AF_INET6;
321
322    /* check port */
323    if (!URL->port) {
324	struct servent *se;
325
326	if (strcasecmp(URL->scheme, "ftp") == 0)
327	    if ((se = getservbyname("ftp", "tcp")) != NULL)
328		URL->port = ntohs(se->s_port);
329	    else
330		URL->port = 21;
331	else
332	    if ((se = getservbyname("http", "tcp")) != NULL)
333		URL->port = ntohs(se->s_port);
334	    else
335		URL->port = 80;
336    }
337
338    /* attempt to connect to proxy server */
339    if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
340	char host[MAXHOSTNAMELEN];
341	int port = 0;
342
343	/* measure length */
344#ifdef INET6
345	if (px[0] != '[' ||
346	    (len = strcspn(px, "]")) >= strlen(px) ||
347	    (px[++len] != '\0' && px[len] != ':'))
348#endif
349	    len = strcspn(px, ":");
350
351	/* get port (XXX atoi is a little too tolerant perhaps?) */
352	if (px[len] == ':') {
353	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
354		|| strlen(px+len+1) > 5) {
355		/* XXX we should emit some kind of warning */
356	    }
357	    port = atoi(px+len+1);
358	    if (port < 1 || port > 65535) {
359		/* XXX we should emit some kind of warning */
360	    }
361	}
362	if (!port) {
363#if 0
364	    /*
365	     * commented out, since there is currently no service name
366	     * for HTTP proxies
367	     */
368	    struct servent *se;
369
370	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
371		port = ntohs(se->s_port);
372	    else
373#endif
374		port = 3128;
375	}
376
377	/* get host name */
378#ifdef INET6
379	if (len > 1 && px[0] == '[' && px[len - 1] == ']') {
380	    px++;
381	    len -= 2;
382	}
383#endif
384	if (len >= MAXHOSTNAMELEN)
385	    len = MAXHOSTNAMELEN - 1;
386	strncpy(host, px, len);
387	host[len] = 0;
388
389	/* connect */
390	sd = _fetch_connect(host, port, af, verbose);
391    }
392
393    /* if no proxy is configured or could be contacted, try direct */
394    if (sd == -1) {
395	if (strcasecmp(URL->scheme, "ftp") == 0)
396	    goto ouch;
397	if ((sd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
398	    goto ouch;
399    }
400
401    /* reopen as stream */
402    if ((f = fdopen(sd, "r+")) == NULL)
403	goto ouch;
404
405    return f;
406
407ouch:
408    if (sd >= 0)
409	close(sd);
410    _http_seterr(999); /* XXX do this properly RSN */
411    return NULL;
412}
413
414/*
415 * Send a HEAD or GET request
416 */
417int
418_http_request(FILE *f, char *op, struct url *URL, char *flags)
419{
420    int e, verbose;
421    char *ln, *p;
422    size_t len;
423    char *host;
424#ifdef INET6
425    char hbuf[MAXHOSTNAMELEN + 1];
426#endif
427
428    verbose = (flags && strchr(flags, 'v'));
429
430    host = URL->host;
431#ifdef INET6
432    if (strchr(URL->host, ':')) {
433	snprintf(hbuf, sizeof(hbuf), "[%s]", URL->host);
434	host = hbuf;
435    }
436#endif
437
438    /* send request (proxies require absolute form, so use that) */
439    if (verbose)
440	_fetch_info("requesting %s://%s:%d%s",
441		    URL->scheme, host, URL->port, URL->doc);
442    _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
443	      op, URL->scheme, host, URL->port, URL->doc);
444
445    /* start sending headers away */
446    if (URL->user[0] || URL->pwd[0]) {
447	char *auth_str = _http_auth(URL->user, URL->pwd);
448	if (!auth_str)
449	    return 999; /* XXX wrong */
450	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
451	free(auth_str);
452    }
453    _http_cmd(f, "Host: %s:%d" ENDL, host, URL->port);
454    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
455    if (URL->offset)
456	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
457    _http_cmd(f, "Connection: close" ENDL ENDL);
458
459    /* get response */
460    if ((ln = fgetln(f, &len)) == NULL)
461	return 999;
462    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
463		  (int)len-2, (int)len-2, ln));
464
465    /* we can't use strchr() and friends since ln isn't NUL-terminated */
466    p = ln;
467    while ((p < ln + len) && !isspace(*p))
468	p++;
469    while ((p < ln + len) && !isdigit(*p))
470	p++;
471    if (!isdigit(*p))
472	return 999;
473
474    e = atoi(p);
475    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
476    return e;
477}
478
/*
 * Check a header line
 *
 * Test whether the NUL-terminated header line `hdr' carries the field
 * name `str' (compared without regard to case) immediately followed by
 * a colon.  Returns a pointer into `hdr' at the first non-blank
 * character after the colon, or NULL if the name does not match.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Advance only past characters that actually matched; stepping
     * over a mismatching last character would make "Content-Typf:"
     * pass for "Content-Type".
     */
    while (*str && *hdr &&
	   tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	str++;
	hdr++;
    }
    if (*str || *hdr != ':')
	return NULL;

    /* step over the colon and any white space after it */
    while (*hdr && isspace((unsigned char)*++hdr))
	/* nothing */;
    return hdr;
}
493
494/*
495 * Retrieve a file by HTTP
496 */
497FILE *
498fetchGetHTTP(struct url *URL, char *flags)
499{
500    int e, enc = ENC_NONE, i;
501    struct cookie *c;
502    char *ln, *p, *q;
503    FILE *f, *cf;
504    size_t len;
505    off_t pos = 0;
506
507    /* allocate cookie */
508    if ((c = calloc(1, sizeof *c)) == NULL)
509	return NULL;
510
511    /* connect */
512    if ((f = _http_connect(URL, flags)) == NULL) {
513	free(c);
514	return NULL;
515    }
516    c->real_f = f;
517
518    e = _http_request(f, "GET", URL, flags);
519
520    /* add code to handle redirects later */
521    if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) {
522	_http_seterr(e);
523	goto fouch;
524    }
525
526    /* browse through header */
527    while (1) {
528	if ((ln = fgetln(f, &len)) == NULL)
529	    goto fouch;
530	if ((ln[0] == '\r') || (ln[0] == '\n'))
531	    break;
532	while (isspace(ln[len-1]))
533	    --len;
534	ln[len] = '\0'; /* XXX */
535	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
536	if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
537	    for (q = p; *q && !isspace(*q); q++)
538		/* VOID */ ;
539	    *q = 0;
540	    if (strcasecmp(p, "chunked") == 0)
541		enc = ENC_CHUNKED;
542	    DEBUG(fprintf(stderr, "transfer encoding:  [\033[1m%s\033[m]\n", p));
543	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
544	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
545		    c->content_type[i] = *p;
546	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
547	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
548			  c->content_type));
549	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
550	    if (strncasecmp(p, "bytes ", 6) != 0)
551		goto fouch;
552	    p += 6;
553	    while (*p && isdigit(*p))
554		pos = pos * 10 + (*p++ - '0');
555	    /* XXX wouldn't hurt to be slightly more paranoid here */
556	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos));
557	    if (pos > URL->offset)
558		goto fouch;
559	}
560    }
561
562    /* only body remains */
563    c->encoding = enc;
564    cf = funopen(c,
565		 (int (*)(void *, char *, int))_http_readfn,
566		 (int (*)(void *, const char *, int))_http_writefn,
567		 (fpos_t (*)(void *, fpos_t, int))NULL,
568		 (int (*)(void *))_http_closefn);
569    if (cf == NULL)
570	goto fouch;
571
572    while (pos < URL->offset)
573	if (fgetc(cf) == EOF)
574	    goto cfouch;
575
576    return cf;
577
578fouch:
579    fclose(f);
580    free(c);
581    _http_seterr(999); /* XXX do this properly RSN */
582    return NULL;
583cfouch:
584    fclose(cf);
585    _http_seterr(999); /* XXX do this properly RSN */
586    return NULL;
587}
588
/*
 * Store a file by HTTP: not implemented.  Prints a warning and
 * returns NULL without looking at its arguments.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return NULL;
}
595
596/*
597 * Get an HTTP document's metadata
598 */
599int
600fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
601{
602    int e;
603    size_t len;
604    char *ln, *p;
605    FILE *f;
606
607    us->size = -1;
608    us->atime = us->mtime = 0;
609
610    /* connect */
611    if ((f = _http_connect(URL, flags)) == NULL)
612	return -1;
613
614    if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) {
615	_http_seterr(e);
616	goto ouch;
617    }
618
619    while (1) {
620	if ((ln = fgetln(f, &len)) == NULL)
621	    goto fouch;
622	if ((ln[0] == '\r') || (ln[0] == '\n'))
623	    break;
624	while (isspace(ln[len-1]))
625	    --len;
626	ln[len] = '\0'; /* XXX */
627	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
628	if ((p = _http_match("Last-Modified", ln)) != NULL) {
629	    struct tm tm;
630	    char locale[64];
631
632	    strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale);
633	    setlocale(LC_TIME, "C");
634	    strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
635	    /* XXX should add support for date-2 and date-3 */
636	    setlocale(LC_TIME, locale);
637	    us->atime = us->mtime = timegm(&tm);
638	    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
639			  "%02d:%02d:%02d\033[m]\n",
640			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
641			  tm.tm_hour, tm.tm_min, tm.tm_sec));
642	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
643	    us->size = 0;
644	    while (*p && isdigit(*p))
645		us->size = us->size * 10 + (*p++ - '0');
646	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size));
647	}
648    }
649
650    fclose(f);
651    return 0;
652 ouch:
653    _http_seterr(999); /* XXX do this properly RSN */
654 fouch:
655    fclose(f);
656    return -1;
657}
658
/*
 * List a directory: not implemented.  Prints a warning and returns
 * NULL without looking at its arguments.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return NULL;
}
668