http.c revision 60587
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/lib/libfetch/http.c 60587 2000-05-15 09:05:36Z ume $
29 */
30
31/*
32 * The base64 code in this file is based on code from MIT fetch, which
33 * has the following copyright and license:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.	 M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE. */
62
63#include <sys/param.h>
64
65#include <err.h>
66#include <ctype.h>
67#include <locale.h>
68#include <netdb.h>
69#include <stdarg.h>
70#include <stdio.h>
71#include <stdlib.h>
72#include <string.h>
73#include <time.h>
74#include <unistd.h>
75
76#include "fetch.h"
77#include "common.h"
78#include "httperr.h"
79
80extern char *__progname;
81
82#define ENDL "\r\n"
83
84#define HTTP_OK		200
85#define HTTP_PARTIAL	206
86
87struct cookie
88{
89    FILE *real_f;
90#define ENC_NONE 0
91#define ENC_CHUNKED 1
92    int encoding;			/* 1 = chunked, 0 = none */
93#define HTTPCTYPELEN 59
94    char content_type[HTTPCTYPELEN+1];
95    char *buf;
96    int b_cur, eof;
97    unsigned b_len, chunksize;
98};
99
/*
 * Send a formatted line; optionally echo to terminal
 *
 * f is the stream to write to, fmt a printf-style format for the
 * remaining arguments.  Unless NDEBUG is defined the same text is also
 * written to stderr, wrapped in terminal escape sequences.
 *
 * Returns 0 on success, -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * A va_list is indeterminate once vfprintf() has consumed it, so
     * the echo needs a freshly started list of its own.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
119
120/*
121 * Fill the input buffer, do chunk decoding on the fly
122 */
123static char *
124_http_fillbuf(struct cookie *c)
125{
126    char *ln;
127    unsigned int len;
128
129    if (c->eof)
130	return NULL;
131
132    if (c->encoding == ENC_NONE) {
133	c->buf = fgetln(c->real_f, &(c->b_len));
134	c->b_cur = 0;
135    } else if (c->encoding == ENC_CHUNKED) {
136	if (c->chunksize == 0) {
137	    ln = fgetln(c->real_f, &len);
138	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
139			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
140	    sscanf(ln, "%x", &(c->chunksize));
141	    if (!c->chunksize) {
142		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
143			      "end of last chunk\033[m\n"));
144		c->eof = 1;
145		return NULL;
146	    }
147	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
148			  "new chunk: %X\033[m\n", c->chunksize));
149	}
150	c->buf = fgetln(c->real_f, &(c->b_len));
151	if (c->b_len > c->chunksize)
152	    c->b_len = c->chunksize;
153	c->chunksize -= c->b_len;
154	c->b_cur = 0;
155    }
156    else return NULL; /* unknown encoding */
157    return c->buf;
158}
159
160/*
161 * Read function
162 */
163static int
164_http_readfn(struct cookie *c, char *buf, int len)
165{
166    int l, pos = 0;
167    while (len) {
168	/* empty buffer */
169	if (!c->buf || (c->b_cur == c->b_len))
170	    if (!_http_fillbuf(c))
171		break;
172
173	l = c->b_len - c->b_cur;
174	if (len < l) l = len;
175	memcpy(buf + pos, c->buf + c->b_cur, l);
176	c->b_cur += l;
177	pos += l;
178	len -= l;
179    }
180
181    if (ferror(c->real_f))
182	return -1;
183    else return pos;
184}
185
186/*
187 * Write function
188 */
189static int
190_http_writefn(struct cookie *c, const char *buf, int len)
191{
192    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
193    return r ? r : -1;
194}
195
196/*
197 * Close function
198 */
199static int
200_http_closefn(struct cookie *c)
201{
202    int r = fclose(c->real_f);
203    free(c);
204    return (r == EOF) ? -1 : 0;
205}
206
207/*
208 * Extract content type from cookie
209 */
210char *
211fetchContentType(FILE *f)
212{
213    /*
214     * We have no way of making sure this really *is* one of our cookies,
215     * so just check for a null pointer and hope for the best.
216     */
217    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
218}
219
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as NUL-terminated base64 text,
 * padded with '=' to a multiple of four characters.  dst must have
 * room for 4 * ((l + 2) / 3) + 1 bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: where plain char is signed, a
     * byte >= 0x80 would sign-extend when shifted and set every bit
     * above it in the 24-bit group.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* whole groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* one or two bytes left over: emit a final, padded group */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
267
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the
 * base64 encoding of the single string "usr:pwd".
 *
 * Returns a malloc()ed NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    rawlen = lu + 1 + lp;		/* user name, colon, password */

    /* assemble "usr:pwd"; it is encoded as one unit, not piecewise */
    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = '\0';

    /* four output characters per (partial) group of three, plus NUL */
    if ((str = malloc((rawlen + 2) / 3 * 4 + 1)) == NULL) {
	free(raw);
	return NULL;
    }

    _http_base64(str, raw, (int)rawlen);
    free(raw);

    return str;
}
295
296/*
297 * Connect to server or proxy
298 */
299FILE *
300_http_connect(struct url *URL, char *flags)
301{
302    int direct, sd = -1, verbose;
303    size_t len;
304    char *px;
305    FILE *f;
306
307    direct = (flags && strchr(flags, 'd'));
308    verbose = (flags && strchr(flags, 'v'));
309
310    /* check port */
311    if (!URL->port) {
312	struct servent *se;
313
314	if (strcasecmp(URL->scheme, "ftp") == 0)
315	    if ((se = getservbyname("ftp", "tcp")) != NULL)
316		URL->port = ntohs(se->s_port);
317	    else
318		URL->port = 21;
319	else
320	    if ((se = getservbyname("http", "tcp")) != NULL)
321		URL->port = ntohs(se->s_port);
322	    else
323		URL->port = 80;
324    }
325
326    /* attempt to connect to proxy server */
327    if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
328	char host[MAXHOSTNAMELEN];
329	int port = 0;
330
331	/* measure length */
332	len = strcspn(px, ":");
333
334	/* get port (XXX atoi is a little too tolerant perhaps?) */
335	if (px[len] == ':') {
336	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
337		|| strlen(px+len+1) > 5) {
338		/* XXX we should emit some kind of warning */
339	    }
340	    port = atoi(px+len+1);
341	    if (port < 1 || port > 65535) {
342		/* XXX we should emit some kind of warning */
343	    }
344	}
345	if (!port) {
346#if 0
347	    /*
348	     * commented out, since there is currently no service name
349	     * for HTTP proxies
350	     */
351	    struct servent *se;
352
353	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
354		port = ntohs(se->s_port);
355	    else
356#endif
357		port = 3128;
358	}
359
360	/* get host name */
361	if (len >= MAXHOSTNAMELEN)
362	    len = MAXHOSTNAMELEN - 1;
363	strncpy(host, px, len);
364	host[len] = 0;
365
366	/* connect */
367	sd = _fetch_connect(host, port, verbose);
368    }
369
370    /* if no proxy is configured or could be contacted, try direct */
371    if (sd == -1) {
372	if (strcasecmp(URL->scheme, "ftp") == 0)
373	    goto ouch;
374	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
375	    goto ouch;
376    }
377
378    /* reopen as stream */
379    if ((f = fdopen(sd, "r+")) == NULL)
380	goto ouch;
381
382    return f;
383
384ouch:
385    if (sd >= 0)
386	close(sd);
387    _http_seterr(999); /* XXX do this properly RSN */
388    return NULL;
389}
390
391/*
392 * Send a HEAD or GET request
393 */
394int
395_http_request(FILE *f, char *op, struct url *URL, char *flags)
396{
397    int e, verbose;
398    char *ln, *p;
399    size_t len;
400
401    verbose = (flags && strchr(flags, 'v'));
402
403    /* send request (proxies require absolute form, so use that) */
404    if (verbose)
405	_fetch_info("requesting %s://%s:%d%s",
406		    URL->scheme, URL->host, URL->port, URL->doc);
407    _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
408	      op, URL->scheme, URL->host, URL->port, URL->doc);
409
410    /* start sending headers away */
411    if (URL->user[0] || URL->pwd[0]) {
412	char *auth_str = _http_auth(URL->user, URL->pwd);
413	if (!auth_str)
414	    return 999; /* XXX wrong */
415	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
416	free(auth_str);
417    }
418    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
419    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
420    if (URL->offset)
421	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
422    _http_cmd(f, "Connection: close" ENDL ENDL);
423
424    /* get response */
425    if ((ln = fgetln(f, &len)) == NULL)
426	return 999;
427    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
428		  (int)len-2, (int)len-2, ln));
429
430    /* we can't use strchr() and friends since ln isn't NUL-terminated */
431    p = ln;
432    while ((p < ln + len) && !isspace(*p))
433	p++;
434    while ((p < ln + len) && !isdigit(*p))
435	p++;
436    if (!isdigit(*p))
437	return 999;
438
439    e = atoi(p);
440    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
441    return e;
442}
443
/*
 * Check a header line
 *
 * str is a header name, hdr a NUL-terminated header line.  If hdr
 * consists of that name (compared without regard to case) followed
 * directly by a colon, returns a pointer to the first character after
 * the colon that is not whitespace; otherwise returns NULL.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Only step forward after a successful comparison; advancing
     * inside the condition would also step over a mismatch in the
     * final character of the name and let it pass.
     */
    while (*str && *hdr &&
	   tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	str++;
	hdr++;
    }
    if (*str || *hdr != ':')
	return NULL;

    /* step over the colon and any whitespace that follows it */
    hdr++;
    while (*hdr && isspace((unsigned char)*hdr))
	hdr++;
    return hdr;
}
458
459/*
460 * Retrieve a file by HTTP
461 */
462FILE *
463fetchGetHTTP(struct url *URL, char *flags)
464{
465    int e, enc = ENC_NONE, i;
466    struct cookie *c;
467    char *ln, *p, *q;
468    FILE *f, *cf;
469    size_t len;
470    off_t pos = 0;
471
472    /* allocate cookie */
473    if ((c = calloc(1, sizeof *c)) == NULL)
474	return NULL;
475
476    /* connect */
477    if ((f = _http_connect(URL, flags)) == NULL) {
478	free(c);
479	return NULL;
480    }
481    c->real_f = f;
482
483    e = _http_request(f, "GET", URL, flags);
484
485    /* add code to handle redirects later */
486    if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) {
487	_http_seterr(e);
488	goto fouch;
489    }
490
491    /* browse through header */
492    while (1) {
493	if ((ln = fgetln(f, &len)) == NULL)
494	    goto fouch;
495	if ((ln[0] == '\r') || (ln[0] == '\n'))
496	    break;
497	while (isspace(ln[len-1]))
498	    --len;
499	ln[len] = '\0'; /* XXX */
500	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
501	if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
502	    for (q = p; *q && !isspace(*q); q++)
503		/* VOID */ ;
504	    *q = 0;
505	    if (strcasecmp(p, "chunked") == 0)
506		enc = ENC_CHUNKED;
507	    DEBUG(fprintf(stderr, "transfer encoding:  [\033[1m%s\033[m]\n", p));
508	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
509	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
510		    c->content_type[i] = *p;
511	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
512	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
513			  c->content_type));
514	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
515	    if (strncasecmp(p, "bytes ", 6) != 0)
516		goto fouch;
517	    p += 6;
518	    while (*p && isdigit(*p))
519		pos = pos * 10 + (*p++ - '0');
520	    /* XXX wouldn't hurt to be slightly more paranoid here */
521	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos));
522	    if (pos > URL->offset)
523		goto fouch;
524	}
525    }
526
527    /* only body remains */
528    c->encoding = enc;
529    cf = funopen(c,
530		 (int (*)(void *, char *, int))_http_readfn,
531		 (int (*)(void *, const char *, int))_http_writefn,
532		 (fpos_t (*)(void *, fpos_t, int))NULL,
533		 (int (*)(void *))_http_closefn);
534    if (cf == NULL)
535	goto fouch;
536
537    while (pos < URL->offset)
538	if (fgetc(cf) == EOF)
539	    goto cfouch;
540
541    return cf;
542
543fouch:
544    fclose(f);
545    free(c);
546    _http_seterr(999); /* XXX do this properly RSN */
547    return NULL;
548cfouch:
549    fclose(cf);
550    _http_seterr(999); /* XXX do this properly RSN */
551    return NULL;
552}
553
/*
 * Store a file by HTTP
 *
 * Placeholder: emits a warning and always returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *result = NULL;

    warnx("fetchPutHTTP(): not implemented");
    return result;
}
560
561/*
562 * Get an HTTP document's metadata
563 */
564int
565fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
566{
567    int e;
568    size_t len;
569    char *ln, *p;
570    FILE *f;
571
572    us->size = -1;
573    us->atime = us->mtime = 0;
574
575    /* connect */
576    if ((f = _http_connect(URL, flags)) == NULL)
577	return -1;
578
579    if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) {
580	_http_seterr(e);
581	goto ouch;
582    }
583
584    while (1) {
585	if ((ln = fgetln(f, &len)) == NULL)
586	    goto fouch;
587	if ((ln[0] == '\r') || (ln[0] == '\n'))
588	    break;
589	while (isspace(ln[len-1]))
590	    --len;
591	ln[len] = '\0'; /* XXX */
592	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
593	if ((p = _http_match("Last-Modified", ln)) != NULL) {
594	    struct tm tm;
595	    char locale[64];
596
597	    strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale);
598	    setlocale(LC_TIME, "C");
599	    strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
600	    /* XXX should add support for date-2 and date-3 */
601	    setlocale(LC_TIME, locale);
602	    us->atime = us->mtime = timegm(&tm);
603	    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
604			  "%02d:%02d:%02d\033[m]\n",
605			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
606			  tm.tm_hour, tm.tm_min, tm.tm_sec));
607	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
608	    us->size = 0;
609	    while (*p && isdigit(*p))
610		us->size = us->size * 10 + (*p++ - '0');
611	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size));
612	}
613    }
614
615    fclose(f);
616    return 0;
617 ouch:
618    _http_seterr(999); /* XXX do this properly RSN */
619 fouch:
620    fclose(f);
621    return -1;
622}
623
/*
 * List a directory
 *
 * Placeholder: emits a warning and always returns NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *listing = NULL;

    warnx("fetchListHTTP(): not implemented");
    return listing;
}
633