http.c revision 60707
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/lib/libfetch/http.c 60707 2000-05-19 09:45:42Z des $
29 */
30
31/*
32 * The base64 code in this file is based on code from MIT fetch, which
33 * has the following copyright and license:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.	 M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE. */
62
63#include <sys/param.h>
64
65#include <err.h>
66#include <ctype.h>
67#include <locale.h>
68#include <netdb.h>
69#include <stdarg.h>
70#include <stdio.h>
71#include <stdlib.h>
72#include <string.h>
73#include <time.h>
74#include <unistd.h>
75
76#include "fetch.h"
77#include "common.h"
78#include "httperr.h"
79
/* Program name, defined elsewhere; sent at the start of the User-Agent header */
extern char *__progname;

/* Line terminator appended to every request and header line we send */
#define ENDL "\r\n"

/* The response codes this file accepts as success */
enum {
    HTTP_OK = 200,
    HTTP_PARTIAL = 206
};
86
/*
 * State attached to the stream created with funopen(); the read, write
 * and close callbacks in this file all operate on it.
 */
struct cookie
{
    FILE *real_f;			/* stream wrapping the socket */
#define ENC_NONE 0
#define ENC_CHUNKED 1
    int encoding;			/* 1 = chunked, 0 = none */
#define HTTPCTYPELEN 59
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type value */
    char *buf;				/* most recent line from fgetln() */
    int b_cur;				/* read offset into buf */
    int eof;				/* set once the last chunk was seen */
    /*
     * b_len holds line lengths produced by fgetln(), which reports them
     * as size_t; a plain unsigned is too narrow on LP64 platforms.
     */
    size_t b_len;			/* number of usable bytes in buf */
    unsigned chunksize;			/* bytes remaining in current chunk */
};
99
/*
 * Send a formatted line; optionally echo to terminal
 *
 * f is the stream the line is written to; fmt and the arguments after it
 * are interpreted as by printf().  Unless NDEBUG is defined, the same
 * line is also echoed to stderr between bold escape sequences.
 *
 * Returns 0 on success, or -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, const char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);
#ifndef NDEBUG
    /*
     * A va_list is indeterminate after a v*printf() call has consumed
     * it, so argument processing is restarted for the echo.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
119
120/*
121 * Fill the input buffer, do chunk decoding on the fly
122 */
123static char *
124_http_fillbuf(struct cookie *c)
125{
126    char *ln;
127    unsigned int len;
128
129    if (c->eof)
130	return NULL;
131
132    if (c->encoding == ENC_NONE) {
133	c->buf = fgetln(c->real_f, &(c->b_len));
134	c->b_cur = 0;
135    } else if (c->encoding == ENC_CHUNKED) {
136	if (c->chunksize == 0) {
137	    ln = fgetln(c->real_f, &len);
138	    if (len <= 2)
139		return NULL;
140	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
141			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
142	    sscanf(ln, "%x", &(c->chunksize));
143	    if (!c->chunksize) {
144		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
145			      "end of last chunk\033[m\n"));
146		c->eof = 1;
147		return NULL;
148	    }
149	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
150			  "new chunk: %X\033[m\n", c->chunksize));
151	}
152	c->buf = fgetln(c->real_f, &(c->b_len));
153	if (c->b_len > c->chunksize)
154	    c->b_len = c->chunksize;
155	c->chunksize -= c->b_len;
156	c->b_cur = 0;
157    }
158    else return NULL; /* unknown encoding */
159    return c->buf;
160}
161
162/*
163 * Read function
164 */
165static int
166_http_readfn(struct cookie *c, char *buf, int len)
167{
168    int l, pos = 0;
169    while (len) {
170	/* empty buffer */
171	if (!c->buf || (c->b_cur == c->b_len))
172	    if (!_http_fillbuf(c))
173		break;
174
175	l = c->b_len - c->b_cur;
176	if (len < l) l = len;
177	memcpy(buf + pos, c->buf + c->b_cur, l);
178	c->b_cur += l;
179	pos += l;
180	len -= l;
181    }
182
183    if (ferror(c->real_f))
184	return -1;
185    else return pos;
186}
187
188/*
189 * Write function
190 */
191static int
192_http_writefn(struct cookie *c, const char *buf, int len)
193{
194    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
195    return r ? r : -1;
196}
197
198/*
199 * Close function
200 */
201static int
202_http_closefn(struct cookie *c)
203{
204    int r = fclose(c->real_f);
205    free(c);
206    return (r == EOF) ? -1 : 0;
207}
208
209/*
210 * Extract content type from cookie
211 */
212char *
213fetchContentType(FILE *f)
214{
215    /*
216     * We have no way of making sure this really *is* one of our cookies,
217     * so just check for a null pointer and hope for the best.
218     */
219    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
220}
221
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst as '='-padded base64 and appends a
 * terminating NUL.  dst must have room for 4 * ((l + 2) / 3) + 1 bytes.
 * Returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed char, values of
     * 0x80 and above would sign-extend and corrupt the other bits.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* encode the one or two leftover bytes, padding with '=' */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
269
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header, i.e. the
 * base64 encoding of the single string "usr:pwd".
 *
 * Returns a newly allocated NUL-terminated string which the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plain_len, enc_len;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plain_len = lu + 1 + lp;		/* user name, colon, password */

    /* the colon is part of the encoded data, so join before encoding */
    if ((plain = malloc(plain_len + 1)) == NULL)
	return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp);
    plain[plain_len] = 0;

    /* base64 emits four characters per started group of three bytes */
    enc_len = (plain_len + 2) / 3 * 4;
    if ((str = malloc(enc_len + 1)) == NULL) {
	free(plain);
	return NULL;
    }

    _http_base64(str, plain, (int)plain_len);
    free(plain);

    return str;
}
297
298/*
299 * Connect to server or proxy
300 */
301FILE *
302_http_connect(struct url *URL, char *flags)
303{
304    int direct, sd = -1, verbose;
305    size_t len;
306    char *px;
307    FILE *f;
308
309    direct = (flags && strchr(flags, 'd'));
310    verbose = (flags && strchr(flags, 'v'));
311
312    /* check port */
313    if (!URL->port) {
314	struct servent *se;
315
316	if (strcasecmp(URL->scheme, "ftp") == 0)
317	    if ((se = getservbyname("ftp", "tcp")) != NULL)
318		URL->port = ntohs(se->s_port);
319	    else
320		URL->port = 21;
321	else
322	    if ((se = getservbyname("http", "tcp")) != NULL)
323		URL->port = ntohs(se->s_port);
324	    else
325		URL->port = 80;
326    }
327
328    /* attempt to connect to proxy server */
329    if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
330	char host[MAXHOSTNAMELEN];
331	int port = 0;
332
333	/* measure length */
334	len = strcspn(px, ":");
335
336	/* get port (XXX atoi is a little too tolerant perhaps?) */
337	if (px[len] == ':') {
338	    if (strspn(px+len+1, "0123456789") != strlen(px+len+1)
339		|| strlen(px+len+1) > 5) {
340		/* XXX we should emit some kind of warning */
341	    }
342	    port = atoi(px+len+1);
343	    if (port < 1 || port > 65535) {
344		/* XXX we should emit some kind of warning */
345	    }
346	}
347	if (!port) {
348#if 0
349	    /*
350	     * commented out, since there is currently no service name
351	     * for HTTP proxies
352	     */
353	    struct servent *se;
354
355	    if ((se = getservbyname("xxxx", "tcp")) != NULL)
356		port = ntohs(se->s_port);
357	    else
358#endif
359		port = 3128;
360	}
361
362	/* get host name */
363	if (len >= MAXHOSTNAMELEN)
364	    len = MAXHOSTNAMELEN - 1;
365	strncpy(host, px, len);
366	host[len] = 0;
367
368	/* connect */
369	sd = _fetch_connect(host, port, verbose);
370    }
371
372    /* if no proxy is configured or could be contacted, try direct */
373    if (sd == -1) {
374	if (strcasecmp(URL->scheme, "ftp") == 0)
375	    goto ouch;
376	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
377	    goto ouch;
378    }
379
380    /* reopen as stream */
381    if ((f = fdopen(sd, "r+")) == NULL)
382	goto ouch;
383
384    return f;
385
386ouch:
387    if (sd >= 0)
388	close(sd);
389    _http_seterr(999); /* XXX do this properly RSN */
390    return NULL;
391}
392
393/*
394 * Send a HEAD or GET request
395 */
396int
397_http_request(FILE *f, char *op, struct url *URL, char *flags)
398{
399    int e, verbose;
400    char *ln, *p;
401    size_t len;
402
403    verbose = (flags && strchr(flags, 'v'));
404
405    /* send request (proxies require absolute form, so use that) */
406    if (verbose)
407	_fetch_info("requesting %s://%s:%d%s",
408		    URL->scheme, URL->host, URL->port, URL->doc);
409    _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL,
410	      op, URL->scheme, URL->host, URL->port, URL->doc);
411
412    /* start sending headers away */
413    if (URL->user[0] || URL->pwd[0]) {
414	char *auth_str = _http_auth(URL->user, URL->pwd);
415	if (!auth_str)
416	    return 999; /* XXX wrong */
417	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
418	free(auth_str);
419    }
420    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
421    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
422    if (URL->offset)
423	_http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset);
424    _http_cmd(f, "Connection: close" ENDL ENDL);
425
426    /* get response */
427    if ((ln = fgetln(f, &len)) == NULL)
428	return 999;
429    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
430		  (int)len-2, (int)len-2, ln));
431
432    /* we can't use strchr() and friends since ln isn't NUL-terminated */
433    p = ln;
434    while ((p < ln + len) && !isspace(*p))
435	p++;
436    while ((p < ln + len) && !isdigit(*p))
437	p++;
438    if (!isdigit(*p))
439	return 999;
440
441    e = atoi(p);
442    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
443    return e;
444}
445
/*
 * Check a header line
 *
 * Compares the header name str, ignoring case, against the start of the
 * NUL-terminated header line hdr.  The name must be followed directly by
 * a colon.
 *
 * Returns a pointer to the header's value (the first character after the
 * colon that is not white space), or NULL if the name does not match.
 */
char *
_http_match(char *str, char *hdr)
{
    /*
     * Advance only past characters that actually match; stepping over a
     * mismatch would let a name differing in its last character pass.
     */
    while (*str && *hdr &&
	   tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	str++;
	hdr++;
    }
    if (*str || *hdr != ':')
	return NULL;

    hdr++; /* skip the colon */
    while (*hdr && isspace((unsigned char)*hdr))
	hdr++;
    return hdr;
}
460
461/*
462 * Retrieve a file by HTTP
463 */
464FILE *
465fetchGetHTTP(struct url *URL, char *flags)
466{
467    int e, enc = ENC_NONE, i;
468    struct cookie *c;
469    char *ln, *p, *q;
470    FILE *f, *cf;
471    size_t len;
472    off_t pos = 0;
473
474    /* allocate cookie */
475    if ((c = calloc(1, sizeof *c)) == NULL)
476	return NULL;
477
478    /* connect */
479    if ((f = _http_connect(URL, flags)) == NULL) {
480	free(c);
481	return NULL;
482    }
483    c->real_f = f;
484
485    e = _http_request(f, "GET", URL, flags);
486
487    /* add code to handle redirects later */
488    if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) {
489	_http_seterr(e);
490	goto fouch;
491    }
492
493    /* browse through header */
494    while (1) {
495	if ((ln = fgetln(f, &len)) == NULL)
496	    goto fouch;
497	if ((ln[0] == '\r') || (ln[0] == '\n'))
498	    break;
499	while (isspace(ln[len-1]))
500	    --len;
501	ln[len] = '\0'; /* XXX */
502	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
503	if ((p = _http_match("Transfer-Encoding", ln)) != NULL) {
504	    for (q = p; *q && !isspace(*q); q++)
505		/* VOID */ ;
506	    *q = 0;
507	    if (strcasecmp(p, "chunked") == 0)
508		enc = ENC_CHUNKED;
509	    DEBUG(fprintf(stderr, "transfer encoding:  [\033[1m%s\033[m]\n", p));
510	} else if ((p = _http_match("Content-Type", ln)) != NULL) {
511	    for (i = 0; *p && i < HTTPCTYPELEN; p++, i++)
512		    c->content_type[i] = *p;
513	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
514	    DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n",
515			  c->content_type));
516	} else if ((p = _http_match("Content-Range", ln)) != NULL) {
517	    if (strncasecmp(p, "bytes ", 6) != 0)
518		goto fouch;
519	    p += 6;
520	    while (*p && isdigit(*p))
521		pos = pos * 10 + (*p++ - '0');
522	    /* XXX wouldn't hurt to be slightly more paranoid here */
523	    DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos));
524	    if (pos > URL->offset)
525		goto fouch;
526	}
527    }
528
529    /* only body remains */
530    c->encoding = enc;
531    cf = funopen(c,
532		 (int (*)(void *, char *, int))_http_readfn,
533		 (int (*)(void *, const char *, int))_http_writefn,
534		 (fpos_t (*)(void *, fpos_t, int))NULL,
535		 (int (*)(void *))_http_closefn);
536    if (cf == NULL)
537	goto fouch;
538
539    while (pos < URL->offset)
540	if (fgetc(cf) == EOF)
541	    goto cfouch;
542
543    return cf;
544
545fouch:
546    fclose(f);
547    free(c);
548    _http_seterr(999); /* XXX do this properly RSN */
549    return NULL;
550cfouch:
551    fclose(cf);
552    _http_seterr(999); /* XXX do this properly RSN */
553    return NULL;
554}
555
/*
 * Store a file by HTTP
 *
 * Not implemented: prints a warning and returns NULL, whatever the
 * arguments.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *stream = NULL;

    (void)URL;
    (void)flags;

    warnx("fetchPutHTTP(): not implemented");
    return stream;
}
562
563/*
564 * Get an HTTP document's metadata
565 */
566int
567fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags)
568{
569    int e;
570    size_t len;
571    char *ln, *p;
572    FILE *f;
573
574    us->size = -1;
575    us->atime = us->mtime = 0;
576
577    /* connect */
578    if ((f = _http_connect(URL, flags)) == NULL)
579	return -1;
580
581    if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) {
582	_http_seterr(e);
583	goto ouch;
584    }
585
586    while (1) {
587	if ((ln = fgetln(f, &len)) == NULL)
588	    goto fouch;
589	if ((ln[0] == '\r') || (ln[0] == '\n'))
590	    break;
591	while (isspace(ln[len-1]))
592	    --len;
593	ln[len] = '\0'; /* XXX */
594	DEBUG(fprintf(stderr, "header:	 [\033[1m%s\033[m]\n", ln));
595	if ((p = _http_match("Last-Modified", ln)) != NULL) {
596	    struct tm tm;
597	    char locale[64];
598
599	    strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale);
600	    setlocale(LC_TIME, "C");
601	    strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
602	    /* XXX should add support for date-2 and date-3 */
603	    setlocale(LC_TIME, locale);
604	    us->atime = us->mtime = timegm(&tm);
605	    DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d "
606			  "%02d:%02d:%02d\033[m]\n",
607			  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
608			  tm.tm_hour, tm.tm_min, tm.tm_sec));
609	} else if ((p = _http_match("Content-Length", ln)) != NULL) {
610	    us->size = 0;
611	    while (*p && isdigit(*p))
612		us->size = us->size * 10 + (*p++ - '0');
613	    DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size));
614	}
615    }
616
617    fclose(f);
618    return 0;
619 ouch:
620    _http_seterr(999); /* XXX do this properly RSN */
621 fouch:
622    fclose(f);
623    return -1;
624}
625
/*
 * List a directory
 *
 * Not implemented: prints a warning and returns NULL, whatever the
 * arguments.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *entries = NULL;

    (void)url;
    (void)flags;

    warnx("fetchListHTTP(): not implemented");
    return entries;
}
635