http.c revision 88771
1/*-
2 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libfetch/http.c 88771 2002-01-01 16:25:29Z des $");
31
32/*
33 * The following copyright applies to the base64 code:
34 *
35 *-
36 * Copyright 1997 Massachusetts Institute of Technology
37 *
38 * Permission to use, copy, modify, and distribute this software and
39 * its documentation for any purpose and without fee is hereby
40 * granted, provided that both the above copyright notice and this
41 * permission notice appear in all copies, that both the above
42 * copyright notice and this permission notice appear in all
43 * supporting documentation, and that the name of M.I.T. not be used
44 * in advertising or publicity pertaining to distribution of the
45 * software without specific, written prior permission.  M.I.T. makes
46 * no representations about the suitability of this software for any
47 * purpose.  It is provided "as is" without express or implied
48 * warranty.
49 *
50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
#include <sys/param.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "fetch.h"
#include "common.h"
#include "httperr.h"
82
extern char *__progname; /* XXX not portable */

/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
/* Not a real status code: used internally for a malformed reply */
#define HTTP_PROTOCOL_ERROR	999

/* True if the reply code is one of the redirects listed above */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True if the reply code is a client (4xx) or server (5xx) error.
 * Both ends of the range are inclusive, so 400 and 599 count as errors.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
103
104
105/*****************************************************************************
106 * I/O functions for decoding chunked streams
107 */
108
/* State of one chunk-decoding stream; handed to funopen() as its cookie */
struct cookie
{
    int		 fd;		/* file descriptor being wrapped */
    char	*buf;		/* input buffer, allocated on demand */
    size_t	 b_size;	/* allocated size of buf */
    ssize_t	 b_len;		/* number of bytes currently held in buf */
    int		 b_pos;		/* offset of the next unread byte in buf */
    int		 eof;		/* set once the zero-sized last chunk is seen */
    int		 error;		/* set once a chunk header could not be parsed */
    size_t	 chunksize;	/* bytes of the current chunk not yet read */
#ifndef NDEBUG
    size_t	 total;		/* sum of all chunk sizes seen (debug output) */
#endif
};
123
124/*
125 * Get next chunk header
126 */
127static int
128_http_new_chunk(struct cookie *c)
129{
130    char *p;
131
132    if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1)
133	return -1;
134
135    if (c->b_len < 2 || !ishexnumber(*c->buf))
136	return -1;
137
138    for (p = c->buf; !isspace(*p) && *p != ';' && p < c->buf + c->b_len; ++p)
139	if (!ishexnumber(*p))
140	    return -1;
141	else if (isdigit(*p))
142	    c->chunksize = c->chunksize * 16 + *p - '0';
143	else
144	    c->chunksize = c->chunksize * 16 + 10 + tolower(*p) - 'a';
145
146#ifndef NDEBUG
147    if (fetchDebug) {
148	c->total += c->chunksize;
149	if (c->chunksize == 0)
150	    fprintf(stderr, "_http_fillbuf(): "
151		    "end of last chunk\n");
152	else
153	    fprintf(stderr, "_http_fillbuf(): "
154		    "new chunk: %lu (%lu)\n",
155		    (unsigned long)c->chunksize, (unsigned long)c->total);
156    }
157#endif
158
159    return c->chunksize;
160}
161
162/*
163 * Fill the input buffer, do chunk decoding on the fly
164 */
165static int
166_http_fillbuf(struct cookie *c)
167{
168    if (c->error)
169	return -1;
170    if (c->eof)
171	return 0;
172
173    if (c->chunksize == 0) {
174	switch (_http_new_chunk(c)) {
175	case -1:
176	    c->error = 1;
177	    return -1;
178	case 0:
179	    c->eof = 1;
180	    return 0;
181	}
182    }
183
184    if (c->b_size < c->chunksize) {
185	char *tmp;
186
187	if ((tmp = realloc(c->buf, c->chunksize)) == NULL)
188	    return -1;
189	c->buf = tmp;
190	c->b_size = c->chunksize;
191    }
192
193    if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1)
194	return -1;
195    c->chunksize -= c->b_len;
196
197    if (c->chunksize == 0) {
198	char endl[2];
199	read(c->fd, endl, 2);
200    }
201
202    c->b_pos = 0;
203
204    return c->b_len;
205}
206
207/*
208 * Read function
209 */
210static int
211_http_readfn(void *v, char *buf, int len)
212{
213    struct cookie *c = (struct cookie *)v;
214    int l, pos;
215
216    if (c->error)
217	return -1;
218    if (c->eof)
219	return 0;
220
221    for (pos = 0; len > 0; pos += l, len -= l) {
222	/* empty buffer */
223	if (!c->buf || c->b_pos == c->b_len)
224	    if (_http_fillbuf(c) < 1)
225		break;
226	l = c->b_len - c->b_pos;
227	if (len < l)
228	    l = len;
229	bcopy(c->buf + c->b_pos, buf + pos, l);
230	c->b_pos += l;
231    }
232
233    if (!pos && c->error)
234	return -1;
235    return pos;
236}
237
238/*
239 * Write function
240 */
241static int
242_http_writefn(void *v, const char *buf, int len)
243{
244    struct cookie *c = (struct cookie *)v;
245
246    return write(c->fd, buf, len);
247}
248
249/*
250 * Close function
251 */
252static int
253_http_closefn(void *v)
254{
255    struct cookie *c = (struct cookie *)v;
256    int r;
257
258    r = close(c->fd);
259    if (c->buf)
260	free(c->buf);
261    free(c);
262    return r;
263}
264
265/*
266 * Wrap a file descriptor up
267 */
268static FILE *
269_http_funopen(int fd)
270{
271    struct cookie *c;
272    FILE *f;
273
274    if ((c = calloc(1, sizeof *c)) == NULL) {
275	_fetch_syserr();
276	return NULL;
277    }
278    c->fd = fd;
279    if (!(f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn))) {
280	_fetch_syserr();
281	free(c);
282	return NULL;
283    }
284    return f;
285}
286
287
288/*****************************************************************************
289 * Helper functions for talking to the server and parsing its replies
290 */
291
/*
 * Header types
 *
 * Values at or below hdr_end terminate the header loop; everything above
 * it is an actual header line.
 */
typedef enum {
    hdr_syserror		= -2,	/* reading the line failed */
    hdr_error			= -1,	/* protocol error */
    hdr_end			=  0,	/* empty line: end of headers */
    hdr_unknown			=  1,	/* a header we have no use for */
    hdr_content_length		=  2,
    hdr_content_range		=  3,
    hdr_last_modified		=  4,
    hdr_location		=  5,
    hdr_transfer_encoding	=  6,
    hdr_www_authenticate	=  7
} hdr_t;
305
306/* Names of interesting headers */
307static struct {
308    hdr_t	 num;
309    const char	*name;
310} hdr_names[] = {
311    { hdr_content_length,	"Content-Length" },
312    { hdr_content_range,	"Content-Range" },
313    { hdr_last_modified,	"Last-Modified" },
314    { hdr_location,		"Location" },
315    { hdr_transfer_encoding,	"Transfer-Encoding" },
316    { hdr_www_authenticate,	"WWW-Authenticate" },
317    { hdr_unknown,		NULL },
318};
319
/*
 * Line buffer shared by _http_get_reply() and _http_next_header(); both
 * hand these three variables to _fetch_getln().
 */
static char	*reply_buf = NULL;	/* most recent reply line */
static size_t	 reply_size = 0;	/* size argument for _fetch_getln() */
static size_t	 reply_length = 0;	/* length of the line in reply_buf */
323
/*
 * Send a formatted line
 *
 * Formats the printf-style arguments into a temporary string and passes
 * it to _fetch_putln() for transmission on fd.
 *
 * Returns 0 on success; on failure records the system error and
 * returns -1.
 */
static int
_http_cmd(int fd, const char *fmt, ...)
{
    va_list ap;
    char *msg;
    int len, r;

    va_start(ap, fmt);
    len = vasprintf(&msg, fmt, ap);
    va_end(ap);

    /*
     * Test the return value, not msg: not every C library sets the
     * pointer to NULL when vasprintf() fails.
     */
    if (len < 0) {
	errno = ENOMEM;
	_fetch_syserr();
	return -1;
    }

    r = _fetch_putln(fd, msg, len);
    free(msg);

    if (r == -1) {
	_fetch_syserr();
	return -1;
    }

    return 0;
}
355
356/*
357 * Get and parse status line
358 */
359static int
360_http_get_reply(int fd)
361{
362    char *p;
363
364    if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
365	return -1;
366    /*
367     * A valid status line looks like "HTTP/m.n xyz reason" where m
368     * and n are the major and minor protocol version numbers and xyz
369     * is the reply code.
370     * Unfortunately, there are servers out there (NCSA 1.5.1, to name
371     * just one) that do not send a version number, so we can't rely
372     * on finding one, but if we do, insist on it being 1.0 or 1.1.
373     * We don't care about the reason phrase.
374     */
375    if (strncmp(reply_buf, "HTTP", 4) != 0)
376	return HTTP_PROTOCOL_ERROR;
377    p = reply_buf + 4;
378    if (*p == '/') {
379	if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
380	    return HTTP_PROTOCOL_ERROR;
381	p += 4;
382    }
383    if (*p != ' '
384	|| !isdigit(p[1])
385	|| !isdigit(p[2])
386	|| !isdigit(p[3]))
387	return HTTP_PROTOCOL_ERROR;
388
389    return ((p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'));
390}
391
/*
 * Check a header; if the type matches the given string, return a
 * pointer to the beginning of the value.
 *
 * str is the header name to look for (without the colon), hdr the
 * complete header line.  The name is compared case-insensitively and
 * must be followed immediately by ':'.  Returns a pointer into hdr just
 * past the colon and any whitespace after it, or NULL if the line is a
 * different header.
 */
static const char *
_http_match(const char *str, const char *hdr)
{
    /*
     * Advance only while the characters agree.  Incrementing inside
     * the comparison would step past the first mismatch, so a name
     * that differs only in its last character would be accepted.
     */
    while (*str && *hdr &&
	   tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
	++str;
	++hdr;
    }
    if (*str || *hdr != ':')
	return NULL;
    /* hdr is on the colon: skip it and the whitespace that follows */
    while (*hdr && isspace((unsigned char)*++hdr))
	/* nothing */;
    return hdr;
}
407
408/*
409 * Get the next header and return the appropriate symbolic code.
410 */
411static hdr_t
412_http_next_header(int fd, const char **p)
413{
414    int i;
415
416    if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
417	return hdr_syserror;
418    while (reply_length && isspace(reply_buf[reply_length-1]))
419	reply_length--;
420    reply_buf[reply_length] = 0;
421    if (reply_length == 0)
422	return hdr_end;
423    /*
424     * We could check for malformed headers but we don't really care.
425     * A valid header starts with a token immediately followed by a
426     * colon; a token is any sequence of non-control, non-whitespace
427     * characters except "()<>@,;:\\\"{}".
428     */
429    for (i = 0; hdr_names[i].num != hdr_unknown; i++)
430	if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL)
431	    return hdr_names[i].num;
432    return hdr_unknown;
433}
434
/*
 * Parse a last-modified header
 *
 * p is the header value; only the "Day, DD Mon YYYY HH:MM:SS GMT" form
 * is understood.  The LC_TIME locale is switched to "C" for the duration
 * of the parse and restored afterwards.
 *
 * Returns 0 and stores the time in *mtime, or -1 (leaving *mtime alone)
 * if the value cannot be parsed.
 */
static int
_http_parse_mtime(const char *p, time_t *mtime)
{
    char locale[64], *r;
    const char *cur;
    struct tm tm;

    /* snprintf() always NUL-terminates the copy, unlike strncpy() */
    if ((cur = setlocale(LC_TIME, NULL)) == NULL)
	cur = "C";
    snprintf(locale, sizeof locale, "%s", cur);
    setlocale(LC_TIME, "C");
    /* start from a fully defined struct before it is converted below */
    memset(&tm, 0, sizeof tm);
    r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
    /* XXX should add support for date-2 and date-3 */
    setlocale(LC_TIME, locale);
    if (r == NULL)
	return -1;
    DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d "
		  "%02d:%02d:%02d]\n",
		  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		  tm.tm_hour, tm.tm_min, tm.tm_sec));
    *mtime = timegm(&tm);
    return 0;
}
458
/*
 * Parse a content-length header
 *
 * p must consist of decimal digits only.  Returns 0 and stores the
 * number in *length, or -1 (leaving *length alone) if any other
 * character is found.
 */
static int
_http_parse_length(const char *p, off_t *length)
{
    off_t value;

    value = 0;
    while (*p && isdigit(*p)) {
	value = value * 10 + (*p - '0');
	++p;
    }
    if (*p != '\0')
	return -1;
    DEBUG(fprintf(stderr, "content length: [%lld]\n",
		  (long long)value));
    *length = value;
    return 0;
}
476
/*
 * Parse a content-range header
 *
 * Accepts "bytes FIRST-LAST/TOTAL" (the unit is matched without regard
 * to case).  On success stores FIRST in *offset, LAST - FIRST + 1 in
 * *length and TOTAL in *size and returns 0.  Returns -1, storing
 * nothing, if the syntax is wrong, FIRST is beyond LAST, or TOTAL is
 * smaller than the range.
 */
static int
_http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
    off_t start, end, total;

    if (strncasecmp(p, "bytes ", 6) != 0)
	return -1;
    p += 6;

    /* first byte position */
    start = 0;
    while (*p && isdigit(*p)) {
	start = start * 10 + *p - '0';
	++p;
    }
    if (*p != '-')
	return -1;
    ++p;

    /* last byte position */
    end = 0;
    while (*p && isdigit(*p)) {
	end = end * 10 + *p - '0';
	++p;
    }
    if (start > end || *p != '/')
	return -1;
    ++p;

    /* total size of the document */
    total = 0;
    while (*p && isdigit(*p)) {
	total = total * 10 + *p - '0';
	++p;
    }
    if (*p || total < end - start + 1)
	return -1;

    DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n",
		  (long long)start, (long long)end, (long long)total));
    *offset = start;
    *length = end - start + 1;
    *size = total;
    return 0;
}
506
507
508/*****************************************************************************
509 * Helper functions for authorization
510 */
511
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src.  Returns a newly allocated,
 * NUL-terminated string that the caller must free(), or NULL if the
 * allocation fails.
 */
static char *
_http_base64(char *src)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    const unsigned char *in;
    char *str, *dst;
    size_t l;
    unsigned int t;

    /* work on unsigned bytes so that values >= 0x80 are not sign-extended */
    in = (const unsigned char *)src;
    l = strlen(src);
    /* four output characters per three input bytes, plus the NUL */
    if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
	return NULL;
    dst = str;

    while (l >= 3) {
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4;
    }

    /* one or two bytes left over: pad the last group with '=' */
    switch (l) {
    case 2:
	t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	break;
    case 1:
	t = (unsigned int)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	break;
    case 0:
	break;
    }

    *dst = 0;
    return str;
}
567
/*
 * Encode username and password
 *
 * Sends the header "hdr: Basic <base64 of usr:pwd>" on fd.  Returns the
 * result of _http_cmd(), or -1 if a string could not be allocated.
 */
static int
_http_basic_auth(int fd, const char *hdr, const char *usr, const char *pwd)
{
    char *plain, *encoded;
    int rv;

    DEBUG(fprintf(stderr, "usr: [%s]\n", usr));
    DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd));

    if (asprintf(&plain, "%s:%s", usr, pwd) == -1)
	return -1;
    encoded = _http_base64(plain);
    free(plain);

    if (encoded != NULL) {
	rv = _http_cmd(fd, "%s: Basic %s", hdr, encoded);
	free(encoded);
    } else {
	rv = -1;
    }
    return rv;
}
589
/*
 * Send an authorization header
 *
 * p has the form "basic:<realm>:<user>:<password>"; the realm is
 * skipped.  Returns the result of _http_basic_auth(), or -1 if the
 * scheme is not "basic", a field is missing, or memory runs out.
 */
static int
_http_authorize(int fd, const char *hdr, const char *p)
{
    char *copy, *sep;
    int rv;

    /* basic authorization is the only scheme handled */
    if (strncasecmp(p, "basic:", 6) != 0)
	return -1;

    /* skip realm */
    p += 6;
    while (*p && *p != ':')
	++p;
    if (!*p)
	return -1;
    ++p;

    /* what is left must be "user:password" */
    if (strchr(p, ':') == NULL)
	return -1;
    if ((copy = strdup(p)) == NULL)
	return -1; /* XXX */

    /* split the private copy at the colon */
    sep = strchr(copy, ':');
    *sep++ = '\0';
    rv = _http_basic_auth(fd, hdr, copy, sep);
    free(copy);
    return rv;
}
617
618
619/*****************************************************************************
620 * Helper functions for connecting to a server or proxy
621 */
622
623/*
624 * Connect to the correct HTTP server or proxy.
625 */
626static int
627_http_connect(struct url *URL, struct url *purl, const char *flags)
628{
629    int verbose;
630    int af, fd;
631
632#ifdef INET6
633    af = AF_UNSPEC;
634#else
635    af = AF_INET;
636#endif
637
638    verbose = CHECK_FLAG('v');
639    if (CHECK_FLAG('4'))
640	af = AF_INET;
641#ifdef INET6
642    else if (CHECK_FLAG('6'))
643	af = AF_INET6;
644#endif
645
646    if (purl) {
647	URL = purl;
648    } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
649	/* can't talk http to an ftp server */
650	/* XXX should set an error code */
651	return -1;
652    }
653
654    if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
655	/* _fetch_connect() has already set an error code */
656	return -1;
657    return fd;
658}
659
660static struct url *
661_http_get_proxy(void)
662{
663    struct url *purl;
664    char *p;
665
666    if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
667	(purl = fetchParseURL(p))) {
668	if (!*purl->scheme)
669	    strcpy(purl->scheme, SCHEME_HTTP);
670	if (!purl->port)
671	    purl->port = _fetch_default_proxy_port(purl->scheme);
672	if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
673	    return purl;
674	fetchFreeURL(purl);
675    }
676    return NULL;
677}
678
/*
 * Copy an HTML document from in to out, leaving out the markup.
 *
 * Everything between '<' and '>' is dropped, as is everything between
 * "<!--" and "-->".  Tags and comments may continue across lines.
 * Trailing whitespace is removed from each input line, and one newline
 * is written for every input line.
 */
static void
_http_print_html(FILE *out, FILE *in)
{
    size_t len;
    char *line, *end, *p, *q;
    int comment, tag;

    comment = tag = 0;
    while ((line = fgetln(in, &len)) != NULL) {
	while (len && isspace((unsigned char)line[len - 1]))
	    --len;
	end = line + len;
	/* p: start of text not yet written, q: scan position */
	for (p = q = line; q < end; ++q) {
	    if (comment && *q == '-') {
		/*
		 * Lines from fgetln() are not NUL-terminated, so
		 * compare a bounded prefix rather than the whole tail.
		 */
		if (end - q >= 3 && strncmp(q, "-->", 3) == 0) {
		    tag = comment = 0;
		    q += 2;
		    /* output resumes after the comment */
		    p = q + 1;
		}
	    } else if (tag && !comment && *q == '>') {
		p = q + 1;
		tag = 0;
	    } else if (!tag && *q == '<') {
		if (q > p)
		    fwrite(p, q - p, 1, out);
		tag = 1;
		if (end - q >= 4 && strncmp(q, "<!--", 4) == 0) {
		    comment = 1;
		    q += 3;
		}
	    }
	}
	if (!tag && q > p)
	    fwrite(p, q - p, 1, out);
	fputc('\n', out);
    }
}
714
715
716/*****************************************************************************
717 * Core
718 */
719
720/*
721 * Send a request and process the reply
722 */
723FILE *
724_http_request(struct url *URL, const char *op, struct url_stat *us,
725	      struct url *purl, const char *flags)
726{
727    struct url *url, *new;
728    int chunked, direct, need_auth, noredirect, verbose;
729    int code, fd, i, n;
730    off_t offset, clength, length, size;
731    time_t mtime;
732    const char *p;
733    FILE *f;
734    hdr_t h;
735    char *host;
736#ifdef INET6
737    char hbuf[MAXHOSTNAMELEN + 1];
738#endif
739
740    direct = CHECK_FLAG('d');
741    noredirect = CHECK_FLAG('A');
742    verbose = CHECK_FLAG('v');
743
744    if (direct && purl) {
745	fetchFreeURL(purl);
746	purl = NULL;
747    }
748
749    /* try the provided URL first */
750    url = URL;
751
752    /* if the A flag is set, we only get one try */
753    n = noredirect ? 1 : MAX_REDIRECT;
754    i = 0;
755
756    need_auth = 0;
757    do {
758	new = NULL;
759	chunked = 0;
760	offset = 0;
761	clength = -1;
762	length = -1;
763	size = -1;
764	mtime = 0;
765
766	/* check port */
767	if (!url->port)
768	    url->port = _fetch_default_port(url->scheme);
769
770	/* were we redirected to an FTP URL? */
771	if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
772	    if (strcmp(op, "GET") == 0)
773		return _ftp_request(url, "RETR", us, purl, flags);
774	    else if (strcmp(op, "HEAD") == 0)
775		return _ftp_request(url, "STAT", us, purl, flags);
776	}
777
778	/* connect to server or proxy */
779	if ((fd = _http_connect(url, purl, flags)) == -1)
780	    goto ouch;
781
782	host = url->host;
783#ifdef INET6
784	if (strchr(url->host, ':')) {
785	    snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
786	    host = hbuf;
787	}
788#endif
789
790	/* send request */
791	if (verbose)
792	    _fetch_info("requesting %s://%s:%d%s",
793			url->scheme, host, url->port, url->doc);
794	if (purl) {
795	    _http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1",
796		      op, url->scheme, host, url->port, url->doc);
797	} else {
798	    _http_cmd(fd, "%s %s HTTP/1.1",
799		      op, url->doc);
800	}
801
802	/* virtual host */
803	if (url->port == _fetch_default_port(url->scheme))
804	    _http_cmd(fd, "Host: %s", host);
805	else
806	    _http_cmd(fd, "Host: %s:%d", host, url->port);
807
808	/* proxy authorization */
809	if (purl) {
810	    if (*purl->user || *purl->pwd)
811		_http_basic_auth(fd, "Proxy-Authorization",
812				 purl->user, purl->pwd);
813	    else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
814		_http_authorize(fd, "Proxy-Authorization", p);
815	}
816
817	/* server authorization */
818	if (need_auth || *url->user || *url->pwd) {
819	    if (*url->user || *url->pwd)
820		_http_basic_auth(fd, "Authorization", url->user, url->pwd);
821	    else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
822		_http_authorize(fd, "Authorization", p);
823	    else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
824		_http_basic_auth(fd, "Authorization", url->user, url->pwd);
825	    } else {
826		_http_seterr(HTTP_NEED_AUTH);
827		goto ouch;
828	    }
829	}
830
831	/* other headers */
832	if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
833	    _http_cmd(fd, "User-Agent: %s", p);
834	else
835	    _http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, __progname);
836	if (url->offset)
837	    _http_cmd(fd, "Range: bytes=%lld-", (long long)url->offset);
838	_http_cmd(fd, "Connection: close");
839	_http_cmd(fd, "");
840
841	/* get reply */
842	switch ((code = _http_get_reply(fd))) {
843	case HTTP_OK:
844	case HTTP_PARTIAL:
845	    /* fine */
846	    break;
847	case HTTP_MOVED_PERM:
848	case HTTP_MOVED_TEMP:
849	case HTTP_SEE_OTHER:
850	    /*
851	     * Not so fine, but we still have to read the headers to
852	     * get the new location.
853	     */
854	    break;
855	case HTTP_NEED_AUTH:
856	    if (need_auth) {
857		/*
858		 * We already sent out authorization code, so there's
859		 * nothing more we can do.
860		 */
861		_http_seterr(code);
862		goto ouch;
863	    }
864	    /* try again, but send the password this time */
865	    if (verbose)
866		_fetch_info("server requires authorization");
867	    break;
868	case HTTP_NEED_PROXY_AUTH:
869	    /*
870	     * If we're talking to a proxy, we already sent our proxy
871	     * authorization code, so there's nothing more we can do.
872	     */
873	    _http_seterr(code);
874	    goto ouch;
875	case HTTP_PROTOCOL_ERROR:
876	    /* fall through */
877	case -1:
878	    _fetch_syserr();
879	    goto ouch;
880	default:
881	    _http_seterr(code);
882	    if (!verbose)
883		goto ouch;
884	    /* fall through so we can get the full error message */
885	}
886
887	/* get headers */
888	do {
889	    switch ((h = _http_next_header(fd, &p))) {
890	    case hdr_syserror:
891		_fetch_syserr();
892		goto ouch;
893	    case hdr_error:
894		_http_seterr(HTTP_PROTOCOL_ERROR);
895		goto ouch;
896	    case hdr_content_length:
897		_http_parse_length(p, &clength);
898		break;
899	    case hdr_content_range:
900		_http_parse_range(p, &offset, &length, &size);
901		break;
902	    case hdr_last_modified:
903		_http_parse_mtime(p, &mtime);
904		break;
905	    case hdr_location:
906		if (!HTTP_REDIRECT(code))
907		    break;
908		if (new)
909		    free(new);
910		if (verbose)
911		    _fetch_info("%d redirect to %s", code, p);
912		if (*p == '/')
913		    /* absolute path */
914		    new = fetchMakeURL(url->scheme, url->host, url->port, p,
915				       url->user, url->pwd);
916		else
917		    new = fetchParseURL(p);
918		if (new == NULL) {
919		    /* XXX should set an error code */
920		    DEBUG(fprintf(stderr, "failed to parse new URL\n"));
921		    goto ouch;
922		}
923		if (!*new->user && !*new->pwd) {
924		    strcpy(new->user, url->user);
925		    strcpy(new->pwd, url->pwd);
926		}
927		new->offset = url->offset;
928		new->length = url->length;
929		break;
930	    case hdr_transfer_encoding:
931		/* XXX weak test*/
932		chunked = (strcasecmp(p, "chunked") == 0);
933		break;
934	    case hdr_www_authenticate:
935		if (code != HTTP_NEED_AUTH)
936		    break;
937		/* if we were smarter, we'd check the method and realm */
938		break;
939	    case hdr_end:
940		/* fall through */
941	    case hdr_unknown:
942		/* ignore */
943		break;
944	    }
945	} while (h > hdr_end);
946
947	/* we have a hit or an error */
948	if (code == HTTP_OK || code == HTTP_PARTIAL || HTTP_ERROR(code))
949	    break;
950
951	/* we need to provide authentication */
952	if (code == HTTP_NEED_AUTH) {
953	    need_auth = 1;
954	    close(fd);
955	    fd = -1;
956	    continue;
957	}
958
959	/* all other cases: we got a redirect */
960	need_auth = 0;
961	close(fd);
962	fd = -1;
963	if (!new) {
964	    DEBUG(fprintf(stderr, "redirect with no new location\n"));
965	    break;
966	}
967	if (url != URL)
968	    fetchFreeURL(url);
969	url = new;
970    } while (++i < n);
971
972    /* we failed, or ran out of retries */
973    if (fd == -1) {
974	_http_seterr(code);
975	goto ouch;
976    }
977
978    DEBUG(fprintf(stderr, "offset %lld, length %lld,"
979		  " size %lld, clength %lld\n",
980		  (long long)offset, (long long)length,
981		  (long long)size, (long long)clength));
982
983    /* check for inconsistencies */
984    if (clength != -1 && length != -1 && clength != length) {
985	_http_seterr(HTTP_PROTOCOL_ERROR);
986	goto ouch;
987    }
988    if (clength == -1)
989	clength = length;
990    if (clength != -1)
991	length = offset + clength;
992    if (length != -1 && size != -1 && length != size) {
993	_http_seterr(HTTP_PROTOCOL_ERROR);
994	goto ouch;
995    }
996    if (size == -1)
997	size = length;
998
999    /* fill in stats */
1000    if (us) {
1001	us->size = size;
1002	us->atime = us->mtime = mtime;
1003    }
1004
1005    /* too far? */
1006    if (offset > URL->offset) {
1007	_http_seterr(HTTP_PROTOCOL_ERROR);
1008	goto ouch;
1009    }
1010
1011    /* report back real offset and size */
1012    URL->offset = offset;
1013    URL->length = clength;
1014
1015    /* wrap it up in a FILE */
1016    if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) {
1017	_fetch_syserr();
1018	goto ouch;
1019    }
1020
1021    if (url != URL)
1022	fetchFreeURL(url);
1023    if (purl)
1024	fetchFreeURL(purl);
1025
1026    if (HTTP_ERROR(code)) {
1027	_http_print_html(stderr, f);
1028	fclose(f);
1029	f = NULL;
1030    }
1031
1032    return f;
1033
1034 ouch:
1035    if (url != URL)
1036	fetchFreeURL(url);
1037    if (purl)
1038	fetchFreeURL(purl);
1039    if (fd != -1)
1040	close(fd);
1041    return NULL;
1042}
1043
1044
1045/*****************************************************************************
1046 * Entry points
1047 */
1048
/*
 * Retrieve and stat a file by HTTP
 *
 * Issues a GET for URL through the proxy found by _http_get_proxy(), if
 * any, and passes us and flags on to _http_request().  Returns what
 * _http_request() returns.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
    struct url *proxy;

    proxy = _http_get_proxy();
    return _http_request(URL, "GET", us, proxy, flags);
}
1057
/*
 * Retrieve a file by HTTP
 *
 * Same as fetchXGetHTTP() without collecting any document statistics.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetHTTP(URL, NULL, flags);
    return stream;
}
1066
1067/*
1068 * Store a file by HTTP
1069 */
1070FILE *
1071fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1072{
1073    warnx("fetchPutHTTP(): not implemented");
1074    return NULL;
1075}
1076
/*
 * Get an HTTP document's metadata
 *
 * Issues a HEAD request for URL so that _http_request() fills in us, then
 * closes the stream again.  Returns 0 on success and -1 if the request
 * failed.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
    struct url *proxy;
    FILE *stream;

    proxy = _http_get_proxy();
    stream = _http_request(URL, "HEAD", us, proxy, flags);
    if (stream == NULL)
	return -1;

    /* only the metadata was wanted */
    fclose(stream);
    return 0;
}
1090
1091/*
1092 * List a directory
1093 */
1094struct url_ent *
1095fetchListHTTP(struct url *url __unused, const char *flags __unused)
1096{
1097    warnx("fetchListHTTP(): not implemented");
1098    return NULL;
1099}
1100