http.c revision 89590
1255332Scy/*-
 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3145510Sdarrenr * All rights reserved.
4145510Sdarrenr *
5145510Sdarrenr * Redistribution and use in source and binary forms, with or without
6145510Sdarrenr * modification, are permitted provided that the following conditions
7145510Sdarrenr * are met:
8145510Sdarrenr * 1. Redistributions of source code must retain the above copyright
9145510Sdarrenr *    notice, this list of conditions and the following disclaimer
10145510Sdarrenr *    in this position and unchanged.
11145510Sdarrenr * 2. Redistributions in binary form must reproduce the above copyright
12145510Sdarrenr *    notice, this list of conditions and the following disclaimer in the
13145510Sdarrenr *    documentation and/or other materials provided with the distribution.
14145510Sdarrenr * 3. The name of the author may not be used to endorse or promote products
15145510Sdarrenr *    derived from this software without specific prior written permission.
16255332Scy *
17145510Sdarrenr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18145510Sdarrenr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19145510Sdarrenr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20145510Sdarrenr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21145510Sdarrenr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22145510Sdarrenr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23145510Sdarrenr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24145510Sdarrenr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25145510Sdarrenr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26145510Sdarrenr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27145510Sdarrenr */
28145510Sdarrenr
29145510Sdarrenr#include <sys/cdefs.h>
30255332Scy__FBSDID("$FreeBSD: head/lib/libfetch/http.c 89590 2002-01-20 19:53:12Z des $");
31145510Sdarrenr
32145510Sdarrenr/*
33145510Sdarrenr * The following copyright applies to the base64 code:
34145510Sdarrenr *
35145510Sdarrenr *-
36145510Sdarrenr * Copyright 1997 Massachusetts Institute of Technology
37145510Sdarrenr *
38145510Sdarrenr * Permission to use, copy, modify, and distribute this software and
39145510Sdarrenr * its documentation for any purpose and without fee is hereby
40145510Sdarrenr * granted, provided that both the above copyright notice and this
41145510Sdarrenr * permission notice appear in all copies, that both the above
42145510Sdarrenr * copyright notice and this permission notice appear in all
43255332Scy * supporting documentation, and that the name of M.I.T. not be used
44145510Sdarrenr * in advertising or publicity pertaining to distribution of the
45145510Sdarrenr * software without specific, written prior permission.  M.I.T. makes
46145510Sdarrenr * no representations about the suitability of this software for any
47145510Sdarrenr * purpose.  It is provided "as is" without express or implied
48145510Sdarrenr * warranty.
49145510Sdarrenr *
50145510Sdarrenr * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51145510Sdarrenr * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52145510Sdarrenr * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53145510Sdarrenr * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54145510Sdarrenr * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55145510Sdarrenr * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56145510Sdarrenr * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57255332Scy * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58145510Sdarrenr * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59145510Sdarrenr * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60145510Sdarrenr * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61145510Sdarrenr * SUCH DAMAGE.
62145510Sdarrenr */
63145510Sdarrenr
64145510Sdarrenr#include <sys/param.h>
65145510Sdarrenr#include <sys/socket.h>
66145510Sdarrenr
67145510Sdarrenr#include <ctype.h>
68145510Sdarrenr#include <err.h>
69145510Sdarrenr#include <errno.h>
70255332Scy#include <locale.h>
71145510Sdarrenr#include <netdb.h>
72145510Sdarrenr#include <stdarg.h>
73145510Sdarrenr#include <stdio.h>
74145510Sdarrenr#include <stdlib.h>
75145510Sdarrenr#include <string.h>
76145510Sdarrenr#include <time.h>
77145510Sdarrenr#include <unistd.h>
78145510Sdarrenr
79145510Sdarrenr#include "fetch.h"
80145510Sdarrenr#include "common.h"
81145510Sdarrenr#include "httperr.h"
82145510Sdarrenr
83145510Sdarrenrextern char *__progname; /* XXX not portable */
84145510Sdarrenr
85145510Sdarrenr/* Maximum number of redirects to follow */
86145510Sdarrenr#define MAX_REDIRECT 5
87145510Sdarrenr
88145510Sdarrenr/* Symbolic names for reply codes we care about */
89145510Sdarrenr#define HTTP_OK			200
90145510Sdarrenr#define HTTP_PARTIAL		206
91145510Sdarrenr#define HTTP_MOVED_PERM		301
92145510Sdarrenr#define HTTP_MOVED_TEMP		302
93145510Sdarrenr#define HTTP_SEE_OTHER		303
94145510Sdarrenr#define HTTP_NEED_AUTH		401
95145510Sdarrenr#define HTTP_NEED_PROXY_AUTH	407
96145510Sdarrenr#define HTTP_PROTOCOL_ERROR	999
97145510Sdarrenr
98145510Sdarrenr#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
99145510Sdarrenr                            || (xyz) == HTTP_MOVED_TEMP \
100145510Sdarrenr                            || (xyz) == HTTP_SEE_OTHER)
101145510Sdarrenr
/* True for every 4xx (client error) and 5xx (server error) reply code */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
103145510Sdarrenr
104145510Sdarrenr
105/*****************************************************************************
106 * I/O functions for decoding chunked streams
107 */
108
/* Per-stream state of the chunked-transfer decoder */
struct cookie
{
    int      fd;            /* descriptor the stream is read from */
    char    *buf;           /* buffer for chunk headers and chunk data */
    size_t   b_size;        /* allocated size of buf */
    ssize_t  b_len;         /* number of valid bytes in buf */
    int      b_pos;         /* next unread position in buf */
    int      eof;           /* set once the zero-size chunk was seen */
    int      error;         /* set once a chunk header was bad */
    size_t   chunksize;     /* bytes left to read in the current chunk */
#ifndef NDEBUG
    size_t   total;         /* sum of all chunk sizes (debug output) */
#endif
};
123
124/*
125 * Get next chunk header
126 */
127static int
128_http_new_chunk(struct cookie *c)
129{
130    char *p;
131
132    if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1)
133	return -1;
134
135    if (c->b_len < 2 || !ishexnumber(*c->buf))
136	return -1;
137
138    for (p = c->buf; !isspace(*p) && *p != ';' && p < c->buf + c->b_len; ++p)
139	if (!ishexnumber(*p))
140	    return -1;
141	else if (isdigit(*p))
142	    c->chunksize = c->chunksize * 16 + *p - '0';
143	else
144	    c->chunksize = c->chunksize * 16 + 10 + tolower(*p) - 'a';
145
146#ifndef NDEBUG
147    if (fetchDebug) {
148	c->total += c->chunksize;
149	if (c->chunksize == 0)
150	    fprintf(stderr, "_http_fillbuf(): "
151		    "end of last chunk\n");
152	else
153	    fprintf(stderr, "_http_fillbuf(): "
154		    "new chunk: %lu (%lu)\n",
155		    (unsigned long)c->chunksize, (unsigned long)c->total);
156    }
157#endif
158
159    return c->chunksize;
160}
161
162/*
163 * Fill the input buffer, do chunk decoding on the fly
164 */
165static int
166_http_fillbuf(struct cookie *c)
167{
168    if (c->error)
169	return -1;
170    if (c->eof)
171	return 0;
172
173    if (c->chunksize == 0) {
174	switch (_http_new_chunk(c)) {
175	case -1:
176	    c->error = 1;
177	    return -1;
178	case 0:
179	    c->eof = 1;
180	    return 0;
181	}
182    }
183
184    if (c->b_size < c->chunksize) {
185	char *tmp;
186
187	if ((tmp = realloc(c->buf, c->chunksize)) == NULL)
188	    return -1;
189	c->buf = tmp;
190	c->b_size = c->chunksize;
191    }
192
193    if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1)
194	return -1;
195    c->chunksize -= c->b_len;
196
197    if (c->chunksize == 0) {
198	char endl;
199	if (read(c->fd, &endl, 1) == -1 ||
200	    read(c->fd, &endl, 1) == -1)
201	    return -1;
202    }
203
204    c->b_pos = 0;
205
206    return c->b_len;
207}
208
209/*
210 * Read function
211 */
212static int
213_http_readfn(void *v, char *buf, int len)
214{
215    struct cookie *c = (struct cookie *)v;
216    int l, pos;
217
218    if (c->error)
219	return -1;
220    if (c->eof)
221	return 0;
222
223    for (pos = 0; len > 0; pos += l, len -= l) {
224	/* empty buffer */
225	if (!c->buf || c->b_pos == c->b_len)
226	    if (_http_fillbuf(c) < 1)
227		break;
228	l = c->b_len - c->b_pos;
229	if (len < l)
230	    l = len;
231	bcopy(c->buf + c->b_pos, buf + pos, l);
232	c->b_pos += l;
233    }
234
235    if (!pos && c->error)
236	return -1;
237    return pos;
238}
239
240/*
241 * Write function
242 */
243static int
244_http_writefn(void *v, const char *buf, int len)
245{
246    struct cookie *c = (struct cookie *)v;
247
248    return write(c->fd, buf, len);
249}
250
251/*
252 * Close function
253 */
254static int
255_http_closefn(void *v)
256{
257    struct cookie *c = (struct cookie *)v;
258    int r;
259
260    r = close(c->fd);
261    if (c->buf)
262	free(c->buf);
263    free(c);
264    return r;
265}
266
267/*
268 * Wrap a file descriptor up
269 */
270static FILE *
271_http_funopen(int fd)
272{
273    struct cookie *c;
274    FILE *f;
275
276    if ((c = calloc(1, sizeof *c)) == NULL) {
277	_fetch_syserr();
278	return NULL;
279    }
280    c->fd = fd;
281    if (!(f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn))) {
282	_fetch_syserr();
283	free(c);
284	return NULL;
285    }
286    return f;
287}
288
289
290/*****************************************************************************
291 * Helper functions for talking to the server and parsing its replies
292 */
293
/*
 * Header types
 *
 * Values above hdr_end mean "a header line was read, keep going";
 * hdr_end and the negative values stop the header loop.
 */
typedef enum {
    hdr_syserror = -2,              /* the header line could not be read */
    hdr_error = -1,                 /* reported by callers as a protocol error */
    hdr_end = 0,                    /* empty line: end of the header section */
    hdr_unknown = 1,                /* a header we have no interest in */
    hdr_content_length = 2,
    hdr_content_range = 3,
    hdr_last_modified = 4,
    hdr_location = 5,
    hdr_transfer_encoding = 6,
    hdr_www_authenticate = 7
} hdr_t;
307
308/* Names of interesting headers */
309static struct {
310    hdr_t	 num;
311    const char	*name;
312} hdr_names[] = {
313    { hdr_content_length,	"Content-Length" },
314    { hdr_content_range,	"Content-Range" },
315    { hdr_last_modified,	"Last-Modified" },
316    { hdr_location,		"Location" },
317    { hdr_transfer_encoding,	"Transfer-Encoding" },
318    { hdr_www_authenticate,	"WWW-Authenticate" },
319    { hdr_unknown,		NULL },
320};
321
/* Line buffer shared by the status-line and header readers */
static char     *reply_buf;     /* most recently read line */
static size_t    reply_size;    /* size value maintained by _fetch_getln() */
static size_t    reply_length;  /* length of the line in reply_buf */
325
/*
 * Send a formatted line
 *
 * Formats the arguments printf-style into a temporary string and
 * writes it to fd with _fetch_putln().  Returns 0 on success; on
 * failure calls _fetch_syserr() and returns -1.
 */
static int
_http_cmd(int fd, const char *fmt, ...)
{
    va_list ap;
    char *msg;
    int len, r;

    msg = NULL;
    va_start(ap, fmt);
    len = vasprintf(&msg, fmt, ap);
    va_end(ap);

    /*
     * Trust the return value first: not every implementation
     * guarantees that msg is NULL after a failure.
     */
    if (len < 0 || msg == NULL) {
        errno = ENOMEM;
        _fetch_syserr();
        return -1;
    }

    r = _fetch_putln(fd, msg, (size_t)len);
    free(msg);

    if (r == -1) {
        _fetch_syserr();
        return -1;
    }

    return 0;
}
357
358/*
359 * Get and parse status line
360 */
361static int
362_http_get_reply(int fd)
363{
364    char *p;
365
366    if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
367	return -1;
368    /*
369     * A valid status line looks like "HTTP/m.n xyz reason" where m
370     * and n are the major and minor protocol version numbers and xyz
371     * is the reply code.
372     * Unfortunately, there are servers out there (NCSA 1.5.1, to name
373     * just one) that do not send a version number, so we can't rely
374     * on finding one, but if we do, insist on it being 1.0 or 1.1.
375     * We don't care about the reason phrase.
376     */
377    if (strncmp(reply_buf, "HTTP", 4) != 0)
378	return HTTP_PROTOCOL_ERROR;
379    p = reply_buf + 4;
380    if (*p == '/') {
381	if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
382	    return HTTP_PROTOCOL_ERROR;
383	p += 4;
384    }
385    if (*p != ' '
386	|| !isdigit(p[1])
387	|| !isdigit(p[2])
388	|| !isdigit(p[3]))
389	return HTTP_PROTOCOL_ERROR;
390
391    return ((p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0'));
392}
393
/*
 * Check a header; if the type matches the given string, return a
 * pointer to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header
 * line.  The name is compared case-insensitively and must be followed
 * directly by a colon.  Returns a pointer to the first non-whitespace
 * character after the colon (possibly the terminating NUL), or NULL if
 * the line is not this header.
 */
static const char *
_http_match(const char *str, const char *hdr)
{
    /*
     * Compare first and advance only after a match; stepping both
     * pointers inside the comparison would skip over a mismatch in
     * the last character of the name.
     */
    while (*str != '\0' && *hdr != '\0' &&
           tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
        ++str;
        ++hdr;
    }
    if (*str != '\0' || *hdr != ':')
        return NULL;

    /* step over the colon and any whitespace in front of the value */
    do {
        ++hdr;
    } while (isspace((unsigned char)*hdr));
    return hdr;
}
409
410/*
411 * Get the next header and return the appropriate symbolic code.
412 */
413static hdr_t
414_http_next_header(int fd, const char **p)
415{
416    int i;
417
418    if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1)
419	return hdr_syserror;
420    while (reply_length && isspace(reply_buf[reply_length-1]))
421	reply_length--;
422    reply_buf[reply_length] = 0;
423    if (reply_length == 0)
424	return hdr_end;
425    /*
426     * We could check for malformed headers but we don't really care.
427     * A valid header starts with a token immediately followed by a
428     * colon; a token is any sequence of non-control, non-whitespace
429     * characters except "()<>@,;:\\\"{}".
430     */
431    for (i = 0; hdr_names[i].num != hdr_unknown; i++)
432	if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL)
433	    return hdr_names[i].num;
434    return hdr_unknown;
435}
436
/*
 * Parse a last-modified header
 *
 * Only the "Sun, 06 Nov 1994 08:49:37 GMT" date form is understood.
 * The LC_TIME locale is switched to "C" while parsing so that day and
 * month names are matched in English, and is restored afterwards.
 *
 * Returns 0 and stores the time in *mtime on success, -1 if the date
 * cannot be parsed or the current locale name cannot be saved.
 */
static int
_http_parse_mtime(const char *p, time_t *mtime)
{
    const char *current;
    char *saved, *r;
    struct tm tm;

    /*
     * Keep a private copy of the locale name: the string returned by
     * setlocale() may be overwritten by the next call, and a copy of
     * the right size cannot be truncated or left unterminated.
     */
    current = setlocale(LC_TIME, NULL);
    saved = (current != NULL) ? strdup(current) : NULL;
    if (current != NULL && saved == NULL)
        return -1;

    /* strptime() only fills in the fields the format mentions */
    memset(&tm, 0, sizeof tm);

    setlocale(LC_TIME, "C");
    r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
    /* XXX should add support for date-2 and date-3 */
    if (saved != NULL) {
        setlocale(LC_TIME, saved);
        free(saved);
    }
    if (r == NULL)
        return -1;
    DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d "
                  "%02d:%02d:%02d]\n",
                  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
                  tm.tm_hour, tm.tm_min, tm.tm_sec));
    *mtime = timegm(&tm);
    return 0;
}
460
/*
 * Parse a content-length header
 *
 * The value must consist of decimal digits only.  Returns 0 and stores
 * the number in *length, or -1 (leaving *length alone) if any other
 * character is found.
 */
static int
_http_parse_length(const char *p, off_t *length)
{
    off_t value = 0;

    while (*p != '\0' && isdigit(*p)) {
        value = value * 10 + (*p - '0');
        ++p;
    }
    if (*p != '\0')
        return -1;

    DEBUG(fprintf(stderr, "content length: [%lld]\n",
                  (long long)value));
    *length = value;
    return 0;
}
478
/*
 * Parse a content-range header
 *
 * Expects "bytes first-last/total".  On success returns 0 and stores
 * first in *offset, last - first + 1 in *length and total in *size.
 * Returns -1 without touching the outputs if the syntax is wrong, if
 * first > last, or if total is smaller than the range.
 */
static int
_http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
    off_t first = 0, last = 0, total = 0;

    if (strncasecmp(p, "bytes ", 6) != 0)
        return -1;
    p += 6;

    while (*p != '\0' && isdigit(*p)) {
        first = first * 10 + *p - '0';
        ++p;
    }
    if (*p != '-')
        return -1;
    ++p;

    while (*p != '\0' && isdigit(*p)) {
        last = last * 10 + *p - '0';
        ++p;
    }
    if (first > last || *p != '/')
        return -1;
    ++p;

    while (*p != '\0' && isdigit(*p)) {
        total = total * 10 + *p - '0';
        ++p;
    }
    if (*p != '\0' || total < last - first + 1)
        return -1;

    DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n",
                  (long long)first, (long long)last, (long long)total));
    *offset = first;
    *length = last - first + 1;
    *size = total;
    return 0;
}
508
509
510/*****************************************************************************
511 * Helper functions for authorization
512 */
513
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated, NUL-terminated string which the caller must free,
 * or NULL if the allocation fails.
 */
static char *
_http_base64(char *src)
{
    static const char base64[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";
    /* read the input as unsigned bytes so values >= 0x80 do not sign-extend */
    const unsigned char *in = (const unsigned char *)src;
    char *str, *dst;
    size_t l;
    unsigned int t;

    l = strlen(src);
    /* four output characters per three input bytes, plus the NUL */
    if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
        return NULL;
    dst = str;

    while (l >= 3) {
        t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
        dst[0] = base64[(t >> 18) & 0x3f];
        dst[1] = base64[(t >> 12) & 0x3f];
        dst[2] = base64[(t >> 6) & 0x3f];
        dst[3] = base64[(t >> 0) & 0x3f];
        in += 3; l -= 3;
        dst += 4;
    }

    /* one or two bytes left over: pad the last group with '=' */
    switch (l) {
    case 2:
        t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
        dst[0] = base64[(t >> 18) & 0x3f];
        dst[1] = base64[(t >> 12) & 0x3f];
        dst[2] = base64[(t >> 6) & 0x3f];
        dst[3] = '=';
        dst += 4;
        break;
    case 1:
        t = (unsigned int)in[0] << 16;
        dst[0] = base64[(t >> 18) & 0x3f];
        dst[1] = base64[(t >> 12) & 0x3f];
        dst[2] = dst[3] = '=';
        dst += 4;
        break;
    default:
        break;
    }

    *dst = '\0';
    return str;
}
569
/*
 * Encode username and password
 *
 * Sends "<hdr>: Basic <base64 of usr:pwd>" on fd.  Returns the result
 * of _http_cmd(), or -1 if one of the intermediate strings cannot be
 * allocated.
 */
static int
_http_basic_auth(int fd, const char *hdr, const char *usr, const char *pwd)
{
    char *credentials, *encoded;
    int rc;

    DEBUG(fprintf(stderr, "usr: [%s]\n", usr));
    DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd));

    if (asprintf(&credentials, "%s:%s", usr, pwd) == -1)
        return -1;
    encoded = _http_base64(credentials);
    free(credentials);
    if (encoded == NULL)
        return -1;

    rc = _http_cmd(fd, "%s: Basic %s", hdr, encoded);
    free(encoded);
    return rc;
}
591
/*
 * Send an authorization header
 *
 * p has the form "basic:<realm>:<user>:<password>"; the realm is
 * skipped.  Any other scheme, or a specification with a part missing,
 * yields -1; otherwise the result of _http_basic_auth() is returned.
 */
static int
_http_authorize(int fd, const char *hdr, const char *p)
{
    char *copy, *sep;
    int rc;

    /* basic authorization is the only scheme handled */
    if (strncasecmp(p, "basic:", 6) != 0)
        return -1;

    /* skip realm */
    p += 6;
    while (*p != '\0' && *p != ':')
        ++p;
    if (*p == '\0')
        return -1;
    ++p;

    /* what is left must be "user:password" */
    if (strchr(p, ':') == NULL)
        return -1;
    if ((copy = strdup(p)) == NULL)
        return -1; /* XXX */

    sep = strchr(copy, ':');
    *sep = '\0';
    rc = _http_basic_auth(fd, hdr, copy, sep + 1);
    free(copy);
    return rc;
}
619
620
621/*****************************************************************************
622 * Helper functions for connecting to a server or proxy
623 */
624
625/*
626 * Connect to the correct HTTP server or proxy.
627 */
628static int
629_http_connect(struct url *URL, struct url *purl, const char *flags)
630{
631    int verbose;
632    int af, fd;
633
634#ifdef INET6
635    af = AF_UNSPEC;
636#else
637    af = AF_INET;
638#endif
639
640    verbose = CHECK_FLAG('v');
641    if (CHECK_FLAG('4'))
642	af = AF_INET;
643#ifdef INET6
644    else if (CHECK_FLAG('6'))
645	af = AF_INET6;
646#endif
647
648    if (purl) {
649	URL = purl;
650    } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
651	/* can't talk http to an ftp server */
652	/* XXX should set an error code */
653	return -1;
654    }
655
656    if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1)
657	/* _fetch_connect() has already set an error code */
658	return -1;
659    return fd;
660}
661
662static struct url *
663_http_get_proxy(void)
664{
665    struct url *purl;
666    char *p;
667
668    if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
669	(purl = fetchParseURL(p))) {
670	if (!*purl->scheme)
671	    strcpy(purl->scheme, SCHEME_HTTP);
672	if (!purl->port)
673	    purl->port = _fetch_default_proxy_port(purl->scheme);
674	if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
675	    return purl;
676	fetchFreeURL(purl);
677    }
678    return NULL;
679}
680
/*
 * Copy an HTML document from in to out with the markup removed.
 *
 * Works line by line: trailing whitespace is dropped, everything
 * between '<' and '>' and everything between "<!--" and "-->" is
 * skipped, and the remaining text of each line is written followed by
 * a newline.  Tags and comments may span several lines.
 */
static void
_http_print_html(FILE *out, FILE *in)
{
    size_t len;
    char *line, *end, *p, *q;
    int comment, tag;

    comment = tag = 0;
    while ((line = fgetln(in, &len)) != NULL) {
        while (len && isspace((unsigned char)line[len - 1]))
            --len;
        end = line + len;
        /* p marks the start of text that has not been written yet */
        for (p = q = line; q < end; ++q) {
            if (comment) {
                /*
                 * The line is not NUL-terminated, so compare a
                 * bounded prefix instead of using strcmp().
                 */
                if (*q == '-' && end - q >= 3 &&
                    strncmp(q, "-->", 3) == 0) {
                    tag = comment = 0;
                    q += 2;
                    /* text resumes after the comment */
                    p = q + 1;
                }
            } else if (tag) {
                if (*q == '>') {
                    p = q + 1;
                    tag = 0;
                }
            } else if (*q == '<') {
                if (q > p)
                    fwrite(p, q - p, 1, out);
                tag = 1;
                if (end - q >= 4 && strncmp(q, "<!--", 4) == 0) {
                    comment = 1;
                    q += 3;
                }
            }
        }
        if (!tag && q > p)
            fwrite(p, q - p, 1, out);
        fputc('\n', out);
    }
}
716
717
718/*****************************************************************************
719 * Core
720 */
721
722/*
723 * Send a request and process the reply
724 */
725FILE *
726_http_request(struct url *URL, const char *op, struct url_stat *us,
727	      struct url *purl, const char *flags)
728{
729    struct url *url, *new;
730    int chunked, direct, need_auth, noredirect, verbose;
731    int code, fd, i, n;
732    off_t offset, clength, length, size;
733    time_t mtime;
734    const char *p;
735    FILE *f;
736    hdr_t h;
737    char *host;
738#ifdef INET6
739    char hbuf[MAXHOSTNAMELEN + 1];
740#endif
741
742    direct = CHECK_FLAG('d');
743    noredirect = CHECK_FLAG('A');
744    verbose = CHECK_FLAG('v');
745
746    if (direct && purl) {
747	fetchFreeURL(purl);
748	purl = NULL;
749    }
750
751    /* try the provided URL first */
752    url = URL;
753
754    /* if the A flag is set, we only get one try */
755    n = noredirect ? 1 : MAX_REDIRECT;
756    i = 0;
757
758    need_auth = 0;
759    do {
760	new = NULL;
761	chunked = 0;
762	offset = 0;
763	clength = -1;
764	length = -1;
765	size = -1;
766	mtime = 0;
767
768	/* check port */
769	if (!url->port)
770	    url->port = _fetch_default_port(url->scheme);
771
772	/* were we redirected to an FTP URL? */
773	if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
774	    if (strcmp(op, "GET") == 0)
775		return _ftp_request(url, "RETR", us, purl, flags);
776	    else if (strcmp(op, "HEAD") == 0)
777		return _ftp_request(url, "STAT", us, purl, flags);
778	}
779
780	/* connect to server or proxy */
781	if ((fd = _http_connect(url, purl, flags)) == -1)
782	    goto ouch;
783
784	host = url->host;
785#ifdef INET6
786	if (strchr(url->host, ':')) {
787	    snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
788	    host = hbuf;
789	}
790#endif
791
792	/* send request */
793	if (verbose)
794	    _fetch_info("requesting %s://%s:%d%s",
795			url->scheme, host, url->port, url->doc);
796	if (purl) {
797	    _http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1",
798		      op, url->scheme, host, url->port, url->doc);
799	} else {
800	    _http_cmd(fd, "%s %s HTTP/1.1",
801		      op, url->doc);
802	}
803
804	/* virtual host */
805	if (url->port == _fetch_default_port(url->scheme))
806	    _http_cmd(fd, "Host: %s", host);
807	else
808	    _http_cmd(fd, "Host: %s:%d", host, url->port);
809
810	/* proxy authorization */
811	if (purl) {
812	    if (*purl->user || *purl->pwd)
813		_http_basic_auth(fd, "Proxy-Authorization",
814				 purl->user, purl->pwd);
815	    else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
816		_http_authorize(fd, "Proxy-Authorization", p);
817	}
818
819	/* server authorization */
820	if (need_auth || *url->user || *url->pwd) {
821	    if (*url->user || *url->pwd)
822		_http_basic_auth(fd, "Authorization", url->user, url->pwd);
823	    else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
824		_http_authorize(fd, "Authorization", p);
825	    else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
826		_http_basic_auth(fd, "Authorization", url->user, url->pwd);
827	    } else {
828		_http_seterr(HTTP_NEED_AUTH);
829		goto ouch;
830	    }
831	}
832
833	/* other headers */
834	if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
835	    _http_cmd(fd, "User-Agent: %s", p);
836	else
837	    _http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, __progname);
838	if (url->offset)
839	    _http_cmd(fd, "Range: bytes=%lld-", (long long)url->offset);
840	_http_cmd(fd, "Connection: close");
841	_http_cmd(fd, "");
842
843	/* get reply */
844	switch ((code = _http_get_reply(fd))) {
845	case HTTP_OK:
846	case HTTP_PARTIAL:
847	    /* fine */
848	    break;
849	case HTTP_MOVED_PERM:
850	case HTTP_MOVED_TEMP:
851	case HTTP_SEE_OTHER:
852	    /*
853	     * Not so fine, but we still have to read the headers to
854	     * get the new location.
855	     */
856	    break;
857	case HTTP_NEED_AUTH:
858	    if (need_auth) {
859		/*
860		 * We already sent out authorization code, so there's
861		 * nothing more we can do.
862		 */
863		_http_seterr(code);
864		goto ouch;
865	    }
866	    /* try again, but send the password this time */
867	    if (verbose)
868		_fetch_info("server requires authorization");
869	    break;
870	case HTTP_NEED_PROXY_AUTH:
871	    /*
872	     * If we're talking to a proxy, we already sent our proxy
873	     * authorization code, so there's nothing more we can do.
874	     */
875	    _http_seterr(code);
876	    goto ouch;
877	case HTTP_PROTOCOL_ERROR:
878	    /* fall through */
879	case -1:
880	    _fetch_syserr();
881	    goto ouch;
882	default:
883	    _http_seterr(code);
884	    if (!verbose)
885		goto ouch;
886	    /* fall through so we can get the full error message */
887	}
888
889	/* get headers */
890	do {
891	    switch ((h = _http_next_header(fd, &p))) {
892	    case hdr_syserror:
893		_fetch_syserr();
894		goto ouch;
895	    case hdr_error:
896		_http_seterr(HTTP_PROTOCOL_ERROR);
897		goto ouch;
898	    case hdr_content_length:
899		_http_parse_length(p, &clength);
900		break;
901	    case hdr_content_range:
902		_http_parse_range(p, &offset, &length, &size);
903		break;
904	    case hdr_last_modified:
905		_http_parse_mtime(p, &mtime);
906		break;
907	    case hdr_location:
908		if (!HTTP_REDIRECT(code))
909		    break;
910		if (new)
911		    free(new);
912		if (verbose)
913		    _fetch_info("%d redirect to %s", code, p);
914		if (*p == '/')
915		    /* absolute path */
916		    new = fetchMakeURL(url->scheme, url->host, url->port, p,
917				       url->user, url->pwd);
918		else
919		    new = fetchParseURL(p);
920		if (new == NULL) {
921		    /* XXX should set an error code */
922		    DEBUG(fprintf(stderr, "failed to parse new URL\n"));
923		    goto ouch;
924		}
925		if (!*new->user && !*new->pwd) {
926		    strcpy(new->user, url->user);
927		    strcpy(new->pwd, url->pwd);
928		}
929		new->offset = url->offset;
930		new->length = url->length;
931		break;
932	    case hdr_transfer_encoding:
933		/* XXX weak test*/
934		chunked = (strcasecmp(p, "chunked") == 0);
935		break;
936	    case hdr_www_authenticate:
937		if (code != HTTP_NEED_AUTH)
938		    break;
939		/* if we were smarter, we'd check the method and realm */
940		break;
941	    case hdr_end:
942		/* fall through */
943	    case hdr_unknown:
944		/* ignore */
945		break;
946	    }
947	} while (h > hdr_end);
948
949	/* we have a hit or an error */
950	if (code == HTTP_OK || code == HTTP_PARTIAL || HTTP_ERROR(code))
951	    break;
952
953	/* we need to provide authentication */
954	if (code == HTTP_NEED_AUTH) {
955	    need_auth = 1;
956	    close(fd);
957	    fd = -1;
958	    continue;
959	}
960
961	/* all other cases: we got a redirect */
962	need_auth = 0;
963	close(fd);
964	fd = -1;
965	if (!new) {
966	    DEBUG(fprintf(stderr, "redirect with no new location\n"));
967	    break;
968	}
969	if (url != URL)
970	    fetchFreeURL(url);
971	url = new;
972    } while (++i < n);
973
974    /* we failed, or ran out of retries */
975    if (fd == -1) {
976	_http_seterr(code);
977	goto ouch;
978    }
979
980    DEBUG(fprintf(stderr, "offset %lld, length %lld,"
981		  " size %lld, clength %lld\n",
982		  (long long)offset, (long long)length,
983		  (long long)size, (long long)clength));
984
985    /* check for inconsistencies */
986    if (clength != -1 && length != -1 && clength != length) {
987	_http_seterr(HTTP_PROTOCOL_ERROR);
988	goto ouch;
989    }
990    if (clength == -1)
991	clength = length;
992    if (clength != -1)
993	length = offset + clength;
994    if (length != -1 && size != -1 && length != size) {
995	_http_seterr(HTTP_PROTOCOL_ERROR);
996	goto ouch;
997    }
998    if (size == -1)
999	size = length;
1000
1001    /* fill in stats */
1002    if (us) {
1003	us->size = size;
1004	us->atime = us->mtime = mtime;
1005    }
1006
1007    /* too far? */
1008    if (offset > URL->offset) {
1009	_http_seterr(HTTP_PROTOCOL_ERROR);
1010	goto ouch;
1011    }
1012
1013    /* report back real offset and size */
1014    URL->offset = offset;
1015    URL->length = clength;
1016
1017    /* wrap it up in a FILE */
1018    if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) {
1019	_fetch_syserr();
1020	goto ouch;
1021    }
1022
1023    if (url != URL)
1024	fetchFreeURL(url);
1025    if (purl)
1026	fetchFreeURL(purl);
1027
1028    if (HTTP_ERROR(code)) {
1029	_http_print_html(stderr, f);
1030	fclose(f);
1031	f = NULL;
1032    }
1033
1034    return f;
1035
1036 ouch:
1037    if (url != URL)
1038	fetchFreeURL(url);
1039    if (purl)
1040	fetchFreeURL(purl);
1041    if (fd != -1)
1042	close(fd);
1043    return NULL;
1044}
1045
1046
1047/*****************************************************************************
1048 * Entry points
1049 */
1050
/*
 * Retrieve and stat a file by HTTP
 *
 * Issues a GET for URL through the proxy found by _http_get_proxy(),
 * if any, and returns whatever _http_request() returns.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
    struct url *proxy;

    proxy = _http_get_proxy();
    return _http_request(URL, "GET", us, proxy, flags);
}
1059
/*
 * Retrieve a file by HTTP
 *
 * Same as fetchXGetHTTP() without a stat structure.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetHTTP(URL, NULL, flags);
    return stream;
}
1068
1069/*
1070 * Store a file by HTTP
1071 */
1072FILE *
1073fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1074{
1075    warnx("fetchPutHTTP(): not implemented");
1076    return NULL;
1077}
1078
/*
 * Get an HTTP document's metadata
 *
 * Issues a HEAD request, which fills in us, and closes the resulting
 * stream straight away.  Returns 0 on success and -1 if the request
 * failed.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
    FILE *stream;

    stream = _http_request(URL, "HEAD", us, _http_get_proxy(), flags);
    if (stream == NULL)
        return -1;

    fclose(stream);
    return 0;
}
1092
1093/*
1094 * List a directory
1095 */
1096struct url_ent *
1097fetchListHTTP(struct url *url __unused, const char *flags __unused)
1098{
1099    warnx("fetchListHTTP(): not implemented");
1100    return NULL;
1101}
1102