1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
 * Copyright (c) 2000-2014 Dag-Erling Smørgrav
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34/*
35 * The following copyright applies to the base64 code:
36 *
37 *-
38 * Copyright 1997 Massachusetts Institute of Technology
39 *
40 * Permission to use, copy, modify, and distribute this software and
41 * its documentation for any purpose and without fee is hereby
42 * granted, provided that both the above copyright notice and this
43 * permission notice appear in all copies, that both the above
44 * copyright notice and this permission notice appear in all
45 * supporting documentation, and that the name of M.I.T. not be used
46 * in advertising or publicity pertaining to distribution of the
47 * software without specific, written prior permission.  M.I.T. makes
48 * no representations about the suitability of this software for any
49 * purpose.  It is provided "as is" without express or implied
50 * warranty.
51 *
52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65
66#include <sys/param.h>
67#include <sys/socket.h>
68#include <sys/time.h>
69
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
82
83#ifdef WITH_SSL
84#include <openssl/md5.h>
85#define MD5Init(c) MD5_Init(c)
86#define MD5Update(c, data, len) MD5_Update(c, data, len)
87#define MD5Final(md, c) MD5_Final(md, c)
88#else
89#include <md5.h>
90#endif
91
92#include <netinet/in.h>
93#include <netinet/tcp.h>
94
95#include "fetch.h"
96#include "common.h"
97#include "httperr.h"
98
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 20

/*
 * Symbolic names for the reply codes that get special treatment.
 * HTTP_PROTOCOL_ERROR is not a real HTTP status: http_get_reply()
 * returns it when the status line is malformed.
 */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_USE_PROXY		305
#define HTTP_TEMP_REDIRECT	307
#define HTTP_PERM_REDIRECT	308
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
#define HTTP_PROTOCOL_ERROR	999

/* True if xyz is one of the redirect reply codes */
#define HTTP_REDIRECT(xyz)			\
	((xyz) == HTTP_MOVED_PERM ||		\
	 (xyz) == HTTP_MOVED_TEMP ||		\
	 (xyz) == HTTP_TEMP_REDIRECT ||		\
	 (xyz) == HTTP_PERM_REDIRECT ||		\
	 (xyz) == HTTP_USE_PROXY ||		\
	 (xyz) == HTTP_SEE_OTHER)

/* True if xyz lies in the 4xx or 5xx range */
#define HTTP_ERROR(xyz)	((xyz) >= 400 && (xyz) <= 599)
125
126
127/*****************************************************************************
128 * I/O functions for decoding chunked streams
129 */
130
/*
 * State of one HTTP body stream: the connection it reads from, a
 * buffer holding data already pulled off the connection, and the
 * bookkeeping needed to decode chunked transfer encoding.
 */
struct httpio
{
	conn_t		*conn;		/* connection */
	int		 chunked;	/* chunked mode */
	char		*buf;		/* chunk buffer */
	size_t		 bufsize;	/* size of chunk buffer */
	size_t		 buflen;	/* amount of data currently in buffer */
	size_t		 bufpos;	/* current read offset in buffer */
	int		 eof;		/* end-of-file flag */
	int		 error;		/* error flag; holds an errno value */
	size_t		 chunksize;	/* remaining size of current chunk */
#ifndef NDEBUG
	size_t		 total;		/* sum of chunk sizes, for debug output */
#endif
};
146
147/*
148 * Get next chunk header
149 */
150static int
151http_new_chunk(struct httpio *io)
152{
153	char *p;
154
155	if (fetch_getln(io->conn) == -1)
156		return (-1);
157
158	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
159		return (-1);
160
161	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
162		if (*p == ';')
163			break;
164		if (!isxdigit((unsigned char)*p))
165			return (-1);
166		if (isdigit((unsigned char)*p)) {
167			io->chunksize = io->chunksize * 16 +
168			    *p - '0';
169		} else {
170			io->chunksize = io->chunksize * 16 +
171			    10 + tolower((unsigned char)*p) - 'a';
172		}
173	}
174
175#ifndef NDEBUG
176	if (fetchDebug) {
177		io->total += io->chunksize;
178		if (io->chunksize == 0)
179			fprintf(stderr, "%s(): end of last chunk\n", __func__);
180		else
181			fprintf(stderr, "%s(): new chunk: %lu (%lu)\n",
182			    __func__, (unsigned long)io->chunksize,
183			    (unsigned long)io->total);
184	}
185#endif
186
187	return (io->chunksize);
188}
189
190/*
191 * Grow the input buffer to at least len bytes
192 */
193static inline int
194http_growbuf(struct httpio *io, size_t len)
195{
196	char *tmp;
197
198	if (io->bufsize >= len)
199		return (0);
200
201	if ((tmp = realloc(io->buf, len)) == NULL)
202		return (-1);
203	io->buf = tmp;
204	io->bufsize = len;
205	return (0);
206}
207
208/*
209 * Fill the input buffer, do chunk decoding on the fly
210 */
211static ssize_t
212http_fillbuf(struct httpio *io, size_t len)
213{
214	ssize_t nbytes;
215	char ch;
216
217	if (io->error)
218		return (-1);
219	if (io->eof)
220		return (0);
221
222	/* not chunked: just fetch the requested amount */
223	if (io->chunked == 0) {
224		if (http_growbuf(io, len) == -1)
225			return (-1);
226		if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) {
227			io->error = errno;
228			return (-1);
229		}
230		io->buflen = nbytes;
231		io->bufpos = 0;
232		return (io->buflen);
233	}
234
235	/* chunked, but we ran out: get the next chunk header */
236	if (io->chunksize == 0) {
237		switch (http_new_chunk(io)) {
238		case -1:
239			io->error = EPROTO;
240			return (-1);
241		case 0:
242			io->eof = 1;
243			return (0);
244		}
245	}
246
247	/* fetch the requested amount, but no more than the current chunk */
248	if (len > io->chunksize)
249		len = io->chunksize;
250	if (http_growbuf(io, len) == -1)
251		return (-1);
252	if ((nbytes = fetch_read(io->conn, io->buf, len)) == -1) {
253		io->error = errno;
254		return (-1);
255	}
256	io->bufpos = 0;
257	io->buflen = nbytes;
258	io->chunksize -= nbytes;
259
260	if (io->chunksize == 0) {
261		if (fetch_read(io->conn, &ch, 1) != 1 || ch != '\r' ||
262		    fetch_read(io->conn, &ch, 1) != 1 || ch != '\n')
263			return (-1);
264	}
265
266	return (io->buflen);
267}
268
269/*
270 * Read function
271 */
272static int
273http_readfn(void *v, char *buf, int len)
274{
275	struct httpio *io = (struct httpio *)v;
276	int rlen;
277
278	if (io->error)
279		return (-1);
280	if (io->eof)
281		return (0);
282
283	/* empty buffer */
284	if (!io->buf || io->bufpos == io->buflen) {
285		if ((rlen = http_fillbuf(io, len)) < 0) {
286			if ((errno = io->error) == EINTR)
287				io->error = 0;
288			return (-1);
289		} else if (rlen == 0) {
290			return (0);
291		}
292	}
293
294	rlen = io->buflen - io->bufpos;
295	if (len < rlen)
296		rlen = len;
297	memcpy(buf, io->buf + io->bufpos, rlen);
298	io->bufpos += rlen;
299	return (rlen);
300}
301
302/*
303 * Write function
304 */
305static int
306http_writefn(void *v, const char *buf, int len)
307{
308	struct httpio *io = (struct httpio *)v;
309
310	return (fetch_write(io->conn, buf, len));
311}
312
313/*
314 * Close function
315 */
316static int
317http_closefn(void *v)
318{
319	struct httpio *io = (struct httpio *)v;
320	int r;
321
322	r = fetch_close(io->conn);
323	if (io->buf)
324		free(io->buf);
325	free(io);
326	return (r);
327}
328
329/*
330 * Wrap a file descriptor up
331 */
332static FILE *
333http_funopen(conn_t *conn, int chunked)
334{
335	struct httpio *io;
336	FILE *f;
337
338	if ((io = calloc(1, sizeof(*io))) == NULL) {
339		fetch_syserr();
340		return (NULL);
341	}
342	io->conn = conn;
343	io->chunked = chunked;
344	f = funopen(io, http_readfn, http_writefn, NULL, http_closefn);
345	if (f == NULL) {
346		fetch_syserr();
347		free(io);
348		return (NULL);
349	}
350	return (f);
351}
352
353
354/*****************************************************************************
355 * Helper functions for talking to the server and parsing its replies
356 */
357
/*
 * Header types, as returned by http_next_header().  hdr_end marks the
 * empty line that ends the header area; hdr_unknown is any header that
 * is not listed in hdr_names[].
 */
typedef enum {
	hdr_syserror = -2,	/* reading a line or allocating memory failed */
	hdr_error = -1,
	hdr_end = 0,
	hdr_unknown = 1,
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate,
	hdr_proxy_authenticate,
} hdr_t;
372
/*
 * Names of interesting headers, mapped to their hdr_t code.  The table
 * is terminated by the hdr_unknown entry, which is what the lookup loop
 * in http_next_header() stops on.
 */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_proxy_authenticate,	"Proxy-Authenticate" },
	{ hdr_unknown,			NULL },
};
387
388/*
389 * Send a formatted line; optionally echo to terminal
390 */
391static int
392http_cmd(conn_t *conn, const char *fmt, ...)
393{
394	va_list ap;
395	size_t len;
396	char *msg;
397	int r;
398
399	va_start(ap, fmt);
400	len = vasprintf(&msg, fmt, ap);
401	va_end(ap);
402
403	if (msg == NULL) {
404		errno = ENOMEM;
405		fetch_syserr();
406		return (-1);
407	}
408
409	r = fetch_putln(conn, msg, len);
410	free(msg);
411
412	if (r == -1) {
413		fetch_syserr();
414		return (-1);
415	}
416
417	return (0);
418}
419
420/*
421 * Get and parse status line
422 */
423static int
424http_get_reply(conn_t *conn)
425{
426	char *p;
427
428	if (fetch_getln(conn) == -1)
429		return (-1);
430	/*
431	 * A valid status line looks like "HTTP/m.n xyz reason" where m
432	 * and n are the major and minor protocol version numbers and xyz
433	 * is the reply code.
434	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
435	 * just one) that do not send a version number, so we can't rely
436	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
437	 * We don't care about the reason phrase.
438	 */
439	if (strncmp(conn->buf, "HTTP", 4) != 0)
440		return (HTTP_PROTOCOL_ERROR);
441	p = conn->buf + 4;
442	if (*p == '/') {
443		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
444			return (HTTP_PROTOCOL_ERROR);
445		p += 4;
446	}
447	if (*p != ' ' ||
448	    !isdigit((unsigned char)p[1]) ||
449	    !isdigit((unsigned char)p[2]) ||
450	    !isdigit((unsigned char)p[3]))
451		return (HTTP_PROTOCOL_ERROR);
452
453	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
454	return (conn->err);
455}
456
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name is compared case-insensitively and must be followed
 * immediately by a colon.  On a match the returned pointer is past the
 * colon and any whitespace after it (it may point at the terminating
 * NUL if the value is empty); otherwise NULL is returned.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Only step past characters that actually matched; advancing
	 * inside the comparison would skip over a mismatching final
	 * character and accept e.g. "Locatiox:" for "Location".
	 */
	while (*str != '\0' && *hdr != '\0' &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		str++;
		hdr++;
	}
	if (*str != '\0' || *hdr != ':')
		return (NULL);

	/* skip the colon, then any whitespace in front of the value */
	do {
		hdr++;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
473
474
475/*
476 * Get the next header and return the appropriate symbolic code.  We
477 * need to read one line ahead for checking for a continuation line
478 * belonging to the current header (continuation lines start with
479 * white space).
480 *
481 * We get called with a fresh line already in the conn buffer, either
482 * from the previous http_next_header() invocation, or, the first
483 * time, from a fetch_getln() performed by our caller.
484 *
 * This stops when we encounter an empty line (we don't read beyond the
 * header area).
 *
 * Note that the "headerbuf" is just a place to return the result. Its
 * contents are not used for the next call. This means that no cleanup
 * is needed when, e.g., doing another connection; just call the cleanup
 * when fully done to deallocate memory.
492 */
493
/* Limit the max number of continuation lines to some reasonable value */
#define HTTP_MAX_CONT_LINES 10

/*
 * Place into which to build a header from one or several lines.
 * buf is heap-allocated and grown on demand by http_next_header();
 * it is released by clean_http_headerbuf().
 */
typedef struct {
	char	*buf;		/* buffer */
	size_t	 bufsize;	/* buffer size */
	size_t	 buflen;	/* length of buffer contents */
} http_headerbuf_t;
503
504static void
505init_http_headerbuf(http_headerbuf_t *buf)
506{
507	buf->buf = NULL;
508	buf->bufsize = 0;
509	buf->buflen = 0;
510}
511
512static void
513clean_http_headerbuf(http_headerbuf_t *buf)
514{
515	if (buf->buf)
516		free(buf->buf);
517	init_http_headerbuf(buf);
518}
519
520/* Remove whitespace at the end of the buffer */
521static void
522http_conn_trimright(conn_t *conn)
523{
524	while (conn->buflen &&
525	       isspace((unsigned char)conn->buf[conn->buflen - 1]))
526		conn->buflen--;
527	conn->buf[conn->buflen] = '\0';
528}
529
530static hdr_t
531http_next_header(conn_t *conn, http_headerbuf_t *hbuf, const char **p)
532{
533	unsigned int i, len;
534
535	/*
536	 * Have to do the stripping here because of the first line. So
537	 * it's done twice for the subsequent lines. No big deal
538	 */
539	http_conn_trimright(conn);
540	if (conn->buflen == 0)
541		return (hdr_end);
542
543	/* Copy the line to the headerbuf */
544	if (hbuf->bufsize < conn->buflen + 1) {
545		if ((hbuf->buf = realloc(hbuf->buf, conn->buflen + 1)) == NULL)
546			return (hdr_syserror);
547		hbuf->bufsize = conn->buflen + 1;
548	}
549	strcpy(hbuf->buf, conn->buf);
550	hbuf->buflen = conn->buflen;
551
552	/*
553	 * Fetch possible continuation lines. Stop at 1st non-continuation
554	 * and leave it in the conn buffer
555	 */
556	for (i = 0; i < HTTP_MAX_CONT_LINES; i++) {
557		if (fetch_getln(conn) == -1)
558			return (hdr_syserror);
559
560		/*
561		 * Note: we carry on the idea from the previous version
562		 * that a pure whitespace line is equivalent to an empty
563		 * one (so it's not continuation and will be handled when
564		 * we are called next)
565		 */
566		http_conn_trimright(conn);
567		if (conn->buf[0] != ' ' && conn->buf[0] != "\t"[0])
568			break;
569
570		/* Got a continuation line. Concatenate to previous */
571		len = hbuf->buflen + conn->buflen;
572		if (hbuf->bufsize < len + 1) {
573			len *= 2;
574			if ((hbuf->buf = realloc(hbuf->buf, len + 1)) == NULL)
575				return (hdr_syserror);
576			hbuf->bufsize = len + 1;
577		}
578		strcpy(hbuf->buf + hbuf->buflen, conn->buf);
579		hbuf->buflen += conn->buflen;
580	}
581
582	/*
583	 * We could check for malformed headers but we don't really care.
584	 * A valid header starts with a token immediately followed by a
585	 * colon; a token is any sequence of non-control, non-whitespace
586	 * characters except "()<>@,;:\\\"{}".
587	 */
588	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
589		if ((*p = http_match(hdr_names[i].name, hbuf->buf)) != NULL)
590			return (hdr_names[i].num);
591
592	return (hdr_unknown);
593}
594
595/**************************
596 * [Proxy-]Authenticate header parsing
597 */
598
/*
 * Copy a double-quoted string into obuf, undoing backslash quoting.
 * cp points just past the opening '"'; obuf is supplied by the caller,
 * who must make sure it is large enough.  obuf is always
 * NUL-terminated on return.
 * Returns a pointer to the character following the closing quote, or
 * NULL if the input ends before the string is closed.
 */
static const char *
http_parse_headerstring(const char *cp, char *obuf)
{
	char c;

	while ((c = *cp) != '\0' && c != '"') {
		if (c == '\\') {
			/* take the escaped character literally */
			c = *++cp;
			if (c == '\0')
				break;
		}
		*obuf++ = c;
		cp++;
	}
	*obuf = '\0';

	/* c is '"' only when the loop stopped on the closing quote */
	return (c == '"' ? cp + 1 : NULL);
}
629
/* Http auth challenge schemes */
typedef enum {HTTPAS_UNKNOWN, HTTPAS_BASIC,HTTPAS_DIGEST} http_auth_schemes_t;

/*
 * Data holder for a Basic or Digest challenge.  The string members are
 * heap-allocated copies of the attribute values (NULL when the
 * attribute was not seen) and are released by
 * clean_http_auth_challenge().
 */
typedef struct {
	http_auth_schemes_t scheme;
	char	*realm;
	char	*qop;
	char	*nonce;
	char	*opaque;
	char	*algo;		/* value of the "algorithm" attribute */
	int	 stale;		/* set from the "stale" attribute */
	int	 nc; /* Nonce count */
} http_auth_challenge_t;
644
645static void
646init_http_auth_challenge(http_auth_challenge_t *b)
647{
648	b->scheme = HTTPAS_UNKNOWN;
649	b->realm = b->qop = b->nonce = b->opaque = b->algo = NULL;
650	b->stale = b->nc = 0;
651}
652
653static void
654clean_http_auth_challenge(http_auth_challenge_t *b)
655{
656	if (b->realm)
657		free(b->realm);
658	if (b->qop)
659		free(b->qop);
660	if (b->nonce)
661		free(b->nonce);
662	if (b->opaque)
663		free(b->opaque);
664	if (b->algo)
665		free(b->algo);
666	init_http_auth_challenge(b);
667}
668
/*
 * Data holder for an array of challenges offered in an http response.
 * Each non-NULL entry of challenges[] is a heap-allocated challenge.
 */
#define MAX_CHALLENGES 10
typedef struct {
	http_auth_challenge_t *challenges[MAX_CHALLENGES];
	int	count; /* Number of parsed challenges in the array */
	int	valid; /* We did parse an authenticate header */
} http_auth_challenges_t;
676
677static void
678init_http_auth_challenges(http_auth_challenges_t *cs)
679{
680	int i;
681	for (i = 0; i < MAX_CHALLENGES; i++)
682		cs->challenges[i] = NULL;
683	cs->count = cs->valid = 0;
684}
685
686static void
687clean_http_auth_challenges(http_auth_challenges_t *cs)
688{
689	int i;
690	/* We rely on non-zero pointers being allocated, not on the count */
691	for (i = 0; i < MAX_CHALLENGES; i++) {
692		if (cs->challenges[i] != NULL) {
693			clean_http_auth_challenge(cs->challenges[i]);
694			free(cs->challenges[i]);
695		}
696	}
697	init_http_auth_challenges(cs);
698}
699
/*
 * Enumeration for lexical elements. Separators will be returned as their own
 * ascii value, which is why the named elements start at 256: they can
 * never be confused with a single character.
 */
typedef enum {HTTPHL_WORD=256, HTTPHL_STRING=257, HTTPHL_END=258,
	      HTTPHL_ERROR = 259} http_header_lex_t;
706
707/*
708 * Determine what kind of token comes next and return possible value
709 * in buf, which is supposed to have been allocated big enough by
710 * caller. Advance input pointer and return element type.
711 */
712static int
713http_header_lex(const char **cpp, char *buf)
714{
715	size_t l;
716	/* Eat initial whitespace */
717	*cpp += strspn(*cpp, " \t");
718	if (**cpp == 0)
719		return (HTTPHL_END);
720
721	/* Separator ? */
722	if (**cpp == ',' || **cpp == '=')
723		return (*((*cpp)++));
724
725	/* String ? */
726	if (**cpp == '"') {
727		*cpp = http_parse_headerstring(++*cpp, buf);
728		if (*cpp == NULL)
729			return (HTTPHL_ERROR);
730		return (HTTPHL_STRING);
731	}
732
733	/* Read other token, until separator or whitespace */
734	l = strcspn(*cpp, " \t,=");
735	memcpy(buf, *cpp, l);
736	buf[l] = 0;
737	*cpp += l;
738	return (HTTPHL_WORD);
739}
740
741/*
742 * Read challenges from http xxx-authenticate header and accumulate them
743 * in the challenges list structure.
744 *
745 * Headers with multiple challenges are specified by rfc2617, but
746 * servers (ie: squid) often send them in separate headers instead,
747 * which in turn is forbidden by the http spec (multiple headers with
748 * the same name are only allowed for pure comma-separated lists, see
749 * rfc2616 sec 4.2).
750 *
751 * We support both approaches anyway
752 */
753static int
754http_parse_authenticate(const char *cp, http_auth_challenges_t *cs)
755{
756	int ret = -1;
757	http_header_lex_t lex;
758	char *key = malloc(strlen(cp) + 1);
759	char *value = malloc(strlen(cp) + 1);
760	char *buf = malloc(strlen(cp) + 1);
761
762	if (key == NULL || value == NULL || buf == NULL) {
763		fetch_syserr();
764		goto out;
765	}
766
767	/* In any case we've seen the header and we set the valid bit */
768	cs->valid = 1;
769
770	/* Need word first */
771	lex = http_header_lex(&cp, key);
772	if (lex != HTTPHL_WORD)
773		goto out;
774
775	/* Loop on challenges */
776	for (; cs->count < MAX_CHALLENGES; cs->count++) {
777		cs->challenges[cs->count] =
778			malloc(sizeof(http_auth_challenge_t));
779		if (cs->challenges[cs->count] == NULL) {
780			fetch_syserr();
781			goto out;
782		}
783		init_http_auth_challenge(cs->challenges[cs->count]);
784		if (strcasecmp(key, "basic") == 0) {
785			cs->challenges[cs->count]->scheme = HTTPAS_BASIC;
786		} else if (strcasecmp(key, "digest") == 0) {
787			cs->challenges[cs->count]->scheme = HTTPAS_DIGEST;
788		} else {
789			cs->challenges[cs->count]->scheme = HTTPAS_UNKNOWN;
790			/*
791			 * Continue parsing as basic or digest may
792			 * follow, and the syntax is the same for
793			 * all. We'll just ignore this one when
794			 * looking at the list
795			 */
796		}
797
798		/* Loop on attributes */
799		for (;;) {
800			/* Key */
801			lex = http_header_lex(&cp, key);
802			if (lex != HTTPHL_WORD)
803				goto out;
804
805			/* Equal sign */
806			lex = http_header_lex(&cp, buf);
807			if (lex != '=')
808				goto out;
809
810			/* Value */
811			lex = http_header_lex(&cp, value);
812			if (lex != HTTPHL_WORD && lex != HTTPHL_STRING)
813				goto out;
814
815			if (strcasecmp(key, "realm") == 0) {
816				cs->challenges[cs->count]->realm =
817				    strdup(value);
818			} else if (strcasecmp(key, "qop") == 0) {
819				cs->challenges[cs->count]->qop =
820				    strdup(value);
821			} else if (strcasecmp(key, "nonce") == 0) {
822				cs->challenges[cs->count]->nonce =
823				    strdup(value);
824			} else if (strcasecmp(key, "opaque") == 0) {
825				cs->challenges[cs->count]->opaque =
826				    strdup(value);
827			} else if (strcasecmp(key, "algorithm") == 0) {
828				cs->challenges[cs->count]->algo =
829				    strdup(value);
830			} else if (strcasecmp(key, "stale") == 0) {
831				cs->challenges[cs->count]->stale =
832				    strcasecmp(value, "no");
833			} else {
834				/* ignore unknown attributes */
835			}
836
837			/* Comma or Next challenge or End */
838			lex = http_header_lex(&cp, key);
839			/*
840			 * If we get a word here, this is the beginning of the
841			 * next challenge. Break the attributes loop
842			 */
843			if (lex == HTTPHL_WORD)
844				break;
845
846			if (lex == HTTPHL_END) {
847				/* End while looking for ',' is normal exit */
848				cs->count++;
849				ret = 0;
850				goto out;
851			}
852			/* Anything else is an error */
853			if (lex != ',')
854				goto out;
855
856		} /* End attributes loop */
857	} /* End challenge loop */
858
859	/*
860	 * Challenges max count exceeded. This really can't happen
861	 * with normal data, something's fishy -> error
862	 */
863
864out:
865	if (key)
866		free(key);
867	if (value)
868		free(value);
869	if (buf)
870		free(buf);
871	return (ret);
872}
873
874
/*
 * Parse a last-modified header
 *
 * p is the header value; on success the corresponding time is stored
 * in *mtime and 0 is returned.  Returns -1 if the value is not in the
 * "Day, DD Mon YYYY HH:MM:SS GMT" form (UTC is accepted in place of
 * GMT).  The LC_TIME locale is switched to "C" while parsing so that
 * day and month names are matched in English, and restored afterwards.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char locale[64], *r;
	struct tm tm;

	/*
	 * strptime() only fills in the fields named by the format;
	 * clear the rest so timegm() never sees indeterminate values.
	 */
	memset(&tm, 0, sizeof(tm));

	strlcpy(locale, setlocale(LC_TIME, NULL), sizeof(locale));
	setlocale(LC_TIME, "C");
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/*
	 * Some proxies use UTC in response, but it should still be
	 * parsed. RFC2616 states GMT and UTC are exactly equal for HTTP.
	 */
	if (r == NULL)
		r = strptime(p, "%a, %d %b %Y %H:%M:%S UTC", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	if (r == NULL)
		return (-1);
	DEBUGF("last modified: [%04d-%02d-%02d %02d:%02d:%02d]\n",
	    tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
	    tm.tm_hour, tm.tm_min, tm.tm_sec);
	*mtime = timegm(&tm);
	return (0);
}
903
/*
 * Parse a content-length header
 *
 * p must consist of decimal digits only.  On success the value is
 * stored in *length and 0 is returned; -1 is returned if p contains
 * anything else or if the value does not fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/* largest value a (signed) off_t can hold */
	const off_t off_max =
	    (off_t)((1ULL << (sizeof(off_t) * CHAR_BIT - 1)) - 1);
	off_t len;
	int digit;

	for (len = 0; *p && isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* signed overflow is undefined; reject before it happens */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	DEBUGF("content length: [%lld]\n", (long long)len);
	*length = len;
	return (0);
}
920
/*
 * Read a run of decimal digits starting at *pp into *out and advance
 * *pp past them.  An empty run yields 0.  Returns 0 on success, -1 if
 * the value does not fit in an off_t.
 */
static int
http_parse_range_num(const char **pp, off_t *out)
{
	/* largest value a (signed) off_t can hold */
	const off_t off_max =
	    (off_t)((1ULL << (sizeof(off_t) * CHAR_BIT - 1)) - 1);
	const char *p = *pp;
	off_t val = 0;
	int digit;

	for (; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (val > (off_max - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = p;
	*out = val;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepts "bytes first-last/size" and "bytes * /size" (without the
 * blank).  On success *offset receives first (-1 for the '*' form),
 * *length the number of bytes in the range (0 for the '*' form) and
 * *size the total size, and 0 is returned.  Returns -1 if the value is
 * malformed, if a number does not fit in an off_t, if first > last, or
 * if size is smaller than the range.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_parse_range_num(&p, &first) == -1)
			return (-1);
		if (*p != '-')
			return (-1);
		++p;
		if (http_parse_range_num(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_parse_range_num(&p, &len) == -1)
		return (-1);
	/*
	 * Same test as "len < last - first + 1", written so that the
	 * right-hand side cannot overflow.
	 */
	if (*p || len <= last - first)
		return (-1);
	if (first == -1) {
		DEBUGF("content range: [*/%lld]\n", (long long)len);
		*length = 0;
	} else {
		DEBUGF("content range: [%lld-%lld/%lld]\n",
		    (long long)first, (long long)last, (long long)len);
		*length = last - first + 1;
	}
	*offset = first;
	*size = len;
	return (0);
}
961
962
963/*****************************************************************************
964 * Helper functions for authorization
965 */
966
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated NUL-terminated string, padded with '=' to a multiple
 * of four characters.  The caller frees the result.  Returns NULL if
 * memory cannot be allocated.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed char, bytes
	 * above 0x7f would be sign-extended and corrupt the other bits
	 * of the 24-bit group.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full groups: three input bytes become four output characters */
	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* trailing one or two bytes, padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
1022
1023
/*
 * Extract authorization parameters from environment value.
 * The value is like scheme:realm:user:pass
 *
 * Each member is a heap-allocated string, or NULL when unset; they are
 * filled in by http_authfromenv() and released by
 * clean_http_auth_params().
 */
typedef struct {
	char	*scheme;
	char	*realm;
	char	*user;
	char	*password;
} http_auth_params_t;
1034
1035static void
1036init_http_auth_params(http_auth_params_t *s)
1037{
1038	s->scheme = s->realm = s->user = s->password = NULL;
1039}
1040
1041static void
1042clean_http_auth_params(http_auth_params_t *s)
1043{
1044	if (s->scheme)
1045		free(s->scheme);
1046	if (s->realm)
1047		free(s->realm);
1048	if (s->user)
1049		free(s->user);
1050	if (s->password)
1051		free(s->password);
1052	init_http_auth_params(s);
1053}
1054
1055static int
1056http_authfromenv(const char *p, http_auth_params_t *parms)
1057{
1058	int ret = -1;
1059	char *v, *ve;
1060	char *str = strdup(p);
1061
1062	if (str == NULL) {
1063		fetch_syserr();
1064		return (-1);
1065	}
1066	v = str;
1067
1068	if ((ve = strchr(v, ':')) == NULL)
1069		goto out;
1070
1071	*ve = 0;
1072	if ((parms->scheme = strdup(v)) == NULL) {
1073		fetch_syserr();
1074		goto out;
1075	}
1076	v = ve + 1;
1077
1078	if ((ve = strchr(v, ':')) == NULL)
1079		goto out;
1080
1081	*ve = 0;
1082	if ((parms->realm = strdup(v)) == NULL) {
1083		fetch_syserr();
1084		goto out;
1085	}
1086	v = ve + 1;
1087
1088	if ((ve = strchr(v, ':')) == NULL)
1089		goto out;
1090
1091	*ve = 0;
1092	if ((parms->user = strdup(v)) == NULL) {
1093		fetch_syserr();
1094		goto out;
1095	}
1096	v = ve + 1;
1097
1098
1099	if ((parms->password = strdup(v)) == NULL) {
1100		fetch_syserr();
1101		goto out;
1102	}
1103	ret = 0;
1104out:
1105	if (ret == -1)
1106		clean_http_auth_params(parms);
1107	if (str)
1108		free(str);
1109	return (ret);
1110}
1111
1112
/*
 * Digest response: the code to compute the digest is taken from the
 * sample implementation in RFC 2617
 */
/* Parameter direction markers used by the digest code below */
#define IN const
#define OUT

/* A raw MD5 digest and its representation as a hexadecimal string */
#define HASHLEN 16
typedef char HASH[HASHLEN];
#define HASHHEXLEN 32
typedef char HASHHEX[HASHHEXLEN+1];

static const char *hexchars = "0123456789abcdef";

/*
 * Convert the HASHLEN bytes of Bin into HASHHEXLEN lower-case
 * hexadecimal digits in Hex, followed by a terminating NUL.
 */
static void
CvtHex(IN HASH Bin, OUT HASHHEX Hex)
{
	unsigned short i;
	unsigned char j;

	for (i = 0; i < HASHLEN; i++) {
		/* go through unsigned char so no negative value is shifted */
		j = (unsigned char)Bin[i];
		Hex[i*2] = hexchars[(j >> 4) & 0xf];
		Hex[i*2+1] = hexchars[j & 0xf];
	}
	Hex[HASHHEXLEN] = '\0';
}
1140
1141/* calculate H(A1) as per spec */
1142static void
1143DigestCalcHA1(
1144	IN char * pszAlg,
1145	IN char * pszUserName,
1146	IN char * pszRealm,
1147	IN char * pszPassword,
1148	IN char * pszNonce,
1149	IN char * pszCNonce,
1150	OUT HASHHEX SessionKey
1151	)
1152{
1153	MD5_CTX Md5Ctx;
1154	HASH HA1;
1155
1156	MD5Init(&Md5Ctx);
1157	MD5Update(&Md5Ctx, pszUserName, strlen(pszUserName));
1158	MD5Update(&Md5Ctx, ":", 1);
1159	MD5Update(&Md5Ctx, pszRealm, strlen(pszRealm));
1160	MD5Update(&Md5Ctx, ":", 1);
1161	MD5Update(&Md5Ctx, pszPassword, strlen(pszPassword));
1162	MD5Final(HA1, &Md5Ctx);
1163	if (strcasecmp(pszAlg, "md5-sess") == 0) {
1164
1165		MD5Init(&Md5Ctx);
1166		MD5Update(&Md5Ctx, HA1, HASHLEN);
1167		MD5Update(&Md5Ctx, ":", 1);
1168		MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce));
1169		MD5Update(&Md5Ctx, ":", 1);
1170		MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce));
1171		MD5Final(HA1, &Md5Ctx);
1172	}
1173	CvtHex(HA1, SessionKey);
1174}
1175
1176/* calculate request-digest/response-digest as per HTTP Digest spec */
1177static void
1178DigestCalcResponse(
1179	IN HASHHEX HA1,           /* H(A1) */
1180	IN char * pszNonce,       /* nonce from server */
1181	IN char * pszNonceCount,  /* 8 hex digits */
1182	IN char * pszCNonce,      /* client nonce */
1183	IN char * pszQop,         /* qop-value: "", "auth", "auth-int" */
1184	IN char * pszMethod,      /* method from the request */
1185	IN char * pszDigestUri,   /* requested URL */
1186	IN HASHHEX HEntity,       /* H(entity body) if qop="auth-int" */
1187	OUT HASHHEX Response      /* request-digest or response-digest */
1188	)
1189{
1190#if 0
1191	DEBUGF("Calc: HA1[%s] Nonce[%s] qop[%s] method[%s] URI[%s]\n",
1192	    HA1, pszNonce, pszQop, pszMethod, pszDigestUri);
1193#endif
1194	MD5_CTX Md5Ctx;
1195	HASH HA2;
1196	HASH RespHash;
1197	HASHHEX HA2Hex;
1198
1199	// calculate H(A2)
1200	MD5Init(&Md5Ctx);
1201	MD5Update(&Md5Ctx, pszMethod, strlen(pszMethod));
1202	MD5Update(&Md5Ctx, ":", 1);
1203	MD5Update(&Md5Ctx, pszDigestUri, strlen(pszDigestUri));
1204	if (strcasecmp(pszQop, "auth-int") == 0) {
1205		MD5Update(&Md5Ctx, ":", 1);
1206		MD5Update(&Md5Ctx, HEntity, HASHHEXLEN);
1207	}
1208	MD5Final(HA2, &Md5Ctx);
1209	CvtHex(HA2, HA2Hex);
1210
1211	// calculate response
1212	MD5Init(&Md5Ctx);
1213	MD5Update(&Md5Ctx, HA1, HASHHEXLEN);
1214	MD5Update(&Md5Ctx, ":", 1);
1215	MD5Update(&Md5Ctx, pszNonce, strlen(pszNonce));
1216	MD5Update(&Md5Ctx, ":", 1);
1217	if (*pszQop) {
1218		MD5Update(&Md5Ctx, pszNonceCount, strlen(pszNonceCount));
1219		MD5Update(&Md5Ctx, ":", 1);
1220		MD5Update(&Md5Ctx, pszCNonce, strlen(pszCNonce));
1221		MD5Update(&Md5Ctx, ":", 1);
1222		MD5Update(&Md5Ctx, pszQop, strlen(pszQop));
1223		MD5Update(&Md5Ctx, ":", 1);
1224	}
1225	MD5Update(&Md5Ctx, HA2Hex, HASHHEXLEN);
1226	MD5Final(RespHash, &Md5Ctx);
1227	CvtHex(RespHash, Response);
1228}
1229
1230/*
1231 * Generate/Send a Digest authorization header
1232 * This looks like: [Proxy-]Authorization: credentials
1233 *
1234 *  credentials      = "Digest" digest-response
1235 *  digest-response  = 1#( username | realm | nonce | digest-uri
1236 *                      | response | [ algorithm ] | [cnonce] |
1237 *                      [opaque] | [message-qop] |
1238 *                          [nonce-count]  | [auth-param] )
1239 *  username         = "username" "=" username-value
1240 *  username-value   = quoted-string
1241 *  digest-uri       = "uri" "=" digest-uri-value
1242 *  digest-uri-value = request-uri   ; As specified by HTTP/1.1
1243 *  message-qop      = "qop" "=" qop-value
1244 *  cnonce           = "cnonce" "=" cnonce-value
1245 *  cnonce-value     = nonce-value
1246 *  nonce-count      = "nc" "=" nc-value
1247 *  nc-value         = 8LHEX
1248 *  response         = "response" "=" request-digest
1249 *  request-digest = <"> 32LHEX <">
1250 */
1251static int
1252http_digest_auth(conn_t *conn, const char *hdr, http_auth_challenge_t *c,
1253		 http_auth_params_t *parms, struct url *url)
1254{
1255	int r;
1256	char noncecount[10];
1257	char cnonce[40];
1258	char *options = NULL;
1259
1260	if (!c->realm || !c->nonce) {
1261		DEBUGF("realm/nonce not set in challenge\n");
1262		return(-1);
1263	}
1264	if (!c->algo)
1265		c->algo = strdup("");
1266
1267	if (asprintf(&options, "%s%s%s%s",
1268	    *c->algo? ",algorithm=" : "", c->algo,
1269	    c->opaque? ",opaque=" : "", c->opaque?c->opaque:"") < 0)
1270		return (-1);
1271
1272	if (!c->qop) {
1273		c->qop = strdup("");
1274		*noncecount = 0;
1275		*cnonce = 0;
1276	} else {
1277		c->nc++;
1278		sprintf(noncecount, "%08x", c->nc);
1279		/* We don't try very hard with the cnonce ... */
1280		sprintf(cnonce, "%x%lx", getpid(), (unsigned long)time(0));
1281	}
1282
1283	HASHHEX HA1;
1284	DigestCalcHA1(c->algo, parms->user, c->realm,
1285		      parms->password, c->nonce, cnonce, HA1);
1286	DEBUGF("HA1: [%s]\n", HA1);
1287	HASHHEX digest;
1288	DigestCalcResponse(HA1, c->nonce, noncecount, cnonce, c->qop,
1289			   "GET", url->doc, "", digest);
1290
1291	if (c->qop[0]) {
1292		r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\","
1293			     "nonce=\"%s\",uri=\"%s\",response=\"%s\","
1294			     "qop=\"auth\", cnonce=\"%s\", nc=%s%s",
1295			     hdr, parms->user, c->realm,
1296			     c->nonce, url->doc, digest,
1297			     cnonce, noncecount, options);
1298	} else {
1299		r = http_cmd(conn, "%s: Digest username=\"%s\",realm=\"%s\","
1300			     "nonce=\"%s\",uri=\"%s\",response=\"%s\"%s",
1301			     hdr, parms->user, c->realm,
1302			     c->nonce, url->doc, digest, options);
1303	}
1304	if (options)
1305		free(options);
1306	return (r);
1307}
1308
1309/*
1310 * Encode username and password
1311 */
1312static int
1313http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
1314{
1315	char *upw, *auth;
1316	int r;
1317
1318	DEBUGF("basic: usr: [%s]\n", usr);
1319	DEBUGF("basic: pwd: [%s]\n", pwd);
1320	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
1321		return (-1);
1322	auth = http_base64(upw);
1323	free(upw);
1324	if (auth == NULL)
1325		return (-1);
1326	r = http_cmd(conn, "%s: Basic %s", hdr, auth);
1327	free(auth);
1328	return (r);
1329}
1330
1331/*
1332 * Chose the challenge to answer and call the appropriate routine to
1333 * produce the header.
1334 */
1335static int
1336http_authorize(conn_t *conn, const char *hdr, http_auth_challenges_t *cs,
1337	       http_auth_params_t *parms, struct url *url)
1338{
1339	http_auth_challenge_t *digest = NULL;
1340	int i;
1341
1342	/* If user or pass are null we're not happy */
1343	if (!parms->user || !parms->password) {
1344		DEBUGF("NULL usr or pass\n");
1345		return (-1);
1346	}
1347
1348	/* Look for a Digest */
1349	for (i = 0; i < cs->count; i++) {
1350		if (cs->challenges[i]->scheme == HTTPAS_DIGEST)
1351			digest = cs->challenges[i];
1352	}
1353
1354	/* Error if "Digest" was specified and there is no Digest challenge */
1355	if (!digest &&
1356	    (parms->scheme && strcasecmp(parms->scheme, "digest") == 0)) {
1357		DEBUGF("Digest auth in env, not supported by peer\n");
1358		return (-1);
1359	}
1360	/*
1361	 * If "basic" was specified in the environment, or there is no Digest
1362	 * challenge, do the basic thing. Don't need a challenge for this,
1363	 * so no need to check basic!=NULL
1364	 */
1365	if (!digest ||
1366	    (parms->scheme && strcasecmp(parms->scheme, "basic") == 0))
1367		return (http_basic_auth(conn,hdr,parms->user,parms->password));
1368
1369	/* Else, prefer digest. We just checked that it's not NULL */
1370	return (http_digest_auth(conn, hdr, digest, parms, url));
1371}
1372
1373/*****************************************************************************
1374 * Helper functions for connecting to a server or proxy
1375 */
1376
1377/*
1378 * Connect to the correct HTTP server or proxy.
1379 */
1380static conn_t *
1381http_connect(struct url *URL, struct url *purl, const char *flags)
1382{
1383	struct url *curl;
1384	conn_t *conn;
1385	hdr_t h;
1386	http_headerbuf_t headerbuf;
1387	const char *p;
1388	int verbose;
1389	int af, val;
1390	int serrno;
1391	bool isproxyauth = false;
1392	http_auth_challenges_t proxy_challenges;
1393
1394#ifdef INET6
1395	af = AF_UNSPEC;
1396#else
1397	af = AF_INET;
1398#endif
1399
1400	verbose = CHECK_FLAG('v');
1401	if (CHECK_FLAG('4'))
1402		af = AF_INET;
1403#ifdef INET6
1404	else if (CHECK_FLAG('6'))
1405		af = AF_INET6;
1406#endif
1407
1408	curl = (purl != NULL) ? purl : URL;
1409
1410retry:
1411	if ((conn = fetch_connect(curl->host, curl->port, af, verbose)) == NULL)
1412		/* fetch_connect() has already set an error code */
1413		return (NULL);
1414	init_http_headerbuf(&headerbuf);
1415	if (strcmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) {
1416		init_http_auth_challenges(&proxy_challenges);
1417		http_cmd(conn, "CONNECT %s:%d HTTP/1.1", URL->host, URL->port);
1418		http_cmd(conn, "Host: %s:%d", URL->host, URL->port);
1419		if (isproxyauth) {
1420			http_auth_params_t aparams;
1421			init_http_auth_params(&aparams);
1422			if (*purl->user || *purl->pwd) {
1423				aparams.user = strdup(purl->user);
1424				aparams.password = strdup(purl->pwd);
1425			} else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL &&
1426				    *p != '\0') {
1427				if (http_authfromenv(p, &aparams) < 0) {
1428					http_seterr(HTTP_NEED_PROXY_AUTH);
1429					fetch_syserr();
1430					goto ouch;
1431				}
1432			} else if (fetch_netrc_auth(purl) == 0) {
1433				aparams.user = strdup(purl->user);
1434				aparams.password = strdup(purl->pwd);
1435			} else {
1436				/*
1437				 * No auth information found in system - exiting
1438				 * with warning.
1439				 */
1440				warnx("Missing username and/or password set");
1441				fetch_syserr();
1442				goto ouch;
1443			}
1444			http_authorize(conn, "Proxy-Authorization",
1445			    &proxy_challenges, &aparams, purl);
1446			clean_http_auth_params(&aparams);
1447		}
1448		http_cmd(conn, "");
1449		/* Get reply from CONNECT Tunnel attempt */
1450		int httpreply = http_get_reply(conn);
1451		if (httpreply != HTTP_OK) {
1452			http_seterr(httpreply);
1453			/* If the error is a 407/HTTP_NEED_PROXY_AUTH */
1454			if (httpreply == HTTP_NEED_PROXY_AUTH &&
1455			    ! isproxyauth) {
1456				/* Try again with authentication. */
1457				clean_http_headerbuf(&headerbuf);
1458				fetch_close(conn);
1459				isproxyauth = true;
1460				goto retry;
1461			}
1462			goto ouch;
1463		}
1464		/* Read and discard the rest of the proxy response */
1465		if (fetch_getln(conn) < 0) {
1466			fetch_syserr();
1467			goto ouch;
1468		}
1469		do {
1470			switch ((h = http_next_header(conn, &headerbuf, &p))) {
1471			case hdr_syserror:
1472				fetch_syserr();
1473				goto ouch;
1474			case hdr_error:
1475				http_seterr(HTTP_PROTOCOL_ERROR);
1476				goto ouch;
1477			default:
1478				/* ignore */ ;
1479			}
1480		} while (h > hdr_end);
1481	}
1482	if (strcmp(URL->scheme, SCHEME_HTTPS) == 0 &&
1483	    fetch_ssl(conn, URL, verbose) == -1) {
1484		/* grrr */
1485		errno = EAUTH;
1486		fetch_syserr();
1487		goto ouch;
1488	}
1489
1490	val = 1;
1491	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
1492
1493	clean_http_headerbuf(&headerbuf);
1494	return (conn);
1495ouch:
1496	serrno = errno;
1497	clean_http_headerbuf(&headerbuf);
1498	fetch_close(conn);
1499	errno = serrno;
1500	return (NULL);
1501}
1502
1503static struct url *
1504http_get_proxy(struct url * url, const char *flags)
1505{
1506	struct url *purl;
1507	char *p;
1508
1509	if (flags != NULL && strchr(flags, 'd') != NULL)
1510		return (NULL);
1511	if (fetch_no_proxy_match(url->host))
1512		return (NULL);
1513	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
1514	    *p && (purl = fetchParseURL(p))) {
1515		if (!*purl->scheme)
1516			strcpy(purl->scheme, SCHEME_HTTP);
1517		if (!purl->port)
1518			purl->port = fetch_default_proxy_port(purl->scheme);
1519		if (strcmp(purl->scheme, SCHEME_HTTP) == 0)
1520			return (purl);
1521		fetchFreeURL(purl);
1522	}
1523	return (NULL);
1524}
1525
1526static void
1527http_print_html(FILE *out, FILE *in)
1528{
1529	size_t len;
1530	char *line, *p, *q;
1531	int comment, tag;
1532
1533	comment = tag = 0;
1534	while ((line = fgetln(in, &len)) != NULL) {
1535		while (len && isspace((unsigned char)line[len - 1]))
1536			--len;
1537		for (p = q = line; q < line + len; ++q) {
1538			if (comment && *q == '-') {
1539				if (q + 2 < line + len &&
1540				    strcmp(q, "-->") == 0) {
1541					tag = comment = 0;
1542					q += 2;
1543				}
1544			} else if (tag && !comment && *q == '>') {
1545				p = q + 1;
1546				tag = 0;
1547			} else if (!tag && *q == '<') {
1548				if (q > p)
1549					fwrite(p, q - p, 1, out);
1550				tag = 1;
1551				if (q + 3 < line + len &&
1552				    strcmp(q, "<!--") == 0) {
1553					comment = 1;
1554					q += 3;
1555				}
1556			}
1557		}
1558		if (!tag && q > p)
1559			fwrite(p, q - p, 1, out);
1560		fputc('\n', out);
1561	}
1562}
1563
1564
1565/*****************************************************************************
1566 * Core
1567 */
1568
/*
 * Send a request without a body: forwards to http_request_body() with
 * neither a content type nor a body.
 */
FILE *
http_request(struct url *URL, const char *op, struct url_stat *us,
	struct url *purl, const char *flags)
{
	FILE *reply;

	reply = http_request_body(URL, op, us, purl, flags, NULL, NULL);
	return (reply);
}
1576
1577/*
1578 * Send a request and process the reply
1579 *
1580 * XXX This function is way too long, the do..while loop should be split
1581 * XXX off into a separate function.
1582 */
1583FILE *
1584http_request_body(struct url *URL, const char *op, struct url_stat *us,
1585	struct url *purl, const char *flags, const char *content_type,
1586	const char *body)
1587{
1588	char timebuf[80];
1589	char hbuf[MAXHOSTNAMELEN + 7], *host;
1590	conn_t *conn;
1591	struct url *url, *new;
1592	int chunked, direct, ims, noredirect, verbose;
1593	int e, i, n, val;
1594	off_t offset, clength, length, size;
1595	time_t mtime;
1596	const char *p;
1597	FILE *f;
1598	hdr_t h;
1599	struct tm *timestruct;
1600	http_headerbuf_t headerbuf;
1601	http_auth_challenges_t server_challenges;
1602	http_auth_challenges_t proxy_challenges;
1603	size_t body_len;
1604
1605	/* The following calls don't allocate anything */
1606	init_http_headerbuf(&headerbuf);
1607	init_http_auth_challenges(&server_challenges);
1608	init_http_auth_challenges(&proxy_challenges);
1609
1610	direct = CHECK_FLAG('d');
1611	noredirect = CHECK_FLAG('A');
1612	verbose = CHECK_FLAG('v');
1613	ims = CHECK_FLAG('i');
1614
1615	if (direct && purl) {
1616		fetchFreeURL(purl);
1617		purl = NULL;
1618	}
1619
1620	/* try the provided URL first */
1621	url = URL;
1622
1623	n = MAX_REDIRECT;
1624	i = 0;
1625
1626	e = HTTP_PROTOCOL_ERROR;
1627	do {
1628		new = NULL;
1629		chunked = 0;
1630		offset = 0;
1631		clength = -1;
1632		length = -1;
1633		size = -1;
1634		mtime = 0;
1635
1636		/* check port */
1637		if (!url->port)
1638			url->port = fetch_default_port(url->scheme);
1639
1640		/* were we redirected to an FTP URL? */
1641		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
1642			if (strcmp(op, "GET") == 0)
1643				return (ftp_request(url, "RETR", us, purl, flags));
1644			else if (strcmp(op, "HEAD") == 0)
1645				return (ftp_request(url, "STAT", us, purl, flags));
1646		}
1647
1648		/* connect to server or proxy */
1649		if ((conn = http_connect(url, purl, flags)) == NULL)
1650			goto ouch;
1651
1652		/* append port number only if necessary */
1653		host = url->host;
1654		if (url->port != fetch_default_port(url->scheme)) {
1655			snprintf(hbuf, sizeof(hbuf), "%s:%d", host, url->port);
1656			host = hbuf;
1657		}
1658
1659		/* send request */
1660		if (verbose)
1661			fetch_info("requesting %s://%s%s",
1662			    url->scheme, host, url->doc);
1663		if (purl && strcmp(URL->scheme, SCHEME_HTTPS) != 0) {
1664			http_cmd(conn, "%s %s://%s%s HTTP/1.1",
1665			    op, url->scheme, host, url->doc);
1666		} else {
1667			http_cmd(conn, "%s %s HTTP/1.1",
1668			    op, url->doc);
1669		}
1670
1671		if (ims && url->ims_time) {
1672			timestruct = gmtime((time_t *)&url->ims_time);
1673			(void)strftime(timebuf, 80, "%a, %d %b %Y %T GMT",
1674			    timestruct);
1675			if (verbose)
1676				fetch_info("If-Modified-Since: %s", timebuf);
1677			http_cmd(conn, "If-Modified-Since: %s", timebuf);
1678		}
1679		/* virtual host */
1680		http_cmd(conn, "Host: %s", host);
1681
1682		/*
1683		 * Proxy authorization: we only send auth after we received
1684		 * a 407 error. We do not first try basic anyway (changed
1685		 * when support was added for digest-auth)
1686		 */
1687		if (purl && proxy_challenges.valid) {
1688			http_auth_params_t aparams;
1689			init_http_auth_params(&aparams);
1690			if (*purl->user || *purl->pwd) {
1691				aparams.user = strdup(purl->user);
1692				aparams.password = strdup(purl->pwd);
1693			} else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL &&
1694				   *p != '\0') {
1695				if (http_authfromenv(p, &aparams) < 0) {
1696					http_seterr(HTTP_NEED_PROXY_AUTH);
1697					goto ouch;
1698				}
1699			} else if (fetch_netrc_auth(purl) == 0) {
1700				aparams.user = strdup(purl->user);
1701				aparams.password = strdup(purl->pwd);
1702			}
1703			http_authorize(conn, "Proxy-Authorization",
1704				       &proxy_challenges, &aparams, url);
1705			clean_http_auth_params(&aparams);
1706		}
1707
1708		/*
1709		 * Server authorization: we never send "a priori"
1710		 * Basic auth, which used to be done if user/pass were
1711		 * set in the url. This would be weird because we'd send the
1712		 * password in the clear even if Digest is finally to be
1713		 * used (it would have made more sense for the
1714		 * pre-digest version to do this when Basic was specified
1715		 * in the environment)
1716		 */
1717		if (server_challenges.valid) {
1718			http_auth_params_t aparams;
1719			init_http_auth_params(&aparams);
1720			if (*url->user || *url->pwd) {
1721				aparams.user = strdup(url->user);
1722				aparams.password = strdup(url->pwd);
1723			} else if ((p = getenv("HTTP_AUTH")) != NULL &&
1724				   *p != '\0') {
1725				if (http_authfromenv(p, &aparams) < 0) {
1726					http_seterr(HTTP_NEED_AUTH);
1727					goto ouch;
1728				}
1729			} else if (fetch_netrc_auth(url) == 0) {
1730				aparams.user = strdup(url->user);
1731				aparams.password = strdup(url->pwd);
1732			} else if (fetchAuthMethod &&
1733				   fetchAuthMethod(url) == 0) {
1734				aparams.user = strdup(url->user);
1735				aparams.password = strdup(url->pwd);
1736			} else {
1737				http_seterr(HTTP_NEED_AUTH);
1738				goto ouch;
1739			}
1740			http_authorize(conn, "Authorization",
1741				       &server_challenges, &aparams, url);
1742			clean_http_auth_params(&aparams);
1743		}
1744
1745		/* other headers */
1746		if ((p = getenv("HTTP_ACCEPT")) != NULL) {
1747			if (*p != '\0')
1748				http_cmd(conn, "Accept: %s", p);
1749		} else {
1750			http_cmd(conn, "Accept: */*");
1751		}
1752		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
1753			if (strcasecmp(p, "auto") == 0)
1754				http_cmd(conn, "Referer: %s://%s%s",
1755				    url->scheme, host, url->doc);
1756			else
1757				http_cmd(conn, "Referer: %s", p);
1758		}
1759		if ((p = getenv("HTTP_USER_AGENT")) != NULL) {
1760			/* no User-Agent if defined but empty */
1761			if  (*p != '\0')
1762				http_cmd(conn, "User-Agent: %s", p);
1763		} else {
1764			/* default User-Agent */
1765			http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER,
1766			    getprogname());
1767		}
1768		if (url->offset > 0)
1769			http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
1770		http_cmd(conn, "Connection: close");
1771
1772		if (body) {
1773			body_len = strlen(body);
1774			http_cmd(conn, "Content-Length: %zu", body_len);
1775			if (content_type != NULL)
1776				http_cmd(conn, "Content-Type: %s", content_type);
1777		}
1778
1779		http_cmd(conn, "");
1780
1781		if (body)
1782			fetch_write(conn, body, body_len);
1783
1784		/*
1785		 * Force the queued request to be dispatched.  Normally, one
1786		 * would do this with shutdown(2) but squid proxies can be
1787		 * configured to disallow such half-closed connections.  To
1788		 * be compatible with such configurations, fiddle with socket
1789		 * options to force the pending data to be written.
1790		 */
1791		val = 0;
1792		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1793			   sizeof(val));
1794		val = 1;
1795		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1796			   sizeof(val));
1797
1798		/* get reply */
1799		switch (http_get_reply(conn)) {
1800		case HTTP_OK:
1801		case HTTP_PARTIAL:
1802		case HTTP_NOT_MODIFIED:
1803			/* fine */
1804			break;
1805		case HTTP_MOVED_PERM:
1806		case HTTP_MOVED_TEMP:
1807		case HTTP_TEMP_REDIRECT:
1808		case HTTP_PERM_REDIRECT:
1809		case HTTP_SEE_OTHER:
1810		case HTTP_USE_PROXY:
1811			/*
1812			 * Not so fine, but we still have to read the
1813			 * headers to get the new location.
1814			 */
1815			break;
1816		case HTTP_NEED_AUTH:
1817			if (server_challenges.valid) {
1818				/*
1819				 * We already sent out authorization code,
1820				 * so there's nothing more we can do.
1821				 */
1822				http_seterr(conn->err);
1823				goto ouch;
1824			}
1825			/* try again, but send the password this time */
1826			if (verbose)
1827				fetch_info("server requires authorization");
1828			break;
1829		case HTTP_NEED_PROXY_AUTH:
1830			if (proxy_challenges.valid) {
1831				/*
1832				 * We already sent our proxy
1833				 * authorization code, so there's
1834				 * nothing more we can do. */
1835				http_seterr(conn->err);
1836				goto ouch;
1837			}
1838			/* try again, but send the password this time */
1839			if (verbose)
1840				fetch_info("proxy requires authorization");
1841			break;
1842		case HTTP_BAD_RANGE:
1843			/*
1844			 * This can happen if we ask for 0 bytes because
1845			 * we already have the whole file.  Consider this
1846			 * a success for now, and check sizes later.
1847			 */
1848			break;
1849		case HTTP_PROTOCOL_ERROR:
1850			/* fall through */
1851		case -1:
1852			fetch_syserr();
1853			goto ouch;
1854		default:
1855			http_seterr(conn->err);
1856			if (!verbose)
1857				goto ouch;
1858			/* fall through so we can get the full error message */
1859		}
1860
1861		/* get headers. http_next_header expects one line readahead */
1862		if (fetch_getln(conn) == -1) {
1863			fetch_syserr();
1864			goto ouch;
1865		}
1866		do {
1867			switch ((h = http_next_header(conn, &headerbuf, &p))) {
1868			case hdr_syserror:
1869				fetch_syserr();
1870				goto ouch;
1871			case hdr_error:
1872				http_seterr(HTTP_PROTOCOL_ERROR);
1873				goto ouch;
1874			case hdr_content_length:
1875				http_parse_length(p, &clength);
1876				break;
1877			case hdr_content_range:
1878				http_parse_range(p, &offset, &length, &size);
1879				break;
1880			case hdr_last_modified:
1881				http_parse_mtime(p, &mtime);
1882				break;
1883			case hdr_location:
1884				if (!HTTP_REDIRECT(conn->err))
1885					break;
1886				/*
1887				 * if the A flag is set, we don't follow
1888				 * temporary redirects.
1889				 */
1890				if (noredirect &&
1891				    conn->err != HTTP_MOVED_PERM &&
1892				    conn->err != HTTP_PERM_REDIRECT &&
1893				    conn->err != HTTP_USE_PROXY) {
1894					n = 1;
1895					break;
1896				}
1897				if (new)
1898					free(new);
1899				if (verbose)
1900					fetch_info("%d redirect to %s",
1901					    conn->err, p);
1902				if (*p == '/')
1903					/* absolute path */
1904					new = fetchMakeURL(url->scheme, url->host,
1905					    url->port, p, url->user, url->pwd);
1906				else
1907					new = fetchParseURL(p);
1908				if (new == NULL) {
1909					/* XXX should set an error code */
1910					DEBUGF("failed to parse new URL\n");
1911					goto ouch;
1912				}
1913
1914				/* Only copy credentials if the host matches */
1915				if (strcmp(new->host, url->host) == 0 &&
1916				    !*new->user && !*new->pwd) {
1917					strcpy(new->user, url->user);
1918					strcpy(new->pwd, url->pwd);
1919				}
1920				new->offset = url->offset;
1921				new->length = url->length;
1922				new->ims_time = url->ims_time;
1923				break;
1924			case hdr_transfer_encoding:
1925				/* XXX weak test*/
1926				chunked = (strcasecmp(p, "chunked") == 0);
1927				break;
1928			case hdr_www_authenticate:
1929				if (conn->err != HTTP_NEED_AUTH)
1930					break;
1931				if (http_parse_authenticate(p, &server_challenges) == 0)
1932					++n;
1933				break;
1934			case hdr_proxy_authenticate:
1935				if (conn->err != HTTP_NEED_PROXY_AUTH)
1936					break;
1937				if (http_parse_authenticate(p, &proxy_challenges) == 0)
1938					++n;
1939				break;
1940			case hdr_end:
1941				/* fall through */
1942			case hdr_unknown:
1943				/* ignore */
1944				break;
1945			}
1946		} while (h > hdr_end);
1947
1948		/* we need to provide authentication */
1949		if (conn->err == HTTP_NEED_AUTH ||
1950		    conn->err == HTTP_NEED_PROXY_AUTH) {
1951			e = conn->err;
1952			if ((conn->err == HTTP_NEED_AUTH &&
1953			     !server_challenges.valid) ||
1954			    (conn->err == HTTP_NEED_PROXY_AUTH &&
1955			     !proxy_challenges.valid)) {
1956				/* 401/7 but no www/proxy-authenticate ?? */
1957				DEBUGF("%03d without auth header\n", conn->err);
1958				goto ouch;
1959			}
1960			fetch_close(conn);
1961			conn = NULL;
1962			continue;
1963		}
1964
1965		/* requested range not satisfiable */
1966		if (conn->err == HTTP_BAD_RANGE) {
1967			if (url->offset > 0 && url->length == 0) {
1968				/* asked for 0 bytes; fake it */
1969				offset = url->offset;
1970				clength = -1;
1971				conn->err = HTTP_OK;
1972				break;
1973			} else {
1974				http_seterr(conn->err);
1975				goto ouch;
1976			}
1977		}
1978
1979		/* we have a hit or an error */
1980		if (conn->err == HTTP_OK
1981		    || conn->err == HTTP_NOT_MODIFIED
1982		    || conn->err == HTTP_PARTIAL
1983		    || HTTP_ERROR(conn->err))
1984			break;
1985
1986		/* all other cases: we got a redirect */
1987		e = conn->err;
1988		clean_http_auth_challenges(&server_challenges);
1989		fetch_close(conn);
1990		conn = NULL;
1991		if (!new) {
1992			DEBUGF("redirect with no new location\n");
1993			break;
1994		}
1995		if (url != URL)
1996			fetchFreeURL(url);
1997		url = new;
1998	} while (++i < n);
1999
2000	/* we failed, or ran out of retries */
2001	if (conn == NULL) {
2002		http_seterr(e);
2003		goto ouch;
2004	}
2005
2006	DEBUGF("offset %lld, length %lld, size %lld, clength %lld\n",
2007	    (long long)offset, (long long)length,
2008	    (long long)size, (long long)clength);
2009
2010	if (conn->err == HTTP_NOT_MODIFIED) {
2011		http_seterr(HTTP_NOT_MODIFIED);
2012		return (NULL);
2013	}
2014
2015	/* check for inconsistencies */
2016	if (clength != -1 && length != -1 && clength != length) {
2017		http_seterr(HTTP_PROTOCOL_ERROR);
2018		goto ouch;
2019	}
2020	if (clength == -1)
2021		clength = length;
2022	if (clength != -1)
2023		length = offset + clength;
2024	if (length != -1 && size != -1 && length != size) {
2025		http_seterr(HTTP_PROTOCOL_ERROR);
2026		goto ouch;
2027	}
2028	if (size == -1)
2029		size = length;
2030
2031	/* fill in stats */
2032	if (us) {
2033		us->size = size;
2034		us->atime = us->mtime = mtime;
2035	}
2036
2037	/* too far? */
2038	if (URL->offset > 0 && offset > URL->offset) {
2039		http_seterr(HTTP_PROTOCOL_ERROR);
2040		goto ouch;
2041	}
2042
2043	/* report back real offset and size */
2044	URL->offset = offset;
2045	URL->length = clength;
2046
2047	/* wrap it up in a FILE */
2048	if ((f = http_funopen(conn, chunked)) == NULL) {
2049		fetch_syserr();
2050		goto ouch;
2051	}
2052
2053	if (url != URL)
2054		fetchFreeURL(url);
2055	if (purl)
2056		fetchFreeURL(purl);
2057
2058	if (HTTP_ERROR(conn->err)) {
2059		http_print_html(stderr, f);
2060		fclose(f);
2061		f = NULL;
2062	}
2063	clean_http_headerbuf(&headerbuf);
2064	clean_http_auth_challenges(&server_challenges);
2065	clean_http_auth_challenges(&proxy_challenges);
2066	return (f);
2067
2068ouch:
2069	if (url != URL)
2070		fetchFreeURL(url);
2071	if (purl)
2072		fetchFreeURL(purl);
2073	if (conn != NULL)
2074		fetch_close(conn);
2075	clean_http_headerbuf(&headerbuf);
2076	clean_http_auth_challenges(&server_challenges);
2077	clean_http_auth_challenges(&proxy_challenges);
2078	return (NULL);
2079}
2080
2081
2082/*****************************************************************************
2083 * Entry points
2084 */
2085
/*
 * Retrieve and stat a file by HTTP: issues a GET through the proxy
 * selected by http_get_proxy(), passing us on to the request.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *proxy;

	proxy = http_get_proxy(URL, flags);
	return (http_request(URL, "GET", us, proxy, flags));
}
2094
/*
 * Retrieve a file by HTTP: same as fetchXGetHTTP() without collecting
 * any document statistics.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetHTTP(URL, NULL, flags);
	return (stream);
}
2103
2104/*
2105 * Store a file by HTTP
2106 */
2107FILE *
2108fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
2109{
2110	warnx("fetchPutHTTP(): not implemented");
2111	return (NULL);
2112}
2113
/*
 * Get an HTTP document's metadata: issues a HEAD request that fills in
 * us.  Returns 0 if the request produced a stream (which is closed
 * again right away) and -1 otherwise.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *proxy;
	FILE *probe;

	proxy = http_get_proxy(URL, flags);
	probe = http_request(URL, "HEAD", us, proxy, flags);
	if (probe != NULL) {
		fclose(probe);
		return (0);
	}
	return (-1);
}
2128
2129/*
2130 * List a directory
2131 */
2132struct url_ent *
2133fetchListHTTP(struct url *url __unused, const char *flags __unused)
2134{
2135	warnx("fetchListHTTP(): not implemented");
2136	return (NULL);
2137}
2138
/*
 * Arbitrary HTTP verb and content requests: sends method with the
 * optional content_type and body through the proxy selected by
 * http_get_proxy().  No document statistics are collected.
 */
FILE *
fetchReqHTTP(struct url *URL, const char *method, const char *flags,
	const char *content_type, const char *body)
{
	struct url *proxy;

	proxy = http_get_proxy(URL, flags);
	return (http_request_body(URL, method, NULL, proxy, flags,
	    content_type, body));
}
2150