http.c revision 106207
/*-
 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28103285Sikob
29103285Sikob#include <sys/cdefs.h>
30103285Sikob__FBSDID("$FreeBSD: head/lib/libfetch/http.c 106207 2002-10-30 15:01:29Z des $");
31103285Sikob
/*
 * The following copyright applies to the base64 code:
 *
 *-
 * Copyright 1997 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
63127468Ssimokawa
#include <sys/param.h>
#include <sys/socket.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "fetch.h"
#include "common.h"
#include "httperr.h"
82124169Ssimokawa
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
/* Not a real status code: returned when the status line is malformed */
#define HTTP_PROTOCOL_ERROR	999

/* True for the redirect replies we know how to follow (301, 302, 303) */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for any client (4xx) or server (5xx) error reply.  Both bounds are
 * inclusive so that 400 (Bad Request) and 599 are classified as errors
 * too; HTTP_PROTOCOL_ERROR (999) is deliberately outside the range.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
101122603Ssimokawa
102122603Ssimokawa
/*****************************************************************************
 * I/O functions for decoding chunked streams
 */
106103285Sikob
107103285Sikobstruct httpio
108103285Sikob{
109103285Sikob	conn_t		*conn;		/* connection */
110103285Sikob	int		 chunked;	/* chunked mode */
111103285Sikob	char		*buf;		/* chunk buffer */
112103285Sikob	size_t		 bufsize;	/* size of chunk buffer */
113103285Sikob	ssize_t		 buflen;	/* amount of data currently in buffer */
114150789Sglebius	int		 bufpos;	/* current read offset in buffer */
115150789Sglebius	int		 eof;		/* end-of-file flag */
116150789Sglebius	int		 error;		/* error flag */
117103285Sikob	size_t		 chunksize;	/* remaining size of current chunk */
118103285Sikob#ifndef NDEBUG
119103285Sikob	size_t		 total;
120103285Sikob#endif
121150789Sglebius};
122150789Sglebius
123103285Sikob/*
124103285Sikob * Get next chunk header
125103285Sikob */
126121953Ssimokawastatic int
127103285Sikob_http_new_chunk(struct httpio *io)
128103285Sikob{
129103285Sikob	char *p;
130103285Sikob
131103285Sikob	if (_fetch_getln(io->conn) == -1)
132103285Sikob		return (-1);
133103285Sikob
134103285Sikob	if (io->conn->buflen < 2 || !ishexnumber(*io->conn->buf))
135103285Sikob		return (-1);
136103285Sikob
137103285Sikob	for (p = io->conn->buf; *p && !isspace(*p); ++p) {
138103285Sikob		if (*p == ';')
139108281Ssimokawa			break;
140103285Sikob		if (!ishexnumber(*p))
141103285Sikob			return (-1);
142103285Sikob		if (isdigit(*p)) {
143103285Sikob			io->chunksize = io->chunksize * 16 +
144103285Sikob			    *p - '0';
145103285Sikob		} else {
146103285Sikob			io->chunksize = io->chunksize * 16 +
147103285Sikob			    10 + tolower(*p) - 'a';
148103285Sikob		}
149147256Sbrooks	}
150103285Sikob
151147256Sbrooks#ifndef NDEBUG
152147256Sbrooks	if (fetchDebug) {
153147256Sbrooks		io->total += io->chunksize;
154109814Ssimokawa		if (io->chunksize == 0)
155103285Sikob			fprintf(stderr, "%s(): end of last chunk\n", __func__);
156103285Sikob		else
157103285Sikob			fprintf(stderr, "%s(): new chunk: %lu (%lu)\n",
158103285Sikob			    __func__, (unsigned long)io->chunksize,
159103285Sikob			    (unsigned long)io->total);
160170374Ssimokawa	}
161103285Sikob#endif
162103285Sikob
163103285Sikob	return (io->chunksize);
164103285Sikob}
165103285Sikob
166124251Ssimokawa/*
167124251Ssimokawa * Grow the input buffer to at least len bytes
168124251Ssimokawa */
169103285Sikobstatic inline int
170103285Sikob_http_growbuf(struct httpio *io, size_t len)
171103285Sikob{
172103285Sikob	char *tmp;
173103285Sikob
174103285Sikob	if (io->bufsize >= len)
175103285Sikob		return (0);
176103285Sikob
177103285Sikob	if ((tmp = realloc(io->buf, len)) == NULL)
178103285Sikob		return (-1);
179103285Sikob	io->buf = tmp;
180147256Sbrooks	io->bufsize = len;
181147256Sbrooks	return (0);
182147256Sbrooks}
183109814Ssimokawa
184147256Sbrooks/*
185109814Ssimokawa * Fill the input buffer, do chunk decoding on the fly
186109814Ssimokawa */
187109814Ssimokawastatic int
188109814Ssimokawa_http_fillbuf(struct httpio *io, size_t len)
189109814Ssimokawa{
190109814Ssimokawa	if (io->error)
191109814Ssimokawa		return (-1);
192107653Ssimokawa	if (io->eof)
193107653Ssimokawa		return (0);
194103285Sikob
195103285Sikob	if (io->chunked == 0) {
196103285Sikob		if (_http_growbuf(io, len) == -1)
197147256Sbrooks			return (-1);
198147256Sbrooks		if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) {
199147256Sbrooks			io->error = 1;
200147256Sbrooks			return (-1);
201147256Sbrooks		}
202103285Sikob		io->bufpos = 0;
203103285Sikob		return (io->buflen);
204127468Ssimokawa	}
205121953Ssimokawa
206122212Ssimokawa	if (io->chunksize == 0) {
207122212Ssimokawa		switch (_http_new_chunk(io)) {
208122212Ssimokawa		case -1:
209122212Ssimokawa			io->error = 1;
210103285Sikob			return (-1);
211132430Ssimokawa		case 0:
212132430Ssimokawa			io->eof = 1;
213132430Ssimokawa			return (0);
214103285Sikob		}
215103285Sikob	}
216103285Sikob
217170374Ssimokawa	if (len > io->chunksize)
218111942Ssimokawa		len = io->chunksize;
219103285Sikob	if (_http_growbuf(io, len) == -1)
220103285Sikob		return (-1);
221127468Ssimokawa	if ((io->buflen = _fetch_read(io->conn, io->buf, len)) == -1) {
222127468Ssimokawa		io->error = 1;
223127468Ssimokawa		return (-1);
224106937Ssam	}
225108712Ssimokawa	io->chunksize -= io->buflen;
226103285Sikob
227103285Sikob	if (io->chunksize == 0) {
228103285Sikob		char endl[2];
229103285Sikob
230127468Ssimokawa		if (_fetch_read(io->conn, endl, 2) != 2 ||
231151229Sglebius		    endl[0] != '\r' || endl[1] != '\n')
232129552Syar			return (-1);
233108712Ssimokawa	}
234103285Sikob
235103285Sikob	io->bufpos = 0;
236122161Ssimokawa
237103285Sikob	return (io->buflen);
238103285Sikob}
239103285Sikob
240103285Sikob/*
241103285Sikob * Read function
242103285Sikob */
243103285Sikobstatic int
244103285Sikob_http_readfn(void *v, char *buf, int len)
245147256Sbrooks{
246111942Ssimokawa	struct httpio *io = (struct httpio *)v;
247111942Ssimokawa	int l, pos;
248103285Sikob
249103285Sikob	if (io->error)
250103285Sikob		return (-1);
251103285Sikob	if (io->eof)
252103285Sikob		return (0);
253103285Sikob
254103285Sikob	for (pos = 0; len > 0; pos += l, len -= l) {
255103285Sikob		/* empty buffer */
256103285Sikob		if (!io->buf || io->bufpos == io->buflen)
257113584Ssimokawa			if (_http_fillbuf(io, len) < 1)
258113584Ssimokawa				break;
259111942Ssimokawa		l = io->buflen - io->bufpos;
260111942Ssimokawa		if (len < l)
261111942Ssimokawa			l = len;
262111942Ssimokawa		bcopy(io->buf + io->bufpos, buf + pos, l);
263111942Ssimokawa		io->bufpos += l;
264111942Ssimokawa	}
265111942Ssimokawa
266111942Ssimokawa	if (!pos && io->error)
267111942Ssimokawa		return (-1);
268111942Ssimokawa	return (pos);
269111942Ssimokawa}
270111942Ssimokawa
271111942Ssimokawa/*
272111942Ssimokawa * Write function
273103285Sikob */
274103285Sikobstatic int
275103285Sikob_http_writefn(void *v, const char *buf, int len)
276148887Srwatson{
277148887Srwatson	struct httpio *io = (struct httpio *)v;
278148887Srwatson
279103285Sikob	return (_fetch_write(io->conn, buf, len));
280148887Srwatson}
281103285Sikob
282103285Sikob/*
283103285Sikob * Close function
284103285Sikob */
285103285Sikobstatic int
286103285Sikob_http_closefn(void *v)
287147256Sbrooks{
288103285Sikob	struct httpio *io = (struct httpio *)v;
289103285Sikob	int r;
290147256Sbrooks
291147256Sbrooks	r = _fetch_close(io->conn);
292150789Sglebius	if (io->buf)
293150789Sglebius		free(io->buf);
294150789Sglebius	free(io);
295150789Sglebius	return (r);
296150789Sglebius}
297103285Sikob
298103285Sikob/*
299103285Sikob * Wrap a file descriptor up
300127468Ssimokawa */
301147256Sbrooksstatic FILE *
302127468Ssimokawa_http_funopen(conn_t *conn, int chunked)
303147256Sbrooks{
304147256Sbrooks	struct httpio *io;
305108712Ssimokawa	FILE *f;
306103285Sikob
307103285Sikob	if ((io = calloc(1, sizeof *io)) == NULL) {
308170374Ssimokawa		_fetch_syserr();
309103285Sikob		return (NULL);
310103285Sikob	}
311103285Sikob	io->conn = conn;
312103285Sikob	io->chunked = chunked;
313103285Sikob	f = funopen(io, _http_readfn, _http_writefn, NULL, _http_closefn);
314103285Sikob	if (f == NULL) {
315103285Sikob		_fetch_syserr();
316103285Sikob		free(io);
317147256Sbrooks		return (NULL);
318103285Sikob	}
319111942Ssimokawa	return (f);
320113584Ssimokawa}
321103285Sikob
322103285Sikob
/*****************************************************************************
 * Helper functions for talking to the server and parsing its replies
 */
326103285Sikob
/*
 * Header types.  Negative values report failures, zero marks the end of
 * the header section, and positive values identify a header.
 */
typedef enum {
	hdr_syserror		= -2,	/* the header line could not be read */
	hdr_error		= -1,	/* protocol error */
	hdr_end			= 0,	/* empty line: end of headers */
	hdr_unknown		= 1,	/* a header we are not interested in */
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate
} hdr_t;
340103285Sikob
341103285Sikob/* Names of interesting headers */
342103285Sikobstatic struct {
343122603Ssimokawa	hdr_t		 num;
344111942Ssimokawa	const char	*name;
345111942Ssimokawa} hdr_names[] = {
346111942Ssimokawa	{ hdr_content_length,		"Content-Length" },
347113584Ssimokawa	{ hdr_content_range,		"Content-Range" },
348111942Ssimokawa	{ hdr_last_modified,		"Last-Modified" },
349113584Ssimokawa	{ hdr_location,			"Location" },
350113584Ssimokawa	{ hdr_transfer_encoding,	"Transfer-Encoding" },
351111942Ssimokawa	{ hdr_www_authenticate,		"WWW-Authenticate" },
352111942Ssimokawa	{ hdr_unknown,			NULL },
353111942Ssimokawa};
354111942Ssimokawa
355111942Ssimokawa/*
356111942Ssimokawa * Send a formatted line; optionally echo to terminal
357111942Ssimokawa */
358111942Ssimokawastatic int
359111942Ssimokawa_http_cmd(conn_t *conn, const char *fmt, ...)
360177599Sru{
361113584Ssimokawa	va_list ap;
362177599Sru	size_t len;
363177599Sru	char *msg;
364177599Sru	int r;
365111942Ssimokawa
366111942Ssimokawa	va_start(ap, fmt);
367111942Ssimokawa	len = vasprintf(&msg, fmt, ap);
368111942Ssimokawa	va_end(ap);
369111942Ssimokawa
370111942Ssimokawa	if (msg == NULL) {
371120660Ssimokawa		errno = ENOMEM;
372111942Ssimokawa		_fetch_syserr();
373111942Ssimokawa		return (-1);
374167632Ssimokawa	}
375111942Ssimokawa
376111942Ssimokawa	r = _fetch_putln(conn, msg, len);
377103285Sikob	free(msg);
378103285Sikob
379103285Sikob	if (r == -1) {
380103285Sikob		_fetch_syserr();
381103285Sikob		return (-1);
382103285Sikob	}
383103285Sikob
384103285Sikob	return (0);
385148887Srwatson}
386148887Srwatson
387148887Srwatson/*
388148887Srwatson * Get and parse status line
389103285Sikob */
390103285Sikobstatic int
391148887Srwatson_http_get_reply(conn_t *conn)
392103285Sikob{
393103285Sikob	char *p;
394103285Sikob
395103285Sikob	if (_fetch_getln(conn) == -1)
396103285Sikob		return (-1);
397103285Sikob	/*
398103285Sikob	 * A valid status line looks like "HTTP/m.n xyz reason" where m
399103285Sikob	 * and n are the major and minor protocol version numbers and xyz
400103285Sikob	 * is the reply code.
401103285Sikob	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
402103285Sikob	 * just one) that do not send a version number, so we can't rely
403103285Sikob	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
404103285Sikob	 * We don't care about the reason phrase.
405103285Sikob	 */
406103285Sikob	if (strncmp(conn->buf, "HTTP", 4) != 0)
407103285Sikob		return (HTTP_PROTOCOL_ERROR);
408103285Sikob	p = conn->buf + 4;
409103285Sikob	if (*p == '/') {
410103285Sikob		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
411148887Srwatson			return (HTTP_PROTOCOL_ERROR);
412148887Srwatson		p += 4;
413148887Srwatson	}
414103285Sikob	if (*p != ' ' || !isdigit(p[1]) || !isdigit(p[2]) || !isdigit(p[3]))
415148887Srwatson		return (HTTP_PROTOCOL_ERROR);
416103285Sikob
417103285Sikob	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
418148887Srwatson	return (conn->err);
419148887Srwatson}
420148887Srwatson
/*
 * Check a header line against a header name.
 *
 * str is the header name to look for and hdr is the raw header line.
 * The comparison is case-insensitive and the name must be immediately
 * followed by a colon.  Every character of the name is compared,
 * including the last one.
 *
 * Returns a pointer to the first non-whitespace character of the value
 * (possibly the terminating NUL if the value is empty), or NULL if the
 * line is not the requested header.
 */
static const char *
_http_match(const char *str, const char *hdr)
{
	for (; *str != '\0'; ++str, ++hdr) {
		/* also fails when hdr ends before the name does */
		if (tolower((unsigned char)*str) !=
		    tolower((unsigned char)*hdr))
			return (NULL);
	}
	if (*hdr != ':')
		return (NULL);
	/* skip the colon and any whitespace in front of the value */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
436103285Sikob
437103285Sikob/*
438103285Sikob * Get the next header and return the appropriate symbolic code.
439103285Sikob */
440103285Sikobstatic hdr_t
441103285Sikob_http_next_header(conn_t *conn, const char **p)
442103285Sikob{
443108712Ssimokawa	int i;
444150789Sglebius
445150789Sglebius	if (_fetch_getln(conn) == -1)
446150789Sglebius		return (hdr_syserror);
447150789Sglebius	while (conn->buflen && isspace(conn->buf[conn->buflen - 1]))
448188394Sfjoe		conn->buflen--;
449150789Sglebius	conn->buf[conn->buflen] = '\0';
450150789Sglebius	if (conn->buflen == 0)
451150789Sglebius		return (hdr_end);
452150789Sglebius	/*
453150789Sglebius	 * We could check for malformed headers but we don't really care.
454150789Sglebius	 * A valid header starts with a token immediately followed by a
455150789Sglebius	 * colon; a token is any sequence of non-control, non-whitespace
456150789Sglebius	 * characters except "()<>@,;:\\\"{}".
457150789Sglebius	 */
458150789Sglebius	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
459150789Sglebius		if ((*p = _http_match(hdr_names[i].name, conn->buf)) != NULL)
460150789Sglebius			return (hdr_names[i].num);
461150789Sglebius	return (hdr_unknown);
462150789Sglebius}
463150789Sglebius
/*
 * Parse a Last-Modified header value into *mtime.
 *
 * Only the "Sun, 06 Nov 1994 08:49:37 GMT" form is recognised.  The
 * LC_TIME locale is switched to "C" while parsing, so that day and
 * month names are matched in English, and is restored afterwards.
 *
 * Returns 0 on success.  Returns -1 if the current locale cannot be
 * saved or the value does not parse; *mtime is then left untouched.
 */
static int
_http_parse_mtime(const char *p, time_t *mtime)
{
	const char *cur;
	char *saved, *r;
	struct tm tm;

	/*
	 * Keep a private copy of the locale name: the string returned by
	 * setlocale() may be overwritten by the next call, and a copy of
	 * the right length cannot be truncated or left unterminated.
	 */
	if ((cur = setlocale(LC_TIME, NULL)) == NULL ||
	    (saved = strdup(cur)) == NULL)
		return (-1);
	setlocale(LC_TIME, "C");
	/* strptime() only fills in the fields named in the format */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, saved);
	free(saved);
	if (r == NULL)
		return (-1);
	DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d "
		  "%02d:%02d:%02d]\n",
		  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		  tm.tm_hour, tm.tm_min, tm.tm_sec));
	*mtime = timegm(&tm);
	return (0);
}
487103285Sikob
/*
 * Parse a Content-Length header value into *length.
 *
 * The value must consist of one or more decimal digits and nothing
 * else.  Returns 0 on success.  Returns -1 if the value is empty,
 * contains anything other than digits, or does not fit in an off_t;
 * *length is then left untouched.
 */
static int
_http_parse_length(const char *p, off_t *length)
{
	/* largest value an off_t can hold */
	const off_t off_max =
	    (off_t)((1ULL << (sizeof(off_t) * CHAR_BIT - 1)) - 1);
	off_t len;
	int digit;

	if (!isdigit((unsigned char)*p))
		return (-1);
	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* refuse values that would overflow rather than wrap */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	DEBUG(fprintf(stderr, "content length: [%lld]\n",
	    (long long)len));
	*length = len;
	return (0);
}
505111942Ssimokawa
/*
 * Parse a run of one or more decimal digits at *pp into *val and
 * advance *pp past it.  Returns 0 on success, or -1 if there is no
 * digit at *pp or the value does not fit in an off_t; *pp and *val are
 * then left untouched.
 */
static int
_http_parse_off(const char **pp, off_t *val)
{
	/* largest value an off_t can hold */
	const off_t off_max =
	    (off_t)((1ULL << (sizeof(off_t) * CHAR_BIT - 1)) - 1);
	const char *p = *pp;
	off_t n = 0;
	int digit;

	if (!isdigit((unsigned char)*p))
		return (-1);
	for (; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (n > (off_max - digit) / 10)
			return (-1);
		n = n * 10 + digit;
	}
	*pp = p;
	*val = n;
	return (0);
}

/*
 * Parse a Content-Range header value of the form
 * "bytes <first>-<last>/<total>".
 *
 * On success, returns 0 and stores the first byte position in *offset,
 * the number of bytes in the range in *length and the total size in
 * *size.  Returns -1, leaving the outputs untouched, if the value is
 * malformed, a number is missing or too large for an off_t, first is
 * greater than last, or the total is smaller than the range.  A total
 * given as "*" is not accepted.
 */
static int
_http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (_http_parse_off(&p, &first) == -1 || *p++ != '-')
		return (-1);
	if (_http_parse_off(&p, &last) == -1 || *p++ != '/')
		return (-1);
	if (first > last)
		return (-1);
	if (_http_parse_off(&p, &len) == -1 || *p != '\0')
		return (-1);
	/* same as len < last - first + 1, but cannot overflow */
	if (len <= last - first)
		return (-1);
	DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n",
	    (long long)first, (long long)last, (long long)len));
	*offset = first;
	*length = last - first + 1;
	*size = len;
	return (0);
}
535103285Sikob
536103285Sikob
/*****************************************************************************
 * Helper functions for authorization
 */
540103285Sikob
/*
 * Base64-encode a NUL-terminated string.
 *
 * Returns a newly allocated, NUL-terminated string holding the encoded
 * form, padded with '=' to a multiple of four characters, or NULL if
 * memory cannot be allocated.  The caller must free() the result.
 */
static char *
_http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/* read the input as unsigned bytes so that 0x80..0xff don't smear */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per three input bytes, plus the NUL */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3;
		l -= 3;
		dst += 4;
	}

	/* encode the one or two leftover bytes, if any, with padding */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	default:
		break;
	}

	*dst = '\0';
	return (str);
}
596170374Ssimokawa
597103285Sikob/*
598170374Ssimokawa * Encode username and password
599127468Ssimokawa */
600108712Ssimokawastatic int
601108712Ssimokawa_http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
602127468Ssimokawa{
603127468Ssimokawa	char *upw, *auth;
604108712Ssimokawa	int r;
605103285Sikob
606103285Sikob	DEBUG(fprintf(stderr, "usr: [%s]\n", usr));
607111942Ssimokawa	DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd));
608120660Ssimokawa	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
609129585Sdfr		return (-1);
610113584Ssimokawa	auth = _http_base64(upw);
611103285Sikob	free(upw);
612120660Ssimokawa	if (auth == NULL)
613103285Sikob		return (-1);
614111942Ssimokawa	r = _http_cmd(conn, "%s: Basic %s", hdr, auth);
615103285Sikob	free(auth);
616103285Sikob	return (r);
617103285Sikob}
618103285Sikob
619103285Sikob/*
620103285Sikob * Send an authorization header
621111942Ssimokawa */
622103285Sikobstatic int
623103285Sikob_http_authorize(conn_t *conn, const char *hdr, const char *p)
624103285Sikob{
625103285Sikob	/* basic authorization */
626103285Sikob	if (strncasecmp(p, "basic:", 6) == 0) {
627103285Sikob		char *user, *pwd, *str;
628111942Ssimokawa		int r;
629111942Ssimokawa
630103285Sikob		/* skip realm */
631103285Sikob		for (p += 6; *p && *p != ':'; ++p)
632103285Sikob			/* nothing */ ;
633103285Sikob		if (!*p || strchr(++p, ':') == NULL)
634103285Sikob			return (-1);
635103285Sikob		if ((str = strdup(p)) == NULL)
636113584Ssimokawa			return (-1); /* XXX */
637103285Sikob		user = str;
638103285Sikob		pwd = strchr(str, ':');
639111942Ssimokawa		*pwd++ = '\0';
640111942Ssimokawa		r = _http_basic_auth(conn, hdr, user, pwd);
641103285Sikob		free(str);
642127468Ssimokawa		return (r);
643111942Ssimokawa	}
644111942Ssimokawa	return (-1);
645103285Sikob}
646103285Sikob
647147256Sbrooks
/*****************************************************************************
 * Helper functions for connecting to a server or proxy
 */
651111942Ssimokawa
652113584Ssimokawa/*
653111942Ssimokawa * Connect to the correct HTTP server or proxy.
654111942Ssimokawa */
655111942Ssimokawastatic conn_t *
656111942Ssimokawa_http_connect(struct url *URL, struct url *purl, const char *flags)
657119119Ssimokawa{
658113584Ssimokawa	conn_t *conn;
659113584Ssimokawa	int verbose;
660113584Ssimokawa	int af;
661113584Ssimokawa
662113584Ssimokawa#ifdef INET6
663170374Ssimokawa	af = AF_UNSPEC;
664111942Ssimokawa#else
665119119Ssimokawa	af = AF_INET;
666119119Ssimokawa#endif
667119119Ssimokawa
668119119Ssimokawa	verbose = CHECK_FLAG('v');
669119119Ssimokawa	if (CHECK_FLAG('4'))
670119119Ssimokawa		af = AF_INET;
671119119Ssimokawa#ifdef INET6
672111942Ssimokawa	else if (CHECK_FLAG('6'))
673170374Ssimokawa		af = AF_INET6;
674127468Ssimokawa#endif
675111942Ssimokawa
676111942Ssimokawa	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
677132429Ssimokawa		URL = purl;
678132429Ssimokawa	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
679132429Ssimokawa		/* can't talk http to an ftp server */
680132429Ssimokawa		/* XXX should set an error code */
681108712Ssimokawa		return (NULL);
682103285Sikob	}
683103285Sikob
684122161Ssimokawa	if ((conn = _fetch_connect(URL->host, URL->port, af, verbose)) == NULL)
685103285Sikob		/* _fetch_connect() has already set an error code */
686103285Sikob		return (NULL);
687103285Sikob	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
688103285Sikob	    _fetch_ssl(conn, verbose) == -1) {
689103285Sikob		_fetch_close(conn);
690103285Sikob		/* grrr */
691103285Sikob		errno = EAUTH;
692103285Sikob		_fetch_syserr();
693103285Sikob		return (NULL);
694103285Sikob	}
695103285Sikob	return (conn);
696103285Sikob}
697103285Sikob
698127468Ssimokawastatic struct url *
699127468Ssimokawa_http_get_proxy(void)
700127468Ssimokawa{
701106937Ssam	struct url *purl;
702108712Ssimokawa	char *p;
703103285Sikob
704103285Sikob	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
705111942Ssimokawa	    (purl = fetchParseURL(p))) {
706111942Ssimokawa		if (!*purl->scheme)
707103285Sikob			strcpy(purl->scheme, SCHEME_HTTP);
708103285Sikob		if (!purl->port)
709103285Sikob			purl->port = _fetch_default_proxy_port(purl->scheme);
710103285Sikob		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
711103285Sikob			return (purl);
712103285Sikob		fetchFreeURL(purl);
713103285Sikob	}
714103285Sikob	return (NULL);
715103285Sikob}
716103285Sikob
/*
 * Copy an HTML document from in to out with the markup removed.
 *
 * Input is processed line by line: trailing whitespace is dropped, tags
 * and comments are skipped, the remaining text is written, and every
 * input line produces one newline on output.  Tag and comment state is
 * carried across lines, so markup may span several lines.
 */
static void
_http_print_html(FILE *out, FILE *in)
{
	size_t len;
	char *line, *p, *q;
	int comment, tag;

	comment = tag = 0;
	while ((line = fgetln(in, &len)) != NULL) {
		while (len && isspace((unsigned char)line[len - 1]))
			--len;
		/* p marks the start of pending text, q is the scan position */
		for (p = q = line; q < line + len; ++q) {
			if (comment && *q == '-') {
				/*
				 * The line is not NUL-terminated, so compare
				 * exactly three bytes within its bounds.
				 */
				if (q + 2 < line + len &&
				    strncmp(q, "-->", 3) == 0) {
					tag = comment = 0;
					q += 2;
					/* text resumes after the comment */
					p = q + 1;
				}
			} else if (tag && !comment && *q == '>') {
				p = q + 1;
				tag = 0;
			} else if (!tag && *q == '<') {
				/* flush the text in front of the tag */
				if (q > p)
					fwrite(p, q - p, 1, out);
				tag = 1;
				if (q + 3 < line + len &&
				    strncmp(q, "<!--", 4) == 0) {
					comment = 1;
					q += 3;
				}
			}
		}
		if (!tag && q > p)
			fwrite(p, q - p, 1, out);
		fputc('\n', out);
	}
}
754
755
/*****************************************************************************
 * Core
 */
759
760/*
761 * Send a request and process the reply
762 *
763 * XXX This function is way too long, the do..while loop should be split
764 * XXX off into a separate function.
765 */
766FILE *
767_http_request(struct url *URL, const char *op, struct url_stat *us,
768    struct url *purl, const char *flags)
769{
770	conn_t *conn;
771	struct url *url, *new;
772	int chunked, direct, need_auth, noredirect, verbose;
773	int e, i, n;
774	off_t offset, clength, length, size;
775	time_t mtime;
776	const char *p;
777	FILE *f;
778	hdr_t h;
779	char *host;
780#ifdef INET6
781	char hbuf[MAXHOSTNAMELEN + 1];
782#endif
783
784	direct = CHECK_FLAG('d');
785	noredirect = CHECK_FLAG('A');
786	verbose = CHECK_FLAG('v');
787
788	if (direct && purl) {
789		fetchFreeURL(purl);
790		purl = NULL;
791	}
792
793	/* try the provided URL first */
794	url = URL;
795
796	/* if the A flag is set, we only get one try */
797	n = noredirect ? 1 : MAX_REDIRECT;
798	i = 0;
799
800	e = HTTP_PROTOCOL_ERROR;
801	need_auth = 0;
802	do {
803		new = NULL;
804		chunked = 0;
805		offset = 0;
806		clength = -1;
807		length = -1;
808		size = -1;
809		mtime = 0;
810
811		/* check port */
812		if (!url->port)
813			url->port = _fetch_default_port(url->scheme);
814
815		/* were we redirected to an FTP URL? */
816		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
817			if (strcmp(op, "GET") == 0)
818				return (_ftp_request(url, "RETR", us, purl, flags));
819			else if (strcmp(op, "HEAD") == 0)
820				return (_ftp_request(url, "STAT", us, purl, flags));
821		}
822
823		/* connect to server or proxy */
824		if ((conn = _http_connect(url, purl, flags)) == NULL)
825			goto ouch;
826
827		host = url->host;
828#ifdef INET6
829		if (strchr(url->host, ':')) {
830			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
831			host = hbuf;
832		}
833#endif
834
835		/* send request */
836		if (verbose)
837			_fetch_info("requesting %s://%s:%d%s",
838			    url->scheme, host, url->port, url->doc);
839		if (purl) {
840			_http_cmd(conn, "%s %s://%s:%d%s HTTP/1.1",
841			    op, url->scheme, host, url->port, url->doc);
842		} else {
843			_http_cmd(conn, "%s %s HTTP/1.1",
844			    op, url->doc);
845		}
846
847		/* virtual host */
848		if (url->port == _fetch_default_port(url->scheme))
849			_http_cmd(conn, "Host: %s", host);
850		else
851			_http_cmd(conn, "Host: %s:%d", host, url->port);
852
853		/* proxy authorization */
854		if (purl) {
855			if (*purl->user || *purl->pwd)
856				_http_basic_auth(conn, "Proxy-Authorization",
857				    purl->user, purl->pwd);
858			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
859				_http_authorize(conn, "Proxy-Authorization", p);
860		}
861
862		/* server authorization */
863		if (need_auth || *url->user || *url->pwd) {
864			if (*url->user || *url->pwd)
865				_http_basic_auth(conn, "Authorization", url->user, url->pwd);
866			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
867				_http_authorize(conn, "Authorization", p);
868			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
869				_http_basic_auth(conn, "Authorization", url->user, url->pwd);
870			} else {
871				_http_seterr(HTTP_NEED_AUTH);
872				goto ouch;
873			}
874		}
875
876		/* other headers */
877		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
878			_http_cmd(conn, "User-Agent: %s", p);
879		else
880			_http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname());
881		if (url->offset)
882			_http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
883		_http_cmd(conn, "Connection: close");
884		_http_cmd(conn, "");
885
886		/* get reply */
887		switch (_http_get_reply(conn)) {
888		case HTTP_OK:
889		case HTTP_PARTIAL:
890			/* fine */
891			break;
892		case HTTP_MOVED_PERM:
893		case HTTP_MOVED_TEMP:
894		case HTTP_SEE_OTHER:
895			/*
896			 * Not so fine, but we still have to read the headers to
897			 * get the new location.
898			 */
899			break;
900		case HTTP_NEED_AUTH:
901			if (need_auth) {
902				/*
903				 * We already sent out authorization code, so there's
904				 * nothing more we can do.
905				 */
906				_http_seterr(conn->err);
907				goto ouch;
908			}
909			/* try again, but send the password this time */
910			if (verbose)
911				_fetch_info("server requires authorization");
912			break;
913		case HTTP_NEED_PROXY_AUTH:
914			/*
915			 * If we're talking to a proxy, we already sent our proxy
916			 * authorization code, so there's nothing more we can do.
917			 */
918			_http_seterr(conn->err);
919			goto ouch;
920		case HTTP_PROTOCOL_ERROR:
921			/* fall through */
922		case -1:
923			_fetch_syserr();
924			goto ouch;
925		default:
926			_http_seterr(conn->err);
927			if (!verbose)
928				goto ouch;
929			/* fall through so we can get the full error message */
930		}
931
932		/* get headers */
933		do {
934			switch ((h = _http_next_header(conn, &p))) {
935			case hdr_syserror:
936				_fetch_syserr();
937				goto ouch;
938			case hdr_error:
939				_http_seterr(HTTP_PROTOCOL_ERROR);
940				goto ouch;
941			case hdr_content_length:
942				_http_parse_length(p, &clength);
943				break;
944			case hdr_content_range:
945				_http_parse_range(p, &offset, &length, &size);
946				break;
947			case hdr_last_modified:
948				_http_parse_mtime(p, &mtime);
949				break;
950			case hdr_location:
951				if (!HTTP_REDIRECT(conn->err))
952					break;
953				if (new)
954					free(new);
955				if (verbose)
956					_fetch_info("%d redirect to %s", conn->err, p);
957				if (*p == '/')
958					/* absolute path */
959					new = fetchMakeURL(url->scheme, url->host, url->port, p,
960					    url->user, url->pwd);
961				else
962					new = fetchParseURL(p);
963				if (new == NULL) {
964					/* XXX should set an error code */
965					DEBUG(fprintf(stderr, "failed to parse new URL\n"));
966					goto ouch;
967				}
968				if (!*new->user && !*new->pwd) {
969					strcpy(new->user, url->user);
970					strcpy(new->pwd, url->pwd);
971				}
972				new->offset = url->offset;
973				new->length = url->length;
974				break;
975			case hdr_transfer_encoding:
976				/* XXX weak test*/
977				chunked = (strcasecmp(p, "chunked") == 0);
978				break;
979			case hdr_www_authenticate:
980				if (conn->err != HTTP_NEED_AUTH)
981					break;
982				/* if we were smarter, we'd check the method and realm */
983				break;
984			case hdr_end:
985				/* fall through */
986			case hdr_unknown:
987				/* ignore */
988				break;
989			}
990		} while (h > hdr_end);
991
992		/* we need to provide authentication */
993		if (conn->err == HTTP_NEED_AUTH) {
994			e = conn->err;
995			need_auth = 1;
996			_fetch_close(conn);
997			conn = NULL;
998			continue;
999		}
1000
1001		/* we have a hit or an error */
1002		if (conn->err == HTTP_OK || conn->err == HTTP_PARTIAL || HTTP_ERROR(conn->err))
1003			break;
1004
1005		/* all other cases: we got a redirect */
1006		e = conn->err;
1007		need_auth = 0;
1008		_fetch_close(conn);
1009		conn = NULL;
1010		if (!new) {
1011			DEBUG(fprintf(stderr, "redirect with no new location\n"));
1012			break;
1013		}
1014		if (url != URL)
1015			fetchFreeURL(url);
1016		url = new;
1017	} while (++i < n);
1018
1019	/* we failed, or ran out of retries */
1020	if (conn == NULL) {
1021		_http_seterr(e);
1022		goto ouch;
1023	}
1024
1025	DEBUG(fprintf(stderr, "offset %lld, length %lld,"
1026		  " size %lld, clength %lld\n",
1027		  (long long)offset, (long long)length,
1028		  (long long)size, (long long)clength));
1029
1030	/* check for inconsistencies */
1031	if (clength != -1 && length != -1 && clength != length) {
1032		_http_seterr(HTTP_PROTOCOL_ERROR);
1033		goto ouch;
1034	}
1035	if (clength == -1)
1036		clength = length;
1037	if (clength != -1)
1038		length = offset + clength;
1039	if (length != -1 && size != -1 && length != size) {
1040		_http_seterr(HTTP_PROTOCOL_ERROR);
1041		goto ouch;
1042	}
1043	if (size == -1)
1044		size = length;
1045
1046	/* fill in stats */
1047	if (us) {
1048		us->size = size;
1049		us->atime = us->mtime = mtime;
1050	}
1051
1052	/* too far? */
1053	if (offset > URL->offset) {
1054		_http_seterr(HTTP_PROTOCOL_ERROR);
1055		goto ouch;
1056	}
1057
1058	/* report back real offset and size */
1059	URL->offset = offset;
1060	URL->length = clength;
1061
1062	/* wrap it up in a FILE */
1063	if ((f = _http_funopen(conn, chunked)) == NULL) {
1064		_fetch_syserr();
1065		goto ouch;
1066	}
1067
1068	if (url != URL)
1069		fetchFreeURL(url);
1070	if (purl)
1071		fetchFreeURL(purl);
1072
1073	if (HTTP_ERROR(conn->err)) {
1074		_http_print_html(stderr, f);
1075		fclose(f);
1076		f = NULL;
1077	}
1078
1079	return (f);
1080
1081ouch:
1082	if (url != URL)
1083		fetchFreeURL(url);
1084	if (purl)
1085		fetchFreeURL(purl);
1086	if (conn != NULL)
1087		_fetch_close(conn);
1088	return (NULL);
1089}
1090
1091
1092/*****************************************************************************
1093 * Entry points
1094 */
1095
/*
 * Retrieve and stat a file by HTTP.
 *
 * Issues a "GET" for URL through _http_request(), handing it whatever
 * _http_get_proxy() returns as the proxy argument; us and flags are
 * passed along untouched.  The value returned is exactly what
 * _http_request() returns: a stream on success, NULL on failure.
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	FILE *f;

	f = _http_request(URL, "GET", us, _http_get_proxy(), flags);
	return (f);
}
1104
/*
 * Retrieve a file by HTTP.
 *
 * Convenience front end for fetchXGetHTTP() for callers that do not
 * want stat information: a NULL url_stat pointer is supplied on their
 * behalf and the result is returned as-is.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
	struct url_stat *no_stat;

	no_stat = NULL;
	return (fetchXGetHTTP(URL, no_stat, flags));
}
1113
1114/*
1115 * Store a file by HTTP
1116 */
1117FILE *
1118fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1119{
1120	warnx("fetchPutHTTP(): not implemented");
1121	return (NULL);
1122}
1123
/*
 * Get an HTTP document's metadata.
 *
 * Issues a "HEAD" for URL through _http_request(), passing us along so
 * the request code can record the document's statistics in it.  The
 * stream that comes back is of no further use and is closed at once.
 *
 * Returns 0 if the request produced a stream, -1 if it returned NULL.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	FILE *f;

	f = _http_request(URL, "HEAD", us, _http_get_proxy(), flags);
	if (f != NULL) {
		/* the request succeeded; only its side effects were wanted */
		fclose(f);
		return (0);
	}
	return (-1);
}
1137
1138/*
1139 * List a directory
1140 */
1141struct url_ent *
1142fetchListHTTP(struct url *url __unused, const char *flags __unused)
1143{
1144	warnx("fetchListHTTP(): not implemented");
1145	return (NULL);
1146}
1147