http.c revision 55544
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libfetch/http.c 55544 2000-01-07 10:59:12Z des $
 */
30130365Smlaier
/*
 * The base64 code in this file is based on code from MIT fetch, which
 * has the following copyright and license:
 *
 *-
 * Copyright 1997 Massachusetts Institute of Technology
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that both the above copyright notice and this
 * permission notice appear in all copies, that both the above
 * copyright notice and this permission notice appear in all
 * supporting documentation, and that the name of M.I.T. not be used
 * in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  M.I.T. makes
 * no representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied
 * warranty.
 *
 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE. */
62130365Smlaier
63130365Smlaier#include <sys/param.h>
64130365Smlaier
65130365Smlaier#include <err.h>
66130365Smlaier#include <ctype.h>
67130365Smlaier#include <stdarg.h>
68130365Smlaier#include <stdio.h>
69130365Smlaier#include <stdlib.h>
70130365Smlaier#include <string.h>
71130365Smlaier#include <unistd.h>
72130365Smlaier
73130365Smlaier#include "fetch.h"
74130365Smlaier#include "common.h"
75130365Smlaier#include "httperr.h"
76130365Smlaier
77130365Smlaierextern char *__progname;
78130365Smlaier
79130365Smlaier#define ENDL "\r\n"
80130365Smlaier
/* Transfer encodings understood by the body reader */
#define ENC_NONE 0
#define ENC_CHUNKED 1

/* Longest Content-Type value that is kept, not counting the final NUL */
#define HTTPCTYPELEN 59

/*
 * State attached to the FILE that is handed back to the caller; the
 * read/write/close callbacks receive a pointer to it.
 */
struct cookie
{
    FILE *real_f;			/* underlying stream */
    int encoding;			/* ENC_CHUNKED or ENC_NONE */
    char content_type[HTTPCTYPELEN+1];	/* value of the Content-Type header */
    char *buf;				/* line most recently read from real_f */
    int b_cur, eof;			/* offset of next unread byte; end flag */
    /*
     * b_len is handed to fgetln() by address, and fgetln() stores a
     * size_t there.  With a plain unsigned field the store would spill
     * into chunksize wherever size_t is wider than unsigned.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
93130365Smlaier
/*
 * Send a formatted line; optionally echo to terminal
 *
 * f   - stream the line is written to
 * fmt - printf-style format; the remaining arguments are its operands
 *
 * Unless NDEBUG is defined, the same line is also written to stderr,
 * in bold and prefixed with ">>> ".
 *
 * Returns 0 on success, or -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);

#ifndef NDEBUG
    /*
     * vfprintf() leaves ap indeterminate, so the argument list must be
     * started afresh before it can be walked a second time.
     */
    fprintf(stderr, "\033[1m>>> ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
113130365Smlaier
114130365Smlaier/*
115130365Smlaier * Fill the input buffer, do chunk decoding on the fly
116130365Smlaier */
117130365Smlaierstatic char *
118130365Smlaier_http_fillbuf(struct cookie *c)
119130365Smlaier{
120130365Smlaier    char *ln;
121130365Smlaier    unsigned int len;
122130365Smlaier
123130365Smlaier    if (c->eof)
124130365Smlaier	return NULL;
125130365Smlaier
126130365Smlaier    if (c->encoding == ENC_NONE) {
127130365Smlaier	c->buf = fgetln(c->real_f, &(c->b_len));
128130365Smlaier	c->b_cur = 0;
129130365Smlaier    } else if (c->encoding == ENC_CHUNKED) {
130130365Smlaier	if (c->chunksize == 0) {
131130365Smlaier	    ln = fgetln(c->real_f, &len);
132130365Smlaier	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
133130365Smlaier			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
134130365Smlaier	    sscanf(ln, "%x", &(c->chunksize));
135130365Smlaier	    if (!c->chunksize) {
136130365Smlaier		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
137130365Smlaier			      "end of last chunk\033[m\n"));
138130365Smlaier		c->eof = 1;
139130365Smlaier		return NULL;
140130365Smlaier	    }
141130365Smlaier	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
142130365Smlaier			  "new chunk: %X\033[m\n", c->chunksize));
143130365Smlaier	}
144130365Smlaier	c->buf = fgetln(c->real_f, &(c->b_len));
145130365Smlaier	if (c->b_len > c->chunksize)
146130365Smlaier	    c->b_len = c->chunksize;
147130365Smlaier	c->chunksize -= c->b_len;
148130365Smlaier	c->b_cur = 0;
149130365Smlaier    }
150130365Smlaier    else return NULL; /* unknown encoding */
151130365Smlaier    return c->buf;
152130365Smlaier}
153130365Smlaier
154130365Smlaier/*
155130365Smlaier * Read function
156130365Smlaier */
157130365Smlaierstatic int
158130365Smlaier_http_readfn(struct cookie *c, char *buf, int len)
159130365Smlaier{
160130365Smlaier    int l, pos = 0;
161130365Smlaier    while (len) {
162130365Smlaier	/* empty buffer */
163130365Smlaier	if (!c->buf || (c->b_cur == c->b_len))
164130365Smlaier	    if (!_http_fillbuf(c))
165130365Smlaier		break;
166130365Smlaier
167130365Smlaier	l = c->b_len - c->b_cur;
168130365Smlaier	if (len < l) l = len;
169130365Smlaier	memcpy(buf + pos, c->buf + c->b_cur, l);
170130365Smlaier	c->b_cur += l;
171130365Smlaier	pos += l;
172130365Smlaier	len -= l;
173130365Smlaier    }
174130365Smlaier
175130508Smlaier    if (ferror(c->real_f))
176130365Smlaier	return -1;
177130508Smlaier    else return pos;
178130365Smlaier}
179130365Smlaier
180130365Smlaier/*
181130365Smlaier * Write function
182130365Smlaier */
183static int
184_http_writefn(struct cookie *c, const char *buf, int len)
185{
186    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
187    return r ? r : -1;
188}
189
190/*
191 * Close function
192 */
193static int
194_http_closefn(struct cookie *c)
195{
196    int r = fclose(c->real_f);
197    free(c);
198    return (r == EOF) ? -1 : 0;
199}
200
201/*
202 * Extract content type from cookie
203 */
204char *
205fetchContentType(FILE *f)
206{
207    /*
208     * We have no way of making sure this really *is* one of our cookies,
209     * so just check for a null pointer and hope for the best.
210     */
211    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
212}
213
/*
 * Base64 encoding
 *
 * dst - output buffer; must have room for 4 * ((l + 2) / 3) + 1 bytes
 * src - input bytes (need not be NUL-terminated)
 * l   - number of input bytes
 *
 * Writes the padded base64 form of src to dst, NUL-terminates it and
 * returns the number of characters written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";
    /*
     * Work on unsigned bytes: where plain char is signed, a byte >= 0x80
     * would sign-extend when shifted and smear ones over its neighbours.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned int t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
        t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8) | in[2];
        dst[0] = base64[(t >> 18) & 0x3f];
        dst[1] = base64[(t >> 12) & 0x3f];
        dst[2] = base64[(t >> 6) & 0x3f];
        dst[3] = base64[(t >> 0) & 0x3f];
        in += 3; l -= 3;
        dst += 4; r += 4;
    }

    /* trailing one or two bytes are padded out with '=' */
    switch (l) {
    case 2:
        t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
        dst[0] = base64[(t >> 18) & 0x3f];
        dst[1] = base64[(t >> 12) & 0x3f];
        dst[2] = base64[(t >> 6) & 0x3f];
        dst[3] = '=';
        dst += 4;
        r += 4;
        break;
    case 1:
        t = (unsigned int)in[0] << 16;
        dst[0] = base64[(t >> 18) & 0x3f];
        dst[1] = base64[(t >> 12) & 0x3f];
        dst[2] = dst[3] = '=';
        dst += 4;
        r += 4;
        break;
    default:
        break;
    }

    *dst = 0;
    return r;
}
261
/*
 * Encode username and password
 *
 * Builds the credentials for an "Authorization: Basic" header: the
 * base64 encoding of "usr:pwd" taken as one string.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, plen;
    char *plain, *str;

    lu = strlen(usr);
    lp = strlen(pwd);
    plen = lu + 1 + lp;			/* user, colon, password */

    /* Basic credentials are encoded as a whole, not piece by piece */
    if ((plain = malloc(plen + 1)) == NULL)
        return NULL;
    memcpy(plain, usr, lu);
    plain[lu] = ':';
    memcpy(plain + lu + 1, pwd, lp + 1);

    /* base64 emits four characters per (possibly partial) 3-byte group */
    str = malloc((plen + 2) / 3 * 4 + 1);
    if (str != NULL)
        _http_base64(str, plain, (int)plen);

    free(plain);
    return str;
}
289
290/*
291 * Retrieve a file by HTTP
292 */
293FILE *
294fetchGetHTTP(struct url *URL, char *flags)
295{
296    int sd = -1, e, i, enc = ENC_NONE, direct, verbose;
297    struct cookie *c;
298    char *ln, *p, *px, *q;
299    FILE *f, *cf;
300    size_t len;
301
302    direct = (flags && strchr(flags, 'd'));
303    verbose = (flags && strchr(flags, 'v'));
304
305    /* allocate cookie */
306    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
307	return NULL;
308
309    /* check port */
310    if (!URL->port)
311	URL->port = 80; /* default HTTP port */
312
313    /* attempt to connect to proxy server */
314    if (!direct && (px = getenv("HTTP_PROXY")) != NULL) {
315	char host[MAXHOSTNAMELEN];
316	int port = 3128; /* XXX I think 3128 is default... check? */
317
318	/* measure length */
319	len = strcspn(px, ":");
320
321	/* get port (XXX atoi is a little too tolerant perhaps?) */
322	if (px[len] == ':')
323	    port = atoi(px+len+1);
324
325	/* get host name */
326	if (len >= MAXHOSTNAMELEN)
327	    len = MAXHOSTNAMELEN - 1;
328	strncpy(host, px, len);
329	host[len] = 0;
330
331	/* connect */
332	sd = _fetch_connect(host, port, verbose);
333    }
334
335    /* if no proxy is configured or could be contacted, try direct */
336    if (sd == -1) {
337	if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1)
338	    goto ouch;
339    }
340
341    /* reopen as stream */
342    if ((f = fdopen(sd, "r+")) == NULL)
343	goto ouch;
344    c->real_f = f;
345
346    /* send request (proxies require absolute form, so use that) */
347    if (verbose)
348	_fetch_info("requesting http://%s:%d%s",
349		    URL->host, URL->port, URL->doc);
350    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
351	      URL->host, URL->port, URL->doc);
352
353    /* start sending headers away */
354    if (URL->user[0] || URL->pwd[0]) {
355	char *auth_str = _http_auth(URL->user, URL->pwd);
356	if (!auth_str)
357	    goto fouch;
358	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
359	free(auth_str);
360    }
361    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
362    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
363    _http_cmd(f, "Connection: close" ENDL ENDL);
364
365    /* get response */
366    if ((ln = fgetln(f, &len)) == NULL)
367	goto fouch;
368    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
369		  (int)len-2, (int)len-2, ln));
370
371    /* we can't use strchr() and friends since ln isn't NUL-terminated */
372    p = ln;
373    while ((p < ln + len) && !isspace(*p))
374	p++;
375    while ((p < ln + len) && !isdigit(*p))
376	p++;
377    if (!isdigit(*p))
378	goto fouch;
379    e = atoi(p);
380    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", e));
381
382    /* add code to handle redirects later */
383    if (e != 200) {
384	_http_seterr(e);
385	goto fouch;
386    }
387
388    /* browse through header */
389    while (1) {
390	if ((ln = fgetln(f, &len)) == NULL)
391	    goto fouch;
392	if ((ln[0] == '\r') || (ln[0] == '\n'))
393	    break;
394	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
395		      (int)len-2, (int)len-2, ln));
396#define XFERENC "Transfer-Encoding:"
397	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
398	    p = ln + sizeof(XFERENC) - 1;
399	    while ((p < ln + len) && isspace(*p))
400		p++;
401	    for (q = p; (q < ln + len) && !isspace(*q); q++)
402		/* VOID */ ;
403	    *q = 0;
404	    if (strcasecmp(p, "chunked") == 0)
405		enc = ENC_CHUNKED;
406	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
407#undef XFERENC
408#define CONTTYPE "Content-Type:"
409	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
410	    p = ln + sizeof(CONTTYPE) - 1;
411	    while ((p < ln + len) && isspace(*p))
412		p++;
413	    for (i = 0; p < ln + len; p++)
414		if (i < HTTPCTYPELEN)
415		    c->content_type[i++] = *p;
416	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
417	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
418			  c->content_type));
419#undef CONTTYPE
420	}
421    }
422
423    /* only body remains */
424    c->encoding = enc;
425    cf = funopen(c,
426		 (int (*)(void *, char *, int))_http_readfn,
427		 (int (*)(void *, const char *, int))_http_writefn,
428		 (fpos_t (*)(void *, fpos_t, int))NULL,
429		 (int (*)(void *))_http_closefn);
430    if (cf == NULL)
431	goto fouch;
432    return cf;
433
434ouch:
435    if (sd >= 0)
436	close(sd);
437    free(c);
438    _http_seterr(999); /* XXX do this properly RSN */
439    return NULL;
440fouch:
441    fclose(f);
442    free(c);
443    _http_seterr(999); /* XXX do this properly RSN */
444    return NULL;
445}
446
/*
 * Store a file by HTTP
 *
 * Not implemented: prints a warning and returns NULL.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *none = NULL;

    warnx("fetchPutHTTP(): not implemented");
    return none;
}
453
/*
 * Get an HTTP document's metadata
 *
 * Not implemented: prints a warning and returns -1.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    int status = -1;

    warnx("fetchStatHTTP(): not implemented");
    return status;
}
463
/*
 * List a directory
 *
 * Not implemented: prints a warning and returns NULL.
 */
struct url_ent *
fetchListHTTP(struct url *url, char *flags)
{
    struct url_ent *none = NULL;

    warnx("fetchListHTTP(): not implemented");
    return none;
}
473