fetch.c revision 87560
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2881634Sbrian
2981634Sbrian#include <sys/cdefs.h>
3081634Sbrian__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 87560 2001-12-09 15:05:19Z des $");
3196732Sbrian
3296732Sbrian#include <sys/param.h>
3396732Sbrian#include <sys/errno.h>
3496732Sbrian
3581634Sbrian#include <ctype.h>
3681634Sbrian#include <stdio.h>
3781634Sbrian#include <stdlib.h>
3881634Sbrian#include <string.h>
3981634Sbrian
4081634Sbrian#include "fetch.h"
4181634Sbrian#include "common.h"
4281634Sbrian
4381634Sbrianauth_t	 fetchAuthMethod;
4481634Sbrianint	 fetchLastErrCode;
4581634Sbrianchar	 fetchLastErrString[MAXERRSTRING];
4681634Sbrianint	 fetchTimeout;
4781634Sbrianint	 fetchRestartCalls = 1;
4881634Sbrianint	 fetchDebug;
4981634Sbrian
5081634Sbrian
5181634Sbrian/*** Local data **************************************************************/
5281634Sbrian
5381634Sbrian/*
5481634Sbrian * Error messages for parser errors
5581634Sbrian */
5681634Sbrian#define URL_MALFORMED		1
5781634Sbrian#define URL_BAD_SCHEME		2
5881634Sbrian#define URL_BAD_PORT		3
5981634Sbrianstatic struct fetcherr _url_errlist[] = {
6081634Sbrian    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6181634Sbrian    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6281634Sbrian    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
6381634Sbrian    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
6481634Sbrian};
6581634Sbrian
6681634Sbrian
6781634Sbrian/*** Public API **************************************************************/
6881634Sbrian
6981634Sbrian/*
7081634Sbrian * Select the appropriate protocol for the URL scheme, and return a
7181634Sbrian * read-only stream connected to the document referenced by the URL.
7281634Sbrian * Also fill out the struct url_stat.
7381634Sbrian */
7481634SbrianFILE *
7581634SbrianfetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7681634Sbrian{
7781634Sbrian    int direct;
7881634Sbrian
7981634Sbrian    direct = CHECK_FLAG('d');
8081634Sbrian    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8181634Sbrian	return fetchXGetFile(URL, us, flags);
8281634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8381634Sbrian	return fetchXGetHTTP(URL, us, flags);
8481634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
8581634Sbrian	return fetchXGetFTP(URL, us, flags);
8681634Sbrian    } else {
8781634Sbrian	_url_seterr(URL_BAD_SCHEME);
8881634Sbrian	return NULL;
8981634Sbrian    }
9081634Sbrian}
9181634Sbrian
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * document's url_stat: it passes NULL for that argument and returns
 * fetchXGet()'s result unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGet(URL, no_stat, flags);
}
10181634Sbrian
10281634Sbrian/*
10381634Sbrian * Select the appropriate protocol for the URL scheme, and return a
10481634Sbrian * write-only stream connected to the document referenced by the URL.
10581634Sbrian */
10681634SbrianFILE *
10781634SbrianfetchPut(struct url *URL, const char *flags)
10881634Sbrian{
10981634Sbrian    int direct;
11081634Sbrian
11181634Sbrian    direct = CHECK_FLAG('d');
11281634Sbrian    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11381634Sbrian	return fetchPutFile(URL, flags);
11481634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11581634Sbrian	return fetchPutHTTP(URL, flags);
11681634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
11781634Sbrian	return fetchPutFTP(URL, flags);
11881634Sbrian    } else {
11981634Sbrian	_url_seterr(URL_BAD_SCHEME);
12081634Sbrian	return NULL;
12181634Sbrian    }
12281634Sbrian}
12381634Sbrian
12481634Sbrian/*
12581634Sbrian * Select the appropriate protocol for the URL scheme, and return the
12681634Sbrian * size of the document referenced by the URL if it exists.
12781634Sbrian */
12881634Sbrianint
12981634SbrianfetchStat(struct url *URL, struct url_stat *us, const char *flags)
13081634Sbrian{
13181634Sbrian    int direct;
13281634Sbrian
13381634Sbrian    direct = CHECK_FLAG('d');
13481634Sbrian    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13581634Sbrian	return fetchStatFile(URL, us, flags);
13681634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
13781634Sbrian	return fetchStatHTTP(URL, us, flags);
13881634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
13981634Sbrian	return fetchStatFTP(URL, us, flags);
14081634Sbrian    } else {
14181634Sbrian	_url_seterr(URL_BAD_SCHEME);
14281634Sbrian	return -1;
14381634Sbrian    }
14481634Sbrian}
14581634Sbrian
14681634Sbrian/*
14781634Sbrian * Select the appropriate protocol for the URL scheme, and return a
14881634Sbrian * list of files in the directory pointed to by the URL.
14981634Sbrian */
15081634Sbrianstruct url_ent *
15181634SbrianfetchList(struct url *URL, const char *flags)
15281634Sbrian{
15381634Sbrian    int direct;
15481634Sbrian
15581634Sbrian    direct = CHECK_FLAG('d');
15681634Sbrian    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15781634Sbrian	return fetchListFile(URL, flags);
15881634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
15981634Sbrian	return fetchListHTTP(URL, flags);
16081634Sbrian    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
16181634Sbrian	return fetchListFTP(URL, flags);
16281634Sbrian    } else {
16381634Sbrian	_url_seterr(URL_BAD_SCHEME);
16481634Sbrian	return NULL;
16581634Sbrian    }
166102558Sbrian}
167102558Sbrian
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(),
 * run fetchXGet() on the result, then release the parsed URL.
 * Returns NULL without calling fetchXGet() if the URL does not parse;
 * otherwise returns fetchXGet()'s result.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
18581634Sbrian
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * document's url_stat: it passes NULL for that argument and returns
 * fetchXGetURL()'s result unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGetURL(URL, no_stat, flags);
}
19481739Sbrian
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(),
 * run fetchPut() on the result, then release the parsed URL.
 * Returns NULL without calling fetchPut() if the URL does not parse;
 * otherwise returns fetchPut()'s result.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
21281634Sbrian
21381634Sbrian/*
21481634Sbrian * Attempt to parse the given URL; if successful, call fetchStat().
21581634Sbrian */
21681634Sbrianint
21781634SbrianfetchStatURL(const char *URL, struct url_stat *us, const char *flags)
21881634Sbrian{
21981634Sbrian    struct url *u;
22081634Sbrian    int s;
22181634Sbrian
22281634Sbrian    if ((u = fetchParseURL(URL)) == NULL)
22381634Sbrian	return -1;
22481634Sbrian
22581634Sbrian    s = fetchStat(u, us, flags);
22681634Sbrian
22781634Sbrian    fetchFreeURL(u);
22881634Sbrian    return s;
22981634Sbrian}
23081634Sbrian
23181634Sbrian/*
23281634Sbrian * Attempt to parse the given URL; if successful, call fetchList().
23381634Sbrian */
23481634Sbrianstruct url_ent *
23581634SbrianfetchListURL(const char *URL, const char *flags)
23681634Sbrian{
23781634Sbrian    struct url *u;
23881634Sbrian    struct url_ent *ue;
23981634Sbrian
24081634Sbrian    if ((u = fetchParseURL(URL)) == NULL)
24181634Sbrian	return NULL;
24281634Sbrian
24381634Sbrian    ue = fetchList(u, flags);
24481634Sbrian
24581634Sbrian    fetchFreeURL(u);
24681634Sbrian    return ue;
24781634Sbrian}
24881634Sbrian
24981634Sbrian/*
25081634Sbrian * Make a URL
25181634Sbrian */
25281634Sbrianstruct url *
25381634SbrianfetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25481634Sbrian    const char *user, const char *pwd)
25581634Sbrian{
25681634Sbrian    struct url *u;
25781634Sbrian
25881634Sbrian    if (!scheme || (!host && !doc)) {
25981634Sbrian	_url_seterr(URL_MALFORMED);
26081634Sbrian	return NULL;
26181634Sbrian    }
26281634Sbrian
26381634Sbrian    if (port < 0 || port > 65535) {
26481634Sbrian	_url_seterr(URL_BAD_PORT);
26581634Sbrian	return NULL;
26681634Sbrian    }
26781634Sbrian
26881634Sbrian    /* allocate struct url */
26981634Sbrian    if ((u = calloc(1, sizeof *u)) == NULL) {
27081634Sbrian	_fetch_syserr();
27181634Sbrian	return NULL;
27281634Sbrian    }
27381634Sbrian
27481634Sbrian    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
27581634Sbrian	_fetch_syserr();
27681634Sbrian	free(u);
27781634Sbrian	return NULL;
27881634Sbrian    }
27981634Sbrian
28081634Sbrian#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
28181634Sbrian    seturl(scheme);
28281634Sbrian    seturl(host);
28381634Sbrian    seturl(user);
28481634Sbrian    seturl(pwd);
28581634Sbrian#undef seturl
28681634Sbrian    u->port = port;
28781634Sbrian
28881634Sbrian    return u;
28981634Sbrian}
29081634Sbrian
29181634Sbrian/*
29281634Sbrian * Split an URL into components. URL syntax is:
29381634Sbrian * [method:/][/[user[:pwd]@]host[:port]/][document]
29481634Sbrian * This almost, but not quite, RFC1738 URL syntax.
29581634Sbrian */
29681634Sbrianstruct url *
29781634SbrianfetchParseURL(const char *URL)
29881634Sbrian{
29981634Sbrian    char *doc;
30081634Sbrian    const char *p, *q;
30181634Sbrian    struct url *u;
30281634Sbrian    int i;
30381634Sbrian
30481634Sbrian    /* allocate struct url */
30581634Sbrian    if ((u = calloc(1, sizeof *u)) == NULL) {
30681634Sbrian	_fetch_syserr();
30781634Sbrian	return NULL;
30881634Sbrian    }
30981634Sbrian
31081634Sbrian    /* scheme name */
31181634Sbrian    if ((p = strstr(URL, ":/"))) {
31281634Sbrian	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
31381634Sbrian	URL = ++p;
31481634Sbrian	/*
31581634Sbrian	 * Only one slash: no host, leave slash as part of document
31681634Sbrian	 * Two slashes: host follows, strip slashes
31781634Sbrian	 */
31881634Sbrian	if (URL[1] == '/')
31981634Sbrian	    URL = (p += 2);
32081634Sbrian    } else {
32181634Sbrian	p = URL;
32281634Sbrian    }
32381634Sbrian    if (!*URL || *URL == '/' || *URL == '.' ||
32481634Sbrian	(u->scheme[0] == '\0' &&
32581634Sbrian    	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
32681634Sbrian	goto nohost;
32781634Sbrian
32881634Sbrian    p = strpbrk(URL, "/@");
32981634Sbrian    if (p && *p == '@') {
33081634Sbrian	/* username */
33181634Sbrian	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
33281634Sbrian	    if (i < URL_USERLEN)
33381634Sbrian		u->user[i++] = *q;
33481634Sbrian
33581634Sbrian	/* password */
33681634Sbrian	if (*q == ':')
33781634Sbrian	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
33881634Sbrian		if (i < URL_PWDLEN)
33981634Sbrian		    u->pwd[i++] = *q;
34081634Sbrian
34181634Sbrian	p++;
34281634Sbrian    } else {
34381634Sbrian	p = URL;
34481634Sbrian    }
34581634Sbrian
34681634Sbrian    /* hostname */
34781634Sbrian#ifdef INET6
34881634Sbrian    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
34981634Sbrian	(*++q == '\0' || *q == '/' || *q == ':')) {
35081634Sbrian	if ((i = q - p - 2) > MAXHOSTNAMELEN)
35181634Sbrian	    i = MAXHOSTNAMELEN;
35281634Sbrian	strncpy(u->host, ++p, i);
35381634Sbrian	p = q;
35481634Sbrian    } else
35581634Sbrian#endif
35681634Sbrian	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
35781634Sbrian	    if (i < MAXHOSTNAMELEN)
35881634Sbrian		u->host[i++] = *p;
35981634Sbrian
36081634Sbrian    /* port */
36181634Sbrian    if (*p == ':') {
36281634Sbrian	for (q = ++p; *q && (*q != '/'); q++)
36381634Sbrian	    if (isdigit(*q))
36481634Sbrian		u->port = u->port * 10 + (*q - '0');
36581634Sbrian	    else {
36681634Sbrian		/* invalid port */
36781634Sbrian		_url_seterr(URL_BAD_PORT);
36881634Sbrian		goto ouch;
36981634Sbrian	    }
37081634Sbrian	while (*p && (*p != '/'))
37181634Sbrian	    p++;
37281634Sbrian    }
37381634Sbrian
37481634Sbriannohost:
37581634Sbrian    /* document */
37681634Sbrian    if (!*p)
37781634Sbrian	p = "/";
37881634Sbrian
37981634Sbrian    if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
38081634Sbrian	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
38181634Sbrian	const char hexnums[] = "0123456789abcdef";
38281634Sbrian
38381634Sbrian	/* percent-escape whitespace. */
38481634Sbrian	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
38581634Sbrian	    _fetch_syserr();
38681634Sbrian	    goto ouch;
38781634Sbrian	}
38881634Sbrian	u->doc = doc;
389102558Sbrian	while (*p != '\0') {
39081634Sbrian	    if (!isspace(*p)) {
39181634Sbrian		*doc++ = *p++;
392102558Sbrian            } else {
393102558Sbrian		*doc++ = '%';
394102558Sbrian		*doc++ = hexnums[((unsigned int)*p) >> 4];
395102558Sbrian		*doc++ = hexnums[((unsigned int)*p) & 0xf];
39681634Sbrian		p++;
39781634Sbrian            }
39881634Sbrian	}
39981634Sbrian	*doc = '\0';
40081634Sbrian    } else if ((u->doc = strdup(p)) == NULL) {
40181634Sbrian	_fetch_syserr();
40281634Sbrian	goto ouch;
40381634Sbrian    }
40481634Sbrian
40581634Sbrian    DEBUG(fprintf(stderr,
40681634Sbrian		  "scheme:   [\033[1m%s\033[m]\n"
40781634Sbrian		  "user:     [\033[1m%s\033[m]\n"
40881634Sbrian		  "password: [\033[1m%s\033[m]\n"
40981634Sbrian		  "host:     [\033[1m%s\033[m]\n"
41081634Sbrian		  "port:     [\033[1m%d\033[m]\n"
41181634Sbrian		  "document: [\033[1m%s\033[m]\n",
41281634Sbrian		  u->scheme, u->user, u->pwd,
41381634Sbrian		  u->host, u->port, u->doc));
41481634Sbrian
41581634Sbrian    return u;
41681634Sbrian
41781634Sbrianouch:
41881634Sbrian    free(u);
41981634Sbrian    return NULL;
42098243Sbrian}
42181634Sbrian
42281634Sbrian/*
42381634Sbrian * Free a URL
42481634Sbrian */
42581634Sbrianvoid
42681634SbrianfetchFreeURL(struct url *u)
42781634Sbrian{
42881634Sbrian    free(u->doc);
42981634Sbrian    free(u);
43081634Sbrian}
43181634Sbrian