fetch.c revision 109967
137535Sdes/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */
2837535Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 109967 2003-01-28 08:04:40Z des $");
3184203Sdillon
3237535Sdes#include <sys/param.h>
3340975Sdes#include <sys/errno.h>
3437535Sdes
3537535Sdes#include <ctype.h>
3637535Sdes#include <stdio.h>
3737535Sdes#include <stdlib.h>
3837535Sdes#include <string.h>
3937535Sdes
4037535Sdes#include "fetch.h"
4140975Sdes#include "common.h"
4237535Sdes
/*
 * Library-wide state.  None of these objects is static, so they have
 * external linkage.  Only fetchRestartCalls carries an explicit
 * initializer (1); the others start out zeroed like any file-scope object.
 *
 * NOTE(review): the per-variable notes below are inferred from the names
 * only -- confirm against fetch.h / fetch(3) before relying on them.
 */
auth_t	 fetchAuthMethod;	/* presumably an authentication hook */
int	 fetchLastErrCode;	/* presumably the most recent error code */
char	 fetchLastErrString[MAXERRSTRING]; /* presumably its message text */
int	 fetchTimeout;		/* presumably a timeout; units not shown here */
int	 fetchRestartCalls = 1;	/* on by default; semantics not shown here */
int	 fetchDebug;		/* presumably enables debug output */
4941862Sdes
5041862Sdes
5140975Sdes/*** Local data **************************************************************/
5237535Sdes
/*
 * Error messages for parser errors
 *
 * The URL_* codes below are the values this file passes to _url_seterr().
 * Each table row pairs one such code with a FETCH_* category and a
 * human-readable message.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr _url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	/* NOTE(review): presumably the terminator / fallback row -- confirm */
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
6540975Sdes
6640975Sdes
6740975Sdes/*** Public API **************************************************************/
6840975Sdes
6940975Sdes/*
7040939Sdes * Select the appropriate protocol for the URL scheme, and return a
7140939Sdes * read-only stream connected to the document referenced by the URL.
7263340Sdes * Also fill out the struct url_stat.
7340939Sdes */
7438394SdesFILE *
7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7638394Sdes{
7790267Sdes	int direct;
7860587Sume
7990267Sdes	direct = CHECK_FLAG('d');
80109694Sdes	if (us != NULL) {
81109694Sdes		us->size = -1;
82109694Sdes		us->atime = us->mtime = 0;
83109694Sdes	}
8490267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8590267Sdes		return (fetchXGetFile(URL, us, flags));
8697868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
8797868Sdes		return (fetchXGetFTP(URL, us, flags));
8890267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8990267Sdes		return (fetchXGetHTTP(URL, us, flags));
9097868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
9197868Sdes		return (fetchXGetHTTP(URL, us, flags));
9297868Sdes	_url_seterr(URL_BAD_SCHEME);
9397868Sdes	return (NULL);
9438394Sdes}
9538394Sdes
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * struct url_stat filled in: the same call with a NULL stat pointer.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
10563340Sdes
10663340Sdes/*
10763340Sdes * Select the appropriate protocol for the URL scheme, and return a
10840939Sdes * write-only stream connected to the document referenced by the URL.
10940939Sdes */
11038394SdesFILE *
11175891SarchiefetchPut(struct url *URL, const char *flags)
11238394Sdes{
11390267Sdes	int direct;
11460587Sume
11590267Sdes	direct = CHECK_FLAG('d');
11690267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11790267Sdes		return (fetchPutFile(URL, flags));
11897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
11997868Sdes		return (fetchPutFTP(URL, flags));
12090267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
12190267Sdes		return (fetchPutHTTP(URL, flags));
12297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
12397868Sdes		return (fetchPutHTTP(URL, flags));
12497868Sdes	_url_seterr(URL_BAD_SCHEME);
12597868Sdes	return (NULL);
12638394Sdes}
12738394Sdes
12840939Sdes/*
12940975Sdes * Select the appropriate protocol for the URL scheme, and return the
13040975Sdes * size of the document referenced by the URL if it exists.
13140975Sdes */
13240975Sdesint
13375891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags)
13440975Sdes{
13590267Sdes	int direct;
13660587Sume
13790267Sdes	direct = CHECK_FLAG('d');
138109694Sdes	if (us != NULL) {
139109694Sdes		us->size = -1;
140109694Sdes		us->atime = us->mtime = 0;
141109694Sdes	}
14290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
14390267Sdes		return (fetchStatFile(URL, us, flags));
14497868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
14597868Sdes		return (fetchStatFTP(URL, us, flags));
14690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
14790267Sdes		return (fetchStatHTTP(URL, us, flags));
14897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
14997868Sdes		return (fetchStatHTTP(URL, us, flags));
15040975Sdes	_url_seterr(URL_BAD_SCHEME);
15190267Sdes	return (-1);
15240975Sdes}
15340975Sdes
15440975Sdes/*
15541989Sdes * Select the appropriate protocol for the URL scheme, and return a
15641989Sdes * list of files in the directory pointed to by the URL.
15741989Sdes */
15841989Sdesstruct url_ent *
15975891SarchiefetchList(struct url *URL, const char *flags)
16041989Sdes{
16190267Sdes	int direct;
16260587Sume
16390267Sdes	direct = CHECK_FLAG('d');
16490267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
16590267Sdes		return (fetchListFile(URL, flags));
16697868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
16797868Sdes		return (fetchListFTP(URL, flags));
16890267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
16990267Sdes		return (fetchListHTTP(URL, flags));
17097868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
17197868Sdes		return (fetchListHTTP(URL, flags));
17241989Sdes	_url_seterr(URL_BAD_SCHEME);
17390267Sdes	return (NULL);
17441989Sdes}
17541989Sdes
/*
 * String front end to fetchXGet(): parse the URL text, hand the parsed
 * form to fetchXGet(), then release the parsed form again.  Returns
 * NULL without calling fetchXGet() if the text cannot be parsed;
 * otherwise returns whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
19337535Sdes
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * struct url_stat filled in: the same call with a NULL stat pointer.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
20237535Sdes
/*
 * String front end to fetchPut(): parse the URL text, hand the parsed
 * form to fetchPut(), then release the parsed form again.  Returns
 * NULL without calling fetchPut() if the text cannot be parsed;
 * otherwise returns whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
22037535Sdes
/*
 * String front end to fetchStat(): parse the URL text, hand the parsed
 * form to fetchStat(), then release the parsed form again.  Returns -1
 * without calling fetchStat() if the text cannot be parsed; otherwise
 * returns whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
23840975Sdes
/*
 * String front end to fetchList(): parse the URL text, hand the parsed
 * form to fetchList(), then release the parsed form again.  Returns
 * NULL without calling fetchList() if the text cannot be parsed;
 * otherwise returns whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
25641989Sdes
25741989Sdes/*
25860927Sdes * Make a URL
25960927Sdes */
26060927Sdesstruct url *
26175891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
26275891Sarchie    const char *user, const char *pwd)
26360927Sdes{
26490267Sdes	struct url *u;
26560927Sdes
26690267Sdes	if (!scheme || (!host && !doc)) {
26790267Sdes		_url_seterr(URL_MALFORMED);
26890267Sdes		return (NULL);
26990267Sdes	}
27060927Sdes
27190267Sdes	if (port < 0 || port > 65535) {
27290267Sdes		_url_seterr(URL_BAD_PORT);
27390267Sdes		return (NULL);
27490267Sdes	}
27590267Sdes
27690267Sdes	/* allocate struct url */
277109967Sdes	if ((u = calloc(1, sizeof(*u))) == NULL) {
27890267Sdes		_fetch_syserr();
27990267Sdes		return (NULL);
28090267Sdes	}
28190267Sdes
28290267Sdes	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
28390267Sdes		_fetch_syserr();
28490267Sdes		free(u);
28590267Sdes		return (NULL);
28690267Sdes	}
28790267Sdes
288109967Sdes#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
28990267Sdes	seturl(scheme);
29090267Sdes	seturl(host);
29190267Sdes	seturl(user);
29290267Sdes	seturl(pwd);
29360927Sdes#undef seturl
29490267Sdes	u->port = port;
29560927Sdes
29690267Sdes	return (u);
29760927Sdes}
29860927Sdes
29960927Sdes/*
30037535Sdes * Split an URL into components. URL syntax is:
30167042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document]
30237535Sdes * This almost, but not quite, RFC1738 URL syntax.
30337535Sdes */
30440975Sdesstruct url *
30575891SarchiefetchParseURL(const char *URL)
30637535Sdes{
30790267Sdes	char *doc;
30890267Sdes	const char *p, *q;
30990267Sdes	struct url *u;
31090267Sdes	int i;
31137535Sdes
31290267Sdes	/* allocate struct url */
313109967Sdes	if ((u = calloc(1, sizeof(*u))) == NULL) {
31490267Sdes		_fetch_syserr();
31590267Sdes		return (NULL);
31690267Sdes	}
31737535Sdes
31890267Sdes	/* scheme name */
31990267Sdes	if ((p = strstr(URL, ":/"))) {
32090267Sdes		snprintf(u->scheme, URL_SCHEMELEN+1,
32190267Sdes		    "%.*s", (int)(p - URL), URL);
32290267Sdes		URL = ++p;
32390267Sdes		/*
32490267Sdes		 * Only one slash: no host, leave slash as part of document
32590267Sdes		 * Two slashes: host follows, strip slashes
32690267Sdes		 */
32790267Sdes		if (URL[1] == '/')
32890267Sdes			URL = (p += 2);
32990267Sdes	} else {
33090267Sdes		p = URL;
33190267Sdes	}
33290267Sdes	if (!*URL || *URL == '/' || *URL == '.' ||
33390267Sdes	    (u->scheme[0] == '\0' &&
33490267Sdes		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
33590267Sdes		goto nohost;
33637535Sdes
33790267Sdes	p = strpbrk(URL, "/@");
33890267Sdes	if (p && *p == '@') {
33990267Sdes		/* username */
34090267Sdes		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
34190267Sdes			if (i < URL_USERLEN)
34290267Sdes				u->user[i++] = *q;
34379423Sdes
34490267Sdes		/* password */
34590267Sdes		if (*q == ':')
34690267Sdes			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
34790267Sdes				if (i < URL_PWDLEN)
34890267Sdes					u->pwd[i++] = *q;
34990267Sdes
35090267Sdes		p++;
35190267Sdes	} else {
35290267Sdes		p = URL;
35390267Sdes	}
35490267Sdes
35590267Sdes	/* hostname */
35660737Sume#ifdef INET6
35790267Sdes	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
35890267Sdes	    (*++q == '\0' || *q == '/' || *q == ':')) {
35990267Sdes		if ((i = q - p - 2) > MAXHOSTNAMELEN)
36090267Sdes			i = MAXHOSTNAMELEN;
36190267Sdes		strncpy(u->host, ++p, i);
36290267Sdes		p = q;
36390267Sdes	} else
36460737Sume#endif
36590267Sdes		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
36690267Sdes			if (i < MAXHOSTNAMELEN)
36790267Sdes				u->host[i++] = *p;
36837535Sdes
36990267Sdes	/* port */
37090267Sdes	if (*p == ':') {
37190267Sdes		for (q = ++p; *q && (*q != '/'); q++)
37290267Sdes			if (isdigit(*q))
37390267Sdes				u->port = u->port * 10 + (*q - '0');
37490267Sdes			else {
37590267Sdes				/* invalid port */
37690267Sdes				_url_seterr(URL_BAD_PORT);
37790267Sdes				goto ouch;
37890267Sdes			}
37990267Sdes		p = q;
38090267Sdes	}
38137535Sdes
38237535Sdesnohost:
38390267Sdes	/* document */
38490267Sdes	if (!*p)
38590267Sdes		p = "/";
38667419Sdes
38790267Sdes	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
38890267Sdes	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
38990267Sdes		const char hexnums[] = "0123456789abcdef";
39090267Sdes
39190267Sdes		/* percent-escape whitespace. */
39290267Sdes		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
39390267Sdes			_fetch_syserr();
39490267Sdes			goto ouch;
39590267Sdes		}
39690267Sdes		u->doc = doc;
39790267Sdes		while (*p != '\0') {
39890267Sdes			if (!isspace(*p)) {
39990267Sdes				*doc++ = *p++;
40090267Sdes			} else {
40190267Sdes				*doc++ = '%';
40290267Sdes				*doc++ = hexnums[((unsigned int)*p) >> 4];
40390267Sdes				*doc++ = hexnums[((unsigned int)*p) & 0xf];
40490267Sdes				p++;
40590267Sdes			}
40690267Sdes		}
40790267Sdes		*doc = '\0';
40890267Sdes	} else if ((u->doc = strdup(p)) == NULL) {
40990267Sdes		_fetch_syserr();
41090267Sdes		goto ouch;
41167419Sdes	}
41290267Sdes
41390267Sdes	DEBUG(fprintf(stderr,
41488769Sdes		  "scheme:   [%s]\n"
41588769Sdes		  "user:     [%s]\n"
41688769Sdes		  "password: [%s]\n"
41788769Sdes		  "host:     [%s]\n"
41888769Sdes		  "port:     [%d]\n"
41988769Sdes		  "document: [%s]\n",
42037535Sdes		  u->scheme, u->user, u->pwd,
42137535Sdes		  u->host, u->port, u->doc));
42237535Sdes
42390267Sdes	return (u);
42490267Sdes
42537535Sdesouch:
42690267Sdes	free(u);
42790267Sdes	return (NULL);
42837535Sdes}
42960376Sdes
43060376Sdes/*
43160376Sdes * Free a URL
43260376Sdes */
43360376Sdesvoid
43460376SdesfetchFreeURL(struct url *u)
43560376Sdes{
43690267Sdes	free(u->doc);
43790267Sdes	free(u);
43860376Sdes}
439