fetch.c revision 90267
137535Sdes/*-
237535Sdes * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */
2837535Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 90267 2002-02-05 22:13:51Z des $");
3184203Sdillon
3237535Sdes#include <sys/param.h>
3340975Sdes#include <sys/errno.h>
3437535Sdes
3537535Sdes#include <ctype.h>
3637535Sdes#include <stdio.h>
3737535Sdes#include <stdlib.h>
3837535Sdes#include <string.h>
3937535Sdes
4037535Sdes#include "fetch.h"
4140975Sdes#include "common.h"
4237535Sdes
4377238Sdesauth_t	 fetchAuthMethod;
4460924Sdesint	 fetchLastErrCode;
4560924Sdeschar	 fetchLastErrString[MAXERRSTRING];
4660924Sdesint	 fetchTimeout;
4763334Sdesint	 fetchRestartCalls = 1;
4887560Sdesint	 fetchDebug;
4941862Sdes
5041862Sdes
5140975Sdes/*** Local data **************************************************************/
5237535Sdes
5340939Sdes/*
5440975Sdes * Error messages for parser errors
5540975Sdes */
5640975Sdes#define URL_MALFORMED		1
5740975Sdes#define URL_BAD_SCHEME		2
5840975Sdes#define URL_BAD_PORT		3
5940975Sdesstatic struct fetcherr _url_errlist[] = {
6090267Sdes	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6190267Sdes	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6290267Sdes	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
6390267Sdes	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
6440975Sdes};
6540975Sdes
6640975Sdes
6740975Sdes/*** Public API **************************************************************/
6840975Sdes
6940975Sdes/*
7040939Sdes * Select the appropriate protocol for the URL scheme, and return a
7140939Sdes * read-only stream connected to the document referenced by the URL.
7263340Sdes * Also fill out the struct url_stat.
7340939Sdes */
7438394SdesFILE *
7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7638394Sdes{
7790267Sdes	int direct;
7860587Sume
7990267Sdes	direct = CHECK_FLAG('d');
8090267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8190267Sdes		return (fetchXGetFile(URL, us, flags));
8290267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8390267Sdes		return (fetchXGetHTTP(URL, us, flags));
8490267Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
8590267Sdes		return (fetchXGetFTP(URL, us, flags));
8690267Sdes	} else {
8790267Sdes		_url_seterr(URL_BAD_SCHEME);
8890267Sdes		return (NULL);
8990267Sdes	}
9038394Sdes}
9138394Sdes
/*
 * Convenience front end to fetchXGet() for callers that do not want
 * the document's struct url_stat: the stat pointer is passed as NULL.
 * Returns exactly what fetchXGet() returns.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
10163340Sdes
10263340Sdes/*
10363340Sdes * Select the appropriate protocol for the URL scheme, and return a
10440939Sdes * write-only stream connected to the document referenced by the URL.
10540939Sdes */
10638394SdesFILE *
10775891SarchiefetchPut(struct url *URL, const char *flags)
10838394Sdes{
10990267Sdes	int direct;
11060587Sume
11190267Sdes	direct = CHECK_FLAG('d');
11290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11390267Sdes		return (fetchPutFile(URL, flags));
11490267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11590267Sdes		return (fetchPutHTTP(URL, flags));
11690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
11790267Sdes		return (fetchPutFTP(URL, flags));
11890267Sdes	} else {
11990267Sdes		_url_seterr(URL_BAD_SCHEME);
12090267Sdes		return (NULL);
12190267Sdes	}
12238394Sdes}
12338394Sdes
12440939Sdes/*
12540975Sdes * Select the appropriate protocol for the URL scheme, and return the
12640975Sdes * size of the document referenced by the URL if it exists.
12740975Sdes */
12840975Sdesint
12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags)
13040975Sdes{
13190267Sdes	int direct;
13260587Sume
13390267Sdes	direct = CHECK_FLAG('d');
13490267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13590267Sdes		return (fetchStatFile(URL, us, flags));
13690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
13790267Sdes		return (fetchStatHTTP(URL, us, flags));
13890267Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
13990267Sdes		return (fetchStatFTP(URL, us, flags));
14040975Sdes	_url_seterr(URL_BAD_SCHEME);
14190267Sdes	return (-1);
14240975Sdes}
14340975Sdes
14440975Sdes/*
14541989Sdes * Select the appropriate protocol for the URL scheme, and return a
14641989Sdes * list of files in the directory pointed to by the URL.
14741989Sdes */
14841989Sdesstruct url_ent *
14975891SarchiefetchList(struct url *URL, const char *flags)
15041989Sdes{
15190267Sdes	int direct;
15260587Sume
15390267Sdes	direct = CHECK_FLAG('d');
15490267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15590267Sdes		return (fetchListFile(URL, flags));
15690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
15790267Sdes		return (fetchListHTTP(URL, flags));
15890267Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
15990267Sdes		return (fetchListFTP(URL, flags));
16041989Sdes	_url_seterr(URL_BAD_SCHEME);
16190267Sdes	return (NULL);
16241989Sdes}
16341989Sdes
/*
 * String front end to fetchXGet(): parse the URL text, run
 * fetchXGet() on the result, then release the parsed URL.
 * Returns NULL if the text cannot be parsed; otherwise returns
 * whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
18137535Sdes
/*
 * String front end to fetchGet(): same as fetchXGetURL() with the
 * struct url_stat pointer passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
19037535Sdes
/*
 * String front end to fetchPut(): parse the URL text, run
 * fetchPut() on the result, then release the parsed URL.
 * Returns NULL if the text cannot be parsed; otherwise returns
 * whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
20837535Sdes
/*
 * String front end to fetchStat(): parse the URL text, run
 * fetchStat() on the result, then release the parsed URL.
 * Returns -1 if the text cannot be parsed; otherwise returns
 * whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int result;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	result = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (result);
}
22640975Sdes
/*
 * String front end to fetchList(): parse the URL text, run
 * fetchList() on the result, then release the parsed URL.
 * Returns NULL if the text cannot be parsed; otherwise returns
 * whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
24441989Sdes
24541989Sdes/*
24660927Sdes * Make a URL
24760927Sdes */
24860927Sdesstruct url *
24975891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25075891Sarchie    const char *user, const char *pwd)
25160927Sdes{
25290267Sdes	struct url *u;
25360927Sdes
25490267Sdes	if (!scheme || (!host && !doc)) {
25590267Sdes		_url_seterr(URL_MALFORMED);
25690267Sdes		return (NULL);
25790267Sdes	}
25860927Sdes
25990267Sdes	if (port < 0 || port > 65535) {
26090267Sdes		_url_seterr(URL_BAD_PORT);
26190267Sdes		return (NULL);
26290267Sdes	}
26390267Sdes
26490267Sdes	/* allocate struct url */
26590267Sdes	if ((u = calloc(1, sizeof *u)) == NULL) {
26690267Sdes		_fetch_syserr();
26790267Sdes		return (NULL);
26890267Sdes	}
26990267Sdes
27090267Sdes	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
27190267Sdes		_fetch_syserr();
27290267Sdes		free(u);
27390267Sdes		return (NULL);
27490267Sdes	}
27590267Sdes
27660927Sdes#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
27790267Sdes	seturl(scheme);
27890267Sdes	seturl(host);
27990267Sdes	seturl(user);
28090267Sdes	seturl(pwd);
28160927Sdes#undef seturl
28290267Sdes	u->port = port;
28360927Sdes
28490267Sdes	return (u);
28560927Sdes}
28660927Sdes
28760927Sdes/*
28837535Sdes * Split an URL into components. URL syntax is:
28967042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document]
29037535Sdes * This almost, but not quite, RFC1738 URL syntax.
29137535Sdes */
29240975Sdesstruct url *
29375891SarchiefetchParseURL(const char *URL)
29437535Sdes{
29590267Sdes	char *doc;
29690267Sdes	const char *p, *q;
29790267Sdes	struct url *u;
29890267Sdes	int i;
29937535Sdes
30090267Sdes	/* allocate struct url */
30190267Sdes	if ((u = calloc(1, sizeof *u)) == NULL) {
30290267Sdes		_fetch_syserr();
30390267Sdes		return (NULL);
30490267Sdes	}
30537535Sdes
30690267Sdes	/* scheme name */
30790267Sdes	if ((p = strstr(URL, ":/"))) {
30890267Sdes		snprintf(u->scheme, URL_SCHEMELEN+1,
30990267Sdes		    "%.*s", (int)(p - URL), URL);
31090267Sdes		URL = ++p;
31190267Sdes		/*
31290267Sdes		 * Only one slash: no host, leave slash as part of document
31390267Sdes		 * Two slashes: host follows, strip slashes
31490267Sdes		 */
31590267Sdes		if (URL[1] == '/')
31690267Sdes			URL = (p += 2);
31790267Sdes	} else {
31890267Sdes		p = URL;
31990267Sdes	}
32090267Sdes	if (!*URL || *URL == '/' || *URL == '.' ||
32190267Sdes	    (u->scheme[0] == '\0' &&
32290267Sdes		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
32390267Sdes		goto nohost;
32437535Sdes
32590267Sdes	p = strpbrk(URL, "/@");
32690267Sdes	if (p && *p == '@') {
32790267Sdes		/* username */
32890267Sdes		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
32990267Sdes			if (i < URL_USERLEN)
33090267Sdes				u->user[i++] = *q;
33179423Sdes
33290267Sdes		/* password */
33390267Sdes		if (*q == ':')
33490267Sdes			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
33590267Sdes				if (i < URL_PWDLEN)
33690267Sdes					u->pwd[i++] = *q;
33790267Sdes
33890267Sdes		p++;
33990267Sdes	} else {
34090267Sdes		p = URL;
34190267Sdes	}
34290267Sdes
34390267Sdes	/* hostname */
34460737Sume#ifdef INET6
34590267Sdes	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
34690267Sdes	    (*++q == '\0' || *q == '/' || *q == ':')) {
34790267Sdes		if ((i = q - p - 2) > MAXHOSTNAMELEN)
34890267Sdes			i = MAXHOSTNAMELEN;
34990267Sdes		strncpy(u->host, ++p, i);
35090267Sdes		p = q;
35190267Sdes	} else
35260737Sume#endif
35390267Sdes		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
35490267Sdes			if (i < MAXHOSTNAMELEN)
35590267Sdes				u->host[i++] = *p;
35637535Sdes
35790267Sdes	/* port */
35890267Sdes	if (*p == ':') {
35990267Sdes		for (q = ++p; *q && (*q != '/'); q++)
36090267Sdes			if (isdigit(*q))
36190267Sdes				u->port = u->port * 10 + (*q - '0');
36290267Sdes			else {
36390267Sdes				/* invalid port */
36490267Sdes				_url_seterr(URL_BAD_PORT);
36590267Sdes				goto ouch;
36690267Sdes			}
36790267Sdes		p = q;
36890267Sdes	}
36937535Sdes
37037535Sdesnohost:
37190267Sdes	/* document */
37290267Sdes	if (!*p)
37390267Sdes		p = "/";
37467419Sdes
37590267Sdes	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
37690267Sdes	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
37790267Sdes		const char hexnums[] = "0123456789abcdef";
37890267Sdes
37990267Sdes		/* percent-escape whitespace. */
38090267Sdes		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
38190267Sdes			_fetch_syserr();
38290267Sdes			goto ouch;
38390267Sdes		}
38490267Sdes		u->doc = doc;
38590267Sdes		while (*p != '\0') {
38690267Sdes			if (!isspace(*p)) {
38790267Sdes				*doc++ = *p++;
38890267Sdes			} else {
38990267Sdes				*doc++ = '%';
39090267Sdes				*doc++ = hexnums[((unsigned int)*p) >> 4];
39190267Sdes				*doc++ = hexnums[((unsigned int)*p) & 0xf];
39290267Sdes				p++;
39390267Sdes			}
39490267Sdes		}
39590267Sdes		*doc = '\0';
39690267Sdes	} else if ((u->doc = strdup(p)) == NULL) {
39790267Sdes		_fetch_syserr();
39890267Sdes		goto ouch;
39967419Sdes	}
40090267Sdes
40190267Sdes	DEBUG(fprintf(stderr,
40288769Sdes		  "scheme:   [%s]\n"
40388769Sdes		  "user:     [%s]\n"
40488769Sdes		  "password: [%s]\n"
40588769Sdes		  "host:     [%s]\n"
40688769Sdes		  "port:     [%d]\n"
40788769Sdes		  "document: [%s]\n",
40837535Sdes		  u->scheme, u->user, u->pwd,
40937535Sdes		  u->host, u->port, u->doc));
41037535Sdes
41190267Sdes	return (u);
41290267Sdes
41337535Sdesouch:
41490267Sdes	free(u);
41590267Sdes	return (NULL);
41637535Sdes}
41760376Sdes
41860376Sdes/*
41960376Sdes * Free a URL
42060376Sdes */
42160376Sdesvoid
42260376SdesfetchFreeURL(struct url *u)
42360376Sdes{
42490267Sdes	free(u->doc);
42590267Sdes	free(u);
42660376Sdes}
427