fetch.c revision 97868
137535Sdes/*-
237535Sdes * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */
2837535Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 97868 2002-06-05 12:46:36Z des $");
3184203Sdillon
3237535Sdes#include <sys/param.h>
3340975Sdes#include <sys/errno.h>
3437535Sdes
3537535Sdes#include <ctype.h>
3637535Sdes#include <stdio.h>
3737535Sdes#include <stdlib.h>
3837535Sdes#include <string.h>
3937535Sdes
4037535Sdes#include "fetch.h"
4140975Sdes#include "common.h"
4237535Sdes
4377238Sdesauth_t	 fetchAuthMethod;
4460924Sdesint	 fetchLastErrCode;
4560924Sdeschar	 fetchLastErrString[MAXERRSTRING];
4660924Sdesint	 fetchTimeout;
4763334Sdesint	 fetchRestartCalls = 1;
4887560Sdesint	 fetchDebug;
4941862Sdes
5041862Sdes
5140975Sdes/*** Local data **************************************************************/
5237535Sdes
5340939Sdes/*
5440975Sdes * Error messages for parser errors
5540975Sdes */
5640975Sdes#define URL_MALFORMED		1
5740975Sdes#define URL_BAD_SCHEME		2
5840975Sdes#define URL_BAD_PORT		3
5940975Sdesstatic struct fetcherr _url_errlist[] = {
6090267Sdes	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6190267Sdes	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6290267Sdes	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
6390267Sdes	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
6440975Sdes};
6540975Sdes
6640975Sdes
6740975Sdes/*** Public API **************************************************************/
6840975Sdes
6940975Sdes/*
7040939Sdes * Select the appropriate protocol for the URL scheme, and return a
7140939Sdes * read-only stream connected to the document referenced by the URL.
7263340Sdes * Also fill out the struct url_stat.
7340939Sdes */
7438394SdesFILE *
7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7638394Sdes{
7790267Sdes	int direct;
7860587Sume
7990267Sdes	direct = CHECK_FLAG('d');
8090267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8190267Sdes		return (fetchXGetFile(URL, us, flags));
8297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
8397868Sdes		return (fetchXGetFTP(URL, us, flags));
8490267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8590267Sdes		return (fetchXGetHTTP(URL, us, flags));
8697868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
8797868Sdes		return (fetchXGetHTTP(URL, us, flags));
8897868Sdes	_url_seterr(URL_BAD_SCHEME);
8997868Sdes	return (NULL);
9038394Sdes}
9138394Sdes
/*
 * Convenience form of fetchXGet() for callers that do not want
 * document statistics: no url_stat structure is supplied.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
10163340Sdes
10263340Sdes/*
10363340Sdes * Select the appropriate protocol for the URL scheme, and return a
10440939Sdes * write-only stream connected to the document referenced by the URL.
10540939Sdes */
10638394SdesFILE *
10775891SarchiefetchPut(struct url *URL, const char *flags)
10838394Sdes{
10990267Sdes	int direct;
11060587Sume
11190267Sdes	direct = CHECK_FLAG('d');
11290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11390267Sdes		return (fetchPutFile(URL, flags));
11497868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
11597868Sdes		return (fetchPutFTP(URL, flags));
11690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11790267Sdes		return (fetchPutHTTP(URL, flags));
11897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
11997868Sdes		return (fetchPutHTTP(URL, flags));
12097868Sdes	_url_seterr(URL_BAD_SCHEME);
12197868Sdes	return (NULL);
12238394Sdes}
12338394Sdes
12440939Sdes/*
12540975Sdes * Select the appropriate protocol for the URL scheme, and return the
12640975Sdes * size of the document referenced by the URL if it exists.
12740975Sdes */
12840975Sdesint
12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags)
13040975Sdes{
13190267Sdes	int direct;
13260587Sume
13390267Sdes	direct = CHECK_FLAG('d');
13490267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13590267Sdes		return (fetchStatFile(URL, us, flags));
13697868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
13797868Sdes		return (fetchStatFTP(URL, us, flags));
13890267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
13990267Sdes		return (fetchStatHTTP(URL, us, flags));
14097868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
14197868Sdes		return (fetchStatHTTP(URL, us, flags));
14240975Sdes	_url_seterr(URL_BAD_SCHEME);
14390267Sdes	return (-1);
14440975Sdes}
14540975Sdes
14640975Sdes/*
14741989Sdes * Select the appropriate protocol for the URL scheme, and return a
14841989Sdes * list of files in the directory pointed to by the URL.
14941989Sdes */
15041989Sdesstruct url_ent *
15175891SarchiefetchList(struct url *URL, const char *flags)
15241989Sdes{
15390267Sdes	int direct;
15460587Sume
15590267Sdes	direct = CHECK_FLAG('d');
15690267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15790267Sdes		return (fetchListFile(URL, flags));
15897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
15997868Sdes		return (fetchListFTP(URL, flags));
16090267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
16190267Sdes		return (fetchListHTTP(URL, flags));
16297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
16397868Sdes		return (fetchListHTTP(URL, flags));
16441989Sdes	_url_seterr(URL_BAD_SCHEME);
16590267Sdes	return (NULL);
16641989Sdes}
16741989Sdes
/*
 * String front end to fetchXGet(): parse the URL text, open the
 * document, and release the parsed URL again before returning.
 * Returns NULL if the URL cannot be parsed; otherwise returns
 * whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
18537535Sdes
/*
 * String front end to fetchGet(): same as fetchXGetURL() but without
 * a url_stat structure to fill out.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
19437535Sdes
/*
 * String front end to fetchPut(): parse the URL text, open the
 * document for writing, and release the parsed URL again before
 * returning.  Returns NULL if the URL cannot be parsed; otherwise
 * returns whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
21237535Sdes
/*
 * String front end to fetchStat(): parse the URL text, stat the
 * document, and release the parsed URL again before returning.
 * Returns -1 if the URL cannot be parsed; otherwise returns whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
23040975Sdes
/*
 * String front end to fetchList(): parse the URL text, list the
 * directory, and release the parsed URL again before returning.
 * Returns NULL if the URL cannot be parsed; otherwise returns
 * whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
24841989Sdes
24941989Sdes/*
25060927Sdes * Make a URL
25160927Sdes */
25260927Sdesstruct url *
25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25475891Sarchie    const char *user, const char *pwd)
25560927Sdes{
25690267Sdes	struct url *u;
25760927Sdes
25890267Sdes	if (!scheme || (!host && !doc)) {
25990267Sdes		_url_seterr(URL_MALFORMED);
26090267Sdes		return (NULL);
26190267Sdes	}
26260927Sdes
26390267Sdes	if (port < 0 || port > 65535) {
26490267Sdes		_url_seterr(URL_BAD_PORT);
26590267Sdes		return (NULL);
26690267Sdes	}
26790267Sdes
26890267Sdes	/* allocate struct url */
26990267Sdes	if ((u = calloc(1, sizeof *u)) == NULL) {
27090267Sdes		_fetch_syserr();
27190267Sdes		return (NULL);
27290267Sdes	}
27390267Sdes
27490267Sdes	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
27590267Sdes		_fetch_syserr();
27690267Sdes		free(u);
27790267Sdes		return (NULL);
27890267Sdes	}
27990267Sdes
28060927Sdes#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
28190267Sdes	seturl(scheme);
28290267Sdes	seturl(host);
28390267Sdes	seturl(user);
28490267Sdes	seturl(pwd);
28560927Sdes#undef seturl
28690267Sdes	u->port = port;
28760927Sdes
28890267Sdes	return (u);
28960927Sdes}
29060927Sdes
29160927Sdes/*
29237535Sdes * Split an URL into components. URL syntax is:
29367042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document]
29437535Sdes * This almost, but not quite, RFC1738 URL syntax.
29537535Sdes */
29640975Sdesstruct url *
29775891SarchiefetchParseURL(const char *URL)
29837535Sdes{
29990267Sdes	char *doc;
30090267Sdes	const char *p, *q;
30190267Sdes	struct url *u;
30290267Sdes	int i;
30337535Sdes
30490267Sdes	/* allocate struct url */
30590267Sdes	if ((u = calloc(1, sizeof *u)) == NULL) {
30690267Sdes		_fetch_syserr();
30790267Sdes		return (NULL);
30890267Sdes	}
30937535Sdes
31090267Sdes	/* scheme name */
31190267Sdes	if ((p = strstr(URL, ":/"))) {
31290267Sdes		snprintf(u->scheme, URL_SCHEMELEN+1,
31390267Sdes		    "%.*s", (int)(p - URL), URL);
31490267Sdes		URL = ++p;
31590267Sdes		/*
31690267Sdes		 * Only one slash: no host, leave slash as part of document
31790267Sdes		 * Two slashes: host follows, strip slashes
31890267Sdes		 */
31990267Sdes		if (URL[1] == '/')
32090267Sdes			URL = (p += 2);
32190267Sdes	} else {
32290267Sdes		p = URL;
32390267Sdes	}
32490267Sdes	if (!*URL || *URL == '/' || *URL == '.' ||
32590267Sdes	    (u->scheme[0] == '\0' &&
32690267Sdes		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
32790267Sdes		goto nohost;
32837535Sdes
32990267Sdes	p = strpbrk(URL, "/@");
33090267Sdes	if (p && *p == '@') {
33190267Sdes		/* username */
33290267Sdes		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
33390267Sdes			if (i < URL_USERLEN)
33490267Sdes				u->user[i++] = *q;
33579423Sdes
33690267Sdes		/* password */
33790267Sdes		if (*q == ':')
33890267Sdes			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
33990267Sdes				if (i < URL_PWDLEN)
34090267Sdes					u->pwd[i++] = *q;
34190267Sdes
34290267Sdes		p++;
34390267Sdes	} else {
34490267Sdes		p = URL;
34590267Sdes	}
34690267Sdes
34790267Sdes	/* hostname */
34860737Sume#ifdef INET6
34990267Sdes	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
35090267Sdes	    (*++q == '\0' || *q == '/' || *q == ':')) {
35190267Sdes		if ((i = q - p - 2) > MAXHOSTNAMELEN)
35290267Sdes			i = MAXHOSTNAMELEN;
35390267Sdes		strncpy(u->host, ++p, i);
35490267Sdes		p = q;
35590267Sdes	} else
35660737Sume#endif
35790267Sdes		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
35890267Sdes			if (i < MAXHOSTNAMELEN)
35990267Sdes				u->host[i++] = *p;
36037535Sdes
36190267Sdes	/* port */
36290267Sdes	if (*p == ':') {
36390267Sdes		for (q = ++p; *q && (*q != '/'); q++)
36490267Sdes			if (isdigit(*q))
36590267Sdes				u->port = u->port * 10 + (*q - '0');
36690267Sdes			else {
36790267Sdes				/* invalid port */
36890267Sdes				_url_seterr(URL_BAD_PORT);
36990267Sdes				goto ouch;
37090267Sdes			}
37190267Sdes		p = q;
37290267Sdes	}
37337535Sdes
37437535Sdesnohost:
37590267Sdes	/* document */
37690267Sdes	if (!*p)
37790267Sdes		p = "/";
37867419Sdes
37990267Sdes	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
38090267Sdes	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
38190267Sdes		const char hexnums[] = "0123456789abcdef";
38290267Sdes
38390267Sdes		/* percent-escape whitespace. */
38490267Sdes		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
38590267Sdes			_fetch_syserr();
38690267Sdes			goto ouch;
38790267Sdes		}
38890267Sdes		u->doc = doc;
38990267Sdes		while (*p != '\0') {
39090267Sdes			if (!isspace(*p)) {
39190267Sdes				*doc++ = *p++;
39290267Sdes			} else {
39390267Sdes				*doc++ = '%';
39490267Sdes				*doc++ = hexnums[((unsigned int)*p) >> 4];
39590267Sdes				*doc++ = hexnums[((unsigned int)*p) & 0xf];
39690267Sdes				p++;
39790267Sdes			}
39890267Sdes		}
39990267Sdes		*doc = '\0';
40090267Sdes	} else if ((u->doc = strdup(p)) == NULL) {
40190267Sdes		_fetch_syserr();
40290267Sdes		goto ouch;
40367419Sdes	}
40490267Sdes
40590267Sdes	DEBUG(fprintf(stderr,
40688769Sdes		  "scheme:   [%s]\n"
40788769Sdes		  "user:     [%s]\n"
40888769Sdes		  "password: [%s]\n"
40988769Sdes		  "host:     [%s]\n"
41088769Sdes		  "port:     [%d]\n"
41188769Sdes		  "document: [%s]\n",
41237535Sdes		  u->scheme, u->user, u->pwd,
41337535Sdes		  u->host, u->port, u->doc));
41437535Sdes
41590267Sdes	return (u);
41690267Sdes
41737535Sdesouch:
41890267Sdes	free(u);
41990267Sdes	return (NULL);
42037535Sdes}
42160376Sdes
42260376Sdes/*
42360376Sdes * Free a URL
42460376Sdes */
42560376Sdesvoid
42660376SdesfetchFreeURL(struct url *u)
42760376Sdes{
42890267Sdes	free(u->doc);
42990267Sdes	free(u);
43060376Sdes}
431