fetch.c revision 88769
137535Sdes/*-
237535Sdes * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */
2837535Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 88769 2002-01-01 14:48:09Z des $");
3184203Sdillon
3237535Sdes#include <sys/param.h>
3340975Sdes#include <sys/errno.h>
3437535Sdes
3537535Sdes#include <ctype.h>
3637535Sdes#include <stdio.h>
3737535Sdes#include <stdlib.h>
3837535Sdes#include <string.h>
3937535Sdes
4037535Sdes#include "fetch.h"
4140975Sdes#include "common.h"
4237535Sdes
4377238Sdesauth_t	 fetchAuthMethod;
4460924Sdesint	 fetchLastErrCode;
4560924Sdeschar	 fetchLastErrString[MAXERRSTRING];
4660924Sdesint	 fetchTimeout;
4763334Sdesint	 fetchRestartCalls = 1;
4887560Sdesint	 fetchDebug;
4941862Sdes
5041862Sdes
5140975Sdes/*** Local data **************************************************************/
5237535Sdes
5340939Sdes/*
5440975Sdes * Error messages for parser errors
5540975Sdes */
5640975Sdes#define URL_MALFORMED		1
5740975Sdes#define URL_BAD_SCHEME		2
5840975Sdes#define URL_BAD_PORT		3
5940975Sdesstatic struct fetcherr _url_errlist[] = {
6040975Sdes    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6140975Sdes    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6240975Sdes    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
6340975Sdes    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
6440975Sdes};
6540975Sdes
6640975Sdes
6740975Sdes/*** Public API **************************************************************/
6840975Sdes
6940975Sdes/*
7040939Sdes * Select the appropriate protocol for the URL scheme, and return a
7140939Sdes * read-only stream connected to the document referenced by the URL.
7263340Sdes * Also fill out the struct url_stat.
7340939Sdes */
7438394SdesFILE *
7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7638394Sdes{
7760587Sume    int direct;
7860587Sume
7967892Sdes    direct = CHECK_FLAG('d');
8067042Sdes    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8163340Sdes	return fetchXGetFile(URL, us, flags);
8267042Sdes    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8363340Sdes	return fetchXGetHTTP(URL, us, flags);
8467042Sdes    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
8563340Sdes	return fetchXGetFTP(URL, us, flags);
8660587Sume    } else {
8740975Sdes	_url_seterr(URL_BAD_SCHEME);
8840975Sdes	return NULL;
8940975Sdes    }
9038394Sdes}
9138394Sdes
/*
 * Convenience form of fetchXGet() for callers that do not want a
 * struct url_stat filled in: forwards the URL and flags with a NULL
 * url_stat pointer and returns the resulting stream.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGet(URL, no_stat, flags);
}
10163340Sdes
10263340Sdes/*
10363340Sdes * Select the appropriate protocol for the URL scheme, and return a
10440939Sdes * write-only stream connected to the document referenced by the URL.
10540939Sdes */
10638394SdesFILE *
10775891SarchiefetchPut(struct url *URL, const char *flags)
10838394Sdes{
10960587Sume    int direct;
11060587Sume
11167892Sdes    direct = CHECK_FLAG('d');
11267042Sdes    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11338394Sdes	return fetchPutFile(URL, flags);
11467042Sdes    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11538394Sdes	return fetchPutHTTP(URL, flags);
11667042Sdes    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
11738394Sdes	return fetchPutFTP(URL, flags);
11860587Sume    } else {
11940975Sdes	_url_seterr(URL_BAD_SCHEME);
12040975Sdes	return NULL;
12140975Sdes    }
12238394Sdes}
12338394Sdes
12440939Sdes/*
12540975Sdes * Select the appropriate protocol for the URL scheme, and return the
12640975Sdes * size of the document referenced by the URL if it exists.
12740975Sdes */
12840975Sdesint
12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags)
13040975Sdes{
13160587Sume    int direct;
13260587Sume
13367892Sdes    direct = CHECK_FLAG('d');
13467042Sdes    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13540975Sdes	return fetchStatFile(URL, us, flags);
13667042Sdes    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
13740975Sdes	return fetchStatHTTP(URL, us, flags);
13867042Sdes    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
13940975Sdes	return fetchStatFTP(URL, us, flags);
14060587Sume    } else {
14140975Sdes	_url_seterr(URL_BAD_SCHEME);
14240975Sdes	return -1;
14340975Sdes    }
14440975Sdes}
14540975Sdes
14640975Sdes/*
14741989Sdes * Select the appropriate protocol for the URL scheme, and return a
14841989Sdes * list of files in the directory pointed to by the URL.
14941989Sdes */
15041989Sdesstruct url_ent *
15175891SarchiefetchList(struct url *URL, const char *flags)
15241989Sdes{
15360587Sume    int direct;
15460587Sume
15567892Sdes    direct = CHECK_FLAG('d');
15667042Sdes    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15741989Sdes	return fetchListFile(URL, flags);
15867042Sdes    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
15941989Sdes	return fetchListHTTP(URL, flags);
16067042Sdes    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
16141989Sdes	return fetchListFTP(URL, flags);
16260587Sume    } else {
16341989Sdes	_url_seterr(URL_BAD_SCHEME);
16441989Sdes	return NULL;
16541989Sdes    }
16641989Sdes}
16741989Sdes
/*
 * String front end to fetchXGet(): parse the URL text, hand the parsed
 * form to fetchXGet() together with us and flags, then release the parsed
 * URL.  Returns NULL if parsing fails, otherwise whatever fetchXGet()
 * returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
        return NULL;

    stream = fetchXGet(parsed, us, flags);
    fetchFreeURL(parsed);

    return stream;
}
18537535Sdes
/*
 * String front end for callers that do not want a struct url_stat:
 * forwards to fetchXGetURL() with a NULL url_stat pointer and returns
 * its result.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
19437535Sdes
/*
 * String front end to fetchPut(): parse the URL text, hand the parsed
 * form to fetchPut() together with flags, then release the parsed URL.
 * Returns NULL if parsing fails, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
        return NULL;

    stream = fetchPut(parsed, flags);
    fetchFreeURL(parsed);

    return stream;
}
21237535Sdes
/*
 * String front end to fetchStat(): parse the URL text, hand the parsed
 * form to fetchStat() together with us and flags, then release the parsed
 * URL.  Returns -1 if parsing fails, otherwise whatever fetchStat()
 * returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int status;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
        return -1;

    status = fetchStat(parsed, us, flags);
    fetchFreeURL(parsed);

    return status;
}
23040975Sdes
/*
 * String front end to fetchList(): parse the URL text, hand the parsed
 * form to fetchList() together with flags, then release the parsed URL.
 * Returns NULL if parsing fails, otherwise whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
        return NULL;

    entries = fetchList(parsed, flags);
    fetchFreeURL(parsed);

    return entries;
}
24841989Sdes
24941989Sdes/*
25060927Sdes * Make a URL
25160927Sdes */
25260927Sdesstruct url *
25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25475891Sarchie    const char *user, const char *pwd)
25560927Sdes{
25660927Sdes    struct url *u;
25760927Sdes
25860927Sdes    if (!scheme || (!host && !doc)) {
25960927Sdes	_url_seterr(URL_MALFORMED);
26060927Sdes	return NULL;
26160927Sdes    }
26260927Sdes
26360927Sdes    if (port < 0 || port > 65535) {
26460927Sdes	_url_seterr(URL_BAD_PORT);
26560927Sdes	return NULL;
26660927Sdes    }
26760927Sdes
26860927Sdes    /* allocate struct url */
26960927Sdes    if ((u = calloc(1, sizeof *u)) == NULL) {
27060927Sdes	_fetch_syserr();
27160927Sdes	return NULL;
27260927Sdes    }
27360927Sdes
27460927Sdes    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
27560927Sdes	_fetch_syserr();
27660927Sdes	free(u);
27760927Sdes	return NULL;
27860927Sdes    }
27960927Sdes
28060927Sdes#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
28160927Sdes    seturl(scheme);
28260927Sdes    seturl(host);
28360927Sdes    seturl(user);
28460927Sdes    seturl(pwd);
28560927Sdes#undef seturl
28660927Sdes    u->port = port;
28760927Sdes
28860927Sdes    return u;
28960927Sdes}
29060927Sdes
29160927Sdes/*
29237535Sdes * Split an URL into components. URL syntax is:
29367042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document]
29437535Sdes * This almost, but not quite, RFC1738 URL syntax.
29537535Sdes */
29640975Sdesstruct url *
29775891SarchiefetchParseURL(const char *URL)
29837535Sdes{
29975891Sarchie    char *doc;
30075891Sarchie    const char *p, *q;
30140975Sdes    struct url *u;
30237535Sdes    int i;
30337535Sdes
30440975Sdes    /* allocate struct url */
30560190Sdes    if ((u = calloc(1, sizeof *u)) == NULL) {
30640975Sdes	_fetch_syserr();
30737535Sdes	return NULL;
30840975Sdes    }
30937535Sdes
31037535Sdes    /* scheme name */
31167042Sdes    if ((p = strstr(URL, ":/"))) {
31279834Sdes	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
31367042Sdes	URL = ++p;
31467042Sdes	/*
31567042Sdes	 * Only one slash: no host, leave slash as part of document
31667042Sdes	 * Two slashes: host follows, strip slashes
31767042Sdes	 */
31867042Sdes	if (URL[1] == '/')
31967042Sdes	    URL = (p += 2);
32069975Sdes    } else {
32169975Sdes	p = URL;
32240975Sdes    }
32379834Sdes    if (!*URL || *URL == '/' || *URL == '.' ||
32480520Sse	(u->scheme[0] == '\0' &&
32580520Sse    	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
32637535Sdes	goto nohost;
32737535Sdes
32837535Sdes    p = strpbrk(URL, "/@");
32941862Sdes    if (p && *p == '@') {
33037535Sdes	/* username */
33137535Sdes	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
33237535Sdes	    if (i < URL_USERLEN)
33337535Sdes		u->user[i++] = *q;
33437535Sdes
33537535Sdes	/* password */
33637535Sdes	if (*q == ':')
33737535Sdes	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
33837535Sdes		if (i < URL_PWDLEN)
33937535Sdes		    u->pwd[i++] = *q;
34037535Sdes
34137535Sdes	p++;
34279423Sdes    } else {
34379423Sdes	p = URL;
34479423Sdes    }
34579423Sdes
34637535Sdes    /* hostname */
34760737Sume#ifdef INET6
34860737Sume    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
34960737Sume	(*++q == '\0' || *q == '/' || *q == ':')) {
35060737Sume	if ((i = q - p - 2) > MAXHOSTNAMELEN)
35160737Sume	    i = MAXHOSTNAMELEN;
35260737Sume	strncpy(u->host, ++p, i);
35360737Sume	p = q;
35460737Sume    } else
35560737Sume#endif
35660737Sume	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
35760737Sume	    if (i < MAXHOSTNAMELEN)
35860737Sume		u->host[i++] = *p;
35937535Sdes
36037535Sdes    /* port */
36137535Sdes    if (*p == ':') {
36237535Sdes	for (q = ++p; *q && (*q != '/'); q++)
36337535Sdes	    if (isdigit(*q))
36437535Sdes		u->port = u->port * 10 + (*q - '0');
36540975Sdes	    else {
36640975Sdes		/* invalid port */
36740975Sdes		_url_seterr(URL_BAD_PORT);
36840975Sdes		goto ouch;
36940975Sdes	    }
37037535Sdes	while (*p && (*p != '/'))
37137535Sdes	    p++;
37237535Sdes    }
37337535Sdes
37437535Sdesnohost:
37537535Sdes    /* document */
37660376Sdes    if (!*p)
37760376Sdes	p = "/";
37860376Sdes
37969465Sdes    if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
38069465Sdes	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
38167419Sdes	const char hexnums[] = "0123456789abcdef";
38267419Sdes
38369465Sdes	/* percent-escape whitespace. */
38469465Sdes	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
38567419Sdes	    _fetch_syserr();
38667419Sdes	    goto ouch;
38767419Sdes	}
38869465Sdes	u->doc = doc;
38967419Sdes	while (*p != '\0') {
39067419Sdes	    if (!isspace(*p)) {
39167419Sdes		*doc++ = *p++;
39267419Sdes            } else {
39367419Sdes		*doc++ = '%';
39467419Sdes		*doc++ = hexnums[((unsigned int)*p) >> 4];
39567419Sdes		*doc++ = hexnums[((unsigned int)*p) & 0xf];
39667419Sdes		p++;
39767419Sdes            }
39867419Sdes	}
39967419Sdes	*doc = '\0';
40067419Sdes    } else if ((u->doc = strdup(p)) == NULL) {
40160376Sdes	_fetch_syserr();
40260376Sdes	goto ouch;
40340939Sdes    }
40437535Sdes
40537535Sdes    DEBUG(fprintf(stderr,
40688769Sdes		  "scheme:   [%s]\n"
40788769Sdes		  "user:     [%s]\n"
40888769Sdes		  "password: [%s]\n"
40988769Sdes		  "host:     [%s]\n"
41088769Sdes		  "port:     [%d]\n"
41188769Sdes		  "document: [%s]\n",
41237535Sdes		  u->scheme, u->user, u->pwd,
41337535Sdes		  u->host, u->port, u->doc));
41437535Sdes
41537535Sdes    return u;
41637535Sdes
41737535Sdesouch:
41837535Sdes    free(u);
41937535Sdes    return NULL;
42037535Sdes}
42160376Sdes
42260376Sdes/*
42360376Sdes * Free a URL
42460376Sdes */
42560376Sdesvoid
42660376SdesfetchFreeURL(struct url *u)
42760376Sdes{
42860376Sdes    free(u->doc);
42960376Sdes    free(u);
43060376Sdes}
431