fetch.c revision 90264
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
281541Srgrimes
291541Srgrimes#include <sys/cdefs.h>
301541Srgrimes__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 90264 2002-02-05 21:32:16Z des $");
311541Srgrimes
321541Srgrimes#include <sys/param.h>
3314505Shsu#include <sys/errno.h>
3450477Speter
351541Srgrimes#include <ctype.h>
361541Srgrimes#include <stdio.h>
372165Spaul#include <stdlib.h>
382165Spaul#include <string.h>
392165Spaul
4015492Sbde#include "fetch.h"
411541Srgrimes#include "common.h"
421541Srgrimes
431541Srgrimesauth_t	 fetchAuthMethod;
441541Srgrimesint	 fetchLastErrCode;
451541Srgrimeschar	 fetchLastErrString[MAXERRSTRING];
461541Srgrimesint	 fetchTimeout;
471541Srgrimesint	 fetchRestartCalls = 1;
481541Srgrimesint	 fetchDebug;
4936079Swollman
5036079Swollman
/*** Local data **************************************************************/
5261837Salfred
531541Srgrimes/*
5436079Swollman * Error messages for parser errors
551541Srgrimes */
561541Srgrimes#define URL_MALFORMED		1
571541Srgrimes#define URL_BAD_SCHEME		2
581541Srgrimes#define URL_BAD_PORT		3
591541Srgrimesstatic struct fetcherr _url_errlist[] = {
601541Srgrimes    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
611541Srgrimes    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6213765Smpp    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
631541Srgrimes    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
641541Srgrimes};
651541Srgrimes
661541Srgrimes
/*** Public API **************************************************************/
681541Srgrimes
691541Srgrimes/*
701541Srgrimes * Select the appropriate protocol for the URL scheme, and return a
711541Srgrimes * read-only stream connected to the document referenced by the URL.
721541Srgrimes * Also fill out the struct url_stat.
7360938Sjake */
7460938SjakeFILE *
7560938SjakefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7614547Sdg{
7718787Spst    int direct;
7818787Spst
791541Srgrimes    direct = CHECK_FLAG('d');
801541Srgrimes    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
811541Srgrimes	return fetchXGetFile(URL, us, flags);
8241087Struckman    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8341087Struckman	return fetchXGetHTTP(URL, us, flags);
841541Srgrimes    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
8560938Sjake	return fetchXGetFTP(URL, us, flags);
861541Srgrimes    } else {
871541Srgrimes	_url_seterr(URL_BAD_SCHEME);
881541Srgrimes	return NULL;
891541Srgrimes    }
901541Srgrimes}
911541Srgrimes
/*
 * Convenience form of fetchXGet() for callers that do not want stat
 * information: the url_stat argument is passed as NULL.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
1011541Srgrimes
1021541Srgrimes/*
1031541Srgrimes * Select the appropriate protocol for the URL scheme, and return a
1041541Srgrimes * write-only stream connected to the document referenced by the URL.
1051541Srgrimes */
10636527SpeterFILE *
1071541SrgrimesfetchPut(struct url *URL, const char *flags)
10855943Sjasone{
10959288Sjlemon    int direct;
1101541Srgrimes
11138482Swollman    direct = CHECK_FLAG('d');
11238482Swollman    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11351381Sgreen	return fetchPutFile(URL, flags);
11438482Swollman    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11536079Swollman	return fetchPutHTTP(URL, flags);
11643458Sbde    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
11761837Salfred	return fetchPutFTP(URL, flags);
11861837Salfred    } else {
11961837Salfred	_url_seterr(URL_BAD_SCHEME);
12061837Salfred	return NULL;
12161837Salfred    }
1221541Srgrimes}
1231541Srgrimes
1241541Srgrimes/*
1251541Srgrimes * Select the appropriate protocol for the URL scheme, and return the
1261541Srgrimes * size of the document referenced by the URL if it exists.
12714547Sdg */
12814547Sdgint
12914547SdgfetchStat(struct url *URL, struct url_stat *us, const char *flags)
13014547Sdg{
13114547Sdg    int direct;
13214547Sdg
13314547Sdg    direct = CHECK_FLAG('d');
1341541Srgrimes    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13514547Sdg	return fetchStatFile(URL, us, flags);
13614547Sdg    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
13714547Sdg	return fetchStatHTTP(URL, us, flags);
1381541Srgrimes    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
13914547Sdg	return fetchStatFTP(URL, us, flags);
14014547Sdg    } else {
14143196Sfenner	_url_seterr(URL_BAD_SCHEME);
1421541Srgrimes	return -1;
14336079Swollman    }
14436079Swollman}
14536079Swollman
14636079Swollman/*
14736079Swollman * Select the appropriate protocol for the URL scheme, and return a
14836079Swollman * list of files in the directory pointed to by the URL.
14936079Swollman */
15036079Swollmanstruct url_ent *
15136079SwollmanfetchList(struct url *URL, const char *flags)
15236079Swollman{
15336079Swollman    int direct;
15436079Swollman
15536079Swollman    direct = CHECK_FLAG('d');
15636079Swollman    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15736079Swollman	return fetchListFile(URL, flags);
15836079Swollman    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
15936079Swollman	return fetchListHTTP(URL, flags);
16036079Swollman    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
16136079Swollman	return fetchListFTP(URL, flags);
16236079Swollman    } else {
16336079Swollman	_url_seterr(URL_BAD_SCHEME);
16436079Swollman	return NULL;
16536079Swollman    }
16636079Swollman}
16736079Swollman
/*
 * Parse the URL string and, when parsing succeeds, pass the result to
 * fetchXGet().  The parsed URL is released again before returning.
 * Returns NULL if the string cannot be parsed.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchXGet(parsed, us, flags);
    fetchFreeURL(parsed);

    return stream;
}
18536527Speter
/*
 * Convenience form of fetchXGetURL() for callers that do not want stat
 * information: the url_stat argument is passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
1941541Srgrimes
/*
 * Parse the URL string and, when parsing succeeds, pass the result to
 * fetchPut().  The parsed URL is released again before returning.
 * Returns NULL if the string cannot be parsed.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchPut(parsed, flags);
    fetchFreeURL(parsed);

    return stream;
}
2121541Srgrimes
/*
 * Parse the URL string and, when parsing succeeds, pass the result to
 * fetchStat().  The parsed URL is released again before returning.
 * Returns -1 if the string cannot be parsed.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int status;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return -1;

    status = fetchStat(parsed, us, flags);
    fetchFreeURL(parsed);

    return status;
}
2301541Srgrimes
/*
 * Parse the URL string and, when parsing succeeds, pass the result to
 * fetchList().  The parsed URL is released again before returning.
 * Returns NULL if the string cannot be parsed.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    entries = fetchList(parsed, flags);
    fetchFreeURL(parsed);

    return entries;
}
24836527Speter
24936527Speter/*
25036527Speter * Make a URL
2511541Srgrimes */
25236527Speterstruct url *
25336527SpeterfetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25436527Speter    const char *user, const char *pwd)
25536527Speter{
2561541Srgrimes    struct url *u;
25755205Speter
25831927Sbde    if (!scheme || (!host && !doc)) {
25938482Swollman	_url_seterr(URL_MALFORMED);
26038482Swollman	return NULL;
26138482Swollman    }
26238482Swollman
26338482Swollman    if (port < 0 || port > 65535) {
26438482Swollman	_url_seterr(URL_BAD_PORT);
26538482Swollman	return NULL;
26638482Swollman    }
26738482Swollman
26838482Swollman    /* allocate struct url */
26938482Swollman    if ((u = calloc(1, sizeof *u)) == NULL) {
27038482Swollman	_fetch_syserr();
27138482Swollman	return NULL;
27238482Swollman    }
27340931Sdg
27460938Sjake    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
27540931Sdg	_fetch_syserr();
27640931Sdg	free(u);
27740931Sdg	return NULL;
27840931Sdg    }
27961837Salfred
28061837Salfred#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
28161837Salfred    seturl(scheme);
28261837Salfred    seturl(host);
28361837Salfred    seturl(user);
28461837Salfred    seturl(pwd);
28561837Salfred#undef seturl
28661837Salfred    u->port = port;
28761837Salfred
28861837Salfred    return u;
28961837Salfred}
29031927Sbde
29131927Sbde/*
29231927Sbde * Split an URL into components. URL syntax is:
29361837Salfred * [method:/][/[user[:pwd]@]host[:port]/][document]
29431927Sbde * This almost, but not quite, RFC1738 URL syntax.
29531927Sbde */
29636079Swollmanstruct url *
2972112SwollmanfetchParseURL(const char *URL)
29836079Swollman{
29936079Swollman    char *doc;
30014505Shsu    const char *p, *q;
30132995Sbde    struct url *u;
30215492Sbde    int i;
30315492Sbde
30415492Sbde    /* allocate struct url */
30515492Sbde    if ((u = calloc(1, sizeof *u)) == NULL) {
30632995Sbde	_fetch_syserr();
30732995Sbde	return NULL;
30815492Sbde    }
3091541Srgrimes
3101541Srgrimes    /* scheme name */
3111541Srgrimes    if ((p = strstr(URL, ":/"))) {
31245311Sdt	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
31351418Sgreen	URL = ++p;
31445311Sdt	/*
31551418Sgreen	 * Only one slash: no host, leave slash as part of document
31643512Snewton	 * Two slashes: host follows, strip slashes
31736735Sdfr	 */
31814505Shsu	if (URL[1] == '/')
31929350Speter	    URL = (p += 2);
32029350Speter    } else {
32152984Speter	p = URL;
3223304Sphk    }
3233304Sphk    if (!*URL || *URL == '/' || *URL == '.' ||
3243304Sphk	(u->scheme[0] == '\0' &&
3253304Sphk    	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
32628270Swollman	goto nohost;
32714505Shsu
32814505Shsu    p = strpbrk(URL, "/@");
32928270Swollman    if (p && *p == '@') {
33014505Shsu	/* username */
33114505Shsu	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
33214505Shsu	    if (i < URL_USERLEN)
33314505Shsu		u->user[i++] = *q;
33414505Shsu
33514505Shsu	/* password */
33614505Shsu	if (*q == ':')
33714505Shsu	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
33819670Sbde		if (i < URL_PWDLEN)
33919670Sbde		    u->pwd[i++] = *q;
34014505Shsu
34114505Shsu	p++;
34214505Shsu    } else {
34314505Shsu	p = URL;
34452070Sgreen    }
34552070Sgreen
34652070Sgreen    /* hostname */
34736079Swollman#ifdef INET6
34814505Shsu    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
34914505Shsu	(*++q == '\0' || *q == '/' || *q == ':')) {
35014505Shsu	if ((i = q - p - 2) > MAXHOSTNAMELEN)
35128270Swollman	    i = MAXHOSTNAMELEN;
35236079Swollman	strncpy(u->host, ++p, i);
35328270Swollman	p = q;
35414505Shsu    } else
35514505Shsu#endif
35614505Shsu	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
35728270Swollman	    if (i < MAXHOSTNAMELEN)
35814505Shsu		u->host[i++] = *p;
35914505Shsu
36014505Shsu    /* port */
36136079Swollman    if (*p == ':') {
36214505Shsu	for (q = ++p; *q && (*q != '/'); q++)
36314505Shsu	    if (isdigit(*q))
36438482Swollman		u->port = u->port * 10 + (*q - '0');
36514505Shsu	    else {
36614505Shsu		/* invalid port */
36714505Shsu		_url_seterr(URL_BAD_PORT);
36814505Shsu		goto ouch;
36914505Shsu	    }
37025201Swollman	p = q;
37114505Shsu    }
37218787Spst
37318787Spstnohost:
37427531Sfenner    /* document */
37551381Sgreen    if (!*p)
37651381Sgreen	p = "/";
37738482Swollman
37838482Swollman    if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
37938482Swollman	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
38053541Sshin	const char hexnums[] = "0123456789abcdef";
38153541Sshin
38253541Sshin	/* percent-escape whitespace. */
38353541Sshin	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
38453541Sshin	    _fetch_syserr();
38553541Sshin	    goto ouch;
38629350Speter	}
38729350Speter	u->doc = doc;
38828270Swollman	while (*p != '\0') {
38928270Swollman	    if (!isspace(*p)) {
39028270Swollman		*doc++ = *p++;
39114505Shsu            } else {
39214505Shsu		*doc++ = '%';
39328270Swollman		*doc++ = hexnums[((unsigned int)*p) >> 4];
39428270Swollman		*doc++ = hexnums[((unsigned int)*p) & 0xf];
39528270Swollman		p++;
39638482Swollman            }
39714505Shsu	}
39836079Swollman	*doc = '\0';
39914505Shsu    } else if ((u->doc = strdup(p)) == NULL) {
40031927Sbde	_fetch_syserr();
40161837Salfred	goto ouch;
40261837Salfred    }
40361837Salfred
40461837Salfred    DEBUG(fprintf(stderr,
40561837Salfred		  "scheme:   [%s]\n"
40661837Salfred		  "user:     [%s]\n"
40765534Salfred		  "password: [%s]\n"
40861837Salfred		  "host:     [%s]\n"
40961837Salfred		  "port:     [%d]\n"
41055205Speter		  "document: [%s]\n",
4112165Spaul		  u->scheme, u->user, u->pwd,
41214505Shsu		  u->host, u->port, u->doc));
413
414    return u;
415
416ouch:
417    free(u);
418    return NULL;
419}
420
421/*
422 * Free a URL
423 */
424void
425fetchFreeURL(struct url *u)
426{
427    free(u->doc);
428    free(u);
429}
430