fetch.c revision 60737
137535Sdes/*-
237535Sdes * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes *
2850476Speter * $FreeBSD: head/lib/libfetch/fetch.c 60737 2000-05-20 18:23:51Z ume $
2937535Sdes */
3037535Sdes
3137535Sdes#include <sys/param.h>
3240975Sdes#include <sys/errno.h>
3337535Sdes
3437535Sdes#include <ctype.h>
3537535Sdes#include <stdio.h>
3637535Sdes#include <stdlib.h>
3737535Sdes#include <string.h>
3837535Sdes
3937535Sdes#include "fetch.h"
4040975Sdes#include "common.h"
4137535Sdes
4237535Sdes
/*
 * Library-wide state with external linkage.  Neither variable is read or
 * written in this file.
 * NOTE(review): presumably fetchLastErrCode holds the code of the most
 * recently recorded error and fetchTimeout a timeout applied by the
 * protocol back ends -- confirm against the code that uses them.
 */
int fetchLastErrCode;
int fetchTimeout;
4541862Sdes
4641862Sdes
4740975Sdes/*** Local data **************************************************************/
4837535Sdes
/*
 * Error messages for parser errors
 *
 * The URL_* values are the codes handed to _url_seterr() by the functions
 * in this file; _url_errlist pairs each code with a message string.
 * NOTE(review): the entries look like { code, error category, message },
 * and the final entry (code -1, "Unknown parser error") looks like the
 * terminator / fallback -- confirm against struct fetcherr in common.h.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr _url_errlist[] = {
    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
};
6140975Sdes
6240975Sdes
6340975Sdes/*** Public API **************************************************************/
6440975Sdes
6540975Sdes/*
6640939Sdes * Select the appropriate protocol for the URL scheme, and return a
6740939Sdes * read-only stream connected to the document referenced by the URL.
6840939Sdes */
6938394SdesFILE *
7040975SdesfetchGet(struct url *URL, char *flags)
7138394Sdes{
7260587Sume    int direct;
7360587Sume
7460587Sume    direct = (flags && strchr(flags, 'd'));
7538394Sdes    if (strcasecmp(URL->scheme, "file") == 0)
7638394Sdes	return fetchGetFile(URL, flags);
7738394Sdes    else if (strcasecmp(URL->scheme, "http") == 0)
7838394Sdes	return fetchGetHTTP(URL, flags);
7960587Sume    else if (strcasecmp(URL->scheme, "ftp") == 0) {
8060587Sume	if (!direct &&
8160587Sume	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
8260587Sume	    return fetchGetHTTP(URL, flags);
8338394Sdes	return fetchGetFTP(URL, flags);
8460587Sume    } else {
8540975Sdes	_url_seterr(URL_BAD_SCHEME);
8640975Sdes	return NULL;
8740975Sdes    }
8838394Sdes}
8938394Sdes
9040939Sdes/*
9140939Sdes * Select the appropriate protocol for the URL scheme, and return a
9240939Sdes * write-only stream connected to the document referenced by the URL.
9340939Sdes */
9438394SdesFILE *
9540975SdesfetchPut(struct url *URL, char *flags)
9638394Sdes{
9760587Sume    int direct;
9860587Sume
9960587Sume    direct = (flags && strchr(flags, 'd'));
10038394Sdes    if (strcasecmp(URL->scheme, "file") == 0)
10138394Sdes	return fetchPutFile(URL, flags);
10238394Sdes    else if (strcasecmp(URL->scheme, "http") == 0)
10338394Sdes	return fetchPutHTTP(URL, flags);
10460587Sume    else if (strcasecmp(URL->scheme, "ftp") == 0) {
10560587Sume	if (!direct &&
10660587Sume	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
10760587Sume	    return fetchPutHTTP(URL, flags);
10838394Sdes	return fetchPutFTP(URL, flags);
10960587Sume    } else {
11040975Sdes	_url_seterr(URL_BAD_SCHEME);
11140975Sdes	return NULL;
11240975Sdes    }
11338394Sdes}
11438394Sdes
11540939Sdes/*
11640975Sdes * Select the appropriate protocol for the URL scheme, and return the
11740975Sdes * size of the document referenced by the URL if it exists.
11840975Sdes */
11940975Sdesint
12040975SdesfetchStat(struct url *URL, struct url_stat *us, char *flags)
12140975Sdes{
12260587Sume    int direct;
12360587Sume
12460587Sume    direct = (flags && strchr(flags, 'd'));
12540975Sdes    if (strcasecmp(URL->scheme, "file") == 0)
12640975Sdes	return fetchStatFile(URL, us, flags);
12740975Sdes    else if (strcasecmp(URL->scheme, "http") == 0)
12840975Sdes	return fetchStatHTTP(URL, us, flags);
12960587Sume    else if (strcasecmp(URL->scheme, "ftp") == 0) {
13060587Sume	if (!direct &&
13160587Sume	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
13260587Sume	    return fetchStatHTTP(URL, us, flags);
13340975Sdes	return fetchStatFTP(URL, us, flags);
13460587Sume    } else {
13540975Sdes	_url_seterr(URL_BAD_SCHEME);
13640975Sdes	return -1;
13740975Sdes    }
13840975Sdes}
13940975Sdes
14040975Sdes/*
14141989Sdes * Select the appropriate protocol for the URL scheme, and return a
14241989Sdes * list of files in the directory pointed to by the URL.
14341989Sdes */
14441989Sdesstruct url_ent *
14541989SdesfetchList(struct url *URL, char *flags)
14641989Sdes{
14760587Sume    int direct;
14860587Sume
14960587Sume    direct = (flags && strchr(flags, 'd'));
15041989Sdes    if (strcasecmp(URL->scheme, "file") == 0)
15141989Sdes	return fetchListFile(URL, flags);
15241989Sdes    else if (strcasecmp(URL->scheme, "http") == 0)
15341989Sdes	return fetchListHTTP(URL, flags);
15460587Sume    else if (strcasecmp(URL->scheme, "ftp") == 0) {
15560587Sume	if (!direct &&
15660587Sume	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
15760587Sume	    return fetchListHTTP(URL, flags);
15841989Sdes	return fetchListFTP(URL, flags);
15960587Sume    } else {
16041989Sdes	_url_seterr(URL_BAD_SCHEME);
16141989Sdes	return NULL;
16241989Sdes    }
16341989Sdes}
16441989Sdes
/*
 * Convenience wrapper around fetchGet() for a URL given as a string.
 * The string is parsed with fetchParseURL(); if parsing fails NULL is
 * returned without calling fetchGet().  The parsed URL is released again
 * before returning, whatever fetchGet() returned.
 */
FILE *
fetchGetURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchGet(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
18237535Sdes
18337535Sdes
/*
 * Convenience wrapper around fetchPut() for a URL given as a string.
 * The string is parsed with fetchParseURL(); if parsing fails NULL is
 * returned without calling fetchPut().  The parsed URL is released again
 * before returning, whatever fetchPut() returned.
 */
FILE *
fetchPutURL(char *URL, char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
20137535Sdes
/*
 * Convenience wrapper around fetchStat() for a URL given as a string.
 * The string is parsed with fetchParseURL(); if parsing fails -1 is
 * returned without calling fetchStat().  The parsed URL is released
 * again before returning, whatever fetchStat() returned.
 */
int
fetchStatURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed;
    int status = -1;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return status;
}
21940975Sdes
/*
 * Convenience wrapper around fetchList() for a URL given as a string.
 * The string is parsed with fetchParseURL(); if parsing fails NULL is
 * returned without calling fetchList().  The parsed URL is released
 * again before returning, whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(char *URL, char *flags)
{
    struct url *parsed;
    struct url_ent *entries = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);
    }
    return entries;
}
23741989Sdes
23841989Sdes/*
23937535Sdes * Split an URL into components. URL syntax is:
24037535Sdes * method:[//[user[:pwd]@]host[:port]]/[document]
24137535Sdes * This almost, but not quite, RFC1738 URL syntax.
24237535Sdes */
24340975Sdesstruct url *
24437535SdesfetchParseURL(char *URL)
24537535Sdes{
24637535Sdes    char *p, *q;
24740975Sdes    struct url *u;
24837535Sdes    int i;
24937535Sdes
25040975Sdes    /* allocate struct url */
25160190Sdes    if ((u = calloc(1, sizeof *u)) == NULL) {
25240975Sdes	errno = ENOMEM;
25340975Sdes	_fetch_syserr();
25437535Sdes	return NULL;
25540975Sdes    }
25637535Sdes
25737535Sdes    /* scheme name */
25837535Sdes    for (i = 0; *URL && (*URL != ':'); URL++)
25937535Sdes	if (i < URL_SCHEMELEN)
26037535Sdes	    u->scheme[i++] = *URL;
26140975Sdes    if (!URL[0] || (URL[1] != '/')) {
26240975Sdes	_url_seterr(URL_BAD_SCHEME);
26337535Sdes	goto ouch;
26440975Sdes    }
26537535Sdes    else URL++;
26637535Sdes    if (URL[1] != '/') {
26737535Sdes	p = URL;
26837535Sdes	goto nohost;
26937535Sdes    }
27037535Sdes    else URL += 2;
27137535Sdes
27237535Sdes    p = strpbrk(URL, "/@");
27341862Sdes    if (p && *p == '@') {
27437535Sdes	/* username */
27537535Sdes	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
27637535Sdes	    if (i < URL_USERLEN)
27737535Sdes		u->user[i++] = *q;
27837535Sdes
27937535Sdes	/* password */
28037535Sdes	if (*q == ':')
28137535Sdes	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
28237535Sdes		if (i < URL_PWDLEN)
28337535Sdes		    u->pwd[i++] = *q;
28437535Sdes
28537535Sdes	p++;
28637535Sdes    } else p = URL;
28737535Sdes
28837535Sdes    /* hostname */
28960737Sume#ifdef INET6
29060737Sume    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
29160737Sume	(*++q == '\0' || *q == '/' || *q == ':')) {
29260737Sume	if ((i = q - p - 2) > MAXHOSTNAMELEN)
29360737Sume	    i = MAXHOSTNAMELEN;
29460737Sume	strncpy(u->host, ++p, i);
29560737Sume	p = q;
29660737Sume    } else
29760737Sume#endif
29860737Sume	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
29960737Sume	    if (i < MAXHOSTNAMELEN)
30060737Sume		u->host[i++] = *p;
30137535Sdes
30237535Sdes    /* port */
30337535Sdes    if (*p == ':') {
30437535Sdes	for (q = ++p; *q && (*q != '/'); q++)
30537535Sdes	    if (isdigit(*q))
30637535Sdes		u->port = u->port * 10 + (*q - '0');
30740975Sdes	    else {
30840975Sdes		/* invalid port */
30940975Sdes		_url_seterr(URL_BAD_PORT);
31040975Sdes		goto ouch;
31140975Sdes	    }
31237535Sdes	while (*p && (*p != '/'))
31337535Sdes	    p++;
31437535Sdes    }
31537535Sdes
31637535Sdesnohost:
31737535Sdes    /* document */
31860376Sdes    if (!*p)
31960376Sdes	p = "/";
32060376Sdes
32160376Sdes    if ((u->doc = strdup(p)) == NULL) {
32260376Sdes	errno = ENOMEM;
32360376Sdes	_fetch_syserr();
32460376Sdes	goto ouch;
32540939Sdes    }
32637535Sdes
32737535Sdes    DEBUG(fprintf(stderr,
32837535Sdes		  "scheme:   [\033[1m%s\033[m]\n"
32937535Sdes		  "user:     [\033[1m%s\033[m]\n"
33037535Sdes		  "password: [\033[1m%s\033[m]\n"
33137535Sdes		  "host:     [\033[1m%s\033[m]\n"
33237535Sdes		  "port:     [\033[1m%d\033[m]\n"
33337535Sdes		  "document: [\033[1m%s\033[m]\n",
33437535Sdes		  u->scheme, u->user, u->pwd,
33537535Sdes		  u->host, u->port, u->doc));
33637535Sdes
33737535Sdes    return u;
33837535Sdes
33937535Sdesouch:
34037535Sdes    free(u);
34137535Sdes    return NULL;
34237535Sdes}
34360376Sdes
34460376Sdes/*
34560376Sdes * Free a URL
34660376Sdes */
34760376Sdesvoid
34860376SdesfetchFreeURL(struct url *u)
34960376Sdes{
35060376Sdes    free(u->doc);
35160376Sdes    free(u);
35260376Sdes}
353