fetch.c revision 63340
133965Sjdp/*-
2218822Sdim * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3218822Sdim * All rights reserved.
433965Sjdp *
533965Sjdp * Redistribution and use in source and binary forms, with or without
633965Sjdp * modification, are permitted provided that the following conditions
733965Sjdp * are met:
833965Sjdp * 1. Redistributions of source code must retain the above copyright
933965Sjdp *    notice, this list of conditions and the following disclaimer
1033965Sjdp *    in this position and unchanged.
1133965Sjdp * 2. Redistributions in binary form must reproduce the above copyright
1233965Sjdp *    notice, this list of conditions and the following disclaimer in the
1333965Sjdp *    documentation and/or other materials provided with the distribution.
1433965Sjdp * 3. The name of the author may not be used to endorse or promote products
1533965Sjdp *    derived from this software without specific prior written permission
1633965Sjdp *
1733965Sjdp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1833965Sjdp * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1933965Sjdp * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20218822Sdim * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21218822Sdim * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2233965Sjdp * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2333965Sjdp * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2433965Sjdp * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25218822Sdim * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2633965Sjdp * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2733965Sjdp *
2833965Sjdp * $FreeBSD: head/lib/libfetch/fetch.c 63340 2000-07-17 21:25:00Z des $
2933965Sjdp */
3033965Sjdp
3133965Sjdp#include <sys/param.h>
3261843Sobrien#include <sys/errno.h>
3333965Sjdp
3433965Sjdp#include <ctype.h>
3533965Sjdp#include <stdio.h>
3633965Sjdp#include <stdlib.h>
3733965Sjdp#include <string.h>
3833965Sjdp
3933965Sjdp#include "fetch.h"
4033965Sjdp#include "common.h"
4133965Sjdp
4233965Sjdp
4333965Sjdpint	 fetchLastErrCode;
44130561Sobrienchar	 fetchLastErrString[MAXERRSTRING];
45130561Sobrienint	 fetchTimeout;
4633965Sjdpint	 fetchRestartCalls = 1;
4733965Sjdp
4833965Sjdp
4933965Sjdp/*** Local data **************************************************************/
5033965Sjdp
5133965Sjdp/*
5233965Sjdp * Error messages for parser errors
5333965Sjdp */
5433965Sjdp#define URL_MALFORMED		1
5533965Sjdp#define URL_BAD_SCHEME		2
5633965Sjdp#define URL_BAD_PORT		3
5733965Sjdpstatic struct fetcherr _url_errlist[] = {
5833965Sjdp    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
5933965Sjdp    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6033965Sjdp    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
6133965Sjdp    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
6233965Sjdp};
6333965Sjdp
6433965Sjdp
6533965Sjdp/*** Public API **************************************************************/
6633965Sjdp
6733965Sjdp/*
6833965Sjdp * Select the appropriate protocol for the URL scheme, and return a
6933965Sjdp * read-only stream connected to the document referenced by the URL.
7033965Sjdp * Also fill out the struct url_stat.
7133965Sjdp */
7233965SjdpFILE *
7333965SjdpfetchXGet(struct url *URL, struct url_stat *us, char *flags)
7433965Sjdp{
7533965Sjdp    int direct;
7633965Sjdp
7733965Sjdp    direct = (flags && strchr(flags, 'd'));
7833965Sjdp    if (strcasecmp(URL->scheme, "file") == 0)
7933965Sjdp	return fetchXGetFile(URL, us, flags);
8033965Sjdp    else if (strcasecmp(URL->scheme, "http") == 0)
8133965Sjdp	return fetchXGetHTTP(URL, us, flags);
8233965Sjdp    else if (strcasecmp(URL->scheme, "ftp") == 0) {
8333965Sjdp	if (!direct &&
8433965Sjdp	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
8533965Sjdp	    return fetchXGetHTTP(URL, us, flags);
8633965Sjdp	return fetchXGetFTP(URL, us, flags);
8733965Sjdp    } else {
8833965Sjdp	_url_seterr(URL_BAD_SCHEME);
8933965Sjdp	return NULL;
9033965Sjdp    }
9133965Sjdp}
9233965Sjdp
/*
 * Open the document referenced by the URL for reading.  Equivalent to
 * fetchXGet() called with a NULL url_stat pointer.
 */
FILE *
fetchGet(struct url *URL, char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
10233965Sjdp
10333965Sjdp/*
10433965Sjdp * Select the appropriate protocol for the URL scheme, and return a
10533965Sjdp * write-only stream connected to the document referenced by the URL.
10633965Sjdp */
10733965SjdpFILE *
10833965SjdpfetchPut(struct url *URL, char *flags)
10933965Sjdp{
11033965Sjdp    int direct;
11133965Sjdp
11233965Sjdp    direct = (flags && strchr(flags, 'd'));
11333965Sjdp    if (strcasecmp(URL->scheme, "file") == 0)
11433965Sjdp	return fetchPutFile(URL, flags);
11533965Sjdp    else if (strcasecmp(URL->scheme, "http") == 0)
11633965Sjdp	return fetchPutHTTP(URL, flags);
11733965Sjdp    else if (strcasecmp(URL->scheme, "ftp") == 0) {
11833965Sjdp	if (!direct &&
11933965Sjdp	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
12033965Sjdp	    return fetchPutHTTP(URL, flags);
12133965Sjdp	return fetchPutFTP(URL, flags);
12233965Sjdp    } else {
12333965Sjdp	_url_seterr(URL_BAD_SCHEME);
12433965Sjdp	return NULL;
12533965Sjdp    }
12633965Sjdp}
12733965Sjdp
12833965Sjdp/*
12933965Sjdp * Select the appropriate protocol for the URL scheme, and return the
13033965Sjdp * size of the document referenced by the URL if it exists.
13133965Sjdp */
13233965Sjdpint
13333965SjdpfetchStat(struct url *URL, struct url_stat *us, char *flags)
13433965Sjdp{
13533965Sjdp    int direct;
13633965Sjdp
13733965Sjdp    direct = (flags && strchr(flags, 'd'));
13833965Sjdp    if (strcasecmp(URL->scheme, "file") == 0)
13933965Sjdp	return fetchStatFile(URL, us, flags);
14033965Sjdp    else if (strcasecmp(URL->scheme, "http") == 0)
14133965Sjdp	return fetchStatHTTP(URL, us, flags);
14233965Sjdp    else if (strcasecmp(URL->scheme, "ftp") == 0) {
14333965Sjdp	if (!direct &&
14433965Sjdp	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
14533965Sjdp	    return fetchStatHTTP(URL, us, flags);
14633965Sjdp	return fetchStatFTP(URL, us, flags);
14733965Sjdp    } else {
148130561Sobrien	_url_seterr(URL_BAD_SCHEME);
14933965Sjdp	return -1;
15033965Sjdp    }
15133965Sjdp}
15233965Sjdp
15333965Sjdp/*
15433965Sjdp * Select the appropriate protocol for the URL scheme, and return a
15533965Sjdp * list of files in the directory pointed to by the URL.
15633965Sjdp */
15733965Sjdpstruct url_ent *
158130561SobrienfetchList(struct url *URL, char *flags)
15933965Sjdp{
16033965Sjdp    int direct;
16133965Sjdp
16233965Sjdp    direct = (flags && strchr(flags, 'd'));
16333965Sjdp    if (strcasecmp(URL->scheme, "file") == 0)
16433965Sjdp	return fetchListFile(URL, flags);
16533965Sjdp    else if (strcasecmp(URL->scheme, "http") == 0)
16633965Sjdp	return fetchListHTTP(URL, flags);
16733965Sjdp    else if (strcasecmp(URL->scheme, "ftp") == 0) {
16833965Sjdp	if (!direct &&
16933965Sjdp	    getenv("FTP_PROXY") == NULL && getenv("HTTP_PROXY") != NULL)
17033965Sjdp	    return fetchListHTTP(URL, flags);
17133965Sjdp	return fetchListFTP(URL, flags);
17233965Sjdp    } else {
17333965Sjdp	_url_seterr(URL_BAD_SCHEME);
17433965Sjdp	return NULL;
17533965Sjdp    }
17633965Sjdp}
17733965Sjdp
/*
 * Parse the URL string and, if that succeeds, pass the result to
 * fetchXGet().  The parsed URL is released before returning.  Returns
 * NULL if parsing fails, otherwise whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed = fetchParseURL(URL);
    FILE *stream = NULL;

    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
19533965Sjdp
/*
 * Parse the URL string and open the document for reading.  Equivalent
 * to fetchXGetURL() called with a NULL url_stat pointer.
 */
FILE *
fetchGetURL(char *URL, char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
20433965Sjdp
/*
 * Parse the URL string and, if that succeeds, pass the result to
 * fetchPut().  The parsed URL is released before returning.  Returns
 * NULL if parsing fails, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(char *URL, char *flags)
{
    struct url *parsed = fetchParseURL(URL);
    FILE *stream = NULL;

    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
22233965Sjdp
/*
 * Parse the URL string and, if that succeeds, pass the result to
 * fetchStat().  The parsed URL is released before returning.  Returns
 * -1 if parsing fails, otherwise whatever fetchStat() returned.
 */
int
fetchStatURL(char *URL, struct url_stat *us, char *flags)
{
    struct url *parsed = fetchParseURL(URL);
    int result = -1;

    if (parsed != NULL) {
	result = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return result;
}
24033965Sjdp
/*
 * Parse the URL string and, if that succeeds, pass the result to
 * fetchList().  The parsed URL is released before returning.  Returns
 * NULL if parsing fails, otherwise whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(char *URL, char *flags)
{
    struct url *parsed = fetchParseURL(URL);
    struct url_ent *entries = NULL;

    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);
    }
    return entries;
}
258130561Sobrien
259130561Sobrien/*
260130561Sobrien * Make a URL
261130561Sobrien */
262130561Sobrienstruct url *
263130561SobrienfetchMakeURL(char *scheme, char *host, int port, char *doc,
264130561Sobrien    char *user, char *pwd)
26533965Sjdp{
266130561Sobrien    struct url *u;
267130561Sobrien
268130561Sobrien    if (!scheme || (!host && !doc)) {
269130561Sobrien	_url_seterr(URL_MALFORMED);
270130561Sobrien	return NULL;
271130561Sobrien    }
272130561Sobrien
273130561Sobrien    if (port < 0 || port > 65535) {
274130561Sobrien	_url_seterr(URL_BAD_PORT);
275130561Sobrien	return NULL;
276130561Sobrien    }
277130561Sobrien
278130561Sobrien    /* allocate struct url */
279130561Sobrien    if ((u = calloc(1, sizeof *u)) == NULL) {
280130561Sobrien	_fetch_syserr();
281130561Sobrien	return NULL;
282130561Sobrien    }
283130561Sobrien
284130561Sobrien    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
285130561Sobrien	_fetch_syserr();
286130561Sobrien	free(u);
287130561Sobrien	return NULL;
288130561Sobrien    }
289130561Sobrien
29033965Sjdp#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
29133965Sjdp    seturl(scheme);
29233965Sjdp    seturl(host);
29333965Sjdp    seturl(user);
294130561Sobrien    seturl(pwd);
29533965Sjdp#undef seturl
29633965Sjdp    u->port = port;
29733965Sjdp
29833965Sjdp    return u;
29933965Sjdp}
30033965Sjdp
30133965Sjdp/*
30233965Sjdp * Split an URL into components. URL syntax is:
30333965Sjdp * method:[//[user[:pwd]@]host[:port]]/[document]
30433965Sjdp * This almost, but not quite, RFC1738 URL syntax.
30533965Sjdp */
306130561Sobrienstruct url *
30733965SjdpfetchParseURL(char *URL)
30833965Sjdp{
30960484Sobrien    char *p, *q;
31033965Sjdp    struct url *u;
31133965Sjdp    int i;
31233965Sjdp
31333965Sjdp    /* allocate struct url */
31433965Sjdp    if ((u = calloc(1, sizeof *u)) == NULL) {
315130561Sobrien	_fetch_syserr();
31633965Sjdp	return NULL;
31733965Sjdp    }
31833965Sjdp
31933965Sjdp    /* scheme name */
32033965Sjdp    for (i = 0; *URL && (*URL != ':'); URL++)
32133965Sjdp	if (i < URL_SCHEMELEN)
32233965Sjdp	    u->scheme[i++] = *URL;
32333965Sjdp    if (!URL[0] || (URL[1] != '/')) {
32433965Sjdp	_url_seterr(URL_BAD_SCHEME);
32533965Sjdp	goto ouch;
32633965Sjdp    }
327130561Sobrien    else URL++;
328130561Sobrien    if (URL[1] != '/') {
32933965Sjdp	p = URL;
330130561Sobrien	goto nohost;
33133965Sjdp    }
33233965Sjdp    else URL += 2;
33333965Sjdp
334104834Sobrien    p = strpbrk(URL, "/@");
33533965Sjdp    if (p && *p == '@') {
336130561Sobrien	/* username */
337130561Sobrien	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
338130561Sobrien	    if (i < URL_USERLEN)
33933965Sjdp		u->user[i++] = *q;
34033965Sjdp
34133965Sjdp	/* password */
34233965Sjdp	if (*q == ':')
34333965Sjdp	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
34433965Sjdp		if (i < URL_PWDLEN)
34533965Sjdp		    u->pwd[i++] = *q;
346130561Sobrien
347130561Sobrien	p++;
34833965Sjdp    } else p = URL;
34933965Sjdp
350130561Sobrien    /* hostname */
35133965Sjdp#ifdef INET6
35233965Sjdp    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
35333965Sjdp	(*++q == '\0' || *q == '/' || *q == ':')) {
35433965Sjdp	if ((i = q - p - 2) > MAXHOSTNAMELEN)
35533965Sjdp	    i = MAXHOSTNAMELEN;
35633965Sjdp	strncpy(u->host, ++p, i);
35733965Sjdp	p = q;
35833965Sjdp    } else
35933965Sjdp#endif
360130561Sobrien	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
361130561Sobrien	    if (i < MAXHOSTNAMELEN)
36233965Sjdp		u->host[i++] = *p;
36333965Sjdp
36433965Sjdp    /* port */
36533965Sjdp    if (*p == ':') {
36633965Sjdp	for (q = ++p; *q && (*q != '/'); q++)
36733965Sjdp	    if (isdigit(*q))
36833965Sjdp		u->port = u->port * 10 + (*q - '0');
36933965Sjdp	    else {
37033965Sjdp		/* invalid port */
37133965Sjdp		_url_seterr(URL_BAD_PORT);
372130561Sobrien		goto ouch;
37333965Sjdp	    }
37433965Sjdp	while (*p && (*p != '/'))
37533965Sjdp	    p++;
37633965Sjdp    }
37733965Sjdp
37833965Sjdpnohost:
37933965Sjdp    /* document */
38033965Sjdp    if (!*p)
38133965Sjdp	p = "/";
38233965Sjdp
38333965Sjdp    if ((u->doc = strdup(p)) == NULL) {
384130561Sobrien	_fetch_syserr();
38533965Sjdp	goto ouch;
386130561Sobrien    }
38733965Sjdp
38833965Sjdp    DEBUG(fprintf(stderr,
38933965Sjdp		  "scheme:   [\033[1m%s\033[m]\n"
39033965Sjdp		  "user:     [\033[1m%s\033[m]\n"
39133965Sjdp		  "password: [\033[1m%s\033[m]\n"
392130561Sobrien		  "host:     [\033[1m%s\033[m]\n"
393130561Sobrien		  "port:     [\033[1m%d\033[m]\n"
39433965Sjdp		  "document: [\033[1m%s\033[m]\n",
39533965Sjdp		  u->scheme, u->user, u->pwd,
39660484Sobrien		  u->host, u->port, u->doc));
397130561Sobrien
39833965Sjdp    return u;
39933965Sjdp
40033965Sjdpouch:
40133965Sjdp    free(u);
402130561Sobrien    return NULL;
403130561Sobrien}
404130561Sobrien
40533965Sjdp/*
406130561Sobrien * Free a URL
40733965Sjdp */
40833965Sjdpvoid
40933965SjdpfetchFreeURL(struct url *u)
41033965Sjdp{
41133965Sjdp    free(u->doc);
412130561Sobrien    free(u);
413130561Sobrien}
414130561Sobrien