fetch.c revision 97868
139665Smsmith/*-
239665Smsmith * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
339665Smsmith * All rights reserved.
439665Smsmith *
539665Smsmith * Redistribution and use in source and binary forms, with or without
639665Smsmith * modification, are permitted provided that the following conditions
739665Smsmith * are met:
839665Smsmith * 1. Redistributions of source code must retain the above copyright
939665Smsmith *    notice, this list of conditions and the following disclaimer
1039665Smsmith *    in this position and unchanged.
1139665Smsmith * 2. Redistributions in binary form must reproduce the above copyright
1239665Smsmith *    notice, this list of conditions and the following disclaimer in the
1339665Smsmith *    documentation and/or other materials provided with the distribution.
1439665Smsmith * 3. The name of the author may not be used to endorse or promote products
1539665Smsmith *    derived from this software without specific prior written permission
1639665Smsmith *
1739665Smsmith * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1839665Smsmith * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1939665Smsmith * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2039665Smsmith * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2139665Smsmith * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2239665Smsmith * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2339665Smsmith * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2439665Smsmith * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2539665Smsmith * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2639665Smsmith * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2739665Smsmith */
2839665Smsmith
2939665Smsmith#include <sys/cdefs.h>
3039665Smsmith__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 97868 2002-06-05 12:46:36Z des $");
3139665Smsmith
3239665Smsmith#include <sys/param.h>
3339665Smsmith#include <sys/errno.h>
3439665Smsmith
3539665Smsmith#include <ctype.h>
3639665Smsmith#include <stdio.h>
3739665Smsmith#include <stdlib.h>
3839665Smsmith#include <string.h>
3939665Smsmith
4039665Smsmith#include "fetch.h"
4139665Smsmith#include "common.h"
4239665Smsmith
4339665Smsmithauth_t	 fetchAuthMethod;
4439665Smsmithint	 fetchLastErrCode;
4539665Smsmithchar	 fetchLastErrString[MAXERRSTRING];
4639665Smsmithint	 fetchTimeout;
4739665Smsmithint	 fetchRestartCalls = 1;
4839665Smsmithint	 fetchDebug;
4939665Smsmith
5039665Smsmith
5139665Smsmith/*** Local data **************************************************************/
5239665Smsmith
5339665Smsmith/*
5439665Smsmith * Error messages for parser errors
5539665Smsmith */
5639665Smsmith#define URL_MALFORMED		1
5739665Smsmith#define URL_BAD_SCHEME		2
5839665Smsmith#define URL_BAD_PORT		3
5939665Smsmithstatic struct fetcherr _url_errlist[] = {
6039665Smsmith	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6139665Smsmith	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6239665Smsmith	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
6339665Smsmith	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
6439665Smsmith};
6539665Smsmith
6639665Smsmith
6739665Smsmith/*** Public API **************************************************************/
6839665Smsmith
6939665Smsmith/*
7039665Smsmith * Select the appropriate protocol for the URL scheme, and return a
7139665Smsmith * read-only stream connected to the document referenced by the URL.
7239665Smsmith * Also fill out the struct url_stat.
7339665Smsmith */
7439665SmsmithFILE *
7539665SmsmithfetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7639665Smsmith{
7739665Smsmith	int direct;
7839665Smsmith
7939665Smsmith	direct = CHECK_FLAG('d');
8039665Smsmith	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8139665Smsmith		return (fetchXGetFile(URL, us, flags));
8239665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
8339665Smsmith		return (fetchXGetFTP(URL, us, flags));
8439665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8539665Smsmith		return (fetchXGetHTTP(URL, us, flags));
8639665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
8739665Smsmith		return (fetchXGetHTTP(URL, us, flags));
8839665Smsmith	_url_seterr(URL_BAD_SCHEME);
8939665Smsmith	return (NULL);
9039665Smsmith}
9139665Smsmith
/*
 * Convenience form of fetchXGet() for callers that do not want a
 * struct url_stat: the request is forwarded with a NULL stat pointer
 * and fetchXGet()'s result is returned as is.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
10139665Smsmith
10239665Smsmith/*
10339665Smsmith * Select the appropriate protocol for the URL scheme, and return a
10439665Smsmith * write-only stream connected to the document referenced by the URL.
10539665Smsmith */
10639665SmsmithFILE *
10739665SmsmithfetchPut(struct url *URL, const char *flags)
10839665Smsmith{
10939665Smsmith	int direct;
11039665Smsmith
11139665Smsmith	direct = CHECK_FLAG('d');
11239665Smsmith	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11339665Smsmith		return (fetchPutFile(URL, flags));
11439665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
11539665Smsmith		return (fetchPutFTP(URL, flags));
11639665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11739665Smsmith		return (fetchPutHTTP(URL, flags));
11839665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
11939665Smsmith		return (fetchPutHTTP(URL, flags));
12039665Smsmith	_url_seterr(URL_BAD_SCHEME);
12139665Smsmith	return (NULL);
12239665Smsmith}
12339665Smsmith
12439665Smsmith/*
12539665Smsmith * Select the appropriate protocol for the URL scheme, and return the
12639665Smsmith * size of the document referenced by the URL if it exists.
12739665Smsmith */
12839665Smsmithint
12939665SmsmithfetchStat(struct url *URL, struct url_stat *us, const char *flags)
13039665Smsmith{
13139665Smsmith	int direct;
13239665Smsmith
13339665Smsmith	direct = CHECK_FLAG('d');
13439665Smsmith	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13539665Smsmith		return (fetchStatFile(URL, us, flags));
13639665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
13739665Smsmith		return (fetchStatFTP(URL, us, flags));
13839665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
13939665Smsmith		return (fetchStatHTTP(URL, us, flags));
14039665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
14139665Smsmith		return (fetchStatHTTP(URL, us, flags));
14239665Smsmith	_url_seterr(URL_BAD_SCHEME);
14339665Smsmith	return (-1);
14439665Smsmith}
14539665Smsmith
14639665Smsmith/*
14739665Smsmith * Select the appropriate protocol for the URL scheme, and return a
14839665Smsmith * list of files in the directory pointed to by the URL.
14939665Smsmith */
15039665Smsmithstruct url_ent *
15139665SmsmithfetchList(struct url *URL, const char *flags)
15239665Smsmith{
15339665Smsmith	int direct;
15439665Smsmith
15539665Smsmith	direct = CHECK_FLAG('d');
15639665Smsmith	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15739665Smsmith		return (fetchListFile(URL, flags));
15839665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
15939665Smsmith		return (fetchListFTP(URL, flags));
16039665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
16139665Smsmith		return (fetchListHTTP(URL, flags));
16239665Smsmith	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
16339665Smsmith		return (fetchListHTTP(URL, flags));
16439665Smsmith	_url_seterr(URL_BAD_SCHEME);
16539665Smsmith	return (NULL);
16639665Smsmith}
16739665Smsmith
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(), run
 * fetchXGet() on the result, then release the parsed URL again.
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);

	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
	return (stream);
}
18539665Smsmith
/*
 * Convenience form of fetchXGetURL() for callers that do not want a
 * struct url_stat: the request is forwarded with a NULL stat pointer
 * and fetchXGetURL()'s result is returned as is.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
194
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(), run
 * fetchPut() on the result, then release the parsed URL again.
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);

	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
	return (stream);
}
212
/*
 * String front end to fetchStat(): parse URL with fetchParseURL(), run
 * fetchStat() on the result, then release the parsed URL again.
 *
 * Returns -1 if the URL cannot be parsed, otherwise whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);

	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
	return (status);
}
230
/*
 * String front end to fetchList(): parse URL with fetchParseURL(), run
 * fetchList() on the result, then release the parsed URL again.
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);

	/* the parsed URL is only needed for the duration of the call */
	fetchFreeURL(parsed);
	return (entries);
}
248
249/*
250 * Make a URL
251 */
252struct url *
253fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
254    const char *user, const char *pwd)
255{
256	struct url *u;
257
258	if (!scheme || (!host && !doc)) {
259		_url_seterr(URL_MALFORMED);
260		return (NULL);
261	}
262
263	if (port < 0 || port > 65535) {
264		_url_seterr(URL_BAD_PORT);
265		return (NULL);
266	}
267
268	/* allocate struct url */
269	if ((u = calloc(1, sizeof *u)) == NULL) {
270		_fetch_syserr();
271		return (NULL);
272	}
273
274	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275		_fetch_syserr();
276		free(u);
277		return (NULL);
278	}
279
280#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
281	seturl(scheme);
282	seturl(host);
283	seturl(user);
284	seturl(pwd);
285#undef seturl
286	u->port = port;
287
288	return (u);
289}
290
291/*
292 * Split an URL into components. URL syntax is:
293 * [method:/][/[user[:pwd]@]host[:port]/][document]
294 * This almost, but not quite, RFC1738 URL syntax.
295 */
296struct url *
297fetchParseURL(const char *URL)
298{
299	char *doc;
300	const char *p, *q;
301	struct url *u;
302	int i;
303
304	/* allocate struct url */
305	if ((u = calloc(1, sizeof *u)) == NULL) {
306		_fetch_syserr();
307		return (NULL);
308	}
309
310	/* scheme name */
311	if ((p = strstr(URL, ":/"))) {
312		snprintf(u->scheme, URL_SCHEMELEN+1,
313		    "%.*s", (int)(p - URL), URL);
314		URL = ++p;
315		/*
316		 * Only one slash: no host, leave slash as part of document
317		 * Two slashes: host follows, strip slashes
318		 */
319		if (URL[1] == '/')
320			URL = (p += 2);
321	} else {
322		p = URL;
323	}
324	if (!*URL || *URL == '/' || *URL == '.' ||
325	    (u->scheme[0] == '\0' &&
326		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
327		goto nohost;
328
329	p = strpbrk(URL, "/@");
330	if (p && *p == '@') {
331		/* username */
332		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
333			if (i < URL_USERLEN)
334				u->user[i++] = *q;
335
336		/* password */
337		if (*q == ':')
338			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
339				if (i < URL_PWDLEN)
340					u->pwd[i++] = *q;
341
342		p++;
343	} else {
344		p = URL;
345	}
346
347	/* hostname */
348#ifdef INET6
349	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
350	    (*++q == '\0' || *q == '/' || *q == ':')) {
351		if ((i = q - p - 2) > MAXHOSTNAMELEN)
352			i = MAXHOSTNAMELEN;
353		strncpy(u->host, ++p, i);
354		p = q;
355	} else
356#endif
357		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
358			if (i < MAXHOSTNAMELEN)
359				u->host[i++] = *p;
360
361	/* port */
362	if (*p == ':') {
363		for (q = ++p; *q && (*q != '/'); q++)
364			if (isdigit(*q))
365				u->port = u->port * 10 + (*q - '0');
366			else {
367				/* invalid port */
368				_url_seterr(URL_BAD_PORT);
369				goto ouch;
370			}
371		p = q;
372	}
373
374nohost:
375	/* document */
376	if (!*p)
377		p = "/";
378
379	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
380	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
381		const char hexnums[] = "0123456789abcdef";
382
383		/* percent-escape whitespace. */
384		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
385			_fetch_syserr();
386			goto ouch;
387		}
388		u->doc = doc;
389		while (*p != '\0') {
390			if (!isspace(*p)) {
391				*doc++ = *p++;
392			} else {
393				*doc++ = '%';
394				*doc++ = hexnums[((unsigned int)*p) >> 4];
395				*doc++ = hexnums[((unsigned int)*p) & 0xf];
396				p++;
397			}
398		}
399		*doc = '\0';
400	} else if ((u->doc = strdup(p)) == NULL) {
401		_fetch_syserr();
402		goto ouch;
403	}
404
405	DEBUG(fprintf(stderr,
406		  "scheme:   [%s]\n"
407		  "user:     [%s]\n"
408		  "password: [%s]\n"
409		  "host:     [%s]\n"
410		  "port:     [%d]\n"
411		  "document: [%s]\n",
412		  u->scheme, u->user, u->pwd,
413		  u->host, u->port, u->doc));
414
415	return (u);
416
417ouch:
418	free(u);
419	return (NULL);
420}
421
422/*
423 * Free a URL
424 */
425void
426fetchFreeURL(struct url *u)
427{
428	free(u->doc);
429	free(u);
430}
431