137535Sdes/*-
2226537Sdes * Copyright (c) 1998-2004 Dag-Erling Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */
2837535Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD: stable/11/lib/libfetch/fetch.c 357214 2020-01-28 18:42:06Z gordon $");
3184203Sdillon
3237535Sdes#include <sys/param.h>
3340975Sdes#include <sys/errno.h>
3437535Sdes
3537535Sdes#include <ctype.h>
3637535Sdes#include <stdio.h>
3737535Sdes#include <stdlib.h>
3837535Sdes#include <string.h>
3937535Sdes
4037535Sdes#include "fetch.h"
4140975Sdes#include "common.h"
4237535Sdes
4377238Sdesauth_t	 fetchAuthMethod;
4460924Sdesint	 fetchLastErrCode;
4560924Sdeschar	 fetchLastErrString[MAXERRSTRING];
4660924Sdesint	 fetchTimeout;
4763334Sdesint	 fetchRestartCalls = 1;
4887560Sdesint	 fetchDebug;
4941862Sdes
5041862Sdes
5140975Sdes/*** Local data **************************************************************/
5237535Sdes
5340939Sdes/*
5440975Sdes * Error messages for parser errors
5540975Sdes */
5640975Sdes#define URL_MALFORMED		1
5740975Sdes#define URL_BAD_SCHEME		2
5840975Sdes#define URL_BAD_PORT		3
59174588Sdesstatic struct fetcherr url_errlist[] = {
6090267Sdes	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6190267Sdes	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6290267Sdes	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
6390267Sdes	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
6440975Sdes};
6540975Sdes
6640975Sdes
6740975Sdes/*** Public API **************************************************************/
6840975Sdes
6940975Sdes/*
7040939Sdes * Select the appropriate protocol for the URL scheme, and return a
7140939Sdes * read-only stream connected to the document referenced by the URL.
7263340Sdes * Also fill out the struct url_stat.
7340939Sdes */
7438394SdesFILE *
7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7638394Sdes{
7760587Sume
78109694Sdes	if (us != NULL) {
79109694Sdes		us->size = -1;
80109694Sdes		us->atime = us->mtime = 0;
81109694Sdes	}
8290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8390267Sdes		return (fetchXGetFile(URL, us, flags));
8497868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
8597868Sdes		return (fetchXGetFTP(URL, us, flags));
8690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8790267Sdes		return (fetchXGetHTTP(URL, us, flags));
8897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
8997868Sdes		return (fetchXGetHTTP(URL, us, flags));
90174588Sdes	url_seterr(URL_BAD_SCHEME);
9197868Sdes	return (NULL);
9238394Sdes}
9338394Sdes
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * struct url_stat filled in: forwards URL and flags with a NULL stat
 * pointer and returns the resulting stream.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
10363340Sdes
10463340Sdes/*
10563340Sdes * Select the appropriate protocol for the URL scheme, and return a
10640939Sdes * write-only stream connected to the document referenced by the URL.
10740939Sdes */
10838394SdesFILE *
10975891SarchiefetchPut(struct url *URL, const char *flags)
11038394Sdes{
11160587Sume
11290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11390267Sdes		return (fetchPutFile(URL, flags));
11497868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
11597868Sdes		return (fetchPutFTP(URL, flags));
11690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11790267Sdes		return (fetchPutHTTP(URL, flags));
11897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
11997868Sdes		return (fetchPutHTTP(URL, flags));
120174588Sdes	url_seterr(URL_BAD_SCHEME);
12197868Sdes	return (NULL);
12238394Sdes}
12338394Sdes
12440939Sdes/*
12540975Sdes * Select the appropriate protocol for the URL scheme, and return the
12640975Sdes * size of the document referenced by the URL if it exists.
12740975Sdes */
12840975Sdesint
12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags)
13040975Sdes{
13160587Sume
132109694Sdes	if (us != NULL) {
133109694Sdes		us->size = -1;
134109694Sdes		us->atime = us->mtime = 0;
135109694Sdes	}
13690267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13790267Sdes		return (fetchStatFile(URL, us, flags));
13897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
13997868Sdes		return (fetchStatFTP(URL, us, flags));
14090267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
14190267Sdes		return (fetchStatHTTP(URL, us, flags));
14297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
14397868Sdes		return (fetchStatHTTP(URL, us, flags));
144174588Sdes	url_seterr(URL_BAD_SCHEME);
14590267Sdes	return (-1);
14640975Sdes}
14740975Sdes
14840975Sdes/*
14941989Sdes * Select the appropriate protocol for the URL scheme, and return a
15041989Sdes * list of files in the directory pointed to by the URL.
15141989Sdes */
15241989Sdesstruct url_ent *
15375891SarchiefetchList(struct url *URL, const char *flags)
15441989Sdes{
15560587Sume
15690267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15790267Sdes		return (fetchListFile(URL, flags));
15897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
15997868Sdes		return (fetchListFTP(URL, flags));
16090267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
16190267Sdes		return (fetchListHTTP(URL, flags));
16297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
16397868Sdes		return (fetchListHTTP(URL, flags));
164174588Sdes	url_seterr(URL_BAD_SCHEME);
16590267Sdes	return (NULL);
16641989Sdes}
16741989Sdes
/*
 * String front end to fetchXGet(): parse URL, open it for reading, and
 * release the parsed representation before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns the
 * result of fetchXGet().
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
18537535Sdes
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * struct url_stat filled in: forwards URL and flags with a NULL stat
 * pointer and returns the resulting stream.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
19437535Sdes
/*
 * String front end to fetchPut(): parse URL, open it for writing, and
 * release the parsed representation before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns the
 * result of fetchPut().
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
21237535Sdes
/*
 * String front end to fetchStat(): parse URL, query it, and release the
 * parsed representation before returning.
 *
 * Returns -1 if the URL cannot be parsed; otherwise returns the result
 * of fetchStat().
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
23040975Sdes
/*
 * String front end to fetchList(): parse URL, list it, and release the
 * parsed representation before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns the
 * result of fetchList().
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
24841989Sdes
24941989Sdes/*
25060927Sdes * Make a URL
25160927Sdes */
25260927Sdesstruct url *
25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25475891Sarchie    const char *user, const char *pwd)
25560927Sdes{
25690267Sdes	struct url *u;
25760927Sdes
25890267Sdes	if (!scheme || (!host && !doc)) {
259174588Sdes		url_seterr(URL_MALFORMED);
26090267Sdes		return (NULL);
26190267Sdes	}
26260927Sdes
26390267Sdes	if (port < 0 || port > 65535) {
264174588Sdes		url_seterr(URL_BAD_PORT);
26590267Sdes		return (NULL);
26690267Sdes	}
26790267Sdes
26890267Sdes	/* allocate struct url */
269109967Sdes	if ((u = calloc(1, sizeof(*u))) == NULL) {
270174588Sdes		fetch_syserr();
27190267Sdes		return (NULL);
27290267Sdes	}
273339250Sdes	u->netrcfd = -1;
27490267Sdes
27590267Sdes	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
276174588Sdes		fetch_syserr();
27790267Sdes		free(u);
27890267Sdes		return (NULL);
27990267Sdes	}
28090267Sdes
281109967Sdes#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
28290267Sdes	seturl(scheme);
28390267Sdes	seturl(host);
28490267Sdes	seturl(user);
28590267Sdes	seturl(pwd);
28660927Sdes#undef seturl
28790267Sdes	u->port = port;
28860927Sdes
28990267Sdes	return (u);
29060927Sdes}
29160927Sdes
/*
 * Map one hexadecimal digit character (0-9, a-f, A-F) to its numeric
 * value in the range 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{

	if ('0' <= ch && ch <= '9')
		return (ch - '0');
	if ('a' <= ch && ch <= 'f')
		return (10 + (ch - 'a'));
	if ('A' <= ch && ch <= 'F')
		return (10 + (ch - 'A'));
	return (-1);
}
307234138Semaste
/*
 * Copy one URL component from src into dst, decoding %XX escapes on the
 * way.  Scanning stops at the end of the string or at the first '@' or
 * ':' separator.
 *
 * An escape is decoded only when both characters after the '%' are hex
 * digits and the encoded value is non-zero; otherwise the '%' is copied
 * through literally.
 *
 * At most dlen bytes are stored.  If the decoded component would need
 * more room than that, NULL is returned.  Otherwise the return value
 * points at the unconsumed input (the null terminator, '@' or ':').
 * dst is NOT null-terminated here; that is left to the caller.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *cur = src;
	char *out = dst;
	size_t room = dlen;

	while (*cur != '\0' && *cur != '@' && *cur != ':') {
		char ch = *cur;
		size_t step = 1;

		if (ch == '%') {
			int hi = fetch_hexval(cur[1]);
			/* only look at the second digit if the first is valid */
			int lo = (hi >= 0) ? fetch_hexval(cur[2]) : -1;

			if (hi >= 0 && lo >= 0 && (hi | lo) != 0) {
				ch = hi << 4 | lo;
				step = 3;
			}
		}

		/* out of space: report failure to the caller */
		if (room == 0)
			return (NULL);
		room--;
		*out++ = ch;
		cur += step;
	}
	return (cur);
}
336234138Semaste
337234138Semaste/*
33837535Sdes * Split an URL into components. URL syntax is:
33967042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document]
34037535Sdes * This almost, but not quite, RFC1738 URL syntax.
34137535Sdes */
34240975Sdesstruct url *
34375891SarchiefetchParseURL(const char *URL)
34437535Sdes{
34590267Sdes	char *doc;
34690267Sdes	const char *p, *q;
34790267Sdes	struct url *u;
34890267Sdes	int i;
34937535Sdes
35090267Sdes	/* allocate struct url */
351109967Sdes	if ((u = calloc(1, sizeof(*u))) == NULL) {
352174588Sdes		fetch_syserr();
35390267Sdes		return (NULL);
35490267Sdes	}
355339250Sdes	u->netrcfd = -1;
35637535Sdes
35790267Sdes	/* scheme name */
35890267Sdes	if ((p = strstr(URL, ":/"))) {
35990267Sdes		snprintf(u->scheme, URL_SCHEMELEN+1,
36090267Sdes		    "%.*s", (int)(p - URL), URL);
36190267Sdes		URL = ++p;
36290267Sdes		/*
36390267Sdes		 * Only one slash: no host, leave slash as part of document
36490267Sdes		 * Two slashes: host follows, strip slashes
36590267Sdes		 */
36690267Sdes		if (URL[1] == '/')
36790267Sdes			URL = (p += 2);
36890267Sdes	} else {
36990267Sdes		p = URL;
37090267Sdes	}
37190267Sdes	if (!*URL || *URL == '/' || *URL == '.' ||
37290267Sdes	    (u->scheme[0] == '\0' &&
37390267Sdes		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
37490267Sdes		goto nohost;
37537535Sdes
37690267Sdes	p = strpbrk(URL, "/@");
37790267Sdes	if (p && *p == '@') {
37890267Sdes		/* username */
379234138Semaste		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
380357214Sgordon		if (q == NULL)
381357214Sgordon			goto ouch;
38279423Sdes
38390267Sdes		/* password */
384357214Sgordon		if (*q == ':') {
385252375Skientzle			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
386357214Sgordon			if (q == NULL)
387357214Sgordon				goto ouch;
388357214Sgordon		}
38990267Sdes		p++;
39090267Sdes	} else {
39190267Sdes		p = URL;
39290267Sdes	}
39390267Sdes
39490267Sdes	/* hostname */
39590267Sdes	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
39690267Sdes	    (*++q == '\0' || *q == '/' || *q == ':')) {
397315902Sdes		if ((i = q - p) > MAXHOSTNAMELEN)
39890267Sdes			i = MAXHOSTNAMELEN;
399315902Sdes		strncpy(u->host, p, i);
40090267Sdes		p = q;
401315902Sdes	} else {
40290267Sdes		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
40390267Sdes			if (i < MAXHOSTNAMELEN)
40490267Sdes				u->host[i++] = *p;
405315902Sdes	}
40637535Sdes
40790267Sdes	/* port */
40890267Sdes	if (*p == ':') {
40990267Sdes		for (q = ++p; *q && (*q != '/'); q++)
410174761Sdes			if (isdigit((unsigned char)*q))
41190267Sdes				u->port = u->port * 10 + (*q - '0');
41290267Sdes			else {
41390267Sdes				/* invalid port */
414174588Sdes				url_seterr(URL_BAD_PORT);
41590267Sdes				goto ouch;
41690267Sdes			}
41790267Sdes		p = q;
41890267Sdes	}
41937535Sdes
42037535Sdesnohost:
42190267Sdes	/* document */
42290267Sdes	if (!*p)
42390267Sdes		p = "/";
42467419Sdes
42590267Sdes	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
42690267Sdes	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
42790267Sdes		const char hexnums[] = "0123456789abcdef";
42890267Sdes
42990267Sdes		/* percent-escape whitespace. */
43090267Sdes		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
431174588Sdes			fetch_syserr();
43290267Sdes			goto ouch;
43390267Sdes		}
43490267Sdes		u->doc = doc;
43590267Sdes		while (*p != '\0') {
436174761Sdes			if (!isspace((unsigned char)*p)) {
43790267Sdes				*doc++ = *p++;
43890267Sdes			} else {
43990267Sdes				*doc++ = '%';
44090267Sdes				*doc++ = hexnums[((unsigned int)*p) >> 4];
44190267Sdes				*doc++ = hexnums[((unsigned int)*p) & 0xf];
44290267Sdes				p++;
44390267Sdes			}
44490267Sdes		}
44590267Sdes		*doc = '\0';
44690267Sdes	} else if ((u->doc = strdup(p)) == NULL) {
447174588Sdes		fetch_syserr();
44890267Sdes		goto ouch;
44967419Sdes	}
45090267Sdes
451339250Sdes	DEBUGF("scheme:   \"%s\"\n"
452339250Sdes	    "user:     \"%s\"\n"
453339250Sdes	    "password: \"%s\"\n"
454339250Sdes	    "host:     \"%s\"\n"
455339250Sdes	    "port:     \"%d\"\n"
456339250Sdes	    "document: \"%s\"\n",
457339250Sdes	    u->scheme, u->user, u->pwd,
458339250Sdes	    u->host, u->port, u->doc);
45937535Sdes
46090267Sdes	return (u);
46190267Sdes
46237535Sdesouch:
46390267Sdes	free(u);
46490267Sdes	return (NULL);
46537535Sdes}
46660376Sdes
46760376Sdes/*
46860376Sdes * Free a URL
46960376Sdes */
47060376Sdesvoid
47160376SdesfetchFreeURL(struct url *u)
47260376Sdes{
47390267Sdes	free(u->doc);
47490267Sdes	free(u);
47560376Sdes}
476