1/*-
 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/lib/libfetch/fetch.c 357214 2020-01-28 18:42:06Z gordon $");
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
43auth_t	 fetchAuthMethod;
44int	 fetchLastErrCode;
45char	 fetchLastErrString[MAXERRSTRING];
46int	 fetchTimeout;
47int	 fetchRestartCalls = 1;
48int	 fetchDebug;
49
50
51/*** Local data **************************************************************/
52
53/*
54 * Error messages for parser errors
55 */
56#define URL_MALFORMED		1
57#define URL_BAD_SCHEME		2
58#define URL_BAD_PORT		3
59static struct fetcherr url_errlist[] = {
60	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
61	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
62	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
63	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
64};
65
66
67/*** Public API **************************************************************/
68
69/*
70 * Select the appropriate protocol for the URL scheme, and return a
71 * read-only stream connected to the document referenced by the URL.
72 * Also fill out the struct url_stat.
73 */
74FILE *
75fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76{
77
78	if (us != NULL) {
79		us->size = -1;
80		us->atime = us->mtime = 0;
81	}
82	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
83		return (fetchXGetFile(URL, us, flags));
84	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
85		return (fetchXGetFTP(URL, us, flags));
86	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
87		return (fetchXGetHTTP(URL, us, flags));
88	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
89		return (fetchXGetHTTP(URL, us, flags));
90	url_seterr(URL_BAD_SCHEME);
91	return (NULL);
92}
93
/*
 * Shorthand for fetchXGet() for callers that do not want a struct
 * url_stat filled out: the stat pointer is passed as NULL.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
103
104/*
105 * Select the appropriate protocol for the URL scheme, and return a
106 * write-only stream connected to the document referenced by the URL.
107 */
108FILE *
109fetchPut(struct url *URL, const char *flags)
110{
111
112	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113		return (fetchPutFile(URL, flags));
114	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
115		return (fetchPutFTP(URL, flags));
116	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
117		return (fetchPutHTTP(URL, flags));
118	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
119		return (fetchPutHTTP(URL, flags));
120	url_seterr(URL_BAD_SCHEME);
121	return (NULL);
122}
123
124/*
125 * Select the appropriate protocol for the URL scheme, and return the
126 * size of the document referenced by the URL if it exists.
127 */
128int
129fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130{
131
132	if (us != NULL) {
133		us->size = -1;
134		us->atime = us->mtime = 0;
135	}
136	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
137		return (fetchStatFile(URL, us, flags));
138	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
139		return (fetchStatFTP(URL, us, flags));
140	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
141		return (fetchStatHTTP(URL, us, flags));
142	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
143		return (fetchStatHTTP(URL, us, flags));
144	url_seterr(URL_BAD_SCHEME);
145	return (-1);
146}
147
148/*
149 * Select the appropriate protocol for the URL scheme, and return a
150 * list of files in the directory pointed to by the URL.
151 */
152struct url_ent *
153fetchList(struct url *URL, const char *flags)
154{
155
156	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
157		return (fetchListFile(URL, flags));
158	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
159		return (fetchListFTP(URL, flags));
160	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
161		return (fetchListHTTP(URL, flags));
162	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
163		return (fetchListHTTP(URL, flags));
164	url_seterr(URL_BAD_SCHEME);
165	return (NULL);
166}
167
/*
 * String front end to fetchXGet(): parse the URL text, run the request
 * on the parsed form, then release the parsed form again.  Returns NULL
 * without attempting the request if the text cannot be parsed.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
185
/*
 * Shorthand for fetchXGetURL() for callers that do not want a struct
 * url_stat filled out: the stat pointer is passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
194
/*
 * String front end to fetchPut(): parse the URL text, run the request
 * on the parsed form, then release the parsed form again.  Returns NULL
 * without attempting the request if the text cannot be parsed.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
212
/*
 * String front end to fetchStat(): parse the URL text, run the request
 * on the parsed form, then release the parsed form again.  Returns -1
 * without attempting the request if the text cannot be parsed;
 * otherwise returns whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
230
/*
 * String front end to fetchList(): parse the URL text, run the request
 * on the parsed form, then release the parsed form again.  Returns NULL
 * without attempting the request if the text cannot be parsed.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
248
249/*
250 * Make a URL
251 */
252struct url *
253fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
254    const char *user, const char *pwd)
255{
256	struct url *u;
257
258	if (!scheme || (!host && !doc)) {
259		url_seterr(URL_MALFORMED);
260		return (NULL);
261	}
262
263	if (port < 0 || port > 65535) {
264		url_seterr(URL_BAD_PORT);
265		return (NULL);
266	}
267
268	/* allocate struct url */
269	if ((u = calloc(1, sizeof(*u))) == NULL) {
270		fetch_syserr();
271		return (NULL);
272	}
273	u->netrcfd = -1;
274
275	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
276		fetch_syserr();
277		free(u);
278		return (NULL);
279	}
280
281#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
282	seturl(scheme);
283	seturl(host);
284	seturl(user);
285	seturl(pwd);
286#undef seturl
287	u->port = port;
288
289	return (u);
290}
291
/*
 * Map one hexadecimal digit character (either case) to its numeric
 * value 0..15.  Any other character maps to -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if (ch >= 'A' && ch <= 'F')
		value = 10 + (ch - 'A');
	else if (ch >= 'a' && ch <= 'f')
		value = 10 + (ch - 'a');
	else if (ch >= '0' && ch <= '9')
		value = ch - '0';

	return (value);
}
307
/*
 * Percent-decode one URL component.
 *
 * Input is consumed from src up to, but not including, the first '@',
 * ':' or terminating NUL, and the decoded bytes are stored in dst.  A
 * '%' followed by two hex digits becomes the byte they encode, except
 * for "%00", which is not decoded; a '%' not followed by two hex digits
 * is copied literally as well.
 *
 * dlen is the number of bytes that may be stored.  No terminator is
 * written to dst; that is left to the caller.
 *
 * Returns a pointer to the character that stopped the scan, or NULL if
 * the decoded component needs more than dlen bytes.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	int hi, lo;
	char out;

	while (*in != '\0' && *in != '@' && *in != ':') {
		out = *in;
		if (*in == '%') {
			/* look at in[2] only once in[1] is known to be a digit */
			hi = fetch_hexval(in[1]);
			if (hi >= 0) {
				lo = fetch_hexval(in[2]);
				if (lo >= 0 && (hi != 0 || lo != 0)) {
					out = (char)(hi << 4 | lo);
					in += 2;
				}
			}
		}
		if (dlen == 0)
			return (NULL);
		dlen--;
		*dst++ = out;
		in++;
	}
	return (in);
}
336
337/*
338 * Split an URL into components. URL syntax is:
339 * [method:/][/[user[:pwd]@]host[:port]/][document]
340 * This almost, but not quite, RFC1738 URL syntax.
341 */
342struct url *
343fetchParseURL(const char *URL)
344{
345	char *doc;
346	const char *p, *q;
347	struct url *u;
348	int i;
349
350	/* allocate struct url */
351	if ((u = calloc(1, sizeof(*u))) == NULL) {
352		fetch_syserr();
353		return (NULL);
354	}
355	u->netrcfd = -1;
356
357	/* scheme name */
358	if ((p = strstr(URL, ":/"))) {
359		snprintf(u->scheme, URL_SCHEMELEN+1,
360		    "%.*s", (int)(p - URL), URL);
361		URL = ++p;
362		/*
363		 * Only one slash: no host, leave slash as part of document
364		 * Two slashes: host follows, strip slashes
365		 */
366		if (URL[1] == '/')
367			URL = (p += 2);
368	} else {
369		p = URL;
370	}
371	if (!*URL || *URL == '/' || *URL == '.' ||
372	    (u->scheme[0] == '\0' &&
373		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
374		goto nohost;
375
376	p = strpbrk(URL, "/@");
377	if (p && *p == '@') {
378		/* username */
379		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
380		if (q == NULL)
381			goto ouch;
382
383		/* password */
384		if (*q == ':') {
385			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
386			if (q == NULL)
387				goto ouch;
388		}
389		p++;
390	} else {
391		p = URL;
392	}
393
394	/* hostname */
395	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
396	    (*++q == '\0' || *q == '/' || *q == ':')) {
397		if ((i = q - p) > MAXHOSTNAMELEN)
398			i = MAXHOSTNAMELEN;
399		strncpy(u->host, p, i);
400		p = q;
401	} else {
402		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
403			if (i < MAXHOSTNAMELEN)
404				u->host[i++] = *p;
405	}
406
407	/* port */
408	if (*p == ':') {
409		for (q = ++p; *q && (*q != '/'); q++)
410			if (isdigit((unsigned char)*q))
411				u->port = u->port * 10 + (*q - '0');
412			else {
413				/* invalid port */
414				url_seterr(URL_BAD_PORT);
415				goto ouch;
416			}
417		p = q;
418	}
419
420nohost:
421	/* document */
422	if (!*p)
423		p = "/";
424
425	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
426	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
427		const char hexnums[] = "0123456789abcdef";
428
429		/* percent-escape whitespace. */
430		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
431			fetch_syserr();
432			goto ouch;
433		}
434		u->doc = doc;
435		while (*p != '\0') {
436			if (!isspace((unsigned char)*p)) {
437				*doc++ = *p++;
438			} else {
439				*doc++ = '%';
440				*doc++ = hexnums[((unsigned int)*p) >> 4];
441				*doc++ = hexnums[((unsigned int)*p) & 0xf];
442				p++;
443			}
444		}
445		*doc = '\0';
446	} else if ((u->doc = strdup(p)) == NULL) {
447		fetch_syserr();
448		goto ouch;
449	}
450
451	DEBUGF("scheme:   \"%s\"\n"
452	    "user:     \"%s\"\n"
453	    "password: \"%s\"\n"
454	    "host:     \"%s\"\n"
455	    "port:     \"%d\"\n"
456	    "document: \"%s\"\n",
457	    u->scheme, u->user, u->pwd,
458	    u->host, u->port, u->doc);
459
460	return (u);
461
462ouch:
463	free(u);
464	return (NULL);
465}
466
467/*
468 * Free a URL
469 */
470void
471fetchFreeURL(struct url *u)
472{
473	free(u->doc);
474	free(u);
475}
476