1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include <sys/param.h>
35
36#include <netinet/in.h>
37
38#include <errno.h>
39#include <ctype.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43
44#include "fetch.h"
45#include "common.h"
46
47auth_t	 fetchAuthMethod;
48int	 fetchLastErrCode;
49char	 fetchLastErrString[MAXERRSTRING];
50int	 fetchTimeout;
51int	 fetchRestartCalls = 1;
52int	 fetchDebug;
53
54
55/*** Local data **************************************************************/
56
57/*
58 * Error messages for parser errors
59 */
60#define URL_MALFORMED		1
61#define URL_BAD_SCHEME		2
62#define URL_BAD_PORT		3
63static struct fetcherr url_errlist[] = {
64	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
65	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
66	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
67	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
68};
69
70
71/*** Public API **************************************************************/
72
73/*
74 * Select the appropriate protocol for the URL scheme, and return a
75 * read-only stream connected to the document referenced by the URL.
76 * Also fill out the struct url_stat.
77 */
78FILE *
79fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
80{
81
82	if (us != NULL) {
83		us->size = -1;
84		us->atime = us->mtime = 0;
85	}
86	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
87		return (fetchXGetFile(URL, us, flags));
88	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
89		return (fetchXGetFTP(URL, us, flags));
90	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
91		return (fetchXGetHTTP(URL, us, flags));
92	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
93		return (fetchXGetHTTP(URL, us, flags));
94	url_seterr(URL_BAD_SCHEME);
95	return (NULL);
96}
97
/*
 * Variant of fetchXGet() for callers that have no use for a
 * struct url_stat: NULL is passed in its place.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
107
108/*
109 * Select the appropriate protocol for the URL scheme, and return a
110 * write-only stream connected to the document referenced by the URL.
111 */
112FILE *
113fetchPut(struct url *URL, const char *flags)
114{
115
116	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
117		return (fetchPutFile(URL, flags));
118	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
119		return (fetchPutFTP(URL, flags));
120	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
121		return (fetchPutHTTP(URL, flags));
122	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
123		return (fetchPutHTTP(URL, flags));
124	url_seterr(URL_BAD_SCHEME);
125	return (NULL);
126}
127
128/*
129 * Select the appropriate protocol for the URL scheme, and return the
130 * size of the document referenced by the URL if it exists.
131 */
132int
133fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134{
135
136	if (us != NULL) {
137		us->size = -1;
138		us->atime = us->mtime = 0;
139	}
140	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
141		return (fetchStatFile(URL, us, flags));
142	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
143		return (fetchStatFTP(URL, us, flags));
144	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
145		return (fetchStatHTTP(URL, us, flags));
146	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
147		return (fetchStatHTTP(URL, us, flags));
148	url_seterr(URL_BAD_SCHEME);
149	return (-1);
150}
151
152/*
153 * Select the appropriate protocol for the URL scheme, and return a
154 * list of files in the directory pointed to by the URL.
155 */
156struct url_ent *
157fetchList(struct url *URL, const char *flags)
158{
159
160	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
161		return (fetchListFile(URL, flags));
162	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
163		return (fetchListFTP(URL, flags));
164	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
165		return (fetchListHTTP(URL, flags));
166	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
167		return (fetchListHTTP(URL, flags));
168	url_seterr(URL_BAD_SCHEME);
169	return (NULL);
170}
171
/*
 * String front end to fetchXGet(): parse the URL text, fetch, and
 * release the parsed URL again.  Returns NULL if parsing fails, otherwise
 * whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
189
/*
 * String front end for plain retrieval: same as fetchXGetURL() with no
 * struct url_stat (NULL is passed in its place).
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
198
/*
 * String front end to fetchPut(): parse the URL text, open the document
 * for writing, and release the parsed URL again.  Returns NULL if parsing
 * fails, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
216
/*
 * String front end to fetchStat(): parse the URL text, stat the
 * document, and release the parsed URL again.  Returns -1 if parsing
 * fails, otherwise whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
234
/*
 * String front end to fetchList(): parse the URL text, list the
 * directory, and release the parsed URL again.  Returns NULL if parsing
 * fails, otherwise whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
252
253/*
254 * Make a URL
255 */
256struct url *
257fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
258    const char *user, const char *pwd)
259{
260	struct url *u;
261
262	if (!scheme || (!host && !doc)) {
263		url_seterr(URL_MALFORMED);
264		return (NULL);
265	}
266
267	if (port < 0 || port > 65535) {
268		url_seterr(URL_BAD_PORT);
269		return (NULL);
270	}
271
272	/* allocate struct url */
273	if ((u = calloc(1, sizeof(*u))) == NULL) {
274		fetch_syserr();
275		return (NULL);
276	}
277	u->netrcfd = -1;
278
279	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
280		fetch_syserr();
281		free(u);
282		return (NULL);
283	}
284
285#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
286	seturl(scheme);
287	seturl(host);
288	seturl(user);
289	seturl(pwd);
290#undef seturl
291	u->port = port;
292
293	return (u);
294}
295
/*
 * Map a single hexadecimal digit character (upper or lower case) to its
 * numeric value 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if (ch >= '0' && ch <= '9')
		value = ch - '0';
	else if (ch >= 'a' && ch <= 'f')
		value = 10 + (ch - 'a');
	else if (ch >= 'A' && ch <= 'F')
		value = 10 + (ch - 'A');

	return (value);
}
311
/*
 * Percent-decode a URL component from src into dst, which has room for
 * dlen bytes.  Decoding stops at the end of the string or at the first
 * unescaped '@' or ':'; the return value points at that stopping
 * character.  NULL is returned if a '%' is not followed by two hex
 * digits, if the escape is %00, or if the output does not fit in dlen
 * bytes.  dst is NOT terminated here; that is left to the caller.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	char *out = dst;
	size_t room = dlen;
	int hi, lo;
	char ch;

	while (*in != '\0' && *in != '@' && *in != ':') {
		if (*in != '%') {
			/* Ordinary character: copy verbatim. */
			ch = *in;
			in += 1;
		} else {
			/*
			 * Look at the second digit only once the first is
			 * known to be valid, so we never read past the
			 * terminator.
			 */
			hi = fetch_hexval(in[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(in[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			ch = hi << 4 | lo;
			in += 3;
		}

		if (room == 0)
			return (NULL);
		room--;
		*out++ = ch;
	}

	return (in);
}
343
344/*
345 * Split an URL into components. URL syntax is:
346 * [method:/][/[user[:pwd]@]host[:port]/][document]
347 * This almost, but not quite, RFC1738 URL syntax.
348 */
349struct url *
350fetchParseURL(const char *URL)
351{
352	char *doc;
353	const char *p, *q;
354	struct url *u;
355	int i, n;
356
357	/* allocate struct url */
358	if ((u = calloc(1, sizeof(*u))) == NULL) {
359		fetch_syserr();
360		return (NULL);
361	}
362	u->netrcfd = -1;
363
364	/* scheme name */
365	if ((p = strstr(URL, ":/"))) {
366                if (p - URL > URL_SCHEMELEN)
367                        goto ouch;
368                for (i = 0; URL + i < p; i++)
369                        u->scheme[i] = tolower((unsigned char)URL[i]);
370		URL = ++p;
371		/*
372		 * Only one slash: no host, leave slash as part of document
373		 * Two slashes: host follows, strip slashes
374		 */
375		if (URL[1] == '/')
376			URL = (p += 2);
377	} else {
378		p = URL;
379	}
380	if (!*URL || *URL == '/' || *URL == '.' ||
381	    (u->scheme[0] == '\0' &&
382		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
383		goto nohost;
384
385	p = strpbrk(URL, "/@");
386	if (p && *p == '@') {
387		/* username */
388		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
389		if (q == NULL)
390			goto ouch;
391
392		/* password */
393		if (*q == ':') {
394			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
395			if (q == NULL)
396				goto ouch;
397		}
398		p++;
399	} else {
400		p = URL;
401	}
402
403	/* hostname */
404	if (*p == '[') {
405		q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
406		if (*q++ != ']')
407			goto ouch;
408	} else {
409		/* valid characters in a DNS name */
410		q = p + strspn(p, "-." "0123456789"
411		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
412		    "abcdefghijklmnopqrstuvwxyz");
413	}
414	if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
415		goto ouch;
416	for (i = 0; p + i < q; i++)
417		u->host[i] = tolower((unsigned char)p[i]);
418	u->host[i] = '\0';
419	p = q;
420
421	/* port */
422	if (*p == ':') {
423		for (n = 0, q = ++p; *q && (*q != '/'); q++) {
424			if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
425				n = n * 10 + (*q - '0');
426			} else {
427				/* invalid port */
428				url_seterr(URL_BAD_PORT);
429				goto ouch;
430			}
431		}
432		if (n < 1 || n > IPPORT_MAX)
433			goto ouch;
434		u->port = n;
435		p = q;
436	}
437
438nohost:
439	/* document */
440	if (!*p)
441		p = "/";
442
443	if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
444	    strcmp(u->scheme, SCHEME_HTTPS) == 0) {
445		const char hexnums[] = "0123456789abcdef";
446
447		/* percent-escape whitespace. */
448		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
449			fetch_syserr();
450			goto ouch;
451		}
452		u->doc = doc;
453		while (*p != '\0') {
454			if (!isspace((unsigned char)*p)) {
455				*doc++ = *p++;
456			} else {
457				*doc++ = '%';
458				*doc++ = hexnums[((unsigned int)*p) >> 4];
459				*doc++ = hexnums[((unsigned int)*p) & 0xf];
460				p++;
461			}
462		}
463		*doc = '\0';
464	} else if ((u->doc = strdup(p)) == NULL) {
465		fetch_syserr();
466		goto ouch;
467	}
468
469	DEBUGF("scheme:   \"%s\"\n"
470	    "user:     \"%s\"\n"
471	    "password: \"%s\"\n"
472	    "host:     \"%s\"\n"
473	    "port:     \"%d\"\n"
474	    "document: \"%s\"\n",
475	    u->scheme, u->user, u->pwd,
476	    u->host, u->port, u->doc);
477
478	return (u);
479
480ouch:
481	free(u);
482	return (NULL);
483}
484
485/*
486 * Free a URL
487 */
488void
489fetchFreeURL(struct url *u)
490{
491	free(u->doc);
492	free(u);
493}
494