fetch.c revision 90267
1/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 90267 2002-02-05 22:13:51Z des $");
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
/*
 * Library-wide variables with external linkage.  None of them is read or
 * written directly by the code in this file, so the per-variable notes
 * below are inferred from the names only.
 * NOTE(review): confirm each against fetch.h and the protocol back ends.
 */
auth_t	 fetchAuthMethod;	/* presumably a caller-supplied authentication hook */
int	 fetchLastErrCode;	/* presumably the code of the most recent error */
char	 fetchLastErrString[MAXERRSTRING]; /* presumably the matching message text */
int	 fetchTimeout;		/* presumably a timeout; units not visible here */
int	 fetchRestartCalls = 1;	/* the only one with a non-zero initial value */
int	 fetchDebug;		/* presumably enables diagnostic output */
49
50
51/*** Local data **************************************************************/
52
/*
 * Error messages for parser errors
 *
 * The URL_* codes are the values this file passes to _url_seterr(); each
 * one appears as the first member of one row of _url_errlist, followed by
 * a FETCH_* category and the message text.  The final row uses code -1.
 * NOTE(review): presumably that row is the fallback/terminator the lookup
 * code stops at -- confirm against the definition of struct fetcherr.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr _url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
65
66
67/*** Public API **************************************************************/
68
69/*
70 * Select the appropriate protocol for the URL scheme, and return a
71 * read-only stream connected to the document referenced by the URL.
72 * Also fill out the struct url_stat.
73 */
74FILE *
75fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76{
77	int direct;
78
79	direct = CHECK_FLAG('d');
80	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
81		return (fetchXGetFile(URL, us, flags));
82	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
83		return (fetchXGetHTTP(URL, us, flags));
84	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
85		return (fetchXGetFTP(URL, us, flags));
86	} else {
87		_url_seterr(URL_BAD_SCHEME);
88		return (NULL);
89	}
90}
91
/*
 * Shorthand for fetchXGet() for callers that do not want a struct
 * url_stat filled in: the stat pointer is passed as NULL and the
 * resulting read-only stream is returned as is.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
101
102/*
103 * Select the appropriate protocol for the URL scheme, and return a
104 * write-only stream connected to the document referenced by the URL.
105 */
106FILE *
107fetchPut(struct url *URL, const char *flags)
108{
109	int direct;
110
111	direct = CHECK_FLAG('d');
112	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113		return (fetchPutFile(URL, flags));
114	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
115		return (fetchPutHTTP(URL, flags));
116	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
117		return (fetchPutFTP(URL, flags));
118	} else {
119		_url_seterr(URL_BAD_SCHEME);
120		return (NULL);
121	}
122}
123
124/*
125 * Select the appropriate protocol for the URL scheme, and return the
126 * size of the document referenced by the URL if it exists.
127 */
128int
129fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130{
131	int direct;
132
133	direct = CHECK_FLAG('d');
134	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
135		return (fetchStatFile(URL, us, flags));
136	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
137		return (fetchStatHTTP(URL, us, flags));
138	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
139		return (fetchStatFTP(URL, us, flags));
140	_url_seterr(URL_BAD_SCHEME);
141	return (-1);
142}
143
144/*
145 * Select the appropriate protocol for the URL scheme, and return a
146 * list of files in the directory pointed to by the URL.
147 */
148struct url_ent *
149fetchList(struct url *URL, const char *flags)
150{
151	int direct;
152
153	direct = CHECK_FLAG('d');
154	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
155		return (fetchListFile(URL, flags));
156	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
157		return (fetchListHTTP(URL, flags));
158	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
159		return (fetchListFTP(URL, flags));
160	_url_seterr(URL_BAD_SCHEME);
161	return (NULL);
162}
163
/*
 * String front end to fetchXGet(): parse `URL', fetch through the parsed
 * form, then release the parsed form again.  Returns NULL if the string
 * does not parse; otherwise returns fetchXGet()'s result unchanged.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
181
/*
 * Shorthand for fetchXGetURL() for callers that do not want a struct
 * url_stat filled in: the stat pointer is passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
190
/*
 * String front end to fetchPut(): parse `URL', open it for writing
 * through the parsed form, then release the parsed form again.  Returns
 * NULL if the string does not parse; otherwise returns fetchPut()'s
 * result unchanged.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
208
/*
 * String front end to fetchStat(): parse `URL', stat it through the
 * parsed form, then release the parsed form again.  Returns -1 if the
 * string does not parse; otherwise returns fetchStat()'s result
 * unchanged.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int result;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	result = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (result);
}
226
/*
 * String front end to fetchList(): parse `URL', list it through the
 * parsed form, then release the parsed form again.  Returns NULL if the
 * string does not parse; otherwise returns fetchList()'s result
 * unchanged.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
244
245/*
246 * Make a URL
247 */
248struct url *
249fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
250    const char *user, const char *pwd)
251{
252	struct url *u;
253
254	if (!scheme || (!host && !doc)) {
255		_url_seterr(URL_MALFORMED);
256		return (NULL);
257	}
258
259	if (port < 0 || port > 65535) {
260		_url_seterr(URL_BAD_PORT);
261		return (NULL);
262	}
263
264	/* allocate struct url */
265	if ((u = calloc(1, sizeof *u)) == NULL) {
266		_fetch_syserr();
267		return (NULL);
268	}
269
270	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
271		_fetch_syserr();
272		free(u);
273		return (NULL);
274	}
275
276#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
277	seturl(scheme);
278	seturl(host);
279	seturl(user);
280	seturl(pwd);
281#undef seturl
282	u->port = port;
283
284	return (u);
285}
286
287/*
288 * Split an URL into components. URL syntax is:
289 * [method:/][/[user[:pwd]@]host[:port]/][document]
290 * This almost, but not quite, RFC1738 URL syntax.
291 */
292struct url *
293fetchParseURL(const char *URL)
294{
295	char *doc;
296	const char *p, *q;
297	struct url *u;
298	int i;
299
300	/* allocate struct url */
301	if ((u = calloc(1, sizeof *u)) == NULL) {
302		_fetch_syserr();
303		return (NULL);
304	}
305
306	/* scheme name */
307	if ((p = strstr(URL, ":/"))) {
308		snprintf(u->scheme, URL_SCHEMELEN+1,
309		    "%.*s", (int)(p - URL), URL);
310		URL = ++p;
311		/*
312		 * Only one slash: no host, leave slash as part of document
313		 * Two slashes: host follows, strip slashes
314		 */
315		if (URL[1] == '/')
316			URL = (p += 2);
317	} else {
318		p = URL;
319	}
320	if (!*URL || *URL == '/' || *URL == '.' ||
321	    (u->scheme[0] == '\0' &&
322		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
323		goto nohost;
324
325	p = strpbrk(URL, "/@");
326	if (p && *p == '@') {
327		/* username */
328		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
329			if (i < URL_USERLEN)
330				u->user[i++] = *q;
331
332		/* password */
333		if (*q == ':')
334			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
335				if (i < URL_PWDLEN)
336					u->pwd[i++] = *q;
337
338		p++;
339	} else {
340		p = URL;
341	}
342
343	/* hostname */
344#ifdef INET6
345	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
346	    (*++q == '\0' || *q == '/' || *q == ':')) {
347		if ((i = q - p - 2) > MAXHOSTNAMELEN)
348			i = MAXHOSTNAMELEN;
349		strncpy(u->host, ++p, i);
350		p = q;
351	} else
352#endif
353		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
354			if (i < MAXHOSTNAMELEN)
355				u->host[i++] = *p;
356
357	/* port */
358	if (*p == ':') {
359		for (q = ++p; *q && (*q != '/'); q++)
360			if (isdigit(*q))
361				u->port = u->port * 10 + (*q - '0');
362			else {
363				/* invalid port */
364				_url_seterr(URL_BAD_PORT);
365				goto ouch;
366			}
367		p = q;
368	}
369
370nohost:
371	/* document */
372	if (!*p)
373		p = "/";
374
375	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
376	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
377		const char hexnums[] = "0123456789abcdef";
378
379		/* percent-escape whitespace. */
380		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
381			_fetch_syserr();
382			goto ouch;
383		}
384		u->doc = doc;
385		while (*p != '\0') {
386			if (!isspace(*p)) {
387				*doc++ = *p++;
388			} else {
389				*doc++ = '%';
390				*doc++ = hexnums[((unsigned int)*p) >> 4];
391				*doc++ = hexnums[((unsigned int)*p) & 0xf];
392				p++;
393			}
394		}
395		*doc = '\0';
396	} else if ((u->doc = strdup(p)) == NULL) {
397		_fetch_syserr();
398		goto ouch;
399	}
400
401	DEBUG(fprintf(stderr,
402		  "scheme:   [%s]\n"
403		  "user:     [%s]\n"
404		  "password: [%s]\n"
405		  "host:     [%s]\n"
406		  "port:     [%d]\n"
407		  "document: [%s]\n",
408		  u->scheme, u->user, u->pwd,
409		  u->host, u->port, u->doc));
410
411	return (u);
412
413ouch:
414	free(u);
415	return (NULL);
416}
417
418/*
419 * Free a URL
420 */
421void
422fetchFreeURL(struct url *u)
423{
424	free(u->doc);
425	free(u);
426}
427