fetch.c revision 109967
1/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 109967 2003-01-28 08:04:40Z des $");
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
/*
 * Library-wide state, defined here with external linkage.  Every object
 * starts out zeroed except fetchRestartCalls, which starts at 1.
 * NOTE(review): none of these is referenced by name in the visible part
 * of this file; the per-variable notes below are inferred from the names
 * only -- confirm against fetch.h and the protocol back ends.
 */
auth_t	 fetchAuthMethod;	/* presumably a caller-installed authentication hook */
int	 fetchLastErrCode;	/* presumably the code of the most recent error */
char	 fetchLastErrString[MAXERRSTRING]; /* presumably its message text */
int	 fetchTimeout;		/* presumably an I/O timeout; units not shown here */
int	 fetchRestartCalls = 1;	/* only non-zero default; purpose not shown here */
int	 fetchDebug;		/* presumably enables debug output -- TODO confirm */
49
50
51/*** Local data **************************************************************/
52
/*
 * Error messages for parser errors
 *
 * The URL_* values are the codes this file passes to _url_seterr().
 * Each table entry pairs one code with a category constant and a
 * human-readable message; all listed codes map to FETCH_URL.
 * NOTE(review): the final entry (code -1, FETCH_UNKNOWN) looks like the
 * terminator / fallback for codes not listed above -- confirm against
 * the lookup routine, which is not defined in this file.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr _url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
65
66
67/*** Public API **************************************************************/
68
69/*
70 * Select the appropriate protocol for the URL scheme, and return a
71 * read-only stream connected to the document referenced by the URL.
72 * Also fill out the struct url_stat.
73 */
74FILE *
75fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76{
77	int direct;
78
79	direct = CHECK_FLAG('d');
80	if (us != NULL) {
81		us->size = -1;
82		us->atime = us->mtime = 0;
83	}
84	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
85		return (fetchXGetFile(URL, us, flags));
86	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
87		return (fetchXGetFTP(URL, us, flags));
88	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
89		return (fetchXGetHTTP(URL, us, flags));
90	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
91		return (fetchXGetHTTP(URL, us, flags));
92	_url_seterr(URL_BAD_SCHEME);
93	return (NULL);
94}
95
/*
 * Shorthand for fetchXGet() for callers that do not want the document's
 * metadata: the request is forwarded with no struct url_stat, and the
 * stream (or NULL) that fetchXGet() produces is returned unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
105
106/*
107 * Select the appropriate protocol for the URL scheme, and return a
108 * write-only stream connected to the document referenced by the URL.
109 */
110FILE *
111fetchPut(struct url *URL, const char *flags)
112{
113	int direct;
114
115	direct = CHECK_FLAG('d');
116	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
117		return (fetchPutFile(URL, flags));
118	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
119		return (fetchPutFTP(URL, flags));
120	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
121		return (fetchPutHTTP(URL, flags));
122	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
123		return (fetchPutHTTP(URL, flags));
124	_url_seterr(URL_BAD_SCHEME);
125	return (NULL);
126}
127
128/*
129 * Select the appropriate protocol for the URL scheme, and return the
130 * size of the document referenced by the URL if it exists.
131 */
132int
133fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134{
135	int direct;
136
137	direct = CHECK_FLAG('d');
138	if (us != NULL) {
139		us->size = -1;
140		us->atime = us->mtime = 0;
141	}
142	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143		return (fetchStatFile(URL, us, flags));
144	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145		return (fetchStatFTP(URL, us, flags));
146	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147		return (fetchStatHTTP(URL, us, flags));
148	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149		return (fetchStatHTTP(URL, us, flags));
150	_url_seterr(URL_BAD_SCHEME);
151	return (-1);
152}
153
154/*
155 * Select the appropriate protocol for the URL scheme, and return a
156 * list of files in the directory pointed to by the URL.
157 */
158struct url_ent *
159fetchList(struct url *URL, const char *flags)
160{
161	int direct;
162
163	direct = CHECK_FLAG('d');
164	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
165		return (fetchListFile(URL, flags));
166	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
167		return (fetchListFTP(URL, flags));
168	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
169		return (fetchListHTTP(URL, flags));
170	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
171		return (fetchListHTTP(URL, flags));
172	_url_seterr(URL_BAD_SCHEME);
173	return (NULL);
174}
175
/*
 * String front end to fetchXGet(): parse URL, fetch through the parsed
 * form, then release the parsed form again.
 *
 * Returns NULL without attempting a fetch if the URL cannot be parsed;
 * otherwise returns whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		/* the parsed URL is only needed for the duration of the call */
		fetchFreeURL(parsed);
	}
	return (stream);
}
193
/*
 * Shorthand for fetchXGetURL() for callers that do not want the
 * document's metadata: the request is forwarded with no struct
 * url_stat, and the result is returned unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
202
/*
 * String front end to fetchPut(): parse URL, open the upload through
 * the parsed form, then release the parsed form again.
 *
 * Returns NULL without attempting the operation if the URL cannot be
 * parsed; otherwise returns whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	stream = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		/* the parsed URL is only needed for the duration of the call */
		fetchFreeURL(parsed);
	}
	return (stream);
}
220
/*
 * String front end to fetchStat(): parse URL, stat through the parsed
 * form, then release the parsed form again.
 *
 * Returns -1 without attempting the operation if the URL cannot be
 * parsed; otherwise returns whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int rv;

	rv = -1;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		rv = fetchStat(parsed, us, flags);
		/* the parsed URL is only needed for the duration of the call */
		fetchFreeURL(parsed);
	}
	return (rv);
}
238
/*
 * String front end to fetchList(): parse URL, list through the parsed
 * form, then release the parsed form again.
 *
 * Returns NULL without attempting the operation if the URL cannot be
 * parsed; otherwise returns whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	entries = NULL;
	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		/* the parsed URL is only needed for the duration of the call */
		fetchFreeURL(parsed);
	}
	return (entries);
}
256
257/*
258 * Make a URL
259 */
260struct url *
261fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
262    const char *user, const char *pwd)
263{
264	struct url *u;
265
266	if (!scheme || (!host && !doc)) {
267		_url_seterr(URL_MALFORMED);
268		return (NULL);
269	}
270
271	if (port < 0 || port > 65535) {
272		_url_seterr(URL_BAD_PORT);
273		return (NULL);
274	}
275
276	/* allocate struct url */
277	if ((u = calloc(1, sizeof(*u))) == NULL) {
278		_fetch_syserr();
279		return (NULL);
280	}
281
282	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
283		_fetch_syserr();
284		free(u);
285		return (NULL);
286	}
287
288#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
289	seturl(scheme);
290	seturl(host);
291	seturl(user);
292	seturl(pwd);
293#undef seturl
294	u->port = port;
295
296	return (u);
297}
298
299/*
300 * Split an URL into components. URL syntax is:
301 * [method:/][/[user[:pwd]@]host[:port]/][document]
302 * This almost, but not quite, RFC1738 URL syntax.
303 */
304struct url *
305fetchParseURL(const char *URL)
306{
307	char *doc;
308	const char *p, *q;
309	struct url *u;
310	int i;
311
312	/* allocate struct url */
313	if ((u = calloc(1, sizeof(*u))) == NULL) {
314		_fetch_syserr();
315		return (NULL);
316	}
317
318	/* scheme name */
319	if ((p = strstr(URL, ":/"))) {
320		snprintf(u->scheme, URL_SCHEMELEN+1,
321		    "%.*s", (int)(p - URL), URL);
322		URL = ++p;
323		/*
324		 * Only one slash: no host, leave slash as part of document
325		 * Two slashes: host follows, strip slashes
326		 */
327		if (URL[1] == '/')
328			URL = (p += 2);
329	} else {
330		p = URL;
331	}
332	if (!*URL || *URL == '/' || *URL == '.' ||
333	    (u->scheme[0] == '\0' &&
334		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
335		goto nohost;
336
337	p = strpbrk(URL, "/@");
338	if (p && *p == '@') {
339		/* username */
340		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
341			if (i < URL_USERLEN)
342				u->user[i++] = *q;
343
344		/* password */
345		if (*q == ':')
346			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
347				if (i < URL_PWDLEN)
348					u->pwd[i++] = *q;
349
350		p++;
351	} else {
352		p = URL;
353	}
354
355	/* hostname */
356#ifdef INET6
357	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
358	    (*++q == '\0' || *q == '/' || *q == ':')) {
359		if ((i = q - p - 2) > MAXHOSTNAMELEN)
360			i = MAXHOSTNAMELEN;
361		strncpy(u->host, ++p, i);
362		p = q;
363	} else
364#endif
365		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
366			if (i < MAXHOSTNAMELEN)
367				u->host[i++] = *p;
368
369	/* port */
370	if (*p == ':') {
371		for (q = ++p; *q && (*q != '/'); q++)
372			if (isdigit(*q))
373				u->port = u->port * 10 + (*q - '0');
374			else {
375				/* invalid port */
376				_url_seterr(URL_BAD_PORT);
377				goto ouch;
378			}
379		p = q;
380	}
381
382nohost:
383	/* document */
384	if (!*p)
385		p = "/";
386
387	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
388	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
389		const char hexnums[] = "0123456789abcdef";
390
391		/* percent-escape whitespace. */
392		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
393			_fetch_syserr();
394			goto ouch;
395		}
396		u->doc = doc;
397		while (*p != '\0') {
398			if (!isspace(*p)) {
399				*doc++ = *p++;
400			} else {
401				*doc++ = '%';
402				*doc++ = hexnums[((unsigned int)*p) >> 4];
403				*doc++ = hexnums[((unsigned int)*p) & 0xf];
404				p++;
405			}
406		}
407		*doc = '\0';
408	} else if ((u->doc = strdup(p)) == NULL) {
409		_fetch_syserr();
410		goto ouch;
411	}
412
413	DEBUG(fprintf(stderr,
414		  "scheme:   [%s]\n"
415		  "user:     [%s]\n"
416		  "password: [%s]\n"
417		  "host:     [%s]\n"
418		  "port:     [%d]\n"
419		  "document: [%s]\n",
420		  u->scheme, u->user, u->pwd,
421		  u->host, u->port, u->doc));
422
423	return (u);
424
425ouch:
426	free(u);
427	return (NULL);
428}
429
430/*
431 * Free a URL
432 */
433void
434fetchFreeURL(struct url *u)
435{
436	free(u->doc);
437	free(u);
438}
439