fetch.c revision 88769
1/*-
2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 88769 2002-01-01 14:48:09Z des $");
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
43auth_t	 fetchAuthMethod;
44int	 fetchLastErrCode;
45char	 fetchLastErrString[MAXERRSTRING];
46int	 fetchTimeout;
47int	 fetchRestartCalls = 1;
48int	 fetchDebug;
49
50
51/*** Local data **************************************************************/
52
53/*
54 * Error messages for parser errors
55 */
56#define URL_MALFORMED		1
57#define URL_BAD_SCHEME		2
58#define URL_BAD_PORT		3
59static struct fetcherr _url_errlist[] = {
60    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
61    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
62    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
63    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
64};
65
66
67/*** Public API **************************************************************/
68
69/*
70 * Select the appropriate protocol for the URL scheme, and return a
71 * read-only stream connected to the document referenced by the URL.
72 * Also fill out the struct url_stat.
73 */
74FILE *
75fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76{
77    int direct;
78
79    direct = CHECK_FLAG('d');
80    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
81	return fetchXGetFile(URL, us, flags);
82    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
83	return fetchXGetHTTP(URL, us, flags);
84    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
85	return fetchXGetFTP(URL, us, flags);
86    } else {
87	_url_seterr(URL_BAD_SCHEME);
88	return NULL;
89    }
90}
91
/*
 * Convenience form of fetchXGet() for callers that do not want a
 * struct url_stat: the stat pointer is passed as NULL and the result
 * of fetchXGet() is returned unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGet(URL, no_stat, flags);
}
101
102/*
103 * Select the appropriate protocol for the URL scheme, and return a
104 * write-only stream connected to the document referenced by the URL.
105 */
106FILE *
107fetchPut(struct url *URL, const char *flags)
108{
109    int direct;
110
111    direct = CHECK_FLAG('d');
112    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113	return fetchPutFile(URL, flags);
114    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
115	return fetchPutHTTP(URL, flags);
116    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
117	return fetchPutFTP(URL, flags);
118    } else {
119	_url_seterr(URL_BAD_SCHEME);
120	return NULL;
121    }
122}
123
124/*
125 * Select the appropriate protocol for the URL scheme, and return the
126 * size of the document referenced by the URL if it exists.
127 */
128int
129fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130{
131    int direct;
132
133    direct = CHECK_FLAG('d');
134    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
135	return fetchStatFile(URL, us, flags);
136    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
137	return fetchStatHTTP(URL, us, flags);
138    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
139	return fetchStatFTP(URL, us, flags);
140    } else {
141	_url_seterr(URL_BAD_SCHEME);
142	return -1;
143    }
144}
145
146/*
147 * Select the appropriate protocol for the URL scheme, and return a
148 * list of files in the directory pointed to by the URL.
149 */
150struct url_ent *
151fetchList(struct url *URL, const char *flags)
152{
153    int direct;
154
155    direct = CHECK_FLAG('d');
156    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
157	return fetchListFile(URL, flags);
158    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
159	return fetchListHTTP(URL, flags);
160    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
161	return fetchListFTP(URL, flags);
162    } else {
163	_url_seterr(URL_BAD_SCHEME);
164	return NULL;
165    }
166}
167
/*
 * String front end to fetchXGet(): parse the URL text, run fetchXGet()
 * on the parsed form, then release the parsed form.  Returns NULL
 * without calling fetchXGet() when the text cannot be parsed; otherwise
 * returns whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
185
/*
 * Convenience form of fetchXGetURL() for callers that do not want a
 * struct url_stat: the stat pointer is passed as NULL and the result
 * of fetchXGetURL() is returned unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    struct url_stat *no_stat = NULL;

    return fetchXGetURL(URL, no_stat, flags);
}
194
/*
 * String front end to fetchPut(): parse the URL text, run fetchPut()
 * on the parsed form, then release the parsed form.  Returns NULL
 * without calling fetchPut() when the text cannot be parsed; otherwise
 * returns whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
212
213/*
214 * Attempt to parse the given URL; if successful, call fetchStat().
215 */
216int
217fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
218{
219    struct url *u;
220    int s;
221
222    if ((u = fetchParseURL(URL)) == NULL)
223	return -1;
224
225    s = fetchStat(u, us, flags);
226
227    fetchFreeURL(u);
228    return s;
229}
230
/*
 * String front end to fetchList(): parse the URL text, run fetchList()
 * on the parsed form, then release the parsed form.  Returns NULL
 * without calling fetchList() when the text cannot be parsed; otherwise
 * returns whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);
    }
    return entries;
}
248
249/*
250 * Make a URL
251 */
252struct url *
253fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
254    const char *user, const char *pwd)
255{
256    struct url *u;
257
258    if (!scheme || (!host && !doc)) {
259	_url_seterr(URL_MALFORMED);
260	return NULL;
261    }
262
263    if (port < 0 || port > 65535) {
264	_url_seterr(URL_BAD_PORT);
265	return NULL;
266    }
267
268    /* allocate struct url */
269    if ((u = calloc(1, sizeof *u)) == NULL) {
270	_fetch_syserr();
271	return NULL;
272    }
273
274    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275	_fetch_syserr();
276	free(u);
277	return NULL;
278    }
279
280#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
281    seturl(scheme);
282    seturl(host);
283    seturl(user);
284    seturl(pwd);
285#undef seturl
286    u->port = port;
287
288    return u;
289}
290
291/*
292 * Split an URL into components. URL syntax is:
293 * [method:/][/[user[:pwd]@]host[:port]/][document]
294 * This almost, but not quite, RFC1738 URL syntax.
295 */
296struct url *
297fetchParseURL(const char *URL)
298{
299    char *doc;
300    const char *p, *q;
301    struct url *u;
302    int i;
303
304    /* allocate struct url */
305    if ((u = calloc(1, sizeof *u)) == NULL) {
306	_fetch_syserr();
307	return NULL;
308    }
309
310    /* scheme name */
311    if ((p = strstr(URL, ":/"))) {
312	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
313	URL = ++p;
314	/*
315	 * Only one slash: no host, leave slash as part of document
316	 * Two slashes: host follows, strip slashes
317	 */
318	if (URL[1] == '/')
319	    URL = (p += 2);
320    } else {
321	p = URL;
322    }
323    if (!*URL || *URL == '/' || *URL == '.' ||
324	(u->scheme[0] == '\0' &&
325    	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
326	goto nohost;
327
328    p = strpbrk(URL, "/@");
329    if (p && *p == '@') {
330	/* username */
331	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
332	    if (i < URL_USERLEN)
333		u->user[i++] = *q;
334
335	/* password */
336	if (*q == ':')
337	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
338		if (i < URL_PWDLEN)
339		    u->pwd[i++] = *q;
340
341	p++;
342    } else {
343	p = URL;
344    }
345
346    /* hostname */
347#ifdef INET6
348    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
349	(*++q == '\0' || *q == '/' || *q == ':')) {
350	if ((i = q - p - 2) > MAXHOSTNAMELEN)
351	    i = MAXHOSTNAMELEN;
352	strncpy(u->host, ++p, i);
353	p = q;
354    } else
355#endif
356	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
357	    if (i < MAXHOSTNAMELEN)
358		u->host[i++] = *p;
359
360    /* port */
361    if (*p == ':') {
362	for (q = ++p; *q && (*q != '/'); q++)
363	    if (isdigit(*q))
364		u->port = u->port * 10 + (*q - '0');
365	    else {
366		/* invalid port */
367		_url_seterr(URL_BAD_PORT);
368		goto ouch;
369	    }
370	while (*p && (*p != '/'))
371	    p++;
372    }
373
374nohost:
375    /* document */
376    if (!*p)
377	p = "/";
378
379    if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
380	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
381	const char hexnums[] = "0123456789abcdef";
382
383	/* percent-escape whitespace. */
384	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
385	    _fetch_syserr();
386	    goto ouch;
387	}
388	u->doc = doc;
389	while (*p != '\0') {
390	    if (!isspace(*p)) {
391		*doc++ = *p++;
392            } else {
393		*doc++ = '%';
394		*doc++ = hexnums[((unsigned int)*p) >> 4];
395		*doc++ = hexnums[((unsigned int)*p) & 0xf];
396		p++;
397            }
398	}
399	*doc = '\0';
400    } else if ((u->doc = strdup(p)) == NULL) {
401	_fetch_syserr();
402	goto ouch;
403    }
404
405    DEBUG(fprintf(stderr,
406		  "scheme:   [%s]\n"
407		  "user:     [%s]\n"
408		  "password: [%s]\n"
409		  "host:     [%s]\n"
410		  "port:     [%d]\n"
411		  "document: [%s]\n",
412		  u->scheme, u->user, u->pwd,
413		  u->host, u->port, u->doc));
414
415    return u;
416
417ouch:
418    free(u);
419    return NULL;
420}
421
422/*
423 * Free a URL
424 */
425void
426fetchFreeURL(struct url *u)
427{
428    free(u->doc);
429    free(u);
430}
431