fetch.c revision 84203
1/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 84203 2001-09-30 21:36:09Z dillon $");
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
/*
 * Library-wide state with external linkage.  Only the definitions live
 * here; the code that reads and writes them is in other translation units.
 */
/* NOTE(review): presumably a caller-installed authentication hook — confirm against fetch.h */
auth_t	 fetchAuthMethod;
/* NOTE(review): presumably the code of the most recent error, set by the error helpers — confirm */
int	 fetchLastErrCode;
/* Text buffer of MAXERRSTRING bytes; presumably the message matching fetchLastErrCode — confirm */
char	 fetchLastErrString[MAXERRSTRING];
/* NOTE(review): units and consumers are not visible in this file — confirm */
int	 fetchTimeout;
/* Initialised to 1; presumably enables restarting of interrupted calls — confirm */
int	 fetchRestartCalls = 1;
48
49
50/*** Local data **************************************************************/
51
/*
 * Error messages for parser errors
 *
 * The URL_* codes below are the values handed to _url_seterr() by the
 * functions in this file; each has a matching row in _url_errlist.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
/*
 * One row per parser error: { code, category, message }.
 * NOTE(review): the lookup routine is defined elsewhere; the final -1 row
 * is presumably the terminator / catch-all for unknown codes — confirm.
 */
static struct fetcherr _url_errlist[] = {
    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
};
64
65
66/*** Public API **************************************************************/
67
68/*
69 * Select the appropriate protocol for the URL scheme, and return a
70 * read-only stream connected to the document referenced by the URL.
71 * Also fill out the struct url_stat.
72 */
73FILE *
74fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
75{
76    int direct;
77
78    direct = CHECK_FLAG('d');
79    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
80	return fetchXGetFile(URL, us, flags);
81    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
82	return fetchXGetHTTP(URL, us, flags);
83    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
84	return fetchXGetFTP(URL, us, flags);
85    } else {
86	_url_seterr(URL_BAD_SCHEME);
87	return NULL;
88    }
89}
90
/*
 * Convenience form of fetchXGet() for callers that do not need document
 * statistics: forwards URL and flags with a NULL struct url_stat pointer
 * and returns whatever fetchXGet() returns.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
100
101/*
102 * Select the appropriate protocol for the URL scheme, and return a
103 * write-only stream connected to the document referenced by the URL.
104 */
105FILE *
106fetchPut(struct url *URL, const char *flags)
107{
108    int direct;
109
110    direct = CHECK_FLAG('d');
111    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
112	return fetchPutFile(URL, flags);
113    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
114	return fetchPutHTTP(URL, flags);
115    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
116	return fetchPutFTP(URL, flags);
117    } else {
118	_url_seterr(URL_BAD_SCHEME);
119	return NULL;
120    }
121}
122
123/*
124 * Select the appropriate protocol for the URL scheme, and return the
125 * size of the document referenced by the URL if it exists.
126 */
127int
128fetchStat(struct url *URL, struct url_stat *us, const char *flags)
129{
130    int direct;
131
132    direct = CHECK_FLAG('d');
133    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
134	return fetchStatFile(URL, us, flags);
135    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
136	return fetchStatHTTP(URL, us, flags);
137    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
138	return fetchStatFTP(URL, us, flags);
139    } else {
140	_url_seterr(URL_BAD_SCHEME);
141	return -1;
142    }
143}
144
145/*
146 * Select the appropriate protocol for the URL scheme, and return a
147 * list of files in the directory pointed to by the URL.
148 */
149struct url_ent *
150fetchList(struct url *URL, const char *flags)
151{
152    int direct;
153
154    direct = CHECK_FLAG('d');
155    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
156	return fetchListFile(URL, flags);
157    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
158	return fetchListHTTP(URL, flags);
159    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
160	return fetchListFTP(URL, flags);
161    } else {
162	_url_seterr(URL_BAD_SCHEME);
163	return NULL;
164    }
165}
166
/*
 * String front end to fetchXGet(): parse the URL text, run fetchXGet()
 * on the result and release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchXGet(parsed, us, flags);
    fetchFreeURL(parsed);

    return stream;
}
184
/*
 * String front end to fetchGet(): forwards to fetchXGetURL() with a NULL
 * struct url_stat pointer and returns its result.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
193
/*
 * String front end to fetchPut(): parse the URL text, run fetchPut() on
 * the result and release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    stream = fetchPut(parsed, flags);
    fetchFreeURL(parsed);

    return stream;
}
211
/*
 * String front end to fetchStat(): parse the URL text, run fetchStat()
 * on the result and release the parsed URL again before returning.
 *
 * Returns -1 if the URL cannot be parsed, otherwise whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int status;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return -1;

    status = fetchStat(parsed, us, flags);
    fetchFreeURL(parsed);

    return status;
}
229
/*
 * String front end to fetchList(): parse the URL text, run fetchList()
 * on the result and release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed, otherwise whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries;

    parsed = fetchParseURL(URL);
    if (parsed == NULL)
	return NULL;

    entries = fetchList(parsed, flags);
    fetchFreeURL(parsed);

    return entries;
}
247
248/*
249 * Make a URL
250 */
251struct url *
252fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
253    const char *user, const char *pwd)
254{
255    struct url *u;
256
257    if (!scheme || (!host && !doc)) {
258	_url_seterr(URL_MALFORMED);
259	return NULL;
260    }
261
262    if (port < 0 || port > 65535) {
263	_url_seterr(URL_BAD_PORT);
264	return NULL;
265    }
266
267    /* allocate struct url */
268    if ((u = calloc(1, sizeof *u)) == NULL) {
269	_fetch_syserr();
270	return NULL;
271    }
272
273    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
274	_fetch_syserr();
275	free(u);
276	return NULL;
277    }
278
279#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
280    seturl(scheme);
281    seturl(host);
282    seturl(user);
283    seturl(pwd);
284#undef seturl
285    u->port = port;
286
287    return u;
288}
289
290/*
291 * Split an URL into components. URL syntax is:
292 * [method:/][/[user[:pwd]@]host[:port]/][document]
293 * This almost, but not quite, RFC1738 URL syntax.
294 */
295struct url *
296fetchParseURL(const char *URL)
297{
298    char *doc;
299    const char *p, *q;
300    struct url *u;
301    int i;
302
303    /* allocate struct url */
304    if ((u = calloc(1, sizeof *u)) == NULL) {
305	_fetch_syserr();
306	return NULL;
307    }
308
309    /* scheme name */
310    if ((p = strstr(URL, ":/"))) {
311	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL);
312	URL = ++p;
313	/*
314	 * Only one slash: no host, leave slash as part of document
315	 * Two slashes: host follows, strip slashes
316	 */
317	if (URL[1] == '/')
318	    URL = (p += 2);
319    } else {
320	p = URL;
321    }
322    if (!*URL || *URL == '/' || *URL == '.' ||
323	(u->scheme[0] == '\0' &&
324    	    strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
325	goto nohost;
326
327    p = strpbrk(URL, "/@");
328    if (p && *p == '@') {
329	/* username */
330	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
331	    if (i < URL_USERLEN)
332		u->user[i++] = *q;
333
334	/* password */
335	if (*q == ':')
336	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
337		if (i < URL_PWDLEN)
338		    u->pwd[i++] = *q;
339
340	p++;
341    } else {
342	p = URL;
343    }
344
345    /* hostname */
346#ifdef INET6
347    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
348	(*++q == '\0' || *q == '/' || *q == ':')) {
349	if ((i = q - p - 2) > MAXHOSTNAMELEN)
350	    i = MAXHOSTNAMELEN;
351	strncpy(u->host, ++p, i);
352	p = q;
353    } else
354#endif
355	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
356	    if (i < MAXHOSTNAMELEN)
357		u->host[i++] = *p;
358
359    /* port */
360    if (*p == ':') {
361	for (q = ++p; *q && (*q != '/'); q++)
362	    if (isdigit(*q))
363		u->port = u->port * 10 + (*q - '0');
364	    else {
365		/* invalid port */
366		_url_seterr(URL_BAD_PORT);
367		goto ouch;
368	    }
369	while (*p && (*p != '/'))
370	    p++;
371    }
372
373nohost:
374    /* document */
375    if (!*p)
376	p = "/";
377
378    if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
379	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
380	const char hexnums[] = "0123456789abcdef";
381
382	/* percent-escape whitespace. */
383	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
384	    _fetch_syserr();
385	    goto ouch;
386	}
387	u->doc = doc;
388	while (*p != '\0') {
389	    if (!isspace(*p)) {
390		*doc++ = *p++;
391            } else {
392		*doc++ = '%';
393		*doc++ = hexnums[((unsigned int)*p) >> 4];
394		*doc++ = hexnums[((unsigned int)*p) & 0xf];
395		p++;
396            }
397	}
398	*doc = '\0';
399    } else if ((u->doc = strdup(p)) == NULL) {
400	_fetch_syserr();
401	goto ouch;
402    }
403
404    DEBUG(fprintf(stderr,
405		  "scheme:   [\033[1m%s\033[m]\n"
406		  "user:     [\033[1m%s\033[m]\n"
407		  "password: [\033[1m%s\033[m]\n"
408		  "host:     [\033[1m%s\033[m]\n"
409		  "port:     [\033[1m%d\033[m]\n"
410		  "document: [\033[1m%s\033[m]\n",
411		  u->scheme, u->user, u->pwd,
412		  u->host, u->port, u->doc));
413
414    return u;
415
416ouch:
417    free(u);
418    return NULL;
419}
420
421/*
422 * Free a URL
423 */
424void
425fetchFreeURL(struct url *u)
426{
427    free(u->doc);
428    free(u);
429}
430