fetch.c revision 77238
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libfetch/fetch.c 77238 2001-05-26 19:37:15Z des $
 */
30
31#include <sys/param.h>
32#include <sys/errno.h>
33
34#include <ctype.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38
39#include "fetch.h"
40#include "common.h"
41
42auth_t	 fetchAuthMethod;
43int	 fetchLastErrCode;
44char	 fetchLastErrString[MAXERRSTRING];
45int	 fetchTimeout;
46int	 fetchRestartCalls = 1;
47
48
49/*** Local data **************************************************************/
50
51/*
52 * Error messages for parser errors
53 */
54#define URL_MALFORMED		1
55#define URL_BAD_SCHEME		2
56#define URL_BAD_PORT		3
57static struct fetcherr _url_errlist[] = {
58    { URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
59    { URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
60    { URL_BAD_PORT,	FETCH_URL,	"Invalid server port" },
61    { -1,		FETCH_UNKNOWN,	"Unknown parser error" }
62};
63
64
65/*** Public API **************************************************************/
66
67/*
68 * Select the appropriate protocol for the URL scheme, and return a
69 * read-only stream connected to the document referenced by the URL.
70 * Also fill out the struct url_stat.
71 */
72FILE *
73fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
74{
75    int direct;
76
77    direct = CHECK_FLAG('d');
78    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
79	return fetchXGetFile(URL, us, flags);
80    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
81	return fetchXGetHTTP(URL, us, flags);
82    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
83	return fetchXGetFTP(URL, us, flags);
84    } else {
85	_url_seterr(URL_BAD_SCHEME);
86	return NULL;
87    }
88}
89
/*
 * Shorthand for fetchXGet() for callers that do not want a
 * struct url_stat filled in: NULL is passed in its place and the
 * result of fetchXGet() is returned unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGet(URL, NULL, flags);
    return stream;
}
99
100/*
101 * Select the appropriate protocol for the URL scheme, and return a
102 * write-only stream connected to the document referenced by the URL.
103 */
104FILE *
105fetchPut(struct url *URL, const char *flags)
106{
107    int direct;
108
109    direct = CHECK_FLAG('d');
110    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
111	return fetchPutFile(URL, flags);
112    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
113	return fetchPutHTTP(URL, flags);
114    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
115	return fetchPutFTP(URL, flags);
116    } else {
117	_url_seterr(URL_BAD_SCHEME);
118	return NULL;
119    }
120}
121
122/*
123 * Select the appropriate protocol for the URL scheme, and return the
124 * size of the document referenced by the URL if it exists.
125 */
126int
127fetchStat(struct url *URL, struct url_stat *us, const char *flags)
128{
129    int direct;
130
131    direct = CHECK_FLAG('d');
132    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
133	return fetchStatFile(URL, us, flags);
134    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
135	return fetchStatHTTP(URL, us, flags);
136    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
137	return fetchStatFTP(URL, us, flags);
138    } else {
139	_url_seterr(URL_BAD_SCHEME);
140	return -1;
141    }
142}
143
144/*
145 * Select the appropriate protocol for the URL scheme, and return a
146 * list of files in the directory pointed to by the URL.
147 */
148struct url_ent *
149fetchList(struct url *URL, const char *flags)
150{
151    int direct;
152
153    direct = CHECK_FLAG('d');
154    if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
155	return fetchListFile(URL, flags);
156    else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
157	return fetchListHTTP(URL, flags);
158    else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
159	return fetchListFTP(URL, flags);
160    } else {
161	_url_seterr(URL_BAD_SCHEME);
162	return NULL;
163    }
164}
165
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(),
 * fetch through the parsed form, then release the parsed form.
 * Returns NULL without calling fetchXGet() if the string cannot be
 * parsed; otherwise returns whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
183
/*
 * Shorthand for fetchXGetURL() for callers that do not want a
 * struct url_stat filled in: NULL is passed in its place and the
 * result of fetchXGetURL() is returned unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
    FILE *stream;

    stream = fetchXGetURL(URL, NULL, flags);
    return stream;
}
192
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(),
 * open the upload through the parsed form, then release the parsed
 * form.  Returns NULL without calling fetchPut() if the string cannot
 * be parsed; otherwise returns whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
    struct url *parsed;
    FILE *stream = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);
    }
    return stream;
}
210
/*
 * String front end to fetchStat(): parse URL with fetchParseURL(),
 * stat through the parsed form, then release the parsed form.
 * Returns -1 without calling fetchStat() if the string cannot be
 * parsed; otherwise returns whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
    struct url *parsed;
    int status = -1;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);
    }
    return status;
}
228
/*
 * String front end to fetchList(): parse URL with fetchParseURL(),
 * list through the parsed form, then release the parsed form.
 * Returns NULL without calling fetchList() if the string cannot be
 * parsed; otherwise returns whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
    struct url *parsed;
    struct url_ent *entries = NULL;

    parsed = fetchParseURL(URL);
    if (parsed != NULL) {
	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);
    }
    return entries;
}
246
247/*
248 * Make a URL
249 */
250struct url *
251fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
252    const char *user, const char *pwd)
253{
254    struct url *u;
255
256    if (!scheme || (!host && !doc)) {
257	_url_seterr(URL_MALFORMED);
258	return NULL;
259    }
260
261    if (port < 0 || port > 65535) {
262	_url_seterr(URL_BAD_PORT);
263	return NULL;
264    }
265
266    /* allocate struct url */
267    if ((u = calloc(1, sizeof *u)) == NULL) {
268	_fetch_syserr();
269	return NULL;
270    }
271
272    if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
273	_fetch_syserr();
274	free(u);
275	return NULL;
276    }
277
278#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x)
279    seturl(scheme);
280    seturl(host);
281    seturl(user);
282    seturl(pwd);
283#undef seturl
284    u->port = port;
285
286    return u;
287}
288
289/*
290 * Split an URL into components. URL syntax is:
291 * [method:/][/[user[:pwd]@]host[:port]/][document]
292 * This almost, but not quite, RFC1738 URL syntax.
293 */
294struct url *
295fetchParseURL(const char *URL)
296{
297    char *doc;
298    const char *p, *q;
299    struct url *u;
300    int i;
301
302    /* allocate struct url */
303    if ((u = calloc(1, sizeof *u)) == NULL) {
304	_fetch_syserr();
305	return NULL;
306    }
307
308    /* scheme name */
309    if ((p = strstr(URL, ":/"))) {
310	snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", p - URL, URL);
311	URL = ++p;
312	/*
313	 * Only one slash: no host, leave slash as part of document
314	 * Two slashes: host follows, strip slashes
315	 */
316	if (URL[1] == '/')
317	    URL = (p += 2);
318    } else {
319	p = URL;
320    }
321    if (!*URL || *URL == '/')
322	goto nohost;
323
324    p = strpbrk(URL, "/@");
325    if (p && *p == '@') {
326	/* username */
327	for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
328	    if (i < URL_USERLEN)
329		u->user[i++] = *q;
330
331	/* password */
332	if (*q == ':')
333	    for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
334		if (i < URL_PWDLEN)
335		    u->pwd[i++] = *q;
336
337	p++;
338    } else p = URL;
339
340    /* hostname */
341#ifdef INET6
342    if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
343	(*++q == '\0' || *q == '/' || *q == ':')) {
344	if ((i = q - p - 2) > MAXHOSTNAMELEN)
345	    i = MAXHOSTNAMELEN;
346	strncpy(u->host, ++p, i);
347	p = q;
348    } else
349#endif
350	for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
351	    if (i < MAXHOSTNAMELEN)
352		u->host[i++] = *p;
353
354    /* port */
355    if (*p == ':') {
356	for (q = ++p; *q && (*q != '/'); q++)
357	    if (isdigit(*q))
358		u->port = u->port * 10 + (*q - '0');
359	    else {
360		/* invalid port */
361		_url_seterr(URL_BAD_PORT);
362		goto ouch;
363	    }
364	while (*p && (*p != '/'))
365	    p++;
366    }
367
368nohost:
369    /* document */
370    if (!*p)
371	p = "/";
372
373    if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
374	strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
375	const char hexnums[] = "0123456789abcdef";
376
377	/* percent-escape whitespace. */
378	if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
379	    _fetch_syserr();
380	    goto ouch;
381	}
382	u->doc = doc;
383	while (*p != '\0') {
384	    if (!isspace(*p)) {
385		*doc++ = *p++;
386            } else {
387		*doc++ = '%';
388		*doc++ = hexnums[((unsigned int)*p) >> 4];
389		*doc++ = hexnums[((unsigned int)*p) & 0xf];
390		p++;
391            }
392	}
393	*doc = '\0';
394    } else if ((u->doc = strdup(p)) == NULL) {
395	_fetch_syserr();
396	goto ouch;
397    }
398
399    DEBUG(fprintf(stderr,
400		  "scheme:   [\033[1m%s\033[m]\n"
401		  "user:     [\033[1m%s\033[m]\n"
402		  "password: [\033[1m%s\033[m]\n"
403		  "host:     [\033[1m%s\033[m]\n"
404		  "port:     [\033[1m%d\033[m]\n"
405		  "document: [\033[1m%s\033[m]\n",
406		  u->scheme, u->user, u->pwd,
407		  u->host, u->port, u->doc));
408
409    return u;
410
411ouch:
412    free(u);
413    return NULL;
414}
415
416/*
417 * Free a URL
418 */
419void
420fetchFreeURL(struct url *u)
421{
422    free(u->doc);
423    free(u);
424}
425