fetch.c revision 1.1.1.8
1/*	$NetBSD: fetch.c,v 1.1.1.8 2009/08/21 15:12:27 joerg Exp $	*/
2/*-
 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
4 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $
31 */
32
33#if HAVE_CONFIG_H
34#include "config.h"
35#endif
36#ifndef NETBSD
37#include <nbcompat.h>
38#endif
39
40#include <ctype.h>
41#include <errno.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45
46#include "fetch.h"
47#include "common.h"
48
/*
 * Library-wide globals.  None of them is read or written by the code
 * visible in this part of the file; the notes below are inferred from the
 * names and should be confirmed against fetch.h / common.c.
 */
/* Authentication hook (auth_t is declared outside this file). */
auth_t	 fetchAuthMethod;
/* Presumably the code of the most recent error -- TODO confirm. */
int	 fetchLastErrCode;
/* Presumably the text of the most recent error -- TODO confirm. */
char	 fetchLastErrString[MAXERRSTRING];
/* Presumably an I/O timeout; units not visible here -- TODO confirm. */
int	 fetchTimeout;
/* Defaults to 1; volatile, so possibly changed asynchronously -- confirm. */
volatile int	 fetchRestartCalls = 1;
/* Presumably enables debug output when non-zero -- TODO confirm. */
int	 fetchDebug;
55
56
57/*** Local data **************************************************************/
58
/*
 * Error messages for parser errors.
 *
 * The URL_* codes below are the values this file passes to url_seterr()
 * when URL construction, parsing or scheme dispatch fails.  Each table
 * row is { parser error code, FETCH_* category, message text }.
 * NOTE(review): struct fetcherr and url_seterr() are declared elsewhere
 * (presumably common.h); the field meanings given here are read off the
 * initialisers -- confirm against the declaration.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	/* presumably the catch-all / end-of-table entry -- confirm */
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
71
72
73/*** Public API **************************************************************/
74
75/*
76 * Select the appropriate protocol for the URL scheme, and return a
77 * read-only stream connected to the document referenced by the URL.
78 * Also fill out the struct url_stat.
79 */
80fetchIO *
81fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
82{
83
84	if (us != NULL) {
85		us->size = -1;
86		us->atime = us->mtime = 0;
87	}
88	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
89		return (fetchXGetFile(URL, us, flags));
90	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
91		return (fetchXGetFTP(URL, us, flags));
92	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
93		return (fetchXGetHTTP(URL, us, flags));
94	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
95		return (fetchXGetHTTP(URL, us, flags));
96	url_seterr(URL_BAD_SCHEME);
97	return (NULL);
98}
99
100/*
101 * Select the appropriate protocol for the URL scheme, and return a
102 * read-only stream connected to the document referenced by the URL.
103 */
104fetchIO *
105fetchGet(struct url *URL, const char *flags)
106{
107	return (fetchXGet(URL, NULL, flags));
108}
109
110/*
111 * Select the appropriate protocol for the URL scheme, and return a
112 * write-only stream connected to the document referenced by the URL.
113 */
114fetchIO *
115fetchPut(struct url *URL, const char *flags)
116{
117
118	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
119		return (fetchPutFile(URL, flags));
120	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
121		return (fetchPutFTP(URL, flags));
122	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
123		return (fetchPutHTTP(URL, flags));
124	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
125		return (fetchPutHTTP(URL, flags));
126	url_seterr(URL_BAD_SCHEME);
127	return (NULL);
128}
129
130/*
131 * Select the appropriate protocol for the URL scheme, and return the
132 * size of the document referenced by the URL if it exists.
133 */
134int
135fetchStat(struct url *URL, struct url_stat *us, const char *flags)
136{
137
138	if (us != NULL) {
139		us->size = -1;
140		us->atime = us->mtime = 0;
141	}
142	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143		return (fetchStatFile(URL, us, flags));
144	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145		return (fetchStatFTP(URL, us, flags));
146	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147		return (fetchStatHTTP(URL, us, flags));
148	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149		return (fetchStatHTTP(URL, us, flags));
150	url_seterr(URL_BAD_SCHEME);
151	return (-1);
152}
153
154/*
155 * Select the appropriate protocol for the URL scheme, and return a
156 * list of files in the directory pointed to by the URL.
157 */
158int
159fetchList(struct url_list *ue, struct url *URL, const char *pattern,
160    const char *flags)
161{
162
163	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
164		return (fetchListFile(ue, URL, pattern, flags));
165	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
166		return (fetchListFTP(ue, URL, pattern, flags));
167	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
168		return (fetchListHTTP(ue, URL, pattern, flags));
169	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
170		return (fetchListHTTP(ue, URL, pattern, flags));
171	url_seterr(URL_BAD_SCHEME);
172	return -1;
173}
174
175/*
176 * Attempt to parse the given URL; if successful, call fetchXGet().
177 */
178fetchIO *
179fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
180{
181	struct url *u;
182	fetchIO *f;
183
184	if ((u = fetchParseURL(URL)) == NULL)
185		return (NULL);
186
187	f = fetchXGet(u, us, flags);
188
189	fetchFreeURL(u);
190	return (f);
191}
192
193/*
194 * Attempt to parse the given URL; if successful, call fetchGet().
195 */
196fetchIO *
197fetchGetURL(const char *URL, const char *flags)
198{
199	return (fetchXGetURL(URL, NULL, flags));
200}
201
202/*
203 * Attempt to parse the given URL; if successful, call fetchPut().
204 */
205fetchIO *
206fetchPutURL(const char *URL, const char *flags)
207{
208	struct url *u;
209	fetchIO *f;
210
211	if ((u = fetchParseURL(URL)) == NULL)
212		return (NULL);
213
214	f = fetchPut(u, flags);
215
216	fetchFreeURL(u);
217	return (f);
218}
219
/*
 * String-URL front end for fetchStat(): parse URL, query its metadata
 * into *us, then release the parsed URL.
 *
 * Returns -1 if the URL cannot be parsed; otherwise returns the result
 * of fetchStat().
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return -1;

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return status;
}
237
/*
 * String-URL front end for fetchList(): parse URL, list the directory it
 * names into ue, then release the parsed URL.
 *
 * Returns -1 if the URL cannot be parsed; otherwise returns the result
 * of fetchList().
 */
int
fetchListURL(struct url_list *ue, const char *URL, const char *pattern,
    const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return -1;

	status = fetchList(ue, parsed, pattern, flags);
	fetchFreeURL(parsed);

	return status;
}
256
257/*
258 * Make a URL
259 */
260struct url *
261fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
262    const char *user, const char *pwd)
263{
264	struct url *u;
265
266	if (!scheme || (!host && !doc)) {
267		url_seterr(URL_MALFORMED);
268		return (NULL);
269	}
270
271	if (port < 0 || port > 65535) {
272		url_seterr(URL_BAD_PORT);
273		return (NULL);
274	}
275
276	/* allocate struct url */
277	if ((u = calloc(1, sizeof(*u))) == NULL) {
278		fetch_syserr();
279		return (NULL);
280	}
281
282	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
283		fetch_syserr();
284		free(u);
285		return (NULL);
286	}
287
288#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
289	seturl(scheme);
290	seturl(host);
291	seturl(user);
292	seturl(pwd);
293#undef seturl
294	u->port = port;
295
296	return (u);
297}
298
/*
 * Tell whether character x may appear unescaped in the document part of
 * a URL.
 *
 * Returns 1 for ASCII letters and digits, for the punctuation
 * $ - _ . + ! * ' ( ) , and for the characters ? : @ & = / ; that are
 * allowed in segment and path components, as well as for '%' (so that
 * already-quoted input is left alone).  Returns 0 for everything else,
 * including the NUL character.
 */
int
fetch_urlpath_safe(char x)
{
	static const char safe_punct[] =
	    "$-_.+!*'(),"	/* always safe */
	    "?:@&=/;"		/* segment and path components */
	    "%";		/* something already quoted */

	if (x >= '0' && x <= '9')
		return 1;
	if (x >= 'A' && x <= 'Z')
		return 1;
	if (x >= 'a' && x <= 'z')
		return 1;

	/* strchr() would match the terminator, so rule out NUL first. */
	if (x == '\0')
		return 0;

	return strchr(safe_punct, x) != NULL;
}
333
334/*
335 * Copy an existing URL.
336 */
337struct url *
338fetchCopyURL(const struct url *src)
339{
340	struct url *dst;
341	char *doc;
342
343	/* allocate struct url */
344	if ((dst = malloc(sizeof(*dst))) == NULL) {
345		fetch_syserr();
346		return (NULL);
347	}
348	if ((doc = strdup(src->doc)) == NULL) {
349		fetch_syserr();
350		free(dst);
351		return (NULL);
352	}
353	*dst = *src;
354	dst->doc = doc;
355
356	return dst;
357}
358
359/*
360 * Split an URL into components. URL syntax is:
361 * [method:/][/[user[:pwd]@]host[:port]/][document]
362 * This almost, but not quite, RFC1738 URL syntax.
363 */
364struct url *
365fetchParseURL(const char *URL)
366{
367	const char *p, *q;
368	struct url *u;
369	size_t i, count;
370	int pre_quoted;
371
372	/* allocate struct url */
373	if ((u = calloc(1, sizeof(*u))) == NULL) {
374		fetch_syserr();
375		return (NULL);
376	}
377
378	if (*URL == '/') {
379		pre_quoted = 0;
380		strcpy(u->scheme, SCHEME_FILE);
381		p = URL;
382		goto quote_doc;
383	}
384	if (strncmp(URL, "file:", 5) == 0) {
385		pre_quoted = 1;
386		strcpy(u->scheme, SCHEME_FILE);
387		URL += 5;
388		if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') {
389			url_seterr(URL_MALFORMED);
390			goto ouch;
391		}
392		p = URL + 2;
393		goto quote_doc;
394	}
395	if (strncmp(URL, "http:", 5) == 0 ||
396	    strncmp(URL, "https:", 6) == 0) {
397		pre_quoted = 1;
398		if (URL[4] == ':') {
399			strcpy(u->scheme, SCHEME_HTTP);
400			URL += 5;
401		} else {
402			strcpy(u->scheme, SCHEME_HTTPS);
403			URL += 6;
404		}
405
406		if (URL[0] != '/' || URL[1] != '/') {
407			url_seterr(URL_MALFORMED);
408			goto ouch;
409		}
410		URL += 2;
411		p = URL;
412		goto find_user;
413	}
414	if (strncmp(URL, "ftp:", 4) == 0) {
415		pre_quoted = 1;
416		strcpy(u->scheme, SCHEME_FTP);
417		URL += 4;
418		if (URL[0] != '/' || URL[1] != '/') {
419			url_seterr(URL_MALFORMED);
420			goto ouch;
421		}
422		URL += 2;
423		p = URL;
424		goto find_user;
425	}
426
427	url_seterr(URL_BAD_SCHEME);
428	goto ouch;
429
430find_user:
431	p = strpbrk(URL, "/@");
432	if (p != NULL && *p == '@') {
433		/* username */
434		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) {
435			if (i < URL_USERLEN)
436				u->user[i++] = *q;
437		}
438
439		/* password */
440		if (*q == ':') {
441			for (q++, i = 0; (*q != '@'); q++)
442				if (i < URL_PWDLEN)
443					u->pwd[i++] = *q;
444		}
445
446		p++;
447	} else {
448		p = URL;
449	}
450
451	/* hostname */
452#ifdef INET6
453	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
454	    (*++q == '\0' || *q == '/' || *q == ':')) {
455		if ((i = q - p - 2) > URL_HOSTLEN)
456			i = URL_HOSTLEN;
457		strncpy(u->host, ++p, i);
458		p = q;
459	} else
460#endif
461		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
462			if (i < URL_HOSTLEN)
463				u->host[i++] = *p;
464
465	/* port */
466	if (*p == ':') {
467		for (q = ++p; *q && (*q != '/'); q++)
468			if (isdigit((unsigned char)*q))
469				u->port = u->port * 10 + (*q - '0');
470			else {
471				/* invalid port */
472				url_seterr(URL_BAD_PORT);
473				goto ouch;
474			}
475		p = q;
476	}
477
478	/* document */
479	if (!*p)
480		p = "/";
481
482quote_doc:
483	count = 1;
484	for (i = 0; p[i] != '\0'; ++i) {
485		if ((!pre_quoted && p[i] == '%') ||
486		    !fetch_urlpath_safe(p[i]))
487			count += 3;
488		else
489			++count;
490	}
491
492	if ((u->doc = malloc(count)) == NULL) {
493		fetch_syserr();
494		goto ouch;
495	}
496	for (i = 0; *p != '\0'; ++p) {
497		if ((!pre_quoted && *p == '%') ||
498		    !fetch_urlpath_safe(*p)) {
499			u->doc[i++] = '%';
500			if ((unsigned char)*p < 160)
501				u->doc[i++] = '0' + ((unsigned char)*p) / 16;
502			else
503				u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16;
504			if ((unsigned char)*p % 16 < 10)
505				u->doc[i++] = '0' + ((unsigned char)*p) % 16;
506			else
507				u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16;
508		} else
509			u->doc[i++] = *p;
510	}
511	u->doc[i] = '\0';
512
513	return (u);
514
515ouch:
516	free(u);
517	return (NULL);
518}
519
520/*
521 * Free a URL
522 */
523void
524fetchFreeURL(struct url *u)
525{
526	free(u->doc);
527	free(u);
528}
529
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Letters are folded to lower case first, so 'a'..'f' and 'A'..'F' give
 * 10..15.  Any other input is treated as a decimal digit (its distance
 * from '0'); the function does not validate its argument.
 */
static char
xdigit2digit(char digit)
{
	char lower = tolower((unsigned char)digit);

	if (lower >= 'a' && lower <= 'f')
		return lower - 'a' + 10;

	return lower - '0';
}
541
542/*
543 * Unquote whole URL.
544 * Skips optional parts like query or fragment identifier.
545 */
546char *
547fetchUnquotePath(struct url *url)
548{
549	char *unquoted;
550	const char *iter;
551	size_t i;
552
553	if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL)
554		return NULL;
555
556	for (i = 0, iter = url->doc; *iter != '\0'; ++iter) {
557		if (*iter == '#' || *iter == '?')
558			break;
559		if (iter[0] != '%' ||
560		    !isxdigit((unsigned char)iter[1]) ||
561		    !isxdigit((unsigned char)iter[2])) {
562			unquoted[i++] = *iter;
563			continue;
564		}
565		unquoted[i++] = xdigit2digit(iter[1]) * 16 +
566		    xdigit2digit(iter[2]);
567		iter += 2;
568	}
569	unquoted[i] = '\0';
570	return unquoted;
571}
572
573
/*
 * Extract the file name component of a URL.
 *
 * The document path is unquoted with fetchUnquotePath() and everything
 * after its last '/' is returned; if the path contains no '/', the
 * whole unquoted path is returned.
 *
 * Returns a malloc()ed string owned by the caller, or NULL if an
 * allocation fails.
 */
char *
fetchUnquoteFilename(struct url *url)
{
	char *path, *name;
	const char *slash;

	path = fetchUnquotePath(url);
	if (path == NULL)
		return NULL;

	slash = strrchr(path, '/');
	if (slash != NULL) {
		name = strdup(slash + 1);
		free(path);
		return name;
	}

	return path;
}
592
593char *
594fetchStringifyURL(const struct url *url)
595{
596	size_t total;
597	char *doc;
598
599	/* scheme :// user : pwd @ host :port doc */
600	total = strlen(url->scheme) + 3 + strlen(url->user) + 1 +
601	    strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1;
602	if ((doc = malloc(total)) == NULL)
603		return NULL;
604	if (url->port != 0)
605		snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s",
606		    url->scheme,
607		    url->scheme[0] != '\0' ? "://" : "",
608		    url->user,
609		    url->pwd[0] != '\0' ? ":" : "",
610		    url->pwd,
611		    url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
612		    url->host,
613		    (int)url->port,
614		    url->doc);
615	else {
616		snprintf(doc, total, "%s%s%s%s%s%s%s%s",
617		    url->scheme,
618		    url->scheme[0] != '\0' ? "://" : "",
619		    url->user,
620		    url->pwd[0] != '\0' ? ":" : "",
621		    url->pwd,
622		    url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
623		    url->host,
624		    url->doc);
625	}
626	return doc;
627}
628