fetch.c revision 87560
181634Sbrian/*- 281634Sbrian * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 381634Sbrian * All rights reserved. 481634Sbrian * 581634Sbrian * Redistribution and use in source and binary forms, with or without 681634Sbrian * modification, are permitted provided that the following conditions 781634Sbrian * are met: 881634Sbrian * 1. Redistributions of source code must retain the above copyright 981634Sbrian * notice, this list of conditions and the following disclaimer 1081634Sbrian * in this position and unchanged. 1181634Sbrian * 2. Redistributions in binary form must reproduce the above copyright 1281634Sbrian * notice, this list of conditions and the following disclaimer in the 1381634Sbrian * documentation and/or other materials provided with the distribution. 1481634Sbrian * 3. The name of the author may not be used to endorse or promote products 1581634Sbrian * derived from this software without specific prior written permission 1681634Sbrian * 1781634Sbrian * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1881634Sbrian * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1981634Sbrian * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2081634Sbrian * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2181634Sbrian * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2281634Sbrian * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2381634Sbrian * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2481634Sbrian * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2581634Sbrian * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2681634Sbrian * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2781634Sbrian */ 2881634Sbrian 2981634Sbrian#include <sys/cdefs.h> 3081634Sbrian__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 87560 2001-12-09 15:05:19Z des $"); 3196732Sbrian 3296732Sbrian#include <sys/param.h> 3396732Sbrian#include <sys/errno.h> 3496732Sbrian 3581634Sbrian#include <ctype.h> 3681634Sbrian#include <stdio.h> 3781634Sbrian#include <stdlib.h> 3881634Sbrian#include <string.h> 3981634Sbrian 4081634Sbrian#include "fetch.h" 4181634Sbrian#include "common.h" 4281634Sbrian 4381634Sbrianauth_t fetchAuthMethod; 4481634Sbrianint fetchLastErrCode; 4581634Sbrianchar fetchLastErrString[MAXERRSTRING]; 4681634Sbrianint fetchTimeout; 4781634Sbrianint fetchRestartCalls = 1; 4881634Sbrianint fetchDebug; 4981634Sbrian 5081634Sbrian 5181634Sbrian/*** Local data **************************************************************/ 5281634Sbrian 5381634Sbrian/* 5481634Sbrian * Error messages for parser errors 5581634Sbrian */ 5681634Sbrian#define URL_MALFORMED 1 5781634Sbrian#define URL_BAD_SCHEME 2 5881634Sbrian#define URL_BAD_PORT 3 5981634Sbrianstatic struct fetcherr _url_errlist[] = { 6081634Sbrian { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6181634Sbrian { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6281634Sbrian { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6381634Sbrian { -1, FETCH_UNKNOWN, "Unknown parser error" } 6481634Sbrian}; 6581634Sbrian 6681634Sbrian 6781634Sbrian/*** Public API **************************************************************/ 6881634Sbrian 6981634Sbrian/* 7081634Sbrian * Select the appropriate protocol for the URL scheme, and return a 7181634Sbrian * read-only stream connected to the document referenced by the URL. 7281634Sbrian * Also fill out the struct url_stat. 7381634Sbrian */ 7481634SbrianFILE * 7581634SbrianfetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7681634Sbrian{ 7781634Sbrian int direct; 7881634Sbrian 7981634Sbrian direct = CHECK_FLAG('d'); 8081634Sbrian if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8181634Sbrian return fetchXGetFile(URL, us, flags); 8281634Sbrian else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8381634Sbrian return fetchXGetHTTP(URL, us, flags); 8481634Sbrian else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 8581634Sbrian return fetchXGetFTP(URL, us, flags); 8681634Sbrian } else { 8781634Sbrian _url_seterr(URL_BAD_SCHEME); 8881634Sbrian return NULL; 8981634Sbrian } 9081634Sbrian} 9181634Sbrian 9281634Sbrian/* 9381634Sbrian * Select the appropriate protocol for the URL scheme, and return a 9481634Sbrian * read-only stream connected to the document referenced by the URL. 9581634Sbrian */ 9681634SbrianFILE * 9781634SbrianfetchGet(struct url *URL, const char *flags) 9881634Sbrian{ 9981634Sbrian return fetchXGet(URL, NULL, flags); 10081634Sbrian} 10181634Sbrian 10281634Sbrian/* 10381634Sbrian * Select the appropriate protocol for the URL scheme, and return a 10481634Sbrian * write-only stream connected to the document referenced by the URL. 10581634Sbrian */ 10681634SbrianFILE * 10781634SbrianfetchPut(struct url *URL, const char *flags) 10881634Sbrian{ 10981634Sbrian int direct; 11081634Sbrian 11181634Sbrian direct = CHECK_FLAG('d'); 11281634Sbrian if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11381634Sbrian return fetchPutFile(URL, flags); 11481634Sbrian else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11581634Sbrian return fetchPutHTTP(URL, flags); 11681634Sbrian else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 11781634Sbrian return fetchPutFTP(URL, flags); 11881634Sbrian } else { 11981634Sbrian _url_seterr(URL_BAD_SCHEME); 12081634Sbrian return NULL; 12181634Sbrian } 12281634Sbrian} 12381634Sbrian 12481634Sbrian/* 12581634Sbrian * Select the appropriate protocol for the URL scheme, and return the 12681634Sbrian * size of the document referenced by the URL if it exists. 12781634Sbrian */ 12881634Sbrianint 12981634SbrianfetchStat(struct url *URL, struct url_stat *us, const char *flags) 13081634Sbrian{ 13181634Sbrian int direct; 13281634Sbrian 13381634Sbrian direct = CHECK_FLAG('d'); 13481634Sbrian if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13581634Sbrian return fetchStatFile(URL, us, flags); 13681634Sbrian else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 13781634Sbrian return fetchStatHTTP(URL, us, flags); 13881634Sbrian else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 13981634Sbrian return fetchStatFTP(URL, us, flags); 14081634Sbrian } else { 14181634Sbrian _url_seterr(URL_BAD_SCHEME); 14281634Sbrian return -1; 14381634Sbrian } 14481634Sbrian} 14581634Sbrian 14681634Sbrian/* 14781634Sbrian * Select the appropriate protocol for the URL scheme, and return a 14881634Sbrian * list of files in the directory pointed to by the URL. 14981634Sbrian */ 15081634Sbrianstruct url_ent * 15181634SbrianfetchList(struct url *URL, const char *flags) 15281634Sbrian{ 15381634Sbrian int direct; 15481634Sbrian 15581634Sbrian direct = CHECK_FLAG('d'); 15681634Sbrian if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15781634Sbrian return fetchListFile(URL, flags); 15881634Sbrian else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 15981634Sbrian return fetchListHTTP(URL, flags); 16081634Sbrian else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 16181634Sbrian return fetchListFTP(URL, flags); 16281634Sbrian } else { 16381634Sbrian _url_seterr(URL_BAD_SCHEME); 16481634Sbrian return NULL; 16581634Sbrian } 166102558Sbrian} 167102558Sbrian 168102558Sbrian/* 169102558Sbrian * Attempt to parse the given URL; if successful, call fetchXGet(). 170102558Sbrian */ 171102558SbrianFILE * 17281634SbrianfetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 17381634Sbrian{ 17481634Sbrian struct url *u; 17581634Sbrian FILE *f; 17681634Sbrian 17781634Sbrian if ((u = fetchParseURL(URL)) == NULL) 178102558Sbrian return NULL; 17981634Sbrian 18081634Sbrian f = fetchXGet(u, us, flags); 18181634Sbrian 18281634Sbrian fetchFreeURL(u); 18381634Sbrian return f; 18481634Sbrian} 18581634Sbrian 18681634Sbrian/* 18781634Sbrian * Attempt to parse the given URL; if successful, call fetchGet(). 18881634Sbrian */ 18981739SbrianFILE * 19081739SbrianfetchGetURL(const char *URL, const char *flags) 19181739Sbrian{ 19281739Sbrian return fetchXGetURL(URL, NULL, flags); 19381739Sbrian} 19481739Sbrian 19581634Sbrian/* 19681634Sbrian * Attempt to parse the given URL; if successful, call fetchPut(). 19781634Sbrian */ 19881634SbrianFILE * 19981634SbrianfetchPutURL(const char *URL, const char *flags) 20081634Sbrian{ 20181634Sbrian struct url *u; 20281634Sbrian FILE *f; 20381634Sbrian 20481634Sbrian if ((u = fetchParseURL(URL)) == NULL) 20581634Sbrian return NULL; 20681634Sbrian 20781634Sbrian f = fetchPut(u, flags); 20881634Sbrian 20981634Sbrian fetchFreeURL(u); 21081634Sbrian return f; 21181634Sbrian} 21281634Sbrian 21381634Sbrian/* 21481634Sbrian * Attempt to parse the given URL; if successful, call fetchStat(). 21581634Sbrian */ 21681634Sbrianint 21781634SbrianfetchStatURL(const char *URL, struct url_stat *us, const char *flags) 21881634Sbrian{ 21981634Sbrian struct url *u; 22081634Sbrian int s; 22181634Sbrian 22281634Sbrian if ((u = fetchParseURL(URL)) == NULL) 22381634Sbrian return -1; 22481634Sbrian 22581634Sbrian s = fetchStat(u, us, flags); 22681634Sbrian 22781634Sbrian fetchFreeURL(u); 22881634Sbrian return s; 22981634Sbrian} 23081634Sbrian 23181634Sbrian/* 23281634Sbrian * Attempt to parse the given URL; if successful, call fetchList(). 23381634Sbrian */ 23481634Sbrianstruct url_ent * 23581634SbrianfetchListURL(const char *URL, const char *flags) 23681634Sbrian{ 23781634Sbrian struct url *u; 23881634Sbrian struct url_ent *ue; 23981634Sbrian 24081634Sbrian if ((u = fetchParseURL(URL)) == NULL) 24181634Sbrian return NULL; 24281634Sbrian 24381634Sbrian ue = fetchList(u, flags); 24481634Sbrian 24581634Sbrian fetchFreeURL(u); 24681634Sbrian return ue; 24781634Sbrian} 24881634Sbrian 24981634Sbrian/* 25081634Sbrian * Make a URL 25181634Sbrian */ 25281634Sbrianstruct url * 25381634SbrianfetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25481634Sbrian const char *user, const char *pwd) 25581634Sbrian{ 25681634Sbrian struct url *u; 25781634Sbrian 25881634Sbrian if (!scheme || (!host && !doc)) { 25981634Sbrian _url_seterr(URL_MALFORMED); 26081634Sbrian return NULL; 26181634Sbrian } 26281634Sbrian 26381634Sbrian if (port < 0 || port > 65535) { 26481634Sbrian _url_seterr(URL_BAD_PORT); 26581634Sbrian return NULL; 26681634Sbrian } 26781634Sbrian 26881634Sbrian /* allocate struct url */ 26981634Sbrian if ((u = calloc(1, sizeof *u)) == NULL) { 27081634Sbrian _fetch_syserr(); 27181634Sbrian return NULL; 27281634Sbrian } 27381634Sbrian 27481634Sbrian if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 27581634Sbrian _fetch_syserr(); 27681634Sbrian free(u); 27781634Sbrian return NULL; 27881634Sbrian } 27981634Sbrian 28081634Sbrian#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x) 28181634Sbrian seturl(scheme); 28281634Sbrian seturl(host); 28381634Sbrian seturl(user); 28481634Sbrian seturl(pwd); 28581634Sbrian#undef seturl 28681634Sbrian u->port = port; 28781634Sbrian 28881634Sbrian return u; 28981634Sbrian} 29081634Sbrian 29181634Sbrian/* 29281634Sbrian * Split an URL into components. URL syntax is: 29381634Sbrian * [method:/][/[user[:pwd]@]host[:port]/][document] 29481634Sbrian * This almost, but not quite, RFC1738 URL syntax. 29581634Sbrian */ 29681634Sbrianstruct url * 29781634SbrianfetchParseURL(const char *URL) 29881634Sbrian{ 29981634Sbrian char *doc; 30081634Sbrian const char *p, *q; 30181634Sbrian struct url *u; 30281634Sbrian int i; 30381634Sbrian 30481634Sbrian /* allocate struct url */ 30581634Sbrian if ((u = calloc(1, sizeof *u)) == NULL) { 30681634Sbrian _fetch_syserr(); 30781634Sbrian return NULL; 30881634Sbrian } 30981634Sbrian 31081634Sbrian /* scheme name */ 31181634Sbrian if ((p = strstr(URL, ":/"))) { 31281634Sbrian snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL); 31381634Sbrian URL = ++p; 31481634Sbrian /* 31581634Sbrian * Only one slash: no host, leave slash as part of document 31681634Sbrian * Two slashes: host follows, strip slashes 31781634Sbrian */ 31881634Sbrian if (URL[1] == '/') 31981634Sbrian URL = (p += 2); 32081634Sbrian } else { 32181634Sbrian p = URL; 32281634Sbrian } 32381634Sbrian if (!*URL || *URL == '/' || *URL == '.' || 32481634Sbrian (u->scheme[0] == '\0' && 32581634Sbrian strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 32681634Sbrian goto nohost; 32781634Sbrian 32881634Sbrian p = strpbrk(URL, "/@"); 32981634Sbrian if (p && *p == '@') { 33081634Sbrian /* username */ 33181634Sbrian for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 33281634Sbrian if (i < URL_USERLEN) 33381634Sbrian u->user[i++] = *q; 33481634Sbrian 33581634Sbrian /* password */ 33681634Sbrian if (*q == ':') 33781634Sbrian for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 33881634Sbrian if (i < URL_PWDLEN) 33981634Sbrian u->pwd[i++] = *q; 34081634Sbrian 34181634Sbrian p++; 34281634Sbrian } else { 34381634Sbrian p = URL; 34481634Sbrian } 34581634Sbrian 34681634Sbrian /* hostname */ 34781634Sbrian#ifdef INET6 34881634Sbrian if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 34981634Sbrian (*++q == '\0' || *q == '/' || *q == ':')) { 35081634Sbrian if ((i = q - p - 2) > MAXHOSTNAMELEN) 35181634Sbrian i = MAXHOSTNAMELEN; 35281634Sbrian strncpy(u->host, ++p, i); 35381634Sbrian p = q; 35481634Sbrian } else 35581634Sbrian#endif 35681634Sbrian for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 35781634Sbrian if (i < MAXHOSTNAMELEN) 35881634Sbrian u->host[i++] = *p; 35981634Sbrian 36081634Sbrian /* port */ 36181634Sbrian if (*p == ':') { 36281634Sbrian for (q = ++p; *q && (*q != '/'); q++) 36381634Sbrian if (isdigit(*q)) 36481634Sbrian u->port = u->port * 10 + (*q - '0'); 36581634Sbrian else { 36681634Sbrian /* invalid port */ 36781634Sbrian _url_seterr(URL_BAD_PORT); 36881634Sbrian goto ouch; 36981634Sbrian } 37081634Sbrian while (*p && (*p != '/')) 37181634Sbrian p++; 37281634Sbrian } 37381634Sbrian 37481634Sbriannohost: 37581634Sbrian /* document */ 37681634Sbrian if (!*p) 37781634Sbrian p = "/"; 37881634Sbrian 37981634Sbrian if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 38081634Sbrian strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 38181634Sbrian const char hexnums[] = "0123456789abcdef"; 38281634Sbrian 38381634Sbrian /* percent-escape whitespace. */ 38481634Sbrian if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 38581634Sbrian _fetch_syserr(); 38681634Sbrian goto ouch; 38781634Sbrian } 38881634Sbrian u->doc = doc; 389102558Sbrian while (*p != '\0') { 39081634Sbrian if (!isspace(*p)) { 39181634Sbrian *doc++ = *p++; 392102558Sbrian } else { 393102558Sbrian *doc++ = '%'; 394102558Sbrian *doc++ = hexnums[((unsigned int)*p) >> 4]; 395102558Sbrian *doc++ = hexnums[((unsigned int)*p) & 0xf]; 39681634Sbrian p++; 39781634Sbrian } 39881634Sbrian } 39981634Sbrian *doc = '\0'; 40081634Sbrian } else if ((u->doc = strdup(p)) == NULL) { 40181634Sbrian _fetch_syserr(); 40281634Sbrian goto ouch; 40381634Sbrian } 40481634Sbrian 40581634Sbrian DEBUG(fprintf(stderr, 40681634Sbrian "scheme: [\033[1m%s\033[m]\n" 40781634Sbrian "user: [\033[1m%s\033[m]\n" 40881634Sbrian "password: [\033[1m%s\033[m]\n" 40981634Sbrian "host: [\033[1m%s\033[m]\n" 41081634Sbrian "port: [\033[1m%d\033[m]\n" 41181634Sbrian "document: [\033[1m%s\033[m]\n", 41281634Sbrian u->scheme, u->user, u->pwd, 41381634Sbrian u->host, u->port, u->doc)); 41481634Sbrian 41581634Sbrian return u; 41681634Sbrian 41781634Sbrianouch: 41881634Sbrian free(u); 41981634Sbrian return NULL; 42098243Sbrian} 42181634Sbrian 42281634Sbrian/* 42381634Sbrian * Free a URL 42481634Sbrian */ 42581634Sbrianvoid 42681634SbrianfetchFreeURL(struct url *u) 42781634Sbrian{ 42881634Sbrian free(u->doc); 42981634Sbrian free(u); 43081634Sbrian} 43181634Sbrian