/* fetch.c revision 90264 */
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
271541Srgrimes */ 281541Srgrimes 291541Srgrimes#include <sys/cdefs.h> 301541Srgrimes__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 90264 2002-02-05 21:32:16Z des $"); 311541Srgrimes 321541Srgrimes#include <sys/param.h> 3314505Shsu#include <sys/errno.h> 3450477Speter 351541Srgrimes#include <ctype.h> 361541Srgrimes#include <stdio.h> 372165Spaul#include <stdlib.h> 382165Spaul#include <string.h> 392165Spaul 4015492Sbde#include "fetch.h" 411541Srgrimes#include "common.h" 421541Srgrimes 431541Srgrimesauth_t fetchAuthMethod; 441541Srgrimesint fetchLastErrCode; 451541Srgrimeschar fetchLastErrString[MAXERRSTRING]; 461541Srgrimesint fetchTimeout; 471541Srgrimesint fetchRestartCalls = 1; 481541Srgrimesint fetchDebug; 4936079Swollman 5036079Swollman 5161837Salfred/*** Local data **************************************************************/ 5261837Salfred 531541Srgrimes/* 5436079Swollman * Error messages for parser errors 551541Srgrimes */ 561541Srgrimes#define URL_MALFORMED 1 571541Srgrimes#define URL_BAD_SCHEME 2 581541Srgrimes#define URL_BAD_PORT 3 591541Srgrimesstatic struct fetcherr _url_errlist[] = { 601541Srgrimes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 611541Srgrimes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6213765Smpp { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 631541Srgrimes { -1, FETCH_UNKNOWN, "Unknown parser error" } 641541Srgrimes}; 651541Srgrimes 661541Srgrimes 671541Srgrimes/*** Public API **************************************************************/ 681541Srgrimes 691541Srgrimes/* 701541Srgrimes * Select the appropriate protocol for the URL scheme, and return a 711541Srgrimes * read-only stream connected to the document referenced by the URL. 721541Srgrimes * Also fill out the struct url_stat. 
7360938Sjake */ 7460938SjakeFILE * 7560938SjakefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7614547Sdg{ 7718787Spst int direct; 7818787Spst 791541Srgrimes direct = CHECK_FLAG('d'); 801541Srgrimes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 811541Srgrimes return fetchXGetFile(URL, us, flags); 8241087Struckman else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8341087Struckman return fetchXGetHTTP(URL, us, flags); 841541Srgrimes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 8560938Sjake return fetchXGetFTP(URL, us, flags); 861541Srgrimes } else { 871541Srgrimes _url_seterr(URL_BAD_SCHEME); 881541Srgrimes return NULL; 891541Srgrimes } 901541Srgrimes} 911541Srgrimes 921541Srgrimes/* 931541Srgrimes * Select the appropriate protocol for the URL scheme, and return a 941541Srgrimes * read-only stream connected to the document referenced by the URL. 951541Srgrimes */ 961541SrgrimesFILE * 971541SrgrimesfetchGet(struct url *URL, const char *flags) 981541Srgrimes{ 991541Srgrimes return fetchXGet(URL, NULL, flags); 1001541Srgrimes} 1011541Srgrimes 1021541Srgrimes/* 1031541Srgrimes * Select the appropriate protocol for the URL scheme, and return a 1041541Srgrimes * write-only stream connected to the document referenced by the URL. 
1051541Srgrimes */ 10636527SpeterFILE * 1071541SrgrimesfetchPut(struct url *URL, const char *flags) 10855943Sjasone{ 10959288Sjlemon int direct; 1101541Srgrimes 11138482Swollman direct = CHECK_FLAG('d'); 11238482Swollman if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11351381Sgreen return fetchPutFile(URL, flags); 11438482Swollman else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11536079Swollman return fetchPutHTTP(URL, flags); 11643458Sbde else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 11761837Salfred return fetchPutFTP(URL, flags); 11861837Salfred } else { 11961837Salfred _url_seterr(URL_BAD_SCHEME); 12061837Salfred return NULL; 12161837Salfred } 1221541Srgrimes} 1231541Srgrimes 1241541Srgrimes/* 1251541Srgrimes * Select the appropriate protocol for the URL scheme, and return the 1261541Srgrimes * size of the document referenced by the URL if it exists. 12714547Sdg */ 12814547Sdgint 12914547SdgfetchStat(struct url *URL, struct url_stat *us, const char *flags) 13014547Sdg{ 13114547Sdg int direct; 13214547Sdg 13314547Sdg direct = CHECK_FLAG('d'); 1341541Srgrimes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13514547Sdg return fetchStatFile(URL, us, flags); 13614547Sdg else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 13714547Sdg return fetchStatHTTP(URL, us, flags); 1381541Srgrimes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 13914547Sdg return fetchStatFTP(URL, us, flags); 14014547Sdg } else { 14143196Sfenner _url_seterr(URL_BAD_SCHEME); 1421541Srgrimes return -1; 14336079Swollman } 14436079Swollman} 14536079Swollman 14636079Swollman/* 14736079Swollman * Select the appropriate protocol for the URL scheme, and return a 14836079Swollman * list of files in the directory pointed to by the URL. 
14936079Swollman */ 15036079Swollmanstruct url_ent * 15136079SwollmanfetchList(struct url *URL, const char *flags) 15236079Swollman{ 15336079Swollman int direct; 15436079Swollman 15536079Swollman direct = CHECK_FLAG('d'); 15636079Swollman if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15736079Swollman return fetchListFile(URL, flags); 15836079Swollman else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 15936079Swollman return fetchListHTTP(URL, flags); 16036079Swollman else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 16136079Swollman return fetchListFTP(URL, flags); 16236079Swollman } else { 16336079Swollman _url_seterr(URL_BAD_SCHEME); 16436079Swollman return NULL; 16536079Swollman } 16636079Swollman} 16736079Swollman 16836079Swollman/* 16936079Swollman * Attempt to parse the given URL; if successful, call fetchXGet(). 17036079Swollman */ 17136079SwollmanFILE * 17236079SwollmanfetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 17336079Swollman{ 17414547Sdg struct url *u; 1751541Srgrimes FILE *f; 1761541Srgrimes 1771541Srgrimes if ((u = fetchParseURL(URL)) == NULL) 1781541Srgrimes return NULL; 1791541Srgrimes 18036527Speter f = fetchXGet(u, us, flags); 18136527Speter 18255943Sjasone fetchFreeURL(u); 18359288Sjlemon return f; 18436527Speter} 18536527Speter 1861541Srgrimes/* 1871541Srgrimes * Attempt to parse the given URL; if successful, call fetchGet(). 1881541Srgrimes */ 1891541SrgrimesFILE * 1901541SrgrimesfetchGetURL(const char *URL, const char *flags) 1911541Srgrimes{ 1921541Srgrimes return fetchXGetURL(URL, NULL, flags); 1931541Srgrimes} 1941541Srgrimes 1951541Srgrimes/* 1961541Srgrimes * Attempt to parse the given URL; if successful, call fetchPut(). 
1971541Srgrimes */ 1981541SrgrimesFILE * 1991541SrgrimesfetchPutURL(const char *URL, const char *flags) 2001541Srgrimes{ 2011541Srgrimes struct url *u; 2021541Srgrimes FILE *f; 20314547Sdg 2041541Srgrimes if ((u = fetchParseURL(URL)) == NULL) 2051541Srgrimes return NULL; 2061541Srgrimes 2073304Sphk f = fetchPut(u, flags); 2081541Srgrimes 2093304Sphk fetchFreeURL(u); 2101541Srgrimes return f; 2111541Srgrimes} 2121541Srgrimes 2131541Srgrimes/* 2141541Srgrimes * Attempt to parse the given URL; if successful, call fetchStat(). 2151541Srgrimes */ 2161541Srgrimesint 2171541SrgrimesfetchStatURL(const char *URL, struct url_stat *us, const char *flags) 2181541Srgrimes{ 2191541Srgrimes struct url *u; 2201541Srgrimes int s; 2211541Srgrimes 2221541Srgrimes if ((u = fetchParseURL(URL)) == NULL) 2231541Srgrimes return -1; 2241541Srgrimes 2251541Srgrimes s = fetchStat(u, us, flags); 2261541Srgrimes 2271541Srgrimes fetchFreeURL(u); 2281541Srgrimes return s; 2291541Srgrimes} 2301541Srgrimes 2311541Srgrimes/* 2321541Srgrimes * Attempt to parse the given URL; if successful, call fetchList(). 
2331541Srgrimes */ 2341541Srgrimesstruct url_ent * 2351541SrgrimesfetchListURL(const char *URL, const char *flags) 2361541Srgrimes{ 2371541Srgrimes struct url *u; 2381541Srgrimes struct url_ent *ue; 2391541Srgrimes 2401541Srgrimes if ((u = fetchParseURL(URL)) == NULL) 2411541Srgrimes return NULL; 2421541Srgrimes 2431541Srgrimes ue = fetchList(u, flags); 2441541Srgrimes 2451541Srgrimes fetchFreeURL(u); 2461541Srgrimes return ue; 24736527Speter} 24836527Speter 24936527Speter/* 25036527Speter * Make a URL 2511541Srgrimes */ 25236527Speterstruct url * 25336527SpeterfetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25436527Speter const char *user, const char *pwd) 25536527Speter{ 2561541Srgrimes struct url *u; 25755205Speter 25831927Sbde if (!scheme || (!host && !doc)) { 25938482Swollman _url_seterr(URL_MALFORMED); 26038482Swollman return NULL; 26138482Swollman } 26238482Swollman 26338482Swollman if (port < 0 || port > 65535) { 26438482Swollman _url_seterr(URL_BAD_PORT); 26538482Swollman return NULL; 26638482Swollman } 26738482Swollman 26838482Swollman /* allocate struct url */ 26938482Swollman if ((u = calloc(1, sizeof *u)) == NULL) { 27038482Swollman _fetch_syserr(); 27138482Swollman return NULL; 27238482Swollman } 27340931Sdg 27460938Sjake if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 27540931Sdg _fetch_syserr(); 27640931Sdg free(u); 27740931Sdg return NULL; 27840931Sdg } 27961837Salfred 28061837Salfred#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x) 28161837Salfred seturl(scheme); 28261837Salfred seturl(host); 28361837Salfred seturl(user); 28461837Salfred seturl(pwd); 28561837Salfred#undef seturl 28661837Salfred u->port = port; 28761837Salfred 28861837Salfred return u; 28961837Salfred} 29031927Sbde 29131927Sbde/* 29231927Sbde * Split an URL into components. URL syntax is: 29361837Salfred * [method:/][/[user[:pwd]@]host[:port]/][document] 29431927Sbde * This almost, but not quite, RFC1738 URL syntax. 
29531927Sbde */ 29636079Swollmanstruct url * 2972112SwollmanfetchParseURL(const char *URL) 29836079Swollman{ 29936079Swollman char *doc; 30014505Shsu const char *p, *q; 30132995Sbde struct url *u; 30215492Sbde int i; 30315492Sbde 30415492Sbde /* allocate struct url */ 30515492Sbde if ((u = calloc(1, sizeof *u)) == NULL) { 30632995Sbde _fetch_syserr(); 30732995Sbde return NULL; 30815492Sbde } 3091541Srgrimes 3101541Srgrimes /* scheme name */ 3111541Srgrimes if ((p = strstr(URL, ":/"))) { 31245311Sdt snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL); 31351418Sgreen URL = ++p; 31445311Sdt /* 31551418Sgreen * Only one slash: no host, leave slash as part of document 31643512Snewton * Two slashes: host follows, strip slashes 31736735Sdfr */ 31814505Shsu if (URL[1] == '/') 31929350Speter URL = (p += 2); 32029350Speter } else { 32152984Speter p = URL; 3223304Sphk } 3233304Sphk if (!*URL || *URL == '/' || *URL == '.' || 3243304Sphk (u->scheme[0] == '\0' && 3253304Sphk strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 32628270Swollman goto nohost; 32714505Shsu 32814505Shsu p = strpbrk(URL, "/@"); 32928270Swollman if (p && *p == '@') { 33014505Shsu /* username */ 33114505Shsu for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 33214505Shsu if (i < URL_USERLEN) 33314505Shsu u->user[i++] = *q; 33414505Shsu 33514505Shsu /* password */ 33614505Shsu if (*q == ':') 33714505Shsu for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 33819670Sbde if (i < URL_PWDLEN) 33919670Sbde u->pwd[i++] = *q; 34014505Shsu 34114505Shsu p++; 34214505Shsu } else { 34314505Shsu p = URL; 34452070Sgreen } 34552070Sgreen 34652070Sgreen /* hostname */ 34736079Swollman#ifdef INET6 34814505Shsu if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 34914505Shsu (*++q == '\0' || *q == '/' || *q == ':')) { 35014505Shsu if ((i = q - p - 2) > MAXHOSTNAMELEN) 35128270Swollman i = MAXHOSTNAMELEN; 35236079Swollman strncpy(u->host, ++p, i); 35328270Swollman p = q; 35414505Shsu } else 
35514505Shsu#endif 35614505Shsu for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 35728270Swollman if (i < MAXHOSTNAMELEN) 35814505Shsu u->host[i++] = *p; 35914505Shsu 36014505Shsu /* port */ 36136079Swollman if (*p == ':') { 36214505Shsu for (q = ++p; *q && (*q != '/'); q++) 36314505Shsu if (isdigit(*q)) 36438482Swollman u->port = u->port * 10 + (*q - '0'); 36514505Shsu else { 36614505Shsu /* invalid port */ 36714505Shsu _url_seterr(URL_BAD_PORT); 36814505Shsu goto ouch; 36914505Shsu } 37025201Swollman p = q; 37114505Shsu } 37218787Spst 37318787Spstnohost: 37427531Sfenner /* document */ 37551381Sgreen if (!*p) 37651381Sgreen p = "/"; 37738482Swollman 37838482Swollman if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 37938482Swollman strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 38053541Sshin const char hexnums[] = "0123456789abcdef"; 38153541Sshin 38253541Sshin /* percent-escape whitespace. */ 38353541Sshin if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 38453541Sshin _fetch_syserr(); 38553541Sshin goto ouch; 38629350Speter } 38729350Speter u->doc = doc; 38828270Swollman while (*p != '\0') { 38928270Swollman if (!isspace(*p)) { 39028270Swollman *doc++ = *p++; 39114505Shsu } else { 39214505Shsu *doc++ = '%'; 39328270Swollman *doc++ = hexnums[((unsigned int)*p) >> 4]; 39428270Swollman *doc++ = hexnums[((unsigned int)*p) & 0xf]; 39528270Swollman p++; 39638482Swollman } 39714505Shsu } 39836079Swollman *doc = '\0'; 39914505Shsu } else if ((u->doc = strdup(p)) == NULL) { 40031927Sbde _fetch_syserr(); 40161837Salfred goto ouch; 40261837Salfred } 40361837Salfred 40461837Salfred DEBUG(fprintf(stderr, 40561837Salfred "scheme: [%s]\n" 40661837Salfred "user: [%s]\n" 40765534Salfred "password: [%s]\n" 40861837Salfred "host: [%s]\n" 40961837Salfred "port: [%d]\n" 41055205Speter "document: [%s]\n", 4112165Spaul u->scheme, u->user, u->pwd, 41214505Shsu u->host, u->port, u->doc)); 413 414 return u; 415 416ouch: 417 free(u); 418 return NULL; 419} 420 421/* 422 * Free a URL 423 */ 
424void 425fetchFreeURL(struct url *u) 426{ 427 free(u->doc); 428 free(u); 429} 430