/* fetch.c revision 109967 */
137535Sdes/*- 237535Sdes * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1537535Sdes * derived from this software without specific prior written permission 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2737535Sdes */ 2837535Sdes 2984203Sdillon#include <sys/cdefs.h> 3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 109967 2003-01-28 08:04:40Z des $"); 3184203Sdillon 3237535Sdes#include <sys/param.h> 3340975Sdes#include <sys/errno.h> 3437535Sdes 3537535Sdes#include <ctype.h> 3637535Sdes#include <stdio.h> 3737535Sdes#include <stdlib.h> 3837535Sdes#include <string.h> 3937535Sdes 4037535Sdes#include "fetch.h" 4140975Sdes#include "common.h" 4237535Sdes 4377238Sdesauth_t fetchAuthMethod; 4460924Sdesint fetchLastErrCode; 4560924Sdeschar fetchLastErrString[MAXERRSTRING]; 4660924Sdesint fetchTimeout; 4763334Sdesint fetchRestartCalls = 1; 4887560Sdesint fetchDebug; 4941862Sdes 5041862Sdes 5140975Sdes/*** Local data **************************************************************/ 5237535Sdes 5340939Sdes/* 5440975Sdes * Error messages for parser errors 5540975Sdes */ 5640975Sdes#define URL_MALFORMED 1 5740975Sdes#define URL_BAD_SCHEME 2 5840975Sdes#define URL_BAD_PORT 3 5940975Sdesstatic struct fetcherr _url_errlist[] = { 6090267Sdes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6190267Sdes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6290267Sdes { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6390267Sdes { -1, FETCH_UNKNOWN, "Unknown parser error" } 6440975Sdes}; 6540975Sdes 6640975Sdes 6740975Sdes/*** Public API **************************************************************/ 6840975Sdes 6940975Sdes/* 7040939Sdes * Select the appropriate protocol for the URL scheme, and return a 7140939Sdes * read-only stream connected to the document referenced by the URL. 7263340Sdes * Also fill out the struct url_stat. 
7340939Sdes */ 7438394SdesFILE * 7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7638394Sdes{ 7790267Sdes int direct; 7860587Sume 7990267Sdes direct = CHECK_FLAG('d'); 80109694Sdes if (us != NULL) { 81109694Sdes us->size = -1; 82109694Sdes us->atime = us->mtime = 0; 83109694Sdes } 8490267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8590267Sdes return (fetchXGetFile(URL, us, flags)); 8697868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 8797868Sdes return (fetchXGetFTP(URL, us, flags)); 8890267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8990267Sdes return (fetchXGetHTTP(URL, us, flags)); 9097868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 9197868Sdes return (fetchXGetHTTP(URL, us, flags)); 9297868Sdes _url_seterr(URL_BAD_SCHEME); 9397868Sdes return (NULL); 9438394Sdes} 9538394Sdes 9640939Sdes/* 9740939Sdes * Select the appropriate protocol for the URL scheme, and return a 9863340Sdes * read-only stream connected to the document referenced by the URL. 9963340Sdes */ 10063340SdesFILE * 10175891SarchiefetchGet(struct url *URL, const char *flags) 10263340Sdes{ 10390267Sdes return (fetchXGet(URL, NULL, flags)); 10463340Sdes} 10563340Sdes 10663340Sdes/* 10763340Sdes * Select the appropriate protocol for the URL scheme, and return a 10840939Sdes * write-only stream connected to the document referenced by the URL. 
10940939Sdes */ 11038394SdesFILE * 11175891SarchiefetchPut(struct url *URL, const char *flags) 11238394Sdes{ 11390267Sdes int direct; 11460587Sume 11590267Sdes direct = CHECK_FLAG('d'); 11690267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11790267Sdes return (fetchPutFile(URL, flags)); 11897868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 11997868Sdes return (fetchPutFTP(URL, flags)); 12090267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 12190267Sdes return (fetchPutHTTP(URL, flags)); 12297868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 12397868Sdes return (fetchPutHTTP(URL, flags)); 12497868Sdes _url_seterr(URL_BAD_SCHEME); 12597868Sdes return (NULL); 12638394Sdes} 12738394Sdes 12840939Sdes/* 12940975Sdes * Select the appropriate protocol for the URL scheme, and return the 13040975Sdes * size of the document referenced by the URL if it exists. 13140975Sdes */ 13240975Sdesint 13375891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags) 13440975Sdes{ 13590267Sdes int direct; 13660587Sume 13790267Sdes direct = CHECK_FLAG('d'); 138109694Sdes if (us != NULL) { 139109694Sdes us->size = -1; 140109694Sdes us->atime = us->mtime = 0; 141109694Sdes } 14290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 14390267Sdes return (fetchStatFile(URL, us, flags)); 14497868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 14597868Sdes return (fetchStatFTP(URL, us, flags)); 14690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 14790267Sdes return (fetchStatHTTP(URL, us, flags)); 14897868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 14997868Sdes return (fetchStatHTTP(URL, us, flags)); 15040975Sdes _url_seterr(URL_BAD_SCHEME); 15190267Sdes return (-1); 15240975Sdes} 15340975Sdes 15440975Sdes/* 15541989Sdes * Select the appropriate protocol for the URL scheme, and return a 15641989Sdes * list of files in the directory pointed to by the URL. 
15741989Sdes */ 15841989Sdesstruct url_ent * 15975891SarchiefetchList(struct url *URL, const char *flags) 16041989Sdes{ 16190267Sdes int direct; 16260587Sume 16390267Sdes direct = CHECK_FLAG('d'); 16490267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 16590267Sdes return (fetchListFile(URL, flags)); 16697868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 16797868Sdes return (fetchListFTP(URL, flags)); 16890267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 16990267Sdes return (fetchListHTTP(URL, flags)); 17097868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 17197868Sdes return (fetchListHTTP(URL, flags)); 17241989Sdes _url_seterr(URL_BAD_SCHEME); 17390267Sdes return (NULL); 17441989Sdes} 17541989Sdes 17641989Sdes/* 17763340Sdes * Attempt to parse the given URL; if successful, call fetchXGet(). 17840939Sdes */ 17937535SdesFILE * 18075891SarchiefetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 18137535Sdes{ 18290267Sdes struct url *u; 18390267Sdes FILE *f; 18438394Sdes 18590267Sdes if ((u = fetchParseURL(URL)) == NULL) 18690267Sdes return (NULL); 18790267Sdes 18890267Sdes f = fetchXGet(u, us, flags); 18990267Sdes 19090267Sdes fetchFreeURL(u); 19190267Sdes return (f); 19237535Sdes} 19337535Sdes 19463340Sdes/* 19563340Sdes * Attempt to parse the given URL; if successful, call fetchGet(). 19663340Sdes */ 19763340SdesFILE * 19875891SarchiefetchGetURL(const char *URL, const char *flags) 19963340Sdes{ 20090267Sdes return (fetchXGetURL(URL, NULL, flags)); 20163340Sdes} 20237535Sdes 20340939Sdes/* 20440939Sdes * Attempt to parse the given URL; if successful, call fetchPut(). 
20540939Sdes */ 20637535SdesFILE * 20775891SarchiefetchPutURL(const char *URL, const char *flags) 20837535Sdes{ 20990267Sdes struct url *u; 21090267Sdes FILE *f; 21190267Sdes 21290267Sdes if ((u = fetchParseURL(URL)) == NULL) 21390267Sdes return (NULL); 21490267Sdes 21590267Sdes f = fetchPut(u, flags); 21690267Sdes 21790267Sdes fetchFreeURL(u); 21890267Sdes return (f); 21937535Sdes} 22037535Sdes 22137535Sdes/* 22240975Sdes * Attempt to parse the given URL; if successful, call fetchStat(). 22340975Sdes */ 22440975Sdesint 22575891SarchiefetchStatURL(const char *URL, struct url_stat *us, const char *flags) 22640975Sdes{ 22790267Sdes struct url *u; 22890267Sdes int s; 22940975Sdes 23090267Sdes if ((u = fetchParseURL(URL)) == NULL) 23190267Sdes return (-1); 23240975Sdes 23390267Sdes s = fetchStat(u, us, flags); 23440975Sdes 23590267Sdes fetchFreeURL(u); 23690267Sdes return (s); 23740975Sdes} 23840975Sdes 23940975Sdes/* 24041989Sdes * Attempt to parse the given URL; if successful, call fetchList(). 
24141989Sdes */ 24241989Sdesstruct url_ent * 24375891SarchiefetchListURL(const char *URL, const char *flags) 24441989Sdes{ 24590267Sdes struct url *u; 24690267Sdes struct url_ent *ue; 24741989Sdes 24890267Sdes if ((u = fetchParseURL(URL)) == NULL) 24990267Sdes return (NULL); 25041989Sdes 25190267Sdes ue = fetchList(u, flags); 25241989Sdes 25390267Sdes fetchFreeURL(u); 25490267Sdes return (ue); 25541989Sdes} 25641989Sdes 25741989Sdes/* 25860927Sdes * Make a URL 25960927Sdes */ 26060927Sdesstruct url * 26175891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 26275891Sarchie const char *user, const char *pwd) 26360927Sdes{ 26490267Sdes struct url *u; 26560927Sdes 26690267Sdes if (!scheme || (!host && !doc)) { 26790267Sdes _url_seterr(URL_MALFORMED); 26890267Sdes return (NULL); 26990267Sdes } 27060927Sdes 27190267Sdes if (port < 0 || port > 65535) { 27290267Sdes _url_seterr(URL_BAD_PORT); 27390267Sdes return (NULL); 27490267Sdes } 27590267Sdes 27690267Sdes /* allocate struct url */ 277109967Sdes if ((u = calloc(1, sizeof(*u))) == NULL) { 27890267Sdes _fetch_syserr(); 27990267Sdes return (NULL); 28090267Sdes } 28190267Sdes 28290267Sdes if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 28390267Sdes _fetch_syserr(); 28490267Sdes free(u); 28590267Sdes return (NULL); 28690267Sdes } 28790267Sdes 288109967Sdes#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 28990267Sdes seturl(scheme); 29090267Sdes seturl(host); 29190267Sdes seturl(user); 29290267Sdes seturl(pwd); 29360927Sdes#undef seturl 29490267Sdes u->port = port; 29560927Sdes 29690267Sdes return (u); 29760927Sdes} 29860927Sdes 29960927Sdes/* 30037535Sdes * Split an URL into components. URL syntax is: 30167042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document] 30237535Sdes * This almost, but not quite, RFC1738 URL syntax. 
30337535Sdes */ 30440975Sdesstruct url * 30575891SarchiefetchParseURL(const char *URL) 30637535Sdes{ 30790267Sdes char *doc; 30890267Sdes const char *p, *q; 30990267Sdes struct url *u; 31090267Sdes int i; 31137535Sdes 31290267Sdes /* allocate struct url */ 313109967Sdes if ((u = calloc(1, sizeof(*u))) == NULL) { 31490267Sdes _fetch_syserr(); 31590267Sdes return (NULL); 31690267Sdes } 31737535Sdes 31890267Sdes /* scheme name */ 31990267Sdes if ((p = strstr(URL, ":/"))) { 32090267Sdes snprintf(u->scheme, URL_SCHEMELEN+1, 32190267Sdes "%.*s", (int)(p - URL), URL); 32290267Sdes URL = ++p; 32390267Sdes /* 32490267Sdes * Only one slash: no host, leave slash as part of document 32590267Sdes * Two slashes: host follows, strip slashes 32690267Sdes */ 32790267Sdes if (URL[1] == '/') 32890267Sdes URL = (p += 2); 32990267Sdes } else { 33090267Sdes p = URL; 33190267Sdes } 33290267Sdes if (!*URL || *URL == '/' || *URL == '.' || 33390267Sdes (u->scheme[0] == '\0' && 33490267Sdes strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 33590267Sdes goto nohost; 33637535Sdes 33790267Sdes p = strpbrk(URL, "/@"); 33890267Sdes if (p && *p == '@') { 33990267Sdes /* username */ 34090267Sdes for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 34190267Sdes if (i < URL_USERLEN) 34290267Sdes u->user[i++] = *q; 34379423Sdes 34490267Sdes /* password */ 34590267Sdes if (*q == ':') 34690267Sdes for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 34790267Sdes if (i < URL_PWDLEN) 34890267Sdes u->pwd[i++] = *q; 34990267Sdes 35090267Sdes p++; 35190267Sdes } else { 35290267Sdes p = URL; 35390267Sdes } 35490267Sdes 35590267Sdes /* hostname */ 35660737Sume#ifdef INET6 35790267Sdes if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 35890267Sdes (*++q == '\0' || *q == '/' || *q == ':')) { 35990267Sdes if ((i = q - p - 2) > MAXHOSTNAMELEN) 36090267Sdes i = MAXHOSTNAMELEN; 36190267Sdes strncpy(u->host, ++p, i); 36290267Sdes p = q; 36390267Sdes } else 36460737Sume#endif 36590267Sdes for (i = 0; *p && (*p 
!= '/') && (*p != ':'); p++) 36690267Sdes if (i < MAXHOSTNAMELEN) 36790267Sdes u->host[i++] = *p; 36837535Sdes 36990267Sdes /* port */ 37090267Sdes if (*p == ':') { 37190267Sdes for (q = ++p; *q && (*q != '/'); q++) 37290267Sdes if (isdigit(*q)) 37390267Sdes u->port = u->port * 10 + (*q - '0'); 37490267Sdes else { 37590267Sdes /* invalid port */ 37690267Sdes _url_seterr(URL_BAD_PORT); 37790267Sdes goto ouch; 37890267Sdes } 37990267Sdes p = q; 38090267Sdes } 38137535Sdes 38237535Sdesnohost: 38390267Sdes /* document */ 38490267Sdes if (!*p) 38590267Sdes p = "/"; 38667419Sdes 38790267Sdes if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 38890267Sdes strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 38990267Sdes const char hexnums[] = "0123456789abcdef"; 39090267Sdes 39190267Sdes /* percent-escape whitespace. */ 39290267Sdes if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 39390267Sdes _fetch_syserr(); 39490267Sdes goto ouch; 39590267Sdes } 39690267Sdes u->doc = doc; 39790267Sdes while (*p != '\0') { 39890267Sdes if (!isspace(*p)) { 39990267Sdes *doc++ = *p++; 40090267Sdes } else { 40190267Sdes *doc++ = '%'; 40290267Sdes *doc++ = hexnums[((unsigned int)*p) >> 4]; 40390267Sdes *doc++ = hexnums[((unsigned int)*p) & 0xf]; 40490267Sdes p++; 40590267Sdes } 40690267Sdes } 40790267Sdes *doc = '\0'; 40890267Sdes } else if ((u->doc = strdup(p)) == NULL) { 40990267Sdes _fetch_syserr(); 41090267Sdes goto ouch; 41167419Sdes } 41290267Sdes 41390267Sdes DEBUG(fprintf(stderr, 41488769Sdes "scheme: [%s]\n" 41588769Sdes "user: [%s]\n" 41688769Sdes "password: [%s]\n" 41788769Sdes "host: [%s]\n" 41888769Sdes "port: [%d]\n" 41988769Sdes "document: [%s]\n", 42037535Sdes u->scheme, u->user, u->pwd, 42137535Sdes u->host, u->port, u->doc)); 42237535Sdes 42390267Sdes return (u); 42490267Sdes 42537535Sdesouch: 42690267Sdes free(u); 42790267Sdes return (NULL); 42837535Sdes} 42960376Sdes 43060376Sdes/* 43160376Sdes * Free a URL 43260376Sdes */ 43360376Sdesvoid 43460376SdesfetchFreeURL(struct url *u) 
43560376Sdes{ 43690267Sdes free(u->doc); 43790267Sdes free(u); 43860376Sdes} 439