fetch.c revision 88769
137535Sdes/*- 237535Sdes * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1537535Sdes * derived from this software without specific prior written permission 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2737535Sdes */ 2837535Sdes 2984203Sdillon#include <sys/cdefs.h> 3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 88769 2002-01-01 14:48:09Z des $"); 3184203Sdillon 3237535Sdes#include <sys/param.h> 3340975Sdes#include <sys/errno.h> 3437535Sdes 3537535Sdes#include <ctype.h> 3637535Sdes#include <stdio.h> 3737535Sdes#include <stdlib.h> 3837535Sdes#include <string.h> 3937535Sdes 4037535Sdes#include "fetch.h" 4140975Sdes#include "common.h" 4237535Sdes 4377238Sdesauth_t fetchAuthMethod; 4460924Sdesint fetchLastErrCode; 4560924Sdeschar fetchLastErrString[MAXERRSTRING]; 4660924Sdesint fetchTimeout; 4763334Sdesint fetchRestartCalls = 1; 4887560Sdesint fetchDebug; 4941862Sdes 5041862Sdes 5140975Sdes/*** Local data **************************************************************/ 5237535Sdes 5340939Sdes/* 5440975Sdes * Error messages for parser errors 5540975Sdes */ 5640975Sdes#define URL_MALFORMED 1 5740975Sdes#define URL_BAD_SCHEME 2 5840975Sdes#define URL_BAD_PORT 3 5940975Sdesstatic struct fetcherr _url_errlist[] = { 6040975Sdes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6140975Sdes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6240975Sdes { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6340975Sdes { -1, FETCH_UNKNOWN, "Unknown parser error" } 6440975Sdes}; 6540975Sdes 6640975Sdes 6740975Sdes/*** Public API **************************************************************/ 6840975Sdes 6940975Sdes/* 7040939Sdes * Select the appropriate protocol for the URL scheme, and return a 7140939Sdes * read-only stream connected to the document referenced by the URL. 7263340Sdes * Also fill out the struct url_stat. 7340939Sdes */ 7438394SdesFILE * 7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7638394Sdes{ 7760587Sume int direct; 7860587Sume 7967892Sdes direct = CHECK_FLAG('d'); 8067042Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8163340Sdes return fetchXGetFile(URL, us, flags); 8267042Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8363340Sdes return fetchXGetHTTP(URL, us, flags); 8467042Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 8563340Sdes return fetchXGetFTP(URL, us, flags); 8660587Sume } else { 8740975Sdes _url_seterr(URL_BAD_SCHEME); 8840975Sdes return NULL; 8940975Sdes } 9038394Sdes} 9138394Sdes 9240939Sdes/* 9340939Sdes * Select the appropriate protocol for the URL scheme, and return a 9463340Sdes * read-only stream connected to the document referenced by the URL. 9563340Sdes */ 9663340SdesFILE * 9775891SarchiefetchGet(struct url *URL, const char *flags) 9863340Sdes{ 9963340Sdes return fetchXGet(URL, NULL, flags); 10063340Sdes} 10163340Sdes 10263340Sdes/* 10363340Sdes * Select the appropriate protocol for the URL scheme, and return a 10440939Sdes * write-only stream connected to the document referenced by the URL. 10540939Sdes */ 10638394SdesFILE * 10775891SarchiefetchPut(struct url *URL, const char *flags) 10838394Sdes{ 10960587Sume int direct; 11060587Sume 11167892Sdes direct = CHECK_FLAG('d'); 11267042Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11338394Sdes return fetchPutFile(URL, flags); 11467042Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11538394Sdes return fetchPutHTTP(URL, flags); 11667042Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 11738394Sdes return fetchPutFTP(URL, flags); 11860587Sume } else { 11940975Sdes _url_seterr(URL_BAD_SCHEME); 12040975Sdes return NULL; 12140975Sdes } 12238394Sdes} 12338394Sdes 12440939Sdes/* 12540975Sdes * Select the appropriate protocol for the URL scheme, and return the 12640975Sdes * size of the document referenced by the URL if it exists. 12740975Sdes */ 12840975Sdesint 12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags) 13040975Sdes{ 13160587Sume int direct; 13260587Sume 13367892Sdes direct = CHECK_FLAG('d'); 13467042Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13540975Sdes return fetchStatFile(URL, us, flags); 13667042Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 13740975Sdes return fetchStatHTTP(URL, us, flags); 13867042Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 13940975Sdes return fetchStatFTP(URL, us, flags); 14060587Sume } else { 14140975Sdes _url_seterr(URL_BAD_SCHEME); 14240975Sdes return -1; 14340975Sdes } 14440975Sdes} 14540975Sdes 14640975Sdes/* 14741989Sdes * Select the appropriate protocol for the URL scheme, and return a 14841989Sdes * list of files in the directory pointed to by the URL. 14941989Sdes */ 15041989Sdesstruct url_ent * 15175891SarchiefetchList(struct url *URL, const char *flags) 15241989Sdes{ 15360587Sume int direct; 15460587Sume 15567892Sdes direct = CHECK_FLAG('d'); 15667042Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15741989Sdes return fetchListFile(URL, flags); 15867042Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 15941989Sdes return fetchListHTTP(URL, flags); 16067042Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 16141989Sdes return fetchListFTP(URL, flags); 16260587Sume } else { 16341989Sdes _url_seterr(URL_BAD_SCHEME); 16441989Sdes return NULL; 16541989Sdes } 16641989Sdes} 16741989Sdes 16841989Sdes/* 16963340Sdes * Attempt to parse the given URL; if successful, call fetchXGet(). 17040939Sdes */ 17137535SdesFILE * 17275891SarchiefetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 17337535Sdes{ 17440975Sdes struct url *u; 17537535Sdes FILE *f; 17638394Sdes 17737535Sdes if ((u = fetchParseURL(URL)) == NULL) 17837535Sdes return NULL; 17937535Sdes 18063340Sdes f = fetchXGet(u, us, flags); 18138394Sdes 18260376Sdes fetchFreeURL(u); 18337535Sdes return f; 18437535Sdes} 18537535Sdes 18663340Sdes/* 18763340Sdes * Attempt to parse the given URL; if successful, call fetchGet(). 18863340Sdes */ 18963340SdesFILE * 19075891SarchiefetchGetURL(const char *URL, const char *flags) 19163340Sdes{ 19263340Sdes return fetchXGetURL(URL, NULL, flags); 19363340Sdes} 19437535Sdes 19540939Sdes/* 19640939Sdes * Attempt to parse the given URL; if successful, call fetchPut(). 19740939Sdes */ 19837535SdesFILE * 19975891SarchiefetchPutURL(const char *URL, const char *flags) 20037535Sdes{ 20140975Sdes struct url *u; 20237535Sdes FILE *f; 20337535Sdes 20437535Sdes if ((u = fetchParseURL(URL)) == NULL) 20537535Sdes return NULL; 20637535Sdes 20738394Sdes f = fetchPut(u, flags); 20838394Sdes 20960376Sdes fetchFreeURL(u); 21037535Sdes return f; 21137535Sdes} 21237535Sdes 21337535Sdes/* 21440975Sdes * Attempt to parse the given URL; if successful, call fetchStat(). 21540975Sdes */ 21640975Sdesint 21775891SarchiefetchStatURL(const char *URL, struct url_stat *us, const char *flags) 21840975Sdes{ 21940975Sdes struct url *u; 22040975Sdes int s; 22140975Sdes 22240975Sdes if ((u = fetchParseURL(URL)) == NULL) 22340975Sdes return -1; 22440975Sdes 22540975Sdes s = fetchStat(u, us, flags); 22640975Sdes 22760376Sdes fetchFreeURL(u); 22840975Sdes return s; 22940975Sdes} 23040975Sdes 23140975Sdes/* 23241989Sdes * Attempt to parse the given URL; if successful, call fetchList(). 23341989Sdes */ 23441989Sdesstruct url_ent * 23575891SarchiefetchListURL(const char *URL, const char *flags) 23641989Sdes{ 23741989Sdes struct url *u; 23841989Sdes struct url_ent *ue; 23941989Sdes 24041989Sdes if ((u = fetchParseURL(URL)) == NULL) 24141989Sdes return NULL; 24241989Sdes 24341989Sdes ue = fetchList(u, flags); 24441989Sdes 24560376Sdes fetchFreeURL(u); 24641989Sdes return ue; 24741989Sdes} 24841989Sdes 24941989Sdes/* 25060927Sdes * Make a URL 25160927Sdes */ 25260927Sdesstruct url * 25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25475891Sarchie const char *user, const char *pwd) 25560927Sdes{ 25660927Sdes struct url *u; 25760927Sdes 25860927Sdes if (!scheme || (!host && !doc)) { 25960927Sdes _url_seterr(URL_MALFORMED); 26060927Sdes return NULL; 26160927Sdes } 26260927Sdes 26360927Sdes if (port < 0 || port > 65535) { 26460927Sdes _url_seterr(URL_BAD_PORT); 26560927Sdes return NULL; 26660927Sdes } 26760927Sdes 26860927Sdes /* allocate struct url */ 26960927Sdes if ((u = calloc(1, sizeof *u)) == NULL) { 27060927Sdes _fetch_syserr(); 27160927Sdes return NULL; 27260927Sdes } 27360927Sdes 27460927Sdes if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 27560927Sdes _fetch_syserr(); 27660927Sdes free(u); 27760927Sdes return NULL; 27860927Sdes } 27960927Sdes 28060927Sdes#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x) 28160927Sdes seturl(scheme); 28260927Sdes seturl(host); 28360927Sdes seturl(user); 28460927Sdes seturl(pwd); 28560927Sdes#undef seturl 28660927Sdes u->port = port; 28760927Sdes 28860927Sdes return u; 28960927Sdes} 29060927Sdes 29160927Sdes/* 29237535Sdes * Split an URL into components. URL syntax is: 29367042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document] 29437535Sdes * This almost, but not quite, RFC1738 URL syntax. 29537535Sdes */ 29640975Sdesstruct url * 29775891SarchiefetchParseURL(const char *URL) 29837535Sdes{ 29975891Sarchie char *doc; 30075891Sarchie const char *p, *q; 30140975Sdes struct url *u; 30237535Sdes int i; 30337535Sdes 30440975Sdes /* allocate struct url */ 30560190Sdes if ((u = calloc(1, sizeof *u)) == NULL) { 30640975Sdes _fetch_syserr(); 30737535Sdes return NULL; 30840975Sdes } 30937535Sdes 31037535Sdes /* scheme name */ 31167042Sdes if ((p = strstr(URL, ":/"))) { 31279834Sdes snprintf(u->scheme, URL_SCHEMELEN+1, "%.*s", (int)(p - URL), URL); 31367042Sdes URL = ++p; 31467042Sdes /* 31567042Sdes * Only one slash: no host, leave slash as part of document 31667042Sdes * Two slashes: host follows, strip slashes 31767042Sdes */ 31867042Sdes if (URL[1] == '/') 31967042Sdes URL = (p += 2); 32069975Sdes } else { 32169975Sdes p = URL; 32240975Sdes } 32379834Sdes if (!*URL || *URL == '/' || *URL == '.' || 32480520Sse (u->scheme[0] == '\0' && 32580520Sse strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 32637535Sdes goto nohost; 32737535Sdes 32837535Sdes p = strpbrk(URL, "/@"); 32941862Sdes if (p && *p == '@') { 33037535Sdes /* username */ 33137535Sdes for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 33237535Sdes if (i < URL_USERLEN) 33337535Sdes u->user[i++] = *q; 33437535Sdes 33537535Sdes /* password */ 33637535Sdes if (*q == ':') 33737535Sdes for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 33837535Sdes if (i < URL_PWDLEN) 33937535Sdes u->pwd[i++] = *q; 34037535Sdes 34137535Sdes p++; 34279423Sdes } else { 34379423Sdes p = URL; 34479423Sdes } 34579423Sdes 34637535Sdes /* hostname */ 34760737Sume#ifdef INET6 34860737Sume if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 34960737Sume (*++q == '\0' || *q == '/' || *q == ':')) { 35060737Sume if ((i = q - p - 2) > MAXHOSTNAMELEN) 35160737Sume i = MAXHOSTNAMELEN; 35260737Sume strncpy(u->host, ++p, i); 35360737Sume p = q; 35460737Sume } else 35560737Sume#endif 35660737Sume for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 35760737Sume if (i < MAXHOSTNAMELEN) 35860737Sume u->host[i++] = *p; 35937535Sdes 36037535Sdes /* port */ 36137535Sdes if (*p == ':') { 36237535Sdes for (q = ++p; *q && (*q != '/'); q++) 36337535Sdes if (isdigit(*q)) 36437535Sdes u->port = u->port * 10 + (*q - '0'); 36540975Sdes else { 36640975Sdes /* invalid port */ 36740975Sdes _url_seterr(URL_BAD_PORT); 36840975Sdes goto ouch; 36940975Sdes } 37037535Sdes while (*p && (*p != '/')) 37137535Sdes p++; 37237535Sdes } 37337535Sdes 37437535Sdesnohost: 37537535Sdes /* document */ 37660376Sdes if (!*p) 37760376Sdes p = "/"; 37860376Sdes 37969465Sdes if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 38069465Sdes strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 38167419Sdes const char hexnums[] = "0123456789abcdef"; 38267419Sdes 38369465Sdes /* percent-escape whitespace. */ 38469465Sdes if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 38567419Sdes _fetch_syserr(); 38667419Sdes goto ouch; 38767419Sdes } 38869465Sdes u->doc = doc; 38967419Sdes while (*p != '\0') { 39067419Sdes if (!isspace(*p)) { 39167419Sdes *doc++ = *p++; 39267419Sdes } else { 39367419Sdes *doc++ = '%'; 39467419Sdes *doc++ = hexnums[((unsigned int)*p) >> 4]; 39567419Sdes *doc++ = hexnums[((unsigned int)*p) & 0xf]; 39667419Sdes p++; 39767419Sdes } 39867419Sdes } 39967419Sdes *doc = '\0'; 40067419Sdes } else if ((u->doc = strdup(p)) == NULL) { 40160376Sdes _fetch_syserr(); 40260376Sdes goto ouch; 40340939Sdes } 40437535Sdes 40537535Sdes DEBUG(fprintf(stderr, 40688769Sdes "scheme: [%s]\n" 40788769Sdes "user: [%s]\n" 40888769Sdes "password: [%s]\n" 40988769Sdes "host: [%s]\n" 41088769Sdes "port: [%d]\n" 41188769Sdes "document: [%s]\n", 41237535Sdes u->scheme, u->user, u->pwd, 41337535Sdes u->host, u->port, u->doc)); 41437535Sdes 41537535Sdes return u; 41637535Sdes 41737535Sdesouch: 41837535Sdes free(u); 41937535Sdes return NULL; 42037535Sdes} 42160376Sdes 42260376Sdes/* 42360376Sdes * Free a URL 42460376Sdes */ 42560376Sdesvoid 42660376SdesfetchFreeURL(struct url *u) 42760376Sdes{ 42860376Sdes free(u->doc); 42960376Sdes free(u); 43060376Sdes} 431