fetch.c revision 97868
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */ 2837535Sdes 2984203Sdillon#include <sys/cdefs.h> 3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 97868 2002-06-05 12:46:36Z des $"); 3184203Sdillon 3237535Sdes#include <sys/param.h> 3340975Sdes#include <sys/errno.h> 3437535Sdes 3537535Sdes#include <ctype.h> 3637535Sdes#include <stdio.h> 3737535Sdes#include <stdlib.h> 3837535Sdes#include <string.h> 3937535Sdes 4037535Sdes#include "fetch.h" 4140975Sdes#include "common.h" 4237535Sdes 4377238Sdesauth_t fetchAuthMethod; 4460924Sdesint fetchLastErrCode; 4560924Sdeschar fetchLastErrString[MAXERRSTRING]; 4660924Sdesint fetchTimeout; 4763334Sdesint fetchRestartCalls = 1; 4887560Sdesint fetchDebug; 4941862Sdes 5041862Sdes 5140975Sdes/*** Local data **************************************************************/ 5237535Sdes 5340939Sdes/* 5440975Sdes * Error messages for parser errors 5540975Sdes */ 5640975Sdes#define URL_MALFORMED 1 5740975Sdes#define URL_BAD_SCHEME 2 5840975Sdes#define URL_BAD_PORT 3 5940975Sdesstatic struct fetcherr _url_errlist[] = { 6090267Sdes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6190267Sdes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6290267Sdes { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6390267Sdes { -1, FETCH_UNKNOWN, "Unknown parser error" } 6440975Sdes}; 6540975Sdes 6640975Sdes 6740975Sdes/*** Public API **************************************************************/ 6840975Sdes 6940975Sdes/* 7040939Sdes * Select the appropriate protocol for the URL scheme, and return a 7140939Sdes * read-only stream connected to the document referenced by the URL. 7263340Sdes * Also fill out the struct url_stat. 
7340939Sdes */ 7438394SdesFILE * 7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7638394Sdes{ 7790267Sdes int direct; 7860587Sume 7990267Sdes direct = CHECK_FLAG('d'); 8090267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8190267Sdes return (fetchXGetFile(URL, us, flags)); 8297868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 8397868Sdes return (fetchXGetFTP(URL, us, flags)); 8490267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8590267Sdes return (fetchXGetHTTP(URL, us, flags)); 8697868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 8797868Sdes return (fetchXGetHTTP(URL, us, flags)); 8897868Sdes _url_seterr(URL_BAD_SCHEME); 8997868Sdes return (NULL); 9038394Sdes} 9138394Sdes 9240939Sdes/* 9340939Sdes * Select the appropriate protocol for the URL scheme, and return a 9463340Sdes * read-only stream connected to the document referenced by the URL. 9563340Sdes */ 9663340SdesFILE * 9775891SarchiefetchGet(struct url *URL, const char *flags) 9863340Sdes{ 9990267Sdes return (fetchXGet(URL, NULL, flags)); 10063340Sdes} 10163340Sdes 10263340Sdes/* 10363340Sdes * Select the appropriate protocol for the URL scheme, and return a 10440939Sdes * write-only stream connected to the document referenced by the URL. 
10540939Sdes */ 10638394SdesFILE * 10775891SarchiefetchPut(struct url *URL, const char *flags) 10838394Sdes{ 10990267Sdes int direct; 11060587Sume 11190267Sdes direct = CHECK_FLAG('d'); 11290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11390267Sdes return (fetchPutFile(URL, flags)); 11497868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 11597868Sdes return (fetchPutFTP(URL, flags)); 11690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11790267Sdes return (fetchPutHTTP(URL, flags)); 11897868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 11997868Sdes return (fetchPutHTTP(URL, flags)); 12097868Sdes _url_seterr(URL_BAD_SCHEME); 12197868Sdes return (NULL); 12238394Sdes} 12338394Sdes 12440939Sdes/* 12540975Sdes * Select the appropriate protocol for the URL scheme, and return the 12640975Sdes * size of the document referenced by the URL if it exists. 12740975Sdes */ 12840975Sdesint 12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags) 13040975Sdes{ 13190267Sdes int direct; 13260587Sume 13390267Sdes direct = CHECK_FLAG('d'); 13490267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13590267Sdes return (fetchStatFile(URL, us, flags)); 13697868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 13797868Sdes return (fetchStatFTP(URL, us, flags)); 13890267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 13990267Sdes return (fetchStatHTTP(URL, us, flags)); 14097868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 14197868Sdes return (fetchStatHTTP(URL, us, flags)); 14240975Sdes _url_seterr(URL_BAD_SCHEME); 14390267Sdes return (-1); 14440975Sdes} 14540975Sdes 14640975Sdes/* 14741989Sdes * Select the appropriate protocol for the URL scheme, and return a 14841989Sdes * list of files in the directory pointed to by the URL. 
14941989Sdes */ 15041989Sdesstruct url_ent * 15175891SarchiefetchList(struct url *URL, const char *flags) 15241989Sdes{ 15390267Sdes int direct; 15460587Sume 15590267Sdes direct = CHECK_FLAG('d'); 15690267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15790267Sdes return (fetchListFile(URL, flags)); 15897868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 15997868Sdes return (fetchListFTP(URL, flags)); 16090267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 16190267Sdes return (fetchListHTTP(URL, flags)); 16297868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 16397868Sdes return (fetchListHTTP(URL, flags)); 16441989Sdes _url_seterr(URL_BAD_SCHEME); 16590267Sdes return (NULL); 16641989Sdes} 16741989Sdes 16841989Sdes/* 16963340Sdes * Attempt to parse the given URL; if successful, call fetchXGet(). 17040939Sdes */ 17137535SdesFILE * 17275891SarchiefetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 17337535Sdes{ 17490267Sdes struct url *u; 17590267Sdes FILE *f; 17638394Sdes 17790267Sdes if ((u = fetchParseURL(URL)) == NULL) 17890267Sdes return (NULL); 17990267Sdes 18090267Sdes f = fetchXGet(u, us, flags); 18190267Sdes 18290267Sdes fetchFreeURL(u); 18390267Sdes return (f); 18437535Sdes} 18537535Sdes 18663340Sdes/* 18763340Sdes * Attempt to parse the given URL; if successful, call fetchGet(). 18863340Sdes */ 18963340SdesFILE * 19075891SarchiefetchGetURL(const char *URL, const char *flags) 19163340Sdes{ 19290267Sdes return (fetchXGetURL(URL, NULL, flags)); 19363340Sdes} 19437535Sdes 19540939Sdes/* 19640939Sdes * Attempt to parse the given URL; if successful, call fetchPut(). 
19740939Sdes */ 19837535SdesFILE * 19975891SarchiefetchPutURL(const char *URL, const char *flags) 20037535Sdes{ 20190267Sdes struct url *u; 20290267Sdes FILE *f; 20390267Sdes 20490267Sdes if ((u = fetchParseURL(URL)) == NULL) 20590267Sdes return (NULL); 20690267Sdes 20790267Sdes f = fetchPut(u, flags); 20890267Sdes 20990267Sdes fetchFreeURL(u); 21090267Sdes return (f); 21137535Sdes} 21237535Sdes 21337535Sdes/* 21440975Sdes * Attempt to parse the given URL; if successful, call fetchStat(). 21540975Sdes */ 21640975Sdesint 21775891SarchiefetchStatURL(const char *URL, struct url_stat *us, const char *flags) 21840975Sdes{ 21990267Sdes struct url *u; 22090267Sdes int s; 22140975Sdes 22290267Sdes if ((u = fetchParseURL(URL)) == NULL) 22390267Sdes return (-1); 22440975Sdes 22590267Sdes s = fetchStat(u, us, flags); 22640975Sdes 22790267Sdes fetchFreeURL(u); 22890267Sdes return (s); 22940975Sdes} 23040975Sdes 23140975Sdes/* 23241989Sdes * Attempt to parse the given URL; if successful, call fetchList(). 
23341989Sdes */ 23441989Sdesstruct url_ent * 23575891SarchiefetchListURL(const char *URL, const char *flags) 23641989Sdes{ 23790267Sdes struct url *u; 23890267Sdes struct url_ent *ue; 23941989Sdes 24090267Sdes if ((u = fetchParseURL(URL)) == NULL) 24190267Sdes return (NULL); 24241989Sdes 24390267Sdes ue = fetchList(u, flags); 24441989Sdes 24590267Sdes fetchFreeURL(u); 24690267Sdes return (ue); 24741989Sdes} 24841989Sdes 24941989Sdes/* 25060927Sdes * Make a URL 25160927Sdes */ 25260927Sdesstruct url * 25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25475891Sarchie const char *user, const char *pwd) 25560927Sdes{ 25690267Sdes struct url *u; 25760927Sdes 25890267Sdes if (!scheme || (!host && !doc)) { 25990267Sdes _url_seterr(URL_MALFORMED); 26090267Sdes return (NULL); 26190267Sdes } 26260927Sdes 26390267Sdes if (port < 0 || port > 65535) { 26490267Sdes _url_seterr(URL_BAD_PORT); 26590267Sdes return (NULL); 26690267Sdes } 26790267Sdes 26890267Sdes /* allocate struct url */ 26990267Sdes if ((u = calloc(1, sizeof *u)) == NULL) { 27090267Sdes _fetch_syserr(); 27190267Sdes return (NULL); 27290267Sdes } 27390267Sdes 27490267Sdes if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 27590267Sdes _fetch_syserr(); 27690267Sdes free(u); 27790267Sdes return (NULL); 27890267Sdes } 27990267Sdes 28060927Sdes#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x) 28190267Sdes seturl(scheme); 28290267Sdes seturl(host); 28390267Sdes seturl(user); 28490267Sdes seturl(pwd); 28560927Sdes#undef seturl 28690267Sdes u->port = port; 28760927Sdes 28890267Sdes return (u); 28960927Sdes} 29060927Sdes 29160927Sdes/* 29237535Sdes * Split an URL into components. URL syntax is: 29367042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document] 29437535Sdes * This almost, but not quite, RFC1738 URL syntax. 
29537535Sdes */ 29640975Sdesstruct url * 29775891SarchiefetchParseURL(const char *URL) 29837535Sdes{ 29990267Sdes char *doc; 30090267Sdes const char *p, *q; 30190267Sdes struct url *u; 30290267Sdes int i; 30337535Sdes 30490267Sdes /* allocate struct url */ 30590267Sdes if ((u = calloc(1, sizeof *u)) == NULL) { 30690267Sdes _fetch_syserr(); 30790267Sdes return (NULL); 30890267Sdes } 30937535Sdes 31090267Sdes /* scheme name */ 31190267Sdes if ((p = strstr(URL, ":/"))) { 31290267Sdes snprintf(u->scheme, URL_SCHEMELEN+1, 31390267Sdes "%.*s", (int)(p - URL), URL); 31490267Sdes URL = ++p; 31590267Sdes /* 31690267Sdes * Only one slash: no host, leave slash as part of document 31790267Sdes * Two slashes: host follows, strip slashes 31890267Sdes */ 31990267Sdes if (URL[1] == '/') 32090267Sdes URL = (p += 2); 32190267Sdes } else { 32290267Sdes p = URL; 32390267Sdes } 32490267Sdes if (!*URL || *URL == '/' || *URL == '.' || 32590267Sdes (u->scheme[0] == '\0' && 32690267Sdes strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 32790267Sdes goto nohost; 32837535Sdes 32990267Sdes p = strpbrk(URL, "/@"); 33090267Sdes if (p && *p == '@') { 33190267Sdes /* username */ 33290267Sdes for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 33390267Sdes if (i < URL_USERLEN) 33490267Sdes u->user[i++] = *q; 33579423Sdes 33690267Sdes /* password */ 33790267Sdes if (*q == ':') 33890267Sdes for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 33990267Sdes if (i < URL_PWDLEN) 34090267Sdes u->pwd[i++] = *q; 34190267Sdes 34290267Sdes p++; 34390267Sdes } else { 34490267Sdes p = URL; 34590267Sdes } 34690267Sdes 34790267Sdes /* hostname */ 34860737Sume#ifdef INET6 34990267Sdes if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 35090267Sdes (*++q == '\0' || *q == '/' || *q == ':')) { 35190267Sdes if ((i = q - p - 2) > MAXHOSTNAMELEN) 35290267Sdes i = MAXHOSTNAMELEN; 35390267Sdes strncpy(u->host, ++p, i); 35490267Sdes p = q; 35590267Sdes } else 35660737Sume#endif 35790267Sdes for (i = 0; *p && (*p != 
'/') && (*p != ':'); p++) 35890267Sdes if (i < MAXHOSTNAMELEN) 35990267Sdes u->host[i++] = *p; 36037535Sdes 36190267Sdes /* port */ 36290267Sdes if (*p == ':') { 36390267Sdes for (q = ++p; *q && (*q != '/'); q++) 36490267Sdes if (isdigit(*q)) 36590267Sdes u->port = u->port * 10 + (*q - '0'); 36690267Sdes else { 36790267Sdes /* invalid port */ 36890267Sdes _url_seterr(URL_BAD_PORT); 36990267Sdes goto ouch; 37090267Sdes } 37190267Sdes p = q; 37290267Sdes } 37337535Sdes 37437535Sdesnohost: 37590267Sdes /* document */ 37690267Sdes if (!*p) 37790267Sdes p = "/"; 37867419Sdes 37990267Sdes if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 38090267Sdes strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 38190267Sdes const char hexnums[] = "0123456789abcdef"; 38290267Sdes 38390267Sdes /* percent-escape whitespace. */ 38490267Sdes if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 38590267Sdes _fetch_syserr(); 38690267Sdes goto ouch; 38790267Sdes } 38890267Sdes u->doc = doc; 38990267Sdes while (*p != '\0') { 39090267Sdes if (!isspace(*p)) { 39190267Sdes *doc++ = *p++; 39290267Sdes } else { 39390267Sdes *doc++ = '%'; 39490267Sdes *doc++ = hexnums[((unsigned int)*p) >> 4]; 39590267Sdes *doc++ = hexnums[((unsigned int)*p) & 0xf]; 39690267Sdes p++; 39790267Sdes } 39890267Sdes } 39990267Sdes *doc = '\0'; 40090267Sdes } else if ((u->doc = strdup(p)) == NULL) { 40190267Sdes _fetch_syserr(); 40290267Sdes goto ouch; 40367419Sdes } 40490267Sdes 40590267Sdes DEBUG(fprintf(stderr, 40688769Sdes "scheme: [%s]\n" 40788769Sdes "user: [%s]\n" 40888769Sdes "password: [%s]\n" 40988769Sdes "host: [%s]\n" 41088769Sdes "port: [%d]\n" 41188769Sdes "document: [%s]\n", 41237535Sdes u->scheme, u->user, u->pwd, 41337535Sdes u->host, u->port, u->doc)); 41437535Sdes 41590267Sdes return (u); 41690267Sdes 41737535Sdesouch: 41890267Sdes free(u); 41990267Sdes return (NULL); 42037535Sdes} 42160376Sdes 42260376Sdes/* 42360376Sdes * Free a URL 42460376Sdes */ 42560376Sdesvoid 42660376SdesfetchFreeURL(struct url *u) 
42760376Sdes{ 42890267Sdes free(u->doc); 42990267Sdes free(u); 43060376Sdes} 431