fetch.c revision 90267
/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */ 2837535Sdes 2984203Sdillon#include <sys/cdefs.h> 3084203Sdillon__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 90267 2002-02-05 22:13:51Z des $"); 3184203Sdillon 3237535Sdes#include <sys/param.h> 3340975Sdes#include <sys/errno.h> 3437535Sdes 3537535Sdes#include <ctype.h> 3637535Sdes#include <stdio.h> 3737535Sdes#include <stdlib.h> 3837535Sdes#include <string.h> 3937535Sdes 4037535Sdes#include "fetch.h" 4140975Sdes#include "common.h" 4237535Sdes 4377238Sdesauth_t fetchAuthMethod; 4460924Sdesint fetchLastErrCode; 4560924Sdeschar fetchLastErrString[MAXERRSTRING]; 4660924Sdesint fetchTimeout; 4763334Sdesint fetchRestartCalls = 1; 4887560Sdesint fetchDebug; 4941862Sdes 5041862Sdes 5140975Sdes/*** Local data **************************************************************/ 5237535Sdes 5340939Sdes/* 5440975Sdes * Error messages for parser errors 5540975Sdes */ 5640975Sdes#define URL_MALFORMED 1 5740975Sdes#define URL_BAD_SCHEME 2 5840975Sdes#define URL_BAD_PORT 3 5940975Sdesstatic struct fetcherr _url_errlist[] = { 6090267Sdes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6190267Sdes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6290267Sdes { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6390267Sdes { -1, FETCH_UNKNOWN, "Unknown parser error" } 6440975Sdes}; 6540975Sdes 6640975Sdes 6740975Sdes/*** Public API **************************************************************/ 6840975Sdes 6940975Sdes/* 7040939Sdes * Select the appropriate protocol for the URL scheme, and return a 7140939Sdes * read-only stream connected to the document referenced by the URL. 7263340Sdes * Also fill out the struct url_stat. 
7340939Sdes */ 7438394SdesFILE * 7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7638394Sdes{ 7790267Sdes int direct; 7860587Sume 7990267Sdes direct = CHECK_FLAG('d'); 8090267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8190267Sdes return (fetchXGetFile(URL, us, flags)); 8290267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8390267Sdes return (fetchXGetHTTP(URL, us, flags)); 8490267Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 8590267Sdes return (fetchXGetFTP(URL, us, flags)); 8690267Sdes } else { 8790267Sdes _url_seterr(URL_BAD_SCHEME); 8890267Sdes return (NULL); 8990267Sdes } 9038394Sdes} 9138394Sdes 9240939Sdes/* 9340939Sdes * Select the appropriate protocol for the URL scheme, and return a 9463340Sdes * read-only stream connected to the document referenced by the URL. 9563340Sdes */ 9663340SdesFILE * 9775891SarchiefetchGet(struct url *URL, const char *flags) 9863340Sdes{ 9990267Sdes return (fetchXGet(URL, NULL, flags)); 10063340Sdes} 10163340Sdes 10263340Sdes/* 10363340Sdes * Select the appropriate protocol for the URL scheme, and return a 10440939Sdes * write-only stream connected to the document referenced by the URL. 
10540939Sdes */ 10638394SdesFILE * 10775891SarchiefetchPut(struct url *URL, const char *flags) 10838394Sdes{ 10990267Sdes int direct; 11060587Sume 11190267Sdes direct = CHECK_FLAG('d'); 11290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11390267Sdes return (fetchPutFile(URL, flags)); 11490267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11590267Sdes return (fetchPutHTTP(URL, flags)); 11690267Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) { 11790267Sdes return (fetchPutFTP(URL, flags)); 11890267Sdes } else { 11990267Sdes _url_seterr(URL_BAD_SCHEME); 12090267Sdes return (NULL); 12190267Sdes } 12238394Sdes} 12338394Sdes 12440939Sdes/* 12540975Sdes * Select the appropriate protocol for the URL scheme, and return the 12640975Sdes * size of the document referenced by the URL if it exists. 12740975Sdes */ 12840975Sdesint 12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags) 13040975Sdes{ 13190267Sdes int direct; 13260587Sume 13390267Sdes direct = CHECK_FLAG('d'); 13490267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13590267Sdes return (fetchStatFile(URL, us, flags)); 13690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 13790267Sdes return (fetchStatHTTP(URL, us, flags)); 13890267Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 13990267Sdes return (fetchStatFTP(URL, us, flags)); 14040975Sdes _url_seterr(URL_BAD_SCHEME); 14190267Sdes return (-1); 14240975Sdes} 14340975Sdes 14440975Sdes/* 14541989Sdes * Select the appropriate protocol for the URL scheme, and return a 14641989Sdes * list of files in the directory pointed to by the URL. 
14741989Sdes */ 14841989Sdesstruct url_ent * 14975891SarchiefetchList(struct url *URL, const char *flags) 15041989Sdes{ 15190267Sdes int direct; 15260587Sume 15390267Sdes direct = CHECK_FLAG('d'); 15490267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15590267Sdes return (fetchListFile(URL, flags)); 15690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 15790267Sdes return (fetchListHTTP(URL, flags)); 15890267Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 15990267Sdes return (fetchListFTP(URL, flags)); 16041989Sdes _url_seterr(URL_BAD_SCHEME); 16190267Sdes return (NULL); 16241989Sdes} 16341989Sdes 16441989Sdes/* 16563340Sdes * Attempt to parse the given URL; if successful, call fetchXGet(). 16640939Sdes */ 16737535SdesFILE * 16875891SarchiefetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 16937535Sdes{ 17090267Sdes struct url *u; 17190267Sdes FILE *f; 17238394Sdes 17390267Sdes if ((u = fetchParseURL(URL)) == NULL) 17490267Sdes return (NULL); 17590267Sdes 17690267Sdes f = fetchXGet(u, us, flags); 17790267Sdes 17890267Sdes fetchFreeURL(u); 17990267Sdes return (f); 18037535Sdes} 18137535Sdes 18263340Sdes/* 18363340Sdes * Attempt to parse the given URL; if successful, call fetchGet(). 18463340Sdes */ 18563340SdesFILE * 18675891SarchiefetchGetURL(const char *URL, const char *flags) 18763340Sdes{ 18890267Sdes return (fetchXGetURL(URL, NULL, flags)); 18963340Sdes} 19037535Sdes 19140939Sdes/* 19240939Sdes * Attempt to parse the given URL; if successful, call fetchPut(). 
19340939Sdes */ 19437535SdesFILE * 19575891SarchiefetchPutURL(const char *URL, const char *flags) 19637535Sdes{ 19790267Sdes struct url *u; 19890267Sdes FILE *f; 19990267Sdes 20090267Sdes if ((u = fetchParseURL(URL)) == NULL) 20190267Sdes return (NULL); 20290267Sdes 20390267Sdes f = fetchPut(u, flags); 20490267Sdes 20590267Sdes fetchFreeURL(u); 20690267Sdes return (f); 20737535Sdes} 20837535Sdes 20937535Sdes/* 21040975Sdes * Attempt to parse the given URL; if successful, call fetchStat(). 21140975Sdes */ 21240975Sdesint 21375891SarchiefetchStatURL(const char *URL, struct url_stat *us, const char *flags) 21440975Sdes{ 21590267Sdes struct url *u; 21690267Sdes int s; 21740975Sdes 21890267Sdes if ((u = fetchParseURL(URL)) == NULL) 21990267Sdes return (-1); 22040975Sdes 22190267Sdes s = fetchStat(u, us, flags); 22240975Sdes 22390267Sdes fetchFreeURL(u); 22490267Sdes return (s); 22540975Sdes} 22640975Sdes 22740975Sdes/* 22841989Sdes * Attempt to parse the given URL; if successful, call fetchList(). 
22941989Sdes */ 23041989Sdesstruct url_ent * 23175891SarchiefetchListURL(const char *URL, const char *flags) 23241989Sdes{ 23390267Sdes struct url *u; 23490267Sdes struct url_ent *ue; 23541989Sdes 23690267Sdes if ((u = fetchParseURL(URL)) == NULL) 23790267Sdes return (NULL); 23841989Sdes 23990267Sdes ue = fetchList(u, flags); 24041989Sdes 24190267Sdes fetchFreeURL(u); 24290267Sdes return (ue); 24341989Sdes} 24441989Sdes 24541989Sdes/* 24660927Sdes * Make a URL 24760927Sdes */ 24860927Sdesstruct url * 24975891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25075891Sarchie const char *user, const char *pwd) 25160927Sdes{ 25290267Sdes struct url *u; 25360927Sdes 25490267Sdes if (!scheme || (!host && !doc)) { 25590267Sdes _url_seterr(URL_MALFORMED); 25690267Sdes return (NULL); 25790267Sdes } 25860927Sdes 25990267Sdes if (port < 0 || port > 65535) { 26090267Sdes _url_seterr(URL_BAD_PORT); 26190267Sdes return (NULL); 26290267Sdes } 26390267Sdes 26490267Sdes /* allocate struct url */ 26590267Sdes if ((u = calloc(1, sizeof *u)) == NULL) { 26690267Sdes _fetch_syserr(); 26790267Sdes return (NULL); 26890267Sdes } 26990267Sdes 27090267Sdes if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 27190267Sdes _fetch_syserr(); 27290267Sdes free(u); 27390267Sdes return (NULL); 27490267Sdes } 27590267Sdes 27660927Sdes#define seturl(x) snprintf(u->x, sizeof u->x, "%s", x) 27790267Sdes seturl(scheme); 27890267Sdes seturl(host); 27990267Sdes seturl(user); 28090267Sdes seturl(pwd); 28160927Sdes#undef seturl 28290267Sdes u->port = port; 28360927Sdes 28490267Sdes return (u); 28560927Sdes} 28660927Sdes 28760927Sdes/* 28837535Sdes * Split an URL into components. URL syntax is: 28967042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document] 29037535Sdes * This almost, but not quite, RFC1738 URL syntax. 
29137535Sdes */ 29240975Sdesstruct url * 29375891SarchiefetchParseURL(const char *URL) 29437535Sdes{ 29590267Sdes char *doc; 29690267Sdes const char *p, *q; 29790267Sdes struct url *u; 29890267Sdes int i; 29937535Sdes 30090267Sdes /* allocate struct url */ 30190267Sdes if ((u = calloc(1, sizeof *u)) == NULL) { 30290267Sdes _fetch_syserr(); 30390267Sdes return (NULL); 30490267Sdes } 30537535Sdes 30690267Sdes /* scheme name */ 30790267Sdes if ((p = strstr(URL, ":/"))) { 30890267Sdes snprintf(u->scheme, URL_SCHEMELEN+1, 30990267Sdes "%.*s", (int)(p - URL), URL); 31090267Sdes URL = ++p; 31190267Sdes /* 31290267Sdes * Only one slash: no host, leave slash as part of document 31390267Sdes * Two slashes: host follows, strip slashes 31490267Sdes */ 31590267Sdes if (URL[1] == '/') 31690267Sdes URL = (p += 2); 31790267Sdes } else { 31890267Sdes p = URL; 31990267Sdes } 32090267Sdes if (!*URL || *URL == '/' || *URL == '.' || 32190267Sdes (u->scheme[0] == '\0' && 32290267Sdes strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 32390267Sdes goto nohost; 32437535Sdes 32590267Sdes p = strpbrk(URL, "/@"); 32690267Sdes if (p && *p == '@') { 32790267Sdes /* username */ 32890267Sdes for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) 32990267Sdes if (i < URL_USERLEN) 33090267Sdes u->user[i++] = *q; 33179423Sdes 33290267Sdes /* password */ 33390267Sdes if (*q == ':') 33490267Sdes for (q++, i = 0; (*q != ':') && (*q != '@'); q++) 33590267Sdes if (i < URL_PWDLEN) 33690267Sdes u->pwd[i++] = *q; 33790267Sdes 33890267Sdes p++; 33990267Sdes } else { 34090267Sdes p = URL; 34190267Sdes } 34290267Sdes 34390267Sdes /* hostname */ 34460737Sume#ifdef INET6 34590267Sdes if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 34690267Sdes (*++q == '\0' || *q == '/' || *q == ':')) { 34790267Sdes if ((i = q - p - 2) > MAXHOSTNAMELEN) 34890267Sdes i = MAXHOSTNAMELEN; 34990267Sdes strncpy(u->host, ++p, i); 35090267Sdes p = q; 35190267Sdes } else 35260737Sume#endif 35390267Sdes for (i = 0; *p && (*p != 
'/') && (*p != ':'); p++) 35490267Sdes if (i < MAXHOSTNAMELEN) 35590267Sdes u->host[i++] = *p; 35637535Sdes 35790267Sdes /* port */ 35890267Sdes if (*p == ':') { 35990267Sdes for (q = ++p; *q && (*q != '/'); q++) 36090267Sdes if (isdigit(*q)) 36190267Sdes u->port = u->port * 10 + (*q - '0'); 36290267Sdes else { 36390267Sdes /* invalid port */ 36490267Sdes _url_seterr(URL_BAD_PORT); 36590267Sdes goto ouch; 36690267Sdes } 36790267Sdes p = q; 36890267Sdes } 36937535Sdes 37037535Sdesnohost: 37190267Sdes /* document */ 37290267Sdes if (!*p) 37390267Sdes p = "/"; 37467419Sdes 37590267Sdes if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 37690267Sdes strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 37790267Sdes const char hexnums[] = "0123456789abcdef"; 37890267Sdes 37990267Sdes /* percent-escape whitespace. */ 38090267Sdes if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 38190267Sdes _fetch_syserr(); 38290267Sdes goto ouch; 38390267Sdes } 38490267Sdes u->doc = doc; 38590267Sdes while (*p != '\0') { 38690267Sdes if (!isspace(*p)) { 38790267Sdes *doc++ = *p++; 38890267Sdes } else { 38990267Sdes *doc++ = '%'; 39090267Sdes *doc++ = hexnums[((unsigned int)*p) >> 4]; 39190267Sdes *doc++ = hexnums[((unsigned int)*p) & 0xf]; 39290267Sdes p++; 39390267Sdes } 39490267Sdes } 39590267Sdes *doc = '\0'; 39690267Sdes } else if ((u->doc = strdup(p)) == NULL) { 39790267Sdes _fetch_syserr(); 39890267Sdes goto ouch; 39967419Sdes } 40090267Sdes 40190267Sdes DEBUG(fprintf(stderr, 40288769Sdes "scheme: [%s]\n" 40388769Sdes "user: [%s]\n" 40488769Sdes "password: [%s]\n" 40588769Sdes "host: [%s]\n" 40688769Sdes "port: [%d]\n" 40788769Sdes "document: [%s]\n", 40837535Sdes u->scheme, u->user, u->pwd, 40937535Sdes u->host, u->port, u->doc)); 41037535Sdes 41190267Sdes return (u); 41290267Sdes 41337535Sdesouch: 41490267Sdes free(u); 41590267Sdes return (NULL); 41637535Sdes} 41760376Sdes 41860376Sdes/* 41960376Sdes * Free a URL 42060376Sdes */ 42160376Sdesvoid 42260376SdesfetchFreeURL(struct url *u) 
42360376Sdes{ 42490267Sdes free(u->doc); 42590267Sdes free(u); 42660376Sdes} 427