Deleted Added
full compact
fetch.c (174761) fetch.c (186241)
1/*-
2 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 174761 2007-12-19 00:26:36Z des $");
30__FBSDID("$FreeBSD: head/lib/libfetch/fetch.c 186241 2008-12-17 18:00:18Z murray $");
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
/*
 * Library-wide state.  None of these objects is static, so they have
 * external linkage.  Only fetchRestartCalls carries an explicit
 * initializer (1); the others start out zero-initialized, as every
 * file-scope object without an initializer does.
 * NOTE(review): what each variable controls is decided by its users,
 * which are not visible in this file -- confirm before relying on it.
 */
43auth_t fetchAuthMethod;
44int fetchLastErrCode;
45char fetchLastErrString[MAXERRSTRING];
46int fetchTimeout;
47int fetchRestartCalls = 1;
48int fetchDebug;
49
50
51/*** Local data **************************************************************/
52
53/*
 * Error codes raised by the URL parser, followed by the table that
 * pairs each code with an error category (FETCH_URL or FETCH_UNKNOWN)
 * and a human-readable message.
 * NOTE(review): the last entry uses code -1 and a generic message;
 * presumably it is the fallback for codes not listed -- confirm against
 * the code that consumes url_errlist.
 */
56#define URL_MALFORMED 1
57#define URL_BAD_SCHEME 2
58#define URL_BAD_PORT 3
59static struct fetcherr url_errlist[] = {
60 { URL_MALFORMED, FETCH_URL, "Malformed URL" },
61 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
62 { URL_BAD_PORT, FETCH_URL, "Invalid server port" },
63 { -1, FETCH_UNKNOWN, "Unknown parser error" }
64};
65
66
67/*** Public API **************************************************************/
68
69/*
70 * Select the appropriate protocol for the URL scheme, and return a
71 * read-only stream connected to the document referenced by the URL.
72 * Also fill out the struct url_stat.
73 */
74FILE *
75fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76{
31
32#include <sys/param.h>
33#include <sys/errno.h>
34
35#include <ctype.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#include "fetch.h"
41#include "common.h"
42
/*
 * Library-wide state.  None of these objects is static, so they have
 * external linkage.  Only fetchRestartCalls carries an explicit
 * initializer (1); the others start out zero-initialized, as every
 * file-scope object without an initializer does.
 * NOTE(review): what each variable controls is decided by its users,
 * which are not visible in this file -- confirm before relying on it.
 */
43auth_t fetchAuthMethod;
44int fetchLastErrCode;
45char fetchLastErrString[MAXERRSTRING];
46int fetchTimeout;
47int fetchRestartCalls = 1;
48int fetchDebug;
49
50
51/*** Local data **************************************************************/
52
53/*
 * Error codes raised by the URL parser, followed by the table that
 * pairs each code with an error category (FETCH_URL or FETCH_UNKNOWN)
 * and a human-readable message.
 * NOTE(review): the last entry uses code -1 and a generic message;
 * presumably it is the fallback for codes not listed -- confirm against
 * the code that consumes url_errlist.
 */
56#define URL_MALFORMED 1
57#define URL_BAD_SCHEME 2
58#define URL_BAD_PORT 3
59static struct fetcherr url_errlist[] = {
60 { URL_MALFORMED, FETCH_URL, "Malformed URL" },
61 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
62 { URL_BAD_PORT, FETCH_URL, "Invalid server port" },
63 { -1, FETCH_UNKNOWN, "Unknown parser error" }
64};
65
66
67/*** Public API **************************************************************/
68
69/*
70 * Select the appropriate protocol for the URL scheme, and return a
71 * read-only stream connected to the document referenced by the URL.
72 * Also fill out the struct url_stat.
73 */
74FILE *
75fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
76{
77 int direct;
78
77
79 direct = CHECK_FLAG('d');
80 if (us != NULL) {
81 us->size = -1;
82 us->atime = us->mtime = 0;
83 }
84 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
85 return (fetchXGetFile(URL, us, flags));
86 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
87 return (fetchXGetFTP(URL, us, flags));
88 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
89 return (fetchXGetHTTP(URL, us, flags));
90 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
91 return (fetchXGetHTTP(URL, us, flags));
92 url_seterr(URL_BAD_SCHEME);
93 return (NULL);
94}
95
/*
 * Convenience form of fetchXGet() for callers that do not want
 * document metadata: forwards URL and flags and passes NULL for the
 * struct url_stat pointer.  Returns whatever fetchXGet() returns.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
105
106/*
107 * Select the appropriate protocol for the URL scheme, and return a
108 * write-only stream connected to the document referenced by the URL.
109 */
110FILE *
111fetchPut(struct url *URL, const char *flags)
112{
78 if (us != NULL) {
79 us->size = -1;
80 us->atime = us->mtime = 0;
81 }
82 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
83 return (fetchXGetFile(URL, us, flags));
84 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
85 return (fetchXGetFTP(URL, us, flags));
86 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
87 return (fetchXGetHTTP(URL, us, flags));
88 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
89 return (fetchXGetHTTP(URL, us, flags));
90 url_seterr(URL_BAD_SCHEME);
91 return (NULL);
92}
93
/*
 * Convenience form of fetchXGet() for callers that do not want
 * document metadata: forwards URL and flags and passes NULL for the
 * struct url_stat pointer.  Returns whatever fetchXGet() returns.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
103
104/*
105 * Select the appropriate protocol for the URL scheme, and return a
106 * write-only stream connected to the document referenced by the URL.
107 */
108FILE *
109fetchPut(struct url *URL, const char *flags)
110{
113 int direct;
114
111
115 direct = CHECK_FLAG('d');
116 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
117 return (fetchPutFile(URL, flags));
118 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
119 return (fetchPutFTP(URL, flags));
120 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
121 return (fetchPutHTTP(URL, flags));
122 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
123 return (fetchPutHTTP(URL, flags));
124 url_seterr(URL_BAD_SCHEME);
125 return (NULL);
126}
127
128/*
129 * Select the appropriate protocol for the URL scheme, and return the
130 * size of the document referenced by the URL if it exists.
131 */
132int
133fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134{
112 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113 return (fetchPutFile(URL, flags));
114 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
115 return (fetchPutFTP(URL, flags));
116 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
117 return (fetchPutHTTP(URL, flags));
118 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
119 return (fetchPutHTTP(URL, flags));
120 url_seterr(URL_BAD_SCHEME);
121 return (NULL);
122}
123
124/*
125 * Select the appropriate protocol for the URL scheme, and return the
126 * size of the document referenced by the URL if it exists.
127 */
128int
129fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130{
135 int direct;
136
131
137 direct = CHECK_FLAG('d');
138 if (us != NULL) {
139 us->size = -1;
140 us->atime = us->mtime = 0;
141 }
142 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143 return (fetchStatFile(URL, us, flags));
144 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145 return (fetchStatFTP(URL, us, flags));
146 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147 return (fetchStatHTTP(URL, us, flags));
148 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149 return (fetchStatHTTP(URL, us, flags));
150 url_seterr(URL_BAD_SCHEME);
151 return (-1);
152}
153
154/*
155 * Select the appropriate protocol for the URL scheme, and return a
156 * list of files in the directory pointed to by the URL.
157 */
158struct url_ent *
159fetchList(struct url *URL, const char *flags)
160{
132 if (us != NULL) {
133 us->size = -1;
134 us->atime = us->mtime = 0;
135 }
136 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
137 return (fetchStatFile(URL, us, flags));
138 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
139 return (fetchStatFTP(URL, us, flags));
140 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
141 return (fetchStatHTTP(URL, us, flags));
142 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
143 return (fetchStatHTTP(URL, us, flags));
144 url_seterr(URL_BAD_SCHEME);
145 return (-1);
146}
147
148/*
149 * Select the appropriate protocol for the URL scheme, and return a
150 * list of files in the directory pointed to by the URL.
151 */
152struct url_ent *
153fetchList(struct url *URL, const char *flags)
154{
161 int direct;
162
155
163 direct = CHECK_FLAG('d');
164 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
165 return (fetchListFile(URL, flags));
166 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
167 return (fetchListFTP(URL, flags));
168 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
169 return (fetchListHTTP(URL, flags));
170 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
171 return (fetchListHTTP(URL, flags));
172 url_seterr(URL_BAD_SCHEME);
173 return (NULL);
174}
175
/*
 * String front end to fetchXGet(): parse URL, open the document, and
 * release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
193
/*
 * String front end for callers that do not want document metadata:
 * forwards to fetchXGetURL() with a NULL struct url_stat pointer and
 * returns its result unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *const no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
202
/*
 * String front end to fetchPut(): parse URL, open the document for
 * writing, and release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
220
/*
 * String front end to fetchStat(): parse URL, query the document, and
 * release the parsed URL again before returning.
 *
 * Returns -1 if the URL cannot be parsed; otherwise returns whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int result = -1;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		result = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (result);
}
238
/*
 * String front end to fetchList(): parse URL, list the directory, and
 * release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
256
257/*
258 * Make a URL
259 */
260struct url *
261fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
262 const char *user, const char *pwd)
263{
264 struct url *u;
265
266 if (!scheme || (!host && !doc)) {
267 url_seterr(URL_MALFORMED);
268 return (NULL);
269 }
270
271 if (port < 0 || port > 65535) {
272 url_seterr(URL_BAD_PORT);
273 return (NULL);
274 }
275
276 /* allocate struct url */
277 if ((u = calloc(1, sizeof(*u))) == NULL) {
278 fetch_syserr();
279 return (NULL);
280 }
281
282 if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
283 fetch_syserr();
284 free(u);
285 return (NULL);
286 }
287
288#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
289 seturl(scheme);
290 seturl(host);
291 seturl(user);
292 seturl(pwd);
293#undef seturl
294 u->port = port;
295
296 return (u);
297}
298
299/*
300 * Split an URL into components. URL syntax is:
301 * [method:/][/[user[:pwd]@]host[:port]/][document]
302 * This almost, but not quite, RFC1738 URL syntax.
303 */
304struct url *
305fetchParseURL(const char *URL)
306{
307 char *doc;
308 const char *p, *q;
309 struct url *u;
310 int i;
311
312 /* allocate struct url */
313 if ((u = calloc(1, sizeof(*u))) == NULL) {
314 fetch_syserr();
315 return (NULL);
316 }
317
318 /* scheme name */
319 if ((p = strstr(URL, ":/"))) {
320 snprintf(u->scheme, URL_SCHEMELEN+1,
321 "%.*s", (int)(p - URL), URL);
322 URL = ++p;
323 /*
324 * Only one slash: no host, leave slash as part of document
325 * Two slashes: host follows, strip slashes
326 */
327 if (URL[1] == '/')
328 URL = (p += 2);
329 } else {
330 p = URL;
331 }
332 if (!*URL || *URL == '/' || *URL == '.' ||
333 (u->scheme[0] == '\0' &&
334 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
335 goto nohost;
336
337 p = strpbrk(URL, "/@");
338 if (p && *p == '@') {
339 /* username */
340 for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
341 if (i < URL_USERLEN)
342 u->user[i++] = *q;
343
344 /* password */
345 if (*q == ':')
346 for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
347 if (i < URL_PWDLEN)
348 u->pwd[i++] = *q;
349
350 p++;
351 } else {
352 p = URL;
353 }
354
355 /* hostname */
356#ifdef INET6
357 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
358 (*++q == '\0' || *q == '/' || *q == ':')) {
359 if ((i = q - p - 2) > MAXHOSTNAMELEN)
360 i = MAXHOSTNAMELEN;
361 strncpy(u->host, ++p, i);
362 p = q;
363 } else
364#endif
365 for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
366 if (i < MAXHOSTNAMELEN)
367 u->host[i++] = *p;
368
369 /* port */
370 if (*p == ':') {
371 for (q = ++p; *q && (*q != '/'); q++)
372 if (isdigit((unsigned char)*q))
373 u->port = u->port * 10 + (*q - '0');
374 else {
375 /* invalid port */
376 url_seterr(URL_BAD_PORT);
377 goto ouch;
378 }
379 p = q;
380 }
381
382nohost:
383 /* document */
384 if (!*p)
385 p = "/";
386
387 if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
388 strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
389 const char hexnums[] = "0123456789abcdef";
390
391 /* percent-escape whitespace. */
392 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
393 fetch_syserr();
394 goto ouch;
395 }
396 u->doc = doc;
397 while (*p != '\0') {
398 if (!isspace((unsigned char)*p)) {
399 *doc++ = *p++;
400 } else {
401 *doc++ = '%';
402 *doc++ = hexnums[((unsigned int)*p) >> 4];
403 *doc++ = hexnums[((unsigned int)*p) & 0xf];
404 p++;
405 }
406 }
407 *doc = '\0';
408 } else if ((u->doc = strdup(p)) == NULL) {
409 fetch_syserr();
410 goto ouch;
411 }
412
413 DEBUG(fprintf(stderr,
414 "scheme: [%s]\n"
415 "user: [%s]\n"
416 "password: [%s]\n"
417 "host: [%s]\n"
418 "port: [%d]\n"
419 "document: [%s]\n",
420 u->scheme, u->user, u->pwd,
421 u->host, u->port, u->doc));
422
423 return (u);
424
425ouch:
426 free(u);
427 return (NULL);
428}
429
430/*
431 * Free a URL
432 */
433void
434fetchFreeURL(struct url *u)
435{
436 free(u->doc);
437 free(u);
438}
156 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
157 return (fetchListFile(URL, flags));
158 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
159 return (fetchListFTP(URL, flags));
160 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
161 return (fetchListHTTP(URL, flags));
162 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
163 return (fetchListHTTP(URL, flags));
164 url_seterr(URL_BAD_SCHEME);
165 return (NULL);
166}
167
/*
 * String front end to fetchXGet(): parse URL, open the document, and
 * release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
185
/*
 * String front end for callers that do not want document metadata:
 * forwards to fetchXGetURL() with a NULL struct url_stat pointer and
 * returns its result unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *const no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
194
/*
 * String front end to fetchPut(): parse URL, open the document for
 * writing, and release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
212
/*
 * String front end to fetchStat(): parse URL, query the document, and
 * release the parsed URL again before returning.
 *
 * Returns -1 if the URL cannot be parsed; otherwise returns whatever
 * fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int result = -1;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		result = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (result);
}
230
/*
 * String front end to fetchList(): parse URL, list the directory, and
 * release the parsed URL again before returning.
 *
 * Returns NULL if the URL cannot be parsed; otherwise returns whatever
 * fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
248
249/*
250 * Make a URL
251 */
252struct url *
253fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
254 const char *user, const char *pwd)
255{
256 struct url *u;
257
258 if (!scheme || (!host && !doc)) {
259 url_seterr(URL_MALFORMED);
260 return (NULL);
261 }
262
263 if (port < 0 || port > 65535) {
264 url_seterr(URL_BAD_PORT);
265 return (NULL);
266 }
267
268 /* allocate struct url */
269 if ((u = calloc(1, sizeof(*u))) == NULL) {
270 fetch_syserr();
271 return (NULL);
272 }
273
274 if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275 fetch_syserr();
276 free(u);
277 return (NULL);
278 }
279
280#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
281 seturl(scheme);
282 seturl(host);
283 seturl(user);
284 seturl(pwd);
285#undef seturl
286 u->port = port;
287
288 return (u);
289}
290
291/*
292 * Split an URL into components. URL syntax is:
293 * [method:/][/[user[:pwd]@]host[:port]/][document]
294 * This almost, but not quite, RFC1738 URL syntax.
295 */
296struct url *
297fetchParseURL(const char *URL)
298{
299 char *doc;
300 const char *p, *q;
301 struct url *u;
302 int i;
303
304 /* allocate struct url */
305 if ((u = calloc(1, sizeof(*u))) == NULL) {
306 fetch_syserr();
307 return (NULL);
308 }
309
310 /* scheme name */
311 if ((p = strstr(URL, ":/"))) {
312 snprintf(u->scheme, URL_SCHEMELEN+1,
313 "%.*s", (int)(p - URL), URL);
314 URL = ++p;
315 /*
316 * Only one slash: no host, leave slash as part of document
317 * Two slashes: host follows, strip slashes
318 */
319 if (URL[1] == '/')
320 URL = (p += 2);
321 } else {
322 p = URL;
323 }
324 if (!*URL || *URL == '/' || *URL == '.' ||
325 (u->scheme[0] == '\0' &&
326 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
327 goto nohost;
328
329 p = strpbrk(URL, "/@");
330 if (p && *p == '@') {
331 /* username */
332 for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
333 if (i < URL_USERLEN)
334 u->user[i++] = *q;
335
336 /* password */
337 if (*q == ':')
338 for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
339 if (i < URL_PWDLEN)
340 u->pwd[i++] = *q;
341
342 p++;
343 } else {
344 p = URL;
345 }
346
347 /* hostname */
348#ifdef INET6
349 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
350 (*++q == '\0' || *q == '/' || *q == ':')) {
351 if ((i = q - p - 2) > MAXHOSTNAMELEN)
352 i = MAXHOSTNAMELEN;
353 strncpy(u->host, ++p, i);
354 p = q;
355 } else
356#endif
357 for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
358 if (i < MAXHOSTNAMELEN)
359 u->host[i++] = *p;
360
361 /* port */
362 if (*p == ':') {
363 for (q = ++p; *q && (*q != '/'); q++)
364 if (isdigit((unsigned char)*q))
365 u->port = u->port * 10 + (*q - '0');
366 else {
367 /* invalid port */
368 url_seterr(URL_BAD_PORT);
369 goto ouch;
370 }
371 p = q;
372 }
373
374nohost:
375 /* document */
376 if (!*p)
377 p = "/";
378
379 if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
380 strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
381 const char hexnums[] = "0123456789abcdef";
382
383 /* percent-escape whitespace. */
384 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
385 fetch_syserr();
386 goto ouch;
387 }
388 u->doc = doc;
389 while (*p != '\0') {
390 if (!isspace((unsigned char)*p)) {
391 *doc++ = *p++;
392 } else {
393 *doc++ = '%';
394 *doc++ = hexnums[((unsigned int)*p) >> 4];
395 *doc++ = hexnums[((unsigned int)*p) & 0xf];
396 p++;
397 }
398 }
399 *doc = '\0';
400 } else if ((u->doc = strdup(p)) == NULL) {
401 fetch_syserr();
402 goto ouch;
403 }
404
405 DEBUG(fprintf(stderr,
406 "scheme: [%s]\n"
407 "user: [%s]\n"
408 "password: [%s]\n"
409 "host: [%s]\n"
410 "port: [%d]\n"
411 "document: [%s]\n",
412 u->scheme, u->user, u->pwd,
413 u->host, u->port, u->doc));
414
415 return (u);
416
417ouch:
418 free(u);
419 return (NULL);
420}
421
422/*
423 * Free a URL
424 */
425void
426fetchFreeURL(struct url *u)
427{
428 free(u->doc);
429 free(u);
430}