http.c revision 60196
1/*- 2 * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/lib/libfetch/http.c 60196 2000-05-07 20:51:31Z des $ 29 */ 30 31/* 32 * The base64 code in this file is based on code from MIT fetch, which 33 * has the following copyright and license: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. */ 62 63#include <sys/param.h> 64 65#include <err.h> 66#include <ctype.h> 67#include <netdb.h> 68#include <stdarg.h> 69#include <stdio.h> 70#include <stdlib.h> 71#include <string.h> 72#include <unistd.h> 73 74#include "fetch.h" 75#include "common.h" 76#include "httperr.h" 77 78extern char *__progname; 79 80#define ENDL "\r\n" 81 82#define HTTP_OK 200 83#define HTTP_PARTIAL 206 84 85struct cookie 86{ 87 FILE *real_f; 88#define ENC_NONE 0 89#define ENC_CHUNKED 1 90 int encoding; /* 1 = chunked, 0 = none */ 91#define HTTPCTYPELEN 59 92 char content_type[HTTPCTYPELEN+1]; 93 char *buf; 94 int b_cur, eof; 95 unsigned b_len, chunksize; 96}; 97 98/* 99 * Send a formatted line; optionally echo to terminal 100 */ 101static int 102_http_cmd(FILE *f, char *fmt, ...) 103{ 104 va_list ap; 105 106 va_start(ap, fmt); 107 vfprintf(f, fmt, ap); 108#ifndef NDEBUG 109 fprintf(stderr, "\033[1m>>> "); 110 vfprintf(stderr, fmt, ap); 111 fprintf(stderr, "\033[m"); 112#endif 113 va_end(ap); 114 115 return 0; /* XXX */ 116} 117 118/* 119 * Fill the input buffer, do chunk decoding on the fly 120 */ 121static char * 122_http_fillbuf(struct cookie *c) 123{ 124 char *ln; 125 unsigned int len; 126 127 if (c->eof) 128 return NULL; 129 130 if (c->encoding == ENC_NONE) { 131 c->buf = fgetln(c->real_f, &(c->b_len)); 132 c->b_cur = 0; 133 } else if (c->encoding == ENC_CHUNKED) { 134 if (c->chunksize == 0) { 135 ln = fgetln(c->real_f, &len); 136 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 137 "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 138 sscanf(ln, "%x", &(c->chunksize)); 139 if (!c->chunksize) { 140 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 141 "end of last chunk\033[m\n")); 142 c->eof = 1; 143 return NULL; 144 } 145 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 146 "new chunk: %X\033[m\n", c->chunksize)); 147 } 148 c->buf = fgetln(c->real_f, &(c->b_len)); 149 if (c->b_len > c->chunksize) 150 c->b_len = c->chunksize; 151 c->chunksize -= c->b_len; 152 c->b_cur = 0; 153 } 154 else return NULL; /* unknown encoding */ 155 return c->buf; 156} 157 158/* 159 * Read function 160 */ 161static int 162_http_readfn(struct cookie *c, char *buf, int len) 163{ 164 int l, pos = 0; 165 while (len) { 166 /* empty buffer */ 167 if (!c->buf || (c->b_cur == c->b_len)) 168 if (!_http_fillbuf(c)) 169 break; 170 171 l = c->b_len - c->b_cur; 172 if (len < l) l = len; 173 memcpy(buf + pos, c->buf + c->b_cur, l); 174 c->b_cur += l; 175 pos += l; 176 len -= l; 177 } 178 179 if (ferror(c->real_f)) 180 return -1; 181 else return pos; 182} 183 184/* 185 * Write function 186 */ 187static int 188_http_writefn(struct cookie *c, const char *buf, int len) 189{ 190 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 191 return r ? r : -1; 192} 193 194/* 195 * Close function 196 */ 197static int 198_http_closefn(struct cookie *c) 199{ 200 int r = fclose(c->real_f); 201 free(c); 202 return (r == EOF) ? -1 : 0; 203} 204 205/* 206 * Extract content type from cookie 207 */ 208char * 209fetchContentType(FILE *f) 210{ 211 /* 212 * We have no way of making sure this really *is* one of our cookies, 213 * so just check for a null pointer and hope for the best. 214 */ 215 return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 216} 217 218/* 219 * Base64 encoding 220 */ 221int 222_http_base64(char *dst, char *src, int l) 223{ 224 static const char base64[] = 225 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 226 "abcdefghijklmnopqrstuvwxyz" 227 "0123456789+/"; 228 int t, r = 0; 229 230 while (l >= 3) { 231 t = (src[0] << 16) | (src[1] << 8) | src[2]; 232 dst[0] = base64[(t >> 18) & 0x3f]; 233 dst[1] = base64[(t >> 12) & 0x3f]; 234 dst[2] = base64[(t >> 6) & 0x3f]; 235 dst[3] = base64[(t >> 0) & 0x3f]; 236 src += 3; l -= 3; 237 dst += 4; r += 4; 238 } 239 240 switch (l) { 241 case 2: 242 t = (src[0] << 16) | (src[1] << 8); 243 dst[0] = base64[(t >> 18) & 0x3f]; 244 dst[1] = base64[(t >> 12) & 0x3f]; 245 dst[2] = base64[(t >> 6) & 0x3f]; 246 dst[3] = '='; 247 dst += 4; 248 r += 4; 249 break; 250 case 1: 251 t = src[0] << 16; 252 dst[0] = base64[(t >> 18) & 0x3f]; 253 dst[1] = base64[(t >> 12) & 0x3f]; 254 dst[2] = dst[3] = '='; 255 dst += 4; 256 r += 4; 257 break; 258 case 0: 259 break; 260 } 261 262 *dst = 0; 263 return r; 264} 265 266/* 267 * Encode username and password 268 */ 269char * 270_http_auth(char *usr, char *pwd) 271{ 272 int len, lu, lp; 273 char *str, *s; 274 275 lu = strlen(usr); 276 lp = strlen(pwd); 277 278 len = (lu * 4 + 2) / 3 /* user name, round up */ 279 + 1 /* colon */ 280 + (lp * 4 + 2) / 3 /* password, round up */ 281 + 1; /* null */ 282 283 if ((s = str = (char *)malloc(len)) == NULL) 284 return NULL; 285 286 s += _http_base64(s, usr, lu); 287 *s++ = ':'; 288 s += _http_base64(s, pwd, lp); 289 *s = 0; 290 291 return str; 292} 293 294/* 295 * Retrieve a file by HTTP 296 */ 297FILE * 298fetchGetHTTP(struct url *URL, char *flags) 299{ 300 int sd = -1, e, i, enc = ENC_NONE, direct, verbose; 301 struct cookie *c; 302 char *ln, *p, *px, *q; 303 FILE *f, *cf; 304 size_t len; 305 off_t pos = 0; 306 307 direct = (flags && strchr(flags, 'd')); 308 verbose = (flags && strchr(flags, 'v')); 309 310 /* allocate cookie */ 311 if ((c = calloc(1, sizeof *c)) == NULL) 312 return NULL; 313 314 /* check port */ 315 if (!URL->port) { 316 struct servent *se; 317 318 if ((se = getservbyname("http", "tcp")) != NULL) 319 URL->port = ntohs(se->s_port); 320 else 321 URL->port = 80; 322 } 323 324 /* attempt to connect to proxy server */ 325 if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 326 char host[MAXHOSTNAMELEN]; 327 int port = 0; 328 329 /* measure length */ 330 len = strcspn(px, ":"); 331 332 /* get port (XXX atoi is a little too tolerant perhaps?) */ 333 if (px[len] == ':') { 334 if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 335 || strlen(px+len+1) > 5) { 336 /* XXX we should emit some kind of warning */ 337 } 338 port = atoi(px+len+1); 339 if (port < 1 || port > 65535) { 340 /* XXX we should emit some kind of warning */ 341 } 342 } 343 if (!port) { 344#if 0 345 /* 346 * commented out, since there is currently no service name 347 * for HTTP proxies 348 */ 349 struct servent *se; 350 351 if ((se = getservbyname("xxxx", "tcp")) != NULL) 352 port = ntohs(se->s_port); 353 else 354#endif 355 port = 3128; 356 } 357 358 /* get host name */ 359 if (len >= MAXHOSTNAMELEN) 360 len = MAXHOSTNAMELEN - 1; 361 strncpy(host, px, len); 362 host[len] = 0; 363 364 /* connect */ 365 sd = _fetch_connect(host, port, verbose); 366 } 367 368 /* if no proxy is configured or could be contacted, try direct */ 369 if (sd == -1) { 370 if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1) 371 goto ouch; 372 } 373 374 /* reopen as stream */ 375 if ((f = fdopen(sd, "r+")) == NULL) 376 goto ouch; 377 c->real_f = f; 378 379 /* send request (proxies require absolute form, so use that) */ 380 if (verbose) 381 _fetch_info("requesting http://%s:%d%s", 382 URL->host, URL->port, URL->doc); 383 _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 384 URL->host, URL->port, URL->doc); 385 386 /* start sending headers away */ 387 if (URL->user[0] || URL->pwd[0]) { 388 char *auth_str = _http_auth(URL->user, URL->pwd); 389 if (!auth_str) 390 goto fouch; 391 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 392 free(auth_str); 393 } 394 _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 395 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 396 if (URL->offset) 397 _http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset); 398 _http_cmd(f, "Connection: close" ENDL ENDL); 399 400 /* get response */ 401 if ((ln = fgetln(f, &len)) == NULL) 402 goto fouch; 403 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 404 (int)len-2, (int)len-2, ln)); 405 406 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 407 p = ln; 408 while ((p < ln + len) && !isspace(*p)) 409 p++; 410 while ((p < ln + len) && !isdigit(*p)) 411 p++; 412 if (!isdigit(*p)) 413 goto fouch; 414 e = atoi(p); 415 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 416 417 /* add code to handle redirects later */ 418 if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) { 419 _http_seterr(e); 420 goto fouch; 421 } 422 423 /* browse through header */ 424 while (1) { 425 if ((ln = fgetln(f, &len)) == NULL) 426 goto fouch; 427 if ((ln[0] == '\r') || (ln[0] == '\n')) 428 break; 429 DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 430 (int)len-2, (int)len-2, ln)); 431#define XFERENC "Transfer-Encoding:" 432 if (strncasecmp(ln, XFERENC, sizeof XFERENC - 1) == 0) { 433 p = ln + sizeof XFERENC - 1; 434 while ((p < ln + len) && isspace(*p)) 435 p++; 436 for (q = p; (q < ln + len) && !isspace(*q); q++) 437 /* VOID */ ; 438 *q = 0; 439 if (strcasecmp(p, "chunked") == 0) 440 enc = ENC_CHUNKED; 441 DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 442#undef XFERENC 443#define CONTTYPE "Content-Type:" 444 } else if (strncasecmp(ln, CONTTYPE, sizeof CONTTYPE - 1) == 0) { 445 p = ln + sizeof CONTTYPE - 1; 446 while ((p < ln + len) && isspace(*p)) 447 p++; 448 for (i = 0; p < ln + len; p++) 449 if (i < HTTPCTYPELEN) 450 c->content_type[i++] = *p; 451 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 452 DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 453 c->content_type)); 454#undef CONTTYPE 455#define CONTRANGE "Content-Range:" 456#define BYTES "bytes " 457 } else if (strncasecmp(ln, CONTRANGE, sizeof CONTRANGE - 1) == 0) { 458 p = ln + sizeof CONTRANGE - 1; 459 while ((p < ln + len) && isspace(*p)) 460 p++; 461 if (strncasecmp(p, BYTES, sizeof BYTES - 1) != 0 462 || (p += 6) >= ln + len) 463 goto fouch; 464 while ((p < ln + len) && isdigit(*p)) 465 pos = pos * 10 + (*p++ - '0'); 466 /* XXX wouldn't hurt to be slightly more paranoid here */ 467 DEBUG(fprintf(stderr, "contrange: [\033[1m%lld-\033[m]\n", pos)); 468 if (pos > URL->offset) 469 goto fouch; 470#undef BYTES 471#undef CONTRANGE 472 } 473 } 474 475 /* only body remains */ 476 c->encoding = enc; 477 cf = funopen(c, 478 (int (*)(void *, char *, int))_http_readfn, 479 (int (*)(void *, const char *, int))_http_writefn, 480 (fpos_t (*)(void *, fpos_t, int))NULL, 481 (int (*)(void *))_http_closefn); 482 if (cf == NULL) 483 goto fouch; 484 485 while (pos < URL->offset) 486 if (fgetc(cf) == EOF) 487 goto cfouch; 488 489 return cf; 490 491ouch: 492 if (sd >= 0) 493 close(sd); 494 free(c); 495 _http_seterr(999); /* XXX do this properly RSN */ 496 return NULL; 497fouch: 498 fclose(f); 499 free(c); 500 _http_seterr(999); /* XXX do this properly RSN */ 501 return NULL; 502cfouch: 503 fclose(cf); 504 _http_seterr(999); /* XXX do this properly RSN */ 505 return NULL; 506} 507 508FILE * 509fetchPutHTTP(struct url *URL, char *flags) 510{ 511 warnx("fetchPutHTTP(): not implemented"); 512 return NULL; 513} 514 515/* 516 * Get an HTTP document's metadata 517 */ 518int 519fetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 520{ 521 warnx("fetchStatHTTP(): not implemented"); 522 return -1; 523} 524 525/* 526 * List a directory 527 */ 528struct url_ent * 529fetchListHTTP(struct url *url, char *flags) 530{ 531 warnx("fetchListHTTP(): not implemented"); 532 return NULL; 533} 534