http.c revision 61896
1/*- 2 * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/lib/libfetch/http.c 61896 2000-06-21 09:49:51Z des $ 29 */ 30 31/* 32 * The base64 code in this file is based on code from MIT fetch, which 33 * has the following copyright and license: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. */ 62 63#include <sys/param.h> 64#include <sys/socket.h> 65 66#include <err.h> 67#include <ctype.h> 68#include <locale.h> 69#include <netdb.h> 70#include <stdarg.h> 71#include <stdio.h> 72#include <stdlib.h> 73#include <string.h> 74#include <time.h> 75#include <unistd.h> 76 77#include "fetch.h" 78#include "common.h" 79#include "httperr.h" 80 81extern char *__progname; 82 83#define ENDL "\r\n" 84 85#define HTTP_OK 200 86#define HTTP_PARTIAL 206 87#define HTTP_MOVED 302 88 89struct cookie 90{ 91 FILE *real_f; 92#define ENC_NONE 0 93#define ENC_CHUNKED 1 94 int encoding; /* 1 = chunked, 0 = none */ 95#define HTTPCTYPELEN 59 96 char content_type[HTTPCTYPELEN+1]; 97 char *buf; 98 int b_cur, eof; 99 unsigned b_len, chunksize; 100}; 101 102/* 103 * Send a formatted line; optionally echo to terminal 104 */ 105static int 106_http_cmd(FILE *f, char *fmt, ...) 107{ 108 va_list ap; 109 110 va_start(ap, fmt); 111 vfprintf(f, fmt, ap); 112#ifndef NDEBUG 113 fprintf(stderr, "\033[1m>>> "); 114 vfprintf(stderr, fmt, ap); 115 fprintf(stderr, "\033[m"); 116#endif 117 va_end(ap); 118 119 return 0; /* XXX */ 120} 121 122/* 123 * Fill the input buffer, do chunk decoding on the fly 124 */ 125static char * 126_http_fillbuf(struct cookie *c) 127{ 128 char *ln; 129 unsigned int len; 130 131 if (c->eof) 132 return NULL; 133 134 if (c->encoding == ENC_NONE) { 135 c->buf = fgetln(c->real_f, &(c->b_len)); 136 c->b_cur = 0; 137 } else if (c->encoding == ENC_CHUNKED) { 138 if (c->chunksize == 0) { 139 ln = fgetln(c->real_f, &len); 140 if (len <= 2) 141 return NULL; 142 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 143 "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 144 sscanf(ln, "%x", &(c->chunksize)); 145 if (!c->chunksize) { 146 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 147 "end of last chunk\033[m\n")); 148 c->eof = 1; 149 return NULL; 150 } 151 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 152 "new chunk: %X\033[m\n", c->chunksize)); 153 } 154 c->buf = fgetln(c->real_f, &(c->b_len)); 155 if (c->b_len > c->chunksize) 156 c->b_len = c->chunksize; 157 c->chunksize -= c->b_len; 158 c->b_cur = 0; 159 } 160 else return NULL; /* unknown encoding */ 161 return c->buf; 162} 163 164/* 165 * Read function 166 */ 167static int 168_http_readfn(struct cookie *c, char *buf, int len) 169{ 170 int l, pos = 0; 171 while (len) { 172 /* empty buffer */ 173 if (!c->buf || (c->b_cur == c->b_len)) 174 if (!_http_fillbuf(c)) 175 break; 176 177 l = c->b_len - c->b_cur; 178 if (len < l) l = len; 179 memcpy(buf + pos, c->buf + c->b_cur, l); 180 c->b_cur += l; 181 pos += l; 182 len -= l; 183 } 184 185 if (ferror(c->real_f)) 186 return -1; 187 else return pos; 188} 189 190/* 191 * Write function 192 */ 193static int 194_http_writefn(struct cookie *c, const char *buf, int len) 195{ 196 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 197 return r ? r : -1; 198} 199 200/* 201 * Close function 202 */ 203static int 204_http_closefn(struct cookie *c) 205{ 206 int r = fclose(c->real_f); 207 free(c); 208 return (r == EOF) ? -1 : 0; 209} 210 211/* 212 * Extract content type from cookie 213 */ 214char * 215fetchContentType(FILE *f) 216{ 217 /* 218 * We have no way of making sure this really *is* one of our cookies, 219 * so just check for a null pointer and hope for the best. 220 */ 221 return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 222} 223 224/* 225 * Base64 encoding 226 */ 227int 228_http_base64(char *dst, char *src, int l) 229{ 230 static const char base64[] = 231 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 232 "abcdefghijklmnopqrstuvwxyz" 233 "0123456789+/"; 234 int t, r = 0; 235 236 while (l >= 3) { 237 t = (src[0] << 16) | (src[1] << 8) | src[2]; 238 dst[0] = base64[(t >> 18) & 0x3f]; 239 dst[1] = base64[(t >> 12) & 0x3f]; 240 dst[2] = base64[(t >> 6) & 0x3f]; 241 dst[3] = base64[(t >> 0) & 0x3f]; 242 src += 3; l -= 3; 243 dst += 4; r += 4; 244 } 245 246 switch (l) { 247 case 2: 248 t = (src[0] << 16) | (src[1] << 8); 249 dst[0] = base64[(t >> 18) & 0x3f]; 250 dst[1] = base64[(t >> 12) & 0x3f]; 251 dst[2] = base64[(t >> 6) & 0x3f]; 252 dst[3] = '='; 253 dst += 4; 254 r += 4; 255 break; 256 case 1: 257 t = src[0] << 16; 258 dst[0] = base64[(t >> 18) & 0x3f]; 259 dst[1] = base64[(t >> 12) & 0x3f]; 260 dst[2] = dst[3] = '='; 261 dst += 4; 262 r += 4; 263 break; 264 case 0: 265 break; 266 } 267 268 *dst = 0; 269 return r; 270} 271 272/* 273 * Encode username and password 274 */ 275char * 276_http_auth(char *usr, char *pwd) 277{ 278 int len, lu, lp; 279 char *str, *s; 280 281 lu = strlen(usr); 282 lp = strlen(pwd); 283 284 len = (lu * 4 + 2) / 3 /* user name, round up */ 285 + 1 /* colon */ 286 + (lp * 4 + 2) / 3 /* password, round up */ 287 + 1; /* null */ 288 289 if ((s = str = (char *)malloc(len)) == NULL) 290 return NULL; 291 292 s += _http_base64(s, usr, lu); 293 *s++ = ':'; 294 s += _http_base64(s, pwd, lp); 295 *s = 0; 296 297 return str; 298} 299 300/* 301 * Connect to server or proxy 302 */ 303FILE * 304_http_connect(struct url *URL, char *flags) 305{ 306 int direct, sd = -1, verbose; 307#ifdef INET6 308 int af = AF_UNSPEC; 309#else 310 int af = AF_INET; 311#endif 312 size_t len; 313 char *px; 314 FILE *f; 315 316 direct = (flags && strchr(flags, 'd')); 317 verbose = (flags && strchr(flags, 'v')); 318 if ((flags && strchr(flags, '4'))) 319 af = AF_INET; 320 else if ((flags && strchr(flags, '6'))) 321 af = AF_INET6; 322 323 /* check port */ 324 if (!URL->port) { 325 struct servent *se; 326 327 if (strcasecmp(URL->scheme, "ftp") == 0) 328 if ((se = getservbyname("ftp", "tcp")) != NULL) 329 URL->port = ntohs(se->s_port); 330 else 331 URL->port = 21; 332 else 333 if ((se = getservbyname("http", "tcp")) != NULL) 334 URL->port = ntohs(se->s_port); 335 else 336 URL->port = 80; 337 } 338 339 /* attempt to connect to proxy server */ 340 if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 341 char host[MAXHOSTNAMELEN]; 342 int port = 0; 343 344 /* measure length */ 345#ifdef INET6 346 if (px[0] != '[' || 347 (len = strcspn(px, "]")) >= strlen(px) || 348 (px[++len] != '\0' && px[len] != ':')) 349#endif 350 len = strcspn(px, ":"); 351 352 /* get port (XXX atoi is a little too tolerant perhaps?) */ 353 if (px[len] == ':') { 354 if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 355 || strlen(px+len+1) > 5) { 356 /* XXX we should emit some kind of warning */ 357 } 358 port = atoi(px+len+1); 359 if (port < 1 || port > 65535) { 360 /* XXX we should emit some kind of warning */ 361 } 362 } 363 if (!port) { 364#if 0 365 /* 366 * commented out, since there is currently no service name 367 * for HTTP proxies 368 */ 369 struct servent *se; 370 371 if ((se = getservbyname("xxxx", "tcp")) != NULL) 372 port = ntohs(se->s_port); 373 else 374#endif 375 port = 3128; 376 } 377 378 /* get host name */ 379#ifdef INET6 380 if (len > 1 && px[0] == '[' && px[len - 1] == ']') { 381 px++; 382 len -= 2; 383 } 384#endif 385 if (len >= MAXHOSTNAMELEN) 386 len = MAXHOSTNAMELEN - 1; 387 strncpy(host, px, len); 388 host[len] = 0; 389 390 /* connect */ 391 sd = _fetch_connect(host, port, af, verbose); 392 } 393 394 /* if no proxy is configured or could be contacted, try direct */ 395 if (sd == -1) { 396 if (strcasecmp(URL->scheme, "ftp") == 0) 397 goto ouch; 398 if ((sd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1) 399 goto ouch; 400 } 401 402 /* reopen as stream */ 403 if ((f = fdopen(sd, "r+")) == NULL) 404 goto ouch; 405 406 return f; 407 408ouch: 409 if (sd >= 0) 410 close(sd); 411 _http_seterr(999); /* XXX do this properly RSN */ 412 return NULL; 413} 414 415/* 416 * Check a header line 417 */ 418char * 419_http_match(char *str, char *hdr) 420{ 421 while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 422 /* nothing */; 423 if (*str || *hdr != ':') 424 return NULL; 425 while (*hdr && isspace(*++hdr)) 426 /* nothing */; 427 return hdr; 428} 429 430/* 431 * Send a HEAD or GET request 432 */ 433int 434_http_request(FILE *f, char *op, struct url *URL, char *flags) 435{ 436 int e, verbose; 437 char *ln, *p; 438 size_t len; 439 char *host; 440#ifdef INET6 441 char hbuf[MAXHOSTNAMELEN + 1]; 442#endif 443 444 verbose = (flags && strchr(flags, 'v')); 445 446 host = URL->host; 447#ifdef INET6 448 if (strchr(URL->host, ':')) { 449 snprintf(hbuf, sizeof(hbuf), "[%s]", URL->host); 450 host = hbuf; 451 } 452#endif 453 454 /* send request (proxies require absolute form, so use that) */ 455 if (verbose) 456 _fetch_info("requesting %s://%s:%d%s", 457 URL->scheme, host, URL->port, URL->doc); 458 _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL, 459 op, URL->scheme, host, URL->port, URL->doc); 460 461 /* start sending headers away */ 462 if (URL->user[0] || URL->pwd[0]) { 463 char *auth_str = _http_auth(URL->user, URL->pwd); 464 if (!auth_str) 465 return 999; /* XXX wrong */ 466 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 467 free(auth_str); 468 } 469 _http_cmd(f, "Host: %s:%d" ENDL, host, URL->port); 470 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 471 if (URL->offset) 472 _http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset); 473 _http_cmd(f, "Connection: close" ENDL ENDL); 474 475 /* get response */ 476 if ((ln = fgetln(f, &len)) == NULL) 477 return 999; 478 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 479 (int)len-2, (int)len-2, ln)); 480 481 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 482 p = ln; 483 while ((p < ln + len) && !isspace(*p)) 484 p++; 485 while ((p < ln + len) && !isdigit(*p)) 486 p++; 487 if (!isdigit(*p)) 488 return 999; 489 490 e = atoi(p); 491 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 492 return e; 493} 494 495/* 496 * Retrieve a file by HTTP 497 */ 498FILE * 499fetchGetHTTP(struct url *URL, char *flags) 500{ 501 int e, enc = ENC_NONE, i, noredirect; 502 struct cookie *c; 503 char *ln, *p, *q; 504 FILE *f, *cf; 505 size_t len; 506 off_t pos = 0; 507 508 noredirect = (flags && strchr(flags, 'A')); 509 510 /* allocate cookie */ 511 if ((c = calloc(1, sizeof *c)) == NULL) 512 return NULL; 513 514 /* connect */ 515 if ((f = _http_connect(URL, flags)) == NULL) { 516 free(c); 517 return NULL; 518 } 519 c->real_f = f; 520 521 e = _http_request(f, "GET", URL, flags); 522 if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK) 523 && (e != HTTP_MOVED || noredirect)) { 524 _http_seterr(e); 525 free(c); 526 fclose(f); 527 return NULL; 528 } 529 530 /* browse through header */ 531 while (1) { 532 if ((ln = fgetln(f, &len)) == NULL) 533 goto fouch; 534 if ((ln[0] == '\r') || (ln[0] == '\n')) 535 break; 536 while (isspace(ln[len-1])) 537 --len; 538 ln[len] = '\0'; /* XXX */ 539 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 540 if ((p = _http_match("Location", ln)) != NULL) { 541 struct url *url; 542 543 for (q = p; *q && !isspace(*q); q++) 544 /* VOID */ ; 545 *q = 0; 546 if ((url = fetchParseURL(p)) == NULL) 547 goto fouch; 548 url->offset = URL->offset; 549 url->length = URL->length; 550 DEBUG(fprintf(stderr, "location: [\033[1m%s\033[m]\n", p)); 551 cf = fetchGetHTTP(url, flags); 552 fetchFreeURL(url); 553 fclose(f); 554 return cf; 555 } else if ((p = _http_match("Transfer-Encoding", ln)) != NULL) { 556 for (q = p; *q && !isspace(*q); q++) 557 /* VOID */ ; 558 *q = 0; 559 if (strcasecmp(p, "chunked") == 0) 560 enc = ENC_CHUNKED; 561 DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p)); 562 } else if ((p = _http_match("Content-Type", ln)) != NULL) { 563 for (i = 0; *p && i < HTTPCTYPELEN; p++, i++) 564 c->content_type[i] = *p; 565 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 566 DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n", 567 c->content_type)); 568 } else if ((p = _http_match("Content-Range", ln)) != NULL) { 569 if (strncasecmp(p, "bytes ", 6) != 0) 570 goto fouch; 571 p += 6; 572 while (*p && isdigit(*p)) 573 pos = pos * 10 + (*p++ - '0'); 574 /* XXX wouldn't hurt to be slightly more paranoid here */ 575 DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos)); 576 if (pos > URL->offset) 577 goto fouch; 578 } 579 } 580 581 /* only body remains */ 582 c->encoding = enc; 583 cf = funopen(c, 584 (int (*)(void *, char *, int))_http_readfn, 585 (int (*)(void *, const char *, int))_http_writefn, 586 (fpos_t (*)(void *, fpos_t, int))NULL, 587 (int (*)(void *))_http_closefn); 588 if (cf == NULL) 589 goto fouch; 590 591 while (pos < URL->offset) 592 if (fgetc(cf) == EOF) 593 goto cfouch; 594 595 return cf; 596 597fouch: 598 fclose(f); 599 free(c); 600 _http_seterr(999); /* XXX do this properly RSN */ 601 return NULL; 602cfouch: 603 fclose(cf); 604 _http_seterr(999); /* XXX do this properly RSN */ 605 return NULL; 606} 607 608FILE * 609fetchPutHTTP(struct url *URL, char *flags) 610{ 611 warnx("fetchPutHTTP(): not implemented"); 612 return NULL; 613} 614 615/* 616 * Get an HTTP document's metadata 617 */ 618int 619fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) 620{ 621 int e, noredirect; 622 size_t len; 623 char *ln, *p, *q; 624 FILE *f; 625 626 noredirect = (flags && strchr(flags, 'A')); 627 628 us->size = -1; 629 us->atime = us->mtime = 0; 630 631 /* connect */ 632 if ((f = _http_connect(URL, flags)) == NULL) 633 return -1; 634 635 e = _http_request(f, "HEAD", URL, flags); 636 if (e != HTTP_OK && (e != HTTP_MOVED || noredirect)) { 637 _http_seterr(e); 638 fclose(f); 639 return -1; 640 } 641 642 while (1) { 643 if ((ln = fgetln(f, &len)) == NULL) 644 goto fouch; 645 if ((ln[0] == '\r') || (ln[0] == '\n')) 646 break; 647 while (isspace(ln[len-1])) 648 --len; 649 ln[len] = '\0'; /* XXX */ 650 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 651 if ((p = _http_match("Location", ln)) != NULL) { 652 struct url *url; 653 654 for (q = p; *q && !isspace(*q); q++) 655 /* VOID */ ; 656 *q = 0; 657 if ((url = fetchParseURL(p)) == NULL) 658 goto ouch; 659 url->offset = URL->offset; 660 url->length = URL->length; 661 DEBUG(fprintf(stderr, "location: [\033[1m%s\033[m]\n", p)); 662 e = fetchStatHTTP(url, us, flags); 663 fetchFreeURL(url); 664 fclose(f); 665 return e; 666 } else if ((p = _http_match("Last-Modified", ln)) != NULL) { 667 struct tm tm; 668 char locale[64]; 669 670 strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 671 setlocale(LC_TIME, "C"); 672 strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 673 /* XXX should add support for date-2 and date-3 */ 674 setlocale(LC_TIME, locale); 675 us->atime = us->mtime = timegm(&tm); 676 DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " 677 "%02d:%02d:%02d\033[m]\n", 678 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 679 tm.tm_hour, tm.tm_min, tm.tm_sec)); 680 } else if ((p = _http_match("Content-Length", ln)) != NULL) { 681 us->size = 0; 682 while (*p && isdigit(*p)) 683 us->size = us->size * 10 + (*p++ - '0'); 684 DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size)); 685 } 686 } 687 688 fclose(f); 689 return 0; 690 ouch: 691 _http_seterr(999); /* XXX do this properly RSN */ 692 fouch: 693 fclose(f); 694 return -1; 695} 696 697/* 698 * List a directory 699 */ 700struct url_ent * 701fetchListHTTP(struct url *url, char *flags) 702{ 703 warnx("fetchListHTTP(): not implemented"); 704 return NULL; 705} 706