/* http.c revision 60737 */
1/*- 2 * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * $FreeBSD: head/lib/libfetch/http.c 60737 2000-05-20 18:23:51Z ume $ 29 */ 30 31/* 32 * The base64 code in this file is based on code from MIT fetch, which 33 * has the following copyright and license: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 
*/ 62 63#include <sys/param.h> 64#include <sys/socket.h> 65 66#include <err.h> 67#include <ctype.h> 68#include <locale.h> 69#include <netdb.h> 70#include <stdarg.h> 71#include <stdio.h> 72#include <stdlib.h> 73#include <string.h> 74#include <time.h> 75#include <unistd.h> 76 77#include "fetch.h" 78#include "common.h" 79#include "httperr.h" 80 81extern char *__progname; 82 83#define ENDL "\r\n" 84 85#define HTTP_OK 200 86#define HTTP_PARTIAL 206 87 88struct cookie 89{ 90 FILE *real_f; 91#define ENC_NONE 0 92#define ENC_CHUNKED 1 93 int encoding; /* 1 = chunked, 0 = none */ 94#define HTTPCTYPELEN 59 95 char content_type[HTTPCTYPELEN+1]; 96 char *buf; 97 int b_cur, eof; 98 unsigned b_len, chunksize; 99}; 100 101/* 102 * Send a formatted line; optionally echo to terminal 103 */ 104static int 105_http_cmd(FILE *f, char *fmt, ...) 106{ 107 va_list ap; 108 109 va_start(ap, fmt); 110 vfprintf(f, fmt, ap); 111#ifndef NDEBUG 112 fprintf(stderr, "\033[1m>>> "); 113 vfprintf(stderr, fmt, ap); 114 fprintf(stderr, "\033[m"); 115#endif 116 va_end(ap); 117 118 return 0; /* XXX */ 119} 120 121/* 122 * Fill the input buffer, do chunk decoding on the fly 123 */ 124static char * 125_http_fillbuf(struct cookie *c) 126{ 127 char *ln; 128 unsigned int len; 129 130 if (c->eof) 131 return NULL; 132 133 if (c->encoding == ENC_NONE) { 134 c->buf = fgetln(c->real_f, &(c->b_len)); 135 c->b_cur = 0; 136 } else if (c->encoding == ENC_CHUNKED) { 137 if (c->chunksize == 0) { 138 ln = fgetln(c->real_f, &len); 139 if (len <= 2) 140 return NULL; 141 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 142 "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 143 sscanf(ln, "%x", &(c->chunksize)); 144 if (!c->chunksize) { 145 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 146 "end of last chunk\033[m\n")); 147 c->eof = 1; 148 return NULL; 149 } 150 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 151 "new chunk: %X\033[m\n", c->chunksize)); 152 } 153 c->buf = fgetln(c->real_f, &(c->b_len)); 154 if 
(c->b_len > c->chunksize) 155 c->b_len = c->chunksize; 156 c->chunksize -= c->b_len; 157 c->b_cur = 0; 158 } 159 else return NULL; /* unknown encoding */ 160 return c->buf; 161} 162 163/* 164 * Read function 165 */ 166static int 167_http_readfn(struct cookie *c, char *buf, int len) 168{ 169 int l, pos = 0; 170 while (len) { 171 /* empty buffer */ 172 if (!c->buf || (c->b_cur == c->b_len)) 173 if (!_http_fillbuf(c)) 174 break; 175 176 l = c->b_len - c->b_cur; 177 if (len < l) l = len; 178 memcpy(buf + pos, c->buf + c->b_cur, l); 179 c->b_cur += l; 180 pos += l; 181 len -= l; 182 } 183 184 if (ferror(c->real_f)) 185 return -1; 186 else return pos; 187} 188 189/* 190 * Write function 191 */ 192static int 193_http_writefn(struct cookie *c, const char *buf, int len) 194{ 195 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 196 return r ? r : -1; 197} 198 199/* 200 * Close function 201 */ 202static int 203_http_closefn(struct cookie *c) 204{ 205 int r = fclose(c->real_f); 206 free(c); 207 return (r == EOF) ? -1 : 0; 208} 209 210/* 211 * Extract content type from cookie 212 */ 213char * 214fetchContentType(FILE *f) 215{ 216 /* 217 * We have no way of making sure this really *is* one of our cookies, 218 * so just check for a null pointer and hope for the best. 219 */ 220 return f->_cookie ? 
(((struct cookie *)f->_cookie)->content_type) : NULL; 221} 222 223/* 224 * Base64 encoding 225 */ 226int 227_http_base64(char *dst, char *src, int l) 228{ 229 static const char base64[] = 230 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 231 "abcdefghijklmnopqrstuvwxyz" 232 "0123456789+/"; 233 int t, r = 0; 234 235 while (l >= 3) { 236 t = (src[0] << 16) | (src[1] << 8) | src[2]; 237 dst[0] = base64[(t >> 18) & 0x3f]; 238 dst[1] = base64[(t >> 12) & 0x3f]; 239 dst[2] = base64[(t >> 6) & 0x3f]; 240 dst[3] = base64[(t >> 0) & 0x3f]; 241 src += 3; l -= 3; 242 dst += 4; r += 4; 243 } 244 245 switch (l) { 246 case 2: 247 t = (src[0] << 16) | (src[1] << 8); 248 dst[0] = base64[(t >> 18) & 0x3f]; 249 dst[1] = base64[(t >> 12) & 0x3f]; 250 dst[2] = base64[(t >> 6) & 0x3f]; 251 dst[3] = '='; 252 dst += 4; 253 r += 4; 254 break; 255 case 1: 256 t = src[0] << 16; 257 dst[0] = base64[(t >> 18) & 0x3f]; 258 dst[1] = base64[(t >> 12) & 0x3f]; 259 dst[2] = dst[3] = '='; 260 dst += 4; 261 r += 4; 262 break; 263 case 0: 264 break; 265 } 266 267 *dst = 0; 268 return r; 269} 270 271/* 272 * Encode username and password 273 */ 274char * 275_http_auth(char *usr, char *pwd) 276{ 277 int len, lu, lp; 278 char *str, *s; 279 280 lu = strlen(usr); 281 lp = strlen(pwd); 282 283 len = (lu * 4 + 2) / 3 /* user name, round up */ 284 + 1 /* colon */ 285 + (lp * 4 + 2) / 3 /* password, round up */ 286 + 1; /* null */ 287 288 if ((s = str = (char *)malloc(len)) == NULL) 289 return NULL; 290 291 s += _http_base64(s, usr, lu); 292 *s++ = ':'; 293 s += _http_base64(s, pwd, lp); 294 *s = 0; 295 296 return str; 297} 298 299/* 300 * Connect to server or proxy 301 */ 302FILE * 303_http_connect(struct url *URL, char *flags) 304{ 305 int direct, sd = -1, verbose; 306#ifdef INET6 307 int af = AF_UNSPEC; 308#else 309 int af = AF_INET; 310#endif 311 size_t len; 312 char *px; 313 FILE *f; 314 315 direct = (flags && strchr(flags, 'd')); 316 verbose = (flags && strchr(flags, 'v')); 317 if ((flags && strchr(flags, '4'))) 318 af = 
AF_INET; 319 else if ((flags && strchr(flags, '6'))) 320 af = AF_INET6; 321 322 /* check port */ 323 if (!URL->port) { 324 struct servent *se; 325 326 if (strcasecmp(URL->scheme, "ftp") == 0) 327 if ((se = getservbyname("ftp", "tcp")) != NULL) 328 URL->port = ntohs(se->s_port); 329 else 330 URL->port = 21; 331 else 332 if ((se = getservbyname("http", "tcp")) != NULL) 333 URL->port = ntohs(se->s_port); 334 else 335 URL->port = 80; 336 } 337 338 /* attempt to connect to proxy server */ 339 if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 340 char host[MAXHOSTNAMELEN]; 341 int port = 0; 342 343 /* measure length */ 344#ifdef INET6 345 if (px[0] != '[' || 346 (len = strcspn(px, "]")) >= strlen(px) || 347 (px[++len] != '\0' && px[len] != ':')) 348#endif 349 len = strcspn(px, ":"); 350 351 /* get port (XXX atoi is a little too tolerant perhaps?) */ 352 if (px[len] == ':') { 353 if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 354 || strlen(px+len+1) > 5) { 355 /* XXX we should emit some kind of warning */ 356 } 357 port = atoi(px+len+1); 358 if (port < 1 || port > 65535) { 359 /* XXX we should emit some kind of warning */ 360 } 361 } 362 if (!port) { 363#if 0 364 /* 365 * commented out, since there is currently no service name 366 * for HTTP proxies 367 */ 368 struct servent *se; 369 370 if ((se = getservbyname("xxxx", "tcp")) != NULL) 371 port = ntohs(se->s_port); 372 else 373#endif 374 port = 3128; 375 } 376 377 /* get host name */ 378#ifdef INET6 379 if (len > 1 && px[0] == '[' && px[len - 1] == ']') { 380 px++; 381 len -= 2; 382 } 383#endif 384 if (len >= MAXHOSTNAMELEN) 385 len = MAXHOSTNAMELEN - 1; 386 strncpy(host, px, len); 387 host[len] = 0; 388 389 /* connect */ 390 sd = _fetch_connect(host, port, af, verbose); 391 } 392 393 /* if no proxy is configured or could be contacted, try direct */ 394 if (sd == -1) { 395 if (strcasecmp(URL->scheme, "ftp") == 0) 396 goto ouch; 397 if ((sd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1) 398 goto 
ouch; 399 } 400 401 /* reopen as stream */ 402 if ((f = fdopen(sd, "r+")) == NULL) 403 goto ouch; 404 405 return f; 406 407ouch: 408 if (sd >= 0) 409 close(sd); 410 _http_seterr(999); /* XXX do this properly RSN */ 411 return NULL; 412} 413 414/* 415 * Send a HEAD or GET request 416 */ 417int 418_http_request(FILE *f, char *op, struct url *URL, char *flags) 419{ 420 int e, verbose; 421 char *ln, *p; 422 size_t len; 423 char *host; 424#ifdef INET6 425 char hbuf[MAXHOSTNAMELEN + 1]; 426#endif 427 428 verbose = (flags && strchr(flags, 'v')); 429 430 host = URL->host; 431#ifdef INET6 432 if (strchr(URL->host, ':')) { 433 snprintf(hbuf, sizeof(hbuf), "[%s]", URL->host); 434 host = hbuf; 435 } 436#endif 437 438 /* send request (proxies require absolute form, so use that) */ 439 if (verbose) 440 _fetch_info("requesting %s://%s:%d%s", 441 URL->scheme, host, URL->port, URL->doc); 442 _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL, 443 op, URL->scheme, host, URL->port, URL->doc); 444 445 /* start sending headers away */ 446 if (URL->user[0] || URL->pwd[0]) { 447 char *auth_str = _http_auth(URL->user, URL->pwd); 448 if (!auth_str) 449 return 999; /* XXX wrong */ 450 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 451 free(auth_str); 452 } 453 _http_cmd(f, "Host: %s:%d" ENDL, host, URL->port); 454 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 455 if (URL->offset) 456 _http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset); 457 _http_cmd(f, "Connection: close" ENDL ENDL); 458 459 /* get response */ 460 if ((ln = fgetln(f, &len)) == NULL) 461 return 999; 462 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 463 (int)len-2, (int)len-2, ln)); 464 465 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 466 p = ln; 467 while ((p < ln + len) && !isspace(*p)) 468 p++; 469 while ((p < ln + len) && !isdigit(*p)) 470 p++; 471 if (!isdigit(*p)) 472 return 999; 473 474 e = atoi(p); 475 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 
476 return e; 477} 478 479/* 480 * Check a header line 481 */ 482char * 483_http_match(char *str, char *hdr) 484{ 485 while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 486 /* nothing */; 487 if (*str || *hdr != ':') 488 return NULL; 489 while (*hdr && isspace(*++hdr)) 490 /* nothing */; 491 return hdr; 492} 493 494/* 495 * Retrieve a file by HTTP 496 */ 497FILE * 498fetchGetHTTP(struct url *URL, char *flags) 499{ 500 int e, enc = ENC_NONE, i; 501 struct cookie *c; 502 char *ln, *p, *q; 503 FILE *f, *cf; 504 size_t len; 505 off_t pos = 0; 506 507 /* allocate cookie */ 508 if ((c = calloc(1, sizeof *c)) == NULL) 509 return NULL; 510 511 /* connect */ 512 if ((f = _http_connect(URL, flags)) == NULL) { 513 free(c); 514 return NULL; 515 } 516 c->real_f = f; 517 518 e = _http_request(f, "GET", URL, flags); 519 520 /* add code to handle redirects later */ 521 if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) { 522 _http_seterr(e); 523 goto fouch; 524 } 525 526 /* browse through header */ 527 while (1) { 528 if ((ln = fgetln(f, &len)) == NULL) 529 goto fouch; 530 if ((ln[0] == '\r') || (ln[0] == '\n')) 531 break; 532 while (isspace(ln[len-1])) 533 --len; 534 ln[len] = '\0'; /* XXX */ 535 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 536 if ((p = _http_match("Transfer-Encoding", ln)) != NULL) { 537 for (q = p; *q && !isspace(*q); q++) 538 /* VOID */ ; 539 *q = 0; 540 if (strcasecmp(p, "chunked") == 0) 541 enc = ENC_CHUNKED; 542 DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p)); 543 } else if ((p = _http_match("Content-Type", ln)) != NULL) { 544 for (i = 0; *p && i < HTTPCTYPELEN; p++, i++) 545 c->content_type[i] = *p; 546 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 547 DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n", 548 c->content_type)); 549 } else if ((p = _http_match("Content-Range", ln)) != NULL) { 550 if (strncasecmp(p, "bytes ", 6) != 0) 551 goto fouch; 552 p += 6; 553 while (*p && isdigit(*p)) 
554 pos = pos * 10 + (*p++ - '0'); 555 /* XXX wouldn't hurt to be slightly more paranoid here */ 556 DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos)); 557 if (pos > URL->offset) 558 goto fouch; 559 } 560 } 561 562 /* only body remains */ 563 c->encoding = enc; 564 cf = funopen(c, 565 (int (*)(void *, char *, int))_http_readfn, 566 (int (*)(void *, const char *, int))_http_writefn, 567 (fpos_t (*)(void *, fpos_t, int))NULL, 568 (int (*)(void *))_http_closefn); 569 if (cf == NULL) 570 goto fouch; 571 572 while (pos < URL->offset) 573 if (fgetc(cf) == EOF) 574 goto cfouch; 575 576 return cf; 577 578fouch: 579 fclose(f); 580 free(c); 581 _http_seterr(999); /* XXX do this properly RSN */ 582 return NULL; 583cfouch: 584 fclose(cf); 585 _http_seterr(999); /* XXX do this properly RSN */ 586 return NULL; 587} 588 589FILE * 590fetchPutHTTP(struct url *URL, char *flags) 591{ 592 warnx("fetchPutHTTP(): not implemented"); 593 return NULL; 594} 595 596/* 597 * Get an HTTP document's metadata 598 */ 599int 600fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) 601{ 602 int e; 603 size_t len; 604 char *ln, *p; 605 FILE *f; 606 607 us->size = -1; 608 us->atime = us->mtime = 0; 609 610 /* connect */ 611 if ((f = _http_connect(URL, flags)) == NULL) 612 return -1; 613 614 if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) { 615 _http_seterr(e); 616 goto ouch; 617 } 618 619 while (1) { 620 if ((ln = fgetln(f, &len)) == NULL) 621 goto fouch; 622 if ((ln[0] == '\r') || (ln[0] == '\n')) 623 break; 624 while (isspace(ln[len-1])) 625 --len; 626 ln[len] = '\0'; /* XXX */ 627 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 628 if ((p = _http_match("Last-Modified", ln)) != NULL) { 629 struct tm tm; 630 char locale[64]; 631 632 strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 633 setlocale(LC_TIME, "C"); 634 strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 635 /* XXX should add support for date-2 and date-3 */ 636 setlocale(LC_TIME, 
locale); 637 us->atime = us->mtime = timegm(&tm); 638 DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " 639 "%02d:%02d:%02d\033[m]\n", 640 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 641 tm.tm_hour, tm.tm_min, tm.tm_sec)); 642 } else if ((p = _http_match("Content-Length", ln)) != NULL) { 643 us->size = 0; 644 while (*p && isdigit(*p)) 645 us->size = us->size * 10 + (*p++ - '0'); 646 DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size)); 647 } 648 } 649 650 fclose(f); 651 return 0; 652 ouch: 653 _http_seterr(999); /* XXX do this properly RSN */ 654 fouch: 655 fclose(f); 656 return -1; 657} 658 659/* 660 * List a directory 661 */ 662struct url_ent * 663fetchListHTTP(struct url *url, char *flags) 664{ 665 warnx("fetchListHTTP(): not implemented"); 666 return NULL; 667} 668