http.c revision 60707
1139749Simp/*- 219410Sguido * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 319410Sguido * All rights reserved. 419410Sguido * 519410Sguido * Redistribution and use in source and binary forms, with or without 619410Sguido * modification, are permitted provided that the following conditions 719410Sguido * are met: 819410Sguido * 1. Redistributions of source code must retain the above copyright 919410Sguido * notice, this list of conditions and the following disclaimer 1019410Sguido * in this position and unchanged. 1119410Sguido * 2. Redistributions in binary form must reproduce the above copyright 1219410Sguido * notice, this list of conditions and the following disclaimer in the 1319410Sguido * documentation and/or other materials provided with the distribution. 1419410Sguido * 3. The name of the author may not be used to endorse or promote products 1519410Sguido * derived from this software without specific prior written permission 1619410Sguido * 1719410Sguido * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1819410Sguido * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1919410Sguido * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2019410Sguido * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2119410Sguido * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2219410Sguido * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2319410Sguido * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2419410Sguido * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2519410Sguido * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2619410Sguido * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2719410Sguido * 2819410Sguido * $FreeBSD: head/lib/libfetch/http.c 60707 2000-05-19 09:45:42Z des $ 2919410Sguido */ 3019410Sguido 31119418Sobrien/* 32119418Sobrien * The base64 code in this file is based on code from MIT fetch, which 33119418Sobrien * has the following copyright and license: 3419410Sguido * 3519410Sguido *- 3619410Sguido * Copyright 1997 Massachusetts Institute of Technology 3719410Sguido * 3845791Speter * Permission to use, copy, modify, and distribute this software and 3945791Speter * its documentation for any purpose and without fee is hereby 4019410Sguido * granted, provided that both the above copyright notice and this 4145791Speter * permission notice appear in all copies, that both the above 4245791Speter * copyright notice and this permission notice appear in all 4345791Speter * supporting documentation, and that the name of M.I.T. not be used 4445791Speter * in advertising or publicity pertaining to distribution of the 45151017Sjhb * software without specific, written prior permission. M.I.T. makes 4619410Sguido * no representations about the suitability of this software for any 4732350Seivind * purpose. It is provided "as is" without express or implied 4819410Sguido * warranty. 4955953Speter * 5019410Sguido * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 5119410Sguido * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52121491Simp * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 5319410Sguido * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 5419410Sguido * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 5519410Sguido * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 5619410Sguido * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 5719410Sguido * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 5819410Sguido * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 5919410Sguido * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 6019410Sguido * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 6119410Sguido * SUCH DAMAGE. */ 6219410Sguido 6319410Sguido#include <sys/param.h> 6419410Sguido 6592739Salfred#include <err.h> 6619410Sguido#include <ctype.h> 67133980Sgibbs#include <locale.h> 6845791Speter#include <netdb.h> 6919410Sguido#include <stdarg.h> 70133980Sgibbs#include <stdio.h> 71133980Sgibbs#include <stdlib.h> 72182143Simp#include <string.h> 73133980Sgibbs#include <time.h> 74182143Simp#include <unistd.h> 75133980Sgibbs 76182143Simp#include "fetch.h" 77133980Sgibbs#include "common.h" 78182143Simp#include "httperr.h" 79133980Sgibbs 80133980Sgibbsextern char *__progname; 81133980Sgibbs 82133980Sgibbs#define ENDL "\r\n" 8319410Sguido 8419410Sguido#define HTTP_OK 200 8519410Sguido#define HTTP_PARTIAL 206 8645791Speter 8719410Sguidostruct cookie 88133980Sgibbs{ 89133980Sgibbs FILE *real_f; 90133980Sgibbs#define ENC_NONE 0 9119410Sguido#define ENC_CHUNKED 1 92133980Sgibbs int encoding; /* 1 = chunked, 0 = none */ 93133980Sgibbs#define HTTPCTYPELEN 59 94133980Sgibbs char content_type[HTTPCTYPELEN+1]; 95133980Sgibbs char *buf; 9619410Sguido int b_cur, eof; 97133980Sgibbs unsigned b_len, chunksize; 98133980Sgibbs}; 9919410Sguido 100133980Sgibbs/* 101133980Sgibbs * Send a formatted line; optionally echo to terminal 10219410Sguido */ 103133980Sgibbsstatic int 104133980Sgibbs_http_cmd(FILE *f, char *fmt, ...) 105133980Sgibbs{ 10645791Speter va_list ap; 107133980Sgibbs 10819410Sguido va_start(ap, fmt); 10919410Sguido vfprintf(f, fmt, ap); 11019410Sguido#ifndef NDEBUG 11145791Speter fprintf(stderr, "\033[1m>>> "); 11219410Sguido vfprintf(stderr, fmt, ap); 113133980Sgibbs fprintf(stderr, "\033[m"); 114133980Sgibbs#endif 115133980Sgibbs va_end(ap); 116133980Sgibbs 117133980Sgibbs return 0; /* XXX */ 11819410Sguido} 119133980Sgibbs 120133980Sgibbs/* 121133980Sgibbs * Fill the input buffer, do chunk decoding on the fly 122133980Sgibbs */ 123133980Sgibbsstatic char * 124133980Sgibbs_http_fillbuf(struct cookie *c) 125133980Sgibbs{ 126133980Sgibbs char *ln; 127133980Sgibbs unsigned int len; 128133980Sgibbs 129133980Sgibbs if (c->eof) 130133980Sgibbs return NULL; 131133980Sgibbs 132133980Sgibbs if (c->encoding == ENC_NONE) { 133133980Sgibbs c->buf = fgetln(c->real_f, &(c->b_len)); 134133980Sgibbs c->b_cur = 0; 135133980Sgibbs } else if (c->encoding == ENC_CHUNKED) { 136133980Sgibbs if (c->chunksize == 0) { 13729674Sgibbs ln = fgetln(c->real_f, &len); 138133980Sgibbs if (len <= 2) 139151014Sjhb return NULL; 140151014Sjhb DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 14129674Sgibbs "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 142133980Sgibbs sscanf(ln, "%x", &(c->chunksize)); 143133980Sgibbs if (!c->chunksize) { 144133980Sgibbs DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 145133980Sgibbs "end of last chunk\033[m\n")); 146133980Sgibbs c->eof = 1; 147133980Sgibbs return NULL; 148133980Sgibbs } 14919410Sguido DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 150133980Sgibbs "new chunk: %X\033[m\n", c->chunksize)); 15119410Sguido } 152151014Sjhb c->buf = fgetln(c->real_f, &(c->b_len)); 153133980Sgibbs if (c->b_len > c->chunksize) 15468417Swpaul c->b_len = c->chunksize; 155166901Spiso c->chunksize -= c->b_len; 156166901Spiso c->b_cur = 0; 157151014Sjhb } 15819410Sguido else return NULL; /* unknown encoding */ 159151014Sjhb return c->buf; 16019410Sguido} 161151014Sjhb 162151014Sjhb/* 163151017Sjhb * Read function 164151017Sjhb */ 165133980Sgibbsstatic int 166133980Sgibbs_http_readfn(struct cookie *c, char *buf, int len) 167133980Sgibbs{ 168133980Sgibbs int l, pos = 0; 169133980Sgibbs while (len) { 170133980Sgibbs /* empty buffer */ 171133980Sgibbs if (!c->buf || (c->b_cur == c->b_len)) 172151014Sjhb if (!_http_fillbuf(c)) 17319410Sguido break; 17419410Sguido 17545791Speter l = c->b_len - c->b_cur; 17645791Speter if (len < l) l = len; 177133980Sgibbs memcpy(buf + pos, c->buf + c->b_cur, l); 178133980Sgibbs c->b_cur += l; 17945791Speter pos += l; 180246128Ssbz len -= l; 18145791Speter } 18245791Speter 18345791Speter if (ferror(c->real_f)) 18445791Speter return -1; 18545791Speter else return pos; 186133518Sgibbs} 18745791Speter 18845791Speter/* 18945791Speter * Write function 19045791Speter */ 19145791Speterstatic int 192_http_writefn(struct cookie *c, const char *buf, int len) 193{ 194 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 195 return r ? r : -1; 196} 197 198/* 199 * Close function 200 */ 201static int 202_http_closefn(struct cookie *c) 203{ 204 int r = fclose(c->real_f); 205 free(c); 206 return (r == EOF) ? -1 : 0; 207} 208 209/* 210 * Extract content type from cookie 211 */ 212char * 213fetchContentType(FILE *f) 214{ 215 /* 216 * We have no way of making sure this really *is* one of our cookies, 217 * so just check for a null pointer and hope for the best. 218 */ 219 return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 220} 221 222/* 223 * Base64 encoding 224 */ 225int 226_http_base64(char *dst, char *src, int l) 227{ 228 static const char base64[] = 229 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 230 "abcdefghijklmnopqrstuvwxyz" 231 "0123456789+/"; 232 int t, r = 0; 233 234 while (l >= 3) { 235 t = (src[0] << 16) | (src[1] << 8) | src[2]; 236 dst[0] = base64[(t >> 18) & 0x3f]; 237 dst[1] = base64[(t >> 12) & 0x3f]; 238 dst[2] = base64[(t >> 6) & 0x3f]; 239 dst[3] = base64[(t >> 0) & 0x3f]; 240 src += 3; l -= 3; 241 dst += 4; r += 4; 242 } 243 244 switch (l) { 245 case 2: 246 t = (src[0] << 16) | (src[1] << 8); 247 dst[0] = base64[(t >> 18) & 0x3f]; 248 dst[1] = base64[(t >> 12) & 0x3f]; 249 dst[2] = base64[(t >> 6) & 0x3f]; 250 dst[3] = '='; 251 dst += 4; 252 r += 4; 253 break; 254 case 1: 255 t = src[0] << 16; 256 dst[0] = base64[(t >> 18) & 0x3f]; 257 dst[1] = base64[(t >> 12) & 0x3f]; 258 dst[2] = dst[3] = '='; 259 dst += 4; 260 r += 4; 261 break; 262 case 0: 263 break; 264 } 265 266 *dst = 0; 267 return r; 268} 269 270/* 271 * Encode username and password 272 */ 273char * 274_http_auth(char *usr, char *pwd) 275{ 276 int len, lu, lp; 277 char *str, *s; 278 279 lu = strlen(usr); 280 lp = strlen(pwd); 281 282 len = (lu * 4 + 2) / 3 /* user name, round up */ 283 + 1 /* colon */ 284 + (lp * 4 + 2) / 3 /* password, round up */ 285 + 1; /* null */ 286 287 if ((s = str = (char *)malloc(len)) == NULL) 288 return NULL; 289 290 s += _http_base64(s, usr, lu); 291 *s++ = ':'; 292 s += _http_base64(s, pwd, lp); 293 *s = 0; 294 295 return str; 296} 297 298/* 299 * Connect to server or proxy 300 */ 301FILE * 302_http_connect(struct url *URL, char *flags) 303{ 304 int direct, sd = -1, verbose; 305 size_t len; 306 char *px; 307 FILE *f; 308 309 direct = (flags && strchr(flags, 'd')); 310 verbose = (flags && strchr(flags, 'v')); 311 312 /* check port */ 313 if (!URL->port) { 314 struct servent *se; 315 316 if (strcasecmp(URL->scheme, "ftp") == 0) 317 if ((se = getservbyname("ftp", "tcp")) != NULL) 318 URL->port = ntohs(se->s_port); 319 else 320 URL->port = 21; 321 else 322 if ((se = getservbyname("http", "tcp")) != NULL) 323 URL->port = ntohs(se->s_port); 324 else 325 URL->port = 80; 326 } 327 328 /* attempt to connect to proxy server */ 329 if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 330 char host[MAXHOSTNAMELEN]; 331 int port = 0; 332 333 /* measure length */ 334 len = strcspn(px, ":"); 335 336 /* get port (XXX atoi is a little too tolerant perhaps?) */ 337 if (px[len] == ':') { 338 if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 339 || strlen(px+len+1) > 5) { 340 /* XXX we should emit some kind of warning */ 341 } 342 port = atoi(px+len+1); 343 if (port < 1 || port > 65535) { 344 /* XXX we should emit some kind of warning */ 345 } 346 } 347 if (!port) { 348#if 0 349 /* 350 * commented out, since there is currently no service name 351 * for HTTP proxies 352 */ 353 struct servent *se; 354 355 if ((se = getservbyname("xxxx", "tcp")) != NULL) 356 port = ntohs(se->s_port); 357 else 358#endif 359 port = 3128; 360 } 361 362 /* get host name */ 363 if (len >= MAXHOSTNAMELEN) 364 len = MAXHOSTNAMELEN - 1; 365 strncpy(host, px, len); 366 host[len] = 0; 367 368 /* connect */ 369 sd = _fetch_connect(host, port, verbose); 370 } 371 372 /* if no proxy is configured or could be contacted, try direct */ 373 if (sd == -1) { 374 if (strcasecmp(URL->scheme, "ftp") == 0) 375 goto ouch; 376 if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1) 377 goto ouch; 378 } 379 380 /* reopen as stream */ 381 if ((f = fdopen(sd, "r+")) == NULL) 382 goto ouch; 383 384 return f; 385 386ouch: 387 if (sd >= 0) 388 close(sd); 389 _http_seterr(999); /* XXX do this properly RSN */ 390 return NULL; 391} 392 393/* 394 * Send a HEAD or GET request 395 */ 396int 397_http_request(FILE *f, char *op, struct url *URL, char *flags) 398{ 399 int e, verbose; 400 char *ln, *p; 401 size_t len; 402 403 verbose = (flags && strchr(flags, 'v')); 404 405 /* send request (proxies require absolute form, so use that) */ 406 if (verbose) 407 _fetch_info("requesting %s://%s:%d%s", 408 URL->scheme, URL->host, URL->port, URL->doc); 409 _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL, 410 op, URL->scheme, URL->host, URL->port, URL->doc); 411 412 /* start sending headers away */ 413 if (URL->user[0] || URL->pwd[0]) { 414 char *auth_str = _http_auth(URL->user, URL->pwd); 415 if (!auth_str) 416 return 999; /* XXX wrong */ 417 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 418 free(auth_str); 419 } 420 _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 421 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 422 if (URL->offset) 423 _http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset); 424 _http_cmd(f, "Connection: close" ENDL ENDL); 425 426 /* get response */ 427 if ((ln = fgetln(f, &len)) == NULL) 428 return 999; 429 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 430 (int)len-2, (int)len-2, ln)); 431 432 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 433 p = ln; 434 while ((p < ln + len) && !isspace(*p)) 435 p++; 436 while ((p < ln + len) && !isdigit(*p)) 437 p++; 438 if (!isdigit(*p)) 439 return 999; 440 441 e = atoi(p); 442 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 443 return e; 444} 445 446/* 447 * Check a header line 448 */ 449char * 450_http_match(char *str, char *hdr) 451{ 452 while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 453 /* nothing */; 454 if (*str || *hdr != ':') 455 return NULL; 456 while (*hdr && isspace(*++hdr)) 457 /* nothing */; 458 return hdr; 459} 460 461/* 462 * Retrieve a file by HTTP 463 */ 464FILE * 465fetchGetHTTP(struct url *URL, char *flags) 466{ 467 int e, enc = ENC_NONE, i; 468 struct cookie *c; 469 char *ln, *p, *q; 470 FILE *f, *cf; 471 size_t len; 472 off_t pos = 0; 473 474 /* allocate cookie */ 475 if ((c = calloc(1, sizeof *c)) == NULL) 476 return NULL; 477 478 /* connect */ 479 if ((f = _http_connect(URL, flags)) == NULL) { 480 free(c); 481 return NULL; 482 } 483 c->real_f = f; 484 485 e = _http_request(f, "GET", URL, flags); 486 487 /* add code to handle redirects later */ 488 if (e != (URL->offset ? HTTP_PARTIAL : HTTP_OK)) { 489 _http_seterr(e); 490 goto fouch; 491 } 492 493 /* browse through header */ 494 while (1) { 495 if ((ln = fgetln(f, &len)) == NULL) 496 goto fouch; 497 if ((ln[0] == '\r') || (ln[0] == '\n')) 498 break; 499 while (isspace(ln[len-1])) 500 --len; 501 ln[len] = '\0'; /* XXX */ 502 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 503 if ((p = _http_match("Transfer-Encoding", ln)) != NULL) { 504 for (q = p; *q && !isspace(*q); q++) 505 /* VOID */ ; 506 *q = 0; 507 if (strcasecmp(p, "chunked") == 0) 508 enc = ENC_CHUNKED; 509 DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p)); 510 } else if ((p = _http_match("Content-Type", ln)) != NULL) { 511 for (i = 0; *p && i < HTTPCTYPELEN; p++, i++) 512 c->content_type[i] = *p; 513 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 514 DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n", 515 c->content_type)); 516 } else if ((p = _http_match("Content-Range", ln)) != NULL) { 517 if (strncasecmp(p, "bytes ", 6) != 0) 518 goto fouch; 519 p += 6; 520 while (*p && isdigit(*p)) 521 pos = pos * 10 + (*p++ - '0'); 522 /* XXX wouldn't hurt to be slightly more paranoid here */ 523 DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos)); 524 if (pos > URL->offset) 525 goto fouch; 526 } 527 } 528 529 /* only body remains */ 530 c->encoding = enc; 531 cf = funopen(c, 532 (int (*)(void *, char *, int))_http_readfn, 533 (int (*)(void *, const char *, int))_http_writefn, 534 (fpos_t (*)(void *, fpos_t, int))NULL, 535 (int (*)(void *))_http_closefn); 536 if (cf == NULL) 537 goto fouch; 538 539 while (pos < URL->offset) 540 if (fgetc(cf) == EOF) 541 goto cfouch; 542 543 return cf; 544 545fouch: 546 fclose(f); 547 free(c); 548 _http_seterr(999); /* XXX do this properly RSN */ 549 return NULL; 550cfouch: 551 fclose(cf); 552 _http_seterr(999); /* XXX do this properly RSN */ 553 return NULL; 554} 555 556FILE * 557fetchPutHTTP(struct url *URL, char *flags) 558{ 559 warnx("fetchPutHTTP(): not implemented"); 560 return NULL; 561} 562 563/* 564 * Get an HTTP document's metadata 565 */ 566int 567fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) 568{ 569 int e; 570 size_t len; 571 char *ln, *p; 572 FILE *f; 573 574 us->size = -1; 575 us->atime = us->mtime = 0; 576 577 /* connect */ 578 if ((f = _http_connect(URL, flags)) == NULL) 579 return -1; 580 581 if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) { 582 _http_seterr(e); 583 goto ouch; 584 } 585 586 while (1) { 587 if ((ln = fgetln(f, &len)) == NULL) 588 goto fouch; 589 if ((ln[0] == '\r') || (ln[0] == '\n')) 590 break; 591 while (isspace(ln[len-1])) 592 --len; 593 ln[len] = '\0'; /* XXX */ 594 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 595 if ((p = _http_match("Last-Modified", ln)) != NULL) { 596 struct tm tm; 597 char locale[64]; 598 599 strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 600 setlocale(LC_TIME, "C"); 601 strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 602 /* XXX should add support for date-2 and date-3 */ 603 setlocale(LC_TIME, locale); 604 us->atime = us->mtime = timegm(&tm); 605 DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " 606 "%02d:%02d:%02d\033[m]\n", 607 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 608 tm.tm_hour, tm.tm_min, tm.tm_sec)); 609 } else if ((p = _http_match("Content-Length", ln)) != NULL) { 610 us->size = 0; 611 while (*p && isdigit(*p)) 612 us->size = us->size * 10 + (*p++ - '0'); 613 DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size)); 614 } 615 } 616 617 fclose(f); 618 return 0; 619 ouch: 620 _http_seterr(999); /* XXX do this properly RSN */ 621 fouch: 622 fclose(f); 623 return -1; 624} 625 626/* 627 * List a directory 628 */ 629struct url_ent * 630fetchListHTTP(struct url *url, char *flags) 631{ 632 warnx("fetchListHTTP(): not implemented"); 633 return NULL; 634} 635