http.c revision 60581
1/*- 2 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * $FreeBSD: head/lib/libfetch/http.c 60581 2000-05-15 08:24:29Z des $ 29 */ 30 31/* 32 * The base64 code in this file is based on code from MIT fetch, which 33 * has the following copyright and license: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 
*/ 62 63#include <sys/param.h> 64 65#include <err.h> 66#include <ctype.h> 67#include <locale.h> 68#include <netdb.h> 69#include <stdarg.h> 70#include <stdio.h> 71#include <stdlib.h> 72#include <string.h> 73#include <time.h> 74#include <unistd.h> 75 76#include "fetch.h" 77#include "common.h" 78#include "httperr.h" 79 80extern char *__progname; 81 82#define ENDL "\r\n" 83 84#define HTTP_OK 200 85#define HTTP_PARTIAL 206 86 87struct cookie 88{ 89 FILE *real_f; 90#define ENC_NONE 0 91#define ENC_CHUNKED 1 92 int encoding; /* 1 = chunked, 0 = none */ 93#define HTTPCTYPELEN 59 94 char content_type[HTTPCTYPELEN+1]; 95 char *buf; 96 int b_cur, eof; 97 unsigned b_len, chunksize; 98}; 99 100/* 101 * Send a formatted line; optionally echo to terminal 102 */ 103static int 104_http_cmd(FILE *f, char *fmt, ...) 105{ 106 va_list ap; 107 108 va_start(ap, fmt); 109 vfprintf(f, fmt, ap); 110#ifndef NDEBUG 111 fprintf(stderr, "\033[1m>>> "); 112 vfprintf(stderr, fmt, ap); 113 fprintf(stderr, "\033[m"); 114#endif 115 va_end(ap); 116 117 return 0; /* XXX */ 118} 119 120/* 121 * Fill the input buffer, do chunk decoding on the fly 122 */ 123static char * 124_http_fillbuf(struct cookie *c) 125{ 126 char *ln; 127 unsigned int len; 128 129 if (c->eof) 130 return NULL; 131 132 if (c->encoding == ENC_NONE) { 133 c->buf = fgetln(c->real_f, &(c->b_len)); 134 c->b_cur = 0; 135 } else if (c->encoding == ENC_CHUNKED) { 136 if (c->chunksize == 0) { 137 ln = fgetln(c->real_f, &len); 138 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 139 "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 140 sscanf(ln, "%x", &(c->chunksize)); 141 if (!c->chunksize) { 142 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 143 "end of last chunk\033[m\n")); 144 c->eof = 1; 145 return NULL; 146 } 147 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 148 "new chunk: %X\033[m\n", c->chunksize)); 149 } 150 c->buf = fgetln(c->real_f, &(c->b_len)); 151 if (c->b_len > c->chunksize) 152 c->b_len = c->chunksize; 153 
c->chunksize -= c->b_len; 154 c->b_cur = 0; 155 } 156 else return NULL; /* unknown encoding */ 157 return c->buf; 158} 159 160/* 161 * Read function 162 */ 163static int 164_http_readfn(struct cookie *c, char *buf, int len) 165{ 166 int l, pos = 0; 167 while (len) { 168 /* empty buffer */ 169 if (!c->buf || (c->b_cur == c->b_len)) 170 if (!_http_fillbuf(c)) 171 break; 172 173 l = c->b_len - c->b_cur; 174 if (len < l) l = len; 175 memcpy(buf + pos, c->buf + c->b_cur, l); 176 c->b_cur += l; 177 pos += l; 178 len -= l; 179 } 180 181 if (ferror(c->real_f)) 182 return -1; 183 else return pos; 184} 185 186/* 187 * Write function 188 */ 189static int 190_http_writefn(struct cookie *c, const char *buf, int len) 191{ 192 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 193 return r ? r : -1; 194} 195 196/* 197 * Close function 198 */ 199static int 200_http_closefn(struct cookie *c) 201{ 202 int r = fclose(c->real_f); 203 free(c); 204 return (r == EOF) ? -1 : 0; 205} 206 207/* 208 * Extract content type from cookie 209 */ 210char * 211fetchContentType(FILE *f) 212{ 213 /* 214 * We have no way of making sure this really *is* one of our cookies, 215 * so just check for a null pointer and hope for the best. 216 */ 217 return f->_cookie ? 
(((struct cookie *)f->_cookie)->content_type) : NULL; 218} 219 220/* 221 * Base64 encoding 222 */ 223int 224_http_base64(char *dst, char *src, int l) 225{ 226 static const char base64[] = 227 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 228 "abcdefghijklmnopqrstuvwxyz" 229 "0123456789+/"; 230 int t, r = 0; 231 232 while (l >= 3) { 233 t = (src[0] << 16) | (src[1] << 8) | src[2]; 234 dst[0] = base64[(t >> 18) & 0x3f]; 235 dst[1] = base64[(t >> 12) & 0x3f]; 236 dst[2] = base64[(t >> 6) & 0x3f]; 237 dst[3] = base64[(t >> 0) & 0x3f]; 238 src += 3; l -= 3; 239 dst += 4; r += 4; 240 } 241 242 switch (l) { 243 case 2: 244 t = (src[0] << 16) | (src[1] << 8); 245 dst[0] = base64[(t >> 18) & 0x3f]; 246 dst[1] = base64[(t >> 12) & 0x3f]; 247 dst[2] = base64[(t >> 6) & 0x3f]; 248 dst[3] = '='; 249 dst += 4; 250 r += 4; 251 break; 252 case 1: 253 t = src[0] << 16; 254 dst[0] = base64[(t >> 18) & 0x3f]; 255 dst[1] = base64[(t >> 12) & 0x3f]; 256 dst[2] = dst[3] = '='; 257 dst += 4; 258 r += 4; 259 break; 260 case 0: 261 break; 262 } 263 264 *dst = 0; 265 return r; 266} 267 268/* 269 * Encode username and password 270 */ 271char * 272_http_auth(char *usr, char *pwd) 273{ 274 int len, lu, lp; 275 char *str, *s; 276 277 lu = strlen(usr); 278 lp = strlen(pwd); 279 280 len = (lu * 4 + 2) / 3 /* user name, round up */ 281 + 1 /* colon */ 282 + (lp * 4 + 2) / 3 /* password, round up */ 283 + 1; /* null */ 284 285 if ((s = str = (char *)malloc(len)) == NULL) 286 return NULL; 287 288 s += _http_base64(s, usr, lu); 289 *s++ = ':'; 290 s += _http_base64(s, pwd, lp); 291 *s = 0; 292 293 return str; 294} 295 296/* 297 * Connect to server or proxy 298 */ 299FILE * 300_http_connect(struct url *URL, char *flags) 301{ 302 int direct, sd = -1, verbose; 303 size_t len; 304 char *px; 305 FILE *f; 306 307 direct = (flags && strchr(flags, 'd')); 308 verbose = (flags && strchr(flags, 'v')); 309 310 /* check port */ 311 if (!URL->port) { 312 struct servent *se; 313 314 if ((se = getservbyname("http", "tcp")) != NULL) 
315 URL->port = ntohs(se->s_port); 316 else 317 URL->port = 80; 318 } 319 320 /* attempt to connect to proxy server */ 321 if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 322 char host[MAXHOSTNAMELEN]; 323 int port = 0; 324 325 /* measure length */ 326 len = strcspn(px, ":"); 327 328 /* get port (XXX atoi is a little too tolerant perhaps?) */ 329 if (px[len] == ':') { 330 if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 331 || strlen(px+len+1) > 5) { 332 /* XXX we should emit some kind of warning */ 333 } 334 port = atoi(px+len+1); 335 if (port < 1 || port > 65535) { 336 /* XXX we should emit some kind of warning */ 337 } 338 } 339 if (!port) { 340#if 0 341 /* 342 * commented out, since there is currently no service name 343 * for HTTP proxies 344 */ 345 struct servent *se; 346 347 if ((se = getservbyname("xxxx", "tcp")) != NULL) 348 port = ntohs(se->s_port); 349 else 350#endif 351 port = 3128; 352 } 353 354 /* get host name */ 355 if (len >= MAXHOSTNAMELEN) 356 len = MAXHOSTNAMELEN - 1; 357 strncpy(host, px, len); 358 host[len] = 0; 359 360 /* connect */ 361 sd = _fetch_connect(host, port, verbose); 362 } 363 364 /* if no proxy is configured or could be contacted, try direct */ 365 if (sd == -1) { 366 if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1) 367 goto ouch; 368 } 369 370 /* reopen as stream */ 371 if ((f = fdopen(sd, "r+")) == NULL) 372 goto ouch; 373 374 return f; 375 376ouch: 377 if (sd >= 0) 378 close(sd); 379 _http_seterr(999); /* XXX do this properly RSN */ 380 return NULL; 381} 382 383/* 384 * Send a HEAD or GET request 385 */ 386int 387_http_request(FILE *f, char *op, struct url *URL, char *flags) 388{ 389 int e, verbose; 390 char *ln, *p; 391 size_t len; 392 393 verbose = (flags && strchr(flags, 'v')); 394 395 /* send request (proxies require absolute form, so use that) */ 396 if (verbose) 397 _fetch_info("requesting http://%s:%d%s", 398 URL->host, URL->port, URL->doc); 399 _http_cmd(f, "%s %s://%s:%d%s HTTP/1.1" ENDL, 400 
op, URL->scheme, URL->host, URL->port, URL->doc); 401 402 /* start sending headers away */ 403 if (URL->user[0] || URL->pwd[0]) { 404 char *auth_str = _http_auth(URL->user, URL->pwd); 405 if (!auth_str) 406 return 999; /* XXX wrong */ 407 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 408 free(auth_str); 409 } 410 _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 411 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 412 if (URL->offset) 413 _http_cmd(f, "Range: bytes=%lld-" ENDL, URL->offset); 414 _http_cmd(f, "Connection: close" ENDL ENDL); 415 416 /* get response */ 417 if ((ln = fgetln(f, &len)) == NULL) 418 return 999; 419 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 420 (int)len-2, (int)len-2, ln)); 421 422 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 423 p = ln; 424 while ((p < ln + len) && !isspace(*p)) 425 p++; 426 while ((p < ln + len) && !isdigit(*p)) 427 p++; 428 if (!isdigit(*p)) 429 return 999; 430 431 e = atoi(p); 432 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 433 return e; 434} 435 436/* 437 * Check a header line 438 */ 439char * 440_http_match(char *str, char *hdr) 441{ 442 while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 443 /* nothing */; 444 if (*str || *hdr != ':') 445 return NULL; 446 while (*hdr && isspace(*++hdr)) 447 /* nothing */; 448 return hdr; 449} 450 451/* 452 * Retrieve a file by HTTP 453 */ 454FILE * 455fetchGetHTTP(struct url *URL, char *flags) 456{ 457 int e, enc = ENC_NONE, i; 458 struct cookie *c; 459 char *ln, *p, *q; 460 FILE *f, *cf; 461 size_t len; 462 off_t pos = 0; 463 464 /* allocate cookie */ 465 if ((c = calloc(1, sizeof *c)) == NULL) 466 return NULL; 467 468 /* connect */ 469 if ((f = _http_connect(URL, flags)) == NULL) { 470 free(c); 471 return NULL; 472 } 473 c->real_f = f; 474 475 e = _http_request(f, "GET", URL, flags); 476 477 /* add code to handle redirects later */ 478 if (e != (URL->offset ? 
HTTP_PARTIAL : HTTP_OK)) { 479 _http_seterr(e); 480 goto fouch; 481 } 482 483 /* browse through header */ 484 while (1) { 485 if ((ln = fgetln(f, &len)) == NULL) 486 goto fouch; 487 if ((ln[0] == '\r') || (ln[0] == '\n')) 488 break; 489 while (isspace(ln[len-1])) 490 --len; 491 ln[len] = '\0'; /* XXX */ 492 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 493 if ((p = _http_match("Transfer-Encoding", ln)) != NULL) { 494 for (q = p; *q && !isspace(*q); q++) 495 /* VOID */ ; 496 *q = 0; 497 if (strcasecmp(p, "chunked") == 0) 498 enc = ENC_CHUNKED; 499 DEBUG(fprintf(stderr, "transfer encoding: [\033[1m%s\033[m]\n", p)); 500 } else if ((p = _http_match("Content-Type", ln)) != NULL) { 501 for (i = 0; *p && i < HTTPCTYPELEN; p++, i++) 502 c->content_type[i] = *p; 503 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 504 DEBUG(fprintf(stderr, "content type: [\033[1m%s\033[m]\n", 505 c->content_type)); 506 } else if ((p = _http_match("Content-Range", ln)) != NULL) { 507 if (strncasecmp(p, "bytes ", 6) != 0) 508 goto fouch; 509 p += 6; 510 while (*p && isdigit(*p)) 511 pos = pos * 10 + (*p++ - '0'); 512 /* XXX wouldn't hurt to be slightly more paranoid here */ 513 DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", pos)); 514 if (pos > URL->offset) 515 goto fouch; 516 } 517 } 518 519 /* only body remains */ 520 c->encoding = enc; 521 cf = funopen(c, 522 (int (*)(void *, char *, int))_http_readfn, 523 (int (*)(void *, const char *, int))_http_writefn, 524 (fpos_t (*)(void *, fpos_t, int))NULL, 525 (int (*)(void *))_http_closefn); 526 if (cf == NULL) 527 goto fouch; 528 529 while (pos < URL->offset) 530 if (fgetc(cf) == EOF) 531 goto cfouch; 532 533 return cf; 534 535fouch: 536 fclose(f); 537 free(c); 538 _http_seterr(999); /* XXX do this properly RSN */ 539 return NULL; 540cfouch: 541 fclose(cf); 542 _http_seterr(999); /* XXX do this properly RSN */ 543 return NULL; 544} 545 546FILE * 547fetchPutHTTP(struct url *URL, char *flags) 548{ 549 
warnx("fetchPutHTTP(): not implemented"); 550 return NULL; 551} 552 553/* 554 * Get an HTTP document's metadata 555 */ 556int 557fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) 558{ 559 int e; 560 size_t len; 561 char *ln, *p; 562 FILE *f; 563 564 us->size = -1; 565 us->atime = us->mtime = 0; 566 567 /* connect */ 568 if ((f = _http_connect(URL, flags)) == NULL) 569 return -1; 570 571 if ((e = _http_request(f, "HEAD", URL, flags)) != HTTP_OK) { 572 _http_seterr(e); 573 goto ouch; 574 } 575 576 while (1) { 577 if ((ln = fgetln(f, &len)) == NULL) 578 goto fouch; 579 if ((ln[0] == '\r') || (ln[0] == '\n')) 580 break; 581 while (isspace(ln[len-1])) 582 --len; 583 ln[len] = '\0'; /* XXX */ 584 DEBUG(fprintf(stderr, "header: [\033[1m%s\033[m]\n", ln)); 585 if ((p = _http_match("Last-Modified", ln)) != NULL) { 586 struct tm tm; 587 char locale[64]; 588 589 strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 590 setlocale(LC_TIME, "C"); 591 strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 592 /* XXX should add support for date-2 and date-3 */ 593 setlocale(LC_TIME, locale); 594 us->atime = us->mtime = timegm(&tm); 595 DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " 596 "%02d:%02d:%02d\033[m]\n", 597 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 598 tm.tm_hour, tm.tm_min, tm.tm_sec)); 599 } else if ((p = _http_match("Content-Length", ln)) != NULL) { 600 us->size = 0; 601 while (*p && isdigit(*p)) 602 us->size = us->size * 10 + (*p++ - '0'); 603 DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", us->size)); 604 } 605 } 606 607 fclose(f); 608 return 0; 609 ouch: 610 _http_seterr(999); /* XXX do this properly RSN */ 611 fouch: 612 fclose(f); 613 return -1; 614} 615 616/* 617 * List a directory 618 */ 619struct url_ent * 620fetchListHTTP(struct url *url, char *flags) 621{ 622 warnx("fetchListHTTP(): not implemented"); 623 return NULL; 624} 625