http.c revision 60189
1/*- 2 * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/lib/libfetch/http.c 60189 2000-05-07 20:01:55Z des $ 29 */ 30 31/* 32 * The base64 code in this file is based on code from MIT fetch, which 33 * has the following copyright and license: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. */ 62 63#include <sys/param.h> 64 65#include <err.h> 66#include <ctype.h> 67#include <netdb.h> 68#include <stdarg.h> 69#include <stdio.h> 70#include <stdlib.h> 71#include <string.h> 72#include <unistd.h> 73 74#include "fetch.h" 75#include "common.h" 76#include "httperr.h" 77 78extern char *__progname; 79 80#define ENDL "\r\n" 81 82struct cookie 83{ 84 FILE *real_f; 85#define ENC_NONE 0 86#define ENC_CHUNKED 1 87 int encoding; /* 1 = chunked, 0 = none */ 88#define HTTPCTYPELEN 59 89 char content_type[HTTPCTYPELEN+1]; 90 char *buf; 91 int b_cur, eof; 92 unsigned b_len, chunksize; 93}; 94 95/* 96 * Send a formatted line; optionally echo to terminal 97 */ 98static int 99_http_cmd(FILE *f, char *fmt, ...) 100{ 101 va_list ap; 102 103 va_start(ap, fmt); 104 vfprintf(f, fmt, ap); 105#ifndef NDEBUG 106 fprintf(stderr, "\033[1m>>> "); 107 vfprintf(stderr, fmt, ap); 108 fprintf(stderr, "\033[m"); 109#endif 110 va_end(ap); 111 112 return 0; /* XXX */ 113} 114 115/* 116 * Fill the input buffer, do chunk decoding on the fly 117 */ 118static char * 119_http_fillbuf(struct cookie *c) 120{ 121 char *ln; 122 unsigned int len; 123 124 if (c->eof) 125 return NULL; 126 127 if (c->encoding == ENC_NONE) { 128 c->buf = fgetln(c->real_f, &(c->b_len)); 129 c->b_cur = 0; 130 } else if (c->encoding == ENC_CHUNKED) { 131 if (c->chunksize == 0) { 132 ln = fgetln(c->real_f, &len); 133 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 134 "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 135 sscanf(ln, "%x", &(c->chunksize)); 136 if (!c->chunksize) { 137 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 138 "end of last chunk\033[m\n")); 139 c->eof = 1; 140 return NULL; 141 } 142 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 143 "new chunk: %X\033[m\n", c->chunksize)); 144 } 145 c->buf = fgetln(c->real_f, &(c->b_len)); 146 if (c->b_len > c->chunksize) 147 c->b_len = c->chunksize; 148 c->chunksize -= c->b_len; 149 c->b_cur = 0; 150 } 151 else return NULL; /* unknown encoding */ 152 return c->buf; 153} 154 155/* 156 * Read function 157 */ 158static int 159_http_readfn(struct cookie *c, char *buf, int len) 160{ 161 int l, pos = 0; 162 while (len) { 163 /* empty buffer */ 164 if (!c->buf || (c->b_cur == c->b_len)) 165 if (!_http_fillbuf(c)) 166 break; 167 168 l = c->b_len - c->b_cur; 169 if (len < l) l = len; 170 memcpy(buf + pos, c->buf + c->b_cur, l); 171 c->b_cur += l; 172 pos += l; 173 len -= l; 174 } 175 176 if (ferror(c->real_f)) 177 return -1; 178 else return pos; 179} 180 181/* 182 * Write function 183 */ 184static int 185_http_writefn(struct cookie *c, const char *buf, int len) 186{ 187 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 188 return r ? r : -1; 189} 190 191/* 192 * Close function 193 */ 194static int 195_http_closefn(struct cookie *c) 196{ 197 int r = fclose(c->real_f); 198 free(c); 199 return (r == EOF) ? -1 : 0; 200} 201 202/* 203 * Extract content type from cookie 204 */ 205char * 206fetchContentType(FILE *f) 207{ 208 /* 209 * We have no way of making sure this really *is* one of our cookies, 210 * so just check for a null pointer and hope for the best. 211 */ 212 return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 213} 214 215/* 216 * Base64 encoding 217 */ 218int 219_http_base64(char *dst, char *src, int l) 220{ 221 static const char base64[] = 222 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 223 "abcdefghijklmnopqrstuvwxyz" 224 "0123456789+/"; 225 int t, r = 0; 226 227 while (l >= 3) { 228 t = (src[0] << 16) | (src[1] << 8) | src[2]; 229 dst[0] = base64[(t >> 18) & 0x3f]; 230 dst[1] = base64[(t >> 12) & 0x3f]; 231 dst[2] = base64[(t >> 6) & 0x3f]; 232 dst[3] = base64[(t >> 0) & 0x3f]; 233 src += 3; l -= 3; 234 dst += 4; r += 4; 235 } 236 237 switch (l) { 238 case 2: 239 t = (src[0] << 16) | (src[1] << 8); 240 dst[0] = base64[(t >> 18) & 0x3f]; 241 dst[1] = base64[(t >> 12) & 0x3f]; 242 dst[2] = base64[(t >> 6) & 0x3f]; 243 dst[3] = '='; 244 dst += 4; 245 r += 4; 246 break; 247 case 1: 248 t = src[0] << 16; 249 dst[0] = base64[(t >> 18) & 0x3f]; 250 dst[1] = base64[(t >> 12) & 0x3f]; 251 dst[2] = dst[3] = '='; 252 dst += 4; 253 r += 4; 254 break; 255 case 0: 256 break; 257 } 258 259 *dst = 0; 260 return r; 261} 262 263/* 264 * Encode username and password 265 */ 266char * 267_http_auth(char *usr, char *pwd) 268{ 269 int len, lu, lp; 270 char *str, *s; 271 272 lu = strlen(usr); 273 lp = strlen(pwd); 274 275 len = (lu * 4 + 2) / 3 /* user name, round up */ 276 + 1 /* colon */ 277 + (lp * 4 + 2) / 3 /* password, round up */ 278 + 1; /* null */ 279 280 if ((s = str = (char *)malloc(len)) == NULL) 281 return NULL; 282 283 s += _http_base64(s, usr, lu); 284 *s++ = ':'; 285 s += _http_base64(s, pwd, lp); 286 *s = 0; 287 288 return str; 289} 290 291/* 292 * Retrieve a file by HTTP 293 */ 294FILE * 295fetchGetHTTP(struct url *URL, char *flags) 296{ 297 int sd = -1, e, i, enc = ENC_NONE, direct, verbose; 298 struct cookie *c; 299 char *ln, *p, *px, *q; 300 FILE *f, *cf; 301 size_t len; 302 303 direct = (flags && strchr(flags, 'd')); 304 verbose = (flags && strchr(flags, 'v')); 305 306 /* allocate cookie */ 307 if ((c = calloc(1, sizeof *c)) == NULL) 308 return NULL; 309 310 /* check port */ 311 if (!URL->port) { 312 struct servent *se; 313 314 if ((se = getservbyname("http", "tcp")) != NULL) 315 URL->port = ntohs(se->s_port); 316 else 317 URL->port = 80; 318 } 319 320 /* attempt to connect to proxy server */ 321 if (!direct && (px = getenv("HTTP_PROXY")) != NULL) { 322 char host[MAXHOSTNAMELEN]; 323 int port = 0; 324 325 /* measure length */ 326 len = strcspn(px, ":"); 327 328 /* get port (XXX atoi is a little too tolerant perhaps?) */ 329 if (px[len] == ':') { 330 if (strspn(px+len+1, "0123456789") != strlen(px+len+1) 331 || strlen(px+len+1) > 5) { 332 /* XXX we should emit some kind of warning */ 333 } 334 port = atoi(px+len+1); 335 if (port < 1 || port > 65535) { 336 /* XXX we should emit some kind of warning */ 337 } 338 } 339 if (!port) { 340#if 0 341 /* 342 * commented out, since there is currently no service name 343 * for HTTP proxies 344 */ 345 struct servent *se; 346 347 if ((se = getservbyname("xxxx", "tcp")) != NULL) 348 port = ntohs(se->s_port); 349 else 350#endif 351 port = 3128; 352 } 353 354 /* get host name */ 355 if (len >= MAXHOSTNAMELEN) 356 len = MAXHOSTNAMELEN - 1; 357 strncpy(host, px, len); 358 host[len] = 0; 359 360 /* connect */ 361 sd = _fetch_connect(host, port, verbose); 362 } 363 364 /* if no proxy is configured or could be contacted, try direct */ 365 if (sd == -1) { 366 if ((sd = _fetch_connect(URL->host, URL->port, verbose)) == -1) 367 goto ouch; 368 } 369 370 /* reopen as stream */ 371 if ((f = fdopen(sd, "r+")) == NULL) 372 goto ouch; 373 c->real_f = f; 374 375 /* send request (proxies require absolute form, so use that) */ 376 if (verbose) 377 _fetch_info("requesting http://%s:%d%s", 378 URL->host, URL->port, URL->doc); 379 _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 380 URL->host, URL->port, URL->doc); 381 382 /* start sending headers away */ 383 if (URL->user[0] || URL->pwd[0]) { 384 char *auth_str = _http_auth(URL->user, URL->pwd); 385 if (!auth_str) 386 goto fouch; 387 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 388 free(auth_str); 389 } 390 _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 391 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 392 _http_cmd(f, "Connection: close" ENDL ENDL); 393 394 /* get response */ 395 if ((ln = fgetln(f, &len)) == NULL) 396 goto fouch; 397 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 398 (int)len-2, (int)len-2, ln)); 399 400 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 401 p = ln; 402 while ((p < ln + len) && !isspace(*p)) 403 p++; 404 while ((p < ln + len) && !isdigit(*p)) 405 p++; 406 if (!isdigit(*p)) 407 goto fouch; 408 e = atoi(p); 409 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", e)); 410 411 /* add code to handle redirects later */ 412 if (e != 200) { 413 _http_seterr(e); 414 goto fouch; 415 } 416 417 /* browse through header */ 418 while (1) { 419 if ((ln = fgetln(f, &len)) == NULL) 420 goto fouch; 421 if ((ln[0] == '\r') || (ln[0] == '\n')) 422 break; 423 DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 424 (int)len-2, (int)len-2, ln)); 425#define XFERENC "Transfer-Encoding:" 426 if (strncasecmp(ln, XFERENC, sizeof XFERENC - 1) == 0) { 427 p = ln + sizeof XFERENC - 1; 428 while ((p < ln + len) && isspace(*p)) 429 p++; 430 for (q = p; (q < ln + len) && !isspace(*q); q++) 431 /* VOID */ ; 432 *q = 0; 433 if (strcasecmp(p, "chunked") == 0) 434 enc = ENC_CHUNKED; 435 DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 436#undef XFERENC 437#define CONTTYPE "Content-Type:" 438 } else if (strncasecmp(ln, CONTTYPE, sizeof CONTTYPE - 1) == 0) { 439 p = ln + sizeof CONTTYPE - 1; 440 while ((p < ln + len) && isspace(*p)) 441 p++; 442 for (i = 0; p < ln + len; p++) 443 if (i < HTTPCTYPELEN) 444 c->content_type[i++] = *p; 445 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 446 DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 447 c->content_type)); 448#undef CONTTYPE 449 } 450 } 451 452 /* only body remains */ 453 c->encoding = enc; 454 cf = funopen(c, 455 (int (*)(void *, char *, int))_http_readfn, 456 (int (*)(void *, const char *, int))_http_writefn, 457 (fpos_t (*)(void *, fpos_t, int))NULL, 458 (int (*)(void *))_http_closefn); 459 if (cf == NULL) 460 goto fouch; 461 462 return cf; 463 464ouch: 465 if (sd >= 0) 466 close(sd); 467 free(c); 468 _http_seterr(999); /* XXX do this properly RSN */ 469 return NULL; 470fouch: 471 fclose(f); 472 free(c); 473 _http_seterr(999); /* XXX do this properly RSN */ 474 return NULL; 475} 476 477FILE * 478fetchPutHTTP(struct url *URL, char *flags) 479{ 480 warnx("fetchPutHTTP(): not implemented"); 481 return NULL; 482} 483 484/* 485 * Get an HTTP document's metadata 486 */ 487int 488fetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 489{ 490 warnx("fetchStatHTTP(): not implemented"); 491 return -1; 492} 493 494/* 495 * List a directory 496 */ 497struct url_ent * 498fetchListHTTP(struct url *url, char *flags) 499{ 500 warnx("fetchListHTTP(): not implemented"); 501 return NULL; 502} 503