/* http.c revision 40975 */
1/*- 2 * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * $Id: http.c,v 1.6 1998/11/05 19:48:17 des Exp $ 29 */ 30 31/* 32 * The base64 code in this file is based on code from MIT fetch, which 33 * has the following copyright and license: 34 * 35 *- 36 * Copyright 1997 Massachusetts Institute of Technology 37 * 38 * Permission to use, copy, modify, and distribute this software and 39 * its documentation for any purpose and without fee is hereby 40 * granted, provided that both the above copyright notice and this 41 * permission notice appear in all copies, that both the above 42 * copyright notice and this permission notice appear in all 43 * supporting documentation, and that the name of M.I.T. not be used 44 * in advertising or publicity pertaining to distribution of the 45 * software without specific, written prior permission. M.I.T. makes 46 * no representations about the suitability of this software for any 47 * purpose. It is provided "as is" without express or implied 48 * warranty. 49 * 50 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 
*/ 62 63#include <sys/param.h> 64#include <sys/errno.h> 65#include <sys/socket.h> 66#include <sys/types.h> 67 68#include <netinet/in.h> 69 70#include <err.h> 71#include <ctype.h> 72#include <netdb.h> 73#include <stdarg.h> 74#include <stdio.h> 75#include <stdlib.h> 76#include <string.h> 77#include <unistd.h> 78 79#include "fetch.h" 80#include "common.h" 81#include "httperr.inc" 82 83#ifndef NDEBUG 84#define DEBUG(x) do x; while (0) 85#else 86#define DEBUG(x) do { } while (0) 87#endif 88 89extern char *__progname; 90 91#define ENDL "\r\n" 92 93struct cookie 94{ 95 FILE *real_f; 96#define ENC_NONE 0 97#define ENC_CHUNKED 1 98 int encoding; /* 1 = chunked, 0 = none */ 99#define HTTPCTYPELEN 59 100 char content_type[HTTPCTYPELEN+1]; 101 char *buf; 102 int b_cur, eof; 103 unsigned b_len, chunksize; 104}; 105 106/* 107 * Send a formatted line; optionally echo to terminal 108 */ 109static int 110_http_cmd(FILE *f, char *fmt, ...) 111{ 112 va_list ap; 113 114 va_start(ap, fmt); 115 vfprintf(f, fmt, ap); 116#ifndef NDEBUG 117 fprintf(stderr, "\033[1m>>> "); 118 vfprintf(stderr, fmt, ap); 119 fprintf(stderr, "\033[m"); 120#endif 121 va_end(ap); 122 123 return 0; /* XXX */ 124} 125 126/* 127 * Fill the input buffer, do chunk decoding on the fly 128 */ 129static char * 130_http_fillbuf(struct cookie *c) 131{ 132 char *ln; 133 unsigned int len; 134 135 if (c->eof) 136 return NULL; 137 138 if (c->encoding == ENC_NONE) { 139 c->buf = fgetln(c->real_f, &(c->b_len)); 140 c->b_cur = 0; 141 } else if (c->encoding == ENC_CHUNKED) { 142 if (c->chunksize == 0) { 143 ln = fgetln(c->real_f, &len); 144 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 145 "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 146 sscanf(ln, "%x", &(c->chunksize)); 147 if (!c->chunksize) { 148 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 149 "end of last chunk\033[m\n")); 150 c->eof = 1; 151 return NULL; 152 } 153 DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 154 "new chunk: %X\033[m\n", 
c->chunksize)); 155 } 156 c->buf = fgetln(c->real_f, &(c->b_len)); 157 if (c->b_len > c->chunksize) 158 c->b_len = c->chunksize; 159 c->chunksize -= c->b_len; 160 c->b_cur = 0; 161 } 162 else return NULL; /* unknown encoding */ 163 return c->buf; 164} 165 166/* 167 * Read function 168 */ 169static int 170_http_readfn(struct cookie *c, char *buf, int len) 171{ 172 int l, pos = 0; 173 while (len) { 174 /* empty buffer */ 175 if (!c->buf || (c->b_cur == c->b_len)) 176 if (!_http_fillbuf(c)) 177 break; 178 179 l = c->b_len - c->b_cur; 180 if (len < l) l = len; 181 memcpy(buf + pos, c->buf + c->b_cur, l); 182 c->b_cur += l; 183 pos += l; 184 len -= l; 185 } 186 187 if (ferror(c->real_f)) 188 return -1; 189 else return pos; 190} 191 192/* 193 * Write function 194 */ 195static int 196_http_writefn(struct cookie *c, const char *buf, int len) 197{ 198 size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 199 return r ? r : -1; 200} 201 202/* 203 * Close function 204 */ 205static int 206_http_closefn(struct cookie *c) 207{ 208 int r = fclose(c->real_f); 209 free(c); 210 return (r == EOF) ? -1 : 0; 211} 212 213/* 214 * Extract content type from cookie 215 */ 216char * 217fetchContentType(FILE *f) 218{ 219 /* 220 * We have no way of making sure this really *is* one of our cookies, 221 * so just check for a null pointer and hope for the best. 222 */ 223 return f->_cookie ? 
(((struct cookie *)f->_cookie)->content_type) : NULL; 224} 225 226/* 227 * Base64 encoding 228 */ 229int 230_http_base64(char *dst, char *src, int l) 231{ 232 static const char base64[] = 233 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 234 "abcdefghijklmnopqrstuvwxyz" 235 "0123456789+/"; 236 int t, r = 0; 237 238 while (l >= 3) { 239 t = (src[0] << 16) | (src[1] << 8) | src[2]; 240 dst[0] = base64[(t >> 18) & 0x3f]; 241 dst[1] = base64[(t >> 12) & 0x3f]; 242 dst[2] = base64[(t >> 6) & 0x3f]; 243 dst[3] = base64[(t >> 0) & 0x3f]; 244 src += 3; l -= 3; 245 dst += 4; r += 4; 246 } 247 248 switch (l) { 249 case 2: 250 t = (src[0] << 16) | (src[1] << 8); 251 dst[0] = base64[(t >> 18) & 0x3f]; 252 dst[1] = base64[(t >> 12) & 0x3f]; 253 dst[2] = base64[(t >> 6) & 0x3f]; 254 dst[3] = '='; 255 dst += 4; 256 r += 4; 257 break; 258 case 1: 259 t = src[0] << 16; 260 dst[0] = base64[(t >> 18) & 0x3f]; 261 dst[1] = base64[(t >> 12) & 0x3f]; 262 dst[2] = dst[3] = '='; 263 dst += 4; 264 r += 4; 265 break; 266 case 0: 267 break; 268 } 269 270 *dst = 0; 271 return r; 272} 273 274/* 275 * Encode username and password 276 */ 277char * 278_http_auth(char *usr, char *pwd) 279{ 280 int len, lu, lp; 281 char *str, *s; 282 283 lu = strlen(usr); 284 lp = strlen(pwd); 285 286 len = (lu * 4 + 2) / 3 /* user name, round up */ 287 + 1 /* colon */ 288 + (lp * 4 + 2) / 3 /* password, round up */ 289 + 1; /* null */ 290 291 if ((s = str = (char *)malloc(len)) == NULL) 292 return NULL; 293 294 s += _http_base64(s, usr, lu); 295 *s++ = ':'; 296 s += _http_base64(s, pwd, lp); 297 *s = 0; 298 299 return str; 300} 301 302/* 303 * Retrieve a file by HTTP 304 */ 305FILE * 306fetchGetHTTP(struct url *URL, char *flags) 307{ 308 int sd = -1, err, i, enc = ENC_NONE; 309 struct cookie *c; 310 char *ln, *p, *q; 311 FILE *f, *cf; 312 size_t len; 313 314 /* allocate cookie */ 315 if ((c = calloc(1, sizeof(struct cookie))) == NULL) 316 return NULL; 317 318 /* check port */ 319 if (!URL->port) 320 URL->port = 80; /* default HTTP 
port */ 321 322 /* attempt to connect to proxy server */ 323 if (getenv("HTTP_PROXY")) { 324 char *px, host[MAXHOSTNAMELEN]; 325 int port = 3128; /* XXX I think 3128 is default... check? */ 326 size_t len; 327 328 /* measure length */ 329 px = getenv("HTTP_PROXY"); 330 len = strcspn(px, ":"); 331 332 /* get port (atoi is a little too tolerant perhaps?) */ 333 if (px[len] == ':') 334 port = atoi(px+len+1); 335 336 /* get host name */ 337 if (len >= MAXHOSTNAMELEN) 338 len = MAXHOSTNAMELEN - 1; 339 strncpy(host, px, len); 340 host[len] = 0; 341 342 /* connect */ 343 sd = fetchConnect(host, port); 344 } 345 346 /* if no proxy is configured or could be contacted, try direct */ 347 if (sd == -1) { 348 if ((sd = fetchConnect(URL->host, URL->port)) == -1) 349 goto ouch; 350 } 351 352 /* reopen as stream */ 353 if ((f = fdopen(sd, "r+")) == NULL) 354 goto ouch; 355 c->real_f = f; 356 357 /* send request (proxies require absolute form, so use that) */ 358 _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 359 URL->host, URL->port, URL->doc); 360 361 /* start sending headers away */ 362 if (URL->user[0] || URL->pwd[0]) { 363 char *auth_str = _http_auth(URL->user, URL->pwd); 364 if (!auth_str) 365 goto fouch; 366 _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 367 free(auth_str); 368 } 369 _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 370 _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 371 _http_cmd(f, "Connection: close" ENDL ENDL); 372 373 /* get response */ 374 if ((ln = fgetln(f, &len)) == NULL) 375 goto fouch; 376 DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 377 (int)len-2, (int)len-2, ln)); 378 379 /* we can't use strchr() and friends since ln isn't NUL-terminated */ 380 p = ln; 381 while ((p < ln + len) && !isspace(*p)) 382 p++; 383 while ((p < ln + len) && !isdigit(*p)) 384 p++; 385 if (!isdigit(*p)) 386 goto fouch; 387 err = atoi(p); 388 DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", err)); 389 390 /* add code to 
handle redirects later */ 391 if (err != 200) { 392 _http_seterr(err); 393 goto fouch; 394 } 395 396 /* browse through header */ 397 while (1) { 398 if ((ln = fgetln(f, &len)) == NULL) 399 goto fouch; 400 if ((ln[0] == '\r') || (ln[0] == '\n')) 401 break; 402 DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 403 (int)len-2, (int)len-2, ln)); 404#define XFERENC "Transfer-Encoding:" 405 if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) { 406 p = ln + sizeof(XFERENC) - 1; 407 while ((p < ln + len) && isspace(*p)) 408 p++; 409 for (q = p; (q < ln + len) && !isspace(*q); q++) 410 /* VOID */ ; 411 *q = 0; 412 if (strcasecmp(p, "chunked") == 0) 413 enc = ENC_CHUNKED; 414 DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 415#undef XFERENC 416#define CONTTYPE "Content-Type:" 417 } else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) { 418 p = ln + sizeof(CONTTYPE) - 1; 419 while ((p < ln + len) && isspace(*p)) 420 p++; 421 for (i = 0; p < ln + len; p++) 422 if (i < HTTPCTYPELEN) 423 c->content_type[i++] = *p; 424 do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 425 DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 426 c->content_type)); 427#undef CONTTYPE 428 } 429 } 430 431 /* only body remains */ 432 c->encoding = enc; 433 cf = funopen(c, 434 (int (*)(void *, char *, int))_http_readfn, 435 (int (*)(void *, const char *, int))_http_writefn, 436 (fpos_t (*)(void *, fpos_t, int))NULL, 437 (int (*)(void *))_http_closefn); 438 if (cf == NULL) 439 goto fouch; 440 return cf; 441 442ouch: 443 if (sd >= 0) 444 close(sd); 445 free(c); 446 return NULL; 447fouch: 448 fclose(f); 449 free(c); 450 return NULL; 451} 452 453FILE * 454fetchPutHTTP(struct url *URL, char *flags) 455{ 456 warnx("fetchPutHTTP(): not implemented"); 457 return NULL; 458} 459 460/* 461 * Get an HTTP document's metadata 462 */ 463int 464fetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 465{ 466 warnx("fetchStatHTTP(): not implemented"); 467 return -1; 
468} 469