http.c revision 37535
1133819Stjr/*- 2133819Stjr * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3133819Stjr * All rights reserved. 4133819Stjr * 5133819Stjr * Redistribution and use in source and binary forms, with or without 6133819Stjr * modification, are permitted provided that the following conditions 7133819Stjr * are met: 8133819Stjr * 1. Redistributions of source code must retain the above copyright 9133819Stjr * notice, this list of conditions and the following disclaimer 10133819Stjr * in this position and unchanged. 11133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 12133819Stjr * notice, this list of conditions and the following disclaimer in the 13133819Stjr * documentation and/or other materials provided with the distribution. 14133819Stjr * 3. The name of the author may not be used to endorse or promote products 15133819Stjr * derived from this software without specific prior written permission 16133819Stjr * 17133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20133819Stjr * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27133819Stjr * 28133819Stjr * $Id$ 29133819Stjr */ 30133819Stjr 31133819Stjr#include <sys/param.h> 32133819Stjr#include <sys/errno.h> 33133819Stjr#include <sys/socket.h> 34133819Stjr#include <sys/types.h> 35133819Stjr 36133819Stjr#include <netinet/in.h> 37165832Snetchild 38165832Snetchild#include <err.h> 39162954Sphk#include <ctype.h> 40142057Sjhb#include <netdb.h> 41161310Snetchild#include <stdio.h> 42133819Stjr#include <stdlib.h> 43133819Stjr#include <string.h> 44133819Stjr#include <unistd.h> 45133819Stjr 46133819Stjr#include "fetch.h" 47133819Stjr#include "httperr.c" 48133819Stjr 49133819Stjr#ifndef NDEBUG 50133819Stjr#define DEBUG(x) do x; while (0) 51133819Stjr#else 52133819Stjr#define DEBUG(x) do { } while (0) 53133819Stjr#endif 54133819Stjr 55133819Stjrextern char *__progname; 56133819Stjr 57142057Sjhbextern int fprint64(FILE *f, const unsigned char *buf); 58142057Sjhb 59133819Stjr#define ENDL "\r\n" 60133819Stjr 61133819Stjrstruct cookie 62133819Stjr{ 63133819Stjr FILE *real_f; 64133819Stjr#define ENC_NONE 0 65133819Stjr#define ENC_CHUNKED 1 66161474Snetchild int encoding; /* 1 = chunked, 0 = none */ 67133819Stjr#define HTTPCTYPELEN 59 68133819Stjr char content_type[HTTPCTYPELEN+1]; 69133819Stjr char *buf; 70133819Stjr int b_cur, eof; 71133819Stjr unsigned b_len, chunksize; 72133819Stjr}; 73133819Stjr 74133819Stjrstatic int 75133819Stjr_http_connect(char *host, int port) 76133819Stjr{ 77133819Stjr struct sockaddr_in sin; 78133819Stjr struct hostent *he; 79133819Stjr int fd; 80133819Stjr 81133819Stjr /* look up host name */ 82133819Stjr if ((he = gethostbyname(host)) == NULL) 83133819Stjr return -1; 84133819Stjr 85133819Stjr /* set up socket address structure */ 86133819Stjr bzero(&sin, sizeof(sin)); 87133819Stjr bcopy(he->h_addr, (char *)&sin.sin_addr, he->h_length); 88133819Stjr sin.sin_family = he->h_addrtype; 89133819Stjr sin.sin_port = htons(port); 90133819Stjr 91133819Stjr /* try to connect */ 92133819Stjr if ((fd = socket(sin.sin_family, SOCK_STREAM, 0)) < 0) 93133819Stjr return -1; 94133819Stjr if (connect(fd, (struct sockaddr *)&sin, sizeof sin) < 0) { 95133819Stjr close(fd); 96133819Stjr return -1; 97133819Stjr } 98133819Stjr 99133819Stjr return fd; 100142057Sjhb} 101142057Sjhb 102142057Sjhbstatic char * 103142057Sjhb_http_fillbuf(struct cookie *c) 104142057Sjhb{ 105142057Sjhb char *ln; 106142057Sjhb unsigned int len; 107133819Stjr 108142057Sjhb if (c->eof) 109142057Sjhb return NULL; 110142057Sjhb 111133819Stjr if (c->encoding == ENC_NONE) { 112133819Stjr c->buf = fgetln(c->real_f, &(c->b_len)); 113142057Sjhb c->b_cur = 0; 114142057Sjhb } else if (c->encoding == ENC_CHUNKED) { 115142057Sjhb if (c->chunksize == 0) { 116133819Stjr ln = fgetln(c->real_f, &len); 117142057Sjhb DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 118142057Sjhb "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 119142057Sjhb sscanf(ln, "%x", &(c->chunksize)); 120142057Sjhb if (!c->chunksize) { 121147588Sjhb DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 122147588Sjhb "end of last chunk\033[m\n")); 123142057Sjhb c->eof = 1; 124142057Sjhb return NULL; 125142057Sjhb } 126142057Sjhb DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 127142057Sjhb "new chunk: %X\033[m\n", c->chunksize)); 128133819Stjr } 129142057Sjhb c->buf = fgetln(c->real_f, &(c->b_len)); 130133819Stjr if (c->b_len > c->chunksize) 131142057Sjhb c->b_len = c->chunksize; 132142057Sjhb c->chunksize -= c->b_len; 133142057Sjhb c->b_cur = 0; 134142057Sjhb } 135142057Sjhb else return NULL; /* unknown encoding */ 136142057Sjhb return c->buf; 137142057Sjhb} 138156440Sups 139142057Sjhbstatic int 140142057Sjhb_http_readfn(struct cookie *c, char *buf, int len) 141142057Sjhb{ 142142057Sjhb int l, pos = 0; 143142057Sjhb while (len) { 144142057Sjhb /* empty buffer */ 145142057Sjhb if (!c->buf || (c->b_cur == c->b_len)) 146142057Sjhb if (!_http_fillbuf(c)) 147156440Sups break; 148142057Sjhb 149142057Sjhb l = c->b_len - c->b_cur; 150142057Sjhb if (len < l) l = len; 151142057Sjhb memcpy(buf + pos, c->buf + c->b_cur, l); 152142057Sjhb c->b_cur += l; 153142057Sjhb pos += l; 154156440Sups len -= l; 155156440Sups } 156156440Sups 157142057Sjhb if (ferror(c->real_f)) 158142057Sjhb return -1; 159142057Sjhb else return pos; 160142057Sjhb} 161133819Stjr 162142057Sjhbstatic int 163142057Sjhb_http_writefn(struct cookie *c, const char *buf, int len) 164142057Sjhb{ 165142057Sjhb size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 166142057Sjhb return r ? r : -1; 167142057Sjhb} 168142057Sjhb 169142057Sjhbstatic int 170142057Sjhb_http_closefn(struct cookie *c) 171133819Stjr{ 172133819Stjr int r = fclose(c->real_f); 173156440Sups free(c); 174142057Sjhb return (r == EOF) ? -1 : 0; 175142057Sjhb} 176142057Sjhb 177142057Sjhbchar * 178142057SjhbfetchContentType(FILE *f) 179142057Sjhb{ 180142057Sjhb /* 181156440Sups * We have no way of making sure this really *is* one of our cookies, 182156440Sups * so just check for a null pointer and hope for the best. 183142057Sjhb */ 184142057Sjhb return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 185142057Sjhb} 186142057Sjhb 187142057SjhbFILE * 188133819StjrfetchGetHTTP(url_t *URL, char *flags) 189133819Stjr{ 190142057Sjhb int fd = -1, err, i, enc = ENC_NONE; 191156440Sups struct cookie *c; 192156440Sups char *ln, *p, *q; 193156440Sups FILE *f, *cf; 194156440Sups size_t len; 195156440Sups 196156440Sups /* allocate cookie */ 197133819Stjr if ((c = calloc(1, sizeof(struct cookie))) == NULL) 198133819Stjr return NULL; 199142057Sjhb 200142057Sjhb /* check port */ 201142057Sjhb if (!URL->port) 202142057Sjhb URL->port = 80; /* default HTTP port */ 203142057Sjhb 204142057Sjhb /* attempt to connect to proxy server */ 205142057Sjhb if (getenv("HTTP_PROXY")) { 206142057Sjhb char *px, host[MAXHOSTNAMELEN]; 207142057Sjhb int port = 3128; /* XXX I think 3128 is default... check? */ 208142057Sjhb size_t len; 209142057Sjhb 210142057Sjhb /* measure length */ 211142057Sjhb px = getenv("HTTP_PROXY"); 212142057Sjhb len = strcspn(px, ":"); 213142057Sjhb 214142057Sjhb /* get port (atoi is a little too tolerant perhaps?) */ 215142057Sjhb if (px[len] == ':') 216142057Sjhb port = atoi(px+len+1); 217142057Sjhb 218161474Snetchild /* get host name */ 219161474Snetchild if (len >= MAXHOSTNAMELEN) 220161474Snetchild len = MAXHOSTNAMELEN - 1; 221161474Snetchild strncpy(host, px, len); 222161474Snetchild host[len] = 0; 223161474Snetchild 224161474Snetchild /* connect */ 225161474Snetchild fd = _http_connect(host, port); 226142057Sjhb } 227142057Sjhb 228142057Sjhb /* if no proxy is configured or could be contacted, try direct */ 229133819Stjr if (fd < 0) { 230133819Stjr if ((fd = _http_connect(URL->host, URL->port)) < 0) 231133819Stjr goto ouch; 232133819Stjr } 233133819Stjr 234133819Stjr /* reopen as stream */ 235133819Stjr if ((f = fdopen(fd, "r+")) == NULL) 236144449Sjhb goto ouch; 237144449Sjhb c->real_f = f; 238133819Stjr 239144449Sjhb /* send request (proxies require absolute form, so use that) */ 240144449Sjhb fprintf(f, "GET http://%s:%d/%s HTTP/1.1" ENDL, 241144449Sjhb URL->host, URL->port, URL->doc); 242144449Sjhb 243144449Sjhb /* start sending headers away */ 244133819Stjr if (URL->user[0] || URL->pwd[0]) { 245144449Sjhb fprintf(f, "Authorization: Basic "); 246144449Sjhb fprint64(f, (const unsigned char *)URL->user); 247133819Stjr fputc(':', f); 248144449Sjhb fprint64(f, (const unsigned char *)URL->pwd); 249144449Sjhb fputs(ENDL, f); 250144449Sjhb } 251144449Sjhb fprintf(f, "Host: %s:%d" ENDL, URL->host, URL->port); 252144449Sjhb fprintf(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 253144449Sjhb fprintf(f, "Connection: close" ENDL ENDL); 254144449Sjhb 255144449Sjhb /* get response */ 256144449Sjhb if ((ln = fgetln(f, &len)) == NULL) 257144449Sjhb goto fouch; 258144449Sjhb DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 259133819Stjr (int)len-2, (int)len-2, ln)); 260144449Sjhb 261144449Sjhb /* we can't use strchr() and friends since ln isn't NUL-terminated */ 262144449Sjhb p = ln; 263144449Sjhb while ((p < ln + len) && !isspace(*p)) 264144449Sjhb p++; 265144449Sjhb while ((p < ln + len) && !isdigit(*p)) 266144449Sjhb p++; 267144449Sjhb if (!isdigit(*p)) 268144449Sjhb goto fouch; 269144449Sjhb err = atoi(p); 270144449Sjhb DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", err)); 271144449Sjhb 272144449Sjhb /* add code to handle redirects later */ 273144449Sjhb if (err != 200) 274144449Sjhb goto fouch; 275144449Sjhb 276133819Stjr /* browse through header */ 277144449Sjhb while (1) { 278144449Sjhb if ((ln = fgetln(f, &len)) == NULL) 279144449Sjhb goto fouch; 280144449Sjhb if ((ln[0] == '\r') || (ln[0] == '\n')) 281144449Sjhb break; 282133819Stjr DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 283144449Sjhb (int)len-2, (int)len-2, ln)); 284144449Sjhb#define XFERENC "Transfer-Encoding:" 285144449Sjhb if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) { 286144449Sjhb p = ln + sizeof(XFERENC) - 1; 287144449Sjhb while ((p < ln + len) && isspace(*p)) 288133819Stjr p++; 289133819Stjr for (q = p; (q < ln + len) && !isspace(*q); q++) 290133819Stjr /* VOID */ ; 291133819Stjr *q = 0; 292133819Stjr if (strcasecmp(p, "chunked") == 0) 293133819Stjr enc = ENC_CHUNKED; 294144449Sjhb DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 295144449Sjhb#undef XFERENC 296133819Stjr#define CONTTYPE "Content-Type:" 297144449Sjhb } else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) { 298144449Sjhb p = ln + sizeof(CONTTYPE) - 1; 299144449Sjhb while ((p < ln + len) && isspace(*p)) 300144449Sjhb p++; 301144449Sjhb for (i = 0; p < ln + len; p++) 302133819Stjr if (i < HTTPCTYPELEN) 303133819Stjr c->content_type[i++] = *p; 304133819Stjr do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 305133819Stjr DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 306133819Stjr c->content_type)); 307133819Stjr#undef CONTTYPE 308133819Stjr } 309133819Stjr } 310133819Stjr 311133819Stjr /* only body remains */ 312133819Stjr c->encoding = enc; 313133819Stjr cf = funopen(c, 314133819Stjr (int (*)(void *, char *, int))_http_readfn, 315133819Stjr (int (*)(void *, const char *, int))_http_writefn, 316133819Stjr (fpos_t (*)(void *, fpos_t, int))NULL, 317133819Stjr (int (*)(void *))_http_closefn); 318133819Stjr if (cf == NULL) 319133819Stjr goto fouch; 320133819Stjr return cf; 321133819Stjr 322133819Stjrouch: 323133819Stjr if (fd >= 0) 324133819Stjr close(fd); 325133819Stjr free(c); 326133819Stjr return NULL; 327133819Stjrfouch: 328133819Stjr fclose(f); 329133819Stjr free(c); 330133819Stjr return NULL; 331133819Stjr} 332133819Stjr 333133819StjrFILE * 334133819StjrfetchPutHTTP(url_t *URL, char *flags) 335133819Stjr{ 336133819Stjr warnx("fetchPutHTTP(): not implemented"); 337133819Stjr return NULL; 338133819Stjr} 339133819Stjr