http.c revision 63236
1208747Sraj/*- 2208747Sraj * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3208747Sraj * All rights reserved. 4208747Sraj * 5208747Sraj * Redistribution and use in source and binary forms, with or without 6208747Sraj * modification, are permitted provided that the following conditions 7208747Sraj * are met: 8208747Sraj * 1. Redistributions of source code must retain the above copyright 9208747Sraj * notice, this list of conditions and the following disclaimer 10208747Sraj * in this position and unchanged. 11208747Sraj * 2. Redistributions in binary form must reproduce the above copyright 12208747Sraj * notice, this list of conditions and the following disclaimer in the 13208747Sraj * documentation and/or other materials provided with the distribution. 14208747Sraj * 3. The name of the author may not be used to endorse or promote products 15208747Sraj * derived from this software without specific prior written permission. 16208747Sraj * 17208747Sraj * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18208747Sraj * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19208747Sraj * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20208747Sraj * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21208747Sraj * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22208747Sraj * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23208747Sraj * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24208747Sraj * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25208747Sraj * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26208747Sraj * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27208747Sraj * 28208747Sraj * $FreeBSD: head/lib/libfetch/http.c 63236 2000-07-16 01:04:10Z des $ 29208747Sraj */ 30208747Sraj 31208747Sraj/* 32208747Sraj * The following copyright applies to the base64 code: 33208747Sraj * 34208747Sraj *- 35208747Sraj * Copyright 1997 Massachusetts Institute of Technology 36208747Sraj * 37208747Sraj * Permission to use, copy, modify, and distribute this software and 38239274Sgonzo * its documentation for any purpose and without fee is hereby 39208747Sraj * granted, provided that both the above copyright notice and this 40208747Sraj * permission notice appear in all copies, that both the above 41208747Sraj * copyright notice and this permission notice appear in all 42208747Sraj * supporting documentation, and that the name of M.I.T. not be used 43208747Sraj * in advertising or publicity pertaining to distribution of the 44208747Sraj * software without specific, written prior permission. M.I.T. makes 45208747Sraj * no representations about the suitability of this software for any 46208747Sraj * purpose. It is provided "as is" without express or implied 47208747Sraj * warranty. 48208747Sraj * 49208747Sraj * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 50208747Sraj * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 51208747Sraj * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 52208747Sraj * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 53208747Sraj * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 54208747Sraj * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 55208747Sraj * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 56208747Sraj * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 57208747Sraj * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 58208747Sraj * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 59208747Sraj * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60208747Sraj * SUCH DAMAGE. 61208747Sraj */ 62208747Sraj 63208747Sraj#include <sys/param.h> 64208747Sraj#include <sys/socket.h> 65208747Sraj 66257457Sbrooks#include <ctype.h> 67257457Sbrooks#include <err.h> 68208747Sraj#include <errno.h> 69239274Sgonzo#include <locale.h> 70208747Sraj#include <netdb.h> 71208747Sraj#include <stdarg.h> 72208747Sraj#include <stdio.h> 73208747Sraj#include <stdlib.h> 74208747Sraj#include <string.h> 75208747Sraj#include <time.h> 76208747Sraj#include <unistd.h> 77208747Sraj 78208747Sraj#include "fetch.h" 79208747Sraj#include "common.h" 80208747Sraj#include "httperr.h" 81208747Sraj 82208747Srajextern char *__progname; /* XXX not portable */ 83208747Sraj 84208747Sraj/* Maximum number of redirects to follow */ 85208747Sraj#define MAX_REDIRECT 5 86208747Sraj 87239274Sgonzo/* Symbolic names for reply codes we care about */ 88239274Sgonzo#define HTTP_OK 200 89239274Sgonzo#define HTTP_PARTIAL 206 90239274Sgonzo#define HTTP_MOVED_PERM 301 91239274Sgonzo#define HTTP_MOVED_TEMP 302 92208747Sraj#define HTTP_SEE_OTHER 303 93239274Sgonzo#define HTTP_NEED_AUTH 401 94239274Sgonzo#define HTTP_NEED_PROXY_AUTH 403 95239274Sgonzo#define HTTP_PROTOCOL_ERROR 999 96208747Sraj 97208747Sraj#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \ 98208747Sraj || (xyz) == HTTP_MOVED_TEMP \ 99208747Sraj || (xyz) == HTTP_SEE_OTHER) 100208747Sraj 101208747Sraj 102208747Sraj 103208747Sraj/***************************************************************************** 104208747Sraj * I/O functions for decoding chunked streams 105208747Sraj */ 106208747Sraj 107239274Sgonzostruct cookie 108239274Sgonzo{ 109239274Sgonzo int fd; 110208747Sraj char *buf; 111239274Sgonzo size_t b_size; 112208747Sraj size_t b_len; 113239274Sgonzo int b_pos; 114208747Sraj int eof; 115239274Sgonzo int error; 116239274Sgonzo long chunksize; 117239274Sgonzo#ifdef DEBUG 118208747Sraj long total; 119239274Sgonzo#endif 120239274Sgonzo}; 121239274Sgonzo 122239274Sgonzo/* 123239274Sgonzo * Get next chunk header 124239274Sgonzo */ 125208747Srajstatic int 126239274Sgonzo_http_new_chunk(struct cookie *c) 127239274Sgonzo{ 128239274Sgonzo char *p; 129239274Sgonzo 130239274Sgonzo if (_fetch_getln(c->fd, &c->buf, &c->b_size, &c->b_len) == -1) 131239274Sgonzo return -1; 132239274Sgonzo 133239274Sgonzo if (c->b_len < 2 || !ishexnumber(*c->buf)) 134239274Sgonzo return -1; 135239274Sgonzo 136239274Sgonzo for (p = c->buf; !isspace(*p) && *p != ';' && p < c->buf + c->b_len; ++p) 137239274Sgonzo if (!ishexnumber(*p)) 138239274Sgonzo return -1; 139239274Sgonzo else if (isdigit(*p)) 140239274Sgonzo c->chunksize = c->chunksize * 16 + *p - '0'; 141239274Sgonzo else 142239274Sgonzo c->chunksize = c->chunksize * 16 + 10 + tolower(*p) - 'a'; 143239274Sgonzo 144239274Sgonzo#ifdef DEBUG 145239274Sgonzo c->total += c->chunksize; 146239274Sgonzo if (c->chunksize == 0) 147239274Sgonzo fprintf(stderr, "\033[1m_http_fillbuf(): " 148239274Sgonzo "end of last chunk\033[m\n"); 149208747Sraj else 150208747Sraj fprintf(stderr, "\033[1m_http_fillbuf(): " 151208747Sraj "new chunk: %ld (%ld)\033[m\n", c->chunksize, c->total); 152208747Sraj#endif 153208747Sraj 154208747Sraj return c->chunksize; 155208747Sraj} 156208747Sraj 157208747Sraj/* 158208747Sraj * Fill the input buffer, do chunk decoding on the fly 159208747Sraj */ 160208747Srajstatic int 161208747Sraj_http_fillbuf(struct cookie *c) 162208747Sraj{ 163208747Sraj if (c->error) 164208747Sraj return -1; 165208747Sraj if (c->eof) 166208747Sraj return 0; 167208747Sraj 168208747Sraj if (c->chunksize == 0) { 169208747Sraj switch (_http_new_chunk(c)) { 170208747Sraj case -1: 171208747Sraj c->error = 1; 172208747Sraj return -1; 173208747Sraj case 0: 174208747Sraj c->eof = 1; 175208747Sraj return 0; 176208747Sraj } 177208747Sraj } 178208747Sraj 179208747Sraj if (c->b_size < c->chunksize) { 180208747Sraj char *tmp; 181208747Sraj 182208747Sraj if ((tmp = realloc(c->buf, c->chunksize)) == NULL) 183208747Sraj return -1; 184208747Sraj c->buf = tmp; 185208747Sraj c->b_size = c->chunksize; 186208747Sraj } 187208747Sraj 188208747Sraj if ((c->b_len = read(c->fd, c->buf, c->chunksize)) == -1) 189208747Sraj return -1; 190208747Sraj c->chunksize -= c->b_len; 191208747Sraj 192208747Sraj if (c->chunksize == 0) { 193208747Sraj char endl[2]; 194208747Sraj read(c->fd, endl, 2); 195208747Sraj } 196208747Sraj 197208747Sraj c->b_pos = 0; 198208747Sraj 199208747Sraj return c->b_len; 200208747Sraj} 201208747Sraj 202208747Sraj/* 203208747Sraj * Read function 204208747Sraj */ 205208747Srajstatic int 206208747Sraj_http_readfn(void *v, char *buf, int len) 207208747Sraj{ 208208747Sraj struct cookie *c = (struct cookie *)v; 209208747Sraj int l, pos; 210208747Sraj 211208747Sraj if (c->error) 212208747Sraj return -1; 213208747Sraj if (c->eof) 214208747Sraj return 0; 215208747Sraj 216208747Sraj for (pos = 0; len > 0; pos += l, len -= l) { 217208747Sraj /* empty buffer */ 218208747Sraj if (!c->buf || c->b_pos == c->b_len) 219208747Sraj if (_http_fillbuf(c) < 1) 220208747Sraj break; 221208747Sraj l = c->b_len - c->b_pos; 222208747Sraj if (len < l) 223208747Sraj l = len; 224208747Sraj bcopy(c->buf + c->b_pos, buf + pos, l); 225208747Sraj c->b_pos += l; 226208747Sraj } 227208747Sraj 228208747Sraj if (!pos && c->error) 229208747Sraj return -1; 230208747Sraj return pos; 231208747Sraj} 232208747Sraj 233208747Sraj/* 234208747Sraj * Write function 235208747Sraj */ 236208747Srajstatic int 237208747Sraj_http_writefn(void *v, const char *buf, int len) 238208747Sraj{ 239208747Sraj struct cookie *c = (struct cookie *)v; 240208747Sraj 241208747Sraj return write(c->fd, buf, len); 242208747Sraj} 243208747Sraj 244208747Sraj/* 245208747Sraj * Close function 246208747Sraj */ 247208747Srajstatic int 248208747Sraj_http_closefn(void *v) 249208747Sraj{ 250208747Sraj struct cookie *c = (struct cookie *)v; 251208747Sraj int r; 252208747Sraj 253208747Sraj r = close(c->fd); 254208747Sraj if (c->buf) 255208747Sraj free(c->buf); 256208747Sraj free(c); 257208747Sraj return r; 258208747Sraj} 259208747Sraj 260208747Sraj/* 261208747Sraj * Wrap a file descriptor up 262208747Sraj */ 263208747Srajstatic FILE * 264208747Sraj_http_funopen(int fd) 265208747Sraj{ 266208747Sraj struct cookie *c; 267208747Sraj FILE *f; 268208747Sraj 269208747Sraj if ((c = calloc(1, sizeof *c)) == NULL) { 270208747Sraj _fetch_syserr(); 271208747Sraj return NULL; 272208747Sraj } 273208747Sraj c->fd = fd; 274208747Sraj if (!(f = funopen(c, _http_readfn, _http_writefn, NULL, _http_closefn))) { 275208747Sraj _fetch_syserr(); 276208747Sraj free(c); 277208747Sraj return NULL; 278208747Sraj } 279208747Sraj return f; 280208747Sraj} 281208747Sraj 282208747Sraj 283208747Sraj/***************************************************************************** 284208747Sraj * Helper functions for talking to the server and parsing its replies 285208747Sraj */ 286208747Sraj 287208747Sraj/* Header types */ 288208747Srajtypedef enum { 289208747Sraj hdr_syserror = -2, 290208747Sraj hdr_error = -1, 291208747Sraj hdr_end = 0, 292208747Sraj hdr_unknown = 1, 293208747Sraj hdr_content_length, 294208747Sraj hdr_content_range, 295208747Sraj hdr_last_modified, 296208747Sraj hdr_location, 297208747Sraj hdr_transfer_encoding 298208747Sraj} hdr; 299208747Sraj 300208747Sraj/* Names of interesting headers */ 301208747Srajstatic struct { 302208747Sraj hdr num; 303208747Sraj char *name; 304208747Sraj} hdr_names[] = { 305208747Sraj { hdr_content_length, "Content-Length" }, 306208747Sraj { hdr_content_range, "Content-Range" }, 307208747Sraj { hdr_last_modified, "Last-Modified" }, 308208747Sraj { hdr_location, "Location" }, 309208747Sraj { hdr_transfer_encoding, "Transfer-Encoding" }, 310208747Sraj { hdr_unknown, NULL }, 311208747Sraj}; 312208747Sraj 313208747Srajstatic char *reply_buf; 314208747Srajstatic size_t reply_size; 315208747Srajstatic size_t reply_length; 316208747Sraj 317208747Sraj/* 318208747Sraj * Send a formatted line; optionally echo to terminal 319208747Sraj */ 320208747Srajstatic int 321208747Sraj_http_cmd(int fd, char *fmt, ...) 322208747Sraj{ 323208747Sraj va_list ap; 324208747Sraj size_t len; 325208747Sraj char *msg; 326208747Sraj int r; 327208747Sraj 328208747Sraj va_start(ap, fmt); 329208747Sraj len = vasprintf(&msg, fmt, ap); 330208747Sraj va_end(ap); 331208747Sraj 332208747Sraj if (msg == NULL) { 333208747Sraj errno = ENOMEM; 334208747Sraj _fetch_syserr(); 335208747Sraj return -1; 336208747Sraj } 337208747Sraj 338208747Sraj r = _fetch_putln(fd, msg, len); 339208747Sraj free(msg); 340208747Sraj 341208747Sraj if (r == -1) { 342208747Sraj _fetch_syserr(); 343208747Sraj return -1; 344208747Sraj } 345208747Sraj 346208747Sraj return 0; 347208747Sraj} 348208747Sraj 349208747Sraj/* 350208747Sraj * Get and parse status line 351208747Sraj */ 352208747Srajstatic int 353208747Sraj_http_get_reply(int fd) 354208747Sraj{ 355208747Sraj if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1) 356208747Sraj return -1; 357208747Sraj /* 358208747Sraj * A valid status line looks like "HTTP/m.n xyz reason" where m 359208747Sraj * and n are the major and minor protocol version numbers and xyz 360208747Sraj * is the reply code. 361208747Sraj * We grok HTTP 1.0 and 1.1, so m must be 1 and n must be 0 or 1. 362208747Sraj * We don't care about the reason phrase. 363208747Sraj */ 364208747Sraj if (strncmp(reply_buf, "HTTP/1.", 7) != 0 365208747Sraj || (reply_buf[7] != '0' && reply_buf[7] != '1') || reply_buf[8] != ' ' 366208747Sraj || !isdigit(reply_buf[9]) 367208747Sraj || !isdigit(reply_buf[10]) 368208747Sraj || !isdigit(reply_buf[11])) 369208747Sraj return HTTP_PROTOCOL_ERROR; 370208747Sraj 371208747Sraj return ((reply_buf[9] - '0') * 100 372208747Sraj + (reply_buf[10] - '0') * 10 373208747Sraj + (reply_buf[11] - '0')); 374208747Sraj} 375208747Sraj 376208747Sraj/* 377208747Sraj * Check a header; if the type matches the given string, return a 378208747Sraj * pointer to the beginning of the value. 379208747Sraj */ 380208747Srajstatic char * 381208747Sraj_http_match(char *str, char *hdr) 382208747Sraj{ 383208747Sraj while (*str && *hdr && tolower(*str++) == tolower(*hdr++)) 384208747Sraj /* nothing */; 385208747Sraj if (*str || *hdr != ':') 386208747Sraj return NULL; 387208747Sraj while (*hdr && isspace(*++hdr)) 388208747Sraj /* nothing */; 389208747Sraj return hdr; 390208747Sraj} 391208747Sraj 392208747Sraj/* 393208747Sraj * Get the next header and return the appropriate symbolic code. 394208747Sraj */ 395208747Srajstatic hdr 396208747Sraj_http_next_header(int fd, char **p) 397208747Sraj{ 398208747Sraj int i; 399208747Sraj 400208747Sraj if (_fetch_getln(fd, &reply_buf, &reply_size, &reply_length) == -1) 401208747Sraj return hdr_syserror; 402208747Sraj while (reply_length && isspace(reply_buf[reply_length-1])) 403208747Sraj reply_length--; 404208747Sraj reply_buf[reply_length] = 0; 405208747Sraj if (reply_length == 0) 406208747Sraj return hdr_end; 407208747Sraj /* 408208747Sraj * We could check for malformed headers but we don't really care. 409208747Sraj * A valid header starts with a token immediately followed by a 410208747Sraj * colon; a token is any sequence of non-control, non-whitespace 411208747Sraj * characters except "()<>@,;:\\\"{}". 412208747Sraj */ 413208747Sraj for (i = 0; hdr_names[i].num != hdr_unknown; i++) 414208747Sraj if ((*p = _http_match(hdr_names[i].name, reply_buf)) != NULL) 415208747Sraj return hdr_names[i].num; 416208747Sraj return hdr_unknown; 417208747Sraj} 418208747Sraj 419208747Sraj/* 420208747Sraj * Parse a last-modified header 421208747Sraj */ 422208747Srajstatic time_t 423208747Sraj_http_parse_mtime(char *p) 424239274Sgonzo{ 425208747Sraj char locale[64]; 426248509Sray struct tm tm; 427208747Sraj 428208747Sraj strncpy(locale, setlocale(LC_TIME, NULL), sizeof locale); 429208747Sraj setlocale(LC_TIME, "C"); 430208747Sraj strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm); 431239274Sgonzo /* XXX should add support for date-2 and date-3 */ 432208747Sraj setlocale(LC_TIME, locale); 433208747Sraj DEBUG(fprintf(stderr, "last modified: [\033[1m%04d-%02d-%02d " 434208747Sraj "%02d:%02d:%02d\033[m]\n", 435240484Sgber tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, 436240484Sgber tm.tm_hour, tm.tm_min, tm.tm_sec)); 437240484Sgber return timegm(&tm); 438240484Sgber} 439208747Sraj 440208747Sraj/* 441208747Sraj * Parse a content-length header 442208747Sraj */ 443208747Srajstatic off_t 444208747Sraj_http_parse_length(char *p) 445208747Sraj{ 446208747Sraj off_t len; 447208747Sraj 448208747Sraj for (len = 0; *p && isdigit(*p); ++p) 449208747Sraj len = len * 10 + (*p - '0'); 450208747Sraj DEBUG(fprintf(stderr, "content length: [\033[1m%lld\033[m]\n", len)); 451248509Sray return len; 452248509Sray} 453208747Sraj 454208747Sraj/* 455208747Sraj * Parse a content-range header 456208747Sraj */ 457208747Srajstatic off_t 458208747Sraj_http_parse_range(char *p) 459208747Sraj{ 460239274Sgonzo off_t off; 461248467Sray 462208747Sraj if (strncasecmp(p, "bytes ", 6) != 0) 463248467Sray return -1; 464208747Sraj for (p += 6, off = 0; *p && isdigit(*p); ++p) 465208747Sraj off = off * 10 + *p - '0'; 466248467Sray if (*p != '-') 467208747Sraj return -1; 468208747Sraj DEBUG(fprintf(stderr, "content range: [\033[1m%lld-\033[m]\n", off)); 469208747Sraj return off; 470208747Sraj} 471208747Sraj 472208747Sraj 473208747Sraj/***************************************************************************** 474208747Sraj * Helper functions for authorization 475208747Sraj */ 476208747Sraj 477208747Sraj/* 478208747Sraj * Base64 encoding 479208747Sraj */ 480208747Srajstatic char * 481208747Sraj_http_base64(char *src) 482208747Sraj{ 483208747Sraj static const char base64[] = 484208747Sraj "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 485208747Sraj "abcdefghijklmnopqrstuvwxyz" 486208747Sraj "0123456789+/"; 487208747Sraj char *str, *dst; 488208747Sraj size_t l; 489208747Sraj int t, r; 490208747Sraj 491208747Sraj l = strlen(src); 492208747Sraj if ((str = malloc(((l + 2) / 3) * 4)) == NULL) 493208747Sraj return NULL; 494208747Sraj dst = str; 495208747Sraj r = 0; 496208747Sraj 497208747Sraj while (l >= 3) { 498208747Sraj t = (src[0] << 16) | (src[1] << 8) | src[2]; 499208747Sraj dst[0] = base64[(t >> 18) & 0x3f]; 500208747Sraj dst[1] = base64[(t >> 12) & 0x3f]; 501208747Sraj dst[2] = base64[(t >> 6) & 0x3f]; 502208747Sraj dst[3] = base64[(t >> 0) & 0x3f]; 503208747Sraj src += 3; l -= 3; 504208747Sraj dst += 4; r += 4; 505208747Sraj } 506208747Sraj 507218073Smarcel switch (l) { 508208747Sraj case 2: 509208747Sraj t = (src[0] << 16) | (src[1] << 8); 510208747Sraj dst[0] = base64[(t >> 18) & 0x3f]; 511208747Sraj dst[1] = base64[(t >> 12) & 0x3f]; 512208747Sraj dst[2] = base64[(t >> 6) & 0x3f]; 513208747Sraj dst[3] = '='; 514208747Sraj dst += 4; 515208747Sraj r += 4; 516208747Sraj break; 517208747Sraj case 1: 518208747Sraj t = src[0] << 16; 519208747Sraj dst[0] = base64[(t >> 18) & 0x3f]; 520208747Sraj dst[1] = base64[(t >> 12) & 0x3f]; 521208747Sraj dst[2] = dst[3] = '='; 522208747Sraj dst += 4; 523208747Sraj r += 4; 524208747Sraj break; 525208747Sraj case 0: 526208747Sraj break; 527208747Sraj } 528208747Sraj 529239689Sgonzo *dst = 0; 530239689Sgonzo return str; 531208747Sraj} 532208747Sraj 533208747Sraj/* 534208747Sraj * Encode username and password 535208747Sraj */ 536208747Srajstatic int 537208747Sraj_http_basic_auth(int fd, char *hdr, char *usr, char *pwd) 538208747Sraj{ 539208747Sraj char *upw, *auth; 540208747Sraj int r; 541208747Sraj 542208747Sraj if (asprintf(&upw, "%s:%s", usr, pwd) == -1) 543208747Sraj return -1; 544208747Sraj auth = _http_base64(upw); 545208747Sraj free(upw); 546208747Sraj if (auth == NULL) 547208747Sraj return -1; 548208747Sraj r = _http_cmd(fd, "%s: Basic %s", hdr, auth); 549208747Sraj free(auth); 550208747Sraj return r; 551208747Sraj} 552208747Sraj 553208747Sraj/* 554208747Sraj * Send an authorization header 555208747Sraj */ 556208747Srajstatic int 557209905Sraj_http_authorize(int fd, char *hdr, char *p) 558209905Sraj{ 559208747Sraj /* basic authorization */ 560218073Smarcel if (strncasecmp(p, "basic:", 6) == 0) { 561218073Smarcel char *user, *pwd, *str; 562208747Sraj int r; 563208747Sraj 564208747Sraj /* skip realm */ 565208747Sraj for (p += 6; *p && *p != ':'; ++p) 566208747Sraj /* nothing */ ; 567208747Sraj if (!*p || strchr(++p, ':') == NULL) 568208747Sraj return -1; 569208747Sraj if ((str = strdup(p)) == NULL) 570232518Sraj return -1; /* XXX */ 571208747Sraj user = str; 572208747Sraj pwd = strchr(str, ':'); 573208747Sraj *pwd++ = '\0'; 574208747Sraj r = _http_basic_auth(fd, hdr, user, pwd); 575232518Sraj free(str); 576232518Sraj return r; 577208747Sraj } 578208747Sraj return -1; 579208747Sraj} 580208747Sraj 581208747Sraj 582208747Sraj/***************************************************************************** 583208747Sraj * Helper functions for connecting to a server or proxy 584208747Sraj */ 585208747Sraj 586208747Sraj/* 587208747Sraj * Connect to the specified HTTP proxy server. 588208747Sraj */ 589208747Srajstatic int 590208747Sraj_http_proxy_connect(char *proxy, int af, int verbose) 591232518Sraj{ 592232518Sraj char *hostname, *p; 593232518Sraj int fd, port; 594232518Sraj 595232518Sraj /* get hostname */ 596232518Sraj hostname = NULL; 597232518Sraj#ifdef INET6 598232518Sraj /* host part can be an IPv6 address enclosed in square brackets */ 599232518Sraj if (*proxy == '[') { 600232518Sraj if ((p = strchr(proxy, ']')) == NULL) { 601232518Sraj /* no terminating bracket */ 602232518Sraj /* XXX should set an error code */ 603232518Sraj goto ouch; 604232518Sraj } 605232518Sraj if (p[1] != '\0' && p[1] != ':') { 606232518Sraj /* garbage after address */ 607232518Sraj /* XXX should set an error code */ 608232518Sraj goto ouch; 609232518Sraj } 610232518Sraj if ((hostname = malloc(p - proxy)) == NULL) { 611232518Sraj errno = ENOMEM; 612232518Sraj _fetch_syserr(); 613232518Sraj goto ouch; 614232518Sraj } 615232518Sraj strncpy(hostname, proxy + 1, p - proxy - 1); 616232518Sraj hostname[p - proxy - 1] = '\0'; 617232518Sraj ++p; 618232518Sraj } else { 619232518Sraj#endif /* INET6 */ 620232518Sraj if ((p = strchr(proxy, ':')) == NULL) 621232518Sraj p = strchr(proxy, '\0'); 622232518Sraj if ((hostname = malloc(p - proxy + 1)) == NULL) { 623232518Sraj errno = ENOMEM; 624232518Sraj _fetch_syserr(); 625232518Sraj goto ouch; 626232518Sraj } 627232518Sraj strncpy(hostname, proxy, p - proxy); 628232518Sraj hostname[p - proxy] = '\0'; 629232518Sraj#ifdef INET6 630232518Sraj } 631232518Sraj#endif /* INET6 */ 632208747Sraj DEBUG(fprintf(stderr, "proxy name: [%s]\n", hostname)); 633208747Sraj 634208747Sraj /* get port number */ 635208747Sraj port = 0; 636243690Sgonzo if (*p == ':') { 637243690Sgonzo ++p; 638243690Sgonzo if (strspn(p, "0123456789") != strlen(p) || strlen(p) > 5) { 639243690Sgonzo /* port number is non-numeric or too long */ 640243690Sgonzo /* XXX should set an error code */ 641243690Sgonzo goto ouch; 642243690Sgonzo } 643243690Sgonzo port = atoi(p); 644243690Sgonzo if (port < 1 || port > 65535) { 645243690Sgonzo /* port number is out of range */ 646243690Sgonzo /* XXX should set an error code */ 647243690Sgonzo goto ouch; 648243690Sgonzo } 649243690Sgonzo } 650243690Sgonzo 651243690Sgonzo if (!port) { 652243690Sgonzo#if 0 653243690Sgonzo /* 654243690Sgonzo * commented out, since there is currently no service name 655243690Sgonzo * for HTTP proxies 656243690Sgonzo */ 657243690Sgonzo struct servent *se; 658243690Sgonzo 659243690Sgonzo if ((se = getservbyname("xxxx", "tcp")) != NULL) 660243690Sgonzo port = ntohs(se->s_port); 661243690Sgonzo else 662243690Sgonzo#endif 663243690Sgonzo port = 3128; 664243690Sgonzo } 665243690Sgonzo DEBUG(fprintf(stderr, "proxy port: %d\n", port)); 666243690Sgonzo 667243690Sgonzo /* connect */ 668243690Sgonzo if ((fd = _fetch_connect(hostname, port, af, verbose)) == -1) 669243690Sgonzo _fetch_syserr(); 670243690Sgonzo return fd; 671243690Sgonzo 672243690Sgonzo ouch: 673243690Sgonzo if (hostname) 674243690Sgonzo free(hostname); 675243690Sgonzo return -1; 676243690Sgonzo} 677243690Sgonzo 678243690Sgonzo/* 679243690Sgonzo * Connect to the correct HTTP server or proxy. 680243690Sgonzo */ 681243690Sgonzostatic int 682243690Sgonzo_http_connect(struct url *URL, int *proxy, char *flags) 683243690Sgonzo{ 684243690Sgonzo int direct, verbose; 685243690Sgonzo int af, fd; 686243690Sgonzo char *p; 687243690Sgonzo 688243690Sgonzo#ifdef INET6 689243690Sgonzo af = AF_UNSPEC; 690243690Sgonzo#else 691243690Sgonzo af = AF_INET; 692243690Sgonzo#endif 693243690Sgonzo 694243690Sgonzo direct = (flags && strchr(flags, 'd')); 695243690Sgonzo verbose = (flags && strchr(flags, 'v')); 696208747Sraj if (flags && strchr(flags, '4')) 697208747Sraj af = AF_INET; 698208747Sraj else if (flags && strchr(flags, '6')) 699208747Sraj af = AF_INET6; 700208747Sraj 701208747Sraj /* check port */ 702208747Sraj if (!URL->port) { 703208747Sraj struct servent *se; 704208747Sraj 705208747Sraj /* Scheme can be ftp if we're using a proxy */ 706208747Sraj if (strcasecmp(URL->scheme, "ftp") == 0) 707228201Sjchandra if ((se = getservbyname("ftp", "tcp")) != NULL) 708208747Sraj URL->port = ntohs(se->s_port); 709208747Sraj else 710208747Sraj URL->port = 21; 711208747Sraj else 712208747Sraj if ((se = getservbyname("http", "tcp")) != NULL) 713208747Sraj URL->port = ntohs(se->s_port); 714208747Sraj else 715208747Sraj URL->port = 80; 716208747Sraj } 717208747Sraj 718208747Sraj if (!direct && (p = getenv("HTTP_PROXY")) != NULL) { 719208747Sraj /* attempt to connect to proxy server */ 720208747Sraj if ((fd = _http_proxy_connect(p, af, verbose)) == -1) 721208747Sraj return -1; 722208747Sraj *proxy = 1; 723208747Sraj } else { 724208747Sraj /* if no proxy is configured, try direct */ 725208747Sraj if (strcasecmp(URL->scheme, "ftp") == 0) { 726208747Sraj /* can't talk http to an ftp server */ 727208747Sraj /* XXX should set an error code */ 728208747Sraj return -1; 729208747Sraj } 730208747Sraj if ((fd = _fetch_connect(URL->host, URL->port, af, verbose)) == -1) 731208747Sraj /* _fetch_connect() has already set an error code */ 732208747Sraj return -1; 733208747Sraj *proxy = 0; 734208747Sraj } 735208747Sraj 736208747Sraj return fd; 737208747Sraj} 738208747Sraj 739208747Sraj 740208747Sraj/***************************************************************************** 741208747Sraj * Core 742208747Sraj */ 743208747Sraj 744208747Sraj/* 745208747Sraj * Send a request and process the reply 746208747Sraj */ 747208747Srajstatic FILE * 748208747Sraj_http_request(struct url *URL, char *op, struct url_stat *us, char *flags) 749208747Sraj{ 750208747Sraj struct url *url, *new; 751208747Sraj int chunked, need_auth, noredirect, proxy, verbose; 752208747Sraj int code, fd, i, n; 753208747Sraj off_t offset; 754208747Sraj char *p; 755208747Sraj FILE *f; 756208747Sraj hdr h; 757208747Sraj char *host; 758208747Sraj#ifdef INET6 759240485Sgber char hbuf[MAXHOSTNAMELEN + 1]; 760240485Sgber#endif 761240485Sgber 762240485Sgber noredirect = (flags && strchr(flags, 'A')); 763240485Sgber verbose = (flags && strchr(flags, 'v')); 764240485Sgber 765240485Sgber n = noredirect ? 1 : MAX_REDIRECT; 766240485Sgber 767240485Sgber /* just to appease compiler warnings */ 768240485Sgber code = HTTP_PROTOCOL_ERROR; 769240485Sgber chunked = 0; 770 offset = 0; 771 fd = -1; 772 773 for (url = URL, i = 0; i < n; ++i) { 774 new = NULL; 775 us->size = -1; 776 us->atime = us->mtime = 0; 777 chunked = 0; 778 need_auth = 0; 779 offset = 0; 780 fd = -1; 781 retry: 782 /* connect to server or proxy */ 783 if ((fd = _http_connect(url, &proxy, flags)) == -1) 784 goto ouch; 785 786 host = url->host; 787#ifdef INET6 788 if (strchr(url->host, ':')) { 789 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host); 790 host = hbuf; 791 } 792#endif 793 794 /* send request */ 795 if (verbose) 796 _fetch_info("requesting %s://%s:%d%s", 797 url->scheme, host, url->port, url->doc); 798 if (proxy) { 799 _http_cmd(fd, "%s %s://%s:%d%s HTTP/1.1", 800 op, url->scheme, host, url->port, url->doc); 801 } else { 802 _http_cmd(fd, "%s %s HTTP/1.1", 803 op, url->doc); 804 } 805 806 /* proxy authorization */ 807 if (proxy && (p = getenv("HTTP_PROXY_AUTH")) != NULL) 808 _http_authorize(fd, "Proxy-Authorization", p); 809 810 /* server authorization */ 811 if (need_auth) { 812 if (*url->user || *url->pwd) 813 _http_basic_auth(fd, "Authorization", 814 url->user ? url->user : "", 815 url->pwd ? url->pwd : ""); 816 else if ((p = getenv("HTTP_AUTH")) != NULL) 817 _http_authorize(fd, "Authorization", p); 818 else { 819 _http_seterr(HTTP_NEED_AUTH); 820 goto ouch; 821 } 822 } 823 824 /* other headers */ 825 _http_cmd(fd, "Host: %s:%d", host, url->port); 826 _http_cmd(fd, "User-Agent: %s " _LIBFETCH_VER, __progname); 827 if (URL->offset) 828 _http_cmd(fd, "Range: bytes=%lld-", url->offset); 829 _http_cmd(fd, "Connection: close"); 830 _http_cmd(fd, ""); 831 832 /* get reply */ 833 switch ((code = _http_get_reply(fd))) { 834 case HTTP_OK: 835 case HTTP_PARTIAL: 836 /* fine */ 837 break; 838 case HTTP_MOVED_PERM: 839 case HTTP_MOVED_TEMP: 840 /* 841 * Not so fine, but we still have to read the headers to 842 * get the new location. 843 */ 844 break; 845 case HTTP_NEED_AUTH: 846 if (need_auth) { 847 /* 848 * We already sent out authorization code, so there's 849 * nothing more we can do. 850 */ 851 _http_seterr(code); 852 goto ouch; 853 } 854 /* try again, but send the password this time */ 855 if (verbose) 856 _fetch_info("server requires authorization"); 857 need_auth = 1; 858 close(fd); 859 goto retry; 860 case HTTP_NEED_PROXY_AUTH: 861 /* 862 * If we're talking to a proxy, we already sent our proxy 863 * authorization code, so there's nothing more we can do. 864 */ 865 _http_seterr(code); 866 goto ouch; 867 case HTTP_PROTOCOL_ERROR: 868 /* fall through */ 869 case -1: 870 _fetch_syserr(); 871 goto ouch; 872 default: 873 _http_seterr(code); 874 goto ouch; 875 } 876 877 /* get headers */ 878 do { 879 switch ((h = _http_next_header(fd, &p))) { 880 case hdr_syserror: 881 _fetch_syserr(); 882 goto ouch; 883 case hdr_error: 884 _http_seterr(HTTP_PROTOCOL_ERROR); 885 goto ouch; 886 case hdr_content_length: 887 us->size = _http_parse_length(p); 888 break; 889 case hdr_content_range: 890 offset = _http_parse_range(p); 891 break; 892 case hdr_last_modified: 893 us->atime = us->mtime = _http_parse_mtime(p); 894 break; 895 case hdr_location: 896 if (!HTTP_REDIRECT(code)) 897 break; 898 if (new) 899 free(new); 900 if (verbose) 901 _fetch_info("%d redirect to %s", code, p); 902 if (*p == '/') 903 /* absolute path */ 904 new = fetchMakeURL(url->scheme, url->host, url->port, p, 905 url->user, url->pwd); 906 else 907 new = fetchParseURL(p); 908 if (new == NULL) { 909 /* XXX should set an error code */ 910 DEBUG(fprintf(stderr, "failed to parse new URL\n")); 911 goto ouch; 912 } 913 if (!*new->user && !*new->pwd) { 914 strcpy(new->user, url->user); 915 strcpy(new->pwd, url->pwd); 916 } 917 new->offset = url->offset; 918 new->length = url->length; 919 break; 920 case hdr_transfer_encoding: 921 /* XXX weak test*/ 922 chunked = (strcasecmp(p, "chunked") == 0); 923 break; 924 case hdr_end: 925 /* fall through */ 926 case hdr_unknown: 927 /* ignore */ 928 break; 929 } 930 } while (h > hdr_end); 931 932 /* we either have a hit, or a redirect with no Location: header */ 933 if (code == HTTP_OK || code == HTTP_PARTIAL || !new) 934 break; 935 936 /* we have a redirect */ 937 close(fd); 938 if (url != URL) 939 fetchFreeURL(url); 940 url = new; 941 } 942 943 /* no success */ 944 if (fd == -1) { 945 _http_seterr(code); 946 goto ouch; 947 } 948 949 /* wrap it up in a FILE */ 950 if ((f = chunked ? _http_funopen(fd) : fdopen(fd, "r")) == NULL) { 951 _fetch_syserr(); 952 goto ouch; 953 } 954 955 while (offset++ < url->offset) 956 if (fgetc(f) == EOF) { 957 _fetch_syserr(); 958 fclose(f); 959 f = NULL; 960 } 961 962 if (url != URL) 963 fetchFreeURL(url); 964 965 return f; 966 967 ouch: 968 if (url != URL) 969 fetchFreeURL(url); 970 if (fd != -1) 971 close(fd); 972 return NULL; 973} 974 975 976/***************************************************************************** 977 * Entry points 978 */ 979 980/* 981 * Retrieve a file by HTTP 982 */ 983FILE * 984fetchGetHTTP(struct url *URL, char *flags) 985{ 986 struct url_stat us; 987 988 return _http_request(URL, "GET", &us, flags); 989} 990 991FILE * 992fetchPutHTTP(struct url *URL, char *flags) 993{ 994 warnx("fetchPutHTTP(): not implemented"); 995 return NULL; 996} 997 998/* 999 * Get an HTTP document's metadata 1000 */ 1001int 1002fetchStatHTTP(struct url *URL, struct url_stat *us, char *flags) 1003{ 1004 FILE *f; 1005 1006 if ((f = _http_request(URL, "HEAD", us, flags)) == NULL) 1007 return -1; 1008 fclose(f); 1009 return 0; 1010} 1011 1012/* 1013 * List a directory 1014 */ 1015struct url_ent * 1016fetchListHTTP(struct url *url, char *flags) 1017{ 1018 warnx("fetchListHTTP(): not implemented"); 1019 return NULL; 1020} 1021