1package HTTP::Proxy; 2 3use HTTP::Daemon; 4use HTTP::Date qw(time2str); 5use LWP::UserAgent; 6use LWP::ConnCache; 7use Fcntl ':flock'; # import LOCK_* constants 8use IO::Select; 9use Sys::Hostname; # hostname() 10use Carp; 11 12use strict; 13use vars qw( $VERSION $AUTOLOAD @METHODS 14 @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS ); 15 16require Exporter; 17@ISA = qw(Exporter); 18@EXPORT = (); # no export by default 19@EXPORT_OK = qw( ERROR NONE PROXY STATUS PROCESS SOCKET HEADERS FILTERS 20 DATA CONNECT ENGINE ALL ); 21%EXPORT_TAGS = ( log => [@EXPORT_OK] ); # only one tag 22 23$VERSION = '0.300'; 24 25my $CRLF = "\015\012"; # "\r\n" is not portable 26 27# standard filters 28use HTTP::Proxy::HeaderFilter::standard; 29 30# constants used for logging 31use constant ERROR => -1; # always log 32use constant NONE => 0; # never log 33use constant PROXY => 1; # proxy information 34use constant STATUS => 2; # HTTP status 35use constant PROCESS => 4; # sub-process life (and death) 36use constant SOCKET => 8; # low-level connections 37use constant HEADERS => 16; # HTTP headers 38use constant FILTERS => 32; # Messages from filters 39use constant DATA => 64; # Data received by the filters 40use constant CONNECT => 128; # Data transmitted by the CONNECT method 41use constant ENGINE => 256; # Internal information from the Engine 42use constant ALL => 511; # All of the above 43 44# modules that need those constants to be defined 45use HTTP::Proxy::Engine; 46use HTTP::Proxy::FilterStack; 47 48# Methods we can forward 49my %METHODS; 50 51# HTTP (RFC 2616) 52$METHODS{http} = [qw( CONNECT DELETE GET HEAD OPTIONS POST PUT TRACE )]; 53 54# WebDAV (RFC 2518) 55$METHODS{webdav} = [ 56 @{ $METHODS{http} }, 57 qw( COPY LOCK MKCOL MOVE PROPFIND PROPPATCH UNLOCK ) 58]; 59 60# Delta-V (RFC 3253) 61$METHODS{deltav} = [ 62 @{ $METHODS{webdav} }, 63 qw( BASELINE-CONTROL CHECKIN CHECKOUT LABEL MERGE MKACTIVITY 64 MKWORKSPACE REPORT UNCHECKOUT UPDATE VERSION-CONTROL ), 65]; 66 67# the whole method list 68@METHODS = HTTP::Proxy->known_methods(); 69 70# useful regexes (from RFC 2616 BNF grammar) 71my %RX; 72$RX{token} = qr/[-!#\$%&'*+.0-9A-Z^_`a-z|~]+/; 73$RX{mime} = qr($RX{token}/$RX{token}); 74$RX{method} = '(?:' . join ( '|', @METHODS ) . ')'; 75$RX{method} = qr/$RX{method}/; 76 77sub new { 78 my $class = shift; 79 my %params = @_; 80 81 # some defaults 82 my %defaults = ( 83 agent => undef, 84 chunk => 4096, 85 daemon => undef, 86 host => 'localhost', 87 logfh => *STDERR, 88 logmask => NONE, 89 max_connections => 0, 90 max_keep_alive_requests => 10, 91 port => 8080, 92 stash => {}, 93 timeout => 60, 94 via => hostname() . " (HTTP::Proxy/$VERSION)", 95 x_forwarded_for => 1, 96 ); 97 98 # non modifiable defaults 99 my $self = bless { conn => 0, loop => 1 }, $class; 100 101 # support for deprecated stuff 102 { 103 my %convert = ( 104 maxchild => 'max_clients', 105 maxconn => 'max_connections', 106 maxserve => 'max_keep_alive_requests', 107 ); 108 while( my ($old, $new) = each %convert ) { 109 if( exists $params{$old} ) { 110 $params{$new} = delete $params{$old}; 111 carp "$old is deprecated, please use $new"; 112 } 113 } 114 } 115 116 # get attributes 117 $self->{$_} = exists $params{$_} ? delete( $params{$_} ) : $defaults{$_} 118 for keys %defaults; 119 120 # choose an engine with the remaining parameters 121 $self->{engine} = HTTP::Proxy::Engine->new( %params, proxy => $self ); 122 $self->log( PROXY, "PROXY", "Selected engine " . ref $self->{engine} ); 123 124 return $self; 125} 126 127sub known_methods { 128 my ( $class, @args ) = @_; 129 130 @args = map { lc } @args ? @args : ( keys %METHODS ); 131 exists $METHODS{$_} || carp "Method group $_ doesn't exist" 132 for @args; 133 my %seen; 134 return grep { !$seen{$_}++ } map { @{ $METHODS{$_} || [] } } @args; 135} 136 137sub timeout { 138 my $self = shift; 139 my $old = $self->{timeout}; 140 if (@_) { 141 $self->{timeout} = shift; 142 $self->agent->timeout( $self->{timeout} ) if $self->agent; 143 } 144 return $old; 145} 146 147sub url { 148 my $self = shift; 149 if ( not defined $self->daemon ) { 150 carp "HTTP daemon not started yet"; 151 return undef; 152 } 153 return $self->daemon->url; 154} 155 156# normal accessors 157for my $attr ( qw( 158 agent chunk daemon host logfh port request response hop_headers 159 logmask via x_forwarded_for client_headers engine 160 max_connections max_keep_alive_requests 161 ) 162 ) 163{ 164 no strict 'refs'; 165 *{"HTTP::Proxy::$attr"} = sub { 166 my $self = shift; 167 my $old = $self->{$attr}; 168 $self->{$attr} = shift if @_; 169 return $old; 170 } 171} 172 173# read-only accessors 174for my $attr (qw( conn loop client_socket )) { 175 no strict 'refs'; 176 *{"HTTP::Proxy::$attr"} = sub { $_[0]{$attr} } 177} 178 179sub max_clients { shift->engine->max_clients( @_ ) } 180 181# deprecated methods are still supported 182{ 183 my %convert = ( 184 maxchild => 'max_clients', 185 maxconn => 'max_connections', 186 maxserve => 'max_keep_alive_requests', 187 ); 188 while ( my ( $old, $new ) = each %convert ) { 189 no strict 'refs'; 190 *$old = sub { 191 carp "$old is deprecated, please use $new"; 192 goto \&$new; 193 }; 194 } 195} 196 197sub stash { 198 my $stash = shift->{stash}; 199 return $stash unless @_; 200 return $stash->{ $_[0] } if @_ == 1; 201 return $stash->{ $_[0] } = $_[1]; 202} 203 204sub new_connection { ++$_[0]{conn} } 205 206sub start { 207 my $self = shift; 208 209 $self->init; 210 $SIG{INT} = $SIG{TERM} = sub { $self->{loop} = 0 }; 211 212 # the main loop 213 my $engine = $self->engine; 214 $engine->start if $engine->can('start'); 215 while( $self->loop ) { 216 $engine->run; 217 last if $self->max_connections && $self->conn >= $self->max_connections; 218 } 219 $engine->stop if $engine->can('stop'); 220 221 $self->log( STATUS, "STATUS", 222 "Processed " . $self->conn . " connection(s)" ); 223 224 return $self->conn; 225} 226 227# semi-private init method 228sub init { 229 my $self = shift; 230 231 # must be run only once 232 return if $self->{_init}++; 233 234 $self->_init_daemon if ( !defined $self->daemon ); 235 $self->_init_agent if ( !defined $self->agent ); 236 237 # specific agent config 238 $self->agent->requests_redirectable( [] ); 239 $self->agent->agent(''); # for TRACE support 240 $self->agent->protocols_allowed( [qw( http https ftp gopher )] ); 241 242 # standard header filters 243 $self->{headers}{request} = HTTP::Proxy::FilterStack->new; 244 $self->{headers}{response} = HTTP::Proxy::FilterStack->new; 245 246 # the same standard filter is used to handle headers 247 my $std = HTTP::Proxy::HeaderFilter::standard->new(); 248 $std->proxy( $self ); 249 $self->{headers}{request}->push( [ sub { 1 }, $std ] ); 250 $self->{headers}{response}->push( [ sub { 1 }, $std ] ); 251 252 # standard body filters 253 $self->{body}{request} = HTTP::Proxy::FilterStack->new(1); 254 $self->{body}{response} = HTTP::Proxy::FilterStack->new(1); 255 256 return; 257} 258 259# 260# private init methods 261# 262 263sub _init_daemon { 264 my $self = shift; 265 my %args = ( 266 LocalAddr => $self->host, 267 LocalPort => $self->port, 268 ReuseAddr => 1, 269 ); 270 delete $args{LocalPort} unless $self->port; # 0 means autoselect 271 my $daemon = HTTP::Daemon->new(%args) 272 or die "Cannot initialize proxy daemon: $!"; 273 $self->daemon($daemon); 274 275 return $daemon; 276} 277 278sub _init_agent { 279 my $self = shift; 280 my $agent = LWP::UserAgent->new( 281 env_proxy => 1, 282 keep_alive => 2, 283 parse_head => 0, 284 timeout => $self->timeout, 285 ) 286 or die "Cannot initialize proxy agent: $!"; 287 $self->agent($agent); 288 return $agent; 289} 290 291# This is the internal "loop" that lets the child process process the 292# incoming connections. 293 294sub serve_connections { 295 my ( $self, $conn ) = @_; 296 my $response; 297 $self->{client_socket} = $conn; # read-only 298 $self->log( SOCKET, "SOCKET", "New connection from " . $conn->peerhost 299 . ":" . $conn->peerport ); 300 301 my ( $last, $served ) = ( 0, 0 ); 302 303 while ( $self->loop() ) { 304 my $req; 305 { 306 local $SIG{INT} = local $SIG{TERM} = 'DEFAULT'; 307 $req = $conn->get_request(); 308 } 309 310 $served++; 311 312 # initialisation 313 $self->request($req); 314 $self->response(undef); 315 316 # Got a request? 317 unless ( defined $req ) { 318 $self->log( SOCKET, "SOCKET", 319 "Getting request failed: " . $conn->reason ) 320 if $conn->reason ne 'No more requests from this connection'; 321 return; 322 } 323 $self->log( STATUS, "REQUEST", $req->method . ' ' 324 . ( $req->method eq 'CONNECT' ? $req->uri->host_port : $req->uri ) ); 325 326 # can we forward this method? 327 if ( !grep { $_ eq $req->method } @METHODS ) { 328 $response = HTTP::Response->new( 501, 'Not Implemented' ); 329 $response->content_type( "text/plain" ); 330 $response->content( 331 "Method " . $req->method . " is not supported by this proxy." ); 332 $self->response($response); 333 goto SEND; 334 } 335 336 # transparent proxying support 337 if( not defined $req->uri->scheme ) { 338 if( my $host = $req->header('Host') ) { 339 $req->uri->scheme( 'http' ); 340 $req->uri->host( $host ); 341 } 342 else { 343 $response = HTTP::Response->new( 400, 'Bad request' ); 344 $response->content_type( "text/plain" ); 345 $response->content("Can't do transparent proxying without a Host: header."); 346 $self->response($response); 347 goto SEND; 348 } 349 } 350 351 # can we serve this protocol? 352 if ( !$self->is_protocol_supported( my $s = $req->uri->scheme ) ) 353 { 354 # should this be 400 Bad Request? 355 $response = HTTP::Response->new( 501, 'Not Implemented' ); 356 $response->content_type( "text/plain" ); 357 $response->content("Scheme $s is not supported by this proxy."); 358 $self->response($response); 359 goto SEND; 360 } 361 362 # select the request filters 363 $self->{$_}{request}->select_filters( $req ) for qw( headers body ); 364 365 # massage the request 366 $self->{headers}{request}->filter( $req->headers, $req ); 367 368 # FIXME I don't know how to get the LWP::Protocol objet... 369 # NOTE: the request is always received in one piece 370 $self->{body}{request}->filter( $req->content_ref, $req, undef ); 371 $self->{body}{request}->eod; # end of data 372 $self->log( HEADERS, "REQUEST", $req->headers->as_string ); 373 374 # CONNECT method is a very special case 375 if( ! defined $self->response and $req->method eq 'CONNECT' ) { 376 $last = $self->_handle_CONNECT($served); 377 return if $last; 378 } 379 380 # the header filters created a response, 381 # we won't contact the origin server 382 # FIXME should the response header and body be filtered? 383 goto SEND if defined $self->response; 384 385 # FIXME - don't forward requests to ourselves! 386 387 # pop a response 388 my ( $sent, $chunked ) = ( 0, 0 ); 389 $response = $self->agent->simple_request( 390 $req, 391 sub { 392 my ( $data, $response, $proto ) = @_; 393 394 # first time, filter the headers 395 if ( !$sent ) { 396 $sent++; 397 $self->response( $response ); 398 399 # select the response filters 400 $self->{$_}{response}->select_filters( $response ) 401 for qw( headers body ); 402 403 $self->{headers}{response} 404 ->filter( $response->headers, $response ); 405 ( $last, $chunked ) = 406 $self->_send_response_headers( $served ); 407 } 408 409 # filter and send the data 410 $self->log( DATA, "DATA", 411 "got " . length($data) . " bytes of body data" ); 412 $self->{body}{response}->filter( \$data, $response, $proto ); 413 if ($chunked) { 414 printf $conn "%x$CRLF%s$CRLF", length($data), $data 415 if length($data); # the filter may leave nothing 416 } 417 else { print $conn $data; } 418 }, 419 $self->chunk 420 ); 421 422 # remove the header added by LWP::UA before it sends the response back 423 $response->remove_header('Client-Date'); 424 425 # the callback is not called by LWP::UA->request 426 # in some cases (HEAD, redirect, error responses have no body) 427 if ( !$sent ) { 428 $self->response($response); 429 $self->{$_}{response}->select_filters( $response ) 430 for qw( headers body ); 431 $self->{headers}{response} 432 ->filter( $response->headers, $response ); 433 } 434 435 # do a last pass, in case there was something left in the buffers 436 my $data = ""; # FIXME $protocol is undef here too 437 $self->{body}{response}->filter_last( \$data, $response, undef ); 438 if ( length $data ) { 439 if ($chunked) { 440 printf $conn "%x$CRLF%s$CRLF", length($data), $data; 441 } 442 else { print $conn $data; } 443 } 444 445 # last chunk 446 print $conn "0$CRLF$CRLF" if $chunked; # no trailers either 447 $self->response($response); 448 449 # what about X-Died and X-Content-Range? 450 if( my $died = $response->header('X-Died') ) { 451 $self->log( ERROR, "ERROR", $died ); 452 $sent = 0; 453 $response = HTTP::Response->new( 500, "Proxy filter error" ); 454 $response->content_type( "text/plain" ); 455 $response->content($died); 456 $self->response($response); 457 } 458 459 SEND: 460 461 $response = $self->response ; 462 463 # responses that weren't filtered through callbacks 464 # (empty body or error) 465 # FIXME some error response headers might not be filtered 466 if ( !$sent ) { 467 ($last, $chunked) = $self->_send_response_headers( $served ); 468 my $content = $response->content; 469 if ($chunked) { 470 printf $conn "%x$CRLF%s$CRLF", length($content), $content 471 if length($content); # the filter may leave nothing 472 print $conn "0$CRLF$CRLF"; 473 } 474 else { print $conn $content; } 475 } 476 477 # FIXME ftp, gopher 478 $conn->print( $response->content ) 479 if defined $req->uri->scheme 480 and $req->uri->scheme =~ /^(?:ftp|gopher)$/ 481 and $response->is_success; 482 483 $self->log( SOCKET, "SOCKET", "Connection closed by the proxy" ), last 484 if $last || $served >= $self->max_keep_alive_requests; 485 } 486 $self->log( SOCKET, "SOCKET", "Connection closed by the client" ) 487 if !$last 488 and $served < $self->max_keep_alive_requests; 489 $self->log( PROCESS, "PROCESS", "Served $served requests" ); 490 $conn->close; 491} 492 493# INTERNAL METHOD 494# send the response headers for the proxy 495# expects $served (number of requests served) 496# returns $last and $chunked (last request served, chunked encoding) 497sub _send_response_headers { 498 my ( $self, $served ) = @_; 499 my ( $last, $chunked ) = ( 0, 0 ); 500 my $conn = $self->client_socket; 501 my $response = $self->response; 502 503 # correct headers 504 $response->remove_header("Content-Length") 505 if $self->{body}{response}->will_modify(); 506 $response->header( Server => "HTTP::Proxy/$VERSION" ) 507 unless $response->header( 'Server' ); 508 $response->header( Date => time2str(time) ) 509 unless $response->header( 'Date' ); 510 511 # this is adapted from HTTP::Daemon 512 if ( $conn->antique_client ) { $last++ } 513 else { 514 my $code = $response->code; 515 $conn->send_status_line( $code, $response->message, 516 $self->request()->protocol() ); 517 if ( $code =~ /^(1\d\d|[23]04)$/ ) { 518 519 # make sure content is empty 520 $response->remove_header("Content-Length"); 521 $response->content(''); 522 } 523 elsif ( $response->request && $response->request->method eq "HEAD" ) 524 { # probably OK, says HTTP::Daemon 525 } 526 else { 527 if ( $conn->proto_ge("HTTP/1.1") ) { 528 $chunked++; 529 $response->push_header( "Transfer-Encoding" => "chunked" ); 530 $response->push_header( "Connection" => "close" ) 531 if $served >= $self->max_keep_alive_requests; 532 } 533 else { 534 $last++; 535 $conn->force_last_request; 536 } 537 } 538 print $conn $response->headers_as_string($CRLF); 539 print $conn $CRLF; # separates headers and content 540 } 541 $self->log( STATUS, "RESPONSE", $response->status_line ); 542 $self->log( HEADERS, "RESPONSE", $response->headers->as_string ); 543 return ($last, $chunked); 544} 545 546# INTERNAL method 547# FIXME no man-in-the-middle for now 548sub _handle_CONNECT { 549 my ($self, $served) = @_; 550 my $last = 0; 551 552 my $conn = $self->client_socket; 553 my $req = $self->request; 554 my $upstream; 555 556 # connect upstream 557 if ( my $up = $self->agent->proxy('http') ) { 558 559 # clean up authentication info from proxy URL 560 $up =~ s{^http://[^/\@]*\@}{http://}; 561 562 # forward to upstream proxy 563 $self->log( PROXY, "PROXY", 564 "Forwarding CONNECT request to next proxy: $up" ); 565 my $response = $self->agent->simple_request($req); 566 567 # check the upstream proxy's response 568 my $code = $response->code; 569 if ( $code == 407 ) { # don't forward Proxy Authentication requests 570 my $response_407 = $response->as_string; 571 $response_407 =~ s/^Client-.*$//mg; 572 $response = HTTP::Response->new(502); 573 $response->content_type("text/plain"); 574 $response->content( "Upstream proxy ($up) " 575 . "requested authentication:\n\n" 576 . $response_407 ); 577 $self->response($response); 578 return $last; 579 } 580 elsif ( $code != 200 ) { # forward every other failure 581 $self->response($response); 582 return $last; 583 } 584 585 $upstream = $response->{client_socket}; 586 } 587 else { # direct connection 588 $upstream = IO::Socket::INET->new( PeerAddr => $req->uri->host_port ); 589 } 590 591 # no upstream socket obtained 592 if( !$upstream ) { 593 my $response = HTTP::Response->new( 500 ); 594 $response->content_type( "text/plain" ); 595 $response->content( "CONNECT failed: $@"); 596 $self->response($response); 597 return $last; 598 } 599 600 # send the response headers (FIXME more headers required?) 601 my $response = HTTP::Response->new(200); 602 $self->response($response); 603 $self->{$_}{response}->select_filters( $response ) for qw( headers body ); 604 605 $self->_send_response_headers( $served ); 606 607 # we now have a TCP connection 608 $last = 1; 609 610 my $select = IO::Select->new; 611 for ( $conn, $upstream ) { 612 $_->autoflush(1); 613 $_->blocking(0); 614 $select->add($_); 615 } 616 617 # loop while there is data 618 while ( my @ready = $select->can_read ) { 619 for (@ready) { 620 my $data = ""; 621 my ($sock, $peer, $from ) = $conn eq $_ 622 ? ( $conn, $upstream, "client" ) 623 : ( $upstream, $conn, "server" ); 624 625 # read the data 626 my $read = $sock->sysread( $data, 4096 ); 627 628 # check for errors 629 if(not defined $read ) { 630 $self->log( ERROR, "CONNECT", "Read undef from $from ($!)" ); 631 next; 632 } 633 634 # end of connection 635 if ( $read == 0 ) { 636 $_->close for ( $sock, $peer ); 637 $select->remove( $sock, $peer ); 638 $self->log( SOCKET, "CONNECT", "Connection closed by the $from" ); 639 $self->log( PROCESS, "PROCESS", "Served $served requests" ); 640 next; 641 } 642 643 # proxy the data 644 $self->log( CONNECT, "CONNECT", "$read bytes received from $from" ); 645 $peer->syswrite($data, length $data); 646 } 647 } 648 $self->log( CONNECT, "CONNECT", "End of CONNECT proxyfication"); 649 return $last; 650} 651 652sub push_filter { 653 my $self = shift; 654 my %arg = ( 655 mime => 'text/*', 656 method => join( ',', @METHODS ), 657 scheme => 'http', 658 host => '', 659 path => '', 660 query => '', 661 ); 662 663 # parse parameters 664 for( my $i = 0; $i < @_ ; $i += 2 ) { 665 next if $_[$i] !~ /^(mime|method|scheme|host|path|query)$/; 666 $arg{$_[$i]} = $_[$i+1]; 667 splice @_, $i, 2; 668 $i -= 2; 669 } 670 croak "Odd number of arguments" if @_ % 2; 671 672 # the proxy must be initialised 673 $self->init; 674 675 # prepare the variables for the closure 676 my ( $mime, $method, $scheme, $host, $path, $query ) = 677 @arg{qw( mime method scheme host path query )}; 678 679 if ( defined $mime && $mime ne '' ) { 680 $mime =~ m!/! or croak "Invalid MIME type definition: $mime"; 681 $mime =~ s/\*/$RX{token}/; #turn it into a regex 682 $mime = qr/^$mime(?:$|\s*;?)/; 683 } 684 685 my @method = split /\s*,\s*/, $method; 686 for (@method) { croak "Invalid method: $_" if !/$RX{method}/ } 687 $method = @method ? '(?:' . join ( '|', @method ) . ')' : ''; 688 $method = qr/^$method$/; 689 690 my @scheme = split /\s*,\s*/, $scheme; 691 for (@scheme) { 692 croak "Unsupported scheme: $_" 693 if !$self->is_protocol_supported($_); 694 } 695 $scheme = @scheme ? '(?:' . join ( '|', @scheme ) . ')' : ''; 696 $scheme = qr/$scheme/; 697 698 $host ||= '.*'; $host = qr/$host/i; 699 $path ||= '.*'; $path = qr/$path/; 700 $query ||= '.*'; $query = qr/$query/; 701 702 # push the filter and its match method on the correct stack 703 while(@_) { 704 my ($message, $filter ) = (shift, shift); 705 croak "'$message' is not a filter stack" 706 unless $message =~ /^(request|response)$/; 707 708 croak "Not a Filter reference for filter queue $message" 709 unless ref( $filter ) 710 && ( $filter->isa('HTTP::Proxy::HeaderFilter') 711 || $filter->isa('HTTP::Proxy::BodyFilter') ); 712 713 my $stack; 714 $stack = 'headers' if $filter->isa('HTTP::Proxy::HeaderFilter'); 715 $stack = 'body' if $filter->isa('HTTP::Proxy::BodyFilter'); 716 717 # MIME can only match on reponse 718 my $mime = $mime; 719 undef $mime if $message eq 'request'; 720 721 # compute the match sub as a closure 722 # for $self, $mime, $method, $scheme, $host, $path 723 my $match = sub { 724 return 0 725 if ( defined $mime ) 726 && ( $self->response->content_type || '' ) !~ $mime; 727 return 0 if ( $self->{request}->method || '' ) !~ $method; 728 return 0 if ( $self->{request}->uri->scheme || '' ) !~ $scheme; 729 return 0 if ( $self->{request}->uri->authority || '' ) !~ $host; 730 return 0 if ( $self->{request}->uri->path || '' ) !~ $path; 731 return 0 if ( $self->{request}->uri->query || '' ) !~ $query; 732 return 1; # it's a match 733 }; 734 735 # push it on the corresponding FilterStack 736 $self->{$stack}{$message}->push( [ $match, $filter ] ); 737 $filter->proxy( $self ); 738 } 739} 740 741sub is_protocol_supported { 742 my ( $self, $scheme ) = @_; 743 my $ok = 1; 744 if ( !$self->agent->is_protocol_supported($scheme) ) { 745 746 # double check, in case a dummy scheme was added 747 # to be handled directly by a filter 748 $ok = 0; 749 $scheme eq $_ && $ok++ for @{ $self->agent->protocols_allowed }; 750 } 751 $ok; 752} 753 754sub log { 755 my $self = shift; 756 my $level = shift; 757 my $fh = $self->logfh; 758 759 return unless $self->logmask & $level || $level == ERROR; 760 761 my ( $prefix, $msg ) = ( @_, '' ); 762 my @lines = split /\n/, $msg; 763 @lines = ('') if not @lines; 764 765 flock( $fh, LOCK_EX ); 766 print $fh "[" . localtime() . "] ($$) $prefix: $_\n" for @lines; 767 flock( $fh, LOCK_UN ); 768} 769 7701; 771 772__END__ 773 774=head1 NAME 775 776HTTP::Proxy - A pure Perl HTTP proxy 777 778=head1 SYNOPSIS 779 780 use HTTP::Proxy; 781 782 # initialisation 783 my $proxy = HTTP::Proxy->new( port => 3128 ); 784 785 # alternate initialisation 786 my $proxy = HTTP::Proxy->new; 787 $proxy->port( 3128 ); # the classical accessors are here! 788 789 # this is a MainLoop-like method 790 $proxy->start; 791 792=head1 DESCRIPTION 793 794This module implements a HTTP proxy, using a L<HTTP::Daemon> to accept 795client connections, and a LWP::UserAgent to ask for the requested pages. 796 797The most interesting feature of this proxy object is its ability to 798filter the HTTP requests and responses through user-defined filters. 799 800Once the proxy is created, with the C<new()> method, it is possible 801to alter its behaviour by adding so-called "filters". This is 802done by the C<push_filter()> method. Once the filter is ready to 803run, it can be launched, with the C<start()> method. This method 804does not normally return until the proxy is killed or otherwise 805stopped. 806 807An important thing to note is that the proxy is (except when running 808the C<NoFork> engine) a I<forking> proxy: it doesn't support passing 809information between child processes, and you can count on reliable 810information passing only during a single HTTP connection (request + 811response). 812 813=head1 FILTERS 814 815You can alter the way the default L<HTTP::Proxy> works by plugging callbacks 816(filter objects, actually) at different stages of the request/response 817handling. 818 819When a request is received by the L<HTTP::Proxy> object, it is filtered through 820a standard filter that transform this request accordingly to RFC 2616 821(by adding the C<Via:> header, and a few other transformations). This is 822the default, bare minimum behaviour. 823 824The response is also filtered in the same manner. There is a total of four 825filter chains: C<request-headers>, C<request-body>, C<reponse-headers> and 826C<response-body>. 827 828You can add your own filters to the default ones with the 829C<push_filter()> method. The method pushes a filter on the appropriate 830filter stack. 831 832 $proxy->push_filter( response => $filter ); 833 834The headers/body category is determined by the base class of the filter. 835There are two base classes for filters, which are 836L<HTTP::Proxy::HeaderFilter> and L<HTTP::Proxy::BodyFilter> (the names 837are self-explanatory). See the documentation of those two classes 838to find out how to write your own header or body filters. 839 840The named parameter is used to determine the request/response part. 841 842It is possible to push the same filter on the request and response 843stacks, as in the following example: 844 845 $proxy->push_filter( request => $filter, response => $filter ); 846 847If several filters match the message, they will be applied in the order 848they were pushed on their filter stack. 849 850Named parameters can be used to create the match routine. They are: 851 852 method - the request method 853 scheme - the URI scheme 854 host - the URI authority (host:port) 855 path - the URI path 856 query - the URI query string 857 mime - the MIME type (for a response-body filter) 858 859The filters are applied only when all the the parameters match the 860request or the response. All these named parameters have default values, 861which are: 862 863 method => 'OPTIONS,GET,HEAD,POST,PUT,DELETE,TRACE,CONNECT' 864 scheme => 'http' 865 host => '' 866 path => '' 867 query => '' 868 mime => 'text/*' 869 870The C<mime> parameter is a glob-like string, with a required C</> 871character and a C<*> as a joker. Thus, C<*/*> matches I<all> responses, 872and C<""> those with no C<Content-Type:> header. To match any 873reponse (with or without a C<Content-Type:> header), use C<undef>. 874 875The C<mime> parameter is only meaningful with the C<response-body> 876filter stack. It is ignored if passed to any other filter stack. 877 878The C<method> and C<scheme> parameters are strings consisting of 879comma-separated values. The C<host> and C<path> parameters are regular 880expressions. 881 882A match routine is compiled by the proxy and used to check if a particular 883request or response must be filtered through a particular filter. 884 885It is also possible to push several filters on the same stack with 886the same match subroutine: 887 888 # convert italics to bold 889 $proxy->push_filter( 890 mime => 'text/html', 891 response => HTTP::Proxy::BodyFilter::tags->new(), 892 response => HTTP::Proxy::BodyFilter::simple->new( 893 sub { ${ $_[1] } =~ s!(</?)i>!$1b>!ig } 894 ) 895 ); 896 897For more details regarding the creation of new filters, check the 898L<HTTP::Proxy::HeaderFilter> and L<HTTP::Proxy::BodyFilter> documentation. 899 900Here's an example of subclassing a base filter class: 901 902 # fixes a common typo ;-) 903 # but chances are that this will modify a correct URL 904 { 905 package FilterPerl; 906 use base qw( HTTP::Proxy::BodyFilter ); 907 908 sub filter { 909 my ( $self, $dataref, $message, $protocol, $buffer ) = @_; 910 $$dataref =~ s/PERL/Perl/g; 911 } 912 } 913 $proxy->push_filter( response => FilterPerl->new() ); 914 915Other examples can be found in the documentation for 916L<HTTP::Proxy::HeaderFilter>, L<HTTP::Proxy::BodyFilter>, 917L<HTTP::Proxy::HeaderFilter::simple>, L<HTTP::Proxy::BodyFilter::simple>. 918 919 # a simple anonymiser 920 # see eg/anonymiser.pl for the complete code 921 $proxy->push_filter( 922 mime => undef, 923 request => HTTP::Proxy::HeaderFilter::simple->new( 924 sub { $_[1]->remove_header(qw( User-Agent From Referer Cookie )) }, 925 ), 926 response => HTTP::Proxy::HeaderFilter::simple->new( 927 sub { $_[1]->remove_header(qw( Set-Cookie )); }, 928 ) 929 ); 930 931IMPORTANT: If you use your own L<LWP::UserAgent>, you must install it 932before your calls to C<push_filter()>, otherwise 933the match method will make wrong assumptions about the schemes your 934agent supports. 935 936NOTE: It is likely that possibility of changing the agent or the daemon 937may disappear in future versions. 938 939=head1 METHODS 940 941=head2 Constructor and initialisation 942 943=over 4 944 945=item new() 946 947The C<new()> method creates a new L<HTTP::Proxy> object. All attributes can 948be passed as parameters to replace the default. 949 950Parameters that are not L<HTTP::Proxy> attributes will be ignored and 951passed to the chosen L<HTTP::Proxy::Engine> object. 952 953=item init() 954 955C<init()> initialise the proxy without starting it. It is usually not 956needed. 957 958This method is called by C<start()> if needed. 959 960=item push_filter() 961 962The C<push_filter()> method is used to add filters to the proxy. 963It is fully described in section L<FILTERS>. 964 965=back 966 967=head2 Accessors and mutators 968 969L<HTTP::Proxy> class has several accessors and mutators. 970 971Called with arguments, the accessor returns the current value. 972Called with a single argument, it sets the current value and 973returns the previous one, in case you want to keep it. 974 975If you call a read-only accessor with a parameter, this parameter 976will be ignored. 977 978The defined accessors are (in alphabetical order): 979 980=over 4 981 982=item agent 983 984The L<LWP::UserAgent> object used internally to connect to remote sites. 985 986=item chunk 987 988The chunk size for the L<LWP::UserAgent> callbacks. 989 990=item client_socket (read-only) 991 992The socket currently connected to the client. Mostly useful in filters. 993 994=item client_headers 995 996This attribute holds a reference to the client headers set up by 997L<LWP::UserAgent> 998(C<Client-Aborted>, C<Client-Bad-Header-Line>, C<Client-Date>, 999C<Client-Junk>, C<Client-Peer>, C<Client-Request-Num>, 1000C<Client-Response-Num>, C<Client-SSL-Cert-Issuer>, 1001C<Client-SSL-Cert-Subject>, C<Client-SSL-Cipher>, C<Client-SSL-Warning>, 1002C<Client-Transfer-Encoding>, C<Client-Warning>). 1003 1004They are removed by the filter L<HTTP::Proxy::HeaderFilter::standard> from 1005the request and response objects received by the proxy. 1006 1007If a filter (such as a SSL certificate verification filter) need to 1008access them, it must do it through this accessor. 1009 1010=item conn (read-only) 1011 1012The number of connections processed by this L<HTTP::Proxy> instance. 1013 1014=item daemon 1015 1016The L<HTTP::Daemon> object used to accept incoming connections. 1017(You usually never need this.) 1018 1019=item engine 1020 1021The L<HTTP::Proxy::Engine> object that manages the child processes. 1022 1023=item hop_headers 1024 1025This attribute holds a reference to the hop-by-hop headers 1026(C<Connection>, C<Keep-Alive>, C<Proxy-Authenticate>, C<Proxy-Authorization>, 1027C<TE>, C<Trailers>, C<Transfer-Encoding>, C<Upgrade>). 1028 1029They are removed by the filter HTTP::Proxy::HeaderFilter::standard from 1030the request and response objects received by the proxy. 1031 1032If a filter (such as a proxy authorisation filter) need to access them, 1033it must do it through this accessor. 1034 1035=item host 1036 1037The proxy L<HTTP::Daemon> host (default: 'localhost'). 1038 1039This means that by default, the proxy answers only to clients on the 1040local machine. You can pass a specific interface address or C<"">/C<undef> 1041for any interface. 1042 1043This default prevents your proxy to be used as an anonymous proxy 1044by script kiddies. 1045 1046=item known_methods( @groups ) (read-only) 1047 1048This method returns all HTTP (and extensions to HTTP) known to 1049C<HTTP::Proxy>. Methods are grouped by type. Known method groups are: 1050C<HTTP>, C<WebDAV> and C<DeltaV>. 1051 1052Called with an empty list, this method will return all known methods. 1053This method is case-insensitive, and will C<carp()> if an unknown 1054group name is passed. 1055 1056=item logfh 1057 1058A filehandle to a logfile (default: C<*STDERR>). 1059 1060=item logmask( [$mask] ) 1061 1062Be verbose in the logs (default: C<NONE>). 1063 1064Here are the various elements that can be added to the mask (their values 1065are powers of 2, starting from 0 and listed here in ascending order): 1066 1067 NONE - Log only errors 1068 PROXY - Proxy information 1069 STATUS - Requested URL, reponse status and total number 1070 of connections processed 1071 PROCESS - Subprocesses information (fork, wait, etc.) 1072 SOCKET - Information about low-level sockets 1073 HEADERS - Full request and response headers are sent along 1074 FILTERS - Filter information 1075 DATA - Data received by the filters 1076 CONNECT - Data transmitted by the CONNECT method 1077 ENGINE - Engine information 1078 ALL - Log all of the above 1079 1080If you only want status and process information, you can use: 1081 1082 $proxy->logmask( STATUS | PROCESS ); 1083 1084Note that all the logging constants are not exported by default, but 1085by the C<:log> tag. They can also be exported one by one. 1086 1087=item loop (read-only) 1088 1089Internal. False when the main loop is about to be broken. 1090 1091=item max_clients 1092 1093=item maxchild 1094 1095The maximum number of child process the L<HTTP::Proxy> object will spawn 1096to handle client requests (default: depends on the engine). 1097 1098This method is currently delegated to the L<HTTP::Proxy::Engine> object. 1099 1100C<maxchild> is deprecated and will disappear. 1101 1102=item max_connections 1103 1104=item maxconn 1105 1106The maximum number of TCP connections the proxy will accept before 1107returning from start(). 0 (the default) means never stop accepting 1108connections. 1109 1110C<maxconn> is deprecated. 1111 1112Note: C<max_connections> will be deprecated soon, for two reasons: 1) 1113it is more of an L<HTTP::Proxy::Engine> attribute, 2) not all engines will 1114support it. 1115 1116=item max_keep_alive_requests 1117 1118=item maxserve 1119 1120The maximum number of requests the proxy will serve in a single connection. 1121(same as C<MaxRequestsPerChild> in Apache) 1122 1123C<maxserve> is deprecated. 1124 1125=item port 1126 1127The proxy L<HTTP::Daemon> port (default: 8080). 1128 1129=item request 1130 1131The request originaly received by the proxy from the user-agent, which 1132will be modified by the request filters. 1133 1134=item response 1135 1136The response received from the origin server by the proxy. It is 1137normally C<undef> until the proxy actually receives the beginning 1138of a response from the origin server. 1139 1140If one of the request filters sets this attribute, it "short-circuits" 1141the request/response scheme, and the proxy will return this response 1142(which is NOT filtered through the response filter stacks) instead of 1143the expected origin server response. This is useful for caching (though 1144Squid does it much better) and proxy authentication, for example. 1145 1146=item stash 1147 1148The stash is a hash where filters can store data to share between them. 1149 1150The stash() method can be used to set the whole hash (with a HASH reference). 1151To access individual keys simply do: 1152 1153 $proxy->stash( 'bloop' ); 1154 1155To set it, type: 1156 1157 $proxy->stash( bloop => 'owww' ); 1158 1159It's also possibly to get a reference to the stash: 1160 1161 my $s = $filter->proxy->stash(); 1162 $s->{bang} = 'bam'; 1163 1164 # $proxy->stash( 'bang' ) will now return 'bam' 1165 1166B<Warning:> since the proxy forks for each TCP connection, the data is 1167only shared between filters in the same child process. 1168 1169=item timeout 1170 1171The timeout used by the internal L<LWP::UserAgent> (default: 60). 1172 1173=item url (read-only) 1174 1175The url where the proxy can be reached. 1176 1177=item via 1178 1179The content of the Via: header. Setting it to an empty string will 1180prevent its addition. (default: C<$hostname (HTTP::Proxy/$VERSION)>) 1181 1182=item x_forwarded_for 1183 1184If set to a true value, the proxy will send the C<X-Forwarded-For:> header. 1185(default: true) 1186 1187=back 1188 1189=head2 Connection handling methods 1190 1191=over 4 1192 1193=item start() 1194 1195This method works like Tk's C<MainLoop>: you hand over control to the 1196L<HTTP::Proxy> object you created and configured. 1197 1198If C<maxconn> is not zero, C<start()> will return after accepting 1199at most that many connections. It will return the total number of 1200connexions. 1201 1202=item serve_connections() 1203 1204This is the internal method used to handle each new TCP connection 1205to the proxy. 1206 1207=back 1208 1209=head2 Other methods 1210 1211=over 4 1212 1213=item log( $level, $prefix, $message ) 1214 1215Adds C<$message> at the end of C<logfh>, if $level matches C<logmask>. 1216The C<log()> method also prints a timestamp. 1217 1218The output looks like: 1219 1220 [Thu Dec 5 12:30:12 2002] ($$) $prefix: $message 1221 1222where C<$$> is the current processus id. 1223 1224If C<$message> is a multiline string, several log lines will be output, 1225each line starting with C<$prefix>. 1226 1227=item is_protocol_supported( $scheme ) 1228 1229Returns a boolean indicating if $scheme is supported by the proxy. 1230 1231This method is only used internaly. 1232 1233It is essential to allow L<HTTP::Proxy> users to create "pseudo-schemes" 1234that LWP doesn't know about, but that one of the proxy filters can handle 1235directly. New schemes are added as follows: 1236 1237 $proxy->init(); # required to get an agent 1238 $proxy->agent->protocols_allowed( 1239 [ @{ $proxy->agent->protocols_allowed }, 'myhttp' ] ); 1240 1241=item new_connection() 1242 1243Increase the proxy's TCP connections counter. Only used by 1244L<HTTP::Proxy::Engine> objects. 1245 1246=back 1247 1248=head2 Apache-like attributes 1249 1250L<HTTP::Proxy> has several Apache-like attributes that control the 1251way the HTTP and TCP connections are handled. 1252 1253The following attributes control the TCP connection. They are passed to 1254the underlying L<HTTP::Proxy::Engine>, which may (or may not) use them 1255to change its behaviour. 1256 1257=over 4 1258 1259=item start_servers 1260 1261Number of child process to fork at the beginning. 1262 1263=item max_clients 1264 1265Maximum number of concurrent TCP connections (i.e. child processes). 1266 1267=item max_requests_per_child 1268 1269Maximum number of TCP connections handled by the same child process. 1270 1271=item min_spare_servers 1272 1273Minimum number of inactive child processes. 1274 1275=item max_spare_servers 1276 1277Maximum number of inactive child processes. 1278 1279=back 1280 1281Those attributes control the HTTP connection: 1282 1283=over 4 1284 1285=item keep_alive 1286 1287Support for keep alive HTTP connections. 1288 1289=item max_keep_alive_requests 1290 1291Maximum number of HTTP connections within a single TCP connection. 1292 1293=item keep_alive_timeout 1294 1295Timeout for keep-alive connection. 1296 1297=back 1298 1299=head1 EXPORTED SYMBOLS 1300 1301No symbols are exported by default. The C<:log> tag exports all the 1302logging constants. 1303 1304=head1 BUGS 1305 1306This module does not work under Windows, but I can't see why, and do not 1307have a development platform under that system. Patches and explanations 1308very welcome. 1309 1310I guess it is because C<fork()> is not well supported. 1311 1312 $proxy->maxchild(0); 1313 1314=over 4 1315 1316=item However, David Fishburn says: 1317 1318This did not work for me under WinXP - ActiveState Perl 5.6, but it DOES 1319work on WinXP ActiveState Perl 5.8. 1320 1321=back 1322 1323Several people have tried to help, but we haven't found a way to make it work 1324correctly yet. 1325 1326As from version 0.16, the default engine is L<HTTP::Proxy::Engine::NoFork>. 1327Let me know if it works better. 1328 1329=head1 SEE ALSO 1330 1331L<HTTP::Proxy::Engine>, L<HTTP::Proxy::BodyFilter>, 1332L<HTTP::Proxy::HeaderFilter>, the examples in F<eg/>. 1333 1334=head1 AUTHOR 1335 1336Philippe "BooK" Bruhat, E<lt>book@cpan.orgE<gt>. 1337 1338There is also a mailing-list: http-proxy@mongueurs.net for general 1339discussion about L<HTTP::Proxy>. 1340 1341=head1 THANKS 1342 1343Many people helped me during the development of this module, either on 1344mailing-lists, IRC or over a beer in a pub... 1345 1346So, in no particular order, thanks to the libwww-perl team for such a 1347terrific suite of modules, perl-qa (tips for testing), the French Perl 1348I<Mongueurs> (for code tricks, beers and encouragements) and my growing 1349user base... C<;-)> 1350 1351I'd like to particularly thank Dan Grigsby, who's been using 1352L<HTTP::Proxy> since 2003 (before the filter classes even existed). He is 1353apparently making a living from a product based on L<HTTP::Proxy>. Thanks 1354a lot for your confidence in my work! 1355 1356=head1 COPYRIGHT 1357 1358Copyright 2002-2013, Philippe Bruhat. 1359 1360=head1 LICENSE 1361 1362This module is free software; you can redistribute it or modify it under 1363the same terms as Perl itself. 1364 1365=cut 1366 1367