#
# httpproxy.rb -- HTTPProxy Class
#
# Author: IPR -- Internet Programming with Ruby -- writers
# Copyright (c) 2002 GOTO Kentaro
# Copyright (c) 2002 Internet Programming with Ruby writers. All rights
# reserved.
#
# $IPR: httpproxy.rb,v 1.18 2003/03/08 18:58:10 gotoyuzo Exp $
# $kNotwork: straw.rb,v 1.3 2002/02/12 15:13:07 gotoken Exp $

require "webrick/httpserver"
require "net/http"

Net::HTTP::version_1_2 if RUBY_VERSION < "1.7"

module WEBrick

  # Reader whose +read+ and +gets+ always return nil.  do_CONNECT feeds it
  # to HTTPRequest#parse to clear the request line once the CONNECT reply
  # has been written.
  NullReader = Object.new # :nodoc:
  class << NullReader # :nodoc:
    def read(*args)
      nil
    end
    alias gets read
  end

  # Stand-in used when no upstream proxy is configured: every URI component
  # accessor (scheme, host, port, path, query, userinfo) answers nil.
  FakeProxyURI = Object.new # :nodoc:
  class << FakeProxyURI # :nodoc:
    def method_missing(meth, *args)
      return nil if fake_uri_component?(meth)
      super
    end

    # Keeps respond_to? consistent with the accessors answered by
    # method_missing.
    def respond_to_missing?(meth, include_private = false)
      fake_uri_component?(meth) || super
    end

    private

    # True when +meth+ names one of the URI components this object fakes.
    def fake_uri_component?(meth)
      %w(scheme host port path query userinfo).member?(meth.to_s)
    end
  end

  # :startdoc:

  ##
  # An HTTP Proxy server which proxies GET, HEAD and POST requests.
  #
  # To create a simple proxy server:
  #
  #   require 'webrick'
  #   require 'webrick/httpproxy'
  #
  #   proxy = WEBrick::HTTPProxyServer.new Port: 8000
  #
  #   trap 'INT'  do proxy.shutdown end
  #   trap 'TERM' do proxy.shutdown end
  #
  #   proxy.start
  #
  # See ::new for proxy-specific configuration items.
  #
  # == Modifying proxied responses
  #
  # To modify content the proxy server returns use the +:ProxyContentHandler+
  # option:
  #
  #   handler = proc do |req, res|
  #     if res['content-type'] == 'text/plain' then
  #       res.body << "\nThis content was proxied!\n"
  #     end
  #   end
  #
  #   proxy =
  #     WEBrick::HTTPProxyServer.new Port: 8000, ProxyContentHandler: handler

  class HTTPProxyServer < HTTPServer

    ##
    # Proxy server configurations.  The proxy server handles the following
    # configuration items in addition to those supported by HTTPServer:
    #
    # :ProxyAuthProc:: Called with a request and response to authorize a
    #                  request
    # :ProxyVia:: Appended to the via header
    # :ProxyURI:: The proxy server's URI
    # :ProxyContentHandler:: Called with a request and response and allows
    #                        modification of the response
    # :ProxyTimeout:: Sets the proxy timeouts to 30 seconds for open and 60
    #                 seconds for read operations

    def initialize(config={}, default=Config::HTTP)
      super(config, default)
      c = @config
      # Value this proxy contributes to the Via header (see set_via).
      @via = "#{c[:HTTPVersion]} #{c[:ServerName]}:#{c[:Port]}"
    end

    # :stopdoc:

    # Routes a request: CONNECT goes to do_CONNECT, requests whose
    # unparsed URI starts with "http://" are proxied, anything else is
    # handled by HTTPServer#service.
    def service(req, res)
      if req.request_method == "CONNECT"
        do_CONNECT(req, res)
      elsif req.unparsed_uri =~ %r!^http://!
        proxy_service(req, res)
      else
        super(req, res)
      end
    end

    # Calls the :ProxyAuthProc hook, if configured, and then removes the
    # proxy-authorization field from the request header.
    def proxy_auth(req, res)
      if proc = @config[:ProxyAuthProc]
        proc.call(req, res)
      end
      req.header.delete("proxy-authorization")
    end

    def proxy_uri(req, res)
      # should return upstream proxy server's URI
      return @config[:ProxyURI]
    end

    # Proxies one request by dispatching to the do_<METHOD> handler and
    # then running the optional :ProxyContentHandler.
    #
    # Raises HTTPStatus::MethodNotAllowed when no handler exists for the
    # request method, and HTTPStatus::ServiceUnavailable when the handler
    # itself fails.
    def proxy_service(req, res)
      # Proxy Authentication
      proxy_auth(req, res)

      # Look the handler up explicitly instead of rescuing NoMethodError, so
      # that a NoMethodError raised *inside* a handler is not misreported
      # as an unsupported method.  Private handlers are included because
      # they are invoked through send.
      handler_name = "do_#{req.request_method}"
      unless respond_to?(handler_name, true)
        raise HTTPStatus::MethodNotAllowed,
          "unsupported method `#{req.request_method}'."
      end

      begin
        send(handler_name, req, res)
      rescue => err
        logger.debug("#{err.class}: #{err.message}")
        raise HTTPStatus::ServiceUnavailable, err.message
      end

      # Process contents
      if handler = @config[:ProxyContentHandler]
        handler.call(req, res)
      end
    end

    # Handles CONNECT by opening a TCP connection to the requested
    # host:port (or to the upstream proxy, when one is configured) and then
    # relaying bytes in both directions between it and the client socket.
    #
    # Always finishes by raising HTTPStatus::EOFError.
    def do_CONNECT(req, res)
      # Proxy Authentication
      proxy_auth(req, res)

      ua = Thread.current[:WEBrickSocket]  # User-Agent
      raise HTTPStatus::InternalServerError,
        "[BUG] cannot get socket" unless ua

      host, port = req.unparsed_uri.split(":", 2)
      # Proxy authentication for upstream proxy server
      if proxy = proxy_uri(req, res)
        proxy_request_line = "CONNECT #{host}:#{port} HTTP/1.0"
        if proxy.userinfo
          credentials = "Basic " + [proxy.userinfo].pack("m").delete("\n")
        end
        host, port = proxy.host, proxy.port
      end

      os = nil
      begin
        @logger.debug("CONNECT: upstream proxy is `#{host}:#{port}'.")
        os = TCPSocket.new(host, port)     # origin server

        if proxy
          @logger.debug("CONNECT: sending a Request-Line")
          os << proxy_request_line << CRLF
          @logger.debug("CONNECT: > #{proxy_request_line}")
          if credentials
            @logger.debug("CONNECT: sending a credentials")
            os << "Proxy-Authorization: " << credentials << CRLF
          end
          os << CRLF
          proxy_status_line = os.gets(LF)
          @logger.debug("CONNECT: read a Status-Line form the upstream server")
          @logger.debug("CONNECT: < #{proxy_status_line}")
          if %r{^HTTP/\d+\.\d+\s+200\s*} =~ proxy_status_line
            # Skip the rest of the upstream proxy's response header.
            while line = os.gets(LF)
              break if /\A(#{CRLF}|#{LF})\z/om =~ line
            end
          else
            raise HTTPStatus::BadGateway
          end
        end
        @logger.debug("CONNECT #{host}:#{port}: succeeded")
        res.status = HTTPStatus::RC_OK
      rescue => ex
        @logger.debug("CONNECT #{host}:#{port}: failed `#{ex.message}'")
        # The tunnel will never be used; do not leak the upstream socket.
        os.close if os && !os.closed?
        res.set_error(ex)
        raise HTTPStatus::EOFError
      ensure
        if handler = @config[:ProxyContentHandler]
          handler.call(req, res)
        end
        res.send_response(ua)
        access_log(@config, req, res)

        # Should clear request-line not to send the response twice.
        # see: HTTPServer#run
        req.parse(NullReader) rescue nil
      end

      begin
        # Relay until either side fails or reaches EOF (sysread raises).
        while fds = IO::select([ua, os])
          if fds[0].member?(ua)
            buf = ua.sysread(1024)
            @logger.debug("CONNECT: #{buf.bytesize} byte from User-Agent")
            os.syswrite(buf)
          elsif fds[0].member?(os)
            buf = os.sysread(1024)
            @logger.debug("CONNECT: #{buf.bytesize} byte from #{host}:#{port}")
            ua.syswrite(buf)
          end
        end
      rescue
        @logger.debug("CONNECT #{host}:#{port}: closed")
      ensure
        # Close the upstream socket however the relay loop ends.
        os.close unless os.closed?
      end

      raise HTTPStatus::EOFError
    end

    # Proxies a GET request.
    def do_GET(req, res)
      perform_proxy_request(req, res) do |http, path, header|
        http.get(path, header)
      end
    end

    # Proxies a HEAD request.
    def do_HEAD(req, res)
      perform_proxy_request(req, res) do |http, path, header|
        http.head(path, header)
      end
    end

    # Proxies a POST request; a missing request body is sent as "".
    def do_POST(req, res)
      perform_proxy_request(req, res) do |http, path, header|
        http.post(path, req.body || "", header)
      end
    end

    # Answers OPTIONS locally with the list of methods this proxy handles.
    def do_OPTIONS(req, res)
      res['allow'] = "GET,HEAD,POST,OPTIONS,CONNECT"
    end

    private

    # Some header fields should not be transferred.
    HopByHop = %w( connection keep-alive proxy-authenticate upgrade
                   proxy-authorization te trailers transfer-encoding )
    ShouldNotTransfer = %w( set-cookie proxy-connection )

    # Splits a comma-separated header value into downcased tokens.
    # Whitespace around the commas is optional, so "close,keep-alive" and
    # "close, keep-alive" both yield ["close", "keep-alive"].  Returns []
    # for nil.
    def split_field(f)
      return [] unless f
      f.split(",").collect{|token| token.strip.downcase }.
        reject{|token| token.empty? }
    end

    # Copies header fields from +src+ to +dst+ under downcased keys,
    # skipping hop-by-hop fields, fields named in +src+'s Connection
    # header, and the fields in ShouldNotTransfer.
    def choose_header(src, dst)
      connections = split_field(src['connection'])
      src.each{|key, value|
        key = key.downcase
        if HopByHop.member?(key)          || # RFC2616: 13.5.1
           connections.member?(key)       || # RFC2616: 14.10
           ShouldNotTransfer.member?(key)    # pragmatics
          @logger.debug("choose_header: `#{key}: #{value}'")
          next
        end
        dst[key] = value
      }
    end

    # Net::HTTP is stupid about the multiple header fields.
    # Here is workaround: the joined set-cookie value is split on commas
    # and the pieces are glued back together into individual cookies, which
    # then replace the contents of +dst.cookies+.
    def set_cookie(src, dst)
      if str = src['set-cookie']
        cookies = []
        str.split(/,\s*/).each{|token|
          if cookies.empty?
            # Nothing to append to yet: the first token always starts a
            # cookie, even when it does not look like name=value.
            cookies << token
          elsif /^[^=]+;/o =~ token
            cookies[-1] << ", " << token
          elsif /=/o =~ token
            cookies << token
          else
            cookies[-1] << ", " << token
          end
        }
        dst.cookies.replace(cookies)
      end
    end

    # Adds this proxy to the via field of +h+ when :ProxyVia is set.
    # A new string is assigned rather than appending in place, so the
    # existing header value is never mutated.
    def set_via(h)
      if @config[:ProxyVia]
        if h['via']
          h['via'] = "#{h['via']}, #{@via}"
        else
          h['via'] = @via
        end
      end
    end

    # Builds the header Hash sent upstream for +req+.
    def setup_proxy_header(req, res)
      # Choose header fields to transfer
      header = Hash.new
      choose_header(req, header)
      set_via(header)
      return header
    end

    # Returns the upstream proxy URI, adding a Basic proxy-authorization
    # field to +header+ when that URI carries userinfo.  Returns
    # FakeProxyURI when no upstream proxy is configured.
    def setup_upstream_proxy_authentication(req, res, header)
      if upstream = proxy_uri(req, res)
        if upstream.userinfo
          header['proxy-authorization'] =
            "Basic " + [upstream.userinfo].pack("m").delete("\n")
        end
        return upstream
      end
      return FakeProxyURI
    end

    # Sends the request upstream and copies the reply into +res+.
    # The block receives (http, path, header) and must return the
    # Net::HTTP response.
    def perform_proxy_request(req, res)
      uri = req.request_uri
      path = uri.path.dup
      path << "?" << uri.query if uri.query
      header = setup_proxy_header(req, res)
      upstream = setup_upstream_proxy_authentication(req, res, header)
      response = nil

      http = Net::HTTP.new(uri.host, uri.port, upstream.host, upstream.port)
      if @config[:ProxyTimeout]
        # Must be set before #start: the open timeout only matters while
        # the connection is being established.
        http.open_timeout = 30   # secs
        http.read_timeout = 60   # secs
      end
      http.start do
        response = yield(http, path, header)
      end

      # Persistent connection requirements are mysterious for me.
      # So I will close the connection in every response.
      res['proxy-connection'] = "close"
      res['connection'] = "close"

      # Convert Net::HTTP::HTTPResponse to WEBrick::HTTPResponse
      res.status = response.code.to_i
      choose_header(response, res)
      set_cookie(response, res)
      set_via(res)
      res.body = response.body
    end

    # :stopdoc:
  end
end