1#
2# httpproxy.rb -- HTTPProxy Class
3#
4# Author: IPR -- Internet Programming with Ruby -- writers
5# Copyright (c) 2002 GOTO Kentaro
6# Copyright (c) 2002 Internet Programming with Ruby writers. All rights
7# reserved.
8#
9# $IPR: httpproxy.rb,v 1.18 2003/03/08 18:58:10 gotoyuzo Exp $
10# $kNotwork: straw.rb,v 1.3 2002/02/12 15:13:07 gotoken Exp $
11
12require "webrick/httpserver"
13require "net/http"
14
# On Ruby versions whose version string compares below "1.7", switch
# Net::HTTP via Net::HTTP::version_1_2.
# NOTE(review): presumably selects the 1.2-style Net::HTTP API on very old
# interpreters -- confirm before removing.
Net::HTTP::version_1_2 if RUBY_VERSION < "1.7"
16
17module WEBrick
18
19  NullReader = Object.new # :nodoc:
20  class << NullReader # :nodoc:
21    def read(*args)
22      nil
23    end
24    alias gets read
25  end
26
27  FakeProxyURI = Object.new # :nodoc:
28  class << FakeProxyURI # :nodoc:
29    def method_missing(meth, *args)
30      if %w(scheme host port path query userinfo).member?(meth.to_s)
31        return nil
32      end
33      super
34    end
35  end
36
37  # :startdoc:
38
39  ##
40  # An HTTP Proxy server which proxies GET, HEAD and POST requests.
41  #
42  # To create a simple proxy server:
43  #
44  #   require 'webrick'
45  #   require 'webrick/httpproxy'
46  #
47  #   proxy = WEBrick::HTTPProxyServer.new Port: 8000
48  #
49  #   trap 'INT'  do proxy.shutdown end
50  #   trap 'TERM' do proxy.shutdown end
51  #
52  #   proxy.start
53  #
54  # See ::new for proxy-specific configuration items.
55  #
56  # == Modifying proxied responses
57  #
58  # To modify content the proxy server returns use the +:ProxyContentHandler+
59  # option:
60  #
61  #   handler = proc do |req, res|
62  #     if res['content-type'] == 'text/plain' then
63  #       res.body << "\nThis content was proxied!\n"
64  #     end
65  #   end
66  #
67  #   proxy =
68  #     WEBrick::HTTPProxyServer.new Port: 8000, ProxyContentHandler: handler
69
70  class HTTPProxyServer < HTTPServer
71
72    ##
73    # Proxy server configurations.  The proxy server handles the following
74    # configuration items in addition to those supported by HTTPServer:
75    #
76    # :ProxyAuthProc:: Called with a request and response to authorize a
77    #                  request
78    # :ProxyVia:: Appended to the via header
79    # :ProxyURI:: The proxy server's URI
80    # :ProxyContentHandler:: Called with a request and response and allows
81    #                        modification of the response
82    # :ProxyTimeout:: Sets the proxy timeouts to 30 seconds for open and 60
83    #                 seconds for read operations
84
85    def initialize(config={}, default=Config::HTTP)
86      super(config, default)
87      c = @config
88      @via = "#{c[:HTTPVersion]} #{c[:ServerName]}:#{c[:Port]}"
89    end
90
91    # :stopdoc:
92    def service(req, res)
93      if req.request_method == "CONNECT"
94        do_CONNECT(req, res)
95      elsif req.unparsed_uri =~ %r!^http://!
96        proxy_service(req, res)
97      else
98        super(req, res)
99      end
100    end
101
102    def proxy_auth(req, res)
103      if proc = @config[:ProxyAuthProc]
104        proc.call(req, res)
105      end
106      req.header.delete("proxy-authorization")
107    end
108
109    def proxy_uri(req, res)
110      # should return upstream proxy server's URI
111      return @config[:ProxyURI]
112    end
113
114    def proxy_service(req, res)
115      # Proxy Authentication
116      proxy_auth(req, res)
117
118      begin
119        self.send("do_#{req.request_method}", req, res)
120      rescue NoMethodError
121        raise HTTPStatus::MethodNotAllowed,
122          "unsupported method `#{req.request_method}'."
123      rescue => err
124        logger.debug("#{err.class}: #{err.message}")
125        raise HTTPStatus::ServiceUnavailable, err.message
126      end
127
128      # Process contents
129      if handler = @config[:ProxyContentHandler]
130        handler.call(req, res)
131      end
132    end
133
134    def do_CONNECT(req, res)
135      # Proxy Authentication
136      proxy_auth(req, res)
137
138      ua = Thread.current[:WEBrickSocket]  # User-Agent
139      raise HTTPStatus::InternalServerError,
140        "[BUG] cannot get socket" unless ua
141
142      host, port = req.unparsed_uri.split(":", 2)
143      # Proxy authentication for upstream proxy server
144      if proxy = proxy_uri(req, res)
145        proxy_request_line = "CONNECT #{host}:#{port} HTTP/1.0"
146        if proxy.userinfo
147          credentials = "Basic " + [proxy.userinfo].pack("m").delete("\n")
148        end
149        host, port = proxy.host, proxy.port
150      end
151
152      begin
153        @logger.debug("CONNECT: upstream proxy is `#{host}:#{port}'.")
154        os = TCPSocket.new(host, port)     # origin server
155
156        if proxy
157          @logger.debug("CONNECT: sending a Request-Line")
158          os << proxy_request_line << CRLF
159          @logger.debug("CONNECT: > #{proxy_request_line}")
160          if credentials
161            @logger.debug("CONNECT: sending a credentials")
162            os << "Proxy-Authorization: " << credentials << CRLF
163          end
164          os << CRLF
165          proxy_status_line = os.gets(LF)
166          @logger.debug("CONNECT: read a Status-Line form the upstream server")
167          @logger.debug("CONNECT: < #{proxy_status_line}")
168          if %r{^HTTP/\d+\.\d+\s+200\s*} =~ proxy_status_line
169            while line = os.gets(LF)
170              break if /\A(#{CRLF}|#{LF})\z/om =~ line
171            end
172          else
173            raise HTTPStatus::BadGateway
174          end
175        end
176        @logger.debug("CONNECT #{host}:#{port}: succeeded")
177        res.status = HTTPStatus::RC_OK
178      rescue => ex
179        @logger.debug("CONNECT #{host}:#{port}: failed `#{ex.message}'")
180        res.set_error(ex)
181        raise HTTPStatus::EOFError
182      ensure
183        if handler = @config[:ProxyContentHandler]
184          handler.call(req, res)
185        end
186        res.send_response(ua)
187        access_log(@config, req, res)
188
189        # Should clear request-line not to send the response twice.
190        # see: HTTPServer#run
191        req.parse(NullReader) rescue nil
192      end
193
194      begin
195        while fds = IO::select([ua, os])
196          if fds[0].member?(ua)
197            buf = ua.sysread(1024);
198            @logger.debug("CONNECT: #{buf.bytesize} byte from User-Agent")
199            os.syswrite(buf)
200          elsif fds[0].member?(os)
201            buf = os.sysread(1024);
202            @logger.debug("CONNECT: #{buf.bytesize} byte from #{host}:#{port}")
203            ua.syswrite(buf)
204          end
205        end
206      rescue => ex
207        os.close
208        @logger.debug("CONNECT #{host}:#{port}: closed")
209      end
210
211      raise HTTPStatus::EOFError
212    end
213
214    def do_GET(req, res)
215      perform_proxy_request(req, res) do |http, path, header|
216        http.get(path, header)
217      end
218    end
219
220    def do_HEAD(req, res)
221      perform_proxy_request(req, res) do |http, path, header|
222        http.head(path, header)
223      end
224    end
225
226    def do_POST(req, res)
227      perform_proxy_request(req, res) do |http, path, header|
228        http.post(path, req.body || "", header)
229      end
230    end
231
232    def do_OPTIONS(req, res)
233      res['allow'] = "GET,HEAD,POST,OPTIONS,CONNECT"
234    end
235
236    private
237
238    # Some header fields should not be transferred.
239    HopByHop = %w( connection keep-alive proxy-authenticate upgrade
240                   proxy-authorization te trailers transfer-encoding )
241    ShouldNotTransfer = %w( set-cookie proxy-connection )
242    def split_field(f) f ? f.split(/,\s+/).collect{|i| i.downcase } : [] end
243
244    def choose_header(src, dst)
245      connections = split_field(src['connection'])
246      src.each{|key, value|
247        key = key.downcase
248        if HopByHop.member?(key)          || # RFC2616: 13.5.1
249           connections.member?(key)       || # RFC2616: 14.10
250           ShouldNotTransfer.member?(key)    # pragmatics
251          @logger.debug("choose_header: `#{key}: #{value}'")
252          next
253        end
254        dst[key] = value
255      }
256    end
257
258    # Net::HTTP is stupid about the multiple header fields.
259    # Here is workaround:
260    def set_cookie(src, dst)
261      if str = src['set-cookie']
262        cookies = []
263        str.split(/,\s*/).each{|token|
264          if /^[^=]+;/o =~ token
265            cookies[-1] << ", " << token
266          elsif /=/o =~ token
267            cookies << token
268          else
269            cookies[-1] << ", " << token
270          end
271        }
272        dst.cookies.replace(cookies)
273      end
274    end
275
276    def set_via(h)
277      if @config[:ProxyVia]
278        if  h['via']
279          h['via'] << ", " << @via
280        else
281          h['via'] = @via
282        end
283      end
284    end
285
286    def setup_proxy_header(req, res)
287      # Choose header fields to transfer
288      header = Hash.new
289      choose_header(req, header)
290      set_via(header)
291      return header
292    end
293
294    def setup_upstream_proxy_authentication(req, res, header)
295      if upstream = proxy_uri(req, res)
296        if upstream.userinfo
297          header['proxy-authorization'] =
298            "Basic " + [upstream.userinfo].pack("m").delete("\n")
299        end
300        return upstream
301      end
302      return FakeProxyURI
303    end
304
305    def perform_proxy_request(req, res)
306      uri = req.request_uri
307      path = uri.path.dup
308      path << "?" << uri.query if uri.query
309      header = setup_proxy_header(req, res)
310      upstream = setup_upstream_proxy_authentication(req, res, header)
311      response = nil
312
313      http = Net::HTTP.new(uri.host, uri.port, upstream.host, upstream.port)
314      http.start do
315        if @config[:ProxyTimeout]
316          ##################################   these issues are
317          http.open_timeout = 30   # secs  #   necessary (maybe bacause
318          http.read_timeout = 60   # secs  #   Ruby's bug, but why?)
319          ##################################
320        end
321        response = yield(http, path, header)
322      end
323
324      # Persistent connection requirements are mysterious for me.
325      # So I will close the connection in every response.
326      res['proxy-connection'] = "close"
327      res['connection'] = "close"
328
329      # Convert Net::HTTP::HTTPResponse to WEBrick::HTTPResponse
330      res.status = response.code.to_i
331      choose_header(response, res)
332      set_cookie(response, res)
333      set_via(res)
334      res.body = response.body
335    end
336
337    # :stopdoc:
338  end
339end
340