1require 'rubygems'
2require 'rubygems/user_interaction'
3require 'cgi'
4require 'thread'
5require 'uri'
6require 'resolv'
7
8##
9# RemoteFetcher handles the details of fetching gems and gem information from
10# a remote source.
11
12class Gem::RemoteFetcher
13
14  include Gem::UserInteraction
15
16  ##
17  # A FetchError exception wraps up the various possible IO and HTTP failures
18  # that could happen while downloading from the internet.
19
20  class FetchError < Gem::Exception
21
22    ##
23    # The URI which was being accessed when the exception happened.
24
25    attr_accessor :uri
26
27    def initialize(message, uri)
28      super message
29      @uri = uri
30    end
31
32    def to_s # :nodoc:
33      "#{super} (#{uri})"
34    end
35
36  end
37
38  ##
39  # A FetchError that indicates that the reason for not being
40  # able to fetch data was that the host could not be contacted
41
  class UnknownHostError < FetchError
    # Adds no behaviour of its own.  #fetch_path raises it when a request
    # times out or when the underlying error message mentions getaddrinfo.
  end
44
45  @fetcher = nil
46
47  ##
48  # Cached RemoteFetcher instance.
49
50  def self.fetcher
51    @fetcher ||= self.new Gem.configuration[:http_proxy]
52  end
53
54  ##
55  # Initialize a remote fetcher using the source URI and possible proxy
56  # information.
57  #
58  # +proxy+
59  # * [String]: explicit specification of proxy; overrides any environment
60  #             variable setting
61  # * nil: respect environment variables (HTTP_PROXY, HTTP_PROXY_USER,
62  #        HTTP_PROXY_PASS)
63  # * <tt>:no_proxy</tt>: ignore environment variables and _don't_ use a proxy
64  #
65  # +dns+: An object to use for DNS resolution of the API endpoint.
66  #        By default, use Resolv::DNS.
67
68  def initialize(proxy=nil, dns=Resolv::DNS.new)
69    require 'net/http'
70    require 'stringio'
71    require 'time'
72    require 'uri'
73
74    Socket.do_not_reverse_lookup = true
75
76    @connections = {}
77    @connections_mutex = Mutex.new
78    @requests = Hash.new 0
79    @proxy_uri =
80      case proxy
81      when :no_proxy then nil
82      when nil then get_proxy_from_env
83      when URI::HTTP then proxy
84      else URI.parse(proxy)
85      end
86    @user_agent = user_agent
87    @env_no_proxy = get_no_proxy_from_env
88
89    @dns = dns
90  end
91
92  ##
93  #
94  # Given a source at +uri+, calculate what hostname to actually
95  # connect to query the data for it.
96
97  def api_endpoint(uri)
98    host = uri.host
99
100    begin
101      res = @dns.getresource "_rubygems._tcp.#{host}",
102                             Resolv::DNS::Resource::IN::SRV
103    rescue Resolv::ResolvError
104      uri
105    else
106      URI.parse "#{res.target}#{uri.path}"
107    end
108  end
109
110  ##
111  # Given a name and requirement, downloads this gem into cache and returns the
112  # filename. Returns nil if the gem cannot be located.
113  #--
114  # Should probably be integrated with #download below, but that will be a
  # larger, more encompassing effort. -erikh
116
117  def download_to_cache dependency
118    found, _ = Gem::SpecFetcher.fetcher.spec_for_dependency dependency
119
120    return if found.empty?
121
122    spec, source = found.sort_by { |(s,_)| s.version }.last
123
124    download spec, source.uri.to_s
125  end
126
127  ##
128  # Moves the gem +spec+ from +source_uri+ to the cache dir unless it is
129  # already there.  If the source_uri is local the gem cache dir copy is
130  # always replaced.
131
  def download(spec, source_uri, install_dir = Gem.dir)
    # Pick the directory the gem file is stored in:
    # * the install dir itself when it is the current working directory,
    # * <install_dir>/cache when the install dir is writable,
    # * <Gem.user_dir>/cache otherwise.
    cache_dir =
      if Dir.pwd == install_dir then # see fetch_command
        install_dir
      elsif File.writable? install_dir then
        File.join install_dir, "cache"
      else
        File.join Gem.user_dir, "cache"
      end

    gem_file_name = File.basename spec.cache_file
    local_gem_path = File.join cache_dir, gem_file_name

    # Best effort: any error raised while creating the directory is swallowed
    # by the inline rescue.
    FileUtils.mkdir_p cache_dir rescue nil unless File.exist? cache_dir

    # Always escape URI's to deal with potential spaces and such.  Uses
    # URI::DEFAULT_PARSER when that constant exists and URI.escape otherwise.
    unless URI::Generic === source_uri
      source_uri = URI.parse(URI.const_defined?(:DEFAULT_PARSER) ?
                             URI::DEFAULT_PARSER.escape(source_uri.to_s) :
                             URI.escape(source_uri.to_s))
    end

    scheme = source_uri.scheme

    # URI.parse gets confused by MS Windows paths with forward slashes.
    # A single-letter "scheme" is treated as a drive letter, i.e. a local
    # path, and handled by the +nil+ branch below.
    scheme = nil if scheme =~ /^[a-z]$/i

    # REFACTOR: split this up and dispatch on scheme (eg download_http)
    # REFACTOR: be sure to clean up fake fetcher when you do this... cleaner
    case scheme
    when 'http', 'https' then
      # Remote source: only fetch when the gem is not in the cache yet.
      unless File.exist? local_gem_path then
        begin
          say "Downloading gem #{gem_file_name}" if
            Gem.configuration.really_verbose

          remote_gem_path = source_uri + "gems/#{gem_file_name}"

          self.cache_update_path remote_gem_path, local_gem_path
        rescue Gem::RemoteFetcher::FetchError
          # Re-raise unless the spec's platform differs from its original
          # platform; in that case retry once under the original file name.
          raise if spec.original_platform == spec.platform

          alternate_name = "#{spec.original_name}.gem"

          say "Failed, downloading gem #{alternate_name}" if
            Gem.configuration.really_verbose

          remote_gem_path = source_uri + "gems/#{alternate_name}"

          self.cache_update_path remote_gem_path, local_gem_path
        end
      end
    when 'file' then
      # file:// source: copy <path>/gems/<gem file> into the cache.  When the
      # URI itself names a .gem file, its containing directory is used as
      # <path>.
      begin
        path = source_uri.path
        path = File.dirname(path) if File.extname(path) == '.gem'

        remote_gem_path = correct_for_windows_path(File.join(path, 'gems', gem_file_name))

        FileUtils.cp(remote_gem_path, local_gem_path)
      rescue Errno::EACCES
        # The copy was refused: hand back the source location instead.
        local_gem_path = source_uri.to_s
      end

      say "Using local gem #{local_gem_path}" if
        Gem.configuration.really_verbose
    when nil then # TODO test for local overriding cache
      # Plain local path.  On Windows a parsed "scheme" is glued back onto the
      # path when the path itself contains no colon.
      source_path = if Gem.win_platform? && source_uri.scheme &&
                       !source_uri.path.include?(':') then
                      "#{source_uri.scheme}:#{source_uri.path}"
                    else
                      source_uri.path
                    end

      source_path = unescape source_path

      begin
        # Skip the copy when source and destination are the same file.
        FileUtils.cp source_path, local_gem_path unless
          File.identical?(source_path, local_gem_path)
      rescue Errno::EACCES
        # The copy was refused: hand back the source location instead.
        local_gem_path = source_uri.to_s
      end

      say "Using local gem #{local_gem_path}" if
        Gem.configuration.really_verbose
    else
      raise ArgumentError, "unsupported URI scheme #{source_uri.scheme}"
    end

    # Path of the cached gem, or the source URI string after an EACCES above.
    local_gem_path
  end
223
224  ##
225  # File Fetcher. Dispatched by +fetch_path+. Use it instead.
226
227  def fetch_file uri, *_
228    Gem.read_binary correct_for_windows_path uri.path
229  end
230
231  ##
232  # HTTP Fetcher. Dispatched by +fetch_path+. Use it instead.
233
234  def fetch_http uri, last_modified = nil, head = false, depth = 0
235    fetch_type = head ? Net::HTTP::Head : Net::HTTP::Get
236    response   = request uri, fetch_type, last_modified
237
238    case response
239    when Net::HTTPOK, Net::HTTPNotModified then
240      head ? response : response.body
241    when Net::HTTPMovedPermanently, Net::HTTPFound, Net::HTTPSeeOther,
242         Net::HTTPTemporaryRedirect then
243      raise FetchError.new('too many redirects', uri) if depth > 10
244
245      location = URI.parse response['Location']
246
247      if https?(uri) && !https?(location)
248        raise FetchError.new("redirecting to non-https resource: #{location}", uri)
249      end
250
251      fetch_http(location, last_modified, head, depth + 1)
252    else
253      raise FetchError.new("bad response #{response.message} #{response.code}", uri)
254    end
255  end
256
257  alias :fetch_https :fetch_http
258
259  ##
260  # Downloads +uri+ and returns it as a String.
261
262  def fetch_path(uri, mtime = nil, head = false)
263    uri = URI.parse uri unless URI::Generic === uri
264
265    raise ArgumentError, "bad uri: #{uri}" unless uri
266
267    unless uri.scheme
268      raise ArgumentError, "uri scheme is invalid: #{uri.scheme.inspect}"
269    end
270
271    data = send "fetch_#{uri.scheme}", uri, mtime, head
272
273    if data and !head and uri.to_s =~ /gz$/
274      begin
275        data = Gem.gunzip data
276      rescue Zlib::GzipFile::Error
277        raise FetchError.new("server did not return a valid file", uri.to_s)
278      end
279    end
280
281    data
282  rescue FetchError
283    raise
284  rescue Timeout::Error
285    raise UnknownHostError.new('timed out', uri.to_s)
286  rescue IOError, SocketError, SystemCallError => e
287    if e.message =~ /getaddrinfo/
288      raise UnknownHostError.new('no such name', uri.to_s)
289    else
290      raise FetchError.new("#{e.class}: #{e}", uri.to_s)
291    end
292  end
293
294  ##
295  # Downloads +uri+ to +path+ if necessary. If no path is given, it just
296  # passes the data.
297
298  def cache_update_path uri, path = nil, update = true
299    mtime = path && File.stat(path).mtime rescue nil
300
301    if mtime && Net::HTTPNotModified === fetch_path(uri, mtime, true)
302      Gem.read_binary(path)
303    else
304      data = fetch_path(uri)
305
306      if update and path then
307        open(path, 'wb') do |io|
308          io.write data
309        end
310      end
311
312      data
313    end
314  end
315
316  ##
317  # Returns the size of +uri+ in bytes.
318
319  def fetch_size(uri) # TODO: phase this out
320    response = fetch_path(uri, nil, true)
321
322    response['content-length'].to_i
323  end
324
325  def escape_auth_info(str)
326    str && CGI.escape(str)
327  end
328
329  def unescape_auth_info(str)
330    str && CGI.unescape(str)
331  end
332
333  def escape(str)
334    return unless str
335    @uri_parser ||= uri_escaper
336    @uri_parser.escape str
337  end
338
339  def unescape(str)
340    return unless str
341    @uri_parser ||= uri_escaper
342    @uri_parser.unescape str
343  end
344
345  def uri_escaper
346    URI::Parser.new
347  rescue NameError
348    URI
349  end
350
351  ##
352  # Returns list of no_proxy entries (if any) from the environment
353
354  def get_no_proxy_from_env
355    env_no_proxy = ENV['no_proxy'] || ENV['NO_PROXY']
356
357    return [] if env_no_proxy.nil?  or env_no_proxy.empty?
358
359    env_no_proxy.split(/\s*,\s*/)
360  end
361
362  ##
363  # Returns an HTTP proxy URI if one is set in the environment variables.
364
365  def get_proxy_from_env
366    env_proxy = ENV['http_proxy'] || ENV['HTTP_PROXY']
367
368    return nil if env_proxy.nil? or env_proxy.empty?
369
370    uri = URI.parse(normalize_uri(env_proxy))
371
372    if uri and uri.user.nil? and uri.password.nil? then
373      # Probably we have http_proxy_* variables?
374      uri.user = escape_auth_info(ENV['http_proxy_user'] || ENV['HTTP_PROXY_USER'])
375      uri.password = escape_auth_info(ENV['http_proxy_pass'] || ENV['HTTP_PROXY_PASS'])
376    end
377
378    uri
379  end
380
381  ##
382  # Normalize the URI by adding "http://" if it is missing.
383
384  def normalize_uri(uri)
385    (uri =~ /^(https?|ftp|file):/i) ? uri : "http://#{uri}"
386  end
387
388  ##
  # Creates an HTTP connection based on +uri+, or retrieves an existing
390  # connection, using a proxy if needed.
391
392  def connection_for(uri)
393    net_http_args = [uri.host, uri.port]
394
395    if @proxy_uri and not no_proxy?(uri.host) then
396      net_http_args += [
397        @proxy_uri.host,
398        @proxy_uri.port,
399        unescape_auth_info(@proxy_uri.user),
400        unescape_auth_info(@proxy_uri.password)
401      ]
402    end
403
404    connection_id = [Thread.current.object_id, *net_http_args].join ':'
405
406    connection = @connections_mutex.synchronize do
407      @connections[connection_id] ||= Net::HTTP.new(*net_http_args)
408      @connections[connection_id]
409    end
410
411    if https?(uri) and not connection.started? then
412      configure_connection_for_https(connection)
413    end
414
415    connection.start unless connection.started?
416
417    connection
418  rescue defined?(OpenSSL::SSL) ? OpenSSL::SSL::SSLError : Errno::EHOSTDOWN,
419         Errno::EHOSTDOWN => e
420    raise FetchError.new(e.message, uri)
421  end
422
423  def configure_connection_for_https(connection)
424    require 'net/https'
425    connection.use_ssl = true
426    connection.verify_mode =
427      Gem.configuration.ssl_verify_mode || OpenSSL::SSL::VERIFY_PEER
428    store = OpenSSL::X509::Store.new
429    if Gem.configuration.ssl_ca_cert
430      if File.directory? Gem.configuration.ssl_ca_cert
431        store.add_path Gem.configuration.ssl_ca_cert
432      else
433        store.add_file Gem.configuration.ssl_ca_cert
434      end
435    else
436      store.set_default_paths
437      add_rubygems_trusted_certs(store)
438    end
439    connection.cert_store = store
440  rescue LoadError => e
441    raise unless (e.respond_to?(:path) && e.path == 'openssl') ||
442                 e.message =~ / -- openssl$/
443
444    raise Gem::Exception.new(
445            'Unable to require openssl, install OpenSSL and rebuild ruby (preferred) or use non-HTTPS sources')
446  end
447
448  def add_rubygems_trusted_certs(store)
449    pattern = File.expand_path("./ssl_certs/*.pem", File.dirname(__FILE__))
450    Dir.glob(pattern).each do |ssl_cert_file|
451      store.add_file ssl_cert_file
452    end
453  end
454
455  def correct_for_windows_path(path)
456    if path[0].chr == '/' && path[1].chr =~ /[a-z]/i && path[2].chr == ':'
457      path = path[1..-1]
458    else
459      path
460    end
461  end
462
463  def no_proxy? host
464    host = host.downcase
465    @env_no_proxy.each do |pattern|
466      pattern = pattern.downcase
467      return true if host[-pattern.length, pattern.length ] == pattern
468    end
469    return false
470  end
471
472  ##
473  # Performs a Net::HTTP request of type +request_class+ on +uri+ returning
474  # a Net::HTTP response object.  request maintains a table of persistent
475  # connections to reduce connect overhead.
476
  def request(uri, request_class, last_modified = nil)
    # Build the request object (e.g. Net::HTTP::Get) for the path/query part
    # of +uri+.
    request = request_class.new uri.request_uri

    # Send HTTP basic auth when the URI carries a non-empty user name.
    # NOTE(review): the uri.nil? test looks unreachable, since uri has
    # already been dereferenced on the line above.
    unless uri.nil? || uri.user.nil? || uri.user.empty? then
      request.basic_auth uri.user, uri.password
    end

    request.add_field 'User-Agent', @user_agent
    request.add_field 'Connection', 'keep-alive'
    request.add_field 'Keep-Alive', '30'

    # Conditional request: +last_modified+ is converted to UTC and sent as
    # If-Modified-Since.
    if last_modified then
      last_modified = last_modified.utc
      request.add_field 'If-Modified-Since', last_modified.rfc2822
    end

    # Callers may customise the request before it is sent.
    yield request if block_given?

    connection = connection_for uri

    # Each kind of failure is retried at most once; the flags live outside
    # the begin block so they survive +retry+.
    retried = false
    bad_response = false

    begin
      @requests[connection.object_id] += 1

      say "#{request.method} #{uri}" if
        Gem.configuration.really_verbose

      file_name = File.basename(uri.path)
      # perform download progress reporter only for gems
      if request.response_body_permitted? && file_name =~ /\.gem$/
        reporter = ui.download_reporter
        response = connection.request(request) do |incomplete_response|
          # Only successful responses are streamed through the reporter.
          if Net::HTTPOK === incomplete_response
            reporter.fetch(file_name, incomplete_response.content_length)
            downloaded = 0
            data = ''

            # Read the body segment by segment, reporting the running total.
            incomplete_response.read_body do |segment|
              data << segment
              downloaded += segment.length
              reporter.update(downloaded)
            end
            reporter.done
            # Store the assembled body back on the response, via the setter
            # when the response has one and the instance variable otherwise.
            if incomplete_response.respond_to? :body=
              incomplete_response.body = data
            else
              incomplete_response.instance_variable_set(:@body, data)
            end
          end
        end
      else
        response = connection.request request
      end

      say "#{response.code} #{response.message}" if
        Gem.configuration.really_verbose

    rescue Net::HTTPBadResponse
      say "bad response" if Gem.configuration.really_verbose

      reset connection

      # A second bad response in the same call gives up.
      raise FetchError.new('too many bad responses', uri) if bad_response

      bad_response = true
      retry
    # HACK work around EOFError bug in Net::HTTP
    # NOTE Errno::ECONNABORTED raised a lot on Windows, and make impossible
    # to install gems.
    rescue EOFError, Timeout::Error,
           Errno::ECONNABORTED, Errno::ECONNRESET, Errno::EPIPE

      requests = @requests[connection.object_id]
      say "connection reset after #{requests} requests, retrying" if
        Gem.configuration.really_verbose

      # A second connection failure in the same call gives up.
      raise FetchError.new('too many connection resets', uri) if retried

      reset connection

      retried = true
      retry
    end

    response
  end
565
566  ##
567  # Resets HTTP connection +connection+.
568
569  def reset(connection)
570    @requests.delete connection.object_id
571
572    connection.finish
573    connection.start
574  end
575
576  def user_agent
577    ua = "RubyGems/#{Gem::VERSION} #{Gem::Platform.local}"
578
579    ruby_version = RUBY_VERSION
580    ruby_version += 'dev' if RUBY_PATCHLEVEL == -1
581
582    ua << " Ruby/#{ruby_version} (#{RUBY_RELEASE_DATE}"
583    if RUBY_PATCHLEVEL >= 0 then
584      ua << " patchlevel #{RUBY_PATCHLEVEL}"
585    elsif defined?(RUBY_REVISION) then
586      ua << " revision #{RUBY_REVISION}"
587    end
588    ua << ")"
589
590    ua << " #{RUBY_ENGINE}" if defined?(RUBY_ENGINE) and RUBY_ENGINE != 'ruby'
591
592    ua
593  end
594
595  def https?(uri)
596    uri.scheme.downcase == 'https'
597  end
598
599end
600
601