1# = uri/generic.rb
2#
3# Author:: Akira Yamada <akira@ruby-lang.org>
4# License:: You can redistribute it and/or modify it under the same term as Ruby.
5# Revision:: $Id: generic.rb 43062 2013-09-26 16:08:11Z nagachika $
6#
7# See URI for general documentation
8#
9
10require 'uri/common'
11
12module URI
13
14  #
15  # Base class for all URI classes.
16  # Implements generic URI syntax as per RFC 2396.
17  #
18  class Generic
19    include URI
20
    #
    # Default port for URI::Generic: +nil+, i.e. no default port.
    # ::default_port reads this constant through <tt>self::DEFAULT_PORT</tt>.
    #
    DEFAULT_PORT = nil
25
26    #
27    # Returns default port
28    #
    def self.default_port
      # Resolved through +self+, so the lookup starts at the receiving class
      # instead of being fixed lexically to URI::Generic.
      self::DEFAULT_PORT
    end
32
33    #
34    # Returns default port
35    #
    def default_port
      # Instance-level convenience: delegates to the class-level ::default_port.
      self.class.default_port
    end
39
    #
    # A frozen Array of the available components for URI::Generic.
    #
    # The order matters: ::build accepts an Array with exactly this many
    # elements and splats it into ::new, so it matches the leading positional
    # parameters of #initialize.
    #
    COMPONENT = [
      :scheme,
      :userinfo, :host, :port, :registry,
      :path, :opaque,
      :query,
      :fragment
    ].freeze
50
51    #
52    # Components of the URI in the order.
53    #
    def self.component
      # Resolved through +self+, so the lookup starts at the receiving class.
      self::COMPONENT
    end
57
    #
    # Default to not use the registry for a URI::Generic.
    #
    # While ::use_registry returns false, #initialize raises InvalidURIError
    # when a registry component is supplied.
    #
    USE_REGISTRY = false
62
63    #
64    # Returns whether a registry of naming
65    # authorities are being used.
66    #
    def self.use_registry
      # Resolved through +self+, so the lookup starts at the receiving class.
      self::USE_REGISTRY
    end
70
71    #
72    # == Synopsis
73    #
74    # See #new
75    #
76    # == Description
77    #
    # First tries to create a new URI::Generic instance using
    # URI::Generic::build. If that raises URI::InvalidComponentError,
    # the URI components are escaped with DEFAULT_PARSER.escape and ::build is tried again.
81    #
82    #
83    def self.build2(args)
84      begin
85        return self.build(args)
86      rescue InvalidComponentError
87        if args.kind_of?(Array)
88          return self.build(args.collect{|x|
89            if x.is_a?(String)
90              DEFAULT_PARSER.escape(x)
91            else
92              x
93            end
94          })
95        elsif args.kind_of?(Hash)
96          tmp = {}
97          args.each do |key, value|
98            tmp[key] = if value
99                DEFAULT_PARSER.escape(value)
100              else
101                value
102              end
103          end
104          return self.build(tmp)
105        end
106      end
107    end
108
109    #
110    # == Synopsis
111    #
112    # See #new
113    #
114    # == Description
115    #
116    # Creates a new URI::Generic instance from components of URI::Generic
117    # with check.  Components are: scheme, userinfo, host, port, registry, path,
118    # opaque, query and fragment. You can provide arguments either by an Array or a Hash.
119    # See #new for hash keys to use or for order of array items.
120    #
121    def self.build(args)
122      if args.kind_of?(Array) &&
123          args.size == ::URI::Generic::COMPONENT.size
124        tmp = args.dup
125      elsif args.kind_of?(Hash)
126        tmp = ::URI::Generic::COMPONENT.collect do |c|
127          if args.include?(c)
128            args[c]
129          else
130            nil
131          end
132        end
133      else
134        component = self.class.component rescue ::URI::Generic::COMPONENT
135        raise ArgumentError,
136        "expected Array of or Hash of components of #{self.class} (#{component.join(', ')})"
137      end
138
139      tmp << nil
140      tmp << true
141      return self.new(*tmp)
142    end
143    #
144    # == Args
145    #
146    # +scheme+::
147    #   Protocol scheme, i.e. 'http','ftp','mailto' and so on.
148    # +userinfo+::
149    #   User name and password, i.e. 'sdmitry:bla'
150    # +host+::
151    #   Server host name
152    # +port+::
153    #   Server port
154    # +registry+::
155    #   Registry of naming authorities.
156    # +path+::
157    #   Path on server
158    # +opaque+::
159    #   Opaque part
160    # +query+::
161    #   Query data
162    # +fragment+::
163    #   A part of URI after '#' sign
164    # +parser+::
165    #   Parser for internal use [URI::DEFAULT_PARSER by default]
166    # +arg_check+::
167    #   Check arguments [false by default]
168    #
169    # == Description
170    #
171    # Creates a new URI::Generic instance from ``generic'' components without check.
172    #
    def initialize(scheme,
                   userinfo, host, port, registry,
                   path, opaque,
                   query,
                   fragment,
                   parser = DEFAULT_PARSER,
                   arg_check = false)
      # Start from an all-nil state: the check_* methods inspect these
      # instance variables to detect conflicting components.
      @scheme = nil
      @user = nil
      @password = nil
      @host = nil
      @port = nil
      @path = nil
      @query = nil
      @opaque = nil
      @registry = nil
      @fragment = nil
      # The default parser is stored as nil; any other value is kept as given.
      # The #parser reader falls back to DEFAULT_PARSER when @parser is nil.
      @parser = parser == DEFAULT_PARSER ? nil : parser

      if arg_check
        # Validating path: each public setter runs its check_* method first.
        # opaque and registry are assigned after host/port/userinfo/path, so
        # their checks see those components already in place.
        self.scheme = scheme
        self.userinfo = userinfo
        self.host = host
        self.port = port
        self.path = path
        self.query = query
        self.opaque = opaque
        self.registry = registry
        self.fragment = fragment
      else
        # Trusting path: the set_* methods assign without running check_*.
        self.set_scheme(scheme)
        self.set_userinfo(userinfo)
        self.set_host(host)
        self.set_port(port)
        self.set_path(path)
        self.set_query(query)
        self.set_opaque(opaque)
        self.set_registry(registry)
        self.set_fragment(fragment)
      end
      # A registry is only accepted by classes whose ::use_registry is true.
      if @registry && !self.class.use_registry
        raise InvalidURIError,
          "the scheme #{@scheme} does not accept registry part: #{@registry} (or bad hostname?)"
      end

      @scheme.freeze if @scheme
      # A URI with neither a path nor an opaque part gets an empty path.
      self.set_path('') if !@path && !@opaque # (see RFC2396 Section 5.2)
      # Fill in the class default port when none was given.
      self.set_port(self.default_port) if self.default_port && !@port
    end
222
223    #
224    # returns the scheme component of the URI.
225    #
226    #   URI("http://foo/bar/baz").scheme #=> "http"
227    #
228    attr_reader :scheme
229
230    # returns the host component of the URI.
231    #
232    #   URI("http://foo/bar/baz").host #=> "foo"
233    #
234    # It returns nil if no host component.
235    #
236    #   URI("mailto:foo@example.org").host #=> nil
237    #
    # The component doesn't contain the port number.
239    #
240    #   URI("http://foo:8080/bar/baz").host #=> "foo"
241    #
242    # Since IPv6 addresses are wrapped by brackets in URIs,
243    # this method returns IPv6 addresses wrapped by brackets.
244    # This form is not appropriate to pass socket methods such as TCPSocket.open.
245    # If unwrapped host names are required, use "hostname" method.
246    #
247    #   URI("http://[::1]/bar/baz").host #=> "[::1]"
248    #   URI("http://[::1]/bar/baz").hostname #=> "::1"
249    #
250    attr_reader :host
251
252    # returns the port component of the URI.
253    #
    #   URI("http://foo/bar/baz").port #=> 80
    #
    #   URI("http://foo:8080/bar/baz").port #=> 8080
257    #
258    attr_reader :port
259
260    # returns the registry component of the URI.
261    #
262    #  (see RFC2396 Section 3.2)
263    #
264    attr_reader :registry
265
266    # returns the path component of the URI.
267    #
268    #   URI("http://foo/bar/baz").path #=> "/bar/baz"
269    #
270    attr_reader :path
271
272    # returns the query component of the URI.
273    #
274    #   URI("http://foo/bar/baz?search=FooBar").query #=> "search=FooBar"
275    #
276    attr_reader :query
277
278    # returns the opaque part of the URI.
279    #
280    #   URI("mailto:foo@example.org").opaque #=> "foo@example.org"
281    #
    # The portion of the path that does not make use of the slash '/'.
    # The path typically refers to an absolute path or an opaque part.
284    #  (see RFC2396 Section 3 and 5.2)
285    #
286    attr_reader :opaque
287
288    # returns the fragment component of the URI.
289    #
290    #   URI("http://foo/bar/baz?search=FooBar#ponies").fragment #=> "ponies"
291    #
292    attr_reader :fragment
293
294    # returns the parser to be used.
295    #
296    # Unless a URI::Parser is defined, then DEFAULT_PARSER is used.
297    #
298    def parser
299      if !defined?(@parser) || !@parser
300        DEFAULT_PARSER
301      else
302        @parser || DEFAULT_PARSER
303      end
304    end
305
306    # replace self by other URI object
307    def replace!(oth)
308      if self.class != oth.class
309        raise ArgumentError, "expected #{self.class} object"
310      end
311
312      component.each do |c|
313        self.__send__("#{c}=", oth.__send__(c))
314      end
315    end
316    private :replace!
317
318    #
319    # Components of the URI in the order.
320    #
    def component
      # Instance-level convenience: delegates to the class-level ::component.
      self.class.component
    end
324
325    #
326    # check the scheme +v+ component against the URI::Parser Regexp for :SCHEME
327    #
328    def check_scheme(v)
329      if v && parser.regexp[:SCHEME] !~ v
330        raise InvalidComponentError,
331          "bad component(expected scheme component): #{v}"
332      end
333
334      return true
335    end
336    private :check_scheme
337
338    # protected setter for the scheme component +v+
339    #
340    # see also URI::Generic.scheme=
341    #
342    def set_scheme(v)
343      @scheme = v ? v.downcase : v
344    end
345    protected :set_scheme
346
347    #
348    # == Args
349    #
350    # +v+::
351    #    String
352    #
353    # == Description
354    #
355    # public setter for the scheme component +v+.
356    # (with validation)
357    #
358    # see also URI::Generic.check_scheme
359    #
360    # == Usage
361    #
362    #   require 'uri'
363    #
364    #   uri = URI.parse("http://my.example.com")
365    #   uri.scheme = "https"
366    #   # =>  "https"
367    #   uri
368    #   #=> #<URI::HTTP:0x000000008e89e8 URL:https://my.example.com>
369    #
    def scheme=(v)
      check_scheme(v) # raises InvalidComponentError when +v+ is not a valid scheme
      set_scheme(v)   # stores the downcased scheme
      v               # the argument as given, not the stored (downcased) value
    end
375
376    #
377    # check the +user+ and +password+.
378    #
379    # If +password+ is not provided, then +user+ is
380    # split, using URI::Generic.split_userinfo, to
    # pull +user+ and +password+.
382    #
383    # see also URI::Generic.check_user, URI::Generic.check_password
384    #
385    def check_userinfo(user, password = nil)
386      if !password
387        user, password = split_userinfo(user)
388      end
389      check_user(user)
390      check_password(password, user)
391
392      return true
393    end
394    private :check_userinfo
395
396    #
397    # check the user +v+ component for RFC2396 compliance
398    # and against the URI::Parser Regexp for :USERINFO
399    #
400    # Can not have a registry or opaque component defined,
401    # with a user component defined.
402    #
403    def check_user(v)
404      if @registry || @opaque
405        raise InvalidURIError,
406          "can not set user with registry or opaque"
407      end
408
409      return v unless v
410
411      if parser.regexp[:USERINFO] !~ v
412        raise InvalidComponentError,
413          "bad component(expected userinfo component or user component): #{v}"
414      end
415
416      return true
417    end
418    private :check_user
419
420    #
421    # check the password +v+ component for RFC2396 compliance
422    # and against the URI::Parser Regexp for :USERINFO
423    #
424    # Can not have a registry or opaque component defined,
425    # with a user component defined.
426    #
427    def check_password(v, user = @user)
428      if @registry || @opaque
429        raise InvalidURIError,
430          "can not set password with registry or opaque"
431      end
432      return v unless v
433
434      if !user
435        raise InvalidURIError,
436          "password component depends user component"
437      end
438
439      if parser.regexp[:USERINFO] !~ v
440        raise InvalidComponentError,
441          "bad component(expected user component): #{v}"
442      end
443
444      return true
445    end
446    private :check_password
447
448    #
449    # Sets userinfo, argument is string like 'name:pass'
450    #
451    def userinfo=(userinfo)
452      if userinfo.nil?
453        return nil
454      end
455      check_userinfo(*userinfo)
456      set_userinfo(*userinfo)
457      # returns userinfo
458    end
459
460    #
461    # == Args
462    #
463    # +v+::
464    #    String
465    #
466    # == Description
467    #
468    # public setter for the +user+ component.
469    # (with validation)
470    #
471    # see also URI::Generic.check_user
472    #
473    # == Usage
474    #
475    #   require 'uri'
476    #
477    #   uri = URI.parse("http://john:S3nsit1ve@my.example.com")
478    #   uri.user = "sam"
479    #   # =>  "sam"
480    #   uri
    #   #=> #<URI::HTTP:0x00000000881d90 URL:http://sam:S3nsit1ve@my.example.com>
482    #
    def user=(user)
      check_user(user) # raises on a registry/opaque conflict or an invalid user
      set_user(user)   # stores the user; its return value (+user+) is returned
      # returns user
    end
488
489    #
490    # == Args
491    #
492    # +v+::
493    #    String
494    #
495    # == Description
496    #
497    # public setter for the +password+ component.
498    # (with validation)
499    #
500    # see also URI::Generic.check_password
501    #
502    # == Usage
503    #
504    #   require 'uri'
505    #
506    #   uri = URI.parse("http://john:S3nsit1ve@my.example.com")
507    #   uri.password = "V3ry_S3nsit1ve"
508    #   # =>  "V3ry_S3nsit1ve"
509    #   uri
510    #   #=> #<URI::HTTP:0x00000000881d90 URL:http://john:V3ry_S3nsit1ve@my.example.com>
511    #
    def password=(password)
      check_password(password) # validated against the current @user
      set_password(password)   # its return value (+password+) is returned
      # returns password
    end
517
    # protected setter for the +user+ component, and +password+ if available.
    # (without validation)
520    #
521    # see also URI::Generic.userinfo=
522    #
523    def set_userinfo(user, password = nil)
524      unless password
525        user, password = split_userinfo(user)
526      end
527      @user     = user
528      @password = password if password
529
530      [@user, @password]
531    end
532    protected :set_userinfo
533
534    # protected setter for the user component +v+
535    #
536    # see also URI::Generic.user=
537    #
    def set_user(v)
      # The current @password is passed along; when it is set, set_userinfo
      # stores +v+ as-is. With no password set, set_userinfo still splits +v+
      # as 'user:password'.
      set_userinfo(v, @password)
      v
    end
542    protected :set_user
543
544    # protected setter for the password component +v+
545    #
546    # see also URI::Generic.password=
547    #
    def set_password(v)
      # Plain assignment, no validation; nil clears the password.
      @password = v
      # returns v
    end
552    protected :set_password
553
554    # returns the userinfo +ui+ as user, password
    # if properly formatted as 'user:password'
556    def split_userinfo(ui)
557      return nil, nil unless ui
558      user, password = ui.split(/:/, 2)
559
560      return user, password
561    end
562    private :split_userinfo
563
564    # escapes 'user:password' +v+ based on RFC 1738 section 3.1
565    def escape_userpass(v)
566      v = parser.escape(v, /[@:\/]/o) # RFC 1738 section 3.1 #/
567    end
568    private :escape_userpass
569
570    # returns the userinfo, either as 'user' or 'user:password'
571    def userinfo
572      if @user.nil?
573        nil
574      elsif @password.nil?
575        @user
576      else
577        @user + ':' + @password
578      end
579    end
580
581    # returns the user component
    def user
      # nil when no user is set.
      @user
    end
585
586    # returns the password component
    def password
      # nil when no password is set.
      @password
    end
590
591    #
592    # check the host +v+ component for RFC2396 compliance
593    # and against the URI::Parser Regexp for :HOST
594    #
595    # Can not have a registry or opaque component defined,
596    # with a host component defined.
597    #
598    def check_host(v)
599      return v unless v
600
601      if @registry || @opaque
602        raise InvalidURIError,
603          "can not set host with registry or opaque"
604      elsif parser.regexp[:HOST] !~ v
605        raise InvalidComponentError,
606          "bad component(expected host component): #{v}"
607      end
608
609      return true
610    end
611    private :check_host
612
613    # protected setter for the host component +v+
614    #
615    # see also URI::Generic.host=
616    #
    def set_host(v)
      # Plain assignment, no validation.
      @host = v
    end
620    protected :set_host
621
622    #
623    # == Args
624    #
625    # +v+::
626    #    String
627    #
628    # == Description
629    #
630    # public setter for the host component +v+.
631    # (with validation)
632    #
633    # see also URI::Generic.check_host
634    #
635    # == Usage
636    #
637    #   require 'uri'
638    #
639    #   uri = URI.parse("http://my.example.com")
640    #   uri.host = "foo.com"
641    #   # =>  "foo.com"
642    #   uri
643    #   #=> #<URI::HTTP:0x000000008e89e8 URL:http://foo.com>
644    #
    def host=(v)
      check_host(v) # raises InvalidURIError / InvalidComponentError on a bad host
      set_host(v)
      v
    end
650
651    # extract the host part of the URI and unwrap brackets for IPv6 addresses.
652    #
653    # This method is same as URI::Generic#host except
    # brackets for IPv6 (and future IP) addresses are removed.
655    #
656    # u = URI("http://[::1]/bar")
657    # p u.hostname      #=> "::1"
658    # p u.host          #=> "[::1]"
659    #
660    def hostname
661      v = self.host
662      /\A\[(.*)\]\z/ =~ v ? $1 : v
663    end
664
665    # set the host part of the URI as the argument with brackets for IPv6 addresses.
666    #
667    # This method is same as URI::Generic#host= except
668    # the argument can be bare IPv6 address.
669    #
670    # u = URI("http://foo/bar")
671    # p u.to_s                  #=> "http://foo/bar"
672    # u.hostname = "::1"
673    # p u.to_s                  #=> "http://[::1]/bar"
674    #
    # If the argument seems to be an IPv6 address,
676    # it is wrapped by brackets.
677    #
678    def hostname=(v)
679      v = "[#{v}]" if /\A\[.*\]\z/ !~ v && /:/ =~ v
680      self.host = v
681    end
682
683    #
684    # check the port +v+ component for RFC2396 compliance
685    # and against the URI::Parser Regexp for :PORT
686    #
687    # Can not have a registry or opaque component defined,
688    # with a port component defined.
689    #
690    def check_port(v)
691      return v unless v
692
693      if @registry || @opaque
694        raise InvalidURIError,
695          "can not set port with registry or opaque"
696      elsif !v.kind_of?(Fixnum) && parser.regexp[:PORT] !~ v
697        raise InvalidComponentError,
698          "bad component(expected port component): #{v}"
699      end
700
701      return true
702    end
703    private :check_port
704
705    # protected setter for the port component +v+
706    #
707    # see also URI::Generic.port=
708    #
709    def set_port(v)
710      unless !v || v.kind_of?(Fixnum)
711        if v.empty?
712          v = nil
713        else
714          v = v.to_i
715        end
716      end
717      @port = v
718    end
719    protected :set_port
720
721    #
722    # == Args
723    #
724    # +v+::
725    #    String
726    #
727    # == Description
728    #
729    # public setter for the port component +v+.
730    # (with validation)
731    #
732    # see also URI::Generic.check_port
733    #
734    # == Usage
735    #
736    #   require 'uri'
737    #
738    #   uri = URI.parse("http://my.example.com")
739    #   uri.port = 8080
740    #   # =>  8080
741    #   uri
742    #   #=> #<URI::HTTP:0x000000008e89e8 URL:http://my.example.com:8080>
743    #
    def port=(v)
      check_port(v) # raises InvalidURIError / InvalidComponentError on a bad port
      set_port(v)
      port          # the stored port as read back, not the argument as given
    end
749
750    #
751    # check the registry +v+ component for RFC2396 compliance
752    # and against the URI::Parser Regexp for :REGISTRY
753    #
754    # Can not have a host, port or user component defined,
755    # with a registry component defined.
756    #
757    def check_registry(v)
758      return v unless v
759
760      # raise if both server and registry are not nil, because:
761      # authority     = server | reg_name
762      # server        = [ [ userinfo "@" ] hostport ]
763      if @host || @port || @user # userinfo = @user + ':' + @password
764        raise InvalidURIError,
765          "can not set registry with host, port, or userinfo"
766      elsif v && parser.regexp[:REGISTRY] !~ v
767        raise InvalidComponentError,
768          "bad component(expected registry component): #{v}"
769      end
770
771      return true
772    end
773    private :check_registry
774
775    # protected setter for the registry component +v+
776    #
777    # see also URI::Generic.registry=
778    #
    def set_registry(v)
      # Plain assignment, no validation.
      @registry = v
    end
782    protected :set_registry
783
784    #
785    # == Args
786    #
787    # +v+::
788    #    String
789    #
790    # == Description
791    #
792    # public setter for the registry component +v+.
793    # (with validation)
794    #
795    # see also URI::Generic.check_registry
796    #
    def registry=(v)
      check_registry(v) # raises InvalidURIError / InvalidComponentError on a bad registry
      set_registry(v)
      v
    end
802
803    #
804    # check the path +v+ component for RFC2396 compliance
805    # and against the URI::Parser Regexp
806    # for :ABS_PATH and :REL_PATH
807    #
808    # Can not have a opaque component defined,
809    # with a path component defined.
810    #
811    def check_path(v)
812      # raise if both hier and opaque are not nil, because:
813      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
814      # hier_part     = ( net_path | abs_path ) [ "?" query ]
815      if v && @opaque
816        raise InvalidURIError,
817          "path conflicts with opaque"
818      end
819
820      # If scheme is ftp, path may be relative.
821      # See RFC 1738 section 3.2.2, and RFC 2396.
822      if @scheme && @scheme != "ftp"
823        if v && v != '' && parser.regexp[:ABS_PATH] !~ v
824          raise InvalidComponentError,
825            "bad component(expected absolute path component): #{v}"
826        end
827      else
828        if v && v != '' && parser.regexp[:ABS_PATH] !~ v && parser.regexp[:REL_PATH] !~ v
829          raise InvalidComponentError,
830            "bad component(expected relative path component): #{v}"
831        end
832      end
833
834      return true
835    end
836    private :check_path
837
838    # protected setter for the path component +v+
839    #
840    # see also URI::Generic.path=
841    #
    def set_path(v)
      # Plain assignment, no validation.
      @path = v
    end
845    protected :set_path
846
847    #
848    # == Args
849    #
850    # +v+::
851    #    String
852    #
853    # == Description
854    #
855    # public setter for the path component +v+.
856    # (with validation)
857    #
858    # see also URI::Generic.check_path
859    #
860    # == Usage
861    #
862    #   require 'uri'
863    #
864    #   uri = URI.parse("http://my.example.com/pub/files")
865    #   uri.path = "/faq/"
866    #   # =>  "/faq/"
867    #   uri
868    #   #=> #<URI::HTTP:0x000000008e89e8 URL:http://my.example.com/faq/>
869    #
    def path=(v)
      check_path(v) # raises InvalidURIError / InvalidComponentError on a bad path
      set_path(v)
      v
    end
875
876    #
877    # check the query +v+ component for RFC2396 compliance
878    # and against the URI::Parser Regexp for :QUERY
879    #
880    # Can not have a opaque component defined,
881    # with a query component defined.
882    #
883    def check_query(v)
884      return v unless v
885
886      # raise if both hier and opaque are not nil, because:
887      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
888      # hier_part     = ( net_path | abs_path ) [ "?" query ]
889      if @opaque
890        raise InvalidURIError,
891          "query conflicts with opaque"
892      end
893
894      if v && v != '' && parser.regexp[:QUERY] !~ v
895          raise InvalidComponentError,
896            "bad component(expected query component): #{v}"
897      end
898
899      return true
900    end
901    private :check_query
902
903    # protected setter for the query component +v+
904    #
905    # see also URI::Generic.query=
906    #
    def set_query(v)
      # Plain assignment, no validation.
      @query = v
    end
910    protected :set_query
911
912    #
913    # == Args
914    #
915    # +v+::
916    #    String
917    #
918    # == Description
919    #
920    # public setter for the query component +v+.
921    # (with validation)
922    #
923    # see also URI::Generic.check_query
924    #
925    # == Usage
926    #
927    #   require 'uri'
928    #
929    #   uri = URI.parse("http://my.example.com/?id=25")
930    #   uri.query = "id=1"
931    #   # =>  "id=1"
932    #   uri
933    #   #=> #<URI::HTTP:0x000000008e89e8 URL:http://my.example.com/?id=1>
934    #
    def query=(v)
      check_query(v) # raises InvalidURIError / InvalidComponentError on a bad query
      set_query(v)
      v
    end
940
941    #
942    # check the opaque +v+ component for RFC2396 compliance and
943    # against the URI::Parser Regexp for :OPAQUE
944    #
945    # Can not have a host, port, user or path component defined,
946    # with an opaque component defined.
947    #
948    def check_opaque(v)
949      return v unless v
950
951      # raise if both hier and opaque are not nil, because:
952      # absoluteURI   = scheme ":" ( hier_part | opaque_part )
953      # hier_part     = ( net_path | abs_path ) [ "?" query ]
954      if @host || @port || @user || @path  # userinfo = @user + ':' + @password
955        raise InvalidURIError,
956          "can not set opaque with host, port, userinfo or path"
957      elsif v && parser.regexp[:OPAQUE] !~ v
958        raise InvalidComponentError,
959          "bad component(expected opaque component): #{v}"
960      end
961
962      return true
963    end
964    private :check_opaque
965
966    # protected setter for the opaque component +v+
967    #
968    # see also URI::Generic.opaque=
969    #
    def set_opaque(v)
      # Plain assignment, no validation.
      @opaque = v
    end
973    protected :set_opaque
974
975    #
976    # == Args
977    #
978    # +v+::
979    #    String
980    #
981    # == Description
982    #
983    # public setter for the opaque component +v+.
984    # (with validation)
985    #
986    # see also URI::Generic.check_opaque
987    #
    def opaque=(v)
      check_opaque(v) # raises InvalidURIError / InvalidComponentError on a bad opaque part
      set_opaque(v)
      v
    end
993
994    #
995    # check the fragment +v+ component against the URI::Parser Regexp for :FRAGMENT
996    #
997    def check_fragment(v)
998      return v unless v
999
1000      if v && v != '' && parser.regexp[:FRAGMENT] !~ v
1001        raise InvalidComponentError,
1002          "bad component(expected fragment component): #{v}"
1003      end
1004
1005      return true
1006    end
1007    private :check_fragment
1008
1009    # protected setter for the fragment component +v+
1010    #
1011    # see also URI::Generic.fragment=
1012    #
    def set_fragment(v)
      # Plain assignment, no validation.
      @fragment = v
    end
1016    protected :set_fragment
1017
1018    #
1019    # == Args
1020    #
1021    # +v+::
1022    #    String
1023    #
1024    # == Description
1025    #
1026    # public setter for the fragment component +v+.
1027    # (with validation)
1028    #
1029    # see also URI::Generic.check_fragment
1030    #
1031    # == Usage
1032    #
1033    #   require 'uri'
1034    #
1035    #   uri = URI.parse("http://my.example.com/?id=25#time=1305212049")
1036    #   uri.fragment = "time=1305212086"
1037    #   # =>  "time=1305212086"
1038    #   uri
1039    #   #=> #<URI::HTTP:0x000000007a81f8 URL:http://my.example.com/?id=25#time=1305212086>
1040    #
    def fragment=(v)
      check_fragment(v) # raises InvalidComponentError on a bad fragment
      set_fragment(v)
      v
    end
1046
1047    #
1048    # Checks if URI has a path
1049    #
1050    def hierarchical?
1051      if @path
1052        true
1053      else
1054        false
1055      end
1056    end
1057
1058    #
1059    # Checks if URI is an absolute one
1060    #
1061    def absolute?
1062      if @scheme
1063        true
1064      else
1065        false
1066      end
1067    end
1068    alias absolute absolute?
1069
1070    #
1071    # Checks if URI is relative
1072    #
    def relative?
      # Exact negation of #absolute?, i.e. true when no scheme is set.
      !absolute?
    end
1076
1077    #
1078    # returns an Array of the path split on '/'
1079    #
    def split_path(path)
      # Runs of consecutive slashes count as one separator; the -1 limit
      # keeps trailing empty strings, so a trailing '/' is preserved.
      path.split(%r{/+}, -1)
    end
1083    private :split_path
1084
1085    #
1086    # Merges a base path +base+, with relative path +rel+,
1087    # returns a modified base path.
1088    #
    def merge_path(base, rel)
      # +base+ and +rel+ are path Strings; the result is a new path String.
      # Both are split into segments, merged, and joined with '/'.

      # RFC2396, Section 5.2, 5)
      # RFC2396, Section 5.2, 6)
      base_path = split_path(base)
      rel_path  = split_path(rel)

      # RFC2396, Section 5.2, 6), a)
      # A base ending in '..' gets an empty trailing segment; then every '..'
      # in base is removed together with the segment in front of it.
      # NOTE(review): for a '..' at index 0, slice!(-1, 2) addresses the LAST
      # element instead -- confirm that is intended.
      base_path << '' if base_path.last == '..'
      while i = base_path.index('..')
        base_path.slice!(i - 1, 2)
      end

      # An empty first segment means rel started with '/': it is absolute,
      # so the base segments are discarded.
      if (first = rel_path.first) and first.empty?
        base_path.clear
        rel_path.shift
      end

      # RFC2396, Section 5.2, 6), c)
      # RFC2396, Section 5.2, 6), d)
      # A trailing '.' or '..' gets an empty segment appended (so the result
      # ends with '/'), then every '.' segment is dropped.
      rel_path.push('') if rel_path.last == '.' || rel_path.last == '..'
      rel_path.delete('.')

      # RFC2396, Section 5.2, 6), e)
      # Collapse '<segment>/..' pairs inside rel; leading '..' segments that
      # have nothing to cancel are kept in tmp.
      tmp = []
      rel_path.each do |x|
        if x == '..' &&
            !(tmp.empty? || tmp.last == '..')
          tmp.pop
        else
          tmp << x
        end
      end

      # When rel contributes segments, the last base segment is dropped
      # before they are appended.
      add_trailer_slash = !tmp.empty?
      if base_path.empty?
        base_path = [''] # keep '/' for root directory
      elsif add_trailer_slash
        base_path.pop
      end
      # Each leading '..' left in tmp pops one more base segment; the first
      # other segment and everything after it are appended as they are.
      while x = tmp.shift
        if x == '..'
          # RFC2396, Section 4
          # a .. or . in an absolute path has no special meaning
          base_path.pop if base_path.size > 1
        else
          # if x == '..'
          #   valid absolute (but abnormal) path "/../..."
          # else
          #   valid absolute path
          # end
          base_path << x
          tmp.each {|t| base_path << t}
          add_trailer_slash = false
          break
        end
      end
      # Only reached with the flag still set when tmp held nothing but '..'.
      base_path.push('') if add_trailer_slash

      return base_path.join('/')
    end
1150    private :merge_path
1151
1152    #
1153    # == Args
1154    #
1155    # +oth+::
1156    #    URI or String
1157    #
1158    # == Description
1159    #
1160    # Destructive form of #merge
1161    #
1162    # == Usage
1163    #
1164    #   require 'uri'
1165    #
1166    #   uri = URI.parse("http://my.example.com")
1167    #   uri.merge!("/main.rbx?page=1")
1168    #   p uri
1169    #   # =>  #<URI::HTTP:0x2021f3b0 URL:http://my.example.com/main.rbx?page=1>
1170    #
1171    def merge!(oth)
1172      t = merge(oth)
1173      if self == t
1174        nil
1175      else
1176        replace!(t)
1177        self
1178      end
1179    end
1180
1181    #
1182    # == Args
1183    #
1184    # +oth+::
1185    #    URI or String
1186    #
1187    # == Description
1188    #
1189    # Merges two URI's.
1190    #
1191    # == Usage
1192    #
1193    #   require 'uri'
1194    #
1195    #   uri = URI.parse("http://my.example.com")
1196    #   p uri.merge("/main.rbx?page=1")
1197    #   # =>  #<URI::HTTP:0x2021f3b0 URL:http://my.example.com/main.rbx?page=1>
1198    #
1199    def merge(oth)
1200      begin
1201        base, rel = merge0(oth)
1202      rescue
1203        raise $!.class, $!.message
1204      end
1205
1206      if base == rel
1207        return base
1208      end
1209
1210      authority = rel.userinfo || rel.host || rel.port
1211
1212      # RFC2396, Section 5.2, 2)
1213      if (rel.path.nil? || rel.path.empty?) && !authority && !rel.query
1214        base.set_fragment(rel.fragment) if rel.fragment
1215        return base
1216      end
1217
1218      base.set_query(nil)
1219      base.set_fragment(nil)
1220
1221      # RFC2396, Section 5.2, 4)
1222      if !authority
1223        base.set_path(merge_path(base.path, rel.path)) if base.path && rel.path
1224      else
1225        # RFC2396, Section 5.2, 4)
1226        base.set_path(rel.path) if rel.path
1227      end
1228
1229      # RFC2396, Section 5.2, 7)
1230      base.set_userinfo(rel.userinfo) if rel.userinfo
1231      base.set_host(rel.host)         if rel.host
1232      base.set_port(rel.port)         if rel.port
1233      base.set_query(rel.query)       if rel.query
1234      base.set_fragment(rel.fragment) if rel.fragment
1235
1236      return base
1237    end # merge
1238    alias + merge
1239
    # Returns base and rel.
    # You may modify `base', but must not modify `rel'.
1242    def merge0(oth)
1243      oth = parser.send(:convert_to_uri, oth)
1244
1245      if self.relative? && oth.relative?
1246        raise BadURIError,
1247          "both URI are relative"
1248      end
1249
1250      if self.absolute? && oth.absolute?
1251        #raise BadURIError,
1252        #  "both URI are absolute"
1253        # hmm... should return oth for usability?
1254        return oth, oth
1255      end
1256
1257      if self.absolute?
1258        return self.dup, oth
1259      else
1260        return oth, oth
1261      end
1262    end
1263    private :merge0
1264
1265    # :stopdoc:
1266    def route_from_path(src, dst)
1267      case dst
1268      when src
1269        # RFC2396, Section 4.2
1270        return ''
1271      when %r{(?:\A|/)\.\.?(?:/|\z)}
1272        # dst has abnormal absolute path,
1273        # like "/./", "/../", "/x/../", ...
1274        return dst.dup
1275      end
1276
1277      src_path = src.scan(%r{(?:\A|[^/]+)/})
1278      dst_path = dst.scan(%r{(?:\A|[^/]+)/?})
1279
1280      # discard same parts
1281      while !dst_path.empty? && dst_path.first == src_path.first
1282        src_path.shift
1283        dst_path.shift
1284      end
1285
1286      tmp = dst_path.join
1287
1288      # calculate
1289      if src_path.empty?
1290        if tmp.empty?
1291          return './'
1292        elsif dst_path.first.include?(':') # (see RFC2396 Section 5)
1293          return './' + tmp
1294        else
1295          return tmp
1296        end
1297      end
1298
1299      return '../' * src_path.size + tmp
1300    end
1301    private :route_from_path
1302    # :startdoc:
1303
1304    # :stopdoc:
1305    def route_from0(oth)
1306      oth = parser.send(:convert_to_uri, oth)
1307      if self.relative?
1308        raise BadURIError,
1309          "relative URI: #{self}"
1310      end
1311      if oth.relative?
1312        raise BadURIError,
1313          "relative URI: #{oth}"
1314      end
1315
1316      if self.scheme != oth.scheme
1317        return self, self.dup
1318      end
1319      rel = URI::Generic.new(nil, # it is relative URI
1320                             self.userinfo, self.host, self.port,
1321                             self.registry, self.path, self.opaque,
1322                             self.query, self.fragment, parser)
1323
1324      if rel.userinfo != oth.userinfo ||
1325          rel.host.to_s.downcase != oth.host.to_s.downcase ||
1326          rel.port != oth.port
1327
1328        if self.userinfo.nil? && self.host.nil?
1329          return self, self.dup
1330        end
1331
1332        rel.set_port(nil) if rel.port == oth.default_port
1333        return rel, rel
1334      end
1335      rel.set_userinfo(nil)
1336      rel.set_host(nil)
1337      rel.set_port(nil)
1338
1339      if rel.path && rel.path == oth.path
1340        rel.set_path('')
1341        rel.set_query(nil) if rel.query == oth.query
1342        return rel, rel
1343      elsif rel.opaque && rel.opaque == oth.opaque
1344        rel.set_opaque('')
1345        rel.set_query(nil) if rel.query == oth.query
1346        return rel, rel
1347      end
1348
1349      # you can modify `rel', but can not `oth'.
1350      return oth, rel
1351    end
1352    private :route_from0
1353    # :startdoc:
1354
1355    #
1356    # == Args
1357    #
1358    # +oth+::
1359    #    URI or String
1360    #
1361    # == Description
1362    #
1363    # Calculates relative path from oth to self
1364    #
1365    # == Usage
1366    #
1367    #   require 'uri'
1368    #
1369    #   uri = URI.parse('http://my.example.com/main.rbx?page=1')
1370    #   p uri.route_from('http://my.example.com')
1371    #   #=> #<URI::Generic:0x20218858 URL:/main.rbx?page=1>
1372    #
1373    def route_from(oth)
1374      # you can modify `rel', but can not `oth'.
1375      begin
1376        oth, rel = route_from0(oth)
1377      rescue
1378        raise $!.class, $!.message
1379      end
1380      if oth == rel
1381        return rel
1382      end
1383
1384      rel.set_path(route_from_path(oth.path, self.path))
1385      if rel.path == './' && self.query
1386        # "./?foo" -> "?foo"
1387        rel.set_path('')
1388      end
1389
1390      return rel
1391    end
1392
1393    alias - route_from
1394
1395    #
1396    # == Args
1397    #
1398    # +oth+::
1399    #    URI or String
1400    #
1401    # == Description
1402    #
1403    # Calculates relative path to oth from self
1404    #
1405    # == Usage
1406    #
1407    #   require 'uri'
1408    #
1409    #   uri = URI.parse('http://my.example.com')
1410    #   p uri.route_to('http://my.example.com/main.rbx?page=1')
1411    #   #=> #<URI::Generic:0x2020c2f6 URL:/main.rbx?page=1>
1412    #
1413    def route_to(oth)
1414      parser.send(:convert_to_uri, oth).route_from(self)
1415    end
1416
1417    #
1418    # Returns normalized URI
1419    #
1420    def normalize
1421      uri = dup
1422      uri.normalize!
1423      uri
1424    end
1425
1426    #
1427    # Destructive version of #normalize
1428    #
1429    def normalize!
1430      if path && path == ''
1431        set_path('/')
1432      end
1433      if scheme && scheme != scheme.downcase
1434        set_scheme(self.scheme.downcase)
1435      end
1436      if host && host != host.downcase
1437        set_host(self.host.downcase)
1438      end
1439    end
1440
1441    # returns the assemble String with path and query components
1442    def path_query
1443      str = @path
1444      if @query
1445        str += '?' + @query
1446      end
1447      str
1448    end
1449    private :path_query
1450
1451    #
1452    # Constructs String from URI
1453    #
1454    def to_s
1455      str = ''
1456      if @scheme
1457        str << @scheme
1458        str << ':'
1459      end
1460
1461      if @opaque
1462        str << @opaque
1463
1464      else
1465        if @registry
1466          str << @registry
1467        else
1468          if @host
1469            str << '//'
1470          end
1471          if self.userinfo
1472            str << self.userinfo
1473            str << '@'
1474          end
1475          if @host
1476            str << @host
1477          end
1478          if @port && @port != self.default_port
1479            str << ':'
1480            str << @port.to_s
1481          end
1482        end
1483
1484        str << path_query
1485      end
1486
1487      if @fragment
1488        str << '#'
1489        str << @fragment
1490      end
1491
1492      str
1493    end
1494
1495    #
    # Compares two URIs
1497    #
1498    def ==(oth)
1499      if self.class == oth.class
1500        self.normalize.component_ary == oth.normalize.component_ary
1501      else
1502        false
1503      end
1504    end
1505
1506    def hash
1507      self.component_ary.hash
1508    end
1509
1510    def eql?(oth)
1511      self.class == oth.class &&
1512      parser == oth.parser &&
1513      self.component_ary.eql?(oth.component_ary)
1514    end
1515
1516=begin
1517
1518--- URI::Generic#===(oth)
1519
1520=end
1521#    def ===(oth)
1522#      raise NotImplementedError
1523#    end
1524
1525=begin
1526=end
1527
1528
1529    # returns an Array of the components defined from the COMPONENT Array
1530    def component_ary
1531      component.collect do |x|
1532        self.send(x)
1533      end
1534    end
1535    protected :component_ary
1536
1537    # == Args
1538    #
1539    # +components+::
1540    #    Multiple Symbol arguments defined in URI::HTTP
1541    #
1542    # == Description
1543    #
1544    # Selects specified components from URI
1545    #
1546    # == Usage
1547    #
1548    #   require 'uri'
1549    #
1550    #   uri = URI.parse('http://myuser:mypass@my.example.com/test.rbx')
1551    #   p uri.select(:userinfo, :host, :path)
1552    #   # => ["myuser:mypass", "my.example.com", "/test.rbx"]
1553    #
1554    def select(*components)
1555      components.collect do |c|
1556        if component.include?(c)
1557          self.send(c)
1558        else
1559          raise ArgumentError,
1560            "expected of components of #{self.class} (#{self.class.component.join(', ')})"
1561        end
1562      end
1563    end
1564
1565    @@to_s = Kernel.instance_method(:to_s)
1566    def inspect
1567      @@to_s.bind(self).call.sub!(/>\z/) {" URL:#{self}>"}
1568    end
1569
1570    #
1571    # == Args
1572    #
    # +oth+::
    #    URI or String
    #
    # == Description
    #
    # Attempts to parse other URI +oth+ and
    # returns [parsed_oth, self].
1580    #
1581    # == Usage
1582    #
1583    #   require 'uri'
1584    #
1585    #   uri = URI.parse("http://my.example.com")
1586    #   uri.coerce("http://foo.com")
1587    #   #=> [#<URI::HTTP:0x00000000bcb028 URL:http://foo.com/>, #<URI::HTTP:0x00000000d92178 URL:http://my.example.com>]
1588    #
1589    def coerce(oth)
1590      case oth
1591      when String
1592        oth = parser.parse(oth)
1593      else
1594        super
1595      end
1596
1597      return oth, self
1598    end
1599
1600    # returns a proxy URI.
1601    # The proxy URI is obtained from environment variables such as http_proxy,
1602    # ftp_proxy, no_proxy, etc.
1603    # If there is no proper proxy, nil is returned.
1604    #
1605    # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
1606    # are examined too.
1607    #
    # But http_proxy and HTTP_PROXY are treated specially under a CGI environment,
    # because HTTP_PROXY may be set by a Proxy: request header.
    # So HTTP_PROXY is not used.
    # http_proxy is not used either if the variable is case insensitive.
    # CGI_HTTP_PROXY can be used instead.
1613    def find_proxy
1614      raise BadURIError, "relative URI: #{self}" if self.relative?
1615      name = self.scheme.downcase + '_proxy'
1616      proxy_uri = nil
1617      if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
1618        # HTTP_PROXY conflicts with *_proxy for proxy settings and
1619        # HTTP_* for header information in CGI.
1620        # So it should be careful to use it.
1621        pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
1622        case pairs.length
1623        when 0 # no proxy setting anyway.
1624          proxy_uri = nil
1625        when 1
1626          k, _ = pairs.shift
1627          if k == 'http_proxy' && ENV[k.upcase] == nil
1628            # http_proxy is safe to use because ENV is case sensitive.
1629            proxy_uri = ENV[name]
1630          else
1631            proxy_uri = nil
1632          end
1633        else # http_proxy is safe to use because ENV is case sensitive.
1634          proxy_uri = ENV.to_hash[name]
1635        end
1636        if !proxy_uri
1637          # Use CGI_HTTP_PROXY.  cf. libwww-perl.
1638          proxy_uri = ENV["CGI_#{name.upcase}"]
1639        end
1640      elsif name == 'http_proxy'
1641        unless proxy_uri = ENV[name]
1642          if proxy_uri = ENV[name.upcase]
1643            warn 'The environment variable HTTP_PROXY is discouraged.  Use http_proxy.'
1644          end
1645        end
1646      else
1647        proxy_uri = ENV[name] || ENV[name.upcase]
1648      end
1649
1650      if proxy_uri.nil? || proxy_uri.empty?
1651        return nil
1652      end
1653
1654      if self.hostname
1655        require 'socket'
1656        begin
1657          addr = IPSocket.getaddress(self.hostname)
1658          return nil if /\A127\.|\A::1\z/ =~ addr
1659        rescue SocketError
1660        end
1661      end
1662
1663      name = 'no_proxy'
1664      if no_proxy = ENV[name] || ENV[name.upcase]
1665        no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
1666          if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
1667            (!port || self.port == port.to_i)
1668            return nil
1669          end
1670        }
1671      end
1672      URI.parse(proxy_uri)
1673    end
1674  end
1675end
1676