package URI;

use strict;
use vars qw($VERSION);
$VERSION = "1.34";   # $Date: 2004/10/05 08:36:13 $

use vars qw($ABS_REMOTE_LEADING_DOTS $ABS_ALLOW_RELATIVE_SCHEME);

my %implements;  # mapping from scheme to implementor class

# Some "official" character classes.  These are fragments meant to be
# interpolated inside a regexp character class ([...]), not complete
# patterns.

use vars qw($reserved $mark $unreserved $uric $scheme_re);
$reserved   = q(;/?:@&=+$,[]);
$mark       = q(-_.!~*'());                                    #'; emacs
$unreserved = "A-Za-z0-9\Q$mark\E";
$uric       = quotemeta($reserved) . $unreserved . "%";

# Pattern matching a scheme name: a letter followed by letters, digits,
# ".", "+" or "-".
$scheme_re  = '[a-zA-Z][a-zA-Z0-9.+\-]*';

use Carp ();
use URI::Escape ();

# Stringification yields the URI text.  '==' and '!=' test whether two
# references denote the very same object; use the eq() method to compare
# URIs by value.  '!=' must be listed explicitly: overload does not
# derive it from '==', and with fallback enabled it would otherwise
# compare the numeric value of the stringified URIs.
use overload ('""'     => sub { ${$_[0]} },
              '=='     => sub { _obj_eq(@_) },
              '!='     => sub { !_obj_eq(@_) },
              fallback => 1,
             );

# Return true if the first two arguments are the same object (compared
# by their non-overloaded string form).
sub _obj_eq
{
    return overload::StrVal($_[0]) eq overload::StrVal($_[1]);
}

# URI->new($str)
# URI->new($str, $scheme)
#
# Construct a URI object from $str.  The scheme is taken from $str when
# it starts with "<scheme>:"; otherwise it comes from $scheme, which may
# be a plain scheme name, a string starting with a scheme, or an object
# that has a scheme() method (in which case the new object gets the same
# class as that object).  The implementing class is looked up with
# implementor(); URI::_foreign is used when no class is available.
sub new
{
    my($class, $uri, $scheme) = @_;

    $uri = defined ($uri) ? "$uri" : "";   # stringify
    # Get rid of potential wrapping: <...>, <URL:...>, "..." and
    # leading/trailing white space
    $uri =~ s/^<(?:URL:)?(.*)>$/$1/;
    $uri =~ s/^"(.*)"$/$1/;
    $uri =~ s/^\s+//;
    $uri =~ s/\s+$//;

    my $impclass;
    if ($uri =~ m/^($scheme_re):/so) {
        $scheme = $1;
    }
    else {
        if (($impclass = ref($scheme))) {
            $scheme = $scheme->scheme;
        }
        elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) {
            $scheme = $1;
        }
    }
    $impclass ||= implementor($scheme) ||
        do {
            require URI::_foreign;
            $impclass = 'URI::_foreign';
        };

    return $impclass->_init($uri, $scheme);
}


# URI->new_abs($str, $base)
#
# Construct a URI from $str (using $base as the scheme hint) and return
# the result of calling abs($base) on it.
sub new_abs
{
    my($class, $uri, $base) = @_;
    $uri = $class->new($uri, $base);
    $uri->abs($base);
}


# $class->_init($str, $scheme)
#
# Low-level constructor used by new().  Every character that is neither
# in $uric nor "#" is replaced through the %URI::Escape::escapes table.
# "$scheme:" is prepended unless $str already starts with a scheme or
# the class says (via _no_scheme_ok) that scheme-less URIs are fine.
# The object is a blessed reference to the resulting string.
sub _init
{
    my $class = shift;
    my($str, $scheme) = @_;
    $str =~ s/([^$uric\#])/$URI::Escape::escapes{$1}/go;
    $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o ||
                                 $class->_no_scheme_ok;
    my $self = bless \$str, $class;
    $self;
}


# implementor($scheme)
# implementor($scheme, $impclass)
#
# Plain function (not a method).  With one argument, return the name of
# the class implementing $scheme, loading "URI::<scheme>" on demand;
# nothing is returned when no such class can be found.  With two
# arguments, register $impclass for $scheme and return the class that
# was registered before.  A false or syntactically invalid $scheme
# always yields "URI::_generic".
sub implementor
{
    my($scheme, $impclass) = @_;
    if (!$scheme || $scheme !~ /\A$scheme_re\z/o) {
        require URI::_generic;
        return "URI::_generic";
    }

    $scheme = lc($scheme);

    if ($impclass) {
        # Set the implementor class for a given scheme
        my $old = $implements{$scheme};
        $impclass->_init_implementor($scheme);
        $implements{$scheme} = $impclass;
        return $old;
    }

    my $ic = $implements{$scheme};
    return $ic if $ic;

    # scheme not yet known, look for internal or
    # preloaded (with 'use') implementation
    $ic = "URI::$scheme";  # default location

    # turn scheme into a valid perl identifier by a simple transformation...
    $ic =~ s/\+/_P/g;
    $ic =~ s/\./_O/g;
    $ic =~ s/\-/_/g;

    no strict 'refs';
    # check we actually have one for the scheme:
    unless (@{"${ic}::ISA"}) {
        # Try to load it.  The caller's $@ is saved and restored by hand
        # so that a failed (and deliberately ignored) require does not
        # leak out of this function.
        my $old_error = $@;
        eval "require $ic";
        die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/;
        $@ = $old_error;
        return unless @{"${ic}::ISA"};
    }

    $ic->_init_implementor($scheme);
    $implements{$scheme} = $ic;
    $ic;
}


# $class->_init_implementor($scheme)
#
# Hook called when a class is associated with a scheme.  Does nothing
# here.
sub _init_implementor
{
    my($class, $scheme) = @_;
    # Remember that one implementor class may actually
    # serve to implement several URI schemes.
}


# Return a copy of the object, blessed into the same class.
sub clone
{
    my $self = shift;
    my $other = $$self;
    bless \$other, ref $self;
}


# Whether objects of this class may lack a scheme; false in this class.
sub _no_scheme_ok { 0 }

# $uri->_scheme
# $uri->_scheme($new)
#
# Get or set the scheme, in its original letter case.  Without argument
# the current scheme is returned (nothing if there is none).  With a
# non-empty argument the scheme is replaced, which re-parses the URI and
# may rebless $uri into another class; croaks on an illegal scheme name.
# With an undefined or empty argument the scheme is removed, but only if
# _no_scheme_ok allows it.  The setter forms return the old scheme.
sub _scheme
{
    my $self = shift;

    unless (@_) {
        return unless $$self =~ /^($scheme_re):/o;
        return $1;
    }

    my $old;
    my $new = shift;
    if (defined($new) && length($new)) {
        # \A...\z rather than ^...$: "$" would also accept a name with a
        # trailing newline.
        Carp::croak("Bad scheme '$new'") unless $new =~ /\A$scheme_re\z/o;
        $old = $1 if $$self =~ s/^($scheme_re)://o;
        my $newself = URI->new("$new:$$self");
        $$self = $$newself;
        bless $self, ref($newself);
    }
    else {
        if ($self->_no_scheme_ok) {
            $old = $1 if $$self =~ s/^($scheme_re)://o;
            Carp::carp("Oops, opaque part now look like scheme")
                if $^W && $$self =~ m/^$scheme_re:/o
        }
        else {
            $old = $1 if $$self =~ m/^($scheme_re):/o;
        }
    }

    return $old;
}

# Same as _scheme(), but the returned (old) scheme is lowercased.
sub scheme
{
    my $scheme = shift->_scheme(@_);
    return unless defined $scheme;
    lc($scheme);
}


# $uri->opaque
# $uri->opaque($new_opaque)
#
# Get or set the part between the scheme and the fragment.  When
# setting, characters outside $uric are escaped (so a "#" in the new
# value is escaped too) and the old value is returned.  An undefined
# argument is treated as the empty string.
sub opaque
{
    my $self = shift;

    unless (@_) {
        $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die;
        return $1;
    }

    $$self =~ /^($scheme_re:)?    # optional scheme
                ([^\#]*)          # opaque
                (\#.*)?           # optional fragment
              $/sx or die;

    my $old_scheme = $1;
    my $old_opaque = $2;
    my $old_frag   = $3;

    my $new_opaque = shift;
    $new_opaque = "" unless defined $new_opaque;
    $new_opaque =~ s/([^$uric])/$URI::Escape::escapes{$1}/go;

    $$self = defined($old_scheme) ? $old_scheme : "";
    $$self .= $new_opaque;
    $$self .= $old_frag if defined $old_frag;

    $old_opaque;
}

*path = \&opaque;  # alias


# $uri->fragment
# $uri->fragment($new_frag)
#
# Get or set the fragment (the text after the first "#").  Nothing is
# returned when there is no fragment.  When setting, the old fragment is
# removed and returned; a defined new value is escaped and appended,
# while an undefined one leaves the URI without a fragment.
sub fragment
{
    my $self = shift;
    unless (@_) {
        return unless $$self =~ /\#(.*)/s;
        return $1;
    }

    my $old;
    $old = $1 if $$self =~ s/\#(.*)//s;

    my $new_frag = shift;
    if (defined $new_frag) {
        $new_frag =~ s/([^$uric])/$URI::Escape::escapes{$1}/go;
        $$self .= "#$new_frag";
    }
    $old;
}


# Return the URI as a plain string.
sub as_string
{
    my $self = shift;
    $$self;
}


# Return a normalized URI.  $self itself is returned when nothing needs
# to change; otherwise a modified clone is returned.
sub canonical
{
    # Make sure scheme is lowercased, that we don't escape unreserved chars,
    # and that we use upcase escape sequences.

    my $self = shift;
    my $scheme = $self->_scheme || "";
    my $uc_scheme = $scheme =~ /[A-Z]/;
    my $esc = $$self =~ /%[a-fA-F0-9]{2}/;
    return $self unless $uc_scheme || $esc;

    my $other = $self->clone;
    if ($uc_scheme) {
        $other->_scheme(lc $scheme);
    }
    if ($esc) {
        # Decode each escape; keep the plain character if it is
        # unreserved, else re-emit the escape with uppercase hex digits.
        $$other =~ s{%([0-9a-fA-F]{2})}
                    { my $chr = chr(hex($1));
                      $chr =~ /^[$unreserved]\z/o ? $chr : "%\U$1"
                    }ge;
    }
    return $other;
}

# Compare two URIs, subclasses will provide a more correct implementation.
# Usable as a method or as a plain function; a non-reference argument is
# first turned into a URI object, using the other argument as the scheme
# hint.  True when both objects have the same class and the same
# canonical string.
sub eq {
    my($self, $other) = @_;
    $self  = URI->new($self, $other) unless ref $self;
    $other = URI->new($other, $self) unless ref $other;
    ref($self) eq ref($other) &&                # same class
        $self->canonical->as_string eq $other->canonical->as_string;
}

# generic-URI transformation methods; in this class both simply return
# the object itself
sub abs { $_[0]; }
sub rel { $_[0]; }

# help out Storable: an object is serialized as its plain URI string
sub STORABLE_freeze {
    my($self, $cloning) = @_;
    return $$self;
}

sub STORABLE_thaw {
    my($self, $cloning, $str) = @_;
    $$self = $str;
}

1;

__END__

=head1 NAME

URI - Uniform Resource Identifiers (absolute and relative)

=head1 SYNOPSIS

 $u1 = URI->new("http://www.perl.com");
 $u2 = URI->new("foo", "http");
 $u3 = $u2->abs($u1);
 $u4 = $u3->clone;
 $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical;

 $str = $u->as_string;
 $str = "$u";

 $scheme = $u->scheme;
 $opaque = $u->opaque;
 $path   = $u->path;
 $frag   = $u->fragment;

 $u->scheme("ftp");
 $u->host("ftp.perl.com");
 $u->path("cpan/");

=head1 DESCRIPTION

This module implements the C<URI> class. Objects of this class
represent "Uniform Resource Identifier references" as specified in RFC
2396 (and updated by RFC 2732).

A Uniform Resource Identifier is a compact string of characters that
identifies an abstract or physical resource. A Uniform Resource
Identifier can be further classified as either a Uniform Resource Locator
(URL) or a Uniform Resource Name (URN). The distinction between URL
and URN does not matter to the C<URI> class interface. A
"URI-reference" is a URI that may have additional information attached
in the form of a fragment identifier.

An absolute URI reference consists of three parts: a I<scheme>, a
I<scheme-specific part> and a I<fragment> identifier. A subset of URI
references share a common syntax for hierarchical namespaces. For
these, the scheme-specific part is further broken down into
I<authority>, I<path> and I<query> components. These URIs can also
take the form of relative URI references, where the scheme (and
usually also the authority) component is missing, but implied by the
context of the URI reference. The three forms of URI reference
syntax are summarized as follows:

 <scheme>:<scheme-specific-part>#<fragment>
 <scheme>://<authority><path>?<query>#<fragment>
 <path>?<query>#<fragment>

The components into which a URI reference can be divided depend on the
I<scheme>. The C<URI> class provides methods to get and set the
individual components. The methods available for a specific
C<URI> object depend on the scheme.

=head1 CONSTRUCTORS

The following methods construct new C<URI> objects:

=over 4

=item $uri = URI->new( $str )

=item $uri = URI->new( $str, $scheme )

Constructs a new URI object. The string
representation of a URI is given as argument, together with an optional
scheme specification. Common URI wrappers like "" and <>, as well as
leading and trailing white space, are automatically removed from
the $str argument before it is processed further.

The constructor determines the scheme, maps this to an appropriate
URI subclass, constructs a new object of that class and returns it.

The $scheme argument is only used when $str is a
relative URI. It can be either a simple string that
denotes the scheme, a string containing an absolute URI reference, or
an absolute C<URI> object. If no $scheme is specified for a relative
URI $str, then $str is simply treated as a generic URI (no scheme-specific
methods available).

The set of characters available for building URI references is
restricted (see L<URI::Escape>). Characters outside this set are
automatically escaped by the URI constructor.

=item $uri = URI->new_abs( $str, $base_uri )

Constructs a new absolute URI object. The $str argument can
denote a relative or absolute URI. If relative, then it is
absolutized using $base_uri as base. The $base_uri must be an absolute
URI.

=item $uri = URI::file->new( $filename )

=item $uri = URI::file->new( $filename, $os )

Constructs a new I<file> URI from a file name. See L<URI::file>.

=item $uri = URI::file->new_abs( $filename )

=item $uri = URI::file->new_abs( $filename, $os )

Constructs a new absolute I<file> URI from a file name. See
L<URI::file>.

=item $uri = URI::file->cwd

Returns the current working directory as a I<file> URI. See
L<URI::file>.

=item $uri->clone

Returns a copy of the $uri.

=back

=head1 COMMON METHODS

The methods described in this section are available for all C<URI>
objects.

Methods that give access to components of a URI always return the
old value of the component. The value returned is C<undef> if the
component was not present. There is generally a difference between a
component that is empty (represented as C<"">) and a component that is
missing (represented as C<undef>). If an accessor method is given an
argument, it updates the corresponding component in addition to
returning the old value of the component. Passing an undefined
argument removes the component (if possible). The description of
each accessor method indicates whether the component is passed as
an escaped or an unescaped string. A component that can be further
divided into sub-parts is usually passed escaped, as unescaping might
change its semantics.

The common methods available for all URIs are:

=over 4

=item $uri->scheme

=item $uri->scheme( $new_scheme )

Sets and returns the scheme part of the $uri. If the $uri is
relative, then $uri->scheme returns C<undef>. If called with an
argument, it updates the scheme of $uri, possibly changing the
class of $uri, and returns the old scheme value. The method croaks
if the new scheme name is illegal; a scheme name must begin with a
letter and must consist of only US-ASCII letters, numbers, and a few
special marks: ".", "+", "-". This restriction effectively means
that the scheme must be passed unescaped. Passing an undefined
argument to the scheme method makes the URI relative (if possible).

Letter case does not matter for scheme names. The string
returned by $uri->scheme is always lowercase. If you want the scheme
just as it was written in the URI in its original case,
you can use the $uri->_scheme method instead.

=item $uri->opaque

=item $uri->opaque( $new_opaque )

Sets and returns the scheme-specific part of the $uri
(everything between the scheme and the fragment)
as an escaped string.

=item $uri->path

=item $uri->path( $new_path )

Sets and returns the same value as $uri->opaque unless the URI
supports the generic syntax for hierarchical namespaces.
In that case the generic method is overridden to set and return
the part of the URI between the I<host name> and the I<fragment>.

=item $uri->fragment

=item $uri->fragment( $new_frag )

Sets and returns the fragment identifier of a URI reference
as an escaped string.

=item $uri->as_string

Returns the URI object as a plain string. URI objects are
also converted to plain strings automatically by overloading. This
means that $uri objects can be used as plain strings in most Perl
constructs.

=item $uri->canonical

Returns a normalized version of the URI. The rules
for normalization are scheme-dependent. They usually involve
lowercasing the scheme and Internet host name components,
removing the explicit port specification if it matches the default port,
uppercasing all escape sequences, and unescaping octets that can be
better represented as plain characters.

For efficiency reasons, if the $uri is already in normalized form,
then a reference to it is returned instead of a copy.

=item $uri->eq( $other_uri )

=item URI::eq( $first_uri, $other_uri )

Tests whether two URI references are equal. URI references
that normalize to the same string are considered equal. The method
can also be called as a plain function, in which case it can test two
string arguments as well.

If you need to test whether two C<URI> object references denote the
same object, use the '==' operator.

=item $uri->abs( $base_uri )

Returns an absolute URI reference. If $uri is already
absolute, then a reference to it is simply returned. If the $uri
is relative, then a new absolute URI is constructed by combining the
$uri and the $base_uri, and returned.

=item $uri->rel( $base_uri )

Returns a relative URI reference if it is possible to
make one that denotes the same resource relative to $base_uri.
If not, then $uri is simply returned.

=back

=head1 GENERIC METHODS

The following methods are available to schemes that use the
common/generic syntax for hierarchical namespaces. The descriptions of
schemes below indicate which these are. Unknown schemes are
assumed to support the generic syntax, and therefore the following
methods:

=over 4

=item $uri->authority

=item $uri->authority( $new_authority )

Sets and returns the escaped authority component
of the $uri.

=item $uri->path

=item $uri->path( $new_path )

Sets and returns the escaped path component of
the $uri (the part between the host name and the query or fragment).
The path can never be undefined, but it can be the empty string.

=item $uri->path_query

=item $uri->path_query( $new_path_query )

Sets and returns the escaped path and query
components as a single entity. The path and the query are
separated by a "?" character, but the query can itself contain "?".

=item $uri->path_segments

=item $uri->path_segments( $segment, ... )

Sets and returns the path. In a scalar context, it returns
the same value as $uri->path. In a list context, it returns the
unescaped path segments that make up the path. Path segments that
have parameters are returned as an anonymous array. The first element
is the unescaped path segment proper; subsequent elements are escaped
parameter strings. Such an anonymous array uses overloading so it can
be treated as a string too, but this string does not include the
parameters.

=item $uri->query

=item $uri->query( $new_query )

Sets and returns the escaped query component of
the $uri.

=item $uri->query_form

=item $uri->query_form( $key1 => $val1, $key2 => $val2, ... )

=item $uri->query_form( \@key_value_pairs )

=item $uri->query_form( \%hash )

Sets and returns query components that use the
I<application/x-www-form-urlencoded> format. Key/value pairs are
separated by "&", and the key is separated from the value by a "="
character.

The form can be set either by passing separate key/value pairs, or via
an array or hash reference. Passing an empty array or an empty hash
removes the query component, whereas passing no arguments at all leaves
the component unchanged. The order of keys is undefined if a hash
reference is passed.
The old value is always returned as a list of
separate key/value pairs. Assigning this list to a hash is unwise as
the keys returned might repeat.

The values passed when setting the form can be plain strings or
references to arrays of strings. Passing an array of values has the
same effect as passing the key repeatedly with one value at a time.
All the following statements have the same effect:

 $uri->query_form(foo => 1, foo => 2);
 $uri->query_form(foo => [1, 2]);
 $uri->query_form([ foo => 1, foo => 2 ]);
 $uri->query_form([ foo => [1, 2] ]);
 $uri->query_form({ foo => [1, 2] });

The C<URI::QueryParam> module can be loaded to add further methods to
manipulate the form of a URI. See L<URI::QueryParam> for details.

=item $uri->query_keywords

=item $uri->query_keywords( $keywords, ... )

=item $uri->query_keywords( \@keywords )

Sets and returns query components that use the
keywords separated by "+" format.

The keywords can be set either by passing separate keywords directly
or by passing a reference to an array of keywords. Passing an empty
array removes the query component, whereas passing no arguments at
all leaves the component unchanged. The old value is always returned
as a list of separate words.

=back

=head1 SERVER METHODS

For schemes where the I<authority> component denotes an Internet host,
the following methods are available in addition to the generic
methods.

=over 4

=item $uri->userinfo

=item $uri->userinfo( $new_userinfo )

Sets and returns the escaped userinfo part of the
authority component.

For some schemes this is a user name and a password separated by
a colon. This practice is not recommended. Embedding passwords in
clear text (such as in a URI) has proven to be a security risk in almost
every case where it has been used.

=item $uri->host

=item $uri->host( $new_host )

Sets and returns the unescaped hostname.

If the $new_host string ends with a colon and a number, then this
number also sets the port.

=item $uri->port

=item $uri->port( $new_port )

Sets and returns the port. The port is a simple integer
that should be greater than 0.

If a port is not specified explicitly in the URI, then the URI scheme's default port
is returned. If you don't want the default port
substituted, then you can use the $uri->_port method instead.

=item $uri->host_port

=item $uri->host_port( $new_host_port )

Sets and returns the host and port as a single
unit. The returned value includes a port, even if it matches the
default port. The host part and the port part are separated by a
colon: ":".

=item $uri->default_port

Returns the default port of the URI scheme to which $uri
belongs. For I<http> this is the number 80, for I<ftp> this
is the number 21, etc. The default port for a scheme can not be
changed.

=back

=head1 SCHEME-SPECIFIC SUPPORT

Scheme-specific support is provided for the following URI schemes. For C<URI>
objects that do not belong to one of these, you can only use the common and
generic methods.

=over 4

=item B<data>:

The I<data> URI scheme is specified in RFC 2397. It allows inclusion
of small data items as "immediate" data, as if they had been included
externally.

C<URI> objects belonging to the data scheme support the common methods
and two new methods to access their scheme-specific components:
$uri->media_type and $uri->data. See L<URI::data> for details.

=item B<file>:

An old specification of the I<file> URI scheme is found in RFC 1738.
A new RFC 2396 based specification is not available yet, but file URI
references are in common use.

C<URI> objects belonging to the file scheme support the common and
generic methods. In addition, they provide two methods for mapping file URIs
back to local file names: $uri->file and $uri->dir. See L<URI::file>
for details.

=item B<ftp>:

An old specification of the I<ftp> URI scheme is found in RFC 1738. A
new RFC 2396 based specification is not available yet, but ftp URI
references are in common use.

C<URI> objects belonging to the ftp scheme support the common,
generic and server methods. In addition, they provide two methods for
accessing the userinfo sub-components: $uri->user and $uri->password.

=item B<gopher>:

The I<gopher> URI scheme is specified in
<draft-murali-url-gopher-1996-12-04> and will hopefully be available
as an RFC 2396 based specification.

C<URI> objects belonging to the gopher scheme support the common,
generic and server methods. In addition, they support some methods for
accessing gopher-specific path components: $uri->gopher_type,
$uri->selector, $uri->search, $uri->string.

=item B<http>:

The I<http> URI scheme is specified in RFC 2616.
The scheme is used to reference resources hosted by HTTP servers.

C<URI> objects belonging to the http scheme support the common,
generic and server methods.

=item B<https>:

The I<https> URI scheme is a Netscape invention which is commonly
implemented. The scheme is used to reference HTTP servers through SSL
connections. Its syntax is the same as http, but the default
port is different.

=item B<ldap>:

The I<ldap> URI scheme is specified in RFC 2255. LDAP is the
Lightweight Directory Access Protocol. An ldap URI describes an LDAP
search operation to perform to retrieve information from an LDAP
directory.

C<URI> objects belonging to the ldap scheme support the common,
generic and server methods as well as ldap-specific methods: $uri->dn,
$uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See
L<URI::ldap> for details.

=item B<ldapi>:

Like the I<ldap> URI scheme, but uses a UNIX domain socket. The
server methods are not supported, and the local socket path is
available as $uri->un_path. The I<ldapi> scheme is used by the
OpenLDAP package. There is no real specification for it, but it is
mentioned in various OpenLDAP manual pages.

=item B<ldaps>:

Like the I<ldap> URI scheme, but uses an SSL connection. This
scheme is deprecated, as the preferred way is to use the I<start_tls>
mechanism.

=item B<mailto>:

The I<mailto> URI scheme is specified in RFC 2368. The scheme was
originally used to designate the Internet mailing address of an
individual or service. It has (in RFC 2368) been extended to allow
setting of other mail header fields and the message body.

C<URI> objects belonging to the mailto scheme support the common
methods and the generic query methods. In addition, they support the
following mailto-specific methods: $uri->to, $uri->headers.

=item B<mms>:

The I<mms> URL specification can be found at L<http://sdp.ppona.com/>.
C<URI> objects belonging to the mms scheme support the common,
generic, and server methods, with the exception of userinfo and
query-related sub-components.

=item B<news>:

The I<news>, I<nntp> and I<snews> URI schemes are specified in
<draft-gilman-news-url-01> and will hopefully be available as an RFC
2396 based specification soon.

C<URI> objects belonging to the news scheme support the common,
generic and server methods. In addition, they provide some methods to
access the path: $uri->group and $uri->message.

=item B<nntp>:

See I<news> scheme.

=item B<pop>:

The I<pop> URI scheme is specified in RFC 2384. The scheme is used to
reference a POP3 mailbox.

C<URI> objects belonging to the pop scheme support the common, generic
and server methods. In addition, they provide two methods to access the
userinfo components: $uri->user and $uri->auth.

=item B<rlogin>:

An old specification of the I<rlogin> URI scheme is found in RFC
1738. C<URI> objects belonging to the rlogin scheme support the
common, generic and server methods.

=item B<rtsp>:

The I<rtsp> URL specification can be found in section 3.2 of RFC 2326.
C<URI> objects belonging to the rtsp scheme support the common,
generic, and server methods, with the exception of userinfo and
query-related sub-components.

=item B<rtspu>:

The I<rtspu> URI scheme is used to talk to RTSP servers over UDP
instead of TCP. The syntax is the same as rtsp.

=item B<rsync>:

Information about rsync is available from http://rsync.samba.org.
C<URI> objects belonging to the rsync scheme support the common,
generic and server methods. In addition, they provide methods to
access the userinfo sub-components: $uri->user and $uri->password.

=item B<sip>:

The I<sip> URI specification is described in sections 19.1 and 25
of RFC 3261. C<URI> objects belonging to the sip scheme support the
common, generic, and server methods with the exception of path related
sub-components. In addition, they provide two methods to get and set
I<sip> parameters: $uri->params_form and $uri->params.

=item B<sips>:

See I<sip> scheme. Its syntax is the same as sip, but the default
port is different.

=item B<snews>:

See I<news> scheme. Its syntax is the same as news, but the default
port is different.

=item B<telnet>:

An old specification of the I<telnet> URI scheme is found in RFC
1738.
C<URI> objects belonging to the telnet scheme support the
common, generic and server methods.

=item B<tn3270>:

These URIs are used like I<telnet> URIs but for connections to IBM
mainframes. C<URI> objects belonging to the tn3270 scheme support the
common, generic and server methods.

=item B<ssh>:

Information about ssh is available at http://www.openssh.com/.
C<URI> objects belonging to the ssh scheme support the common,
generic and server methods. In addition, they provide methods to
access the userinfo sub-components: $uri->user and $uri->password.

=item B<urn>:

The syntax of Uniform Resource Names is specified in RFC 2141. C<URI>
objects belonging to the urn scheme provide the common methods, and also the
methods $uri->nid and $uri->nss, which return the Namespace Identifier
and the Namespace-Specific String respectively.

The Namespace Identifier basically works like the Scheme identifier of
URIs, and further divides the URN namespace. Namespace Identifier
assignments are maintained at
<http://www.iana.org/assignments/urn-namespaces>.

Letter case is not significant for the Namespace Identifier. It is
always returned in lower case by the $uri->nid method. The $uri->_nid
method can be used if you want it in its original case.

=item B<urn>:B<isbn>:

The C<urn:isbn:> namespace contains International Standard Book
Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging
to this namespace has the following extra methods (if the
Business::ISBN module is available): $uri->isbn,
$uri->isbn_publisher_code, $uri->isbn_country_code, $uri->isbn_as_ean.

=item B<urn>:B<oid>:

The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is
described in RFC 3061. An object identifier consists of sequences of digits
separated by dots.
A C<URI> object belonging to this namespace has an
additional method called $uri->oid that can be used to get/set the oid
value. In a list context, oid numbers are returned as separate elements.

=back

=head1 CONFIGURATION VARIABLES

The following configuration variables influence how the class and its
methods behave:

=over 4

=item $URI::ABS_ALLOW_RELATIVE_SCHEME

Some older parsers used to allow the scheme name to be present in the
relative URL if it was the same as the base URL scheme. RFC 2396 says
that this should be avoided, but you can enable this old behaviour by
setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value.
The difference is demonstrated by the following examples:

 URI->new("http:foo")->abs("http://host/a/b")
     ==> "http:foo"

 local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
 URI->new("http:foo")->abs("http://host/a/b")
     ==> "http://host/a/foo"


=item $URI::ABS_REMOTE_LEADING_DOTS

You can also have the abs() method ignore excess ".."
segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS
to a TRUE value. The difference is demonstrated by the following
examples:

 URI->new("../../../foo")->abs("http://host/a/b")
     ==> "http://host/../../foo"

 local $URI::ABS_REMOTE_LEADING_DOTS = 1;
 URI->new("../../../foo")->abs("http://host/a/b")
     ==> "http://host/foo"

=back

=head1 BUGS

Using regexp variables like $1 directly as arguments to the URI methods
does not work too well with current perl implementations. I would argue
that this is actually a bug in perl. The workaround is to quote
them.
Example:

 /(...)/ || die;
 $u->query("$1");

=head1 PARSING URIs WITH REGEXP

As an alternative to this module, the following (official) regular
expression can be used to decode a URI:

 my($scheme, $authority, $path, $query, $fragment) =
 $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;

The C<URI::Split> module provides the function uri_split() as a
readable alternative.

=head1 SEE ALSO

L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>,
L<URI::Split>, L<URI::Heuristic>

RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax",
Berners-Lee, Fielding, Masinter, August 1998.

http://www.iana.org/assignments/uri-schemes

http://www.iana.org/assignments/urn-namespaces

http://www.w3.org/Addressing/

=head1 COPYRIGHT

Copyright 1995-2003 Gisle Aas.

Copyright 1995 Martijn Koster.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 AUTHORS / ACKNOWLEDGMENTS

This module is based on the C<URI::URL> module, which in turn was
(distantly) based on the C<wwwurl.pl> code in the libwww-perl for
perl4 developed by Roy Fielding, as part of the Arcadia project at the
University of California, Irvine, with contributions from Brooks
Cutter.

C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and
Martijn Koster with input from other people on the libwww-perl mailing
list.

C<URI> and related subclasses were developed by Gisle Aas.

=cut