1/* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5/* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22package com.sun.org.apache.xml.internal.serializer.utils; 23 24import java.io.IOException; 25import java.util.Objects; 26 27 28/** 29 * A class to represent a Uniform Resource Identifier (URI). This class 30 * is designed to handle the parsing of URIs and provide access to 31 * the various components (scheme, host, port, userinfo, path, query 32 * string and fragment) that may constitute a URI. 33 * <p> 34 * Parsing of a URI specification is done according to the URI 35 * syntax described in RFC 2396 36 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists 37 * of a scheme, followed by a colon (':'), followed by a scheme-specific 38 * part. For URIs that follow the "generic URI" syntax, the scheme- 39 * specific part begins with two slashes ("//") and may be followed 40 * by an authority segment (comprised of user information, host, and 41 * port), path segment, query segment and fragment. Note that RFC 2396 42 * no longer specifies the use of the parameters segment and excludes 43 * the "user:password" syntax as part of the authority segment. If 44 * "user:password" appears in a URI, the entire user/password string 45 * is stored as userinfo. 46 * <p> 47 * For URIs that do not follow the "generic URI" syntax (e.g. mailto), 48 * the entire scheme-specific part is treated as the "path" portion 49 * of the URI. 50 * <p> 51 * Note that, unlike the java.net.URL class, this class does not provide 52 * any built-in network access functionality nor does it provide any 53 * scheme-specific functionality (for example, it does not know a 54 * default port for a specific scheme). Rather, it only knows the 55 * grammar and basic set of operations that can be applied to a URI. 56 * 57 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. 58 * It exists to cut the serializers dependancy on that package. 59 * 60 * A minor change from the original is that this class no longer implements 61 * Serializable, and the serialVersionUID magic field is dropped, and 62 * the class is no longer "public". 63 * 64 * @xsl.usage internal 65 */ 66final class URI 67{ 68 /** 69 * MalformedURIExceptions are thrown in the process of building a URI 70 * or setting fields on a URI when an operation would result in an 71 * invalid URI specification. 72 * 73 */ 74 public static class MalformedURIException extends IOException 75 { 76 77 /** 78 * Constructs a <code>MalformedURIException</code> with no specified 79 * detail message. 80 */ 81 public MalformedURIException() 82 { 83 super(); 84 } 85 86 /** 87 * Constructs a <code>MalformedURIException</code> with the 88 * specified detail message. 89 * 90 * @param p_msg the detail message. 91 */ 92 public MalformedURIException(String p_msg) 93 { 94 super(p_msg); 95 } 96 } 97 98 /** reserved characters */ 99 private static final String RESERVED_CHARACTERS = ";/?:@&=+$,"; 100 101 /** 102 * URI punctuation mark characters - these, combined with 103 * alphanumerics, constitute the "unreserved" characters 104 */ 105 private static final String MARK_CHARACTERS = "-_.!~*'() "; 106 107 /** scheme can be composed of alphanumerics and these characters */ 108 private static final String SCHEME_CHARACTERS = "+-."; 109 110 /** 111 * userinfo can be composed of unreserved, escaped and these 112 * characters 113 */ 114 private static final String USERINFO_CHARACTERS = ";:&=+$,"; 115 116 /** Stores the scheme (usually the protocol) for this URI. 117 * @serial */ 118 private String m_scheme = null; 119 120 /** If specified, stores the userinfo for this URI; otherwise null. 121 * @serial */ 122 private String m_userinfo = null; 123 124 /** If specified, stores the host for this URI; otherwise null. 125 * @serial */ 126 private String m_host = null; 127 128 /** If specified, stores the port for this URI; otherwise -1. 129 * @serial */ 130 private int m_port = -1; 131 132 /** If specified, stores the path for this URI; otherwise null. 133 * @serial */ 134 private String m_path = null; 135 136 /** 137 * If specified, stores the query string for this URI; otherwise 138 * null. 139 * @serial 140 */ 141 private String m_queryString = null; 142 143 /** If specified, stores the fragment for this URI; otherwise null. 144 * @serial */ 145 private String m_fragment = null; 146 147 /** Indicate whether in DEBUG mode */ 148 private static boolean DEBUG = false; 149 150 /** 151 * Construct a new and uninitialized URI. 152 */ 153 public URI(){} 154 155 /** 156 * Construct a new URI from another URI. All fields for this URI are 157 * set equal to the fields of the URI passed in. 158 * 159 * @param p_other the URI to copy (cannot be null) 160 */ 161 public URI(URI p_other) 162 { 163 initialize(p_other); 164 } 165 166 /** 167 * Construct a new URI from a URI specification string. If the 168 * specification follows the "generic URI" syntax, (two slashes 169 * following the first colon), the specification will be parsed 170 * accordingly - setting the scheme, userinfo, host,port, path, query 171 * string and fragment fields as necessary. If the specification does 172 * not follow the "generic URI" syntax, the specification is parsed 173 * into a scheme and scheme-specific part (stored as the path) only. 174 * 175 * @param p_uriSpec the URI specification string (cannot be null or 176 * empty) 177 * 178 * @throws MalformedURIException if p_uriSpec violates any syntax 179 * rules 180 */ 181 public URI(String p_uriSpec) throws MalformedURIException 182 { 183 this((URI) null, p_uriSpec); 184 } 185 186 /** 187 * Construct a new URI from a base URI and a URI specification string. 188 * The URI specification string may be a relative URI. 189 * 190 * @param p_base the base URI (cannot be null if p_uriSpec is null or 191 * empty) 192 * @param p_uriSpec the URI specification string (cannot be null or 193 * empty if p_base is null) 194 * 195 * @throws MalformedURIException if p_uriSpec violates any syntax 196 * rules 197 */ 198 public URI(URI p_base, String p_uriSpec) throws MalformedURIException 199 { 200 initialize(p_base, p_uriSpec); 201 } 202 203 /** 204 * Construct a new URI that does not follow the generic URI syntax. 205 * Only the scheme and scheme-specific part (stored as the path) are 206 * initialized. 207 * 208 * @param p_scheme the URI scheme (cannot be null or empty) 209 * @param p_schemeSpecificPart the scheme-specific part (cannot be 210 * null or empty) 211 * 212 * @throws MalformedURIException if p_scheme violates any 213 * syntax rules 214 */ 215 public URI(String p_scheme, String p_schemeSpecificPart) 216 throws MalformedURIException 217 { 218 219 if (p_scheme == null || p_scheme.trim().length() == 0) 220 { 221 throw new MalformedURIException( 222 "Cannot construct URI with null/empty scheme!"); 223 } 224 225 if (p_schemeSpecificPart == null 226 || p_schemeSpecificPart.trim().length() == 0) 227 { 228 throw new MalformedURIException( 229 "Cannot construct URI with null/empty scheme-specific part!"); 230 } 231 232 setScheme(p_scheme); 233 setPath(p_schemeSpecificPart); 234 } 235 236 /** 237 * Construct a new URI that follows the generic URI syntax from its 238 * component parts. Each component is validated for syntax and some 239 * basic semantic checks are performed as well. See the individual 240 * setter methods for specifics. 241 * 242 * @param p_scheme the URI scheme (cannot be null or empty) 243 * @param p_host the hostname or IPv4 address for the URI 244 * @param p_path the URI path - if the path contains '?' or '#', 245 * then the query string and/or fragment will be 246 * set from the path; however, if the query and 247 * fragment are specified both in the path and as 248 * separate parameters, an exception is thrown 249 * @param p_queryString the URI query string (cannot be specified 250 * if path is null) 251 * @param p_fragment the URI fragment (cannot be specified if path 252 * is null) 253 * 254 * @throws MalformedURIException if any of the parameters violates 255 * syntax rules or semantic rules 256 */ 257 public URI(String p_scheme, String p_host, String p_path, String p_queryString, String p_fragment) 258 throws MalformedURIException 259 { 260 this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment); 261 } 262 263 /** 264 * Construct a new URI that follows the generic URI syntax from its 265 * component parts. Each component is validated for syntax and some 266 * basic semantic checks are performed as well. See the individual 267 * setter methods for specifics. 268 * 269 * @param p_scheme the URI scheme (cannot be null or empty) 270 * @param p_userinfo the URI userinfo (cannot be specified if host 271 * is null) 272 * @param p_host the hostname or IPv4 address for the URI 273 * @param p_port the URI port (may be -1 for "unspecified"; cannot 274 * be specified if host is null) 275 * @param p_path the URI path - if the path contains '?' or '#', 276 * then the query string and/or fragment will be 277 * set from the path; however, if the query and 278 * fragment are specified both in the path and as 279 * separate parameters, an exception is thrown 280 * @param p_queryString the URI query string (cannot be specified 281 * if path is null) 282 * @param p_fragment the URI fragment (cannot be specified if path 283 * is null) 284 * 285 * @throws MalformedURIException if any of the parameters violates 286 * syntax rules or semantic rules 287 */ 288 public URI(String p_scheme, String p_userinfo, String p_host, int p_port, String p_path, String p_queryString, String p_fragment) 289 throws MalformedURIException 290 { 291 292 if (p_scheme == null || p_scheme.trim().length() == 0) 293 { 294 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_REQUIRED, null)); //"Scheme is required!"); 295 } 296 297 if (p_host == null) 298 { 299 if (p_userinfo != null) 300 { 301 throw new MalformedURIException( 302 Utils.messages.createMessage(MsgKey.ER_NO_USERINFO_IF_NO_HOST, null)); //"Userinfo may not be specified if host is not specified!"); 303 } 304 305 if (p_port != -1) 306 { 307 throw new MalformedURIException( 308 Utils.messages.createMessage(MsgKey.ER_NO_PORT_IF_NO_HOST, null)); //"Port may not be specified if host is not specified!"); 309 } 310 } 311 312 if (p_path != null) 313 { 314 if (p_path.indexOf('?') != -1 && p_queryString != null) 315 { 316 throw new MalformedURIException( 317 Utils.messages.createMessage(MsgKey.ER_NO_QUERY_STRING_IN_PATH, null)); //"Query string cannot be specified in path and query string!"); 318 } 319 320 if (p_path.indexOf('#') != -1 && p_fragment != null) 321 { 322 throw new MalformedURIException( 323 Utils.messages.createMessage(MsgKey.ER_NO_FRAGMENT_STRING_IN_PATH, null)); //"Fragment cannot be specified in both the path and fragment!"); 324 } 325 } 326 327 setScheme(p_scheme); 328 setHost(p_host); 329 setPort(p_port); 330 setUserinfo(p_userinfo); 331 setPath(p_path); 332 setQueryString(p_queryString); 333 setFragment(p_fragment); 334 } 335 336 /** 337 * Initialize all fields of this URI from another URI. 338 * 339 * @param p_other the URI to copy (cannot be null) 340 */ 341 private void initialize(URI p_other) 342 { 343 344 m_scheme = p_other.getScheme(); 345 m_userinfo = p_other.getUserinfo(); 346 m_host = p_other.getHost(); 347 m_port = p_other.getPort(); 348 m_path = p_other.getPath(); 349 m_queryString = p_other.getQueryString(); 350 m_fragment = p_other.getFragment(); 351 } 352 353 /** 354 * Initializes this URI from a base URI and a URI specification string. 355 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 356 * the URI and Section 5 for specifications on resolving relative URIs 357 * and relative paths. 358 * 359 * @param p_base the base URI (may be null if p_uriSpec is an absolute 360 * URI) 361 * @param p_uriSpec the URI spec string which may be an absolute or 362 * relative URI (can only be null/empty if p_base 363 * is not null) 364 * 365 * @throws MalformedURIException if p_base is null and p_uriSpec 366 * is not an absolute URI or if 367 * p_uriSpec violates syntax rules 368 */ 369 private void initialize(URI p_base, String p_uriSpec) 370 throws MalformedURIException 371 { 372 373 if (p_base == null 374 && (p_uriSpec == null || p_uriSpec.trim().length() == 0)) 375 { 376 throw new MalformedURIException( 377 Utils.messages.createMessage(MsgKey.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters."); 378 } 379 380 // just make a copy of the base if spec is empty 381 if (p_uriSpec == null || p_uriSpec.trim().length() == 0) 382 { 383 initialize(p_base); 384 385 return; 386 } 387 388 String uriSpec = p_uriSpec.trim(); 389 int uriSpecLen = uriSpec.length(); 390 int index = 0; 391 392 // check for scheme 393 int colonIndex = uriSpec.indexOf(':'); 394 if (colonIndex < 0) 395 { 396 if (p_base == null) 397 { 398 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_IN_URI, new Object[]{uriSpec})); //"No scheme found in URI: "+uriSpec); 399 } 400 } 401 else 402 { 403 initializeScheme(uriSpec); 404 uriSpec = uriSpec.substring(colonIndex+1); 405 uriSpecLen = uriSpec.length(); 406 } 407 408 // two slashes means generic URI syntax, so we get the authority 409 if (((index + 1) < uriSpecLen) 410 && (uriSpec.substring(index).startsWith("//"))) 411 { 412 index += 2; 413 414 int startPos = index; 415 416 // get authority - everything up to path, query or fragment 417 char testChar = '\0'; 418 419 while (index < uriSpecLen) 420 { 421 testChar = uriSpec.charAt(index); 422 423 if (testChar == '/' || testChar == '?' || testChar == '#') 424 { 425 break; 426 } 427 428 index++; 429 } 430 431 // if we found authority, parse it out, otherwise we set the 432 // host to empty string 433 if (index > startPos) 434 { 435 initializeAuthority(uriSpec.substring(startPos, index)); 436 } 437 else 438 { 439 m_host = ""; 440 } 441 } 442 443 initializePath(uriSpec.substring(index)); 444 445 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 446 // In some cases, it might make more sense to throw an exception 447 // (when scheme is specified is the string spec and the base URI 448 // is also specified, for example), but we're just following the 449 // RFC specifications 450 if (p_base != null) 451 { 452 453 // check to see if this is the current doc - RFC 2396 5.2 #2 454 // note that this is slightly different from the RFC spec in that 455 // we don't include the check for query string being null 456 // - this handles cases where the urispec is just a query 457 // string or a fragment (e.g. "?y" or "#s") - 458 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 459 // identified this as a bug in the RFC 460 if (m_path.length() == 0 && m_scheme == null && m_host == null) 461 { 462 m_scheme = p_base.getScheme(); 463 m_userinfo = p_base.getUserinfo(); 464 m_host = p_base.getHost(); 465 m_port = p_base.getPort(); 466 m_path = p_base.getPath(); 467 468 if (m_queryString == null) 469 { 470 m_queryString = p_base.getQueryString(); 471 } 472 473 return; 474 } 475 476 // check for scheme - RFC 2396 5.2 #3 477 // if we found a scheme, it means absolute URI, so we're done 478 if (m_scheme == null) 479 { 480 m_scheme = p_base.getScheme(); 481 } 482 483 // check for authority - RFC 2396 5.2 #4 484 // if we found a host, then we've got a network path, so we're done 485 if (m_host == null) 486 { 487 m_userinfo = p_base.getUserinfo(); 488 m_host = p_base.getHost(); 489 m_port = p_base.getPort(); 490 } 491 else 492 { 493 return; 494 } 495 496 // check for absolute path - RFC 2396 5.2 #5 497 if (m_path.length() > 0 && m_path.startsWith("/")) 498 { 499 return; 500 } 501 502 // if we get to this point, we need to resolve relative path 503 // RFC 2396 5.2 #6 504 String path = ""; 505 String basePath = p_base.getPath(); 506 507 // 6a - get all but the last segment of the base URI path 508 if (basePath != null) 509 { 510 int lastSlash = basePath.lastIndexOf('/'); 511 512 if (lastSlash != -1) 513 { 514 path = basePath.substring(0, lastSlash + 1); 515 } 516 } 517 518 // 6b - append the relative URI path 519 path = path.concat(m_path); 520 521 // 6c - remove all "./" where "." is a complete path segment 522 index = -1; 523 524 while ((index = path.indexOf("/./")) != -1) 525 { 526 path = path.substring(0, index + 1).concat(path.substring(index + 3)); 527 } 528 529 // 6d - remove "." if path ends with "." as a complete path segment 530 if (path.endsWith("/.")) 531 { 532 path = path.substring(0, path.length() - 1); 533 } 534 535 // 6e - remove all "<segment>/../" where "<segment>" is a complete 536 // path segment not equal to ".." 537 index = -1; 538 539 int segIndex = -1; 540 String tempString = null; 541 542 while ((index = path.indexOf("/../")) > 0) 543 { 544 tempString = path.substring(0, path.indexOf("/../")); 545 segIndex = tempString.lastIndexOf('/'); 546 547 if (segIndex != -1) 548 { 549 if (!tempString.substring(segIndex++).equals("..")) 550 { 551 path = path.substring(0, segIndex).concat(path.substring(index 552 + 4)); 553 } 554 } 555 } 556 557 // 6f - remove ending "<segment>/.." where "<segment>" is a 558 // complete path segment 559 if (path.endsWith("/..")) 560 { 561 tempString = path.substring(0, path.length() - 3); 562 segIndex = tempString.lastIndexOf('/'); 563 564 if (segIndex != -1) 565 { 566 path = path.substring(0, segIndex + 1); 567 } 568 } 569 570 m_path = path; 571 } 572 } 573 574 /** 575 * Initialize the scheme for this URI from a URI string spec. 576 * 577 * @param p_uriSpec the URI specification (cannot be null) 578 * 579 * @throws MalformedURIException if URI does not have a conformant 580 * scheme 581 */ 582 private void initializeScheme(String p_uriSpec) throws MalformedURIException 583 { 584 585 int uriSpecLen = p_uriSpec.length(); 586 int index = 0; 587 String scheme = null; 588 char testChar = '\0'; 589 590 while (index < uriSpecLen) 591 { 592 testChar = p_uriSpec.charAt(index); 593 594 if (testChar == ':' || testChar == '/' || testChar == '?' 595 || testChar == '#') 596 { 597 break; 598 } 599 600 index++; 601 } 602 603 scheme = p_uriSpec.substring(0, index); 604 605 if (scheme.length() == 0) 606 { 607 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI."); 608 } 609 else 610 { 611 setScheme(scheme); 612 } 613 } 614 615 /** 616 * Initialize the authority (userinfo, host and port) for this 617 * URI from a URI string spec. 618 * 619 * @param p_uriSpec the URI specification (cannot be null) 620 * 621 * @throws MalformedURIException if p_uriSpec violates syntax rules 622 */ 623 private void initializeAuthority(String p_uriSpec) 624 throws MalformedURIException 625 { 626 627 int index = 0; 628 int start = 0; 629 int end = p_uriSpec.length(); 630 char testChar = '\0'; 631 String userinfo = null; 632 633 // userinfo is everything up @ 634 if (p_uriSpec.indexOf('@', start) != -1) 635 { 636 while (index < end) 637 { 638 testChar = p_uriSpec.charAt(index); 639 640 if (testChar == '@') 641 { 642 break; 643 } 644 645 index++; 646 } 647 648 userinfo = p_uriSpec.substring(start, index); 649 650 index++; 651 } 652 653 // host is everything up to ':' 654 String host = null; 655 656 start = index; 657 658 while (index < end) 659 { 660 testChar = p_uriSpec.charAt(index); 661 662 if (testChar == ':') 663 { 664 break; 665 } 666 667 index++; 668 } 669 670 host = p_uriSpec.substring(start, index); 671 672 int port = -1; 673 674 if (host.length() > 0) 675 { 676 677 // port 678 if (testChar == ':') 679 { 680 index++; 681 682 start = index; 683 684 while (index < end) 685 { 686 index++; 687 } 688 689 String portStr = p_uriSpec.substring(start, index); 690 691 if (portStr.length() > 0) 692 { 693 for (int i = 0; i < portStr.length(); i++) 694 { 695 if (!isDigit(portStr.charAt(i))) 696 { 697 throw new MalformedURIException( 698 portStr + " is invalid. Port should only contain digits!"); 699 } 700 } 701 702 try 703 { 704 port = Integer.parseInt(portStr); 705 } 706 catch (NumberFormatException nfe) 707 { 708 709 // can't happen 710 } 711 } 712 } 713 } 714 715 setHost(host); 716 setPort(port); 717 setUserinfo(userinfo); 718 } 719 720 /** 721 * Initialize the path for this URI from a URI string spec. 722 * 723 * @param p_uriSpec the URI specification (cannot be null) 724 * 725 * @throws MalformedURIException if p_uriSpec violates syntax rules 726 */ 727 private void initializePath(String p_uriSpec) throws MalformedURIException 728 { 729 730 if (p_uriSpec == null) 731 { 732 throw new MalformedURIException( 733 "Cannot initialize path from null string!"); 734 } 735 736 int index = 0; 737 int start = 0; 738 int end = p_uriSpec.length(); 739 char testChar = '\0'; 740 741 // path - everything up to query string or fragment 742 while (index < end) 743 { 744 testChar = p_uriSpec.charAt(index); 745 746 if (testChar == '?' || testChar == '#') 747 { 748 break; 749 } 750 751 // check for valid escape sequence 752 if (testChar == '%') 753 { 754 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1)) 755 ||!isHex(p_uriSpec.charAt(index + 2))) 756 { 757 throw new MalformedURIException( 758 Utils.messages.createMessage(MsgKey.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!"); 759 } 760 } 761 else if (!isReservedCharacter(testChar) 762 &&!isUnreservedCharacter(testChar)) 763 { 764 if ('\\' != testChar) 765 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{String.valueOf(testChar)})); //"Path contains invalid character: " 766 //+ testChar); 767 } 768 769 index++; 770 } 771 772 m_path = p_uriSpec.substring(start, index); 773 774 // query - starts with ? and up to fragment or end 775 if (testChar == '?') 776 { 777 index++; 778 779 start = index; 780 781 while (index < end) 782 { 783 testChar = p_uriSpec.charAt(index); 784 785 if (testChar == '#') 786 { 787 break; 788 } 789 790 if (testChar == '%') 791 { 792 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1)) 793 ||!isHex(p_uriSpec.charAt(index + 2))) 794 { 795 throw new MalformedURIException( 796 "Query string contains invalid escape sequence!"); 797 } 798 } 799 else if (!isReservedCharacter(testChar) 800 &&!isUnreservedCharacter(testChar)) 801 { 802 throw new MalformedURIException( 803 "Query string contains invalid character:" + testChar); 804 } 805 806 index++; 807 } 808 809 m_queryString = p_uriSpec.substring(start, index); 810 } 811 812 // fragment - starts with # 813 if (testChar == '#') 814 { 815 index++; 816 817 start = index; 818 819 while (index < end) 820 { 821 testChar = p_uriSpec.charAt(index); 822 823 if (testChar == '%') 824 { 825 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1)) 826 ||!isHex(p_uriSpec.charAt(index + 2))) 827 { 828 throw new MalformedURIException( 829 "Fragment contains invalid escape sequence!"); 830 } 831 } 832 else if (!isReservedCharacter(testChar) 833 &&!isUnreservedCharacter(testChar)) 834 { 835 throw new MalformedURIException( 836 "Fragment contains invalid character:" + testChar); 837 } 838 839 index++; 840 } 841 842 m_fragment = p_uriSpec.substring(start, index); 843 } 844 } 845 846 /** 847 * Get the scheme for this URI. 848 * 849 * @return the scheme for this URI 850 */ 851 public String getScheme() 852 { 853 return m_scheme; 854 } 855 856 /** 857 * Get the scheme-specific part for this URI (everything following the 858 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 859 * 860 * @return the scheme-specific part for this URI 861 */ 862 public String getSchemeSpecificPart() 863 { 864 865 final StringBuilder schemespec = new StringBuilder(); 866 867 if (m_userinfo != null || m_host != null || m_port != -1) 868 { 869 schemespec.append("//"); 870 } 871 872 if (m_userinfo != null) 873 { 874 schemespec.append(m_userinfo); 875 schemespec.append('@'); 876 } 877 878 if (m_host != null) 879 { 880 schemespec.append(m_host); 881 } 882 883 if (m_port != -1) 884 { 885 schemespec.append(':'); 886 schemespec.append(m_port); 887 } 888 889 if (m_path != null) 890 { 891 schemespec.append((m_path)); 892 } 893 894 if (m_queryString != null) 895 { 896 schemespec.append('?'); 897 schemespec.append(m_queryString); 898 } 899 900 if (m_fragment != null) 901 { 902 schemespec.append('#'); 903 schemespec.append(m_fragment); 904 } 905 906 return schemespec.toString(); 907 } 908 909 /** 910 * Get the userinfo for this URI. 911 * 912 * @return the userinfo for this URI (null if not specified). 913 */ 914 public String getUserinfo() 915 { 916 return m_userinfo; 917 } 918 919 /** 920 * Get the host for this URI. 921 * 922 * @return the host for this URI (null if not specified). 923 */ 924 public String getHost() 925 { 926 return m_host; 927 } 928 929 /** 930 * Get the port for this URI. 931 * 932 * @return the port for this URI (-1 if not specified). 933 */ 934 public int getPort() 935 { 936 return m_port; 937 } 938 939 /** 940 * Get the path for this URI (optionally with the query string and 941 * fragment). 942 * 943 * @param p_includeQueryString if true (and query string is not null), 944 * then a "?" followed by the query string 945 * will be appended 946 * @param p_includeFragment if true (and fragment is not null), 947 * then a "#" followed by the fragment 948 * will be appended 949 * 950 * @return the path for this URI possibly including the query string 951 * and fragment 952 */ 953 public String getPath(boolean p_includeQueryString, 954 boolean p_includeFragment) 955 { 956 957 final StringBuilder pathString = new StringBuilder(m_path); 958 959 if (p_includeQueryString && m_queryString != null) 960 { 961 pathString.append('?'); 962 pathString.append(m_queryString); 963 } 964 965 if (p_includeFragment && m_fragment != null) 966 { 967 pathString.append('#'); 968 pathString.append(m_fragment); 969 } 970 971 return pathString.toString(); 972 } 973 974 /** 975 * Get the path for this URI. Note that the value returned is the path 976 * only and does not include the query string or fragment. 977 * 978 * @return the path for this URI. 979 */ 980 public String getPath() 981 { 982 return m_path; 983 } 984 985 /** 986 * Get the query string for this URI. 987 * 988 * @return the query string for this URI. Null is returned if there 989 * was no "?" in the URI spec, empty string if there was a 990 * "?" but no query string following it. 991 */ 992 public String getQueryString() 993 { 994 return m_queryString; 995 } 996 997 /** 998 * Get the fragment for this URI. 999 * 1000 * @return the fragment for this URI. Null is returned if there 1001 * was no "#" in the URI spec, empty string if there was a 1002 * "#" but no fragment following it. 1003 */ 1004 public String getFragment() 1005 { 1006 return m_fragment; 1007 } 1008 1009 /** 1010 * Set the scheme for this URI. The scheme is converted to lowercase 1011 * before it is set. 1012 * 1013 * @param p_scheme the scheme for this URI (cannot be null) 1014 * 1015 * @throws MalformedURIException if p_scheme is not a conformant 1016 * scheme name 1017 */ 1018 public void setScheme(String p_scheme) throws MalformedURIException 1019 { 1020 1021 if (p_scheme == null) 1022 { 1023 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!"); 1024 } 1025 1026 if (!isConformantSchemeName(p_scheme)) 1027 { 1028 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant."); 1029 } 1030 1031 m_scheme = p_scheme.toLowerCase(); 1032 } 1033 1034 /** 1035 * Set the userinfo for this URI. If a non-null value is passed in and 1036 * the host value is null, then an exception is thrown. 1037 * 1038 * @param p_userinfo the userinfo for this URI 1039 * 1040 * @throws MalformedURIException if p_userinfo contains invalid 1041 * characters 1042 */ 1043 public void setUserinfo(String p_userinfo) throws MalformedURIException 1044 { 1045 1046 if (p_userinfo == null) 1047 { 1048 m_userinfo = null; 1049 } 1050 else 1051 { 1052 if (m_host == null) 1053 { 1054 throw new MalformedURIException( 1055 "Userinfo cannot be set when host is null!"); 1056 } 1057 1058 // userinfo can contain alphanumerics, mark characters, escaped 1059 // and ';',':','&','=','+','$',',' 1060 int index = 0; 1061 int end = p_userinfo.length(); 1062 char testChar = '\0'; 1063 1064 while (index < end) 1065 { 1066 testChar = p_userinfo.charAt(index); 1067 1068 if (testChar == '%') 1069 { 1070 if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1)) 1071 ||!isHex(p_userinfo.charAt(index + 2))) 1072 { 1073 throw new MalformedURIException( 1074 "Userinfo contains invalid escape sequence!"); 1075 } 1076 } 1077 else if (!isUnreservedCharacter(testChar) 1078 && USERINFO_CHARACTERS.indexOf(testChar) == -1) 1079 { 1080 throw new MalformedURIException( 1081 "Userinfo contains invalid character:" + testChar); 1082 } 1083 1084 index++; 1085 } 1086 } 1087 1088 m_userinfo = p_userinfo; 1089 } 1090 1091 /** 1092 * Set the host for this URI. If null is passed in, the userinfo 1093 * field is also set to null and the port is set to -1. 1094 * 1095 * @param p_host the host for this URI 1096 * 1097 * @throws MalformedURIException if p_host is not a valid IP 1098 * address or DNS hostname. 1099 */ 1100 public void setHost(String p_host) throws MalformedURIException 1101 { 1102 1103 if (p_host == null || p_host.trim().length() == 0) 1104 { 1105 m_host = p_host; 1106 m_userinfo = null; 1107 m_port = -1; 1108 } 1109 else if (!isWellFormedAddress(p_host)) 1110 { 1111 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!"); 1112 } 1113 1114 m_host = p_host; 1115 } 1116 1117 /** 1118 * Set the port for this URI. -1 is used to indicate that the port is 1119 * not specified, otherwise valid port numbers are between 0 and 65535. 1120 * If a valid port number is passed in and the host field is null, 1121 * an exception is thrown. 1122 * 1123 * @param p_port the port number for this URI 1124 * 1125 * @throws MalformedURIException if p_port is not -1 and not a 1126 * valid port number 1127 */ 1128 public void setPort(int p_port) throws MalformedURIException 1129 { 1130 1131 if (p_port >= 0 && p_port <= 65535) 1132 { 1133 if (m_host == null) 1134 { 1135 throw new MalformedURIException( 1136 Utils.messages.createMessage(MsgKey.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!"); 1137 } 1138 } 1139 else if (p_port != -1) 1140 { 1141 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_INVALID_PORT, null)); //"Invalid port number!"); 1142 } 1143 1144 m_port = p_port; 1145 } 1146 1147 /** 1148 * Set the path for this URI. If the supplied path is null, then the 1149 * query string and fragment are set to null as well. If the supplied 1150 * path includes a query string and/or fragment, these fields will be 1151 * parsed and set as well. Note that, for URIs following the "generic 1152 * URI" syntax, the path specified should start with a slash. 1153 * For URIs that do not follow the generic URI syntax, this method 1154 * sets the scheme-specific part. 1155 * 1156 * @param p_path the path for this URI (may be null) 1157 * 1158 * @throws MalformedURIException if p_path contains invalid 1159 * characters 1160 */ 1161 public void setPath(String p_path) throws MalformedURIException 1162 { 1163 1164 if (p_path == null) 1165 { 1166 m_path = null; 1167 m_queryString = null; 1168 m_fragment = null; 1169 } 1170 else 1171 { 1172 initializePath(p_path); 1173 } 1174 } 1175 1176 /** 1177 * Append to the end of the path of this URI. If the current path does 1178 * not end in a slash and the path to be appended does not begin with 1179 * a slash, a slash will be appended to the current path before the 1180 * new segment is added. Also, if the current path ends in a slash 1181 * and the new segment begins with a slash, the extra slash will be 1182 * removed before the new segment is appended. 1183 * 1184 * @param p_addToPath the new segment to be added to the current path 1185 * 1186 * @throws MalformedURIException if p_addToPath contains syntax 1187 * errors 1188 */ 1189 public void appendPath(String p_addToPath) throws MalformedURIException 1190 { 1191 1192 if (p_addToPath == null || p_addToPath.trim().length() == 0) 1193 { 1194 return; 1195 } 1196 1197 if (!isURIString(p_addToPath)) 1198 { 1199 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{p_addToPath})); //"Path contains invalid character!"); 1200 } 1201 1202 if (m_path == null || m_path.trim().length() == 0) 1203 { 1204 if (p_addToPath.startsWith("/")) 1205 { 1206 m_path = p_addToPath; 1207 } 1208 else 1209 { 1210 m_path = "/" + p_addToPath; 1211 } 1212 } 1213 else if (m_path.endsWith("/")) 1214 { 1215 if (p_addToPath.startsWith("/")) 1216 { 1217 m_path = m_path.concat(p_addToPath.substring(1)); 1218 } 1219 else 1220 { 1221 m_path = m_path.concat(p_addToPath); 1222 } 1223 } 1224 else 1225 { 1226 if (p_addToPath.startsWith("/")) 1227 { 1228 m_path = m_path.concat(p_addToPath); 1229 } 1230 else 1231 { 1232 m_path = m_path.concat("/" + p_addToPath); 1233 } 1234 } 1235 } 1236 1237 /** 1238 * Set the query string for this URI. A non-null value is valid only 1239 * if this is an URI conforming to the generic URI syntax and 1240 * the path value is not null. 1241 * 1242 * @param p_queryString the query string for this URI 1243 * 1244 * @throws MalformedURIException if p_queryString is not null and this 1245 * URI does not conform to the generic 1246 * URI syntax or if the path is null 1247 */ 1248 public void setQueryString(String p_queryString) 1249 throws MalformedURIException 1250 { 1251 1252 if (p_queryString == null) 1253 { 1254 m_queryString = null; 1255 } 1256 else if (!isGenericURI()) 1257 { 1258 throw new MalformedURIException( 1259 "Query string can only be set for a generic URI!"); 1260 } 1261 else if (getPath() == null) 1262 { 1263 throw new MalformedURIException( 1264 "Query string cannot be set when path is null!"); 1265 } 1266 else if (!isURIString(p_queryString)) 1267 { 1268 throw new MalformedURIException( 1269 "Query string contains invalid character!"); 1270 } 1271 else 1272 { 1273 m_queryString = p_queryString; 1274 } 1275 } 1276 1277 /** 1278 * Set the fragment for this URI. A non-null value is valid only 1279 * if this is a URI conforming to the generic URI syntax and 1280 * the path value is not null. 1281 * 1282 * @param p_fragment the fragment for this URI 1283 * 1284 * @throws MalformedURIException if p_fragment is not null and this 1285 * URI does not conform to the generic 1286 * URI syntax or if the path is null 1287 */ 1288 public void setFragment(String p_fragment) throws MalformedURIException 1289 { 1290 1291 if (p_fragment == null) 1292 { 1293 m_fragment = null; 1294 } 1295 else if (!isGenericURI()) 1296 { 1297 throw new MalformedURIException( 1298 Utils.messages.createMessage(MsgKey.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!"); 1299 } 1300 else if (getPath() == null) 1301 { 1302 throw new MalformedURIException( 1303 Utils.messages.createMessage(MsgKey.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!"); 1304 } 1305 else if (!isURIString(p_fragment)) 1306 { 1307 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!"); 1308 } 1309 else 1310 { 1311 m_fragment = p_fragment; 1312 } 1313 } 1314 1315 /** 1316 * Determines if the passed-in Object is equivalent to this URI. 1317 * 1318 * @param p_test the Object to test for equality. 1319 * 1320 * @return true if p_test is a URI with all values equal to this 1321 * URI, false otherwise 1322 */ 1323 @Override 1324 public boolean equals(Object p_test) 1325 { 1326 1327 if (p_test instanceof URI) 1328 { 1329 URI testURI = (URI) p_test; 1330 1331 if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals( 1332 testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals( 1333 testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals( 1334 testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals( 1335 testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals( 1336 testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals( 1337 testURI.m_fragment)))) 1338 { 1339 return true; 1340 } 1341 } 1342 1343 return false; 1344 } 1345 1346 @Override 1347 public int hashCode() { 1348 int hash = 5; 1349 hash = 41 * hash + Objects.hashCode(this.m_scheme); 1350 hash = 41 * hash + Objects.hashCode(this.m_userinfo); 1351 hash = 41 * hash + Objects.hashCode(this.m_host); 1352 hash = 41 * hash + this.m_port; 1353 hash = 41 * hash + Objects.hashCode(this.m_path); 1354 hash = 41 * hash + Objects.hashCode(this.m_queryString); 1355 hash = 41 * hash + Objects.hashCode(this.m_fragment); 1356 return hash; 1357 } 1358 1359 /** 1360 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1361 * 1362 * @return the URI string specification 1363 */ 1364 @Override 1365 public String toString() 1366 { 1367 1368 final StringBuilder uriSpecString = new StringBuilder(); 1369 1370 if (m_scheme != null) 1371 { 1372 uriSpecString.append(m_scheme); 1373 uriSpecString.append(':'); 1374 } 1375 1376 uriSpecString.append(getSchemeSpecificPart()); 1377 1378 return uriSpecString.toString(); 1379 } 1380 1381 /** 1382 * Get the indicator as to whether this URI uses the "generic URI" 1383 * syntax. 1384 * 1385 * @return true if this URI uses the "generic URI" syntax, false 1386 * otherwise 1387 */ 1388 public boolean isGenericURI() 1389 { 1390 1391 // presence of the host (whether valid or empty) means 1392 // double-slashes which means generic uri 1393 return (m_host != null); 1394 } 1395 1396 /** 1397 * Determine whether a scheme conforms to the rules for a scheme name. 1398 * A scheme is conformant if it starts with an alphanumeric, and 1399 * contains only alphanumerics, '+','-' and '.'. 1400 * 1401 * 1402 * @param p_scheme The sheme name to check 1403 * @return true if the scheme is conformant, false otherwise 1404 */ 1405 public static boolean isConformantSchemeName(String p_scheme) 1406 { 1407 1408 if (p_scheme == null || p_scheme.trim().length() == 0) 1409 { 1410 return false; 1411 } 1412 1413 if (!isAlpha(p_scheme.charAt(0))) 1414 { 1415 return false; 1416 } 1417 1418 char testChar; 1419 1420 for (int i = 1; i < p_scheme.length(); i++) 1421 { 1422 testChar = p_scheme.charAt(i); 1423 1424 if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1) 1425 { 1426 return false; 1427 } 1428 } 1429 1430 return true; 1431 } 1432 1433 /** 1434 * Determine whether a string is syntactically capable of representing 1435 * a valid IPv4 address or the domain name of a network host. A valid 1436 * IPv4 address consists of four decimal digit groups separated by a 1437 * '.'. A hostname consists of domain labels (each of which must 1438 * begin and end with an alphanumeric but may contain '-') separated 1439 * & by a '.'. See RFC 2396 Section 3.2.2. 1440 * 1441 * 1442 * @param p_address The address string to check 1443 * @return true if the string is a syntactically valid IPv4 address 1444 * or hostname 1445 */ 1446 public static boolean isWellFormedAddress(String p_address) 1447 { 1448 1449 if (p_address == null) 1450 { 1451 return false; 1452 } 1453 1454 String address = p_address.trim(); 1455 int addrLength = address.length(); 1456 1457 if (addrLength == 0 || addrLength > 255) 1458 { 1459 return false; 1460 } 1461 1462 if (address.startsWith(".") || address.startsWith("-")) 1463 { 1464 return false; 1465 } 1466 1467 // rightmost domain label starting with digit indicates IP address 1468 // since top level domain label can only start with an alpha 1469 // see RFC 2396 Section 3.2.2 1470 int index = address.lastIndexOf('.'); 1471 1472 if (address.endsWith(".")) 1473 { 1474 index = address.substring(0, index).lastIndexOf('.'); 1475 } 1476 1477 if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1))) 1478 { 1479 char testChar; 1480 int numDots = 0; 1481 1482 // make sure that 1) we see only digits and dot separators, 2) that 1483 // any dot separator is preceded and followed by a digit and 1484 // 3) that we find 3 dots 1485 for (int i = 0; i < addrLength; i++) 1486 { 1487 testChar = address.charAt(i); 1488 1489 if (testChar == '.') 1490 { 1491 if (!isDigit(address.charAt(i - 1)) 1492 || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1)))) 1493 { 1494 return false; 1495 } 1496 1497 numDots++; 1498 } 1499 else if (!isDigit(testChar)) 1500 { 1501 return false; 1502 } 1503 } 1504 1505 if (numDots != 3) 1506 { 1507 return false; 1508 } 1509 } 1510 else 1511 { 1512 1513 // domain labels can contain alphanumerics and '-" 1514 // but must start and end with an alphanumeric 1515 char testChar; 1516 1517 for (int i = 0; i < addrLength; i++) 1518 { 1519 testChar = address.charAt(i); 1520 1521 if (testChar == '.') 1522 { 1523 if (!isAlphanum(address.charAt(i - 1))) 1524 { 1525 return false; 1526 } 1527 1528 if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1))) 1529 { 1530 return false; 1531 } 1532 } 1533 else if (!isAlphanum(testChar) && testChar != '-') 1534 { 1535 return false; 1536 } 1537 } 1538 } 1539 1540 return true; 1541 } 1542 1543 /** 1544 * Determine whether a char is a digit. 1545 * 1546 * 1547 * @param p_char the character to check 1548 * @return true if the char is betweeen '0' and '9', false otherwise 1549 */ 1550 private static boolean isDigit(char p_char) 1551 { 1552 return p_char >= '0' && p_char <= '9'; 1553 } 1554 1555 /** 1556 * Determine whether a character is a hexadecimal character. 1557 * 1558 * 1559 * @param p_char the character to check 1560 * @return true if the char is between '0' and '9', 'a' and 'f' 1561 * or 'A' and 'F', false otherwise 1562 */ 1563 private static boolean isHex(char p_char) 1564 { 1565 return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f') 1566 || (p_char >= 'A' && p_char <= 'F')); 1567 } 1568 1569 /** 1570 * Determine whether a char is an alphabetic character: a-z or A-Z 1571 * 1572 * 1573 * @param p_char the character to check 1574 * @return true if the char is alphabetic, false otherwise 1575 */ 1576 private static boolean isAlpha(char p_char) 1577 { 1578 return ((p_char >= 'a' && p_char <= 'z') 1579 || (p_char >= 'A' && p_char <= 'Z')); 1580 } 1581 1582 /** 1583 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 1584 * 1585 * 1586 * @param p_char the character to check 1587 * @return true if the char is alphanumeric, false otherwise 1588 */ 1589 private static boolean isAlphanum(char p_char) 1590 { 1591 return (isAlpha(p_char) || isDigit(p_char)); 1592 } 1593 1594 /** 1595 * Determine whether a character is a reserved character: 1596 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ',' 1597 * 1598 * 1599 * @param p_char the character to check 1600 * @return true if the string contains any reserved characters 1601 */ 1602 private static boolean isReservedCharacter(char p_char) 1603 { 1604 return RESERVED_CHARACTERS.indexOf(p_char) != -1; 1605 } 1606 1607 /** 1608 * Determine whether a char is an unreserved character. 1609 * 1610 * 1611 * @param p_char the character to check 1612 * @return true if the char is unreserved, false otherwise 1613 */ 1614 private static boolean isUnreservedCharacter(char p_char) 1615 { 1616 return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1); 1617 } 1618 1619 /** 1620 * Determine whether a given string contains only URI characters (also 1621 * called "uric" in RFC 2396). uric consist of all reserved 1622 * characters, unreserved characters and escaped characters. 1623 * 1624 * 1625 * @param p_uric URI string 1626 * @return true if the string is comprised of uric, false otherwise 1627 */ 1628 private static boolean isURIString(String p_uric) 1629 { 1630 1631 if (p_uric == null) 1632 { 1633 return false; 1634 } 1635 1636 int end = p_uric.length(); 1637 char testChar = '\0'; 1638 1639 for (int i = 0; i < end; i++) 1640 { 1641 testChar = p_uric.charAt(i); 1642 1643 if (testChar == '%') 1644 { 1645 if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1)) 1646 ||!isHex(p_uric.charAt(i + 2))) 1647 { 1648 return false; 1649 } 1650 else 1651 { 1652 i += 2; 1653 1654 continue; 1655 } 1656 } 1657 1658 if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar)) 1659 { 1660 continue; 1661 } 1662 else 1663 { 1664 return false; 1665 } 1666 } 1667 1668 return true; 1669 } 1670} 1671