1/* 2 * Copyright 2010 Haiku Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Christophe Huriaux, c.huriaux@gmail.com 7 */ 8 9 10#include <ctype.h> 11#include <cstdio> 12#include <cstdlib> 13#include <new> 14 15#include <Url.h> 16 17static const char* kArchivedUrl = "be:url string"; 18 19 20BUrl::BUrl(const char* url) 21 : 22 fUrlString(), 23 fProtocol(), 24 fUser(), 25 fPassword(), 26 fHost(), 27 fPort(0), 28 fPath(), 29 fRequest(), 30 fHasAuthority(false) 31{ 32 SetUrlString(url); 33} 34 35 36BUrl::BUrl(BMessage* archive) 37 : 38 fUrlString(), 39 fProtocol(), 40 fUser(), 41 fPassword(), 42 fHost(), 43 fPort(0), 44 fPath(), 45 fRequest(), 46 fHasAuthority(false) 47{ 48 BString url; 49 50 if (archive->FindString(kArchivedUrl, &url) == B_OK) 51 SetUrlString(url); 52} 53 54 55BUrl::BUrl(const BUrl& other) 56 : 57 BArchivable(), 58 fUrlString(), 59 fProtocol(), 60 fUser(), 61 fPassword(), 62 fHost(), 63 fPort(0), 64 fPath(), 65 fRequest(), 66 fHasAuthority(false) 67{ 68 *this = other; 69} 70 71 72BUrl::BUrl() 73 : 74 fUrlString(), 75 fProtocol(), 76 fUser(), 77 fPassword(), 78 fHost(), 79 fPort(0), 80 fPath(), 81 fRequest(), 82 fHasAuthority(false) 83{ 84} 85 86 87BUrl::~BUrl() 88{ 89} 90 91 92// #pragma mark URL fields modifiers 93 94 95BUrl& 96BUrl::SetUrlString(const BString& url) 97{ 98 _ExplodeUrlString(url); 99 return *this; 100} 101 102 103BUrl& 104BUrl::SetProtocol(const BString& protocol) 105{ 106 fProtocol = protocol; 107 fHasProtocol = true; 108 fUrlStringValid = false; 109 return *this; 110} 111 112 113BUrl& 114BUrl::SetUserName(const BString& user) 115{ 116 fUser = user; 117 fHasUserName = true; 118 fUrlStringValid = false; 119 fAuthorityValid = false; 120 fUserInfoValid = false; 121 return *this; 122} 123 124 125BUrl& 126BUrl::SetPassword(const BString& password) 127{ 128 fPassword = password; 129 fHasPassword = true; 130 fUrlStringValid = false; 131 fAuthorityValid = false; 132 fUserInfoValid = false; 133 return *this; 134} 135 136 137BUrl& 138BUrl::SetHost(const BString& host) 139{ 140 fHost = host; 141 fHasHost = true; 142 fUrlStringValid = false; 143 fAuthorityValid = false; 144 return *this; 145} 146 147 148BUrl& 149BUrl::SetPort(int port) 150{ 151 fPort = port; 152 fHasPort = true; 153 fUrlStringValid = false; 154 fAuthorityValid = false; 155 return *this; 156} 157 158 159BUrl& 160BUrl::SetPath(const BString& path) 161{ 162 fPath = path; 163 fHasPath = true; 164 fUrlStringValid = false; 165 return *this; 166} 167 168 169BUrl& 170BUrl::SetRequest(const BString& request) 171{ 172 fRequest = request; 173 fHasRequest = true; 174 fUrlStringValid = false; 175 return *this; 176} 177 178 179BUrl& 180BUrl::SetFragment(const BString& fragment) 181{ 182 fFragment = fragment; 183 fHasFragment = true; 184 fUrlStringValid = false; 185 return *this; 186} 187 188 189// #pragma mark URL fields access 190 191 192const BString& 193BUrl::UrlString() const 194{ 195 if (!fUrlStringValid) { 196 fUrlString.Truncate(0); 197 198 if (HasProtocol()) { 199 fUrlString << fProtocol << ':'; 200 if (HasAuthority()) 201 fUrlString << "//"; 202 } 203 204 fUrlString << Authority(); 205 fUrlString << Path(); 206 207 if (HasRequest()) 208 fUrlString << '?' << fRequest; 209 210 if (HasFragment()) 211 fUrlString << '#' << fFragment; 212 213 fUrlStringValid = true; 214 } 215 216 return fUrlString; 217} 218 219 220const BString& 221BUrl::Protocol() const 222{ 223 return fProtocol; 224} 225 226 227const BString& 228BUrl::UserName() const 229{ 230 return fUser; 231} 232 233 234const BString& 235BUrl::Password() const 236{ 237 return fPassword; 238} 239 240 241const BString& 242BUrl::UserInfo() const 243{ 244 if (!fUserInfoValid) { 245 fUserInfo = fUser; 246 247 if (HasPassword()) 248 fUserInfo << ':' << fPassword; 249 250 fUserInfoValid = true; 251 } 252 253 return fUserInfo; 254} 255 256 257const BString& 258BUrl::Host() const 259{ 260 return fHost; 261} 262 263 264int 265BUrl::Port() const 266{ 267 return fPort; 268} 269 270 271const BString& 272BUrl::Authority() const 273{ 274 if (!fAuthorityValid) { 275 fAuthority.Truncate(0); 276 277 if (HasUserInfo()) 278 fAuthority << UserInfo() << '@'; 279 fAuthority << Host(); 280 281 if (HasPort()) 282 fAuthority << ':' << fPort; 283 284 fAuthorityValid = true; 285 } 286 return fAuthority; 287} 288 289 290const BString& 291BUrl::Path() const 292{ 293 return fPath; 294} 295 296 297const BString& 298BUrl::Request() const 299{ 300 return fRequest; 301} 302 303 304const BString& 305BUrl::Fragment() const 306{ 307 return fFragment; 308} 309 310 311// #pragma mark URL fields tests 312 313 314bool 315BUrl::IsValid() const 316{ 317 // TODO 318 return false; 319} 320 321 322bool 323BUrl::HasProtocol() const 324{ 325 return fHasProtocol; 326} 327 328 329bool 330BUrl::HasAuthority() const 331{ 332 return fHasAuthority; 333} 334 335 336bool 337BUrl::HasUserName() const 338{ 339 return fHasUserName; 340} 341 342 343bool 344BUrl::HasPassword() const 345{ 346 return fHasPassword; 347} 348 349 350bool 351BUrl::HasUserInfo() const 352{ 353 return fHasUserInfo; 354} 355 356 357bool 358BUrl::HasHost() const 359{ 360 return fHasHost; 361} 362 363 364bool 365BUrl::HasPort() const 366{ 367 return fHasPort; 368} 369 370 371bool 372BUrl::HasPath() const 373{ 374 return fHasPath; 375} 376 377 378bool 379BUrl::HasRequest() const 380{ 381 return fHasRequest; 382} 383 384 385bool 386BUrl::HasFragment() const 387{ 388 return fHasFragment; 389} 390 391 392// #pragma mark URL encoding/decoding of needed fields 393 394 395void 396BUrl::UrlEncode(bool strict) 397{ 398 fUser = _DoUrlEncodeChunk(fUser, strict); 399 fPassword = _DoUrlEncodeChunk(fPassword, strict); 400 fHost = _DoUrlEncodeChunk(fHost, strict); 401 fFragment = _DoUrlEncodeChunk(fFragment, strict); 402 fPath = _DoUrlEncodeChunk(fPath, strict, true); 403} 404 405 406void 407BUrl::UrlDecode(bool strict) 408{ 409 fUser = _DoUrlDecodeChunk(fUser, strict); 410 fPassword = _DoUrlDecodeChunk(fPassword, strict); 411 fHost = _DoUrlDecodeChunk(fHost, strict); 412 fFragment = _DoUrlDecodeChunk(fFragment, strict); 413 fPath = _DoUrlDecodeChunk(fPath, strict); 414} 415 416 417// #pragma mark Url encoding/decoding of string 418 419 420/*static*/ BString 421BUrl::UrlEncode(const BString& url, bool strict, bool directory) 422{ 423 return _DoUrlEncodeChunk(url, strict, directory); 424} 425 426 427/*static*/ BString 428BUrl::UrlDecode(const BString& url, bool strict) 429{ 430 return _DoUrlDecodeChunk(url, strict); 431} 432 433 434// #pragma mark BArchivable members 435 436 437status_t 438BUrl::Archive(BMessage* into, bool deep) const 439{ 440 status_t ret = BArchivable::Archive(into, deep); 441 442 if (ret == B_OK) 443 ret = into->AddString(kArchivedUrl, UrlString()); 444 445 return ret; 446} 447 448 449/*static*/ BArchivable* 450BUrl::Instantiate(BMessage* archive) 451{ 452 if (validate_instantiation(archive, "BUrl")) 453 return new(std::nothrow) BUrl(archive); 454 return NULL; 455} 456 457 458// #pragma mark URL comparison 459 460 461bool 462BUrl::operator==(BUrl& other) const 463{ 464 UrlString(); 465 other.UrlString(); 466 467 return fUrlString == other.fUrlString; 468} 469 470 471bool 472BUrl::operator!=(BUrl& other) const 473{ 474 return !(*this == other); 475} 476 477 478// #pragma mark URL assignment 479 480 481const BUrl& 482BUrl::operator=(const BUrl& other) 483{ 484 fUrlStringValid = other.fUrlStringValid; 485 if (fUrlStringValid) 486 fUrlString = other.fUrlString; 487 488 fAuthorityValid = other.fAuthorityValid; 489 if (fAuthorityValid) 490 fAuthority = other.fAuthority; 491 492 fUserInfoValid = other.fUserInfoValid; 493 if (fUserInfoValid) 494 fUserInfo = other.fUserInfo; 495 496 fProtocol = other.fProtocol; 497 fUser = other.fUser; 498 fPassword = other.fPassword; 499 fHost = other.fHost; 500 fPort = other.fPort; 501 fPath = other.fPath; 502 fRequest = other.fRequest; 503 fFragment = other.fFragment; 504 505 fHasProtocol = other.fHasProtocol; 506 fHasUserName = other.fHasUserName; 507 fHasPassword = other.fHasPassword; 508 fHasUserInfo = other.fHasUserInfo; 509 fHasHost = other.fHasHost; 510 fHasPort = other.fHasPort; 511 fHasAuthority = other.fHasAuthority; 512 fHasPath = other.fHasPath; 513 fHasRequest = other.fHasRequest; 514 fHasFragment = other.fHasFragment; 515 516 return *this; 517} 518 519 520const BUrl& 521BUrl::operator=(const BString& string) 522{ 523 SetUrlString(string); 524 return *this; 525} 526 527 528const BUrl& 529BUrl::operator=(const char* string) 530{ 531 SetUrlString(string); 532 return *this; 533} 534 535 536// #pragma mark URL to string conversion 537 538 539BUrl::operator const char*() const 540{ 541 return UrlString(); 542} 543 544 545void 546BUrl::_ResetFields() 547{ 548 fHasProtocol = false; 549 fHasUserName = false; 550 fHasPassword = false; 551 fHasUserInfo = false; 552 fHasHost = false; 553 fHasPort = false; 554 fHasAuthority = false; 555 fHasPath = false; 556 fHasRequest = false; 557 fHasFragment = false; 558 559 fProtocol.Truncate(0); 560 fUser.Truncate(0); 561 fPassword.Truncate(0); 562 fHost.Truncate(0); 563 fPort = 0; 564 fPath.Truncate(0); 565 fRequest.Truncate(0); 566 fFragment.Truncate(0); 567 568 // Force re-generation of these fields 569 fUrlStringValid = false; 570 fUserInfoValid = false; 571 fAuthorityValid = false; 572} 573 574 575void 576BUrl::_ExplodeUrlString(const BString& url) 577{ 578 int16 urlIndex = 0; 579 580 _ResetFields(); 581 582 _ExtractProtocol(url, &urlIndex); 583 _ExtractAuthority(url, &urlIndex); 584 _ExtractPath(url, &urlIndex); 585 _ExtractRequestAndFragment(url, &urlIndex); 586} 587 588 589void 590BUrl::_ExtractProtocol(const BString& urlString, int16* origin) 591{ 592 int16 firstColon = urlString.FindFirst(':', *origin); 593 594 // If no colon is found, assume the protocol 595 // is not present 596 if (firstColon == -1) 597 return; 598 else { 599 urlString.CopyInto(fProtocol, *origin, firstColon - *origin); 600 *origin = firstColon + 1; 601 } 602 603 if (!_IsProtocolValid()) { 604 fHasProtocol = false; 605 fProtocol.Truncate(0); 606 } else 607 fHasProtocol = true; 608} 609 610 611void 612BUrl::_ExtractAuthority(const BString& urlString, int16* origin) 613{ 614 // URI doesn't contain an authority field 615 if (urlString.FindFirst("//", *origin) != *origin) 616 return; 617 618 fHasAuthority = true; 619 620// while (urlString.ByteAt(*origin) == '/') 621// (*origin)++; 622 623 (*origin) += 2; 624 625 626 int16 userInfoEnd = urlString.FindFirst('@', *origin); 627 628 // URL contains userinfo field 629 if (userInfoEnd != -1) { 630 BString userInfo; 631 urlString.CopyInto(userInfo, *origin, userInfoEnd - *origin); 632 633 int16 colonDelimiter = userInfo.FindFirst(':', 0); 634 635 if (colonDelimiter == *origin) { 636 fHasPassword = true; 637 fPassword = userInfo; 638 } else if (colonDelimiter != -1) { 639 fHasUserName = true; 640 fHasPassword = true; 641 642 userInfo.CopyInto(fUser, 0, colonDelimiter); 643 userInfo.CopyInto(fPassword, colonDelimiter + 1, 644 userInfo.Length() - colonDelimiter); 645 } else { 646 fHasUserName = true; 647 fUser = userInfo; 648 } 649 650 fHasUserInfo = true; 651 *origin = userInfoEnd + 1; 652 } 653 654 655 // Extract the host part 656 int16 hostEnd = *origin; 657 658 while (hostEnd < urlString.Length() 659 && !_IsAuthorityTerminator(urlString.ByteAt(hostEnd)) 660 && urlString.ByteAt(hostEnd) != ':') { 661 hostEnd++; 662 } 663 664 // The host is likely to be present if an authority is 665 // defined, but in some weird cases, it's not. 666 if (hostEnd != *origin) { 667 urlString.CopyInto(fHost, *origin, hostEnd - *origin); 668 669 *origin = hostEnd; 670 fHasHost = true; 671 } 672 673 // Extract the port part 674 fPort = 0; 675 if (urlString.ByteAt(*origin) == ':') { 676 int16 portEnd = ++(*origin); 677 678 while (portEnd < urlString.Length() 679 && !_IsAuthorityTerminator(urlString.ByteAt(portEnd))) 680 portEnd++; 681 682 BString portString; 683 urlString.CopyInto(portString, *origin, portEnd - *origin); 684 fPort = atoi(portString.String()); 685 686 // Even if the port is invalid, the URL is considered to 687 // have a port. 688 fHasPort = portString.Length() > 0; 689 *origin = portEnd; 690 } 691} 692 693 694void 695BUrl::_ExtractPath(const BString& urlString, int16* origin) 696{ 697 // Extract path from URL 698 if (urlString.ByteAt(*origin) == '/' || !HasAuthority()) { 699 int16 pathEnd = *origin; 700 701 while (pathEnd < urlString.Length() 702 && !_IsPathTerminator(urlString.ByteAt(pathEnd))) { 703 pathEnd++; 704 } 705 706 urlString.CopyInto(fPath, *origin, pathEnd - *origin); 707 708 *origin = pathEnd; 709 fHasPath = true; 710 } 711} 712 713 714void 715BUrl::_ExtractRequestAndFragment(const BString& urlString, int16* origin) 716{ 717 // Extract request field from URL 718 if (urlString.ByteAt(*origin) == '?') { 719 (*origin)++; 720 int16 requestEnd = urlString.FindFirst('#', *origin); 721 722 fHasRequest = true; 723 724 if (requestEnd == -1) { 725 urlString.CopyInto(fRequest, *origin, urlString.Length() - *origin); 726 return; 727 } else { 728 urlString.CopyInto(fRequest, *origin, requestEnd - *origin); 729 *origin = requestEnd; 730 } 731 } 732 733 // Extract fragment field if needed 734 if (urlString.ByteAt(*origin) == '#') { 735 (*origin)++; 736 urlString.CopyInto(fFragment, *origin, urlString.Length() - *origin); 737 738 fHasFragment = true; 739 } 740} 741 742 743/*static*/ BString 744BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory) 745{ 746 BString result; 747 748 for (int32 i = 0; i < chunk.Length(); i++) { 749 if (_IsUnreserved(chunk[i]) 750 || (directory && (chunk[i] == '/' || chunk[i] == '\\'))) 751 result << chunk[i]; 752 else { 753 if (chunk[i] == ' ' && !strict) { 754 result << '+'; 755 // In non-strict mode, spaces are encoded by a plus sign 756 } else { 757 char hexString[5]; 758 snprintf(hexString, 5, "%X", chunk[i]); 759 760 result << '%' << hexString; 761 } 762 } 763 } 764 765 return result; 766} 767 768 769/*static*/ BString 770BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict) 771{ 772 BString result; 773 774 for (int32 i = 0; i < chunk.Length(); i++) { 775 if (chunk[i] == '+' && !strict) 776 result << ' '; 777 else if (chunk[i] != '%') 778 result << chunk[i]; 779 else { 780 char hexString[] = { chunk[i+1], chunk[i+2], 0 }; 781 result << (char)strtol(hexString, NULL, 16); 782 783 i += 2; 784 } 785 } 786 return result; 787} 788 789 790bool 791BUrl::_IsProtocolValid() 792{ 793 for (int8 index = 0; index < fProtocol.Length(); index++) { 794 char c = fProtocol[index]; 795 796 if (index == 0 && !isalpha(c)) 797 return false; 798 else if (!isalnum(c) && c != '+' && c != '-' && c != '.') 799 return false; 800 } 801 802 return true; 803} 804 805 806bool 807BUrl::_IsAuthorityTerminator(char c) 808{ 809 if (c == '/' || _IsPathTerminator(c)) 810 return true; 811 else 812 return false; 813} 814 815 816bool 817BUrl::_IsPathTerminator(char c) 818{ 819 if (c == '?' || _IsRequestTerminator(c)) 820 return true; 821 else 822 return false; 823} 824 825 826bool 827BUrl::_IsRequestTerminator(char c) 828{ 829 if (c == '#') 830 return true; 831 else 832 return false; 833} 834 835 836bool 837BUrl::_IsUnreserved(char c) 838{ 839 if (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~') 840 return true; 841 else 842 return false; 843} 844 845 846bool 847BUrl::_IsGenDelim(char c) 848{ 849 if (c == ':' || c == '/' || c == '?' || c == '#' || c == '[' 850 || c == ']' || c == '@') 851 return true; 852 else 853 return false; 854} 855 856 857bool 858BUrl::_IsSubDelim(char c) 859{ 860 if (c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' 861 || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' 862 || c == '=') 863 return true; 864 else 865 return false; 866} 867