/*
 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2011, 2012, 2013 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef KURL_h
#define KURL_h

#include <wtf/Forward.h>
#include <wtf/HashMap.h>
#include <wtf/RetainPtr.h>
#include <wtf/text/WTFString.h>

// Forward declarations of the platform URL types used by the conversion
// members of KURL below; each is only visible on the matching platform.
#if USE(CF)
typedef const struct __CFURL* CFURLRef;
#endif

#if PLATFORM(MAC)
OBJC_CLASS NSURL;
#endif

#if PLATFORM(QT)
QT_BEGIN_NAMESPACE
class QUrl;
QT_END_NAMESPACE
#endif

namespace WebCore {

class TextEncoding;
struct KURLHash;

// Tag type that selects the KURL constructor taking an already-parsed
// absolute URL string.
enum ParsedURLStringTag { ParsedURLString };

// KURL holds a URL as a String (m_string) together with a validity flag and a
// set of integer component boundaries (m_schemeEnd ... m_fragmentEnd). Some of
// those boundaries are exposed through hostStart(), hostEnd(), pathStart(),
// pathEnd() and pathAfterLastSlash(), which are defined inline later in this
// header.
// NOTE(review): the boundaries look like character offsets into m_string;
// confirm against the parser in the implementation file.
class KURL {
public:
    // Generates a URL which contains a null string.
    KURL() { invalidate(); }

    // The argument is an absolute URL string. The string is assumed to be output of KURL::string() called on a valid
    // KURL object, or indiscernible from such.
    // It is usually best to avoid repeatedly parsing a string, unless memory savings outweigh the possible slow-downs.
    KURL(ParsedURLStringTag, const String&);

    // Constructs the hash table "deleted value" sentinel. Only m_string is
    // initialized by this constructor; no other member is set here.
    explicit KURL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
    // Returns whether the stored string is the hash table deleted value.
    bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }

    // Resolves the relative URL with the given base URL. If provided, the
    // TextEncoding is used to encode non-ASCII characters. The base URL can be
    // null or empty, in which case the relative URL will be interpreted as
    // absolute.
    // FIXME: If the base URL is invalid, this always creates an invalid
    // URL. Instead I think it would be better to treat all invalid base URLs
    // the same way we treat null and empty base URLs.
    KURL(const KURL& base, const String& relative);
    KURL(const KURL& base, const String& relative, const TextEncoding&);

    String strippedForUseAsReferrer() const;

    // FIXME: The above functions should be harmonized so that passing a
    // base of null or the empty string gives the same result as the
    // standard String constructor.

    // Makes a deep copy. Helpful only if you need to use a KURL on another
    // thread. Since the underlying StringImpl objects are immutable, there's
    // no other reason to ever prefer copy() over plain old assignment.
    KURL copy() const;

    // isNull() and isEmpty() forward to the stored string; isValid() returns
    // the m_isValid flag. All three are defined inline later in this header.
    bool isNull() const;
    bool isEmpty() const;
    bool isValid() const;

    // Returns true if this URL has a path. Note that "http://foo.com/" has a
    // path of "/", so this function will return true. Only invalid or
    // non-hierarchical (like "javascript:") URLs will have no path.
    bool hasPath() const;

    // Returns true if you can set the host and port for the URL.
    // Non-hierarchical URLs don't have a host and port.
    bool canSetHostOrPort() const { return isHierarchical(); }

    // Same condition as canSetHostOrPort(): true only for hierarchical URLs.
    bool canSetPathname() const { return isHierarchical(); }
    bool isHierarchical() const;

    // Returns the stored URL string by reference.
    const String& string() const { return m_string; }

    String stringCenterEllipsizedToLength(unsigned length = 1024) const;

    // Component accessors.
    String protocol() const;
    String host() const;
    unsigned short port() const;
    bool hasPort() const;
    String user() const;
    String pass() const;
    String path() const;
    String lastPathComponent() const;
    String query() const;
    String fragmentIdentifier() const;
    bool hasFragmentIdentifier() const;

    String baseAsString() const;

    String fileSystemPath() const;

    // Returns true if the current URL's protocol is the same as the null-
    // terminated ASCII argument. The argument must be lower-case.
    bool protocolIs(const char*) const;
    // Shorthand for protocolIs("data").
    bool protocolIsData() const { return protocolIs("data"); }
    bool protocolIsInHTTPFamily() const;
    bool isLocalFile() const;
    bool isBlankURL() const;

    // Component setters.
    bool setProtocol(const String&);
    void setHost(const String&);

    void removePort();
    void setPort(unsigned short);

    // Input is like "foo.com" or "foo.com:8000".
    void setHostAndPort(const String&);

    void setUser(const String&);
    void setPass(const String&);

    // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
    // will be "/".
    void setPath(const String&);

    // The query may begin with a question mark, or, if not, one will be added
    // for you. Setting the query to the empty string will leave a "?" in the
    // URL (with nothing after it). To clear the query, pass a null string.
    void setQuery(const String&);

    void setFragmentIdentifier(const String&);
    void removeFragmentIdentifier();

    // Free functions granted access to the private members; both are also
    // declared at namespace scope after the class.
    friend bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);

    friend bool protocolHostAndPortAreEqual(const KURL&, const KURL&);

    // Component boundaries, defined inline later in this header.
    unsigned hostStart() const;
    unsigned hostEnd() const;

    unsigned pathStart() const;
    unsigned pathEnd() const;
    unsigned pathAfterLastSlash() const;

    // Implicit conversion to the stored string; equivalent to string().
    operator const String&() const { return string(); }

    // Platform-specific constructors and conversions.
#if USE(CF)
    KURL(CFURLRef);
    RetainPtr<CFURLRef> createCFURL() const;
#endif

#if PLATFORM(MAC)
    KURL(NSURL*);
    operator NSURL*() const;
#endif
#ifdef __OBJC__
    operator NSString*() const { return string(); }
#endif

#if PLATFORM(QT)
    KURL(const QUrl&);
    operator QUrl() const;
#endif

    // Always returns a null pointer in this implementation.
    const KURL* innerURL() const { return 0; }

    // Only declared when NDEBUG is not defined.
#ifndef NDEBUG
    void print() const;
#endif

    bool isSafeToSendToAnotherThread() const;

private:
    // Called by the default constructor, which is documented above as
    // producing a URL that contains a null string.
    void invalidate();
    static bool protocolIs(const String&, const char*);
    void init(const KURL&, const String&, const TextEncoding&);
    void copyToBuffer(Vector<char, 512>& buffer) const;

    // Parses the given URL. The originalString parameter allows for an
    // optimization: When the source is the same as the fixed-up string,
    // it will use the passed-in string instead of allocating a new one.
    void parse(const String&);
    void parse(const char* url, const String* originalString = 0);

    String m_string; // The URL text returned by string().
    bool m_isValid : 1; // Returned by isValid().
    bool m_protocolIsInHTTPFamily : 1; // Returned by protocolIsInHTTPFamily().

    // Component boundaries. The inline accessors in this header use them as
    // follows: hostStart() is derived from m_userStart and m_passwordEnd,
    // hostEnd() returns m_hostEnd, hasPort() is m_hostEnd < m_portEnd,
    // pathStart() returns m_portEnd, hasPath() is m_pathEnd != m_portEnd,
    // pathEnd() returns m_pathEnd and pathAfterLastSlash() returns
    // m_pathAfterLastSlash.
    int m_schemeEnd;
    int m_userStart;
    int m_userEnd;
    int m_passwordEnd;
    int m_hostEnd;
    int m_portEnd;
    int m_pathAfterLastSlash;
    int m_pathEnd;
    int m_queryEnd;
    int m_fragmentEnd;
};

// Comparison operators. They are defined inline later in this header and
// compare the values returned by string().
bool operator==(const KURL&, const KURL&);
bool operator==(const KURL&, const String&);
bool operator==(const String&, const KURL&);
bool operator!=(const KURL&, const KURL&);
bool operator!=(const KURL&, const String&);
bool operator!=(const String&, const KURL&);

bool equalIgnoringFragmentIdentifier(const KURL&, const KURL&);
bool protocolHostAndPortAreEqual(const KURL&, const KURL&);

const KURL& blankURL();

// Functions to do URL operations on strings.
// These are operations that aren't faster on a parsed URL.
// These are also different from the KURL functions in that they don't require the string to be a valid and parsable URL.
// This is especially important because valid javascript URLs are not necessarily considered valid by KURL.

bool protocolIs(const String& url, const char* protocol);
bool protocolIsJavaScript(const String& url);

bool isDefaultPortForProtocol(unsigned short port, const String& protocol);
bool portAllowed(const KURL&); // Blacklist ports that should never be used for Web resources.

bool isValidProtocol(const String&);

String mimeTypeFromDataURL(const String& url);
String mimeTypeFromURL(const KURL&);

// Unescapes the given string using URL escaping rules, given an optional
// encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00"
// in it, the resulting string will have embedded null characters!
254String decodeURLEscapeSequences(const String&); 255String decodeURLEscapeSequences(const String&, const TextEncoding&); 256 257String encodeWithURLEscapeSequences(const String&); 258 259// Inlines. 260 261inline bool operator==(const KURL& a, const KURL& b) 262{ 263 return a.string() == b.string(); 264} 265 266inline bool operator==(const KURL& a, const String& b) 267{ 268 return a.string() == b; 269} 270 271inline bool operator==(const String& a, const KURL& b) 272{ 273 return a == b.string(); 274} 275 276inline bool operator!=(const KURL& a, const KURL& b) 277{ 278 return a.string() != b.string(); 279} 280 281inline bool operator!=(const KURL& a, const String& b) 282{ 283 return a.string() != b; 284} 285 286inline bool operator!=(const String& a, const KURL& b) 287{ 288 return a != b.string(); 289} 290 291// Inline versions of some non-GoogleURL functions so we can get inlining 292// without having to have a lot of ugly ifdefs in the class definition. 293 294inline bool KURL::isNull() const 295{ 296 return m_string.isNull(); 297} 298 299inline bool KURL::isEmpty() const 300{ 301 return m_string.isEmpty(); 302} 303 304inline bool KURL::isValid() const 305{ 306 return m_isValid; 307} 308 309inline bool KURL::hasPath() const 310{ 311 return m_pathEnd != m_portEnd; 312} 313 314inline bool KURL::hasPort() const 315{ 316 return m_hostEnd < m_portEnd; 317} 318 319inline bool KURL::protocolIsInHTTPFamily() const 320{ 321 return m_protocolIsInHTTPFamily; 322} 323 324inline unsigned KURL::hostStart() const 325{ 326 return (m_passwordEnd == m_userStart) ? 
m_passwordEnd : m_passwordEnd + 1; 327} 328 329inline unsigned KURL::hostEnd() const 330{ 331 return m_hostEnd; 332} 333 334inline unsigned KURL::pathStart() const 335{ 336 return m_portEnd; 337} 338 339inline unsigned KURL::pathEnd() const 340{ 341 return m_pathEnd; 342} 343 344inline unsigned KURL::pathAfterLastSlash() const 345{ 346 return m_pathAfterLastSlash; 347} 348 349} // namespace WebCore 350 351namespace WTF { 352 353 // KURLHash is the default hash for String 354 template<typename T> struct DefaultHash; 355 template<> struct DefaultHash<WebCore::KURL> { 356 typedef WebCore::KURLHash Hash; 357 }; 358 359} // namespace WTF 360 361#endif // KURL_h 362