1/* 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) 4 * (C) 2001 Dirk Mueller (mueller@kde.org) 5 * (C) 2006 Alexey Proskuryakov (ap@webkit.org) 6 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 */ 23 24#include "config.h" 25#include "KURL.h" 26#include "LinkHash.h" 27#include <wtf/text/AtomicString.h> 28#include <wtf/text/StringHash.h> 29#include <wtf/text/WTFString.h> 30 31namespace WebCore { 32 33template <typename CharacterType> 34static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position) 35{ 36 if (length < 4) 37 return notFound; 38 size_t loopLimit = length - 3; 39 for (size_t i = position; i < loopLimit; ++i) { 40 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/') 41 return i; 42 } 43 return notFound; 44} 45 46template <typename CharacterType> 47static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position) 48{ 49 if (length < 2) 50 return notFound; 51 size_t loopLimit = length - 1; 52 for (size_t i = position; i < loopLimit; ++i) { 53 if (characters[i] == '/' && characters[i + 1] == '/') 54 return i; 55 } 56 return notFound; 57} 58 59template <typename CharacterType> 60static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position) 61{ 62 if (length < 3) 63 return notFound; 64 size_t loopLimit = length - 2; 65 for (size_t i = position; i < loopLimit; ++i) { 66 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/') 67 return i; 68 } 69 return notFound; 70} 71 72template <typename CharacterType> 73static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length) 74{ 75 if (length < 3) 76 return false; 77 unsigned loopLimit = length - 2; 78 for (unsigned i = 0; i < loopLimit; ++i) { 79 if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/') 80 return true; 81 } 82 return false; 83} 84 85template <typename CharacterType> 86static inline void squeezeOutNullCharacters(Vector<CharacterType, 512>& string) 87{ 88 size_t size = string.size(); 89 size_t i = 0; 90 for (i = 0; i < size; ++i) { 91 if (!string[i]) 92 break; 93 } 94 if (i == size) 95 return; 96 size_t j = i; 97 for (++i; i < size; ++i) { 98 if (CharacterType character = string[i]) 99 string[j++] = character; 100 } 101 ASSERT(j < size); 102 string.shrink(j); 103} 104 105template <typename CharacterType> 106static void cleanSlashDotDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) 107{ 108 size_t slash = firstSlash; 109 do { 110 size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound; 111 // Don't remove the host, i.e. http://foo.org/../foo.html 112 if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) { 113 path[slash] = 0; 114 path[slash + 1] = 0; 115 path[slash + 2] = 0; 116 } else { 117 for (size_t i = previousSlash; i < slash + 3; ++i) 118 path[i] = 0; 119 } 120 slash += 3; 121 } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound); 122 squeezeOutNullCharacters(path); 123} 124 125template <typename CharacterType> 126static void mergeDoubleSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) 127{ 128 size_t refPos = find(path.data(), path.size(), '#'); 129 if (!refPos || refPos == notFound) 130 refPos = path.size(); 131 132 size_t slash = firstSlash; 133 while (slash < refPos) { 134 if (!slash || path[slash - 1] != ':') 135 path[slash++] = 0; 136 else 137 slash += 2; 138 if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound) 139 break; 140 } 141 squeezeOutNullCharacters(path); 142} 143 144template <typename CharacterType> 145static void cleanSlashDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash) 146{ 147 size_t slash = firstSlash; 148 do { 149 path[slash] = 0; 150 path[slash + 1] = 0; 151 slash += 2; 152 } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound); 153 squeezeOutNullCharacters(path); 154} 155 156template <typename CharacterType> 157static inline void cleanPath(Vector<CharacterType, 512>& path) 158{ 159 // FIXME: Should not do this in the query or anchor part of the URL. 160 size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0); 161 if (firstSlash != notFound) 162 cleanSlashDotDotSlashes(path, firstSlash); 163 164 // FIXME: Should not do this in the query part. 165 firstSlash = findSlashSlash(path.data(), path.size(), 0); 166 if (firstSlash != notFound) 167 mergeDoubleSlashes(path, firstSlash); 168 169 // FIXME: Should not do this in the query or anchor part. 170 firstSlash = findSlashDotSlash(path.data(), path.size(), 0); 171 if (firstSlash != notFound) 172 cleanSlashDotSlashes(path, firstSlash); 173} 174 175template <typename CharacterType> 176static inline bool matchLetter(CharacterType c, char lowercaseLetter) 177{ 178 return (c | 0x20) == lowercaseLetter; 179} 180 181template <typename CharacterType> 182static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length) 183{ 184 if (length < 6) 185 return false; 186 if (!matchLetter(characters[0], 'h') 187 || !matchLetter(characters[1], 't') 188 || !matchLetter(characters[2], 't') 189 || !matchLetter(characters[3], 'p')) 190 return false; 191 if (!(characters[4] == ':' 192 || (matchLetter(characters[4], 's') && characters[5] == ':'))) 193 return false; 194 195 unsigned pos = characters[4] == ':' ? 5 : 6; 196 197 // Skip initial two slashes if present. 198 if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/') 199 pos += 2; 200 201 // Find next slash. 202 while (pos < length && characters[pos] != '/') 203 ++pos; 204 205 return pos == length; 206} 207 208template <typename CharacterType> 209static ALWAYS_INLINE LinkHash visitedLinkHashInline(const CharacterType* url, unsigned length) 210{ 211 return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length)); 212} 213 214LinkHash visitedLinkHash(const String& url) 215{ 216 unsigned length = url.length(); 217 218 if (length && url.is8Bit()) 219 return visitedLinkHashInline(url.characters8(), length); 220 return visitedLinkHashInline(url.characters(), length); 221} 222 223LinkHash visitedLinkHash(const UChar* url, unsigned length) 224{ 225 return visitedLinkHashInline(url, length); 226} 227 228template <typename CharacterType> 229static ALWAYS_INLINE void visitedURLInline(const KURL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, 512>& buffer) 230{ 231 if (!length) 232 return; 233 234 // This is a poor man's completeURL. Faster with less memory allocation. 235 // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does. 236 // For example, it does not handle international domain names properly. 237 238 // FIXME: It is wrong that we do not do further processing on strings that have "://" in them: 239 // 1) The "://" could be in the query or anchor. 240 // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it. 241 242 // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does 243 // have a query or anchor. 244 245 bool hasColonSlashSlash = containsColonSlashSlash(characters, length); 246 247 if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) { 248 buffer.append(characters, length); 249 return; 250 } 251 252 253 if (hasColonSlashSlash) { 254 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the 255 // end of the path, *before* the query or anchor. 256 buffer.append(characters, length); 257 buffer.append('/'); 258 return; 259 } 260 261 if (!length) 262 buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.string().length()); 263 else { 264 switch (characters[0]) { 265 case '/': 266 buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.pathStart()); 267 break; 268 case '#': 269 buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.pathEnd()); 270 break; 271 default: 272 buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.pathAfterLastSlash()); 273 break; 274 } 275 } 276 buffer.append(characters, length); 277 cleanPath(buffer); 278 if (needsTrailingSlash(buffer.data(), buffer.size())) { 279 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the 280 // end of the path, *before* the query or anchor. 281 buffer.append('/'); 282 } 283 284 return; 285} 286 287void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer) 288{ 289 return visitedURLInline(base, attributeURL.characters(), attributeURL.length(), buffer); 290} 291 292LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL) 293{ 294 if (attributeURL.isEmpty()) 295 return 0; 296 297 if (!base.string().isEmpty() && base.string().is8Bit() && attributeURL.is8Bit()) { 298 Vector<LChar, 512> url; 299 visitedURLInline(base, attributeURL.characters8(), attributeURL.length(), url); 300 if (url.isEmpty()) 301 return 0; 302 303 return visitedLinkHashInline(url.data(), url.size()); 304 } 305 306 Vector<UChar, 512> url; 307 visitedURLInline(base, attributeURL.characters(), attributeURL.length(), url); 308 if (url.isEmpty()) 309 return 0; 310 311 return visitedLinkHashInline(url.data(), url.size()); 312} 313 314} // namespace WebCore 315