1/*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4 *           (C) 2001 Dirk Mueller (mueller@kde.org)
5 *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB.  If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 */
23
24#include "config.h"
25#include "KURL.h"
26#include "LinkHash.h"
27#include <wtf/text/AtomicString.h>
28#include <wtf/text/StringHash.h>
29#include <wtf/text/WTFString.h>
30
31namespace WebCore {
32
33template <typename CharacterType>
34static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position)
35{
36    if (length < 4)
37        return notFound;
38    size_t loopLimit = length - 3;
39    for (size_t i = position; i < loopLimit; ++i) {
40        if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
41            return i;
42    }
43    return notFound;
44}
45
46template <typename CharacterType>
47static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position)
48{
49    if (length < 2)
50        return notFound;
51    size_t loopLimit = length - 1;
52    for (size_t i = position; i < loopLimit; ++i) {
53        if (characters[i] == '/' && characters[i + 1] == '/')
54            return i;
55    }
56    return notFound;
57}
58
59template <typename CharacterType>
60static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position)
61{
62    if (length < 3)
63        return notFound;
64    size_t loopLimit = length - 2;
65    for (size_t i = position; i < loopLimit; ++i) {
66        if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
67            return i;
68    }
69    return notFound;
70}
71
// Returns true when the first |length| code units of |characters| contain the
// sequence "://" anywhere, false otherwise (including when |length| < 3).
template <typename CharacterType>
static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length)
{
    if (length < 3)
        return false;

    // One past the last position at which a complete "://" can begin.
    const CharacterType* const searchEnd = characters + (length - 2);
    for (const CharacterType* cursor = characters; cursor != searchEnd; ++cursor) {
        if (cursor[0] != ':')
            continue;
        if (cursor[1] == '/' && cursor[2] == '/')
            return true;
    }
    return false;
}
84
85template <typename CharacterType>
86static inline void squeezeOutNullCharacters(Vector<CharacterType, 512>& string)
87{
88    size_t size = string.size();
89    size_t i = 0;
90    for (i = 0; i < size; ++i) {
91        if (!string[i])
92            break;
93    }
94    if (i == size)
95        return;
96    size_t j = i;
97    for (++i; i < size; ++i) {
98        if (CharacterType character = string[i])
99            string[j++] = character;
100    }
101    ASSERT(j < size);
102    string.shrink(j);
103}
104
105template <typename CharacterType>
106static void cleanSlashDotDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
107{
108    size_t slash = firstSlash;
109    do {
110        size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound;
111        // Don't remove the host, i.e. http://foo.org/../foo.html
112        if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) {
113            path[slash] = 0;
114            path[slash + 1] = 0;
115            path[slash + 2] = 0;
116        } else {
117            for (size_t i = previousSlash; i < slash + 3; ++i)
118                path[i] = 0;
119        }
120        slash += 3;
121    } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
122    squeezeOutNullCharacters(path);
123}
124
125template <typename CharacterType>
126static void mergeDoubleSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
127{
128    size_t refPos = find(path.data(), path.size(), '#');
129    if (!refPos || refPos == notFound)
130        refPos = path.size();
131
132    size_t slash = firstSlash;
133    while (slash < refPos) {
134        if (!slash || path[slash - 1] != ':')
135            path[slash++] = 0;
136        else
137            slash += 2;
138        if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
139            break;
140    }
141    squeezeOutNullCharacters(path);
142}
143
144template <typename CharacterType>
145static void cleanSlashDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
146{
147    size_t slash = firstSlash;
148    do {
149        path[slash] = 0;
150        path[slash + 1] = 0;
151        slash += 2;
152    } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
153    squeezeOutNullCharacters(path);
154}
155
156template <typename CharacterType>
157static inline void cleanPath(Vector<CharacterType, 512>& path)
158{
159    // FIXME: Should not do this in the query or anchor part of the URL.
160    size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0);
161    if (firstSlash != notFound)
162        cleanSlashDotDotSlashes(path, firstSlash);
163
164    // FIXME: Should not do this in the query part.
165    firstSlash = findSlashSlash(path.data(), path.size(), 0);
166    if (firstSlash != notFound)
167        mergeDoubleSlashes(path, firstSlash);
168
169    // FIXME: Should not do this in the query or anchor part.
170    firstSlash = findSlashDotSlash(path.data(), path.size(), 0);
171    if (firstSlash != notFound)
172        cleanSlashDotSlashes(path, firstSlash);
173}
174
// Compares |c| with |lowercaseLetter| after setting bit 0x20 in |c|. When
// |lowercaseLetter| is a lowercase ASCII letter this accepts exactly that
// letter and its uppercase form.
template <typename CharacterType>
static inline bool matchLetter(CharacterType c, char lowercaseLetter)
{
    return lowercaseLetter == (c | 0x20);
}
180
// Returns true when |characters| starts with "http:" or "https:" (matched via
// matchLetter, so either letter case) and contains no '/' after the scheme,
// other than an optional "//" immediately following the colon. Inputs shorter
// than 6 code units always yield false.
template <typename CharacterType>
static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length)
{
    if (length < 6)
        return false;

    const bool startsWithHTTP = matchLetter(characters[0], 'h')
        && matchLetter(characters[1], 't')
        && matchLetter(characters[2], 't')
        && matchLetter(characters[3], 'p');
    if (!startsWithHTTP)
        return false;

    // Index of the first character after the scheme's colon.
    unsigned pos;
    if (characters[4] == ':')
        pos = 5;
    else if (matchLetter(characters[4], 's') && characters[5] == ':')
        pos = 6;
    else
        return false;

    // Skip initial two slashes if present.
    if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
        pos += 2;

    // Any further slash means there is already a path.
    for (; pos < length; ++pos) {
        if (characters[pos] == '/')
            return false;
    }
    return true;
}
207
208template <typename CharacterType>
209static ALWAYS_INLINE LinkHash visitedLinkHashInline(const CharacterType* url, unsigned length)
210{
211    return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
212}
213
214LinkHash visitedLinkHash(const String& url)
215{
216    unsigned length = url.length();
217
218    if (length && url.is8Bit())
219        return visitedLinkHashInline(url.characters8(), length);
220    return visitedLinkHashInline(url.characters(), length);
221}
222
223LinkHash visitedLinkHash(const UChar* url, unsigned length)
224{
225    return visitedLinkHashInline(url, length);
226}
227
228template <typename CharacterType>
229static ALWAYS_INLINE void visitedURLInline(const KURL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, 512>& buffer)
230{
231    if (!length)
232        return;
233
234    // This is a poor man's completeURL. Faster with less memory allocation.
235    // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
236    // For example, it does not handle international domain names properly.
237
238    // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
239    //    1) The "://" could be in the query or anchor.
240    //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
241
242    // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
243    // have a query or anchor.
244
245    bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
246
247    if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
248        buffer.append(characters, length);
249        return;
250    }
251
252
253    if (hasColonSlashSlash) {
254        // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
255        // end of the path, *before* the query or anchor.
256        buffer.append(characters, length);
257        buffer.append('/');
258        return;
259    }
260
261    if (!length)
262        buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.string().length());
263    else {
264        switch (characters[0]) {
265            case '/':
266                buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.pathStart());
267                break;
268            case '#':
269                buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.pathEnd());
270                break;
271            default:
272                buffer.append(base.string().getCharactersWithUpconvert<CharacterType>(), base.pathAfterLastSlash());
273                break;
274        }
275    }
276    buffer.append(characters, length);
277    cleanPath(buffer);
278    if (needsTrailingSlash(buffer.data(), buffer.size())) {
279        // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
280        // end of the path, *before* the query or anchor.
281        buffer.append('/');
282    }
283
284    return;
285}
286
287void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
288{
289    return visitedURLInline(base, attributeURL.characters(), attributeURL.length(), buffer);
290}
291
292LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
293{
294    if (attributeURL.isEmpty())
295        return 0;
296
297    if (!base.string().isEmpty() && base.string().is8Bit() && attributeURL.is8Bit()) {
298        Vector<LChar, 512> url;
299        visitedURLInline(base, attributeURL.characters8(), attributeURL.length(), url);
300        if (url.isEmpty())
301            return 0;
302
303        return visitedLinkHashInline(url.data(), url.size());
304    }
305
306    Vector<UChar, 512> url;
307    visitedURLInline(base, attributeURL.characters(), attributeURL.length(), url);
308    if (url.isEmpty())
309        return 0;
310
311    return visitedLinkHashInline(url.data(), url.size());
312}
313
314}  // namespace WebCore
315