1/*
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "UserContentURLPattern.h"
28
29#include "KURL.h"
30#include <wtf/StdLibExtras.h>
31
32namespace WebCore {
33
34bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>& whitelist, const Vector<String>& blacklist)
35{
36    // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
37    // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
38    bool matchesWhitelist = whitelist.isEmpty();
39    if (!matchesWhitelist) {
40        size_t whitelistSize = whitelist.size();
41        for (size_t i = 0; i < whitelistSize; ++i) {
42            UserContentURLPattern contentPattern(whitelist[i]);
43            if (contentPattern.matches(url)) {
44                matchesWhitelist = true;
45                break;
46            }
47        }
48    }
49
50    bool matchesBlacklist = false;
51    if (!blacklist.isEmpty()) {
52        size_t blacklistSize = blacklist.size();
53        for (size_t i = 0; i < blacklistSize; ++i) {
54            UserContentURLPattern contentPattern(blacklist[i]);
55            if (contentPattern.matches(url)) {
56                matchesBlacklist = true;
57                break;
58            }
59        }
60    }
61
62    return matchesWhitelist && !matchesBlacklist;
63}
64
65bool UserContentURLPattern::parse(const String& pattern)
66{
67    DEFINE_STATIC_LOCAL(const String, schemeSeparator, (ASCIILiteral("://")));
68
69    size_t schemeEndPos = pattern.find(schemeSeparator);
70    if (schemeEndPos == notFound)
71        return false;
72
73    m_scheme = pattern.left(schemeEndPos);
74
75    unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
76    if (hostStartPos >= pattern.length())
77        return false;
78
79    int pathStartPos = 0;
80
81    if (equalIgnoringCase(m_scheme, "file"))
82        pathStartPos = hostStartPos;
83    else {
84        size_t hostEndPos = pattern.find("/", hostStartPos);
85        if (hostEndPos == notFound)
86            return false;
87
88        m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
89        m_matchSubdomains = false;
90
91        if (m_host == "*") {
92            // The pattern can be just '*', which means match all domains.
93            m_host = "";
94            m_matchSubdomains = true;
95        } else if (m_host.startsWith("*.")) {
96            // The first component can be '*', which means to match all subdomains.
97            m_host = m_host.substring(2); // Length of "*."
98            m_matchSubdomains = true;
99        }
100
101        // No other '*' can occur in the host.
102        if (m_host.find("*") != notFound)
103            return false;
104
105        pathStartPos = hostEndPos;
106    }
107
108    m_path = pattern.right(pattern.length() - pathStartPos);
109
110    return true;
111}
112
113bool UserContentURLPattern::matches(const KURL& test) const
114{
115    if (m_invalid)
116        return false;
117
118    if (!equalIgnoringCase(test.protocol(), m_scheme))
119        return false;
120
121    if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
122        return false;
123
124    return matchesPath(test);
125}
126
127bool UserContentURLPattern::matchesHost(const KURL& test) const
128{
129    const String& host = test.host();
130    if (equalIgnoringCase(host, m_host))
131        return true;
132
133    if (!m_matchSubdomains)
134        return false;
135
136    // If we're matching subdomains, and we have no host, that means the pattern
137    // was <scheme>://*/<whatever>, so we match anything.
138    if (!m_host.length())
139        return true;
140
141    // Check if the domain is a subdomain of our host.
142    if (!host.endsWith(m_host, false))
143        return false;
144
145    ASSERT(host.length() > m_host.length());
146
147    // Check that the character before the suffix is a period.
148    return host[host.length() - m_host.length() - 1] == '.';
149}
150
151struct MatchTester
152{
153    const String m_pattern;
154    unsigned m_patternIndex;
155
156    const String m_test;
157    unsigned m_testIndex;
158
159    MatchTester(const String& pattern, const String& test)
160    : m_pattern(pattern)
161    , m_patternIndex(0)
162    , m_test(test)
163    , m_testIndex(0)
164    {
165    }
166
167    bool testStringFinished() const { return m_testIndex >= m_test.length(); }
168    bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
169
170    void eatWildcard()
171    {
172        while (!patternStringFinished()) {
173            if (m_pattern[m_patternIndex] != '*')
174                return;
175            m_patternIndex++;
176        }
177    }
178
179    void eatSameChars()
180    {
181        while (!patternStringFinished() && !testStringFinished()) {
182            if (m_pattern[m_patternIndex] == '*')
183                return;
184            if (m_pattern[m_patternIndex] != m_test[m_testIndex])
185                return;
186            m_patternIndex++;
187            m_testIndex++;
188        }
189    }
190
191    bool test()
192    {
193        // Eat all the matching chars.
194        eatSameChars();
195
196        // If the string is finished, then the pattern must be empty too, or contains
197        // only wildcards.
198        if (testStringFinished()) {
199            eatWildcard();
200            if (patternStringFinished())
201                return true;
202            return false;
203        }
204
205        // Pattern is empty but not string, this is not a match.
206        if (patternStringFinished())
207            return false;
208
209        // If we don't encounter a *, then we're hosed.
210        if (m_pattern[m_patternIndex] != '*')
211            return false;
212
213        while (!testStringFinished()) {
214            MatchTester nextMatch(*this);
215            nextMatch.m_patternIndex++;
216            if (nextMatch.test())
217                return true;
218            m_testIndex++;
219        }
220
221        // We reached the end of the string.  Let's see if the pattern contains only
222        // wildcards.
223        eatWildcard();
224        return patternStringFinished();
225    }
226};
227
228bool UserContentURLPattern::matchesPath(const KURL& test) const
229{
230    MatchTester match(m_path, test.path());
231    return match.test();
232}
233
234} // namespace WebCore
235