1/* 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27#include "UserContentURLPattern.h" 28 29#include "KURL.h" 30#include <wtf/StdLibExtras.h> 31 32namespace WebCore { 33 34bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>& whitelist, const Vector<String>& blacklist) 35{ 36 // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist. 37 // If there is no whitelist at all, then all URLs are assumed to be in the whitelist. 38 bool matchesWhitelist = whitelist.isEmpty(); 39 if (!matchesWhitelist) { 40 size_t whitelistSize = whitelist.size(); 41 for (size_t i = 0; i < whitelistSize; ++i) { 42 UserContentURLPattern contentPattern(whitelist[i]); 43 if (contentPattern.matches(url)) { 44 matchesWhitelist = true; 45 break; 46 } 47 } 48 } 49 50 bool matchesBlacklist = false; 51 if (!blacklist.isEmpty()) { 52 size_t blacklistSize = blacklist.size(); 53 for (size_t i = 0; i < blacklistSize; ++i) { 54 UserContentURLPattern contentPattern(blacklist[i]); 55 if (contentPattern.matches(url)) { 56 matchesBlacklist = true; 57 break; 58 } 59 } 60 } 61 62 return matchesWhitelist && !matchesBlacklist; 63} 64 65bool UserContentURLPattern::parse(const String& pattern) 66{ 67 DEFINE_STATIC_LOCAL(const String, schemeSeparator, (ASCIILiteral("://"))); 68 69 size_t schemeEndPos = pattern.find(schemeSeparator); 70 if (schemeEndPos == notFound) 71 return false; 72 73 m_scheme = pattern.left(schemeEndPos); 74 75 unsigned hostStartPos = schemeEndPos + schemeSeparator.length(); 76 if (hostStartPos >= pattern.length()) 77 return false; 78 79 int pathStartPos = 0; 80 81 if (equalIgnoringCase(m_scheme, "file")) 82 pathStartPos = hostStartPos; 83 else { 84 size_t hostEndPos = pattern.find("/", hostStartPos); 85 if (hostEndPos == notFound) 86 return false; 87 88 m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos); 89 m_matchSubdomains = false; 90 91 if (m_host == "*") { 92 // The pattern can be just '*', which means match all domains. 93 m_host = ""; 94 m_matchSubdomains = true; 95 } else if (m_host.startsWith("*.")) { 96 // The first component can be '*', which means to match all subdomains. 97 m_host = m_host.substring(2); // Length of "*." 98 m_matchSubdomains = true; 99 } 100 101 // No other '*' can occur in the host. 102 if (m_host.find("*") != notFound) 103 return false; 104 105 pathStartPos = hostEndPos; 106 } 107 108 m_path = pattern.right(pattern.length() - pathStartPos); 109 110 return true; 111} 112 113bool UserContentURLPattern::matches(const KURL& test) const 114{ 115 if (m_invalid) 116 return false; 117 118 if (!equalIgnoringCase(test.protocol(), m_scheme)) 119 return false; 120 121 if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test)) 122 return false; 123 124 return matchesPath(test); 125} 126 127bool UserContentURLPattern::matchesHost(const KURL& test) const 128{ 129 const String& host = test.host(); 130 if (equalIgnoringCase(host, m_host)) 131 return true; 132 133 if (!m_matchSubdomains) 134 return false; 135 136 // If we're matching subdomains, and we have no host, that means the pattern 137 // was <scheme>://*/<whatever>, so we match anything. 138 if (!m_host.length()) 139 return true; 140 141 // Check if the domain is a subdomain of our host. 142 if (!host.endsWith(m_host, false)) 143 return false; 144 145 ASSERT(host.length() > m_host.length()); 146 147 // Check that the character before the suffix is a period. 148 return host[host.length() - m_host.length() - 1] == '.'; 149} 150 151struct MatchTester 152{ 153 const String m_pattern; 154 unsigned m_patternIndex; 155 156 const String m_test; 157 unsigned m_testIndex; 158 159 MatchTester(const String& pattern, const String& test) 160 : m_pattern(pattern) 161 , m_patternIndex(0) 162 , m_test(test) 163 , m_testIndex(0) 164 { 165 } 166 167 bool testStringFinished() const { return m_testIndex >= m_test.length(); } 168 bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); } 169 170 void eatWildcard() 171 { 172 while (!patternStringFinished()) { 173 if (m_pattern[m_patternIndex] != '*') 174 return; 175 m_patternIndex++; 176 } 177 } 178 179 void eatSameChars() 180 { 181 while (!patternStringFinished() && !testStringFinished()) { 182 if (m_pattern[m_patternIndex] == '*') 183 return; 184 if (m_pattern[m_patternIndex] != m_test[m_testIndex]) 185 return; 186 m_patternIndex++; 187 m_testIndex++; 188 } 189 } 190 191 bool test() 192 { 193 // Eat all the matching chars. 194 eatSameChars(); 195 196 // If the string is finished, then the pattern must be empty too, or contains 197 // only wildcards. 198 if (testStringFinished()) { 199 eatWildcard(); 200 if (patternStringFinished()) 201 return true; 202 return false; 203 } 204 205 // Pattern is empty but not string, this is not a match. 206 if (patternStringFinished()) 207 return false; 208 209 // If we don't encounter a *, then we're hosed. 210 if (m_pattern[m_patternIndex] != '*') 211 return false; 212 213 while (!testStringFinished()) { 214 MatchTester nextMatch(*this); 215 nextMatch.m_patternIndex++; 216 if (nextMatch.test()) 217 return true; 218 m_testIndex++; 219 } 220 221 // We reached the end of the string. Let's see if the pattern contains only 222 // wildcards. 223 eatWildcard(); 224 return patternStringFinished(); 225 } 226}; 227 228bool UserContentURLPattern::matchesPath(const KURL& test) const 229{ 230 MatchTester match(m_path, test.path()); 231 return match.test(); 232} 233 234} // namespace WebCore 235