1/* 2 * Copyright (C) 2011 Adam Barth. All Rights Reserved. 3 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com). 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "config.h" 28#include "XSSAuditor.h" 29 30#include "Console.h" 31#include "ContentSecurityPolicy.h" 32#include "DOMWindow.h" 33#include "DecodeEscapeSequences.h" 34#include "Document.h" 35#include "DocumentLoader.h" 36#include "FormData.h" 37#include "FormDataList.h" 38#include "Frame.h" 39#include "FrameLoaderClient.h" 40#include "HTMLDocumentParser.h" 41#include "HTMLNames.h" 42#include "HTMLTokenizer.h" 43#include "HTMLParamElement.h" 44#include "HTMLParserIdioms.h" 45#include "InspectorInstrumentation.h" 46#include "InspectorValues.h" 47#include "KURL.h" 48#include "PingLoader.h" 49#include "Settings.h" 50#include "TextEncoding.h" 51#include "TextResourceDecoder.h" 52#include "XLinkNames.h" 53#include "XSSAuditorDelegate.h" 54 55#if ENABLE(SVG) 56#include "SVGNames.h" 57#endif 58 59#include <wtf/Functional.h> 60#include <wtf/MainThread.h> 61#include <wtf/text/CString.h> 62 63namespace WebCore { 64 65using namespace HTMLNames; 66 67static bool isNonCanonicalCharacter(UChar c) 68{ 69 // We remove all non-ASCII characters, including non-printable ASCII characters. 70 // 71 // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. 72 // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the 73 // adverse effect that we remove any legitimate zeros from a string. 74 // 75 // For instance: new String("http://localhost:8000") => new String("http://localhost:8"). 76 return (c == '\\' || c == '0' || c == '\0' || c >= 127); 77} 78 79static String canonicalize(const String& string) 80{ 81 return string.removeCharacters(&isNonCanonicalCharacter); 82} 83 84static bool isRequiredForInjection(UChar c) 85{ 86 return (c == '\'' || c == '"' || c == '<' || c == '>'); 87} 88 89static bool isTerminatingCharacter(UChar c) 90{ 91 return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || c == '>' || c == ','); 92} 93 94static bool isHTMLQuote(UChar c) 95{ 96 return (c == '"' || c == '\''); 97} 98 99static bool isJSNewline(UChar c) 100{ 101 // Per ecma-262 section 7.3 Line Terminators. 102 return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); 103} 104 105static bool startsHTMLCommentAt(const String& string, size_t start) 106{ 107 return (start + 3 < string.length() && string[start] == '<' && string[start+1] == '!' && string[start+2] == '-' && string[start+3] == '-'); 108} 109 110static bool startsSingleLineCommentAt(const String& string, size_t start) 111{ 112 return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '/'); 113} 114 115static bool startsMultiLineCommentAt(const String& string, size_t start) 116{ 117 return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '*'); 118} 119 120// If other files need this, we should move this to HTMLParserIdioms.h 121template<size_t inlineCapacity> 122bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname) 123{ 124 return equalIgnoringNullity(vector, qname.localName().impl()); 125} 126 127static bool hasName(const HTMLToken& token, const QualifiedName& name) 128{ 129 return threadSafeMatch(token.name(), name); 130} 131 132static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute) 133{ 134 // Notice that we're careful not to ref the StringImpl here because we might be on a background thread. 135 const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string(); 136 137 for (size_t i = 0; i < token.attributes().size(); ++i) { 138 if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) { 139 indexOfMatchingAttribute = i; 140 return true; 141 } 142 } 143 return false; 144} 145 146static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) 147{ 148 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut. 149 if (name.size() < lengthOfShortestInlineEventHandlerName) 150 return false; 151 return name[0] == 'o' && name[1] == 'n'; 152} 153 154static bool isDangerousHTTPEquiv(const String& value) 155{ 156 String equiv = value.stripWhiteSpace(); 157 return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie"); 158} 159 160static inline String decode16BitUnicodeEscapeSequences(const String& string) 161{ 162 // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit. 163 return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, UTF8Encoding()); 164} 165 166static inline String decodeStandardURLEscapeSequences(const String& string, const TextEncoding& encoding) 167{ 168 // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in KURL.h) to 169 // avoid platform-specific URL decoding differences (e.g. KURLGoogle). 170 return decodeEscapeSequences<URLEscapeSequence>(string, encoding); 171} 172 173static String fullyDecodeString(const String& string, const TextEncoding& encoding) 174{ 175 size_t oldWorkingStringLength; 176 String workingString = string; 177 do { 178 oldWorkingStringLength = workingString.length(); 179 workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding)); 180 } while (workingString.length() < oldWorkingStringLength); 181 workingString.replace('+', ' '); 182 workingString = canonicalize(workingString); 183 return workingString; 184} 185 186static ContentSecurityPolicy::ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ContentSecurityPolicy::ReflectedXSSDisposition xssProtection, ContentSecurityPolicy::ReflectedXSSDisposition reflectedXSS) 187{ 188 ContentSecurityPolicy::ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS); 189 190 if (result == ContentSecurityPolicy::ReflectedXSSInvalid || result == ContentSecurityPolicy::FilterReflectedXSS || result == ContentSecurityPolicy::ReflectedXSSUnset) 191 return ContentSecurityPolicy::FilterReflectedXSS; 192 193 return result; 194} 195 196static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute) 197{ 198#if ENABLE(SVG) 199 return threadSafeMatch(attribute.name, SVGNames::valuesAttr); 200#else 201 return false; 202#endif 203} 204 205static bool semicolonSeparatedValueContainsJavaScriptURL(const String& value) 206{ 207 Vector<String> valueList; 208 value.split(';', valueList); 209 for (size_t i = 0; i < valueList.size(); ++i) { 210 if (protocolIsJavaScript(valueList[i])) 211 return true; 212 } 213 return false; 214} 215 216XSSAuditor::XSSAuditor() 217 : m_isEnabled(false) 218 , m_xssProtection(ContentSecurityPolicy::FilterReflectedXSS) 219 , m_didSendValidCSPHeader(false) 220 , m_didSendValidXSSProtectionHeader(false) 221 , m_state(Uninitialized) 222 , m_scriptTagNestingLevel(0) 223 , m_encoding(UTF8Encoding()) 224{ 225 // Although tempting to call init() at this point, the various objects 226 // we want to reference might not all have been constructed yet. 227} 228 229void XSSAuditor::initForFragment() 230{ 231 ASSERT(isMainThread()); 232 ASSERT(m_state == Uninitialized); 233 m_state = Initialized; 234 // When parsing a fragment, we don't enable the XSS auditor because it's 235 // too much overhead. 236 ASSERT(!m_isEnabled); 237} 238 239void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) 240{ 241 const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter. 242 const int suffixTreeDepth = 5; 243 244 ASSERT(isMainThread()); 245 if (m_state == Initialized) 246 return; 247 ASSERT(m_state == Uninitialized); 248 m_state = Initialized; 249 250 if (Frame* frame = document->frame()) 251 if (Settings* settings = frame->settings()) 252 m_isEnabled = settings->xssAuditorEnabled(); 253 254 if (!m_isEnabled) 255 return; 256 257 m_documentURL = document->url().copy(); 258 259 // In theory, the Document could have detached from the Frame after the 260 // XSSAuditor was constructed. 261 if (!document->frame()) { 262 m_isEnabled = false; 263 return; 264 } 265 266 if (m_documentURL.isEmpty()) { 267 // The URL can be empty when opening a new browser window or calling window.open(""). 268 m_isEnabled = false; 269 return; 270 } 271 272 if (m_documentURL.protocolIsData()) { 273 m_isEnabled = false; 274 return; 275 } 276 277 if (document->decoder()) 278 m_encoding = document->decoder()->encoding(); 279 280 m_decodedURL = fullyDecodeString(m_documentURL.string(), m_encoding); 281 if (m_decodedURL.find(isRequiredForInjection) == notFound) 282 m_decodedURL = String(); 283 284 String httpBodyAsString; 285 if (DocumentLoader* documentLoader = document->frame()->loader()->documentLoader()) { 286 DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, (ASCIILiteral("X-XSS-Protection"))); 287 String headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader); 288 String errorDetails; 289 unsigned errorPosition = 0; 290 String reportURL; 291 KURL xssProtectionReportURL; 292 293 // Process the X-XSS-Protection header, then mix in the CSP header's value. 294 ContentSecurityPolicy::ReflectedXSSDisposition xssProtectionHeader = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, reportURL); 295 m_didSendValidXSSProtectionHeader = xssProtectionHeader != ContentSecurityPolicy::ReflectedXSSUnset && xssProtectionHeader != ContentSecurityPolicy::ReflectedXSSInvalid; 296 if ((xssProtectionHeader == ContentSecurityPolicy::FilterReflectedXSS || xssProtectionHeader == ContentSecurityPolicy::BlockReflectedXSS) && !reportURL.isEmpty()) { 297 xssProtectionReportURL = document->completeURL(reportURL); 298 if (MixedContentChecker::isMixedContent(document->securityOrigin(), xssProtectionReportURL)) { 299 errorDetails = "insecure reporting URL for secure page"; 300 xssProtectionHeader = ContentSecurityPolicy::ReflectedXSSInvalid; 301 xssProtectionReportURL = KURL(); 302 } 303 } 304 if (xssProtectionHeader == ContentSecurityPolicy::ReflectedXSSInvalid) 305 document->addConsoleMessage(SecurityMessageSource, ErrorMessageLevel, "Error parsing header X-XSS-Protection: " + headerValue + ": " + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied."); 306 307 ContentSecurityPolicy::ReflectedXSSDisposition cspHeader = document->contentSecurityPolicy()->reflectedXSSDisposition(); 308 m_didSendValidCSPHeader = cspHeader != ContentSecurityPolicy::ReflectedXSSUnset && cspHeader != ContentSecurityPolicy::ReflectedXSSInvalid; 309 310 m_xssProtection = combineXSSProtectionHeaderAndCSP(xssProtectionHeader, cspHeader); 311 // FIXME: Combine the two report URLs in some reasonable way. 312 if (auditorDelegate) 313 auditorDelegate->setReportURL(xssProtectionReportURL.copy()); 314 FormData* httpBody = documentLoader->originalRequest().httpBody(); 315 if (httpBody && !httpBody->isEmpty()) { 316 httpBodyAsString = httpBody->flattenToString(); 317 if (!httpBodyAsString.isEmpty()) { 318 m_decodedHTTPBody = fullyDecodeString(httpBodyAsString, m_encoding); 319 if (m_decodedHTTPBody.find(isRequiredForInjection) == notFound) 320 m_decodedHTTPBody = String(); 321 if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree) 322 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth)); 323 } 324 } 325 } 326 327 if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) { 328 m_isEnabled = false; 329 return; 330 } 331} 332 333PassOwnPtr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request) 334{ 335 ASSERT(m_state == Initialized); 336 if (!m_isEnabled || m_xssProtection == ContentSecurityPolicy::AllowReflectedXSS) 337 return nullptr; 338 339 bool didBlockScript = false; 340 if (request.token.type() == HTMLToken::StartTag) 341 didBlockScript = filterStartToken(request); 342 else if (m_scriptTagNestingLevel) { 343 if (request.token.type() == HTMLToken::Character) 344 didBlockScript = filterCharacterToken(request); 345 else if (request.token.type() == HTMLToken::EndTag) 346 filterEndToken(request); 347 } 348 349 if (didBlockScript) { 350 bool didBlockEntirePage = (m_xssProtection == ContentSecurityPolicy::BlockReflectedXSS); 351 OwnPtr<XSSInfo> xssInfo = XSSInfo::create(didBlockEntirePage, m_didSendValidXSSProtectionHeader, m_didSendValidCSPHeader); 352 return xssInfo.release(); 353 } 354 return nullptr; 355} 356 357bool XSSAuditor::filterStartToken(const FilterTokenRequest& request) 358{ 359 bool didBlockScript = eraseDangerousAttributesIfInjected(request); 360 361 if (hasName(request.token, scriptTag)) { 362 didBlockScript |= filterScriptToken(request); 363 ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel); 364 m_scriptTagNestingLevel++; 365 } else if (hasName(request.token, objectTag)) 366 didBlockScript |= filterObjectToken(request); 367 else if (hasName(request.token, paramTag)) 368 didBlockScript |= filterParamToken(request); 369 else if (hasName(request.token, embedTag)) 370 didBlockScript |= filterEmbedToken(request); 371 else if (hasName(request.token, appletTag)) 372 didBlockScript |= filterAppletToken(request); 373 else if (hasName(request.token, iframeTag)) 374 didBlockScript |= filterIframeToken(request); 375 else if (hasName(request.token, metaTag)) 376 didBlockScript |= filterMetaToken(request); 377 else if (hasName(request.token, baseTag)) 378 didBlockScript |= filterBaseToken(request); 379 else if (hasName(request.token, formTag)) 380 didBlockScript |= filterFormToken(request); 381 else if (hasName(request.token, inputTag)) 382 didBlockScript |= filterInputToken(request); 383 else if (hasName(request.token, buttonTag)) 384 didBlockScript |= filterButtonToken(request); 385 386 return didBlockScript; 387} 388 389void XSSAuditor::filterEndToken(const FilterTokenRequest& request) 390{ 391 ASSERT(m_scriptTagNestingLevel); 392 if (hasName(request.token, scriptTag)) { 393 m_scriptTagNestingLevel--; 394 ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel); 395 } 396} 397 398bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) 399{ 400 ASSERT(m_scriptTagNestingLevel); 401 if (isContainedInRequest(m_cachedDecodedSnippet) && isContainedInRequest(decodedSnippetForJavaScript(request))) { 402 request.token.eraseCharacters(); 403 request.token.appendToCharacter(' '); // Technically, character tokens can't be empty. 404 return true; 405 } 406 return false; 407} 408 409bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) 410{ 411 ASSERT(request.token.type() == HTMLToken::StartTag); 412 ASSERT(hasName(request.token, scriptTag)); 413 414 m_cachedDecodedSnippet = decodedSnippetForName(request); 415 416 bool didBlockScript = false; 417 if (isContainedInRequest(decodedSnippetForName(request))) { 418 didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute); 419 didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttribute); 420 } 421 422 return didBlockScript; 423} 424 425bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request) 426{ 427 ASSERT(request.token.type() == HTMLToken::StartTag); 428 ASSERT(hasName(request.token, objectTag)); 429 430 bool didBlockScript = false; 431 if (isContainedInRequest(decodedSnippetForName(request))) { 432 didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttribute); 433 didBlockScript |= eraseAttributeIfInjected(request, typeAttr); 434 didBlockScript |= eraseAttributeIfInjected(request, classidAttr); 435 } 436 return didBlockScript; 437} 438 439bool XSSAuditor::filterParamToken(const FilterTokenRequest& request) 440{ 441 ASSERT(request.token.type() == HTMLToken::StartTag); 442 ASSERT(hasName(request.token, paramTag)); 443 444 size_t indexOfNameAttribute; 445 if (!findAttributeWithName(request.token, nameAttr, indexOfNameAttribute)) 446 return false; 447 448 const HTMLToken::Attribute& nameAttribute = request.token.attributes().at(indexOfNameAttribute); 449 if (!HTMLParamElement::isURLParameter(String(nameAttribute.value))) 450 return false; 451 452 return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttribute); 453} 454 455bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request) 456{ 457 ASSERT(request.token.type() == HTMLToken::StartTag); 458 ASSERT(hasName(request.token, embedTag)); 459 460 bool didBlockScript = false; 461 if (isContainedInRequest(decodedSnippetForName(request))) { 462 didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute); 463 didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute); 464 didBlockScript |= eraseAttributeIfInjected(request, typeAttr); 465 } 466 return didBlockScript; 467} 468 469bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request) 470{ 471 ASSERT(request.token.type() == HTMLToken::StartTag); 472 ASSERT(hasName(request.token, appletTag)); 473 474 bool didBlockScript = false; 475 if (isContainedInRequest(decodedSnippetForName(request))) { 476 didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute); 477 didBlockScript |= eraseAttributeIfInjected(request, objectAttr); 478 } 479 return didBlockScript; 480} 481 482bool XSSAuditor::filterIframeToken(const FilterTokenRequest& request) 483{ 484 ASSERT(request.token.type() == HTMLToken::StartTag); 485 ASSERT(hasName(request.token, iframeTag)); 486 487 bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttribute); 488 if (isContainedInRequest(decodedSnippetForName(request))) 489 didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttribute); 490 491 return didBlockScript; 492} 493 494bool XSSAuditor::filterMetaToken(const FilterTokenRequest& request) 495{ 496 ASSERT(request.token.type() == HTMLToken::StartTag); 497 ASSERT(hasName(request.token, metaTag)); 498 499 return eraseAttributeIfInjected(request, http_equivAttr); 500} 501 502bool XSSAuditor::filterBaseToken(const FilterTokenRequest& request) 503{ 504 ASSERT(request.token.type() == HTMLToken::StartTag); 505 ASSERT(hasName(request.token, baseTag)); 506 507 return eraseAttributeIfInjected(request, hrefAttr); 508} 509 510bool XSSAuditor::filterFormToken(const FilterTokenRequest& request) 511{ 512 ASSERT(request.token.type() == HTMLToken::StartTag); 513 ASSERT(hasName(request.token, formTag)); 514 515 return eraseAttributeIfInjected(request, actionAttr, blankURL().string()); 516} 517 518bool XSSAuditor::filterInputToken(const FilterTokenRequest& request) 519{ 520 ASSERT(request.token.type() == HTMLToken::StartTag); 521 ASSERT(hasName(request.token, inputTag)); 522 523 return eraseAttributeIfInjected(request, formactionAttr, blankURL().string(), SrcLikeAttribute); 524} 525 526bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request) 527{ 528 ASSERT(request.token.type() == HTMLToken::StartTag); 529 ASSERT(hasName(request.token, buttonTag)); 530 531 return eraseAttributeIfInjected(request, formactionAttr, blankURL().string(), SrcLikeAttribute); 532} 533 534bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request) 535{ 536 DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, (ASCIILiteral("javascript:void(0)"))); 537 538 bool didBlockScript = false; 539 for (size_t i = 0; i < request.token.attributes().size(); ++i) { 540 const HTMLToken::Attribute& attribute = request.token.attributes().at(i); 541 bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.name); 542 // FIXME: It would be better if we didn't create a new String for every attribute in the document. 543 String strippedValue = stripLeadingAndTrailingHTMLSpaces(String(attribute.value)); 544 bool valueContainsJavaScriptURL = (!isInlineEventHandler && protocolIsJavaScript(strippedValue)) || (isSemicolonSeparatedAttribute(attribute) && semicolonSeparatedValueContainsJavaScriptURL(strippedValue)); 545 if (!isInlineEventHandler && !valueContainsJavaScriptURL) 546 continue; 547 if (!isContainedInRequest(decodedSnippetForAttribute(request, attribute, ScriptLikeAttribute))) 548 continue; 549 request.token.eraseValueOfAttribute(i); 550 if (valueContainsJavaScriptURL) 551 request.token.appendToAttributeValue(i, safeJavaScriptURL); 552 didBlockScript = true; 553 } 554 return didBlockScript; 555} 556 557bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, AttributeKind treatment) 558{ 559 size_t indexOfAttribute = 0; 560 if (findAttributeWithName(request.token, attributeName, indexOfAttribute)) { 561 const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute); 562 if (isContainedInRequest(decodedSnippetForAttribute(request, attribute, treatment))) { 563 if (threadSafeMatch(attributeName, srcAttr) && isLikelySafeResource(String(attribute.value))) 564 return false; 565 if (threadSafeMatch(attributeName, http_equivAttr) && !isDangerousHTTPEquiv(String(attribute.value))) 566 return false; 567 request.token.eraseValueOfAttribute(indexOfAttribute); 568 if (!replacementValue.isEmpty()) 569 request.token.appendToAttributeValue(indexOfAttribute, replacementValue); 570 return true; 571 } 572 } 573 return false; 574} 575 576String XSSAuditor::decodedSnippetForName(const FilterTokenRequest& request) 577{ 578 // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<"). 579 return fullyDecodeString(request.sourceTracker.sourceForToken(request.token), m_encoding).substring(0, request.token.name().size() + 1); 580} 581 582String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute, AttributeKind treatment) 583{ 584 // The range doesn't inlcude the character which terminates the value. So, 585 // for an input of |name="value"|, the snippet is |name="value|. For an 586 // unquoted input of |name=value |, the snippet is |name=value|. 587 // FIXME: We should grab one character before the name also. 588 int start = attribute.nameRange.start - request.token.startIndex(); 589 int end = attribute.valueRange.end - request.token.startIndex(); 590 String decodedSnippet = fullyDecodeString(request.sourceTracker.sourceForToken(request.token).substring(start, end - start), m_encoding); 591 decodedSnippet.truncate(kMaximumFragmentLengthTarget); 592 if (treatment == SrcLikeAttribute) { 593 int slashCount = 0; 594 bool commaSeen = false; 595 // In HTTP URLs, characters following the first ?, #, or third slash may come from 596 // the page itself and can be merely ignored by an attacker's server when a remote 597 // script or script-like resource is requested. In DATA URLS, the payload starts at 598 // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters 599 // following this may come from the page itself and may be ignored when the script is 600 // executed. For simplicity, we don't differentiate based on URL scheme, and stop at 601 // the first # or ?, the third slash, or the first slash or < once a comma is seen. 602 for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) { 603 UChar currentChar = decodedSnippet[currentLength]; 604 if (currentChar == '?' 605 || currentChar == '#' 606 || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2)) 607 || (currentChar == '<' && commaSeen)) { 608 decodedSnippet.truncate(currentLength); 609 break; 610 } 611 if (currentChar == ',') 612 commaSeen = true; 613 } 614 } else if (treatment == ScriptLikeAttribute) { 615 // Beware of trailing characters which came from the page itself, not the 616 // injected vector. Excluding the terminating character covers common cases 617 // where the page immediately ends the attribute, but doesn't cover more 618 // complex cases where there is other page data following the injection. 619 // Generally, these won't parse as javascript, so the injected vector 620 // typically excludes them from consideration via a single-line comment or 621 // by enclosing them in a string literal terminated later by the page's own 622 // closing punctuation. Since the snippet has not been parsed, the vector 623 // may also try to introduce these via entities. As a result, we'd like to 624 // stop before the first "//", the first <!--, the first entity, or the first 625 // quote not immediately following the first equals sign (taking whitespace 626 // into consideration). To keep things simpler, we don't try to distinguish 627 // between entity-introducing amperands vs. other uses, nor do we bother to 628 // check for a second slash for a comment, nor do we bother to check for 629 // !-- following a less-than sign. We stop instead on any ampersand 630 // slash, or less-than sign. 631 size_t position = 0; 632 if ((position = decodedSnippet.find("=")) != notFound 633 && (position = decodedSnippet.find(isNotHTMLSpace, position + 1)) != notFound 634 && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != notFound) { 635 decodedSnippet.truncate(position); 636 } 637 } 638 return decodedSnippet; 639} 640 641String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request) 642{ 643 String string = request.sourceTracker.sourceForToken(request.token); 644 size_t startPosition = 0; 645 size_t endPosition = string.length(); 646 size_t foundPosition = notFound; 647 648 // Skip over initial comments to find start of code. 649 while (startPosition < endPosition) { 650 while (startPosition < endPosition && isHTMLSpace(string[startPosition])) 651 startPosition++; 652 653 // Under SVG/XML rules, only HTML comment syntax matters and the parser returns 654 // these as a separate comment tokens. Having consumed whitespace, we need not look 655 // further for these. 656 if (request.shouldAllowCDATA) 657 break; 658 659 // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML 660 // comment ends at the end of the line, not with -->. 661 if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) { 662 while (startPosition < endPosition && !isJSNewline(string[startPosition])) 663 startPosition++; 664 } else if (startsMultiLineCommentAt(string, startPosition)) { 665 if (startPosition + 2 < endPosition && (foundPosition = string.find("*/", startPosition + 2)) != notFound) 666 startPosition = foundPosition + 2; 667 else 668 startPosition = endPosition; 669 } else 670 break; 671 } 672 673 String result; 674 while (startPosition < endPosition && !result.length()) { 675 // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we 676 // encounter a comma, or when we exceed the maximum length target. The comma rule 677 // covers a common parameter concatenation case performed by some webservers. 678 // After hitting the length target, we can only stop at a point where we know we are 679 // not in the middle of a %-escape sequence. For the sake of simplicity, approximate 680 // not stopping inside a (possibly multiply encoded) %-esacpe sequence by breaking on 681 // whitespace only. We should have enough text in these cases to avoid false positives. 682 for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) { 683 if (!request.shouldAllowCDATA) { 684 if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) { 685 foundPosition += 2; 686 break; 687 } 688 if (startsHTMLCommentAt(string, foundPosition)) { 689 foundPosition += 4; 690 break; 691 } 692 } 693 if (string[foundPosition] == ',' || (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace(string[foundPosition]))) { 694 break; 695 } 696 } 697 698 result = fullyDecodeString(string.substring(startPosition, foundPosition - startPosition), m_encoding); 699 startPosition = foundPosition + 1; 700 } 701 return result; 702} 703 704bool XSSAuditor::isContainedInRequest(const String& decodedSnippet) 705{ 706 if (decodedSnippet.isEmpty()) 707 return false; 708 if (m_decodedURL.find(decodedSnippet, 0, false) != notFound) 709 return true; 710 if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(decodedSnippet)) 711 return false; 712 return m_decodedHTTPBody.find(decodedSnippet, 0, false) != notFound; 713} 714 715bool XSSAuditor::isLikelySafeResource(const String& url) 716{ 717 // Give empty URLs and about:blank a pass. Making a resourceURL from an 718 // empty string below will likely later fail the "no query args test" as 719 // it inherits the document's query args. 720 if (url.isEmpty() || url == blankURL().string()) 721 return true; 722 723 // If the resource is loaded from the same host as the enclosing page, it's 724 // probably not an XSS attack, so we reduce false positives by allowing the 725 // request, ignoring scheme and port considerations. If the resource has a 726 // query string, we're more suspicious, however, because that's pretty rare 727 // and the attacker might be able to trick a server-side script into doing 728 // something dangerous with the query string. 729 if (m_documentURL.host().isEmpty()) 730 return false; 731 732 KURL resourceURL(m_documentURL, url); 733 return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty()); 734} 735 736bool XSSAuditor::isSafeToSendToAnotherThread() const 737{ 738 return m_documentURL.isSafeToSendToAnotherThread() 739 && m_decodedURL.isSafeToSendToAnotherThread() 740 && m_decodedHTTPBody.isSafeToSendToAnotherThread() 741 && m_cachedDecodedSnippet.isSafeToSendToAnotherThread(); 742} 743 744} // namespace WebCore 745