1/* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27#include "HTMLDocumentParser.h" 28 29#include "AtomicHTMLToken.h" 30#include "BackgroundHTMLParser.h" 31#include "CompactHTMLToken.h" 32#include "ContentSecurityPolicy.h" 33#include "DocumentFragment.h" 34#include "DocumentLoader.h" 35#include "Element.h" 36#include "Frame.h" 37#include "HTMLIdentifier.h" 38#include "HTMLNames.h" 39#include "HTMLParserScheduler.h" 40#include "HTMLParserThread.h" 41#include "HTMLTokenizer.h" 42#include "HTMLPreloadScanner.h" 43#include "HTMLScriptRunner.h" 44#include "HTMLTreeBuilder.h" 45#include "HTMLDocument.h" 46#include "InspectorInstrumentation.h" 47#include "NestingLevelIncrementer.h" 48#include "Settings.h" 49#include <wtf/Functional.h> 50 51namespace WebCore { 52 53using namespace HTMLNames; 54 55// This is a direct transcription of step 4 from: 56// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case 57static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors, const HTMLParserOptions& options) 58{ 59 if (!contextElement) 60 return HTMLTokenizer::DataState; 61 62 const QualifiedName& contextTag = contextElement->tagQName(); 63 64 if (contextTag.matches(titleTag) || contextTag.matches(textareaTag)) 65 return HTMLTokenizer::RCDATAState; 66 if (contextTag.matches(styleTag) 67 || contextTag.matches(xmpTag) 68 || contextTag.matches(iframeTag) 69 || (contextTag.matches(noembedTag) && options.pluginsEnabled) 70 || (contextTag.matches(noscriptTag) && options.scriptEnabled) 71 || contextTag.matches(noframesTag)) 72 return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState; 73 if (contextTag.matches(scriptTag)) 74 return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState; 75 if (contextTag.matches(plaintextTag)) 76 return HTMLTokenizer::PLAINTEXTState; 77 return HTMLTokenizer::DataState; 78} 79 80HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors) 81 : ScriptableDocumentParser(document) 82 , m_options(document) 83 , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken)) 84 , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options)) 85 , m_scriptRunner(HTMLScriptRunner::create(document, this)) 86 , m_treeBuilder(HTMLTreeBuilder::create(this, document, parserContentPolicy(), reportErrors, m_options)) 87 , m_parserScheduler(HTMLParserScheduler::create(this)) 88 , m_xssAuditorDelegate(document) 89#if ENABLE(THREADED_HTML_PARSER) 90 , m_weakFactory(this) 91#endif 92 , m_preloader(adoptPtr(new HTMLResourcePreloader(document))) 93 , m_isPinnedToMainThread(false) 94 , m_endWasDelayed(false) 95 , m_haveBackgroundParser(false) 96 , m_pumpSessionNestingLevel(0) 97{ 98 ASSERT(shouldUseThreading() || (m_token && m_tokenizer)); 99} 100 101// FIXME: Member variables should be grouped into self-initializing structs to 102// minimize code duplication between these constructors. 103HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 104 : ScriptableDocumentParser(fragment->document(), parserContentPolicy) 105 , m_options(fragment->document()) 106 , m_token(adoptPtr(new HTMLToken)) 107 , m_tokenizer(HTMLTokenizer::create(m_options)) 108 , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, this->parserContentPolicy(), m_options)) 109 , m_xssAuditorDelegate(fragment->document()) 110#if ENABLE(THREADED_HTML_PARSER) 111 , m_weakFactory(this) 112#endif 113 , m_isPinnedToMainThread(true) 114 , m_endWasDelayed(false) 115 , m_haveBackgroundParser(false) 116 , m_pumpSessionNestingLevel(0) 117{ 118 ASSERT(!shouldUseThreading()); 119 bool reportErrors = false; // For now document fragment parsing never reports errors. 120 m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options)); 121 m_xssAuditor.initForFragment(); 122} 123 124HTMLDocumentParser::~HTMLDocumentParser() 125{ 126 ASSERT(!m_parserScheduler); 127 ASSERT(!m_pumpSessionNestingLevel); 128 ASSERT(!m_preloadScanner); 129 ASSERT(!m_insertionPreloadScanner); 130 ASSERT(!m_haveBackgroundParser); 131} 132 133#if ENABLE(THREADED_HTML_PARSER) 134void HTMLDocumentParser::pinToMainThread() 135{ 136 ASSERT(!m_haveBackgroundParser); 137 ASSERT(!m_isPinnedToMainThread); 138 m_isPinnedToMainThread = true; 139 if (!m_tokenizer) { 140 ASSERT(!m_token); 141 m_token = adoptPtr(new HTMLToken); 142 m_tokenizer = HTMLTokenizer::create(m_options); 143 } 144} 145#endif 146 147void HTMLDocumentParser::detach() 148{ 149#if ENABLE(THREADED_HTML_PARSER) 150 if (m_haveBackgroundParser) 151 stopBackgroundParser(); 152#endif 153 DocumentParser::detach(); 154 if (m_scriptRunner) 155 m_scriptRunner->detach(); 156 m_treeBuilder->detach(); 157 // FIXME: It seems wrong that we would have a preload scanner here. 158 // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do. 159 m_preloadScanner.clear(); 160 m_insertionPreloadScanner.clear(); 161 m_parserScheduler.clear(); // Deleting the scheduler will clear any timers. 162} 163 164void HTMLDocumentParser::stopParsing() 165{ 166 DocumentParser::stopParsing(); 167 m_parserScheduler.clear(); // Deleting the scheduler will clear any timers. 168#if ENABLE(THREADED_HTML_PARSER) 169 if (m_haveBackgroundParser) 170 stopBackgroundParser(); 171#endif 172} 173 174// This kicks off "Once the user agent stops parsing" as described by: 175// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end 176void HTMLDocumentParser::prepareToStopParsing() 177{ 178 // FIXME: It may not be correct to disable this for the background parser. 179 // That means hasInsertionPoint() may not be correct in some cases. 180 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser); 181 182 // pumpTokenizer can cause this parser to be detached from the Document, 183 // but we need to ensure it isn't deleted yet. 184 RefPtr<HTMLDocumentParser> protect(this); 185 186 // NOTE: This pump should only ever emit buffered character tokens, 187 // so ForceSynchronous vs. AllowYield should be meaningless. 188#if ENABLE(THREADED_HTML_PARSER) 189 if (m_tokenizer) { 190 ASSERT(!m_haveBackgroundParser); 191 pumpTokenizerIfPossible(ForceSynchronous); 192 } 193#else 194 pumpTokenizerIfPossible(ForceSynchronous); 195#endif 196 197 if (isStopped()) 198 return; 199 200 DocumentParser::prepareToStopParsing(); 201 202 // We will not have a scriptRunner when parsing a DocumentFragment. 203 if (m_scriptRunner) 204 document()->setReadyState(Document::Interactive); 205 206 // Setting the ready state above can fire mutation event and detach us 207 // from underneath. In that case, just bail out. 208 if (isDetached()) 209 return; 210 211 attemptToRunDeferredScriptsAndEnd(); 212} 213 214bool HTMLDocumentParser::isParsingFragment() const 215{ 216 return m_treeBuilder->isParsingFragment(); 217} 218 219bool HTMLDocumentParser::processingData() const 220{ 221 return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser; 222} 223 224void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode) 225{ 226 if (isStopped() || isWaitingForScripts()) 227 return; 228 229 // Once a resume is scheduled, HTMLParserScheduler controls when we next pump. 230 if (isScheduledForResume()) { 231 ASSERT(mode == AllowYield); 232 return; 233 } 234 235 pumpTokenizer(mode); 236} 237 238bool HTMLDocumentParser::isScheduledForResume() const 239{ 240 return m_parserScheduler && m_parserScheduler->isScheduledForResume(); 241} 242 243// Used by HTMLParserScheduler 244void HTMLDocumentParser::resumeParsingAfterYield() 245{ 246 // pumpTokenizer can cause this parser to be detached from the Document, 247 // but we need to ensure it isn't deleted yet. 248 RefPtr<HTMLDocumentParser> protect(this); 249 250#if ENABLE(THREADED_HTML_PARSER) 251 if (m_haveBackgroundParser) { 252 pumpPendingSpeculations(); 253 return; 254 } 255#endif 256 257 // We should never be here unless we can pump immediately. Call pumpTokenizer() 258 // directly so that ASSERTS will fire if we're wrong. 259 pumpTokenizer(AllowYield); 260 endIfDelayed(); 261} 262 263void HTMLDocumentParser::runScriptsForPausedTreeBuilder() 264{ 265 ASSERT(scriptingContentIsAllowed(parserContentPolicy())); 266 267 TextPosition scriptStartPosition = TextPosition::belowRangePosition(); 268 RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition); 269 // We will not have a scriptRunner when parsing a DocumentFragment. 270 if (m_scriptRunner) 271 m_scriptRunner->execute(scriptElement.release(), scriptStartPosition); 272} 273 274bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& session) 275{ 276 if (isStopped()) 277 return false; 278 279 ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); 280 281 if (isWaitingForScripts()) { 282 if (mode == AllowYield) 283 m_parserScheduler->checkForYieldBeforeScript(session); 284 285 // If we don't run the script, we cannot allow the next token to be taken. 286 if (session.needsYield) 287 return false; 288 289 // If we're paused waiting for a script, we try to execute scripts before continuing. 290 runScriptsForPausedTreeBuilder(); 291 if (isWaitingForScripts() || isStopped()) 292 return false; 293 } 294 295 // FIXME: It's wrong for the HTMLDocumentParser to reach back to the 296 // Frame, but this approach is how the old parser handled 297 // stopping when the page assigns window.location. What really 298 // should happen is that assigning window.location causes the 299 // parser to stop parsing cleanly. The problem is we're not 300 // perpared to do that at every point where we run JavaScript. 301 if (!isParsingFragment() 302 && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending()) 303 return false; 304 305 if (mode == AllowYield) 306 m_parserScheduler->checkForYieldBeforeToken(session); 307 308 return true; 309} 310 311#if ENABLE(THREADED_HTML_PARSER) 312 313void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> chunk) 314{ 315 if (isWaitingForScripts() || !m_speculations.isEmpty()) { 316 m_preloader->takeAndPreload(chunk->preloads); 317 m_speculations.append(chunk); 318 return; 319 } 320 321 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document, 322 // but we need to ensure it isn't deleted yet. 323 RefPtr<HTMLDocumentParser> protect(this); 324 325 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), lineNumber().zeroBasedInt()); 326 327 ASSERT(m_speculations.isEmpty()); 328 chunk->preloads.clear(); // We don't need to preload because we're going to parse immediately. 329 processParsedChunkFromBackgroundParser(chunk); 330 331 InspectorInstrumentation::didWriteHTML(cookie, lineNumber().zeroBasedInt()); 332} 333 334void HTMLDocumentParser::validateSpeculations(PassOwnPtr<ParsedChunk> chunk) 335{ 336 ASSERT(chunk); 337 if (isWaitingForScripts()) { 338 // We're waiting on a network script, just save the chunk, we'll get 339 // a second validateSpeculations call after the script completes. 340 // This call should have been made immediately after runScriptsForPausedTreeBuilder 341 // which may have started a network load and left us waiting. 342 ASSERT(!m_lastChunkBeforeScript); 343 m_lastChunkBeforeScript = chunk; 344 return; 345 } 346 347 ASSERT(!m_lastChunkBeforeScript); 348 OwnPtr<HTMLTokenizer> tokenizer = m_tokenizer.release(); 349 OwnPtr<HTMLToken> token = m_token.release(); 350 351 if (!tokenizer) { 352 // There must not have been any changes to the HTMLTokenizer state on 353 // the main thread, which means the speculation buffer is correct. 354 return; 355 } 356 357 // Currently we're only smart enough to reuse the speculation buffer if the tokenizer 358 // both starts and ends in the DataState. That state is simplest because the HTMLToken 359 // is always in the Uninitialized state. We should consider whether we can reuse the 360 // speculation buffer in other states, but we'd likely need to do something more 361 // sophisticated with the HTMLToken. 362 if (chunk->tokenizerState == HTMLTokenizer::DataState 363 && tokenizer->state() == HTMLTokenizer::DataState 364 && m_input.current().isEmpty() 365 && chunk->treeBuilderState == HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get())) { 366 ASSERT(token->isUninitialized()); 367 return; 368 } 369 370 discardSpeculationsAndResumeFrom(chunk, token.release(), tokenizer.release()); 371} 372 373void HTMLDocumentParser::discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunkBeforeScript, PassOwnPtr<HTMLToken> token, PassOwnPtr<HTMLTokenizer> tokenizer) 374{ 375 m_weakFactory.revokeAll(); 376 m_speculations.clear(); 377 378 OwnPtr<BackgroundHTMLParser::Checkpoint> checkpoint = adoptPtr(new BackgroundHTMLParser::Checkpoint); 379 checkpoint->parser = m_weakFactory.createWeakPtr(); 380 checkpoint->token = token; 381 checkpoint->tokenizer = tokenizer; 382 checkpoint->treeBuilderState = HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get()); 383 checkpoint->inputCheckpoint = lastChunkBeforeScript->inputCheckpoint; 384 checkpoint->preloadScannerCheckpoint = lastChunkBeforeScript->preloadScannerCheckpoint; 385 checkpoint->unparsedInput = m_input.current().toString().isolatedCopy(); 386 m_input.current().clear(); // FIXME: This should be passed in instead of cleared. 387 388 ASSERT(checkpoint->unparsedInput.isSafeToSendToAnotherThread()); 389 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::resumeFrom, m_backgroundParser, checkpoint.release())); 390} 391 392void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> popChunk) 393{ 394 // ASSERT that this object is both attached to the Document and protected. 395 ASSERT(refCount() >= 2); 396 ASSERT(shouldUseThreading()); 397 ASSERT(!m_tokenizer); 398 ASSERT(!m_token); 399 ASSERT(!m_lastChunkBeforeScript); 400 401 ActiveParserSession session(contextForParsingSession()); 402 OwnPtr<ParsedChunk> chunk(popChunk); 403 OwnPtr<CompactHTMLTokenStream> tokens = chunk->tokens.release(); 404 405 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::startedChunkWithCheckpoint, m_backgroundParser, chunk->inputCheckpoint)); 406 407 for (XSSInfoStream::const_iterator it = chunk->xssInfos.begin(); it != chunk->xssInfos.end(); ++it) { 408 m_textPosition = (*it)->m_textPosition; 409 m_xssAuditorDelegate.didBlockScript(**it); 410 if (isStopped()) 411 break; 412 } 413 414 for (Vector<CompactHTMLToken>::const_iterator it = tokens->begin(); it != tokens->end(); ++it) { 415 ASSERT(!isWaitingForScripts()); 416 417 if (!isParsingFragment() 418 && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending()) { 419 420 // To match main-thread parser behavior (which never checks locationChangePending on the EOF path) 421 // we peek to see if this chunk has an EOF and process it anyway. 422 if (tokens->last().type() == HTMLToken::EndOfFile) { 423 ASSERT(m_speculations.isEmpty()); 424 prepareToStopParsing(); 425 } 426 break; 427 } 428 429 m_textPosition = it->textPosition(); 430 431 constructTreeFromCompactHTMLToken(*it); 432 433 if (isStopped()) 434 break; 435 436 if (isWaitingForScripts()) { 437 ASSERT(it + 1 == tokens->end()); // The </script> is assumed to be the last token of this bunch. 438 runScriptsForPausedTreeBuilder(); 439 validateSpeculations(chunk.release()); 440 break; 441 } 442 443 if (it->type() == HTMLToken::EndOfFile) { 444 ASSERT(it + 1 == tokens->end()); // The EOF is assumed to be the last token of this bunch. 445 ASSERT(m_speculations.isEmpty()); 446 prepareToStopParsing(); 447 break; 448 } 449 450 ASSERT(!m_tokenizer); 451 ASSERT(!m_token); 452 } 453} 454 455void HTMLDocumentParser::pumpPendingSpeculations() 456{ 457 // FIXME: Share this constant with the parser scheduler. 458 const double parserTimeLimit = 0.500; 459 460 // ASSERT that this object is both attached to the Document and protected. 461 ASSERT(refCount() >= 2); 462 // If this assert fails, you need to call validateSpeculations to make sure 463 // m_tokenizer and m_token don't have state that invalidates m_speculations. 464 ASSERT(!m_tokenizer); 465 ASSERT(!m_token); 466 ASSERT(!m_lastChunkBeforeScript); 467 468 // FIXME: Pass in current input length. 469 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), lineNumber().zeroBasedInt()); 470 471 double startTime = currentTime(); 472 473 while (!m_speculations.isEmpty()) { 474 processParsedChunkFromBackgroundParser(m_speculations.takeFirst()); 475 476 if (isWaitingForScripts() || isStopped()) 477 break; 478 479 if (currentTime() - startTime > parserTimeLimit && !m_speculations.isEmpty()) { 480 m_parserScheduler->scheduleForResume(); 481 break; 482 } 483 } 484 485 InspectorInstrumentation::didWriteHTML(cookie, lineNumber().zeroBasedInt()); 486} 487 488#endif // ENABLE(THREADED_HTML_PARSER) 489 490void HTMLDocumentParser::forcePlaintextForTextDocument() 491{ 492#if ENABLE(THREADED_HTML_PARSER) 493 if (shouldUseThreading()) { 494 // This method is called before any data is appended, so we have to start 495 // the background parser ourselves. 496 if (!m_haveBackgroundParser) 497 startBackgroundParser(); 498 499 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser)); 500 } else 501#endif 502 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 503} 504 505Document* HTMLDocumentParser::contextForParsingSession() 506{ 507 // The parsing session should interact with the document only when parsing 508 // non-fragments. Otherwise, we might delay the load event mistakenly. 509 if (isParsingFragment()) 510 return 0; 511 return document(); 512} 513 514void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) 515{ 516 ASSERT(!isStopped()); 517 ASSERT(!isScheduledForResume()); 518 // ASSERT that this object is both attached to the Document and protected. 519 ASSERT(refCount() >= 2); 520 ASSERT(m_tokenizer); 521 ASSERT(m_token); 522 ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); 523 524 PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession()); 525 526 // We tell the InspectorInstrumentation about every pump, even if we 527 // end up pumping nothing. It can filter out empty pumps itself. 528 // FIXME: m_input.current().length() is only accurate if we 529 // end up parsing the whole buffer in this pump. We should pass how 530 // much we parsed as part of didWriteHTML instead of willWriteHTML. 531 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().currentLine().zeroBasedInt()); 532 533 m_xssAuditor.init(document(), &m_xssAuditorDelegate); 534 535 while (canTakeNextToken(mode, session) && !session.needsYield) { 536 if (!isParsingFragment()) 537 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), token()); 538 539 if (!m_tokenizer->nextToken(m_input.current(), token())) 540 break; 541 542 if (!isParsingFragment()) { 543 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), token()); 544 545 // We do not XSS filter innerHTML, which means we (intentionally) fail 546 // http/tests/security/xssAuditor/dom-write-innerHTML.html 547 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(token(), m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) 548 m_xssAuditorDelegate.didBlockScript(*xssInfo); 549 } 550 551 constructTreeFromHTMLToken(token()); 552 ASSERT(token().isUninitialized()); 553 } 554 555 // Ensure we haven't been totally deref'ed after pumping. Any caller of this 556 // function should be holding a RefPtr to this to ensure we weren't deleted. 557 ASSERT(refCount() >= 1); 558 559 if (isStopped()) 560 return; 561 562 if (session.needsYield) 563 m_parserScheduler->scheduleForResume(); 564 565 if (isWaitingForScripts()) { 566 ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); 567 if (!m_preloadScanner) { 568 m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url())); 569 m_preloadScanner->appendToEnd(m_input.current()); 570 } 571 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 572 } 573 574 InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt()); 575} 576 577void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken) 578{ 579 AtomicHTMLToken token(rawToken); 580 581 // We clear the rawToken in case constructTreeFromAtomicToken 582 // synchronously re-enters the parser. We don't clear the token immedately 583 // for Character tokens because the AtomicHTMLToken avoids copying the 584 // characters by keeping a pointer to the underlying buffer in the 585 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter 586 // the parser. 587 // 588 // FIXME: Stop clearing the rawToken once we start running the parser off 589 // the main thread or once we stop allowing synchronous JavaScript 590 // execution from parseAttribute. 591 if (rawToken.type() != HTMLToken::Character) 592 rawToken.clear(); 593 594 m_treeBuilder->constructTree(&token); 595 596 if (!rawToken.isUninitialized()) { 597 ASSERT(rawToken.type() == HTMLToken::Character); 598 rawToken.clear(); 599 } 600} 601 602#if ENABLE(THREADED_HTML_PARSER) 603 604void HTMLDocumentParser::constructTreeFromCompactHTMLToken(const CompactHTMLToken& compactToken) 605{ 606 AtomicHTMLToken token(compactToken); 607 m_treeBuilder->constructTree(&token); 608} 609 610#endif 611 612bool HTMLDocumentParser::hasInsertionPoint() 613{ 614 // FIXME: The wasCreatedByScript() branch here might not be fully correct. 615 // Our model of the EOF character differs slightly from the one in 616 // the spec because our treatment is uniform between network-sourced 617 // and script-sourced input streams whereas the spec treats them 618 // differently. 619 return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile()); 620} 621 622void HTMLDocumentParser::insert(const SegmentedString& source) 623{ 624 if (isStopped()) 625 return; 626 627 // pumpTokenizer can cause this parser to be detached from the Document, 628 // but we need to ensure it isn't deleted yet. 629 RefPtr<HTMLDocumentParser> protect(this); 630 631#if ENABLE(THREADED_HTML_PARSER) 632 if (!m_tokenizer) { 633 ASSERT(!inPumpSession()); 634 ASSERT(m_haveBackgroundParser || wasCreatedByScript()); 635 m_token = adoptPtr(new HTMLToken); 636 m_tokenizer = HTMLTokenizer::create(m_options); 637 } 638#endif 639 640 SegmentedString excludedLineNumberSource(source); 641 excludedLineNumberSource.setExcludeLineNumbers(); 642 m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource); 643 pumpTokenizerIfPossible(ForceSynchronous); 644 645 if (isWaitingForScripts()) { 646 // Check the document.write() output with a separate preload scanner as 647 // the main scanner can't deal with insertions. 648 if (!m_insertionPreloadScanner) 649 m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url())); 650 m_insertionPreloadScanner->appendToEnd(source); 651 m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 652 } 653 654 endIfDelayed(); 655} 656 657#if ENABLE(THREADED_HTML_PARSER) 658 659void HTMLDocumentParser::startBackgroundParser() 660{ 661 ASSERT(shouldUseThreading()); 662 ASSERT(!m_haveBackgroundParser); 663 m_haveBackgroundParser = true; 664 665 HTMLIdentifier::init(); 666 667 RefPtr<WeakReference<BackgroundHTMLParser> > reference = WeakReference<BackgroundHTMLParser>::createUnbound(); 668 m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference); 669 670 OwnPtr<BackgroundHTMLParser::Configuration> config = adoptPtr(new BackgroundHTMLParser::Configuration); 671 config->options = m_options; 672 config->parser = m_weakFactory.createWeakPtr(); 673 config->xssAuditor = adoptPtr(new XSSAuditor); 674 config->xssAuditor->init(document(), &m_xssAuditorDelegate); 675 config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy())); 676 677 ASSERT(config->xssAuditor->isSafeToSendToAnotherThread()); 678 ASSERT(config->preloadScanner->isSafeToSendToAnotherThread()); 679 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::create, reference.release(), config.release())); 680} 681 682void HTMLDocumentParser::stopBackgroundParser() 683{ 684 ASSERT(shouldUseThreading()); 685 ASSERT(m_haveBackgroundParser); 686 m_haveBackgroundParser = false; 687 688 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::stop, m_backgroundParser)); 689 m_weakFactory.revokeAll(); 690} 691 692#endif 693 694void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource) 695{ 696 if (isStopped()) 697 return; 698 699#if ENABLE(THREADED_HTML_PARSER) 700 if (shouldUseThreading()) { 701 if (!m_haveBackgroundParser) 702 startBackgroundParser(); 703 704 ASSERT(inputSource->hasOneRef()); 705 Closure closure = bind(&BackgroundHTMLParser::append, m_backgroundParser, String(inputSource)); 706 // NOTE: Important that the String temporary is destroyed before we post the task 707 // otherwise the String could call deref() on a StringImpl now owned by the background parser. 708 // We would like to ASSERT(closure.arg3()->hasOneRef()) but sadly the args are private. 709 HTMLParserThread::shared()->postTask(closure); 710 return; 711 } 712#endif 713 714 // pumpTokenizer can cause this parser to be detached from the Document, 715 // but we need to ensure it isn't deleted yet. 716 RefPtr<HTMLDocumentParser> protect(this); 717 String source(inputSource); 718 719 if (m_preloadScanner) { 720 if (m_input.current().isEmpty() && !isWaitingForScripts()) { 721 // We have parsed until the end of the current input and so are now moving ahead of the preload scanner. 722 // Clear the scanner so we know to scan starting from the current input point if we block again. 723 m_preloadScanner.clear(); 724 } else { 725 m_preloadScanner->appendToEnd(source); 726 if (isWaitingForScripts()) 727 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 728 } 729 } 730 731 m_input.appendToEnd(source); 732 733 if (inPumpSession()) { 734 // We've gotten data off the network in a nested write. 735 // We don't want to consume any more of the input stream now. Do 736 // not worry. We'll consume this data in a less-nested write(). 737 return; 738 } 739 740 pumpTokenizerIfPossible(AllowYield); 741 742 endIfDelayed(); 743} 744 745void HTMLDocumentParser::end() 746{ 747 ASSERT(!isDetached()); 748 ASSERT(!isScheduledForResume()); 749 750#if ENABLE(THREADED_HTML_PARSER) 751 if (m_haveBackgroundParser) 752 stopBackgroundParser(); 753#endif 754 755 // Informs the the rest of WebCore that parsing is really finished (and deletes this). 756 m_treeBuilder->finished(); 757} 758 759void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd() 760{ 761 ASSERT(isStopping()); 762 // FIXME: It may not be correct to disable this for the background parser. 763 // That means hasInsertionPoint() may not be correct in some cases. 764 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser); 765 if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing()) 766 return; 767 end(); 768} 769 770void HTMLDocumentParser::attemptToEnd() 771{ 772 // finish() indicates we will not receive any more data. If we are waiting on 773 // an external script to load, we can't finish parsing quite yet. 774 775 if (shouldDelayEnd()) { 776 m_endWasDelayed = true; 777 return; 778 } 779 prepareToStopParsing(); 780} 781 782void HTMLDocumentParser::endIfDelayed() 783{ 784 // If we've already been detached, don't bother ending. 785 if (isDetached()) 786 return; 787 788 if (!m_endWasDelayed || shouldDelayEnd()) 789 return; 790 791 m_endWasDelayed = false; 792 prepareToStopParsing(); 793} 794 795void HTMLDocumentParser::finish() 796{ 797 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not 798 // makes sense to call any methods on DocumentParser once it's been stopped. 799 // However, FrameLoader::stop calls DocumentParser::finish unconditionally. 800 801#if ENABLE(THREADED_HTML_PARSER) 802 // Empty documents never got an append() call, and thus have never started 803 // a background parser. In those cases, we ignore shouldUseThreading() 804 // and fall through to the non-threading case. 805 if (m_haveBackgroundParser) { 806 if (!m_input.haveSeenEndOfFile()) 807 m_input.closeWithoutMarkingEndOfFile(); 808 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finish, m_backgroundParser)); 809 return; 810 } 811 812 if (!m_tokenizer) { 813 ASSERT(!m_token); 814 // We're finishing before receiving any data. Rather than booting up 815 // the background parser just to spin it down, we finish parsing 816 // synchronously. 817 m_token = adoptPtr(new HTMLToken); 818 m_tokenizer = HTMLTokenizer::create(m_options); 819 } 820#endif 821 822 // We're not going to get any more data off the network, so we tell the 823 // input stream we've reached the end of file. finish() can be called more 824 // than once, if the first time does not call end(). 825 if (!m_input.haveSeenEndOfFile()) 826 m_input.markEndOfFile(); 827 828 attemptToEnd(); 829} 830 831bool HTMLDocumentParser::isExecutingScript() const 832{ 833 if (!m_scriptRunner) 834 return false; 835 return m_scriptRunner->isExecutingScript(); 836} 837 838OrdinalNumber HTMLDocumentParser::lineNumber() const 839{ 840#if ENABLE(THREADED_HTML_PARSER) 841 if (m_haveBackgroundParser) 842 return m_textPosition.m_line; 843#endif 844 845 return m_input.current().currentLine(); 846} 847 848TextPosition HTMLDocumentParser::textPosition() const 849{ 850#if ENABLE(THREADED_HTML_PARSER) 851 if (m_haveBackgroundParser) 852 return m_textPosition; 853#endif 854 855 const SegmentedString& currentString = m_input.current(); 856 OrdinalNumber line = currentString.currentLine(); 857 OrdinalNumber column = currentString.currentColumn(); 858 859 return TextPosition(line, column); 860} 861 862bool HTMLDocumentParser::isWaitingForScripts() const 863{ 864 // When the TreeBuilder encounters a </script> tag, it returns to the HTMLDocumentParser 865 // where the script is transfered from the treebuilder to the script runner. 866 // The script runner will hold the script until its loaded and run. During 867 // any of this time, we want to count ourselves as "waiting for a script" and thus 868 // run the preload scanner, as well as delay completion of parsing. 869 bool treeBuilderHasBlockingScript = m_treeBuilder->hasParserBlockingScript(); 870 bool scriptRunnerHasBlockingScript = m_scriptRunner && m_scriptRunner->hasParserBlockingScript(); 871 // Since the parser is paused while a script runner has a blocking script, it should 872 // never be possible to end up with both objects holding a blocking script. 873 ASSERT(!(treeBuilderHasBlockingScript && scriptRunnerHasBlockingScript)); 874 // If either object has a blocking script, the parser should be paused. 875 return treeBuilderHasBlockingScript || scriptRunnerHasBlockingScript; 876} 877 878void HTMLDocumentParser::resumeParsingAfterScriptExecution() 879{ 880 ASSERT(!isExecutingScript()); 881 ASSERT(!isWaitingForScripts()); 882 883#if ENABLE(THREADED_HTML_PARSER) 884 if (m_haveBackgroundParser) { 885 validateSpeculations(m_lastChunkBeforeScript.release()); 886 ASSERT(!m_lastChunkBeforeScript); 887 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document, 888 // but we need to ensure it isn't deleted yet. 889 RefPtr<HTMLDocumentParser> protect(this); 890 pumpPendingSpeculations(); 891 return; 892 } 893#endif 894 895 m_insertionPreloadScanner.clear(); 896 pumpTokenizerIfPossible(AllowYield); 897 endIfDelayed(); 898} 899 900void HTMLDocumentParser::watchForLoad(CachedResource* cachedScript) 901{ 902 ASSERT(!cachedScript->isLoaded()); 903 // addClient would call notifyFinished if the load were complete. 904 // Callers do not expect to be re-entered from this call, so they should 905 // not an already-loaded CachedResource. 906 cachedScript->addClient(this); 907} 908 909void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript) 910{ 911 cachedScript->removeClient(this); 912} 913 914void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan() 915{ 916 ASSERT(m_preloadScanner); 917 m_preloadScanner->appendToEnd(m_input.current()); 918 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 919} 920 921void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource) 922{ 923 // pumpTokenizer can cause this parser to be detached from the Document, 924 // but we need to ensure it isn't deleted yet. 925 RefPtr<HTMLDocumentParser> protect(this); 926 927 ASSERT(m_scriptRunner); 928 ASSERT(!isExecutingScript()); 929 if (isStopping()) { 930 attemptToRunDeferredScriptsAndEnd(); 931 return; 932 } 933 934 m_scriptRunner->executeScriptsWaitingForLoad(cachedResource); 935 if (!isWaitingForScripts()) 936 resumeParsingAfterScriptExecution(); 937} 938 939void HTMLDocumentParser::executeScriptsWaitingForStylesheets() 940{ 941 // Document only calls this when the Document owns the DocumentParser 942 // so this will not be called in the DocumentFragment case. 943 ASSERT(m_scriptRunner); 944 // Ignore calls unless we have a script blocking the parser waiting on a 945 // stylesheet load. Otherwise we are currently parsing and this 946 // is a re-entrant call from encountering a </ style> tag. 947 if (!m_scriptRunner->hasScriptsWaitingForStylesheets()) 948 return; 949 950 // pumpTokenizer can cause this parser to be detached from the Document, 951 // but we need to ensure it isn't deleted yet. 952 RefPtr<HTMLDocumentParser> protect(this); 953 m_scriptRunner->executeScriptsWaitingForStylesheets(); 954 if (!isWaitingForScripts()) 955 resumeParsingAfterScriptExecution(); 956} 957 958void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 959{ 960 RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy); 961 parser->insert(source); // Use insert() so that the parser will not yield. 962 parser->finish(); 963 ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151> 964 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 965} 966 967void HTMLDocumentParser::suspendScheduledTasks() 968{ 969 if (m_parserScheduler) 970 m_parserScheduler->suspend(); 971} 972 973void HTMLDocumentParser::resumeScheduledTasks() 974{ 975 if (m_parserScheduler) 976 m_parserScheduler->resume(); 977} 978 979} 980