1/* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27 28#if ENABLE(THREADED_HTML_PARSER) 29 30#include "BackgroundHTMLParser.h" 31 32#include "HTMLDocumentParser.h" 33#include "HTMLParserIdioms.h" 34#include "HTMLParserThread.h" 35#include "HTMLTokenizer.h" 36#include "XSSAuditor.h" 37#include <wtf/MainThread.h> 38#include <wtf/text/TextPosition.h> 39 40namespace WebCore { 41 42// On a network with high latency and high bandwidth, using a device 43// with a fast CPU, we could end up speculatively tokenizing 44// the whole document, well ahead of when the main-thread actually needs it. 45// This is a waste of memory (and potentially time if the speculation fails). 46// So we limit our outstanding speculations arbitrarily to 10. 47// Our maximal memory spent speculating will be approximately: 48// outstandingCheckpointLimit * pendingTokenLimit * sizeof(CompactToken) 49// We use a separate low and high water mark to avoid constantly topping 50// off the main thread's token buffer. 51// At time of writing, this is 10 * 1000 * 28 bytes = appox 280kb of memory. 52// These numbers have not been tuned. 53static const size_t outstandingCheckpointLimit = 10; 54 55// We limit our chucks to 1000 tokens, to make sure the main 56// thread is never waiting on the parser thread for tokens. 57// This was tuned in https://bugs.webkit.org/show_bug.cgi?id=110408. 58static const size_t pendingTokenLimit = 1000; 59 60using namespace HTMLNames; 61 62#ifndef NDEBUG 63 64static void checkThatTokensAreSafeToSendToAnotherThread(const CompactHTMLTokenStream* tokens) 65{ 66 for (size_t i = 0; i < tokens->size(); ++i) 67 ASSERT(tokens->at(i).isSafeToSendToAnotherThread()); 68} 69 70static void checkThatPreloadsAreSafeToSendToAnotherThread(const PreloadRequestStream& preloads) 71{ 72 for (size_t i = 0; i < preloads.size(); ++i) 73 ASSERT(preloads[i]->isSafeToSendToAnotherThread()); 74} 75 76#endif 77 78BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config) 79 : m_weakFactory(reference, this) 80 , m_token(adoptPtr(new HTMLToken)) 81 , m_tokenizer(HTMLTokenizer::create(config->options)) 82 , m_treeBuilderSimulator(config->options) 83 , m_options(config->options) 84 , m_parser(config->parser) 85 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) 86 , m_xssAuditor(config->xssAuditor.release()) 87 , m_preloadScanner(config->preloadScanner.release()) 88{ 89} 90 91void BackgroundHTMLParser::append(const String& input) 92{ 93 ASSERT(!m_input.current().isClosed()); 94 m_input.append(input); 95 pumpTokenizer(); 96} 97 98void BackgroundHTMLParser::resumeFrom(PassOwnPtr<Checkpoint> checkpoint) 99{ 100 m_parser = checkpoint->parser; 101 m_token = checkpoint->token.release(); 102 m_tokenizer = checkpoint->tokenizer.release(); 103 m_treeBuilderSimulator.setState(checkpoint->treeBuilderState); 104 m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput); 105 m_preloadScanner->rewindTo(checkpoint->preloadScannerCheckpoint); 106 pumpTokenizer(); 107} 108 109void BackgroundHTMLParser::startedChunkWithCheckpoint(HTMLInputCheckpoint inputCheckpoint) 110{ 111 // Note, we should not have to worry about the index being invalid 112 // as messages from the main thread will be processed in FIFO order. 113 m_input.invalidateCheckpointsBefore(inputCheckpoint); 114 pumpTokenizer(); 115} 116 117void BackgroundHTMLParser::finish() 118{ 119 markEndOfFile(); 120 pumpTokenizer(); 121} 122 123void BackgroundHTMLParser::stop() 124{ 125 delete this; 126} 127 128void BackgroundHTMLParser::forcePlaintextForTextDocument() 129{ 130 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser) 131 // to force us into the PLAINTEXT state w/o using a <plaintext> tag. 132 // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons. 133 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 134} 135 136void BackgroundHTMLParser::markEndOfFile() 137{ 138 ASSERT(!m_input.current().isClosed()); 139 m_input.append(String(&kEndOfFileMarker, 1)); 140 m_input.close(); 141} 142 143void BackgroundHTMLParser::pumpTokenizer() 144{ 145 // No need to start speculating until the main thread has almost caught up. 146 if (m_input.outstandingCheckpointCount() > outstandingCheckpointLimit) 147 return; 148 149 while (true) { 150 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), *m_token); 151 if (!m_tokenizer->nextToken(m_input.current(), *m_token.get())) { 152 // We've reached the end of our current input. 153 sendTokensToMainThread(); 154 break; 155 } 156 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), *m_token); 157 158 { 159 TextPosition position = TextPosition(m_input.current().currentLine(), m_input.current().currentColumn()); 160 161 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor->filterToken(FilterTokenRequest(*m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) { 162 xssInfo->m_textPosition = position; 163 m_pendingXSSInfos.append(xssInfo.release()); 164 } 165 166 CompactHTMLToken token(m_token.get(), TextPosition(m_input.current().currentLine(), m_input.current().currentColumn())); 167 168 m_preloadScanner->scan(token, m_pendingPreloads); 169 170 m_pendingTokens->append(token); 171 } 172 173 m_token->clear(); 174 175 if (!m_treeBuilderSimulator.simulate(m_pendingTokens->last(), m_tokenizer.get()) || m_pendingTokens->size() >= pendingTokenLimit) { 176 sendTokensToMainThread(); 177 // If we're far ahead of the main thread, yield for a bit to avoid consuming too much memory. 178 if (m_input.outstandingCheckpointCount() > outstandingCheckpointLimit) 179 break; 180 } 181 } 182} 183 184void BackgroundHTMLParser::sendTokensToMainThread() 185{ 186 if (m_pendingTokens->isEmpty()) 187 return; 188 189#ifndef NDEBUG 190 checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get()); 191 checkThatPreloadsAreSafeToSendToAnotherThread(m_pendingPreloads); 192#endif 193 194 OwnPtr<HTMLDocumentParser::ParsedChunk> chunk = adoptPtr(new HTMLDocumentParser::ParsedChunk); 195 chunk->tokens = m_pendingTokens.release(); 196 chunk->preloads.swap(m_pendingPreloads); 197 chunk->xssInfos.swap(m_pendingXSSInfos); 198 chunk->tokenizerState = m_tokenizer->state(); 199 chunk->treeBuilderState = m_treeBuilderSimulator.state(); 200 chunk->inputCheckpoint = m_input.createCheckpoint(); 201 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint(); 202 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release())); 203 204 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream); 205} 206 207} 208 209#endif // ENABLE(THREADED_HTML_PARSER) 210