1/* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#ifndef HTMLDocumentParser_h 27#define HTMLDocumentParser_h 28 29#include "CachedResourceClient.h" 30#include "FragmentScriptingPermission.h" 31#include "HTMLInputStream.h" 32#include "HTMLParserOptions.h" 33#include "HTMLPreloadScanner.h" 34#include "HTMLScriptRunnerHost.h" 35#include "HTMLSourceTracker.h" 36#include "HTMLToken.h" 37#include "HTMLTokenizer.h" 38#include "ScriptableDocumentParser.h" 39#include "SegmentedString.h" 40#include "XSSAuditor.h" 41#include "XSSAuditorDelegate.h" 42#include <wtf/Deque.h> 43#include <wtf/WeakPtr.h> 44#include <wtf/text/TextPosition.h> 45 46namespace WebCore { 47 48class BackgroundHTMLParser; 49class CompactHTMLToken; 50class Document; 51class DocumentFragment; 52class HTMLDocument; 53class HTMLParserScheduler; 54class HTMLScriptRunner; 55class HTMLTreeBuilder; 56class HTMLResourcePreloader; 57class ScriptController; 58class ScriptSourceCode; 59 60class PumpSession; 61 62class HTMLDocumentParser : public ScriptableDocumentParser, HTMLScriptRunnerHost, CachedResourceClient { 63 WTF_MAKE_FAST_ALLOCATED; 64public: 65 static PassRefPtr<HTMLDocumentParser> create(HTMLDocument& document) 66 { 67 return adoptRef(new HTMLDocumentParser(document)); 68 } 69 virtual ~HTMLDocumentParser(); 70 71 // Exposed for HTMLParserScheduler 72 void resumeParsingAfterYield(); 73 74 static void parseDocumentFragment(const String&, DocumentFragment&, Element* contextElement, ParserContentPolicy = AllowScriptingContent); 75 76 HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } 77 78 virtual TextPosition textPosition() const override; 79 80 virtual void suspendScheduledTasks() override; 81 virtual void resumeScheduledTasks() override; 82 83protected: 84 virtual void insert(const SegmentedString&) override; 85 virtual void append(PassRefPtr<StringImpl>) override; 86 virtual void finish() override; 87 88 explicit HTMLDocumentParser(HTMLDocument&); 89 HTMLDocumentParser(DocumentFragment&, Element* contextElement, ParserContentPolicy); 90 91 HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); } 92 93 void forcePlaintextForTextDocument(); 94 95private: 96 static PassRefPtr<HTMLDocumentParser> create(DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 97 { 98 return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); 99 } 100 101 // DocumentParser 102 virtual void detach() override; 103 virtual bool hasInsertionPoint() override; 104 virtual bool processingData() const override; 105 virtual void prepareToStopParsing() override; 106 virtual void stopParsing() override; 107 virtual bool isWaitingForScripts() const override; 108 virtual bool isExecutingScript() const override; 109 virtual void executeScriptsWaitingForStylesheets() override; 110 111 // HTMLScriptRunnerHost 112 virtual void watchForLoad(CachedResource*) override; 113 virtual void stopWatchingForLoad(CachedResource*) override; 114 virtual HTMLInputStream& inputStream() override { return m_input; } 115 virtual bool hasPreloadScanner() const override { return m_preloadScanner.get(); } 116 virtual void appendCurrentInputStreamToPreloadScannerAndScan() override; 117 118 // CachedResourceClient 119 virtual void notifyFinished(CachedResource*) override; 120 121 Document* contextForParsingSession(); 122 123 enum SynchronousMode { 124 AllowYield, 125 ForceSynchronous, 126 }; 127 bool canTakeNextToken(SynchronousMode, PumpSession&); 128 void pumpTokenizer(SynchronousMode); 129 void pumpTokenizerIfPossible(SynchronousMode); 130 void constructTreeFromHTMLToken(HTMLToken&); 131 132 void runScriptsForPausedTreeBuilder(); 133 void resumeParsingAfterScriptExecution(); 134 135 void attemptToEnd(); 136 void endIfDelayed(); 137 void attemptToRunDeferredScriptsAndEnd(); 138 void end(); 139 140 bool isParsingFragment() const; 141 bool isScheduledForResume() const; 142 bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; } 143 bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); } 144 145 HTMLToken& token() { return *m_token.get(); } 146 147 HTMLParserOptions m_options; 148 HTMLInputStream m_input; 149 150 std::unique_ptr<HTMLToken> m_token; 151 std::unique_ptr<HTMLTokenizer> m_tokenizer; 152 std::unique_ptr<HTMLScriptRunner> m_scriptRunner; 153 std::unique_ptr<HTMLTreeBuilder> m_treeBuilder; 154 std::unique_ptr<HTMLPreloadScanner> m_preloadScanner; 155 std::unique_ptr<HTMLPreloadScanner> m_insertionPreloadScanner; 156 std::unique_ptr<HTMLParserScheduler> m_parserScheduler; 157 HTMLSourceTracker m_sourceTracker; 158 TextPosition m_textPosition; 159 XSSAuditor m_xssAuditor; 160 XSSAuditorDelegate m_xssAuditorDelegate; 161 162 std::unique_ptr<HTMLResourcePreloader> m_preloader; 163 164 bool m_endWasDelayed; 165 bool m_haveBackgroundParser; 166 unsigned m_pumpSessionNestingLevel; 167}; 168 169} 170 171#endif 172