1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef HTMLDocumentParser_h
27#define HTMLDocumentParser_h
28
29#include "CachedResourceClient.h"
30#include "FragmentScriptingPermission.h"
31#include "HTMLInputStream.h"
32#include "HTMLParserOptions.h"
33#include "HTMLPreloadScanner.h"
34#include "HTMLScriptRunnerHost.h"
35#include "HTMLSourceTracker.h"
36#include "HTMLToken.h"
37#include "HTMLTokenizer.h"
38#include "ScriptableDocumentParser.h"
39#include "SegmentedString.h"
40#include "XSSAuditor.h"
41#include "XSSAuditorDelegate.h"
42#include <wtf/Deque.h>
43#include <wtf/WeakPtr.h>
44#include <wtf/text/TextPosition.h>
45
46namespace WebCore {
47
48class BackgroundHTMLParser;
49class CompactHTMLToken;
50class Document;
51class DocumentFragment;
52class HTMLDocument;
53class HTMLParserScheduler;
54class HTMLScriptRunner;
55class HTMLTreeBuilder;
56class HTMLResourcePreloader;
57class ScriptController;
58class ScriptSourceCode;
59
60class PumpSession;
61
62class HTMLDocumentParser :  public ScriptableDocumentParser, HTMLScriptRunnerHost, CachedResourceClient {
63    WTF_MAKE_FAST_ALLOCATED;
64public:
65    static PassRefPtr<HTMLDocumentParser> create(HTMLDocument& document)
66    {
67        return adoptRef(new HTMLDocumentParser(document));
68    }
69    virtual ~HTMLDocumentParser();
70
71    // Exposed for HTMLParserScheduler
72    void resumeParsingAfterYield();
73
74    static void parseDocumentFragment(const String&, DocumentFragment&, Element* contextElement, ParserContentPolicy = AllowScriptingContent);
75
76    HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
77
78    virtual TextPosition textPosition() const override;
79
80    virtual void suspendScheduledTasks() override;
81    virtual void resumeScheduledTasks() override;
82
83protected:
84    virtual void insert(const SegmentedString&) override;
85    virtual void append(PassRefPtr<StringImpl>) override;
86    virtual void finish() override;
87
88    explicit HTMLDocumentParser(HTMLDocument&);
89    HTMLDocumentParser(DocumentFragment&, Element* contextElement, ParserContentPolicy);
90
91    HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
92
93    void forcePlaintextForTextDocument();
94
95private:
96    static PassRefPtr<HTMLDocumentParser> create(DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
97    {
98        return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy));
99    }
100
101    // DocumentParser
102    virtual void detach() override;
103    virtual bool hasInsertionPoint() override;
104    virtual bool processingData() const override;
105    virtual void prepareToStopParsing() override;
106    virtual void stopParsing() override;
107    virtual bool isWaitingForScripts() const override;
108    virtual bool isExecutingScript() const override;
109    virtual void executeScriptsWaitingForStylesheets() override;
110
111    // HTMLScriptRunnerHost
112    virtual void watchForLoad(CachedResource*) override;
113    virtual void stopWatchingForLoad(CachedResource*) override;
114    virtual HTMLInputStream& inputStream() override { return m_input; }
115    virtual bool hasPreloadScanner() const override { return m_preloadScanner.get(); }
116    virtual void appendCurrentInputStreamToPreloadScannerAndScan() override;
117
118    // CachedResourceClient
119    virtual void notifyFinished(CachedResource*) override;
120
121    Document* contextForParsingSession();
122
123    enum SynchronousMode {
124        AllowYield,
125        ForceSynchronous,
126    };
127    bool canTakeNextToken(SynchronousMode, PumpSession&);
128    void pumpTokenizer(SynchronousMode);
129    void pumpTokenizerIfPossible(SynchronousMode);
130    void constructTreeFromHTMLToken(HTMLToken&);
131
132    void runScriptsForPausedTreeBuilder();
133    void resumeParsingAfterScriptExecution();
134
135    void attemptToEnd();
136    void endIfDelayed();
137    void attemptToRunDeferredScriptsAndEnd();
138    void end();
139
140    bool isParsingFragment() const;
141    bool isScheduledForResume() const;
142    bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
143    bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
144
145    HTMLToken& token() { return *m_token.get(); }
146
147    HTMLParserOptions m_options;
148    HTMLInputStream m_input;
149
150    std::unique_ptr<HTMLToken> m_token;
151    std::unique_ptr<HTMLTokenizer> m_tokenizer;
152    std::unique_ptr<HTMLScriptRunner> m_scriptRunner;
153    std::unique_ptr<HTMLTreeBuilder> m_treeBuilder;
154    std::unique_ptr<HTMLPreloadScanner> m_preloadScanner;
155    std::unique_ptr<HTMLPreloadScanner> m_insertionPreloadScanner;
156    std::unique_ptr<HTMLParserScheduler> m_parserScheduler;
157    HTMLSourceTracker m_sourceTracker;
158    TextPosition m_textPosition;
159    XSSAuditor m_xssAuditor;
160    XSSAuditorDelegate m_xssAuditorDelegate;
161
162    std::unique_ptr<HTMLResourcePreloader> m_preloader;
163
164    bool m_endWasDelayed;
165    bool m_haveBackgroundParser;
166    unsigned m_pumpSessionNestingLevel;
167};
168
169}
170
171#endif
172