1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#ifndef HTMLTreeBuilder_h
28#define HTMLTreeBuilder_h
29
30#include "FragmentScriptingPermission.h"
31#include "HTMLConstructionSite.h"
32#include "HTMLElementStack.h"
33#include "HTMLFormattingElementList.h"
34#include "HTMLParserOptions.h"
35#include "HTMLTokenizer.h"
36#include <wtf/Noncopyable.h>
37#include <wtf/PassRefPtr.h>
38#include <wtf/RefPtr.h>
39#include <wtf/Vector.h>
40#include <wtf/text/StringBuilder.h>
41#include <wtf/text/TextPosition.h>
42
43namespace WebCore {
44
// Forward declarations.
// The declarations visible in this header refer to AtomicHTMLToken,
// DocumentFragment, Element, HTMLDocument and HTMLDocumentParser (by pointer,
// by reference, or as a smart-pointer template argument).
// Document, Frame, HTMLToken and Node are not referenced by name in this
// header. NOTE(review): they may be kept for the benefit of including files;
// confirm before removing any of them.
45class AtomicHTMLToken;
46class Document;
47class DocumentFragment;
48class Element;
49class Frame;
50class HTMLToken;
51class HTMLDocument;
52class Node;
53class HTMLDocumentParser;
54
// Declares the HTML tree builder. It receives AtomicHTMLToken objects through
// constructTree() and keeps track of the HTML5 "insertion mode" (see
// InsertionMode below) together with an HTMLConstructionSite (m_tree).
//
// The class is noncopyable. Only trivial accessors are defined inline here;
// every other member function is merely declared and is defined elsewhere.
55class HTMLTreeBuilder {
56    WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED;
57public:
    // Two construction paths: one targeting an HTMLDocument, one targeting a
    // DocumentFragment plus a context element pointer. Whether a null
    // contextElement is accepted is not visible in this header.
58    HTMLTreeBuilder(HTMLDocumentParser&, HTMLDocument&, ParserContentPolicy, const HTMLParserOptions&);
59    HTMLTreeBuilder(HTMLDocumentParser&, DocumentFragment&, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
60    ~HTMLTreeBuilder();
61
    // Read-only access to the open element stack held by m_tree.
62    const HTMLElementStack* openElements() const { return m_tree.openElements(); }
63
    // True when the fragment context holds a non-null DocumentFragment.
64    bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
    // With TEMPLATE_ELEMENT enabled this asks the open element stack whether it
    // has a template in HTML scope; without it the answer is always false.
65#if ENABLE(TEMPLATE_ELEMENT)
66    bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
67#else
68    bool isParsingTemplateContents() const { return false; }
69#endif
    // Logical OR of the two predicates above.
70    bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
71
    // Defined elsewhere.
    // NOTE(review): presumably tied to the debug-only m_isAttached flag below;
    // confirm against the implementation.
72    void detach();
73
    // Hands one token to the tree builder. Defined elsewhere.
74    void constructTree(AtomicHTMLToken*);
75
    // True while m_scriptToProcess holds a non-null element.
76    bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
77    // Must be called to take the parser-blocking script before calling the parser again.
    // scriptStartPosition is a non-const reference.
    // NOTE(review): presumably an out-parameter filled from
    // m_scriptToProcessStartPosition; confirm against the implementation.
78    PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
79
80    // Done, close any open tags, etc.
81    void finished();
82
    // Stores the flag in m_shouldSkipLeadingNewline; how the flag is consumed
    // is not visible in this header.
83    void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
84
85private:
    // Nested helper type, only declared here; it is passed by reference to the
    // processCharacterBuffer* member functions below.
86    class ExternalCharacterTokenBuffer;
87    // Represents HTML5 "insertion mode"
88    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
89    enum class InsertionMode {
90        Initial,
91        BeforeHTML,
92        BeforeHead,
93        InHead,
94        InHeadNoscript,
95        AfterHead,
96        TemplateContents,
97        InBody,
98        Text,
99        InTable,
100        InTableText,
101        InCaption,
102        InColumnGroup,
103        InTableBody,
104        InRow,
105        InCell,
106        InSelect,
107        InSelectInTable,
108        AfterBody,
109        InFrameset,
110        AfterFrameset,
111        AfterAfterBody,
112        AfterAfterFrameset,
113    };
114
    // Only declared when TELEPHONE_NUMBER_DETECTION is enabled.
115#if ENABLE(TELEPHONE_NUMBER_DETECTION)
116    void insertPhoneNumberLink(const String&);
117    void linkifyPhoneNumbers(const String&);
118#endif
119
    // Token handlers. All are defined elsewhere; the names suggest one general
    // entry point followed by one handler per token kind.
120    void processToken(AtomicHTMLToken*);
121
122    void processDoctypeToken(AtomicHTMLToken*);
123    void processStartTag(AtomicHTMLToken*);
124    void processEndTag(AtomicHTMLToken*);
125    void processComment(AtomicHTMLToken*);
126    void processCharacter(AtomicHTMLToken*);
127    void processEndOfFile(AtomicHTMLToken*);
128
    // Handlers whose names carry an insertion mode ("ForInHead", "ForInBody", ...).
    // Note that processStartTagForInHead returns bool while its siblings return
    // void; the meaning of that result is not documented in this header.
129    bool processStartTagForInHead(AtomicHTMLToken*);
130    void processStartTagForInBody(AtomicHTMLToken*);
131    void processStartTagForInTable(AtomicHTMLToken*);
132    void processEndTagForInBody(AtomicHTMLToken*);
133    void processEndTagForInTable(AtomicHTMLToken*);
134    void processEndTagForInTableBody(AtomicHTMLToken*);
135    void processEndTagForInRow(AtomicHTMLToken*);
136    void processEndTagForInCell(AtomicHTMLToken*);
137
    // Handlers for one specific tag within one specific insertion mode. Several
    // return bool; the meaning of the result must be checked in the
    // implementation, since nothing here documents it.
138    void processIsindexStartTagForInBody(AtomicHTMLToken*);
139    void processHtmlStartTagForInBody(AtomicHTMLToken*);
140    bool processBodyEndTagForInBody(AtomicHTMLToken*);
141    bool processTableEndTagForInTable();
142    bool processCaptionEndTagForInCaption();
143    bool processColgroupEndTagForInColumnGroup();
144    bool processTrEndTagForInRow();
145    // FIXME: This function should be inlined into its one call site or it
146    // needs to assert which tokens it can be called with.
147    void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
148
149    void processCharacterBuffer(ExternalCharacterTokenBuffer&);
150    inline void processCharacterBufferForInBody(ExternalCharacterTokenBuffer&);
151
    // "Fake" token helpers: they take a tag name or string instead of a token.
    // processFakeStartTag defaults its attribute list to an empty Vector.
152    void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
153    void processFakeEndTag(const QualifiedName&);
154    void processFakeEndTag(const AtomicString&);
155    void processFakeCharacters(const String&);
156    void processFakePEndTagIfPInButtonScope();
157
158    void processGenericRCDATAStartTag(AtomicHTMLToken*);
159    void processGenericRawTextStartTag(AtomicHTMLToken*);
160    void processScriptStartTag(AtomicHTMLToken*);
161
162    // Default processing for the different insertion modes.
163    void defaultForInitial();
164    void defaultForBeforeHTML();
165    void defaultForBeforeHead();
166    void defaultForInHead();
167    void defaultForInHeadNoscript();
168    void defaultForAfterHead();
169    void defaultForInTableText();
170
171    inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
172    void processTokenInForeignContent(AtomicHTMLToken*);
173
174    Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
175
176    void callTheAdoptionAgency(AtomicHTMLToken*);
177
178    void closeTheCell();
179
    // Parameterised at compile time by a predicate function over HTMLStackItem.
180    template <bool shouldClose(const HTMLStackItem*)>
181    void processCloseWhenNestedTag(AtomicHTMLToken*);
182
183    void parseError(AtomicHTMLToken*);
184
    // Plain getter and setter for m_insertionMode.
185    InsertionMode insertionMode() const { return m_insertionMode; }
186    void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
187
188    void resetInsertionModeAppropriately();
189
    // Template-element handlers, only declared when TEMPLATE_ELEMENT is enabled.
190#if ENABLE(TEMPLATE_ELEMENT)
191    void processTemplateStartTag(AtomicHTMLToken*);
192    bool processTemplateEndTag(AtomicHTMLToken*);
193    bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
194#endif
195
    // Pairs a DocumentFragment with its context element. Both are stored as raw
    // pointers; who manages their lifetime is not visible in this header.
    // Noncopyable.
196    class FragmentParsingContext {
197        WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
198    public:
        // NOTE(review): the default constructor presumably leaves m_fragment
        // null, which isParsingFragment() relies on; confirm in the .cpp.
199        FragmentParsingContext();
200        FragmentParsingContext(DocumentFragment&, Element* contextElement);
201        ~FragmentParsingContext();
202
203        DocumentFragment* fragment() const { return m_fragment; }
        // Asserts that a fragment is present before handing out the context element.
204        Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
205
206    private:
207        DocumentFragment* m_fragment;
208        Element* m_contextElement;
209    };
210
    // Data members. C++ initialises them in the order declared here, and the
    // constructors are defined elsewhere, so reordering requires checking the
    // implementation.
    // NOTE(review): m_framesetOk presumably mirrors the HTML5 "frameset-ok"
    // flag; confirm against the implementation.
211    bool m_framesetOk;
    // m_isAttached exists only in builds where NDEBUG is not defined.
212#ifndef NDEBUG
213    bool m_isAttached;
214#endif
215    FragmentParsingContext m_fragmentContext;
216    HTMLConstructionSite m_tree;
217
218    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
219    InsertionMode m_insertionMode;
220
221    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
222    InsertionMode m_originalInsertionMode;
223
    // Only present when TEMPLATE_ELEMENT is enabled.
224#if ENABLE(TEMPLATE_ELEMENT)
225    Vector<InsertionMode> m_templateInsertionModes;
226#endif
227
228    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
229    StringBuilder m_pendingTableCharacters;
230
    // Written by setShouldSkipLeadingNewline().
231    bool m_shouldSkipLeadingNewline;
232
233    // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
234    // from within parser actions. We also need it to track the current position.
235    HTMLDocumentParser& m_parser;
236
237    RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
238    TextPosition m_scriptToProcessStartPosition; // Start position (a TextPosition) of the script tag needing processing.
239
240    HTMLParserOptions m_options;
241};
242
243}
244
245#endif
246