1/* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#ifndef HTMLTreeBuilder_h 28#define HTMLTreeBuilder_h 29 30#include "FragmentScriptingPermission.h" 31#include "HTMLConstructionSite.h" 32#include "HTMLElementStack.h" 33#include "HTMLFormattingElementList.h" 34#include "HTMLParserOptions.h" 35#include "HTMLTokenizer.h" 36#include <wtf/Noncopyable.h> 37#include <wtf/PassRefPtr.h> 38#include <wtf/RefPtr.h> 39#include <wtf/Vector.h> 40#include <wtf/text/StringBuilder.h> 41#include <wtf/text/TextPosition.h> 42 43namespace WebCore { 44 45class AtomicHTMLToken; 46class Document; 47class DocumentFragment; 48class Element; 49class Frame; 50class HTMLToken; 51class HTMLDocument; 52class Node; 53class HTMLDocumentParser; 54 55class HTMLTreeBuilder { 56 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED; 57public: 58 HTMLTreeBuilder(HTMLDocumentParser&, HTMLDocument&, ParserContentPolicy, const HTMLParserOptions&); 59 HTMLTreeBuilder(HTMLDocumentParser&, DocumentFragment&, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&); 60 ~HTMLTreeBuilder(); 61 62 const HTMLElementStack* openElements() const { return m_tree.openElements(); } 63 64 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } 65#if ENABLE(TEMPLATE_ELEMENT) 66 bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); } 67#else 68 bool isParsingTemplateContents() const { return false; } 69#endif 70 bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); } 71 72 void detach(); 73 74 void constructTree(AtomicHTMLToken*); 75 76 bool hasParserBlockingScript() const { return !!m_scriptToProcess; } 77 // Must be called to take the parser-blocking script before calling the parser again. 78 PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition); 79 80 // Done, close any open tags, etc. 81 void finished(); 82 83 void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; } 84 85private: 86 class ExternalCharacterTokenBuffer; 87 // Represents HTML5 "insertion mode" 88 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 89 enum class InsertionMode { 90 Initial, 91 BeforeHTML, 92 BeforeHead, 93 InHead, 94 InHeadNoscript, 95 AfterHead, 96 TemplateContents, 97 InBody, 98 Text, 99 InTable, 100 InTableText, 101 InCaption, 102 InColumnGroup, 103 InTableBody, 104 InRow, 105 InCell, 106 InSelect, 107 InSelectInTable, 108 AfterBody, 109 InFrameset, 110 AfterFrameset, 111 AfterAfterBody, 112 AfterAfterFrameset, 113 }; 114 115#if ENABLE(TELEPHONE_NUMBER_DETECTION) 116 void insertPhoneNumberLink(const String&); 117 void linkifyPhoneNumbers(const String&); 118#endif 119 120 void processToken(AtomicHTMLToken*); 121 122 void processDoctypeToken(AtomicHTMLToken*); 123 void processStartTag(AtomicHTMLToken*); 124 void processEndTag(AtomicHTMLToken*); 125 void processComment(AtomicHTMLToken*); 126 void processCharacter(AtomicHTMLToken*); 127 void processEndOfFile(AtomicHTMLToken*); 128 129 bool processStartTagForInHead(AtomicHTMLToken*); 130 void processStartTagForInBody(AtomicHTMLToken*); 131 void processStartTagForInTable(AtomicHTMLToken*); 132 void processEndTagForInBody(AtomicHTMLToken*); 133 void processEndTagForInTable(AtomicHTMLToken*); 134 void processEndTagForInTableBody(AtomicHTMLToken*); 135 void processEndTagForInRow(AtomicHTMLToken*); 136 void processEndTagForInCell(AtomicHTMLToken*); 137 138 void processIsindexStartTagForInBody(AtomicHTMLToken*); 139 void processHtmlStartTagForInBody(AtomicHTMLToken*); 140 bool processBodyEndTagForInBody(AtomicHTMLToken*); 141 bool processTableEndTagForInTable(); 142 bool processCaptionEndTagForInCaption(); 143 bool processColgroupEndTagForInColumnGroup(); 144 bool processTrEndTagForInRow(); 145 // FIXME: This function should be inlined into its one call site or it 146 // needs to assert which tokens it can be called with. 147 void processAnyOtherEndTagForInBody(AtomicHTMLToken*); 148 149 void processCharacterBuffer(ExternalCharacterTokenBuffer&); 150 inline void processCharacterBufferForInBody(ExternalCharacterTokenBuffer&); 151 152 void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>()); 153 void processFakeEndTag(const QualifiedName&); 154 void processFakeEndTag(const AtomicString&); 155 void processFakeCharacters(const String&); 156 void processFakePEndTagIfPInButtonScope(); 157 158 void processGenericRCDATAStartTag(AtomicHTMLToken*); 159 void processGenericRawTextStartTag(AtomicHTMLToken*); 160 void processScriptStartTag(AtomicHTMLToken*); 161 162 // Default processing for the different insertion modes. 163 void defaultForInitial(); 164 void defaultForBeforeHTML(); 165 void defaultForBeforeHead(); 166 void defaultForInHead(); 167 void defaultForInHeadNoscript(); 168 void defaultForAfterHead(); 169 void defaultForInTableText(); 170 171 inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*); 172 void processTokenInForeignContent(AtomicHTMLToken*); 173 174 Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*); 175 176 void callTheAdoptionAgency(AtomicHTMLToken*); 177 178 void closeTheCell(); 179 180 template <bool shouldClose(const HTMLStackItem*)> 181 void processCloseWhenNestedTag(AtomicHTMLToken*); 182 183 void parseError(AtomicHTMLToken*); 184 185 InsertionMode insertionMode() const { return m_insertionMode; } 186 void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; } 187 188 void resetInsertionModeAppropriately(); 189 190#if ENABLE(TEMPLATE_ELEMENT) 191 void processTemplateStartTag(AtomicHTMLToken*); 192 bool processTemplateEndTag(AtomicHTMLToken*); 193 bool processEndOfFileForInTemplateContents(AtomicHTMLToken*); 194#endif 195 196 class FragmentParsingContext { 197 WTF_MAKE_NONCOPYABLE(FragmentParsingContext); 198 public: 199 FragmentParsingContext(); 200 FragmentParsingContext(DocumentFragment&, Element* contextElement); 201 ~FragmentParsingContext(); 202 203 DocumentFragment* fragment() const { return m_fragment; } 204 Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; } 205 206 private: 207 DocumentFragment* m_fragment; 208 Element* m_contextElement; 209 }; 210 211 bool m_framesetOk; 212#ifndef NDEBUG 213 bool m_isAttached; 214#endif 215 FragmentParsingContext m_fragmentContext; 216 HTMLConstructionSite m_tree; 217 218 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 219 InsertionMode m_insertionMode; 220 221 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode 222 InsertionMode m_originalInsertionMode; 223 224#if ENABLE(TEMPLATE_ELEMENT) 225 Vector<InsertionMode> m_templateInsertionModes; 226#endif 227 228 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens 229 StringBuilder m_pendingTableCharacters; 230 231 bool m_shouldSkipLeadingNewline; 232 233 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer 234 // from within parser actions. We also need it to track the current position. 235 HTMLDocumentParser& m_parser; 236 237 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. 238 TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing. 239 240 HTMLParserOptions m_options; 241}; 242 243} 244 245#endif 246