1/* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27 28#if ENABLE(THREADED_HTML_PARSER) 29 30#include "HTMLTreeBuilderSimulator.h" 31 32#include "HTMLDocumentParser.h" 33#include "HTMLNames.h" 34#include "HTMLParserIdioms.h" 35#include "HTMLTokenizer.h" 36#include "HTMLTreeBuilder.h" 37#include "MathMLNames.h" 38#include "SVGNames.h" 39 40namespace WebCore { 41 42using namespace HTMLNames; 43 44static bool tokenExitsForeignContent(const CompactHTMLToken& token) 45{ 46 // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch. 47 const HTMLIdentifier& tagName = token.data(); 48 return threadSafeHTMLNamesMatch(tagName, bTag) 49 || threadSafeHTMLNamesMatch(tagName, bigTag) 50 || threadSafeHTMLNamesMatch(tagName, blockquoteTag) 51 || threadSafeHTMLNamesMatch(tagName, bodyTag) 52 || threadSafeHTMLNamesMatch(tagName, brTag) 53 || threadSafeHTMLNamesMatch(tagName, centerTag) 54 || threadSafeHTMLNamesMatch(tagName, codeTag) 55 || threadSafeHTMLNamesMatch(tagName, ddTag) 56 || threadSafeHTMLNamesMatch(tagName, divTag) 57 || threadSafeHTMLNamesMatch(tagName, dlTag) 58 || threadSafeHTMLNamesMatch(tagName, dtTag) 59 || threadSafeHTMLNamesMatch(tagName, emTag) 60 || threadSafeHTMLNamesMatch(tagName, embedTag) 61 || threadSafeHTMLNamesMatch(tagName, h1Tag) 62 || threadSafeHTMLNamesMatch(tagName, h2Tag) 63 || threadSafeHTMLNamesMatch(tagName, h3Tag) 64 || threadSafeHTMLNamesMatch(tagName, h4Tag) 65 || threadSafeHTMLNamesMatch(tagName, h5Tag) 66 || threadSafeHTMLNamesMatch(tagName, h6Tag) 67 || threadSafeHTMLNamesMatch(tagName, headTag) 68 || threadSafeHTMLNamesMatch(tagName, hrTag) 69 || threadSafeHTMLNamesMatch(tagName, iTag) 70 || threadSafeHTMLNamesMatch(tagName, imgTag) 71 || threadSafeHTMLNamesMatch(tagName, liTag) 72 || threadSafeHTMLNamesMatch(tagName, listingTag) 73 || threadSafeHTMLNamesMatch(tagName, menuTag) 74 || threadSafeHTMLNamesMatch(tagName, metaTag) 75 || threadSafeHTMLNamesMatch(tagName, nobrTag) 76 || threadSafeHTMLNamesMatch(tagName, olTag) 77 || threadSafeHTMLNamesMatch(tagName, pTag) 78 || threadSafeHTMLNamesMatch(tagName, preTag) 79 || threadSafeHTMLNamesMatch(tagName, rubyTag) 80 || threadSafeHTMLNamesMatch(tagName, sTag) 81 || threadSafeHTMLNamesMatch(tagName, smallTag) 82 || threadSafeHTMLNamesMatch(tagName, spanTag) 83 || threadSafeHTMLNamesMatch(tagName, strongTag) 84 || threadSafeHTMLNamesMatch(tagName, strikeTag) 85 || threadSafeHTMLNamesMatch(tagName, subTag) 86 || threadSafeHTMLNamesMatch(tagName, supTag) 87 || threadSafeHTMLNamesMatch(tagName, tableTag) 88 || threadSafeHTMLNamesMatch(tagName, ttTag) 89 || threadSafeHTMLNamesMatch(tagName, uTag) 90 || threadSafeHTMLNamesMatch(tagName, ulTag) 91 || threadSafeHTMLNamesMatch(tagName, varTag) 92 || (threadSafeHTMLNamesMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr))); 93} 94 95static bool tokenExitsSVG(const CompactHTMLToken& token) 96{ 97 // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive. 98 return equalIgnoringCaseNonNull(token.data().asStringImpl(), SVGNames::foreignObjectTag.localName().impl()); 99} 100 101static bool tokenExitsMath(const CompactHTMLToken& token) 102{ 103 // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch. 104 const HTMLIdentifier& tagName = token.data(); 105 return threadSafeMatch(tagName, MathMLNames::miTag) 106 || threadSafeMatch(tagName, MathMLNames::moTag) 107 || threadSafeMatch(tagName, MathMLNames::mnTag) 108 || threadSafeMatch(tagName, MathMLNames::msTag) 109 || threadSafeMatch(tagName, MathMLNames::mtextTag); 110} 111 112HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options) 113 : m_options(options) 114{ 115 m_namespaceStack.append(HTML); 116} 117 118HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder) 119{ 120 ASSERT(isMainThread()); 121 State namespaceStack; 122 for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) { 123 Namespace currentNamespace = HTML; 124 if (record->namespaceURI() == SVGNames::svgNamespaceURI) 125 currentNamespace = SVG; 126 else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI) 127 currentNamespace = MathML; 128 129 if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace) 130 namespaceStack.append(currentNamespace); 131 } 132 namespaceStack.reverse(); 133 return namespaceStack; 134} 135 136bool HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer) 137{ 138 if (token.type() == HTMLToken::StartTag) { 139 const HTMLIdentifier& tagName = token.data(); 140 if (threadSafeMatch(tagName, SVGNames::svgTag)) 141 m_namespaceStack.append(SVG); 142 if (threadSafeMatch(tagName, MathMLNames::mathTag)) 143 m_namespaceStack.append(MathML); 144 if (inForeignContent() && tokenExitsForeignContent(token)) 145 m_namespaceStack.removeLast(); 146 if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token)) 147 || (m_namespaceStack.last() == MathML && tokenExitsMath(token))) 148 m_namespaceStack.append(HTML); 149 if (!inForeignContent()) { 150 // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches. 151 if (threadSafeHTMLNamesMatch(tagName, textareaTag) || threadSafeHTMLNamesMatch(tagName, titleTag)) 152 tokenizer->setState(HTMLTokenizer::RCDATAState); 153 else if (threadSafeHTMLNamesMatch(tagName, plaintextTag)) 154 tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 155 else if (threadSafeHTMLNamesMatch(tagName, scriptTag)) 156 tokenizer->setState(HTMLTokenizer::ScriptDataState); 157 else if (threadSafeHTMLNamesMatch(tagName, styleTag) 158 || threadSafeHTMLNamesMatch(tagName, iframeTag) 159 || threadSafeHTMLNamesMatch(tagName, xmpTag) 160 || (threadSafeHTMLNamesMatch(tagName, noembedTag) && m_options.pluginsEnabled) 161 || threadSafeHTMLNamesMatch(tagName, noframesTag) 162 || (threadSafeHTMLNamesMatch(tagName, noscriptTag) && m_options.scriptEnabled)) 163 tokenizer->setState(HTMLTokenizer::RAWTEXTState); 164 } 165 } 166 167 if (token.type() == HTMLToken::EndTag) { 168 const HTMLIdentifier& tagName = token.data(); 169 if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag)) 170 || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag)) 171 || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token)) 172 || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token))) 173 m_namespaceStack.removeLast(); 174 if (threadSafeHTMLNamesMatch(tagName, scriptTag)) { 175 if (!inForeignContent()) 176 tokenizer->setState(HTMLTokenizer::DataState); 177 return false; 178 } 179 } 180 181 // FIXME: Also setForceNullCharacterReplacement when in text mode. 182 tokenizer->setForceNullCharacterReplacement(inForeignContent()); 183 tokenizer->setShouldAllowCDATA(inForeignContent()); 184 return true; 185} 186 187} 188 189#endif // ENABLE(THREADED_HTML_PARSER) 190