1/*
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27
28#if ENABLE(THREADED_HTML_PARSER)
29
30#include "HTMLTreeBuilderSimulator.h"
31
32#include "HTMLDocumentParser.h"
33#include "HTMLNames.h"
34#include "HTMLParserIdioms.h"
35#include "HTMLTokenizer.h"
36#include "HTMLTreeBuilder.h"
37#include "MathMLNames.h"
38#include "SVGNames.h"
39
40namespace WebCore {
41
42using namespace HTMLNames;
43
44static bool tokenExitsForeignContent(const CompactHTMLToken& token)
45{
46    // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch.
47    const HTMLIdentifier& tagName = token.data();
48    return threadSafeHTMLNamesMatch(tagName, bTag)
49        || threadSafeHTMLNamesMatch(tagName, bigTag)
50        || threadSafeHTMLNamesMatch(tagName, blockquoteTag)
51        || threadSafeHTMLNamesMatch(tagName, bodyTag)
52        || threadSafeHTMLNamesMatch(tagName, brTag)
53        || threadSafeHTMLNamesMatch(tagName, centerTag)
54        || threadSafeHTMLNamesMatch(tagName, codeTag)
55        || threadSafeHTMLNamesMatch(tagName, ddTag)
56        || threadSafeHTMLNamesMatch(tagName, divTag)
57        || threadSafeHTMLNamesMatch(tagName, dlTag)
58        || threadSafeHTMLNamesMatch(tagName, dtTag)
59        || threadSafeHTMLNamesMatch(tagName, emTag)
60        || threadSafeHTMLNamesMatch(tagName, embedTag)
61        || threadSafeHTMLNamesMatch(tagName, h1Tag)
62        || threadSafeHTMLNamesMatch(tagName, h2Tag)
63        || threadSafeHTMLNamesMatch(tagName, h3Tag)
64        || threadSafeHTMLNamesMatch(tagName, h4Tag)
65        || threadSafeHTMLNamesMatch(tagName, h5Tag)
66        || threadSafeHTMLNamesMatch(tagName, h6Tag)
67        || threadSafeHTMLNamesMatch(tagName, headTag)
68        || threadSafeHTMLNamesMatch(tagName, hrTag)
69        || threadSafeHTMLNamesMatch(tagName, iTag)
70        || threadSafeHTMLNamesMatch(tagName, imgTag)
71        || threadSafeHTMLNamesMatch(tagName, liTag)
72        || threadSafeHTMLNamesMatch(tagName, listingTag)
73        || threadSafeHTMLNamesMatch(tagName, menuTag)
74        || threadSafeHTMLNamesMatch(tagName, metaTag)
75        || threadSafeHTMLNamesMatch(tagName, nobrTag)
76        || threadSafeHTMLNamesMatch(tagName, olTag)
77        || threadSafeHTMLNamesMatch(tagName, pTag)
78        || threadSafeHTMLNamesMatch(tagName, preTag)
79        || threadSafeHTMLNamesMatch(tagName, rubyTag)
80        || threadSafeHTMLNamesMatch(tagName, sTag)
81        || threadSafeHTMLNamesMatch(tagName, smallTag)
82        || threadSafeHTMLNamesMatch(tagName, spanTag)
83        || threadSafeHTMLNamesMatch(tagName, strongTag)
84        || threadSafeHTMLNamesMatch(tagName, strikeTag)
85        || threadSafeHTMLNamesMatch(tagName, subTag)
86        || threadSafeHTMLNamesMatch(tagName, supTag)
87        || threadSafeHTMLNamesMatch(tagName, tableTag)
88        || threadSafeHTMLNamesMatch(tagName, ttTag)
89        || threadSafeHTMLNamesMatch(tagName, uTag)
90        || threadSafeHTMLNamesMatch(tagName, ulTag)
91        || threadSafeHTMLNamesMatch(tagName, varTag)
92        || (threadSafeHTMLNamesMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)));
93}
94
95static bool tokenExitsSVG(const CompactHTMLToken& token)
96{
97    // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive.
98    return equalIgnoringCaseNonNull(token.data().asStringImpl(), SVGNames::foreignObjectTag.localName().impl());
99}
100
101static bool tokenExitsMath(const CompactHTMLToken& token)
102{
103    // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch.
104    const HTMLIdentifier& tagName = token.data();
105    return threadSafeMatch(tagName, MathMLNames::miTag)
106        || threadSafeMatch(tagName, MathMLNames::moTag)
107        || threadSafeMatch(tagName, MathMLNames::mnTag)
108        || threadSafeMatch(tagName, MathMLNames::msTag)
109        || threadSafeMatch(tagName, MathMLNames::mtextTag);
110}
111
112HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options)
113    : m_options(options)
114{
115    m_namespaceStack.append(HTML);
116}
117
118HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder)
119{
120    ASSERT(isMainThread());
121    State namespaceStack;
122    for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) {
123        Namespace currentNamespace = HTML;
124        if (record->namespaceURI() == SVGNames::svgNamespaceURI)
125            currentNamespace = SVG;
126        else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI)
127            currentNamespace = MathML;
128
129        if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace)
130            namespaceStack.append(currentNamespace);
131    }
132    namespaceStack.reverse();
133    return namespaceStack;
134}
135
136bool HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer)
137{
138    if (token.type() == HTMLToken::StartTag) {
139        const HTMLIdentifier& tagName = token.data();
140        if (threadSafeMatch(tagName, SVGNames::svgTag))
141            m_namespaceStack.append(SVG);
142        if (threadSafeMatch(tagName, MathMLNames::mathTag))
143            m_namespaceStack.append(MathML);
144        if (inForeignContent() && tokenExitsForeignContent(token))
145            m_namespaceStack.removeLast();
146        if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token))
147            || (m_namespaceStack.last() == MathML && tokenExitsMath(token)))
148            m_namespaceStack.append(HTML);
149        if (!inForeignContent()) {
150            // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches.
151            if (threadSafeHTMLNamesMatch(tagName, textareaTag) || threadSafeHTMLNamesMatch(tagName, titleTag))
152                tokenizer->setState(HTMLTokenizer::RCDATAState);
153            else if (threadSafeHTMLNamesMatch(tagName, plaintextTag))
154                tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
155            else if (threadSafeHTMLNamesMatch(tagName, scriptTag))
156                tokenizer->setState(HTMLTokenizer::ScriptDataState);
157            else if (threadSafeHTMLNamesMatch(tagName, styleTag)
158                || threadSafeHTMLNamesMatch(tagName, iframeTag)
159                || threadSafeHTMLNamesMatch(tagName, xmpTag)
160                || (threadSafeHTMLNamesMatch(tagName, noembedTag) && m_options.pluginsEnabled)
161                || threadSafeHTMLNamesMatch(tagName, noframesTag)
162                || (threadSafeHTMLNamesMatch(tagName, noscriptTag) && m_options.scriptEnabled))
163                tokenizer->setState(HTMLTokenizer::RAWTEXTState);
164        }
165    }
166
167    if (token.type() == HTMLToken::EndTag) {
168        const HTMLIdentifier& tagName = token.data();
169        if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
170            || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
171            || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token))
172            || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token)))
173            m_namespaceStack.removeLast();
174        if (threadSafeHTMLNamesMatch(tagName, scriptTag)) {
175            if (!inForeignContent())
176                tokenizer->setState(HTMLTokenizer::DataState);
177            return false;
178        }
179    }
180
181    // FIXME: Also setForceNullCharacterReplacement when in text mode.
182    tokenizer->setForceNullCharacterReplacement(inForeignContent());
183    tokenizer->setShouldAllowCDATA(inForeignContent());
184    return true;
185}
186
187}
188
189#endif // ENABLE(THREADED_HTML_PARSER)
190