1/*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB.  If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26#include "config.h"
27#include "XMLDocumentParser.h"
28
29#include "CDATASection.h"
30#include "CachedScript.h"
31#include "Comment.h"
32#include "CachedResourceLoader.h"
33#include "Document.h"
34#include "DocumentFragment.h"
35#include "DocumentType.h"
36#include "Frame.h"
37#include "FrameLoader.h"
38#include "FrameView.h"
39#include "HTMLLinkElement.h"
40#include "HTMLNames.h"
41#include "HTMLStyleElement.h"
42#include "ImageLoader.h"
43#include "ProcessingInstruction.h"
44#include "ResourceError.h"
45#include "ResourceRequest.h"
46#include "ResourceResponse.h"
47#include "SVGNames.h"
48#include "SVGStyleElement.h"
49#include "ScriptElement.h"
50#include "ScriptSourceCode.h"
51#include "TextResourceDecoder.h"
52#include "TreeDepthLimit.h"
53#include <wtf/Ref.h>
54#include <wtf/StringExtras.h>
55#include <wtf/Threading.h>
56#include <wtf/Vector.h>
57
58namespace WebCore {
59
60using namespace HTMLNames;
61
62void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
63{
64    ASSERT(n);
65    ASSERT(m_currentNode);
66    if (n != document())
67        n->ref();
68    m_currentNodeStack.append(m_currentNode);
69    m_currentNode = n;
70    if (m_currentNodeStack.size() > maxDOMTreeDepth)
71        handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
72}
73
74void XMLDocumentParser::popCurrentNode()
75{
76    if (!m_currentNode)
77        return;
78    ASSERT(m_currentNodeStack.size());
79
80    if (m_currentNode != document())
81        m_currentNode->deref();
82
83    m_currentNode = m_currentNodeStack.last();
84    m_currentNodeStack.removeLast();
85}
86
87void XMLDocumentParser::clearCurrentNodeStack()
88{
89    if (m_currentNode && m_currentNode != document())
90        m_currentNode->deref();
91    m_currentNode = 0;
92    m_leafTextNode = 0;
93
94    if (m_currentNodeStack.size()) { // Aborted parsing.
95        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
96            m_currentNodeStack[i]->deref();
97        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
98            m_currentNodeStack[0]->deref();
99        m_currentNodeStack.clear();
100    }
101}
102
103void XMLDocumentParser::insert(const SegmentedString&)
104{
105    ASSERT_NOT_REACHED();
106}
107
108void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
109{
110    SegmentedString source(inputSource);
111    if (m_sawXSLTransform || !m_sawFirstElement)
112        m_originalSourceForTransform.append(source);
113
114    if (isStopped() || m_sawXSLTransform)
115        return;
116
117    if (m_parserPaused) {
118        m_pendingSrc.append(source);
119        return;
120    }
121
122    doWrite(source.toString());
123
124    // After parsing, go ahead and dispatch image beforeload events.
125    ImageLoader::dispatchPendingBeforeLoadEvents();
126}
127
128void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* m, TextPosition position)
129{
130    if (!m_xmlErrors)
131        m_xmlErrors = std::make_unique<XMLErrors>(document());
132    m_xmlErrors->handleError(type, m, position);
133    if (type != XMLErrors::warning)
134        m_sawError = true;
135    if (type == XMLErrors::fatal)
136        stopParsing();
137}
138
139void XMLDocumentParser::enterText()
140{
141    ASSERT(m_bufferedText.size() == 0);
142    ASSERT(!m_leafTextNode);
143    m_leafTextNode = Text::create(m_currentNode->document(), "");
144    m_currentNode->parserAppendChild(m_leafTextNode.get());
145}
146
147static inline String toString(const xmlChar* string, size_t size)
148{
149    return String::fromUTF8(reinterpret_cast<const char*>(string), size);
150}
151
152
153void XMLDocumentParser::exitText()
154{
155    if (isStopped())
156        return;
157
158    if (!m_leafTextNode)
159        return;
160
161    m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()), IGNORE_EXCEPTION);
162    Vector<xmlChar> empty;
163    m_bufferedText.swap(empty);
164
165    m_leafTextNode = 0;
166}
167
168void XMLDocumentParser::detach()
169{
170    clearCurrentNodeStack();
171    ScriptableDocumentParser::detach();
172}
173
174void XMLDocumentParser::end()
175{
176    // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
177    // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
178    ASSERT(!m_parsingFragment);
179
180    doEnd();
181
182    // doEnd() call above can detach the parser and null out its document.
183    // In that case, we just bail out.
184    if (isDetached())
185        return;
186
187    // doEnd() could process a script tag, thus pausing parsing.
188    if (m_parserPaused)
189        return;
190
191    if (m_sawError)
192        insertErrorMessageBlock();
193    else {
194        exitText();
195        document()->styleResolverChanged(RecalcStyleImmediately);
196    }
197
198    if (isParsing())
199        prepareToStopParsing();
200    document()->setReadyState(Document::Interactive);
201    clearCurrentNodeStack();
202    document()->finishedParsing();
203}
204
205void XMLDocumentParser::finish()
206{
207    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
208    // makes sense to call any methods on DocumentParser once it's been stopped.
209    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
210
211    if (m_parserPaused)
212        m_finishCalled = true;
213    else
214        end();
215}
216
217void XMLDocumentParser::insertErrorMessageBlock()
218{
219    ASSERT(m_xmlErrors);
220    m_xmlErrors->insertErrorMessageBlock();
221}
222
223void XMLDocumentParser::notifyFinished(CachedResource* unusedResource)
224{
225    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
226    ASSERT(m_pendingScript->accessCount() > 0);
227
228    ScriptSourceCode sourceCode(m_pendingScript.get());
229    bool errorOccurred = m_pendingScript->errorOccurred();
230    bool wasCanceled = m_pendingScript->wasCanceled();
231
232    m_pendingScript->removeClient(this);
233    m_pendingScript = 0;
234
235    RefPtr<Element> e = m_scriptElement;
236    m_scriptElement = 0;
237
238    ScriptElement* scriptElement = toScriptElementIfPossible(e.get());
239    ASSERT(scriptElement);
240
241    // JavaScript can detach this parser, make sure it's kept alive even if detached.
242    Ref<XMLDocumentParser> protect(*this);
243
244    if (errorOccurred)
245        scriptElement->dispatchErrorEvent();
246    else if (!wasCanceled) {
247        scriptElement->executeScript(sourceCode);
248        scriptElement->dispatchLoadEvent();
249    }
250
251    m_scriptElement = 0;
252
253    if (!isDetached() && !m_requestingScript)
254        resumeParsing();
255}
256
257bool XMLDocumentParser::isWaitingForScripts() const
258{
259    return m_pendingScript;
260}
261
262void XMLDocumentParser::pauseParsing()
263{
264    if (m_parsingFragment)
265        return;
266
267    m_parserPaused = true;
268}
269
270bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
271{
272    if (!chunk.length())
273        return true;
274
275    // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
276    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
277    // For now we have a hack for script/style innerHTML support:
278    if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag.localName()) || contextElement->hasLocalName(HTMLNames::styleTag.localName()))) {
279        fragment.parserAppendChild(fragment.document().createTextNode(chunk));
280        return true;
281    }
282
283    RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
284    bool wellFormed = parser->appendFragmentSource(chunk);
285    // Do not call finish(). The finish() and doEnd() implementations touch the main document and loader and can cause crashes in the fragment case.
286    parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
287    return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than Document::wellFormed().
288}
289
290} // namespace WebCore
291