1/*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB.  If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26#include "config.h"
27#include "XMLDocumentParser.h"
28
29#include "CDATASection.h"
30#include "CachedScript.h"
31#include "Comment.h"
32#include "CachedResourceLoader.h"
33#include "Document.h"
34#include "DocumentFragment.h"
35#include "DocumentType.h"
36#include "Frame.h"
37#include "FrameLoader.h"
38#include "FrameView.h"
39#include "HTMLLinkElement.h"
40#include "HTMLNames.h"
41#include "HTMLStyleElement.h"
42#include "ImageLoader.h"
43#include "ProcessingInstruction.h"
44#include "ResourceError.h"
45#include "ResourceRequest.h"
46#include "ResourceResponse.h"
47#include "ScriptElement.h"
48#include "ScriptSourceCode.h"
49#include "ScriptValue.h"
50#include "TextResourceDecoder.h"
51#include "TreeDepthLimit.h"
52#include "XMLErrors.h"
53#include <wtf/StringExtras.h>
54#include <wtf/Threading.h>
55#include <wtf/Vector.h>
56
57#if ENABLE(SVG)
58#include "SVGNames.h"
59#include "SVGStyleElement.h"
60#endif
61
62using namespace std;
63
64namespace WebCore {
65
66using namespace HTMLNames;
67
68void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
69{
70    ASSERT(n);
71    ASSERT(m_currentNode);
72    if (n != document())
73        n->ref();
74    m_currentNodeStack.append(m_currentNode);
75    m_currentNode = n;
76    if (m_currentNodeStack.size() > maxDOMTreeDepth)
77        handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
78}
79
80void XMLDocumentParser::popCurrentNode()
81{
82    if (!m_currentNode)
83        return;
84    ASSERT(m_currentNodeStack.size());
85
86    if (m_currentNode != document())
87        m_currentNode->deref();
88
89    m_currentNode = m_currentNodeStack.last();
90    m_currentNodeStack.removeLast();
91}
92
93void XMLDocumentParser::clearCurrentNodeStack()
94{
95    if (m_currentNode && m_currentNode != document())
96        m_currentNode->deref();
97    m_currentNode = 0;
98    m_leafTextNode = 0;
99
100    if (m_currentNodeStack.size()) { // Aborted parsing.
101        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
102            m_currentNodeStack[i]->deref();
103        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
104            m_currentNodeStack[0]->deref();
105        m_currentNodeStack.clear();
106    }
107}
108
// Not expected to be called on an XMLDocumentParser: the body asserts
// unconditionally and ignores the supplied SegmentedString.
void XMLDocumentParser::insert(const SegmentedString&)
{
    ASSERT_NOT_REACHED();
}
113
114void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
115{
116    SegmentedString source(inputSource);
117    if (m_sawXSLTransform || !m_sawFirstElement)
118        m_originalSourceForTransform.append(source);
119
120    if (isStopped() || m_sawXSLTransform)
121        return;
122
123    if (m_parserPaused) {
124        m_pendingSrc.append(source);
125        return;
126    }
127
128    doWrite(source.toString());
129
130    // After parsing, go ahead and dispatch image beforeload events.
131    ImageLoader::dispatchPendingBeforeLoadEvents();
132}
133
134void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* m, TextPosition position)
135{
136    m_xmlErrors.handleError(type, m, position);
137    if (type != XMLErrors::warning)
138        m_sawError = true;
139    if (type == XMLErrors::fatal)
140        stopParsing();
141}
142
143void XMLDocumentParser::enterText()
144{
145#if !USE(QXMLSTREAM)
146    ASSERT(m_bufferedText.size() == 0);
147#endif
148    ASSERT(!m_leafTextNode);
149    m_leafTextNode = Text::create(m_currentNode->document(), "");
150    m_currentNode->parserAppendChild(m_leafTextNode.get());
151}
152
153#if !USE(QXMLSTREAM)
154static inline String toString(const xmlChar* string, size_t size)
155{
156    return String::fromUTF8(reinterpret_cast<const char*>(string), size);
157}
158#endif
159
160
161void XMLDocumentParser::exitText()
162{
163    if (isStopped())
164        return;
165
166    if (!m_leafTextNode)
167        return;
168
169#if !USE(QXMLSTREAM)
170    m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()), IGNORE_EXCEPTION);
171    Vector<xmlChar> empty;
172    m_bufferedText.swap(empty);
173#endif
174
175    if (m_view && m_leafTextNode->parentNode() && m_leafTextNode->parentNode()->attached()
176        && !m_leafTextNode->attached())
177        m_leafTextNode->attach();
178
179    m_leafTextNode = 0;
180}
181
// Releases the node references held through the current-node stack, then
// hands off to ScriptableDocumentParser::detach().
void XMLDocumentParser::detach()
{
    clearCurrentNodeStack();
    ScriptableDocumentParser::detach();
}
187
188void XMLDocumentParser::end()
189{
190    // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
191    // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
192    ASSERT(!m_parsingFragment);
193
194    doEnd();
195
196    // doEnd() call above can detach the parser and null out its document.
197    // In that case, we just bail out.
198    if (isDetached())
199        return;
200
201    // doEnd() could process a script tag, thus pausing parsing.
202    if (m_parserPaused)
203        return;
204
205    if (m_sawError)
206        insertErrorMessageBlock();
207    else {
208        exitText();
209        document()->styleResolverChanged(RecalcStyleImmediately);
210    }
211
212    if (isParsing())
213        prepareToStopParsing();
214    document()->setReadyState(Document::Interactive);
215    clearCurrentNodeStack();
216    document()->finishedParsing();
217}
218
219void XMLDocumentParser::finish()
220{
221    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
222    // makes sense to call any methods on DocumentParser once it's been stopped.
223    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
224
225    if (m_parserPaused)
226        m_finishCalled = true;
227    else
228        end();
229}
230
231void XMLDocumentParser::insertErrorMessageBlock()
232{
233#if USE(QXMLSTREAM)
234    if (m_parsingFragment)
235        return;
236#endif
237
238    m_xmlErrors.insertErrorMessageBlock();
239}
240
241void XMLDocumentParser::notifyFinished(CachedResource* unusedResource)
242{
243    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
244    ASSERT(m_pendingScript->accessCount() > 0);
245
246    ScriptSourceCode sourceCode(m_pendingScript.get());
247    bool errorOccurred = m_pendingScript->errorOccurred();
248    bool wasCanceled = m_pendingScript->wasCanceled();
249
250    m_pendingScript->removeClient(this);
251    m_pendingScript = 0;
252
253    RefPtr<Element> e = m_scriptElement;
254    m_scriptElement = 0;
255
256    ScriptElement* scriptElement = toScriptElementIfPossible(e.get());
257    ASSERT(scriptElement);
258
259    // JavaScript can detach this parser, make sure it's kept alive even if detached.
260    RefPtr<XMLDocumentParser> protect(this);
261
262    if (errorOccurred)
263        scriptElement->dispatchErrorEvent();
264    else if (!wasCanceled) {
265        scriptElement->executeScript(sourceCode);
266        scriptElement->dispatchLoadEvent();
267    }
268
269    m_scriptElement = 0;
270
271    if (!isDetached() && !m_requestingScript)
272        resumeParsing();
273}
274
// Returns true while m_pendingScript is set; notifyFinished() clears it.
bool XMLDocumentParser::isWaitingForScripts() const
{
    return m_pendingScript;
}
279
280void XMLDocumentParser::pauseParsing()
281{
282    if (m_parsingFragment)
283        return;
284
285    m_parserPaused = true;
286}
287
288bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
289{
290    if (!chunk.length())
291        return true;
292
293    // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
294    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
295    // For now we have a hack for script/style innerHTML support:
296    if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
297        fragment->parserAppendChild(fragment->document()->createTextNode(chunk));
298        return true;
299    }
300
301    RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
302    bool wellFormed = parser->appendFragmentSource(chunk);
303    // Do not call finish().  Current finish() and doEnd() implementations touch the main Document/loader
304    // and can cause crashes in the fragment case.
305    parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
306    return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
307}
308
309} // namespace WebCore
310